diff --git a/.gitattributes b/.gitattributes index 698281e232638f16cdba61d2d28b47b4a1485831..f89f57b650e1819dad066b77740c82368f4668f9 100644 --- a/.gitattributes +++ b/.gitattributes @@ -93,3 +93,5 @@ onnx-rt/xclbins/strix/AMD_AIE2P_Nx4_Overlay.xclbin filter=lfs diff=lfs merge=lfs 1221-135766-0001.wav filter=lfs diff=lfs merge=lfs -text 1221-135766-0002.wav filter=lfs diff=lfs merge=lfs -text onnx-rt/bin/flexmlrt.dll filter=lfs diff=lfs merge=lfs -text +vaiml_par_0/0/buffer_info.json filter=lfs diff=lfs merge=lfs -text +vaiml_par_0/fused.viz.json filter=lfs diff=lfs merge=lfs -text diff --git a/aie_unsupported_original_ops.json b/aie_unsupported_original_ops.json new file mode 100644 index 0000000000000000000000000000000000000000..cf17b4cc247d16247dcab90b7fad04ca887a181a --- /dev/null +++ b/aie_unsupported_original_ops.json @@ -0,0 +1,166 @@ +[ + "/Add_111", + "/Add_112", + "/Add_129", + "/Add_130", + "/Add_147", + "/Add_148", + "/Add_165", + "/Add_166", + "/Add_183", + "/Add_184", + "/Add_201", + "/Add_202", + "/Add_21", + "/Add_219", + "/Add_22", + "/Add_220", + "/Add_237", + "/Add_238", + "/Add_255", + "/Add_256", + "/Add_3", + "/Add_39", + "/Add_4", + "/Add_40", + "/Add_57", + "/Add_58", + "/Add_75", + "/Add_76", + "/Add_93", + "/Add_94", + "/Cast", + "/Cast_10", + "/Cast_12", + "/Cast_15", + "/Cast_17", + "/Cast_2", + "/Cast_20", + "/Cast_22", + "/Cast_25", + "/Cast_27", + "/Cast_30", + "/Cast_32", + "/Cast_35", + "/Cast_37", + "/Cast_40", + "/Cast_42", + "/Cast_45", + "/Cast_47", + "/Cast_5", + "/Cast_50", + "/Cast_52", + "/Cast_55", + "/Cast_57", + "/Cast_60", + "/Cast_62", + "/Cast_65", + "/Cast_67", + "/Cast_7", + "/Cast_70", + "/Cast_72", + "/Concat_115", + "/Concat_176", + "/Concat_219", + "/Concat_298", + "/Concat_36", + "/Gather_1", + "/Gather_107", + "/Gather_129", + "/Gather_150", + "/Gather_171", + "/Gather_193", + "/Gather_214", + "/Gather_22", + "/Gather_236", + "/Gather_257", + "/Gather_278", + "/Gather_299", + "/Gather_44", + "/Gather_65", + "/Gather_86", + "/Reciprocal", + "/Reciprocal_1", + "/Reciprocal_10", + "/Reciprocal_11", + "/Reciprocal_12", + "/Reciprocal_13", + "/Reciprocal_14", + "/Reciprocal_2", + "/Reciprocal_3", + "/Reciprocal_4", + "/Reciprocal_5", + "/Reciprocal_6", + "/Reciprocal_7", + "/Reciprocal_8", + "/Reciprocal_9", + "/Unsqueeze", + "/Unsqueeze_116", + "/Unsqueeze_119", + "/Unsqueeze_120", + "/Unsqueeze_150", + "/Unsqueeze_153", + "/Unsqueeze_154", + "/Unsqueeze_184", + "/Unsqueeze_187", + "/Unsqueeze_188", + "/Unsqueeze_218", + "/Unsqueeze_219", + "/Unsqueeze_220", + "/Unsqueeze_221", + "/Unsqueeze_247", + "/Unsqueeze_250", + "/Unsqueeze_251", + "/Unsqueeze_281", + "/Unsqueeze_284", + "/Unsqueeze_285", + "/Unsqueeze_3", + "/Unsqueeze_315", + "/Unsqueeze_318", + "/Unsqueeze_319", + "/Unsqueeze_34", + "/Unsqueeze_349", + "/Unsqueeze_350", + "/Unsqueeze_351", + "/Unsqueeze_37", + "/Unsqueeze_371", + "/Unsqueeze_374", + "/Unsqueeze_375", + "/Unsqueeze_38", + "/Unsqueeze_4", + "/Unsqueeze_405", + "/Unsqueeze_408", + "/Unsqueeze_409", + "/Unsqueeze_439", + "/Unsqueeze_440", + "/Unsqueeze_454", + "/Unsqueeze_457", + "/Unsqueeze_458", + "/Unsqueeze_488", + "/Unsqueeze_491", + "/Unsqueeze_492", + "/Unsqueeze_522", + "/Unsqueeze_525", + "/Unsqueeze_526", + "/Unsqueeze_556", + "/Unsqueeze_559", + "/Unsqueeze_560", + "/Unsqueeze_590", + "/Unsqueeze_591", + "/Unsqueeze_592", + "/Unsqueeze_593", + "/Unsqueeze_68", + "/Unsqueeze_69", + "/Unsqueeze_82", + "/Unsqueeze_85", + "/Unsqueeze_86", + "Initializer_/Constant_1113_output_0", + "Initializer_/Constant_404_output_0", + "Initializer_/Constant_830_output_0", + "Initializer_/encoder_embed/Constant_7_output_0", + "Initializer_/encoder_pos_1/Constant_2_output_0", + "Initializer_/upsample_3/Constant_output_0", + "Initializer_gemm_output_shape_token_813", + "Initializer_onnx::Unsqueeze_910", + "gemm_output_reshape_token_815" +] diff --git a/context.json b/context.json new file mode 100644 index 0000000000000000000000000000000000000000..6b8437a67c66f5040a0a4cb79e49cec219b587d8 --- /dev/null +++ b/context.json @@ -0,0 +1,4263 @@ +{ + "metaDef": [ + { + "id": "vaiml_par_0", + "inputs": [ + "/Cast_10_output_0", + "/Cast_15_output_0", + "/Cast_20_output_0", + "/Cast_25_output_0", + "/Cast_30_output_0", + "/Cast_35_output_0", + "/Cast_40_output_0", + "/Cast_45_output_0", + "/Cast_50_output_0", + "/Cast_55_output_0", + "/Cast_5_output_0", + "/Cast_60_output_0", + "/Cast_65_output_0", + "/Cast_70_output_0", + "/Cast_output_0", + "/Unsqueeze_120_output_0", + "/Unsqueeze_154_output_0", + "/Unsqueeze_188_output_0", + "/Unsqueeze_251_output_0", + "/Unsqueeze_285_output_0", + "/Unsqueeze_319_output_0", + "/Unsqueeze_375_output_0", + "/Unsqueeze_38_output_0", + "/Unsqueeze_409_output_0", + "/Unsqueeze_458_output_0", + "/Unsqueeze_492_output_0", + "/Unsqueeze_4_output_0", + "/Unsqueeze_526_output_0", + "/Unsqueeze_560_output_0", + "/Unsqueeze_86_output_0", + "/encoder_embed/Unsqueeze_output_0", + "cached_avg_0", + "cached_avg_1", + "cached_avg_2", + "cached_avg_3", + "cached_avg_4", + "cached_conv1_0", + "cached_conv1_1", + "cached_conv1_2", + "cached_conv1_3", + "cached_conv1_4", + "cached_conv2_0", + "cached_conv2_1", + "cached_conv2_2", + "cached_conv2_3", + "cached_conv2_4", + "cached_key_0", + "cached_key_1", + "cached_key_2", + "cached_key_3", + "cached_key_4", + "cached_val2_0", + "cached_val2_1", + "cached_val2_2", + "cached_val2_3", + "cached_val2_4", + "cached_val_0", + "cached_val_1", + "cached_val_2", + "cached_val_3", + "cached_val_4" + ], + "outputs": [ + "new_cached_val_4", + "new_cached_val_3", + "new_cached_val_2", + "new_cached_val_1", + "new_cached_val_0", + "new_cached_val2_4", + "new_cached_val2_3", + "new_cached_val2_2", + "new_cached_val2_1", + "new_cached_val2_0", + "new_cached_key_4", + "new_cached_key_3", + "new_cached_key_2", + "new_cached_key_1", + "new_cached_key_0", + "new_cached_conv2_4", + "new_cached_conv2_3", + "new_cached_conv2_2", + "new_cached_conv2_1", + "new_cached_conv2_0", + "new_cached_conv1_4", + "new_cached_conv1_3", + "new_cached_conv1_2", + "new_cached_conv1_1", + "new_cached_conv1_0", + "new_cached_avg_4", + "new_cached_avg_3", + "new_cached_avg_2", + "new_cached_avg_1", + "new_cached_avg_0", + "gemm_output_reshape_arg_token_814" + ], + "nodes": [ + "new_cached_val_4", + "/Unsqueeze_602_output_0", + "/Slice_107_output_0", + "/Concat_227_output_0", + "/Gather_239_output_0", + "/Slice_104_output_0", + "/in_proj_11/Add_output_0", + "gemm_output_reshape_arg_token_610", + "gemm_input_reshape_arg_token_607", + "/Add_203_output_0", + "/proj_11/MatMul_output_0", + "/Mul_123_output_0", + "/Add_199_output_0", + "/CumSum_11_output_0", + "/Add_198_output_0", + "/feed_forward1/out_proj_11/Add_output_0", + "gemm_output_reshape_arg_token_604", + "gemm_input_reshape_arg_token_601", + "/feed_forward1/activation_11/Mul_output_0", + "/feed_forward1/in_proj_11/Add_output_0", + "gemm_output_reshape_arg_token_598", + "gemm_input_reshape_arg_token_595", + "/downsample_3/ReduceSum_1_output_0", + "/downsample_3/Mul_1_output_0", + "/downsample_3/Softmax_output_0", + "/downsample_3/ReduceSum_output_0", + "/downsample_3/Mul_output_0", + "/downsample_3/Reshape_output_0", + "/skip_modules.4/Add_output_0", + "/skip_modules.4/Mul_output_0", + "/out_combiner/Add_output_0", + "/out_combiner/Mul_output_0", + "/Add_35_output_0", + "/Mul_21_output_0", + "/Sub_5_output_0", + "/norm_final_1/Mul_1_output_0", + "/norm_final_1/Pow_output_0", + "/norm_final_1/Add_output_0", + "/norm_final_1/ReduceMean_output_0", + "/norm_final_1/Mul_output_0", + "/Add_34_output_0", + "/feed_forward3/out_proj_1/Add_output_0", + "gemm_output_reshape_arg_token_106", + "gemm_input_reshape_arg_token_103", + "/feed_forward3/activation_1/Mul_output_0", + "/feed_forward3/in_proj_1/Add_output_0", + "gemm_output_reshape_arg_token_100", + "gemm_input_reshape_arg_token_97", + "/Add_33_output_0", + "/Transpose_24_output_0", + "/pointwise_conv2_3/Conv_output_0", + "/activation_3/Mul_output_0", + "/activation_3/Sigmoid_output_0", + "/activation_3/Sub_output_0", + "/depthwise_conv_3/Conv_output_0", + "/Concat_35_output_0", + "/Gather_28_output_0", + "/Mul_20_output_0", + "/Sigmoid_3_output_0", + "/Split_3_output_0", + "/pointwise_conv1_3/Conv_output_0", + "/Transpose_23_output_0", + "/Add_32_output_0", + "/out_proj2_1/Add_output_0", + "gemm_output_reshape_arg_token_94", + "gemm_input_reshape_arg_token_91", + "/Transpose_22_output_0", + "/MatMul_9_output_0", + "/Transpose_21_output_0", + "/Reshape_20_output_0", + "/Concat_32_output_0", + "/Gather_26_output_0", + "/in_proj2_1/MatMul_output_0", + "/Add_30_output_0", + "/feed_forward2/out_proj_1/Add_output_0", + "gemm_output_reshape_arg_token_88", + "gemm_input_reshape_arg_token_85", + "/feed_forward2/activation_1/Mul_output_0", + "/feed_forward2/in_proj_1/Add_output_0", + "gemm_output_reshape_arg_token_82", + "gemm_input_reshape_arg_token_79", + "/Add_29_output_0", + "/Transpose_20_output_0", + "/pointwise_conv2_2/Conv_output_0", + "/activation_2/Mul_output_0", + "/activation_2/Sigmoid_output_0", + "/activation_2/Sub_output_0", + "/depthwise_conv_2/Conv_output_0", + "/Concat_31_output_0", + "/Gather_27_output_0", + "/Mul_18_output_0", + "/Sigmoid_2_output_0", + "/Split_2_output_0", + "/pointwise_conv1_2/Conv_output_0", + "/Transpose_19_output_0", + "/Add_28_output_0", + "/Add_27_output_0", + "gemm_output_reshape_arg_token_76", + "gemm_input_reshape_arg_token_73", + "/Transpose_18_output_0", + "/MatMul_7_output_0", + "/Transpose_13_output_0", + "/Reshape_14_output_0", + "/Concat_19_output_0", + "/Gather_25_output_0", + "/Slice_11_output_0", + "/in_proj_1/Add_output_0", + "gemm_output_reshape_arg_token_70", + "gemm_input_reshape_arg_token_67", + "/Add_23_output_0", + "/proj_1/MatMul_output_0", + "/Mul_13_output_0", + "/Add_19_output_0", + "/CumSum_1_output_0", + "/Add_18_output_0", + "/feed_forward1/out_proj_1/Add_output_0", + "gemm_output_reshape_arg_token_64", + "gemm_input_reshape_arg_token_61", + "/feed_forward1/activation_1/Mul_output_0", + "/feed_forward1/in_proj_1/Add_output_0", + "gemm_output_reshape_arg_token_58", + "gemm_input_reshape_arg_token_55", + "/Add_17_output_0", + "/Mul_10_output_0", + "/Sub_2_output_0", + "/norm_final/Mul_1_output_0", + "/norm_final/Pow_output_0", + "/norm_final/Add_output_0", + "/norm_final/ReduceMean_output_0", + "/norm_final/Mul_output_0", + "/Add_16_output_0", + "/feed_forward3/out_proj/Add_output_0", + "gemm_output_reshape_arg_token_52", + "gemm_input_reshape_arg_token_49", + "/feed_forward3/activation/Mul_output_0", + "/feed_forward3/in_proj/Add_output_0", + "gemm_output_reshape_arg_token_46", + "gemm_input_reshape_arg_token_43", + "/Add_15_output_0", + "/Transpose_12_output_0", + "/pointwise_conv2_1/Conv_output_0", + "/activation_1/Mul_output_0", + "/activation_1/Sigmoid_output_0", + "/activation_1/Sub_output_0", + "/depthwise_conv_1/Conv_output_0", + "/Concat_17_output_0", + "/Gather_7_output_0", + "/Mul_9_output_0", + "/Sigmoid_1_output_0", + "/Split_1_output_0", + "/pointwise_conv1_1/Conv_output_0", + "/Transpose_11_output_0", + "/Add_14_output_0", + "/out_proj2/Add_output_0", + "gemm_output_reshape_arg_token_40", + "gemm_input_reshape_arg_token_37", + "/Transpose_10_output_0", + "/MatMul_4_output_0", + "/Transpose_9_output_0", + "/Reshape_9_output_0", + "/Concat_14_output_0", + "/in_proj2/MatMul_output_0", + "/Add_12_output_0", + "/feed_forward2/out_proj/Add_output_0", + "gemm_output_reshape_arg_token_34", + "gemm_input_reshape_arg_token_31", + "/feed_forward2/activation/Mul_output_0", + "/feed_forward2/in_proj/Add_output_0", + "gemm_output_reshape_arg_token_28", + "gemm_input_reshape_arg_token_25", + "/Add_11_output_0", + "/Transpose_8_output_0", + "/pointwise_conv2/Conv_output_0", + "/activation/Mul_output_0", + "/activation/Sigmoid_output_0", + "/activation/Sub_output_0", + "/depthwise_conv/Conv_output_0", + "/Concat_13_output_0", + "/Gather_6_output_0", + "/Mul_7_output_0", + "/Sigmoid_output_0", + "/Split_output_0", + "/pointwise_conv1/Conv_output_0", + "/Transpose_7_output_0", + "/Add_10_output_0", + "/Add_9_output_0", + "gemm_output_reshape_arg_token_22", + "gemm_input_reshape_arg_token_19", + "/Transpose_6_output_0", + "/MatMul_2_output_0", + "/Transpose_1_output_0", + "/Reshape_3_output_0", + "/Concat_1_output_0", + "/Gather_4_output_0", + "/Slice_2_output_0", + "/in_proj/Add_output_0", + "gemm_output_reshape_arg_token_16", + "gemm_input_reshape_arg_token_13", + "/Add_5_output_0", + "/proj/MatMul_output_0", + "/Mul_2_output_0", + "/Add_1_output_0", + "/CumSum_output_0", + "/Add_output_0", + "/feed_forward1/out_proj/Add_output_0", + "gemm_output_reshape_arg_token_10", + "gemm_input_reshape_arg_token_7", + "/feed_forward1/activation/Mul_output_0", + "/feed_forward1/in_proj/Add_output_0", + "gemm_output_reshape_arg_token_4", + "gemm_input_reshape_arg_token_1", + "/Transpose_output_0", + "/encoder_embed/out/Add_output_0", + "gemm_output_reshape_arg", + "gemm_input_reshape_arg", + "/encoder_embed/Transpose_output_0", + "/encoder_embed/conv/conv.8/Mul_output_0", + "/encoder_embed/conv/conv.8/Sigmoid_output_0", + "/encoder_embed/conv/conv.8/Sub_output_0", + "/encoder_embed/conv/conv.6/Conv_output_0", + "/encoder_embed/conv/conv.5/Mul_output_0", + "/encoder_embed/conv/conv.5/Sigmoid_output_0", + "/encoder_embed/conv/conv.5/Sub_output_0", + "/encoder_embed/conv/conv.3/Conv_output_0", + "/encoder_embed/conv/conv.2/Mul_output_0", + "/encoder_embed/conv/conv.2/Sigmoid_output_0", + "/encoder_embed/conv/conv.2/Sub_output_0", + "/encoder_embed/conv/conv.0/Conv_output_0", + "/feed_forward1/activation/Sigmoid_output_0", + "/feed_forward1/activation/Sub_output_0", + "/Unsqueeze_1_output_0", + "/Mul_output_0", + "/Gather_2_output_0", + "/Softmax_output_0", + "/Reshape_7_output_0", + "/Add_8_output_0", + "/Reshape_6_output_0", + "/GatherElements_output_0", + "/Reshape_5_output_0", + "/MatMul_output_0", + "/Transpose_3_output_0", + "/Reshape_1_output_0", + "/Slice_3_output_0", + "/MatMul_1_output_0", + "/Transpose_2_output_0", + "/Reshape_output_0", + "/Slice_output_0", + "/Transpose_4_output_0", + "/Reshape_2_output_0", + "/Concat_output_0", + "/Gather_3_output_0", + "/Slice_1_output_0", + "/feed_forward2/activation/Sigmoid_output_0", + "/feed_forward2/activation/Sub_output_0", + "/Gather_5_output_0", + "/feed_forward3/activation/Sigmoid_output_0", + "/feed_forward3/activation/Sub_output_0", + "/feed_forward1/activation_1/Sigmoid_output_0", + "/feed_forward1/activation_1/Sub_output_0", + "/Unsqueeze_35_output_0", + "/Mul_11_output_0", + "/Gather_23_output_0", + "/Softmax_1_output_0", + "/Reshape_18_output_0", + "/Add_26_output_0", + "/Reshape_17_output_0", + "/GatherElements_1_output_0", + "/Reshape_16_output_0", + "/MatMul_5_output_0", + "/Transpose_15_output_0", + "/Reshape_12_output_0", + "/Slice_12_output_0", + "/MatMul_6_output_0", + "/Transpose_14_output_0", + "/Reshape_11_output_0", + "/Slice_9_output_0", + "/Transpose_16_output_0", + "/Reshape_13_output_0", + "/Concat_18_output_0", + "/Gather_24_output_0", + "/Slice_10_output_0", + "/feed_forward2/activation_1/Sigmoid_output_0", + "/feed_forward2/activation_1/Sub_output_0", + "/feed_forward3/activation_1/Sigmoid_output_0", + "/feed_forward3/activation_1/Sub_output_0", + "/out_combiner/Mul_1_output_0", + "/Slice_54_output_0", + "/upsample/Reshape_1_output_0", + "/upsample/Add_output_0", + "/upsample/Expand_output_0", + "/upsample/Unsqueeze_output_0", + "/Add_107_output_0", + "/Mul_65_output_0", + "/Sub_17_output_0", + "/norm_final_5/Mul_1_output_0", + "/norm_final_5/Pow_output_0", + "/norm_final_5/Add_output_0", + "/norm_final_5/ReduceMean_output_0", + "/norm_final_5/Mul_output_0", + "/Add_106_output_0", + "/feed_forward3/out_proj_5/Add_output_0", + "gemm_output_reshape_arg_token_322", + "gemm_input_reshape_arg_token_319", + "/feed_forward3/activation_5/Mul_output_0", + "/feed_forward3/in_proj_5/Add_output_0", + "gemm_output_reshape_arg_token_316", + "gemm_input_reshape_arg_token_313", + "/Add_105_output_0", + "/Transpose_72_output_0", + "/pointwise_conv2_11/Conv_output_0", + "/activation_11/Mul_output_0", + "/activation_11/Sigmoid_output_0", + "/activation_11/Sub_output_0", + "/depthwise_conv_11/Conv_output_0", + "/Concat_114_output_0", + "/Gather_113_output_0", + "/Mul_64_output_0", + "/Sigmoid_11_output_0", + "/Split_11_output_0", + "/pointwise_conv1_11/Conv_output_0", + "/Transpose_71_output_0", + "/Add_104_output_0", + "/out_proj2_5/Add_output_0", + "gemm_output_reshape_arg_token_310", + "gemm_input_reshape_arg_token_307", + "/Transpose_70_output_0", + "/MatMul_29_output_0", + "/Transpose_69_output_0", + "/Reshape_64_output_0", + "/Concat_111_output_0", + "/Gather_111_output_0", + "/in_proj2_5/MatMul_output_0", + "/Add_102_output_0", + "/feed_forward2/out_proj_5/Add_output_0", + "gemm_output_reshape_arg_token_304", + "gemm_input_reshape_arg_token_301", + "/feed_forward2/activation_5/Mul_output_0", + "/feed_forward2/in_proj_5/Add_output_0", + "gemm_output_reshape_arg_token_298", + "gemm_input_reshape_arg_token_295", + "/Add_101_output_0", + "/Transpose_68_output_0", + "/pointwise_conv2_10/Conv_output_0", + "/activation_10/Mul_output_0", + "/activation_10/Sigmoid_output_0", + "/activation_10/Sub_output_0", + "/depthwise_conv_10/Conv_output_0", + "/Concat_110_output_0", + "/Gather_112_output_0", + "/Mul_62_output_0", + "/Sigmoid_10_output_0", + "/Split_10_output_0", + "/pointwise_conv1_10/Conv_output_0", + "/Transpose_67_output_0", + "/Add_100_output_0", + "/Add_99_output_0", + "gemm_output_reshape_arg_token_292", + "gemm_input_reshape_arg_token_289", + "/Transpose_66_output_0", + "/MatMul_27_output_0", + "/Transpose_61_output_0", + "/Reshape_58_output_0", + "/Concat_98_output_0", + "/Gather_110_output_0", + "/Slice_47_output_0", + "/in_proj_5/Add_output_0", + "gemm_output_reshape_arg_token_286", + "gemm_input_reshape_arg_token_283", + "/Add_95_output_0", + "/proj_5/MatMul_output_0", + "/Mul_57_output_0", + "/Add_91_output_0", + "/CumSum_5_output_0", + "/Add_90_output_0", + "/feed_forward1/out_proj_5/Add_output_0", + "gemm_output_reshape_arg_token_280", + "gemm_input_reshape_arg_token_277", + "/feed_forward1/activation_5/Mul_output_0", + "/feed_forward1/in_proj_5/Add_output_0", + "gemm_output_reshape_arg_token_274", + "gemm_input_reshape_arg_token_271", + "/Add_89_output_0", + "/Mul_54_output_0", + "/Sub_14_output_0", + "/norm_final_4/Mul_1_output_0", + "/norm_final_4/Pow_output_0", + "/norm_final_4/Add_output_0", + "/norm_final_4/ReduceMean_output_0", + "/norm_final_4/Mul_output_0", + "/Add_88_output_0", + "/feed_forward3/out_proj_4/Add_output_0", + "gemm_output_reshape_arg_token_268", + "gemm_input_reshape_arg_token_265", + "/feed_forward3/activation_4/Mul_output_0", + "/feed_forward3/in_proj_4/Add_output_0", + "gemm_output_reshape_arg_token_262", + "gemm_input_reshape_arg_token_259", + "/Add_87_output_0", + "/Transpose_60_output_0", + "/pointwise_conv2_9/Conv_output_0", + "/activation_9/Mul_output_0", + "/activation_9/Sigmoid_output_0", + "/activation_9/Sub_output_0", + "/depthwise_conv_9/Conv_output_0", + "/Concat_96_output_0", + "/Gather_92_output_0", + "/Mul_53_output_0", + "/Sigmoid_9_output_0", + "/Split_9_output_0", + "/pointwise_conv1_9/Conv_output_0", + "/Transpose_59_output_0", + "/Add_86_output_0", + "/out_proj2_4/Add_output_0", + "gemm_output_reshape_arg_token_256", + "gemm_input_reshape_arg_token_253", + "/Transpose_58_output_0", + "/MatMul_24_output_0", + "/Transpose_57_output_0", + "/Reshape_53_output_0", + "/Concat_93_output_0", + "/Gather_90_output_0", + "/in_proj2_4/MatMul_output_0", + "/Add_84_output_0", + "/feed_forward2/out_proj_4/Add_output_0", + "gemm_output_reshape_arg_token_250", + "gemm_input_reshape_arg_token_247", + "/feed_forward2/activation_4/Mul_output_0", + "/feed_forward2/in_proj_4/Add_output_0", + "gemm_output_reshape_arg_token_244", + "gemm_input_reshape_arg_token_241", + "/Add_83_output_0", + "/Transpose_56_output_0", + "/pointwise_conv2_8/Conv_output_0", + "/activation_8/Mul_output_0", + "/activation_8/Sigmoid_output_0", + "/activation_8/Sub_output_0", + "/depthwise_conv_8/Conv_output_0", + "/Concat_92_output_0", + "/Gather_91_output_0", + "/Mul_51_output_0", + "/Sigmoid_8_output_0", + "/Split_8_output_0", + "/pointwise_conv1_8/Conv_output_0", + "/Transpose_55_output_0", + "/Add_82_output_0", + "/Add_81_output_0", + "gemm_output_reshape_arg_token_238", + "gemm_input_reshape_arg_token_235", + "/Transpose_54_output_0", + "/MatMul_22_output_0", + "/Transpose_49_output_0", + "/Reshape_47_output_0", + "/Concat_80_output_0", + "/Gather_89_output_0", + "/Slice_38_output_0", + "/in_proj_4/Add_output_0", + "gemm_output_reshape_arg_token_232", + "gemm_input_reshape_arg_token_229", + "/Add_77_output_0", + "/proj_4/MatMul_output_0", + "/Mul_46_output_0", + "/Add_73_output_0", + "/CumSum_4_output_0", + "/Add_72_output_0", + "/feed_forward1/out_proj_4/Add_output_0", + "gemm_output_reshape_arg_token_226", + "gemm_input_reshape_arg_token_223", + "/feed_forward1/activation_4/Mul_output_0", + "/feed_forward1/in_proj_4/Add_output_0", + "gemm_output_reshape_arg_token_220", + "gemm_input_reshape_arg_token_217", + "/Add_71_output_0", + "/Mul_43_output_0", + "/Sub_11_output_0", + "/norm_final_3/Mul_1_output_0", + "/norm_final_3/Pow_output_0", + "/norm_final_3/Add_output_0", + "/norm_final_3/ReduceMean_output_0", + "/norm_final_3/Mul_output_0", + "/Add_70_output_0", + "/feed_forward3/out_proj_3/Add_output_0", + "gemm_output_reshape_arg_token_214", + "gemm_input_reshape_arg_token_211", + "/feed_forward3/activation_3/Mul_output_0", + "/feed_forward3/in_proj_3/Add_output_0", + "gemm_output_reshape_arg_token_208", + "gemm_input_reshape_arg_token_205", + "/Add_69_output_0", + "/Transpose_48_output_0", + "/pointwise_conv2_7/Conv_output_0", + "/activation_7/Mul_output_0", + "/activation_7/Sigmoid_output_0", + "/activation_7/Sub_output_0", + "/depthwise_conv_7/Conv_output_0", + "/Concat_78_output_0", + "/Gather_71_output_0", + "/Mul_42_output_0", + "/Sigmoid_7_output_0", + "/Split_7_output_0", + "/pointwise_conv1_7/Conv_output_0", + "/Transpose_47_output_0", + "/Add_68_output_0", + "/out_proj2_3/Add_output_0", + "gemm_output_reshape_arg_token_202", + "gemm_input_reshape_arg_token_199", + "/Transpose_46_output_0", + "/MatMul_19_output_0", + "/Transpose_45_output_0", + "/Reshape_42_output_0", + "/Concat_75_output_0", + "/Gather_69_output_0", + "/in_proj2_3/MatMul_output_0", + "/Add_66_output_0", + "/feed_forward2/out_proj_3/Add_output_0", + "gemm_output_reshape_arg_token_196", + "gemm_input_reshape_arg_token_193", + "/feed_forward2/activation_3/Mul_output_0", + "/feed_forward2/in_proj_3/Add_output_0", + "gemm_output_reshape_arg_token_190", + "gemm_input_reshape_arg_token_187", + "/Add_65_output_0", + "/Transpose_44_output_0", + "/pointwise_conv2_6/Conv_output_0", + "/activation_6/Mul_output_0", + "/activation_6/Sigmoid_output_0", + "/activation_6/Sub_output_0", + "/depthwise_conv_6/Conv_output_0", + "/Concat_74_output_0", + "/Gather_70_output_0", + "/Mul_40_output_0", + "/Sigmoid_6_output_0", + "/Split_6_output_0", + "/pointwise_conv1_6/Conv_output_0", + "/Transpose_43_output_0", + "/Add_64_output_0", + "/Add_63_output_0", + "gemm_output_reshape_arg_token_184", + "gemm_input_reshape_arg_token_181", + "/Transpose_42_output_0", + "/MatMul_17_output_0", + "/Transpose_37_output_0", + "/Reshape_36_output_0", + "/Concat_62_output_0", + "/Gather_68_output_0", + "/Slice_29_output_0", + "/in_proj_3/Add_output_0", + "gemm_output_reshape_arg_token_178", + "gemm_input_reshape_arg_token_175", + "/Add_59_output_0", + "/proj_3/MatMul_output_0", + "/Mul_35_output_0", + "/Add_55_output_0", + "/CumSum_3_output_0", + "/Add_54_output_0", + "/feed_forward1/out_proj_3/Add_output_0", + "gemm_output_reshape_arg_token_172", + "gemm_input_reshape_arg_token_169", + "/feed_forward1/activation_3/Mul_output_0", + "/feed_forward1/in_proj_3/Add_output_0", + "gemm_output_reshape_arg_token_166", + "gemm_input_reshape_arg_token_163", + "/Add_53_output_0", + "/Mul_32_output_0", + "/Sub_8_output_0", + "/norm_final_2/Mul_1_output_0", + "/norm_final_2/Pow_output_0", + "/norm_final_2/Add_output_0", + "/norm_final_2/ReduceMean_output_0", + "/norm_final_2/Mul_output_0", + "/Add_52_output_0", + "/feed_forward3/out_proj_2/Add_output_0", + "gemm_output_reshape_arg_token_160", + "gemm_input_reshape_arg_token_157", + "/feed_forward3/activation_2/Mul_output_0", + "/feed_forward3/in_proj_2/Add_output_0", + "gemm_output_reshape_arg_token_154", + "gemm_input_reshape_arg_token_151", + "/Add_51_output_0", + "/Transpose_36_output_0", + "/pointwise_conv2_5/Conv_output_0", + "/activation_5/Mul_output_0", + "/activation_5/Sigmoid_output_0", + "/activation_5/Sub_output_0", + "/depthwise_conv_5/Conv_output_0", + "/Concat_60_output_0", + "/Gather_50_output_0", + "/Mul_31_output_0", + "/Sigmoid_5_output_0", + "/Split_5_output_0", + "/pointwise_conv1_5/Conv_output_0", + "/Transpose_35_output_0", + "/Add_50_output_0", + "/out_proj2_2/Add_output_0", + "gemm_output_reshape_arg_token_148", + "gemm_input_reshape_arg_token_145", + "/Transpose_34_output_0", + "/MatMul_14_output_0", + "/Transpose_33_output_0", + "/Reshape_31_output_0", + "/Concat_57_output_0", + "/in_proj2_2/MatMul_output_0", + "/Add_48_output_0", + "/feed_forward2/out_proj_2/Add_output_0", + "gemm_output_reshape_arg_token_142", + "gemm_input_reshape_arg_token_139", + "/feed_forward2/activation_2/Mul_output_0", + "/feed_forward2/in_proj_2/Add_output_0", + "gemm_output_reshape_arg_token_136", + "gemm_input_reshape_arg_token_133", + "/Add_47_output_0", + "/Transpose_32_output_0", + "/pointwise_conv2_4/Conv_output_0", + "/activation_4/Mul_output_0", + "/activation_4/Sigmoid_output_0", + "/activation_4/Sub_output_0", + "/depthwise_conv_4/Conv_output_0", + "/Concat_56_output_0", + "/Gather_49_output_0", + "/Mul_29_output_0", + "/Sigmoid_4_output_0", + "/Split_4_output_0", + "/pointwise_conv1_4/Conv_output_0", + "/Transpose_31_output_0", + "/Add_46_output_0", + "/Add_45_output_0", + "gemm_output_reshape_arg_token_130", + "gemm_input_reshape_arg_token_127", + "/Transpose_30_output_0", + "/MatMul_12_output_0", + "/Transpose_25_output_0", + "/Reshape_25_output_0", + "/Concat_44_output_0", + "/Gather_47_output_0", + "/Slice_20_output_0", + "/in_proj_2/Add_output_0", + "gemm_output_reshape_arg_token_124", + "gemm_input_reshape_arg_token_121", + "/Add_41_output_0", + "/proj_2/MatMul_output_0", + "/Mul_24_output_0", + "/Add_37_output_0", + "/CumSum_2_output_0", + "/Add_36_output_0", + "/feed_forward1/out_proj_2/Add_output_0", + "gemm_output_reshape_arg_token_118", + "gemm_input_reshape_arg_token_115", + "/feed_forward1/activation_2/Mul_output_0", + "/feed_forward1/in_proj_2/Add_output_0", + "gemm_output_reshape_arg_token_112", + "gemm_input_reshape_arg_token_109", + "/downsample/ReduceSum_1_output_0", + "/downsample/Mul_1_output_0", + "/downsample/Softmax_output_0", + "/downsample/ReduceSum_output_0", + "/downsample/Mul_output_0", + "/downsample/Reshape_output_0", + "/feed_forward1/activation_2/Sigmoid_output_0", + "/feed_forward1/activation_2/Sub_output_0", + "/Unsqueeze_83_output_0", + "/Mul_22_output_0", + "/Gather_45_output_0", + "/Softmax_2_output_0", + "/Reshape_29_output_0", + "/Add_44_output_0", + "/Reshape_28_output_0", + "/GatherElements_2_output_0", + "/Reshape_27_output_0", + "/MatMul_10_output_0", + "/Transpose_27_output_0", + "/Reshape_23_output_0", + "/Slice_21_output_0", + "/MatMul_11_output_0", + "/Transpose_26_output_0", + "/Reshape_22_output_0", + "/Slice_18_output_0", + "/Transpose_28_output_0", + "/Reshape_24_output_0", + "/Concat_43_output_0", + "/Gather_46_output_0", + "/Slice_19_output_0", + "/feed_forward2/activation_2/Sigmoid_output_0", + "/feed_forward2/activation_2/Sub_output_0", + "/Gather_48_output_0", + "/feed_forward3/activation_2/Sigmoid_output_0", + "/feed_forward3/activation_2/Sub_output_0", + "/feed_forward1/activation_3/Sigmoid_output_0", + "/feed_forward1/activation_3/Sub_output_0", + "/Unsqueeze_117_output_0", + "/Mul_33_output_0", + "/Gather_66_output_0", + "/Softmax_3_output_0", + "/Reshape_40_output_0", + "/Add_62_output_0", + "/Reshape_39_output_0", + "/GatherElements_3_output_0", + "/Reshape_38_output_0", + "/MatMul_15_output_0", + "/Transpose_39_output_0", + "/Reshape_34_output_0", + "/Slice_30_output_0", + "/MatMul_16_output_0", + "/Transpose_38_output_0", + "/Reshape_33_output_0", + "/Slice_27_output_0", + "/Transpose_40_output_0", + "/Reshape_35_output_0", + "/Concat_61_output_0", + "/Gather_67_output_0", + "/Slice_28_output_0", + "/feed_forward2/activation_3/Sigmoid_output_0", + "/feed_forward2/activation_3/Sub_output_0", + "/feed_forward3/activation_3/Sigmoid_output_0", + "/feed_forward3/activation_3/Sub_output_0", + "/feed_forward1/activation_4/Sigmoid_output_0", + "/feed_forward1/activation_4/Sub_output_0", + "/Unsqueeze_151_output_0", + "/Mul_44_output_0", + "/Gather_87_output_0", + "/Softmax_4_output_0", + "/Reshape_51_output_0", + "/Add_80_output_0", + "/Reshape_50_output_0", + "/GatherElements_4_output_0", + "/Reshape_49_output_0", + "/MatMul_20_output_0", + "/Transpose_51_output_0", + "/Reshape_45_output_0", + "/Slice_39_output_0", + "/MatMul_21_output_0", + "/Transpose_50_output_0", + "/Reshape_44_output_0", + "/Slice_36_output_0", + "/Transpose_52_output_0", + "/Reshape_46_output_0", + "/Concat_79_output_0", + "/Gather_88_output_0", + "/Slice_37_output_0", + "/feed_forward2/activation_4/Sigmoid_output_0", + "/feed_forward2/activation_4/Sub_output_0", + "/feed_forward3/activation_4/Sigmoid_output_0", + "/feed_forward3/activation_4/Sub_output_0", + "/feed_forward1/activation_5/Sigmoid_output_0", + "/feed_forward1/activation_5/Sub_output_0", + "/Unsqueeze_185_output_0", + "/Mul_55_output_0", + "/Gather_108_output_0", + "/Softmax_5_output_0", + "/Reshape_62_output_0", + "/Add_98_output_0", + "/Reshape_61_output_0", + "/GatherElements_5_output_0", + "/Reshape_60_output_0", + "/MatMul_25_output_0", + "/Transpose_63_output_0", + "/Reshape_56_output_0", + "/Slice_48_output_0", + "/MatMul_26_output_0", + "/Transpose_62_output_0", + "/Reshape_55_output_0", + "/Slice_45_output_0", + "/Transpose_64_output_0", + "/Reshape_57_output_0", + "/Concat_97_output_0", + "/Gather_109_output_0", + "/Slice_46_output_0", + "/feed_forward2/activation_5/Sigmoid_output_0", + "/feed_forward2/activation_5/Sub_output_0", + "/feed_forward3/activation_5/Sigmoid_output_0", + "/feed_forward3/activation_5/Sub_output_0", + "/skip_modules.4/Mul_1_output_0", + "/out_combiner_2/Add_output_0", + "/out_combiner_2/Mul_output_0", + "/out_combiner_1/Add_output_0", + "/out_combiner_1/Mul_output_0", + "/out_combiner_1/Mul_1_output_0", + "/Slice_82_output_0", + "/upsample_1/Reshape_1_output_0", + "/upsample_1/Add_output_0", + "/upsample_1/Expand_output_0", + "/upsample_1/Unsqueeze_output_0", + "/Add_161_output_0", + "/Mul_98_output_0", + "/Sub_26_output_0", + "/norm_final_8/Mul_1_output_0", + "/norm_final_8/Pow_output_0", + "/norm_final_8/Add_output_0", + "/norm_final_8/ReduceMean_output_0", + "/norm_final_8/Mul_output_0", + "/Add_160_output_0", + "/feed_forward3/out_proj_8/Add_output_0", + "gemm_output_reshape_arg_token_484", + "gemm_input_reshape_arg_token_481", + "/feed_forward3/activation_8/Mul_output_0", + "/feed_forward3/in_proj_8/Add_output_0", + "gemm_output_reshape_arg_token_478", + "gemm_input_reshape_arg_token_475", + "/Add_159_output_0", + "/Transpose_108_output_0", + "/pointwise_conv2_17/Conv_output_0", + "/activation_17/Mul_output_0", + "/activation_17/Sigmoid_output_0", + "/activation_17/Sub_output_0", + "/depthwise_conv_17/Conv_output_0", + "/Concat_175_output_0", + "/Gather_177_output_0", + "/Mul_97_output_0", + "/Sigmoid_17_output_0", + "/Split_17_output_0", + "/pointwise_conv1_17/Conv_output_0", + "/Transpose_107_output_0", + "/Add_158_output_0", + "/out_proj2_8/Add_output_0", + "gemm_output_reshape_arg_token_472", + "gemm_input_reshape_arg_token_469", + "/Transpose_106_output_0", + "/MatMul_44_output_0", + "/Transpose_105_output_0", + "/Reshape_97_output_0", + "/Concat_172_output_0", + "/Gather_175_output_0", + "/in_proj2_8/MatMul_output_0", + "/Add_156_output_0", + "/feed_forward2/out_proj_8/Add_output_0", + "gemm_output_reshape_arg_token_466", + "gemm_input_reshape_arg_token_463", + "/feed_forward2/activation_8/Mul_output_0", + "/feed_forward2/in_proj_8/Add_output_0", + "gemm_output_reshape_arg_token_460", + "gemm_input_reshape_arg_token_457", + "/Add_155_output_0", + "/Transpose_104_output_0", + "/pointwise_conv2_16/Conv_output_0", + "/activation_16/Mul_output_0", + "/activation_16/Sigmoid_output_0", + "/activation_16/Sub_output_0", + "/depthwise_conv_16/Conv_output_0", + "/Concat_171_output_0", + "/Gather_176_output_0", + "/Mul_95_output_0", + "/Sigmoid_16_output_0", + "/Split_16_output_0", + "/pointwise_conv1_16/Conv_output_0", + "/Transpose_103_output_0", + "/Add_154_output_0", + "/Add_153_output_0", + "gemm_output_reshape_arg_token_454", + "gemm_input_reshape_arg_token_451", + "/Transpose_102_output_0", + "/MatMul_42_output_0", + "/Transpose_97_output_0", + "/Reshape_91_output_0", + "/Concat_159_output_0", + "/Gather_174_output_0", + "/Slice_75_output_0", + "/in_proj_8/Add_output_0", + "gemm_output_reshape_arg_token_448", + "gemm_input_reshape_arg_token_445", + "/Add_149_output_0", + "/proj_8/MatMul_output_0", + "/Mul_90_output_0", + "/Add_145_output_0", + "/CumSum_8_output_0", + "/Add_144_output_0", + "/feed_forward1/out_proj_8/Add_output_0", + "gemm_output_reshape_arg_token_442", + "gemm_input_reshape_arg_token_439", + "/feed_forward1/activation_8/Mul_output_0", + "/feed_forward1/in_proj_8/Add_output_0", + "gemm_output_reshape_arg_token_436", + "gemm_input_reshape_arg_token_433", + "/Add_143_output_0", + "/Mul_87_output_0", + "/Sub_23_output_0", + "/norm_final_7/Mul_1_output_0", + "/norm_final_7/Pow_output_0", + "/norm_final_7/Add_output_0", + "/norm_final_7/ReduceMean_output_0", + "/norm_final_7/Mul_output_0", + "/Add_142_output_0", + "/feed_forward3/out_proj_7/Add_output_0", + "gemm_output_reshape_arg_token_430", + "gemm_input_reshape_arg_token_427", + "/feed_forward3/activation_7/Mul_output_0", + "/feed_forward3/in_proj_7/Add_output_0", + "gemm_output_reshape_arg_token_424", + "gemm_input_reshape_arg_token_421", + "/Add_141_output_0", + "/Transpose_96_output_0", + "/pointwise_conv2_15/Conv_output_0", + "/activation_15/Mul_output_0", + "/activation_15/Sigmoid_output_0", + "/activation_15/Sub_output_0", + "/depthwise_conv_15/Conv_output_0", + "/Concat_157_output_0", + "/Gather_156_output_0", + "/Mul_86_output_0", + "/Sigmoid_15_output_0", + "/Split_15_output_0", + "/pointwise_conv1_15/Conv_output_0", + "/Transpose_95_output_0", + "/Add_140_output_0", + "/out_proj2_7/Add_output_0", + "gemm_output_reshape_arg_token_418", + "gemm_input_reshape_arg_token_415", + "/Transpose_94_output_0", + "/MatMul_39_output_0", + "/Transpose_93_output_0", + "/Reshape_86_output_0", + "/Concat_154_output_0", + "/Gather_154_output_0", + "/in_proj2_7/MatMul_output_0", + "/Add_138_output_0", + "/feed_forward2/out_proj_7/Add_output_0", + "gemm_output_reshape_arg_token_412", + "gemm_input_reshape_arg_token_409", + "/feed_forward2/activation_7/Mul_output_0", + "/feed_forward2/in_proj_7/Add_output_0", + "gemm_output_reshape_arg_token_406", + "gemm_input_reshape_arg_token_403", + "/Add_137_output_0", + "/Transpose_92_output_0", + "/pointwise_conv2_14/Conv_output_0", + "/activation_14/Mul_output_0", + "/activation_14/Sigmoid_output_0", + "/activation_14/Sub_output_0", + "/depthwise_conv_14/Conv_output_0", + "/Concat_153_output_0", + "/Gather_155_output_0", + "/Mul_84_output_0", + "/Sigmoid_14_output_0", + "/Split_14_output_0", + "/pointwise_conv1_14/Conv_output_0", + "/Transpose_91_output_0", + "/Add_136_output_0", + "/Add_135_output_0", + "gemm_output_reshape_arg_token_400", + "gemm_input_reshape_arg_token_397", + "/Transpose_90_output_0", + "/MatMul_37_output_0", + "/Transpose_85_output_0", + "/Reshape_80_output_0", + "/Concat_141_output_0", + "/Gather_153_output_0", + "/Slice_66_output_0", + "/in_proj_7/Add_output_0", + "gemm_output_reshape_arg_token_394", + "gemm_input_reshape_arg_token_391", + "/Add_131_output_0", + "/proj_7/MatMul_output_0", + "/Mul_79_output_0", + "/Add_127_output_0", + "/CumSum_7_output_0", + "/Add_126_output_0", + "/feed_forward1/out_proj_7/Add_output_0", + "gemm_output_reshape_arg_token_388", + "gemm_input_reshape_arg_token_385", + "/feed_forward1/activation_7/Mul_output_0", + "/feed_forward1/in_proj_7/Add_output_0", + "gemm_output_reshape_arg_token_382", + "gemm_input_reshape_arg_token_379", + "/Add_125_output_0", + "/Mul_76_output_0", + "/Sub_20_output_0", + "/norm_final_6/Mul_1_output_0", + "/norm_final_6/Pow_output_0", + "/norm_final_6/Add_output_0", + "/norm_final_6/ReduceMean_output_0", + "/norm_final_6/Mul_output_0", + "/Add_124_output_0", + "/feed_forward3/out_proj_6/Add_output_0", + "gemm_output_reshape_arg_token_376", + "gemm_input_reshape_arg_token_373", + "/feed_forward3/activation_6/Mul_output_0", + "/feed_forward3/in_proj_6/Add_output_0", + "gemm_output_reshape_arg_token_370", + "gemm_input_reshape_arg_token_367", + "/Add_123_output_0", + "/Transpose_84_output_0", + "/pointwise_conv2_13/Conv_output_0", + "/activation_13/Mul_output_0", + "/activation_13/Sigmoid_output_0", + "/activation_13/Sub_output_0", + "/depthwise_conv_13/Conv_output_0", + "/Concat_139_output_0", + "/Gather_135_output_0", + "/Mul_75_output_0", + "/Sigmoid_13_output_0", + "/Split_13_output_0", + "/pointwise_conv1_13/Conv_output_0", + "/Transpose_83_output_0", + "/Add_122_output_0", + "/out_proj2_6/Add_output_0", + "gemm_output_reshape_arg_token_364", + "gemm_input_reshape_arg_token_361", + "/Transpose_82_output_0", + "/MatMul_34_output_0", + "/Transpose_81_output_0", + "/Reshape_75_output_0", + "/Concat_136_output_0", + "/in_proj2_6/MatMul_output_0", + "/Add_120_output_0", + "/feed_forward2/out_proj_6/Add_output_0", + "gemm_output_reshape_arg_token_358", + "gemm_input_reshape_arg_token_355", + "/feed_forward2/activation_6/Mul_output_0", + "/feed_forward2/in_proj_6/Add_output_0", + "gemm_output_reshape_arg_token_352", + "gemm_input_reshape_arg_token_349", + "/Add_119_output_0", + "/Transpose_80_output_0", + "/pointwise_conv2_12/Conv_output_0", + "/activation_12/Mul_output_0", + "/activation_12/Sigmoid_output_0", + "/activation_12/Sub_output_0", + "/depthwise_conv_12/Conv_output_0", + "/Concat_135_output_0", + "/Gather_134_output_0", + "/Mul_73_output_0", + "/Sigmoid_12_output_0", + "/Split_12_output_0", + "/pointwise_conv1_12/Conv_output_0", + "/Transpose_79_output_0", + "/Add_118_output_0", + "/Add_117_output_0", + "gemm_output_reshape_arg_token_346", + "gemm_input_reshape_arg_token_343", + "/Transpose_78_output_0", + "/MatMul_32_output_0", + "/Transpose_73_output_0", + "/Reshape_69_output_0", + "/Concat_123_output_0", + "/Gather_132_output_0", + "/Slice_57_output_0", + "/in_proj_6/Add_output_0", + "gemm_output_reshape_arg_token_340", + "gemm_input_reshape_arg_token_337", + "/Add_113_output_0", + "/proj_6/MatMul_output_0", + "/Mul_68_output_0", + "/Add_109_output_0", + "/CumSum_6_output_0", + "/Add_108_output_0", + "/feed_forward1/out_proj_6/Add_output_0", + "gemm_output_reshape_arg_token_334", + "gemm_input_reshape_arg_token_331", + "/feed_forward1/activation_6/Mul_output_0", + "/feed_forward1/in_proj_6/Add_output_0", + "gemm_output_reshape_arg_token_328", + "gemm_input_reshape_arg_token_325", + "/downsample_1/ReduceSum_1_output_0", + "/downsample_1/Mul_1_output_0", + "/downsample_1/Softmax_output_0", + "/downsample_1/ReduceSum_output_0", + "/downsample_1/Mul_output_0", + "/downsample_1/Reshape_output_0", + "/feed_forward1/activation_6/Sigmoid_output_0", + "/feed_forward1/activation_6/Sub_output_0", + "/Unsqueeze_248_output_0", + "/Mul_66_output_0", + "/Gather_130_output_0", + "/Softmax_6_output_0", + "/Reshape_73_output_0", + "/Add_116_output_0", + "/Reshape_72_output_0", + "/GatherElements_6_output_0", + "/Reshape_71_output_0", + "/MatMul_30_output_0", + "/Transpose_75_output_0", + "/Reshape_67_output_0", + "/Slice_58_output_0", + "/MatMul_31_output_0", + "/Transpose_74_output_0", + "/Reshape_66_output_0", + "/Slice_55_output_0", + "/Transpose_76_output_0", + "/Reshape_68_output_0", + "/Concat_122_output_0", + "/Gather_131_output_0", + "/Slice_56_output_0", + "/feed_forward2/activation_6/Sigmoid_output_0", + "/feed_forward2/activation_6/Sub_output_0", + "/Gather_133_output_0", + "/feed_forward3/activation_6/Sigmoid_output_0", + "/feed_forward3/activation_6/Sub_output_0", + "/feed_forward1/activation_7/Sigmoid_output_0", + "/feed_forward1/activation_7/Sub_output_0", + "/Unsqueeze_282_output_0", + "/Mul_77_output_0", + "/Gather_151_output_0", + "/Softmax_7_output_0", + "/Reshape_84_output_0", + "/Add_134_output_0", + "/Reshape_83_output_0", + "/GatherElements_7_output_0", + "/Reshape_82_output_0", + "/MatMul_35_output_0", + "/Transpose_87_output_0", + "/Reshape_78_output_0", + "/Slice_67_output_0", + "/MatMul_36_output_0", + "/Transpose_86_output_0", + "/Reshape_77_output_0", + "/Slice_64_output_0", + "/Transpose_88_output_0", + "/Reshape_79_output_0", + "/Concat_140_output_0", + "/Gather_152_output_0", + "/Slice_65_output_0", + "/feed_forward2/activation_7/Sigmoid_output_0", + "/feed_forward2/activation_7/Sub_output_0", + "/feed_forward3/activation_7/Sigmoid_output_0", + "/feed_forward3/activation_7/Sub_output_0", + "/feed_forward1/activation_8/Sigmoid_output_0", + "/feed_forward1/activation_8/Sub_output_0", + "/Unsqueeze_316_output_0", + "/Mul_88_output_0", + "/Gather_172_output_0", + "/Softmax_8_output_0", + "/Reshape_95_output_0", + "/Add_152_output_0", + "/Reshape_94_output_0", + "/GatherElements_8_output_0", + "/Reshape_93_output_0", + "/MatMul_40_output_0", + "/Transpose_99_output_0", + "/Reshape_89_output_0", + "/Slice_76_output_0", + "/MatMul_41_output_0", + "/Transpose_98_output_0", + "/Reshape_88_output_0", + "/Slice_73_output_0", + "/Transpose_100_output_0", + "/Reshape_90_output_0", + "/Concat_158_output_0", + "/Gather_173_output_0", + "/Slice_74_output_0", + "/feed_forward2/activation_8/Sigmoid_output_0", + "/feed_forward2/activation_8/Sub_output_0", + "/feed_forward3/activation_8/Sigmoid_output_0", + "/feed_forward3/activation_8/Sub_output_0", + "/out_combiner_2/Mul_1_output_0", + "/Slice_101_output_0", + "/upsample_2/Reshape_1_output_0", + "/upsample_2/Add_output_0", + "/upsample_2/Expand_output_0", + "/upsample_2/Unsqueeze_output_0", + "/Add_197_output_0", + "/Mul_120_output_0", + "/Sub_32_output_0", + "/norm_final_10/Mul_1_output_0", + "/norm_final_10/Pow_output_0", + "/norm_final_10/Add_output_0", + "/norm_final_10/ReduceMean_output_0", + "/norm_final_10/Mul_output_0", + "/Add_196_output_0", + "/feed_forward3/out_proj_10/Add_output_0", + "gemm_output_reshape_arg_token_592", + "gemm_input_reshape_arg_token_589", + "/feed_forward3/activation_10/Mul_output_0", + "/feed_forward3/in_proj_10/Add_output_0", + "gemm_output_reshape_arg_token_586", + "gemm_input_reshape_arg_token_583", + "/Add_195_output_0", + "/Transpose_132_output_0", + "/pointwise_conv2_21/Conv_output_0", + "/activation_21/Mul_output_0", + "/activation_21/Sigmoid_output_0", + "/activation_21/Sub_output_0", + "/depthwise_conv_21/Conv_output_0", + "/Concat_218_output_0", + "/Gather_220_output_0", + "/Mul_119_output_0", + "/Sigmoid_21_output_0", + "/Split_21_output_0", + "/pointwise_conv1_21/Conv_output_0", + "/Transpose_131_output_0", + "/Add_194_output_0", + "/out_proj2_10/Add_output_0", + "gemm_output_reshape_arg_token_580", + "gemm_input_reshape_arg_token_577", + "/Transpose_130_output_0", + "/MatMul_54_output_0", + "/Transpose_129_output_0", + "/Reshape_119_output_0", + "/Concat_215_output_0", + "/Gather_218_output_0", + "/in_proj2_10/MatMul_output_0", + "/Add_192_output_0", + "/feed_forward2/out_proj_10/Add_output_0", + "gemm_output_reshape_arg_token_574", + "gemm_input_reshape_arg_token_571", + "/feed_forward2/activation_10/Mul_output_0", + "/feed_forward2/in_proj_10/Add_output_0", + "gemm_output_reshape_arg_token_568", + "gemm_input_reshape_arg_token_565", + "/Add_191_output_0", + "/Transpose_128_output_0", + "/pointwise_conv2_20/Conv_output_0", + "/activation_20/Mul_output_0", + "/activation_20/Sigmoid_output_0", + "/activation_20/Sub_output_0", + "/depthwise_conv_20/Conv_output_0", + "/Concat_214_output_0", + "/Gather_219_output_0", + "/Mul_117_output_0", + "/Sigmoid_20_output_0", + "/Split_20_output_0", + "/pointwise_conv1_20/Conv_output_0", + "/Transpose_127_output_0", + "/Add_190_output_0", + "/Add_189_output_0", + "gemm_output_reshape_arg_token_562", + "gemm_input_reshape_arg_token_559", + "/Transpose_126_output_0", + "/MatMul_52_output_0", + "/Transpose_121_output_0", + "/Reshape_113_output_0", + "/Concat_202_output_0", + "/Gather_217_output_0", + "/Slice_94_output_0", + "/in_proj_10/Add_output_0", + "gemm_output_reshape_arg_token_556", + "gemm_input_reshape_arg_token_553", + "/Add_185_output_0", + "/proj_10/MatMul_output_0", + "/Mul_112_output_0", + "/Add_181_output_0", + "/CumSum_10_output_0", + "/Add_180_output_0", + "/feed_forward1/out_proj_10/Add_output_0", + "gemm_output_reshape_arg_token_550", + "gemm_input_reshape_arg_token_547", + "/feed_forward1/activation_10/Mul_output_0", + "/feed_forward1/in_proj_10/Add_output_0", + "gemm_output_reshape_arg_token_544", + "gemm_input_reshape_arg_token_541", + "/Add_179_output_0", + "/Mul_109_output_0", + "/Sub_29_output_0", + "/norm_final_9/Mul_1_output_0", + "/norm_final_9/Pow_output_0", + "/norm_final_9/Add_output_0", + "/norm_final_9/ReduceMean_output_0", + "/norm_final_9/Mul_output_0", + "/Add_178_output_0", + "/feed_forward3/out_proj_9/Add_output_0", + "gemm_output_reshape_arg_token_538", + "gemm_input_reshape_arg_token_535", + "/feed_forward3/activation_9/Mul_output_0", + "/feed_forward3/in_proj_9/Add_output_0", + "gemm_output_reshape_arg_token_532", + "gemm_input_reshape_arg_token_529", + "/Add_177_output_0", + "/Transpose_120_output_0", + "/pointwise_conv2_19/Conv_output_0", + "/activation_19/Mul_output_0", + "/activation_19/Sigmoid_output_0", + "/activation_19/Sub_output_0", + "/depthwise_conv_19/Conv_output_0", + "/Concat_200_output_0", + "/Gather_199_output_0", + "/Mul_108_output_0", + "/Sigmoid_19_output_0", + "/Split_19_output_0", + "/pointwise_conv1_19/Conv_output_0", + "/Transpose_119_output_0", + "/Add_176_output_0", + "/out_proj2_9/Add_output_0", + "gemm_output_reshape_arg_token_526", + "gemm_input_reshape_arg_token_523", + "/Transpose_118_output_0", + "/MatMul_49_output_0", + "/Transpose_117_output_0", + "/Reshape_108_output_0", + "/Concat_197_output_0", + "/in_proj2_9/MatMul_output_0", + "/Add_174_output_0", + "/feed_forward2/out_proj_9/Add_output_0", + "gemm_output_reshape_arg_token_520", + "gemm_input_reshape_arg_token_517", + "/feed_forward2/activation_9/Mul_output_0", + "/feed_forward2/in_proj_9/Add_output_0", + "gemm_output_reshape_arg_token_514", + "gemm_input_reshape_arg_token_511", + "/Add_173_output_0", + "/Transpose_116_output_0", + "/pointwise_conv2_18/Conv_output_0", + "/activation_18/Mul_output_0", + "/activation_18/Sigmoid_output_0", + "/activation_18/Sub_output_0", + "/depthwise_conv_18/Conv_output_0", + "/Concat_196_output_0", + "/Gather_198_output_0", + "/Mul_106_output_0", + "/Sigmoid_18_output_0", + "/Split_18_output_0", + "/pointwise_conv1_18/Conv_output_0", + "/Transpose_115_output_0", + "/Add_172_output_0", + "/Add_171_output_0", + "gemm_output_reshape_arg_token_508", + "gemm_input_reshape_arg_token_505", + "/Transpose_114_output_0", + "/MatMul_47_output_0", + "/Transpose_109_output_0", + "/Reshape_102_output_0", + "/Concat_184_output_0", + "/Gather_196_output_0", + "/Slice_85_output_0", + "/in_proj_9/Add_output_0", + "gemm_output_reshape_arg_token_502", + "gemm_input_reshape_arg_token_499", + "/Add_167_output_0", + "/proj_9/MatMul_output_0", + "/Mul_101_output_0", + "/Add_163_output_0", + "/CumSum_9_output_0", + "/Add_162_output_0", + "/feed_forward1/out_proj_9/Add_output_0", + "gemm_output_reshape_arg_token_496", + "gemm_input_reshape_arg_token_493", + "/feed_forward1/activation_9/Mul_output_0", + "/feed_forward1/in_proj_9/Add_output_0", + "gemm_output_reshape_arg_token_490", + "gemm_input_reshape_arg_token_487", + "/downsample_2/ReduceSum_1_output_0", + "/downsample_2/Mul_1_output_0", + "/downsample_2/Softmax_output_0", + "/downsample_2/ReduceSum_output_0", + "/downsample_2/Mul_output_0", + "/downsample_2/Reshape_output_0", + "/feed_forward1/activation_9/Sigmoid_output_0", + "/feed_forward1/activation_9/Sub_output_0", + "/Unsqueeze_372_output_0", + "/Mul_99_output_0", + "/Gather_194_output_0", + "/Softmax_9_output_0", + "/Reshape_106_output_0", + "/Add_170_output_0", + "/Reshape_105_output_0", + "/GatherElements_9_output_0", + "/Reshape_104_output_0", + "/MatMul_45_output_0", + "/Transpose_111_output_0", + "/Reshape_100_output_0", + "/Slice_86_output_0", + "/MatMul_46_output_0", + "/Transpose_110_output_0", + "/Reshape_99_output_0", + "/Slice_83_output_0", + "/Transpose_112_output_0", + "/Reshape_101_output_0", + "/Concat_183_output_0", + "/Gather_195_output_0", + "/Slice_84_output_0", + "/feed_forward2/activation_9/Sigmoid_output_0", + "/feed_forward2/activation_9/Sub_output_0", + "/Gather_197_output_0", + "/feed_forward3/activation_9/Sigmoid_output_0", + "/feed_forward3/activation_9/Sub_output_0", + "/feed_forward1/activation_10/Sigmoid_output_0", + "/feed_forward1/activation_10/Sub_output_0", + "/Unsqueeze_406_output_0", + "/Mul_110_output_0", + "/Gather_215_output_0", + "/Softmax_10_output_0", + "/Reshape_117_output_0", + "/Add_188_output_0", + "/Reshape_116_output_0", + "/GatherElements_10_output_0", + "/Reshape_115_output_0", + "/MatMul_50_output_0", + "/Transpose_123_output_0", + "/Reshape_111_output_0", + "/Slice_95_output_0", + "/MatMul_51_output_0", + "/Transpose_122_output_0", + "/Reshape_110_output_0", + "/Slice_92_output_0", + "/Transpose_124_output_0", + "/Reshape_112_output_0", + "/Concat_201_output_0", + "/Gather_216_output_0", + "/Slice_93_output_0", + "/feed_forward2/activation_10/Sigmoid_output_0", + "/feed_forward2/activation_10/Sub_output_0", + "/feed_forward3/activation_10/Sigmoid_output_0", + "/feed_forward3/activation_10/Sub_output_0", + "/feed_forward1/activation_11/Sigmoid_output_0", + "/feed_forward1/activation_11/Sub_output_0", + "/Unsqueeze_455_output_0", + "/Mul_121_output_0", + "/Gather_237_output_0", + "/Unsqueeze_603_output_0", + "/Slice_116_output_0", + "/Concat_245_output_0", + "/Gather_260_output_0", + "/Slice_113_output_0", + "/in_proj_12/Add_output_0", + "gemm_output_reshape_arg_token_664", + "gemm_input_reshape_arg_token_661", + "/Add_221_output_0", + "/proj_12/MatMul_output_0", + "/Mul_134_output_0", + "/Add_217_output_0", + "/CumSum_12_output_0", + "/Add_216_output_0", + "/feed_forward1/out_proj_12/Add_output_0", + "gemm_output_reshape_arg_token_658", + "gemm_input_reshape_arg_token_655", + "/feed_forward1/activation_12/Mul_output_0", + "/feed_forward1/in_proj_12/Add_output_0", + "gemm_output_reshape_arg_token_652", + "gemm_input_reshape_arg_token_649", + "/Add_215_output_0", + "/Mul_131_output_0", + "/Sub_35_output_0", + "/norm_final_11/Mul_1_output_0", + "/norm_final_11/Pow_output_0", + "/norm_final_11/Add_output_0", + "/norm_final_11/ReduceMean_output_0", + "/norm_final_11/Mul_output_0", + "/Add_214_output_0", + "/feed_forward3/out_proj_11/Add_output_0", + "gemm_output_reshape_arg_token_646", + "gemm_input_reshape_arg_token_643", + "/feed_forward3/activation_11/Mul_output_0", + "/feed_forward3/in_proj_11/Add_output_0", + "gemm_output_reshape_arg_token_640", + "gemm_input_reshape_arg_token_637", + "/Add_213_output_0", + "/Transpose_144_output_0", + "/pointwise_conv2_23/Conv_output_0", + "/activation_23/Mul_output_0", + "/activation_23/Sigmoid_output_0", + "/activation_23/Sub_output_0", + "/depthwise_conv_23/Conv_output_0", + "/Concat_243_output_0", + "/Gather_242_output_0", + "/Mul_130_output_0", + "/Sigmoid_23_output_0", + "/Split_23_output_0", + "/pointwise_conv1_23/Conv_output_0", + "/Transpose_143_output_0", + "/Add_212_output_0", + "/out_proj2_11/Add_output_0", + "gemm_output_reshape_arg_token_634", + "gemm_input_reshape_arg_token_631", + "/Transpose_142_output_0", + "/MatMul_59_output_0", + "/Transpose_141_output_0", + "/Reshape_130_output_0", + "/Concat_240_output_0", + "/Gather_240_output_0", + "/in_proj2_11/MatMul_output_0", + "/Add_210_output_0", + "/feed_forward2/out_proj_11/Add_output_0", + "gemm_output_reshape_arg_token_628", + "gemm_input_reshape_arg_token_625", + "/feed_forward2/activation_11/Mul_output_0", + "/feed_forward2/in_proj_11/Add_output_0", + "gemm_output_reshape_arg_token_622", + "gemm_input_reshape_arg_token_619", + "/Add_209_output_0", + "/Transpose_140_output_0", + "/pointwise_conv2_22/Conv_output_0", + "/activation_22/Mul_output_0", + "/activation_22/Sigmoid_output_0", + "/activation_22/Sub_output_0", + "/depthwise_conv_22/Conv_output_0", + "/Concat_239_output_0", + "/Gather_241_output_0", + "/Mul_128_output_0", + "/Sigmoid_22_output_0", + "/Split_22_output_0", + "/pointwise_conv1_22/Conv_output_0", + "/Transpose_139_output_0", + "/Add_208_output_0", + "/Add_207_output_0", + "gemm_output_reshape_arg_token_616", + "gemm_input_reshape_arg_token_613", + "/Transpose_138_output_0", + "/MatMul_57_output_0", + "/Transpose_133_output_0", + "/Reshape_124_output_0", + "/Softmax_11_output_0", + "/Reshape_128_output_0", + "/Add_206_output_0", + "/Reshape_127_output_0", + "/GatherElements_11_output_0", + "/Reshape_126_output_0", + "/MatMul_55_output_0", + "/Transpose_135_output_0", + "/Reshape_122_output_0", + "/Slice_105_output_0", + "/MatMul_56_output_0", + "/Transpose_134_output_0", + "/Reshape_121_output_0", + "/Slice_102_output_0", + "/Transpose_136_output_0", + "/Reshape_123_output_0", + "/Concat_226_output_0", + "/Gather_238_output_0", + "/Slice_103_output_0", + "/feed_forward2/activation_11/Sigmoid_output_0", + "/feed_forward2/activation_11/Sub_output_0", + "/feed_forward3/activation_11/Sigmoid_output_0", + "/feed_forward3/activation_11/Sub_output_0", + "/feed_forward1/activation_12/Sigmoid_output_0", + "/feed_forward1/activation_12/Sub_output_0", + "/Unsqueeze_489_output_0", + "/Mul_132_output_0", + "/Gather_258_output_0", + "/Unsqueeze_604_output_0", + "/Slice_125_output_0", + "/Concat_263_output_0", + "/Gather_281_output_0", + "/Slice_122_output_0", + "/in_proj_13/Add_output_0", + "gemm_output_reshape_arg_token_718", + "gemm_input_reshape_arg_token_715", + "/Add_239_output_0", + "/proj_13/MatMul_output_0", + "/Mul_145_output_0", + "/Add_235_output_0", + "/CumSum_13_output_0", + "/Add_234_output_0", + "/feed_forward1/out_proj_13/Add_output_0", + "gemm_output_reshape_arg_token_712", + "gemm_input_reshape_arg_token_709", + "/feed_forward1/activation_13/Mul_output_0", + "/feed_forward1/in_proj_13/Add_output_0", + "gemm_output_reshape_arg_token_706", + "gemm_input_reshape_arg_token_703", + "/Add_233_output_0", + "/Mul_142_output_0", + "/Sub_38_output_0", + "/norm_final_12/Mul_1_output_0", + "/norm_final_12/Pow_output_0", + "/norm_final_12/Add_output_0", + "/norm_final_12/ReduceMean_output_0", + "/norm_final_12/Mul_output_0", + "/Add_232_output_0", + "/feed_forward3/out_proj_12/Add_output_0", + "gemm_output_reshape_arg_token_700", + "gemm_input_reshape_arg_token_697", + "/feed_forward3/activation_12/Mul_output_0", + "/feed_forward3/in_proj_12/Add_output_0", + "gemm_output_reshape_arg_token_694", + "gemm_input_reshape_arg_token_691", + "/Add_231_output_0", + "/Transpose_156_output_0", + "/pointwise_conv2_25/Conv_output_0", + "/activation_25/Mul_output_0", + "/activation_25/Sigmoid_output_0", + "/activation_25/Sub_output_0", + "/depthwise_conv_25/Conv_output_0", + "/Concat_261_output_0", + "/Gather_263_output_0", + "/Mul_141_output_0", + "/Sigmoid_25_output_0", + "/Split_25_output_0", + "/pointwise_conv1_25/Conv_output_0", + "/Transpose_155_output_0", + "/Add_230_output_0", + "/out_proj2_12/Add_output_0", + "gemm_output_reshape_arg_token_688", + "gemm_input_reshape_arg_token_685", + "/Transpose_154_output_0", + "/MatMul_64_output_0", + "/Transpose_153_output_0", + "/Reshape_141_output_0", + "/Concat_258_output_0", + "/Gather_261_output_0", + "/in_proj2_12/MatMul_output_0", + "/Add_228_output_0", + "/feed_forward2/out_proj_12/Add_output_0", + "gemm_output_reshape_arg_token_682", + "gemm_input_reshape_arg_token_679", + "/feed_forward2/activation_12/Mul_output_0", + "/feed_forward2/in_proj_12/Add_output_0", + "gemm_output_reshape_arg_token_676", + "gemm_input_reshape_arg_token_673", + "/Add_227_output_0", + "/Transpose_152_output_0", + "/pointwise_conv2_24/Conv_output_0", + "/activation_24/Mul_output_0", + "/activation_24/Sigmoid_output_0", + "/activation_24/Sub_output_0", + "/depthwise_conv_24/Conv_output_0", + "/Concat_257_output_0", + "/Gather_262_output_0", + "/Mul_139_output_0", + "/Sigmoid_24_output_0", + "/Split_24_output_0", + "/pointwise_conv1_24/Conv_output_0", + "/Transpose_151_output_0", + "/Add_226_output_0", + "/Add_225_output_0", + "gemm_output_reshape_arg_token_670", + "gemm_input_reshape_arg_token_667", + "/Transpose_150_output_0", + "/MatMul_62_output_0", + "/Transpose_145_output_0", + "/Reshape_135_output_0", + "/Softmax_12_output_0", + "/Reshape_139_output_0", + "/Add_224_output_0", + "/Reshape_138_output_0", + "/GatherElements_12_output_0", + "/Reshape_137_output_0", + "/MatMul_60_output_0", + "/Transpose_147_output_0", + "/Reshape_133_output_0", + "/Slice_114_output_0", + "/MatMul_61_output_0", + "/Transpose_146_output_0", + "/Reshape_132_output_0", + "/Slice_111_output_0", + "/Transpose_148_output_0", + "/Reshape_134_output_0", + "/Concat_244_output_0", + "/Gather_259_output_0", + "/Slice_112_output_0", + "/feed_forward2/activation_12/Sigmoid_output_0", + "/feed_forward2/activation_12/Sub_output_0", + "/feed_forward3/activation_12/Sigmoid_output_0", + "/feed_forward3/activation_12/Sub_output_0", + "/feed_forward1/activation_13/Sigmoid_output_0", + "/feed_forward1/activation_13/Sub_output_0", + "/Unsqueeze_523_output_0", + "/Mul_143_output_0", + "/Gather_279_output_0", + "/Unsqueeze_605_output_0", + "/Slice_134_output_0", + "/Concat_281_output_0", + "/Gather_302_output_0", + "/Slice_131_output_0", + "/in_proj_14/Add_output_0", + "gemm_output_reshape_arg_token_772", + "gemm_input_reshape_arg_token_769", + "/Add_257_output_0", + "/proj_14/MatMul_output_0", + "/Mul_156_output_0", + "/Add_253_output_0", + "/CumSum_14_output_0", + "/Add_252_output_0", + "/feed_forward1/out_proj_14/Add_output_0", + "gemm_output_reshape_arg_token_766", + "gemm_input_reshape_arg_token_763", + "/feed_forward1/activation_14/Mul_output_0", + "/feed_forward1/in_proj_14/Add_output_0", + "gemm_output_reshape_arg_token_760", + "gemm_input_reshape_arg_token_757", + "/Add_251_output_0", + "/Mul_153_output_0", + "/Sub_41_output_0", + "/norm_final_13/Mul_1_output_0", + "/norm_final_13/Pow_output_0", + "/norm_final_13/Add_output_0", + "/norm_final_13/ReduceMean_output_0", + "/norm_final_13/Mul_output_0", + "/Add_250_output_0", + "/feed_forward3/out_proj_13/Add_output_0", + "gemm_output_reshape_arg_token_754", + "gemm_input_reshape_arg_token_751", + "/feed_forward3/activation_13/Mul_output_0", + "/feed_forward3/in_proj_13/Add_output_0", + "gemm_output_reshape_arg_token_748", + "gemm_input_reshape_arg_token_745", + "/Add_249_output_0", + "/Transpose_168_output_0", + "/pointwise_conv2_27/Conv_output_0", + "/activation_27/Mul_output_0", + "/activation_27/Sigmoid_output_0", + "/activation_27/Sub_output_0", + "/depthwise_conv_27/Conv_output_0", + "/Concat_279_output_0", + "/Gather_284_output_0", + "/Mul_152_output_0", + "/Sigmoid_27_output_0", + "/Split_27_output_0", + "/pointwise_conv1_27/Conv_output_0", + "/Transpose_167_output_0", + "/Add_248_output_0", + "/out_proj2_13/Add_output_0", + "gemm_output_reshape_arg_token_742", + "gemm_input_reshape_arg_token_739", + "/Transpose_166_output_0", + "/MatMul_69_output_0", + "/Transpose_165_output_0", + "/Reshape_152_output_0", + "/Concat_276_output_0", + "/Gather_282_output_0", + "/in_proj2_13/MatMul_output_0", + "/Add_246_output_0", + "/feed_forward2/out_proj_13/Add_output_0", + "gemm_output_reshape_arg_token_736", + "gemm_input_reshape_arg_token_733", + "/feed_forward2/activation_13/Mul_output_0", + "/feed_forward2/in_proj_13/Add_output_0", + "gemm_output_reshape_arg_token_730", + "gemm_input_reshape_arg_token_727", + "/Add_245_output_0", + "/Transpose_164_output_0", + "/pointwise_conv2_26/Conv_output_0", + "/activation_26/Mul_output_0", + "/activation_26/Sigmoid_output_0", + "/activation_26/Sub_output_0", + "/depthwise_conv_26/Conv_output_0", + "/Concat_275_output_0", + "/Gather_283_output_0", + "/Mul_150_output_0", + "/Sigmoid_26_output_0", + "/Split_26_output_0", + "/pointwise_conv1_26/Conv_output_0", + "/Transpose_163_output_0", + "/Add_244_output_0", + "/Add_243_output_0", + "gemm_output_reshape_arg_token_724", + "gemm_input_reshape_arg_token_721", + "/Transpose_162_output_0", + "/MatMul_67_output_0", + "/Transpose_157_output_0", + "/Reshape_146_output_0", + "/Softmax_13_output_0", + "/Reshape_150_output_0", + "/Add_242_output_0", + "/Reshape_149_output_0", + "/GatherElements_13_output_0", + "/Reshape_148_output_0", + "/MatMul_65_output_0", + "/Transpose_159_output_0", + "/Reshape_144_output_0", + "/Slice_123_output_0", + "/MatMul_66_output_0", + "/Transpose_158_output_0", + "/Reshape_143_output_0", + "/Slice_120_output_0", + "/Transpose_160_output_0", + "/Reshape_145_output_0", + "/Concat_262_output_0", + "/Gather_280_output_0", + "/Slice_121_output_0", + "/feed_forward2/activation_13/Sigmoid_output_0", + "/feed_forward2/activation_13/Sub_output_0", + "/feed_forward3/activation_13/Sigmoid_output_0", + "/feed_forward3/activation_13/Sub_output_0", + "/feed_forward1/activation_14/Sigmoid_output_0", + "/feed_forward1/activation_14/Sub_output_0", + "/Unsqueeze_557_output_0", + "/Mul_154_output_0", + "/Gather_300_output_0", + "new_cached_val_3", + "/Unsqueeze_445_output_0", + "/Slice_88_output_0", + "/Unsqueeze_446_output_0", + "/Slice_97_output_0", + "new_cached_val_2", + "/Unsqueeze_358_output_0", + "/Slice_60_output_0", + "/Unsqueeze_359_output_0", + "/Slice_69_output_0", + "/Unsqueeze_360_output_0", + "/Slice_78_output_0", + "new_cached_val_1", + "/Unsqueeze_230_output_0", + "/Slice_23_output_0", + "/Unsqueeze_231_output_0", + "/Slice_32_output_0", + "/Unsqueeze_232_output_0", + "/Slice_41_output_0", + "/Unsqueeze_233_output_0", + "/Slice_50_output_0", + "new_cached_val_0", + "/Unsqueeze_74_output_0", + "/Slice_5_output_0", + "/Unsqueeze_75_output_0", + "/Slice_14_output_0", + "new_cached_val2_4", + "/Unsqueeze_606_output_0", + "/Slice_109_output_0", + "/Unsqueeze_607_output_0", + "/Slice_118_output_0", + "/Unsqueeze_608_output_0", + "/Slice_127_output_0", + "/Unsqueeze_609_output_0", + "/Slice_136_output_0", + "/Concat_294_output_0", + "/Gather_303_output_0", + "/in_proj2_14/MatMul_output_0", + "/Add_264_output_0", + "/feed_forward2/out_proj_14/Add_output_0", + "gemm_output_reshape_arg_token_790", + "gemm_input_reshape_arg_token_787", + "/feed_forward2/activation_14/Mul_output_0", + "/feed_forward2/in_proj_14/Add_output_0", + "gemm_output_reshape_arg_token_784", + "gemm_input_reshape_arg_token_781", + "/Add_263_output_0", + "/Transpose_176_output_0", + "/pointwise_conv2_28/Conv_output_0", + "/activation_28/Mul_output_0", + "/activation_28/Sigmoid_output_0", + "/activation_28/Sub_output_0", + "/depthwise_conv_28/Conv_output_0", + "/Concat_293_output_0", + "/Gather_304_output_0", + "/Mul_161_output_0", + "/Sigmoid_28_output_0", + "/Split_28_output_0", + "/pointwise_conv1_28/Conv_output_0", + "/Transpose_175_output_0", + "/Add_262_output_0", + "/Add_261_output_0", + "gemm_output_reshape_arg_token_778", + "gemm_input_reshape_arg_token_775", + "/Transpose_174_output_0", + "/MatMul_72_output_0", + "/Transpose_169_output_0", + "/Reshape_157_output_0", + "/Softmax_14_output_0", + "/Reshape_161_output_0", + "/Add_260_output_0", + "/Reshape_160_output_0", + "/GatherElements_14_output_0", + "/Reshape_159_output_0", + "/MatMul_70_output_0", + "/Transpose_171_output_0", + "/Reshape_155_output_0", + "/Slice_132_output_0", + "/MatMul_71_output_0", + "/Transpose_170_output_0", + "/Reshape_154_output_0", + "/Slice_129_output_0", + "/Transpose_172_output_0", + "/Reshape_156_output_0", + "/Concat_280_output_0", + "/Gather_301_output_0", + "/Slice_130_output_0", + "/feed_forward2/activation_14/Sigmoid_output_0", + "/feed_forward2/activation_14/Sub_output_0", + "new_cached_val2_3", + "/Unsqueeze_447_output_0", + "/Slice_90_output_0", + "/Unsqueeze_448_output_0", + "/Slice_99_output_0", + "new_cached_val2_2", + "/Unsqueeze_361_output_0", + "/Slice_62_output_0", + "/Unsqueeze_362_output_0", + "/Slice_71_output_0", + "/Unsqueeze_363_output_0", + "/Slice_80_output_0", + "new_cached_val2_1", + "/Unsqueeze_234_output_0", + "/Slice_25_output_0", + "/Unsqueeze_235_output_0", + "/Slice_34_output_0", + "/Unsqueeze_236_output_0", + "/Slice_43_output_0", + "/Unsqueeze_237_output_0", + "/Slice_52_output_0", + "new_cached_val2_0", + "/Unsqueeze_76_output_0", + "/Slice_7_output_0", + "/Unsqueeze_77_output_0", + "/Slice_16_output_0", + "new_cached_key_4", + "/Unsqueeze_598_output_0", + "/Slice_106_output_0", + "/Unsqueeze_599_output_0", + "/Slice_115_output_0", + "/Unsqueeze_600_output_0", + "/Slice_124_output_0", + "/Unsqueeze_601_output_0", + "/Slice_133_output_0", + "new_cached_key_3", + "/Unsqueeze_443_output_0", + "/Slice_87_output_0", + "/Unsqueeze_444_output_0", + "/Slice_96_output_0", + "new_cached_key_2", + "/Unsqueeze_355_output_0", + "/Slice_59_output_0", + "/Unsqueeze_356_output_0", + "/Slice_68_output_0", + "/Unsqueeze_357_output_0", + "/Slice_77_output_0", + "new_cached_key_1", + "/Unsqueeze_226_output_0", + "/Slice_22_output_0", + "/Unsqueeze_227_output_0", + "/Slice_31_output_0", + "/Unsqueeze_228_output_0", + "/Slice_40_output_0", + "/Unsqueeze_229_output_0", + "/Slice_49_output_0", + "new_cached_key_0", + "/Unsqueeze_72_output_0", + "/Slice_4_output_0", + "/Unsqueeze_73_output_0", + "/Slice_13_output_0", + "new_cached_conv2_4", + "/Unsqueeze_614_output_0", + "/Slice_110_output_0", + "/Unsqueeze_615_output_0", + "/Slice_119_output_0", + "/Unsqueeze_616_output_0", + "/Slice_128_output_0", + "/Unsqueeze_617_output_0", + "/Slice_137_output_0", + "/Concat_297_output_0", + "/Gather_305_output_0", + "/Mul_163_output_0", + "/Sigmoid_29_output_0", + "/Split_29_output_0", + "/pointwise_conv1_29/Conv_output_0", + "/Transpose_179_output_0", + "/Add_266_output_0", + "/out_proj2_14/Add_output_0", + "gemm_output_reshape_arg_token_796", + "gemm_input_reshape_arg_token_793", + "/Transpose_178_output_0", + "/MatMul_74_output_0", + "/Transpose_177_output_0", + "/Reshape_163_output_0", + "new_cached_conv2_3", + "/Unsqueeze_451_output_0", + "/Slice_91_output_0", + "/Unsqueeze_452_output_0", + "/Slice_100_output_0", + "new_cached_conv2_2", + "/Unsqueeze_367_output_0", + "/Slice_63_output_0", + "/Unsqueeze_368_output_0", + "/Slice_72_output_0", + "/Unsqueeze_369_output_0", + "/Slice_81_output_0", + "new_cached_conv2_1", + "/Unsqueeze_242_output_0", + "/Slice_26_output_0", + "/Unsqueeze_243_output_0", + "/Slice_35_output_0", + "/Unsqueeze_244_output_0", + "/Slice_44_output_0", + "/Unsqueeze_245_output_0", + "/Slice_53_output_0", + "new_cached_conv2_0", + "/Unsqueeze_80_output_0", + "/Slice_8_output_0", + "/Unsqueeze_81_output_0", + "/Slice_17_output_0", + "new_cached_conv1_4", + "/Unsqueeze_610_output_0", + "/Slice_108_output_0", + "/Unsqueeze_611_output_0", + "/Slice_117_output_0", + "/Unsqueeze_612_output_0", + "/Slice_126_output_0", + "/Unsqueeze_613_output_0", + "/Slice_135_output_0", + "new_cached_conv1_3", + "/Unsqueeze_449_output_0", + "/Slice_89_output_0", + "/Unsqueeze_450_output_0", + "/Slice_98_output_0", + "new_cached_conv1_2", + "/Unsqueeze_364_output_0", + "/Slice_61_output_0", + "/Unsqueeze_365_output_0", + "/Slice_70_output_0", + "/Unsqueeze_366_output_0", + "/Slice_79_output_0", + "new_cached_conv1_1", + "/Unsqueeze_238_output_0", + "/Slice_24_output_0", + "/Unsqueeze_239_output_0", + "/Slice_33_output_0", + "/Unsqueeze_240_output_0", + "/Slice_42_output_0", + "/Unsqueeze_241_output_0", + "/Slice_51_output_0", + "new_cached_conv1_0", + "/Unsqueeze_78_output_0", + "/Slice_6_output_0", + "/Unsqueeze_79_output_0", + "/Slice_15_output_0", + "new_cached_avg_4", + "/Unsqueeze_594_output_0", + "/Gather_245_output_0", + "/Unsqueeze_595_output_0", + "/Gather_266_output_0", + "/Unsqueeze_596_output_0", + "/Gather_287_output_0", + "/Unsqueeze_597_output_0", + "/Gather_308_output_0", + "new_cached_avg_3", + "/Unsqueeze_441_output_0", + "/Gather_202_output_0", + "/Unsqueeze_442_output_0", + "/Gather_223_output_0", + "new_cached_avg_2", + "/Unsqueeze_352_output_0", + "/Gather_138_output_0", + "/Unsqueeze_353_output_0", + "/Gather_159_output_0", + "/Unsqueeze_354_output_0", + "/Gather_180_output_0", + "new_cached_avg_1", + "/Unsqueeze_222_output_0", + "/Gather_53_output_0", + "/Unsqueeze_223_output_0", + "/Gather_74_output_0", + "/Unsqueeze_224_output_0", + "/Gather_95_output_0", + "/Unsqueeze_225_output_0", + "/Gather_116_output_0", + "new_cached_avg_0", + "/Unsqueeze_70_output_0", + "/Gather_10_output_0", + "/Unsqueeze_71_output_0", + "/Gather_31_output_0", + "gemm_output_reshape_arg_token_814", + "gemm_input_reshape_arg_token_811", + "/Transpose_181_output_0", + "/downsample_output/ReduceSum_1_output_0", + "/downsample_output/Mul_1_output_0", + "/downsample_output/Softmax_output_0", + "/downsample_output/ReduceSum_output_0", + "/downsample_output/Mul_output_0", + "/downsample_output/Reshape_output_0", + "/out_combiner_3/Add_output_0", + "/out_combiner_3/Mul_output_0", + "/out_combiner_3/Mul_1_output_0", + "/Slice_138_output_0", + "/upsample_3/Reshape_1_output_0", + "/upsample_3/Add_output_0", + "/upsample_3/Expand_output_0", + "/upsample_3/Unsqueeze_output_0", + "/Add_269_output_0", + "/Mul_164_output_0", + "/Sub_44_output_0", + "/norm_final_14/Mul_1_output_0", + "/norm_final_14/Pow_output_0", + "/norm_final_14/Add_output_0", + "/norm_final_14/ReduceMean_output_0", + "/norm_final_14/Mul_output_0", + "/Add_268_output_0", + "/feed_forward3/out_proj_14/Add_output_0", + "gemm_output_reshape_arg_token_808", + "gemm_input_reshape_arg_token_805", + "/feed_forward3/activation_14/Mul_output_0", + "/feed_forward3/in_proj_14/Add_output_0", + "gemm_output_reshape_arg_token_802", + "gemm_input_reshape_arg_token_799", + "/Add_267_output_0", + "/Transpose_180_output_0", + "/pointwise_conv2_29/Conv_output_0", + "/activation_29/Mul_output_0", + "/activation_29/Sigmoid_output_0", + "/activation_29/Sub_output_0", + "/depthwise_conv_29/Conv_output_0", + "/feed_forward3/activation_14/Sigmoid_output_0", + "/feed_forward3/activation_14/Sub_output_0" + ], + "constantInitializers": [ + "/Add_115_output_0", + "/Add_133_output_0", + "/Add_151_output_0", + "/Add_169_output_0", + "/Add_187_output_0", + "/Add_205_output_0", + "/Add_223_output_0", + "/Add_241_output_0", + "/Add_259_output_0", + "/Add_25_output_0", + "/Add_43_output_0", + "/Add_61_output_0", + "/Add_79_output_0", + "/Add_7_output_0", + "/Add_97_output_0", + "/Concat_100_output_0", + "/Concat_102_output_0", + "/Concat_106_output_0", + "/Concat_107_output_0", + "/Concat_108_output_0", + "/Concat_10_output_0", + "/Concat_112_output_0", + "/Concat_11_output_0", + "/Concat_124_output_0", + "/Concat_125_output_0", + "/Concat_127_output_0", + "/Concat_131_output_0", + "/Concat_132_output_0", + "/Concat_133_output_0", + "/Concat_137_output_0", + "/Concat_142_output_0", + "/Concat_143_output_0", + "/Concat_145_output_0", + "/Concat_149_output_0", + "/Concat_150_output_0", + "/Concat_151_output_0", + "/Concat_155_output_0", + "/Concat_15_output_0", + "/Concat_160_output_0", + "/Concat_161_output_0", + "/Concat_163_output_0", + "/Concat_167_output_0", + "/Concat_168_output_0", + "/Concat_169_output_0", + "/Concat_173_output_0", + "/Concat_185_output_0", + "/Concat_186_output_0", + "/Concat_188_output_0", + "/Concat_192_output_0", + "/Concat_193_output_0", + "/Concat_194_output_0", + "/Concat_198_output_0", + "/Concat_203_output_0", + "/Concat_204_output_0", + "/Concat_206_output_0", + "/Concat_20_output_0", + "/Concat_210_output_0", + "/Concat_211_output_0", + "/Concat_212_output_0", + "/Concat_216_output_0", + "/Concat_21_output_0", + "/Concat_228_output_0", + "/Concat_229_output_0", + "/Concat_231_output_0", + "/Concat_235_output_0", + "/Concat_236_output_0", + "/Concat_237_output_0", + "/Concat_23_output_0", + "/Concat_241_output_0", + "/Concat_246_output_0", + "/Concat_247_output_0", + "/Concat_249_output_0", + "/Concat_253_output_0", + "/Concat_254_output_0", + "/Concat_255_output_0", + "/Concat_259_output_0", + "/Concat_264_output_0", + "/Concat_265_output_0", + "/Concat_266_output_0", + "/Concat_267_output_0", + "/Concat_271_output_0", + "/Concat_272_output_0", + "/Concat_273_output_0", + "/Concat_277_output_0", + "/Concat_27_output_0", + "/Concat_282_output_0", + "/Concat_283_output_0", + "/Concat_285_output_0", + "/Concat_289_output_0", + "/Concat_28_output_0", + "/Concat_290_output_0", + "/Concat_291_output_0", + "/Concat_295_output_0", + "/Concat_29_output_0", + "/Concat_2_output_0", + "/Concat_33_output_0", + "/Concat_3_output_0", + "/Concat_45_output_0", + "/Concat_46_output_0", + "/Concat_48_output_0", + "/Concat_52_output_0", + "/Concat_53_output_0", + "/Concat_54_output_0", + "/Concat_58_output_0", + "/Concat_5_output_0", + "/Concat_63_output_0", + "/Concat_64_output_0", + "/Concat_66_output_0", + "/Concat_70_output_0", + "/Concat_71_output_0", + "/Concat_72_output_0", + "/Concat_76_output_0", + "/Concat_81_output_0", + "/Concat_82_output_0", + "/Concat_84_output_0", + "/Concat_88_output_0", + "/Concat_89_output_0", + "/Concat_90_output_0", + "/Concat_94_output_0", + "/Concat_99_output_0", + "/Concat_9_output_0", + "/Constant_1113_output_0", + "/Constant_1118_output_0", + "/Constant_246_output_0", + "/Constant_285_output_0", + "/Constant_404_output_0", + "/Constant_568_output_0", + "/Constant_614_output_0", + "/Constant_631_output_0", + "/Constant_69_output_0", + "/Constant_830_output_0", + "/Constant_964_output_0", + "/Reshape_107_new_shape", + "/Reshape_109_new_shape", + "/Reshape_10_new_shape", + "/Reshape_118_new_shape", + "/Reshape_120_new_shape", + "/Reshape_129_new_shape", + "/Reshape_131_new_shape", + "/Reshape_140_new_shape", + "/Reshape_142_new_shape", + "/Reshape_151_new_shape", + "/Reshape_153_new_shape", + "/Reshape_162_new_shape", + "/Reshape_164_new_shape", + "/Reshape_19_new_shape", + "/Reshape_21_new_shape", + "/Reshape_30_new_shape", + "/Reshape_32_new_shape", + "/Reshape_41_new_shape", + "/Reshape_43_new_shape", + "/Reshape_52_new_shape", + "/Reshape_54_new_shape", + "/Reshape_63_new_shape", + "/Reshape_65_new_shape", + "/Reshape_74_new_shape", + "/Reshape_76_new_shape", + "/Reshape_85_new_shape", + "/Reshape_87_new_shape", + "/Reshape_8_new_shape", + "/Reshape_96_new_shape", + "/Reshape_98_new_shape", + "/Transpose_101_output_0", + "/Transpose_113_output_0", + "/Transpose_125_output_0", + "/Transpose_137_output_0", + "/Transpose_149_output_0", + "/Transpose_161_output_0", + "/Transpose_173_output_0", + "/Transpose_17_output_0", + "/Transpose_29_output_0", + "/Transpose_41_output_0", + "/Transpose_53_output_0", + "/Transpose_5_output_0", + "/Transpose_65_output_0", + "/Transpose_77_output_0", + "/Transpose_89_output_0", + "/Unsqueeze_246_output_0", + "/Unsqueeze_321_output_0", + "/Unsqueeze_344_output_0", + "/Unsqueeze_370_output_0", + "/Unsqueeze_40_output_0", + "/Unsqueeze_411_output_0", + "/Unsqueeze_434_output_0", + "/Unsqueeze_453_output_0", + "/Unsqueeze_562_output_0", + "/Unsqueeze_585_output_0", + "/Unsqueeze_618_output_0", + "/Unsqueeze_63_output_0", + "/downsample/Concat_output_0", + "/downsample_1/Concat_output_0", + "/downsample_2/Concat_output_0", + "/downsample_3/Concat_output_0", + "/downsample_output/Concat_output_0", + "/encoder_embed/Constant_7_output_0", + "/encoder_embed/Reshape_new_shape", + "/encoder_pos_1/Constant_2_output_0", + "/feed_forward1/activation_2/Constant_output_0", + "/norm_final/Constant_output_0", + "/norm_final_1/Constant_1_output_0", + "/norm_final_1/Constant_output_0", + "/norm_final_10/Constant_output_0", + "/norm_final_11/Constant_output_0", + "/norm_final_12/Constant_output_0", + "/norm_final_13/Constant_output_0", + "/norm_final_14/Constant_output_0", + "/norm_final_2/Constant_output_0", + "/norm_final_3/Constant_output_0", + "/norm_final_4/Constant_output_0", + "/norm_final_5/Constant_output_0", + "/norm_final_6/Constant_output_0", + "/norm_final_7/Constant_output_0", + "/norm_final_8/Constant_output_0", + "/norm_final_9/Constant_output_0", + "/upsample/Concat_1_output_0", + "/upsample/Where_output_0", + "/upsample_1/Concat_1_output_0", + "/upsample_1/Where_output_0", + "/upsample_2/Concat_1_output_0", + "/upsample_2/Where_output_0", + "/upsample_3/Concat_1_output_0", + "/upsample_3/Constant_output_0", + "/upsample_3/Where_output_0", + "encoder.downsample_output.query", + "encoder.encoder_embed.conv.0.bias", + "encoder.encoder_embed.conv.0.weight", + "encoder.encoder_embed.conv.3.bias", + "encoder.encoder_embed.conv.3.weight", + "encoder.encoder_embed.conv.6.bias", + "encoder.encoder_embed.conv.6.weight", + "encoder.encoder_embed.out.bias", + "encoder.encoders.0.layers.0.bypass_scale", + "encoder.encoders.0.layers.0.conv_module1.depthwise_conv.bias", + "encoder.encoders.0.layers.0.conv_module1.depthwise_conv.weight", + "encoder.encoders.0.layers.0.conv_module1.pointwise_conv1.bias", + "encoder.encoders.0.layers.0.conv_module1.pointwise_conv1.weight", + "encoder.encoders.0.layers.0.conv_module1.pointwise_conv2.bias", + "encoder.encoders.0.layers.0.conv_module1.pointwise_conv2.weight", + "encoder.encoders.0.layers.0.conv_module2.depthwise_conv.bias", + "encoder.encoders.0.layers.0.conv_module2.depthwise_conv.weight", + "encoder.encoders.0.layers.0.conv_module2.pointwise_conv1.bias", + "encoder.encoders.0.layers.0.conv_module2.pointwise_conv1.weight", + "encoder.encoders.0.layers.0.conv_module2.pointwise_conv2.bias", + "encoder.encoders.0.layers.0.conv_module2.pointwise_conv2.weight", + "encoder.encoders.0.layers.0.feed_forward1.in_proj.bias", + "encoder.encoders.0.layers.0.feed_forward1.out_proj.bias", + "encoder.encoders.0.layers.0.feed_forward2.in_proj.bias", + "encoder.encoders.0.layers.0.feed_forward2.out_proj.bias", + "encoder.encoders.0.layers.0.feed_forward3.in_proj.bias", + "encoder.encoders.0.layers.0.feed_forward3.out_proj.bias", + "encoder.encoders.0.layers.0.self_attn.in_proj.bias", + "encoder.encoders.0.layers.0.self_attn.out_proj.bias", + "encoder.encoders.0.layers.0.self_attn.out_proj2.bias", + "encoder.encoders.0.layers.1.bypass_scale", + "encoder.encoders.0.layers.1.conv_module1.depthwise_conv.bias", + "encoder.encoders.0.layers.1.conv_module1.depthwise_conv.weight", + "encoder.encoders.0.layers.1.conv_module1.pointwise_conv1.bias", + "encoder.encoders.0.layers.1.conv_module1.pointwise_conv1.weight", + "encoder.encoders.0.layers.1.conv_module1.pointwise_conv2.bias", + "encoder.encoders.0.layers.1.conv_module1.pointwise_conv2.weight", + "encoder.encoders.0.layers.1.conv_module2.depthwise_conv.bias", + "encoder.encoders.0.layers.1.conv_module2.depthwise_conv.weight", + "encoder.encoders.0.layers.1.conv_module2.pointwise_conv1.bias", + "encoder.encoders.0.layers.1.conv_module2.pointwise_conv1.weight", + "encoder.encoders.0.layers.1.conv_module2.pointwise_conv2.bias", + "encoder.encoders.0.layers.1.conv_module2.pointwise_conv2.weight", + "encoder.encoders.0.layers.1.feed_forward1.in_proj.bias", + "encoder.encoders.0.layers.1.feed_forward1.out_proj.bias", + "encoder.encoders.0.layers.1.feed_forward2.in_proj.bias", + "encoder.encoders.0.layers.1.feed_forward2.out_proj.bias", + "encoder.encoders.0.layers.1.feed_forward3.in_proj.bias", + "encoder.encoders.0.layers.1.feed_forward3.out_proj.bias", + "encoder.encoders.0.layers.1.self_attn.in_proj.bias", + "encoder.encoders.0.layers.1.self_attn.out_proj.bias", + "encoder.encoders.0.layers.1.self_attn.out_proj2.bias", + "encoder.encoders.1.downsample.query", + "encoder.encoders.1.encoder.layers.0.bypass_scale", + "encoder.encoders.1.encoder.layers.0.conv_module1.depthwise_conv.bias", + "encoder.encoders.1.encoder.layers.0.conv_module1.depthwise_conv.weight", + "encoder.encoders.1.encoder.layers.0.conv_module1.pointwise_conv1.bias", + "encoder.encoders.1.encoder.layers.0.conv_module1.pointwise_conv1.weight", + "encoder.encoders.1.encoder.layers.0.conv_module1.pointwise_conv2.bias", + "encoder.encoders.1.encoder.layers.0.conv_module1.pointwise_conv2.weight", + "encoder.encoders.1.encoder.layers.0.conv_module2.depthwise_conv.bias", + "encoder.encoders.1.encoder.layers.0.conv_module2.depthwise_conv.weight", + "encoder.encoders.1.encoder.layers.0.conv_module2.pointwise_conv1.bias", + "encoder.encoders.1.encoder.layers.0.conv_module2.pointwise_conv1.weight", + "encoder.encoders.1.encoder.layers.0.conv_module2.pointwise_conv2.bias", + "encoder.encoders.1.encoder.layers.0.conv_module2.pointwise_conv2.weight", + "encoder.encoders.1.encoder.layers.0.feed_forward1.in_proj.bias", + "encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.bias", + "encoder.encoders.1.encoder.layers.0.feed_forward2.in_proj.bias", + "encoder.encoders.1.encoder.layers.0.feed_forward2.out_proj.bias", + "encoder.encoders.1.encoder.layers.0.feed_forward3.in_proj.bias", + "encoder.encoders.1.encoder.layers.0.feed_forward3.out_proj.bias", + "encoder.encoders.1.encoder.layers.0.self_attn.in_proj.bias", + "encoder.encoders.1.encoder.layers.0.self_attn.out_proj.bias", + "encoder.encoders.1.encoder.layers.0.self_attn.out_proj2.bias", + "encoder.encoders.1.encoder.layers.1.bypass_scale", + "encoder.encoders.1.encoder.layers.1.conv_module1.depthwise_conv.bias", + "encoder.encoders.1.encoder.layers.1.conv_module1.depthwise_conv.weight", + "encoder.encoders.1.encoder.layers.1.conv_module1.pointwise_conv1.bias", + "encoder.encoders.1.encoder.layers.1.conv_module1.pointwise_conv1.weight", + "encoder.encoders.1.encoder.layers.1.conv_module1.pointwise_conv2.bias", + "encoder.encoders.1.encoder.layers.1.conv_module1.pointwise_conv2.weight", + "encoder.encoders.1.encoder.layers.1.conv_module2.depthwise_conv.bias", + "encoder.encoders.1.encoder.layers.1.conv_module2.depthwise_conv.weight", + "encoder.encoders.1.encoder.layers.1.conv_module2.pointwise_conv1.bias", + "encoder.encoders.1.encoder.layers.1.conv_module2.pointwise_conv1.weight", + "encoder.encoders.1.encoder.layers.1.conv_module2.pointwise_conv2.bias", + "encoder.encoders.1.encoder.layers.1.conv_module2.pointwise_conv2.weight", + "encoder.encoders.1.encoder.layers.1.feed_forward1.in_proj.bias", + "encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.bias", + "encoder.encoders.1.encoder.layers.1.feed_forward2.in_proj.bias", + "encoder.encoders.1.encoder.layers.1.feed_forward2.out_proj.bias", + "encoder.encoders.1.encoder.layers.1.feed_forward3.in_proj.bias", + "encoder.encoders.1.encoder.layers.1.feed_forward3.out_proj.bias", + "encoder.encoders.1.encoder.layers.1.self_attn.in_proj.bias", + "encoder.encoders.1.encoder.layers.1.self_attn.out_proj.bias", + "encoder.encoders.1.encoder.layers.1.self_attn.out_proj2.bias", + "encoder.encoders.1.encoder.layers.2.bypass_scale", + "encoder.encoders.1.encoder.layers.2.conv_module1.depthwise_conv.bias", + "encoder.encoders.1.encoder.layers.2.conv_module1.depthwise_conv.weight", + "encoder.encoders.1.encoder.layers.2.conv_module1.pointwise_conv1.bias", + "encoder.encoders.1.encoder.layers.2.conv_module1.pointwise_conv1.weight", + "encoder.encoders.1.encoder.layers.2.conv_module1.pointwise_conv2.bias", + "encoder.encoders.1.encoder.layers.2.conv_module1.pointwise_conv2.weight", + "encoder.encoders.1.encoder.layers.2.conv_module2.depthwise_conv.bias", + "encoder.encoders.1.encoder.layers.2.conv_module2.depthwise_conv.weight", + "encoder.encoders.1.encoder.layers.2.conv_module2.pointwise_conv1.bias", + "encoder.encoders.1.encoder.layers.2.conv_module2.pointwise_conv1.weight", + "encoder.encoders.1.encoder.layers.2.conv_module2.pointwise_conv2.bias", + "encoder.encoders.1.encoder.layers.2.conv_module2.pointwise_conv2.weight", + "encoder.encoders.1.encoder.layers.2.feed_forward1.in_proj.bias", + "encoder.encoders.1.encoder.layers.2.feed_forward1.out_proj.bias", + "encoder.encoders.1.encoder.layers.2.feed_forward2.in_proj.bias", + "encoder.encoders.1.encoder.layers.2.feed_forward2.out_proj.bias", + "encoder.encoders.1.encoder.layers.2.feed_forward3.in_proj.bias", + "encoder.encoders.1.encoder.layers.2.feed_forward3.out_proj.bias", + "encoder.encoders.1.encoder.layers.2.self_attn.in_proj.bias", + "encoder.encoders.1.encoder.layers.2.self_attn.out_proj.bias", + "encoder.encoders.1.encoder.layers.2.self_attn.out_proj2.bias", + "encoder.encoders.1.encoder.layers.3.bypass_scale", + "encoder.encoders.1.encoder.layers.3.conv_module1.depthwise_conv.bias", + "encoder.encoders.1.encoder.layers.3.conv_module1.depthwise_conv.weight", + "encoder.encoders.1.encoder.layers.3.conv_module1.pointwise_conv1.bias", + "encoder.encoders.1.encoder.layers.3.conv_module1.pointwise_conv1.weight", + "encoder.encoders.1.encoder.layers.3.conv_module1.pointwise_conv2.bias", + "encoder.encoders.1.encoder.layers.3.conv_module1.pointwise_conv2.weight", + "encoder.encoders.1.encoder.layers.3.conv_module2.depthwise_conv.bias", + "encoder.encoders.1.encoder.layers.3.conv_module2.depthwise_conv.weight", + "encoder.encoders.1.encoder.layers.3.conv_module2.pointwise_conv1.bias", + "encoder.encoders.1.encoder.layers.3.conv_module2.pointwise_conv1.weight", + "encoder.encoders.1.encoder.layers.3.conv_module2.pointwise_conv2.bias", + "encoder.encoders.1.encoder.layers.3.conv_module2.pointwise_conv2.weight", + "encoder.encoders.1.encoder.layers.3.feed_forward1.in_proj.bias", + "encoder.encoders.1.encoder.layers.3.feed_forward1.out_proj.bias", + "encoder.encoders.1.encoder.layers.3.feed_forward2.in_proj.bias", + "encoder.encoders.1.encoder.layers.3.feed_forward2.out_proj.bias", + "encoder.encoders.1.encoder.layers.3.feed_forward3.in_proj.bias", + "encoder.encoders.1.encoder.layers.3.feed_forward3.out_proj.bias", + "encoder.encoders.1.encoder.layers.3.self_attn.in_proj.bias", + "encoder.encoders.1.encoder.layers.3.self_attn.out_proj.bias", + "encoder.encoders.1.encoder.layers.3.self_attn.out_proj2.bias", + "encoder.encoders.1.out_combiner.weight1", + "encoder.encoders.2.downsample.query", + "encoder.encoders.2.encoder.layers.0.bypass_scale", + "encoder.encoders.2.encoder.layers.0.conv_module1.depthwise_conv.bias", + "encoder.encoders.2.encoder.layers.0.conv_module1.depthwise_conv.weight", + "encoder.encoders.2.encoder.layers.0.conv_module1.pointwise_conv1.bias", + "encoder.encoders.2.encoder.layers.0.conv_module1.pointwise_conv1.weight", + "encoder.encoders.2.encoder.layers.0.conv_module1.pointwise_conv2.bias", + "encoder.encoders.2.encoder.layers.0.conv_module1.pointwise_conv2.weight", + "encoder.encoders.2.encoder.layers.0.conv_module2.depthwise_conv.bias", + "encoder.encoders.2.encoder.layers.0.conv_module2.depthwise_conv.weight", + "encoder.encoders.2.encoder.layers.0.conv_module2.pointwise_conv1.bias", + "encoder.encoders.2.encoder.layers.0.conv_module2.pointwise_conv1.weight", + "encoder.encoders.2.encoder.layers.0.conv_module2.pointwise_conv2.bias", + "encoder.encoders.2.encoder.layers.0.conv_module2.pointwise_conv2.weight", + "encoder.encoders.2.encoder.layers.0.feed_forward1.in_proj.bias", + "encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.bias", + "encoder.encoders.2.encoder.layers.0.feed_forward2.in_proj.bias", + "encoder.encoders.2.encoder.layers.0.feed_forward2.out_proj.bias", + "encoder.encoders.2.encoder.layers.0.feed_forward3.in_proj.bias", + "encoder.encoders.2.encoder.layers.0.feed_forward3.out_proj.bias", + "encoder.encoders.2.encoder.layers.0.self_attn.in_proj.bias", + "encoder.encoders.2.encoder.layers.0.self_attn.out_proj.bias", + "encoder.encoders.2.encoder.layers.0.self_attn.out_proj2.bias", + "encoder.encoders.2.encoder.layers.1.bypass_scale", + "encoder.encoders.2.encoder.layers.1.conv_module1.depthwise_conv.bias", + "encoder.encoders.2.encoder.layers.1.conv_module1.depthwise_conv.weight", + "encoder.encoders.2.encoder.layers.1.conv_module1.pointwise_conv1.bias", + "encoder.encoders.2.encoder.layers.1.conv_module1.pointwise_conv1.weight", + "encoder.encoders.2.encoder.layers.1.conv_module1.pointwise_conv2.bias", + "encoder.encoders.2.encoder.layers.1.conv_module1.pointwise_conv2.weight", + "encoder.encoders.2.encoder.layers.1.conv_module2.depthwise_conv.bias", + "encoder.encoders.2.encoder.layers.1.conv_module2.depthwise_conv.weight", + "encoder.encoders.2.encoder.layers.1.conv_module2.pointwise_conv1.bias", + "encoder.encoders.2.encoder.layers.1.conv_module2.pointwise_conv1.weight", + "encoder.encoders.2.encoder.layers.1.conv_module2.pointwise_conv2.bias", + "encoder.encoders.2.encoder.layers.1.conv_module2.pointwise_conv2.weight", + "encoder.encoders.2.encoder.layers.1.feed_forward1.in_proj.bias", + "encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.bias", + "encoder.encoders.2.encoder.layers.1.feed_forward2.in_proj.bias", + "encoder.encoders.2.encoder.layers.1.feed_forward2.out_proj.bias", + "encoder.encoders.2.encoder.layers.1.feed_forward3.in_proj.bias", + "encoder.encoders.2.encoder.layers.1.feed_forward3.out_proj.bias", + "encoder.encoders.2.encoder.layers.1.self_attn.in_proj.bias", + "encoder.encoders.2.encoder.layers.1.self_attn.out_proj.bias", + "encoder.encoders.2.encoder.layers.1.self_attn.out_proj2.bias", + "encoder.encoders.2.encoder.layers.2.bypass_scale", + "encoder.encoders.2.encoder.layers.2.conv_module1.depthwise_conv.bias", + "encoder.encoders.2.encoder.layers.2.conv_module1.depthwise_conv.weight", + "encoder.encoders.2.encoder.layers.2.conv_module1.pointwise_conv1.bias", + "encoder.encoders.2.encoder.layers.2.conv_module1.pointwise_conv1.weight", + "encoder.encoders.2.encoder.layers.2.conv_module1.pointwise_conv2.bias", + "encoder.encoders.2.encoder.layers.2.conv_module1.pointwise_conv2.weight", + "encoder.encoders.2.encoder.layers.2.conv_module2.depthwise_conv.bias", + "encoder.encoders.2.encoder.layers.2.conv_module2.depthwise_conv.weight", + "encoder.encoders.2.encoder.layers.2.conv_module2.pointwise_conv1.bias", + "encoder.encoders.2.encoder.layers.2.conv_module2.pointwise_conv1.weight", + "encoder.encoders.2.encoder.layers.2.conv_module2.pointwise_conv2.bias", + "encoder.encoders.2.encoder.layers.2.conv_module2.pointwise_conv2.weight", + "encoder.encoders.2.encoder.layers.2.feed_forward1.in_proj.bias", + "encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.bias", + "encoder.encoders.2.encoder.layers.2.feed_forward2.in_proj.bias", + "encoder.encoders.2.encoder.layers.2.feed_forward2.out_proj.bias", + "encoder.encoders.2.encoder.layers.2.feed_forward3.in_proj.bias", + "encoder.encoders.2.encoder.layers.2.feed_forward3.out_proj.bias", + "encoder.encoders.2.encoder.layers.2.self_attn.in_proj.bias", + "encoder.encoders.2.encoder.layers.2.self_attn.out_proj.bias", + "encoder.encoders.2.encoder.layers.2.self_attn.out_proj2.bias", + "encoder.encoders.2.out_combiner.weight1", + "encoder.encoders.3.downsample.query", + "encoder.encoders.3.encoder.layers.0.bypass_scale", + "encoder.encoders.3.encoder.layers.0.conv_module1.depthwise_conv.bias", + "encoder.encoders.3.encoder.layers.0.conv_module1.depthwise_conv.weight", + "encoder.encoders.3.encoder.layers.0.conv_module1.pointwise_conv1.bias", + "encoder.encoders.3.encoder.layers.0.conv_module1.pointwise_conv1.weight", + "encoder.encoders.3.encoder.layers.0.conv_module1.pointwise_conv2.bias", + "encoder.encoders.3.encoder.layers.0.conv_module1.pointwise_conv2.weight", + "encoder.encoders.3.encoder.layers.0.conv_module2.depthwise_conv.bias", + "encoder.encoders.3.encoder.layers.0.conv_module2.depthwise_conv.weight", + "encoder.encoders.3.encoder.layers.0.conv_module2.pointwise_conv1.bias", + "encoder.encoders.3.encoder.layers.0.conv_module2.pointwise_conv1.weight", + "encoder.encoders.3.encoder.layers.0.conv_module2.pointwise_conv2.bias", + "encoder.encoders.3.encoder.layers.0.conv_module2.pointwise_conv2.weight", + "encoder.encoders.3.encoder.layers.0.feed_forward1.in_proj.bias", + "encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.bias", + "encoder.encoders.3.encoder.layers.0.feed_forward2.in_proj.bias", + "encoder.encoders.3.encoder.layers.0.feed_forward2.out_proj.bias", + "encoder.encoders.3.encoder.layers.0.feed_forward3.in_proj.bias", + "encoder.encoders.3.encoder.layers.0.feed_forward3.out_proj.bias", + "encoder.encoders.3.encoder.layers.0.self_attn.in_proj.bias", + "encoder.encoders.3.encoder.layers.0.self_attn.out_proj.bias", + "encoder.encoders.3.encoder.layers.0.self_attn.out_proj2.bias", + "encoder.encoders.3.encoder.layers.1.bypass_scale", + "encoder.encoders.3.encoder.layers.1.conv_module1.depthwise_conv.bias", + "encoder.encoders.3.encoder.layers.1.conv_module1.depthwise_conv.weight", + "encoder.encoders.3.encoder.layers.1.conv_module1.pointwise_conv1.bias", + "encoder.encoders.3.encoder.layers.1.conv_module1.pointwise_conv1.weight", + "encoder.encoders.3.encoder.layers.1.conv_module1.pointwise_conv2.bias", + "encoder.encoders.3.encoder.layers.1.conv_module1.pointwise_conv2.weight", + "encoder.encoders.3.encoder.layers.1.conv_module2.depthwise_conv.bias", + "encoder.encoders.3.encoder.layers.1.conv_module2.depthwise_conv.weight", + "encoder.encoders.3.encoder.layers.1.conv_module2.pointwise_conv1.bias", + "encoder.encoders.3.encoder.layers.1.conv_module2.pointwise_conv1.weight", + "encoder.encoders.3.encoder.layers.1.conv_module2.pointwise_conv2.bias", + "encoder.encoders.3.encoder.layers.1.conv_module2.pointwise_conv2.weight", + "encoder.encoders.3.encoder.layers.1.feed_forward1.in_proj.bias", + "encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.bias", + "encoder.encoders.3.encoder.layers.1.feed_forward2.in_proj.bias", + "encoder.encoders.3.encoder.layers.1.feed_forward2.out_proj.bias", + "encoder.encoders.3.encoder.layers.1.feed_forward3.in_proj.bias", + "encoder.encoders.3.encoder.layers.1.feed_forward3.out_proj.bias", + "encoder.encoders.3.encoder.layers.1.self_attn.in_proj.bias", + "encoder.encoders.3.encoder.layers.1.self_attn.out_proj.bias", + "encoder.encoders.3.encoder.layers.1.self_attn.out_proj2.bias", + "encoder.encoders.3.out_combiner.weight1", + "encoder.encoders.4.downsample.query", + "encoder.encoders.4.encoder.layers.0.bypass_scale", + "encoder.encoders.4.encoder.layers.0.conv_module1.depthwise_conv.bias", + "encoder.encoders.4.encoder.layers.0.conv_module1.depthwise_conv.weight", + "encoder.encoders.4.encoder.layers.0.conv_module1.pointwise_conv1.bias", + "encoder.encoders.4.encoder.layers.0.conv_module1.pointwise_conv1.weight", + "encoder.encoders.4.encoder.layers.0.conv_module1.pointwise_conv2.bias", + "encoder.encoders.4.encoder.layers.0.conv_module1.pointwise_conv2.weight", + "encoder.encoders.4.encoder.layers.0.conv_module2.depthwise_conv.bias", + "encoder.encoders.4.encoder.layers.0.conv_module2.depthwise_conv.weight", + "encoder.encoders.4.encoder.layers.0.conv_module2.pointwise_conv1.bias", + "encoder.encoders.4.encoder.layers.0.conv_module2.pointwise_conv1.weight", + "encoder.encoders.4.encoder.layers.0.conv_module2.pointwise_conv2.bias", + "encoder.encoders.4.encoder.layers.0.conv_module2.pointwise_conv2.weight", + "encoder.encoders.4.encoder.layers.0.feed_forward1.in_proj.bias", + "encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.bias", + "encoder.encoders.4.encoder.layers.0.feed_forward2.in_proj.bias", + "encoder.encoders.4.encoder.layers.0.feed_forward2.out_proj.bias", + "encoder.encoders.4.encoder.layers.0.feed_forward3.in_proj.bias", + "encoder.encoders.4.encoder.layers.0.feed_forward3.out_proj.bias", + "encoder.encoders.4.encoder.layers.0.self_attn.in_proj.bias", + "encoder.encoders.4.encoder.layers.0.self_attn.out_proj.bias", + "encoder.encoders.4.encoder.layers.0.self_attn.out_proj2.bias", + "encoder.encoders.4.encoder.layers.1.bypass_scale", + "encoder.encoders.4.encoder.layers.1.conv_module1.depthwise_conv.bias", + "encoder.encoders.4.encoder.layers.1.conv_module1.depthwise_conv.weight", + "encoder.encoders.4.encoder.layers.1.conv_module1.pointwise_conv1.bias", + "encoder.encoders.4.encoder.layers.1.conv_module1.pointwise_conv1.weight", + "encoder.encoders.4.encoder.layers.1.conv_module1.pointwise_conv2.bias", + "encoder.encoders.4.encoder.layers.1.conv_module1.pointwise_conv2.weight", + "encoder.encoders.4.encoder.layers.1.conv_module2.depthwise_conv.bias", + "encoder.encoders.4.encoder.layers.1.conv_module2.depthwise_conv.weight", + "encoder.encoders.4.encoder.layers.1.conv_module2.pointwise_conv1.bias", + "encoder.encoders.4.encoder.layers.1.conv_module2.pointwise_conv1.weight", + "encoder.encoders.4.encoder.layers.1.conv_module2.pointwise_conv2.bias", + "encoder.encoders.4.encoder.layers.1.conv_module2.pointwise_conv2.weight", + "encoder.encoders.4.encoder.layers.1.feed_forward1.in_proj.bias", + "encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.bias", + "encoder.encoders.4.encoder.layers.1.feed_forward2.in_proj.bias", + "encoder.encoders.4.encoder.layers.1.feed_forward2.out_proj.bias", + "encoder.encoders.4.encoder.layers.1.feed_forward3.in_proj.bias", + "encoder.encoders.4.encoder.layers.1.feed_forward3.out_proj.bias", + "encoder.encoders.4.encoder.layers.1.self_attn.in_proj.bias", + "encoder.encoders.4.encoder.layers.1.self_attn.out_proj.bias", + "encoder.encoders.4.encoder.layers.1.self_attn.out_proj2.bias", + "encoder.encoders.4.encoder.layers.2.bypass_scale", + "encoder.encoders.4.encoder.layers.2.conv_module1.depthwise_conv.bias", + "encoder.encoders.4.encoder.layers.2.conv_module1.depthwise_conv.weight", + "encoder.encoders.4.encoder.layers.2.conv_module1.pointwise_conv1.bias", + "encoder.encoders.4.encoder.layers.2.conv_module1.pointwise_conv1.weight", + "encoder.encoders.4.encoder.layers.2.conv_module1.pointwise_conv2.bias", + "encoder.encoders.4.encoder.layers.2.conv_module1.pointwise_conv2.weight", + "encoder.encoders.4.encoder.layers.2.conv_module2.depthwise_conv.bias", + "encoder.encoders.4.encoder.layers.2.conv_module2.depthwise_conv.weight", + "encoder.encoders.4.encoder.layers.2.conv_module2.pointwise_conv1.bias", + "encoder.encoders.4.encoder.layers.2.conv_module2.pointwise_conv1.weight", + "encoder.encoders.4.encoder.layers.2.conv_module2.pointwise_conv2.bias", + "encoder.encoders.4.encoder.layers.2.conv_module2.pointwise_conv2.weight", + "encoder.encoders.4.encoder.layers.2.feed_forward1.in_proj.bias", + "encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.bias", + "encoder.encoders.4.encoder.layers.2.feed_forward2.in_proj.bias", + "encoder.encoders.4.encoder.layers.2.feed_forward2.out_proj.bias", + "encoder.encoders.4.encoder.layers.2.feed_forward3.in_proj.bias", + "encoder.encoders.4.encoder.layers.2.feed_forward3.out_proj.bias", + "encoder.encoders.4.encoder.layers.2.self_attn.in_proj.bias", + "encoder.encoders.4.encoder.layers.2.self_attn.out_proj.bias", + "encoder.encoders.4.encoder.layers.2.self_attn.out_proj2.bias", + "encoder.encoders.4.encoder.layers.3.bypass_scale", + "encoder.encoders.4.encoder.layers.3.conv_module1.depthwise_conv.bias", + "encoder.encoders.4.encoder.layers.3.conv_module1.depthwise_conv.weight", + "encoder.encoders.4.encoder.layers.3.conv_module1.pointwise_conv1.bias", + "encoder.encoders.4.encoder.layers.3.conv_module1.pointwise_conv1.weight", + "encoder.encoders.4.encoder.layers.3.conv_module1.pointwise_conv2.bias", + "encoder.encoders.4.encoder.layers.3.conv_module1.pointwise_conv2.weight", + "encoder.encoders.4.encoder.layers.3.conv_module2.depthwise_conv.bias", + "encoder.encoders.4.encoder.layers.3.conv_module2.depthwise_conv.weight", + "encoder.encoders.4.encoder.layers.3.conv_module2.pointwise_conv1.bias", + "encoder.encoders.4.encoder.layers.3.conv_module2.pointwise_conv1.weight", + "encoder.encoders.4.encoder.layers.3.conv_module2.pointwise_conv2.bias", + "encoder.encoders.4.encoder.layers.3.conv_module2.pointwise_conv2.weight", + "encoder.encoders.4.encoder.layers.3.feed_forward1.in_proj.bias", + "encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.bias", + "encoder.encoders.4.encoder.layers.3.feed_forward2.in_proj.bias", + "encoder.encoders.4.encoder.layers.3.feed_forward2.out_proj.bias", + "encoder.encoders.4.encoder.layers.3.feed_forward3.in_proj.bias", + "encoder.encoders.4.encoder.layers.3.feed_forward3.out_proj.bias", + "encoder.encoders.4.encoder.layers.3.self_attn.in_proj.bias", + "encoder.encoders.4.encoder.layers.3.self_attn.out_proj.bias", + "encoder.encoders.4.encoder.layers.3.self_attn.out_proj2.bias", + "encoder.encoders.4.out_combiner.weight1", + "encoder.skip_modules.4.weight1", + "encoder_proj.bias", + "gemm_input_shape_token_0", + "gemm_input_shape_token_102", + "gemm_input_shape_token_108", + "gemm_input_shape_token_114", + "gemm_input_shape_token_12", + "gemm_input_shape_token_120", + "gemm_input_shape_token_132", + "gemm_input_shape_token_138", + "gemm_input_shape_token_150", + "gemm_input_shape_token_156", + "gemm_input_shape_token_162", + "gemm_input_shape_token_168", + "gemm_input_shape_token_174", + "gemm_input_shape_token_186", + "gemm_input_shape_token_192", + "gemm_input_shape_token_204", + "gemm_input_shape_token_210", + "gemm_input_shape_token_216", + "gemm_input_shape_token_222", + "gemm_input_shape_token_228", + "gemm_input_shape_token_24", + "gemm_input_shape_token_240", + "gemm_input_shape_token_246", + "gemm_input_shape_token_258", + "gemm_input_shape_token_264", + "gemm_input_shape_token_270", + "gemm_input_shape_token_276", + "gemm_input_shape_token_282", + "gemm_input_shape_token_294", + "gemm_input_shape_token_30", + "gemm_input_shape_token_300", + "gemm_input_shape_token_312", + "gemm_input_shape_token_318", + "gemm_input_shape_token_324", + "gemm_input_shape_token_330", + "gemm_input_shape_token_336", + "gemm_input_shape_token_348", + "gemm_input_shape_token_354", + "gemm_input_shape_token_366", + "gemm_input_shape_token_372", + "gemm_input_shape_token_378", + "gemm_input_shape_token_384", + "gemm_input_shape_token_390", + "gemm_input_shape_token_402", + "gemm_input_shape_token_408", + "gemm_input_shape_token_42", + "gemm_input_shape_token_420", + "gemm_input_shape_token_426", + "gemm_input_shape_token_432", + "gemm_input_shape_token_438", + "gemm_input_shape_token_444", + "gemm_input_shape_token_456", + "gemm_input_shape_token_462", + "gemm_input_shape_token_474", + "gemm_input_shape_token_48", + "gemm_input_shape_token_480", + "gemm_input_shape_token_486", + "gemm_input_shape_token_492", + "gemm_input_shape_token_498", + "gemm_input_shape_token_510", + "gemm_input_shape_token_516", + "gemm_input_shape_token_528", + "gemm_input_shape_token_534", + "gemm_input_shape_token_54", + "gemm_input_shape_token_540", + "gemm_input_shape_token_546", + "gemm_input_shape_token_552", + "gemm_input_shape_token_564", + "gemm_input_shape_token_570", + "gemm_input_shape_token_582", + "gemm_input_shape_token_588", + "gemm_input_shape_token_594", + "gemm_input_shape_token_6", + "gemm_input_shape_token_60", + "gemm_input_shape_token_600", + "gemm_input_shape_token_606", + "gemm_input_shape_token_618", + "gemm_input_shape_token_624", + "gemm_input_shape_token_636", + "gemm_input_shape_token_642", + "gemm_input_shape_token_648", + "gemm_input_shape_token_654", + "gemm_input_shape_token_66", + "gemm_input_shape_token_660", + "gemm_input_shape_token_672", + "gemm_input_shape_token_678", + "gemm_input_shape_token_690", + "gemm_input_shape_token_696", + "gemm_input_shape_token_702", + "gemm_input_shape_token_708", + "gemm_input_shape_token_714", + "gemm_input_shape_token_726", + "gemm_input_shape_token_732", + "gemm_input_shape_token_744", + "gemm_input_shape_token_750", + "gemm_input_shape_token_756", + "gemm_input_shape_token_762", + "gemm_input_shape_token_768", + "gemm_input_shape_token_78", + "gemm_input_shape_token_780", + "gemm_input_shape_token_786", + "gemm_input_shape_token_798", + "gemm_input_shape_token_804", + "gemm_input_shape_token_810", + "gemm_input_shape_token_84", + "gemm_input_shape_token_96", + "gemm_output_shape", + "gemm_output_shape_token_105", + "gemm_output_shape_token_111", + "gemm_output_shape_token_117", + "gemm_output_shape_token_123", + "gemm_output_shape_token_129", + "gemm_output_shape_token_135", + "gemm_output_shape_token_141", + "gemm_output_shape_token_147", + "gemm_output_shape_token_15", + "gemm_output_shape_token_153", + "gemm_output_shape_token_159", + "gemm_output_shape_token_165", + "gemm_output_shape_token_171", + "gemm_output_shape_token_177", + "gemm_output_shape_token_183", + "gemm_output_shape_token_189", + "gemm_output_shape_token_195", + "gemm_output_shape_token_201", + "gemm_output_shape_token_207", + "gemm_output_shape_token_21", + "gemm_output_shape_token_213", + "gemm_output_shape_token_219", + "gemm_output_shape_token_225", + "gemm_output_shape_token_231", + "gemm_output_shape_token_237", + "gemm_output_shape_token_243", + "gemm_output_shape_token_249", + "gemm_output_shape_token_255", + "gemm_output_shape_token_261", + "gemm_output_shape_token_267", + "gemm_output_shape_token_27", + "gemm_output_shape_token_273", + "gemm_output_shape_token_279", + "gemm_output_shape_token_285", + "gemm_output_shape_token_291", + "gemm_output_shape_token_297", + "gemm_output_shape_token_3", + "gemm_output_shape_token_303", + "gemm_output_shape_token_309", + "gemm_output_shape_token_315", + "gemm_output_shape_token_321", + "gemm_output_shape_token_327", + "gemm_output_shape_token_33", + "gemm_output_shape_token_333", + "gemm_output_shape_token_339", + "gemm_output_shape_token_345", + "gemm_output_shape_token_351", + "gemm_output_shape_token_357", + "gemm_output_shape_token_363", + "gemm_output_shape_token_369", + "gemm_output_shape_token_375", + "gemm_output_shape_token_381", + "gemm_output_shape_token_387", + "gemm_output_shape_token_39", + "gemm_output_shape_token_393", + "gemm_output_shape_token_399", + "gemm_output_shape_token_405", + "gemm_output_shape_token_411", + "gemm_output_shape_token_417", + "gemm_output_shape_token_423", + "gemm_output_shape_token_429", + "gemm_output_shape_token_435", + "gemm_output_shape_token_441", + "gemm_output_shape_token_447", + "gemm_output_shape_token_45", + "gemm_output_shape_token_453", + "gemm_output_shape_token_459", + "gemm_output_shape_token_465", + "gemm_output_shape_token_471", + "gemm_output_shape_token_477", + "gemm_output_shape_token_483", + "gemm_output_shape_token_489", + "gemm_output_shape_token_495", + "gemm_output_shape_token_501", + "gemm_output_shape_token_507", + "gemm_output_shape_token_51", + "gemm_output_shape_token_513", + "gemm_output_shape_token_519", + "gemm_output_shape_token_525", + "gemm_output_shape_token_531", + "gemm_output_shape_token_537", + "gemm_output_shape_token_543", + "gemm_output_shape_token_549", + "gemm_output_shape_token_555", + "gemm_output_shape_token_561", + "gemm_output_shape_token_567", + "gemm_output_shape_token_57", + "gemm_output_shape_token_573", + "gemm_output_shape_token_579", + "gemm_output_shape_token_585", + "gemm_output_shape_token_591", + "gemm_output_shape_token_597", + "gemm_output_shape_token_603", + "gemm_output_shape_token_609", + "gemm_output_shape_token_615", + "gemm_output_shape_token_621", + "gemm_output_shape_token_627", + "gemm_output_shape_token_63", + "gemm_output_shape_token_633", + "gemm_output_shape_token_639", + "gemm_output_shape_token_645", + "gemm_output_shape_token_651", + "gemm_output_shape_token_657", + "gemm_output_shape_token_663", + "gemm_output_shape_token_669", + "gemm_output_shape_token_675", + "gemm_output_shape_token_681", + "gemm_output_shape_token_687", + "gemm_output_shape_token_69", + "gemm_output_shape_token_693", + "gemm_output_shape_token_699", + "gemm_output_shape_token_705", + "gemm_output_shape_token_711", + "gemm_output_shape_token_717", + "gemm_output_shape_token_723", + "gemm_output_shape_token_729", + "gemm_output_shape_token_735", + "gemm_output_shape_token_741", + "gemm_output_shape_token_747", + "gemm_output_shape_token_75", + "gemm_output_shape_token_753", + "gemm_output_shape_token_759", + "gemm_output_shape_token_765", + "gemm_output_shape_token_771", + "gemm_output_shape_token_777", + "gemm_output_shape_token_783", + "gemm_output_shape_token_789", + "gemm_output_shape_token_795", + "gemm_output_shape_token_801", + "gemm_output_shape_token_807", + "gemm_output_shape_token_81", + "gemm_output_shape_token_87", + "gemm_output_shape_token_9", + "gemm_output_shape_token_93", + "gemm_output_shape_token_99", + "onnx::Add_7732", + "onnx::Add_7927", + "onnx::Add_8060", + "onnx::Add_8318", + "onnx::MatMul_7353", + "onnx::MatMul_7356", + "onnx::MatMul_7357", + "onnx::MatMul_7359", + "onnx::MatMul_7360", + "onnx::MatMul_7398", + "onnx::MatMul_7403", + "onnx::MatMul_7404", + "onnx::MatMul_7405", + "onnx::MatMul_7411", + "onnx::MatMul_7416", + "onnx::MatMul_7417", + "onnx::MatMul_7418", + "onnx::MatMul_7419", + "onnx::MatMul_7421", + "onnx::MatMul_7422", + "onnx::MatMul_7460", + "onnx::MatMul_7465", + "onnx::MatMul_7466", + "onnx::MatMul_7467", + "onnx::MatMul_7473", + "onnx::MatMul_7478", + "onnx::MatMul_7479", + "onnx::MatMul_7483", + "onnx::MatMul_7484", + "onnx::MatMul_7486", + "onnx::MatMul_7487", + "onnx::MatMul_7525", + "onnx::MatMul_7530", + "onnx::MatMul_7531", + "onnx::MatMul_7532", + "onnx::MatMul_7538", + "onnx::MatMul_7543", + "onnx::MatMul_7544", + "onnx::MatMul_7545", + "onnx::MatMul_7546", + "onnx::MatMul_7548", + "onnx::MatMul_7549", + "onnx::MatMul_7587", + "onnx::MatMul_7592", + "onnx::MatMul_7593", + "onnx::MatMul_7594", + "onnx::MatMul_7600", + "onnx::MatMul_7605", + "onnx::MatMul_7606", + "onnx::MatMul_7607", + "onnx::MatMul_7608", + "onnx::MatMul_7610", + "onnx::MatMul_7611", + "onnx::MatMul_7649", + "onnx::MatMul_7654", + "onnx::MatMul_7655", + "onnx::MatMul_7656", + "onnx::MatMul_7662", + "onnx::MatMul_7667", + "onnx::MatMul_7668", + "onnx::MatMul_7669", + "onnx::MatMul_7670", + "onnx::MatMul_7672", + "onnx::MatMul_7673", + "onnx::MatMul_7711", + "onnx::MatMul_7716", + "onnx::MatMul_7717", + "onnx::MatMul_7718", + "onnx::MatMul_7724", + "onnx::MatMul_7729", + "onnx::MatMul_7730", + "onnx::MatMul_7740", + "onnx::MatMul_7741", + "onnx::MatMul_7743", + "onnx::MatMul_7744", + "onnx::MatMul_7782", + "onnx::MatMul_7787", + "onnx::MatMul_7788", + "onnx::MatMul_7789", + "onnx::MatMul_7795", + "onnx::MatMul_7800", + "onnx::MatMul_7801", + "onnx::MatMul_7802", + "onnx::MatMul_7803", + "onnx::MatMul_7805", + "onnx::MatMul_7806", + "onnx::MatMul_7844", + "onnx::MatMul_7849", + "onnx::MatMul_7850", + "onnx::MatMul_7851", + "onnx::MatMul_7857", + "onnx::MatMul_7862", + "onnx::MatMul_7863", + "onnx::MatMul_7864", + "onnx::MatMul_7865", + "onnx::MatMul_7867", + "onnx::MatMul_7868", + "onnx::MatMul_7906", + "onnx::MatMul_7911", + "onnx::MatMul_7912", + "onnx::MatMul_7913", + "onnx::MatMul_7919", + "onnx::MatMul_7924", + "onnx::MatMul_7925", + "onnx::MatMul_7935", + "onnx::MatMul_7936", + "onnx::MatMul_7938", + "onnx::MatMul_7939", + "onnx::MatMul_7977", + "onnx::MatMul_7982", + "onnx::MatMul_7983", + "onnx::MatMul_7984", + "onnx::MatMul_7990", + "onnx::MatMul_7995", + "onnx::MatMul_7996", + "onnx::MatMul_7997", + "onnx::MatMul_7998", + "onnx::MatMul_8000", + "onnx::MatMul_8001", + "onnx::MatMul_8039", + "onnx::MatMul_8044", + "onnx::MatMul_8045", + "onnx::MatMul_8046", + "onnx::MatMul_8052", + "onnx::MatMul_8057", + "onnx::MatMul_8058", + "onnx::MatMul_8069", + "onnx::MatMul_8070", + "onnx::MatMul_8072", + "onnx::MatMul_8073", + "onnx::MatMul_8111", + "onnx::MatMul_8116", + "onnx::MatMul_8117", + "onnx::MatMul_8118", + "onnx::MatMul_8124", + "onnx::MatMul_8129", + "onnx::MatMul_8130", + "onnx::MatMul_8131", + "onnx::MatMul_8132", + "onnx::MatMul_8134", + "onnx::MatMul_8135", + "onnx::MatMul_8173", + "onnx::MatMul_8178", + "onnx::MatMul_8179", + "onnx::MatMul_8180", + "onnx::MatMul_8186", + "onnx::MatMul_8191", + "onnx::MatMul_8192", + "onnx::MatMul_8193", + "onnx::MatMul_8194", + "onnx::MatMul_8196", + "onnx::MatMul_8197", + "onnx::MatMul_8235", + "onnx::MatMul_8240", + "onnx::MatMul_8241", + "onnx::MatMul_8242", + "onnx::MatMul_8248", + "onnx::MatMul_8253", + "onnx::MatMul_8254", + "onnx::MatMul_8255", + "onnx::MatMul_8256", + "onnx::MatMul_8258", + "onnx::MatMul_8259", + "onnx::MatMul_8297", + "onnx::MatMul_8302", + "onnx::MatMul_8303", + "onnx::MatMul_8304", + "onnx::MatMul_8310", + "onnx::MatMul_8315", + "onnx::MatMul_8316", + "onnx::MatMul_8324", + "onnx::Mul_7736", + "onnx::Mul_7931", + "onnx::Mul_8064", + "onnx::Mul_8065", + "onnx::Mul_8322", + "onnx::Unsqueeze_910" + ], + "device": "VAIML", + "vaimlParam": { + "vaimlModelPath": "./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0", + "aiAnalyzerProfiling": true, + "deviceName": "stx", + "outputShapes": [ + { + "shapes": [ + "4", + "144", + "1", + "96" + ] + }, + { + "shapes": [ + "2", + "36", + "1", + "96" + ] + }, + { + "shapes": [ + "3", + "72", + "1", + "96" + ] + }, + { + "shapes": [ + "4", + "144", + "1", + "96" + ] + }, + { + "shapes": [ + "2", + "288", + "1", + "96" + ] + }, + { + "shapes": [ + "4", + "144", + "1", + "96" + ] + }, + { + "shapes": [ + "2", + "36", + "1", + "96" + ] + }, + { + "shapes": [ + "3", + "72", + "1", + "96" + ] + }, + { + "shapes": [ + "4", + "144", + "1", + "96" + ] + }, + { + "shapes": [ + "2", + "288", + "1", + "96" + ] + }, + { + "shapes": [ + "4", + "144", + "1", + "192" + ] + }, + { + "shapes": [ + "2", + "36", + "1", + "192" + ] + }, + { + "shapes": [ + "3", + "72", + "1", + "192" + ] + }, + { + "shapes": [ + "4", + "144", + "1", + "192" + ] + }, + { + "shapes": [ + "2", + "288", + "1", + "192" + ] + }, + { + "shapes": [ + "4", + "1", + "384", + "30" + ] + }, + { + "shapes": [ + "2", + "1", + "384", + "30" + ] + }, + { + "shapes": [ + "3", + "1", + "384", + "30" + ] + }, + { + "shapes": [ + "4", + "1", + "384", + "30" + ] + }, + { + "shapes": [ + "2", + "1", + "384", + "30" + ] + }, + { + "shapes": [ + "4", + "1", + "384", + "30" + ] + }, + { + "shapes": [ + "2", + "1", + "384", + "30" + ] + }, + { + "shapes": [ + "3", + "1", + "384", + "30" + ] + }, + { + "shapes": [ + "4", + "1", + "384", + "30" + ] + }, + { + "shapes": [ + "2", + "1", + "384", + "30" + ] + }, + { + "shapes": [ + "4", + "1", + "384" + ] + }, + { + "shapes": [ + "2", + "1", + "384" + ] + }, + { + "shapes": [ + "3", + "1", + "384" + ] + }, + { + "shapes": [ + "4", + "1", + "384" + ] + }, + { + "shapes": [ + "2", + "1", + "384" + ] + }, + { + "shapes": [ + "36", + "512" + ] + } + ], + "inputShapes": [ + { + "shapes": [ + "1", + "1" + ] + }, + { + "shapes": [ + "1", + "1" + ] + }, + { + "shapes": [ + "1", + "1" + ] + }, + { + "shapes": [ + "1", + "1" + ] + }, + { + "shapes": [ + "1", + "1" + ] + }, + { + "shapes": [ + "1", + "1" + ] + }, + { + "shapes": [ + "1", + "1" + ] + }, + { + "shapes": [ + "1", + "1" + ] + }, + { + "shapes": [ + "1", + "1" + ] + }, + { + "shapes": [ + "1", + "1" + ] + }, + { + "shapes": [ + "1", + "1" + ] + }, + { + "shapes": [ + "1", + "1" + ] + }, + { + "shapes": [ + "1", + "1" + ] + }, + { + "shapes": [ + "1", + "1" + ] + }, + { + "shapes": [ + "1", + "1" + ] + }, + { + "shapes": [ + "36", + "1", + "1" + ] + }, + { + "shapes": [ + "36", + "1", + "1" + ] + }, + { + "shapes": [ + "36", + "1", + "1" + ] + }, + { + "shapes": [ + "18", + "1", + "1" + ] + }, + { + "shapes": [ + "18", + "1", + "1" + ] + }, + { + "shapes": [ + "18", + "1", + "1" + ] + }, + { + "shapes": [ + "9", + "1", + "1" + ] + }, + { + "shapes": [ + "72", + "1", + "1" + ] + }, + { + "shapes": [ + "9", + "1", + "1" + ] + }, + { + "shapes": [ + "36", + "1", + "1" + ] + }, + { + "shapes": [ + "36", + "1", + "1" + ] + }, + { + "shapes": [ + "72", + "1", + "1" + ] + }, + { + "shapes": [ + "36", + "1", + "1" + ] + }, + { + "shapes": [ + "36", + "1", + "1" + ] + }, + { + "shapes": [ + "36", + "1", + "1" + ] + }, + { + "shapes": [ + "1", + "1", + "151", + "80" + ] + }, + { + "shapes": [ + "2", + "1", + "384" + ] + }, + { + "shapes": [ + "4", + "1", + "384" + ] + }, + { + "shapes": [ + "3", + "1", + "384" + ] + }, + { + "shapes": [ + "2", + "1", + "384" + ] + }, + { + "shapes": [ + "4", + "1", + "384" + ] + }, + { + "shapes": [ + "2", + "1", + "384", + "30" + ] + }, + { + "shapes": [ + "4", + "1", + "384", + "30" + ] + }, + { + "shapes": [ + "3", + "1", + "384", + "30" + ] + }, + { + "shapes": [ + "2", + "1", + "384", + "30" + ] + }, + { + "shapes": [ + "4", + "1", + "384", + "30" + ] + }, + { + "shapes": [ + "2", + "1", + "384", + "30" + ] + }, + { + "shapes": [ + "4", + "1", + "384", + "30" + ] + }, + { + "shapes": [ + "3", + "1", + "384", + "30" + ] + }, + { + "shapes": [ + "2", + "1", + "384", + "30" + ] + }, + { + "shapes": [ + "4", + "1", + "384", + "30" + ] + }, + { + "shapes": [ + "2", + "288", + "1", + "192" + ] + }, + { + "shapes": [ + "4", + "144", + "1", + "192" + ] + }, + { + "shapes": [ + "3", + "72", + "1", + "192" + ] + }, + { + "shapes": [ + "2", + "36", + "1", + "192" + ] + }, + { + "shapes": [ + "4", + "144", + "1", + "192" + ] + }, + { + "shapes": [ + "2", + "288", + "1", + "96" + ] + }, + { + "shapes": [ + "4", + "144", + "1", + "96" + ] + }, + { + "shapes": [ + "3", + "72", + "1", + "96" + ] + }, + { + "shapes": [ + "2", + "36", + "1", + "96" + ] + }, + { + "shapes": [ + "4", + "144", + "1", + "96" + ] + }, + { + "shapes": [ + "2", + "288", + "1", + "96" + ] + }, + { + "shapes": [ + "4", + "144", + "1", + "96" + ] + }, + { + "shapes": [ + "3", + "72", + "1", + "96" + ] + }, + { + "shapes": [ + "2", + "36", + "1", + "96" + ] + }, + { + "shapes": [ + "4", + "144", + "1", + "96" + ] + } + ], + "vaimlUnarchivePath": "./encoder-epoch-99-avg-1-T-151_static_20250527144825", + "subgraphName": "vaiml_par_0", + "priority": "normal", + "configJson": "vitisai_config.json", + "deviceBatchSize": 1, + "inputNames": [ + "/Cast_10_output_0", + "/Cast_15_output_0", + "/Cast_20_output_0", + "/Cast_25_output_0", + "/Cast_30_output_0", + "/Cast_35_output_0", + "/Cast_40_output_0", + "/Cast_45_output_0", + "/Cast_50_output_0", + "/Cast_55_output_0", + "/Cast_5_output_0", + "/Cast_60_output_0", + "/Cast_65_output_0", + "/Cast_70_output_0", + "/Cast_output_0", + "/Unsqueeze_120_output_0", + "/Unsqueeze_154_output_0", + "/Unsqueeze_188_output_0", + "/Unsqueeze_251_output_0", + "/Unsqueeze_285_output_0", + "/Unsqueeze_319_output_0", + "/Unsqueeze_375_output_0", + "/Unsqueeze_38_output_0", + "/Unsqueeze_409_output_0", + "/Unsqueeze_458_output_0", + "/Unsqueeze_492_output_0", + "/Unsqueeze_4_output_0", + "/Unsqueeze_526_output_0", + "/Unsqueeze_560_output_0", + "/Unsqueeze_86_output_0", + "/encoder_embed/Unsqueeze_output_0", + "cached_avg_0", + "cached_avg_1", + "cached_avg_2", + "cached_avg_3", + "cached_avg_4", + "cached_conv1_0", + "cached_conv1_1", + "cached_conv1_2", + "cached_conv1_3", + "cached_conv1_4", + "cached_conv2_0", + "cached_conv2_1", + "cached_conv2_2", + "cached_conv2_3", + "cached_conv2_4", + "cached_key_0", + "cached_key_1", + "cached_key_2", + "cached_key_3", + "cached_key_4", + "cached_val2_0", + "cached_val2_1", + "cached_val2_2", + "cached_val2_3", + "cached_val2_4", + "cached_val_0", + "cached_val_1", + "cached_val_2", + "cached_val_3", + "cached_val_4" + ], + "outputNames": [ + "new_cached_val_4", + "new_cached_val_3", + "new_cached_val_2", + "new_cached_val_1", + "new_cached_val_0", + "new_cached_val2_4", + "new_cached_val2_3", + "new_cached_val2_2", + "new_cached_val2_1", + "new_cached_val2_0", + "new_cached_key_4", + "new_cached_key_3", + "new_cached_key_2", + "new_cached_key_1", + "new_cached_key_0", + "new_cached_conv2_4", + "new_cached_conv2_3", + "new_cached_conv2_2", + "new_cached_conv2_1", + "new_cached_conv2_0", + "new_cached_conv1_4", + "new_cached_conv1_3", + "new_cached_conv1_2", + "new_cached_conv1_1", + "new_cached_conv1_0", + "new_cached_avg_4", + "new_cached_avg_3", + "new_cached_avg_2", + "new_cached_avg_1", + "new_cached_avg_0", + "gemm_output_reshape_arg_token_814" + ] + } + } + ], + "config": { + "passes": [ + { + "name": "init", + "plugin": "vaip-pass_init" + }, + { + "name": "vaiml_partition", + "plugin": "vaip-pass_vaiml_partition", + "vaimlConfig": { + "thresholdGopsPercent": 0, + "keepOutputs": true + } + } + ], + "cacheDir": "./", + "cacheKey": "encoder-epoch-99-avg-1-T-151_static_20250527144825", + "version": { + "versionInfos": [ + { + "packageName": "vai-rt", + "commit": "git_id: 1597619bc1301d6cbf04c2d418580d9ee0920b40 build 2456 at 20250528072124", + "version": "v1.0" + }, + { + "packageName": "onnxruntime", + "commit": "ce6617838c5b2290e727cde6c2f7bc678e44cf1a", + "version": "ce6617" + }, + { + "packageName": "glog", + "commit": "b33e3bad4c46c8a6345525fd822af355e5ef9446", + "version": "v0.6.0" + }, + { + "packageName": "gsl", + "commit": "a3534567187d2edc428efd3f13466ff75fe5805c", + "version": "v4.0.0" + }, + { + "packageName": "json", + "commit": "bc889afb4c5bf1c0d8ee29ef35eaaf4c8bef8a5d", + "version": "v3.11.2" + }, + { + "packageName": "protobuf", + "commit": "f0dc78d7e6e331b8c6bb2d5283e06aa26883ca7c", + "version": "v21.12" + }, + { + "packageName": "onnxruntime", + "commit": "ce6617838c5b2290e727cde6c2f7bc678e44cf1a", + "version": "test-tag-5255-gce661" + }, + { + "packageName": "xcompiler", + "commit": "123a1919231e049cd1cd9a61fb4e9121a279e0c1", + "version": "rc3-2002-g123a" + }, + { + "packageName": "xir", + "commit": "67ef0dad98acdbd8750d0df34031394b349d305d", + "version": "rc3-66-g67ef" + }, + { + "packageName": "target-factory", + "commit": "b0cf91e186c01cb0e2313e376e23fe05b64ef667", + "version": "SR-945504-455-gb0cf" + }, + { + "packageName": "trace-logging", + "commit": "02720323ad17edf139f1be97bb8eb56c6c24d06d", + "version": "rc3-4-g0272" + }, + { + "packageName": "vart", + "commit": "e2e6cef3e3540f763fc79b9ab3bc4faeebf3af1f", + "version": "rc3-292-ge2e6" + }, + { + "packageName": "graph-engine", + "commit": "1205492245ca4b93f5c9818f43d097b893db20fc", + "version": "rc3-243-g1205" + }, + { + "packageName": "DynamicDispatch", + "commit": "813e37472838597128bfce0f6d7c12864ef87fac", + "version": "RAI-1.3GA-601-g813e3" + }, + { + "packageName": "VAIP", + "commit": "4256075606e5502208704b2344b25089d93427a0", + "version": "cp_dev_no_morphizen-282-g42560" + }, + { + "packageName": "morphizen", + "commit": "6404eb08b480a043e83be6c369c08788385d9d7d", + "version": "6404eb" + } + ] + }, + "onnxPath": "/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/models/encoder-epoch-99-avg-1-T-151_static_20250527144825.onnx", + "sessionOptions": { + "config_file": "vitisai_config.json" + }, + "aiAnalyzerVisualization": true, + "aiAnalyzerProfiling": true, + "onnxMd5File": "54bcb1ebe11c2810ff44b6aa9d673d26", + "onnxMd5A": "1f49658531cf7d7051b7447f688f65ad", + "onnxMd5B": "dd2d91d29fcbcdd10f059e15813a2eaf" + }, + "events": [ + { + "name": "before_compile_onnx_model_internal", + "ph": "X", + "ts": "814806", + "pid": "3549848", + "tid": "3549848", + "args": { + "memUsage": {} + }, + "dur": "232015" + }, + { + "id": "before_compile_onnx_model_internal_mem_usage_1", + "ph": "v", + "ts": "814806", + "pid": "3549848", + "args": { + "dumps": { + "process_totals": { + "peak_memory": "0", + "current_memory": "0" + } + } + } + }, + { + "id": "before_compile_onnx_model_internal_mem_usage_2", + "ph": "v", + "ts": "1046821", + "pid": "3549848", + "args": { + "dumps": { + "process_totals": { + "peak_memory": "0", + "current_memory": "0" + } + } + } + }, + { + "name": "check_cache_hit", + "ph": "X", + "ts": "1046901", + "pid": "3549848", + "tid": "3549848", + "args": { + "memUsage": {} + }, + "dur": "2" + }, + { + "id": "check_cache_hit_mem_usage_1", + "ph": "v", + "ts": "1046901", + "pid": "3549848", + "args": { + "dumps": { + "process_totals": { + "peak_memory": "0", + "current_memory": "0" + } + } + } + }, + { + "id": "check_cache_hit_mem_usage_2", + "ph": "v", + "ts": "1046904", + "pid": "3549848", + "args": { + "dumps": { + "process_totals": { + "peak_memory": "0", + "current_memory": "0" + } + } + } + }, + { + "name": "0-init@vaip-pass_init", + "ph": "X", + "ts": "1046950", + "pid": "3549848", + "tid": "3549848", + "args": { + "memUsage": {} + }, + "dur": "30" + }, + { + "id": "0-init@vaip-pass_init_mem_usage_1", + "ph": "v", + "ts": "1046950", + "pid": "3549848", + "args": { + "dumps": { + "process_totals": { + "peak_memory": "0", + "current_memory": "0" + } + } + } + }, + { + "id": "0-init@vaip-pass_init_mem_usage_2", + "ph": "v", + "ts": "1046981", + "pid": "3549848", + "args": { + "dumps": { + "process_totals": { + "peak_memory": "0", + "current_memory": "0" + } + } + } + }, + { + "name": "1-vaiml_partition@vaip-pass_vaiml_partition", + "ph": "X", + "ts": "1046984", + "pid": "3549848", + "tid": "3549848", + "args": { + "memUsage": {} + }, + "dur": "8947175900" + }, + { + "id": "1-vaiml_partition@vaip-pass_vaiml_partition_mem_usage_1", + "ph": "v", + "ts": "1046984", + "pid": "3549848", + "args": { + "dumps": { + "process_totals": { + "peak_memory": "0", + "current_memory": "0" + } + } + } + }, + { + "id": "1-vaiml_partition@vaip-pass_vaiml_partition_mem_usage_2", + "ph": "v", + "ts": "8948222884", + "pid": "3549848", + "args": { + "dumps": { + "process_totals": { + "peak_memory": "0", + "current_memory": "0" + } + } + } + }, + { + "name": "update_cache", + "ph": "X", + "ts": "1046907", + "pid": "3549848", + "tid": "3549848", + "args": { + "memUsage": {} + }, + "dur": "8947176345" + }, + { + "id": "update_cache_mem_usage_1", + "ph": "v", + "ts": "1046907", + "pid": "3549848", + "args": { + "dumps": { + "process_totals": { + "peak_memory": "0", + "current_memory": "0" + } + } + } + }, + { + "id": "update_cache_mem_usage_2", + "ph": "v", + "ts": "8948223253", + "pid": "3549848", + "args": { + "dumps": { + "process_totals": { + "peak_memory": "0", + "current_memory": "0" + } + } + } + } + ], + "cacheFiles": [ + "context.json" + ] +} diff --git a/final-vaiml-pass-summary.txt b/final-vaiml-pass-summary.txt new file mode 100644 index 0000000000000000000000000000000000000000..367223df1b88b6e7bf219845a8a1db3ce03b945b --- /dev/null +++ b/final-vaiml-pass-summary.txt @@ -0,0 +1,24 @@ +--------- Final Summary of VAIML Pass ---------- +VAIP commit: 4256075606e5502208704b2344b25089d93427a0 +Model: /proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/models/encoder-epoch-99-avg-1-T-151_static_20250527144825.onnx +Model signature: 54bcb1ebe11c2810ff44b6aa9d673d26 +Device: stx +Model data type: float32 +Device data type: bfloat16 +Number of operators in the model: 2222 +GOPs of the model: 7.1802 +Number of operators supported by VAIML: 2065 (92.934%) +GOPs supported by VAIML: 7.180 (100.000%) +Number of subgraphs supported by VAIML: 1 + NOTE: Number of subgraph supported by VAIML does not include subgraphs below GOPs% threshold. +Number of operators offloaded by VAIML: 2065 (92.934%) +GOPs offloaded by VAIML: 7.180 (100.000%) +Number of subgraphs offloaded by VAIML: 1 +Number of subgraphs with compilation errors (fall back to CPU): 0 +Number of subgraphs below 0% GOPs threshold (fall back to CPU): 0 +Stats for offloaded subgraphs +Subgraph vaiml_par_0 stats: + Operators: 2065 (92.934%) + GOPs : 7.180 (100.000%) OPs: 7,180,197,684 + + diff --git a/gops.csv b/gops.csv new file mode 100644 index 0000000000000000000000000000000000000000..5799d16223514cdff398e44f88806ef80f10dffd --- /dev/null +++ b/gops.csv @@ -0,0 +1,2223 @@ +Node,OPs,Note +/Add,55296 +/Add_1,55296 +/Add_10,55296 +/Add_100,27648 +/Add_101,27648 +/Add_102,27648 +/Add_104,27648 +/Add_105,27648 +/Add_106,27648 +/Add_107,27648 +/Add_108,13824 +/Add_109,13824 +/Add_11,55296 +/Add_111,36 +/Add_112,2 +/Add_113,13824 +/Add_116,25920 +/Add_118,13824 +/Add_119,13824 +/Add_12,55296 +/Add_120,13824 +/Add_122,13824 +/Add_123,13824 +/Add_124,13824 +/Add_125,13824 +/Add_126,13824 +/Add_127,13824 +/Add_129,36 +/Add_130,2 +/Add_131,13824 +/Add_134,25920 +/Add_136,13824 +/Add_137,13824 +/Add_138,13824 +/Add_14,55296 +/Add_140,13824 +/Add_141,13824 +/Add_142,13824 +/Add_143,13824 +/Add_144,13824 +/Add_145,13824 +/Add_147,36 +/Add_148,2 +/Add_149,13824 +/Add_15,55296 +/Add_152,25920 +/Add_154,13824 +/Add_155,13824 +/Add_156,13824 +/Add_158,13824 +/Add_159,13824 +/Add_16,55296 +/Add_160,13824 +/Add_161,13824 +/Add_162,6912 +/Add_163,6912 +/Add_165,18 +/Add_166,2 +/Add_167,6912 +/Add_17,55296 +/Add_170,6480 +/Add_172,6912 +/Add_173,6912 +/Add_174,6912 +/Add_176,6912 +/Add_177,6912 +/Add_178,6912 +/Add_179,6912 +/Add_18,55296 +/Add_180,6912 +/Add_181,6912 +/Add_183,18 +/Add_184,2 +/Add_185,6912 +/Add_188,6480 +/Add_19,55296 +/Add_190,6912 +/Add_191,6912 +/Add_192,6912 +/Add_194,6912 +/Add_195,6912 +/Add_196,6912 +/Add_197,6912 +/Add_198,27648 +/Add_199,27648 +/Add_201,72 +/Add_202,2 +/Add_203,27648 +/Add_206,103680 +/Add_208,27648 +/Add_209,27648 +/Add_21,144 +/Add_210,27648 +/Add_212,27648 +/Add_213,27648 +/Add_214,27648 +/Add_215,27648 +/Add_216,27648 +/Add_217,27648 +/Add_219,72 +/Add_22,2 +/Add_220,2 +/Add_221,27648 +/Add_224,103680 +/Add_226,27648 +/Add_227,27648 +/Add_228,27648 +/Add_23,55296 +/Add_230,27648 +/Add_231,27648 +/Add_232,27648 +/Add_233,27648 +/Add_234,27648 +/Add_235,27648 +/Add_237,72 +/Add_238,2 +/Add_239,27648 +/Add_242,103680 +/Add_244,27648 +/Add_245,27648 +/Add_246,27648 +/Add_248,27648 +/Add_249,27648 +/Add_250,27648 +/Add_251,27648 +/Add_252,27648 +/Add_253,27648 +/Add_255,72 +/Add_256,2 +/Add_257,27648 +/Add_26,414720 +/Add_260,103680 +/Add_262,27648 +/Add_263,27648 +/Add_264,27648 +/Add_266,27648 +/Add_267,27648 +/Add_268,27648 +/Add_269,27648 +/Add_28,55296 +/Add_29,55296 +/Add_3,144 +/Add_30,55296 +/Add_32,55296 +/Add_33,55296 +/Add_34,55296 +/Add_35,55296 +/Add_36,27648 +/Add_37,27648 +/Add_39,72 +/Add_4,2 +/Add_40,2 +/Add_41,27648 +/Add_44,103680 +/Add_46,27648 +/Add_47,27648 +/Add_48,27648 +/Add_5,55296 +/Add_50,27648 +/Add_51,27648 +/Add_52,27648 +/Add_53,27648 +/Add_54,27648 +/Add_55,27648 +/Add_57,72 +/Add_58,2 +/Add_59,27648 +/Add_62,103680 +/Add_64,27648 +/Add_65,27648 +/Add_66,27648 +/Add_68,27648 +/Add_69,27648 +/Add_70,27648 +/Add_71,27648 +/Add_72,27648 +/Add_73,27648 +/Add_75,72 +/Add_76,2 +/Add_77,27648 +/Add_8,414720 +/Add_80,103680 +/Add_82,27648 +/Add_83,27648 +/Add_84,27648 +/Add_86,27648 +/Add_87,27648 +/Add_88,27648 +/Add_89,27648 +/Add_90,27648 +/Add_91,27648 +/Add_93,72 +/Add_94,2 +/Add_95,27648 +/Add_98,103680 +/Cast,0 +/Cast_10,0 +/Cast_12,0 +/Cast_15,0 +/Cast_17,0 +/Cast_2,0 +/Cast_20,0 +/Cast_22,0 +/Cast_25,0 +/Cast_27,0 +/Cast_30,0 +/Cast_32,0 +/Cast_35,0 +/Cast_37,0 +/Cast_40,0 +/Cast_42,0 +/Cast_45,0 +/Cast_47,0 +/Cast_5,0 +/Cast_50,0 +/Cast_52,0 +/Cast_55,0 +/Cast_57,0 +/Cast_60,0 +/Cast_62,0 +/Cast_65,0 +/Cast_67,0 +/Cast_7,0 +/Cast_70,0 +/Cast_72,0 +/Concat,384 +/Concat_1,192 +/Concat_110,23040 +/Concat_111,192 +/Concat_114,23040 +/Concat_115,2 +/Concat_116,768 +/Concat_117,55296 +/Concat_118,27648 +/Concat_119,27648 +/Concat_120,23040 +/Concat_121,23040 +/Concat_122,384 +/Concat_123,192 +/Concat_13,23040 +/Concat_135,23040 +/Concat_136,192 +/Concat_139,23040 +/Concat_14,192 +/Concat_140,384 +/Concat_141,192 +/Concat_153,23040 +/Concat_154,192 +/Concat_157,23040 +/Concat_158,384 +/Concat_159,192 +/Concat_17,23040 +/Concat_171,23040 +/Concat_172,192 +/Concat_175,23040 +/Concat_176,2 +/Concat_177,768 +/Concat_178,27648 +/Concat_179,13824 +/Concat_18,384 +/Concat_180,13824 +/Concat_181,23040 +/Concat_182,23040 +/Concat_183,384 +/Concat_184,192 +/Concat_19,192 +/Concat_196,23040 +/Concat_197,192 +/Concat_200,23040 +/Concat_201,384 +/Concat_202,192 +/Concat_214,23040 +/Concat_215,192 +/Concat_218,23040 +/Concat_219,2 +/Concat_220,768 +/Concat_221,13824 +/Concat_222,6912 +/Concat_223,6912 +/Concat_224,23040 +/Concat_225,23040 +/Concat_226,384 +/Concat_227,192 +/Concat_239,23040 +/Concat_240,192 +/Concat_243,23040 +/Concat_244,384 +/Concat_245,192 +/Concat_257,23040 +/Concat_258,192 +/Concat_261,23040 +/Concat_262,384 +/Concat_263,192 +/Concat_275,23040 +/Concat_276,192 +/Concat_279,23040 +/Concat_280,384 +/Concat_281,192 +/Concat_293,23040 +/Concat_294,192 +/Concat_297,23040 +/Concat_298,2 +/Concat_299,768 +/Concat_300,55296 +/Concat_301,27648 +/Concat_302,27648 +/Concat_303,23040 +/Concat_304,23040 +/Concat_31,23040 +/Concat_32,192 +/Concat_35,23040 +/Concat_36,2 +/Concat_37,768 +/Concat_38,110592 +/Concat_39,55296 +/Concat_40,55296 +/Concat_41,23040 +/Concat_42,23040 +/Concat_43,384 +/Concat_44,192 +/Concat_56,23040 +/Concat_57,192 +/Concat_60,23040 +/Concat_61,384 +/Concat_62,192 +/Concat_74,23040 +/Concat_75,192 +/Concat_78,23040 +/Concat_79,384 +/Concat_80,192 +/Concat_92,23040 +/Concat_93,192 +/Concat_96,23040 +/Concat_97,384 +/Concat_98,192 +/CumSum,55296 +/CumSum_1,55296 +/CumSum_10,6912 +/CumSum_11,27648 +/CumSum_12,27648 +/CumSum_13,27648 +/CumSum_14,27648 +/CumSum_2,27648 +/CumSum_3,27648 +/CumSum_4,27648 +/CumSum_5,27648 +/CumSum_6,13824 +/CumSum_7,13824 +/CumSum_8,13824 +/CumSum_9,6912 +/GatherElements,0 +/GatherElements_1,0 +/GatherElements_10,0 +/GatherElements_11,0 +/GatherElements_12,0 +/GatherElements_13,0 +/GatherElements_14,0 +/GatherElements_2,0 +/GatherElements_3,0 +/GatherElements_4,0 +/GatherElements_5,0 +/GatherElements_6,0 +/GatherElements_7,0 +/GatherElements_8,0 +/GatherElements_9,0 +/Gather_1,2 +/Gather_10,768 +/Gather_107,2 +/Gather_108,768 +/Gather_109,55296 +/Gather_110,27648 +/Gather_111,27648 +/Gather_112,23040 +/Gather_113,23040 +/Gather_116,768 +/Gather_129,2 +/Gather_130,768 +/Gather_131,27648 +/Gather_132,13824 +/Gather_133,13824 +/Gather_134,23040 +/Gather_135,23040 +/Gather_138,768 +/Gather_150,2 +/Gather_151,768 +/Gather_152,27648 +/Gather_153,13824 +/Gather_154,13824 +/Gather_155,23040 +/Gather_156,23040 +/Gather_159,768 +/Gather_171,2 +/Gather_172,768 +/Gather_173,27648 +/Gather_174,13824 +/Gather_175,13824 +/Gather_176,23040 +/Gather_177,23040 +/Gather_180,768 +/Gather_193,2 +/Gather_194,768 +/Gather_195,13824 +/Gather_196,6912 +/Gather_197,6912 +/Gather_198,23040 +/Gather_199,23040 +/Gather_2,768 +/Gather_202,768 +/Gather_214,2 +/Gather_215,768 +/Gather_216,13824 +/Gather_217,6912 +/Gather_218,6912 +/Gather_219,23040 +/Gather_22,2 +/Gather_220,23040 +/Gather_223,768 +/Gather_23,768 +/Gather_236,2 +/Gather_237,768 +/Gather_238,55296 +/Gather_239,27648 +/Gather_24,110592 +/Gather_240,27648 +/Gather_241,23040 +/Gather_242,23040 +/Gather_245,768 +/Gather_25,55296 +/Gather_257,2 +/Gather_258,768 +/Gather_259,55296 +/Gather_26,55296 +/Gather_260,27648 +/Gather_261,27648 +/Gather_262,23040 +/Gather_263,23040 +/Gather_266,768 +/Gather_27,23040 +/Gather_278,2 +/Gather_279,768 +/Gather_28,23040 +/Gather_280,55296 +/Gather_281,27648 +/Gather_282,27648 +/Gather_283,23040 +/Gather_284,23040 +/Gather_287,768 +/Gather_299,2 +/Gather_3,110592 +/Gather_300,768 +/Gather_301,55296 +/Gather_302,27648 +/Gather_303,27648 +/Gather_304,23040 +/Gather_305,23040 +/Gather_308,768 +/Gather_31,768 +/Gather_4,55296 +/Gather_44,2 +/Gather_45,768 +/Gather_46,55296 +/Gather_47,27648 +/Gather_48,27648 +/Gather_49,23040 +/Gather_5,55296 +/Gather_50,23040 +/Gather_53,768 +/Gather_6,23040 +/Gather_65,2 +/Gather_66,768 +/Gather_67,55296 +/Gather_68,27648 +/Gather_69,27648 +/Gather_7,23040 +/Gather_70,23040 +/Gather_71,23040 +/Gather_74,768 +/Gather_86,2 +/Gather_87,768 +/Gather_88,55296 +/Gather_89,27648 +/Gather_90,27648 +/Gather_91,23040 +/Gather_92,23040 +/Gather_95,768 +/MatMul,2482560 +/MatMul_1,10368000 +/MatMul_10,619200 +/MatMul_11,2592000 +/MatMul_12,1251072 +/MatMul_13/MatMulAddFusion,10644480 +/MatMul_14,1251072 +/MatMul_15,619200 +/MatMul_16,2592000 +/MatMul_17,1251072 +/MatMul_18/MatMulAddFusion,10644480 +/MatMul_19,1251072 +/MatMul_2,4990464 +/MatMul_20,619200 +/MatMul_21,2592000 +/MatMul_22,1251072 +/MatMul_23/MatMulAddFusion,10644480 +/MatMul_24,1251072 +/MatMul_25,619200 +/MatMul_26,2592000 +/MatMul_27,1251072 +/MatMul_28/MatMulAddFusion,10644480 +/MatMul_29,1251072 +/MatMul_3/MatMulAddFusion,21288960 +/MatMul_30,154080 +/MatMul_31,648000 +/MatMul_32,314496 +/MatMul_33/MatMulAddFusion,5322240 +/MatMul_34,314496 +/MatMul_35,154080 +/MatMul_36,648000 +/MatMul_37,314496 +/MatMul_38/MatMulAddFusion,5322240 +/MatMul_39,314496 +/MatMul_4,4990464 +/MatMul_40,154080 +/MatMul_41,648000 +/MatMul_42,314496 +/MatMul_43/MatMulAddFusion,5322240 +/MatMul_44,314496 +/MatMul_45,38160 +/MatMul_46,162000 +/MatMul_47,79488 +/MatMul_48/MatMulAddFusion,2661120 +/MatMul_49,79488 +/MatMul_5,2482560 +/MatMul_50,38160 +/MatMul_51,162000 +/MatMul_52,79488 +/MatMul_53/MatMulAddFusion,2661120 +/MatMul_54,79488 +/MatMul_55,619200 +/MatMul_56,2592000 +/MatMul_57,1251072 +/MatMul_58/MatMulAddFusion,10644480 +/MatMul_59,1251072 +/MatMul_6,10368000 +/MatMul_60,619200 +/MatMul_61,2592000 +/MatMul_62,1251072 +/MatMul_63/MatMulAddFusion,10644480 +/MatMul_64,1251072 +/MatMul_65,619200 +/MatMul_66,2592000 +/MatMul_67,1251072 +/MatMul_68/MatMulAddFusion,10644480 +/MatMul_69,1251072 +/MatMul_7,4990464 +/MatMul_70,619200 +/MatMul_71,2592000 +/MatMul_72,1251072 +/MatMul_73/MatMulAddFusion,10644480 +/MatMul_74,1251072 +/MatMul_8/MatMulAddFusion,21288960 +/MatMul_9,4990464 +/Mul,768 +/Mul_10,55296 +/Mul_101,6912 +/Mul_106,6912 +/Mul_108,6912 +/Mul_109,6912 +/Mul_11,768 +/Mul_110,768 +/Mul_112,6912 +/Mul_117,6912 +/Mul_119,6912 +/Mul_120,6912 +/Mul_121,768 +/Mul_123,27648 +/Mul_128,27648 +/Mul_13,55296 +/Mul_130,27648 +/Mul_131,27648 +/Mul_132,768 +/Mul_134,27648 +/Mul_139,27648 +/Mul_141,27648 +/Mul_142,27648 +/Mul_143,768 +/Mul_145,27648 +/Mul_150,27648 +/Mul_152,27648 +/Mul_153,27648 +/Mul_154,768 +/Mul_156,27648 +/Mul_161,27648 +/Mul_163,27648 +/Mul_164,27648 +/Mul_18,55296 +/Mul_2,55296 +/Mul_20,55296 +/Mul_21,55296 +/Mul_22,768 +/Mul_24,27648 +/Mul_29,27648 +/Mul_31,27648 +/Mul_32,27648 +/Mul_33,768 +/Mul_35,27648 +/Mul_40,27648 +/Mul_42,27648 +/Mul_43,27648 +/Mul_44,768 +/Mul_46,27648 +/Mul_51,27648 +/Mul_53,27648 +/Mul_54,27648 +/Mul_55,768 +/Mul_57,27648 +/Mul_62,27648 +/Mul_64,27648 +/Mul_65,27648 +/Mul_66,768 +/Mul_68,13824 +/Mul_7,55296 +/Mul_73,13824 +/Mul_75,13824 +/Mul_76,13824 +/Mul_77,768 +/Mul_79,13824 +/Mul_84,13824 +/Mul_86,13824 +/Mul_87,13824 +/Mul_88,768 +/Mul_9,55296 +/Mul_90,13824 +/Mul_95,13824 +/Mul_97,13824 +/Mul_98,13824 +/Mul_99,768 +/Reciprocal,576 +/Reciprocal_1,576 +/Reciprocal_10,72 +/Reciprocal_11,288 +/Reciprocal_12,288 +/Reciprocal_13,288 +/Reciprocal_14,288 +/Reciprocal_2,288 +/Reciprocal_3,288 +/Reciprocal_4,288 +/Reciprocal_5,288 +/Reciprocal_6,144 +/Reciprocal_7,144 +/Reciprocal_8,144 +/Reciprocal_9,72 +/Reshape,2 +/Reshape_1,2 +/Reshape_100,2 +/Reshape_101,2 +/Reshape_102,2 +/Reshape_104,2 +/Reshape_105,2 +/Reshape_106,2 +/Reshape_107_new_reshape,2 +/Reshape_108,2 +/Reshape_109_new_reshape,2 +/Reshape_10_new_reshape,2 +/Reshape_11,2 +/Reshape_110,2 +/Reshape_111,2 +/Reshape_112,2 +/Reshape_113,2 +/Reshape_115,2 +/Reshape_116,2 +/Reshape_117,2 +/Reshape_118_new_reshape,2 +/Reshape_119,2 +/Reshape_12,2 +/Reshape_120_new_reshape,2 +/Reshape_121,2 +/Reshape_122,2 +/Reshape_123,2 +/Reshape_124,2 +/Reshape_126,2 +/Reshape_127,2 +/Reshape_128,2 +/Reshape_129_new_reshape,2 +/Reshape_13,2 +/Reshape_130,2 +/Reshape_131_new_reshape,2 +/Reshape_132,2 +/Reshape_133,2 +/Reshape_134,2 +/Reshape_135,2 +/Reshape_137,2 +/Reshape_138,2 +/Reshape_139,2 +/Reshape_14,2 +/Reshape_140_new_reshape,2 +/Reshape_141,2 +/Reshape_142_new_reshape,2 +/Reshape_143,2 +/Reshape_144,2 +/Reshape_145,2 +/Reshape_146,2 +/Reshape_148,2 +/Reshape_149,2 +/Reshape_150,2 +/Reshape_151_new_reshape,2 +/Reshape_152,2 +/Reshape_153_new_reshape,2 +/Reshape_154,2 +/Reshape_155,2 +/Reshape_156,2 +/Reshape_157,2 +/Reshape_159,2 +/Reshape_16,2 +/Reshape_160,2 +/Reshape_161,2 +/Reshape_162_new_reshape,2 +/Reshape_163,2 +/Reshape_164_new_reshape,2 +/Reshape_17,2 +/Reshape_18,2 +/Reshape_19_new_reshape,2 +/Reshape_2,2 +/Reshape_20,2 +/Reshape_21_new_reshape,2 +/Reshape_22,2 +/Reshape_23,2 +/Reshape_24,2 +/Reshape_25,2 +/Reshape_27,2 +/Reshape_28,2 +/Reshape_29,2 +/Reshape_3,2 +/Reshape_30_new_reshape,2 +/Reshape_31,2 +/Reshape_32_new_reshape,2 +/Reshape_33,2 +/Reshape_34,2 +/Reshape_35,2 +/Reshape_36,2 +/Reshape_38,2 +/Reshape_39,2 +/Reshape_40,2 +/Reshape_41_new_reshape,2 +/Reshape_42,2 +/Reshape_43_new_reshape,2 +/Reshape_44,2 +/Reshape_45,2 +/Reshape_46,2 +/Reshape_47,2 +/Reshape_49,2 +/Reshape_5,2 +/Reshape_50,2 +/Reshape_51,2 +/Reshape_52_new_reshape,2 +/Reshape_53,2 +/Reshape_54_new_reshape,2 +/Reshape_55,2 +/Reshape_56,2 +/Reshape_57,2 +/Reshape_58,2 +/Reshape_6,2 +/Reshape_60,2 +/Reshape_61,2 +/Reshape_62,2 +/Reshape_63_new_reshape,2 +/Reshape_64,2 +/Reshape_65_new_reshape,2 +/Reshape_66,2 +/Reshape_67,2 +/Reshape_68,2 +/Reshape_69,2 +/Reshape_7,2 +/Reshape_71,2 +/Reshape_72,2 +/Reshape_73,2 +/Reshape_74_new_reshape,2 +/Reshape_75,2 +/Reshape_76_new_reshape,2 +/Reshape_77,2 +/Reshape_78,2 +/Reshape_79,2 +/Reshape_80,2 +/Reshape_82,2 +/Reshape_83,2 +/Reshape_84,2 +/Reshape_85_new_reshape,2 +/Reshape_86,2 +/Reshape_87_new_reshape,2 +/Reshape_88,2 +/Reshape_89,2 +/Reshape_8_new_reshape,2 +/Reshape_9,2 +/Reshape_90,2 +/Reshape_91,2 +/Reshape_93,2 +/Reshape_94,2 +/Reshape_95,2 +/Reshape_96_new_reshape,2 +/Reshape_97,2 +/Reshape_98_new_reshape,2 +/Reshape_99,2 +/Sigmoid,1769472 +/Sigmoid_1,1769472 +/Sigmoid_10,884736 +/Sigmoid_11,884736 +/Sigmoid_12,442368 +/Sigmoid_13,442368 +/Sigmoid_14,442368 +/Sigmoid_15,442368 +/Sigmoid_16,442368 +/Sigmoid_17,442368 +/Sigmoid_18,221184 +/Sigmoid_19,221184 +/Sigmoid_2,1769472 +/Sigmoid_20,221184 +/Sigmoid_21,221184 +/Sigmoid_22,884736 +/Sigmoid_23,884736 +/Sigmoid_24,884736 +/Sigmoid_25,884736 +/Sigmoid_26,884736 +/Sigmoid_27,884736 +/Sigmoid_28,884736 +/Sigmoid_29,884736 +/Sigmoid_3,1769472 +/Sigmoid_4,884736 +/Sigmoid_5,884736 +/Sigmoid_6,884736 +/Sigmoid_7,884736 +/Sigmoid_8,884736 +/Sigmoid_9,884736 +/Slice,27648 +/Slice_1,27648 +/Slice_10,27648 +/Slice_100,23040 +/Slice_101,55296 +/Slice_102,13824 +/Slice_103,13824 +/Slice_104,6912 +/Slice_105,2304 +/Slice_106,55296 +/Slice_107,27648 +/Slice_108,23040 +/Slice_109,27648 +/Slice_11,13824 +/Slice_110,23040 +/Slice_111,13824 +/Slice_112,13824 +/Slice_113,6912 +/Slice_114,2304 +/Slice_115,55296 +/Slice_116,27648 +/Slice_117,23040 +/Slice_118,27648 +/Slice_119,23040 +/Slice_12,4608 +/Slice_120,13824 +/Slice_121,13824 +/Slice_122,6912 +/Slice_123,2304 +/Slice_124,55296 +/Slice_125,27648 +/Slice_126,23040 +/Slice_127,27648 +/Slice_128,23040 +/Slice_129,13824 +/Slice_13,110592 +/Slice_130,13824 +/Slice_131,6912 +/Slice_132,2304 +/Slice_133,55296 +/Slice_134,27648 +/Slice_135,23040 +/Slice_136,27648 +/Slice_137,23040 +/Slice_138,55296 +/Slice_14,55296 +/Slice_15,23040 +/Slice_16,55296 +/Slice_17,23040 +/Slice_18,13824 +/Slice_19,13824 +/Slice_2,13824 +/Slice_20,6912 +/Slice_21,2304 +/Slice_22,55296 +/Slice_23,27648 +/Slice_24,23040 +/Slice_25,27648 +/Slice_26,23040 +/Slice_27,13824 +/Slice_28,13824 +/Slice_29,6912 +/Slice_3,4608 +/Slice_30,2304 +/Slice_31,55296 +/Slice_32,27648 +/Slice_33,23040 +/Slice_34,27648 +/Slice_35,23040 +/Slice_36,13824 +/Slice_37,13824 +/Slice_38,6912 +/Slice_39,2304 +/Slice_4,110592 +/Slice_40,55296 +/Slice_41,27648 +/Slice_42,23040 +/Slice_43,27648 +/Slice_44,23040 +/Slice_45,13824 +/Slice_46,13824 +/Slice_47,6912 +/Slice_48,2304 +/Slice_49,55296 +/Slice_5,55296 +/Slice_50,27648 +/Slice_51,23040 +/Slice_52,27648 +/Slice_53,23040 +/Slice_54,55296 +/Slice_55,6912 +/Slice_56,6912 +/Slice_57,3456 +/Slice_58,1152 +/Slice_59,27648 +/Slice_6,23040 +/Slice_60,13824 +/Slice_61,23040 +/Slice_62,13824 +/Slice_63,23040 +/Slice_64,6912 +/Slice_65,6912 +/Slice_66,3456 +/Slice_67,1152 +/Slice_68,27648 +/Slice_69,13824 +/Slice_7,55296 +/Slice_70,23040 +/Slice_71,13824 +/Slice_72,23040 +/Slice_73,6912 +/Slice_74,6912 +/Slice_75,3456 +/Slice_76,1152 +/Slice_77,27648 +/Slice_78,13824 +/Slice_79,23040 +/Slice_8,23040 +/Slice_80,13824 +/Slice_81,23040 +/Slice_82,55296 +/Slice_83,3456 +/Slice_84,3456 +/Slice_85,1728 +/Slice_86,576 +/Slice_87,13824 +/Slice_88,6912 +/Slice_89,23040 +/Slice_9,27648 +/Slice_90,6912 +/Slice_91,23040 +/Slice_92,3456 +/Slice_93,3456 +/Slice_94,1728 +/Slice_95,576 +/Slice_96,13824 +/Slice_97,6912 +/Slice_98,23040 +/Slice_99,6912 +/Softmax,14929920 +/Softmax_1,14929920 +/Softmax_10,233280 +/Softmax_11,3732480 +/Softmax_12,3732480 +/Softmax_13,3732480 +/Softmax_14,3732480 +/Softmax_2,3732480 +/Softmax_3,3732480 +/Softmax_4,3732480 +/Softmax_5,3732480 +/Softmax_6,933120 +/Softmax_7,933120 +/Softmax_8,933120 +/Softmax_9,233280 +/Split,0 +/Split_1,0 +/Split_10,0 +/Split_11,0 +/Split_12,0 +/Split_13,0 +/Split_14,0 +/Split_15,0 +/Split_16,0 +/Split_17,0 +/Split_18,0 +/Split_19,0 +/Split_2,0 +/Split_20,0 +/Split_21,0 +/Split_22,0 +/Split_23,0 +/Split_24,0 +/Split_25,0 +/Split_26,0 +/Split_27,0 +/Split_28,0 +/Split_29,0 +/Split_3,0 +/Split_4,0 +/Split_5,0 +/Split_6,0 +/Split_7,0 +/Split_8,0 +/Split_9,0 +/Sub_11,27648 +/Sub_14,27648 +/Sub_17,27648 +/Sub_2,55296 +/Sub_20,13824 +/Sub_23,13824 +/Sub_26,13824 +/Sub_29,6912 +/Sub_32,6912 +/Sub_35,27648 +/Sub_38,27648 +/Sub_41,27648 +/Sub_44,27648 +/Sub_5,55296 +/Sub_8,27648 +/Transpose,0 +/Transpose_1,0 +/Transpose_10,0 +/Transpose_100,0 +/Transpose_102,0 +/Transpose_103,0 +/Transpose_104,0 +/Transpose_105,0 +/Transpose_106,0 +/Transpose_107,0 +/Transpose_108,0 +/Transpose_109,0 +/Transpose_11,0 +/Transpose_110,0 +/Transpose_111,0 +/Transpose_112,0 +/Transpose_114,0 +/Transpose_115,0 +/Transpose_116,0 +/Transpose_117,0 +/Transpose_118,0 +/Transpose_119,0 +/Transpose_12,0 +/Transpose_120,0 +/Transpose_121,0 +/Transpose_122,0 +/Transpose_123,0 +/Transpose_124,0 +/Transpose_126,0 +/Transpose_127,0 +/Transpose_128,0 +/Transpose_129,0 +/Transpose_13,0 +/Transpose_130,0 +/Transpose_131,0 +/Transpose_132,0 +/Transpose_133,0 +/Transpose_134,0 +/Transpose_135,0 +/Transpose_136,0 +/Transpose_138,0 +/Transpose_139,0 +/Transpose_14,0 +/Transpose_140,0 +/Transpose_141,0 +/Transpose_142,0 +/Transpose_143,0 +/Transpose_144,0 +/Transpose_145,0 +/Transpose_146,0 +/Transpose_147,0 +/Transpose_148,0 +/Transpose_15,0 +/Transpose_150,0 +/Transpose_151,0 +/Transpose_152,0 +/Transpose_153,0 +/Transpose_154,0 +/Transpose_155,0 +/Transpose_156,0 +/Transpose_157,0 +/Transpose_158,0 +/Transpose_159,0 +/Transpose_16,0 +/Transpose_160,0 +/Transpose_162,0 +/Transpose_163,0 +/Transpose_164,0 +/Transpose_165,0 +/Transpose_166,0 +/Transpose_167,0 +/Transpose_168,0 +/Transpose_169,0 +/Transpose_170,0 +/Transpose_171,0 +/Transpose_172,0 +/Transpose_174,0 +/Transpose_175,0 +/Transpose_176,0 +/Transpose_177,0 +/Transpose_178,0 +/Transpose_179,0 +/Transpose_18,0 +/Transpose_180,0 +/Transpose_181,0 +/Transpose_19,0 +/Transpose_2,0 +/Transpose_20,0 +/Transpose_21,0 +/Transpose_22,0 +/Transpose_23,0 +/Transpose_24,0 +/Transpose_25,0 +/Transpose_26,0 +/Transpose_27,0 +/Transpose_28,0 +/Transpose_3,0 +/Transpose_30,0 +/Transpose_31,0 +/Transpose_32,0 +/Transpose_33,0 +/Transpose_34,0 +/Transpose_35,0 +/Transpose_36,0 +/Transpose_37,0 +/Transpose_38,0 +/Transpose_39,0 +/Transpose_4,0 +/Transpose_40,0 +/Transpose_42,0 +/Transpose_43,0 +/Transpose_44,0 +/Transpose_45,0 +/Transpose_46,0 +/Transpose_47,0 +/Transpose_48,0 +/Transpose_49,0 +/Transpose_50,0 +/Transpose_51,0 +/Transpose_52,0 +/Transpose_54,0 +/Transpose_55,0 +/Transpose_56,0 +/Transpose_57,0 +/Transpose_58,0 +/Transpose_59,0 +/Transpose_6,0 +/Transpose_60,0 +/Transpose_61,0 +/Transpose_62,0 +/Transpose_63,0 +/Transpose_64,0 +/Transpose_66,0 +/Transpose_67,0 +/Transpose_68,0 +/Transpose_69,0 +/Transpose_7,0 +/Transpose_70,0 +/Transpose_71,0 +/Transpose_72,0 +/Transpose_73,0 +/Transpose_74,0 +/Transpose_75,0 +/Transpose_76,0 +/Transpose_78,0 +/Transpose_79,0 +/Transpose_8,0 +/Transpose_80,0 +/Transpose_81,0 +/Transpose_82,0 +/Transpose_83,0 +/Transpose_84,0 +/Transpose_85,0 +/Transpose_86,0 +/Transpose_87,0 +/Transpose_88,0 +/Transpose_9,0 +/Transpose_90,0 +/Transpose_91,0 +/Transpose_92,0 +/Transpose_93,0 +/Transpose_94,0 +/Transpose_95,0 +/Transpose_96,0 +/Transpose_97,0 +/Transpose_98,0 +/Transpose_99,0 +/Unsqueeze,2 +/Unsqueeze_1,768 +/Unsqueeze_116,2 +/Unsqueeze_117,768 +/Unsqueeze_119,2 +/Unsqueeze_120,72 +/Unsqueeze_150,2 +/Unsqueeze_151,768 +/Unsqueeze_153,2 +/Unsqueeze_154,72 +/Unsqueeze_184,2 +/Unsqueeze_185,768 +/Unsqueeze_187,2 +/Unsqueeze_188,72 +/Unsqueeze_218,2 +/Unsqueeze_219,2 +/Unsqueeze_220,2 +/Unsqueeze_221,2 +/Unsqueeze_222,768 +/Unsqueeze_223,768 +/Unsqueeze_224,768 +/Unsqueeze_225,768 +/Unsqueeze_226,55296 +/Unsqueeze_227,55296 +/Unsqueeze_228,55296 +/Unsqueeze_229,55296 +/Unsqueeze_230,27648 +/Unsqueeze_231,27648 +/Unsqueeze_232,27648 +/Unsqueeze_233,27648 +/Unsqueeze_234,27648 +/Unsqueeze_235,27648 +/Unsqueeze_236,27648 +/Unsqueeze_237,27648 +/Unsqueeze_238,23040 +/Unsqueeze_239,23040 +/Unsqueeze_240,23040 +/Unsqueeze_241,23040 +/Unsqueeze_242,23040 +/Unsqueeze_243,23040 +/Unsqueeze_244,23040 +/Unsqueeze_245,23040 +/Unsqueeze_247,2 +/Unsqueeze_248,768 +/Unsqueeze_250,2 +/Unsqueeze_251,36 +/Unsqueeze_281,2 +/Unsqueeze_282,768 +/Unsqueeze_284,2 +/Unsqueeze_285,36 +/Unsqueeze_3,2 +/Unsqueeze_315,2 +/Unsqueeze_316,768 +/Unsqueeze_318,2 +/Unsqueeze_319,36 +/Unsqueeze_34,2 +/Unsqueeze_349,2 +/Unsqueeze_35,768 +/Unsqueeze_350,2 +/Unsqueeze_351,2 +/Unsqueeze_352,768 +/Unsqueeze_353,768 +/Unsqueeze_354,768 +/Unsqueeze_355,27648 +/Unsqueeze_356,27648 +/Unsqueeze_357,27648 +/Unsqueeze_358,13824 +/Unsqueeze_359,13824 +/Unsqueeze_360,13824 +/Unsqueeze_361,13824 +/Unsqueeze_362,13824 +/Unsqueeze_363,13824 +/Unsqueeze_364,23040 +/Unsqueeze_365,23040 +/Unsqueeze_366,23040 +/Unsqueeze_367,23040 +/Unsqueeze_368,23040 +/Unsqueeze_369,23040 +/Unsqueeze_37,2 +/Unsqueeze_371,2 +/Unsqueeze_372,768 +/Unsqueeze_374,2 +/Unsqueeze_375,18 +/Unsqueeze_38,144 +/Unsqueeze_4,144 +/Unsqueeze_405,2 +/Unsqueeze_406,768 +/Unsqueeze_408,2 +/Unsqueeze_409,18 +/Unsqueeze_439,2 +/Unsqueeze_440,2 +/Unsqueeze_441,768 +/Unsqueeze_442,768 +/Unsqueeze_443,13824 +/Unsqueeze_444,13824 +/Unsqueeze_445,6912 +/Unsqueeze_446,6912 +/Unsqueeze_447,6912 +/Unsqueeze_448,6912 +/Unsqueeze_449,23040 +/Unsqueeze_450,23040 +/Unsqueeze_451,23040 +/Unsqueeze_452,23040 +/Unsqueeze_454,2 +/Unsqueeze_455,768 +/Unsqueeze_457,2 +/Unsqueeze_458,72 +/Unsqueeze_488,2 +/Unsqueeze_489,768 +/Unsqueeze_491,2 +/Unsqueeze_492,72 +/Unsqueeze_522,2 +/Unsqueeze_523,768 +/Unsqueeze_525,2 +/Unsqueeze_526,72 +/Unsqueeze_556,2 +/Unsqueeze_557,768 +/Unsqueeze_559,2 +/Unsqueeze_560,72 +/Unsqueeze_590,2 +/Unsqueeze_591,2 +/Unsqueeze_592,2 +/Unsqueeze_593,2 +/Unsqueeze_594,768 +/Unsqueeze_595,768 +/Unsqueeze_596,768 +/Unsqueeze_597,768 +/Unsqueeze_598,55296 +/Unsqueeze_599,55296 +/Unsqueeze_600,55296 +/Unsqueeze_601,55296 +/Unsqueeze_602,27648 +/Unsqueeze_603,27648 +/Unsqueeze_604,27648 +/Unsqueeze_605,27648 +/Unsqueeze_606,27648 +/Unsqueeze_607,27648 +/Unsqueeze_608,27648 +/Unsqueeze_609,27648 +/Unsqueeze_610,23040 +/Unsqueeze_611,23040 +/Unsqueeze_612,23040 +/Unsqueeze_613,23040 +/Unsqueeze_614,23040 +/Unsqueeze_615,23040 +/Unsqueeze_616,23040 +/Unsqueeze_617,23040 +/Unsqueeze_68,2 +/Unsqueeze_69,2 +/Unsqueeze_70,768 +/Unsqueeze_71,768 +/Unsqueeze_72,110592 +/Unsqueeze_73,110592 +/Unsqueeze_74,55296 +/Unsqueeze_75,55296 +/Unsqueeze_76,55296 +/Unsqueeze_77,55296 +/Unsqueeze_78,23040 +/Unsqueeze_79,23040 +/Unsqueeze_80,23040 +/Unsqueeze_81,23040 +/Unsqueeze_82,2 +/Unsqueeze_83,768 +/Unsqueeze_85,2 +/Unsqueeze_86,72 +/activation/Mul,55296 +/activation/Sigmoid,1769472 +/activation/Sub,55296 +/activation_1/Mul,55296 +/activation_1/Sigmoid,1769472 +/activation_1/Sub,55296 +/activation_10/Mul,27648 +/activation_10/Sigmoid,884736 +/activation_10/Sub,27648 +/activation_11/Mul,27648 +/activation_11/Sigmoid,884736 +/activation_11/Sub,27648 +/activation_12/Mul,13824 +/activation_12/Sigmoid,442368 +/activation_12/Sub,13824 +/activation_13/Mul,13824 +/activation_13/Sigmoid,442368 +/activation_13/Sub,13824 +/activation_14/Mul,13824 +/activation_14/Sigmoid,442368 +/activation_14/Sub,13824 +/activation_15/Mul,13824 +/activation_15/Sigmoid,442368 +/activation_15/Sub,13824 +/activation_16/Mul,13824 +/activation_16/Sigmoid,442368 +/activation_16/Sub,13824 +/activation_17/Mul,13824 +/activation_17/Sigmoid,442368 +/activation_17/Sub,13824 +/activation_18/Mul,6912 +/activation_18/Sigmoid,221184 +/activation_18/Sub,6912 +/activation_19/Mul,6912 +/activation_19/Sigmoid,221184 +/activation_19/Sub,6912 +/activation_2/Mul,55296 +/activation_2/Sigmoid,1769472 +/activation_2/Sub,55296 +/activation_20/Mul,6912 +/activation_20/Sigmoid,221184 +/activation_20/Sub,6912 +/activation_21/Mul,6912 +/activation_21/Sigmoid,221184 +/activation_21/Sub,6912 +/activation_22/Mul,27648 +/activation_22/Sigmoid,884736 +/activation_22/Sub,27648 +/activation_23/Mul,27648 +/activation_23/Sigmoid,884736 +/activation_23/Sub,27648 +/activation_24/Mul,27648 +/activation_24/Sigmoid,884736 +/activation_24/Sub,27648 +/activation_25/Mul,27648 +/activation_25/Sigmoid,884736 +/activation_25/Sub,27648 +/activation_26/Mul,27648 +/activation_26/Sigmoid,884736 +/activation_26/Sub,27648 +/activation_27/Mul,27648 +/activation_27/Sigmoid,884736 +/activation_27/Sub,27648 +/activation_28/Mul,27648 +/activation_28/Sigmoid,884736 +/activation_28/Sub,27648 +/activation_29/Mul,27648 +/activation_29/Sigmoid,884736 +/activation_29/Sub,27648 +/activation_3/Mul,55296 +/activation_3/Sigmoid,1769472 +/activation_3/Sub,55296 +/activation_4/Mul,27648 +/activation_4/Sigmoid,884736 +/activation_4/Sub,27648 +/activation_5/Mul,27648 +/activation_5/Sigmoid,884736 +/activation_5/Sub,27648 +/activation_6/Mul,27648 +/activation_6/Sigmoid,884736 +/activation_6/Sub,27648 +/activation_7/Mul,27648 +/activation_7/Sigmoid,884736 +/activation_7/Sub,27648 +/activation_8/Mul,27648 +/activation_8/Sigmoid,884736 +/activation_8/Sub,27648 +/activation_9/Mul,27648 +/activation_9/Sigmoid,884736 +/activation_9/Sub,27648 +/depthwise_conv/Conv,1769472 +/depthwise_conv_1/Conv,1769472 +/depthwise_conv_10/Conv,884736 +/depthwise_conv_11/Conv,884736 +/depthwise_conv_12/Conv,442368 +/depthwise_conv_13/Conv,442368 +/depthwise_conv_14/Conv,442368 +/depthwise_conv_15/Conv,442368 +/depthwise_conv_16/Conv,442368 +/depthwise_conv_17/Conv,442368 +/depthwise_conv_18/Conv,221184 +/depthwise_conv_19/Conv,221184 +/depthwise_conv_2/Conv,1769472 +/depthwise_conv_20/Conv,221184 +/depthwise_conv_21/Conv,221184 +/depthwise_conv_22/Conv,884736 +/depthwise_conv_23/Conv,884736 +/depthwise_conv_24/Conv,884736 +/depthwise_conv_25/Conv,884736 +/depthwise_conv_26/Conv,884736 +/depthwise_conv_27/Conv,884736 +/depthwise_conv_28/Conv,884736 +/depthwise_conv_29/Conv,884736 +/depthwise_conv_3/Conv,1769472 +/depthwise_conv_4/Conv,884736 +/depthwise_conv_5/Conv,884736 +/depthwise_conv_6/Conv,884736 +/depthwise_conv_7/Conv,884736 +/depthwise_conv_8/Conv,884736 +/depthwise_conv_9/Conv,884736 +/downsample/Mul,55296 +/downsample/Mul_1,55296 +/downsample/ReduceSum,1536 +/downsample/ReduceSum_1,1536 +/downsample/Reshape,2 +/downsample/Softmax,5184 +/downsample_1/Mul,55296 +/downsample_1/Mul_1,55296 +/downsample_1/ReduceSum,3072 +/downsample_1/ReduceSum_1,3072 +/downsample_1/Reshape,2 +/downsample_1/Softmax,5184 +/downsample_2/Mul,55296 +/downsample_2/Mul_1,55296 +/downsample_2/ReduceSum,6144 +/downsample_2/ReduceSum_1,6144 +/downsample_2/Reshape,2 +/downsample_2/Softmax,5184 +/downsample_3/Mul,55296 +/downsample_3/Mul_1,55296 +/downsample_3/ReduceSum,1536 +/downsample_3/ReduceSum_1,1536 +/downsample_3/Reshape,2 +/downsample_3/Softmax,5184 +/downsample_output/Mul,55296 +/downsample_output/Mul_1,55296 +/downsample_output/ReduceSum,1536 +/downsample_output/ReduceSum_1,1536 +/downsample_output/Reshape,2 +/downsample_output/Softmax,5184 +/encoder_embed/Reshape_new_reshape,2 +/encoder_embed/Transpose,0 +/encoder_embed/Unsqueeze,24160 +/encoder_embed/conv/conv.0/Conv,1907200 +/encoder_embed/conv/conv.2/Mul,190720 +/encoder_embed/conv/conv.2/Sigmoid,6103040 +/encoder_embed/conv/conv.2/Sub,190720 +/encoder_embed/conv/conv.3/Conv,13483392 +/encoder_embed/conv/conv.5/Mul,184704 +/encoder_embed/conv/conv.5/Sigmoid,5910528 +/encoder_embed/conv/conv.5/Sub,184704 +/encoder_embed/conv/conv.6/Conv,101210112 +/encoder_embed/conv/conv.8/Mul,350208 +/encoder_embed/conv/conv.8/Sigmoid,11206656 +/encoder_embed/conv/conv.8/Sub,350208 +/encoder_embed/out/MatMul/MatMulAddFusion,21288960 +/encoder_proj/MatMul/MatMulAddFusion,18911232 +/feed_forward1/activation/Mul,147456 +/feed_forward1/activation/Sigmoid,4718592 +/feed_forward1/activation/Sub,147456 +/feed_forward1/activation_1/Mul,147456 +/feed_forward1/activation_1/Sigmoid,4718592 +/feed_forward1/activation_1/Sub,147456 +/feed_forward1/activation_10/Mul,36864 +/feed_forward1/activation_10/Sigmoid,1179648 +/feed_forward1/activation_10/Sub,36864 +/feed_forward1/activation_11/Mul,73728 +/feed_forward1/activation_11/Sigmoid,2359296 +/feed_forward1/activation_11/Sub,73728 +/feed_forward1/activation_12/Mul,73728 +/feed_forward1/activation_12/Sigmoid,2359296 +/feed_forward1/activation_12/Sub,73728 +/feed_forward1/activation_13/Mul,73728 +/feed_forward1/activation_13/Sigmoid,2359296 +/feed_forward1/activation_13/Sub,73728 +/feed_forward1/activation_14/Mul,73728 +/feed_forward1/activation_14/Sigmoid,2359296 +/feed_forward1/activation_14/Sub,73728 +/feed_forward1/activation_2/Mul,73728 +/feed_forward1/activation_2/Sigmoid,2359296 +/feed_forward1/activation_2/Sub,73728 +/feed_forward1/activation_3/Mul,73728 +/feed_forward1/activation_3/Sigmoid,2359296 +/feed_forward1/activation_3/Sub,73728 +/feed_forward1/activation_4/Mul,73728 +/feed_forward1/activation_4/Sigmoid,2359296 +/feed_forward1/activation_4/Sub,73728 +/feed_forward1/activation_5/Mul,73728 +/feed_forward1/activation_5/Sigmoid,2359296 +/feed_forward1/activation_5/Sub,73728 +/feed_forward1/activation_6/Mul,73728 +/feed_forward1/activation_6/Sigmoid,2359296 +/feed_forward1/activation_6/Sub,73728 +/feed_forward1/activation_7/Mul,73728 +/feed_forward1/activation_7/Sigmoid,2359296 +/feed_forward1/activation_7/Sub,73728 +/feed_forward1/activation_8/Mul,73728 +/feed_forward1/activation_8/Sigmoid,2359296 +/feed_forward1/activation_8/Sub,73728 +/feed_forward1/activation_9/Mul,36864 +/feed_forward1/activation_9/Sigmoid,1179648 +/feed_forward1/activation_9/Sub,36864 +/feed_forward1/in_proj/MatMul/MatMulAddFusion,151142400 +/feed_forward1/in_proj_1/MatMul/MatMulAddFusion,151142400 +/feed_forward1/in_proj_10/MatMul/MatMulAddFusion,75534336 +/feed_forward1/in_proj_11/MatMul/MatMulAddFusion,75571200 +/feed_forward1/in_proj_12/MatMul/MatMulAddFusion,75571200 +/feed_forward1/in_proj_13/MatMul/MatMulAddFusion,75571200 +/feed_forward1/in_proj_14/MatMul/MatMulAddFusion,75571200 +/feed_forward1/in_proj_2/MatMul/MatMulAddFusion,75571200 +/feed_forward1/in_proj_3/MatMul/MatMulAddFusion,75571200 +/feed_forward1/in_proj_4/MatMul/MatMulAddFusion,75571200 +/feed_forward1/in_proj_5/MatMul/MatMulAddFusion,75571200 +/feed_forward1/in_proj_6/MatMul/MatMulAddFusion,151068672 +/feed_forward1/in_proj_7/MatMul/MatMulAddFusion,151068672 +/feed_forward1/in_proj_8/MatMul/MatMulAddFusion,151068672 +/feed_forward1/in_proj_9/MatMul/MatMulAddFusion,75534336 +/feed_forward1/out_proj/MatMul/MatMulAddFusion,21288960 +/feed_forward1/out_proj_1/MatMul/MatMulAddFusion,21288960 +/feed_forward1/out_proj_10/MatMul/MatMulAddFusion,2661120 +/feed_forward1/out_proj_11/MatMul/MatMulAddFusion,10644480 +/feed_forward1/out_proj_12/MatMul/MatMulAddFusion,10644480 +/feed_forward1/out_proj_13/MatMul/MatMulAddFusion,10644480 +/feed_forward1/out_proj_14/MatMul/MatMulAddFusion,10644480 +/feed_forward1/out_proj_2/MatMul/MatMulAddFusion,10644480 +/feed_forward1/out_proj_3/MatMul/MatMulAddFusion,10644480 +/feed_forward1/out_proj_4/MatMul/MatMulAddFusion,10644480 +/feed_forward1/out_proj_5/MatMul/MatMulAddFusion,10644480 +/feed_forward1/out_proj_6/MatMul/MatMulAddFusion,5322240 +/feed_forward1/out_proj_7/MatMul/MatMulAddFusion,5322240 +/feed_forward1/out_proj_8/MatMul/MatMulAddFusion,5322240 +/feed_forward1/out_proj_9/MatMul/MatMulAddFusion,2661120 +/feed_forward2/activation/Mul,147456 +/feed_forward2/activation/Sigmoid,4718592 +/feed_forward2/activation/Sub,147456 +/feed_forward2/activation_1/Mul,147456 +/feed_forward2/activation_1/Sigmoid,4718592 +/feed_forward2/activation_1/Sub,147456 +/feed_forward2/activation_10/Mul,36864 +/feed_forward2/activation_10/Sigmoid,1179648 +/feed_forward2/activation_10/Sub,36864 +/feed_forward2/activation_11/Mul,73728 +/feed_forward2/activation_11/Sigmoid,2359296 +/feed_forward2/activation_11/Sub,73728 +/feed_forward2/activation_12/Mul,73728 +/feed_forward2/activation_12/Sigmoid,2359296 +/feed_forward2/activation_12/Sub,73728 +/feed_forward2/activation_13/Mul,73728 +/feed_forward2/activation_13/Sigmoid,2359296 +/feed_forward2/activation_13/Sub,73728 +/feed_forward2/activation_14/Mul,73728 +/feed_forward2/activation_14/Sigmoid,2359296 +/feed_forward2/activation_14/Sub,73728 +/feed_forward2/activation_2/Mul,73728 +/feed_forward2/activation_2/Sigmoid,2359296 +/feed_forward2/activation_2/Sub,73728 +/feed_forward2/activation_3/Mul,73728 +/feed_forward2/activation_3/Sigmoid,2359296 +/feed_forward2/activation_3/Sub,73728 +/feed_forward2/activation_4/Mul,73728 +/feed_forward2/activation_4/Sigmoid,2359296 +/feed_forward2/activation_4/Sub,73728 +/feed_forward2/activation_5/Mul,73728 +/feed_forward2/activation_5/Sigmoid,2359296 +/feed_forward2/activation_5/Sub,73728 +/feed_forward2/activation_6/Mul,73728 +/feed_forward2/activation_6/Sigmoid,2359296 +/feed_forward2/activation_6/Sub,73728 +/feed_forward2/activation_7/Mul,73728 +/feed_forward2/activation_7/Sigmoid,2359296 +/feed_forward2/activation_7/Sub,73728 +/feed_forward2/activation_8/Mul,73728 +/feed_forward2/activation_8/Sigmoid,2359296 +/feed_forward2/activation_8/Sub,73728 +/feed_forward2/activation_9/Mul,36864 +/feed_forward2/activation_9/Sigmoid,1179648 +/feed_forward2/activation_9/Sub,36864 +/feed_forward2/in_proj/MatMul/MatMulAddFusion,151142400 +/feed_forward2/in_proj_1/MatMul/MatMulAddFusion,151142400 +/feed_forward2/in_proj_10/MatMul/MatMulAddFusion,75534336 +/feed_forward2/in_proj_11/MatMul/MatMulAddFusion,75571200 +/feed_forward2/in_proj_12/MatMul/MatMulAddFusion,75571200 +/feed_forward2/in_proj_13/MatMul/MatMulAddFusion,75571200 +/feed_forward2/in_proj_14/MatMul/MatMulAddFusion,75571200 +/feed_forward2/in_proj_2/MatMul/MatMulAddFusion,75571200 +/feed_forward2/in_proj_3/MatMul/MatMulAddFusion,75571200 +/feed_forward2/in_proj_4/MatMul/MatMulAddFusion,75571200 +/feed_forward2/in_proj_5/MatMul/MatMulAddFusion,75571200 +/feed_forward2/in_proj_6/MatMul/MatMulAddFusion,151068672 +/feed_forward2/in_proj_7/MatMul/MatMulAddFusion,151068672 +/feed_forward2/in_proj_8/MatMul/MatMulAddFusion,151068672 +/feed_forward2/in_proj_9/MatMul/MatMulAddFusion,75534336 +/feed_forward2/out_proj/MatMul/MatMulAddFusion,21288960 +/feed_forward2/out_proj_1/MatMul/MatMulAddFusion,21288960 +/feed_forward2/out_proj_10/MatMul/MatMulAddFusion,2661120 +/feed_forward2/out_proj_11/MatMul/MatMulAddFusion,10644480 +/feed_forward2/out_proj_12/MatMul/MatMulAddFusion,10644480 +/feed_forward2/out_proj_13/MatMul/MatMulAddFusion,10644480 +/feed_forward2/out_proj_14/MatMul/MatMulAddFusion,10644480 +/feed_forward2/out_proj_2/MatMul/MatMulAddFusion,10644480 +/feed_forward2/out_proj_3/MatMul/MatMulAddFusion,10644480 +/feed_forward2/out_proj_4/MatMul/MatMulAddFusion,10644480 +/feed_forward2/out_proj_5/MatMul/MatMulAddFusion,10644480 +/feed_forward2/out_proj_6/MatMul/MatMulAddFusion,5322240 +/feed_forward2/out_proj_7/MatMul/MatMulAddFusion,5322240 +/feed_forward2/out_proj_8/MatMul/MatMulAddFusion,5322240 +/feed_forward2/out_proj_9/MatMul/MatMulAddFusion,2661120 +/feed_forward3/activation/Mul,147456 +/feed_forward3/activation/Sigmoid,4718592 +/feed_forward3/activation/Sub,147456 +/feed_forward3/activation_1/Mul,147456 +/feed_forward3/activation_1/Sigmoid,4718592 +/feed_forward3/activation_1/Sub,147456 +/feed_forward3/activation_10/Mul,36864 +/feed_forward3/activation_10/Sigmoid,1179648 +/feed_forward3/activation_10/Sub,36864 +/feed_forward3/activation_11/Mul,73728 +/feed_forward3/activation_11/Sigmoid,2359296 +/feed_forward3/activation_11/Sub,73728 +/feed_forward3/activation_12/Mul,73728 +/feed_forward3/activation_12/Sigmoid,2359296 +/feed_forward3/activation_12/Sub,73728 +/feed_forward3/activation_13/Mul,73728 +/feed_forward3/activation_13/Sigmoid,2359296 +/feed_forward3/activation_13/Sub,73728 +/feed_forward3/activation_14/Mul,73728 +/feed_forward3/activation_14/Sigmoid,2359296 +/feed_forward3/activation_14/Sub,73728 +/feed_forward3/activation_2/Mul,73728 +/feed_forward3/activation_2/Sigmoid,2359296 +/feed_forward3/activation_2/Sub,73728 +/feed_forward3/activation_3/Mul,73728 +/feed_forward3/activation_3/Sigmoid,2359296 +/feed_forward3/activation_3/Sub,73728 +/feed_forward3/activation_4/Mul,73728 +/feed_forward3/activation_4/Sigmoid,2359296 +/feed_forward3/activation_4/Sub,73728 +/feed_forward3/activation_5/Mul,73728 +/feed_forward3/activation_5/Sigmoid,2359296 +/feed_forward3/activation_5/Sub,73728 +/feed_forward3/activation_6/Mul,73728 +/feed_forward3/activation_6/Sigmoid,2359296 +/feed_forward3/activation_6/Sub,73728 +/feed_forward3/activation_7/Mul,73728 +/feed_forward3/activation_7/Sigmoid,2359296 +/feed_forward3/activation_7/Sub,73728 +/feed_forward3/activation_8/Mul,73728 +/feed_forward3/activation_8/Sigmoid,2359296 +/feed_forward3/activation_8/Sub,73728 +/feed_forward3/activation_9/Mul,36864 +/feed_forward3/activation_9/Sigmoid,1179648 +/feed_forward3/activation_9/Sub,36864 +/feed_forward3/in_proj/MatMul/MatMulAddFusion,151142400 +/feed_forward3/in_proj_1/MatMul/MatMulAddFusion,151142400 +/feed_forward3/in_proj_10/MatMul/MatMulAddFusion,75534336 +/feed_forward3/in_proj_11/MatMul/MatMulAddFusion,75571200 +/feed_forward3/in_proj_12/MatMul/MatMulAddFusion,75571200 +/feed_forward3/in_proj_13/MatMul/MatMulAddFusion,75571200 +/feed_forward3/in_proj_14/MatMul/MatMulAddFusion,75571200 +/feed_forward3/in_proj_2/MatMul/MatMulAddFusion,75571200 +/feed_forward3/in_proj_3/MatMul/MatMulAddFusion,75571200 +/feed_forward3/in_proj_4/MatMul/MatMulAddFusion,75571200 +/feed_forward3/in_proj_5/MatMul/MatMulAddFusion,75571200 +/feed_forward3/in_proj_6/MatMul/MatMulAddFusion,151068672 +/feed_forward3/in_proj_7/MatMul/MatMulAddFusion,151068672 +/feed_forward3/in_proj_8/MatMul/MatMulAddFusion,151068672 +/feed_forward3/in_proj_9/MatMul/MatMulAddFusion,75534336 +/feed_forward3/out_proj/MatMul/MatMulAddFusion,21288960 +/feed_forward3/out_proj_1/MatMul/MatMulAddFusion,21288960 +/feed_forward3/out_proj_10/MatMul/MatMulAddFusion,2661120 +/feed_forward3/out_proj_11/MatMul/MatMulAddFusion,10644480 +/feed_forward3/out_proj_12/MatMul/MatMulAddFusion,10644480 +/feed_forward3/out_proj_13/MatMul/MatMulAddFusion,10644480 +/feed_forward3/out_proj_14/MatMul/MatMulAddFusion,10644480 +/feed_forward3/out_proj_2/MatMul/MatMulAddFusion,10644480 +/feed_forward3/out_proj_3/MatMul/MatMulAddFusion,10644480 +/feed_forward3/out_proj_4/MatMul/MatMulAddFusion,10644480 +/feed_forward3/out_proj_5/MatMul/MatMulAddFusion,10644480 +/feed_forward3/out_proj_6/MatMul/MatMulAddFusion,5322240 +/feed_forward3/out_proj_7/MatMul/MatMulAddFusion,5322240 +/feed_forward3/out_proj_8/MatMul/MatMulAddFusion,5322240 +/feed_forward3/out_proj_9/MatMul/MatMulAddFusion,2661120 +/in_proj/MatMul/MatMulAddFusion,37822464 +/in_proj2/MatMul,5322240 +/in_proj2_1/MatMul,5322240 +/in_proj2_10/MatMul,665280 +/in_proj2_11/MatMul,2661120 +/in_proj2_12/MatMul,2661120 +/in_proj2_13/MatMul,2661120 +/in_proj2_14/MatMul,2661120 +/in_proj2_2/MatMul,2661120 +/in_proj2_3/MatMul,2661120 +/in_proj2_4/MatMul,2661120 +/in_proj2_5/MatMul,2661120 +/in_proj2_6/MatMul,1330560 +/in_proj2_7/MatMul,1330560 +/in_proj2_8/MatMul,1330560 +/in_proj2_9/MatMul,665280 +/in_proj_1/MatMul/MatMulAddFusion,37822464 +/in_proj_10/MatMul/MatMulAddFusion,4727808 +/in_proj_11/MatMul/MatMulAddFusion,18911232 +/in_proj_12/MatMul/MatMulAddFusion,18911232 +/in_proj_13/MatMul/MatMulAddFusion,18911232 +/in_proj_14/MatMul/MatMulAddFusion,18911232 +/in_proj_2/MatMul/MatMulAddFusion,18911232 +/in_proj_3/MatMul/MatMulAddFusion,18911232 +/in_proj_4/MatMul/MatMulAddFusion,18911232 +/in_proj_5/MatMul/MatMulAddFusion,18911232 +/in_proj_6/MatMul/MatMulAddFusion,9455616 +/in_proj_7/MatMul/MatMulAddFusion,9455616 +/in_proj_8/MatMul/MatMulAddFusion,9455616 +/in_proj_9/MatMul/MatMulAddFusion,4727808 +/norm_final/Add,144 +/norm_final/Mul,55296 +/norm_final/Mul_1,55296 +/norm_final/Pow,4608 +/norm_final/ReduceMean,768 +/norm_final_1/Add,144 +/norm_final_1/Mul,55296 +/norm_final_1/Mul_1,55296 +/norm_final_1/Pow,4608 +/norm_final_1/ReduceMean,768 +/norm_final_10/Add,18 +/norm_final_10/Mul,6912 +/norm_final_10/Mul_1,6912 +/norm_final_10/Pow,576 +/norm_final_10/ReduceMean,768 +/norm_final_11/Add,72 +/norm_final_11/Mul,27648 +/norm_final_11/Mul_1,27648 +/norm_final_11/Pow,2304 +/norm_final_11/ReduceMean,768 +/norm_final_12/Add,72 +/norm_final_12/Mul,27648 +/norm_final_12/Mul_1,27648 +/norm_final_12/Pow,2304 +/norm_final_12/ReduceMean,768 +/norm_final_13/Add,72 +/norm_final_13/Mul,27648 +/norm_final_13/Mul_1,27648 +/norm_final_13/Pow,2304 +/norm_final_13/ReduceMean,768 +/norm_final_14/Add,72 +/norm_final_14/Mul,27648 +/norm_final_14/Mul_1,27648 +/norm_final_14/Pow,2304 +/norm_final_14/ReduceMean,768 +/norm_final_2/Add,72 +/norm_final_2/Mul,27648 +/norm_final_2/Mul_1,27648 +/norm_final_2/Pow,2304 +/norm_final_2/ReduceMean,768 +/norm_final_3/Add,72 +/norm_final_3/Mul,27648 +/norm_final_3/Mul_1,27648 +/norm_final_3/Pow,2304 +/norm_final_3/ReduceMean,768 +/norm_final_4/Add,72 +/norm_final_4/Mul,27648 +/norm_final_4/Mul_1,27648 +/norm_final_4/Pow,2304 +/norm_final_4/ReduceMean,768 +/norm_final_5/Add,72 +/norm_final_5/Mul,27648 +/norm_final_5/Mul_1,27648 +/norm_final_5/Pow,2304 +/norm_final_5/ReduceMean,768 +/norm_final_6/Add,36 +/norm_final_6/Mul,13824 +/norm_final_6/Mul_1,13824 +/norm_final_6/Pow,1152 +/norm_final_6/ReduceMean,768 +/norm_final_7/Add,36 +/norm_final_7/Mul,13824 +/norm_final_7/Mul_1,13824 +/norm_final_7/Pow,1152 +/norm_final_7/ReduceMean,768 +/norm_final_8/Add,36 +/norm_final_8/Mul,13824 +/norm_final_8/Mul_1,13824 +/norm_final_8/Pow,1152 +/norm_final_8/ReduceMean,768 +/norm_final_9/Add,18 +/norm_final_9/Mul,6912 +/norm_final_9/Mul_1,6912 +/norm_final_9/Pow,576 +/norm_final_9/ReduceMean,768 +/out_combiner/Add,55296 +/out_combiner/Mul,55296 +/out_combiner/Mul_1,55296 +/out_combiner_1/Add,55296 +/out_combiner_1/Mul,55296 +/out_combiner_1/Mul_1,55296 +/out_combiner_2/Add,55296 +/out_combiner_2/Mul,55296 +/out_combiner_2/Mul_1,55296 +/out_combiner_3/Add,55296 +/out_combiner_3/Mul,55296 +/out_combiner_3/Mul_1,55296 +/out_proj2/MatMul/MatMulAddFusion,21288960 +/out_proj2_1/MatMul/MatMulAddFusion,21288960 +/out_proj2_10/MatMul/MatMulAddFusion,2661120 +/out_proj2_11/MatMul/MatMulAddFusion,10644480 +/out_proj2_12/MatMul/MatMulAddFusion,10644480 +/out_proj2_13/MatMul/MatMulAddFusion,10644480 +/out_proj2_14/MatMul/MatMulAddFusion,10644480 +/out_proj2_2/MatMul/MatMulAddFusion,10644480 +/out_proj2_3/MatMul/MatMulAddFusion,10644480 +/out_proj2_4/MatMul/MatMulAddFusion,10644480 +/out_proj2_5/MatMul/MatMulAddFusion,10644480 +/out_proj2_6/MatMul/MatMulAddFusion,5322240 +/out_proj2_7/MatMul/MatMulAddFusion,5322240 +/out_proj2_8/MatMul/MatMulAddFusion,5322240 +/out_proj2_9/MatMul/MatMulAddFusion,2661120 +/pointwise_conv1/Conv,42577920 +/pointwise_conv1_1/Conv,42577920 +/pointwise_conv1_10/Conv,21288960 +/pointwise_conv1_11/Conv,21288960 +/pointwise_conv1_12/Conv,10644480 +/pointwise_conv1_13/Conv,10644480 +/pointwise_conv1_14/Conv,10644480 +/pointwise_conv1_15/Conv,10644480 +/pointwise_conv1_16/Conv,10644480 +/pointwise_conv1_17/Conv,10644480 +/pointwise_conv1_18/Conv,5322240 +/pointwise_conv1_19/Conv,5322240 +/pointwise_conv1_2/Conv,42577920 +/pointwise_conv1_20/Conv,5322240 +/pointwise_conv1_21/Conv,5322240 +/pointwise_conv1_22/Conv,21288960 +/pointwise_conv1_23/Conv,21288960 +/pointwise_conv1_24/Conv,21288960 +/pointwise_conv1_25/Conv,21288960 +/pointwise_conv1_26/Conv,21288960 +/pointwise_conv1_27/Conv,21288960 +/pointwise_conv1_28/Conv,21288960 +/pointwise_conv1_29/Conv,21288960 +/pointwise_conv1_3/Conv,42577920 +/pointwise_conv1_4/Conv,21288960 +/pointwise_conv1_5/Conv,21288960 +/pointwise_conv1_6/Conv,21288960 +/pointwise_conv1_7/Conv,21288960 +/pointwise_conv1_8/Conv,21288960 +/pointwise_conv1_9/Conv,21288960 +/pointwise_conv2/Conv,21288960 +/pointwise_conv2_1/Conv,21288960 +/pointwise_conv2_10/Conv,10644480 +/pointwise_conv2_11/Conv,10644480 +/pointwise_conv2_12/Conv,5322240 +/pointwise_conv2_13/Conv,5322240 +/pointwise_conv2_14/Conv,5322240 +/pointwise_conv2_15/Conv,5322240 +/pointwise_conv2_16/Conv,5322240 +/pointwise_conv2_17/Conv,5322240 +/pointwise_conv2_18/Conv,2661120 +/pointwise_conv2_19/Conv,2661120 +/pointwise_conv2_2/Conv,21288960 +/pointwise_conv2_20/Conv,2661120 +/pointwise_conv2_21/Conv,2661120 +/pointwise_conv2_22/Conv,10644480 +/pointwise_conv2_23/Conv,10644480 +/pointwise_conv2_24/Conv,10644480 +/pointwise_conv2_25/Conv,10644480 +/pointwise_conv2_26/Conv,10644480 +/pointwise_conv2_27/Conv,10644480 +/pointwise_conv2_28/Conv,10644480 +/pointwise_conv2_29/Conv,10644480 +/pointwise_conv2_3/Conv,21288960 +/pointwise_conv2_4/Conv,10644480 +/pointwise_conv2_5/Conv,10644480 +/pointwise_conv2_6/Conv,10644480 +/pointwise_conv2_7/Conv,10644480 +/pointwise_conv2_8/Conv,10644480 +/pointwise_conv2_9/Conv,10644480 +/proj/MatMul,21288960 +/proj_1/MatMul,21288960 +/proj_10/MatMul,2661120 +/proj_11/MatMul,10644480 +/proj_12/MatMul,10644480 +/proj_13/MatMul,10644480 +/proj_14/MatMul,10644480 +/proj_2/MatMul,10644480 +/proj_3/MatMul,10644480 +/proj_4/MatMul,10644480 +/proj_5/MatMul,10644480 +/proj_6/MatMul,5322240 +/proj_7/MatMul,5322240 +/proj_8/MatMul,5322240 +/proj_9/MatMul,2661120 +/skip_modules.4/Add,55296 +/skip_modules.4/Mul,55296 +/skip_modules.4/Mul_1,55296 +/upsample/Add,55296 +/upsample/Expand,0 +/upsample/Reshape_1,2 +/upsample/Unsqueeze,27648 +/upsample_1/Add,55296 +/upsample_1/Expand,0 +/upsample_1/Reshape_1,2 +/upsample_1/Unsqueeze,13824 +/upsample_2/Add,55296 +/upsample_2/Expand,0 +/upsample_2/Reshape_1,2 +/upsample_2/Unsqueeze,6912 +/upsample_3/Add,55296 +/upsample_3/Expand,0 +/upsample_3/Reshape_1,2 +/upsample_3/Unsqueeze,27648 +gemm_input_reshape_token_104,2 +gemm_input_reshape_token_110,2 +gemm_input_reshape_token_116,2 +gemm_input_reshape_token_122,2 +gemm_input_reshape_token_134,2 +gemm_input_reshape_token_14,2 +gemm_input_reshape_token_140,2 +gemm_input_reshape_token_152,2 +gemm_input_reshape_token_158,2 +gemm_input_reshape_token_164,2 +gemm_input_reshape_token_170,2 +gemm_input_reshape_token_176,2 +gemm_input_reshape_token_188,2 +gemm_input_reshape_token_194,2 +gemm_input_reshape_token_2,2 +gemm_input_reshape_token_206,2 +gemm_input_reshape_token_212,2 +gemm_input_reshape_token_218,2 +gemm_input_reshape_token_224,2 +gemm_input_reshape_token_230,2 +gemm_input_reshape_token_242,2 +gemm_input_reshape_token_248,2 +gemm_input_reshape_token_26,2 +gemm_input_reshape_token_260,2 +gemm_input_reshape_token_266,2 +gemm_input_reshape_token_272,2 +gemm_input_reshape_token_278,2 +gemm_input_reshape_token_284,2 +gemm_input_reshape_token_296,2 +gemm_input_reshape_token_302,2 +gemm_input_reshape_token_314,2 +gemm_input_reshape_token_32,2 +gemm_input_reshape_token_320,2 +gemm_input_reshape_token_326,2 +gemm_input_reshape_token_332,2 +gemm_input_reshape_token_338,2 +gemm_input_reshape_token_350,2 +gemm_input_reshape_token_356,2 +gemm_input_reshape_token_368,2 +gemm_input_reshape_token_374,2 +gemm_input_reshape_token_380,2 +gemm_input_reshape_token_386,2 +gemm_input_reshape_token_392,2 +gemm_input_reshape_token_404,2 +gemm_input_reshape_token_410,2 +gemm_input_reshape_token_422,2 +gemm_input_reshape_token_428,2 +gemm_input_reshape_token_434,2 +gemm_input_reshape_token_44,2 +gemm_input_reshape_token_440,2 +gemm_input_reshape_token_446,2 +gemm_input_reshape_token_458,2 +gemm_input_reshape_token_464,2 +gemm_input_reshape_token_476,2 +gemm_input_reshape_token_482,2 +gemm_input_reshape_token_488,2 +gemm_input_reshape_token_494,2 +gemm_input_reshape_token_50,2 +gemm_input_reshape_token_500,2 +gemm_input_reshape_token_512,2 +gemm_input_reshape_token_518,2 +gemm_input_reshape_token_530,2 +gemm_input_reshape_token_536,2 +gemm_input_reshape_token_542,2 +gemm_input_reshape_token_548,2 +gemm_input_reshape_token_554,2 +gemm_input_reshape_token_56,2 +gemm_input_reshape_token_566,2 +gemm_input_reshape_token_572,2 +gemm_input_reshape_token_584,2 +gemm_input_reshape_token_590,2 +gemm_input_reshape_token_596,2 +gemm_input_reshape_token_602,2 +gemm_input_reshape_token_608,2 +gemm_input_reshape_token_62,2 +gemm_input_reshape_token_620,2 +gemm_input_reshape_token_626,2 +gemm_input_reshape_token_638,2 +gemm_input_reshape_token_644,2 +gemm_input_reshape_token_650,2 +gemm_input_reshape_token_656,2 +gemm_input_reshape_token_662,2 +gemm_input_reshape_token_674,2 +gemm_input_reshape_token_68,2 +gemm_input_reshape_token_680,2 +gemm_input_reshape_token_692,2 +gemm_input_reshape_token_698,2 +gemm_input_reshape_token_704,2 +gemm_input_reshape_token_710,2 +gemm_input_reshape_token_716,2 +gemm_input_reshape_token_728,2 +gemm_input_reshape_token_734,2 +gemm_input_reshape_token_746,2 +gemm_input_reshape_token_752,2 +gemm_input_reshape_token_758,2 +gemm_input_reshape_token_764,2 +gemm_input_reshape_token_770,2 +gemm_input_reshape_token_782,2 +gemm_input_reshape_token_788,2 +gemm_input_reshape_token_8,2 +gemm_input_reshape_token_80,2 +gemm_input_reshape_token_800,2 +gemm_input_reshape_token_806,2 +gemm_input_reshape_token_812,2 +gemm_input_reshape_token_86,2 +gemm_input_reshape_token_98,2 +gemm_output_reshape,2 +gemm_output_reshape_token_101,2 +gemm_output_reshape_token_107,2 +gemm_output_reshape_token_11,2 +gemm_output_reshape_token_113,2 +gemm_output_reshape_token_119,2 +gemm_output_reshape_token_125,2 +gemm_output_reshape_token_131,2 +gemm_output_reshape_token_137,2 +gemm_output_reshape_token_143,2 +gemm_output_reshape_token_149,2 +gemm_output_reshape_token_155,2 +gemm_output_reshape_token_161,2 +gemm_output_reshape_token_167,2 +gemm_output_reshape_token_17,2 +gemm_output_reshape_token_173,2 +gemm_output_reshape_token_179,2 +gemm_output_reshape_token_185,2 +gemm_output_reshape_token_191,2 +gemm_output_reshape_token_197,2 +gemm_output_reshape_token_203,2 +gemm_output_reshape_token_209,2 +gemm_output_reshape_token_215,2 +gemm_output_reshape_token_221,2 +gemm_output_reshape_token_227,2 +gemm_output_reshape_token_23,2 +gemm_output_reshape_token_233,2 +gemm_output_reshape_token_239,2 +gemm_output_reshape_token_245,2 +gemm_output_reshape_token_251,2 +gemm_output_reshape_token_257,2 +gemm_output_reshape_token_263,2 +gemm_output_reshape_token_269,2 +gemm_output_reshape_token_275,2 +gemm_output_reshape_token_281,2 +gemm_output_reshape_token_287,2 +gemm_output_reshape_token_29,2 +gemm_output_reshape_token_293,2 +gemm_output_reshape_token_299,2 +gemm_output_reshape_token_305,2 +gemm_output_reshape_token_311,2 +gemm_output_reshape_token_317,2 +gemm_output_reshape_token_323,2 +gemm_output_reshape_token_329,2 +gemm_output_reshape_token_335,2 +gemm_output_reshape_token_341,2 +gemm_output_reshape_token_347,2 +gemm_output_reshape_token_35,2 +gemm_output_reshape_token_353,2 +gemm_output_reshape_token_359,2 +gemm_output_reshape_token_365,2 +gemm_output_reshape_token_371,2 +gemm_output_reshape_token_377,2 +gemm_output_reshape_token_383,2 +gemm_output_reshape_token_389,2 +gemm_output_reshape_token_395,2 +gemm_output_reshape_token_401,2 +gemm_output_reshape_token_407,2 +gemm_output_reshape_token_41,2 +gemm_output_reshape_token_413,2 +gemm_output_reshape_token_419,2 +gemm_output_reshape_token_425,2 +gemm_output_reshape_token_431,2 +gemm_output_reshape_token_437,2 +gemm_output_reshape_token_443,2 +gemm_output_reshape_token_449,2 +gemm_output_reshape_token_455,2 +gemm_output_reshape_token_461,2 +gemm_output_reshape_token_467,2 +gemm_output_reshape_token_47,2 +gemm_output_reshape_token_473,2 +gemm_output_reshape_token_479,2 +gemm_output_reshape_token_485,2 +gemm_output_reshape_token_491,2 +gemm_output_reshape_token_497,2 +gemm_output_reshape_token_5,2 +gemm_output_reshape_token_503,2 +gemm_output_reshape_token_509,2 +gemm_output_reshape_token_515,2 +gemm_output_reshape_token_521,2 +gemm_output_reshape_token_527,2 +gemm_output_reshape_token_53,2 +gemm_output_reshape_token_533,2 +gemm_output_reshape_token_539,2 +gemm_output_reshape_token_545,2 +gemm_output_reshape_token_551,2 +gemm_output_reshape_token_557,2 +gemm_output_reshape_token_563,2 +gemm_output_reshape_token_569,2 +gemm_output_reshape_token_575,2 +gemm_output_reshape_token_581,2 +gemm_output_reshape_token_587,2 +gemm_output_reshape_token_59,2 +gemm_output_reshape_token_593,2 +gemm_output_reshape_token_599,2 +gemm_output_reshape_token_605,2 +gemm_output_reshape_token_611,2 +gemm_output_reshape_token_617,2 +gemm_output_reshape_token_623,2 +gemm_output_reshape_token_629,2 +gemm_output_reshape_token_635,2 +gemm_output_reshape_token_641,2 +gemm_output_reshape_token_647,2 +gemm_output_reshape_token_65,2 +gemm_output_reshape_token_653,2 +gemm_output_reshape_token_659,2 +gemm_output_reshape_token_665,2 +gemm_output_reshape_token_671,2 +gemm_output_reshape_token_677,2 +gemm_output_reshape_token_683,2 +gemm_output_reshape_token_689,2 +gemm_output_reshape_token_695,2 +gemm_output_reshape_token_701,2 +gemm_output_reshape_token_707,2 +gemm_output_reshape_token_71,2 +gemm_output_reshape_token_713,2 +gemm_output_reshape_token_719,2 +gemm_output_reshape_token_725,2 +gemm_output_reshape_token_731,2 +gemm_output_reshape_token_737,2 +gemm_output_reshape_token_743,2 +gemm_output_reshape_token_749,2 +gemm_output_reshape_token_755,2 +gemm_output_reshape_token_761,2 +gemm_output_reshape_token_767,2 +gemm_output_reshape_token_77,2 +gemm_output_reshape_token_773,2 +gemm_output_reshape_token_779,2 +gemm_output_reshape_token_785,2 +gemm_output_reshape_token_791,2 +gemm_output_reshape_token_797,2 +gemm_output_reshape_token_803,2 +gemm_output_reshape_token_809,2 +gemm_output_reshape_token_815,2 +gemm_output_reshape_token_83,2 +gemm_output_reshape_token_89,2 +gemm_output_reshape_token_95,2 diff --git a/graph_nodes.json b/graph_nodes.json new file mode 100644 index 0000000000000000000000000000000000000000..04d20741627cc807dfd3c6a971a2fb0b4cf9054d --- /dev/null +++ b/graph_nodes.json @@ -0,0 +1,45746 @@ +[ + { + "node_id:": 0, + "node_inputs:": [ + { + "input_dimension:": "2 1 ", + "input_name:": "cached_len_0" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_pos_1/Constant_2_output_0" + } + ], + "node_name:": "/Gather_22", + "node_outputs:": [ + { + "output_dimension:": "1 ", + "output_name:": "/Gather_22_output_0" + } + ] + }, + { + "node_id:": 1, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_22_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_37", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_37_output_0" + } + ] + }, + { + "node_id:": 2, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_22_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Unsqueeze_34", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_34_output_0" + } + ] + }, + { + "node_id:": 3, + "node_inputs:": [ + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_34_output_0" + } + ], + "node_name:": "/Cast_5", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Cast_5_output_0" + } + ] + }, + { + "node_id:": 4, + "node_inputs:": [ + { + "input_dimension:": "2 1 384 ", + "input_name:": "cached_avg_0" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_pos_1/Constant_2_output_0" + } + ], + "node_name:": "/Gather_23", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Gather_23_output_0" + } + ] + }, + { + "node_id:": 5, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Gather_23_output_0" + }, + { + "input_dimension:": "1 1 ", + "input_name:": "/Cast_5_output_0" + } + ], + "node_name:": "/Mul_11", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Mul_11_output_0" + } + ] + }, + { + "node_id:": 6, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Mul_11_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_35", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 ", + "output_name:": "/Unsqueeze_35_output_0" + } + ] + }, + { + "node_id:": 7, + "node_inputs:": [ + { + "input_dimension:": "2 1 ", + "input_name:": "cached_len_0" + }, + { + "input_dimension:": "", + "input_name:": "/upsample_3/Constant_output_0" + } + ], + "node_name:": "/Gather_1", + "node_outputs:": [ + { + "output_dimension:": "1 ", + "output_name:": "/Gather_1_output_0" + } + ] + }, + { + "node_id:": 8, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_1_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_3", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_3_output_0" + } + ] + }, + { + "node_id:": 9, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_1_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Unsqueeze", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_output_0" + } + ] + }, + { + "node_id:": 10, + "node_inputs:": [ + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_output_0" + } + ], + "node_name:": "/Cast", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Cast_output_0" + } + ] + }, + { + "node_id:": 11, + "node_inputs:": [ + { + "input_dimension:": "2 1 384 ", + "input_name:": "cached_avg_0" + }, + { + "input_dimension:": "", + "input_name:": "/upsample_3/Constant_output_0" + } + ], + "node_name:": "/Gather_2", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Gather_2_output_0" + } + ] + }, + { + "node_id:": 12, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Gather_2_output_0" + }, + { + "input_dimension:": "1 1 ", + "input_name:": "/Cast_output_0" + } + ], + "node_name:": "/Mul", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Mul_output_0" + } + ] + }, + { + "node_id:": 13, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Mul_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_1", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 ", + "output_name:": "/Unsqueeze_1_output_0" + } + ] + }, + { + "node_id:": 14, + "node_inputs:": [ + { + "input_dimension:": "1 151 80 ", + "input_name:": "x" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/encoder_embed/Unsqueeze", + "node_outputs:": [ + { + "output_dimension:": "1 1 151 80 ", + "output_name:": "/encoder_embed/Unsqueeze_output_0" + } + ] + }, + { + "node_id:": 15, + "node_inputs:": [ + { + "input_dimension:": "1 1 151 80 ", + "input_name:": "/encoder_embed/Unsqueeze_output_0" + }, + { + "input_dimension:": "8 1 3 3 ", + "input_name:": "encoder.encoder_embed.conv.0.weight" + }, + { + "input_dimension:": "8 ", + "input_name:": "encoder.encoder_embed.conv.0.bias" + } + ], + "node_name:": "/encoder_embed/conv/conv.0/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 8 149 80 ", + "output_name:": "/encoder_embed/conv/conv.0/Conv_output_0" + } + ] + }, + { + "node_id:": 16, + "node_inputs:": [ + { + "input_dimension:": "1 8 149 80 ", + "input_name:": "/encoder_embed/conv/conv.0/Conv_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/encoder_embed/conv/conv.2/Sub", + "node_outputs:": [ + { + "output_dimension:": "1 8 149 80 ", + "output_name:": "/encoder_embed/conv/conv.2/Sub_output_0" + } + ] + }, + { + "node_id:": 17, + "node_inputs:": [ + { + "input_dimension:": "1 8 149 80 ", + "input_name:": "/encoder_embed/conv/conv.2/Sub_output_0" + } + ], + "node_name:": "/encoder_embed/conv/conv.2/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "1 8 149 80 ", + "output_name:": "/encoder_embed/conv/conv.2/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 18, + "node_inputs:": [ + { + "input_dimension:": "1 8 149 80 ", + "input_name:": "/encoder_embed/conv/conv.0/Conv_output_0" + }, + { + "input_dimension:": "1 8 149 80 ", + "input_name:": "/encoder_embed/conv/conv.2/Sigmoid_output_0" + } + ], + "node_name:": "/encoder_embed/conv/conv.2/Mul", + "node_outputs:": [ + { + "output_dimension:": "1 8 149 80 ", + "output_name:": "/encoder_embed/conv/conv.2/Mul_output_0" + } + ] + }, + { + "node_id:": 19, + "node_inputs:": [ + { + "input_dimension:": "1 8 149 80 ", + "input_name:": "/encoder_embed/conv/conv.2/Mul_output_0" + }, + { + "input_dimension:": "32 8 3 3 ", + "input_name:": "encoder.encoder_embed.conv.3.weight" + }, + { + "input_dimension:": "32 ", + "input_name:": "encoder.encoder_embed.conv.3.bias" + } + ], + "node_name:": "/encoder_embed/conv/conv.3/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 32 74 39 ", + "output_name:": "/encoder_embed/conv/conv.3/Conv_output_0" + } + ] + }, + { + "node_id:": 20, + "node_inputs:": [ + { + "input_dimension:": "1 32 74 39 ", + "input_name:": "/encoder_embed/conv/conv.3/Conv_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/encoder_embed/conv/conv.5/Sub", + "node_outputs:": [ + { + "output_dimension:": "1 32 74 39 ", + "output_name:": "/encoder_embed/conv/conv.5/Sub_output_0" + } + ] + }, + { + "node_id:": 21, + "node_inputs:": [ + { + "input_dimension:": "1 32 74 39 ", + "input_name:": "/encoder_embed/conv/conv.5/Sub_output_0" + } + ], + "node_name:": "/encoder_embed/conv/conv.5/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "1 32 74 39 ", + "output_name:": "/encoder_embed/conv/conv.5/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 22, + "node_inputs:": [ + { + "input_dimension:": "1 32 74 39 ", + "input_name:": "/encoder_embed/conv/conv.3/Conv_output_0" + }, + { + "input_dimension:": "1 32 74 39 ", + "input_name:": "/encoder_embed/conv/conv.5/Sigmoid_output_0" + } + ], + "node_name:": "/encoder_embed/conv/conv.5/Mul", + "node_outputs:": [ + { + "output_dimension:": "1 32 74 39 ", + "output_name:": "/encoder_embed/conv/conv.5/Mul_output_0" + } + ] + }, + { + "node_id:": 23, + "node_inputs:": [ + { + "input_dimension:": "1 32 74 39 ", + "input_name:": "/encoder_embed/conv/conv.5/Mul_output_0" + }, + { + "input_dimension:": "128 32 3 3 ", + "input_name:": "encoder.encoder_embed.conv.6.weight" + }, + { + "input_dimension:": "128 ", + "input_name:": "encoder.encoder_embed.conv.6.bias" + } + ], + "node_name:": "/encoder_embed/conv/conv.6/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 128 72 19 ", + "output_name:": "/encoder_embed/conv/conv.6/Conv_output_0" + } + ] + }, + { + "node_id:": 24, + "node_inputs:": [ + { + "input_dimension:": "1 128 72 19 ", + "input_name:": "/encoder_embed/conv/conv.6/Conv_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/encoder_embed/conv/conv.8/Sub", + "node_outputs:": [ + { + "output_dimension:": "1 128 72 19 ", + "output_name:": "/encoder_embed/conv/conv.8/Sub_output_0" + } + ] + }, + { + "node_id:": 25, + "node_inputs:": [ + { + "input_dimension:": "1 128 72 19 ", + "input_name:": "/encoder_embed/conv/conv.8/Sub_output_0" + } + ], + "node_name:": "/encoder_embed/conv/conv.8/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "1 128 72 19 ", + "output_name:": "/encoder_embed/conv/conv.8/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 26, + "node_inputs:": [ + { + "input_dimension:": "1 128 72 19 ", + "input_name:": "/encoder_embed/conv/conv.6/Conv_output_0" + }, + { + "input_dimension:": "1 128 72 19 ", + "input_name:": "/encoder_embed/conv/conv.8/Sigmoid_output_0" + } + ], + "node_name:": "/encoder_embed/conv/conv.8/Mul", + "node_outputs:": [ + { + "output_dimension:": "1 128 72 19 ", + "output_name:": "/encoder_embed/conv/conv.8/Mul_output_0" + } + ] + }, + { + "node_id:": 27, + "node_inputs:": [ + { + "input_dimension:": "1 128 72 19 ", + "input_name:": "/encoder_embed/conv/conv.8/Mul_output_0" + } + ], + "node_name:": "/encoder_embed/Transpose", + "node_outputs:": [ + { + "output_dimension:": "1 72 128 19 ", + "output_name:": "/encoder_embed/Transpose_output_0" + } + ] + }, + { + "node_id:": 28, + "node_inputs:": [ + { + "input_dimension:": "1 72 384 ", + "input_name:": "/encoder_embed/out/Add_output_0" + } + ], + "node_name:": "/Transpose", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/Transpose_output_0" + } + ] + }, + { + "node_id:": 29, + "node_inputs:": [ + { + "input_dimension:": "72 1 1024 ", + "input_name:": "/feed_forward1/in_proj/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward1/activation/Sub", + "node_outputs:": [ + { + "output_dimension:": "72 1 1024 ", + "output_name:": "/feed_forward1/activation/Sub_output_0" + } + ] + }, + { + "node_id:": 30, + "node_inputs:": [ + { + "input_dimension:": "72 1 1024 ", + "input_name:": "/feed_forward1/activation/Sub_output_0" + } + ], + "node_name:": "/feed_forward1/activation/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "72 1 1024 ", + "output_name:": "/feed_forward1/activation/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 31, + "node_inputs:": [ + { + "input_dimension:": "72 1 1024 ", + "input_name:": "/feed_forward1/in_proj/Add_output_0" + }, + { + "input_dimension:": "72 1 1024 ", + "input_name:": "/feed_forward1/activation/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward1/activation/Mul", + "node_outputs:": [ + { + "output_dimension:": "72 1 1024 ", + "output_name:": "/feed_forward1/activation/Mul_output_0" + } + ] + }, + { + "node_id:": 32, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Transpose_output_0" + }, + { + "input_dimension:": "72 1 384 ", + "input_name:": "/feed_forward1/out_proj/Add_output_0" + } + ], + "node_name:": "/Add", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/Add_output_0" + } + ] + }, + { + "node_id:": 33, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_631_output_0" + } + ], + "node_name:": "/CumSum", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/CumSum_output_0" + } + ] + }, + { + "node_id:": 34, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/CumSum_output_0" + }, + { + "input_dimension:": "1 1 384 ", + "input_name:": "/Unsqueeze_1_output_0" + } + ], + "node_name:": "/Add_1", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/Add_1_output_0" + } + ] + }, + { + "node_id:": 35, + "node_inputs:": [ + { + "input_dimension:": "72 1 ", + "input_name:": "/Unsqueeze_2_output_0" + }, + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_3_output_0" + } + ], + "node_name:": "/Add_3", + "node_outputs:": [ + { + "output_dimension:": "72 1 ", + "output_name:": "/Add_3_output_0" + } + ] + }, + { + "node_id:": 36, + "node_inputs:": [ + { + "input_dimension:": "72 1 ", + "input_name:": "/Add_3_output_0" + } + ], + "node_name:": "/Cast_2", + "node_outputs:": [ + { + "output_dimension:": "72 1 ", + "output_name:": "/Cast_2_output_0" + } + ] + }, + { + "node_id:": 37, + "node_inputs:": [ + { + "input_dimension:": "72 1 ", + "input_name:": "/Cast_2_output_0" + } + ], + "node_name:": "/Reciprocal", + "node_outputs:": [ + { + "output_dimension:": "72 1 ", + "output_name:": "/Reciprocal_output_0" + } + ] + }, + { + "node_id:": 38, + "node_inputs:": [ + { + "input_dimension:": "72 1 ", + "input_name:": "/Reciprocal_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + } + ], + "node_name:": "/Unsqueeze_4", + "node_outputs:": [ + { + "output_dimension:": "72 1 1 ", + "output_name:": "/Unsqueeze_4_output_0" + } + ] + }, + { + "node_id:": 39, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_1_output_0" + }, + { + "input_dimension:": "72 1 1 ", + "input_name:": "/Unsqueeze_4_output_0" + } + ], + "node_name:": "/Mul_2", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/Mul_2_output_0" + } + ] + }, + { + "node_id:": 40, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Mul_2_output_0" + }, + { + "input_dimension:": "384 384 ", + "input_name:": "onnx::MatMul_7359" + } + ], + "node_name:": "/proj/MatMul", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/proj/MatMul_output_0" + } + ] + }, + { + "node_id:": 41, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_output_0" + }, + { + "input_dimension:": "72 1 384 ", + "input_name:": "/proj/MatMul_output_0" + } + ], + "node_name:": "/Add_5", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/Add_5_output_0" + } + ] + }, + { + "node_id:": 42, + "node_inputs:": [ + { + "input_dimension:": "72 1 512 ", + "input_name:": "/in_proj/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_285_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_568_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_1", + "node_outputs:": [ + { + "output_dimension:": "72 1 192 ", + "output_name:": "/Slice_1_output_0" + } + ] + }, + { + "node_id:": 43, + "node_inputs:": [ + { + "input_dimension:": "2 288 1 192 ", + "input_name:": "cached_key_0" + }, + { + "input_dimension:": "", + "input_name:": "/upsample_3/Constant_output_0" + } + ], + "node_name:": "/Gather_3", + "node_outputs:": [ + { + "output_dimension:": "288 1 192 ", + "output_name:": "/Gather_3_output_0" + } + ] + }, + { + "node_id:": 44, + "node_inputs:": [ + { + "input_dimension:": "288 1 192 ", + "input_name:": "/Gather_3_output_0" + }, + { + "input_dimension:": "72 1 192 ", + "input_name:": "/Slice_1_output_0" + } + ], + "node_name:": "/Concat", + "node_outputs:": [ + { + "output_dimension:": "360 1 192 ", + "output_name:": "/Concat_output_0" + } + ] + }, + { + "node_id:": 45, + "node_inputs:": [ + { + "input_dimension:": "72 1 512 ", + "input_name:": "/in_proj/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1118_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_3", + "node_outputs:": [ + { + "output_dimension:": "72 1 32 ", + "output_name:": "/Slice_3_output_0" + } + ] + }, + { + "node_id:": 46, + "node_inputs:": [ + { + "input_dimension:": "72 1 32 ", + "input_name:": "/Slice_3_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_3_output_0" + } + ], + "node_name:": "/Reshape_1", + "node_outputs:": [ + { + "output_dimension:": "72 1 8 4 ", + "output_name:": "/Reshape_1_output_0" + } + ] + }, + { + "node_id:": 47, + "node_inputs:": [ + { + "input_dimension:": "72 1 8 4 ", + "input_name:": "/Reshape_1_output_0" + } + ], + "node_name:": "/Transpose_3", + "node_outputs:": [ + { + "output_dimension:": "1 8 72 4 ", + "output_name:": "/Transpose_3_output_0" + } + ] + }, + { + "node_id:": 48, + "node_inputs:": [ + { + "input_dimension:": "1 8 72 4 ", + "input_name:": "/Transpose_3_output_0" + }, + { + "input_dimension:": "1 8 4 431 ", + "input_name:": "/Transpose_5_output_0" + } + ], + "node_name:": "/MatMul", + "node_outputs:": [ + { + "output_dimension:": "1 8 72 431 ", + "output_name:": "/MatMul_output_0" + } + ] + }, + { + "node_id:": 49, + "node_inputs:": [ + { + "input_dimension:": "360 1 192 ", + "input_name:": "/Concat_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_266_output_0" + } + ], + "node_name:": "/Reshape_2", + "node_outputs:": [ + { + "output_dimension:": "360 1 8 24 ", + "output_name:": "/Reshape_2_output_0" + } + ] + }, + { + "node_id:": 50, + "node_inputs:": [ + { + "input_dimension:": "360 1 8 24 ", + "input_name:": "/Reshape_2_output_0" + } + ], + "node_name:": "/Transpose_4", + "node_outputs:": [ + { + "output_dimension:": "1 8 24 360 ", + "output_name:": "/Transpose_4_output_0" + } + ] + }, + { + "node_id:": 51, + "node_inputs:": [ + { + "input_dimension:": "72 1 512 ", + "input_name:": "/in_proj/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_285_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice", + "node_outputs:": [ + { + "output_dimension:": "72 1 192 ", + "output_name:": "/Slice_output_0" + } + ] + }, + { + "node_id:": 52, + "node_inputs:": [ + { + "input_dimension:": "72 1 192 ", + "input_name:": "/Slice_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_2_output_0" + } + ], + "node_name:": "/Reshape", + "node_outputs:": [ + { + "output_dimension:": "72 1 8 24 ", + "output_name:": "/Reshape_output_0" + } + ] + }, + { + "node_id:": 53, + "node_inputs:": [ + { + "input_dimension:": "72 1 8 24 ", + "input_name:": "/Reshape_output_0" + } + ], + "node_name:": "/Transpose_2", + "node_outputs:": [ + { + "output_dimension:": "1 8 72 24 ", + "output_name:": "/Transpose_2_output_0" + } + ] + }, + { + "node_id:": 54, + "node_inputs:": [ + { + "input_dimension:": "1 8 72 24 ", + "input_name:": "/Transpose_2_output_0" + }, + { + "input_dimension:": "1 8 24 360 ", + "input_name:": "/Transpose_4_output_0" + } + ], + "node_name:": "/MatMul_1", + "node_outputs:": [ + { + "output_dimension:": "1 8 72 360 ", + "output_name:": "/MatMul_1_output_0" + } + ] + }, + { + "node_id:": 55, + "node_inputs:": [ + { + "input_dimension:": "1 8 72 431 ", + "input_name:": "/MatMul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Concat_9_output_0" + } + ], + "node_name:": "/Reshape_5", + "node_outputs:": [ + { + "output_dimension:": "576 431 ", + "output_name:": "/Reshape_5_output_0" + } + ] + }, + { + "node_id:": 56, + "node_inputs:": [ + { + "input_dimension:": "576 431 ", + "input_name:": "/Reshape_5_output_0" + }, + { + "input_dimension:": "576 360 ", + "input_name:": "/Add_7_output_0" + } + ], + "node_name:": "/GatherElements", + "node_outputs:": [ + { + "output_dimension:": "576 360 ", + "output_name:": "/GatherElements_output_0" + } + ] + }, + { + "node_id:": 57, + "node_inputs:": [ + { + "input_dimension:": "576 360 ", + "input_name:": "/GatherElements_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_10_output_0" + } + ], + "node_name:": "/Reshape_6", + "node_outputs:": [ + { + "output_dimension:": "1 8 72 360 ", + "output_name:": "/Reshape_6_output_0" + } + ] + }, + { + "node_id:": 58, + "node_inputs:": [ + { + "input_dimension:": "1 8 72 360 ", + "input_name:": "/MatMul_1_output_0" + }, + { + "input_dimension:": "1 8 72 360 ", + "input_name:": "/Reshape_6_output_0" + } + ], + "node_name:": "/Add_8", + "node_outputs:": [ + { + "output_dimension:": "1 8 72 360 ", + "output_name:": "/Add_8_output_0" + } + ] + }, + { + "node_id:": 59, + "node_inputs:": [ + { + "input_dimension:": "1 8 72 360 ", + "input_name:": "/Add_8_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_11_output_0" + } + ], + "node_name:": "/Reshape_7", + "node_outputs:": [ + { + "output_dimension:": "8 72 360 ", + "output_name:": "/Reshape_7_output_0" + } + ] + }, + { + "node_id:": 60, + "node_inputs:": [ + { + "input_dimension:": "8 72 360 ", + "input_name:": "/Reshape_7_output_0" + } + ], + "node_name:": "/Softmax", + "node_outputs:": [ + { + "output_dimension:": "8 72 360 ", + "output_name:": "/Softmax_output_0" + } + ] + }, + { + "node_id:": 61, + "node_inputs:": [ + { + "input_dimension:": "72 1 512 ", + "input_name:": "/in_proj/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_568_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1118_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_2", + "node_outputs:": [ + { + "output_dimension:": "72 1 96 ", + "output_name:": "/Slice_2_output_0" + } + ] + }, + { + "node_id:": 62, + "node_inputs:": [ + { + "input_dimension:": "2 288 1 96 ", + "input_name:": "cached_val_0" + }, + { + "input_dimension:": "", + "input_name:": "/upsample_3/Constant_output_0" + } + ], + "node_name:": "/Gather_4", + "node_outputs:": [ + { + "output_dimension:": "288 1 96 ", + "output_name:": "/Gather_4_output_0" + } + ] + }, + { + "node_id:": 63, + "node_inputs:": [ + { + "input_dimension:": "288 1 96 ", + "input_name:": "/Gather_4_output_0" + }, + { + "input_dimension:": "72 1 96 ", + "input_name:": "/Slice_2_output_0" + } + ], + "node_name:": "/Concat_1", + "node_outputs:": [ + { + "output_dimension:": "360 1 96 ", + "output_name:": "/Concat_1_output_0" + } + ] + }, + { + "node_id:": 64, + "node_inputs:": [ + { + "input_dimension:": "360 1 96 ", + "input_name:": "/Concat_1_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_5_output_0" + } + ], + "node_name:": "/Reshape_3", + "node_outputs:": [ + { + "output_dimension:": "360 8 12 ", + "output_name:": "/Reshape_3_output_0" + } + ] + }, + { + "node_id:": 65, + "node_inputs:": [ + { + "input_dimension:": "360 8 12 ", + "input_name:": "/Reshape_3_output_0" + } + ], + "node_name:": "/Transpose_1", + "node_outputs:": [ + { + "output_dimension:": "8 360 12 ", + "output_name:": "/Transpose_1_output_0" + } + ] + }, + { + "node_id:": 66, + "node_inputs:": [ + { + "input_dimension:": "8 72 360 ", + "input_name:": "/Softmax_output_0" + }, + { + "input_dimension:": "8 360 12 ", + "input_name:": "/Transpose_1_output_0" + } + ], + "node_name:": "/MatMul_2", + "node_outputs:": [ + { + "output_dimension:": "8 72 12 ", + "output_name:": "/MatMul_2_output_0" + } + ] + }, + { + "node_id:": 67, + "node_inputs:": [ + { + "input_dimension:": "8 72 12 ", + "input_name:": "/MatMul_2_output_0" + } + ], + "node_name:": "/Transpose_6", + "node_outputs:": [ + { + "output_dimension:": "72 8 12 ", + "output_name:": "/Transpose_6_output_0" + } + ] + }, + { + "node_id:": 68, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_5_output_0" + }, + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_9_output_0" + } + ], + "node_name:": "/Add_10", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/Add_10_output_0" + } + ] + }, + { + "node_id:": 69, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_10_output_0" + } + ], + "node_name:": "/Transpose_7", + "node_outputs:": [ + { + "output_dimension:": "1 384 72 ", + "output_name:": "/Transpose_7_output_0" + } + ] + }, + { + "node_id:": 70, + "node_inputs:": [ + { + "input_dimension:": "1 384 72 ", + "input_name:": "/Transpose_7_output_0" + }, + { + "input_dimension:": "768 384 1 ", + "input_name:": "encoder.encoders.0.layers.0.conv_module1.pointwise_conv1.weight" + }, + { + "input_dimension:": "768 ", + "input_name:": "encoder.encoders.0.layers.0.conv_module1.pointwise_conv1.bias" + } + ], + "node_name:": "/pointwise_conv1/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 768 72 ", + "output_name:": "/pointwise_conv1/Conv_output_0" + } + ] + }, + { + "node_id:": 71, + "node_inputs:": [ + { + "input_dimension:": "1 768 72 ", + "input_name:": "/pointwise_conv1/Conv_output_0" + } + ], + "node_name:": "/Split", + "node_outputs:": [ + { + "output_dimension:": "1 384 72 ", + "output_name:": "/Split_output_0" + }, + { + "output_dimension:": "1 384 72 ", + "output_name:": "/Split_output_1" + } + ] + }, + { + "node_id:": 72, + "node_inputs:": [ + { + "input_dimension:": "1 384 72 ", + "input_name:": "/Split_output_1" + } + ], + "node_name:": "/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "1 384 72 ", + "output_name:": "/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 73, + "node_inputs:": [ + { + "input_dimension:": "1 384 72 ", + "input_name:": "/Split_output_0" + }, + { + "input_dimension:": "1 384 72 ", + "input_name:": "/Sigmoid_output_0" + } + ], + "node_name:": "/Mul_7", + "node_outputs:": [ + { + "output_dimension:": "1 384 72 ", + "output_name:": "/Mul_7_output_0" + } + ] + }, + { + "node_id:": 74, + "node_inputs:": [ + { + "input_dimension:": "2 1 384 30 ", + "input_name:": "cached_conv1_0" + }, + { + "input_dimension:": "", + "input_name:": "/upsample_3/Constant_output_0" + } + ], + "node_name:": "/Gather_6", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Gather_6_output_0" + } + ] + }, + { + "node_id:": 75, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Gather_6_output_0" + }, + { + "input_dimension:": "1 384 72 ", + "input_name:": "/Mul_7_output_0" + } + ], + "node_name:": "/Concat_13", + "node_outputs:": [ + { + "output_dimension:": "1 384 102 ", + "output_name:": "/Concat_13_output_0" + } + ] + }, + { + "node_id:": 76, + "node_inputs:": [ + { + "input_dimension:": "1 384 102 ", + "input_name:": "/Concat_13_output_0" + }, + { + "input_dimension:": "384 1 31 ", + "input_name:": "encoder.encoders.0.layers.0.conv_module1.depthwise_conv.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.0.layers.0.conv_module1.depthwise_conv.bias" + } + ], + "node_name:": "/depthwise_conv/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 72 ", + "output_name:": "/depthwise_conv/Conv_output_0" + } + ] + }, + { + "node_id:": 77, + "node_inputs:": [ + { + "input_dimension:": "1 384 72 ", + "input_name:": "/depthwise_conv/Conv_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/activation/Sub", + "node_outputs:": [ + { + "output_dimension:": "1 384 72 ", + "output_name:": "/activation/Sub_output_0" + } + ] + }, + { + "node_id:": 78, + "node_inputs:": [ + { + "input_dimension:": "1 384 72 ", + "input_name:": "/activation/Sub_output_0" + } + ], + "node_name:": "/activation/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "1 384 72 ", + "output_name:": "/activation/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 79, + "node_inputs:": [ + { + "input_dimension:": "1 384 72 ", + "input_name:": "/depthwise_conv/Conv_output_0" + }, + { + "input_dimension:": "1 384 72 ", + "input_name:": "/activation/Sigmoid_output_0" + } + ], + "node_name:": "/activation/Mul", + "node_outputs:": [ + { + "output_dimension:": "1 384 72 ", + "output_name:": "/activation/Mul_output_0" + } + ] + }, + { + "node_id:": 80, + "node_inputs:": [ + { + "input_dimension:": "1 384 72 ", + "input_name:": "/activation/Mul_output_0" + }, + { + "input_dimension:": "384 384 1 ", + "input_name:": "encoder.encoders.0.layers.0.conv_module1.pointwise_conv2.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.0.layers.0.conv_module1.pointwise_conv2.bias" + } + ], + "node_name:": "/pointwise_conv2/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 72 ", + "output_name:": "/pointwise_conv2/Conv_output_0" + } + ] + }, + { + "node_id:": 81, + "node_inputs:": [ + { + "input_dimension:": "1 384 72 ", + "input_name:": "/pointwise_conv2/Conv_output_0" + } + ], + "node_name:": "/Transpose_8", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/Transpose_8_output_0" + } + ] + }, + { + "node_id:": 82, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_10_output_0" + }, + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Transpose_8_output_0" + } + ], + "node_name:": "/Add_11", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/Add_11_output_0" + } + ] + }, + { + "node_id:": 83, + "node_inputs:": [ + { + "input_dimension:": "72 1 1024 ", + "input_name:": "/feed_forward2/in_proj/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward2/activation/Sub", + "node_outputs:": [ + { + "output_dimension:": "72 1 1024 ", + "output_name:": "/feed_forward2/activation/Sub_output_0" + } + ] + }, + { + "node_id:": 84, + "node_inputs:": [ + { + "input_dimension:": "72 1 1024 ", + "input_name:": "/feed_forward2/activation/Sub_output_0" + } + ], + "node_name:": "/feed_forward2/activation/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "72 1 1024 ", + "output_name:": "/feed_forward2/activation/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 85, + "node_inputs:": [ + { + "input_dimension:": "72 1 1024 ", + "input_name:": "/feed_forward2/in_proj/Add_output_0" + }, + { + "input_dimension:": "72 1 1024 ", + "input_name:": "/feed_forward2/activation/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward2/activation/Mul", + "node_outputs:": [ + { + "output_dimension:": "72 1 1024 ", + "output_name:": "/feed_forward2/activation/Mul_output_0" + } + ] + }, + { + "node_id:": 86, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_11_output_0" + }, + { + "input_dimension:": "72 1 384 ", + "input_name:": "/feed_forward2/out_proj/Add_output_0" + } + ], + "node_name:": "/Add_12", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/Add_12_output_0" + } + ] + }, + { + "node_id:": 87, + "node_inputs:": [ + { + "input_dimension:": "2 288 1 96 ", + "input_name:": "cached_val2_0" + }, + { + "input_dimension:": "", + "input_name:": "/upsample_3/Constant_output_0" + } + ], + "node_name:": "/Gather_5", + "node_outputs:": [ + { + "output_dimension:": "288 1 96 ", + "output_name:": "/Gather_5_output_0" + } + ] + }, + { + "node_id:": 88, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_12_output_0" + }, + { + "input_dimension:": "384 96 ", + "input_name:": "onnx::MatMul_7405" + } + ], + "node_name:": "/in_proj2/MatMul", + "node_outputs:": [ + { + "output_dimension:": "72 1 96 ", + "output_name:": "/in_proj2/MatMul_output_0" + } + ] + }, + { + "node_id:": 89, + "node_inputs:": [ + { + "input_dimension:": "288 1 96 ", + "input_name:": "/Gather_5_output_0" + }, + { + "input_dimension:": "72 1 96 ", + "input_name:": "/in_proj2/MatMul_output_0" + } + ], + "node_name:": "/Concat_14", + "node_outputs:": [ + { + "output_dimension:": "360 1 96 ", + "output_name:": "/Concat_14_output_0" + } + ] + }, + { + "node_id:": 90, + "node_inputs:": [ + { + "input_dimension:": "360 1 96 ", + "input_name:": "/Concat_14_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_15_output_0" + } + ], + "node_name:": "/Reshape_9", + "node_outputs:": [ + { + "output_dimension:": "360 8 12 ", + "output_name:": "/Reshape_9_output_0" + } + ] + }, + { + "node_id:": 91, + "node_inputs:": [ + { + "input_dimension:": "360 8 12 ", + "input_name:": "/Reshape_9_output_0" + } + ], + "node_name:": "/Transpose_9", + "node_outputs:": [ + { + "output_dimension:": "8 360 12 ", + "output_name:": "/Transpose_9_output_0" + } + ] + }, + { + "node_id:": 92, + "node_inputs:": [ + { + "input_dimension:": "8 72 360 ", + "input_name:": "/Softmax_output_0" + }, + { + "input_dimension:": "8 360 12 ", + "input_name:": "/Transpose_9_output_0" + } + ], + "node_name:": "/MatMul_4", + "node_outputs:": [ + { + "output_dimension:": "8 72 12 ", + "output_name:": "/MatMul_4_output_0" + } + ] + }, + { + "node_id:": 93, + "node_inputs:": [ + { + "input_dimension:": "8 72 12 ", + "input_name:": "/MatMul_4_output_0" + } + ], + "node_name:": "/Transpose_10", + "node_outputs:": [ + { + "output_dimension:": "72 8 12 ", + "output_name:": "/Transpose_10_output_0" + } + ] + }, + { + "node_id:": 94, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_12_output_0" + }, + { + "input_dimension:": "72 1 384 ", + "input_name:": "/out_proj2/Add_output_0" + } + ], + "node_name:": "/Add_14", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/Add_14_output_0" + } + ] + }, + { + "node_id:": 95, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_14_output_0" + } + ], + "node_name:": "/Transpose_11", + "node_outputs:": [ + { + "output_dimension:": "1 384 72 ", + "output_name:": "/Transpose_11_output_0" + } + ] + }, + { + "node_id:": 96, + "node_inputs:": [ + { + "input_dimension:": "1 384 72 ", + "input_name:": "/Transpose_11_output_0" + }, + { + "input_dimension:": "768 384 1 ", + "input_name:": "encoder.encoders.0.layers.0.conv_module2.pointwise_conv1.weight" + }, + { + "input_dimension:": "768 ", + "input_name:": "encoder.encoders.0.layers.0.conv_module2.pointwise_conv1.bias" + } + ], + "node_name:": "/pointwise_conv1_1/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 768 72 ", + "output_name:": "/pointwise_conv1_1/Conv_output_0" + } + ] + }, + { + "node_id:": 97, + "node_inputs:": [ + { + "input_dimension:": "1 768 72 ", + "input_name:": "/pointwise_conv1_1/Conv_output_0" + } + ], + "node_name:": "/Split_1", + "node_outputs:": [ + { + "output_dimension:": "1 384 72 ", + "output_name:": "/Split_1_output_0" + }, + { + "output_dimension:": "1 384 72 ", + "output_name:": "/Split_1_output_1" + } + ] + }, + { + "node_id:": 98, + "node_inputs:": [ + { + "input_dimension:": "1 384 72 ", + "input_name:": "/Split_1_output_1" + } + ], + "node_name:": "/Sigmoid_1", + "node_outputs:": [ + { + "output_dimension:": "1 384 72 ", + "output_name:": "/Sigmoid_1_output_0" + } + ] + }, + { + "node_id:": 99, + "node_inputs:": [ + { + "input_dimension:": "1 384 72 ", + "input_name:": "/Split_1_output_0" + }, + { + "input_dimension:": "1 384 72 ", + "input_name:": "/Sigmoid_1_output_0" + } + ], + "node_name:": "/Mul_9", + "node_outputs:": [ + { + "output_dimension:": "1 384 72 ", + "output_name:": "/Mul_9_output_0" + } + ] + }, + { + "node_id:": 100, + "node_inputs:": [ + { + "input_dimension:": "2 1 384 30 ", + "input_name:": "cached_conv2_0" + }, + { + "input_dimension:": "", + "input_name:": "/upsample_3/Constant_output_0" + } + ], + "node_name:": "/Gather_7", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Gather_7_output_0" + } + ] + }, + { + "node_id:": 101, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Gather_7_output_0" + }, + { + "input_dimension:": "1 384 72 ", + "input_name:": "/Mul_9_output_0" + } + ], + "node_name:": "/Concat_17", + "node_outputs:": [ + { + "output_dimension:": "1 384 102 ", + "output_name:": "/Concat_17_output_0" + } + ] + }, + { + "node_id:": 102, + "node_inputs:": [ + { + "input_dimension:": "1 384 102 ", + "input_name:": "/Concat_17_output_0" + }, + { + "input_dimension:": "384 1 31 ", + "input_name:": "encoder.encoders.0.layers.0.conv_module2.depthwise_conv.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.0.layers.0.conv_module2.depthwise_conv.bias" + } + ], + "node_name:": "/depthwise_conv_1/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 72 ", + "output_name:": "/depthwise_conv_1/Conv_output_0" + } + ] + }, + { + "node_id:": 103, + "node_inputs:": [ + { + "input_dimension:": "1 384 72 ", + "input_name:": "/depthwise_conv_1/Conv_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/activation_1/Sub", + "node_outputs:": [ + { + "output_dimension:": "1 384 72 ", + "output_name:": "/activation_1/Sub_output_0" + } + ] + }, + { + "node_id:": 104, + "node_inputs:": [ + { + "input_dimension:": "1 384 72 ", + "input_name:": "/activation_1/Sub_output_0" + } + ], + "node_name:": "/activation_1/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "1 384 72 ", + "output_name:": "/activation_1/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 105, + "node_inputs:": [ + { + "input_dimension:": "1 384 72 ", + "input_name:": "/depthwise_conv_1/Conv_output_0" + }, + { + "input_dimension:": "1 384 72 ", + "input_name:": "/activation_1/Sigmoid_output_0" + } + ], + "node_name:": "/activation_1/Mul", + "node_outputs:": [ + { + "output_dimension:": "1 384 72 ", + "output_name:": "/activation_1/Mul_output_0" + } + ] + }, + { + "node_id:": 106, + "node_inputs:": [ + { + "input_dimension:": "1 384 72 ", + "input_name:": "/activation_1/Mul_output_0" + }, + { + "input_dimension:": "384 384 1 ", + "input_name:": "encoder.encoders.0.layers.0.conv_module2.pointwise_conv2.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.0.layers.0.conv_module2.pointwise_conv2.bias" + } + ], + "node_name:": "/pointwise_conv2_1/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 72 ", + "output_name:": "/pointwise_conv2_1/Conv_output_0" + } + ] + }, + { + "node_id:": 107, + "node_inputs:": [ + { + "input_dimension:": "1 384 72 ", + "input_name:": "/pointwise_conv2_1/Conv_output_0" + } + ], + "node_name:": "/Transpose_12", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/Transpose_12_output_0" + } + ] + }, + { + "node_id:": 108, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_14_output_0" + }, + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Transpose_12_output_0" + } + ], + "node_name:": "/Add_15", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/Add_15_output_0" + } + ] + }, + { + "node_id:": 109, + "node_inputs:": [ + { + "input_dimension:": "72 1 1024 ", + "input_name:": "/feed_forward3/in_proj/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward3/activation/Sub", + "node_outputs:": [ + { + "output_dimension:": "72 1 1024 ", + "output_name:": "/feed_forward3/activation/Sub_output_0" + } + ] + }, + { + "node_id:": 110, + "node_inputs:": [ + { + "input_dimension:": "72 1 1024 ", + "input_name:": "/feed_forward3/activation/Sub_output_0" + } + ], + "node_name:": "/feed_forward3/activation/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "72 1 1024 ", + "output_name:": "/feed_forward3/activation/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 111, + "node_inputs:": [ + { + "input_dimension:": "72 1 1024 ", + "input_name:": "/feed_forward3/in_proj/Add_output_0" + }, + { + "input_dimension:": "72 1 1024 ", + "input_name:": "/feed_forward3/activation/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward3/activation/Mul", + "node_outputs:": [ + { + "output_dimension:": "72 1 1024 ", + "output_name:": "/feed_forward3/activation/Mul_output_0" + } + ] + }, + { + "node_id:": 112, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_15_output_0" + }, + { + "input_dimension:": "72 1 384 ", + "input_name:": "/feed_forward3/out_proj/Add_output_0" + } + ], + "node_name:": "/Add_16", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/Add_16_output_0" + } + ] + }, + { + "node_id:": 113, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_16_output_0" + }, + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_16_output_0" + } + ], + "node_name:": "/norm_final/Mul", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/norm_final/Mul_output_0" + } + ] + }, + { + "node_id:": 114, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/norm_final/Mul_output_0" + } + ], + "node_name:": "/norm_final/ReduceMean", + "node_outputs:": [ + { + "output_dimension:": "72 1 1 ", + "output_name:": "/norm_final/ReduceMean_output_0" + } + ] + }, + { + "node_id:": 115, + "node_inputs:": [ + { + "input_dimension:": "72 1 1 ", + "input_name:": "/norm_final/ReduceMean_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/norm_final/Constant_output_0" + } + ], + "node_name:": "/norm_final/Add", + "node_outputs:": [ + { + "output_dimension:": "72 1 1 ", + "output_name:": "/norm_final/Add_output_0" + } + ] + }, + { + "node_id:": 116, + "node_inputs:": [ + { + "input_dimension:": "72 1 1 ", + "input_name:": "/norm_final/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/norm_final_1/Constant_1_output_0" + } + ], + "node_name:": "/norm_final/Pow", + "node_outputs:": [ + { + "output_dimension:": "72 1 1 ", + "output_name:": "/norm_final/Pow_output_0" + } + ] + }, + { + "node_id:": 117, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_16_output_0" + }, + { + "input_dimension:": "72 1 1 ", + "input_name:": "/norm_final/Pow_output_0" + } + ], + "node_name:": "/norm_final/Mul_1", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/norm_final/Mul_1_output_0" + } + ] + }, + { + "node_id:": 118, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/norm_final/Mul_1_output_0" + }, + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Transpose_output_0" + } + ], + "node_name:": "/Sub_2", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/Sub_2_output_0" + } + ] + }, + { + "node_id:": 119, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Sub_2_output_0" + }, + { + "input_dimension:": "", + "input_name:": "encoder.encoders.0.layers.0.bypass_scale" + } + ], + "node_name:": "/Mul_10", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/Mul_10_output_0" + } + ] + }, + { + "node_id:": 120, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Transpose_output_0" + }, + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Mul_10_output_0" + } + ], + "node_name:": "/Add_17", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/Add_17_output_0" + } + ] + }, + { + "node_id:": 121, + "node_inputs:": [ + { + "input_dimension:": "72 1 1024 ", + "input_name:": "/feed_forward1/in_proj_1/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward1/activation_1/Sub", + "node_outputs:": [ + { + "output_dimension:": "72 1 1024 ", + "output_name:": "/feed_forward1/activation_1/Sub_output_0" + } + ] + }, + { + "node_id:": 122, + "node_inputs:": [ + { + "input_dimension:": "72 1 1024 ", + "input_name:": "/feed_forward1/activation_1/Sub_output_0" + } + ], + "node_name:": "/feed_forward1/activation_1/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "72 1 1024 ", + "output_name:": "/feed_forward1/activation_1/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 123, + "node_inputs:": [ + { + "input_dimension:": "72 1 1024 ", + "input_name:": "/feed_forward1/in_proj_1/Add_output_0" + }, + { + "input_dimension:": "72 1 1024 ", + "input_name:": "/feed_forward1/activation_1/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward1/activation_1/Mul", + "node_outputs:": [ + { + "output_dimension:": "72 1 1024 ", + "output_name:": "/feed_forward1/activation_1/Mul_output_0" + } + ] + }, + { + "node_id:": 124, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_17_output_0" + }, + { + "input_dimension:": "72 1 384 ", + "input_name:": "/feed_forward1/out_proj_1/Add_output_0" + } + ], + "node_name:": "/Add_18", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/Add_18_output_0" + } + ] + }, + { + "node_id:": 125, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_18_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_631_output_0" + } + ], + "node_name:": "/CumSum_1", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/CumSum_1_output_0" + } + ] + }, + { + "node_id:": 126, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/CumSum_1_output_0" + }, + { + "input_dimension:": "1 1 384 ", + "input_name:": "/Unsqueeze_35_output_0" + } + ], + "node_name:": "/Add_19", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/Add_19_output_0" + } + ] + }, + { + "node_id:": 127, + "node_inputs:": [ + { + "input_dimension:": "72 1 ", + "input_name:": "/Unsqueeze_36_output_0" + }, + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_37_output_0" + } + ], + "node_name:": "/Add_21", + "node_outputs:": [ + { + "output_dimension:": "72 1 ", + "output_name:": "/Add_21_output_0" + } + ] + }, + { + "node_id:": 128, + "node_inputs:": [ + { + "input_dimension:": "72 1 ", + "input_name:": "/Add_21_output_0" + } + ], + "node_name:": "/Cast_7", + "node_outputs:": [ + { + "output_dimension:": "72 1 ", + "output_name:": "/Cast_7_output_0" + } + ] + }, + { + "node_id:": 129, + "node_inputs:": [ + { + "input_dimension:": "72 1 ", + "input_name:": "/Cast_7_output_0" + } + ], + "node_name:": "/Reciprocal_1", + "node_outputs:": [ + { + "output_dimension:": "72 1 ", + "output_name:": "/Reciprocal_1_output_0" + } + ] + }, + { + "node_id:": 130, + "node_inputs:": [ + { + "input_dimension:": "72 1 ", + "input_name:": "/Reciprocal_1_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + } + ], + "node_name:": "/Unsqueeze_38", + "node_outputs:": [ + { + "output_dimension:": "72 1 1 ", + "output_name:": "/Unsqueeze_38_output_0" + } + ] + }, + { + "node_id:": 131, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_19_output_0" + }, + { + "input_dimension:": "72 1 1 ", + "input_name:": "/Unsqueeze_38_output_0" + } + ], + "node_name:": "/Mul_13", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/Mul_13_output_0" + } + ] + }, + { + "node_id:": 132, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Mul_13_output_0" + }, + { + "input_dimension:": "384 384 ", + "input_name:": "onnx::MatMul_7421" + } + ], + "node_name:": "/proj_1/MatMul", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/proj_1/MatMul_output_0" + } + ] + }, + { + "node_id:": 133, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_18_output_0" + }, + { + "input_dimension:": "72 1 384 ", + "input_name:": "/proj_1/MatMul_output_0" + } + ], + "node_name:": "/Add_23", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/Add_23_output_0" + } + ] + }, + { + "node_id:": 134, + "node_inputs:": [ + { + "input_dimension:": "72 1 512 ", + "input_name:": "/in_proj_1/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_285_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_568_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_10", + "node_outputs:": [ + { + "output_dimension:": "72 1 192 ", + "output_name:": "/Slice_10_output_0" + } + ] + }, + { + "node_id:": 135, + "node_inputs:": [ + { + "input_dimension:": "2 288 1 192 ", + "input_name:": "cached_key_0" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_pos_1/Constant_2_output_0" + } + ], + "node_name:": "/Gather_24", + "node_outputs:": [ + { + "output_dimension:": "288 1 192 ", + "output_name:": "/Gather_24_output_0" + } + ] + }, + { + "node_id:": 136, + "node_inputs:": [ + { + "input_dimension:": "288 1 192 ", + "input_name:": "/Gather_24_output_0" + }, + { + "input_dimension:": "72 1 192 ", + "input_name:": "/Slice_10_output_0" + } + ], + "node_name:": "/Concat_18", + "node_outputs:": [ + { + "output_dimension:": "360 1 192 ", + "output_name:": "/Concat_18_output_0" + } + ] + }, + { + "node_id:": 137, + "node_inputs:": [ + { + "input_dimension:": "72 1 512 ", + "input_name:": "/in_proj_1/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1118_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_12", + "node_outputs:": [ + { + "output_dimension:": "72 1 32 ", + "output_name:": "/Slice_12_output_0" + } + ] + }, + { + "node_id:": 138, + "node_inputs:": [ + { + "input_dimension:": "72 1 32 ", + "input_name:": "/Slice_12_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_21_output_0" + } + ], + "node_name:": "/Reshape_12", + "node_outputs:": [ + { + "output_dimension:": "72 1 8 4 ", + "output_name:": "/Reshape_12_output_0" + } + ] + }, + { + "node_id:": 139, + "node_inputs:": [ + { + "input_dimension:": "72 1 8 4 ", + "input_name:": "/Reshape_12_output_0" + } + ], + "node_name:": "/Transpose_15", + "node_outputs:": [ + { + "output_dimension:": "1 8 72 4 ", + "output_name:": "/Transpose_15_output_0" + } + ] + }, + { + "node_id:": 140, + "node_inputs:": [ + { + "input_dimension:": "1 8 72 4 ", + "input_name:": "/Transpose_15_output_0" + }, + { + "input_dimension:": "1 8 4 431 ", + "input_name:": "/Transpose_17_output_0" + } + ], + "node_name:": "/MatMul_5", + "node_outputs:": [ + { + "output_dimension:": "1 8 72 431 ", + "output_name:": "/MatMul_5_output_0" + } + ] + }, + { + "node_id:": 141, + "node_inputs:": [ + { + "input_dimension:": "360 1 192 ", + "input_name:": "/Concat_18_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_266_output_0" + } + ], + "node_name:": "/Reshape_13", + "node_outputs:": [ + { + "output_dimension:": "360 1 8 24 ", + "output_name:": "/Reshape_13_output_0" + } + ] + }, + { + "node_id:": 142, + "node_inputs:": [ + { + "input_dimension:": "360 1 8 24 ", + "input_name:": "/Reshape_13_output_0" + } + ], + "node_name:": "/Transpose_16", + "node_outputs:": [ + { + "output_dimension:": "1 8 24 360 ", + "output_name:": "/Transpose_16_output_0" + } + ] + }, + { + "node_id:": 143, + "node_inputs:": [ + { + "input_dimension:": "72 1 512 ", + "input_name:": "/in_proj_1/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_285_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_9", + "node_outputs:": [ + { + "output_dimension:": "72 1 192 ", + "output_name:": "/Slice_9_output_0" + } + ] + }, + { + "node_id:": 144, + "node_inputs:": [ + { + "input_dimension:": "72 1 192 ", + "input_name:": "/Slice_9_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_20_output_0" + } + ], + "node_name:": "/Reshape_11", + "node_outputs:": [ + { + "output_dimension:": "72 1 8 24 ", + "output_name:": "/Reshape_11_output_0" + } + ] + }, + { + "node_id:": 145, + "node_inputs:": [ + { + "input_dimension:": "72 1 8 24 ", + "input_name:": "/Reshape_11_output_0" + } + ], + "node_name:": "/Transpose_14", + "node_outputs:": [ + { + "output_dimension:": "1 8 72 24 ", + "output_name:": "/Transpose_14_output_0" + } + ] + }, + { + "node_id:": 146, + "node_inputs:": [ + { + "input_dimension:": "1 8 72 24 ", + "input_name:": "/Transpose_14_output_0" + }, + { + "input_dimension:": "1 8 24 360 ", + "input_name:": "/Transpose_16_output_0" + } + ], + "node_name:": "/MatMul_6", + "node_outputs:": [ + { + "output_dimension:": "1 8 72 360 ", + "output_name:": "/MatMul_6_output_0" + } + ] + }, + { + "node_id:": 147, + "node_inputs:": [ + { + "input_dimension:": "1 8 72 431 ", + "input_name:": "/MatMul_5_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Concat_27_output_0" + } + ], + "node_name:": "/Reshape_16", + "node_outputs:": [ + { + "output_dimension:": "576 431 ", + "output_name:": "/Reshape_16_output_0" + } + ] + }, + { + "node_id:": 148, + "node_inputs:": [ + { + "input_dimension:": "576 431 ", + "input_name:": "/Reshape_16_output_0" + }, + { + "input_dimension:": "576 360 ", + "input_name:": "/Add_25_output_0" + } + ], + "node_name:": "/GatherElements_1", + "node_outputs:": [ + { + "output_dimension:": "576 360 ", + "output_name:": "/GatherElements_1_output_0" + } + ] + }, + { + "node_id:": 149, + "node_inputs:": [ + { + "input_dimension:": "576 360 ", + "input_name:": "/GatherElements_1_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_28_output_0" + } + ], + "node_name:": "/Reshape_17", + "node_outputs:": [ + { + "output_dimension:": "1 8 72 360 ", + "output_name:": "/Reshape_17_output_0" + } + ] + }, + { + "node_id:": 150, + "node_inputs:": [ + { + "input_dimension:": "1 8 72 360 ", + "input_name:": "/MatMul_6_output_0" + }, + { + "input_dimension:": "1 8 72 360 ", + "input_name:": "/Reshape_17_output_0" + } + ], + "node_name:": "/Add_26", + "node_outputs:": [ + { + "output_dimension:": "1 8 72 360 ", + "output_name:": "/Add_26_output_0" + } + ] + }, + { + "node_id:": 151, + "node_inputs:": [ + { + "input_dimension:": "1 8 72 360 ", + "input_name:": "/Add_26_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_29_output_0" + } + ], + "node_name:": "/Reshape_18", + "node_outputs:": [ + { + "output_dimension:": "8 72 360 ", + "output_name:": "/Reshape_18_output_0" + } + ] + }, + { + "node_id:": 152, + "node_inputs:": [ + { + "input_dimension:": "8 72 360 ", + "input_name:": "/Reshape_18_output_0" + } + ], + "node_name:": "/Softmax_1", + "node_outputs:": [ + { + "output_dimension:": "8 72 360 ", + "output_name:": "/Softmax_1_output_0" + } + ] + }, + { + "node_id:": 153, + "node_inputs:": [ + { + "input_dimension:": "72 1 512 ", + "input_name:": "/in_proj_1/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_568_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1118_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_11", + "node_outputs:": [ + { + "output_dimension:": "72 1 96 ", + "output_name:": "/Slice_11_output_0" + } + ] + }, + { + "node_id:": 154, + "node_inputs:": [ + { + "input_dimension:": "2 288 1 96 ", + "input_name:": "cached_val_0" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_pos_1/Constant_2_output_0" + } + ], + "node_name:": "/Gather_25", + "node_outputs:": [ + { + "output_dimension:": "288 1 96 ", + "output_name:": "/Gather_25_output_0" + } + ] + }, + { + "node_id:": 155, + "node_inputs:": [ + { + "input_dimension:": "288 1 96 ", + "input_name:": "/Gather_25_output_0" + }, + { + "input_dimension:": "72 1 96 ", + "input_name:": "/Slice_11_output_0" + } + ], + "node_name:": "/Concat_19", + "node_outputs:": [ + { + "output_dimension:": "360 1 96 ", + "output_name:": "/Concat_19_output_0" + } + ] + }, + { + "node_id:": 156, + "node_inputs:": [ + { + "input_dimension:": "360 1 96 ", + "input_name:": "/Concat_19_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_23_output_0" + } + ], + "node_name:": "/Reshape_14", + "node_outputs:": [ + { + "output_dimension:": "360 8 12 ", + "output_name:": "/Reshape_14_output_0" + } + ] + }, + { + "node_id:": 157, + "node_inputs:": [ + { + "input_dimension:": "360 8 12 ", + "input_name:": "/Reshape_14_output_0" + } + ], + "node_name:": "/Transpose_13", + "node_outputs:": [ + { + "output_dimension:": "8 360 12 ", + "output_name:": "/Transpose_13_output_0" + } + ] + }, + { + "node_id:": 158, + "node_inputs:": [ + { + "input_dimension:": "8 72 360 ", + "input_name:": "/Softmax_1_output_0" + }, + { + "input_dimension:": "8 360 12 ", + "input_name:": "/Transpose_13_output_0" + } + ], + "node_name:": "/MatMul_7", + "node_outputs:": [ + { + "output_dimension:": "8 72 12 ", + "output_name:": "/MatMul_7_output_0" + } + ] + }, + { + "node_id:": 159, + "node_inputs:": [ + { + "input_dimension:": "8 72 12 ", + "input_name:": "/MatMul_7_output_0" + } + ], + "node_name:": "/Transpose_18", + "node_outputs:": [ + { + "output_dimension:": "72 8 12 ", + "output_name:": "/Transpose_18_output_0" + } + ] + }, + { + "node_id:": 160, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_23_output_0" + }, + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_27_output_0" + } + ], + "node_name:": "/Add_28", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/Add_28_output_0" + } + ] + }, + { + "node_id:": 161, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_28_output_0" + } + ], + "node_name:": "/Transpose_19", + "node_outputs:": [ + { + "output_dimension:": "1 384 72 ", + "output_name:": "/Transpose_19_output_0" + } + ] + }, + { + "node_id:": 162, + "node_inputs:": [ + { + "input_dimension:": "1 384 72 ", + "input_name:": "/Transpose_19_output_0" + }, + { + "input_dimension:": "768 384 1 ", + "input_name:": "encoder.encoders.0.layers.1.conv_module1.pointwise_conv1.weight" + }, + { + "input_dimension:": "768 ", + "input_name:": "encoder.encoders.0.layers.1.conv_module1.pointwise_conv1.bias" + } + ], + "node_name:": "/pointwise_conv1_2/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 768 72 ", + "output_name:": "/pointwise_conv1_2/Conv_output_0" + } + ] + }, + { + "node_id:": 163, + "node_inputs:": [ + { + "input_dimension:": "1 768 72 ", + "input_name:": "/pointwise_conv1_2/Conv_output_0" + } + ], + "node_name:": "/Split_2", + "node_outputs:": [ + { + "output_dimension:": "1 384 72 ", + "output_name:": "/Split_2_output_0" + }, + { + "output_dimension:": "1 384 72 ", + "output_name:": "/Split_2_output_1" + } + ] + }, + { + "node_id:": 164, + "node_inputs:": [ + { + "input_dimension:": "1 384 72 ", + "input_name:": "/Split_2_output_1" + } + ], + "node_name:": "/Sigmoid_2", + "node_outputs:": [ + { + "output_dimension:": "1 384 72 ", + "output_name:": "/Sigmoid_2_output_0" + } + ] + }, + { + "node_id:": 165, + "node_inputs:": [ + { + "input_dimension:": "1 384 72 ", + "input_name:": "/Split_2_output_0" + }, + { + "input_dimension:": "1 384 72 ", + "input_name:": "/Sigmoid_2_output_0" + } + ], + "node_name:": "/Mul_18", + "node_outputs:": [ + { + "output_dimension:": "1 384 72 ", + "output_name:": "/Mul_18_output_0" + } + ] + }, + { + "node_id:": 166, + "node_inputs:": [ + { + "input_dimension:": "2 1 384 30 ", + "input_name:": "cached_conv1_0" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_pos_1/Constant_2_output_0" + } + ], + "node_name:": "/Gather_27", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Gather_27_output_0" + } + ] + }, + { + "node_id:": 167, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Gather_27_output_0" + }, + { + "input_dimension:": "1 384 72 ", + "input_name:": "/Mul_18_output_0" + } + ], + "node_name:": "/Concat_31", + "node_outputs:": [ + { + "output_dimension:": "1 384 102 ", + "output_name:": "/Concat_31_output_0" + } + ] + }, + { + "node_id:": 168, + "node_inputs:": [ + { + "input_dimension:": "1 384 102 ", + "input_name:": "/Concat_31_output_0" + }, + { + "input_dimension:": "384 1 31 ", + "input_name:": "encoder.encoders.0.layers.1.conv_module1.depthwise_conv.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.0.layers.1.conv_module1.depthwise_conv.bias" + } + ], + "node_name:": "/depthwise_conv_2/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 72 ", + "output_name:": "/depthwise_conv_2/Conv_output_0" + } + ] + }, + { + "node_id:": 169, + "node_inputs:": [ + { + "input_dimension:": "1 384 72 ", + "input_name:": "/depthwise_conv_2/Conv_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/activation_2/Sub", + "node_outputs:": [ + { + "output_dimension:": "1 384 72 ", + "output_name:": "/activation_2/Sub_output_0" + } + ] + }, + { + "node_id:": 170, + "node_inputs:": [ + { + "input_dimension:": "1 384 72 ", + "input_name:": "/activation_2/Sub_output_0" + } + ], + "node_name:": "/activation_2/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "1 384 72 ", + "output_name:": "/activation_2/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 171, + "node_inputs:": [ + { + "input_dimension:": "1 384 72 ", + "input_name:": "/depthwise_conv_2/Conv_output_0" + }, + { + "input_dimension:": "1 384 72 ", + "input_name:": "/activation_2/Sigmoid_output_0" + } + ], + "node_name:": "/activation_2/Mul", + "node_outputs:": [ + { + "output_dimension:": "1 384 72 ", + "output_name:": "/activation_2/Mul_output_0" + } + ] + }, + { + "node_id:": 172, + "node_inputs:": [ + { + "input_dimension:": "1 384 72 ", + "input_name:": "/activation_2/Mul_output_0" + }, + { + "input_dimension:": "384 384 1 ", + "input_name:": "encoder.encoders.0.layers.1.conv_module1.pointwise_conv2.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.0.layers.1.conv_module1.pointwise_conv2.bias" + } + ], + "node_name:": "/pointwise_conv2_2/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 72 ", + "output_name:": "/pointwise_conv2_2/Conv_output_0" + } + ] + }, + { + "node_id:": 173, + "node_inputs:": [ + { + "input_dimension:": "1 384 72 ", + "input_name:": "/pointwise_conv2_2/Conv_output_0" + } + ], + "node_name:": "/Transpose_20", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/Transpose_20_output_0" + } + ] + }, + { + "node_id:": 174, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_28_output_0" + }, + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Transpose_20_output_0" + } + ], + "node_name:": "/Add_29", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/Add_29_output_0" + } + ] + }, + { + "node_id:": 175, + "node_inputs:": [ + { + "input_dimension:": "72 1 1024 ", + "input_name:": "/feed_forward2/in_proj_1/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward2/activation_1/Sub", + "node_outputs:": [ + { + "output_dimension:": "72 1 1024 ", + "output_name:": "/feed_forward2/activation_1/Sub_output_0" + } + ] + }, + { + "node_id:": 176, + "node_inputs:": [ + { + "input_dimension:": "72 1 1024 ", + "input_name:": "/feed_forward2/activation_1/Sub_output_0" + } + ], + "node_name:": "/feed_forward2/activation_1/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "72 1 1024 ", + "output_name:": "/feed_forward2/activation_1/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 177, + "node_inputs:": [ + { + "input_dimension:": "72 1 1024 ", + "input_name:": "/feed_forward2/in_proj_1/Add_output_0" + }, + { + "input_dimension:": "72 1 1024 ", + "input_name:": "/feed_forward2/activation_1/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward2/activation_1/Mul", + "node_outputs:": [ + { + "output_dimension:": "72 1 1024 ", + "output_name:": "/feed_forward2/activation_1/Mul_output_0" + } + ] + }, + { + "node_id:": 178, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_29_output_0" + }, + { + "input_dimension:": "72 1 384 ", + "input_name:": "/feed_forward2/out_proj_1/Add_output_0" + } + ], + "node_name:": "/Add_30", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/Add_30_output_0" + } + ] + }, + { + "node_id:": 179, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_30_output_0" + }, + { + "input_dimension:": "384 96 ", + "input_name:": "onnx::MatMul_7467" + } + ], + "node_name:": "/in_proj2_1/MatMul", + "node_outputs:": [ + { + "output_dimension:": "72 1 96 ", + "output_name:": "/in_proj2_1/MatMul_output_0" + } + ] + }, + { + "node_id:": 180, + "node_inputs:": [ + { + "input_dimension:": "2 288 1 96 ", + "input_name:": "cached_val2_0" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_pos_1/Constant_2_output_0" + } + ], + "node_name:": "/Gather_26", + "node_outputs:": [ + { + "output_dimension:": "288 1 96 ", + "output_name:": "/Gather_26_output_0" + } + ] + }, + { + "node_id:": 181, + "node_inputs:": [ + { + "input_dimension:": "288 1 96 ", + "input_name:": "/Gather_26_output_0" + }, + { + "input_dimension:": "72 1 96 ", + "input_name:": "/in_proj2_1/MatMul_output_0" + } + ], + "node_name:": "/Concat_32", + "node_outputs:": [ + { + "output_dimension:": "360 1 96 ", + "output_name:": "/Concat_32_output_0" + } + ] + }, + { + "node_id:": 182, + "node_inputs:": [ + { + "input_dimension:": "360 1 96 ", + "input_name:": "/Concat_32_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_33_output_0" + } + ], + "node_name:": "/Reshape_20", + "node_outputs:": [ + { + "output_dimension:": "360 8 12 ", + "output_name:": "/Reshape_20_output_0" + } + ] + }, + { + "node_id:": 183, + "node_inputs:": [ + { + "input_dimension:": "360 8 12 ", + "input_name:": "/Reshape_20_output_0" + } + ], + "node_name:": "/Transpose_21", + "node_outputs:": [ + { + "output_dimension:": "8 360 12 ", + "output_name:": "/Transpose_21_output_0" + } + ] + }, + { + "node_id:": 184, + "node_inputs:": [ + { + "input_dimension:": "8 72 360 ", + "input_name:": "/Softmax_1_output_0" + }, + { + "input_dimension:": "8 360 12 ", + "input_name:": "/Transpose_21_output_0" + } + ], + "node_name:": "/MatMul_9", + "node_outputs:": [ + { + "output_dimension:": "8 72 12 ", + "output_name:": "/MatMul_9_output_0" + } + ] + }, + { + "node_id:": 185, + "node_inputs:": [ + { + "input_dimension:": "8 72 12 ", + "input_name:": "/MatMul_9_output_0" + } + ], + "node_name:": "/Transpose_22", + "node_outputs:": [ + { + "output_dimension:": "72 8 12 ", + "output_name:": "/Transpose_22_output_0" + } + ] + }, + { + "node_id:": 186, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_30_output_0" + }, + { + "input_dimension:": "72 1 384 ", + "input_name:": "/out_proj2_1/Add_output_0" + } + ], + "node_name:": "/Add_32", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/Add_32_output_0" + } + ] + }, + { + "node_id:": 187, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_32_output_0" + } + ], + "node_name:": "/Transpose_23", + "node_outputs:": [ + { + "output_dimension:": "1 384 72 ", + "output_name:": "/Transpose_23_output_0" + } + ] + }, + { + "node_id:": 188, + "node_inputs:": [ + { + "input_dimension:": "1 384 72 ", + "input_name:": "/Transpose_23_output_0" + }, + { + "input_dimension:": "768 384 1 ", + "input_name:": "encoder.encoders.0.layers.1.conv_module2.pointwise_conv1.weight" + }, + { + "input_dimension:": "768 ", + "input_name:": "encoder.encoders.0.layers.1.conv_module2.pointwise_conv1.bias" + } + ], + "node_name:": "/pointwise_conv1_3/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 768 72 ", + "output_name:": "/pointwise_conv1_3/Conv_output_0" + } + ] + }, + { + "node_id:": 189, + "node_inputs:": [ + { + "input_dimension:": "1 768 72 ", + "input_name:": "/pointwise_conv1_3/Conv_output_0" + } + ], + "node_name:": "/Split_3", + "node_outputs:": [ + { + "output_dimension:": "1 384 72 ", + "output_name:": "/Split_3_output_0" + }, + { + "output_dimension:": "1 384 72 ", + "output_name:": "/Split_3_output_1" + } + ] + }, + { + "node_id:": 190, + "node_inputs:": [ + { + "input_dimension:": "1 384 72 ", + "input_name:": "/Split_3_output_1" + } + ], + "node_name:": "/Sigmoid_3", + "node_outputs:": [ + { + "output_dimension:": "1 384 72 ", + "output_name:": "/Sigmoid_3_output_0" + } + ] + }, + { + "node_id:": 191, + "node_inputs:": [ + { + "input_dimension:": "1 384 72 ", + "input_name:": "/Split_3_output_0" + }, + { + "input_dimension:": "1 384 72 ", + "input_name:": "/Sigmoid_3_output_0" + } + ], + "node_name:": "/Mul_20", + "node_outputs:": [ + { + "output_dimension:": "1 384 72 ", + "output_name:": "/Mul_20_output_0" + } + ] + }, + { + "node_id:": 192, + "node_inputs:": [ + { + "input_dimension:": "2 1 384 30 ", + "input_name:": "cached_conv2_0" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_pos_1/Constant_2_output_0" + } + ], + "node_name:": "/Gather_28", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Gather_28_output_0" + } + ] + }, + { + "node_id:": 193, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Gather_28_output_0" + }, + { + "input_dimension:": "1 384 72 ", + "input_name:": "/Mul_20_output_0" + } + ], + "node_name:": "/Concat_35", + "node_outputs:": [ + { + "output_dimension:": "1 384 102 ", + "output_name:": "/Concat_35_output_0" + } + ] + }, + { + "node_id:": 194, + "node_inputs:": [ + { + "input_dimension:": "1 384 102 ", + "input_name:": "/Concat_35_output_0" + }, + { + "input_dimension:": "384 1 31 ", + "input_name:": "encoder.encoders.0.layers.1.conv_module2.depthwise_conv.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.0.layers.1.conv_module2.depthwise_conv.bias" + } + ], + "node_name:": "/depthwise_conv_3/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 72 ", + "output_name:": "/depthwise_conv_3/Conv_output_0" + } + ] + }, + { + "node_id:": 195, + "node_inputs:": [ + { + "input_dimension:": "1 384 72 ", + "input_name:": "/depthwise_conv_3/Conv_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/activation_3/Sub", + "node_outputs:": [ + { + "output_dimension:": "1 384 72 ", + "output_name:": "/activation_3/Sub_output_0" + } + ] + }, + { + "node_id:": 196, + "node_inputs:": [ + { + "input_dimension:": "1 384 72 ", + "input_name:": "/activation_3/Sub_output_0" + } + ], + "node_name:": "/activation_3/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "1 384 72 ", + "output_name:": "/activation_3/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 197, + "node_inputs:": [ + { + "input_dimension:": "1 384 72 ", + "input_name:": "/depthwise_conv_3/Conv_output_0" + }, + { + "input_dimension:": "1 384 72 ", + "input_name:": "/activation_3/Sigmoid_output_0" + } + ], + "node_name:": "/activation_3/Mul", + "node_outputs:": [ + { + "output_dimension:": "1 384 72 ", + "output_name:": "/activation_3/Mul_output_0" + } + ] + }, + { + "node_id:": 198, + "node_inputs:": [ + { + "input_dimension:": "1 384 72 ", + "input_name:": "/activation_3/Mul_output_0" + }, + { + "input_dimension:": "384 384 1 ", + "input_name:": "encoder.encoders.0.layers.1.conv_module2.pointwise_conv2.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.0.layers.1.conv_module2.pointwise_conv2.bias" + } + ], + "node_name:": "/pointwise_conv2_3/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 72 ", + "output_name:": "/pointwise_conv2_3/Conv_output_0" + } + ] + }, + { + "node_id:": 199, + "node_inputs:": [ + { + "input_dimension:": "1 384 72 ", + "input_name:": "/pointwise_conv2_3/Conv_output_0" + } + ], + "node_name:": "/Transpose_24", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/Transpose_24_output_0" + } + ] + }, + { + "node_id:": 200, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_32_output_0" + }, + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Transpose_24_output_0" + } + ], + "node_name:": "/Add_33", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/Add_33_output_0" + } + ] + }, + { + "node_id:": 201, + "node_inputs:": [ + { + "input_dimension:": "72 1 1024 ", + "input_name:": "/feed_forward3/in_proj_1/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward3/activation_1/Sub", + "node_outputs:": [ + { + "output_dimension:": "72 1 1024 ", + "output_name:": "/feed_forward3/activation_1/Sub_output_0" + } + ] + }, + { + "node_id:": 202, + "node_inputs:": [ + { + "input_dimension:": "72 1 1024 ", + "input_name:": "/feed_forward3/activation_1/Sub_output_0" + } + ], + "node_name:": "/feed_forward3/activation_1/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "72 1 1024 ", + "output_name:": "/feed_forward3/activation_1/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 203, + "node_inputs:": [ + { + "input_dimension:": "72 1 1024 ", + "input_name:": "/feed_forward3/in_proj_1/Add_output_0" + }, + { + "input_dimension:": "72 1 1024 ", + "input_name:": "/feed_forward3/activation_1/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward3/activation_1/Mul", + "node_outputs:": [ + { + "output_dimension:": "72 1 1024 ", + "output_name:": "/feed_forward3/activation_1/Mul_output_0" + } + ] + }, + { + "node_id:": 204, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_33_output_0" + }, + { + "input_dimension:": "72 1 384 ", + "input_name:": "/feed_forward3/out_proj_1/Add_output_0" + } + ], + "node_name:": "/Add_34", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/Add_34_output_0" + } + ] + }, + { + "node_id:": 205, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_34_output_0" + }, + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_34_output_0" + } + ], + "node_name:": "/norm_final_1/Mul", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/norm_final_1/Mul_output_0" + } + ] + }, + { + "node_id:": 206, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/norm_final_1/Mul_output_0" + } + ], + "node_name:": "/norm_final_1/ReduceMean", + "node_outputs:": [ + { + "output_dimension:": "72 1 1 ", + "output_name:": "/norm_final_1/ReduceMean_output_0" + } + ] + }, + { + "node_id:": 207, + "node_inputs:": [ + { + "input_dimension:": "72 1 1 ", + "input_name:": "/norm_final_1/ReduceMean_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/norm_final_1/Constant_output_0" + } + ], + "node_name:": "/norm_final_1/Add", + "node_outputs:": [ + { + "output_dimension:": "72 1 1 ", + "output_name:": "/norm_final_1/Add_output_0" + } + ] + }, + { + "node_id:": 208, + "node_inputs:": [ + { + "input_dimension:": "72 1 1 ", + "input_name:": "/norm_final_1/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/norm_final_1/Constant_1_output_0" + } + ], + "node_name:": "/norm_final_1/Pow", + "node_outputs:": [ + { + "output_dimension:": "72 1 1 ", + "output_name:": "/norm_final_1/Pow_output_0" + } + ] + }, + { + "node_id:": 209, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_34_output_0" + }, + { + "input_dimension:": "72 1 1 ", + "input_name:": "/norm_final_1/Pow_output_0" + } + ], + "node_name:": "/norm_final_1/Mul_1", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/norm_final_1/Mul_1_output_0" + } + ] + }, + { + "node_id:": 210, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/norm_final_1/Mul_1_output_0" + }, + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_17_output_0" + } + ], + "node_name:": "/Sub_5", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/Sub_5_output_0" + } + ] + }, + { + "node_id:": 211, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Sub_5_output_0" + }, + { + "input_dimension:": "", + "input_name:": "encoder.encoders.0.layers.1.bypass_scale" + } + ], + "node_name:": "/Mul_21", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/Mul_21_output_0" + } + ] + }, + { + "node_id:": 212, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_17_output_0" + }, + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Mul_21_output_0" + } + ], + "node_name:": "/Add_35", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/Add_35_output_0" + } + ] + }, + { + "node_id:": 213, + "node_inputs:": [ + { + "input_dimension:": "4 1 ", + "input_name:": "cached_len_1" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_embed/Constant_7_output_0" + } + ], + "node_name:": "/Gather_107", + "node_outputs:": [ + { + "output_dimension:": "1 ", + "output_name:": "/Gather_107_output_0" + } + ] + }, + { + "node_id:": 214, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_107_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_187", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_187_output_0" + } + ] + }, + { + "node_id:": 215, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_107_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Unsqueeze_184", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_184_output_0" + } + ] + }, + { + "node_id:": 216, + "node_inputs:": [ + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_184_output_0" + } + ], + "node_name:": "/Cast_25", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Cast_25_output_0" + } + ] + }, + { + "node_id:": 217, + "node_inputs:": [ + { + "input_dimension:": "4 1 384 ", + "input_name:": "cached_avg_1" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_embed/Constant_7_output_0" + } + ], + "node_name:": "/Gather_108", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Gather_108_output_0" + } + ] + }, + { + "node_id:": 218, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Gather_108_output_0" + }, + { + "input_dimension:": "1 1 ", + "input_name:": "/Cast_25_output_0" + } + ], + "node_name:": "/Mul_55", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Mul_55_output_0" + } + ] + }, + { + "node_id:": 219, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Mul_55_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_185", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 ", + "output_name:": "/Unsqueeze_185_output_0" + } + ] + }, + { + "node_id:": 220, + "node_inputs:": [ + { + "input_dimension:": "4 1 ", + "input_name:": "cached_len_1" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_404_output_0" + } + ], + "node_name:": "/Gather_86", + "node_outputs:": [ + { + "output_dimension:": "1 ", + "output_name:": "/Gather_86_output_0" + } + ] + }, + { + "node_id:": 221, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_86_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_153", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_153_output_0" + } + ] + }, + { + "node_id:": 222, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_86_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Unsqueeze_150", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_150_output_0" + } + ] + }, + { + "node_id:": 223, + "node_inputs:": [ + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_150_output_0" + } + ], + "node_name:": "/Cast_20", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Cast_20_output_0" + } + ] + }, + { + "node_id:": 224, + "node_inputs:": [ + { + "input_dimension:": "4 1 384 ", + "input_name:": "cached_avg_1" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_404_output_0" + } + ], + "node_name:": "/Gather_87", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Gather_87_output_0" + } + ] + }, + { + "node_id:": 225, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Gather_87_output_0" + }, + { + "input_dimension:": "1 1 ", + "input_name:": "/Cast_20_output_0" + } + ], + "node_name:": "/Mul_44", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Mul_44_output_0" + } + ] + }, + { + "node_id:": 226, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Mul_44_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_151", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 ", + "output_name:": "/Unsqueeze_151_output_0" + } + ] + }, + { + "node_id:": 227, + "node_inputs:": [ + { + "input_dimension:": "4 1 ", + "input_name:": "cached_len_1" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_pos_1/Constant_2_output_0" + } + ], + "node_name:": "/Gather_65", + "node_outputs:": [ + { + "output_dimension:": "1 ", + "output_name:": "/Gather_65_output_0" + } + ] + }, + { + "node_id:": 228, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_65_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_119", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_119_output_0" + } + ] + }, + { + "node_id:": 229, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_65_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Unsqueeze_116", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_116_output_0" + } + ] + }, + { + "node_id:": 230, + "node_inputs:": [ + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_116_output_0" + } + ], + "node_name:": "/Cast_15", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Cast_15_output_0" + } + ] + }, + { + "node_id:": 231, + "node_inputs:": [ + { + "input_dimension:": "4 1 384 ", + "input_name:": "cached_avg_1" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_pos_1/Constant_2_output_0" + } + ], + "node_name:": "/Gather_66", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Gather_66_output_0" + } + ] + }, + { + "node_id:": 232, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Gather_66_output_0" + }, + { + "input_dimension:": "1 1 ", + "input_name:": "/Cast_15_output_0" + } + ], + "node_name:": "/Mul_33", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Mul_33_output_0" + } + ] + }, + { + "node_id:": 233, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Mul_33_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_117", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 ", + "output_name:": "/Unsqueeze_117_output_0" + } + ] + }, + { + "node_id:": 234, + "node_inputs:": [ + { + "input_dimension:": "4 1 ", + "input_name:": "cached_len_1" + }, + { + "input_dimension:": "", + "input_name:": "/upsample_3/Constant_output_0" + } + ], + "node_name:": "/Gather_44", + "node_outputs:": [ + { + "output_dimension:": "1 ", + "output_name:": "/Gather_44_output_0" + } + ] + }, + { + "node_id:": 235, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_44_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_85", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_85_output_0" + } + ] + }, + { + "node_id:": 236, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_44_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Unsqueeze_82", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_82_output_0" + } + ] + }, + { + "node_id:": 237, + "node_inputs:": [ + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_82_output_0" + } + ], + "node_name:": "/Cast_10", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Cast_10_output_0" + } + ] + }, + { + "node_id:": 238, + "node_inputs:": [ + { + "input_dimension:": "4 1 384 ", + "input_name:": "cached_avg_1" + }, + { + "input_dimension:": "", + "input_name:": "/upsample_3/Constant_output_0" + } + ], + "node_name:": "/Gather_45", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Gather_45_output_0" + } + ] + }, + { + "node_id:": 239, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Gather_45_output_0" + }, + { + "input_dimension:": "1 1 ", + "input_name:": "/Cast_10_output_0" + } + ], + "node_name:": "/Mul_22", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Mul_22_output_0" + } + ] + }, + { + "node_id:": 240, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Mul_22_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_83", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 ", + "output_name:": "/Unsqueeze_83_output_0" + } + ] + }, + { + "node_id:": 241, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_35_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/downsample/Concat_output_0" + } + ], + "node_name:": "/downsample/Reshape", + "node_outputs:": [ + { + "output_dimension:": "36 2 1 384 ", + "output_name:": "/downsample/Reshape_output_0" + } + ] + }, + { + "node_id:": 242, + "node_inputs:": [ + { + "input_dimension:": "36 2 1 384 ", + "input_name:": "/downsample/Reshape_output_0" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.1.downsample.query" + } + ], + "node_name:": "/downsample/Mul", + "node_outputs:": [ + { + "output_dimension:": "36 2 1 384 ", + "output_name:": "/downsample/Mul_output_0" + } + ] + }, + { + "node_id:": 243, + "node_inputs:": [ + { + "input_dimension:": "36 2 1 384 ", + "input_name:": "/downsample/Mul_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_246_output_0" + } + ], + "node_name:": "/downsample/ReduceSum", + "node_outputs:": [ + { + "output_dimension:": "36 2 1 1 ", + "output_name:": "/downsample/ReduceSum_output_0" + } + ] + }, + { + "node_id:": 244, + "node_inputs:": [ + { + "input_dimension:": "36 2 1 1 ", + "input_name:": "/downsample/ReduceSum_output_0" + } + ], + "node_name:": "/downsample/Softmax", + "node_outputs:": [ + { + "output_dimension:": "36 2 1 1 ", + "output_name:": "/downsample/Softmax_output_0" + } + ] + }, + { + "node_id:": 245, + "node_inputs:": [ + { + "input_dimension:": "36 2 1 384 ", + "input_name:": "/downsample/Reshape_output_0" + }, + { + "input_dimension:": "36 2 1 1 ", + "input_name:": "/downsample/Softmax_output_0" + } + ], + "node_name:": "/downsample/Mul_1", + "node_outputs:": [ + { + "output_dimension:": "36 2 1 384 ", + "output_name:": "/downsample/Mul_1_output_0" + } + ] + }, + { + "node_id:": 246, + "node_inputs:": [ + { + "input_dimension:": "36 2 1 384 ", + "input_name:": "/downsample/Mul_1_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/downsample/ReduceSum_1", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/downsample/ReduceSum_1_output_0" + } + ] + }, + { + "node_id:": 247, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward1/in_proj_2/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward1/activation_2/Sub", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward1/activation_2/Sub_output_0" + } + ] + }, + { + "node_id:": 248, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward1/activation_2/Sub_output_0" + } + ], + "node_name:": "/feed_forward1/activation_2/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward1/activation_2/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 249, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward1/in_proj_2/Add_output_0" + }, + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward1/activation_2/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward1/activation_2/Mul", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward1/activation_2/Mul_output_0" + } + ] + }, + { + "node_id:": 250, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/downsample/ReduceSum_1_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/feed_forward1/out_proj_2/Add_output_0" + } + ], + "node_name:": "/Add_36", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_36_output_0" + } + ] + }, + { + "node_id:": 251, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_36_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_631_output_0" + } + ], + "node_name:": "/CumSum_2", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/CumSum_2_output_0" + } + ] + }, + { + "node_id:": 252, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/CumSum_2_output_0" + }, + { + "input_dimension:": "1 1 384 ", + "input_name:": "/Unsqueeze_83_output_0" + } + ], + "node_name:": "/Add_37", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_37_output_0" + } + ] + }, + { + "node_id:": 253, + "node_inputs:": [ + { + "input_dimension:": "36 1 ", + "input_name:": "/Unsqueeze_84_output_0" + }, + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_85_output_0" + } + ], + "node_name:": "/Add_39", + "node_outputs:": [ + { + "output_dimension:": "36 1 ", + "output_name:": "/Add_39_output_0" + } + ] + }, + { + "node_id:": 254, + "node_inputs:": [ + { + "input_dimension:": "36 1 ", + "input_name:": "/Add_39_output_0" + } + ], + "node_name:": "/Cast_12", + "node_outputs:": [ + { + "output_dimension:": "36 1 ", + "output_name:": "/Cast_12_output_0" + } + ] + }, + { + "node_id:": 255, + "node_inputs:": [ + { + "input_dimension:": "36 1 ", + "input_name:": "/Cast_12_output_0" + } + ], + "node_name:": "/Reciprocal_2", + "node_outputs:": [ + { + "output_dimension:": "36 1 ", + "output_name:": "/Reciprocal_2_output_0" + } + ] + }, + { + "node_id:": 256, + "node_inputs:": [ + { + "input_dimension:": "36 1 ", + "input_name:": "/Reciprocal_2_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + } + ], + "node_name:": "/Unsqueeze_86", + "node_outputs:": [ + { + "output_dimension:": "36 1 1 ", + "output_name:": "/Unsqueeze_86_output_0" + } + ] + }, + { + "node_id:": 257, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_37_output_0" + }, + { + "input_dimension:": "36 1 1 ", + "input_name:": "/Unsqueeze_86_output_0" + } + ], + "node_name:": "/Mul_24", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Mul_24_output_0" + } + ] + }, + { + "node_id:": 258, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Mul_24_output_0" + }, + { + "input_dimension:": "384 384 ", + "input_name:": "onnx::MatMul_7486" + } + ], + "node_name:": "/proj_2/MatMul", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/proj_2/MatMul_output_0" + } + ] + }, + { + "node_id:": 259, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_36_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/proj_2/MatMul_output_0" + } + ], + "node_name:": "/Add_41", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_41_output_0" + } + ] + }, + { + "node_id:": 260, + "node_inputs:": [ + { + "input_dimension:": "36 1 512 ", + "input_name:": "/in_proj_2/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_285_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_568_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_19", + "node_outputs:": [ + { + "output_dimension:": "36 1 192 ", + "output_name:": "/Slice_19_output_0" + } + ] + }, + { + "node_id:": 261, + "node_inputs:": [ + { + "input_dimension:": "4 144 1 192 ", + "input_name:": "cached_key_1" + }, + { + "input_dimension:": "", + "input_name:": "/upsample_3/Constant_output_0" + } + ], + "node_name:": "/Gather_46", + "node_outputs:": [ + { + "output_dimension:": "144 1 192 ", + "output_name:": "/Gather_46_output_0" + } + ] + }, + { + "node_id:": 262, + "node_inputs:": [ + { + "input_dimension:": "144 1 192 ", + "input_name:": "/Gather_46_output_0" + }, + { + "input_dimension:": "36 1 192 ", + "input_name:": "/Slice_19_output_0" + } + ], + "node_name:": "/Concat_43", + "node_outputs:": [ + { + "output_dimension:": "180 1 192 ", + "output_name:": "/Concat_43_output_0" + } + ] + }, + { + "node_id:": 263, + "node_inputs:": [ + { + "input_dimension:": "36 1 512 ", + "input_name:": "/in_proj_2/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1118_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_21", + "node_outputs:": [ + { + "output_dimension:": "36 1 32 ", + "output_name:": "/Slice_21_output_0" + } + ] + }, + { + "node_id:": 264, + "node_inputs:": [ + { + "input_dimension:": "36 1 32 ", + "input_name:": "/Slice_21_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_46_output_0" + } + ], + "node_name:": "/Reshape_23", + "node_outputs:": [ + { + "output_dimension:": "36 1 8 4 ", + "output_name:": "/Reshape_23_output_0" + } + ] + }, + { + "node_id:": 265, + "node_inputs:": [ + { + "input_dimension:": "36 1 8 4 ", + "input_name:": "/Reshape_23_output_0" + } + ], + "node_name:": "/Transpose_27", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 4 ", + "output_name:": "/Transpose_27_output_0" + } + ] + }, + { + "node_id:": 266, + "node_inputs:": [ + { + "input_dimension:": "1 8 36 4 ", + "input_name:": "/Transpose_27_output_0" + }, + { + "input_dimension:": "1 8 4 215 ", + "input_name:": "/Transpose_29_output_0" + } + ], + "node_name:": "/MatMul_10", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 215 ", + "output_name:": "/MatMul_10_output_0" + } + ] + }, + { + "node_id:": 267, + "node_inputs:": [ + { + "input_dimension:": "180 1 192 ", + "input_name:": "/Concat_43_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_266_output_0" + } + ], + "node_name:": "/Reshape_24", + "node_outputs:": [ + { + "output_dimension:": "180 1 8 24 ", + "output_name:": "/Reshape_24_output_0" + } + ] + }, + { + "node_id:": 268, + "node_inputs:": [ + { + "input_dimension:": "180 1 8 24 ", + "input_name:": "/Reshape_24_output_0" + } + ], + "node_name:": "/Transpose_28", + "node_outputs:": [ + { + "output_dimension:": "1 8 24 180 ", + "output_name:": "/Transpose_28_output_0" + } + ] + }, + { + "node_id:": 269, + "node_inputs:": [ + { + "input_dimension:": "36 1 512 ", + "input_name:": "/in_proj_2/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_285_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_18", + "node_outputs:": [ + { + "output_dimension:": "36 1 192 ", + "output_name:": "/Slice_18_output_0" + } + ] + }, + { + "node_id:": 270, + "node_inputs:": [ + { + "input_dimension:": "36 1 192 ", + "input_name:": "/Slice_18_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_45_output_0" + } + ], + "node_name:": "/Reshape_22", + "node_outputs:": [ + { + "output_dimension:": "36 1 8 24 ", + "output_name:": "/Reshape_22_output_0" + } + ] + }, + { + "node_id:": 271, + "node_inputs:": [ + { + "input_dimension:": "36 1 8 24 ", + "input_name:": "/Reshape_22_output_0" + } + ], + "node_name:": "/Transpose_26", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 24 ", + "output_name:": "/Transpose_26_output_0" + } + ] + }, + { + "node_id:": 272, + "node_inputs:": [ + { + "input_dimension:": "1 8 36 24 ", + "input_name:": "/Transpose_26_output_0" + }, + { + "input_dimension:": "1 8 24 180 ", + "input_name:": "/Transpose_28_output_0" + } + ], + "node_name:": "/MatMul_11", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 180 ", + "output_name:": "/MatMul_11_output_0" + } + ] + }, + { + "node_id:": 273, + "node_inputs:": [ + { + "input_dimension:": "1 8 36 215 ", + "input_name:": "/MatMul_10_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Concat_52_output_0" + } + ], + "node_name:": "/Reshape_27", + "node_outputs:": [ + { + "output_dimension:": "288 215 ", + "output_name:": "/Reshape_27_output_0" + } + ] + }, + { + "node_id:": 274, + "node_inputs:": [ + { + "input_dimension:": "288 215 ", + "input_name:": "/Reshape_27_output_0" + }, + { + "input_dimension:": "288 180 ", + "input_name:": "/Add_43_output_0" + } + ], + "node_name:": "/GatherElements_2", + "node_outputs:": [ + { + "output_dimension:": "288 180 ", + "output_name:": "/GatherElements_2_output_0" + } + ] + }, + { + "node_id:": 275, + "node_inputs:": [ + { + "input_dimension:": "288 180 ", + "input_name:": "/GatherElements_2_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_53_output_0" + } + ], + "node_name:": "/Reshape_28", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 180 ", + "output_name:": "/Reshape_28_output_0" + } + ] + }, + { + "node_id:": 276, + "node_inputs:": [ + { + "input_dimension:": "1 8 36 180 ", + "input_name:": "/MatMul_11_output_0" + }, + { + "input_dimension:": "1 8 36 180 ", + "input_name:": "/Reshape_28_output_0" + } + ], + "node_name:": "/Add_44", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 180 ", + "output_name:": "/Add_44_output_0" + } + ] + }, + { + "node_id:": 277, + "node_inputs:": [ + { + "input_dimension:": "1 8 36 180 ", + "input_name:": "/Add_44_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_54_output_0" + } + ], + "node_name:": "/Reshape_29", + "node_outputs:": [ + { + "output_dimension:": "8 36 180 ", + "output_name:": "/Reshape_29_output_0" + } + ] + }, + { + "node_id:": 278, + "node_inputs:": [ + { + "input_dimension:": "8 36 180 ", + "input_name:": "/Reshape_29_output_0" + } + ], + "node_name:": "/Softmax_2", + "node_outputs:": [ + { + "output_dimension:": "8 36 180 ", + "output_name:": "/Softmax_2_output_0" + } + ] + }, + { + "node_id:": 279, + "node_inputs:": [ + { + "input_dimension:": "36 1 512 ", + "input_name:": "/in_proj_2/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_568_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1118_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_20", + "node_outputs:": [ + { + "output_dimension:": "36 1 96 ", + "output_name:": "/Slice_20_output_0" + } + ] + }, + { + "node_id:": 280, + "node_inputs:": [ + { + "input_dimension:": "4 144 1 96 ", + "input_name:": "cached_val_1" + }, + { + "input_dimension:": "", + "input_name:": "/upsample_3/Constant_output_0" + } + ], + "node_name:": "/Gather_47", + "node_outputs:": [ + { + "output_dimension:": "144 1 96 ", + "output_name:": "/Gather_47_output_0" + } + ] + }, + { + "node_id:": 281, + "node_inputs:": [ + { + "input_dimension:": "144 1 96 ", + "input_name:": "/Gather_47_output_0" + }, + { + "input_dimension:": "36 1 96 ", + "input_name:": "/Slice_20_output_0" + } + ], + "node_name:": "/Concat_44", + "node_outputs:": [ + { + "output_dimension:": "180 1 96 ", + "output_name:": "/Concat_44_output_0" + } + ] + }, + { + "node_id:": 282, + "node_inputs:": [ + { + "input_dimension:": "180 1 96 ", + "input_name:": "/Concat_44_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_48_output_0" + } + ], + "node_name:": "/Reshape_25", + "node_outputs:": [ + { + "output_dimension:": "180 8 12 ", + "output_name:": "/Reshape_25_output_0" + } + ] + }, + { + "node_id:": 283, + "node_inputs:": [ + { + "input_dimension:": "180 8 12 ", + "input_name:": "/Reshape_25_output_0" + } + ], + "node_name:": "/Transpose_25", + "node_outputs:": [ + { + "output_dimension:": "8 180 12 ", + "output_name:": "/Transpose_25_output_0" + } + ] + }, + { + "node_id:": 284, + "node_inputs:": [ + { + "input_dimension:": "8 36 180 ", + "input_name:": "/Softmax_2_output_0" + }, + { + "input_dimension:": "8 180 12 ", + "input_name:": "/Transpose_25_output_0" + } + ], + "node_name:": "/MatMul_12", + "node_outputs:": [ + { + "output_dimension:": "8 36 12 ", + "output_name:": "/MatMul_12_output_0" + } + ] + }, + { + "node_id:": 285, + "node_inputs:": [ + { + "input_dimension:": "8 36 12 ", + "input_name:": "/MatMul_12_output_0" + } + ], + "node_name:": "/Transpose_30", + "node_outputs:": [ + { + "output_dimension:": "36 8 12 ", + "output_name:": "/Transpose_30_output_0" + } + ] + }, + { + "node_id:": 286, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_41_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_45_output_0" + } + ], + "node_name:": "/Add_46", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_46_output_0" + } + ] + }, + { + "node_id:": 287, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_46_output_0" + } + ], + "node_name:": "/Transpose_31", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Transpose_31_output_0" + } + ] + }, + { + "node_id:": 288, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Transpose_31_output_0" + }, + { + "input_dimension:": "768 384 1 ", + "input_name:": "encoder.encoders.1.encoder.layers.0.conv_module1.pointwise_conv1.weight" + }, + { + "input_dimension:": "768 ", + "input_name:": "encoder.encoders.1.encoder.layers.0.conv_module1.pointwise_conv1.bias" + } + ], + "node_name:": "/pointwise_conv1_4/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 768 36 ", + "output_name:": "/pointwise_conv1_4/Conv_output_0" + } + ] + }, + { + "node_id:": 289, + "node_inputs:": [ + { + "input_dimension:": "1 768 36 ", + "input_name:": "/pointwise_conv1_4/Conv_output_0" + } + ], + "node_name:": "/Split_4", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Split_4_output_0" + }, + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Split_4_output_1" + } + ] + }, + { + "node_id:": 290, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Split_4_output_1" + } + ], + "node_name:": "/Sigmoid_4", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Sigmoid_4_output_0" + } + ] + }, + { + "node_id:": 291, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Split_4_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Sigmoid_4_output_0" + } + ], + "node_name:": "/Mul_29", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Mul_29_output_0" + } + ] + }, + { + "node_id:": 292, + "node_inputs:": [ + { + "input_dimension:": "4 1 384 30 ", + "input_name:": "cached_conv1_1" + }, + { + "input_dimension:": "", + "input_name:": "/upsample_3/Constant_output_0" + } + ], + "node_name:": "/Gather_49", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Gather_49_output_0" + } + ] + }, + { + "node_id:": 293, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Gather_49_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Mul_29_output_0" + } + ], + "node_name:": "/Concat_56", + "node_outputs:": [ + { + "output_dimension:": "1 384 66 ", + "output_name:": "/Concat_56_output_0" + } + ] + }, + { + "node_id:": 294, + "node_inputs:": [ + { + "input_dimension:": "1 384 66 ", + "input_name:": "/Concat_56_output_0" + }, + { + "input_dimension:": "384 1 31 ", + "input_name:": "encoder.encoders.1.encoder.layers.0.conv_module1.depthwise_conv.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.1.encoder.layers.0.conv_module1.depthwise_conv.bias" + } + ], + "node_name:": "/depthwise_conv_4/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/depthwise_conv_4/Conv_output_0" + } + ] + }, + { + "node_id:": 295, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/depthwise_conv_4/Conv_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/activation_4/Sub", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_4/Sub_output_0" + } + ] + }, + { + "node_id:": 296, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_4/Sub_output_0" + } + ], + "node_name:": "/activation_4/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_4/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 297, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/depthwise_conv_4/Conv_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_4/Sigmoid_output_0" + } + ], + "node_name:": "/activation_4/Mul", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_4/Mul_output_0" + } + ] + }, + { + "node_id:": 298, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_4/Mul_output_0" + }, + { + "input_dimension:": "384 384 1 ", + "input_name:": "encoder.encoders.1.encoder.layers.0.conv_module1.pointwise_conv2.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.1.encoder.layers.0.conv_module1.pointwise_conv2.bias" + } + ], + "node_name:": "/pointwise_conv2_4/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/pointwise_conv2_4/Conv_output_0" + } + ] + }, + { + "node_id:": 299, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/pointwise_conv2_4/Conv_output_0" + } + ], + "node_name:": "/Transpose_32", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Transpose_32_output_0" + } + ] + }, + { + "node_id:": 300, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_46_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Transpose_32_output_0" + } + ], + "node_name:": "/Add_47", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_47_output_0" + } + ] + }, + { + "node_id:": 301, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward2/in_proj_2/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward2/activation_2/Sub", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward2/activation_2/Sub_output_0" + } + ] + }, + { + "node_id:": 302, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward2/activation_2/Sub_output_0" + } + ], + "node_name:": "/feed_forward2/activation_2/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward2/activation_2/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 303, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward2/in_proj_2/Add_output_0" + }, + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward2/activation_2/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward2/activation_2/Mul", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward2/activation_2/Mul_output_0" + } + ] + }, + { + "node_id:": 304, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_47_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/feed_forward2/out_proj_2/Add_output_0" + } + ], + "node_name:": "/Add_48", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_48_output_0" + } + ] + }, + { + "node_id:": 305, + "node_inputs:": [ + { + "input_dimension:": "4 144 1 96 ", + "input_name:": "cached_val2_1" + }, + { + "input_dimension:": "", + "input_name:": "/upsample_3/Constant_output_0" + } + ], + "node_name:": "/Gather_48", + "node_outputs:": [ + { + "output_dimension:": "144 1 96 ", + "output_name:": "/Gather_48_output_0" + } + ] + }, + { + "node_id:": 306, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_48_output_0" + }, + { + "input_dimension:": "384 96 ", + "input_name:": "onnx::MatMul_7532" + } + ], + "node_name:": "/in_proj2_2/MatMul", + "node_outputs:": [ + { + "output_dimension:": "36 1 96 ", + "output_name:": "/in_proj2_2/MatMul_output_0" + } + ] + }, + { + "node_id:": 307, + "node_inputs:": [ + { + "input_dimension:": "144 1 96 ", + "input_name:": "/Gather_48_output_0" + }, + { + "input_dimension:": "36 1 96 ", + "input_name:": "/in_proj2_2/MatMul_output_0" + } + ], + "node_name:": "/Concat_57", + "node_outputs:": [ + { + "output_dimension:": "180 1 96 ", + "output_name:": "/Concat_57_output_0" + } + ] + }, + { + "node_id:": 308, + "node_inputs:": [ + { + "input_dimension:": "180 1 96 ", + "input_name:": "/Concat_57_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_58_output_0" + } + ], + "node_name:": "/Reshape_31", + "node_outputs:": [ + { + "output_dimension:": "180 8 12 ", + "output_name:": "/Reshape_31_output_0" + } + ] + }, + { + "node_id:": 309, + "node_inputs:": [ + { + "input_dimension:": "180 8 12 ", + "input_name:": "/Reshape_31_output_0" + } + ], + "node_name:": "/Transpose_33", + "node_outputs:": [ + { + "output_dimension:": "8 180 12 ", + "output_name:": "/Transpose_33_output_0" + } + ] + }, + { + "node_id:": 310, + "node_inputs:": [ + { + "input_dimension:": "8 36 180 ", + "input_name:": "/Softmax_2_output_0" + }, + { + "input_dimension:": "8 180 12 ", + "input_name:": "/Transpose_33_output_0" + } + ], + "node_name:": "/MatMul_14", + "node_outputs:": [ + { + "output_dimension:": "8 36 12 ", + "output_name:": "/MatMul_14_output_0" + } + ] + }, + { + "node_id:": 311, + "node_inputs:": [ + { + "input_dimension:": "8 36 12 ", + "input_name:": "/MatMul_14_output_0" + } + ], + "node_name:": "/Transpose_34", + "node_outputs:": [ + { + "output_dimension:": "36 8 12 ", + "output_name:": "/Transpose_34_output_0" + } + ] + }, + { + "node_id:": 312, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_48_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/out_proj2_2/Add_output_0" + } + ], + "node_name:": "/Add_50", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_50_output_0" + } + ] + }, + { + "node_id:": 313, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_50_output_0" + } + ], + "node_name:": "/Transpose_35", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Transpose_35_output_0" + } + ] + }, + { + "node_id:": 314, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Transpose_35_output_0" + }, + { + "input_dimension:": "768 384 1 ", + "input_name:": "encoder.encoders.1.encoder.layers.0.conv_module2.pointwise_conv1.weight" + }, + { + "input_dimension:": "768 ", + "input_name:": "encoder.encoders.1.encoder.layers.0.conv_module2.pointwise_conv1.bias" + } + ], + "node_name:": "/pointwise_conv1_5/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 768 36 ", + "output_name:": "/pointwise_conv1_5/Conv_output_0" + } + ] + }, + { + "node_id:": 315, + "node_inputs:": [ + { + "input_dimension:": "1 768 36 ", + "input_name:": "/pointwise_conv1_5/Conv_output_0" + } + ], + "node_name:": "/Split_5", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Split_5_output_0" + }, + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Split_5_output_1" + } + ] + }, + { + "node_id:": 316, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Split_5_output_1" + } + ], + "node_name:": "/Sigmoid_5", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Sigmoid_5_output_0" + } + ] + }, + { + "node_id:": 317, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Split_5_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Sigmoid_5_output_0" + } + ], + "node_name:": "/Mul_31", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Mul_31_output_0" + } + ] + }, + { + "node_id:": 318, + "node_inputs:": [ + { + "input_dimension:": "4 1 384 30 ", + "input_name:": "cached_conv2_1" + }, + { + "input_dimension:": "", + "input_name:": "/upsample_3/Constant_output_0" + } + ], + "node_name:": "/Gather_50", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Gather_50_output_0" + } + ] + }, + { + "node_id:": 319, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Gather_50_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Mul_31_output_0" + } + ], + "node_name:": "/Concat_60", + "node_outputs:": [ + { + "output_dimension:": "1 384 66 ", + "output_name:": "/Concat_60_output_0" + } + ] + }, + { + "node_id:": 320, + "node_inputs:": [ + { + "input_dimension:": "1 384 66 ", + "input_name:": "/Concat_60_output_0" + }, + { + "input_dimension:": "384 1 31 ", + "input_name:": "encoder.encoders.1.encoder.layers.0.conv_module2.depthwise_conv.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.1.encoder.layers.0.conv_module2.depthwise_conv.bias" + } + ], + "node_name:": "/depthwise_conv_5/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/depthwise_conv_5/Conv_output_0" + } + ] + }, + { + "node_id:": 321, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/depthwise_conv_5/Conv_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/activation_5/Sub", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_5/Sub_output_0" + } + ] + }, + { + "node_id:": 322, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_5/Sub_output_0" + } + ], + "node_name:": "/activation_5/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_5/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 323, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/depthwise_conv_5/Conv_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_5/Sigmoid_output_0" + } + ], + "node_name:": "/activation_5/Mul", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_5/Mul_output_0" + } + ] + }, + { + "node_id:": 324, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_5/Mul_output_0" + }, + { + "input_dimension:": "384 384 1 ", + "input_name:": "encoder.encoders.1.encoder.layers.0.conv_module2.pointwise_conv2.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.1.encoder.layers.0.conv_module2.pointwise_conv2.bias" + } + ], + "node_name:": "/pointwise_conv2_5/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/pointwise_conv2_5/Conv_output_0" + } + ] + }, + { + "node_id:": 325, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/pointwise_conv2_5/Conv_output_0" + } + ], + "node_name:": "/Transpose_36", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Transpose_36_output_0" + } + ] + }, + { + "node_id:": 326, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_50_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Transpose_36_output_0" + } + ], + "node_name:": "/Add_51", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_51_output_0" + } + ] + }, + { + "node_id:": 327, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward3/in_proj_2/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward3/activation_2/Sub", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward3/activation_2/Sub_output_0" + } + ] + }, + { + "node_id:": 328, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward3/activation_2/Sub_output_0" + } + ], + "node_name:": "/feed_forward3/activation_2/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward3/activation_2/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 329, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward3/in_proj_2/Add_output_0" + }, + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward3/activation_2/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward3/activation_2/Mul", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward3/activation_2/Mul_output_0" + } + ] + }, + { + "node_id:": 330, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_51_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/feed_forward3/out_proj_2/Add_output_0" + } + ], + "node_name:": "/Add_52", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_52_output_0" + } + ] + }, + { + "node_id:": 331, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_52_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_52_output_0" + } + ], + "node_name:": "/norm_final_2/Mul", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/norm_final_2/Mul_output_0" + } + ] + }, + { + "node_id:": 332, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/norm_final_2/Mul_output_0" + } + ], + "node_name:": "/norm_final_2/ReduceMean", + "node_outputs:": [ + { + "output_dimension:": "36 1 1 ", + "output_name:": "/norm_final_2/ReduceMean_output_0" + } + ] + }, + { + "node_id:": 333, + "node_inputs:": [ + { + "input_dimension:": "36 1 1 ", + "input_name:": "/norm_final_2/ReduceMean_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/norm_final_2/Constant_output_0" + } + ], + "node_name:": "/norm_final_2/Add", + "node_outputs:": [ + { + "output_dimension:": "36 1 1 ", + "output_name:": "/norm_final_2/Add_output_0" + } + ] + }, + { + "node_id:": 334, + "node_inputs:": [ + { + "input_dimension:": "36 1 1 ", + "input_name:": "/norm_final_2/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/norm_final_1/Constant_1_output_0" + } + ], + "node_name:": "/norm_final_2/Pow", + "node_outputs:": [ + { + "output_dimension:": "36 1 1 ", + "output_name:": "/norm_final_2/Pow_output_0" + } + ] + }, + { + "node_id:": 335, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_52_output_0" + }, + { + "input_dimension:": "36 1 1 ", + "input_name:": "/norm_final_2/Pow_output_0" + } + ], + "node_name:": "/norm_final_2/Mul_1", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/norm_final_2/Mul_1_output_0" + } + ] + }, + { + "node_id:": 336, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/norm_final_2/Mul_1_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/downsample/ReduceSum_1_output_0" + } + ], + "node_name:": "/Sub_8", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Sub_8_output_0" + } + ] + }, + { + "node_id:": 337, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Sub_8_output_0" + }, + { + "input_dimension:": "", + "input_name:": "encoder.encoders.1.encoder.layers.0.bypass_scale" + } + ], + "node_name:": "/Mul_32", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Mul_32_output_0" + } + ] + }, + { + "node_id:": 338, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/downsample/ReduceSum_1_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Mul_32_output_0" + } + ], + "node_name:": "/Add_53", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_53_output_0" + } + ] + }, + { + "node_id:": 339, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward1/in_proj_3/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward1/activation_3/Sub", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward1/activation_3/Sub_output_0" + } + ] + }, + { + "node_id:": 340, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward1/activation_3/Sub_output_0" + } + ], + "node_name:": "/feed_forward1/activation_3/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward1/activation_3/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 341, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward1/in_proj_3/Add_output_0" + }, + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward1/activation_3/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward1/activation_3/Mul", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward1/activation_3/Mul_output_0" + } + ] + }, + { + "node_id:": 342, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_53_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/feed_forward1/out_proj_3/Add_output_0" + } + ], + "node_name:": "/Add_54", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_54_output_0" + } + ] + }, + { + "node_id:": 343, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_54_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_631_output_0" + } + ], + "node_name:": "/CumSum_3", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/CumSum_3_output_0" + } + ] + }, + { + "node_id:": 344, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/CumSum_3_output_0" + }, + { + "input_dimension:": "1 1 384 ", + "input_name:": "/Unsqueeze_117_output_0" + } + ], + "node_name:": "/Add_55", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_55_output_0" + } + ] + }, + { + "node_id:": 345, + "node_inputs:": [ + { + "input_dimension:": "36 1 ", + "input_name:": "/Unsqueeze_118_output_0" + }, + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_119_output_0" + } + ], + "node_name:": "/Add_57", + "node_outputs:": [ + { + "output_dimension:": "36 1 ", + "output_name:": "/Add_57_output_0" + } + ] + }, + { + "node_id:": 346, + "node_inputs:": [ + { + "input_dimension:": "36 1 ", + "input_name:": "/Add_57_output_0" + } + ], + "node_name:": "/Cast_17", + "node_outputs:": [ + { + "output_dimension:": "36 1 ", + "output_name:": "/Cast_17_output_0" + } + ] + }, + { + "node_id:": 347, + "node_inputs:": [ + { + "input_dimension:": "36 1 ", + "input_name:": "/Cast_17_output_0" + } + ], + "node_name:": "/Reciprocal_3", + "node_outputs:": [ + { + "output_dimension:": "36 1 ", + "output_name:": "/Reciprocal_3_output_0" + } + ] + }, + { + "node_id:": 348, + "node_inputs:": [ + { + "input_dimension:": "36 1 ", + "input_name:": "/Reciprocal_3_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + } + ], + "node_name:": "/Unsqueeze_120", + "node_outputs:": [ + { + "output_dimension:": "36 1 1 ", + "output_name:": "/Unsqueeze_120_output_0" + } + ] + }, + { + "node_id:": 349, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_55_output_0" + }, + { + "input_dimension:": "36 1 1 ", + "input_name:": "/Unsqueeze_120_output_0" + } + ], + "node_name:": "/Mul_35", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Mul_35_output_0" + } + ] + }, + { + "node_id:": 350, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Mul_35_output_0" + }, + { + "input_dimension:": "384 384 ", + "input_name:": "onnx::MatMul_7548" + } + ], + "node_name:": "/proj_3/MatMul", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/proj_3/MatMul_output_0" + } + ] + }, + { + "node_id:": 351, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_54_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/proj_3/MatMul_output_0" + } + ], + "node_name:": "/Add_59", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_59_output_0" + } + ] + }, + { + "node_id:": 352, + "node_inputs:": [ + { + "input_dimension:": "36 1 512 ", + "input_name:": "/in_proj_3/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_285_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_568_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_28", + "node_outputs:": [ + { + "output_dimension:": "36 1 192 ", + "output_name:": "/Slice_28_output_0" + } + ] + }, + { + "node_id:": 353, + "node_inputs:": [ + { + "input_dimension:": "4 144 1 192 ", + "input_name:": "cached_key_1" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_pos_1/Constant_2_output_0" + } + ], + "node_name:": "/Gather_67", + "node_outputs:": [ + { + "output_dimension:": "144 1 192 ", + "output_name:": "/Gather_67_output_0" + } + ] + }, + { + "node_id:": 354, + "node_inputs:": [ + { + "input_dimension:": "144 1 192 ", + "input_name:": "/Gather_67_output_0" + }, + { + "input_dimension:": "36 1 192 ", + "input_name:": "/Slice_28_output_0" + } + ], + "node_name:": "/Concat_61", + "node_outputs:": [ + { + "output_dimension:": "180 1 192 ", + "output_name:": "/Concat_61_output_0" + } + ] + }, + { + "node_id:": 355, + "node_inputs:": [ + { + "input_dimension:": "36 1 512 ", + "input_name:": "/in_proj_3/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1118_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_30", + "node_outputs:": [ + { + "output_dimension:": "36 1 32 ", + "output_name:": "/Slice_30_output_0" + } + ] + }, + { + "node_id:": 356, + "node_inputs:": [ + { + "input_dimension:": "36 1 32 ", + "input_name:": "/Slice_30_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_64_output_0" + } + ], + "node_name:": "/Reshape_34", + "node_outputs:": [ + { + "output_dimension:": "36 1 8 4 ", + "output_name:": "/Reshape_34_output_0" + } + ] + }, + { + "node_id:": 357, + "node_inputs:": [ + { + "input_dimension:": "36 1 8 4 ", + "input_name:": "/Reshape_34_output_0" + } + ], + "node_name:": "/Transpose_39", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 4 ", + "output_name:": "/Transpose_39_output_0" + } + ] + }, + { + "node_id:": 358, + "node_inputs:": [ + { + "input_dimension:": "1 8 36 4 ", + "input_name:": "/Transpose_39_output_0" + }, + { + "input_dimension:": "1 8 4 215 ", + "input_name:": "/Transpose_41_output_0" + } + ], + "node_name:": "/MatMul_15", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 215 ", + "output_name:": "/MatMul_15_output_0" + } + ] + }, + { + "node_id:": 359, + "node_inputs:": [ + { + "input_dimension:": "180 1 192 ", + "input_name:": "/Concat_61_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_266_output_0" + } + ], + "node_name:": "/Reshape_35", + "node_outputs:": [ + { + "output_dimension:": "180 1 8 24 ", + "output_name:": "/Reshape_35_output_0" + } + ] + }, + { + "node_id:": 360, + "node_inputs:": [ + { + "input_dimension:": "180 1 8 24 ", + "input_name:": "/Reshape_35_output_0" + } + ], + "node_name:": "/Transpose_40", + "node_outputs:": [ + { + "output_dimension:": "1 8 24 180 ", + "output_name:": "/Transpose_40_output_0" + } + ] + }, + { + "node_id:": 361, + "node_inputs:": [ + { + "input_dimension:": "36 1 512 ", + "input_name:": "/in_proj_3/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_285_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_27", + "node_outputs:": [ + { + "output_dimension:": "36 1 192 ", + "output_name:": "/Slice_27_output_0" + } + ] + }, + { + "node_id:": 362, + "node_inputs:": [ + { + "input_dimension:": "36 1 192 ", + "input_name:": "/Slice_27_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_63_output_0" + } + ], + "node_name:": "/Reshape_33", + "node_outputs:": [ + { + "output_dimension:": "36 1 8 24 ", + "output_name:": "/Reshape_33_output_0" + } + ] + }, + { + "node_id:": 363, + "node_inputs:": [ + { + "input_dimension:": "36 1 8 24 ", + "input_name:": "/Reshape_33_output_0" + } + ], + "node_name:": "/Transpose_38", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 24 ", + "output_name:": "/Transpose_38_output_0" + } + ] + }, + { + "node_id:": 364, + "node_inputs:": [ + { + "input_dimension:": "1 8 36 24 ", + "input_name:": "/Transpose_38_output_0" + }, + { + "input_dimension:": "1 8 24 180 ", + "input_name:": "/Transpose_40_output_0" + } + ], + "node_name:": "/MatMul_16", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 180 ", + "output_name:": "/MatMul_16_output_0" + } + ] + }, + { + "node_id:": 365, + "node_inputs:": [ + { + "input_dimension:": "1 8 36 215 ", + "input_name:": "/MatMul_15_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Concat_70_output_0" + } + ], + "node_name:": "/Reshape_38", + "node_outputs:": [ + { + "output_dimension:": "288 215 ", + "output_name:": "/Reshape_38_output_0" + } + ] + }, + { + "node_id:": 366, + "node_inputs:": [ + { + "input_dimension:": "288 215 ", + "input_name:": "/Reshape_38_output_0" + }, + { + "input_dimension:": "288 180 ", + "input_name:": "/Add_61_output_0" + } + ], + "node_name:": "/GatherElements_3", + "node_outputs:": [ + { + "output_dimension:": "288 180 ", + "output_name:": "/GatherElements_3_output_0" + } + ] + }, + { + "node_id:": 367, + "node_inputs:": [ + { + "input_dimension:": "288 180 ", + "input_name:": "/GatherElements_3_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_71_output_0" + } + ], + "node_name:": "/Reshape_39", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 180 ", + "output_name:": "/Reshape_39_output_0" + } + ] + }, + { + "node_id:": 368, + "node_inputs:": [ + { + "input_dimension:": "1 8 36 180 ", + "input_name:": "/MatMul_16_output_0" + }, + { + "input_dimension:": "1 8 36 180 ", + "input_name:": "/Reshape_39_output_0" + } + ], + "node_name:": "/Add_62", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 180 ", + "output_name:": "/Add_62_output_0" + } + ] + }, + { + "node_id:": 369, + "node_inputs:": [ + { + "input_dimension:": "1 8 36 180 ", + "input_name:": "/Add_62_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_72_output_0" + } + ], + "node_name:": "/Reshape_40", + "node_outputs:": [ + { + "output_dimension:": "8 36 180 ", + "output_name:": "/Reshape_40_output_0" + } + ] + }, + { + "node_id:": 370, + "node_inputs:": [ + { + "input_dimension:": "8 36 180 ", + "input_name:": "/Reshape_40_output_0" + } + ], + "node_name:": "/Softmax_3", + "node_outputs:": [ + { + "output_dimension:": "8 36 180 ", + "output_name:": "/Softmax_3_output_0" + } + ] + }, + { + "node_id:": 371, + "node_inputs:": [ + { + "input_dimension:": "36 1 512 ", + "input_name:": "/in_proj_3/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_568_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1118_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_29", + "node_outputs:": [ + { + "output_dimension:": "36 1 96 ", + "output_name:": "/Slice_29_output_0" + } + ] + }, + { + "node_id:": 372, + "node_inputs:": [ + { + "input_dimension:": "4 144 1 96 ", + "input_name:": "cached_val_1" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_pos_1/Constant_2_output_0" + } + ], + "node_name:": "/Gather_68", + "node_outputs:": [ + { + "output_dimension:": "144 1 96 ", + "output_name:": "/Gather_68_output_0" + } + ] + }, + { + "node_id:": 373, + "node_inputs:": [ + { + "input_dimension:": "144 1 96 ", + "input_name:": "/Gather_68_output_0" + }, + { + "input_dimension:": "36 1 96 ", + "input_name:": "/Slice_29_output_0" + } + ], + "node_name:": "/Concat_62", + "node_outputs:": [ + { + "output_dimension:": "180 1 96 ", + "output_name:": "/Concat_62_output_0" + } + ] + }, + { + "node_id:": 374, + "node_inputs:": [ + { + "input_dimension:": "180 1 96 ", + "input_name:": "/Concat_62_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_66_output_0" + } + ], + "node_name:": "/Reshape_36", + "node_outputs:": [ + { + "output_dimension:": "180 8 12 ", + "output_name:": "/Reshape_36_output_0" + } + ] + }, + { + "node_id:": 375, + "node_inputs:": [ + { + "input_dimension:": "180 8 12 ", + "input_name:": "/Reshape_36_output_0" + } + ], + "node_name:": "/Transpose_37", + "node_outputs:": [ + { + "output_dimension:": "8 180 12 ", + "output_name:": "/Transpose_37_output_0" + } + ] + }, + { + "node_id:": 376, + "node_inputs:": [ + { + "input_dimension:": "8 36 180 ", + "input_name:": "/Softmax_3_output_0" + }, + { + "input_dimension:": "8 180 12 ", + "input_name:": "/Transpose_37_output_0" + } + ], + "node_name:": "/MatMul_17", + "node_outputs:": [ + { + "output_dimension:": "8 36 12 ", + "output_name:": "/MatMul_17_output_0" + } + ] + }, + { + "node_id:": 377, + "node_inputs:": [ + { + "input_dimension:": "8 36 12 ", + "input_name:": "/MatMul_17_output_0" + } + ], + "node_name:": "/Transpose_42", + "node_outputs:": [ + { + "output_dimension:": "36 8 12 ", + "output_name:": "/Transpose_42_output_0" + } + ] + }, + { + "node_id:": 378, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_59_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_63_output_0" + } + ], + "node_name:": "/Add_64", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_64_output_0" + } + ] + }, + { + "node_id:": 379, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_64_output_0" + } + ], + "node_name:": "/Transpose_43", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Transpose_43_output_0" + } + ] + }, + { + "node_id:": 380, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Transpose_43_output_0" + }, + { + "input_dimension:": "768 384 1 ", + "input_name:": "encoder.encoders.1.encoder.layers.1.conv_module1.pointwise_conv1.weight" + }, + { + "input_dimension:": "768 ", + "input_name:": "encoder.encoders.1.encoder.layers.1.conv_module1.pointwise_conv1.bias" + } + ], + "node_name:": "/pointwise_conv1_6/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 768 36 ", + "output_name:": "/pointwise_conv1_6/Conv_output_0" + } + ] + }, + { + "node_id:": 381, + "node_inputs:": [ + { + "input_dimension:": "1 768 36 ", + "input_name:": "/pointwise_conv1_6/Conv_output_0" + } + ], + "node_name:": "/Split_6", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Split_6_output_0" + }, + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Split_6_output_1" + } + ] + }, + { + "node_id:": 382, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Split_6_output_1" + } + ], + "node_name:": "/Sigmoid_6", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Sigmoid_6_output_0" + } + ] + }, + { + "node_id:": 383, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Split_6_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Sigmoid_6_output_0" + } + ], + "node_name:": "/Mul_40", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Mul_40_output_0" + } + ] + }, + { + "node_id:": 384, + "node_inputs:": [ + { + "input_dimension:": "4 1 384 30 ", + "input_name:": "cached_conv1_1" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_pos_1/Constant_2_output_0" + } + ], + "node_name:": "/Gather_70", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Gather_70_output_0" + } + ] + }, + { + "node_id:": 385, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Gather_70_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Mul_40_output_0" + } + ], + "node_name:": "/Concat_74", + "node_outputs:": [ + { + "output_dimension:": "1 384 66 ", + "output_name:": "/Concat_74_output_0" + } + ] + }, + { + "node_id:": 386, + "node_inputs:": [ + { + "input_dimension:": "1 384 66 ", + "input_name:": "/Concat_74_output_0" + }, + { + "input_dimension:": "384 1 31 ", + "input_name:": "encoder.encoders.1.encoder.layers.1.conv_module1.depthwise_conv.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.1.encoder.layers.1.conv_module1.depthwise_conv.bias" + } + ], + "node_name:": "/depthwise_conv_6/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/depthwise_conv_6/Conv_output_0" + } + ] + }, + { + "node_id:": 387, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/depthwise_conv_6/Conv_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/activation_6/Sub", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_6/Sub_output_0" + } + ] + }, + { + "node_id:": 388, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_6/Sub_output_0" + } + ], + "node_name:": "/activation_6/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_6/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 389, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/depthwise_conv_6/Conv_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_6/Sigmoid_output_0" + } + ], + "node_name:": "/activation_6/Mul", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_6/Mul_output_0" + } + ] + }, + { + "node_id:": 390, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_6/Mul_output_0" + }, + { + "input_dimension:": "384 384 1 ", + "input_name:": "encoder.encoders.1.encoder.layers.1.conv_module1.pointwise_conv2.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.1.encoder.layers.1.conv_module1.pointwise_conv2.bias" + } + ], + "node_name:": "/pointwise_conv2_6/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/pointwise_conv2_6/Conv_output_0" + } + ] + }, + { + "node_id:": 391, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/pointwise_conv2_6/Conv_output_0" + } + ], + "node_name:": "/Transpose_44", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Transpose_44_output_0" + } + ] + }, + { + "node_id:": 392, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_64_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Transpose_44_output_0" + } + ], + "node_name:": "/Add_65", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_65_output_0" + } + ] + }, + { + "node_id:": 393, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward2/in_proj_3/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward2/activation_3/Sub", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward2/activation_3/Sub_output_0" + } + ] + }, + { + "node_id:": 394, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward2/activation_3/Sub_output_0" + } + ], + "node_name:": "/feed_forward2/activation_3/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward2/activation_3/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 395, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward2/in_proj_3/Add_output_0" + }, + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward2/activation_3/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward2/activation_3/Mul", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward2/activation_3/Mul_output_0" + } + ] + }, + { + "node_id:": 396, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_65_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/feed_forward2/out_proj_3/Add_output_0" + } + ], + "node_name:": "/Add_66", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_66_output_0" + } + ] + }, + { + "node_id:": 397, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_66_output_0" + }, + { + "input_dimension:": "384 96 ", + "input_name:": "onnx::MatMul_7594" + } + ], + "node_name:": "/in_proj2_3/MatMul", + "node_outputs:": [ + { + "output_dimension:": "36 1 96 ", + "output_name:": "/in_proj2_3/MatMul_output_0" + } + ] + }, + { + "node_id:": 398, + "node_inputs:": [ + { + "input_dimension:": "4 144 1 96 ", + "input_name:": "cached_val2_1" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_pos_1/Constant_2_output_0" + } + ], + "node_name:": "/Gather_69", + "node_outputs:": [ + { + "output_dimension:": "144 1 96 ", + "output_name:": "/Gather_69_output_0" + } + ] + }, + { + "node_id:": 399, + "node_inputs:": [ + { + "input_dimension:": "144 1 96 ", + "input_name:": "/Gather_69_output_0" + }, + { + "input_dimension:": "36 1 96 ", + "input_name:": "/in_proj2_3/MatMul_output_0" + } + ], + "node_name:": "/Concat_75", + "node_outputs:": [ + { + "output_dimension:": "180 1 96 ", + "output_name:": "/Concat_75_output_0" + } + ] + }, + { + "node_id:": 400, + "node_inputs:": [ + { + "input_dimension:": "180 1 96 ", + "input_name:": "/Concat_75_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_76_output_0" + } + ], + "node_name:": "/Reshape_42", + "node_outputs:": [ + { + "output_dimension:": "180 8 12 ", + "output_name:": "/Reshape_42_output_0" + } + ] + }, + { + "node_id:": 401, + "node_inputs:": [ + { + "input_dimension:": "180 8 12 ", + "input_name:": "/Reshape_42_output_0" + } + ], + "node_name:": "/Transpose_45", + "node_outputs:": [ + { + "output_dimension:": "8 180 12 ", + "output_name:": "/Transpose_45_output_0" + } + ] + }, + { + "node_id:": 402, + "node_inputs:": [ + { + "input_dimension:": "8 36 180 ", + "input_name:": "/Softmax_3_output_0" + }, + { + "input_dimension:": "8 180 12 ", + "input_name:": "/Transpose_45_output_0" + } + ], + "node_name:": "/MatMul_19", + "node_outputs:": [ + { + "output_dimension:": "8 36 12 ", + "output_name:": "/MatMul_19_output_0" + } + ] + }, + { + "node_id:": 403, + "node_inputs:": [ + { + "input_dimension:": "8 36 12 ", + "input_name:": "/MatMul_19_output_0" + } + ], + "node_name:": "/Transpose_46", + "node_outputs:": [ + { + "output_dimension:": "36 8 12 ", + "output_name:": "/Transpose_46_output_0" + } + ] + }, + { + "node_id:": 404, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_66_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/out_proj2_3/Add_output_0" + } + ], + "node_name:": "/Add_68", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_68_output_0" + } + ] + }, + { + "node_id:": 405, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_68_output_0" + } + ], + "node_name:": "/Transpose_47", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Transpose_47_output_0" + } + ] + }, + { + "node_id:": 406, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Transpose_47_output_0" + }, + { + "input_dimension:": "768 384 1 ", + "input_name:": "encoder.encoders.1.encoder.layers.1.conv_module2.pointwise_conv1.weight" + }, + { + "input_dimension:": "768 ", + "input_name:": "encoder.encoders.1.encoder.layers.1.conv_module2.pointwise_conv1.bias" + } + ], + "node_name:": "/pointwise_conv1_7/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 768 36 ", + "output_name:": "/pointwise_conv1_7/Conv_output_0" + } + ] + }, + { + "node_id:": 407, + "node_inputs:": [ + { + "input_dimension:": "1 768 36 ", + "input_name:": "/pointwise_conv1_7/Conv_output_0" + } + ], + "node_name:": "/Split_7", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Split_7_output_0" + }, + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Split_7_output_1" + } + ] + }, + { + "node_id:": 408, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Split_7_output_1" + } + ], + "node_name:": "/Sigmoid_7", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Sigmoid_7_output_0" + } + ] + }, + { + "node_id:": 409, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Split_7_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Sigmoid_7_output_0" + } + ], + "node_name:": "/Mul_42", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Mul_42_output_0" + } + ] + }, + { + "node_id:": 410, + "node_inputs:": [ + { + "input_dimension:": "4 1 384 30 ", + "input_name:": "cached_conv2_1" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_pos_1/Constant_2_output_0" + } + ], + "node_name:": "/Gather_71", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Gather_71_output_0" + } + ] + }, + { + "node_id:": 411, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Gather_71_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Mul_42_output_0" + } + ], + "node_name:": "/Concat_78", + "node_outputs:": [ + { + "output_dimension:": "1 384 66 ", + "output_name:": "/Concat_78_output_0" + } + ] + }, + { + "node_id:": 412, + "node_inputs:": [ + { + "input_dimension:": "1 384 66 ", + "input_name:": "/Concat_78_output_0" + }, + { + "input_dimension:": "384 1 31 ", + "input_name:": "encoder.encoders.1.encoder.layers.1.conv_module2.depthwise_conv.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.1.encoder.layers.1.conv_module2.depthwise_conv.bias" + } + ], + "node_name:": "/depthwise_conv_7/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/depthwise_conv_7/Conv_output_0" + } + ] + }, + { + "node_id:": 413, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/depthwise_conv_7/Conv_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/activation_7/Sub", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_7/Sub_output_0" + } + ] + }, + { + "node_id:": 414, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_7/Sub_output_0" + } + ], + "node_name:": "/activation_7/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_7/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 415, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/depthwise_conv_7/Conv_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_7/Sigmoid_output_0" + } + ], + "node_name:": "/activation_7/Mul", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_7/Mul_output_0" + } + ] + }, + { + "node_id:": 416, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_7/Mul_output_0" + }, + { + "input_dimension:": "384 384 1 ", + "input_name:": "encoder.encoders.1.encoder.layers.1.conv_module2.pointwise_conv2.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.1.encoder.layers.1.conv_module2.pointwise_conv2.bias" + } + ], + "node_name:": "/pointwise_conv2_7/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/pointwise_conv2_7/Conv_output_0" + } + ] + }, + { + "node_id:": 417, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/pointwise_conv2_7/Conv_output_0" + } + ], + "node_name:": "/Transpose_48", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Transpose_48_output_0" + } + ] + }, + { + "node_id:": 418, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_68_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Transpose_48_output_0" + } + ], + "node_name:": "/Add_69", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_69_output_0" + } + ] + }, + { + "node_id:": 419, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward3/in_proj_3/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward3/activation_3/Sub", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward3/activation_3/Sub_output_0" + } + ] + }, + { + "node_id:": 420, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward3/activation_3/Sub_output_0" + } + ], + "node_name:": "/feed_forward3/activation_3/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward3/activation_3/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 421, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward3/in_proj_3/Add_output_0" + }, + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward3/activation_3/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward3/activation_3/Mul", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward3/activation_3/Mul_output_0" + } + ] + }, + { + "node_id:": 422, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_69_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/feed_forward3/out_proj_3/Add_output_0" + } + ], + "node_name:": "/Add_70", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_70_output_0" + } + ] + }, + { + "node_id:": 423, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_70_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_70_output_0" + } + ], + "node_name:": "/norm_final_3/Mul", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/norm_final_3/Mul_output_0" + } + ] + }, + { + "node_id:": 424, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/norm_final_3/Mul_output_0" + } + ], + "node_name:": "/norm_final_3/ReduceMean", + "node_outputs:": [ + { + "output_dimension:": "36 1 1 ", + "output_name:": "/norm_final_3/ReduceMean_output_0" + } + ] + }, + { + "node_id:": 425, + "node_inputs:": [ + { + "input_dimension:": "36 1 1 ", + "input_name:": "/norm_final_3/ReduceMean_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/norm_final_3/Constant_output_0" + } + ], + "node_name:": "/norm_final_3/Add", + "node_outputs:": [ + { + "output_dimension:": "36 1 1 ", + "output_name:": "/norm_final_3/Add_output_0" + } + ] + }, + { + "node_id:": 426, + "node_inputs:": [ + { + "input_dimension:": "36 1 1 ", + "input_name:": "/norm_final_3/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/norm_final_1/Constant_1_output_0" + } + ], + "node_name:": "/norm_final_3/Pow", + "node_outputs:": [ + { + "output_dimension:": "36 1 1 ", + "output_name:": "/norm_final_3/Pow_output_0" + } + ] + }, + { + "node_id:": 427, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_70_output_0" + }, + { + "input_dimension:": "36 1 1 ", + "input_name:": "/norm_final_3/Pow_output_0" + } + ], + "node_name:": "/norm_final_3/Mul_1", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/norm_final_3/Mul_1_output_0" + } + ] + }, + { + "node_id:": 428, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/norm_final_3/Mul_1_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_53_output_0" + } + ], + "node_name:": "/Sub_11", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Sub_11_output_0" + } + ] + }, + { + "node_id:": 429, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Sub_11_output_0" + }, + { + "input_dimension:": "", + "input_name:": "encoder.encoders.1.encoder.layers.1.bypass_scale" + } + ], + "node_name:": "/Mul_43", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Mul_43_output_0" + } + ] + }, + { + "node_id:": 430, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_53_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Mul_43_output_0" + } + ], + "node_name:": "/Add_71", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_71_output_0" + } + ] + }, + { + "node_id:": 431, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward1/in_proj_4/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward1/activation_4/Sub", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward1/activation_4/Sub_output_0" + } + ] + }, + { + "node_id:": 432, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward1/activation_4/Sub_output_0" + } + ], + "node_name:": "/feed_forward1/activation_4/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward1/activation_4/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 433, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward1/in_proj_4/Add_output_0" + }, + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward1/activation_4/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward1/activation_4/Mul", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward1/activation_4/Mul_output_0" + } + ] + }, + { + "node_id:": 434, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_71_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/feed_forward1/out_proj_4/Add_output_0" + } + ], + "node_name:": "/Add_72", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_72_output_0" + } + ] + }, + { + "node_id:": 435, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_72_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_631_output_0" + } + ], + "node_name:": "/CumSum_4", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/CumSum_4_output_0" + } + ] + }, + { + "node_id:": 436, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/CumSum_4_output_0" + }, + { + "input_dimension:": "1 1 384 ", + "input_name:": "/Unsqueeze_151_output_0" + } + ], + "node_name:": "/Add_73", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_73_output_0" + } + ] + }, + { + "node_id:": 437, + "node_inputs:": [ + { + "input_dimension:": "36 1 ", + "input_name:": "/Unsqueeze_152_output_0" + }, + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_153_output_0" + } + ], + "node_name:": "/Add_75", + "node_outputs:": [ + { + "output_dimension:": "36 1 ", + "output_name:": "/Add_75_output_0" + } + ] + }, + { + "node_id:": 438, + "node_inputs:": [ + { + "input_dimension:": "36 1 ", + "input_name:": "/Add_75_output_0" + } + ], + "node_name:": "/Cast_22", + "node_outputs:": [ + { + "output_dimension:": "36 1 ", + "output_name:": "/Cast_22_output_0" + } + ] + }, + { + "node_id:": 439, + "node_inputs:": [ + { + "input_dimension:": "36 1 ", + "input_name:": "/Cast_22_output_0" + } + ], + "node_name:": "/Reciprocal_4", + "node_outputs:": [ + { + "output_dimension:": "36 1 ", + "output_name:": "/Reciprocal_4_output_0" + } + ] + }, + { + "node_id:": 440, + "node_inputs:": [ + { + "input_dimension:": "36 1 ", + "input_name:": "/Reciprocal_4_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + } + ], + "node_name:": "/Unsqueeze_154", + "node_outputs:": [ + { + "output_dimension:": "36 1 1 ", + "output_name:": "/Unsqueeze_154_output_0" + } + ] + }, + { + "node_id:": 441, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_73_output_0" + }, + { + "input_dimension:": "36 1 1 ", + "input_name:": "/Unsqueeze_154_output_0" + } + ], + "node_name:": "/Mul_46", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Mul_46_output_0" + } + ] + }, + { + "node_id:": 442, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Mul_46_output_0" + }, + { + "input_dimension:": "384 384 ", + "input_name:": "onnx::MatMul_7610" + } + ], + "node_name:": "/proj_4/MatMul", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/proj_4/MatMul_output_0" + } + ] + }, + { + "node_id:": 443, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_72_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/proj_4/MatMul_output_0" + } + ], + "node_name:": "/Add_77", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_77_output_0" + } + ] + }, + { + "node_id:": 444, + "node_inputs:": [ + { + "input_dimension:": "36 1 512 ", + "input_name:": "/in_proj_4/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_285_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_568_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_37", + "node_outputs:": [ + { + "output_dimension:": "36 1 192 ", + "output_name:": "/Slice_37_output_0" + } + ] + }, + { + "node_id:": 445, + "node_inputs:": [ + { + "input_dimension:": "4 144 1 192 ", + "input_name:": "cached_key_1" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_404_output_0" + } + ], + "node_name:": "/Gather_88", + "node_outputs:": [ + { + "output_dimension:": "144 1 192 ", + "output_name:": "/Gather_88_output_0" + } + ] + }, + { + "node_id:": 446, + "node_inputs:": [ + { + "input_dimension:": "144 1 192 ", + "input_name:": "/Gather_88_output_0" + }, + { + "input_dimension:": "36 1 192 ", + "input_name:": "/Slice_37_output_0" + } + ], + "node_name:": "/Concat_79", + "node_outputs:": [ + { + "output_dimension:": "180 1 192 ", + "output_name:": "/Concat_79_output_0" + } + ] + }, + { + "node_id:": 447, + "node_inputs:": [ + { + "input_dimension:": "36 1 512 ", + "input_name:": "/in_proj_4/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1118_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_39", + "node_outputs:": [ + { + "output_dimension:": "36 1 32 ", + "output_name:": "/Slice_39_output_0" + } + ] + }, + { + "node_id:": 448, + "node_inputs:": [ + { + "input_dimension:": "36 1 32 ", + "input_name:": "/Slice_39_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_82_output_0" + } + ], + "node_name:": "/Reshape_45", + "node_outputs:": [ + { + "output_dimension:": "36 1 8 4 ", + "output_name:": "/Reshape_45_output_0" + } + ] + }, + { + "node_id:": 449, + "node_inputs:": [ + { + "input_dimension:": "36 1 8 4 ", + "input_name:": "/Reshape_45_output_0" + } + ], + "node_name:": "/Transpose_51", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 4 ", + "output_name:": "/Transpose_51_output_0" + } + ] + }, + { + "node_id:": 450, + "node_inputs:": [ + { + "input_dimension:": "1 8 36 4 ", + "input_name:": "/Transpose_51_output_0" + }, + { + "input_dimension:": "1 8 4 215 ", + "input_name:": "/Transpose_53_output_0" + } + ], + "node_name:": "/MatMul_20", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 215 ", + "output_name:": "/MatMul_20_output_0" + } + ] + }, + { + "node_id:": 451, + "node_inputs:": [ + { + "input_dimension:": "180 1 192 ", + "input_name:": "/Concat_79_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_266_output_0" + } + ], + "node_name:": "/Reshape_46", + "node_outputs:": [ + { + "output_dimension:": "180 1 8 24 ", + "output_name:": "/Reshape_46_output_0" + } + ] + }, + { + "node_id:": 452, + "node_inputs:": [ + { + "input_dimension:": "180 1 8 24 ", + "input_name:": "/Reshape_46_output_0" + } + ], + "node_name:": "/Transpose_52", + "node_outputs:": [ + { + "output_dimension:": "1 8 24 180 ", + "output_name:": "/Transpose_52_output_0" + } + ] + }, + { + "node_id:": 453, + "node_inputs:": [ + { + "input_dimension:": "36 1 512 ", + "input_name:": "/in_proj_4/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_285_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_36", + "node_outputs:": [ + { + "output_dimension:": "36 1 192 ", + "output_name:": "/Slice_36_output_0" + } + ] + }, + { + "node_id:": 454, + "node_inputs:": [ + { + "input_dimension:": "36 1 192 ", + "input_name:": "/Slice_36_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_81_output_0" + } + ], + "node_name:": "/Reshape_44", + "node_outputs:": [ + { + "output_dimension:": "36 1 8 24 ", + "output_name:": "/Reshape_44_output_0" + } + ] + }, + { + "node_id:": 455, + "node_inputs:": [ + { + "input_dimension:": "36 1 8 24 ", + "input_name:": "/Reshape_44_output_0" + } + ], + "node_name:": "/Transpose_50", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 24 ", + "output_name:": "/Transpose_50_output_0" + } + ] + }, + { + "node_id:": 456, + "node_inputs:": [ + { + "input_dimension:": "1 8 36 24 ", + "input_name:": "/Transpose_50_output_0" + }, + { + "input_dimension:": "1 8 24 180 ", + "input_name:": "/Transpose_52_output_0" + } + ], + "node_name:": "/MatMul_21", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 180 ", + "output_name:": "/MatMul_21_output_0" + } + ] + }, + { + "node_id:": 457, + "node_inputs:": [ + { + "input_dimension:": "1 8 36 215 ", + "input_name:": "/MatMul_20_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Concat_88_output_0" + } + ], + "node_name:": "/Reshape_49", + "node_outputs:": [ + { + "output_dimension:": "288 215 ", + "output_name:": "/Reshape_49_output_0" + } + ] + }, + { + "node_id:": 458, + "node_inputs:": [ + { + "input_dimension:": "288 215 ", + "input_name:": "/Reshape_49_output_0" + }, + { + "input_dimension:": "288 180 ", + "input_name:": "/Add_79_output_0" + } + ], + "node_name:": "/GatherElements_4", + "node_outputs:": [ + { + "output_dimension:": "288 180 ", + "output_name:": "/GatherElements_4_output_0" + } + ] + }, + { + "node_id:": 459, + "node_inputs:": [ + { + "input_dimension:": "288 180 ", + "input_name:": "/GatherElements_4_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_89_output_0" + } + ], + "node_name:": "/Reshape_50", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 180 ", + "output_name:": "/Reshape_50_output_0" + } + ] + }, + { + "node_id:": 460, + "node_inputs:": [ + { + "input_dimension:": "1 8 36 180 ", + "input_name:": "/MatMul_21_output_0" + }, + { + "input_dimension:": "1 8 36 180 ", + "input_name:": "/Reshape_50_output_0" + } + ], + "node_name:": "/Add_80", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 180 ", + "output_name:": "/Add_80_output_0" + } + ] + }, + { + "node_id:": 461, + "node_inputs:": [ + { + "input_dimension:": "1 8 36 180 ", + "input_name:": "/Add_80_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_90_output_0" + } + ], + "node_name:": "/Reshape_51", + "node_outputs:": [ + { + "output_dimension:": "8 36 180 ", + "output_name:": "/Reshape_51_output_0" + } + ] + }, + { + "node_id:": 462, + "node_inputs:": [ + { + "input_dimension:": "8 36 180 ", + "input_name:": "/Reshape_51_output_0" + } + ], + "node_name:": "/Softmax_4", + "node_outputs:": [ + { + "output_dimension:": "8 36 180 ", + "output_name:": "/Softmax_4_output_0" + } + ] + }, + { + "node_id:": 463, + "node_inputs:": [ + { + "input_dimension:": "36 1 512 ", + "input_name:": "/in_proj_4/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_568_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1118_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_38", + "node_outputs:": [ + { + "output_dimension:": "36 1 96 ", + "output_name:": "/Slice_38_output_0" + } + ] + }, + { + "node_id:": 464, + "node_inputs:": [ + { + "input_dimension:": "4 144 1 96 ", + "input_name:": "cached_val_1" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_404_output_0" + } + ], + "node_name:": "/Gather_89", + "node_outputs:": [ + { + "output_dimension:": "144 1 96 ", + "output_name:": "/Gather_89_output_0" + } + ] + }, + { + "node_id:": 465, + "node_inputs:": [ + { + "input_dimension:": "144 1 96 ", + "input_name:": "/Gather_89_output_0" + }, + { + "input_dimension:": "36 1 96 ", + "input_name:": "/Slice_38_output_0" + } + ], + "node_name:": "/Concat_80", + "node_outputs:": [ + { + "output_dimension:": "180 1 96 ", + "output_name:": "/Concat_80_output_0" + } + ] + }, + { + "node_id:": 466, + "node_inputs:": [ + { + "input_dimension:": "180 1 96 ", + "input_name:": "/Concat_80_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_84_output_0" + } + ], + "node_name:": "/Reshape_47", + "node_outputs:": [ + { + "output_dimension:": "180 8 12 ", + "output_name:": "/Reshape_47_output_0" + } + ] + }, + { + "node_id:": 467, + "node_inputs:": [ + { + "input_dimension:": "180 8 12 ", + "input_name:": "/Reshape_47_output_0" + } + ], + "node_name:": "/Transpose_49", + "node_outputs:": [ + { + "output_dimension:": "8 180 12 ", + "output_name:": "/Transpose_49_output_0" + } + ] + }, + { + "node_id:": 468, + "node_inputs:": [ + { + "input_dimension:": "8 36 180 ", + "input_name:": "/Softmax_4_output_0" + }, + { + "input_dimension:": "8 180 12 ", + "input_name:": "/Transpose_49_output_0" + } + ], + "node_name:": "/MatMul_22", + "node_outputs:": [ + { + "output_dimension:": "8 36 12 ", + "output_name:": "/MatMul_22_output_0" + } + ] + }, + { + "node_id:": 469, + "node_inputs:": [ + { + "input_dimension:": "8 36 12 ", + "input_name:": "/MatMul_22_output_0" + } + ], + "node_name:": "/Transpose_54", + "node_outputs:": [ + { + "output_dimension:": "36 8 12 ", + "output_name:": "/Transpose_54_output_0" + } + ] + }, + { + "node_id:": 470, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_77_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_81_output_0" + } + ], + "node_name:": "/Add_82", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_82_output_0" + } + ] + }, + { + "node_id:": 471, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_82_output_0" + } + ], + "node_name:": "/Transpose_55", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Transpose_55_output_0" + } + ] + }, + { + "node_id:": 472, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Transpose_55_output_0" + }, + { + "input_dimension:": "768 384 1 ", + "input_name:": "encoder.encoders.1.encoder.layers.2.conv_module1.pointwise_conv1.weight" + }, + { + "input_dimension:": "768 ", + "input_name:": "encoder.encoders.1.encoder.layers.2.conv_module1.pointwise_conv1.bias" + } + ], + "node_name:": "/pointwise_conv1_8/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 768 36 ", + "output_name:": "/pointwise_conv1_8/Conv_output_0" + } + ] + }, + { + "node_id:": 473, + "node_inputs:": [ + { + "input_dimension:": "1 768 36 ", + "input_name:": "/pointwise_conv1_8/Conv_output_0" + } + ], + "node_name:": "/Split_8", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Split_8_output_0" + }, + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Split_8_output_1" + } + ] + }, + { + "node_id:": 474, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Split_8_output_1" + } + ], + "node_name:": "/Sigmoid_8", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Sigmoid_8_output_0" + } + ] + }, + { + "node_id:": 475, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Split_8_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Sigmoid_8_output_0" + } + ], + "node_name:": "/Mul_51", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Mul_51_output_0" + } + ] + }, + { + "node_id:": 476, + "node_inputs:": [ + { + "input_dimension:": "4 1 384 30 ", + "input_name:": "cached_conv1_1" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_404_output_0" + } + ], + "node_name:": "/Gather_91", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Gather_91_output_0" + } + ] + }, + { + "node_id:": 477, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Gather_91_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Mul_51_output_0" + } + ], + "node_name:": "/Concat_92", + "node_outputs:": [ + { + "output_dimension:": "1 384 66 ", + "output_name:": "/Concat_92_output_0" + } + ] + }, + { + "node_id:": 478, + "node_inputs:": [ + { + "input_dimension:": "1 384 66 ", + "input_name:": "/Concat_92_output_0" + }, + { + "input_dimension:": "384 1 31 ", + "input_name:": "encoder.encoders.1.encoder.layers.2.conv_module1.depthwise_conv.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.1.encoder.layers.2.conv_module1.depthwise_conv.bias" + } + ], + "node_name:": "/depthwise_conv_8/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/depthwise_conv_8/Conv_output_0" + } + ] + }, + { + "node_id:": 479, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/depthwise_conv_8/Conv_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/activation_8/Sub", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_8/Sub_output_0" + } + ] + }, + { + "node_id:": 480, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_8/Sub_output_0" + } + ], + "node_name:": "/activation_8/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_8/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 481, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/depthwise_conv_8/Conv_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_8/Sigmoid_output_0" + } + ], + "node_name:": "/activation_8/Mul", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_8/Mul_output_0" + } + ] + }, + { + "node_id:": 482, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_8/Mul_output_0" + }, + { + "input_dimension:": "384 384 1 ", + "input_name:": "encoder.encoders.1.encoder.layers.2.conv_module1.pointwise_conv2.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.1.encoder.layers.2.conv_module1.pointwise_conv2.bias" + } + ], + "node_name:": "/pointwise_conv2_8/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/pointwise_conv2_8/Conv_output_0" + } + ] + }, + { + "node_id:": 483, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/pointwise_conv2_8/Conv_output_0" + } + ], + "node_name:": "/Transpose_56", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Transpose_56_output_0" + } + ] + }, + { + "node_id:": 484, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_82_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Transpose_56_output_0" + } + ], + "node_name:": "/Add_83", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_83_output_0" + } + ] + }, + { + "node_id:": 485, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward2/in_proj_4/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward2/activation_4/Sub", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward2/activation_4/Sub_output_0" + } + ] + }, + { + "node_id:": 486, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward2/activation_4/Sub_output_0" + } + ], + "node_name:": "/feed_forward2/activation_4/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward2/activation_4/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 487, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward2/in_proj_4/Add_output_0" + }, + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward2/activation_4/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward2/activation_4/Mul", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward2/activation_4/Mul_output_0" + } + ] + }, + { + "node_id:": 488, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_83_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/feed_forward2/out_proj_4/Add_output_0" + } + ], + "node_name:": "/Add_84", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_84_output_0" + } + ] + }, + { + "node_id:": 489, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_84_output_0" + }, + { + "input_dimension:": "384 96 ", + "input_name:": "onnx::MatMul_7656" + } + ], + "node_name:": "/in_proj2_4/MatMul", + "node_outputs:": [ + { + "output_dimension:": "36 1 96 ", + "output_name:": "/in_proj2_4/MatMul_output_0" + } + ] + }, + { + "node_id:": 490, + "node_inputs:": [ + { + "input_dimension:": "4 144 1 96 ", + "input_name:": "cached_val2_1" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_404_output_0" + } + ], + "node_name:": "/Gather_90", + "node_outputs:": [ + { + "output_dimension:": "144 1 96 ", + "output_name:": "/Gather_90_output_0" + } + ] + }, + { + "node_id:": 491, + "node_inputs:": [ + { + "input_dimension:": "144 1 96 ", + "input_name:": "/Gather_90_output_0" + }, + { + "input_dimension:": "36 1 96 ", + "input_name:": "/in_proj2_4/MatMul_output_0" + } + ], + "node_name:": "/Concat_93", + "node_outputs:": [ + { + "output_dimension:": "180 1 96 ", + "output_name:": "/Concat_93_output_0" + } + ] + }, + { + "node_id:": 492, + "node_inputs:": [ + { + "input_dimension:": "180 1 96 ", + "input_name:": "/Concat_93_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_94_output_0" + } + ], + "node_name:": "/Reshape_53", + "node_outputs:": [ + { + "output_dimension:": "180 8 12 ", + "output_name:": "/Reshape_53_output_0" + } + ] + }, + { + "node_id:": 493, + "node_inputs:": [ + { + "input_dimension:": "180 8 12 ", + "input_name:": "/Reshape_53_output_0" + } + ], + "node_name:": "/Transpose_57", + "node_outputs:": [ + { + "output_dimension:": "8 180 12 ", + "output_name:": "/Transpose_57_output_0" + } + ] + }, + { + "node_id:": 494, + "node_inputs:": [ + { + "input_dimension:": "8 36 180 ", + "input_name:": "/Softmax_4_output_0" + }, + { + "input_dimension:": "8 180 12 ", + "input_name:": "/Transpose_57_output_0" + } + ], + "node_name:": "/MatMul_24", + "node_outputs:": [ + { + "output_dimension:": "8 36 12 ", + "output_name:": "/MatMul_24_output_0" + } + ] + }, + { + "node_id:": 495, + "node_inputs:": [ + { + "input_dimension:": "8 36 12 ", + "input_name:": "/MatMul_24_output_0" + } + ], + "node_name:": "/Transpose_58", + "node_outputs:": [ + { + "output_dimension:": "36 8 12 ", + "output_name:": "/Transpose_58_output_0" + } + ] + }, + { + "node_id:": 496, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_84_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/out_proj2_4/Add_output_0" + } + ], + "node_name:": "/Add_86", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_86_output_0" + } + ] + }, + { + "node_id:": 497, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_86_output_0" + } + ], + "node_name:": "/Transpose_59", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Transpose_59_output_0" + } + ] + }, + { + "node_id:": 498, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Transpose_59_output_0" + }, + { + "input_dimension:": "768 384 1 ", + "input_name:": "encoder.encoders.1.encoder.layers.2.conv_module2.pointwise_conv1.weight" + }, + { + "input_dimension:": "768 ", + "input_name:": "encoder.encoders.1.encoder.layers.2.conv_module2.pointwise_conv1.bias" + } + ], + "node_name:": "/pointwise_conv1_9/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 768 36 ", + "output_name:": "/pointwise_conv1_9/Conv_output_0" + } + ] + }, + { + "node_id:": 499, + "node_inputs:": [ + { + "input_dimension:": "1 768 36 ", + "input_name:": "/pointwise_conv1_9/Conv_output_0" + } + ], + "node_name:": "/Split_9", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Split_9_output_0" + }, + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Split_9_output_1" + } + ] + }, + { + "node_id:": 500, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Split_9_output_1" + } + ], + "node_name:": "/Sigmoid_9", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Sigmoid_9_output_0" + } + ] + }, + { + "node_id:": 501, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Split_9_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Sigmoid_9_output_0" + } + ], + "node_name:": "/Mul_53", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Mul_53_output_0" + } + ] + }, + { + "node_id:": 502, + "node_inputs:": [ + { + "input_dimension:": "4 1 384 30 ", + "input_name:": "cached_conv2_1" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_404_output_0" + } + ], + "node_name:": "/Gather_92", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Gather_92_output_0" + } + ] + }, + { + "node_id:": 503, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Gather_92_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Mul_53_output_0" + } + ], + "node_name:": "/Concat_96", + "node_outputs:": [ + { + "output_dimension:": "1 384 66 ", + "output_name:": "/Concat_96_output_0" + } + ] + }, + { + "node_id:": 504, + "node_inputs:": [ + { + "input_dimension:": "1 384 66 ", + "input_name:": "/Concat_96_output_0" + }, + { + "input_dimension:": "384 1 31 ", + "input_name:": "encoder.encoders.1.encoder.layers.2.conv_module2.depthwise_conv.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.1.encoder.layers.2.conv_module2.depthwise_conv.bias" + } + ], + "node_name:": "/depthwise_conv_9/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/depthwise_conv_9/Conv_output_0" + } + ] + }, + { + "node_id:": 505, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/depthwise_conv_9/Conv_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/activation_9/Sub", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_9/Sub_output_0" + } + ] + }, + { + "node_id:": 506, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_9/Sub_output_0" + } + ], + "node_name:": "/activation_9/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_9/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 507, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/depthwise_conv_9/Conv_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_9/Sigmoid_output_0" + } + ], + "node_name:": "/activation_9/Mul", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_9/Mul_output_0" + } + ] + }, + { + "node_id:": 508, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_9/Mul_output_0" + }, + { + "input_dimension:": "384 384 1 ", + "input_name:": "encoder.encoders.1.encoder.layers.2.conv_module2.pointwise_conv2.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.1.encoder.layers.2.conv_module2.pointwise_conv2.bias" + } + ], + "node_name:": "/pointwise_conv2_9/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/pointwise_conv2_9/Conv_output_0" + } + ] + }, + { + "node_id:": 509, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/pointwise_conv2_9/Conv_output_0" + } + ], + "node_name:": "/Transpose_60", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Transpose_60_output_0" + } + ] + }, + { + "node_id:": 510, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_86_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Transpose_60_output_0" + } + ], + "node_name:": "/Add_87", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_87_output_0" + } + ] + }, + { + "node_id:": 511, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward3/in_proj_4/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward3/activation_4/Sub", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward3/activation_4/Sub_output_0" + } + ] + }, + { + "node_id:": 512, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward3/activation_4/Sub_output_0" + } + ], + "node_name:": "/feed_forward3/activation_4/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward3/activation_4/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 513, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward3/in_proj_4/Add_output_0" + }, + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward3/activation_4/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward3/activation_4/Mul", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward3/activation_4/Mul_output_0" + } + ] + }, + { + "node_id:": 514, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_87_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/feed_forward3/out_proj_4/Add_output_0" + } + ], + "node_name:": "/Add_88", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_88_output_0" + } + ] + }, + { + "node_id:": 515, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_88_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_88_output_0" + } + ], + "node_name:": "/norm_final_4/Mul", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/norm_final_4/Mul_output_0" + } + ] + }, + { + "node_id:": 516, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/norm_final_4/Mul_output_0" + } + ], + "node_name:": "/norm_final_4/ReduceMean", + "node_outputs:": [ + { + "output_dimension:": "36 1 1 ", + "output_name:": "/norm_final_4/ReduceMean_output_0" + } + ] + }, + { + "node_id:": 517, + "node_inputs:": [ + { + "input_dimension:": "36 1 1 ", + "input_name:": "/norm_final_4/ReduceMean_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/norm_final_4/Constant_output_0" + } + ], + "node_name:": "/norm_final_4/Add", + "node_outputs:": [ + { + "output_dimension:": "36 1 1 ", + "output_name:": "/norm_final_4/Add_output_0" + } + ] + }, + { + "node_id:": 518, + "node_inputs:": [ + { + "input_dimension:": "36 1 1 ", + "input_name:": "/norm_final_4/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/norm_final_1/Constant_1_output_0" + } + ], + "node_name:": "/norm_final_4/Pow", + "node_outputs:": [ + { + "output_dimension:": "36 1 1 ", + "output_name:": "/norm_final_4/Pow_output_0" + } + ] + }, + { + "node_id:": 519, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_88_output_0" + }, + { + "input_dimension:": "36 1 1 ", + "input_name:": "/norm_final_4/Pow_output_0" + } + ], + "node_name:": "/norm_final_4/Mul_1", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/norm_final_4/Mul_1_output_0" + } + ] + }, + { + "node_id:": 520, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/norm_final_4/Mul_1_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_71_output_0" + } + ], + "node_name:": "/Sub_14", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Sub_14_output_0" + } + ] + }, + { + "node_id:": 521, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Sub_14_output_0" + }, + { + "input_dimension:": "", + "input_name:": "encoder.encoders.1.encoder.layers.2.bypass_scale" + } + ], + "node_name:": "/Mul_54", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Mul_54_output_0" + } + ] + }, + { + "node_id:": 522, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_71_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Mul_54_output_0" + } + ], + "node_name:": "/Add_89", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_89_output_0" + } + ] + }, + { + "node_id:": 523, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward1/in_proj_5/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward1/activation_5/Sub", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward1/activation_5/Sub_output_0" + } + ] + }, + { + "node_id:": 524, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward1/activation_5/Sub_output_0" + } + ], + "node_name:": "/feed_forward1/activation_5/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward1/activation_5/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 525, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward1/in_proj_5/Add_output_0" + }, + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward1/activation_5/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward1/activation_5/Mul", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward1/activation_5/Mul_output_0" + } + ] + }, + { + "node_id:": 526, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_89_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/feed_forward1/out_proj_5/Add_output_0" + } + ], + "node_name:": "/Add_90", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_90_output_0" + } + ] + }, + { + "node_id:": 527, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_90_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_631_output_0" + } + ], + "node_name:": "/CumSum_5", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/CumSum_5_output_0" + } + ] + }, + { + "node_id:": 528, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/CumSum_5_output_0" + }, + { + "input_dimension:": "1 1 384 ", + "input_name:": "/Unsqueeze_185_output_0" + } + ], + "node_name:": "/Add_91", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_91_output_0" + } + ] + }, + { + "node_id:": 529, + "node_inputs:": [ + { + "input_dimension:": "36 1 ", + "input_name:": "/Unsqueeze_186_output_0" + }, + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_187_output_0" + } + ], + "node_name:": "/Add_93", + "node_outputs:": [ + { + "output_dimension:": "36 1 ", + "output_name:": "/Add_93_output_0" + } + ] + }, + { + "node_id:": 530, + "node_inputs:": [ + { + "input_dimension:": "36 1 ", + "input_name:": "/Add_93_output_0" + } + ], + "node_name:": "/Cast_27", + "node_outputs:": [ + { + "output_dimension:": "36 1 ", + "output_name:": "/Cast_27_output_0" + } + ] + }, + { + "node_id:": 531, + "node_inputs:": [ + { + "input_dimension:": "36 1 ", + "input_name:": "/Cast_27_output_0" + } + ], + "node_name:": "/Reciprocal_5", + "node_outputs:": [ + { + "output_dimension:": "36 1 ", + "output_name:": "/Reciprocal_5_output_0" + } + ] + }, + { + "node_id:": 532, + "node_inputs:": [ + { + "input_dimension:": "36 1 ", + "input_name:": "/Reciprocal_5_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + } + ], + "node_name:": "/Unsqueeze_188", + "node_outputs:": [ + { + "output_dimension:": "36 1 1 ", + "output_name:": "/Unsqueeze_188_output_0" + } + ] + }, + { + "node_id:": 533, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_91_output_0" + }, + { + "input_dimension:": "36 1 1 ", + "input_name:": "/Unsqueeze_188_output_0" + } + ], + "node_name:": "/Mul_57", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Mul_57_output_0" + } + ] + }, + { + "node_id:": 534, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Mul_57_output_0" + }, + { + "input_dimension:": "384 384 ", + "input_name:": "onnx::MatMul_7672" + } + ], + "node_name:": "/proj_5/MatMul", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/proj_5/MatMul_output_0" + } + ] + }, + { + "node_id:": 535, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_90_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/proj_5/MatMul_output_0" + } + ], + "node_name:": "/Add_95", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_95_output_0" + } + ] + }, + { + "node_id:": 536, + "node_inputs:": [ + { + "input_dimension:": "36 1 512 ", + "input_name:": "/in_proj_5/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_285_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_568_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_46", + "node_outputs:": [ + { + "output_dimension:": "36 1 192 ", + "output_name:": "/Slice_46_output_0" + } + ] + }, + { + "node_id:": 537, + "node_inputs:": [ + { + "input_dimension:": "4 144 1 192 ", + "input_name:": "cached_key_1" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_embed/Constant_7_output_0" + } + ], + "node_name:": "/Gather_109", + "node_outputs:": [ + { + "output_dimension:": "144 1 192 ", + "output_name:": "/Gather_109_output_0" + } + ] + }, + { + "node_id:": 538, + "node_inputs:": [ + { + "input_dimension:": "144 1 192 ", + "input_name:": "/Gather_109_output_0" + }, + { + "input_dimension:": "36 1 192 ", + "input_name:": "/Slice_46_output_0" + } + ], + "node_name:": "/Concat_97", + "node_outputs:": [ + { + "output_dimension:": "180 1 192 ", + "output_name:": "/Concat_97_output_0" + } + ] + }, + { + "node_id:": 539, + "node_inputs:": [ + { + "input_dimension:": "36 1 512 ", + "input_name:": "/in_proj_5/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1118_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_48", + "node_outputs:": [ + { + "output_dimension:": "36 1 32 ", + "output_name:": "/Slice_48_output_0" + } + ] + }, + { + "node_id:": 540, + "node_inputs:": [ + { + "input_dimension:": "36 1 32 ", + "input_name:": "/Slice_48_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_100_output_0" + } + ], + "node_name:": "/Reshape_56", + "node_outputs:": [ + { + "output_dimension:": "36 1 8 4 ", + "output_name:": "/Reshape_56_output_0" + } + ] + }, + { + "node_id:": 541, + "node_inputs:": [ + { + "input_dimension:": "36 1 8 4 ", + "input_name:": "/Reshape_56_output_0" + } + ], + "node_name:": "/Transpose_63", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 4 ", + "output_name:": "/Transpose_63_output_0" + } + ] + }, + { + "node_id:": 542, + "node_inputs:": [ + { + "input_dimension:": "1 8 36 4 ", + "input_name:": "/Transpose_63_output_0" + }, + { + "input_dimension:": "1 8 4 215 ", + "input_name:": "/Transpose_65_output_0" + } + ], + "node_name:": "/MatMul_25", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 215 ", + "output_name:": "/MatMul_25_output_0" + } + ] + }, + { + "node_id:": 543, + "node_inputs:": [ + { + "input_dimension:": "180 1 192 ", + "input_name:": "/Concat_97_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_266_output_0" + } + ], + "node_name:": "/Reshape_57", + "node_outputs:": [ + { + "output_dimension:": "180 1 8 24 ", + "output_name:": "/Reshape_57_output_0" + } + ] + }, + { + "node_id:": 544, + "node_inputs:": [ + { + "input_dimension:": "180 1 8 24 ", + "input_name:": "/Reshape_57_output_0" + } + ], + "node_name:": "/Transpose_64", + "node_outputs:": [ + { + "output_dimension:": "1 8 24 180 ", + "output_name:": "/Transpose_64_output_0" + } + ] + }, + { + "node_id:": 545, + "node_inputs:": [ + { + "input_dimension:": "36 1 512 ", + "input_name:": "/in_proj_5/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_285_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_45", + "node_outputs:": [ + { + "output_dimension:": "36 1 192 ", + "output_name:": "/Slice_45_output_0" + } + ] + }, + { + "node_id:": 546, + "node_inputs:": [ + { + "input_dimension:": "36 1 192 ", + "input_name:": "/Slice_45_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_99_output_0" + } + ], + "node_name:": "/Reshape_55", + "node_outputs:": [ + { + "output_dimension:": "36 1 8 24 ", + "output_name:": "/Reshape_55_output_0" + } + ] + }, + { + "node_id:": 547, + "node_inputs:": [ + { + "input_dimension:": "36 1 8 24 ", + "input_name:": "/Reshape_55_output_0" + } + ], + "node_name:": "/Transpose_62", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 24 ", + "output_name:": "/Transpose_62_output_0" + } + ] + }, + { + "node_id:": 548, + "node_inputs:": [ + { + "input_dimension:": "1 8 36 24 ", + "input_name:": "/Transpose_62_output_0" + }, + { + "input_dimension:": "1 8 24 180 ", + "input_name:": "/Transpose_64_output_0" + } + ], + "node_name:": "/MatMul_26", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 180 ", + "output_name:": "/MatMul_26_output_0" + } + ] + }, + { + "node_id:": 549, + "node_inputs:": [ + { + "input_dimension:": "1 8 36 215 ", + "input_name:": "/MatMul_25_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Concat_106_output_0" + } + ], + "node_name:": "/Reshape_60", + "node_outputs:": [ + { + "output_dimension:": "288 215 ", + "output_name:": "/Reshape_60_output_0" + } + ] + }, + { + "node_id:": 550, + "node_inputs:": [ + { + "input_dimension:": "288 215 ", + "input_name:": "/Reshape_60_output_0" + }, + { + "input_dimension:": "288 180 ", + "input_name:": "/Add_97_output_0" + } + ], + "node_name:": "/GatherElements_5", + "node_outputs:": [ + { + "output_dimension:": "288 180 ", + "output_name:": "/GatherElements_5_output_0" + } + ] + }, + { + "node_id:": 551, + "node_inputs:": [ + { + "input_dimension:": "288 180 ", + "input_name:": "/GatherElements_5_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_107_output_0" + } + ], + "node_name:": "/Reshape_61", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 180 ", + "output_name:": "/Reshape_61_output_0" + } + ] + }, + { + "node_id:": 552, + "node_inputs:": [ + { + "input_dimension:": "1 8 36 180 ", + "input_name:": "/MatMul_26_output_0" + }, + { + "input_dimension:": "1 8 36 180 ", + "input_name:": "/Reshape_61_output_0" + } + ], + "node_name:": "/Add_98", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 180 ", + "output_name:": "/Add_98_output_0" + } + ] + }, + { + "node_id:": 553, + "node_inputs:": [ + { + "input_dimension:": "1 8 36 180 ", + "input_name:": "/Add_98_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_108_output_0" + } + ], + "node_name:": "/Reshape_62", + "node_outputs:": [ + { + "output_dimension:": "8 36 180 ", + "output_name:": "/Reshape_62_output_0" + } + ] + }, + { + "node_id:": 554, + "node_inputs:": [ + { + "input_dimension:": "8 36 180 ", + "input_name:": "/Reshape_62_output_0" + } + ], + "node_name:": "/Softmax_5", + "node_outputs:": [ + { + "output_dimension:": "8 36 180 ", + "output_name:": "/Softmax_5_output_0" + } + ] + }, + { + "node_id:": 555, + "node_inputs:": [ + { + "input_dimension:": "36 1 512 ", + "input_name:": "/in_proj_5/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_568_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1118_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_47", + "node_outputs:": [ + { + "output_dimension:": "36 1 96 ", + "output_name:": "/Slice_47_output_0" + } + ] + }, + { + "node_id:": 556, + "node_inputs:": [ + { + "input_dimension:": "4 144 1 96 ", + "input_name:": "cached_val_1" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_embed/Constant_7_output_0" + } + ], + "node_name:": "/Gather_110", + "node_outputs:": [ + { + "output_dimension:": "144 1 96 ", + "output_name:": "/Gather_110_output_0" + } + ] + }, + { + "node_id:": 557, + "node_inputs:": [ + { + "input_dimension:": "144 1 96 ", + "input_name:": "/Gather_110_output_0" + }, + { + "input_dimension:": "36 1 96 ", + "input_name:": "/Slice_47_output_0" + } + ], + "node_name:": "/Concat_98", + "node_outputs:": [ + { + "output_dimension:": "180 1 96 ", + "output_name:": "/Concat_98_output_0" + } + ] + }, + { + "node_id:": 558, + "node_inputs:": [ + { + "input_dimension:": "180 1 96 ", + "input_name:": "/Concat_98_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_102_output_0" + } + ], + "node_name:": "/Reshape_58", + "node_outputs:": [ + { + "output_dimension:": "180 8 12 ", + "output_name:": "/Reshape_58_output_0" + } + ] + }, + { + "node_id:": 559, + "node_inputs:": [ + { + "input_dimension:": "180 8 12 ", + "input_name:": "/Reshape_58_output_0" + } + ], + "node_name:": "/Transpose_61", + "node_outputs:": [ + { + "output_dimension:": "8 180 12 ", + "output_name:": "/Transpose_61_output_0" + } + ] + }, + { + "node_id:": 560, + "node_inputs:": [ + { + "input_dimension:": "8 36 180 ", + "input_name:": "/Softmax_5_output_0" + }, + { + "input_dimension:": "8 180 12 ", + "input_name:": "/Transpose_61_output_0" + } + ], + "node_name:": "/MatMul_27", + "node_outputs:": [ + { + "output_dimension:": "8 36 12 ", + "output_name:": "/MatMul_27_output_0" + } + ] + }, + { + "node_id:": 561, + "node_inputs:": [ + { + "input_dimension:": "8 36 12 ", + "input_name:": "/MatMul_27_output_0" + } + ], + "node_name:": "/Transpose_66", + "node_outputs:": [ + { + "output_dimension:": "36 8 12 ", + "output_name:": "/Transpose_66_output_0" + } + ] + }, + { + "node_id:": 562, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_95_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_99_output_0" + } + ], + "node_name:": "/Add_100", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_100_output_0" + } + ] + }, + { + "node_id:": 563, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_100_output_0" + } + ], + "node_name:": "/Transpose_67", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Transpose_67_output_0" + } + ] + }, + { + "node_id:": 564, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Transpose_67_output_0" + }, + { + "input_dimension:": "768 384 1 ", + "input_name:": "encoder.encoders.1.encoder.layers.3.conv_module1.pointwise_conv1.weight" + }, + { + "input_dimension:": "768 ", + "input_name:": "encoder.encoders.1.encoder.layers.3.conv_module1.pointwise_conv1.bias" + } + ], + "node_name:": "/pointwise_conv1_10/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 768 36 ", + "output_name:": "/pointwise_conv1_10/Conv_output_0" + } + ] + }, + { + "node_id:": 565, + "node_inputs:": [ + { + "input_dimension:": "1 768 36 ", + "input_name:": "/pointwise_conv1_10/Conv_output_0" + } + ], + "node_name:": "/Split_10", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Split_10_output_0" + }, + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Split_10_output_1" + } + ] + }, + { + "node_id:": 566, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Split_10_output_1" + } + ], + "node_name:": "/Sigmoid_10", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Sigmoid_10_output_0" + } + ] + }, + { + "node_id:": 567, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Split_10_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Sigmoid_10_output_0" + } + ], + "node_name:": "/Mul_62", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Mul_62_output_0" + } + ] + }, + { + "node_id:": 568, + "node_inputs:": [ + { + "input_dimension:": "4 1 384 30 ", + "input_name:": "cached_conv1_1" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_embed/Constant_7_output_0" + } + ], + "node_name:": "/Gather_112", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Gather_112_output_0" + } + ] + }, + { + "node_id:": 569, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Gather_112_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Mul_62_output_0" + } + ], + "node_name:": "/Concat_110", + "node_outputs:": [ + { + "output_dimension:": "1 384 66 ", + "output_name:": "/Concat_110_output_0" + } + ] + }, + { + "node_id:": 570, + "node_inputs:": [ + { + "input_dimension:": "1 384 66 ", + "input_name:": "/Concat_110_output_0" + }, + { + "input_dimension:": "384 1 31 ", + "input_name:": "encoder.encoders.1.encoder.layers.3.conv_module1.depthwise_conv.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.1.encoder.layers.3.conv_module1.depthwise_conv.bias" + } + ], + "node_name:": "/depthwise_conv_10/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/depthwise_conv_10/Conv_output_0" + } + ] + }, + { + "node_id:": 571, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/depthwise_conv_10/Conv_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/activation_10/Sub", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_10/Sub_output_0" + } + ] + }, + { + "node_id:": 572, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_10/Sub_output_0" + } + ], + "node_name:": "/activation_10/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_10/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 573, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/depthwise_conv_10/Conv_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_10/Sigmoid_output_0" + } + ], + "node_name:": "/activation_10/Mul", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_10/Mul_output_0" + } + ] + }, + { + "node_id:": 574, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_10/Mul_output_0" + }, + { + "input_dimension:": "384 384 1 ", + "input_name:": "encoder.encoders.1.encoder.layers.3.conv_module1.pointwise_conv2.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.1.encoder.layers.3.conv_module1.pointwise_conv2.bias" + } + ], + "node_name:": "/pointwise_conv2_10/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/pointwise_conv2_10/Conv_output_0" + } + ] + }, + { + "node_id:": 575, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/pointwise_conv2_10/Conv_output_0" + } + ], + "node_name:": "/Transpose_68", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Transpose_68_output_0" + } + ] + }, + { + "node_id:": 576, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_100_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Transpose_68_output_0" + } + ], + "node_name:": "/Add_101", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_101_output_0" + } + ] + }, + { + "node_id:": 577, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward2/in_proj_5/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward2/activation_5/Sub", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward2/activation_5/Sub_output_0" + } + ] + }, + { + "node_id:": 578, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward2/activation_5/Sub_output_0" + } + ], + "node_name:": "/feed_forward2/activation_5/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward2/activation_5/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 579, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward2/in_proj_5/Add_output_0" + }, + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward2/activation_5/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward2/activation_5/Mul", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward2/activation_5/Mul_output_0" + } + ] + }, + { + "node_id:": 580, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_101_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/feed_forward2/out_proj_5/Add_output_0" + } + ], + "node_name:": "/Add_102", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_102_output_0" + } + ] + }, + { + "node_id:": 581, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_102_output_0" + }, + { + "input_dimension:": "384 96 ", + "input_name:": "onnx::MatMul_7718" + } + ], + "node_name:": "/in_proj2_5/MatMul", + "node_outputs:": [ + { + "output_dimension:": "36 1 96 ", + "output_name:": "/in_proj2_5/MatMul_output_0" + } + ] + }, + { + "node_id:": 582, + "node_inputs:": [ + { + "input_dimension:": "4 144 1 96 ", + "input_name:": "cached_val2_1" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_embed/Constant_7_output_0" + } + ], + "node_name:": "/Gather_111", + "node_outputs:": [ + { + "output_dimension:": "144 1 96 ", + "output_name:": "/Gather_111_output_0" + } + ] + }, + { + "node_id:": 583, + "node_inputs:": [ + { + "input_dimension:": "144 1 96 ", + "input_name:": "/Gather_111_output_0" + }, + { + "input_dimension:": "36 1 96 ", + "input_name:": "/in_proj2_5/MatMul_output_0" + } + ], + "node_name:": "/Concat_111", + "node_outputs:": [ + { + "output_dimension:": "180 1 96 ", + "output_name:": "/Concat_111_output_0" + } + ] + }, + { + "node_id:": 584, + "node_inputs:": [ + { + "input_dimension:": "180 1 96 ", + "input_name:": "/Concat_111_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_112_output_0" + } + ], + "node_name:": "/Reshape_64", + "node_outputs:": [ + { + "output_dimension:": "180 8 12 ", + "output_name:": "/Reshape_64_output_0" + } + ] + }, + { + "node_id:": 585, + "node_inputs:": [ + { + "input_dimension:": "180 8 12 ", + "input_name:": "/Reshape_64_output_0" + } + ], + "node_name:": "/Transpose_69", + "node_outputs:": [ + { + "output_dimension:": "8 180 12 ", + "output_name:": "/Transpose_69_output_0" + } + ] + }, + { + "node_id:": 586, + "node_inputs:": [ + { + "input_dimension:": "8 36 180 ", + "input_name:": "/Softmax_5_output_0" + }, + { + "input_dimension:": "8 180 12 ", + "input_name:": "/Transpose_69_output_0" + } + ], + "node_name:": "/MatMul_29", + "node_outputs:": [ + { + "output_dimension:": "8 36 12 ", + "output_name:": "/MatMul_29_output_0" + } + ] + }, + { + "node_id:": 587, + "node_inputs:": [ + { + "input_dimension:": "8 36 12 ", + "input_name:": "/MatMul_29_output_0" + } + ], + "node_name:": "/Transpose_70", + "node_outputs:": [ + { + "output_dimension:": "36 8 12 ", + "output_name:": "/Transpose_70_output_0" + } + ] + }, + { + "node_id:": 588, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_102_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/out_proj2_5/Add_output_0" + } + ], + "node_name:": "/Add_104", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_104_output_0" + } + ] + }, + { + "node_id:": 589, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_104_output_0" + } + ], + "node_name:": "/Transpose_71", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Transpose_71_output_0" + } + ] + }, + { + "node_id:": 590, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Transpose_71_output_0" + }, + { + "input_dimension:": "768 384 1 ", + "input_name:": "encoder.encoders.1.encoder.layers.3.conv_module2.pointwise_conv1.weight" + }, + { + "input_dimension:": "768 ", + "input_name:": "encoder.encoders.1.encoder.layers.3.conv_module2.pointwise_conv1.bias" + } + ], + "node_name:": "/pointwise_conv1_11/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 768 36 ", + "output_name:": "/pointwise_conv1_11/Conv_output_0" + } + ] + }, + { + "node_id:": 591, + "node_inputs:": [ + { + "input_dimension:": "1 768 36 ", + "input_name:": "/pointwise_conv1_11/Conv_output_0" + } + ], + "node_name:": "/Split_11", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Split_11_output_0" + }, + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Split_11_output_1" + } + ] + }, + { + "node_id:": 592, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Split_11_output_1" + } + ], + "node_name:": "/Sigmoid_11", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Sigmoid_11_output_0" + } + ] + }, + { + "node_id:": 593, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Split_11_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Sigmoid_11_output_0" + } + ], + "node_name:": "/Mul_64", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Mul_64_output_0" + } + ] + }, + { + "node_id:": 594, + "node_inputs:": [ + { + "input_dimension:": "4 1 384 30 ", + "input_name:": "cached_conv2_1" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_embed/Constant_7_output_0" + } + ], + "node_name:": "/Gather_113", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Gather_113_output_0" + } + ] + }, + { + "node_id:": 595, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Gather_113_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Mul_64_output_0" + } + ], + "node_name:": "/Concat_114", + "node_outputs:": [ + { + "output_dimension:": "1 384 66 ", + "output_name:": "/Concat_114_output_0" + } + ] + }, + { + "node_id:": 596, + "node_inputs:": [ + { + "input_dimension:": "1 384 66 ", + "input_name:": "/Concat_114_output_0" + }, + { + "input_dimension:": "384 1 31 ", + "input_name:": "encoder.encoders.1.encoder.layers.3.conv_module2.depthwise_conv.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.1.encoder.layers.3.conv_module2.depthwise_conv.bias" + } + ], + "node_name:": "/depthwise_conv_11/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/depthwise_conv_11/Conv_output_0" + } + ] + }, + { + "node_id:": 597, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/depthwise_conv_11/Conv_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/activation_11/Sub", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_11/Sub_output_0" + } + ] + }, + { + "node_id:": 598, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_11/Sub_output_0" + } + ], + "node_name:": "/activation_11/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_11/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 599, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/depthwise_conv_11/Conv_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_11/Sigmoid_output_0" + } + ], + "node_name:": "/activation_11/Mul", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_11/Mul_output_0" + } + ] + }, + { + "node_id:": 600, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_11/Mul_output_0" + }, + { + "input_dimension:": "384 384 1 ", + "input_name:": "encoder.encoders.1.encoder.layers.3.conv_module2.pointwise_conv2.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.1.encoder.layers.3.conv_module2.pointwise_conv2.bias" + } + ], + "node_name:": "/pointwise_conv2_11/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/pointwise_conv2_11/Conv_output_0" + } + ] + }, + { + "node_id:": 601, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/pointwise_conv2_11/Conv_output_0" + } + ], + "node_name:": "/Transpose_72", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Transpose_72_output_0" + } + ] + }, + { + "node_id:": 602, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_104_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Transpose_72_output_0" + } + ], + "node_name:": "/Add_105", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_105_output_0" + } + ] + }, + { + "node_id:": 603, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward3/in_proj_5/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward3/activation_5/Sub", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward3/activation_5/Sub_output_0" + } + ] + }, + { + "node_id:": 604, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward3/activation_5/Sub_output_0" + } + ], + "node_name:": "/feed_forward3/activation_5/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward3/activation_5/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 605, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward3/in_proj_5/Add_output_0" + }, + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward3/activation_5/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward3/activation_5/Mul", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward3/activation_5/Mul_output_0" + } + ] + }, + { + "node_id:": 606, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_105_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/feed_forward3/out_proj_5/Add_output_0" + } + ], + "node_name:": "/Add_106", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_106_output_0" + } + ] + }, + { + "node_id:": 607, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_106_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_106_output_0" + } + ], + "node_name:": "/norm_final_5/Mul", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/norm_final_5/Mul_output_0" + } + ] + }, + { + "node_id:": 608, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/norm_final_5/Mul_output_0" + } + ], + "node_name:": "/norm_final_5/ReduceMean", + "node_outputs:": [ + { + "output_dimension:": "36 1 1 ", + "output_name:": "/norm_final_5/ReduceMean_output_0" + } + ] + }, + { + "node_id:": 609, + "node_inputs:": [ + { + "input_dimension:": "36 1 1 ", + "input_name:": "/norm_final_5/ReduceMean_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/norm_final_5/Constant_output_0" + } + ], + "node_name:": "/norm_final_5/Add", + "node_outputs:": [ + { + "output_dimension:": "36 1 1 ", + "output_name:": "/norm_final_5/Add_output_0" + } + ] + }, + { + "node_id:": 610, + "node_inputs:": [ + { + "input_dimension:": "36 1 1 ", + "input_name:": "/norm_final_5/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/norm_final_1/Constant_1_output_0" + } + ], + "node_name:": "/norm_final_5/Pow", + "node_outputs:": [ + { + "output_dimension:": "36 1 1 ", + "output_name:": "/norm_final_5/Pow_output_0" + } + ] + }, + { + "node_id:": 611, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_106_output_0" + }, + { + "input_dimension:": "36 1 1 ", + "input_name:": "/norm_final_5/Pow_output_0" + } + ], + "node_name:": "/norm_final_5/Mul_1", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/norm_final_5/Mul_1_output_0" + } + ] + }, + { + "node_id:": 612, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/norm_final_5/Mul_1_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_89_output_0" + } + ], + "node_name:": "/Sub_17", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Sub_17_output_0" + } + ] + }, + { + "node_id:": 613, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Sub_17_output_0" + }, + { + "input_dimension:": "", + "input_name:": "encoder.encoders.1.encoder.layers.3.bypass_scale" + } + ], + "node_name:": "/Mul_65", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Mul_65_output_0" + } + ] + }, + { + "node_id:": 614, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_89_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Mul_65_output_0" + } + ], + "node_name:": "/Add_107", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_107_output_0" + } + ] + }, + { + "node_id:": 615, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_107_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/upsample/Unsqueeze", + "node_outputs:": [ + { + "output_dimension:": "36 1 1 384 ", + "output_name:": "/upsample/Unsqueeze_output_0" + } + ] + }, + { + "node_id:": 616, + "node_inputs:": [ + { + "input_dimension:": "36 1 1 384 ", + "input_name:": "/upsample/Unsqueeze_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/upsample/Where_output_0" + } + ], + "node_name:": "/upsample/Expand", + "node_outputs:": [ + { + "output_dimension:": "36 2 1 384 ", + "output_name:": "/upsample/Expand_output_0" + } + ] + }, + { + "node_id:": 617, + "node_inputs:": [ + { + "input_dimension:": "36 2 1 384 ", + "input_name:": "/upsample/Expand_output_0" + }, + { + "input_dimension:": "2 1 384 ", + "input_name:": "onnx::Add_7732" + } + ], + "node_name:": "/upsample/Add", + "node_outputs:": [ + { + "output_dimension:": "36 2 1 384 ", + "output_name:": "/upsample/Add_output_0" + } + ] + }, + { + "node_id:": 618, + "node_inputs:": [ + { + "input_dimension:": "36 2 1 384 ", + "input_name:": "/upsample/Add_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/upsample/Concat_1_output_0" + } + ], + "node_name:": "/upsample/Reshape_1", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/upsample/Reshape_1_output_0" + } + ] + }, + { + "node_id:": 619, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/upsample/Reshape_1_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_246_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_54", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/Slice_54_output_0" + } + ] + }, + { + "node_id:": 620, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Slice_54_output_0" + }, + { + "input_dimension:": "", + "input_name:": "onnx::Mul_7736" + } + ], + "node_name:": "/out_combiner/Mul_1", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/out_combiner/Mul_1_output_0" + } + ] + }, + { + "node_id:": 621, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_35_output_0" + }, + { + "input_dimension:": "", + "input_name:": "encoder.encoders.1.out_combiner.weight1" + } + ], + "node_name:": "/out_combiner/Mul", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/out_combiner/Mul_output_0" + } + ] + }, + { + "node_id:": 622, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/out_combiner/Mul_output_0" + }, + { + "input_dimension:": "72 1 384 ", + "input_name:": "/out_combiner/Mul_1_output_0" + } + ], + "node_name:": "/out_combiner/Add", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/out_combiner/Add_output_0" + } + ] + }, + { + "node_id:": 623, + "node_inputs:": [ + { + "input_dimension:": "3 1 ", + "input_name:": "cached_len_2" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_404_output_0" + } + ], + "node_name:": "/Gather_171", + "node_outputs:": [ + { + "output_dimension:": "1 ", + "output_name:": "/Gather_171_output_0" + } + ] + }, + { + "node_id:": 624, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_171_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_318", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_318_output_0" + } + ] + }, + { + "node_id:": 625, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_171_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Unsqueeze_315", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_315_output_0" + } + ] + }, + { + "node_id:": 626, + "node_inputs:": [ + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_315_output_0" + } + ], + "node_name:": "/Cast_40", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Cast_40_output_0" + } + ] + }, + { + "node_id:": 627, + "node_inputs:": [ + { + "input_dimension:": "3 1 384 ", + "input_name:": "cached_avg_2" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_404_output_0" + } + ], + "node_name:": "/Gather_172", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Gather_172_output_0" + } + ] + }, + { + "node_id:": 628, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Gather_172_output_0" + }, + { + "input_dimension:": "1 1 ", + "input_name:": "/Cast_40_output_0" + } + ], + "node_name:": "/Mul_88", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Mul_88_output_0" + } + ] + }, + { + "node_id:": 629, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Mul_88_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_316", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 ", + "output_name:": "/Unsqueeze_316_output_0" + } + ] + }, + { + "node_id:": 630, + "node_inputs:": [ + { + "input_dimension:": "3 1 ", + "input_name:": "cached_len_2" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_pos_1/Constant_2_output_0" + } + ], + "node_name:": "/Gather_150", + "node_outputs:": [ + { + "output_dimension:": "1 ", + "output_name:": "/Gather_150_output_0" + } + ] + }, + { + "node_id:": 631, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_150_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_284", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_284_output_0" + } + ] + }, + { + "node_id:": 632, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_150_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Unsqueeze_281", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_281_output_0" + } + ] + }, + { + "node_id:": 633, + "node_inputs:": [ + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_281_output_0" + } + ], + "node_name:": "/Cast_35", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Cast_35_output_0" + } + ] + }, + { + "node_id:": 634, + "node_inputs:": [ + { + "input_dimension:": "3 1 384 ", + "input_name:": "cached_avg_2" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_pos_1/Constant_2_output_0" + } + ], + "node_name:": "/Gather_151", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Gather_151_output_0" + } + ] + }, + { + "node_id:": 635, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Gather_151_output_0" + }, + { + "input_dimension:": "1 1 ", + "input_name:": "/Cast_35_output_0" + } + ], + "node_name:": "/Mul_77", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Mul_77_output_0" + } + ] + }, + { + "node_id:": 636, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Mul_77_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_282", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 ", + "output_name:": "/Unsqueeze_282_output_0" + } + ] + }, + { + "node_id:": 637, + "node_inputs:": [ + { + "input_dimension:": "3 1 ", + "input_name:": "cached_len_2" + }, + { + "input_dimension:": "", + "input_name:": "/upsample_3/Constant_output_0" + } + ], + "node_name:": "/Gather_129", + "node_outputs:": [ + { + "output_dimension:": "1 ", + "output_name:": "/Gather_129_output_0" + } + ] + }, + { + "node_id:": 638, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_129_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_250", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_250_output_0" + } + ] + }, + { + "node_id:": 639, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_129_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Unsqueeze_247", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_247_output_0" + } + ] + }, + { + "node_id:": 640, + "node_inputs:": [ + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_247_output_0" + } + ], + "node_name:": "/Cast_30", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Cast_30_output_0" + } + ] + }, + { + "node_id:": 641, + "node_inputs:": [ + { + "input_dimension:": "3 1 384 ", + "input_name:": "cached_avg_2" + }, + { + "input_dimension:": "", + "input_name:": "/upsample_3/Constant_output_0" + } + ], + "node_name:": "/Gather_130", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Gather_130_output_0" + } + ] + }, + { + "node_id:": 642, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Gather_130_output_0" + }, + { + "input_dimension:": "1 1 ", + "input_name:": "/Cast_30_output_0" + } + ], + "node_name:": "/Mul_66", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Mul_66_output_0" + } + ] + }, + { + "node_id:": 643, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Mul_66_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_248", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 ", + "output_name:": "/Unsqueeze_248_output_0" + } + ] + }, + { + "node_id:": 644, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/out_combiner/Add_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/downsample_1/Concat_output_0" + } + ], + "node_name:": "/downsample_1/Reshape", + "node_outputs:": [ + { + "output_dimension:": "18 4 1 384 ", + "output_name:": "/downsample_1/Reshape_output_0" + } + ] + }, + { + "node_id:": 645, + "node_inputs:": [ + { + "input_dimension:": "18 4 1 384 ", + "input_name:": "/downsample_1/Reshape_output_0" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.2.downsample.query" + } + ], + "node_name:": "/downsample_1/Mul", + "node_outputs:": [ + { + "output_dimension:": "18 4 1 384 ", + "output_name:": "/downsample_1/Mul_output_0" + } + ] + }, + { + "node_id:": 646, + "node_inputs:": [ + { + "input_dimension:": "18 4 1 384 ", + "input_name:": "/downsample_1/Mul_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_246_output_0" + } + ], + "node_name:": "/downsample_1/ReduceSum", + "node_outputs:": [ + { + "output_dimension:": "18 4 1 1 ", + "output_name:": "/downsample_1/ReduceSum_output_0" + } + ] + }, + { + "node_id:": 647, + "node_inputs:": [ + { + "input_dimension:": "18 4 1 1 ", + "input_name:": "/downsample_1/ReduceSum_output_0" + } + ], + "node_name:": "/downsample_1/Softmax", + "node_outputs:": [ + { + "output_dimension:": "18 4 1 1 ", + "output_name:": "/downsample_1/Softmax_output_0" + } + ] + }, + { + "node_id:": 648, + "node_inputs:": [ + { + "input_dimension:": "18 4 1 384 ", + "input_name:": "/downsample_1/Reshape_output_0" + }, + { + "input_dimension:": "18 4 1 1 ", + "input_name:": "/downsample_1/Softmax_output_0" + } + ], + "node_name:": "/downsample_1/Mul_1", + "node_outputs:": [ + { + "output_dimension:": "18 4 1 384 ", + "output_name:": "/downsample_1/Mul_1_output_0" + } + ] + }, + { + "node_id:": 649, + "node_inputs:": [ + { + "input_dimension:": "18 4 1 384 ", + "input_name:": "/downsample_1/Mul_1_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/downsample_1/ReduceSum_1", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/downsample_1/ReduceSum_1_output_0" + } + ] + }, + { + "node_id:": 650, + "node_inputs:": [ + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward1/in_proj_6/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward1/activation_6/Sub", + "node_outputs:": [ + { + "output_dimension:": "18 1 2048 ", + "output_name:": "/feed_forward1/activation_6/Sub_output_0" + } + ] + }, + { + "node_id:": 651, + "node_inputs:": [ + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward1/activation_6/Sub_output_0" + } + ], + "node_name:": "/feed_forward1/activation_6/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "18 1 2048 ", + "output_name:": "/feed_forward1/activation_6/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 652, + "node_inputs:": [ + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward1/in_proj_6/Add_output_0" + }, + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward1/activation_6/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward1/activation_6/Mul", + "node_outputs:": [ + { + "output_dimension:": "18 1 2048 ", + "output_name:": "/feed_forward1/activation_6/Mul_output_0" + } + ] + }, + { + "node_id:": 653, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/downsample_1/ReduceSum_1_output_0" + }, + { + "input_dimension:": "18 1 384 ", + "input_name:": "/feed_forward1/out_proj_6/Add_output_0" + } + ], + "node_name:": "/Add_108", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Add_108_output_0" + } + ] + }, + { + "node_id:": 654, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_108_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_631_output_0" + } + ], + "node_name:": "/CumSum_6", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/CumSum_6_output_0" + } + ] + }, + { + "node_id:": 655, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/CumSum_6_output_0" + }, + { + "input_dimension:": "1 1 384 ", + "input_name:": "/Unsqueeze_248_output_0" + } + ], + "node_name:": "/Add_109", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Add_109_output_0" + } + ] + }, + { + "node_id:": 656, + "node_inputs:": [ + { + "input_dimension:": "18 1 ", + "input_name:": "/Unsqueeze_249_output_0" + }, + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_250_output_0" + } + ], + "node_name:": "/Add_111", + "node_outputs:": [ + { + "output_dimension:": "18 1 ", + "output_name:": "/Add_111_output_0" + } + ] + }, + { + "node_id:": 657, + "node_inputs:": [ + { + "input_dimension:": "18 1 ", + "input_name:": "/Add_111_output_0" + } + ], + "node_name:": "/Cast_32", + "node_outputs:": [ + { + "output_dimension:": "18 1 ", + "output_name:": "/Cast_32_output_0" + } + ] + }, + { + "node_id:": 658, + "node_inputs:": [ + { + "input_dimension:": "18 1 ", + "input_name:": "/Cast_32_output_0" + } + ], + "node_name:": "/Reciprocal_6", + "node_outputs:": [ + { + "output_dimension:": "18 1 ", + "output_name:": "/Reciprocal_6_output_0" + } + ] + }, + { + "node_id:": 659, + "node_inputs:": [ + { + "input_dimension:": "18 1 ", + "input_name:": "/Reciprocal_6_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + } + ], + "node_name:": "/Unsqueeze_251", + "node_outputs:": [ + { + "output_dimension:": "18 1 1 ", + "output_name:": "/Unsqueeze_251_output_0" + } + ] + }, + { + "node_id:": 660, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_109_output_0" + }, + { + "input_dimension:": "18 1 1 ", + "input_name:": "/Unsqueeze_251_output_0" + } + ], + "node_name:": "/Mul_68", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Mul_68_output_0" + } + ] + }, + { + "node_id:": 661, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Mul_68_output_0" + }, + { + "input_dimension:": "384 384 ", + "input_name:": "onnx::MatMul_7743" + } + ], + "node_name:": "/proj_6/MatMul", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/proj_6/MatMul_output_0" + } + ] + }, + { + "node_id:": 662, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_108_output_0" + }, + { + "input_dimension:": "18 1 384 ", + "input_name:": "/proj_6/MatMul_output_0" + } + ], + "node_name:": "/Add_113", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Add_113_output_0" + } + ] + }, + { + "node_id:": 663, + "node_inputs:": [ + { + "input_dimension:": "18 1 512 ", + "input_name:": "/in_proj_6/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_285_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_568_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_56", + "node_outputs:": [ + { + "output_dimension:": "18 1 192 ", + "output_name:": "/Slice_56_output_0" + } + ] + }, + { + "node_id:": 664, + "node_inputs:": [ + { + "input_dimension:": "3 72 1 192 ", + "input_name:": "cached_key_2" + }, + { + "input_dimension:": "", + "input_name:": "/upsample_3/Constant_output_0" + } + ], + "node_name:": "/Gather_131", + "node_outputs:": [ + { + "output_dimension:": "72 1 192 ", + "output_name:": "/Gather_131_output_0" + } + ] + }, + { + "node_id:": 665, + "node_inputs:": [ + { + "input_dimension:": "72 1 192 ", + "input_name:": "/Gather_131_output_0" + }, + { + "input_dimension:": "18 1 192 ", + "input_name:": "/Slice_56_output_0" + } + ], + "node_name:": "/Concat_122", + "node_outputs:": [ + { + "output_dimension:": "90 1 192 ", + "output_name:": "/Concat_122_output_0" + } + ] + }, + { + "node_id:": 666, + "node_inputs:": [ + { + "input_dimension:": "18 1 512 ", + "input_name:": "/in_proj_6/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1118_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_58", + "node_outputs:": [ + { + "output_dimension:": "18 1 32 ", + "output_name:": "/Slice_58_output_0" + } + ] + }, + { + "node_id:": 667, + "node_inputs:": [ + { + "input_dimension:": "18 1 32 ", + "input_name:": "/Slice_58_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_125_output_0" + } + ], + "node_name:": "/Reshape_67", + "node_outputs:": [ + { + "output_dimension:": "18 1 8 4 ", + "output_name:": "/Reshape_67_output_0" + } + ] + }, + { + "node_id:": 668, + "node_inputs:": [ + { + "input_dimension:": "18 1 8 4 ", + "input_name:": "/Reshape_67_output_0" + } + ], + "node_name:": "/Transpose_75", + "node_outputs:": [ + { + "output_dimension:": "1 8 18 4 ", + "output_name:": "/Transpose_75_output_0" + } + ] + }, + { + "node_id:": 669, + "node_inputs:": [ + { + "input_dimension:": "1 8 18 4 ", + "input_name:": "/Transpose_75_output_0" + }, + { + "input_dimension:": "1 8 4 107 ", + "input_name:": "/Transpose_77_output_0" + } + ], + "node_name:": "/MatMul_30", + "node_outputs:": [ + { + "output_dimension:": "1 8 18 107 ", + "output_name:": "/MatMul_30_output_0" + } + ] + }, + { + "node_id:": 670, + "node_inputs:": [ + { + "input_dimension:": "90 1 192 ", + "input_name:": "/Concat_122_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_266_output_0" + } + ], + "node_name:": "/Reshape_68", + "node_outputs:": [ + { + "output_dimension:": "90 1 8 24 ", + "output_name:": "/Reshape_68_output_0" + } + ] + }, + { + "node_id:": 671, + "node_inputs:": [ + { + "input_dimension:": "90 1 8 24 ", + "input_name:": "/Reshape_68_output_0" + } + ], + "node_name:": "/Transpose_76", + "node_outputs:": [ + { + "output_dimension:": "1 8 24 90 ", + "output_name:": "/Transpose_76_output_0" + } + ] + }, + { + "node_id:": 672, + "node_inputs:": [ + { + "input_dimension:": "18 1 512 ", + "input_name:": "/in_proj_6/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_285_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_55", + "node_outputs:": [ + { + "output_dimension:": "18 1 192 ", + "output_name:": "/Slice_55_output_0" + } + ] + }, + { + "node_id:": 673, + "node_inputs:": [ + { + "input_dimension:": "18 1 192 ", + "input_name:": "/Slice_55_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_124_output_0" + } + ], + "node_name:": "/Reshape_66", + "node_outputs:": [ + { + "output_dimension:": "18 1 8 24 ", + "output_name:": "/Reshape_66_output_0" + } + ] + }, + { + "node_id:": 674, + "node_inputs:": [ + { + "input_dimension:": "18 1 8 24 ", + "input_name:": "/Reshape_66_output_0" + } + ], + "node_name:": "/Transpose_74", + "node_outputs:": [ + { + "output_dimension:": "1 8 18 24 ", + "output_name:": "/Transpose_74_output_0" + } + ] + }, + { + "node_id:": 675, + "node_inputs:": [ + { + "input_dimension:": "1 8 18 24 ", + "input_name:": "/Transpose_74_output_0" + }, + { + "input_dimension:": "1 8 24 90 ", + "input_name:": "/Transpose_76_output_0" + } + ], + "node_name:": "/MatMul_31", + "node_outputs:": [ + { + "output_dimension:": "1 8 18 90 ", + "output_name:": "/MatMul_31_output_0" + } + ] + }, + { + "node_id:": 676, + "node_inputs:": [ + { + "input_dimension:": "1 8 18 107 ", + "input_name:": "/MatMul_30_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Concat_131_output_0" + } + ], + "node_name:": "/Reshape_71", + "node_outputs:": [ + { + "output_dimension:": "144 107 ", + "output_name:": "/Reshape_71_output_0" + } + ] + }, + { + "node_id:": 677, + "node_inputs:": [ + { + "input_dimension:": "144 107 ", + "input_name:": "/Reshape_71_output_0" + }, + { + "input_dimension:": "144 90 ", + "input_name:": "/Add_115_output_0" + } + ], + "node_name:": "/GatherElements_6", + "node_outputs:": [ + { + "output_dimension:": "144 90 ", + "output_name:": "/GatherElements_6_output_0" + } + ] + }, + { + "node_id:": 678, + "node_inputs:": [ + { + "input_dimension:": "144 90 ", + "input_name:": "/GatherElements_6_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_132_output_0" + } + ], + "node_name:": "/Reshape_72", + "node_outputs:": [ + { + "output_dimension:": "1 8 18 90 ", + "output_name:": "/Reshape_72_output_0" + } + ] + }, + { + "node_id:": 679, + "node_inputs:": [ + { + "input_dimension:": "1 8 18 90 ", + "input_name:": "/MatMul_31_output_0" + }, + { + "input_dimension:": "1 8 18 90 ", + "input_name:": "/Reshape_72_output_0" + } + ], + "node_name:": "/Add_116", + "node_outputs:": [ + { + "output_dimension:": "1 8 18 90 ", + "output_name:": "/Add_116_output_0" + } + ] + }, + { + "node_id:": 680, + "node_inputs:": [ + { + "input_dimension:": "1 8 18 90 ", + "input_name:": "/Add_116_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_133_output_0" + } + ], + "node_name:": "/Reshape_73", + "node_outputs:": [ + { + "output_dimension:": "8 18 90 ", + "output_name:": "/Reshape_73_output_0" + } + ] + }, + { + "node_id:": 681, + "node_inputs:": [ + { + "input_dimension:": "8 18 90 ", + "input_name:": "/Reshape_73_output_0" + } + ], + "node_name:": "/Softmax_6", + "node_outputs:": [ + { + "output_dimension:": "8 18 90 ", + "output_name:": "/Softmax_6_output_0" + } + ] + }, + { + "node_id:": 682, + "node_inputs:": [ + { + "input_dimension:": "18 1 512 ", + "input_name:": "/in_proj_6/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_568_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1118_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_57", + "node_outputs:": [ + { + "output_dimension:": "18 1 96 ", + "output_name:": "/Slice_57_output_0" + } + ] + }, + { + "node_id:": 683, + "node_inputs:": [ + { + "input_dimension:": "3 72 1 96 ", + "input_name:": "cached_val_2" + }, + { + "input_dimension:": "", + "input_name:": "/upsample_3/Constant_output_0" + } + ], + "node_name:": "/Gather_132", + "node_outputs:": [ + { + "output_dimension:": "72 1 96 ", + "output_name:": "/Gather_132_output_0" + } + ] + }, + { + "node_id:": 684, + "node_inputs:": [ + { + "input_dimension:": "72 1 96 ", + "input_name:": "/Gather_132_output_0" + }, + { + "input_dimension:": "18 1 96 ", + "input_name:": "/Slice_57_output_0" + } + ], + "node_name:": "/Concat_123", + "node_outputs:": [ + { + "output_dimension:": "90 1 96 ", + "output_name:": "/Concat_123_output_0" + } + ] + }, + { + "node_id:": 685, + "node_inputs:": [ + { + "input_dimension:": "90 1 96 ", + "input_name:": "/Concat_123_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_127_output_0" + } + ], + "node_name:": "/Reshape_69", + "node_outputs:": [ + { + "output_dimension:": "90 8 12 ", + "output_name:": "/Reshape_69_output_0" + } + ] + }, + { + "node_id:": 686, + "node_inputs:": [ + { + "input_dimension:": "90 8 12 ", + "input_name:": "/Reshape_69_output_0" + } + ], + "node_name:": "/Transpose_73", + "node_outputs:": [ + { + "output_dimension:": "8 90 12 ", + "output_name:": "/Transpose_73_output_0" + } + ] + }, + { + "node_id:": 687, + "node_inputs:": [ + { + "input_dimension:": "8 18 90 ", + "input_name:": "/Softmax_6_output_0" + }, + { + "input_dimension:": "8 90 12 ", + "input_name:": "/Transpose_73_output_0" + } + ], + "node_name:": "/MatMul_32", + "node_outputs:": [ + { + "output_dimension:": "8 18 12 ", + "output_name:": "/MatMul_32_output_0" + } + ] + }, + { + "node_id:": 688, + "node_inputs:": [ + { + "input_dimension:": "8 18 12 ", + "input_name:": "/MatMul_32_output_0" + } + ], + "node_name:": "/Transpose_78", + "node_outputs:": [ + { + "output_dimension:": "18 8 12 ", + "output_name:": "/Transpose_78_output_0" + } + ] + }, + { + "node_id:": 689, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_113_output_0" + }, + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_117_output_0" + } + ], + "node_name:": "/Add_118", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Add_118_output_0" + } + ] + }, + { + "node_id:": 690, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_118_output_0" + } + ], + "node_name:": "/Transpose_79", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/Transpose_79_output_0" + } + ] + }, + { + "node_id:": 691, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/Transpose_79_output_0" + }, + { + "input_dimension:": "768 384 1 ", + "input_name:": "encoder.encoders.2.encoder.layers.0.conv_module1.pointwise_conv1.weight" + }, + { + "input_dimension:": "768 ", + "input_name:": "encoder.encoders.2.encoder.layers.0.conv_module1.pointwise_conv1.bias" + } + ], + "node_name:": "/pointwise_conv1_12/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 768 18 ", + "output_name:": "/pointwise_conv1_12/Conv_output_0" + } + ] + }, + { + "node_id:": 692, + "node_inputs:": [ + { + "input_dimension:": "1 768 18 ", + "input_name:": "/pointwise_conv1_12/Conv_output_0" + } + ], + "node_name:": "/Split_12", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/Split_12_output_0" + }, + { + "output_dimension:": "1 384 18 ", + "output_name:": "/Split_12_output_1" + } + ] + }, + { + "node_id:": 693, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/Split_12_output_1" + } + ], + "node_name:": "/Sigmoid_12", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/Sigmoid_12_output_0" + } + ] + }, + { + "node_id:": 694, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/Split_12_output_0" + }, + { + "input_dimension:": "1 384 18 ", + "input_name:": "/Sigmoid_12_output_0" + } + ], + "node_name:": "/Mul_73", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/Mul_73_output_0" + } + ] + }, + { + "node_id:": 695, + "node_inputs:": [ + { + "input_dimension:": "3 1 384 30 ", + "input_name:": "cached_conv1_2" + }, + { + "input_dimension:": "", + "input_name:": "/upsample_3/Constant_output_0" + } + ], + "node_name:": "/Gather_134", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Gather_134_output_0" + } + ] + }, + { + "node_id:": 696, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Gather_134_output_0" + }, + { + "input_dimension:": "1 384 18 ", + "input_name:": "/Mul_73_output_0" + } + ], + "node_name:": "/Concat_135", + "node_outputs:": [ + { + "output_dimension:": "1 384 48 ", + "output_name:": "/Concat_135_output_0" + } + ] + }, + { + "node_id:": 697, + "node_inputs:": [ + { + "input_dimension:": "1 384 48 ", + "input_name:": "/Concat_135_output_0" + }, + { + "input_dimension:": "384 1 31 ", + "input_name:": "encoder.encoders.2.encoder.layers.0.conv_module1.depthwise_conv.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.2.encoder.layers.0.conv_module1.depthwise_conv.bias" + } + ], + "node_name:": "/depthwise_conv_12/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/depthwise_conv_12/Conv_output_0" + } + ] + }, + { + "node_id:": 698, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/depthwise_conv_12/Conv_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/activation_12/Sub", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/activation_12/Sub_output_0" + } + ] + }, + { + "node_id:": 699, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/activation_12/Sub_output_0" + } + ], + "node_name:": "/activation_12/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/activation_12/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 700, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/depthwise_conv_12/Conv_output_0" + }, + { + "input_dimension:": "1 384 18 ", + "input_name:": "/activation_12/Sigmoid_output_0" + } + ], + "node_name:": "/activation_12/Mul", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/activation_12/Mul_output_0" + } + ] + }, + { + "node_id:": 701, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/activation_12/Mul_output_0" + }, + { + "input_dimension:": "384 384 1 ", + "input_name:": "encoder.encoders.2.encoder.layers.0.conv_module1.pointwise_conv2.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.2.encoder.layers.0.conv_module1.pointwise_conv2.bias" + } + ], + "node_name:": "/pointwise_conv2_12/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/pointwise_conv2_12/Conv_output_0" + } + ] + }, + { + "node_id:": 702, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/pointwise_conv2_12/Conv_output_0" + } + ], + "node_name:": "/Transpose_80", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Transpose_80_output_0" + } + ] + }, + { + "node_id:": 703, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_118_output_0" + }, + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Transpose_80_output_0" + } + ], + "node_name:": "/Add_119", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Add_119_output_0" + } + ] + }, + { + "node_id:": 704, + "node_inputs:": [ + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward2/in_proj_6/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward2/activation_6/Sub", + "node_outputs:": [ + { + "output_dimension:": "18 1 2048 ", + "output_name:": "/feed_forward2/activation_6/Sub_output_0" + } + ] + }, + { + "node_id:": 705, + "node_inputs:": [ + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward2/activation_6/Sub_output_0" + } + ], + "node_name:": "/feed_forward2/activation_6/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "18 1 2048 ", + "output_name:": "/feed_forward2/activation_6/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 706, + "node_inputs:": [ + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward2/in_proj_6/Add_output_0" + }, + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward2/activation_6/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward2/activation_6/Mul", + "node_outputs:": [ + { + "output_dimension:": "18 1 2048 ", + "output_name:": "/feed_forward2/activation_6/Mul_output_0" + } + ] + }, + { + "node_id:": 707, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_119_output_0" + }, + { + "input_dimension:": "18 1 384 ", + "input_name:": "/feed_forward2/out_proj_6/Add_output_0" + } + ], + "node_name:": "/Add_120", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Add_120_output_0" + } + ] + }, + { + "node_id:": 708, + "node_inputs:": [ + { + "input_dimension:": "3 72 1 96 ", + "input_name:": "cached_val2_2" + }, + { + "input_dimension:": "", + "input_name:": "/upsample_3/Constant_output_0" + } + ], + "node_name:": "/Gather_133", + "node_outputs:": [ + { + "output_dimension:": "72 1 96 ", + "output_name:": "/Gather_133_output_0" + } + ] + }, + { + "node_id:": 709, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_120_output_0" + }, + { + "input_dimension:": "384 96 ", + "input_name:": "onnx::MatMul_7789" + } + ], + "node_name:": "/in_proj2_6/MatMul", + "node_outputs:": [ + { + "output_dimension:": "18 1 96 ", + "output_name:": "/in_proj2_6/MatMul_output_0" + } + ] + }, + { + "node_id:": 710, + "node_inputs:": [ + { + "input_dimension:": "72 1 96 ", + "input_name:": "/Gather_133_output_0" + }, + { + "input_dimension:": "18 1 96 ", + "input_name:": "/in_proj2_6/MatMul_output_0" + } + ], + "node_name:": "/Concat_136", + "node_outputs:": [ + { + "output_dimension:": "90 1 96 ", + "output_name:": "/Concat_136_output_0" + } + ] + }, + { + "node_id:": 711, + "node_inputs:": [ + { + "input_dimension:": "90 1 96 ", + "input_name:": "/Concat_136_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_137_output_0" + } + ], + "node_name:": "/Reshape_75", + "node_outputs:": [ + { + "output_dimension:": "90 8 12 ", + "output_name:": "/Reshape_75_output_0" + } + ] + }, + { + "node_id:": 712, + "node_inputs:": [ + { + "input_dimension:": "90 8 12 ", + "input_name:": "/Reshape_75_output_0" + } + ], + "node_name:": "/Transpose_81", + "node_outputs:": [ + { + "output_dimension:": "8 90 12 ", + "output_name:": "/Transpose_81_output_0" + } + ] + }, + { + "node_id:": 713, + "node_inputs:": [ + { + "input_dimension:": "8 18 90 ", + "input_name:": "/Softmax_6_output_0" + }, + { + "input_dimension:": "8 90 12 ", + "input_name:": "/Transpose_81_output_0" + } + ], + "node_name:": "/MatMul_34", + "node_outputs:": [ + { + "output_dimension:": "8 18 12 ", + "output_name:": "/MatMul_34_output_0" + } + ] + }, + { + "node_id:": 714, + "node_inputs:": [ + { + "input_dimension:": "8 18 12 ", + "input_name:": "/MatMul_34_output_0" + } + ], + "node_name:": "/Transpose_82", + "node_outputs:": [ + { + "output_dimension:": "18 8 12 ", + "output_name:": "/Transpose_82_output_0" + } + ] + }, + { + "node_id:": 715, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_120_output_0" + }, + { + "input_dimension:": "18 1 384 ", + "input_name:": "/out_proj2_6/Add_output_0" + } + ], + "node_name:": "/Add_122", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Add_122_output_0" + } + ] + }, + { + "node_id:": 716, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_122_output_0" + } + ], + "node_name:": "/Transpose_83", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/Transpose_83_output_0" + } + ] + }, + { + "node_id:": 717, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/Transpose_83_output_0" + }, + { + "input_dimension:": "768 384 1 ", + "input_name:": "encoder.encoders.2.encoder.layers.0.conv_module2.pointwise_conv1.weight" + }, + { + "input_dimension:": "768 ", + "input_name:": "encoder.encoders.2.encoder.layers.0.conv_module2.pointwise_conv1.bias" + } + ], + "node_name:": "/pointwise_conv1_13/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 768 18 ", + "output_name:": "/pointwise_conv1_13/Conv_output_0" + } + ] + }, + { + "node_id:": 718, + "node_inputs:": [ + { + "input_dimension:": "1 768 18 ", + "input_name:": "/pointwise_conv1_13/Conv_output_0" + } + ], + "node_name:": "/Split_13", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/Split_13_output_0" + }, + { + "output_dimension:": "1 384 18 ", + "output_name:": "/Split_13_output_1" + } + ] + }, + { + "node_id:": 719, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/Split_13_output_1" + } + ], + "node_name:": "/Sigmoid_13", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/Sigmoid_13_output_0" + } + ] + }, + { + "node_id:": 720, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/Split_13_output_0" + }, + { + "input_dimension:": "1 384 18 ", + "input_name:": "/Sigmoid_13_output_0" + } + ], + "node_name:": "/Mul_75", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/Mul_75_output_0" + } + ] + }, + { + "node_id:": 721, + "node_inputs:": [ + { + "input_dimension:": "3 1 384 30 ", + "input_name:": "cached_conv2_2" + }, + { + "input_dimension:": "", + "input_name:": "/upsample_3/Constant_output_0" + } + ], + "node_name:": "/Gather_135", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Gather_135_output_0" + } + ] + }, + { + "node_id:": 722, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Gather_135_output_0" + }, + { + "input_dimension:": "1 384 18 ", + "input_name:": "/Mul_75_output_0" + } + ], + "node_name:": "/Concat_139", + "node_outputs:": [ + { + "output_dimension:": "1 384 48 ", + "output_name:": "/Concat_139_output_0" + } + ] + }, + { + "node_id:": 723, + "node_inputs:": [ + { + "input_dimension:": "1 384 48 ", + "input_name:": "/Concat_139_output_0" + }, + { + "input_dimension:": "384 1 31 ", + "input_name:": "encoder.encoders.2.encoder.layers.0.conv_module2.depthwise_conv.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.2.encoder.layers.0.conv_module2.depthwise_conv.bias" + } + ], + "node_name:": "/depthwise_conv_13/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/depthwise_conv_13/Conv_output_0" + } + ] + }, + { + "node_id:": 724, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/depthwise_conv_13/Conv_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/activation_13/Sub", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/activation_13/Sub_output_0" + } + ] + }, + { + "node_id:": 725, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/activation_13/Sub_output_0" + } + ], + "node_name:": "/activation_13/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/activation_13/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 726, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/depthwise_conv_13/Conv_output_0" + }, + { + "input_dimension:": "1 384 18 ", + "input_name:": "/activation_13/Sigmoid_output_0" + } + ], + "node_name:": "/activation_13/Mul", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/activation_13/Mul_output_0" + } + ] + }, + { + "node_id:": 727, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/activation_13/Mul_output_0" + }, + { + "input_dimension:": "384 384 1 ", + "input_name:": "encoder.encoders.2.encoder.layers.0.conv_module2.pointwise_conv2.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.2.encoder.layers.0.conv_module2.pointwise_conv2.bias" + } + ], + "node_name:": "/pointwise_conv2_13/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/pointwise_conv2_13/Conv_output_0" + } + ] + }, + { + "node_id:": 728, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/pointwise_conv2_13/Conv_output_0" + } + ], + "node_name:": "/Transpose_84", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Transpose_84_output_0" + } + ] + }, + { + "node_id:": 729, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_122_output_0" + }, + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Transpose_84_output_0" + } + ], + "node_name:": "/Add_123", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Add_123_output_0" + } + ] + }, + { + "node_id:": 730, + "node_inputs:": [ + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward3/in_proj_6/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward3/activation_6/Sub", + "node_outputs:": [ + { + "output_dimension:": "18 1 2048 ", + "output_name:": "/feed_forward3/activation_6/Sub_output_0" + } + ] + }, + { + "node_id:": 731, + "node_inputs:": [ + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward3/activation_6/Sub_output_0" + } + ], + "node_name:": "/feed_forward3/activation_6/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "18 1 2048 ", + "output_name:": "/feed_forward3/activation_6/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 732, + "node_inputs:": [ + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward3/in_proj_6/Add_output_0" + }, + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward3/activation_6/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward3/activation_6/Mul", + "node_outputs:": [ + { + "output_dimension:": "18 1 2048 ", + "output_name:": "/feed_forward3/activation_6/Mul_output_0" + } + ] + }, + { + "node_id:": 733, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_123_output_0" + }, + { + "input_dimension:": "18 1 384 ", + "input_name:": "/feed_forward3/out_proj_6/Add_output_0" + } + ], + "node_name:": "/Add_124", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Add_124_output_0" + } + ] + }, + { + "node_id:": 734, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_124_output_0" + }, + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_124_output_0" + } + ], + "node_name:": "/norm_final_6/Mul", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/norm_final_6/Mul_output_0" + } + ] + }, + { + "node_id:": 735, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/norm_final_6/Mul_output_0" + } + ], + "node_name:": "/norm_final_6/ReduceMean", + "node_outputs:": [ + { + "output_dimension:": "18 1 1 ", + "output_name:": "/norm_final_6/ReduceMean_output_0" + } + ] + }, + { + "node_id:": 736, + "node_inputs:": [ + { + "input_dimension:": "18 1 1 ", + "input_name:": "/norm_final_6/ReduceMean_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/norm_final_6/Constant_output_0" + } + ], + "node_name:": "/norm_final_6/Add", + "node_outputs:": [ + { + "output_dimension:": "18 1 1 ", + "output_name:": "/norm_final_6/Add_output_0" + } + ] + }, + { + "node_id:": 737, + "node_inputs:": [ + { + "input_dimension:": "18 1 1 ", + "input_name:": "/norm_final_6/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/norm_final_1/Constant_1_output_0" + } + ], + "node_name:": "/norm_final_6/Pow", + "node_outputs:": [ + { + "output_dimension:": "18 1 1 ", + "output_name:": "/norm_final_6/Pow_output_0" + } + ] + }, + { + "node_id:": 738, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_124_output_0" + }, + { + "input_dimension:": "18 1 1 ", + "input_name:": "/norm_final_6/Pow_output_0" + } + ], + "node_name:": "/norm_final_6/Mul_1", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/norm_final_6/Mul_1_output_0" + } + ] + }, + { + "node_id:": 739, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/norm_final_6/Mul_1_output_0" + }, + { + "input_dimension:": "18 1 384 ", + "input_name:": "/downsample_1/ReduceSum_1_output_0" + } + ], + "node_name:": "/Sub_20", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Sub_20_output_0" + } + ] + }, + { + "node_id:": 740, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Sub_20_output_0" + }, + { + "input_dimension:": "", + "input_name:": "encoder.encoders.2.encoder.layers.0.bypass_scale" + } + ], + "node_name:": "/Mul_76", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Mul_76_output_0" + } + ] + }, + { + "node_id:": 741, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/downsample_1/ReduceSum_1_output_0" + }, + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Mul_76_output_0" + } + ], + "node_name:": "/Add_125", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Add_125_output_0" + } + ] + }, + { + "node_id:": 742, + "node_inputs:": [ + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward1/in_proj_7/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward1/activation_7/Sub", + "node_outputs:": [ + { + "output_dimension:": "18 1 2048 ", + "output_name:": "/feed_forward1/activation_7/Sub_output_0" + } + ] + }, + { + "node_id:": 743, + "node_inputs:": [ + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward1/activation_7/Sub_output_0" + } + ], + "node_name:": "/feed_forward1/activation_7/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "18 1 2048 ", + "output_name:": "/feed_forward1/activation_7/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 744, + "node_inputs:": [ + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward1/in_proj_7/Add_output_0" + }, + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward1/activation_7/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward1/activation_7/Mul", + "node_outputs:": [ + { + "output_dimension:": "18 1 2048 ", + "output_name:": "/feed_forward1/activation_7/Mul_output_0" + } + ] + }, + { + "node_id:": 745, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_125_output_0" + }, + { + "input_dimension:": "18 1 384 ", + "input_name:": "/feed_forward1/out_proj_7/Add_output_0" + } + ], + "node_name:": "/Add_126", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Add_126_output_0" + } + ] + }, + { + "node_id:": 746, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_126_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_631_output_0" + } + ], + "node_name:": "/CumSum_7", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/CumSum_7_output_0" + } + ] + }, + { + "node_id:": 747, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/CumSum_7_output_0" + }, + { + "input_dimension:": "1 1 384 ", + "input_name:": "/Unsqueeze_282_output_0" + } + ], + "node_name:": "/Add_127", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Add_127_output_0" + } + ] + }, + { + "node_id:": 748, + "node_inputs:": [ + { + "input_dimension:": "18 1 ", + "input_name:": "/Unsqueeze_283_output_0" + }, + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_284_output_0" + } + ], + "node_name:": "/Add_129", + "node_outputs:": [ + { + "output_dimension:": "18 1 ", + "output_name:": "/Add_129_output_0" + } + ] + }, + { + "node_id:": 749, + "node_inputs:": [ + { + "input_dimension:": "18 1 ", + "input_name:": "/Add_129_output_0" + } + ], + "node_name:": "/Cast_37", + "node_outputs:": [ + { + "output_dimension:": "18 1 ", + "output_name:": "/Cast_37_output_0" + } + ] + }, + { + "node_id:": 750, + "node_inputs:": [ + { + "input_dimension:": "18 1 ", + "input_name:": "/Cast_37_output_0" + } + ], + "node_name:": "/Reciprocal_7", + "node_outputs:": [ + { + "output_dimension:": "18 1 ", + "output_name:": "/Reciprocal_7_output_0" + } + ] + }, + { + "node_id:": 751, + "node_inputs:": [ + { + "input_dimension:": "18 1 ", + "input_name:": "/Reciprocal_7_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + } + ], + "node_name:": "/Unsqueeze_285", + "node_outputs:": [ + { + "output_dimension:": "18 1 1 ", + "output_name:": "/Unsqueeze_285_output_0" + } + ] + }, + { + "node_id:": 752, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_127_output_0" + }, + { + "input_dimension:": "18 1 1 ", + "input_name:": "/Unsqueeze_285_output_0" + } + ], + "node_name:": "/Mul_79", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Mul_79_output_0" + } + ] + }, + { + "node_id:": 753, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Mul_79_output_0" + }, + { + "input_dimension:": "384 384 ", + "input_name:": "onnx::MatMul_7805" + } + ], + "node_name:": "/proj_7/MatMul", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/proj_7/MatMul_output_0" + } + ] + }, + { + "node_id:": 754, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_126_output_0" + }, + { + "input_dimension:": "18 1 384 ", + "input_name:": "/proj_7/MatMul_output_0" + } + ], + "node_name:": "/Add_131", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Add_131_output_0" + } + ] + }, + { + "node_id:": 755, + "node_inputs:": [ + { + "input_dimension:": "18 1 512 ", + "input_name:": "/in_proj_7/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_285_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_568_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_65", + "node_outputs:": [ + { + "output_dimension:": "18 1 192 ", + "output_name:": "/Slice_65_output_0" + } + ] + }, + { + "node_id:": 756, + "node_inputs:": [ + { + "input_dimension:": "3 72 1 192 ", + "input_name:": "cached_key_2" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_pos_1/Constant_2_output_0" + } + ], + "node_name:": "/Gather_152", + "node_outputs:": [ + { + "output_dimension:": "72 1 192 ", + "output_name:": "/Gather_152_output_0" + } + ] + }, + { + "node_id:": 757, + "node_inputs:": [ + { + "input_dimension:": "72 1 192 ", + "input_name:": "/Gather_152_output_0" + }, + { + "input_dimension:": "18 1 192 ", + "input_name:": "/Slice_65_output_0" + } + ], + "node_name:": "/Concat_140", + "node_outputs:": [ + { + "output_dimension:": "90 1 192 ", + "output_name:": "/Concat_140_output_0" + } + ] + }, + { + "node_id:": 758, + "node_inputs:": [ + { + "input_dimension:": "18 1 512 ", + "input_name:": "/in_proj_7/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1118_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_67", + "node_outputs:": [ + { + "output_dimension:": "18 1 32 ", + "output_name:": "/Slice_67_output_0" + } + ] + }, + { + "node_id:": 759, + "node_inputs:": [ + { + "input_dimension:": "18 1 32 ", + "input_name:": "/Slice_67_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_143_output_0" + } + ], + "node_name:": "/Reshape_78", + "node_outputs:": [ + { + "output_dimension:": "18 1 8 4 ", + "output_name:": "/Reshape_78_output_0" + } + ] + }, + { + "node_id:": 760, + "node_inputs:": [ + { + "input_dimension:": "18 1 8 4 ", + "input_name:": "/Reshape_78_output_0" + } + ], + "node_name:": "/Transpose_87", + "node_outputs:": [ + { + "output_dimension:": "1 8 18 4 ", + "output_name:": "/Transpose_87_output_0" + } + ] + }, + { + "node_id:": 761, + "node_inputs:": [ + { + "input_dimension:": "1 8 18 4 ", + "input_name:": "/Transpose_87_output_0" + }, + { + "input_dimension:": "1 8 4 107 ", + "input_name:": "/Transpose_89_output_0" + } + ], + "node_name:": "/MatMul_35", + "node_outputs:": [ + { + "output_dimension:": "1 8 18 107 ", + "output_name:": "/MatMul_35_output_0" + } + ] + }, + { + "node_id:": 762, + "node_inputs:": [ + { + "input_dimension:": "90 1 192 ", + "input_name:": "/Concat_140_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_266_output_0" + } + ], + "node_name:": "/Reshape_79", + "node_outputs:": [ + { + "output_dimension:": "90 1 8 24 ", + "output_name:": "/Reshape_79_output_0" + } + ] + }, + { + "node_id:": 763, + "node_inputs:": [ + { + "input_dimension:": "90 1 8 24 ", + "input_name:": "/Reshape_79_output_0" + } + ], + "node_name:": "/Transpose_88", + "node_outputs:": [ + { + "output_dimension:": "1 8 24 90 ", + "output_name:": "/Transpose_88_output_0" + } + ] + }, + { + "node_id:": 764, + "node_inputs:": [ + { + "input_dimension:": "18 1 512 ", + "input_name:": "/in_proj_7/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_285_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_64", + "node_outputs:": [ + { + "output_dimension:": "18 1 192 ", + "output_name:": "/Slice_64_output_0" + } + ] + }, + { + "node_id:": 765, + "node_inputs:": [ + { + "input_dimension:": "18 1 192 ", + "input_name:": "/Slice_64_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_142_output_0" + } + ], + "node_name:": "/Reshape_77", + "node_outputs:": [ + { + "output_dimension:": "18 1 8 24 ", + "output_name:": "/Reshape_77_output_0" + } + ] + }, + { + "node_id:": 766, + "node_inputs:": [ + { + "input_dimension:": "18 1 8 24 ", + "input_name:": "/Reshape_77_output_0" + } + ], + "node_name:": "/Transpose_86", + "node_outputs:": [ + { + "output_dimension:": "1 8 18 24 ", + "output_name:": "/Transpose_86_output_0" + } + ] + }, + { + "node_id:": 767, + "node_inputs:": [ + { + "input_dimension:": "1 8 18 24 ", + "input_name:": "/Transpose_86_output_0" + }, + { + "input_dimension:": "1 8 24 90 ", + "input_name:": "/Transpose_88_output_0" + } + ], + "node_name:": "/MatMul_36", + "node_outputs:": [ + { + "output_dimension:": "1 8 18 90 ", + "output_name:": "/MatMul_36_output_0" + } + ] + }, + { + "node_id:": 768, + "node_inputs:": [ + { + "input_dimension:": "1 8 18 107 ", + "input_name:": "/MatMul_35_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Concat_149_output_0" + } + ], + "node_name:": "/Reshape_82", + "node_outputs:": [ + { + "output_dimension:": "144 107 ", + "output_name:": "/Reshape_82_output_0" + } + ] + }, + { + "node_id:": 769, + "node_inputs:": [ + { + "input_dimension:": "144 107 ", + "input_name:": "/Reshape_82_output_0" + }, + { + "input_dimension:": "144 90 ", + "input_name:": "/Add_133_output_0" + } + ], + "node_name:": "/GatherElements_7", + "node_outputs:": [ + { + "output_dimension:": "144 90 ", + "output_name:": "/GatherElements_7_output_0" + } + ] + }, + { + "node_id:": 770, + "node_inputs:": [ + { + "input_dimension:": "144 90 ", + "input_name:": "/GatherElements_7_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_150_output_0" + } + ], + "node_name:": "/Reshape_83", + "node_outputs:": [ + { + "output_dimension:": "1 8 18 90 ", + "output_name:": "/Reshape_83_output_0" + } + ] + }, + { + "node_id:": 771, + "node_inputs:": [ + { + "input_dimension:": "1 8 18 90 ", + "input_name:": "/MatMul_36_output_0" + }, + { + "input_dimension:": "1 8 18 90 ", + "input_name:": "/Reshape_83_output_0" + } + ], + "node_name:": "/Add_134", + "node_outputs:": [ + { + "output_dimension:": "1 8 18 90 ", + "output_name:": "/Add_134_output_0" + } + ] + }, + { + "node_id:": 772, + "node_inputs:": [ + { + "input_dimension:": "1 8 18 90 ", + "input_name:": "/Add_134_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_151_output_0" + } + ], + "node_name:": "/Reshape_84", + "node_outputs:": [ + { + "output_dimension:": "8 18 90 ", + "output_name:": "/Reshape_84_output_0" + } + ] + }, + { + "node_id:": 773, + "node_inputs:": [ + { + "input_dimension:": "8 18 90 ", + "input_name:": "/Reshape_84_output_0" + } + ], + "node_name:": "/Softmax_7", + "node_outputs:": [ + { + "output_dimension:": "8 18 90 ", + "output_name:": "/Softmax_7_output_0" + } + ] + }, + { + "node_id:": 774, + "node_inputs:": [ + { + "input_dimension:": "18 1 512 ", + "input_name:": "/in_proj_7/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_568_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1118_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_66", + "node_outputs:": [ + { + "output_dimension:": "18 1 96 ", + "output_name:": "/Slice_66_output_0" + } + ] + }, + { + "node_id:": 775, + "node_inputs:": [ + { + "input_dimension:": "3 72 1 96 ", + "input_name:": "cached_val_2" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_pos_1/Constant_2_output_0" + } + ], + "node_name:": "/Gather_153", + "node_outputs:": [ + { + "output_dimension:": "72 1 96 ", + "output_name:": "/Gather_153_output_0" + } + ] + }, + { + "node_id:": 776, + "node_inputs:": [ + { + "input_dimension:": "72 1 96 ", + "input_name:": "/Gather_153_output_0" + }, + { + "input_dimension:": "18 1 96 ", + "input_name:": "/Slice_66_output_0" + } + ], + "node_name:": "/Concat_141", + "node_outputs:": [ + { + "output_dimension:": "90 1 96 ", + "output_name:": "/Concat_141_output_0" + } + ] + }, + { + "node_id:": 777, + "node_inputs:": [ + { + "input_dimension:": "90 1 96 ", + "input_name:": "/Concat_141_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_145_output_0" + } + ], + "node_name:": "/Reshape_80", + "node_outputs:": [ + { + "output_dimension:": "90 8 12 ", + "output_name:": "/Reshape_80_output_0" + } + ] + }, + { + "node_id:": 778, + "node_inputs:": [ + { + "input_dimension:": "90 8 12 ", + "input_name:": "/Reshape_80_output_0" + } + ], + "node_name:": "/Transpose_85", + "node_outputs:": [ + { + "output_dimension:": "8 90 12 ", + "output_name:": "/Transpose_85_output_0" + } + ] + }, + { + "node_id:": 779, + "node_inputs:": [ + { + "input_dimension:": "8 18 90 ", + "input_name:": "/Softmax_7_output_0" + }, + { + "input_dimension:": "8 90 12 ", + "input_name:": "/Transpose_85_output_0" + } + ], + "node_name:": "/MatMul_37", + "node_outputs:": [ + { + "output_dimension:": "8 18 12 ", + "output_name:": "/MatMul_37_output_0" + } + ] + }, + { + "node_id:": 780, + "node_inputs:": [ + { + "input_dimension:": "8 18 12 ", + "input_name:": "/MatMul_37_output_0" + } + ], + "node_name:": "/Transpose_90", + "node_outputs:": [ + { + "output_dimension:": "18 8 12 ", + "output_name:": "/Transpose_90_output_0" + } + ] + }, + { + "node_id:": 781, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_131_output_0" + }, + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_135_output_0" + } + ], + "node_name:": "/Add_136", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Add_136_output_0" + } + ] + }, + { + "node_id:": 782, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_136_output_0" + } + ], + "node_name:": "/Transpose_91", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/Transpose_91_output_0" + } + ] + }, + { + "node_id:": 783, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/Transpose_91_output_0" + }, + { + "input_dimension:": "768 384 1 ", + "input_name:": "encoder.encoders.2.encoder.layers.1.conv_module1.pointwise_conv1.weight" + }, + { + "input_dimension:": "768 ", + "input_name:": "encoder.encoders.2.encoder.layers.1.conv_module1.pointwise_conv1.bias" + } + ], + "node_name:": "/pointwise_conv1_14/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 768 18 ", + "output_name:": "/pointwise_conv1_14/Conv_output_0" + } + ] + }, + { + "node_id:": 784, + "node_inputs:": [ + { + "input_dimension:": "1 768 18 ", + "input_name:": "/pointwise_conv1_14/Conv_output_0" + } + ], + "node_name:": "/Split_14", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/Split_14_output_0" + }, + { + "output_dimension:": "1 384 18 ", + "output_name:": "/Split_14_output_1" + } + ] + }, + { + "node_id:": 785, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/Split_14_output_1" + } + ], + "node_name:": "/Sigmoid_14", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/Sigmoid_14_output_0" + } + ] + }, + { + "node_id:": 786, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/Split_14_output_0" + }, + { + "input_dimension:": "1 384 18 ", + "input_name:": "/Sigmoid_14_output_0" + } + ], + "node_name:": "/Mul_84", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/Mul_84_output_0" + } + ] + }, + { + "node_id:": 787, + "node_inputs:": [ + { + "input_dimension:": "3 1 384 30 ", + "input_name:": "cached_conv1_2" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_pos_1/Constant_2_output_0" + } + ], + "node_name:": "/Gather_155", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Gather_155_output_0" + } + ] + }, + { + "node_id:": 788, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Gather_155_output_0" + }, + { + "input_dimension:": "1 384 18 ", + "input_name:": "/Mul_84_output_0" + } + ], + "node_name:": "/Concat_153", + "node_outputs:": [ + { + "output_dimension:": "1 384 48 ", + "output_name:": "/Concat_153_output_0" + } + ] + }, + { + "node_id:": 789, + "node_inputs:": [ + { + "input_dimension:": "1 384 48 ", + "input_name:": "/Concat_153_output_0" + }, + { + "input_dimension:": "384 1 31 ", + "input_name:": "encoder.encoders.2.encoder.layers.1.conv_module1.depthwise_conv.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.2.encoder.layers.1.conv_module1.depthwise_conv.bias" + } + ], + "node_name:": "/depthwise_conv_14/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/depthwise_conv_14/Conv_output_0" + } + ] + }, + { + "node_id:": 790, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/depthwise_conv_14/Conv_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/activation_14/Sub", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/activation_14/Sub_output_0" + } + ] + }, + { + "node_id:": 791, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/activation_14/Sub_output_0" + } + ], + "node_name:": "/activation_14/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/activation_14/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 792, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/depthwise_conv_14/Conv_output_0" + }, + { + "input_dimension:": "1 384 18 ", + "input_name:": "/activation_14/Sigmoid_output_0" + } + ], + "node_name:": "/activation_14/Mul", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/activation_14/Mul_output_0" + } + ] + }, + { + "node_id:": 793, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/activation_14/Mul_output_0" + }, + { + "input_dimension:": "384 384 1 ", + "input_name:": "encoder.encoders.2.encoder.layers.1.conv_module1.pointwise_conv2.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.2.encoder.layers.1.conv_module1.pointwise_conv2.bias" + } + ], + "node_name:": "/pointwise_conv2_14/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/pointwise_conv2_14/Conv_output_0" + } + ] + }, + { + "node_id:": 794, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/pointwise_conv2_14/Conv_output_0" + } + ], + "node_name:": "/Transpose_92", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Transpose_92_output_0" + } + ] + }, + { + "node_id:": 795, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_136_output_0" + }, + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Transpose_92_output_0" + } + ], + "node_name:": "/Add_137", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Add_137_output_0" + } + ] + }, + { + "node_id:": 796, + "node_inputs:": [ + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward2/in_proj_7/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward2/activation_7/Sub", + "node_outputs:": [ + { + "output_dimension:": "18 1 2048 ", + "output_name:": "/feed_forward2/activation_7/Sub_output_0" + } + ] + }, + { + "node_id:": 797, + "node_inputs:": [ + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward2/activation_7/Sub_output_0" + } + ], + "node_name:": "/feed_forward2/activation_7/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "18 1 2048 ", + "output_name:": "/feed_forward2/activation_7/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 798, + "node_inputs:": [ + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward2/in_proj_7/Add_output_0" + }, + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward2/activation_7/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward2/activation_7/Mul", + "node_outputs:": [ + { + "output_dimension:": "18 1 2048 ", + "output_name:": "/feed_forward2/activation_7/Mul_output_0" + } + ] + }, + { + "node_id:": 799, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_137_output_0" + }, + { + "input_dimension:": "18 1 384 ", + "input_name:": "/feed_forward2/out_proj_7/Add_output_0" + } + ], + "node_name:": "/Add_138", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Add_138_output_0" + } + ] + }, + { + "node_id:": 800, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_138_output_0" + }, + { + "input_dimension:": "384 96 ", + "input_name:": "onnx::MatMul_7851" + } + ], + "node_name:": "/in_proj2_7/MatMul", + "node_outputs:": [ + { + "output_dimension:": "18 1 96 ", + "output_name:": "/in_proj2_7/MatMul_output_0" + } + ] + }, + { + "node_id:": 801, + "node_inputs:": [ + { + "input_dimension:": "3 72 1 96 ", + "input_name:": "cached_val2_2" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_pos_1/Constant_2_output_0" + } + ], + "node_name:": "/Gather_154", + "node_outputs:": [ + { + "output_dimension:": "72 1 96 ", + "output_name:": "/Gather_154_output_0" + } + ] + }, + { + "node_id:": 802, + "node_inputs:": [ + { + "input_dimension:": "72 1 96 ", + "input_name:": "/Gather_154_output_0" + }, + { + "input_dimension:": "18 1 96 ", + "input_name:": "/in_proj2_7/MatMul_output_0" + } + ], + "node_name:": "/Concat_154", + "node_outputs:": [ + { + "output_dimension:": "90 1 96 ", + "output_name:": "/Concat_154_output_0" + } + ] + }, + { + "node_id:": 803, + "node_inputs:": [ + { + "input_dimension:": "90 1 96 ", + "input_name:": "/Concat_154_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_155_output_0" + } + ], + "node_name:": "/Reshape_86", + "node_outputs:": [ + { + "output_dimension:": "90 8 12 ", + "output_name:": "/Reshape_86_output_0" + } + ] + }, + { + "node_id:": 804, + "node_inputs:": [ + { + "input_dimension:": "90 8 12 ", + "input_name:": "/Reshape_86_output_0" + } + ], + "node_name:": "/Transpose_93", + "node_outputs:": [ + { + "output_dimension:": "8 90 12 ", + "output_name:": "/Transpose_93_output_0" + } + ] + }, + { + "node_id:": 805, + "node_inputs:": [ + { + "input_dimension:": "8 18 90 ", + "input_name:": "/Softmax_7_output_0" + }, + { + "input_dimension:": "8 90 12 ", + "input_name:": "/Transpose_93_output_0" + } + ], + "node_name:": "/MatMul_39", + "node_outputs:": [ + { + "output_dimension:": "8 18 12 ", + "output_name:": "/MatMul_39_output_0" + } + ] + }, + { + "node_id:": 806, + "node_inputs:": [ + { + "input_dimension:": "8 18 12 ", + "input_name:": "/MatMul_39_output_0" + } + ], + "node_name:": "/Transpose_94", + "node_outputs:": [ + { + "output_dimension:": "18 8 12 ", + "output_name:": "/Transpose_94_output_0" + } + ] + }, + { + "node_id:": 807, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_138_output_0" + }, + { + "input_dimension:": "18 1 384 ", + "input_name:": "/out_proj2_7/Add_output_0" + } + ], + "node_name:": "/Add_140", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Add_140_output_0" + } + ] + }, + { + "node_id:": 808, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_140_output_0" + } + ], + "node_name:": "/Transpose_95", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/Transpose_95_output_0" + } + ] + }, + { + "node_id:": 809, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/Transpose_95_output_0" + }, + { + "input_dimension:": "768 384 1 ", + "input_name:": "encoder.encoders.2.encoder.layers.1.conv_module2.pointwise_conv1.weight" + }, + { + "input_dimension:": "768 ", + "input_name:": "encoder.encoders.2.encoder.layers.1.conv_module2.pointwise_conv1.bias" + } + ], + "node_name:": "/pointwise_conv1_15/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 768 18 ", + "output_name:": "/pointwise_conv1_15/Conv_output_0" + } + ] + }, + { + "node_id:": 810, + "node_inputs:": [ + { + "input_dimension:": "1 768 18 ", + "input_name:": "/pointwise_conv1_15/Conv_output_0" + } + ], + "node_name:": "/Split_15", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/Split_15_output_0" + }, + { + "output_dimension:": "1 384 18 ", + "output_name:": "/Split_15_output_1" + } + ] + }, + { + "node_id:": 811, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/Split_15_output_1" + } + ], + "node_name:": "/Sigmoid_15", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/Sigmoid_15_output_0" + } + ] + }, + { + "node_id:": 812, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/Split_15_output_0" + }, + { + "input_dimension:": "1 384 18 ", + "input_name:": "/Sigmoid_15_output_0" + } + ], + "node_name:": "/Mul_86", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/Mul_86_output_0" + } + ] + }, + { + "node_id:": 813, + "node_inputs:": [ + { + "input_dimension:": "3 1 384 30 ", + "input_name:": "cached_conv2_2" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_pos_1/Constant_2_output_0" + } + ], + "node_name:": "/Gather_156", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Gather_156_output_0" + } + ] + }, + { + "node_id:": 814, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Gather_156_output_0" + }, + { + "input_dimension:": "1 384 18 ", + "input_name:": "/Mul_86_output_0" + } + ], + "node_name:": "/Concat_157", + "node_outputs:": [ + { + "output_dimension:": "1 384 48 ", + "output_name:": "/Concat_157_output_0" + } + ] + }, + { + "node_id:": 815, + "node_inputs:": [ + { + "input_dimension:": "1 384 48 ", + "input_name:": "/Concat_157_output_0" + }, + { + "input_dimension:": "384 1 31 ", + "input_name:": "encoder.encoders.2.encoder.layers.1.conv_module2.depthwise_conv.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.2.encoder.layers.1.conv_module2.depthwise_conv.bias" + } + ], + "node_name:": "/depthwise_conv_15/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/depthwise_conv_15/Conv_output_0" + } + ] + }, + { + "node_id:": 816, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/depthwise_conv_15/Conv_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/activation_15/Sub", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/activation_15/Sub_output_0" + } + ] + }, + { + "node_id:": 817, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/activation_15/Sub_output_0" + } + ], + "node_name:": "/activation_15/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/activation_15/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 818, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/depthwise_conv_15/Conv_output_0" + }, + { + "input_dimension:": "1 384 18 ", + "input_name:": "/activation_15/Sigmoid_output_0" + } + ], + "node_name:": "/activation_15/Mul", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/activation_15/Mul_output_0" + } + ] + }, + { + "node_id:": 819, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/activation_15/Mul_output_0" + }, + { + "input_dimension:": "384 384 1 ", + "input_name:": "encoder.encoders.2.encoder.layers.1.conv_module2.pointwise_conv2.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.2.encoder.layers.1.conv_module2.pointwise_conv2.bias" + } + ], + "node_name:": "/pointwise_conv2_15/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/pointwise_conv2_15/Conv_output_0" + } + ] + }, + { + "node_id:": 820, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/pointwise_conv2_15/Conv_output_0" + } + ], + "node_name:": "/Transpose_96", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Transpose_96_output_0" + } + ] + }, + { + "node_id:": 821, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_140_output_0" + }, + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Transpose_96_output_0" + } + ], + "node_name:": "/Add_141", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Add_141_output_0" + } + ] + }, + { + "node_id:": 822, + "node_inputs:": [ + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward3/in_proj_7/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward3/activation_7/Sub", + "node_outputs:": [ + { + "output_dimension:": "18 1 2048 ", + "output_name:": "/feed_forward3/activation_7/Sub_output_0" + } + ] + }, + { + "node_id:": 823, + "node_inputs:": [ + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward3/activation_7/Sub_output_0" + } + ], + "node_name:": "/feed_forward3/activation_7/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "18 1 2048 ", + "output_name:": "/feed_forward3/activation_7/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 824, + "node_inputs:": [ + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward3/in_proj_7/Add_output_0" + }, + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward3/activation_7/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward3/activation_7/Mul", + "node_outputs:": [ + { + "output_dimension:": "18 1 2048 ", + "output_name:": "/feed_forward3/activation_7/Mul_output_0" + } + ] + }, + { + "node_id:": 825, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_141_output_0" + }, + { + "input_dimension:": "18 1 384 ", + "input_name:": "/feed_forward3/out_proj_7/Add_output_0" + } + ], + "node_name:": "/Add_142", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Add_142_output_0" + } + ] + }, + { + "node_id:": 826, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_142_output_0" + }, + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_142_output_0" + } + ], + "node_name:": "/norm_final_7/Mul", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/norm_final_7/Mul_output_0" + } + ] + }, + { + "node_id:": 827, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/norm_final_7/Mul_output_0" + } + ], + "node_name:": "/norm_final_7/ReduceMean", + "node_outputs:": [ + { + "output_dimension:": "18 1 1 ", + "output_name:": "/norm_final_7/ReduceMean_output_0" + } + ] + }, + { + "node_id:": 828, + "node_inputs:": [ + { + "input_dimension:": "18 1 1 ", + "input_name:": "/norm_final_7/ReduceMean_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/norm_final_7/Constant_output_0" + } + ], + "node_name:": "/norm_final_7/Add", + "node_outputs:": [ + { + "output_dimension:": "18 1 1 ", + "output_name:": "/norm_final_7/Add_output_0" + } + ] + }, + { + "node_id:": 829, + "node_inputs:": [ + { + "input_dimension:": "18 1 1 ", + "input_name:": "/norm_final_7/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/norm_final_1/Constant_1_output_0" + } + ], + "node_name:": "/norm_final_7/Pow", + "node_outputs:": [ + { + "output_dimension:": "18 1 1 ", + "output_name:": "/norm_final_7/Pow_output_0" + } + ] + }, + { + "node_id:": 830, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_142_output_0" + }, + { + "input_dimension:": "18 1 1 ", + "input_name:": "/norm_final_7/Pow_output_0" + } + ], + "node_name:": "/norm_final_7/Mul_1", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/norm_final_7/Mul_1_output_0" + } + ] + }, + { + "node_id:": 831, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/norm_final_7/Mul_1_output_0" + }, + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_125_output_0" + } + ], + "node_name:": "/Sub_23", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Sub_23_output_0" + } + ] + }, + { + "node_id:": 832, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Sub_23_output_0" + }, + { + "input_dimension:": "", + "input_name:": "encoder.encoders.2.encoder.layers.1.bypass_scale" + } + ], + "node_name:": "/Mul_87", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Mul_87_output_0" + } + ] + }, + { + "node_id:": 833, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_125_output_0" + }, + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Mul_87_output_0" + } + ], + "node_name:": "/Add_143", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Add_143_output_0" + } + ] + }, + { + "node_id:": 834, + "node_inputs:": [ + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward1/in_proj_8/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward1/activation_8/Sub", + "node_outputs:": [ + { + "output_dimension:": "18 1 2048 ", + "output_name:": "/feed_forward1/activation_8/Sub_output_0" + } + ] + }, + { + "node_id:": 835, + "node_inputs:": [ + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward1/activation_8/Sub_output_0" + } + ], + "node_name:": "/feed_forward1/activation_8/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "18 1 2048 ", + "output_name:": "/feed_forward1/activation_8/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 836, + "node_inputs:": [ + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward1/in_proj_8/Add_output_0" + }, + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward1/activation_8/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward1/activation_8/Mul", + "node_outputs:": [ + { + "output_dimension:": "18 1 2048 ", + "output_name:": "/feed_forward1/activation_8/Mul_output_0" + } + ] + }, + { + "node_id:": 837, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_143_output_0" + }, + { + "input_dimension:": "18 1 384 ", + "input_name:": "/feed_forward1/out_proj_8/Add_output_0" + } + ], + "node_name:": "/Add_144", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Add_144_output_0" + } + ] + }, + { + "node_id:": 838, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_144_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_631_output_0" + } + ], + "node_name:": "/CumSum_8", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/CumSum_8_output_0" + } + ] + }, + { + "node_id:": 839, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/CumSum_8_output_0" + }, + { + "input_dimension:": "1 1 384 ", + "input_name:": "/Unsqueeze_316_output_0" + } + ], + "node_name:": "/Add_145", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Add_145_output_0" + } + ] + }, + { + "node_id:": 840, + "node_inputs:": [ + { + "input_dimension:": "18 1 ", + "input_name:": "/Unsqueeze_317_output_0" + }, + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_318_output_0" + } + ], + "node_name:": "/Add_147", + "node_outputs:": [ + { + "output_dimension:": "18 1 ", + "output_name:": "/Add_147_output_0" + } + ] + }, + { + "node_id:": 841, + "node_inputs:": [ + { + "input_dimension:": "18 1 ", + "input_name:": "/Add_147_output_0" + } + ], + "node_name:": "/Cast_42", + "node_outputs:": [ + { + "output_dimension:": "18 1 ", + "output_name:": "/Cast_42_output_0" + } + ] + }, + { + "node_id:": 842, + "node_inputs:": [ + { + "input_dimension:": "18 1 ", + "input_name:": "/Cast_42_output_0" + } + ], + "node_name:": "/Reciprocal_8", + "node_outputs:": [ + { + "output_dimension:": "18 1 ", + "output_name:": "/Reciprocal_8_output_0" + } + ] + }, + { + "node_id:": 843, + "node_inputs:": [ + { + "input_dimension:": "18 1 ", + "input_name:": "/Reciprocal_8_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + } + ], + "node_name:": "/Unsqueeze_319", + "node_outputs:": [ + { + "output_dimension:": "18 1 1 ", + "output_name:": "/Unsqueeze_319_output_0" + } + ] + }, + { + "node_id:": 844, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_145_output_0" + }, + { + "input_dimension:": "18 1 1 ", + "input_name:": "/Unsqueeze_319_output_0" + } + ], + "node_name:": "/Mul_90", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Mul_90_output_0" + } + ] + }, + { + "node_id:": 845, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Mul_90_output_0" + }, + { + "input_dimension:": "384 384 ", + "input_name:": "onnx::MatMul_7867" + } + ], + "node_name:": "/proj_8/MatMul", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/proj_8/MatMul_output_0" + } + ] + }, + { + "node_id:": 846, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_144_output_0" + }, + { + "input_dimension:": "18 1 384 ", + "input_name:": "/proj_8/MatMul_output_0" + } + ], + "node_name:": "/Add_149", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Add_149_output_0" + } + ] + }, + { + "node_id:": 847, + "node_inputs:": [ + { + "input_dimension:": "18 1 512 ", + "input_name:": "/in_proj_8/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_285_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_568_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_74", + "node_outputs:": [ + { + "output_dimension:": "18 1 192 ", + "output_name:": "/Slice_74_output_0" + } + ] + }, + { + "node_id:": 848, + "node_inputs:": [ + { + "input_dimension:": "3 72 1 192 ", + "input_name:": "cached_key_2" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_404_output_0" + } + ], + "node_name:": "/Gather_173", + "node_outputs:": [ + { + "output_dimension:": "72 1 192 ", + "output_name:": "/Gather_173_output_0" + } + ] + }, + { + "node_id:": 849, + "node_inputs:": [ + { + "input_dimension:": "72 1 192 ", + "input_name:": "/Gather_173_output_0" + }, + { + "input_dimension:": "18 1 192 ", + "input_name:": "/Slice_74_output_0" + } + ], + "node_name:": "/Concat_158", + "node_outputs:": [ + { + "output_dimension:": "90 1 192 ", + "output_name:": "/Concat_158_output_0" + } + ] + }, + { + "node_id:": 850, + "node_inputs:": [ + { + "input_dimension:": "18 1 512 ", + "input_name:": "/in_proj_8/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1118_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_76", + "node_outputs:": [ + { + "output_dimension:": "18 1 32 ", + "output_name:": "/Slice_76_output_0" + } + ] + }, + { + "node_id:": 851, + "node_inputs:": [ + { + "input_dimension:": "18 1 32 ", + "input_name:": "/Slice_76_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_161_output_0" + } + ], + "node_name:": "/Reshape_89", + "node_outputs:": [ + { + "output_dimension:": "18 1 8 4 ", + "output_name:": "/Reshape_89_output_0" + } + ] + }, + { + "node_id:": 852, + "node_inputs:": [ + { + "input_dimension:": "18 1 8 4 ", + "input_name:": "/Reshape_89_output_0" + } + ], + "node_name:": "/Transpose_99", + "node_outputs:": [ + { + "output_dimension:": "1 8 18 4 ", + "output_name:": "/Transpose_99_output_0" + } + ] + }, + { + "node_id:": 853, + "node_inputs:": [ + { + "input_dimension:": "1 8 18 4 ", + "input_name:": "/Transpose_99_output_0" + }, + { + "input_dimension:": "1 8 4 107 ", + "input_name:": "/Transpose_101_output_0" + } + ], + "node_name:": "/MatMul_40", + "node_outputs:": [ + { + "output_dimension:": "1 8 18 107 ", + "output_name:": "/MatMul_40_output_0" + } + ] + }, + { + "node_id:": 854, + "node_inputs:": [ + { + "input_dimension:": "90 1 192 ", + "input_name:": "/Concat_158_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_266_output_0" + } + ], + "node_name:": "/Reshape_90", + "node_outputs:": [ + { + "output_dimension:": "90 1 8 24 ", + "output_name:": "/Reshape_90_output_0" + } + ] + }, + { + "node_id:": 855, + "node_inputs:": [ + { + "input_dimension:": "90 1 8 24 ", + "input_name:": "/Reshape_90_output_0" + } + ], + "node_name:": "/Transpose_100", + "node_outputs:": [ + { + "output_dimension:": "1 8 24 90 ", + "output_name:": "/Transpose_100_output_0" + } + ] + }, + { + "node_id:": 856, + "node_inputs:": [ + { + "input_dimension:": "18 1 512 ", + "input_name:": "/in_proj_8/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_285_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_73", + "node_outputs:": [ + { + "output_dimension:": "18 1 192 ", + "output_name:": "/Slice_73_output_0" + } + ] + }, + { + "node_id:": 857, + "node_inputs:": [ + { + "input_dimension:": "18 1 192 ", + "input_name:": "/Slice_73_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_160_output_0" + } + ], + "node_name:": "/Reshape_88", + "node_outputs:": [ + { + "output_dimension:": "18 1 8 24 ", + "output_name:": "/Reshape_88_output_0" + } + ] + }, + { + "node_id:": 858, + "node_inputs:": [ + { + "input_dimension:": "18 1 8 24 ", + "input_name:": "/Reshape_88_output_0" + } + ], + "node_name:": "/Transpose_98", + "node_outputs:": [ + { + "output_dimension:": "1 8 18 24 ", + "output_name:": "/Transpose_98_output_0" + } + ] + }, + { + "node_id:": 859, + "node_inputs:": [ + { + "input_dimension:": "1 8 18 24 ", + "input_name:": "/Transpose_98_output_0" + }, + { + "input_dimension:": "1 8 24 90 ", + "input_name:": "/Transpose_100_output_0" + } + ], + "node_name:": "/MatMul_41", + "node_outputs:": [ + { + "output_dimension:": "1 8 18 90 ", + "output_name:": "/MatMul_41_output_0" + } + ] + }, + { + "node_id:": 860, + "node_inputs:": [ + { + "input_dimension:": "1 8 18 107 ", + "input_name:": "/MatMul_40_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Concat_167_output_0" + } + ], + "node_name:": "/Reshape_93", + "node_outputs:": [ + { + "output_dimension:": "144 107 ", + "output_name:": "/Reshape_93_output_0" + } + ] + }, + { + "node_id:": 861, + "node_inputs:": [ + { + "input_dimension:": "144 107 ", + "input_name:": "/Reshape_93_output_0" + }, + { + "input_dimension:": "144 90 ", + "input_name:": "/Add_151_output_0" + } + ], + "node_name:": "/GatherElements_8", + "node_outputs:": [ + { + "output_dimension:": "144 90 ", + "output_name:": "/GatherElements_8_output_0" + } + ] + }, + { + "node_id:": 862, + "node_inputs:": [ + { + "input_dimension:": "144 90 ", + "input_name:": "/GatherElements_8_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_168_output_0" + } + ], + "node_name:": "/Reshape_94", + "node_outputs:": [ + { + "output_dimension:": "1 8 18 90 ", + "output_name:": "/Reshape_94_output_0" + } + ] + }, + { + "node_id:": 863, + "node_inputs:": [ + { + "input_dimension:": "1 8 18 90 ", + "input_name:": "/MatMul_41_output_0" + }, + { + "input_dimension:": "1 8 18 90 ", + "input_name:": "/Reshape_94_output_0" + } + ], + "node_name:": "/Add_152", + "node_outputs:": [ + { + "output_dimension:": "1 8 18 90 ", + "output_name:": "/Add_152_output_0" + } + ] + }, + { + "node_id:": 864, + "node_inputs:": [ + { + "input_dimension:": "1 8 18 90 ", + "input_name:": "/Add_152_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_169_output_0" + } + ], + "node_name:": "/Reshape_95", + "node_outputs:": [ + { + "output_dimension:": "8 18 90 ", + "output_name:": "/Reshape_95_output_0" + } + ] + }, + { + "node_id:": 865, + "node_inputs:": [ + { + "input_dimension:": "8 18 90 ", + "input_name:": "/Reshape_95_output_0" + } + ], + "node_name:": "/Softmax_8", + "node_outputs:": [ + { + "output_dimension:": "8 18 90 ", + "output_name:": "/Softmax_8_output_0" + } + ] + }, + { + "node_id:": 866, + "node_inputs:": [ + { + "input_dimension:": "18 1 512 ", + "input_name:": "/in_proj_8/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_568_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1118_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_75", + "node_outputs:": [ + { + "output_dimension:": "18 1 96 ", + "output_name:": "/Slice_75_output_0" + } + ] + }, + { + "node_id:": 867, + "node_inputs:": [ + { + "input_dimension:": "3 72 1 96 ", + "input_name:": "cached_val_2" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_404_output_0" + } + ], + "node_name:": "/Gather_174", + "node_outputs:": [ + { + "output_dimension:": "72 1 96 ", + "output_name:": "/Gather_174_output_0" + } + ] + }, + { + "node_id:": 868, + "node_inputs:": [ + { + "input_dimension:": "72 1 96 ", + "input_name:": "/Gather_174_output_0" + }, + { + "input_dimension:": "18 1 96 ", + "input_name:": "/Slice_75_output_0" + } + ], + "node_name:": "/Concat_159", + "node_outputs:": [ + { + "output_dimension:": "90 1 96 ", + "output_name:": "/Concat_159_output_0" + } + ] + }, + { + "node_id:": 869, + "node_inputs:": [ + { + "input_dimension:": "90 1 96 ", + "input_name:": "/Concat_159_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_163_output_0" + } + ], + "node_name:": "/Reshape_91", + "node_outputs:": [ + { + "output_dimension:": "90 8 12 ", + "output_name:": "/Reshape_91_output_0" + } + ] + }, + { + "node_id:": 870, + "node_inputs:": [ + { + "input_dimension:": "90 8 12 ", + "input_name:": "/Reshape_91_output_0" + } + ], + "node_name:": "/Transpose_97", + "node_outputs:": [ + { + "output_dimension:": "8 90 12 ", + "output_name:": "/Transpose_97_output_0" + } + ] + }, + { + "node_id:": 871, + "node_inputs:": [ + { + "input_dimension:": "8 18 90 ", + "input_name:": "/Softmax_8_output_0" + }, + { + "input_dimension:": "8 90 12 ", + "input_name:": "/Transpose_97_output_0" + } + ], + "node_name:": "/MatMul_42", + "node_outputs:": [ + { + "output_dimension:": "8 18 12 ", + "output_name:": "/MatMul_42_output_0" + } + ] + }, + { + "node_id:": 872, + "node_inputs:": [ + { + "input_dimension:": "8 18 12 ", + "input_name:": "/MatMul_42_output_0" + } + ], + "node_name:": "/Transpose_102", + "node_outputs:": [ + { + "output_dimension:": "18 8 12 ", + "output_name:": "/Transpose_102_output_0" + } + ] + }, + { + "node_id:": 873, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_149_output_0" + }, + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_153_output_0" + } + ], + "node_name:": "/Add_154", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Add_154_output_0" + } + ] + }, + { + "node_id:": 874, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_154_output_0" + } + ], + "node_name:": "/Transpose_103", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/Transpose_103_output_0" + } + ] + }, + { + "node_id:": 875, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/Transpose_103_output_0" + }, + { + "input_dimension:": "768 384 1 ", + "input_name:": "encoder.encoders.2.encoder.layers.2.conv_module1.pointwise_conv1.weight" + }, + { + "input_dimension:": "768 ", + "input_name:": "encoder.encoders.2.encoder.layers.2.conv_module1.pointwise_conv1.bias" + } + ], + "node_name:": "/pointwise_conv1_16/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 768 18 ", + "output_name:": "/pointwise_conv1_16/Conv_output_0" + } + ] + }, + { + "node_id:": 876, + "node_inputs:": [ + { + "input_dimension:": "1 768 18 ", + "input_name:": "/pointwise_conv1_16/Conv_output_0" + } + ], + "node_name:": "/Split_16", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/Split_16_output_0" + }, + { + "output_dimension:": "1 384 18 ", + "output_name:": "/Split_16_output_1" + } + ] + }, + { + "node_id:": 877, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/Split_16_output_1" + } + ], + "node_name:": "/Sigmoid_16", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/Sigmoid_16_output_0" + } + ] + }, + { + "node_id:": 878, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/Split_16_output_0" + }, + { + "input_dimension:": "1 384 18 ", + "input_name:": "/Sigmoid_16_output_0" + } + ], + "node_name:": "/Mul_95", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/Mul_95_output_0" + } + ] + }, + { + "node_id:": 879, + "node_inputs:": [ + { + "input_dimension:": "3 1 384 30 ", + "input_name:": "cached_conv1_2" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_404_output_0" + } + ], + "node_name:": "/Gather_176", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Gather_176_output_0" + } + ] + }, + { + "node_id:": 880, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Gather_176_output_0" + }, + { + "input_dimension:": "1 384 18 ", + "input_name:": "/Mul_95_output_0" + } + ], + "node_name:": "/Concat_171", + "node_outputs:": [ + { + "output_dimension:": "1 384 48 ", + "output_name:": "/Concat_171_output_0" + } + ] + }, + { + "node_id:": 881, + "node_inputs:": [ + { + "input_dimension:": "1 384 48 ", + "input_name:": "/Concat_171_output_0" + }, + { + "input_dimension:": "384 1 31 ", + "input_name:": "encoder.encoders.2.encoder.layers.2.conv_module1.depthwise_conv.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.2.encoder.layers.2.conv_module1.depthwise_conv.bias" + } + ], + "node_name:": "/depthwise_conv_16/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/depthwise_conv_16/Conv_output_0" + } + ] + }, + { + "node_id:": 882, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/depthwise_conv_16/Conv_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/activation_16/Sub", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/activation_16/Sub_output_0" + } + ] + }, + { + "node_id:": 883, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/activation_16/Sub_output_0" + } + ], + "node_name:": "/activation_16/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/activation_16/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 884, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/depthwise_conv_16/Conv_output_0" + }, + { + "input_dimension:": "1 384 18 ", + "input_name:": "/activation_16/Sigmoid_output_0" + } + ], + "node_name:": "/activation_16/Mul", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/activation_16/Mul_output_0" + } + ] + }, + { + "node_id:": 885, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/activation_16/Mul_output_0" + }, + { + "input_dimension:": "384 384 1 ", + "input_name:": "encoder.encoders.2.encoder.layers.2.conv_module1.pointwise_conv2.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.2.encoder.layers.2.conv_module1.pointwise_conv2.bias" + } + ], + "node_name:": "/pointwise_conv2_16/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/pointwise_conv2_16/Conv_output_0" + } + ] + }, + { + "node_id:": 886, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/pointwise_conv2_16/Conv_output_0" + } + ], + "node_name:": "/Transpose_104", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Transpose_104_output_0" + } + ] + }, + { + "node_id:": 887, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_154_output_0" + }, + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Transpose_104_output_0" + } + ], + "node_name:": "/Add_155", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Add_155_output_0" + } + ] + }, + { + "node_id:": 888, + "node_inputs:": [ + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward2/in_proj_8/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward2/activation_8/Sub", + "node_outputs:": [ + { + "output_dimension:": "18 1 2048 ", + "output_name:": "/feed_forward2/activation_8/Sub_output_0" + } + ] + }, + { + "node_id:": 889, + "node_inputs:": [ + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward2/activation_8/Sub_output_0" + } + ], + "node_name:": "/feed_forward2/activation_8/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "18 1 2048 ", + "output_name:": "/feed_forward2/activation_8/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 890, + "node_inputs:": [ + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward2/in_proj_8/Add_output_0" + }, + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward2/activation_8/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward2/activation_8/Mul", + "node_outputs:": [ + { + "output_dimension:": "18 1 2048 ", + "output_name:": "/feed_forward2/activation_8/Mul_output_0" + } + ] + }, + { + "node_id:": 891, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_155_output_0" + }, + { + "input_dimension:": "18 1 384 ", + "input_name:": "/feed_forward2/out_proj_8/Add_output_0" + } + ], + "node_name:": "/Add_156", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Add_156_output_0" + } + ] + }, + { + "node_id:": 892, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_156_output_0" + }, + { + "input_dimension:": "384 96 ", + "input_name:": "onnx::MatMul_7913" + } + ], + "node_name:": "/in_proj2_8/MatMul", + "node_outputs:": [ + { + "output_dimension:": "18 1 96 ", + "output_name:": "/in_proj2_8/MatMul_output_0" + } + ] + }, + { + "node_id:": 893, + "node_inputs:": [ + { + "input_dimension:": "3 72 1 96 ", + "input_name:": "cached_val2_2" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_404_output_0" + } + ], + "node_name:": "/Gather_175", + "node_outputs:": [ + { + "output_dimension:": "72 1 96 ", + "output_name:": "/Gather_175_output_0" + } + ] + }, + { + "node_id:": 894, + "node_inputs:": [ + { + "input_dimension:": "72 1 96 ", + "input_name:": "/Gather_175_output_0" + }, + { + "input_dimension:": "18 1 96 ", + "input_name:": "/in_proj2_8/MatMul_output_0" + } + ], + "node_name:": "/Concat_172", + "node_outputs:": [ + { + "output_dimension:": "90 1 96 ", + "output_name:": "/Concat_172_output_0" + } + ] + }, + { + "node_id:": 895, + "node_inputs:": [ + { + "input_dimension:": "90 1 96 ", + "input_name:": "/Concat_172_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_173_output_0" + } + ], + "node_name:": "/Reshape_97", + "node_outputs:": [ + { + "output_dimension:": "90 8 12 ", + "output_name:": "/Reshape_97_output_0" + } + ] + }, + { + "node_id:": 896, + "node_inputs:": [ + { + "input_dimension:": "90 8 12 ", + "input_name:": "/Reshape_97_output_0" + } + ], + "node_name:": "/Transpose_105", + "node_outputs:": [ + { + "output_dimension:": "8 90 12 ", + "output_name:": "/Transpose_105_output_0" + } + ] + }, + { + "node_id:": 897, + "node_inputs:": [ + { + "input_dimension:": "8 18 90 ", + "input_name:": "/Softmax_8_output_0" + }, + { + "input_dimension:": "8 90 12 ", + "input_name:": "/Transpose_105_output_0" + } + ], + "node_name:": "/MatMul_44", + "node_outputs:": [ + { + "output_dimension:": "8 18 12 ", + "output_name:": "/MatMul_44_output_0" + } + ] + }, + { + "node_id:": 898, + "node_inputs:": [ + { + "input_dimension:": "8 18 12 ", + "input_name:": "/MatMul_44_output_0" + } + ], + "node_name:": "/Transpose_106", + "node_outputs:": [ + { + "output_dimension:": "18 8 12 ", + "output_name:": "/Transpose_106_output_0" + } + ] + }, + { + "node_id:": 899, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_156_output_0" + }, + { + "input_dimension:": "18 1 384 ", + "input_name:": "/out_proj2_8/Add_output_0" + } + ], + "node_name:": "/Add_158", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Add_158_output_0" + } + ] + }, + { + "node_id:": 900, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_158_output_0" + } + ], + "node_name:": "/Transpose_107", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/Transpose_107_output_0" + } + ] + }, + { + "node_id:": 901, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/Transpose_107_output_0" + }, + { + "input_dimension:": "768 384 1 ", + "input_name:": "encoder.encoders.2.encoder.layers.2.conv_module2.pointwise_conv1.weight" + }, + { + "input_dimension:": "768 ", + "input_name:": "encoder.encoders.2.encoder.layers.2.conv_module2.pointwise_conv1.bias" + } + ], + "node_name:": "/pointwise_conv1_17/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 768 18 ", + "output_name:": "/pointwise_conv1_17/Conv_output_0" + } + ] + }, + { + "node_id:": 902, + "node_inputs:": [ + { + "input_dimension:": "1 768 18 ", + "input_name:": "/pointwise_conv1_17/Conv_output_0" + } + ], + "node_name:": "/Split_17", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/Split_17_output_0" + }, + { + "output_dimension:": "1 384 18 ", + "output_name:": "/Split_17_output_1" + } + ] + }, + { + "node_id:": 903, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/Split_17_output_1" + } + ], + "node_name:": "/Sigmoid_17", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/Sigmoid_17_output_0" + } + ] + }, + { + "node_id:": 904, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/Split_17_output_0" + }, + { + "input_dimension:": "1 384 18 ", + "input_name:": "/Sigmoid_17_output_0" + } + ], + "node_name:": "/Mul_97", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/Mul_97_output_0" + } + ] + }, + { + "node_id:": 905, + "node_inputs:": [ + { + "input_dimension:": "3 1 384 30 ", + "input_name:": "cached_conv2_2" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_404_output_0" + } + ], + "node_name:": "/Gather_177", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Gather_177_output_0" + } + ] + }, + { + "node_id:": 906, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Gather_177_output_0" + }, + { + "input_dimension:": "1 384 18 ", + "input_name:": "/Mul_97_output_0" + } + ], + "node_name:": "/Concat_175", + "node_outputs:": [ + { + "output_dimension:": "1 384 48 ", + "output_name:": "/Concat_175_output_0" + } + ] + }, + { + "node_id:": 907, + "node_inputs:": [ + { + "input_dimension:": "1 384 48 ", + "input_name:": "/Concat_175_output_0" + }, + { + "input_dimension:": "384 1 31 ", + "input_name:": "encoder.encoders.2.encoder.layers.2.conv_module2.depthwise_conv.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.2.encoder.layers.2.conv_module2.depthwise_conv.bias" + } + ], + "node_name:": "/depthwise_conv_17/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/depthwise_conv_17/Conv_output_0" + } + ] + }, + { + "node_id:": 908, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/depthwise_conv_17/Conv_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/activation_17/Sub", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/activation_17/Sub_output_0" + } + ] + }, + { + "node_id:": 909, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/activation_17/Sub_output_0" + } + ], + "node_name:": "/activation_17/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/activation_17/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 910, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/depthwise_conv_17/Conv_output_0" + }, + { + "input_dimension:": "1 384 18 ", + "input_name:": "/activation_17/Sigmoid_output_0" + } + ], + "node_name:": "/activation_17/Mul", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/activation_17/Mul_output_0" + } + ] + }, + { + "node_id:": 911, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/activation_17/Mul_output_0" + }, + { + "input_dimension:": "384 384 1 ", + "input_name:": "encoder.encoders.2.encoder.layers.2.conv_module2.pointwise_conv2.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.2.encoder.layers.2.conv_module2.pointwise_conv2.bias" + } + ], + "node_name:": "/pointwise_conv2_17/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 18 ", + "output_name:": "/pointwise_conv2_17/Conv_output_0" + } + ] + }, + { + "node_id:": 912, + "node_inputs:": [ + { + "input_dimension:": "1 384 18 ", + "input_name:": "/pointwise_conv2_17/Conv_output_0" + } + ], + "node_name:": "/Transpose_108", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Transpose_108_output_0" + } + ] + }, + { + "node_id:": 913, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_158_output_0" + }, + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Transpose_108_output_0" + } + ], + "node_name:": "/Add_159", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Add_159_output_0" + } + ] + }, + { + "node_id:": 914, + "node_inputs:": [ + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward3/in_proj_8/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward3/activation_8/Sub", + "node_outputs:": [ + { + "output_dimension:": "18 1 2048 ", + "output_name:": "/feed_forward3/activation_8/Sub_output_0" + } + ] + }, + { + "node_id:": 915, + "node_inputs:": [ + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward3/activation_8/Sub_output_0" + } + ], + "node_name:": "/feed_forward3/activation_8/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "18 1 2048 ", + "output_name:": "/feed_forward3/activation_8/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 916, + "node_inputs:": [ + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward3/in_proj_8/Add_output_0" + }, + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward3/activation_8/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward3/activation_8/Mul", + "node_outputs:": [ + { + "output_dimension:": "18 1 2048 ", + "output_name:": "/feed_forward3/activation_8/Mul_output_0" + } + ] + }, + { + "node_id:": 917, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_159_output_0" + }, + { + "input_dimension:": "18 1 384 ", + "input_name:": "/feed_forward3/out_proj_8/Add_output_0" + } + ], + "node_name:": "/Add_160", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Add_160_output_0" + } + ] + }, + { + "node_id:": 918, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_160_output_0" + }, + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_160_output_0" + } + ], + "node_name:": "/norm_final_8/Mul", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/norm_final_8/Mul_output_0" + } + ] + }, + { + "node_id:": 919, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/norm_final_8/Mul_output_0" + } + ], + "node_name:": "/norm_final_8/ReduceMean", + "node_outputs:": [ + { + "output_dimension:": "18 1 1 ", + "output_name:": "/norm_final_8/ReduceMean_output_0" + } + ] + }, + { + "node_id:": 920, + "node_inputs:": [ + { + "input_dimension:": "18 1 1 ", + "input_name:": "/norm_final_8/ReduceMean_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/norm_final_8/Constant_output_0" + } + ], + "node_name:": "/norm_final_8/Add", + "node_outputs:": [ + { + "output_dimension:": "18 1 1 ", + "output_name:": "/norm_final_8/Add_output_0" + } + ] + }, + { + "node_id:": 921, + "node_inputs:": [ + { + "input_dimension:": "18 1 1 ", + "input_name:": "/norm_final_8/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/norm_final_1/Constant_1_output_0" + } + ], + "node_name:": "/norm_final_8/Pow", + "node_outputs:": [ + { + "output_dimension:": "18 1 1 ", + "output_name:": "/norm_final_8/Pow_output_0" + } + ] + }, + { + "node_id:": 922, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_160_output_0" + }, + { + "input_dimension:": "18 1 1 ", + "input_name:": "/norm_final_8/Pow_output_0" + } + ], + "node_name:": "/norm_final_8/Mul_1", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/norm_final_8/Mul_1_output_0" + } + ] + }, + { + "node_id:": 923, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/norm_final_8/Mul_1_output_0" + }, + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_143_output_0" + } + ], + "node_name:": "/Sub_26", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Sub_26_output_0" + } + ] + }, + { + "node_id:": 924, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Sub_26_output_0" + }, + { + "input_dimension:": "", + "input_name:": "encoder.encoders.2.encoder.layers.2.bypass_scale" + } + ], + "node_name:": "/Mul_98", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Mul_98_output_0" + } + ] + }, + { + "node_id:": 925, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_143_output_0" + }, + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Mul_98_output_0" + } + ], + "node_name:": "/Add_161", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Add_161_output_0" + } + ] + }, + { + "node_id:": 926, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_161_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/upsample_1/Unsqueeze", + "node_outputs:": [ + { + "output_dimension:": "18 1 1 384 ", + "output_name:": "/upsample_1/Unsqueeze_output_0" + } + ] + }, + { + "node_id:": 927, + "node_inputs:": [ + { + "input_dimension:": "18 1 1 384 ", + "input_name:": "/upsample_1/Unsqueeze_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/upsample_1/Where_output_0" + } + ], + "node_name:": "/upsample_1/Expand", + "node_outputs:": [ + { + "output_dimension:": "18 4 1 384 ", + "output_name:": "/upsample_1/Expand_output_0" + } + ] + }, + { + "node_id:": 928, + "node_inputs:": [ + { + "input_dimension:": "18 4 1 384 ", + "input_name:": "/upsample_1/Expand_output_0" + }, + { + "input_dimension:": "4 1 384 ", + "input_name:": "onnx::Add_7927" + } + ], + "node_name:": "/upsample_1/Add", + "node_outputs:": [ + { + "output_dimension:": "18 4 1 384 ", + "output_name:": "/upsample_1/Add_output_0" + } + ] + }, + { + "node_id:": 929, + "node_inputs:": [ + { + "input_dimension:": "18 4 1 384 ", + "input_name:": "/upsample_1/Add_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/upsample_1/Concat_1_output_0" + } + ], + "node_name:": "/upsample_1/Reshape_1", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/upsample_1/Reshape_1_output_0" + } + ] + }, + { + "node_id:": 930, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/upsample_1/Reshape_1_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_370_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_82", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/Slice_82_output_0" + } + ] + }, + { + "node_id:": 931, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Slice_82_output_0" + }, + { + "input_dimension:": "", + "input_name:": "onnx::Mul_7931" + } + ], + "node_name:": "/out_combiner_1/Mul_1", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/out_combiner_1/Mul_1_output_0" + } + ] + }, + { + "node_id:": 932, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/out_combiner/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "encoder.encoders.2.out_combiner.weight1" + } + ], + "node_name:": "/out_combiner_1/Mul", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/out_combiner_1/Mul_output_0" + } + ] + }, + { + "node_id:": 933, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/out_combiner_1/Mul_output_0" + }, + { + "input_dimension:": "72 1 384 ", + "input_name:": "/out_combiner_1/Mul_1_output_0" + } + ], + "node_name:": "/out_combiner_1/Add", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/out_combiner_1/Add_output_0" + } + ] + }, + { + "node_id:": 934, + "node_inputs:": [ + { + "input_dimension:": "2 1 ", + "input_name:": "cached_len_3" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_pos_1/Constant_2_output_0" + } + ], + "node_name:": "/Gather_214", + "node_outputs:": [ + { + "output_dimension:": "1 ", + "output_name:": "/Gather_214_output_0" + } + ] + }, + { + "node_id:": 935, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_214_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_408", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_408_output_0" + } + ] + }, + { + "node_id:": 936, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_214_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Unsqueeze_405", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_405_output_0" + } + ] + }, + { + "node_id:": 937, + "node_inputs:": [ + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_405_output_0" + } + ], + "node_name:": "/Cast_50", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Cast_50_output_0" + } + ] + }, + { + "node_id:": 938, + "node_inputs:": [ + { + "input_dimension:": "2 1 384 ", + "input_name:": "cached_avg_3" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_pos_1/Constant_2_output_0" + } + ], + "node_name:": "/Gather_215", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Gather_215_output_0" + } + ] + }, + { + "node_id:": 939, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Gather_215_output_0" + }, + { + "input_dimension:": "1 1 ", + "input_name:": "/Cast_50_output_0" + } + ], + "node_name:": "/Mul_110", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Mul_110_output_0" + } + ] + }, + { + "node_id:": 940, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Mul_110_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_406", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 ", + "output_name:": "/Unsqueeze_406_output_0" + } + ] + }, + { + "node_id:": 941, + "node_inputs:": [ + { + "input_dimension:": "2 1 ", + "input_name:": "cached_len_3" + }, + { + "input_dimension:": "", + "input_name:": "/upsample_3/Constant_output_0" + } + ], + "node_name:": "/Gather_193", + "node_outputs:": [ + { + "output_dimension:": "1 ", + "output_name:": "/Gather_193_output_0" + } + ] + }, + { + "node_id:": 942, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_193_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_374", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_374_output_0" + } + ] + }, + { + "node_id:": 943, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_193_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Unsqueeze_371", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_371_output_0" + } + ] + }, + { + "node_id:": 944, + "node_inputs:": [ + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_371_output_0" + } + ], + "node_name:": "/Cast_45", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Cast_45_output_0" + } + ] + }, + { + "node_id:": 945, + "node_inputs:": [ + { + "input_dimension:": "2 1 384 ", + "input_name:": "cached_avg_3" + }, + { + "input_dimension:": "", + "input_name:": "/upsample_3/Constant_output_0" + } + ], + "node_name:": "/Gather_194", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Gather_194_output_0" + } + ] + }, + { + "node_id:": 946, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Gather_194_output_0" + }, + { + "input_dimension:": "1 1 ", + "input_name:": "/Cast_45_output_0" + } + ], + "node_name:": "/Mul_99", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Mul_99_output_0" + } + ] + }, + { + "node_id:": 947, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Mul_99_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_372", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 ", + "output_name:": "/Unsqueeze_372_output_0" + } + ] + }, + { + "node_id:": 948, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/out_combiner_1/Add_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/downsample_2/Concat_output_0" + } + ], + "node_name:": "/downsample_2/Reshape", + "node_outputs:": [ + { + "output_dimension:": "9 8 1 384 ", + "output_name:": "/downsample_2/Reshape_output_0" + } + ] + }, + { + "node_id:": 949, + "node_inputs:": [ + { + "input_dimension:": "9 8 1 384 ", + "input_name:": "/downsample_2/Reshape_output_0" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.3.downsample.query" + } + ], + "node_name:": "/downsample_2/Mul", + "node_outputs:": [ + { + "output_dimension:": "9 8 1 384 ", + "output_name:": "/downsample_2/Mul_output_0" + } + ] + }, + { + "node_id:": 950, + "node_inputs:": [ + { + "input_dimension:": "9 8 1 384 ", + "input_name:": "/downsample_2/Mul_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_246_output_0" + } + ], + "node_name:": "/downsample_2/ReduceSum", + "node_outputs:": [ + { + "output_dimension:": "9 8 1 1 ", + "output_name:": "/downsample_2/ReduceSum_output_0" + } + ] + }, + { + "node_id:": 951, + "node_inputs:": [ + { + "input_dimension:": "9 8 1 1 ", + "input_name:": "/downsample_2/ReduceSum_output_0" + } + ], + "node_name:": "/downsample_2/Softmax", + "node_outputs:": [ + { + "output_dimension:": "9 8 1 1 ", + "output_name:": "/downsample_2/Softmax_output_0" + } + ] + }, + { + "node_id:": 952, + "node_inputs:": [ + { + "input_dimension:": "9 8 1 384 ", + "input_name:": "/downsample_2/Reshape_output_0" + }, + { + "input_dimension:": "9 8 1 1 ", + "input_name:": "/downsample_2/Softmax_output_0" + } + ], + "node_name:": "/downsample_2/Mul_1", + "node_outputs:": [ + { + "output_dimension:": "9 8 1 384 ", + "output_name:": "/downsample_2/Mul_1_output_0" + } + ] + }, + { + "node_id:": 953, + "node_inputs:": [ + { + "input_dimension:": "9 8 1 384 ", + "input_name:": "/downsample_2/Mul_1_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/downsample_2/ReduceSum_1", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/downsample_2/ReduceSum_1_output_0" + } + ] + }, + { + "node_id:": 954, + "node_inputs:": [ + { + "input_dimension:": "9 1 2048 ", + "input_name:": "/feed_forward1/in_proj_9/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward1/activation_9/Sub", + "node_outputs:": [ + { + "output_dimension:": "9 1 2048 ", + "output_name:": "/feed_forward1/activation_9/Sub_output_0" + } + ] + }, + { + "node_id:": 955, + "node_inputs:": [ + { + "input_dimension:": "9 1 2048 ", + "input_name:": "/feed_forward1/activation_9/Sub_output_0" + } + ], + "node_name:": "/feed_forward1/activation_9/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "9 1 2048 ", + "output_name:": "/feed_forward1/activation_9/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 956, + "node_inputs:": [ + { + "input_dimension:": "9 1 2048 ", + "input_name:": "/feed_forward1/in_proj_9/Add_output_0" + }, + { + "input_dimension:": "9 1 2048 ", + "input_name:": "/feed_forward1/activation_9/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward1/activation_9/Mul", + "node_outputs:": [ + { + "output_dimension:": "9 1 2048 ", + "output_name:": "/feed_forward1/activation_9/Mul_output_0" + } + ] + }, + { + "node_id:": 957, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/downsample_2/ReduceSum_1_output_0" + }, + { + "input_dimension:": "9 1 384 ", + "input_name:": "/feed_forward1/out_proj_9/Add_output_0" + } + ], + "node_name:": "/Add_162", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/Add_162_output_0" + } + ] + }, + { + "node_id:": 958, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Add_162_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_631_output_0" + } + ], + "node_name:": "/CumSum_9", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/CumSum_9_output_0" + } + ] + }, + { + "node_id:": 959, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/CumSum_9_output_0" + }, + { + "input_dimension:": "1 1 384 ", + "input_name:": "/Unsqueeze_372_output_0" + } + ], + "node_name:": "/Add_163", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/Add_163_output_0" + } + ] + }, + { + "node_id:": 960, + "node_inputs:": [ + { + "input_dimension:": "9 1 ", + "input_name:": "/Unsqueeze_373_output_0" + }, + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_374_output_0" + } + ], + "node_name:": "/Add_165", + "node_outputs:": [ + { + "output_dimension:": "9 1 ", + "output_name:": "/Add_165_output_0" + } + ] + }, + { + "node_id:": 961, + "node_inputs:": [ + { + "input_dimension:": "9 1 ", + "input_name:": "/Add_165_output_0" + } + ], + "node_name:": "/Cast_47", + "node_outputs:": [ + { + "output_dimension:": "9 1 ", + "output_name:": "/Cast_47_output_0" + } + ] + }, + { + "node_id:": 962, + "node_inputs:": [ + { + "input_dimension:": "9 1 ", + "input_name:": "/Cast_47_output_0" + } + ], + "node_name:": "/Reciprocal_9", + "node_outputs:": [ + { + "output_dimension:": "9 1 ", + "output_name:": "/Reciprocal_9_output_0" + } + ] + }, + { + "node_id:": 963, + "node_inputs:": [ + { + "input_dimension:": "9 1 ", + "input_name:": "/Reciprocal_9_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + } + ], + "node_name:": "/Unsqueeze_375", + "node_outputs:": [ + { + "output_dimension:": "9 1 1 ", + "output_name:": "/Unsqueeze_375_output_0" + } + ] + }, + { + "node_id:": 964, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Add_163_output_0" + }, + { + "input_dimension:": "9 1 1 ", + "input_name:": "/Unsqueeze_375_output_0" + } + ], + "node_name:": "/Mul_101", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/Mul_101_output_0" + } + ] + }, + { + "node_id:": 965, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Mul_101_output_0" + }, + { + "input_dimension:": "384 384 ", + "input_name:": "onnx::MatMul_7938" + } + ], + "node_name:": "/proj_9/MatMul", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/proj_9/MatMul_output_0" + } + ] + }, + { + "node_id:": 966, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Add_162_output_0" + }, + { + "input_dimension:": "9 1 384 ", + "input_name:": "/proj_9/MatMul_output_0" + } + ], + "node_name:": "/Add_167", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/Add_167_output_0" + } + ] + }, + { + "node_id:": 967, + "node_inputs:": [ + { + "input_dimension:": "9 1 512 ", + "input_name:": "/in_proj_9/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_285_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_568_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_84", + "node_outputs:": [ + { + "output_dimension:": "9 1 192 ", + "output_name:": "/Slice_84_output_0" + } + ] + }, + { + "node_id:": 968, + "node_inputs:": [ + { + "input_dimension:": "2 36 1 192 ", + "input_name:": "cached_key_3" + }, + { + "input_dimension:": "", + "input_name:": "/upsample_3/Constant_output_0" + } + ], + "node_name:": "/Gather_195", + "node_outputs:": [ + { + "output_dimension:": "36 1 192 ", + "output_name:": "/Gather_195_output_0" + } + ] + }, + { + "node_id:": 969, + "node_inputs:": [ + { + "input_dimension:": "36 1 192 ", + "input_name:": "/Gather_195_output_0" + }, + { + "input_dimension:": "9 1 192 ", + "input_name:": "/Slice_84_output_0" + } + ], + "node_name:": "/Concat_183", + "node_outputs:": [ + { + "output_dimension:": "45 1 192 ", + "output_name:": "/Concat_183_output_0" + } + ] + }, + { + "node_id:": 970, + "node_inputs:": [ + { + "input_dimension:": "9 1 512 ", + "input_name:": "/in_proj_9/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1118_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_86", + "node_outputs:": [ + { + "output_dimension:": "9 1 32 ", + "output_name:": "/Slice_86_output_0" + } + ] + }, + { + "node_id:": 971, + "node_inputs:": [ + { + "input_dimension:": "9 1 32 ", + "input_name:": "/Slice_86_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_186_output_0" + } + ], + "node_name:": "/Reshape_100", + "node_outputs:": [ + { + "output_dimension:": "9 1 8 4 ", + "output_name:": "/Reshape_100_output_0" + } + ] + }, + { + "node_id:": 972, + "node_inputs:": [ + { + "input_dimension:": "9 1 8 4 ", + "input_name:": "/Reshape_100_output_0" + } + ], + "node_name:": "/Transpose_111", + "node_outputs:": [ + { + "output_dimension:": "1 8 9 4 ", + "output_name:": "/Transpose_111_output_0" + } + ] + }, + { + "node_id:": 973, + "node_inputs:": [ + { + "input_dimension:": "1 8 9 4 ", + "input_name:": "/Transpose_111_output_0" + }, + { + "input_dimension:": "1 8 4 53 ", + "input_name:": "/Transpose_113_output_0" + } + ], + "node_name:": "/MatMul_45", + "node_outputs:": [ + { + "output_dimension:": "1 8 9 53 ", + "output_name:": "/MatMul_45_output_0" + } + ] + }, + { + "node_id:": 974, + "node_inputs:": [ + { + "input_dimension:": "45 1 192 ", + "input_name:": "/Concat_183_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_266_output_0" + } + ], + "node_name:": "/Reshape_101", + "node_outputs:": [ + { + "output_dimension:": "45 1 8 24 ", + "output_name:": "/Reshape_101_output_0" + } + ] + }, + { + "node_id:": 975, + "node_inputs:": [ + { + "input_dimension:": "45 1 8 24 ", + "input_name:": "/Reshape_101_output_0" + } + ], + "node_name:": "/Transpose_112", + "node_outputs:": [ + { + "output_dimension:": "1 8 24 45 ", + "output_name:": "/Transpose_112_output_0" + } + ] + }, + { + "node_id:": 976, + "node_inputs:": [ + { + "input_dimension:": "9 1 512 ", + "input_name:": "/in_proj_9/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_285_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_83", + "node_outputs:": [ + { + "output_dimension:": "9 1 192 ", + "output_name:": "/Slice_83_output_0" + } + ] + }, + { + "node_id:": 977, + "node_inputs:": [ + { + "input_dimension:": "9 1 192 ", + "input_name:": "/Slice_83_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_185_output_0" + } + ], + "node_name:": "/Reshape_99", + "node_outputs:": [ + { + "output_dimension:": "9 1 8 24 ", + "output_name:": "/Reshape_99_output_0" + } + ] + }, + { + "node_id:": 978, + "node_inputs:": [ + { + "input_dimension:": "9 1 8 24 ", + "input_name:": "/Reshape_99_output_0" + } + ], + "node_name:": "/Transpose_110", + "node_outputs:": [ + { + "output_dimension:": "1 8 9 24 ", + "output_name:": "/Transpose_110_output_0" + } + ] + }, + { + "node_id:": 979, + "node_inputs:": [ + { + "input_dimension:": "1 8 9 24 ", + "input_name:": "/Transpose_110_output_0" + }, + { + "input_dimension:": "1 8 24 45 ", + "input_name:": "/Transpose_112_output_0" + } + ], + "node_name:": "/MatMul_46", + "node_outputs:": [ + { + "output_dimension:": "1 8 9 45 ", + "output_name:": "/MatMul_46_output_0" + } + ] + }, + { + "node_id:": 980, + "node_inputs:": [ + { + "input_dimension:": "1 8 9 53 ", + "input_name:": "/MatMul_45_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Concat_192_output_0" + } + ], + "node_name:": "/Reshape_104", + "node_outputs:": [ + { + "output_dimension:": "72 53 ", + "output_name:": "/Reshape_104_output_0" + } + ] + }, + { + "node_id:": 981, + "node_inputs:": [ + { + "input_dimension:": "72 53 ", + "input_name:": "/Reshape_104_output_0" + }, + { + "input_dimension:": "72 45 ", + "input_name:": "/Add_169_output_0" + } + ], + "node_name:": "/GatherElements_9", + "node_outputs:": [ + { + "output_dimension:": "72 45 ", + "output_name:": "/GatherElements_9_output_0" + } + ] + }, + { + "node_id:": 982, + "node_inputs:": [ + { + "input_dimension:": "72 45 ", + "input_name:": "/GatherElements_9_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_193_output_0" + } + ], + "node_name:": "/Reshape_105", + "node_outputs:": [ + { + "output_dimension:": "1 8 9 45 ", + "output_name:": "/Reshape_105_output_0" + } + ] + }, + { + "node_id:": 983, + "node_inputs:": [ + { + "input_dimension:": "1 8 9 45 ", + "input_name:": "/MatMul_46_output_0" + }, + { + "input_dimension:": "1 8 9 45 ", + "input_name:": "/Reshape_105_output_0" + } + ], + "node_name:": "/Add_170", + "node_outputs:": [ + { + "output_dimension:": "1 8 9 45 ", + "output_name:": "/Add_170_output_0" + } + ] + }, + { + "node_id:": 984, + "node_inputs:": [ + { + "input_dimension:": "1 8 9 45 ", + "input_name:": "/Add_170_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_194_output_0" + } + ], + "node_name:": "/Reshape_106", + "node_outputs:": [ + { + "output_dimension:": "8 9 45 ", + "output_name:": "/Reshape_106_output_0" + } + ] + }, + { + "node_id:": 985, + "node_inputs:": [ + { + "input_dimension:": "8 9 45 ", + "input_name:": "/Reshape_106_output_0" + } + ], + "node_name:": "/Softmax_9", + "node_outputs:": [ + { + "output_dimension:": "8 9 45 ", + "output_name:": "/Softmax_9_output_0" + } + ] + }, + { + "node_id:": 986, + "node_inputs:": [ + { + "input_dimension:": "9 1 512 ", + "input_name:": "/in_proj_9/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_568_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1118_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_85", + "node_outputs:": [ + { + "output_dimension:": "9 1 96 ", + "output_name:": "/Slice_85_output_0" + } + ] + }, + { + "node_id:": 987, + "node_inputs:": [ + { + "input_dimension:": "2 36 1 96 ", + "input_name:": "cached_val_3" + }, + { + "input_dimension:": "", + "input_name:": "/upsample_3/Constant_output_0" + } + ], + "node_name:": "/Gather_196", + "node_outputs:": [ + { + "output_dimension:": "36 1 96 ", + "output_name:": "/Gather_196_output_0" + } + ] + }, + { + "node_id:": 988, + "node_inputs:": [ + { + "input_dimension:": "36 1 96 ", + "input_name:": "/Gather_196_output_0" + }, + { + "input_dimension:": "9 1 96 ", + "input_name:": "/Slice_85_output_0" + } + ], + "node_name:": "/Concat_184", + "node_outputs:": [ + { + "output_dimension:": "45 1 96 ", + "output_name:": "/Concat_184_output_0" + } + ] + }, + { + "node_id:": 989, + "node_inputs:": [ + { + "input_dimension:": "45 1 96 ", + "input_name:": "/Concat_184_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_188_output_0" + } + ], + "node_name:": "/Reshape_102", + "node_outputs:": [ + { + "output_dimension:": "45 8 12 ", + "output_name:": "/Reshape_102_output_0" + } + ] + }, + { + "node_id:": 990, + "node_inputs:": [ + { + "input_dimension:": "45 8 12 ", + "input_name:": "/Reshape_102_output_0" + } + ], + "node_name:": "/Transpose_109", + "node_outputs:": [ + { + "output_dimension:": "8 45 12 ", + "output_name:": "/Transpose_109_output_0" + } + ] + }, + { + "node_id:": 991, + "node_inputs:": [ + { + "input_dimension:": "8 9 45 ", + "input_name:": "/Softmax_9_output_0" + }, + { + "input_dimension:": "8 45 12 ", + "input_name:": "/Transpose_109_output_0" + } + ], + "node_name:": "/MatMul_47", + "node_outputs:": [ + { + "output_dimension:": "8 9 12 ", + "output_name:": "/MatMul_47_output_0" + } + ] + }, + { + "node_id:": 992, + "node_inputs:": [ + { + "input_dimension:": "8 9 12 ", + "input_name:": "/MatMul_47_output_0" + } + ], + "node_name:": "/Transpose_114", + "node_outputs:": [ + { + "output_dimension:": "9 8 12 ", + "output_name:": "/Transpose_114_output_0" + } + ] + }, + { + "node_id:": 993, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Add_167_output_0" + }, + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Add_171_output_0" + } + ], + "node_name:": "/Add_172", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/Add_172_output_0" + } + ] + }, + { + "node_id:": 994, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Add_172_output_0" + } + ], + "node_name:": "/Transpose_115", + "node_outputs:": [ + { + "output_dimension:": "1 384 9 ", + "output_name:": "/Transpose_115_output_0" + } + ] + }, + { + "node_id:": 995, + "node_inputs:": [ + { + "input_dimension:": "1 384 9 ", + "input_name:": "/Transpose_115_output_0" + }, + { + "input_dimension:": "768 384 1 ", + "input_name:": "encoder.encoders.3.encoder.layers.0.conv_module1.pointwise_conv1.weight" + }, + { + "input_dimension:": "768 ", + "input_name:": "encoder.encoders.3.encoder.layers.0.conv_module1.pointwise_conv1.bias" + } + ], + "node_name:": "/pointwise_conv1_18/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 768 9 ", + "output_name:": "/pointwise_conv1_18/Conv_output_0" + } + ] + }, + { + "node_id:": 996, + "node_inputs:": [ + { + "input_dimension:": "1 768 9 ", + "input_name:": "/pointwise_conv1_18/Conv_output_0" + } + ], + "node_name:": "/Split_18", + "node_outputs:": [ + { + "output_dimension:": "1 384 9 ", + "output_name:": "/Split_18_output_0" + }, + { + "output_dimension:": "1 384 9 ", + "output_name:": "/Split_18_output_1" + } + ] + }, + { + "node_id:": 997, + "node_inputs:": [ + { + "input_dimension:": "1 384 9 ", + "input_name:": "/Split_18_output_1" + } + ], + "node_name:": "/Sigmoid_18", + "node_outputs:": [ + { + "output_dimension:": "1 384 9 ", + "output_name:": "/Sigmoid_18_output_0" + } + ] + }, + { + "node_id:": 998, + "node_inputs:": [ + { + "input_dimension:": "1 384 9 ", + "input_name:": "/Split_18_output_0" + }, + { + "input_dimension:": "1 384 9 ", + "input_name:": "/Sigmoid_18_output_0" + } + ], + "node_name:": "/Mul_106", + "node_outputs:": [ + { + "output_dimension:": "1 384 9 ", + "output_name:": "/Mul_106_output_0" + } + ] + }, + { + "node_id:": 999, + "node_inputs:": [ + { + "input_dimension:": "2 1 384 30 ", + "input_name:": "cached_conv1_3" + }, + { + "input_dimension:": "", + "input_name:": "/upsample_3/Constant_output_0" + } + ], + "node_name:": "/Gather_198", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Gather_198_output_0" + } + ] + }, + { + "node_id:": 1000, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Gather_198_output_0" + }, + { + "input_dimension:": "1 384 9 ", + "input_name:": "/Mul_106_output_0" + } + ], + "node_name:": "/Concat_196", + "node_outputs:": [ + { + "output_dimension:": "1 384 39 ", + "output_name:": "/Concat_196_output_0" + } + ] + }, + { + "node_id:": 1001, + "node_inputs:": [ + { + "input_dimension:": "1 384 39 ", + "input_name:": "/Concat_196_output_0" + }, + { + "input_dimension:": "384 1 31 ", + "input_name:": "encoder.encoders.3.encoder.layers.0.conv_module1.depthwise_conv.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.3.encoder.layers.0.conv_module1.depthwise_conv.bias" + } + ], + "node_name:": "/depthwise_conv_18/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 9 ", + "output_name:": "/depthwise_conv_18/Conv_output_0" + } + ] + }, + { + "node_id:": 1002, + "node_inputs:": [ + { + "input_dimension:": "1 384 9 ", + "input_name:": "/depthwise_conv_18/Conv_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/activation_18/Sub", + "node_outputs:": [ + { + "output_dimension:": "1 384 9 ", + "output_name:": "/activation_18/Sub_output_0" + } + ] + }, + { + "node_id:": 1003, + "node_inputs:": [ + { + "input_dimension:": "1 384 9 ", + "input_name:": "/activation_18/Sub_output_0" + } + ], + "node_name:": "/activation_18/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "1 384 9 ", + "output_name:": "/activation_18/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 1004, + "node_inputs:": [ + { + "input_dimension:": "1 384 9 ", + "input_name:": "/depthwise_conv_18/Conv_output_0" + }, + { + "input_dimension:": "1 384 9 ", + "input_name:": "/activation_18/Sigmoid_output_0" + } + ], + "node_name:": "/activation_18/Mul", + "node_outputs:": [ + { + "output_dimension:": "1 384 9 ", + "output_name:": "/activation_18/Mul_output_0" + } + ] + }, + { + "node_id:": 1005, + "node_inputs:": [ + { + "input_dimension:": "1 384 9 ", + "input_name:": "/activation_18/Mul_output_0" + }, + { + "input_dimension:": "384 384 1 ", + "input_name:": "encoder.encoders.3.encoder.layers.0.conv_module1.pointwise_conv2.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.3.encoder.layers.0.conv_module1.pointwise_conv2.bias" + } + ], + "node_name:": "/pointwise_conv2_18/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 9 ", + "output_name:": "/pointwise_conv2_18/Conv_output_0" + } + ] + }, + { + "node_id:": 1006, + "node_inputs:": [ + { + "input_dimension:": "1 384 9 ", + "input_name:": "/pointwise_conv2_18/Conv_output_0" + } + ], + "node_name:": "/Transpose_116", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/Transpose_116_output_0" + } + ] + }, + { + "node_id:": 1007, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Add_172_output_0" + }, + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Transpose_116_output_0" + } + ], + "node_name:": "/Add_173", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/Add_173_output_0" + } + ] + }, + { + "node_id:": 1008, + "node_inputs:": [ + { + "input_dimension:": "9 1 2048 ", + "input_name:": "/feed_forward2/in_proj_9/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward2/activation_9/Sub", + "node_outputs:": [ + { + "output_dimension:": "9 1 2048 ", + "output_name:": "/feed_forward2/activation_9/Sub_output_0" + } + ] + }, + { + "node_id:": 1009, + "node_inputs:": [ + { + "input_dimension:": "9 1 2048 ", + "input_name:": "/feed_forward2/activation_9/Sub_output_0" + } + ], + "node_name:": "/feed_forward2/activation_9/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "9 1 2048 ", + "output_name:": "/feed_forward2/activation_9/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 1010, + "node_inputs:": [ + { + "input_dimension:": "9 1 2048 ", + "input_name:": "/feed_forward2/in_proj_9/Add_output_0" + }, + { + "input_dimension:": "9 1 2048 ", + "input_name:": "/feed_forward2/activation_9/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward2/activation_9/Mul", + "node_outputs:": [ + { + "output_dimension:": "9 1 2048 ", + "output_name:": "/feed_forward2/activation_9/Mul_output_0" + } + ] + }, + { + "node_id:": 1011, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Add_173_output_0" + }, + { + "input_dimension:": "9 1 384 ", + "input_name:": "/feed_forward2/out_proj_9/Add_output_0" + } + ], + "node_name:": "/Add_174", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/Add_174_output_0" + } + ] + }, + { + "node_id:": 1012, + "node_inputs:": [ + { + "input_dimension:": "2 36 1 96 ", + "input_name:": "cached_val2_3" + }, + { + "input_dimension:": "", + "input_name:": "/upsample_3/Constant_output_0" + } + ], + "node_name:": "/Gather_197", + "node_outputs:": [ + { + "output_dimension:": "36 1 96 ", + "output_name:": "/Gather_197_output_0" + } + ] + }, + { + "node_id:": 1013, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Add_174_output_0" + }, + { + "input_dimension:": "384 96 ", + "input_name:": "onnx::MatMul_7984" + } + ], + "node_name:": "/in_proj2_9/MatMul", + "node_outputs:": [ + { + "output_dimension:": "9 1 96 ", + "output_name:": "/in_proj2_9/MatMul_output_0" + } + ] + }, + { + "node_id:": 1014, + "node_inputs:": [ + { + "input_dimension:": "36 1 96 ", + "input_name:": "/Gather_197_output_0" + }, + { + "input_dimension:": "9 1 96 ", + "input_name:": "/in_proj2_9/MatMul_output_0" + } + ], + "node_name:": "/Concat_197", + "node_outputs:": [ + { + "output_dimension:": "45 1 96 ", + "output_name:": "/Concat_197_output_0" + } + ] + }, + { + "node_id:": 1015, + "node_inputs:": [ + { + "input_dimension:": "45 1 96 ", + "input_name:": "/Concat_197_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_198_output_0" + } + ], + "node_name:": "/Reshape_108", + "node_outputs:": [ + { + "output_dimension:": "45 8 12 ", + "output_name:": "/Reshape_108_output_0" + } + ] + }, + { + "node_id:": 1016, + "node_inputs:": [ + { + "input_dimension:": "45 8 12 ", + "input_name:": "/Reshape_108_output_0" + } + ], + "node_name:": "/Transpose_117", + "node_outputs:": [ + { + "output_dimension:": "8 45 12 ", + "output_name:": "/Transpose_117_output_0" + } + ] + }, + { + "node_id:": 1017, + "node_inputs:": [ + { + "input_dimension:": "8 9 45 ", + "input_name:": "/Softmax_9_output_0" + }, + { + "input_dimension:": "8 45 12 ", + "input_name:": "/Transpose_117_output_0" + } + ], + "node_name:": "/MatMul_49", + "node_outputs:": [ + { + "output_dimension:": "8 9 12 ", + "output_name:": "/MatMul_49_output_0" + } + ] + }, + { + "node_id:": 1018, + "node_inputs:": [ + { + "input_dimension:": "8 9 12 ", + "input_name:": "/MatMul_49_output_0" + } + ], + "node_name:": "/Transpose_118", + "node_outputs:": [ + { + "output_dimension:": "9 8 12 ", + "output_name:": "/Transpose_118_output_0" + } + ] + }, + { + "node_id:": 1019, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Add_174_output_0" + }, + { + "input_dimension:": "9 1 384 ", + "input_name:": "/out_proj2_9/Add_output_0" + } + ], + "node_name:": "/Add_176", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/Add_176_output_0" + } + ] + }, + { + "node_id:": 1020, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Add_176_output_0" + } + ], + "node_name:": "/Transpose_119", + "node_outputs:": [ + { + "output_dimension:": "1 384 9 ", + "output_name:": "/Transpose_119_output_0" + } + ] + }, + { + "node_id:": 1021, + "node_inputs:": [ + { + "input_dimension:": "1 384 9 ", + "input_name:": "/Transpose_119_output_0" + }, + { + "input_dimension:": "768 384 1 ", + "input_name:": "encoder.encoders.3.encoder.layers.0.conv_module2.pointwise_conv1.weight" + }, + { + "input_dimension:": "768 ", + "input_name:": "encoder.encoders.3.encoder.layers.0.conv_module2.pointwise_conv1.bias" + } + ], + "node_name:": "/pointwise_conv1_19/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 768 9 ", + "output_name:": "/pointwise_conv1_19/Conv_output_0" + } + ] + }, + { + "node_id:": 1022, + "node_inputs:": [ + { + "input_dimension:": "1 768 9 ", + "input_name:": "/pointwise_conv1_19/Conv_output_0" + } + ], + "node_name:": "/Split_19", + "node_outputs:": [ + { + "output_dimension:": "1 384 9 ", + "output_name:": "/Split_19_output_0" + }, + { + "output_dimension:": "1 384 9 ", + "output_name:": "/Split_19_output_1" + } + ] + }, + { + "node_id:": 1023, + "node_inputs:": [ + { + "input_dimension:": "1 384 9 ", + "input_name:": "/Split_19_output_1" + } + ], + "node_name:": "/Sigmoid_19", + "node_outputs:": [ + { + "output_dimension:": "1 384 9 ", + "output_name:": "/Sigmoid_19_output_0" + } + ] + }, + { + "node_id:": 1024, + "node_inputs:": [ + { + "input_dimension:": "1 384 9 ", + "input_name:": "/Split_19_output_0" + }, + { + "input_dimension:": "1 384 9 ", + "input_name:": "/Sigmoid_19_output_0" + } + ], + "node_name:": "/Mul_108", + "node_outputs:": [ + { + "output_dimension:": "1 384 9 ", + "output_name:": "/Mul_108_output_0" + } + ] + }, + { + "node_id:": 1025, + "node_inputs:": [ + { + "input_dimension:": "2 1 384 30 ", + "input_name:": "cached_conv2_3" + }, + { + "input_dimension:": "", + "input_name:": "/upsample_3/Constant_output_0" + } + ], + "node_name:": "/Gather_199", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Gather_199_output_0" + } + ] + }, + { + "node_id:": 1026, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Gather_199_output_0" + }, + { + "input_dimension:": "1 384 9 ", + "input_name:": "/Mul_108_output_0" + } + ], + "node_name:": "/Concat_200", + "node_outputs:": [ + { + "output_dimension:": "1 384 39 ", + "output_name:": "/Concat_200_output_0" + } + ] + }, + { + "node_id:": 1027, + "node_inputs:": [ + { + "input_dimension:": "1 384 39 ", + "input_name:": "/Concat_200_output_0" + }, + { + "input_dimension:": "384 1 31 ", + "input_name:": "encoder.encoders.3.encoder.layers.0.conv_module2.depthwise_conv.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.3.encoder.layers.0.conv_module2.depthwise_conv.bias" + } + ], + "node_name:": "/depthwise_conv_19/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 9 ", + "output_name:": "/depthwise_conv_19/Conv_output_0" + } + ] + }, + { + "node_id:": 1028, + "node_inputs:": [ + { + "input_dimension:": "1 384 9 ", + "input_name:": "/depthwise_conv_19/Conv_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/activation_19/Sub", + "node_outputs:": [ + { + "output_dimension:": "1 384 9 ", + "output_name:": "/activation_19/Sub_output_0" + } + ] + }, + { + "node_id:": 1029, + "node_inputs:": [ + { + "input_dimension:": "1 384 9 ", + "input_name:": "/activation_19/Sub_output_0" + } + ], + "node_name:": "/activation_19/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "1 384 9 ", + "output_name:": "/activation_19/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 1030, + "node_inputs:": [ + { + "input_dimension:": "1 384 9 ", + "input_name:": "/depthwise_conv_19/Conv_output_0" + }, + { + "input_dimension:": "1 384 9 ", + "input_name:": "/activation_19/Sigmoid_output_0" + } + ], + "node_name:": "/activation_19/Mul", + "node_outputs:": [ + { + "output_dimension:": "1 384 9 ", + "output_name:": "/activation_19/Mul_output_0" + } + ] + }, + { + "node_id:": 1031, + "node_inputs:": [ + { + "input_dimension:": "1 384 9 ", + "input_name:": "/activation_19/Mul_output_0" + }, + { + "input_dimension:": "384 384 1 ", + "input_name:": "encoder.encoders.3.encoder.layers.0.conv_module2.pointwise_conv2.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.3.encoder.layers.0.conv_module2.pointwise_conv2.bias" + } + ], + "node_name:": "/pointwise_conv2_19/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 9 ", + "output_name:": "/pointwise_conv2_19/Conv_output_0" + } + ] + }, + { + "node_id:": 1032, + "node_inputs:": [ + { + "input_dimension:": "1 384 9 ", + "input_name:": "/pointwise_conv2_19/Conv_output_0" + } + ], + "node_name:": "/Transpose_120", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/Transpose_120_output_0" + } + ] + }, + { + "node_id:": 1033, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Add_176_output_0" + }, + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Transpose_120_output_0" + } + ], + "node_name:": "/Add_177", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/Add_177_output_0" + } + ] + }, + { + "node_id:": 1034, + "node_inputs:": [ + { + "input_dimension:": "9 1 2048 ", + "input_name:": "/feed_forward3/in_proj_9/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward3/activation_9/Sub", + "node_outputs:": [ + { + "output_dimension:": "9 1 2048 ", + "output_name:": "/feed_forward3/activation_9/Sub_output_0" + } + ] + }, + { + "node_id:": 1035, + "node_inputs:": [ + { + "input_dimension:": "9 1 2048 ", + "input_name:": "/feed_forward3/activation_9/Sub_output_0" + } + ], + "node_name:": "/feed_forward3/activation_9/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "9 1 2048 ", + "output_name:": "/feed_forward3/activation_9/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 1036, + "node_inputs:": [ + { + "input_dimension:": "9 1 2048 ", + "input_name:": "/feed_forward3/in_proj_9/Add_output_0" + }, + { + "input_dimension:": "9 1 2048 ", + "input_name:": "/feed_forward3/activation_9/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward3/activation_9/Mul", + "node_outputs:": [ + { + "output_dimension:": "9 1 2048 ", + "output_name:": "/feed_forward3/activation_9/Mul_output_0" + } + ] + }, + { + "node_id:": 1037, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Add_177_output_0" + }, + { + "input_dimension:": "9 1 384 ", + "input_name:": "/feed_forward3/out_proj_9/Add_output_0" + } + ], + "node_name:": "/Add_178", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/Add_178_output_0" + } + ] + }, + { + "node_id:": 1038, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Add_178_output_0" + }, + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Add_178_output_0" + } + ], + "node_name:": "/norm_final_9/Mul", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/norm_final_9/Mul_output_0" + } + ] + }, + { + "node_id:": 1039, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/norm_final_9/Mul_output_0" + } + ], + "node_name:": "/norm_final_9/ReduceMean", + "node_outputs:": [ + { + "output_dimension:": "9 1 1 ", + "output_name:": "/norm_final_9/ReduceMean_output_0" + } + ] + }, + { + "node_id:": 1040, + "node_inputs:": [ + { + "input_dimension:": "9 1 1 ", + "input_name:": "/norm_final_9/ReduceMean_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/norm_final_9/Constant_output_0" + } + ], + "node_name:": "/norm_final_9/Add", + "node_outputs:": [ + { + "output_dimension:": "9 1 1 ", + "output_name:": "/norm_final_9/Add_output_0" + } + ] + }, + { + "node_id:": 1041, + "node_inputs:": [ + { + "input_dimension:": "9 1 1 ", + "input_name:": "/norm_final_9/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/norm_final_1/Constant_1_output_0" + } + ], + "node_name:": "/norm_final_9/Pow", + "node_outputs:": [ + { + "output_dimension:": "9 1 1 ", + "output_name:": "/norm_final_9/Pow_output_0" + } + ] + }, + { + "node_id:": 1042, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Add_178_output_0" + }, + { + "input_dimension:": "9 1 1 ", + "input_name:": "/norm_final_9/Pow_output_0" + } + ], + "node_name:": "/norm_final_9/Mul_1", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/norm_final_9/Mul_1_output_0" + } + ] + }, + { + "node_id:": 1043, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/norm_final_9/Mul_1_output_0" + }, + { + "input_dimension:": "9 1 384 ", + "input_name:": "/downsample_2/ReduceSum_1_output_0" + } + ], + "node_name:": "/Sub_29", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/Sub_29_output_0" + } + ] + }, + { + "node_id:": 1044, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Sub_29_output_0" + }, + { + "input_dimension:": "", + "input_name:": "encoder.encoders.3.encoder.layers.0.bypass_scale" + } + ], + "node_name:": "/Mul_109", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/Mul_109_output_0" + } + ] + }, + { + "node_id:": 1045, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/downsample_2/ReduceSum_1_output_0" + }, + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Mul_109_output_0" + } + ], + "node_name:": "/Add_179", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/Add_179_output_0" + } + ] + }, + { + "node_id:": 1046, + "node_inputs:": [ + { + "input_dimension:": "9 1 2048 ", + "input_name:": "/feed_forward1/in_proj_10/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward1/activation_10/Sub", + "node_outputs:": [ + { + "output_dimension:": "9 1 2048 ", + "output_name:": "/feed_forward1/activation_10/Sub_output_0" + } + ] + }, + { + "node_id:": 1047, + "node_inputs:": [ + { + "input_dimension:": "9 1 2048 ", + "input_name:": "/feed_forward1/activation_10/Sub_output_0" + } + ], + "node_name:": "/feed_forward1/activation_10/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "9 1 2048 ", + "output_name:": "/feed_forward1/activation_10/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 1048, + "node_inputs:": [ + { + "input_dimension:": "9 1 2048 ", + "input_name:": "/feed_forward1/in_proj_10/Add_output_0" + }, + { + "input_dimension:": "9 1 2048 ", + "input_name:": "/feed_forward1/activation_10/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward1/activation_10/Mul", + "node_outputs:": [ + { + "output_dimension:": "9 1 2048 ", + "output_name:": "/feed_forward1/activation_10/Mul_output_0" + } + ] + }, + { + "node_id:": 1049, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Add_179_output_0" + }, + { + "input_dimension:": "9 1 384 ", + "input_name:": "/feed_forward1/out_proj_10/Add_output_0" + } + ], + "node_name:": "/Add_180", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/Add_180_output_0" + } + ] + }, + { + "node_id:": 1050, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Add_180_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_631_output_0" + } + ], + "node_name:": "/CumSum_10", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/CumSum_10_output_0" + } + ] + }, + { + "node_id:": 1051, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/CumSum_10_output_0" + }, + { + "input_dimension:": "1 1 384 ", + "input_name:": "/Unsqueeze_406_output_0" + } + ], + "node_name:": "/Add_181", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/Add_181_output_0" + } + ] + }, + { + "node_id:": 1052, + "node_inputs:": [ + { + "input_dimension:": "9 1 ", + "input_name:": "/Unsqueeze_407_output_0" + }, + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_408_output_0" + } + ], + "node_name:": "/Add_183", + "node_outputs:": [ + { + "output_dimension:": "9 1 ", + "output_name:": "/Add_183_output_0" + } + ] + }, + { + "node_id:": 1053, + "node_inputs:": [ + { + "input_dimension:": "9 1 ", + "input_name:": "/Add_183_output_0" + } + ], + "node_name:": "/Cast_52", + "node_outputs:": [ + { + "output_dimension:": "9 1 ", + "output_name:": "/Cast_52_output_0" + } + ] + }, + { + "node_id:": 1054, + "node_inputs:": [ + { + "input_dimension:": "9 1 ", + "input_name:": "/Cast_52_output_0" + } + ], + "node_name:": "/Reciprocal_10", + "node_outputs:": [ + { + "output_dimension:": "9 1 ", + "output_name:": "/Reciprocal_10_output_0" + } + ] + }, + { + "node_id:": 1055, + "node_inputs:": [ + { + "input_dimension:": "9 1 ", + "input_name:": "/Reciprocal_10_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + } + ], + "node_name:": "/Unsqueeze_409", + "node_outputs:": [ + { + "output_dimension:": "9 1 1 ", + "output_name:": "/Unsqueeze_409_output_0" + } + ] + }, + { + "node_id:": 1056, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Add_181_output_0" + }, + { + "input_dimension:": "9 1 1 ", + "input_name:": "/Unsqueeze_409_output_0" + } + ], + "node_name:": "/Mul_112", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/Mul_112_output_0" + } + ] + }, + { + "node_id:": 1057, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Mul_112_output_0" + }, + { + "input_dimension:": "384 384 ", + "input_name:": "onnx::MatMul_8000" + } + ], + "node_name:": "/proj_10/MatMul", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/proj_10/MatMul_output_0" + } + ] + }, + { + "node_id:": 1058, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Add_180_output_0" + }, + { + "input_dimension:": "9 1 384 ", + "input_name:": "/proj_10/MatMul_output_0" + } + ], + "node_name:": "/Add_185", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/Add_185_output_0" + } + ] + }, + { + "node_id:": 1059, + "node_inputs:": [ + { + "input_dimension:": "9 1 512 ", + "input_name:": "/in_proj_10/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_285_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_568_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_93", + "node_outputs:": [ + { + "output_dimension:": "9 1 192 ", + "output_name:": "/Slice_93_output_0" + } + ] + }, + { + "node_id:": 1060, + "node_inputs:": [ + { + "input_dimension:": "2 36 1 192 ", + "input_name:": "cached_key_3" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_pos_1/Constant_2_output_0" + } + ], + "node_name:": "/Gather_216", + "node_outputs:": [ + { + "output_dimension:": "36 1 192 ", + "output_name:": "/Gather_216_output_0" + } + ] + }, + { + "node_id:": 1061, + "node_inputs:": [ + { + "input_dimension:": "36 1 192 ", + "input_name:": "/Gather_216_output_0" + }, + { + "input_dimension:": "9 1 192 ", + "input_name:": "/Slice_93_output_0" + } + ], + "node_name:": "/Concat_201", + "node_outputs:": [ + { + "output_dimension:": "45 1 192 ", + "output_name:": "/Concat_201_output_0" + } + ] + }, + { + "node_id:": 1062, + "node_inputs:": [ + { + "input_dimension:": "9 1 512 ", + "input_name:": "/in_proj_10/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1118_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_95", + "node_outputs:": [ + { + "output_dimension:": "9 1 32 ", + "output_name:": "/Slice_95_output_0" + } + ] + }, + { + "node_id:": 1063, + "node_inputs:": [ + { + "input_dimension:": "9 1 32 ", + "input_name:": "/Slice_95_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_204_output_0" + } + ], + "node_name:": "/Reshape_111", + "node_outputs:": [ + { + "output_dimension:": "9 1 8 4 ", + "output_name:": "/Reshape_111_output_0" + } + ] + }, + { + "node_id:": 1064, + "node_inputs:": [ + { + "input_dimension:": "9 1 8 4 ", + "input_name:": "/Reshape_111_output_0" + } + ], + "node_name:": "/Transpose_123", + "node_outputs:": [ + { + "output_dimension:": "1 8 9 4 ", + "output_name:": "/Transpose_123_output_0" + } + ] + }, + { + "node_id:": 1065, + "node_inputs:": [ + { + "input_dimension:": "1 8 9 4 ", + "input_name:": "/Transpose_123_output_0" + }, + { + "input_dimension:": "1 8 4 53 ", + "input_name:": "/Transpose_125_output_0" + } + ], + "node_name:": "/MatMul_50", + "node_outputs:": [ + { + "output_dimension:": "1 8 9 53 ", + "output_name:": "/MatMul_50_output_0" + } + ] + }, + { + "node_id:": 1066, + "node_inputs:": [ + { + "input_dimension:": "45 1 192 ", + "input_name:": "/Concat_201_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_266_output_0" + } + ], + "node_name:": "/Reshape_112", + "node_outputs:": [ + { + "output_dimension:": "45 1 8 24 ", + "output_name:": "/Reshape_112_output_0" + } + ] + }, + { + "node_id:": 1067, + "node_inputs:": [ + { + "input_dimension:": "45 1 8 24 ", + "input_name:": "/Reshape_112_output_0" + } + ], + "node_name:": "/Transpose_124", + "node_outputs:": [ + { + "output_dimension:": "1 8 24 45 ", + "output_name:": "/Transpose_124_output_0" + } + ] + }, + { + "node_id:": 1068, + "node_inputs:": [ + { + "input_dimension:": "9 1 512 ", + "input_name:": "/in_proj_10/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_285_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_92", + "node_outputs:": [ + { + "output_dimension:": "9 1 192 ", + "output_name:": "/Slice_92_output_0" + } + ] + }, + { + "node_id:": 1069, + "node_inputs:": [ + { + "input_dimension:": "9 1 192 ", + "input_name:": "/Slice_92_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_203_output_0" + } + ], + "node_name:": "/Reshape_110", + "node_outputs:": [ + { + "output_dimension:": "9 1 8 24 ", + "output_name:": "/Reshape_110_output_0" + } + ] + }, + { + "node_id:": 1070, + "node_inputs:": [ + { + "input_dimension:": "9 1 8 24 ", + "input_name:": "/Reshape_110_output_0" + } + ], + "node_name:": "/Transpose_122", + "node_outputs:": [ + { + "output_dimension:": "1 8 9 24 ", + "output_name:": "/Transpose_122_output_0" + } + ] + }, + { + "node_id:": 1071, + "node_inputs:": [ + { + "input_dimension:": "1 8 9 24 ", + "input_name:": "/Transpose_122_output_0" + }, + { + "input_dimension:": "1 8 24 45 ", + "input_name:": "/Transpose_124_output_0" + } + ], + "node_name:": "/MatMul_51", + "node_outputs:": [ + { + "output_dimension:": "1 8 9 45 ", + "output_name:": "/MatMul_51_output_0" + } + ] + }, + { + "node_id:": 1072, + "node_inputs:": [ + { + "input_dimension:": "1 8 9 53 ", + "input_name:": "/MatMul_50_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Concat_210_output_0" + } + ], + "node_name:": "/Reshape_115", + "node_outputs:": [ + { + "output_dimension:": "72 53 ", + "output_name:": "/Reshape_115_output_0" + } + ] + }, + { + "node_id:": 1073, + "node_inputs:": [ + { + "input_dimension:": "72 53 ", + "input_name:": "/Reshape_115_output_0" + }, + { + "input_dimension:": "72 45 ", + "input_name:": "/Add_187_output_0" + } + ], + "node_name:": "/GatherElements_10", + "node_outputs:": [ + { + "output_dimension:": "72 45 ", + "output_name:": "/GatherElements_10_output_0" + } + ] + }, + { + "node_id:": 1074, + "node_inputs:": [ + { + "input_dimension:": "72 45 ", + "input_name:": "/GatherElements_10_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_211_output_0" + } + ], + "node_name:": "/Reshape_116", + "node_outputs:": [ + { + "output_dimension:": "1 8 9 45 ", + "output_name:": "/Reshape_116_output_0" + } + ] + }, + { + "node_id:": 1075, + "node_inputs:": [ + { + "input_dimension:": "1 8 9 45 ", + "input_name:": "/MatMul_51_output_0" + }, + { + "input_dimension:": "1 8 9 45 ", + "input_name:": "/Reshape_116_output_0" + } + ], + "node_name:": "/Add_188", + "node_outputs:": [ + { + "output_dimension:": "1 8 9 45 ", + "output_name:": "/Add_188_output_0" + } + ] + }, + { + "node_id:": 1076, + "node_inputs:": [ + { + "input_dimension:": "1 8 9 45 ", + "input_name:": "/Add_188_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_212_output_0" + } + ], + "node_name:": "/Reshape_117", + "node_outputs:": [ + { + "output_dimension:": "8 9 45 ", + "output_name:": "/Reshape_117_output_0" + } + ] + }, + { + "node_id:": 1077, + "node_inputs:": [ + { + "input_dimension:": "8 9 45 ", + "input_name:": "/Reshape_117_output_0" + } + ], + "node_name:": "/Softmax_10", + "node_outputs:": [ + { + "output_dimension:": "8 9 45 ", + "output_name:": "/Softmax_10_output_0" + } + ] + }, + { + "node_id:": 1078, + "node_inputs:": [ + { + "input_dimension:": "9 1 512 ", + "input_name:": "/in_proj_10/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_568_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1118_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_94", + "node_outputs:": [ + { + "output_dimension:": "9 1 96 ", + "output_name:": "/Slice_94_output_0" + } + ] + }, + { + "node_id:": 1079, + "node_inputs:": [ + { + "input_dimension:": "2 36 1 96 ", + "input_name:": "cached_val_3" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_pos_1/Constant_2_output_0" + } + ], + "node_name:": "/Gather_217", + "node_outputs:": [ + { + "output_dimension:": "36 1 96 ", + "output_name:": "/Gather_217_output_0" + } + ] + }, + { + "node_id:": 1080, + "node_inputs:": [ + { + "input_dimension:": "36 1 96 ", + "input_name:": "/Gather_217_output_0" + }, + { + "input_dimension:": "9 1 96 ", + "input_name:": "/Slice_94_output_0" + } + ], + "node_name:": "/Concat_202", + "node_outputs:": [ + { + "output_dimension:": "45 1 96 ", + "output_name:": "/Concat_202_output_0" + } + ] + }, + { + "node_id:": 1081, + "node_inputs:": [ + { + "input_dimension:": "45 1 96 ", + "input_name:": "/Concat_202_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_206_output_0" + } + ], + "node_name:": "/Reshape_113", + "node_outputs:": [ + { + "output_dimension:": "45 8 12 ", + "output_name:": "/Reshape_113_output_0" + } + ] + }, + { + "node_id:": 1082, + "node_inputs:": [ + { + "input_dimension:": "45 8 12 ", + "input_name:": "/Reshape_113_output_0" + } + ], + "node_name:": "/Transpose_121", + "node_outputs:": [ + { + "output_dimension:": "8 45 12 ", + "output_name:": "/Transpose_121_output_0" + } + ] + }, + { + "node_id:": 1083, + "node_inputs:": [ + { + "input_dimension:": "8 9 45 ", + "input_name:": "/Softmax_10_output_0" + }, + { + "input_dimension:": "8 45 12 ", + "input_name:": "/Transpose_121_output_0" + } + ], + "node_name:": "/MatMul_52", + "node_outputs:": [ + { + "output_dimension:": "8 9 12 ", + "output_name:": "/MatMul_52_output_0" + } + ] + }, + { + "node_id:": 1084, + "node_inputs:": [ + { + "input_dimension:": "8 9 12 ", + "input_name:": "/MatMul_52_output_0" + } + ], + "node_name:": "/Transpose_126", + "node_outputs:": [ + { + "output_dimension:": "9 8 12 ", + "output_name:": "/Transpose_126_output_0" + } + ] + }, + { + "node_id:": 1085, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Add_185_output_0" + }, + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Add_189_output_0" + } + ], + "node_name:": "/Add_190", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/Add_190_output_0" + } + ] + }, + { + "node_id:": 1086, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Add_190_output_0" + } + ], + "node_name:": "/Transpose_127", + "node_outputs:": [ + { + "output_dimension:": "1 384 9 ", + "output_name:": "/Transpose_127_output_0" + } + ] + }, + { + "node_id:": 1087, + "node_inputs:": [ + { + "input_dimension:": "1 384 9 ", + "input_name:": "/Transpose_127_output_0" + }, + { + "input_dimension:": "768 384 1 ", + "input_name:": "encoder.encoders.3.encoder.layers.1.conv_module1.pointwise_conv1.weight" + }, + { + "input_dimension:": "768 ", + "input_name:": "encoder.encoders.3.encoder.layers.1.conv_module1.pointwise_conv1.bias" + } + ], + "node_name:": "/pointwise_conv1_20/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 768 9 ", + "output_name:": "/pointwise_conv1_20/Conv_output_0" + } + ] + }, + { + "node_id:": 1088, + "node_inputs:": [ + { + "input_dimension:": "1 768 9 ", + "input_name:": "/pointwise_conv1_20/Conv_output_0" + } + ], + "node_name:": "/Split_20", + "node_outputs:": [ + { + "output_dimension:": "1 384 9 ", + "output_name:": "/Split_20_output_0" + }, + { + "output_dimension:": "1 384 9 ", + "output_name:": "/Split_20_output_1" + } + ] + }, + { + "node_id:": 1089, + "node_inputs:": [ + { + "input_dimension:": "1 384 9 ", + "input_name:": "/Split_20_output_1" + } + ], + "node_name:": "/Sigmoid_20", + "node_outputs:": [ + { + "output_dimension:": "1 384 9 ", + "output_name:": "/Sigmoid_20_output_0" + } + ] + }, + { + "node_id:": 1090, + "node_inputs:": [ + { + "input_dimension:": "1 384 9 ", + "input_name:": "/Split_20_output_0" + }, + { + "input_dimension:": "1 384 9 ", + "input_name:": "/Sigmoid_20_output_0" + } + ], + "node_name:": "/Mul_117", + "node_outputs:": [ + { + "output_dimension:": "1 384 9 ", + "output_name:": "/Mul_117_output_0" + } + ] + }, + { + "node_id:": 1091, + "node_inputs:": [ + { + "input_dimension:": "2 1 384 30 ", + "input_name:": "cached_conv1_3" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_pos_1/Constant_2_output_0" + } + ], + "node_name:": "/Gather_219", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Gather_219_output_0" + } + ] + }, + { + "node_id:": 1092, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Gather_219_output_0" + }, + { + "input_dimension:": "1 384 9 ", + "input_name:": "/Mul_117_output_0" + } + ], + "node_name:": "/Concat_214", + "node_outputs:": [ + { + "output_dimension:": "1 384 39 ", + "output_name:": "/Concat_214_output_0" + } + ] + }, + { + "node_id:": 1093, + "node_inputs:": [ + { + "input_dimension:": "1 384 39 ", + "input_name:": "/Concat_214_output_0" + }, + { + "input_dimension:": "384 1 31 ", + "input_name:": "encoder.encoders.3.encoder.layers.1.conv_module1.depthwise_conv.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.3.encoder.layers.1.conv_module1.depthwise_conv.bias" + } + ], + "node_name:": "/depthwise_conv_20/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 9 ", + "output_name:": "/depthwise_conv_20/Conv_output_0" + } + ] + }, + { + "node_id:": 1094, + "node_inputs:": [ + { + "input_dimension:": "1 384 9 ", + "input_name:": "/depthwise_conv_20/Conv_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/activation_20/Sub", + "node_outputs:": [ + { + "output_dimension:": "1 384 9 ", + "output_name:": "/activation_20/Sub_output_0" + } + ] + }, + { + "node_id:": 1095, + "node_inputs:": [ + { + "input_dimension:": "1 384 9 ", + "input_name:": "/activation_20/Sub_output_0" + } + ], + "node_name:": "/activation_20/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "1 384 9 ", + "output_name:": "/activation_20/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 1096, + "node_inputs:": [ + { + "input_dimension:": "1 384 9 ", + "input_name:": "/depthwise_conv_20/Conv_output_0" + }, + { + "input_dimension:": "1 384 9 ", + "input_name:": "/activation_20/Sigmoid_output_0" + } + ], + "node_name:": "/activation_20/Mul", + "node_outputs:": [ + { + "output_dimension:": "1 384 9 ", + "output_name:": "/activation_20/Mul_output_0" + } + ] + }, + { + "node_id:": 1097, + "node_inputs:": [ + { + "input_dimension:": "1 384 9 ", + "input_name:": "/activation_20/Mul_output_0" + }, + { + "input_dimension:": "384 384 1 ", + "input_name:": "encoder.encoders.3.encoder.layers.1.conv_module1.pointwise_conv2.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.3.encoder.layers.1.conv_module1.pointwise_conv2.bias" + } + ], + "node_name:": "/pointwise_conv2_20/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 9 ", + "output_name:": "/pointwise_conv2_20/Conv_output_0" + } + ] + }, + { + "node_id:": 1098, + "node_inputs:": [ + { + "input_dimension:": "1 384 9 ", + "input_name:": "/pointwise_conv2_20/Conv_output_0" + } + ], + "node_name:": "/Transpose_128", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/Transpose_128_output_0" + } + ] + }, + { + "node_id:": 1099, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Add_190_output_0" + }, + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Transpose_128_output_0" + } + ], + "node_name:": "/Add_191", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/Add_191_output_0" + } + ] + }, + { + "node_id:": 1100, + "node_inputs:": [ + { + "input_dimension:": "9 1 2048 ", + "input_name:": "/feed_forward2/in_proj_10/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward2/activation_10/Sub", + "node_outputs:": [ + { + "output_dimension:": "9 1 2048 ", + "output_name:": "/feed_forward2/activation_10/Sub_output_0" + } + ] + }, + { + "node_id:": 1101, + "node_inputs:": [ + { + "input_dimension:": "9 1 2048 ", + "input_name:": "/feed_forward2/activation_10/Sub_output_0" + } + ], + "node_name:": "/feed_forward2/activation_10/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "9 1 2048 ", + "output_name:": "/feed_forward2/activation_10/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 1102, + "node_inputs:": [ + { + "input_dimension:": "9 1 2048 ", + "input_name:": "/feed_forward2/in_proj_10/Add_output_0" + }, + { + "input_dimension:": "9 1 2048 ", + "input_name:": "/feed_forward2/activation_10/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward2/activation_10/Mul", + "node_outputs:": [ + { + "output_dimension:": "9 1 2048 ", + "output_name:": "/feed_forward2/activation_10/Mul_output_0" + } + ] + }, + { + "node_id:": 1103, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Add_191_output_0" + }, + { + "input_dimension:": "9 1 384 ", + "input_name:": "/feed_forward2/out_proj_10/Add_output_0" + } + ], + "node_name:": "/Add_192", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/Add_192_output_0" + } + ] + }, + { + "node_id:": 1104, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Add_192_output_0" + }, + { + "input_dimension:": "384 96 ", + "input_name:": "onnx::MatMul_8046" + } + ], + "node_name:": "/in_proj2_10/MatMul", + "node_outputs:": [ + { + "output_dimension:": "9 1 96 ", + "output_name:": "/in_proj2_10/MatMul_output_0" + } + ] + }, + { + "node_id:": 1105, + "node_inputs:": [ + { + "input_dimension:": "2 36 1 96 ", + "input_name:": "cached_val2_3" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_pos_1/Constant_2_output_0" + } + ], + "node_name:": "/Gather_218", + "node_outputs:": [ + { + "output_dimension:": "36 1 96 ", + "output_name:": "/Gather_218_output_0" + } + ] + }, + { + "node_id:": 1106, + "node_inputs:": [ + { + "input_dimension:": "36 1 96 ", + "input_name:": "/Gather_218_output_0" + }, + { + "input_dimension:": "9 1 96 ", + "input_name:": "/in_proj2_10/MatMul_output_0" + } + ], + "node_name:": "/Concat_215", + "node_outputs:": [ + { + "output_dimension:": "45 1 96 ", + "output_name:": "/Concat_215_output_0" + } + ] + }, + { + "node_id:": 1107, + "node_inputs:": [ + { + "input_dimension:": "45 1 96 ", + "input_name:": "/Concat_215_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_216_output_0" + } + ], + "node_name:": "/Reshape_119", + "node_outputs:": [ + { + "output_dimension:": "45 8 12 ", + "output_name:": "/Reshape_119_output_0" + } + ] + }, + { + "node_id:": 1108, + "node_inputs:": [ + { + "input_dimension:": "45 8 12 ", + "input_name:": "/Reshape_119_output_0" + } + ], + "node_name:": "/Transpose_129", + "node_outputs:": [ + { + "output_dimension:": "8 45 12 ", + "output_name:": "/Transpose_129_output_0" + } + ] + }, + { + "node_id:": 1109, + "node_inputs:": [ + { + "input_dimension:": "8 9 45 ", + "input_name:": "/Softmax_10_output_0" + }, + { + "input_dimension:": "8 45 12 ", + "input_name:": "/Transpose_129_output_0" + } + ], + "node_name:": "/MatMul_54", + "node_outputs:": [ + { + "output_dimension:": "8 9 12 ", + "output_name:": "/MatMul_54_output_0" + } + ] + }, + { + "node_id:": 1110, + "node_inputs:": [ + { + "input_dimension:": "8 9 12 ", + "input_name:": "/MatMul_54_output_0" + } + ], + "node_name:": "/Transpose_130", + "node_outputs:": [ + { + "output_dimension:": "9 8 12 ", + "output_name:": "/Transpose_130_output_0" + } + ] + }, + { + "node_id:": 1111, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Add_192_output_0" + }, + { + "input_dimension:": "9 1 384 ", + "input_name:": "/out_proj2_10/Add_output_0" + } + ], + "node_name:": "/Add_194", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/Add_194_output_0" + } + ] + }, + { + "node_id:": 1112, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Add_194_output_0" + } + ], + "node_name:": "/Transpose_131", + "node_outputs:": [ + { + "output_dimension:": "1 384 9 ", + "output_name:": "/Transpose_131_output_0" + } + ] + }, + { + "node_id:": 1113, + "node_inputs:": [ + { + "input_dimension:": "1 384 9 ", + "input_name:": "/Transpose_131_output_0" + }, + { + "input_dimension:": "768 384 1 ", + "input_name:": "encoder.encoders.3.encoder.layers.1.conv_module2.pointwise_conv1.weight" + }, + { + "input_dimension:": "768 ", + "input_name:": "encoder.encoders.3.encoder.layers.1.conv_module2.pointwise_conv1.bias" + } + ], + "node_name:": "/pointwise_conv1_21/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 768 9 ", + "output_name:": "/pointwise_conv1_21/Conv_output_0" + } + ] + }, + { + "node_id:": 1114, + "node_inputs:": [ + { + "input_dimension:": "1 768 9 ", + "input_name:": "/pointwise_conv1_21/Conv_output_0" + } + ], + "node_name:": "/Split_21", + "node_outputs:": [ + { + "output_dimension:": "1 384 9 ", + "output_name:": "/Split_21_output_0" + }, + { + "output_dimension:": "1 384 9 ", + "output_name:": "/Split_21_output_1" + } + ] + }, + { + "node_id:": 1115, + "node_inputs:": [ + { + "input_dimension:": "1 384 9 ", + "input_name:": "/Split_21_output_1" + } + ], + "node_name:": "/Sigmoid_21", + "node_outputs:": [ + { + "output_dimension:": "1 384 9 ", + "output_name:": "/Sigmoid_21_output_0" + } + ] + }, + { + "node_id:": 1116, + "node_inputs:": [ + { + "input_dimension:": "1 384 9 ", + "input_name:": "/Split_21_output_0" + }, + { + "input_dimension:": "1 384 9 ", + "input_name:": "/Sigmoid_21_output_0" + } + ], + "node_name:": "/Mul_119", + "node_outputs:": [ + { + "output_dimension:": "1 384 9 ", + "output_name:": "/Mul_119_output_0" + } + ] + }, + { + "node_id:": 1117, + "node_inputs:": [ + { + "input_dimension:": "2 1 384 30 ", + "input_name:": "cached_conv2_3" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_pos_1/Constant_2_output_0" + } + ], + "node_name:": "/Gather_220", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Gather_220_output_0" + } + ] + }, + { + "node_id:": 1118, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Gather_220_output_0" + }, + { + "input_dimension:": "1 384 9 ", + "input_name:": "/Mul_119_output_0" + } + ], + "node_name:": "/Concat_218", + "node_outputs:": [ + { + "output_dimension:": "1 384 39 ", + "output_name:": "/Concat_218_output_0" + } + ] + }, + { + "node_id:": 1119, + "node_inputs:": [ + { + "input_dimension:": "1 384 39 ", + "input_name:": "/Concat_218_output_0" + }, + { + "input_dimension:": "384 1 31 ", + "input_name:": "encoder.encoders.3.encoder.layers.1.conv_module2.depthwise_conv.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.3.encoder.layers.1.conv_module2.depthwise_conv.bias" + } + ], + "node_name:": "/depthwise_conv_21/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 9 ", + "output_name:": "/depthwise_conv_21/Conv_output_0" + } + ] + }, + { + "node_id:": 1120, + "node_inputs:": [ + { + "input_dimension:": "1 384 9 ", + "input_name:": "/depthwise_conv_21/Conv_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/activation_21/Sub", + "node_outputs:": [ + { + "output_dimension:": "1 384 9 ", + "output_name:": "/activation_21/Sub_output_0" + } + ] + }, + { + "node_id:": 1121, + "node_inputs:": [ + { + "input_dimension:": "1 384 9 ", + "input_name:": "/activation_21/Sub_output_0" + } + ], + "node_name:": "/activation_21/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "1 384 9 ", + "output_name:": "/activation_21/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 1122, + "node_inputs:": [ + { + "input_dimension:": "1 384 9 ", + "input_name:": "/depthwise_conv_21/Conv_output_0" + }, + { + "input_dimension:": "1 384 9 ", + "input_name:": "/activation_21/Sigmoid_output_0" + } + ], + "node_name:": "/activation_21/Mul", + "node_outputs:": [ + { + "output_dimension:": "1 384 9 ", + "output_name:": "/activation_21/Mul_output_0" + } + ] + }, + { + "node_id:": 1123, + "node_inputs:": [ + { + "input_dimension:": "1 384 9 ", + "input_name:": "/activation_21/Mul_output_0" + }, + { + "input_dimension:": "384 384 1 ", + "input_name:": "encoder.encoders.3.encoder.layers.1.conv_module2.pointwise_conv2.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.3.encoder.layers.1.conv_module2.pointwise_conv2.bias" + } + ], + "node_name:": "/pointwise_conv2_21/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 9 ", + "output_name:": "/pointwise_conv2_21/Conv_output_0" + } + ] + }, + { + "node_id:": 1124, + "node_inputs:": [ + { + "input_dimension:": "1 384 9 ", + "input_name:": "/pointwise_conv2_21/Conv_output_0" + } + ], + "node_name:": "/Transpose_132", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/Transpose_132_output_0" + } + ] + }, + { + "node_id:": 1125, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Add_194_output_0" + }, + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Transpose_132_output_0" + } + ], + "node_name:": "/Add_195", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/Add_195_output_0" + } + ] + }, + { + "node_id:": 1126, + "node_inputs:": [ + { + "input_dimension:": "9 1 2048 ", + "input_name:": "/feed_forward3/in_proj_10/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward3/activation_10/Sub", + "node_outputs:": [ + { + "output_dimension:": "9 1 2048 ", + "output_name:": "/feed_forward3/activation_10/Sub_output_0" + } + ] + }, + { + "node_id:": 1127, + "node_inputs:": [ + { + "input_dimension:": "9 1 2048 ", + "input_name:": "/feed_forward3/activation_10/Sub_output_0" + } + ], + "node_name:": "/feed_forward3/activation_10/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "9 1 2048 ", + "output_name:": "/feed_forward3/activation_10/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 1128, + "node_inputs:": [ + { + "input_dimension:": "9 1 2048 ", + "input_name:": "/feed_forward3/in_proj_10/Add_output_0" + }, + { + "input_dimension:": "9 1 2048 ", + "input_name:": "/feed_forward3/activation_10/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward3/activation_10/Mul", + "node_outputs:": [ + { + "output_dimension:": "9 1 2048 ", + "output_name:": "/feed_forward3/activation_10/Mul_output_0" + } + ] + }, + { + "node_id:": 1129, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Add_195_output_0" + }, + { + "input_dimension:": "9 1 384 ", + "input_name:": "/feed_forward3/out_proj_10/Add_output_0" + } + ], + "node_name:": "/Add_196", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/Add_196_output_0" + } + ] + }, + { + "node_id:": 1130, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Add_196_output_0" + }, + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Add_196_output_0" + } + ], + "node_name:": "/norm_final_10/Mul", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/norm_final_10/Mul_output_0" + } + ] + }, + { + "node_id:": 1131, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/norm_final_10/Mul_output_0" + } + ], + "node_name:": "/norm_final_10/ReduceMean", + "node_outputs:": [ + { + "output_dimension:": "9 1 1 ", + "output_name:": "/norm_final_10/ReduceMean_output_0" + } + ] + }, + { + "node_id:": 1132, + "node_inputs:": [ + { + "input_dimension:": "9 1 1 ", + "input_name:": "/norm_final_10/ReduceMean_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/norm_final_10/Constant_output_0" + } + ], + "node_name:": "/norm_final_10/Add", + "node_outputs:": [ + { + "output_dimension:": "9 1 1 ", + "output_name:": "/norm_final_10/Add_output_0" + } + ] + }, + { + "node_id:": 1133, + "node_inputs:": [ + { + "input_dimension:": "9 1 1 ", + "input_name:": "/norm_final_10/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/norm_final_1/Constant_1_output_0" + } + ], + "node_name:": "/norm_final_10/Pow", + "node_outputs:": [ + { + "output_dimension:": "9 1 1 ", + "output_name:": "/norm_final_10/Pow_output_0" + } + ] + }, + { + "node_id:": 1134, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Add_196_output_0" + }, + { + "input_dimension:": "9 1 1 ", + "input_name:": "/norm_final_10/Pow_output_0" + } + ], + "node_name:": "/norm_final_10/Mul_1", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/norm_final_10/Mul_1_output_0" + } + ] + }, + { + "node_id:": 1135, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/norm_final_10/Mul_1_output_0" + }, + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Add_179_output_0" + } + ], + "node_name:": "/Sub_32", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/Sub_32_output_0" + } + ] + }, + { + "node_id:": 1136, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Sub_32_output_0" + }, + { + "input_dimension:": "", + "input_name:": "encoder.encoders.3.encoder.layers.1.bypass_scale" + } + ], + "node_name:": "/Mul_120", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/Mul_120_output_0" + } + ] + }, + { + "node_id:": 1137, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Add_179_output_0" + }, + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Mul_120_output_0" + } + ], + "node_name:": "/Add_197", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/Add_197_output_0" + } + ] + }, + { + "node_id:": 1138, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Add_197_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/upsample_2/Unsqueeze", + "node_outputs:": [ + { + "output_dimension:": "9 1 1 384 ", + "output_name:": "/upsample_2/Unsqueeze_output_0" + } + ] + }, + { + "node_id:": 1139, + "node_inputs:": [ + { + "input_dimension:": "9 1 1 384 ", + "input_name:": "/upsample_2/Unsqueeze_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/upsample_2/Where_output_0" + } + ], + "node_name:": "/upsample_2/Expand", + "node_outputs:": [ + { + "output_dimension:": "9 8 1 384 ", + "output_name:": "/upsample_2/Expand_output_0" + } + ] + }, + { + "node_id:": 1140, + "node_inputs:": [ + { + "input_dimension:": "9 8 1 384 ", + "input_name:": "/upsample_2/Expand_output_0" + }, + { + "input_dimension:": "8 1 384 ", + "input_name:": "onnx::Add_8060" + } + ], + "node_name:": "/upsample_2/Add", + "node_outputs:": [ + { + "output_dimension:": "9 8 1 384 ", + "output_name:": "/upsample_2/Add_output_0" + } + ] + }, + { + "node_id:": 1141, + "node_inputs:": [ + { + "input_dimension:": "9 8 1 384 ", + "input_name:": "/upsample_2/Add_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/upsample_2/Concat_1_output_0" + } + ], + "node_name:": "/upsample_2/Reshape_1", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/upsample_2/Reshape_1_output_0" + } + ] + }, + { + "node_id:": 1142, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/upsample_2/Reshape_1_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_453_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_101", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/Slice_101_output_0" + } + ] + }, + { + "node_id:": 1143, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Slice_101_output_0" + }, + { + "input_dimension:": "", + "input_name:": "onnx::Mul_8064" + } + ], + "node_name:": "/out_combiner_2/Mul_1", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/out_combiner_2/Mul_1_output_0" + } + ] + }, + { + "node_id:": 1144, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/out_combiner_1/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "encoder.encoders.3.out_combiner.weight1" + } + ], + "node_name:": "/out_combiner_2/Mul", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/out_combiner_2/Mul_output_0" + } + ] + }, + { + "node_id:": 1145, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/out_combiner_2/Mul_output_0" + }, + { + "input_dimension:": "72 1 384 ", + "input_name:": "/out_combiner_2/Mul_1_output_0" + } + ], + "node_name:": "/out_combiner_2/Add", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/out_combiner_2/Add_output_0" + } + ] + }, + { + "node_id:": 1146, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/out_combiner_2/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "onnx::Mul_8065" + } + ], + "node_name:": "/skip_modules.4/Mul_1", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/skip_modules.4/Mul_1_output_0" + } + ] + }, + { + "node_id:": 1147, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/out_combiner/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "encoder.skip_modules.4.weight1" + } + ], + "node_name:": "/skip_modules.4/Mul", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/skip_modules.4/Mul_output_0" + } + ] + }, + { + "node_id:": 1148, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/skip_modules.4/Mul_output_0" + }, + { + "input_dimension:": "72 1 384 ", + "input_name:": "/skip_modules.4/Mul_1_output_0" + } + ], + "node_name:": "/skip_modules.4/Add", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/skip_modules.4/Add_output_0" + } + ] + }, + { + "node_id:": 1149, + "node_inputs:": [ + { + "input_dimension:": "4 1 ", + "input_name:": "cached_len_4" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_embed/Constant_7_output_0" + } + ], + "node_name:": "/Gather_299", + "node_outputs:": [ + { + "output_dimension:": "1 ", + "output_name:": "/Gather_299_output_0" + } + ] + }, + { + "node_id:": 1150, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_299_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_559", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_559_output_0" + } + ] + }, + { + "node_id:": 1151, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_299_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Unsqueeze_556", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_556_output_0" + } + ] + }, + { + "node_id:": 1152, + "node_inputs:": [ + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_556_output_0" + } + ], + "node_name:": "/Cast_70", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Cast_70_output_0" + } + ] + }, + { + "node_id:": 1153, + "node_inputs:": [ + { + "input_dimension:": "4 1 384 ", + "input_name:": "cached_avg_4" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_embed/Constant_7_output_0" + } + ], + "node_name:": "/Gather_300", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Gather_300_output_0" + } + ] + }, + { + "node_id:": 1154, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Gather_300_output_0" + }, + { + "input_dimension:": "1 1 ", + "input_name:": "/Cast_70_output_0" + } + ], + "node_name:": "/Mul_154", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Mul_154_output_0" + } + ] + }, + { + "node_id:": 1155, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Mul_154_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_557", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 ", + "output_name:": "/Unsqueeze_557_output_0" + } + ] + }, + { + "node_id:": 1156, + "node_inputs:": [ + { + "input_dimension:": "4 1 ", + "input_name:": "cached_len_4" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_404_output_0" + } + ], + "node_name:": "/Gather_278", + "node_outputs:": [ + { + "output_dimension:": "1 ", + "output_name:": "/Gather_278_output_0" + } + ] + }, + { + "node_id:": 1157, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_278_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_525", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_525_output_0" + } + ] + }, + { + "node_id:": 1158, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_278_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Unsqueeze_522", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_522_output_0" + } + ] + }, + { + "node_id:": 1159, + "node_inputs:": [ + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_522_output_0" + } + ], + "node_name:": "/Cast_65", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Cast_65_output_0" + } + ] + }, + { + "node_id:": 1160, + "node_inputs:": [ + { + "input_dimension:": "4 1 384 ", + "input_name:": "cached_avg_4" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_404_output_0" + } + ], + "node_name:": "/Gather_279", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Gather_279_output_0" + } + ] + }, + { + "node_id:": 1161, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Gather_279_output_0" + }, + { + "input_dimension:": "1 1 ", + "input_name:": "/Cast_65_output_0" + } + ], + "node_name:": "/Mul_143", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Mul_143_output_0" + } + ] + }, + { + "node_id:": 1162, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Mul_143_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_523", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 ", + "output_name:": "/Unsqueeze_523_output_0" + } + ] + }, + { + "node_id:": 1163, + "node_inputs:": [ + { + "input_dimension:": "4 1 ", + "input_name:": "cached_len_4" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_pos_1/Constant_2_output_0" + } + ], + "node_name:": "/Gather_257", + "node_outputs:": [ + { + "output_dimension:": "1 ", + "output_name:": "/Gather_257_output_0" + } + ] + }, + { + "node_id:": 1164, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_257_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_491", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_491_output_0" + } + ] + }, + { + "node_id:": 1165, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_257_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Unsqueeze_488", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_488_output_0" + } + ] + }, + { + "node_id:": 1166, + "node_inputs:": [ + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_488_output_0" + } + ], + "node_name:": "/Cast_60", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Cast_60_output_0" + } + ] + }, + { + "node_id:": 1167, + "node_inputs:": [ + { + "input_dimension:": "4 1 384 ", + "input_name:": "cached_avg_4" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_pos_1/Constant_2_output_0" + } + ], + "node_name:": "/Gather_258", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Gather_258_output_0" + } + ] + }, + { + "node_id:": 1168, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Gather_258_output_0" + }, + { + "input_dimension:": "1 1 ", + "input_name:": "/Cast_60_output_0" + } + ], + "node_name:": "/Mul_132", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Mul_132_output_0" + } + ] + }, + { + "node_id:": 1169, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Mul_132_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_489", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 ", + "output_name:": "/Unsqueeze_489_output_0" + } + ] + }, + { + "node_id:": 1170, + "node_inputs:": [ + { + "input_dimension:": "4 1 ", + "input_name:": "cached_len_4" + }, + { + "input_dimension:": "", + "input_name:": "/upsample_3/Constant_output_0" + } + ], + "node_name:": "/Gather_236", + "node_outputs:": [ + { + "output_dimension:": "1 ", + "output_name:": "/Gather_236_output_0" + } + ] + }, + { + "node_id:": 1171, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_236_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_457", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_457_output_0" + } + ] + }, + { + "node_id:": 1172, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_236_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Unsqueeze_454", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_454_output_0" + } + ] + }, + { + "node_id:": 1173, + "node_inputs:": [ + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_454_output_0" + } + ], + "node_name:": "/Cast_55", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Cast_55_output_0" + } + ] + }, + { + "node_id:": 1174, + "node_inputs:": [ + { + "input_dimension:": "4 1 384 ", + "input_name:": "cached_avg_4" + }, + { + "input_dimension:": "", + "input_name:": "/upsample_3/Constant_output_0" + } + ], + "node_name:": "/Gather_237", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Gather_237_output_0" + } + ] + }, + { + "node_id:": 1175, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Gather_237_output_0" + }, + { + "input_dimension:": "1 1 ", + "input_name:": "/Cast_55_output_0" + } + ], + "node_name:": "/Mul_121", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Mul_121_output_0" + } + ] + }, + { + "node_id:": 1176, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Mul_121_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_455", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 ", + "output_name:": "/Unsqueeze_455_output_0" + } + ] + }, + { + "node_id:": 1177, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/skip_modules.4/Add_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/downsample_3/Concat_output_0" + } + ], + "node_name:": "/downsample_3/Reshape", + "node_outputs:": [ + { + "output_dimension:": "36 2 1 384 ", + "output_name:": "/downsample_3/Reshape_output_0" + } + ] + }, + { + "node_id:": 1178, + "node_inputs:": [ + { + "input_dimension:": "36 2 1 384 ", + "input_name:": "/downsample_3/Reshape_output_0" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.4.downsample.query" + } + ], + "node_name:": "/downsample_3/Mul", + "node_outputs:": [ + { + "output_dimension:": "36 2 1 384 ", + "output_name:": "/downsample_3/Mul_output_0" + } + ] + }, + { + "node_id:": 1179, + "node_inputs:": [ + { + "input_dimension:": "36 2 1 384 ", + "input_name:": "/downsample_3/Mul_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_246_output_0" + } + ], + "node_name:": "/downsample_3/ReduceSum", + "node_outputs:": [ + { + "output_dimension:": "36 2 1 1 ", + "output_name:": "/downsample_3/ReduceSum_output_0" + } + ] + }, + { + "node_id:": 1180, + "node_inputs:": [ + { + "input_dimension:": "36 2 1 1 ", + "input_name:": "/downsample_3/ReduceSum_output_0" + } + ], + "node_name:": "/downsample_3/Softmax", + "node_outputs:": [ + { + "output_dimension:": "36 2 1 1 ", + "output_name:": "/downsample_3/Softmax_output_0" + } + ] + }, + { + "node_id:": 1181, + "node_inputs:": [ + { + "input_dimension:": "36 2 1 384 ", + "input_name:": "/downsample_3/Reshape_output_0" + }, + { + "input_dimension:": "36 2 1 1 ", + "input_name:": "/downsample_3/Softmax_output_0" + } + ], + "node_name:": "/downsample_3/Mul_1", + "node_outputs:": [ + { + "output_dimension:": "36 2 1 384 ", + "output_name:": "/downsample_3/Mul_1_output_0" + } + ] + }, + { + "node_id:": 1182, + "node_inputs:": [ + { + "input_dimension:": "36 2 1 384 ", + "input_name:": "/downsample_3/Mul_1_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/downsample_3/ReduceSum_1", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/downsample_3/ReduceSum_1_output_0" + } + ] + }, + { + "node_id:": 1183, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward1/in_proj_11/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward1/activation_11/Sub", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward1/activation_11/Sub_output_0" + } + ] + }, + { + "node_id:": 1184, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward1/activation_11/Sub_output_0" + } + ], + "node_name:": "/feed_forward1/activation_11/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward1/activation_11/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 1185, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward1/in_proj_11/Add_output_0" + }, + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward1/activation_11/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward1/activation_11/Mul", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward1/activation_11/Mul_output_0" + } + ] + }, + { + "node_id:": 1186, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/downsample_3/ReduceSum_1_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/feed_forward1/out_proj_11/Add_output_0" + } + ], + "node_name:": "/Add_198", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_198_output_0" + } + ] + }, + { + "node_id:": 1187, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_198_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_631_output_0" + } + ], + "node_name:": "/CumSum_11", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/CumSum_11_output_0" + } + ] + }, + { + "node_id:": 1188, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/CumSum_11_output_0" + }, + { + "input_dimension:": "1 1 384 ", + "input_name:": "/Unsqueeze_455_output_0" + } + ], + "node_name:": "/Add_199", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_199_output_0" + } + ] + }, + { + "node_id:": 1189, + "node_inputs:": [ + { + "input_dimension:": "36 1 ", + "input_name:": "/Unsqueeze_456_output_0" + }, + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_457_output_0" + } + ], + "node_name:": "/Add_201", + "node_outputs:": [ + { + "output_dimension:": "36 1 ", + "output_name:": "/Add_201_output_0" + } + ] + }, + { + "node_id:": 1190, + "node_inputs:": [ + { + "input_dimension:": "36 1 ", + "input_name:": "/Add_201_output_0" + } + ], + "node_name:": "/Cast_57", + "node_outputs:": [ + { + "output_dimension:": "36 1 ", + "output_name:": "/Cast_57_output_0" + } + ] + }, + { + "node_id:": 1191, + "node_inputs:": [ + { + "input_dimension:": "36 1 ", + "input_name:": "/Cast_57_output_0" + } + ], + "node_name:": "/Reciprocal_11", + "node_outputs:": [ + { + "output_dimension:": "36 1 ", + "output_name:": "/Reciprocal_11_output_0" + } + ] + }, + { + "node_id:": 1192, + "node_inputs:": [ + { + "input_dimension:": "36 1 ", + "input_name:": "/Reciprocal_11_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + } + ], + "node_name:": "/Unsqueeze_458", + "node_outputs:": [ + { + "output_dimension:": "36 1 1 ", + "output_name:": "/Unsqueeze_458_output_0" + } + ] + }, + { + "node_id:": 1193, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_199_output_0" + }, + { + "input_dimension:": "36 1 1 ", + "input_name:": "/Unsqueeze_458_output_0" + } + ], + "node_name:": "/Mul_123", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Mul_123_output_0" + } + ] + }, + { + "node_id:": 1194, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Mul_123_output_0" + }, + { + "input_dimension:": "384 384 ", + "input_name:": "onnx::MatMul_8072" + } + ], + "node_name:": "/proj_11/MatMul", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/proj_11/MatMul_output_0" + } + ] + }, + { + "node_id:": 1195, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_198_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/proj_11/MatMul_output_0" + } + ], + "node_name:": "/Add_203", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_203_output_0" + } + ] + }, + { + "node_id:": 1196, + "node_inputs:": [ + { + "input_dimension:": "36 1 512 ", + "input_name:": "/in_proj_11/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_285_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_568_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_103", + "node_outputs:": [ + { + "output_dimension:": "36 1 192 ", + "output_name:": "/Slice_103_output_0" + } + ] + }, + { + "node_id:": 1197, + "node_inputs:": [ + { + "input_dimension:": "4 144 1 192 ", + "input_name:": "cached_key_4" + }, + { + "input_dimension:": "", + "input_name:": "/upsample_3/Constant_output_0" + } + ], + "node_name:": "/Gather_238", + "node_outputs:": [ + { + "output_dimension:": "144 1 192 ", + "output_name:": "/Gather_238_output_0" + } + ] + }, + { + "node_id:": 1198, + "node_inputs:": [ + { + "input_dimension:": "144 1 192 ", + "input_name:": "/Gather_238_output_0" + }, + { + "input_dimension:": "36 1 192 ", + "input_name:": "/Slice_103_output_0" + } + ], + "node_name:": "/Concat_226", + "node_outputs:": [ + { + "output_dimension:": "180 1 192 ", + "output_name:": "/Concat_226_output_0" + } + ] + }, + { + "node_id:": 1199, + "node_inputs:": [ + { + "input_dimension:": "36 1 512 ", + "input_name:": "/in_proj_11/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1118_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_105", + "node_outputs:": [ + { + "output_dimension:": "36 1 32 ", + "output_name:": "/Slice_105_output_0" + } + ] + }, + { + "node_id:": 1200, + "node_inputs:": [ + { + "input_dimension:": "36 1 32 ", + "input_name:": "/Slice_105_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_229_output_0" + } + ], + "node_name:": "/Reshape_122", + "node_outputs:": [ + { + "output_dimension:": "36 1 8 4 ", + "output_name:": "/Reshape_122_output_0" + } + ] + }, + { + "node_id:": 1201, + "node_inputs:": [ + { + "input_dimension:": "36 1 8 4 ", + "input_name:": "/Reshape_122_output_0" + } + ], + "node_name:": "/Transpose_135", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 4 ", + "output_name:": "/Transpose_135_output_0" + } + ] + }, + { + "node_id:": 1202, + "node_inputs:": [ + { + "input_dimension:": "1 8 36 4 ", + "input_name:": "/Transpose_135_output_0" + }, + { + "input_dimension:": "1 8 4 215 ", + "input_name:": "/Transpose_137_output_0" + } + ], + "node_name:": "/MatMul_55", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 215 ", + "output_name:": "/MatMul_55_output_0" + } + ] + }, + { + "node_id:": 1203, + "node_inputs:": [ + { + "input_dimension:": "180 1 192 ", + "input_name:": "/Concat_226_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_266_output_0" + } + ], + "node_name:": "/Reshape_123", + "node_outputs:": [ + { + "output_dimension:": "180 1 8 24 ", + "output_name:": "/Reshape_123_output_0" + } + ] + }, + { + "node_id:": 1204, + "node_inputs:": [ + { + "input_dimension:": "180 1 8 24 ", + "input_name:": "/Reshape_123_output_0" + } + ], + "node_name:": "/Transpose_136", + "node_outputs:": [ + { + "output_dimension:": "1 8 24 180 ", + "output_name:": "/Transpose_136_output_0" + } + ] + }, + { + "node_id:": 1205, + "node_inputs:": [ + { + "input_dimension:": "36 1 512 ", + "input_name:": "/in_proj_11/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_285_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_102", + "node_outputs:": [ + { + "output_dimension:": "36 1 192 ", + "output_name:": "/Slice_102_output_0" + } + ] + }, + { + "node_id:": 1206, + "node_inputs:": [ + { + "input_dimension:": "36 1 192 ", + "input_name:": "/Slice_102_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_228_output_0" + } + ], + "node_name:": "/Reshape_121", + "node_outputs:": [ + { + "output_dimension:": "36 1 8 24 ", + "output_name:": "/Reshape_121_output_0" + } + ] + }, + { + "node_id:": 1207, + "node_inputs:": [ + { + "input_dimension:": "36 1 8 24 ", + "input_name:": "/Reshape_121_output_0" + } + ], + "node_name:": "/Transpose_134", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 24 ", + "output_name:": "/Transpose_134_output_0" + } + ] + }, + { + "node_id:": 1208, + "node_inputs:": [ + { + "input_dimension:": "1 8 36 24 ", + "input_name:": "/Transpose_134_output_0" + }, + { + "input_dimension:": "1 8 24 180 ", + "input_name:": "/Transpose_136_output_0" + } + ], + "node_name:": "/MatMul_56", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 180 ", + "output_name:": "/MatMul_56_output_0" + } + ] + }, + { + "node_id:": 1209, + "node_inputs:": [ + { + "input_dimension:": "1 8 36 215 ", + "input_name:": "/MatMul_55_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Concat_235_output_0" + } + ], + "node_name:": "/Reshape_126", + "node_outputs:": [ + { + "output_dimension:": "288 215 ", + "output_name:": "/Reshape_126_output_0" + } + ] + }, + { + "node_id:": 1210, + "node_inputs:": [ + { + "input_dimension:": "288 215 ", + "input_name:": "/Reshape_126_output_0" + }, + { + "input_dimension:": "288 180 ", + "input_name:": "/Add_205_output_0" + } + ], + "node_name:": "/GatherElements_11", + "node_outputs:": [ + { + "output_dimension:": "288 180 ", + "output_name:": "/GatherElements_11_output_0" + } + ] + }, + { + "node_id:": 1211, + "node_inputs:": [ + { + "input_dimension:": "288 180 ", + "input_name:": "/GatherElements_11_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_236_output_0" + } + ], + "node_name:": "/Reshape_127", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 180 ", + "output_name:": "/Reshape_127_output_0" + } + ] + }, + { + "node_id:": 1212, + "node_inputs:": [ + { + "input_dimension:": "1 8 36 180 ", + "input_name:": "/MatMul_56_output_0" + }, + { + "input_dimension:": "1 8 36 180 ", + "input_name:": "/Reshape_127_output_0" + } + ], + "node_name:": "/Add_206", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 180 ", + "output_name:": "/Add_206_output_0" + } + ] + }, + { + "node_id:": 1213, + "node_inputs:": [ + { + "input_dimension:": "1 8 36 180 ", + "input_name:": "/Add_206_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_237_output_0" + } + ], + "node_name:": "/Reshape_128", + "node_outputs:": [ + { + "output_dimension:": "8 36 180 ", + "output_name:": "/Reshape_128_output_0" + } + ] + }, + { + "node_id:": 1214, + "node_inputs:": [ + { + "input_dimension:": "8 36 180 ", + "input_name:": "/Reshape_128_output_0" + } + ], + "node_name:": "/Softmax_11", + "node_outputs:": [ + { + "output_dimension:": "8 36 180 ", + "output_name:": "/Softmax_11_output_0" + } + ] + }, + { + "node_id:": 1215, + "node_inputs:": [ + { + "input_dimension:": "36 1 512 ", + "input_name:": "/in_proj_11/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_568_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1118_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_104", + "node_outputs:": [ + { + "output_dimension:": "36 1 96 ", + "output_name:": "/Slice_104_output_0" + } + ] + }, + { + "node_id:": 1216, + "node_inputs:": [ + { + "input_dimension:": "4 144 1 96 ", + "input_name:": "cached_val_4" + }, + { + "input_dimension:": "", + "input_name:": "/upsample_3/Constant_output_0" + } + ], + "node_name:": "/Gather_239", + "node_outputs:": [ + { + "output_dimension:": "144 1 96 ", + "output_name:": "/Gather_239_output_0" + } + ] + }, + { + "node_id:": 1217, + "node_inputs:": [ + { + "input_dimension:": "144 1 96 ", + "input_name:": "/Gather_239_output_0" + }, + { + "input_dimension:": "36 1 96 ", + "input_name:": "/Slice_104_output_0" + } + ], + "node_name:": "/Concat_227", + "node_outputs:": [ + { + "output_dimension:": "180 1 96 ", + "output_name:": "/Concat_227_output_0" + } + ] + }, + { + "node_id:": 1218, + "node_inputs:": [ + { + "input_dimension:": "180 1 96 ", + "input_name:": "/Concat_227_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_231_output_0" + } + ], + "node_name:": "/Reshape_124", + "node_outputs:": [ + { + "output_dimension:": "180 8 12 ", + "output_name:": "/Reshape_124_output_0" + } + ] + }, + { + "node_id:": 1219, + "node_inputs:": [ + { + "input_dimension:": "180 8 12 ", + "input_name:": "/Reshape_124_output_0" + } + ], + "node_name:": "/Transpose_133", + "node_outputs:": [ + { + "output_dimension:": "8 180 12 ", + "output_name:": "/Transpose_133_output_0" + } + ] + }, + { + "node_id:": 1220, + "node_inputs:": [ + { + "input_dimension:": "8 36 180 ", + "input_name:": "/Softmax_11_output_0" + }, + { + "input_dimension:": "8 180 12 ", + "input_name:": "/Transpose_133_output_0" + } + ], + "node_name:": "/MatMul_57", + "node_outputs:": [ + { + "output_dimension:": "8 36 12 ", + "output_name:": "/MatMul_57_output_0" + } + ] + }, + { + "node_id:": 1221, + "node_inputs:": [ + { + "input_dimension:": "8 36 12 ", + "input_name:": "/MatMul_57_output_0" + } + ], + "node_name:": "/Transpose_138", + "node_outputs:": [ + { + "output_dimension:": "36 8 12 ", + "output_name:": "/Transpose_138_output_0" + } + ] + }, + { + "node_id:": 1222, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_203_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_207_output_0" + } + ], + "node_name:": "/Add_208", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_208_output_0" + } + ] + }, + { + "node_id:": 1223, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_208_output_0" + } + ], + "node_name:": "/Transpose_139", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Transpose_139_output_0" + } + ] + }, + { + "node_id:": 1224, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Transpose_139_output_0" + }, + { + "input_dimension:": "768 384 1 ", + "input_name:": "encoder.encoders.4.encoder.layers.0.conv_module1.pointwise_conv1.weight" + }, + { + "input_dimension:": "768 ", + "input_name:": "encoder.encoders.4.encoder.layers.0.conv_module1.pointwise_conv1.bias" + } + ], + "node_name:": "/pointwise_conv1_22/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 768 36 ", + "output_name:": "/pointwise_conv1_22/Conv_output_0" + } + ] + }, + { + "node_id:": 1225, + "node_inputs:": [ + { + "input_dimension:": "1 768 36 ", + "input_name:": "/pointwise_conv1_22/Conv_output_0" + } + ], + "node_name:": "/Split_22", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Split_22_output_0" + }, + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Split_22_output_1" + } + ] + }, + { + "node_id:": 1226, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Split_22_output_1" + } + ], + "node_name:": "/Sigmoid_22", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Sigmoid_22_output_0" + } + ] + }, + { + "node_id:": 1227, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Split_22_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Sigmoid_22_output_0" + } + ], + "node_name:": "/Mul_128", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Mul_128_output_0" + } + ] + }, + { + "node_id:": 1228, + "node_inputs:": [ + { + "input_dimension:": "4 1 384 30 ", + "input_name:": "cached_conv1_4" + }, + { + "input_dimension:": "", + "input_name:": "/upsample_3/Constant_output_0" + } + ], + "node_name:": "/Gather_241", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Gather_241_output_0" + } + ] + }, + { + "node_id:": 1229, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Gather_241_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Mul_128_output_0" + } + ], + "node_name:": "/Concat_239", + "node_outputs:": [ + { + "output_dimension:": "1 384 66 ", + "output_name:": "/Concat_239_output_0" + } + ] + }, + { + "node_id:": 1230, + "node_inputs:": [ + { + "input_dimension:": "1 384 66 ", + "input_name:": "/Concat_239_output_0" + }, + { + "input_dimension:": "384 1 31 ", + "input_name:": "encoder.encoders.4.encoder.layers.0.conv_module1.depthwise_conv.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.4.encoder.layers.0.conv_module1.depthwise_conv.bias" + } + ], + "node_name:": "/depthwise_conv_22/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/depthwise_conv_22/Conv_output_0" + } + ] + }, + { + "node_id:": 1231, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/depthwise_conv_22/Conv_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/activation_22/Sub", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_22/Sub_output_0" + } + ] + }, + { + "node_id:": 1232, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_22/Sub_output_0" + } + ], + "node_name:": "/activation_22/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_22/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 1233, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/depthwise_conv_22/Conv_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_22/Sigmoid_output_0" + } + ], + "node_name:": "/activation_22/Mul", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_22/Mul_output_0" + } + ] + }, + { + "node_id:": 1234, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_22/Mul_output_0" + }, + { + "input_dimension:": "384 384 1 ", + "input_name:": "encoder.encoders.4.encoder.layers.0.conv_module1.pointwise_conv2.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.4.encoder.layers.0.conv_module1.pointwise_conv2.bias" + } + ], + "node_name:": "/pointwise_conv2_22/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/pointwise_conv2_22/Conv_output_0" + } + ] + }, + { + "node_id:": 1235, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/pointwise_conv2_22/Conv_output_0" + } + ], + "node_name:": "/Transpose_140", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Transpose_140_output_0" + } + ] + }, + { + "node_id:": 1236, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_208_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Transpose_140_output_0" + } + ], + "node_name:": "/Add_209", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_209_output_0" + } + ] + }, + { + "node_id:": 1237, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward2/in_proj_11/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward2/activation_11/Sub", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward2/activation_11/Sub_output_0" + } + ] + }, + { + "node_id:": 1238, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward2/activation_11/Sub_output_0" + } + ], + "node_name:": "/feed_forward2/activation_11/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward2/activation_11/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 1239, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward2/in_proj_11/Add_output_0" + }, + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward2/activation_11/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward2/activation_11/Mul", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward2/activation_11/Mul_output_0" + } + ] + }, + { + "node_id:": 1240, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_209_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/feed_forward2/out_proj_11/Add_output_0" + } + ], + "node_name:": "/Add_210", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_210_output_0" + } + ] + }, + { + "node_id:": 1241, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_210_output_0" + }, + { + "input_dimension:": "384 96 ", + "input_name:": "onnx::MatMul_8118" + } + ], + "node_name:": "/in_proj2_11/MatMul", + "node_outputs:": [ + { + "output_dimension:": "36 1 96 ", + "output_name:": "/in_proj2_11/MatMul_output_0" + } + ] + }, + { + "node_id:": 1242, + "node_inputs:": [ + { + "input_dimension:": "4 144 1 96 ", + "input_name:": "cached_val2_4" + }, + { + "input_dimension:": "", + "input_name:": "/upsample_3/Constant_output_0" + } + ], + "node_name:": "/Gather_240", + "node_outputs:": [ + { + "output_dimension:": "144 1 96 ", + "output_name:": "/Gather_240_output_0" + } + ] + }, + { + "node_id:": 1243, + "node_inputs:": [ + { + "input_dimension:": "144 1 96 ", + "input_name:": "/Gather_240_output_0" + }, + { + "input_dimension:": "36 1 96 ", + "input_name:": "/in_proj2_11/MatMul_output_0" + } + ], + "node_name:": "/Concat_240", + "node_outputs:": [ + { + "output_dimension:": "180 1 96 ", + "output_name:": "/Concat_240_output_0" + } + ] + }, + { + "node_id:": 1244, + "node_inputs:": [ + { + "input_dimension:": "180 1 96 ", + "input_name:": "/Concat_240_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_241_output_0" + } + ], + "node_name:": "/Reshape_130", + "node_outputs:": [ + { + "output_dimension:": "180 8 12 ", + "output_name:": "/Reshape_130_output_0" + } + ] + }, + { + "node_id:": 1245, + "node_inputs:": [ + { + "input_dimension:": "180 8 12 ", + "input_name:": "/Reshape_130_output_0" + } + ], + "node_name:": "/Transpose_141", + "node_outputs:": [ + { + "output_dimension:": "8 180 12 ", + "output_name:": "/Transpose_141_output_0" + } + ] + }, + { + "node_id:": 1246, + "node_inputs:": [ + { + "input_dimension:": "8 36 180 ", + "input_name:": "/Softmax_11_output_0" + }, + { + "input_dimension:": "8 180 12 ", + "input_name:": "/Transpose_141_output_0" + } + ], + "node_name:": "/MatMul_59", + "node_outputs:": [ + { + "output_dimension:": "8 36 12 ", + "output_name:": "/MatMul_59_output_0" + } + ] + }, + { + "node_id:": 1247, + "node_inputs:": [ + { + "input_dimension:": "8 36 12 ", + "input_name:": "/MatMul_59_output_0" + } + ], + "node_name:": "/Transpose_142", + "node_outputs:": [ + { + "output_dimension:": "36 8 12 ", + "output_name:": "/Transpose_142_output_0" + } + ] + }, + { + "node_id:": 1248, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_210_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/out_proj2_11/Add_output_0" + } + ], + "node_name:": "/Add_212", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_212_output_0" + } + ] + }, + { + "node_id:": 1249, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_212_output_0" + } + ], + "node_name:": "/Transpose_143", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Transpose_143_output_0" + } + ] + }, + { + "node_id:": 1250, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Transpose_143_output_0" + }, + { + "input_dimension:": "768 384 1 ", + "input_name:": "encoder.encoders.4.encoder.layers.0.conv_module2.pointwise_conv1.weight" + }, + { + "input_dimension:": "768 ", + "input_name:": "encoder.encoders.4.encoder.layers.0.conv_module2.pointwise_conv1.bias" + } + ], + "node_name:": "/pointwise_conv1_23/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 768 36 ", + "output_name:": "/pointwise_conv1_23/Conv_output_0" + } + ] + }, + { + "node_id:": 1251, + "node_inputs:": [ + { + "input_dimension:": "1 768 36 ", + "input_name:": "/pointwise_conv1_23/Conv_output_0" + } + ], + "node_name:": "/Split_23", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Split_23_output_0" + }, + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Split_23_output_1" + } + ] + }, + { + "node_id:": 1252, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Split_23_output_1" + } + ], + "node_name:": "/Sigmoid_23", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Sigmoid_23_output_0" + } + ] + }, + { + "node_id:": 1253, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Split_23_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Sigmoid_23_output_0" + } + ], + "node_name:": "/Mul_130", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Mul_130_output_0" + } + ] + }, + { + "node_id:": 1254, + "node_inputs:": [ + { + "input_dimension:": "4 1 384 30 ", + "input_name:": "cached_conv2_4" + }, + { + "input_dimension:": "", + "input_name:": "/upsample_3/Constant_output_0" + } + ], + "node_name:": "/Gather_242", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Gather_242_output_0" + } + ] + }, + { + "node_id:": 1255, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Gather_242_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Mul_130_output_0" + } + ], + "node_name:": "/Concat_243", + "node_outputs:": [ + { + "output_dimension:": "1 384 66 ", + "output_name:": "/Concat_243_output_0" + } + ] + }, + { + "node_id:": 1256, + "node_inputs:": [ + { + "input_dimension:": "1 384 66 ", + "input_name:": "/Concat_243_output_0" + }, + { + "input_dimension:": "384 1 31 ", + "input_name:": "encoder.encoders.4.encoder.layers.0.conv_module2.depthwise_conv.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.4.encoder.layers.0.conv_module2.depthwise_conv.bias" + } + ], + "node_name:": "/depthwise_conv_23/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/depthwise_conv_23/Conv_output_0" + } + ] + }, + { + "node_id:": 1257, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/depthwise_conv_23/Conv_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/activation_23/Sub", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_23/Sub_output_0" + } + ] + }, + { + "node_id:": 1258, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_23/Sub_output_0" + } + ], + "node_name:": "/activation_23/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_23/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 1259, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/depthwise_conv_23/Conv_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_23/Sigmoid_output_0" + } + ], + "node_name:": "/activation_23/Mul", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_23/Mul_output_0" + } + ] + }, + { + "node_id:": 1260, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_23/Mul_output_0" + }, + { + "input_dimension:": "384 384 1 ", + "input_name:": "encoder.encoders.4.encoder.layers.0.conv_module2.pointwise_conv2.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.4.encoder.layers.0.conv_module2.pointwise_conv2.bias" + } + ], + "node_name:": "/pointwise_conv2_23/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/pointwise_conv2_23/Conv_output_0" + } + ] + }, + { + "node_id:": 1261, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/pointwise_conv2_23/Conv_output_0" + } + ], + "node_name:": "/Transpose_144", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Transpose_144_output_0" + } + ] + }, + { + "node_id:": 1262, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_212_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Transpose_144_output_0" + } + ], + "node_name:": "/Add_213", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_213_output_0" + } + ] + }, + { + "node_id:": 1263, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward3/in_proj_11/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward3/activation_11/Sub", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward3/activation_11/Sub_output_0" + } + ] + }, + { + "node_id:": 1264, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward3/activation_11/Sub_output_0" + } + ], + "node_name:": "/feed_forward3/activation_11/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward3/activation_11/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 1265, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward3/in_proj_11/Add_output_0" + }, + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward3/activation_11/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward3/activation_11/Mul", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward3/activation_11/Mul_output_0" + } + ] + }, + { + "node_id:": 1266, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_213_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/feed_forward3/out_proj_11/Add_output_0" + } + ], + "node_name:": "/Add_214", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_214_output_0" + } + ] + }, + { + "node_id:": 1267, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_214_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_214_output_0" + } + ], + "node_name:": "/norm_final_11/Mul", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/norm_final_11/Mul_output_0" + } + ] + }, + { + "node_id:": 1268, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/norm_final_11/Mul_output_0" + } + ], + "node_name:": "/norm_final_11/ReduceMean", + "node_outputs:": [ + { + "output_dimension:": "36 1 1 ", + "output_name:": "/norm_final_11/ReduceMean_output_0" + } + ] + }, + { + "node_id:": 1269, + "node_inputs:": [ + { + "input_dimension:": "36 1 1 ", + "input_name:": "/norm_final_11/ReduceMean_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/norm_final_11/Constant_output_0" + } + ], + "node_name:": "/norm_final_11/Add", + "node_outputs:": [ + { + "output_dimension:": "36 1 1 ", + "output_name:": "/norm_final_11/Add_output_0" + } + ] + }, + { + "node_id:": 1270, + "node_inputs:": [ + { + "input_dimension:": "36 1 1 ", + "input_name:": "/norm_final_11/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/norm_final_1/Constant_1_output_0" + } + ], + "node_name:": "/norm_final_11/Pow", + "node_outputs:": [ + { + "output_dimension:": "36 1 1 ", + "output_name:": "/norm_final_11/Pow_output_0" + } + ] + }, + { + "node_id:": 1271, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_214_output_0" + }, + { + "input_dimension:": "36 1 1 ", + "input_name:": "/norm_final_11/Pow_output_0" + } + ], + "node_name:": "/norm_final_11/Mul_1", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/norm_final_11/Mul_1_output_0" + } + ] + }, + { + "node_id:": 1272, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/norm_final_11/Mul_1_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/downsample_3/ReduceSum_1_output_0" + } + ], + "node_name:": "/Sub_35", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Sub_35_output_0" + } + ] + }, + { + "node_id:": 1273, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Sub_35_output_0" + }, + { + "input_dimension:": "", + "input_name:": "encoder.encoders.4.encoder.layers.0.bypass_scale" + } + ], + "node_name:": "/Mul_131", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Mul_131_output_0" + } + ] + }, + { + "node_id:": 1274, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/downsample_3/ReduceSum_1_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Mul_131_output_0" + } + ], + "node_name:": "/Add_215", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_215_output_0" + } + ] + }, + { + "node_id:": 1275, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward1/in_proj_12/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward1/activation_12/Sub", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward1/activation_12/Sub_output_0" + } + ] + }, + { + "node_id:": 1276, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward1/activation_12/Sub_output_0" + } + ], + "node_name:": "/feed_forward1/activation_12/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward1/activation_12/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 1277, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward1/in_proj_12/Add_output_0" + }, + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward1/activation_12/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward1/activation_12/Mul", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward1/activation_12/Mul_output_0" + } + ] + }, + { + "node_id:": 1278, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_215_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/feed_forward1/out_proj_12/Add_output_0" + } + ], + "node_name:": "/Add_216", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_216_output_0" + } + ] + }, + { + "node_id:": 1279, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_216_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_631_output_0" + } + ], + "node_name:": "/CumSum_12", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/CumSum_12_output_0" + } + ] + }, + { + "node_id:": 1280, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/CumSum_12_output_0" + }, + { + "input_dimension:": "1 1 384 ", + "input_name:": "/Unsqueeze_489_output_0" + } + ], + "node_name:": "/Add_217", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_217_output_0" + } + ] + }, + { + "node_id:": 1281, + "node_inputs:": [ + { + "input_dimension:": "36 1 ", + "input_name:": "/Unsqueeze_490_output_0" + }, + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_491_output_0" + } + ], + "node_name:": "/Add_219", + "node_outputs:": [ + { + "output_dimension:": "36 1 ", + "output_name:": "/Add_219_output_0" + } + ] + }, + { + "node_id:": 1282, + "node_inputs:": [ + { + "input_dimension:": "36 1 ", + "input_name:": "/Add_219_output_0" + } + ], + "node_name:": "/Cast_62", + "node_outputs:": [ + { + "output_dimension:": "36 1 ", + "output_name:": "/Cast_62_output_0" + } + ] + }, + { + "node_id:": 1283, + "node_inputs:": [ + { + "input_dimension:": "36 1 ", + "input_name:": "/Cast_62_output_0" + } + ], + "node_name:": "/Reciprocal_12", + "node_outputs:": [ + { + "output_dimension:": "36 1 ", + "output_name:": "/Reciprocal_12_output_0" + } + ] + }, + { + "node_id:": 1284, + "node_inputs:": [ + { + "input_dimension:": "36 1 ", + "input_name:": "/Reciprocal_12_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + } + ], + "node_name:": "/Unsqueeze_492", + "node_outputs:": [ + { + "output_dimension:": "36 1 1 ", + "output_name:": "/Unsqueeze_492_output_0" + } + ] + }, + { + "node_id:": 1285, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_217_output_0" + }, + { + "input_dimension:": "36 1 1 ", + "input_name:": "/Unsqueeze_492_output_0" + } + ], + "node_name:": "/Mul_134", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Mul_134_output_0" + } + ] + }, + { + "node_id:": 1286, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Mul_134_output_0" + }, + { + "input_dimension:": "384 384 ", + "input_name:": "onnx::MatMul_8134" + } + ], + "node_name:": "/proj_12/MatMul", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/proj_12/MatMul_output_0" + } + ] + }, + { + "node_id:": 1287, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_216_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/proj_12/MatMul_output_0" + } + ], + "node_name:": "/Add_221", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_221_output_0" + } + ] + }, + { + "node_id:": 1288, + "node_inputs:": [ + { + "input_dimension:": "36 1 512 ", + "input_name:": "/in_proj_12/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_285_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_568_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_112", + "node_outputs:": [ + { + "output_dimension:": "36 1 192 ", + "output_name:": "/Slice_112_output_0" + } + ] + }, + { + "node_id:": 1289, + "node_inputs:": [ + { + "input_dimension:": "4 144 1 192 ", + "input_name:": "cached_key_4" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_pos_1/Constant_2_output_0" + } + ], + "node_name:": "/Gather_259", + "node_outputs:": [ + { + "output_dimension:": "144 1 192 ", + "output_name:": "/Gather_259_output_0" + } + ] + }, + { + "node_id:": 1290, + "node_inputs:": [ + { + "input_dimension:": "144 1 192 ", + "input_name:": "/Gather_259_output_0" + }, + { + "input_dimension:": "36 1 192 ", + "input_name:": "/Slice_112_output_0" + } + ], + "node_name:": "/Concat_244", + "node_outputs:": [ + { + "output_dimension:": "180 1 192 ", + "output_name:": "/Concat_244_output_0" + } + ] + }, + { + "node_id:": 1291, + "node_inputs:": [ + { + "input_dimension:": "36 1 512 ", + "input_name:": "/in_proj_12/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1118_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_114", + "node_outputs:": [ + { + "output_dimension:": "36 1 32 ", + "output_name:": "/Slice_114_output_0" + } + ] + }, + { + "node_id:": 1292, + "node_inputs:": [ + { + "input_dimension:": "36 1 32 ", + "input_name:": "/Slice_114_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_247_output_0" + } + ], + "node_name:": "/Reshape_133", + "node_outputs:": [ + { + "output_dimension:": "36 1 8 4 ", + "output_name:": "/Reshape_133_output_0" + } + ] + }, + { + "node_id:": 1293, + "node_inputs:": [ + { + "input_dimension:": "36 1 8 4 ", + "input_name:": "/Reshape_133_output_0" + } + ], + "node_name:": "/Transpose_147", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 4 ", + "output_name:": "/Transpose_147_output_0" + } + ] + }, + { + "node_id:": 1294, + "node_inputs:": [ + { + "input_dimension:": "1 8 36 4 ", + "input_name:": "/Transpose_147_output_0" + }, + { + "input_dimension:": "1 8 4 215 ", + "input_name:": "/Transpose_149_output_0" + } + ], + "node_name:": "/MatMul_60", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 215 ", + "output_name:": "/MatMul_60_output_0" + } + ] + }, + { + "node_id:": 1295, + "node_inputs:": [ + { + "input_dimension:": "180 1 192 ", + "input_name:": "/Concat_244_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_266_output_0" + } + ], + "node_name:": "/Reshape_134", + "node_outputs:": [ + { + "output_dimension:": "180 1 8 24 ", + "output_name:": "/Reshape_134_output_0" + } + ] + }, + { + "node_id:": 1296, + "node_inputs:": [ + { + "input_dimension:": "180 1 8 24 ", + "input_name:": "/Reshape_134_output_0" + } + ], + "node_name:": "/Transpose_148", + "node_outputs:": [ + { + "output_dimension:": "1 8 24 180 ", + "output_name:": "/Transpose_148_output_0" + } + ] + }, + { + "node_id:": 1297, + "node_inputs:": [ + { + "input_dimension:": "36 1 512 ", + "input_name:": "/in_proj_12/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_285_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_111", + "node_outputs:": [ + { + "output_dimension:": "36 1 192 ", + "output_name:": "/Slice_111_output_0" + } + ] + }, + { + "node_id:": 1298, + "node_inputs:": [ + { + "input_dimension:": "36 1 192 ", + "input_name:": "/Slice_111_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_246_output_0" + } + ], + "node_name:": "/Reshape_132", + "node_outputs:": [ + { + "output_dimension:": "36 1 8 24 ", + "output_name:": "/Reshape_132_output_0" + } + ] + }, + { + "node_id:": 1299, + "node_inputs:": [ + { + "input_dimension:": "36 1 8 24 ", + "input_name:": "/Reshape_132_output_0" + } + ], + "node_name:": "/Transpose_146", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 24 ", + "output_name:": "/Transpose_146_output_0" + } + ] + }, + { + "node_id:": 1300, + "node_inputs:": [ + { + "input_dimension:": "1 8 36 24 ", + "input_name:": "/Transpose_146_output_0" + }, + { + "input_dimension:": "1 8 24 180 ", + "input_name:": "/Transpose_148_output_0" + } + ], + "node_name:": "/MatMul_61", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 180 ", + "output_name:": "/MatMul_61_output_0" + } + ] + }, + { + "node_id:": 1301, + "node_inputs:": [ + { + "input_dimension:": "1 8 36 215 ", + "input_name:": "/MatMul_60_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Concat_253_output_0" + } + ], + "node_name:": "/Reshape_137", + "node_outputs:": [ + { + "output_dimension:": "288 215 ", + "output_name:": "/Reshape_137_output_0" + } + ] + }, + { + "node_id:": 1302, + "node_inputs:": [ + { + "input_dimension:": "288 215 ", + "input_name:": "/Reshape_137_output_0" + }, + { + "input_dimension:": "288 180 ", + "input_name:": "/Add_223_output_0" + } + ], + "node_name:": "/GatherElements_12", + "node_outputs:": [ + { + "output_dimension:": "288 180 ", + "output_name:": "/GatherElements_12_output_0" + } + ] + }, + { + "node_id:": 1303, + "node_inputs:": [ + { + "input_dimension:": "288 180 ", + "input_name:": "/GatherElements_12_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_254_output_0" + } + ], + "node_name:": "/Reshape_138", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 180 ", + "output_name:": "/Reshape_138_output_0" + } + ] + }, + { + "node_id:": 1304, + "node_inputs:": [ + { + "input_dimension:": "1 8 36 180 ", + "input_name:": "/MatMul_61_output_0" + }, + { + "input_dimension:": "1 8 36 180 ", + "input_name:": "/Reshape_138_output_0" + } + ], + "node_name:": "/Add_224", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 180 ", + "output_name:": "/Add_224_output_0" + } + ] + }, + { + "node_id:": 1305, + "node_inputs:": [ + { + "input_dimension:": "1 8 36 180 ", + "input_name:": "/Add_224_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_255_output_0" + } + ], + "node_name:": "/Reshape_139", + "node_outputs:": [ + { + "output_dimension:": "8 36 180 ", + "output_name:": "/Reshape_139_output_0" + } + ] + }, + { + "node_id:": 1306, + "node_inputs:": [ + { + "input_dimension:": "8 36 180 ", + "input_name:": "/Reshape_139_output_0" + } + ], + "node_name:": "/Softmax_12", + "node_outputs:": [ + { + "output_dimension:": "8 36 180 ", + "output_name:": "/Softmax_12_output_0" + } + ] + }, + { + "node_id:": 1307, + "node_inputs:": [ + { + "input_dimension:": "36 1 512 ", + "input_name:": "/in_proj_12/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_568_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1118_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_113", + "node_outputs:": [ + { + "output_dimension:": "36 1 96 ", + "output_name:": "/Slice_113_output_0" + } + ] + }, + { + "node_id:": 1308, + "node_inputs:": [ + { + "input_dimension:": "4 144 1 96 ", + "input_name:": "cached_val_4" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_pos_1/Constant_2_output_0" + } + ], + "node_name:": "/Gather_260", + "node_outputs:": [ + { + "output_dimension:": "144 1 96 ", + "output_name:": "/Gather_260_output_0" + } + ] + }, + { + "node_id:": 1309, + "node_inputs:": [ + { + "input_dimension:": "144 1 96 ", + "input_name:": "/Gather_260_output_0" + }, + { + "input_dimension:": "36 1 96 ", + "input_name:": "/Slice_113_output_0" + } + ], + "node_name:": "/Concat_245", + "node_outputs:": [ + { + "output_dimension:": "180 1 96 ", + "output_name:": "/Concat_245_output_0" + } + ] + }, + { + "node_id:": 1310, + "node_inputs:": [ + { + "input_dimension:": "180 1 96 ", + "input_name:": "/Concat_245_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_249_output_0" + } + ], + "node_name:": "/Reshape_135", + "node_outputs:": [ + { + "output_dimension:": "180 8 12 ", + "output_name:": "/Reshape_135_output_0" + } + ] + }, + { + "node_id:": 1311, + "node_inputs:": [ + { + "input_dimension:": "180 8 12 ", + "input_name:": "/Reshape_135_output_0" + } + ], + "node_name:": "/Transpose_145", + "node_outputs:": [ + { + "output_dimension:": "8 180 12 ", + "output_name:": "/Transpose_145_output_0" + } + ] + }, + { + "node_id:": 1312, + "node_inputs:": [ + { + "input_dimension:": "8 36 180 ", + "input_name:": "/Softmax_12_output_0" + }, + { + "input_dimension:": "8 180 12 ", + "input_name:": "/Transpose_145_output_0" + } + ], + "node_name:": "/MatMul_62", + "node_outputs:": [ + { + "output_dimension:": "8 36 12 ", + "output_name:": "/MatMul_62_output_0" + } + ] + }, + { + "node_id:": 1313, + "node_inputs:": [ + { + "input_dimension:": "8 36 12 ", + "input_name:": "/MatMul_62_output_0" + } + ], + "node_name:": "/Transpose_150", + "node_outputs:": [ + { + "output_dimension:": "36 8 12 ", + "output_name:": "/Transpose_150_output_0" + } + ] + }, + { + "node_id:": 1314, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_221_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_225_output_0" + } + ], + "node_name:": "/Add_226", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_226_output_0" + } + ] + }, + { + "node_id:": 1315, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_226_output_0" + } + ], + "node_name:": "/Transpose_151", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Transpose_151_output_0" + } + ] + }, + { + "node_id:": 1316, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Transpose_151_output_0" + }, + { + "input_dimension:": "768 384 1 ", + "input_name:": "encoder.encoders.4.encoder.layers.1.conv_module1.pointwise_conv1.weight" + }, + { + "input_dimension:": "768 ", + "input_name:": "encoder.encoders.4.encoder.layers.1.conv_module1.pointwise_conv1.bias" + } + ], + "node_name:": "/pointwise_conv1_24/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 768 36 ", + "output_name:": "/pointwise_conv1_24/Conv_output_0" + } + ] + }, + { + "node_id:": 1317, + "node_inputs:": [ + { + "input_dimension:": "1 768 36 ", + "input_name:": "/pointwise_conv1_24/Conv_output_0" + } + ], + "node_name:": "/Split_24", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Split_24_output_0" + }, + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Split_24_output_1" + } + ] + }, + { + "node_id:": 1318, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Split_24_output_1" + } + ], + "node_name:": "/Sigmoid_24", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Sigmoid_24_output_0" + } + ] + }, + { + "node_id:": 1319, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Split_24_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Sigmoid_24_output_0" + } + ], + "node_name:": "/Mul_139", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Mul_139_output_0" + } + ] + }, + { + "node_id:": 1320, + "node_inputs:": [ + { + "input_dimension:": "4 1 384 30 ", + "input_name:": "cached_conv1_4" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_pos_1/Constant_2_output_0" + } + ], + "node_name:": "/Gather_262", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Gather_262_output_0" + } + ] + }, + { + "node_id:": 1321, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Gather_262_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Mul_139_output_0" + } + ], + "node_name:": "/Concat_257", + "node_outputs:": [ + { + "output_dimension:": "1 384 66 ", + "output_name:": "/Concat_257_output_0" + } + ] + }, + { + "node_id:": 1322, + "node_inputs:": [ + { + "input_dimension:": "1 384 66 ", + "input_name:": "/Concat_257_output_0" + }, + { + "input_dimension:": "384 1 31 ", + "input_name:": "encoder.encoders.4.encoder.layers.1.conv_module1.depthwise_conv.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.4.encoder.layers.1.conv_module1.depthwise_conv.bias" + } + ], + "node_name:": "/depthwise_conv_24/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/depthwise_conv_24/Conv_output_0" + } + ] + }, + { + "node_id:": 1323, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/depthwise_conv_24/Conv_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/activation_24/Sub", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_24/Sub_output_0" + } + ] + }, + { + "node_id:": 1324, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_24/Sub_output_0" + } + ], + "node_name:": "/activation_24/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_24/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 1325, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/depthwise_conv_24/Conv_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_24/Sigmoid_output_0" + } + ], + "node_name:": "/activation_24/Mul", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_24/Mul_output_0" + } + ] + }, + { + "node_id:": 1326, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_24/Mul_output_0" + }, + { + "input_dimension:": "384 384 1 ", + "input_name:": "encoder.encoders.4.encoder.layers.1.conv_module1.pointwise_conv2.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.4.encoder.layers.1.conv_module1.pointwise_conv2.bias" + } + ], + "node_name:": "/pointwise_conv2_24/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/pointwise_conv2_24/Conv_output_0" + } + ] + }, + { + "node_id:": 1327, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/pointwise_conv2_24/Conv_output_0" + } + ], + "node_name:": "/Transpose_152", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Transpose_152_output_0" + } + ] + }, + { + "node_id:": 1328, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_226_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Transpose_152_output_0" + } + ], + "node_name:": "/Add_227", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_227_output_0" + } + ] + }, + { + "node_id:": 1329, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward2/in_proj_12/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward2/activation_12/Sub", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward2/activation_12/Sub_output_0" + } + ] + }, + { + "node_id:": 1330, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward2/activation_12/Sub_output_0" + } + ], + "node_name:": "/feed_forward2/activation_12/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward2/activation_12/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 1331, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward2/in_proj_12/Add_output_0" + }, + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward2/activation_12/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward2/activation_12/Mul", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward2/activation_12/Mul_output_0" + } + ] + }, + { + "node_id:": 1332, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_227_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/feed_forward2/out_proj_12/Add_output_0" + } + ], + "node_name:": "/Add_228", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_228_output_0" + } + ] + }, + { + "node_id:": 1333, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_228_output_0" + }, + { + "input_dimension:": "384 96 ", + "input_name:": "onnx::MatMul_8180" + } + ], + "node_name:": "/in_proj2_12/MatMul", + "node_outputs:": [ + { + "output_dimension:": "36 1 96 ", + "output_name:": "/in_proj2_12/MatMul_output_0" + } + ] + }, + { + "node_id:": 1334, + "node_inputs:": [ + { + "input_dimension:": "4 144 1 96 ", + "input_name:": "cached_val2_4" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_pos_1/Constant_2_output_0" + } + ], + "node_name:": "/Gather_261", + "node_outputs:": [ + { + "output_dimension:": "144 1 96 ", + "output_name:": "/Gather_261_output_0" + } + ] + }, + { + "node_id:": 1335, + "node_inputs:": [ + { + "input_dimension:": "144 1 96 ", + "input_name:": "/Gather_261_output_0" + }, + { + "input_dimension:": "36 1 96 ", + "input_name:": "/in_proj2_12/MatMul_output_0" + } + ], + "node_name:": "/Concat_258", + "node_outputs:": [ + { + "output_dimension:": "180 1 96 ", + "output_name:": "/Concat_258_output_0" + } + ] + }, + { + "node_id:": 1336, + "node_inputs:": [ + { + "input_dimension:": "180 1 96 ", + "input_name:": "/Concat_258_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_259_output_0" + } + ], + "node_name:": "/Reshape_141", + "node_outputs:": [ + { + "output_dimension:": "180 8 12 ", + "output_name:": "/Reshape_141_output_0" + } + ] + }, + { + "node_id:": 1337, + "node_inputs:": [ + { + "input_dimension:": "180 8 12 ", + "input_name:": "/Reshape_141_output_0" + } + ], + "node_name:": "/Transpose_153", + "node_outputs:": [ + { + "output_dimension:": "8 180 12 ", + "output_name:": "/Transpose_153_output_0" + } + ] + }, + { + "node_id:": 1338, + "node_inputs:": [ + { + "input_dimension:": "8 36 180 ", + "input_name:": "/Softmax_12_output_0" + }, + { + "input_dimension:": "8 180 12 ", + "input_name:": "/Transpose_153_output_0" + } + ], + "node_name:": "/MatMul_64", + "node_outputs:": [ + { + "output_dimension:": "8 36 12 ", + "output_name:": "/MatMul_64_output_0" + } + ] + }, + { + "node_id:": 1339, + "node_inputs:": [ + { + "input_dimension:": "8 36 12 ", + "input_name:": "/MatMul_64_output_0" + } + ], + "node_name:": "/Transpose_154", + "node_outputs:": [ + { + "output_dimension:": "36 8 12 ", + "output_name:": "/Transpose_154_output_0" + } + ] + }, + { + "node_id:": 1340, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_228_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/out_proj2_12/Add_output_0" + } + ], + "node_name:": "/Add_230", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_230_output_0" + } + ] + }, + { + "node_id:": 1341, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_230_output_0" + } + ], + "node_name:": "/Transpose_155", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Transpose_155_output_0" + } + ] + }, + { + "node_id:": 1342, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Transpose_155_output_0" + }, + { + "input_dimension:": "768 384 1 ", + "input_name:": "encoder.encoders.4.encoder.layers.1.conv_module2.pointwise_conv1.weight" + }, + { + "input_dimension:": "768 ", + "input_name:": "encoder.encoders.4.encoder.layers.1.conv_module2.pointwise_conv1.bias" + } + ], + "node_name:": "/pointwise_conv1_25/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 768 36 ", + "output_name:": "/pointwise_conv1_25/Conv_output_0" + } + ] + }, + { + "node_id:": 1343, + "node_inputs:": [ + { + "input_dimension:": "1 768 36 ", + "input_name:": "/pointwise_conv1_25/Conv_output_0" + } + ], + "node_name:": "/Split_25", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Split_25_output_0" + }, + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Split_25_output_1" + } + ] + }, + { + "node_id:": 1344, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Split_25_output_1" + } + ], + "node_name:": "/Sigmoid_25", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Sigmoid_25_output_0" + } + ] + }, + { + "node_id:": 1345, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Split_25_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Sigmoid_25_output_0" + } + ], + "node_name:": "/Mul_141", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Mul_141_output_0" + } + ] + }, + { + "node_id:": 1346, + "node_inputs:": [ + { + "input_dimension:": "4 1 384 30 ", + "input_name:": "cached_conv2_4" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_pos_1/Constant_2_output_0" + } + ], + "node_name:": "/Gather_263", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Gather_263_output_0" + } + ] + }, + { + "node_id:": 1347, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Gather_263_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Mul_141_output_0" + } + ], + "node_name:": "/Concat_261", + "node_outputs:": [ + { + "output_dimension:": "1 384 66 ", + "output_name:": "/Concat_261_output_0" + } + ] + }, + { + "node_id:": 1348, + "node_inputs:": [ + { + "input_dimension:": "1 384 66 ", + "input_name:": "/Concat_261_output_0" + }, + { + "input_dimension:": "384 1 31 ", + "input_name:": "encoder.encoders.4.encoder.layers.1.conv_module2.depthwise_conv.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.4.encoder.layers.1.conv_module2.depthwise_conv.bias" + } + ], + "node_name:": "/depthwise_conv_25/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/depthwise_conv_25/Conv_output_0" + } + ] + }, + { + "node_id:": 1349, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/depthwise_conv_25/Conv_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/activation_25/Sub", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_25/Sub_output_0" + } + ] + }, + { + "node_id:": 1350, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_25/Sub_output_0" + } + ], + "node_name:": "/activation_25/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_25/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 1351, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/depthwise_conv_25/Conv_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_25/Sigmoid_output_0" + } + ], + "node_name:": "/activation_25/Mul", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_25/Mul_output_0" + } + ] + }, + { + "node_id:": 1352, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_25/Mul_output_0" + }, + { + "input_dimension:": "384 384 1 ", + "input_name:": "encoder.encoders.4.encoder.layers.1.conv_module2.pointwise_conv2.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.4.encoder.layers.1.conv_module2.pointwise_conv2.bias" + } + ], + "node_name:": "/pointwise_conv2_25/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/pointwise_conv2_25/Conv_output_0" + } + ] + }, + { + "node_id:": 1353, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/pointwise_conv2_25/Conv_output_0" + } + ], + "node_name:": "/Transpose_156", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Transpose_156_output_0" + } + ] + }, + { + "node_id:": 1354, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_230_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Transpose_156_output_0" + } + ], + "node_name:": "/Add_231", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_231_output_0" + } + ] + }, + { + "node_id:": 1355, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward3/in_proj_12/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward3/activation_12/Sub", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward3/activation_12/Sub_output_0" + } + ] + }, + { + "node_id:": 1356, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward3/activation_12/Sub_output_0" + } + ], + "node_name:": "/feed_forward3/activation_12/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward3/activation_12/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 1357, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward3/in_proj_12/Add_output_0" + }, + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward3/activation_12/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward3/activation_12/Mul", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward3/activation_12/Mul_output_0" + } + ] + }, + { + "node_id:": 1358, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_231_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/feed_forward3/out_proj_12/Add_output_0" + } + ], + "node_name:": "/Add_232", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_232_output_0" + } + ] + }, + { + "node_id:": 1359, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_232_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_232_output_0" + } + ], + "node_name:": "/norm_final_12/Mul", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/norm_final_12/Mul_output_0" + } + ] + }, + { + "node_id:": 1360, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/norm_final_12/Mul_output_0" + } + ], + "node_name:": "/norm_final_12/ReduceMean", + "node_outputs:": [ + { + "output_dimension:": "36 1 1 ", + "output_name:": "/norm_final_12/ReduceMean_output_0" + } + ] + }, + { + "node_id:": 1361, + "node_inputs:": [ + { + "input_dimension:": "36 1 1 ", + "input_name:": "/norm_final_12/ReduceMean_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/norm_final_12/Constant_output_0" + } + ], + "node_name:": "/norm_final_12/Add", + "node_outputs:": [ + { + "output_dimension:": "36 1 1 ", + "output_name:": "/norm_final_12/Add_output_0" + } + ] + }, + { + "node_id:": 1362, + "node_inputs:": [ + { + "input_dimension:": "36 1 1 ", + "input_name:": "/norm_final_12/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/norm_final_1/Constant_1_output_0" + } + ], + "node_name:": "/norm_final_12/Pow", + "node_outputs:": [ + { + "output_dimension:": "36 1 1 ", + "output_name:": "/norm_final_12/Pow_output_0" + } + ] + }, + { + "node_id:": 1363, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_232_output_0" + }, + { + "input_dimension:": "36 1 1 ", + "input_name:": "/norm_final_12/Pow_output_0" + } + ], + "node_name:": "/norm_final_12/Mul_1", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/norm_final_12/Mul_1_output_0" + } + ] + }, + { + "node_id:": 1364, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/norm_final_12/Mul_1_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_215_output_0" + } + ], + "node_name:": "/Sub_38", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Sub_38_output_0" + } + ] + }, + { + "node_id:": 1365, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Sub_38_output_0" + }, + { + "input_dimension:": "", + "input_name:": "encoder.encoders.4.encoder.layers.1.bypass_scale" + } + ], + "node_name:": "/Mul_142", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Mul_142_output_0" + } + ] + }, + { + "node_id:": 1366, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_215_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Mul_142_output_0" + } + ], + "node_name:": "/Add_233", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_233_output_0" + } + ] + }, + { + "node_id:": 1367, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward1/in_proj_13/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward1/activation_13/Sub", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward1/activation_13/Sub_output_0" + } + ] + }, + { + "node_id:": 1368, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward1/activation_13/Sub_output_0" + } + ], + "node_name:": "/feed_forward1/activation_13/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward1/activation_13/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 1369, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward1/in_proj_13/Add_output_0" + }, + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward1/activation_13/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward1/activation_13/Mul", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward1/activation_13/Mul_output_0" + } + ] + }, + { + "node_id:": 1370, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_233_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/feed_forward1/out_proj_13/Add_output_0" + } + ], + "node_name:": "/Add_234", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_234_output_0" + } + ] + }, + { + "node_id:": 1371, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_234_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_631_output_0" + } + ], + "node_name:": "/CumSum_13", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/CumSum_13_output_0" + } + ] + }, + { + "node_id:": 1372, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/CumSum_13_output_0" + }, + { + "input_dimension:": "1 1 384 ", + "input_name:": "/Unsqueeze_523_output_0" + } + ], + "node_name:": "/Add_235", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_235_output_0" + } + ] + }, + { + "node_id:": 1373, + "node_inputs:": [ + { + "input_dimension:": "36 1 ", + "input_name:": "/Unsqueeze_524_output_0" + }, + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_525_output_0" + } + ], + "node_name:": "/Add_237", + "node_outputs:": [ + { + "output_dimension:": "36 1 ", + "output_name:": "/Add_237_output_0" + } + ] + }, + { + "node_id:": 1374, + "node_inputs:": [ + { + "input_dimension:": "36 1 ", + "input_name:": "/Add_237_output_0" + } + ], + "node_name:": "/Cast_67", + "node_outputs:": [ + { + "output_dimension:": "36 1 ", + "output_name:": "/Cast_67_output_0" + } + ] + }, + { + "node_id:": 1375, + "node_inputs:": [ + { + "input_dimension:": "36 1 ", + "input_name:": "/Cast_67_output_0" + } + ], + "node_name:": "/Reciprocal_13", + "node_outputs:": [ + { + "output_dimension:": "36 1 ", + "output_name:": "/Reciprocal_13_output_0" + } + ] + }, + { + "node_id:": 1376, + "node_inputs:": [ + { + "input_dimension:": "36 1 ", + "input_name:": "/Reciprocal_13_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + } + ], + "node_name:": "/Unsqueeze_526", + "node_outputs:": [ + { + "output_dimension:": "36 1 1 ", + "output_name:": "/Unsqueeze_526_output_0" + } + ] + }, + { + "node_id:": 1377, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_235_output_0" + }, + { + "input_dimension:": "36 1 1 ", + "input_name:": "/Unsqueeze_526_output_0" + } + ], + "node_name:": "/Mul_145", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Mul_145_output_0" + } + ] + }, + { + "node_id:": 1378, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Mul_145_output_0" + }, + { + "input_dimension:": "384 384 ", + "input_name:": "onnx::MatMul_8196" + } + ], + "node_name:": "/proj_13/MatMul", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/proj_13/MatMul_output_0" + } + ] + }, + { + "node_id:": 1379, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_234_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/proj_13/MatMul_output_0" + } + ], + "node_name:": "/Add_239", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_239_output_0" + } + ] + }, + { + "node_id:": 1380, + "node_inputs:": [ + { + "input_dimension:": "36 1 512 ", + "input_name:": "/in_proj_13/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_285_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_568_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_121", + "node_outputs:": [ + { + "output_dimension:": "36 1 192 ", + "output_name:": "/Slice_121_output_0" + } + ] + }, + { + "node_id:": 1381, + "node_inputs:": [ + { + "input_dimension:": "4 144 1 192 ", + "input_name:": "cached_key_4" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_404_output_0" + } + ], + "node_name:": "/Gather_280", + "node_outputs:": [ + { + "output_dimension:": "144 1 192 ", + "output_name:": "/Gather_280_output_0" + } + ] + }, + { + "node_id:": 1382, + "node_inputs:": [ + { + "input_dimension:": "144 1 192 ", + "input_name:": "/Gather_280_output_0" + }, + { + "input_dimension:": "36 1 192 ", + "input_name:": "/Slice_121_output_0" + } + ], + "node_name:": "/Concat_262", + "node_outputs:": [ + { + "output_dimension:": "180 1 192 ", + "output_name:": "/Concat_262_output_0" + } + ] + }, + { + "node_id:": 1383, + "node_inputs:": [ + { + "input_dimension:": "36 1 512 ", + "input_name:": "/in_proj_13/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1118_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_123", + "node_outputs:": [ + { + "output_dimension:": "36 1 32 ", + "output_name:": "/Slice_123_output_0" + } + ] + }, + { + "node_id:": 1384, + "node_inputs:": [ + { + "input_dimension:": "36 1 32 ", + "input_name:": "/Slice_123_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_265_output_0" + } + ], + "node_name:": "/Reshape_144", + "node_outputs:": [ + { + "output_dimension:": "36 1 8 4 ", + "output_name:": "/Reshape_144_output_0" + } + ] + }, + { + "node_id:": 1385, + "node_inputs:": [ + { + "input_dimension:": "36 1 8 4 ", + "input_name:": "/Reshape_144_output_0" + } + ], + "node_name:": "/Transpose_159", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 4 ", + "output_name:": "/Transpose_159_output_0" + } + ] + }, + { + "node_id:": 1386, + "node_inputs:": [ + { + "input_dimension:": "1 8 36 4 ", + "input_name:": "/Transpose_159_output_0" + }, + { + "input_dimension:": "1 8 4 215 ", + "input_name:": "/Transpose_161_output_0" + } + ], + "node_name:": "/MatMul_65", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 215 ", + "output_name:": "/MatMul_65_output_0" + } + ] + }, + { + "node_id:": 1387, + "node_inputs:": [ + { + "input_dimension:": "180 1 192 ", + "input_name:": "/Concat_262_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_266_output_0" + } + ], + "node_name:": "/Reshape_145", + "node_outputs:": [ + { + "output_dimension:": "180 1 8 24 ", + "output_name:": "/Reshape_145_output_0" + } + ] + }, + { + "node_id:": 1388, + "node_inputs:": [ + { + "input_dimension:": "180 1 8 24 ", + "input_name:": "/Reshape_145_output_0" + } + ], + "node_name:": "/Transpose_160", + "node_outputs:": [ + { + "output_dimension:": "1 8 24 180 ", + "output_name:": "/Transpose_160_output_0" + } + ] + }, + { + "node_id:": 1389, + "node_inputs:": [ + { + "input_dimension:": "36 1 512 ", + "input_name:": "/in_proj_13/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_285_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_120", + "node_outputs:": [ + { + "output_dimension:": "36 1 192 ", + "output_name:": "/Slice_120_output_0" + } + ] + }, + { + "node_id:": 1390, + "node_inputs:": [ + { + "input_dimension:": "36 1 192 ", + "input_name:": "/Slice_120_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_264_output_0" + } + ], + "node_name:": "/Reshape_143", + "node_outputs:": [ + { + "output_dimension:": "36 1 8 24 ", + "output_name:": "/Reshape_143_output_0" + } + ] + }, + { + "node_id:": 1391, + "node_inputs:": [ + { + "input_dimension:": "36 1 8 24 ", + "input_name:": "/Reshape_143_output_0" + } + ], + "node_name:": "/Transpose_158", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 24 ", + "output_name:": "/Transpose_158_output_0" + } + ] + }, + { + "node_id:": 1392, + "node_inputs:": [ + { + "input_dimension:": "1 8 36 24 ", + "input_name:": "/Transpose_158_output_0" + }, + { + "input_dimension:": "1 8 24 180 ", + "input_name:": "/Transpose_160_output_0" + } + ], + "node_name:": "/MatMul_66", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 180 ", + "output_name:": "/MatMul_66_output_0" + } + ] + }, + { + "node_id:": 1393, + "node_inputs:": [ + { + "input_dimension:": "1 8 36 215 ", + "input_name:": "/MatMul_65_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Concat_271_output_0" + } + ], + "node_name:": "/Reshape_148", + "node_outputs:": [ + { + "output_dimension:": "288 215 ", + "output_name:": "/Reshape_148_output_0" + } + ] + }, + { + "node_id:": 1394, + "node_inputs:": [ + { + "input_dimension:": "288 215 ", + "input_name:": "/Reshape_148_output_0" + }, + { + "input_dimension:": "288 180 ", + "input_name:": "/Add_241_output_0" + } + ], + "node_name:": "/GatherElements_13", + "node_outputs:": [ + { + "output_dimension:": "288 180 ", + "output_name:": "/GatherElements_13_output_0" + } + ] + }, + { + "node_id:": 1395, + "node_inputs:": [ + { + "input_dimension:": "288 180 ", + "input_name:": "/GatherElements_13_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_272_output_0" + } + ], + "node_name:": "/Reshape_149", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 180 ", + "output_name:": "/Reshape_149_output_0" + } + ] + }, + { + "node_id:": 1396, + "node_inputs:": [ + { + "input_dimension:": "1 8 36 180 ", + "input_name:": "/MatMul_66_output_0" + }, + { + "input_dimension:": "1 8 36 180 ", + "input_name:": "/Reshape_149_output_0" + } + ], + "node_name:": "/Add_242", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 180 ", + "output_name:": "/Add_242_output_0" + } + ] + }, + { + "node_id:": 1397, + "node_inputs:": [ + { + "input_dimension:": "1 8 36 180 ", + "input_name:": "/Add_242_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_273_output_0" + } + ], + "node_name:": "/Reshape_150", + "node_outputs:": [ + { + "output_dimension:": "8 36 180 ", + "output_name:": "/Reshape_150_output_0" + } + ] + }, + { + "node_id:": 1398, + "node_inputs:": [ + { + "input_dimension:": "8 36 180 ", + "input_name:": "/Reshape_150_output_0" + } + ], + "node_name:": "/Softmax_13", + "node_outputs:": [ + { + "output_dimension:": "8 36 180 ", + "output_name:": "/Softmax_13_output_0" + } + ] + }, + { + "node_id:": 1399, + "node_inputs:": [ + { + "input_dimension:": "36 1 512 ", + "input_name:": "/in_proj_13/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_568_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1118_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_122", + "node_outputs:": [ + { + "output_dimension:": "36 1 96 ", + "output_name:": "/Slice_122_output_0" + } + ] + }, + { + "node_id:": 1400, + "node_inputs:": [ + { + "input_dimension:": "4 144 1 96 ", + "input_name:": "cached_val_4" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_404_output_0" + } + ], + "node_name:": "/Gather_281", + "node_outputs:": [ + { + "output_dimension:": "144 1 96 ", + "output_name:": "/Gather_281_output_0" + } + ] + }, + { + "node_id:": 1401, + "node_inputs:": [ + { + "input_dimension:": "144 1 96 ", + "input_name:": "/Gather_281_output_0" + }, + { + "input_dimension:": "36 1 96 ", + "input_name:": "/Slice_122_output_0" + } + ], + "node_name:": "/Concat_263", + "node_outputs:": [ + { + "output_dimension:": "180 1 96 ", + "output_name:": "/Concat_263_output_0" + } + ] + }, + { + "node_id:": 1402, + "node_inputs:": [ + { + "input_dimension:": "180 1 96 ", + "input_name:": "/Concat_263_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_267_output_0" + } + ], + "node_name:": "/Reshape_146", + "node_outputs:": [ + { + "output_dimension:": "180 8 12 ", + "output_name:": "/Reshape_146_output_0" + } + ] + }, + { + "node_id:": 1403, + "node_inputs:": [ + { + "input_dimension:": "180 8 12 ", + "input_name:": "/Reshape_146_output_0" + } + ], + "node_name:": "/Transpose_157", + "node_outputs:": [ + { + "output_dimension:": "8 180 12 ", + "output_name:": "/Transpose_157_output_0" + } + ] + }, + { + "node_id:": 1404, + "node_inputs:": [ + { + "input_dimension:": "8 36 180 ", + "input_name:": "/Softmax_13_output_0" + }, + { + "input_dimension:": "8 180 12 ", + "input_name:": "/Transpose_157_output_0" + } + ], + "node_name:": "/MatMul_67", + "node_outputs:": [ + { + "output_dimension:": "8 36 12 ", + "output_name:": "/MatMul_67_output_0" + } + ] + }, + { + "node_id:": 1405, + "node_inputs:": [ + { + "input_dimension:": "8 36 12 ", + "input_name:": "/MatMul_67_output_0" + } + ], + "node_name:": "/Transpose_162", + "node_outputs:": [ + { + "output_dimension:": "36 8 12 ", + "output_name:": "/Transpose_162_output_0" + } + ] + }, + { + "node_id:": 1406, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_239_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_243_output_0" + } + ], + "node_name:": "/Add_244", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_244_output_0" + } + ] + }, + { + "node_id:": 1407, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_244_output_0" + } + ], + "node_name:": "/Transpose_163", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Transpose_163_output_0" + } + ] + }, + { + "node_id:": 1408, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Transpose_163_output_0" + }, + { + "input_dimension:": "768 384 1 ", + "input_name:": "encoder.encoders.4.encoder.layers.2.conv_module1.pointwise_conv1.weight" + }, + { + "input_dimension:": "768 ", + "input_name:": "encoder.encoders.4.encoder.layers.2.conv_module1.pointwise_conv1.bias" + } + ], + "node_name:": "/pointwise_conv1_26/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 768 36 ", + "output_name:": "/pointwise_conv1_26/Conv_output_0" + } + ] + }, + { + "node_id:": 1409, + "node_inputs:": [ + { + "input_dimension:": "1 768 36 ", + "input_name:": "/pointwise_conv1_26/Conv_output_0" + } + ], + "node_name:": "/Split_26", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Split_26_output_0" + }, + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Split_26_output_1" + } + ] + }, + { + "node_id:": 1410, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Split_26_output_1" + } + ], + "node_name:": "/Sigmoid_26", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Sigmoid_26_output_0" + } + ] + }, + { + "node_id:": 1411, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Split_26_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Sigmoid_26_output_0" + } + ], + "node_name:": "/Mul_150", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Mul_150_output_0" + } + ] + }, + { + "node_id:": 1412, + "node_inputs:": [ + { + "input_dimension:": "4 1 384 30 ", + "input_name:": "cached_conv1_4" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_404_output_0" + } + ], + "node_name:": "/Gather_283", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Gather_283_output_0" + } + ] + }, + { + "node_id:": 1413, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Gather_283_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Mul_150_output_0" + } + ], + "node_name:": "/Concat_275", + "node_outputs:": [ + { + "output_dimension:": "1 384 66 ", + "output_name:": "/Concat_275_output_0" + } + ] + }, + { + "node_id:": 1414, + "node_inputs:": [ + { + "input_dimension:": "1 384 66 ", + "input_name:": "/Concat_275_output_0" + }, + { + "input_dimension:": "384 1 31 ", + "input_name:": "encoder.encoders.4.encoder.layers.2.conv_module1.depthwise_conv.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.4.encoder.layers.2.conv_module1.depthwise_conv.bias" + } + ], + "node_name:": "/depthwise_conv_26/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/depthwise_conv_26/Conv_output_0" + } + ] + }, + { + "node_id:": 1415, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/depthwise_conv_26/Conv_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/activation_26/Sub", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_26/Sub_output_0" + } + ] + }, + { + "node_id:": 1416, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_26/Sub_output_0" + } + ], + "node_name:": "/activation_26/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_26/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 1417, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/depthwise_conv_26/Conv_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_26/Sigmoid_output_0" + } + ], + "node_name:": "/activation_26/Mul", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_26/Mul_output_0" + } + ] + }, + { + "node_id:": 1418, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_26/Mul_output_0" + }, + { + "input_dimension:": "384 384 1 ", + "input_name:": "encoder.encoders.4.encoder.layers.2.conv_module1.pointwise_conv2.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.4.encoder.layers.2.conv_module1.pointwise_conv2.bias" + } + ], + "node_name:": "/pointwise_conv2_26/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/pointwise_conv2_26/Conv_output_0" + } + ] + }, + { + "node_id:": 1419, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/pointwise_conv2_26/Conv_output_0" + } + ], + "node_name:": "/Transpose_164", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Transpose_164_output_0" + } + ] + }, + { + "node_id:": 1420, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_244_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Transpose_164_output_0" + } + ], + "node_name:": "/Add_245", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_245_output_0" + } + ] + }, + { + "node_id:": 1421, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward2/in_proj_13/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward2/activation_13/Sub", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward2/activation_13/Sub_output_0" + } + ] + }, + { + "node_id:": 1422, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward2/activation_13/Sub_output_0" + } + ], + "node_name:": "/feed_forward2/activation_13/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward2/activation_13/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 1423, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward2/in_proj_13/Add_output_0" + }, + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward2/activation_13/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward2/activation_13/Mul", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward2/activation_13/Mul_output_0" + } + ] + }, + { + "node_id:": 1424, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_245_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/feed_forward2/out_proj_13/Add_output_0" + } + ], + "node_name:": "/Add_246", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_246_output_0" + } + ] + }, + { + "node_id:": 1425, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_246_output_0" + }, + { + "input_dimension:": "384 96 ", + "input_name:": "onnx::MatMul_8242" + } + ], + "node_name:": "/in_proj2_13/MatMul", + "node_outputs:": [ + { + "output_dimension:": "36 1 96 ", + "output_name:": "/in_proj2_13/MatMul_output_0" + } + ] + }, + { + "node_id:": 1426, + "node_inputs:": [ + { + "input_dimension:": "4 144 1 96 ", + "input_name:": "cached_val2_4" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_404_output_0" + } + ], + "node_name:": "/Gather_282", + "node_outputs:": [ + { + "output_dimension:": "144 1 96 ", + "output_name:": "/Gather_282_output_0" + } + ] + }, + { + "node_id:": 1427, + "node_inputs:": [ + { + "input_dimension:": "144 1 96 ", + "input_name:": "/Gather_282_output_0" + }, + { + "input_dimension:": "36 1 96 ", + "input_name:": "/in_proj2_13/MatMul_output_0" + } + ], + "node_name:": "/Concat_276", + "node_outputs:": [ + { + "output_dimension:": "180 1 96 ", + "output_name:": "/Concat_276_output_0" + } + ] + }, + { + "node_id:": 1428, + "node_inputs:": [ + { + "input_dimension:": "180 1 96 ", + "input_name:": "/Concat_276_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_277_output_0" + } + ], + "node_name:": "/Reshape_152", + "node_outputs:": [ + { + "output_dimension:": "180 8 12 ", + "output_name:": "/Reshape_152_output_0" + } + ] + }, + { + "node_id:": 1429, + "node_inputs:": [ + { + "input_dimension:": "180 8 12 ", + "input_name:": "/Reshape_152_output_0" + } + ], + "node_name:": "/Transpose_165", + "node_outputs:": [ + { + "output_dimension:": "8 180 12 ", + "output_name:": "/Transpose_165_output_0" + } + ] + }, + { + "node_id:": 1430, + "node_inputs:": [ + { + "input_dimension:": "8 36 180 ", + "input_name:": "/Softmax_13_output_0" + }, + { + "input_dimension:": "8 180 12 ", + "input_name:": "/Transpose_165_output_0" + } + ], + "node_name:": "/MatMul_69", + "node_outputs:": [ + { + "output_dimension:": "8 36 12 ", + "output_name:": "/MatMul_69_output_0" + } + ] + }, + { + "node_id:": 1431, + "node_inputs:": [ + { + "input_dimension:": "8 36 12 ", + "input_name:": "/MatMul_69_output_0" + } + ], + "node_name:": "/Transpose_166", + "node_outputs:": [ + { + "output_dimension:": "36 8 12 ", + "output_name:": "/Transpose_166_output_0" + } + ] + }, + { + "node_id:": 1432, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_246_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/out_proj2_13/Add_output_0" + } + ], + "node_name:": "/Add_248", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_248_output_0" + } + ] + }, + { + "node_id:": 1433, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_248_output_0" + } + ], + "node_name:": "/Transpose_167", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Transpose_167_output_0" + } + ] + }, + { + "node_id:": 1434, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Transpose_167_output_0" + }, + { + "input_dimension:": "768 384 1 ", + "input_name:": "encoder.encoders.4.encoder.layers.2.conv_module2.pointwise_conv1.weight" + }, + { + "input_dimension:": "768 ", + "input_name:": "encoder.encoders.4.encoder.layers.2.conv_module2.pointwise_conv1.bias" + } + ], + "node_name:": "/pointwise_conv1_27/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 768 36 ", + "output_name:": "/pointwise_conv1_27/Conv_output_0" + } + ] + }, + { + "node_id:": 1435, + "node_inputs:": [ + { + "input_dimension:": "1 768 36 ", + "input_name:": "/pointwise_conv1_27/Conv_output_0" + } + ], + "node_name:": "/Split_27", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Split_27_output_0" + }, + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Split_27_output_1" + } + ] + }, + { + "node_id:": 1436, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Split_27_output_1" + } + ], + "node_name:": "/Sigmoid_27", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Sigmoid_27_output_0" + } + ] + }, + { + "node_id:": 1437, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Split_27_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Sigmoid_27_output_0" + } + ], + "node_name:": "/Mul_152", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Mul_152_output_0" + } + ] + }, + { + "node_id:": 1438, + "node_inputs:": [ + { + "input_dimension:": "4 1 384 30 ", + "input_name:": "cached_conv2_4" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_404_output_0" + } + ], + "node_name:": "/Gather_284", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Gather_284_output_0" + } + ] + }, + { + "node_id:": 1439, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Gather_284_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Mul_152_output_0" + } + ], + "node_name:": "/Concat_279", + "node_outputs:": [ + { + "output_dimension:": "1 384 66 ", + "output_name:": "/Concat_279_output_0" + } + ] + }, + { + "node_id:": 1440, + "node_inputs:": [ + { + "input_dimension:": "1 384 66 ", + "input_name:": "/Concat_279_output_0" + }, + { + "input_dimension:": "384 1 31 ", + "input_name:": "encoder.encoders.4.encoder.layers.2.conv_module2.depthwise_conv.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.4.encoder.layers.2.conv_module2.depthwise_conv.bias" + } + ], + "node_name:": "/depthwise_conv_27/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/depthwise_conv_27/Conv_output_0" + } + ] + }, + { + "node_id:": 1441, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/depthwise_conv_27/Conv_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/activation_27/Sub", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_27/Sub_output_0" + } + ] + }, + { + "node_id:": 1442, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_27/Sub_output_0" + } + ], + "node_name:": "/activation_27/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_27/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 1443, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/depthwise_conv_27/Conv_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_27/Sigmoid_output_0" + } + ], + "node_name:": "/activation_27/Mul", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_27/Mul_output_0" + } + ] + }, + { + "node_id:": 1444, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_27/Mul_output_0" + }, + { + "input_dimension:": "384 384 1 ", + "input_name:": "encoder.encoders.4.encoder.layers.2.conv_module2.pointwise_conv2.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.4.encoder.layers.2.conv_module2.pointwise_conv2.bias" + } + ], + "node_name:": "/pointwise_conv2_27/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/pointwise_conv2_27/Conv_output_0" + } + ] + }, + { + "node_id:": 1445, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/pointwise_conv2_27/Conv_output_0" + } + ], + "node_name:": "/Transpose_168", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Transpose_168_output_0" + } + ] + }, + { + "node_id:": 1446, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_248_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Transpose_168_output_0" + } + ], + "node_name:": "/Add_249", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_249_output_0" + } + ] + }, + { + "node_id:": 1447, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward3/in_proj_13/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward3/activation_13/Sub", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward3/activation_13/Sub_output_0" + } + ] + }, + { + "node_id:": 1448, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward3/activation_13/Sub_output_0" + } + ], + "node_name:": "/feed_forward3/activation_13/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward3/activation_13/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 1449, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward3/in_proj_13/Add_output_0" + }, + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward3/activation_13/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward3/activation_13/Mul", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward3/activation_13/Mul_output_0" + } + ] + }, + { + "node_id:": 1450, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_249_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/feed_forward3/out_proj_13/Add_output_0" + } + ], + "node_name:": "/Add_250", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_250_output_0" + } + ] + }, + { + "node_id:": 1451, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_250_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_250_output_0" + } + ], + "node_name:": "/norm_final_13/Mul", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/norm_final_13/Mul_output_0" + } + ] + }, + { + "node_id:": 1452, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/norm_final_13/Mul_output_0" + } + ], + "node_name:": "/norm_final_13/ReduceMean", + "node_outputs:": [ + { + "output_dimension:": "36 1 1 ", + "output_name:": "/norm_final_13/ReduceMean_output_0" + } + ] + }, + { + "node_id:": 1453, + "node_inputs:": [ + { + "input_dimension:": "36 1 1 ", + "input_name:": "/norm_final_13/ReduceMean_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/norm_final_13/Constant_output_0" + } + ], + "node_name:": "/norm_final_13/Add", + "node_outputs:": [ + { + "output_dimension:": "36 1 1 ", + "output_name:": "/norm_final_13/Add_output_0" + } + ] + }, + { + "node_id:": 1454, + "node_inputs:": [ + { + "input_dimension:": "36 1 1 ", + "input_name:": "/norm_final_13/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/norm_final_1/Constant_1_output_0" + } + ], + "node_name:": "/norm_final_13/Pow", + "node_outputs:": [ + { + "output_dimension:": "36 1 1 ", + "output_name:": "/norm_final_13/Pow_output_0" + } + ] + }, + { + "node_id:": 1455, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_250_output_0" + }, + { + "input_dimension:": "36 1 1 ", + "input_name:": "/norm_final_13/Pow_output_0" + } + ], + "node_name:": "/norm_final_13/Mul_1", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/norm_final_13/Mul_1_output_0" + } + ] + }, + { + "node_id:": 1456, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/norm_final_13/Mul_1_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_233_output_0" + } + ], + "node_name:": "/Sub_41", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Sub_41_output_0" + } + ] + }, + { + "node_id:": 1457, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Sub_41_output_0" + }, + { + "input_dimension:": "", + "input_name:": "encoder.encoders.4.encoder.layers.2.bypass_scale" + } + ], + "node_name:": "/Mul_153", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Mul_153_output_0" + } + ] + }, + { + "node_id:": 1458, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_233_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Mul_153_output_0" + } + ], + "node_name:": "/Add_251", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_251_output_0" + } + ] + }, + { + "node_id:": 1459, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward1/in_proj_14/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward1/activation_14/Sub", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward1/activation_14/Sub_output_0" + } + ] + }, + { + "node_id:": 1460, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward1/activation_14/Sub_output_0" + } + ], + "node_name:": "/feed_forward1/activation_14/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward1/activation_14/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 1461, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward1/in_proj_14/Add_output_0" + }, + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward1/activation_14/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward1/activation_14/Mul", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward1/activation_14/Mul_output_0" + } + ] + }, + { + "node_id:": 1462, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_251_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/feed_forward1/out_proj_14/Add_output_0" + } + ], + "node_name:": "/Add_252", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_252_output_0" + } + ] + }, + { + "node_id:": 1463, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_252_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_631_output_0" + } + ], + "node_name:": "/CumSum_14", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/CumSum_14_output_0" + } + ] + }, + { + "node_id:": 1464, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/CumSum_14_output_0" + }, + { + "input_dimension:": "1 1 384 ", + "input_name:": "/Unsqueeze_557_output_0" + } + ], + "node_name:": "/Add_253", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_253_output_0" + } + ] + }, + { + "node_id:": 1465, + "node_inputs:": [ + { + "input_dimension:": "36 1 ", + "input_name:": "/Unsqueeze_558_output_0" + }, + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_559_output_0" + } + ], + "node_name:": "/Add_255", + "node_outputs:": [ + { + "output_dimension:": "36 1 ", + "output_name:": "/Add_255_output_0" + } + ] + }, + { + "node_id:": 1466, + "node_inputs:": [ + { + "input_dimension:": "36 1 ", + "input_name:": "/Add_255_output_0" + } + ], + "node_name:": "/Cast_72", + "node_outputs:": [ + { + "output_dimension:": "36 1 ", + "output_name:": "/Cast_72_output_0" + } + ] + }, + { + "node_id:": 1467, + "node_inputs:": [ + { + "input_dimension:": "36 1 ", + "input_name:": "/Cast_72_output_0" + } + ], + "node_name:": "/Reciprocal_14", + "node_outputs:": [ + { + "output_dimension:": "36 1 ", + "output_name:": "/Reciprocal_14_output_0" + } + ] + }, + { + "node_id:": 1468, + "node_inputs:": [ + { + "input_dimension:": "36 1 ", + "input_name:": "/Reciprocal_14_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + } + ], + "node_name:": "/Unsqueeze_560", + "node_outputs:": [ + { + "output_dimension:": "36 1 1 ", + "output_name:": "/Unsqueeze_560_output_0" + } + ] + }, + { + "node_id:": 1469, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_253_output_0" + }, + { + "input_dimension:": "36 1 1 ", + "input_name:": "/Unsqueeze_560_output_0" + } + ], + "node_name:": "/Mul_156", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Mul_156_output_0" + } + ] + }, + { + "node_id:": 1470, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Mul_156_output_0" + }, + { + "input_dimension:": "384 384 ", + "input_name:": "onnx::MatMul_8258" + } + ], + "node_name:": "/proj_14/MatMul", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/proj_14/MatMul_output_0" + } + ] + }, + { + "node_id:": 1471, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_252_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/proj_14/MatMul_output_0" + } + ], + "node_name:": "/Add_257", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_257_output_0" + } + ] + }, + { + "node_id:": 1472, + "node_inputs:": [ + { + "input_dimension:": "36 1 512 ", + "input_name:": "/in_proj_14/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_285_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_568_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_130", + "node_outputs:": [ + { + "output_dimension:": "36 1 192 ", + "output_name:": "/Slice_130_output_0" + } + ] + }, + { + "node_id:": 1473, + "node_inputs:": [ + { + "input_dimension:": "4 144 1 192 ", + "input_name:": "cached_key_4" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_embed/Constant_7_output_0" + } + ], + "node_name:": "/Gather_301", + "node_outputs:": [ + { + "output_dimension:": "144 1 192 ", + "output_name:": "/Gather_301_output_0" + } + ] + }, + { + "node_id:": 1474, + "node_inputs:": [ + { + "input_dimension:": "144 1 192 ", + "input_name:": "/Gather_301_output_0" + }, + { + "input_dimension:": "36 1 192 ", + "input_name:": "/Slice_130_output_0" + } + ], + "node_name:": "/Concat_280", + "node_outputs:": [ + { + "output_dimension:": "180 1 192 ", + "output_name:": "/Concat_280_output_0" + } + ] + }, + { + "node_id:": 1475, + "node_inputs:": [ + { + "input_dimension:": "36 1 512 ", + "input_name:": "/in_proj_14/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1118_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_132", + "node_outputs:": [ + { + "output_dimension:": "36 1 32 ", + "output_name:": "/Slice_132_output_0" + } + ] + }, + { + "node_id:": 1476, + "node_inputs:": [ + { + "input_dimension:": "36 1 32 ", + "input_name:": "/Slice_132_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_283_output_0" + } + ], + "node_name:": "/Reshape_155", + "node_outputs:": [ + { + "output_dimension:": "36 1 8 4 ", + "output_name:": "/Reshape_155_output_0" + } + ] + }, + { + "node_id:": 1477, + "node_inputs:": [ + { + "input_dimension:": "36 1 8 4 ", + "input_name:": "/Reshape_155_output_0" + } + ], + "node_name:": "/Transpose_171", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 4 ", + "output_name:": "/Transpose_171_output_0" + } + ] + }, + { + "node_id:": 1478, + "node_inputs:": [ + { + "input_dimension:": "1 8 36 4 ", + "input_name:": "/Transpose_171_output_0" + }, + { + "input_dimension:": "1 8 4 215 ", + "input_name:": "/Transpose_173_output_0" + } + ], + "node_name:": "/MatMul_70", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 215 ", + "output_name:": "/MatMul_70_output_0" + } + ] + }, + { + "node_id:": 1479, + "node_inputs:": [ + { + "input_dimension:": "180 1 192 ", + "input_name:": "/Concat_280_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_266_output_0" + } + ], + "node_name:": "/Reshape_156", + "node_outputs:": [ + { + "output_dimension:": "180 1 8 24 ", + "output_name:": "/Reshape_156_output_0" + } + ] + }, + { + "node_id:": 1480, + "node_inputs:": [ + { + "input_dimension:": "180 1 8 24 ", + "input_name:": "/Reshape_156_output_0" + } + ], + "node_name:": "/Transpose_172", + "node_outputs:": [ + { + "output_dimension:": "1 8 24 180 ", + "output_name:": "/Transpose_172_output_0" + } + ] + }, + { + "node_id:": 1481, + "node_inputs:": [ + { + "input_dimension:": "36 1 512 ", + "input_name:": "/in_proj_14/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_285_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_129", + "node_outputs:": [ + { + "output_dimension:": "36 1 192 ", + "output_name:": "/Slice_129_output_0" + } + ] + }, + { + "node_id:": 1482, + "node_inputs:": [ + { + "input_dimension:": "36 1 192 ", + "input_name:": "/Slice_129_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_282_output_0" + } + ], + "node_name:": "/Reshape_154", + "node_outputs:": [ + { + "output_dimension:": "36 1 8 24 ", + "output_name:": "/Reshape_154_output_0" + } + ] + }, + { + "node_id:": 1483, + "node_inputs:": [ + { + "input_dimension:": "36 1 8 24 ", + "input_name:": "/Reshape_154_output_0" + } + ], + "node_name:": "/Transpose_170", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 24 ", + "output_name:": "/Transpose_170_output_0" + } + ] + }, + { + "node_id:": 1484, + "node_inputs:": [ + { + "input_dimension:": "1 8 36 24 ", + "input_name:": "/Transpose_170_output_0" + }, + { + "input_dimension:": "1 8 24 180 ", + "input_name:": "/Transpose_172_output_0" + } + ], + "node_name:": "/MatMul_71", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 180 ", + "output_name:": "/MatMul_71_output_0" + } + ] + }, + { + "node_id:": 1485, + "node_inputs:": [ + { + "input_dimension:": "1 8 36 215 ", + "input_name:": "/MatMul_70_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Concat_289_output_0" + } + ], + "node_name:": "/Reshape_159", + "node_outputs:": [ + { + "output_dimension:": "288 215 ", + "output_name:": "/Reshape_159_output_0" + } + ] + }, + { + "node_id:": 1486, + "node_inputs:": [ + { + "input_dimension:": "288 215 ", + "input_name:": "/Reshape_159_output_0" + }, + { + "input_dimension:": "288 180 ", + "input_name:": "/Add_259_output_0" + } + ], + "node_name:": "/GatherElements_14", + "node_outputs:": [ + { + "output_dimension:": "288 180 ", + "output_name:": "/GatherElements_14_output_0" + } + ] + }, + { + "node_id:": 1487, + "node_inputs:": [ + { + "input_dimension:": "288 180 ", + "input_name:": "/GatherElements_14_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/Concat_290_output_0" + } + ], + "node_name:": "/Reshape_160", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 180 ", + "output_name:": "/Reshape_160_output_0" + } + ] + }, + { + "node_id:": 1488, + "node_inputs:": [ + { + "input_dimension:": "1 8 36 180 ", + "input_name:": "/MatMul_71_output_0" + }, + { + "input_dimension:": "1 8 36 180 ", + "input_name:": "/Reshape_160_output_0" + } + ], + "node_name:": "/Add_260", + "node_outputs:": [ + { + "output_dimension:": "1 8 36 180 ", + "output_name:": "/Add_260_output_0" + } + ] + }, + { + "node_id:": 1489, + "node_inputs:": [ + { + "input_dimension:": "1 8 36 180 ", + "input_name:": "/Add_260_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_291_output_0" + } + ], + "node_name:": "/Reshape_161", + "node_outputs:": [ + { + "output_dimension:": "8 36 180 ", + "output_name:": "/Reshape_161_output_0" + } + ] + }, + { + "node_id:": 1490, + "node_inputs:": [ + { + "input_dimension:": "8 36 180 ", + "input_name:": "/Reshape_161_output_0" + } + ], + "node_name:": "/Softmax_14", + "node_outputs:": [ + { + "output_dimension:": "8 36 180 ", + "output_name:": "/Softmax_14_output_0" + } + ] + }, + { + "node_id:": 1491, + "node_inputs:": [ + { + "input_dimension:": "36 1 512 ", + "input_name:": "/in_proj_14/Add_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_568_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1118_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_131", + "node_outputs:": [ + { + "output_dimension:": "36 1 96 ", + "output_name:": "/Slice_131_output_0" + } + ] + }, + { + "node_id:": 1492, + "node_inputs:": [ + { + "input_dimension:": "4 144 1 96 ", + "input_name:": "cached_val_4" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_embed/Constant_7_output_0" + } + ], + "node_name:": "/Gather_302", + "node_outputs:": [ + { + "output_dimension:": "144 1 96 ", + "output_name:": "/Gather_302_output_0" + } + ] + }, + { + "node_id:": 1493, + "node_inputs:": [ + { + "input_dimension:": "144 1 96 ", + "input_name:": "/Gather_302_output_0" + }, + { + "input_dimension:": "36 1 96 ", + "input_name:": "/Slice_131_output_0" + } + ], + "node_name:": "/Concat_281", + "node_outputs:": [ + { + "output_dimension:": "180 1 96 ", + "output_name:": "/Concat_281_output_0" + } + ] + }, + { + "node_id:": 1494, + "node_inputs:": [ + { + "input_dimension:": "180 1 96 ", + "input_name:": "/Concat_281_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_285_output_0" + } + ], + "node_name:": "/Reshape_157", + "node_outputs:": [ + { + "output_dimension:": "180 8 12 ", + "output_name:": "/Reshape_157_output_0" + } + ] + }, + { + "node_id:": 1495, + "node_inputs:": [ + { + "input_dimension:": "180 8 12 ", + "input_name:": "/Reshape_157_output_0" + } + ], + "node_name:": "/Transpose_169", + "node_outputs:": [ + { + "output_dimension:": "8 180 12 ", + "output_name:": "/Transpose_169_output_0" + } + ] + }, + { + "node_id:": 1496, + "node_inputs:": [ + { + "input_dimension:": "8 36 180 ", + "input_name:": "/Softmax_14_output_0" + }, + { + "input_dimension:": "8 180 12 ", + "input_name:": "/Transpose_169_output_0" + } + ], + "node_name:": "/MatMul_72", + "node_outputs:": [ + { + "output_dimension:": "8 36 12 ", + "output_name:": "/MatMul_72_output_0" + } + ] + }, + { + "node_id:": 1497, + "node_inputs:": [ + { + "input_dimension:": "8 36 12 ", + "input_name:": "/MatMul_72_output_0" + } + ], + "node_name:": "/Transpose_174", + "node_outputs:": [ + { + "output_dimension:": "36 8 12 ", + "output_name:": "/Transpose_174_output_0" + } + ] + }, + { + "node_id:": 1498, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_257_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_261_output_0" + } + ], + "node_name:": "/Add_262", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_262_output_0" + } + ] + }, + { + "node_id:": 1499, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_262_output_0" + } + ], + "node_name:": "/Transpose_175", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Transpose_175_output_0" + } + ] + }, + { + "node_id:": 1500, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Transpose_175_output_0" + }, + { + "input_dimension:": "768 384 1 ", + "input_name:": "encoder.encoders.4.encoder.layers.3.conv_module1.pointwise_conv1.weight" + }, + { + "input_dimension:": "768 ", + "input_name:": "encoder.encoders.4.encoder.layers.3.conv_module1.pointwise_conv1.bias" + } + ], + "node_name:": "/pointwise_conv1_28/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 768 36 ", + "output_name:": "/pointwise_conv1_28/Conv_output_0" + } + ] + }, + { + "node_id:": 1501, + "node_inputs:": [ + { + "input_dimension:": "1 768 36 ", + "input_name:": "/pointwise_conv1_28/Conv_output_0" + } + ], + "node_name:": "/Split_28", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Split_28_output_0" + }, + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Split_28_output_1" + } + ] + }, + { + "node_id:": 1502, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Split_28_output_1" + } + ], + "node_name:": "/Sigmoid_28", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Sigmoid_28_output_0" + } + ] + }, + { + "node_id:": 1503, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Split_28_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Sigmoid_28_output_0" + } + ], + "node_name:": "/Mul_161", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Mul_161_output_0" + } + ] + }, + { + "node_id:": 1504, + "node_inputs:": [ + { + "input_dimension:": "4 1 384 30 ", + "input_name:": "cached_conv1_4" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_embed/Constant_7_output_0" + } + ], + "node_name:": "/Gather_304", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Gather_304_output_0" + } + ] + }, + { + "node_id:": 1505, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Gather_304_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Mul_161_output_0" + } + ], + "node_name:": "/Concat_293", + "node_outputs:": [ + { + "output_dimension:": "1 384 66 ", + "output_name:": "/Concat_293_output_0" + } + ] + }, + { + "node_id:": 1506, + "node_inputs:": [ + { + "input_dimension:": "1 384 66 ", + "input_name:": "/Concat_293_output_0" + }, + { + "input_dimension:": "384 1 31 ", + "input_name:": "encoder.encoders.4.encoder.layers.3.conv_module1.depthwise_conv.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.4.encoder.layers.3.conv_module1.depthwise_conv.bias" + } + ], + "node_name:": "/depthwise_conv_28/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/depthwise_conv_28/Conv_output_0" + } + ] + }, + { + "node_id:": 1507, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/depthwise_conv_28/Conv_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/activation_28/Sub", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_28/Sub_output_0" + } + ] + }, + { + "node_id:": 1508, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_28/Sub_output_0" + } + ], + "node_name:": "/activation_28/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_28/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 1509, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/depthwise_conv_28/Conv_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_28/Sigmoid_output_0" + } + ], + "node_name:": "/activation_28/Mul", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_28/Mul_output_0" + } + ] + }, + { + "node_id:": 1510, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_28/Mul_output_0" + }, + { + "input_dimension:": "384 384 1 ", + "input_name:": "encoder.encoders.4.encoder.layers.3.conv_module1.pointwise_conv2.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.4.encoder.layers.3.conv_module1.pointwise_conv2.bias" + } + ], + "node_name:": "/pointwise_conv2_28/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/pointwise_conv2_28/Conv_output_0" + } + ] + }, + { + "node_id:": 1511, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/pointwise_conv2_28/Conv_output_0" + } + ], + "node_name:": "/Transpose_176", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Transpose_176_output_0" + } + ] + }, + { + "node_id:": 1512, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_262_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Transpose_176_output_0" + } + ], + "node_name:": "/Add_263", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_263_output_0" + } + ] + }, + { + "node_id:": 1513, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward2/in_proj_14/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward2/activation_14/Sub", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward2/activation_14/Sub_output_0" + } + ] + }, + { + "node_id:": 1514, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward2/activation_14/Sub_output_0" + } + ], + "node_name:": "/feed_forward2/activation_14/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward2/activation_14/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 1515, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward2/in_proj_14/Add_output_0" + }, + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward2/activation_14/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward2/activation_14/Mul", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward2/activation_14/Mul_output_0" + } + ] + }, + { + "node_id:": 1516, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_263_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/feed_forward2/out_proj_14/Add_output_0" + } + ], + "node_name:": "/Add_264", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_264_output_0" + } + ] + }, + { + "node_id:": 1517, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_264_output_0" + }, + { + "input_dimension:": "384 96 ", + "input_name:": "onnx::MatMul_8304" + } + ], + "node_name:": "/in_proj2_14/MatMul", + "node_outputs:": [ + { + "output_dimension:": "36 1 96 ", + "output_name:": "/in_proj2_14/MatMul_output_0" + } + ] + }, + { + "node_id:": 1518, + "node_inputs:": [ + { + "input_dimension:": "4 144 1 96 ", + "input_name:": "cached_val2_4" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_embed/Constant_7_output_0" + } + ], + "node_name:": "/Gather_303", + "node_outputs:": [ + { + "output_dimension:": "144 1 96 ", + "output_name:": "/Gather_303_output_0" + } + ] + }, + { + "node_id:": 1519, + "node_inputs:": [ + { + "input_dimension:": "144 1 96 ", + "input_name:": "/Gather_303_output_0" + }, + { + "input_dimension:": "36 1 96 ", + "input_name:": "/in_proj2_14/MatMul_output_0" + } + ], + "node_name:": "/Concat_294", + "node_outputs:": [ + { + "output_dimension:": "180 1 96 ", + "output_name:": "/Concat_294_output_0" + } + ] + }, + { + "node_id:": 1520, + "node_inputs:": [ + { + "input_dimension:": "180 1 96 ", + "input_name:": "/Concat_294_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/Concat_295_output_0" + } + ], + "node_name:": "/Reshape_163", + "node_outputs:": [ + { + "output_dimension:": "180 8 12 ", + "output_name:": "/Reshape_163_output_0" + } + ] + }, + { + "node_id:": 1521, + "node_inputs:": [ + { + "input_dimension:": "180 8 12 ", + "input_name:": "/Reshape_163_output_0" + } + ], + "node_name:": "/Transpose_177", + "node_outputs:": [ + { + "output_dimension:": "8 180 12 ", + "output_name:": "/Transpose_177_output_0" + } + ] + }, + { + "node_id:": 1522, + "node_inputs:": [ + { + "input_dimension:": "8 36 180 ", + "input_name:": "/Softmax_14_output_0" + }, + { + "input_dimension:": "8 180 12 ", + "input_name:": "/Transpose_177_output_0" + } + ], + "node_name:": "/MatMul_74", + "node_outputs:": [ + { + "output_dimension:": "8 36 12 ", + "output_name:": "/MatMul_74_output_0" + } + ] + }, + { + "node_id:": 1523, + "node_inputs:": [ + { + "input_dimension:": "8 36 12 ", + "input_name:": "/MatMul_74_output_0" + } + ], + "node_name:": "/Transpose_178", + "node_outputs:": [ + { + "output_dimension:": "36 8 12 ", + "output_name:": "/Transpose_178_output_0" + } + ] + }, + { + "node_id:": 1524, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_264_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/out_proj2_14/Add_output_0" + } + ], + "node_name:": "/Add_266", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_266_output_0" + } + ] + }, + { + "node_id:": 1525, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_266_output_0" + } + ], + "node_name:": "/Transpose_179", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Transpose_179_output_0" + } + ] + }, + { + "node_id:": 1526, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Transpose_179_output_0" + }, + { + "input_dimension:": "768 384 1 ", + "input_name:": "encoder.encoders.4.encoder.layers.3.conv_module2.pointwise_conv1.weight" + }, + { + "input_dimension:": "768 ", + "input_name:": "encoder.encoders.4.encoder.layers.3.conv_module2.pointwise_conv1.bias" + } + ], + "node_name:": "/pointwise_conv1_29/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 768 36 ", + "output_name:": "/pointwise_conv1_29/Conv_output_0" + } + ] + }, + { + "node_id:": 1527, + "node_inputs:": [ + { + "input_dimension:": "1 768 36 ", + "input_name:": "/pointwise_conv1_29/Conv_output_0" + } + ], + "node_name:": "/Split_29", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Split_29_output_0" + }, + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Split_29_output_1" + } + ] + }, + { + "node_id:": 1528, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Split_29_output_1" + } + ], + "node_name:": "/Sigmoid_29", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Sigmoid_29_output_0" + } + ] + }, + { + "node_id:": 1529, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Split_29_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Sigmoid_29_output_0" + } + ], + "node_name:": "/Mul_163", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/Mul_163_output_0" + } + ] + }, + { + "node_id:": 1530, + "node_inputs:": [ + { + "input_dimension:": "4 1 384 30 ", + "input_name:": "cached_conv2_4" + }, + { + "input_dimension:": "", + "input_name:": "/encoder_embed/Constant_7_output_0" + } + ], + "node_name:": "/Gather_305", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Gather_305_output_0" + } + ] + }, + { + "node_id:": 1531, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Gather_305_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/Mul_163_output_0" + } + ], + "node_name:": "/Concat_297", + "node_outputs:": [ + { + "output_dimension:": "1 384 66 ", + "output_name:": "/Concat_297_output_0" + } + ] + }, + { + "node_id:": 1532, + "node_inputs:": [ + { + "input_dimension:": "1 384 66 ", + "input_name:": "/Concat_297_output_0" + }, + { + "input_dimension:": "384 1 31 ", + "input_name:": "encoder.encoders.4.encoder.layers.3.conv_module2.depthwise_conv.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.4.encoder.layers.3.conv_module2.depthwise_conv.bias" + } + ], + "node_name:": "/depthwise_conv_29/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/depthwise_conv_29/Conv_output_0" + } + ] + }, + { + "node_id:": 1533, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/depthwise_conv_29/Conv_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/activation_29/Sub", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_29/Sub_output_0" + } + ] + }, + { + "node_id:": 1534, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_29/Sub_output_0" + } + ], + "node_name:": "/activation_29/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_29/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 1535, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/depthwise_conv_29/Conv_output_0" + }, + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_29/Sigmoid_output_0" + } + ], + "node_name:": "/activation_29/Mul", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/activation_29/Mul_output_0" + } + ] + }, + { + "node_id:": 1536, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/activation_29/Mul_output_0" + }, + { + "input_dimension:": "384 384 1 ", + "input_name:": "encoder.encoders.4.encoder.layers.3.conv_module2.pointwise_conv2.weight" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.4.encoder.layers.3.conv_module2.pointwise_conv2.bias" + } + ], + "node_name:": "/pointwise_conv2_29/Conv", + "node_outputs:": [ + { + "output_dimension:": "1 384 36 ", + "output_name:": "/pointwise_conv2_29/Conv_output_0" + } + ] + }, + { + "node_id:": 1537, + "node_inputs:": [ + { + "input_dimension:": "1 384 36 ", + "input_name:": "/pointwise_conv2_29/Conv_output_0" + } + ], + "node_name:": "/Transpose_180", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Transpose_180_output_0" + } + ] + }, + { + "node_id:": 1538, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_266_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Transpose_180_output_0" + } + ], + "node_name:": "/Add_267", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_267_output_0" + } + ] + }, + { + "node_id:": 1539, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward3/in_proj_14/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/feed_forward1/activation_2/Constant_output_0" + } + ], + "node_name:": "/feed_forward3/activation_14/Sub", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward3/activation_14/Sub_output_0" + } + ] + }, + { + "node_id:": 1540, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward3/activation_14/Sub_output_0" + } + ], + "node_name:": "/feed_forward3/activation_14/Sigmoid", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward3/activation_14/Sigmoid_output_0" + } + ] + }, + { + "node_id:": 1541, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward3/in_proj_14/Add_output_0" + }, + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward3/activation_14/Sigmoid_output_0" + } + ], + "node_name:": "/feed_forward3/activation_14/Mul", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward3/activation_14/Mul_output_0" + } + ] + }, + { + "node_id:": 1542, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_267_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/feed_forward3/out_proj_14/Add_output_0" + } + ], + "node_name:": "/Add_268", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_268_output_0" + } + ] + }, + { + "node_id:": 1543, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_268_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_268_output_0" + } + ], + "node_name:": "/norm_final_14/Mul", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/norm_final_14/Mul_output_0" + } + ] + }, + { + "node_id:": 1544, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/norm_final_14/Mul_output_0" + } + ], + "node_name:": "/norm_final_14/ReduceMean", + "node_outputs:": [ + { + "output_dimension:": "36 1 1 ", + "output_name:": "/norm_final_14/ReduceMean_output_0" + } + ] + }, + { + "node_id:": 1545, + "node_inputs:": [ + { + "input_dimension:": "36 1 1 ", + "input_name:": "/norm_final_14/ReduceMean_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/norm_final_14/Constant_output_0" + } + ], + "node_name:": "/norm_final_14/Add", + "node_outputs:": [ + { + "output_dimension:": "36 1 1 ", + "output_name:": "/norm_final_14/Add_output_0" + } + ] + }, + { + "node_id:": 1546, + "node_inputs:": [ + { + "input_dimension:": "36 1 1 ", + "input_name:": "/norm_final_14/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/norm_final_1/Constant_1_output_0" + } + ], + "node_name:": "/norm_final_14/Pow", + "node_outputs:": [ + { + "output_dimension:": "36 1 1 ", + "output_name:": "/norm_final_14/Pow_output_0" + } + ] + }, + { + "node_id:": 1547, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_268_output_0" + }, + { + "input_dimension:": "36 1 1 ", + "input_name:": "/norm_final_14/Pow_output_0" + } + ], + "node_name:": "/norm_final_14/Mul_1", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/norm_final_14/Mul_1_output_0" + } + ] + }, + { + "node_id:": 1548, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/norm_final_14/Mul_1_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_251_output_0" + } + ], + "node_name:": "/Sub_44", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Sub_44_output_0" + } + ] + }, + { + "node_id:": 1549, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Sub_44_output_0" + }, + { + "input_dimension:": "", + "input_name:": "encoder.encoders.4.encoder.layers.3.bypass_scale" + } + ], + "node_name:": "/Mul_164", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Mul_164_output_0" + } + ] + }, + { + "node_id:": 1550, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_251_output_0" + }, + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Mul_164_output_0" + } + ], + "node_name:": "/Add_269", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_269_output_0" + } + ] + }, + { + "node_id:": 1551, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_269_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/upsample_3/Unsqueeze", + "node_outputs:": [ + { + "output_dimension:": "36 1 1 384 ", + "output_name:": "/upsample_3/Unsqueeze_output_0" + } + ] + }, + { + "node_id:": 1552, + "node_inputs:": [ + { + "input_dimension:": "36 1 1 384 ", + "input_name:": "/upsample_3/Unsqueeze_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/upsample_3/Where_output_0" + } + ], + "node_name:": "/upsample_3/Expand", + "node_outputs:": [ + { + "output_dimension:": "36 2 1 384 ", + "output_name:": "/upsample_3/Expand_output_0" + } + ] + }, + { + "node_id:": 1553, + "node_inputs:": [ + { + "input_dimension:": "36 2 1 384 ", + "input_name:": "/upsample_3/Expand_output_0" + }, + { + "input_dimension:": "2 1 384 ", + "input_name:": "onnx::Add_8318" + } + ], + "node_name:": "/upsample_3/Add", + "node_outputs:": [ + { + "output_dimension:": "36 2 1 384 ", + "output_name:": "/upsample_3/Add_output_0" + } + ] + }, + { + "node_id:": 1554, + "node_inputs:": [ + { + "input_dimension:": "36 2 1 384 ", + "input_name:": "/upsample_3/Add_output_0" + }, + { + "input_dimension:": "3 ", + "input_name:": "/upsample_3/Concat_1_output_0" + } + ], + "node_name:": "/upsample_3/Reshape_1", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/upsample_3/Reshape_1_output_0" + } + ] + }, + { + "node_id:": 1555, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/upsample_3/Reshape_1_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_618_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_138", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/Slice_138_output_0" + } + ] + }, + { + "node_id:": 1556, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Slice_138_output_0" + }, + { + "input_dimension:": "", + "input_name:": "onnx::Mul_8322" + } + ], + "node_name:": "/out_combiner_3/Mul_1", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/out_combiner_3/Mul_1_output_0" + } + ] + }, + { + "node_id:": 1557, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/skip_modules.4/Add_output_0" + }, + { + "input_dimension:": "", + "input_name:": "encoder.encoders.4.out_combiner.weight1" + } + ], + "node_name:": "/out_combiner_3/Mul", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/out_combiner_3/Mul_output_0" + } + ] + }, + { + "node_id:": 1558, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/out_combiner_3/Mul_output_0" + }, + { + "input_dimension:": "72 1 384 ", + "input_name:": "/out_combiner_3/Mul_1_output_0" + } + ], + "node_name:": "/out_combiner_3/Add", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/out_combiner_3/Add_output_0" + } + ] + }, + { + "node_id:": 1559, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/out_combiner_3/Add_output_0" + }, + { + "input_dimension:": "4 ", + "input_name:": "/downsample_output/Concat_output_0" + } + ], + "node_name:": "/downsample_output/Reshape", + "node_outputs:": [ + { + "output_dimension:": "36 2 1 384 ", + "output_name:": "/downsample_output/Reshape_output_0" + } + ] + }, + { + "node_id:": 1560, + "node_inputs:": [ + { + "input_dimension:": "36 2 1 384 ", + "input_name:": "/downsample_output/Reshape_output_0" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.downsample_output.query" + } + ], + "node_name:": "/downsample_output/Mul", + "node_outputs:": [ + { + "output_dimension:": "36 2 1 384 ", + "output_name:": "/downsample_output/Mul_output_0" + } + ] + }, + { + "node_id:": 1561, + "node_inputs:": [ + { + "input_dimension:": "36 2 1 384 ", + "input_name:": "/downsample_output/Mul_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_246_output_0" + } + ], + "node_name:": "/downsample_output/ReduceSum", + "node_outputs:": [ + { + "output_dimension:": "36 2 1 1 ", + "output_name:": "/downsample_output/ReduceSum_output_0" + } + ] + }, + { + "node_id:": 1562, + "node_inputs:": [ + { + "input_dimension:": "36 2 1 1 ", + "input_name:": "/downsample_output/ReduceSum_output_0" + } + ], + "node_name:": "/downsample_output/Softmax", + "node_outputs:": [ + { + "output_dimension:": "36 2 1 1 ", + "output_name:": "/downsample_output/Softmax_output_0" + } + ] + }, + { + "node_id:": 1563, + "node_inputs:": [ + { + "input_dimension:": "36 2 1 384 ", + "input_name:": "/downsample_output/Reshape_output_0" + }, + { + "input_dimension:": "36 2 1 1 ", + "input_name:": "/downsample_output/Softmax_output_0" + } + ], + "node_name:": "/downsample_output/Mul_1", + "node_outputs:": [ + { + "output_dimension:": "36 2 1 384 ", + "output_name:": "/downsample_output/Mul_1_output_0" + } + ] + }, + { + "node_id:": 1564, + "node_inputs:": [ + { + "input_dimension:": "36 2 1 384 ", + "input_name:": "/downsample_output/Mul_1_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/downsample_output/ReduceSum_1", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/downsample_output/ReduceSum_1_output_0" + } + ] + }, + { + "node_id:": 1565, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/downsample_output/ReduceSum_1_output_0" + } + ], + "node_name:": "/Transpose_181", + "node_outputs:": [ + { + "output_dimension:": "1 36 384 ", + "output_name:": "/Transpose_181_output_0" + } + ] + }, + { + "node_id:": 1566, + "node_inputs:": [ + { + "input_dimension:": "1 384 66 ", + "input_name:": "/Concat_297_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_614_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_137", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Slice_137_output_0" + } + ] + }, + { + "node_id:": 1567, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Slice_137_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_617", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 30 ", + "output_name:": "/Unsqueeze_617_output_0" + } + ] + }, + { + "node_id:": 1568, + "node_inputs:": [ + { + "input_dimension:": "1 384 66 ", + "input_name:": "/Concat_279_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_614_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_128", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Slice_128_output_0" + } + ] + }, + { + "node_id:": 1569, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Slice_128_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_616", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 30 ", + "output_name:": "/Unsqueeze_616_output_0" + } + ] + }, + { + "node_id:": 1570, + "node_inputs:": [ + { + "input_dimension:": "1 384 66 ", + "input_name:": "/Concat_261_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_614_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_119", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Slice_119_output_0" + } + ] + }, + { + "node_id:": 1571, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Slice_119_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_615", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 30 ", + "output_name:": "/Unsqueeze_615_output_0" + } + ] + }, + { + "node_id:": 1572, + "node_inputs:": [ + { + "input_dimension:": "1 384 66 ", + "input_name:": "/Concat_243_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_614_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_110", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Slice_110_output_0" + } + ] + }, + { + "node_id:": 1573, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Slice_110_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_614", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 30 ", + "output_name:": "/Unsqueeze_614_output_0" + } + ] + }, + { + "node_id:": 1574, + "node_inputs:": [ + { + "input_dimension:": "1 1 384 30 ", + "input_name:": "/Unsqueeze_614_output_0" + }, + { + "input_dimension:": "1 1 384 30 ", + "input_name:": "/Unsqueeze_615_output_0" + }, + { + "input_dimension:": "1 1 384 30 ", + "input_name:": "/Unsqueeze_616_output_0" + }, + { + "input_dimension:": "1 1 384 30 ", + "input_name:": "/Unsqueeze_617_output_0" + } + ], + "node_name:": "/Concat_304", + "node_outputs:": [ + { + "output_dimension:": "4 1 384 30 ", + "output_name:": "new_cached_conv2_4" + } + ] + }, + { + "node_id:": 1575, + "node_inputs:": [ + { + "input_dimension:": "1 384 66 ", + "input_name:": "/Concat_293_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_614_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_135", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Slice_135_output_0" + } + ] + }, + { + "node_id:": 1576, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Slice_135_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_613", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 30 ", + "output_name:": "/Unsqueeze_613_output_0" + } + ] + }, + { + "node_id:": 1577, + "node_inputs:": [ + { + "input_dimension:": "1 384 66 ", + "input_name:": "/Concat_275_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_614_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_126", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Slice_126_output_0" + } + ] + }, + { + "node_id:": 1578, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Slice_126_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_612", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 30 ", + "output_name:": "/Unsqueeze_612_output_0" + } + ] + }, + { + "node_id:": 1579, + "node_inputs:": [ + { + "input_dimension:": "1 384 66 ", + "input_name:": "/Concat_257_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_614_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_117", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Slice_117_output_0" + } + ] + }, + { + "node_id:": 1580, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Slice_117_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_611", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 30 ", + "output_name:": "/Unsqueeze_611_output_0" + } + ] + }, + { + "node_id:": 1581, + "node_inputs:": [ + { + "input_dimension:": "1 384 66 ", + "input_name:": "/Concat_239_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_614_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_108", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Slice_108_output_0" + } + ] + }, + { + "node_id:": 1582, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Slice_108_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_610", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 30 ", + "output_name:": "/Unsqueeze_610_output_0" + } + ] + }, + { + "node_id:": 1583, + "node_inputs:": [ + { + "input_dimension:": "1 1 384 30 ", + "input_name:": "/Unsqueeze_610_output_0" + }, + { + "input_dimension:": "1 1 384 30 ", + "input_name:": "/Unsqueeze_611_output_0" + }, + { + "input_dimension:": "1 1 384 30 ", + "input_name:": "/Unsqueeze_612_output_0" + }, + { + "input_dimension:": "1 1 384 30 ", + "input_name:": "/Unsqueeze_613_output_0" + } + ], + "node_name:": "/Concat_303", + "node_outputs:": [ + { + "output_dimension:": "4 1 384 30 ", + "output_name:": "new_cached_conv1_4" + } + ] + }, + { + "node_id:": 1584, + "node_inputs:": [ + { + "input_dimension:": "180 1 96 ", + "input_name:": "/Concat_294_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_585_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_136", + "node_outputs:": [ + { + "output_dimension:": "144 1 96 ", + "output_name:": "/Slice_136_output_0" + } + ] + }, + { + "node_id:": 1585, + "node_inputs:": [ + { + "input_dimension:": "144 1 96 ", + "input_name:": "/Slice_136_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_609", + "node_outputs:": [ + { + "output_dimension:": "1 144 1 96 ", + "output_name:": "/Unsqueeze_609_output_0" + } + ] + }, + { + "node_id:": 1586, + "node_inputs:": [ + { + "input_dimension:": "180 1 96 ", + "input_name:": "/Concat_276_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_585_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_127", + "node_outputs:": [ + { + "output_dimension:": "144 1 96 ", + "output_name:": "/Slice_127_output_0" + } + ] + }, + { + "node_id:": 1587, + "node_inputs:": [ + { + "input_dimension:": "144 1 96 ", + "input_name:": "/Slice_127_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_608", + "node_outputs:": [ + { + "output_dimension:": "1 144 1 96 ", + "output_name:": "/Unsqueeze_608_output_0" + } + ] + }, + { + "node_id:": 1588, + "node_inputs:": [ + { + "input_dimension:": "180 1 96 ", + "input_name:": "/Concat_258_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_585_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_118", + "node_outputs:": [ + { + "output_dimension:": "144 1 96 ", + "output_name:": "/Slice_118_output_0" + } + ] + }, + { + "node_id:": 1589, + "node_inputs:": [ + { + "input_dimension:": "144 1 96 ", + "input_name:": "/Slice_118_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_607", + "node_outputs:": [ + { + "output_dimension:": "1 144 1 96 ", + "output_name:": "/Unsqueeze_607_output_0" + } + ] + }, + { + "node_id:": 1590, + "node_inputs:": [ + { + "input_dimension:": "180 1 96 ", + "input_name:": "/Concat_240_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_585_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_109", + "node_outputs:": [ + { + "output_dimension:": "144 1 96 ", + "output_name:": "/Slice_109_output_0" + } + ] + }, + { + "node_id:": 1591, + "node_inputs:": [ + { + "input_dimension:": "144 1 96 ", + "input_name:": "/Slice_109_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_606", + "node_outputs:": [ + { + "output_dimension:": "1 144 1 96 ", + "output_name:": "/Unsqueeze_606_output_0" + } + ] + }, + { + "node_id:": 1592, + "node_inputs:": [ + { + "input_dimension:": "1 144 1 96 ", + "input_name:": "/Unsqueeze_606_output_0" + }, + { + "input_dimension:": "1 144 1 96 ", + "input_name:": "/Unsqueeze_607_output_0" + }, + { + "input_dimension:": "1 144 1 96 ", + "input_name:": "/Unsqueeze_608_output_0" + }, + { + "input_dimension:": "1 144 1 96 ", + "input_name:": "/Unsqueeze_609_output_0" + } + ], + "node_name:": "/Concat_302", + "node_outputs:": [ + { + "output_dimension:": "4 144 1 96 ", + "output_name:": "new_cached_val2_4" + } + ] + }, + { + "node_id:": 1593, + "node_inputs:": [ + { + "input_dimension:": "180 1 96 ", + "input_name:": "/Concat_281_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_562_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_134", + "node_outputs:": [ + { + "output_dimension:": "144 1 96 ", + "output_name:": "/Slice_134_output_0" + } + ] + }, + { + "node_id:": 1594, + "node_inputs:": [ + { + "input_dimension:": "144 1 96 ", + "input_name:": "/Slice_134_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_605", + "node_outputs:": [ + { + "output_dimension:": "1 144 1 96 ", + "output_name:": "/Unsqueeze_605_output_0" + } + ] + }, + { + "node_id:": 1595, + "node_inputs:": [ + { + "input_dimension:": "180 1 96 ", + "input_name:": "/Concat_263_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_562_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_125", + "node_outputs:": [ + { + "output_dimension:": "144 1 96 ", + "output_name:": "/Slice_125_output_0" + } + ] + }, + { + "node_id:": 1596, + "node_inputs:": [ + { + "input_dimension:": "144 1 96 ", + "input_name:": "/Slice_125_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_604", + "node_outputs:": [ + { + "output_dimension:": "1 144 1 96 ", + "output_name:": "/Unsqueeze_604_output_0" + } + ] + }, + { + "node_id:": 1597, + "node_inputs:": [ + { + "input_dimension:": "180 1 96 ", + "input_name:": "/Concat_245_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_562_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_116", + "node_outputs:": [ + { + "output_dimension:": "144 1 96 ", + "output_name:": "/Slice_116_output_0" + } + ] + }, + { + "node_id:": 1598, + "node_inputs:": [ + { + "input_dimension:": "144 1 96 ", + "input_name:": "/Slice_116_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_603", + "node_outputs:": [ + { + "output_dimension:": "1 144 1 96 ", + "output_name:": "/Unsqueeze_603_output_0" + } + ] + }, + { + "node_id:": 1599, + "node_inputs:": [ + { + "input_dimension:": "180 1 96 ", + "input_name:": "/Concat_227_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_562_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_107", + "node_outputs:": [ + { + "output_dimension:": "144 1 96 ", + "output_name:": "/Slice_107_output_0" + } + ] + }, + { + "node_id:": 1600, + "node_inputs:": [ + { + "input_dimension:": "144 1 96 ", + "input_name:": "/Slice_107_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_602", + "node_outputs:": [ + { + "output_dimension:": "1 144 1 96 ", + "output_name:": "/Unsqueeze_602_output_0" + } + ] + }, + { + "node_id:": 1601, + "node_inputs:": [ + { + "input_dimension:": "1 144 1 96 ", + "input_name:": "/Unsqueeze_602_output_0" + }, + { + "input_dimension:": "1 144 1 96 ", + "input_name:": "/Unsqueeze_603_output_0" + }, + { + "input_dimension:": "1 144 1 96 ", + "input_name:": "/Unsqueeze_604_output_0" + }, + { + "input_dimension:": "1 144 1 96 ", + "input_name:": "/Unsqueeze_605_output_0" + } + ], + "node_name:": "/Concat_301", + "node_outputs:": [ + { + "output_dimension:": "4 144 1 96 ", + "output_name:": "new_cached_val_4" + } + ] + }, + { + "node_id:": 1602, + "node_inputs:": [ + { + "input_dimension:": "180 1 192 ", + "input_name:": "/Concat_280_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_562_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_133", + "node_outputs:": [ + { + "output_dimension:": "144 1 192 ", + "output_name:": "/Slice_133_output_0" + } + ] + }, + { + "node_id:": 1603, + "node_inputs:": [ + { + "input_dimension:": "144 1 192 ", + "input_name:": "/Slice_133_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_601", + "node_outputs:": [ + { + "output_dimension:": "1 144 1 192 ", + "output_name:": "/Unsqueeze_601_output_0" + } + ] + }, + { + "node_id:": 1604, + "node_inputs:": [ + { + "input_dimension:": "180 1 192 ", + "input_name:": "/Concat_262_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_562_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_124", + "node_outputs:": [ + { + "output_dimension:": "144 1 192 ", + "output_name:": "/Slice_124_output_0" + } + ] + }, + { + "node_id:": 1605, + "node_inputs:": [ + { + "input_dimension:": "144 1 192 ", + "input_name:": "/Slice_124_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_600", + "node_outputs:": [ + { + "output_dimension:": "1 144 1 192 ", + "output_name:": "/Unsqueeze_600_output_0" + } + ] + }, + { + "node_id:": 1606, + "node_inputs:": [ + { + "input_dimension:": "180 1 192 ", + "input_name:": "/Concat_244_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_562_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_115", + "node_outputs:": [ + { + "output_dimension:": "144 1 192 ", + "output_name:": "/Slice_115_output_0" + } + ] + }, + { + "node_id:": 1607, + "node_inputs:": [ + { + "input_dimension:": "144 1 192 ", + "input_name:": "/Slice_115_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_599", + "node_outputs:": [ + { + "output_dimension:": "1 144 1 192 ", + "output_name:": "/Unsqueeze_599_output_0" + } + ] + }, + { + "node_id:": 1608, + "node_inputs:": [ + { + "input_dimension:": "180 1 192 ", + "input_name:": "/Concat_226_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_562_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_106", + "node_outputs:": [ + { + "output_dimension:": "144 1 192 ", + "output_name:": "/Slice_106_output_0" + } + ] + }, + { + "node_id:": 1609, + "node_inputs:": [ + { + "input_dimension:": "144 1 192 ", + "input_name:": "/Slice_106_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_598", + "node_outputs:": [ + { + "output_dimension:": "1 144 1 192 ", + "output_name:": "/Unsqueeze_598_output_0" + } + ] + }, + { + "node_id:": 1610, + "node_inputs:": [ + { + "input_dimension:": "1 144 1 192 ", + "input_name:": "/Unsqueeze_598_output_0" + }, + { + "input_dimension:": "1 144 1 192 ", + "input_name:": "/Unsqueeze_599_output_0" + }, + { + "input_dimension:": "1 144 1 192 ", + "input_name:": "/Unsqueeze_600_output_0" + }, + { + "input_dimension:": "1 144 1 192 ", + "input_name:": "/Unsqueeze_601_output_0" + } + ], + "node_name:": "/Concat_300", + "node_outputs:": [ + { + "output_dimension:": "4 144 1 192 ", + "output_name:": "new_cached_key_4" + } + ] + }, + { + "node_id:": 1611, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Mul_156_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_964_output_0" + } + ], + "node_name:": "/Gather_308", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Gather_308_output_0" + } + ] + }, + { + "node_id:": 1612, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Gather_308_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_597", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 ", + "output_name:": "/Unsqueeze_597_output_0" + } + ] + }, + { + "node_id:": 1613, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Mul_145_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_964_output_0" + } + ], + "node_name:": "/Gather_287", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Gather_287_output_0" + } + ] + }, + { + "node_id:": 1614, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Gather_287_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_596", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 ", + "output_name:": "/Unsqueeze_596_output_0" + } + ] + }, + { + "node_id:": 1615, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Mul_134_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_964_output_0" + } + ], + "node_name:": "/Gather_266", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Gather_266_output_0" + } + ] + }, + { + "node_id:": 1616, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Gather_266_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_595", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 ", + "output_name:": "/Unsqueeze_595_output_0" + } + ] + }, + { + "node_id:": 1617, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Mul_123_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_964_output_0" + } + ], + "node_name:": "/Gather_245", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Gather_245_output_0" + } + ] + }, + { + "node_id:": 1618, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Gather_245_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_594", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 ", + "output_name:": "/Unsqueeze_594_output_0" + } + ] + }, + { + "node_id:": 1619, + "node_inputs:": [ + { + "input_dimension:": "1 1 384 ", + "input_name:": "/Unsqueeze_594_output_0" + }, + { + "input_dimension:": "1 1 384 ", + "input_name:": "/Unsqueeze_595_output_0" + }, + { + "input_dimension:": "1 1 384 ", + "input_name:": "/Unsqueeze_596_output_0" + }, + { + "input_dimension:": "1 1 384 ", + "input_name:": "/Unsqueeze_597_output_0" + } + ], + "node_name:": "/Concat_299", + "node_outputs:": [ + { + "output_dimension:": "4 1 384 ", + "output_name:": "new_cached_avg_4" + } + ] + }, + { + "node_id:": 1620, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_299_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Gather_307_output_0" + } + ], + "node_name:": "/Add_256", + "node_outputs:": [ + { + "output_dimension:": "1 ", + "output_name:": "/Add_256_output_0" + } + ] + }, + { + "node_id:": 1621, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Add_256_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_593", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_593_output_0" + } + ] + }, + { + "node_id:": 1622, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_278_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Gather_286_output_0" + } + ], + "node_name:": "/Add_238", + "node_outputs:": [ + { + "output_dimension:": "1 ", + "output_name:": "/Add_238_output_0" + } + ] + }, + { + "node_id:": 1623, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Add_238_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_592", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_592_output_0" + } + ] + }, + { + "node_id:": 1624, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_257_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Gather_265_output_0" + } + ], + "node_name:": "/Add_220", + "node_outputs:": [ + { + "output_dimension:": "1 ", + "output_name:": "/Add_220_output_0" + } + ] + }, + { + "node_id:": 1625, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Add_220_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_591", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_591_output_0" + } + ] + }, + { + "node_id:": 1626, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_236_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Gather_244_output_0" + } + ], + "node_name:": "/Add_202", + "node_outputs:": [ + { + "output_dimension:": "1 ", + "output_name:": "/Add_202_output_0" + } + ] + }, + { + "node_id:": 1627, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Add_202_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_590", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_590_output_0" + } + ] + }, + { + "node_id:": 1628, + "node_inputs:": [ + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_590_output_0" + }, + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_591_output_0" + }, + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_592_output_0" + }, + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_593_output_0" + } + ], + "node_name:": "/Concat_298", + "node_outputs:": [ + { + "output_dimension:": "4 1 ", + "output_name:": "new_cached_len_4" + } + ] + }, + { + "node_id:": 1629, + "node_inputs:": [ + { + "input_dimension:": "1 384 39 ", + "input_name:": "/Concat_218_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_614_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_100", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Slice_100_output_0" + } + ] + }, + { + "node_id:": 1630, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Slice_100_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_452", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 30 ", + "output_name:": "/Unsqueeze_452_output_0" + } + ] + }, + { + "node_id:": 1631, + "node_inputs:": [ + { + "input_dimension:": "1 384 39 ", + "input_name:": "/Concat_200_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_614_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_91", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Slice_91_output_0" + } + ] + }, + { + "node_id:": 1632, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Slice_91_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_451", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 30 ", + "output_name:": "/Unsqueeze_451_output_0" + } + ] + }, + { + "node_id:": 1633, + "node_inputs:": [ + { + "input_dimension:": "1 1 384 30 ", + "input_name:": "/Unsqueeze_451_output_0" + }, + { + "input_dimension:": "1 1 384 30 ", + "input_name:": "/Unsqueeze_452_output_0" + } + ], + "node_name:": "/Concat_225", + "node_outputs:": [ + { + "output_dimension:": "2 1 384 30 ", + "output_name:": "new_cached_conv2_3" + } + ] + }, + { + "node_id:": 1634, + "node_inputs:": [ + { + "input_dimension:": "1 384 39 ", + "input_name:": "/Concat_214_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_614_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_98", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Slice_98_output_0" + } + ] + }, + { + "node_id:": 1635, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Slice_98_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_450", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 30 ", + "output_name:": "/Unsqueeze_450_output_0" + } + ] + }, + { + "node_id:": 1636, + "node_inputs:": [ + { + "input_dimension:": "1 384 39 ", + "input_name:": "/Concat_196_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_614_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_89", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Slice_89_output_0" + } + ] + }, + { + "node_id:": 1637, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Slice_89_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_449", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 30 ", + "output_name:": "/Unsqueeze_449_output_0" + } + ] + }, + { + "node_id:": 1638, + "node_inputs:": [ + { + "input_dimension:": "1 1 384 30 ", + "input_name:": "/Unsqueeze_449_output_0" + }, + { + "input_dimension:": "1 1 384 30 ", + "input_name:": "/Unsqueeze_450_output_0" + } + ], + "node_name:": "/Concat_224", + "node_outputs:": [ + { + "output_dimension:": "2 1 384 30 ", + "output_name:": "new_cached_conv1_3" + } + ] + }, + { + "node_id:": 1639, + "node_inputs:": [ + { + "input_dimension:": "45 1 96 ", + "input_name:": "/Concat_215_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_434_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_99", + "node_outputs:": [ + { + "output_dimension:": "36 1 96 ", + "output_name:": "/Slice_99_output_0" + } + ] + }, + { + "node_id:": 1640, + "node_inputs:": [ + { + "input_dimension:": "36 1 96 ", + "input_name:": "/Slice_99_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_448", + "node_outputs:": [ + { + "output_dimension:": "1 36 1 96 ", + "output_name:": "/Unsqueeze_448_output_0" + } + ] + }, + { + "node_id:": 1641, + "node_inputs:": [ + { + "input_dimension:": "45 1 96 ", + "input_name:": "/Concat_197_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_434_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_90", + "node_outputs:": [ + { + "output_dimension:": "36 1 96 ", + "output_name:": "/Slice_90_output_0" + } + ] + }, + { + "node_id:": 1642, + "node_inputs:": [ + { + "input_dimension:": "36 1 96 ", + "input_name:": "/Slice_90_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_447", + "node_outputs:": [ + { + "output_dimension:": "1 36 1 96 ", + "output_name:": "/Unsqueeze_447_output_0" + } + ] + }, + { + "node_id:": 1643, + "node_inputs:": [ + { + "input_dimension:": "1 36 1 96 ", + "input_name:": "/Unsqueeze_447_output_0" + }, + { + "input_dimension:": "1 36 1 96 ", + "input_name:": "/Unsqueeze_448_output_0" + } + ], + "node_name:": "/Concat_223", + "node_outputs:": [ + { + "output_dimension:": "2 36 1 96 ", + "output_name:": "new_cached_val2_3" + } + ] + }, + { + "node_id:": 1644, + "node_inputs:": [ + { + "input_dimension:": "45 1 96 ", + "input_name:": "/Concat_202_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_411_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_97", + "node_outputs:": [ + { + "output_dimension:": "36 1 96 ", + "output_name:": "/Slice_97_output_0" + } + ] + }, + { + "node_id:": 1645, + "node_inputs:": [ + { + "input_dimension:": "36 1 96 ", + "input_name:": "/Slice_97_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_446", + "node_outputs:": [ + { + "output_dimension:": "1 36 1 96 ", + "output_name:": "/Unsqueeze_446_output_0" + } + ] + }, + { + "node_id:": 1646, + "node_inputs:": [ + { + "input_dimension:": "45 1 96 ", + "input_name:": "/Concat_184_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_411_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_88", + "node_outputs:": [ + { + "output_dimension:": "36 1 96 ", + "output_name:": "/Slice_88_output_0" + } + ] + }, + { + "node_id:": 1647, + "node_inputs:": [ + { + "input_dimension:": "36 1 96 ", + "input_name:": "/Slice_88_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_445", + "node_outputs:": [ + { + "output_dimension:": "1 36 1 96 ", + "output_name:": "/Unsqueeze_445_output_0" + } + ] + }, + { + "node_id:": 1648, + "node_inputs:": [ + { + "input_dimension:": "1 36 1 96 ", + "input_name:": "/Unsqueeze_445_output_0" + }, + { + "input_dimension:": "1 36 1 96 ", + "input_name:": "/Unsqueeze_446_output_0" + } + ], + "node_name:": "/Concat_222", + "node_outputs:": [ + { + "output_dimension:": "2 36 1 96 ", + "output_name:": "new_cached_val_3" + } + ] + }, + { + "node_id:": 1649, + "node_inputs:": [ + { + "input_dimension:": "45 1 192 ", + "input_name:": "/Concat_201_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_411_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_96", + "node_outputs:": [ + { + "output_dimension:": "36 1 192 ", + "output_name:": "/Slice_96_output_0" + } + ] + }, + { + "node_id:": 1650, + "node_inputs:": [ + { + "input_dimension:": "36 1 192 ", + "input_name:": "/Slice_96_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_444", + "node_outputs:": [ + { + "output_dimension:": "1 36 1 192 ", + "output_name:": "/Unsqueeze_444_output_0" + } + ] + }, + { + "node_id:": 1651, + "node_inputs:": [ + { + "input_dimension:": "45 1 192 ", + "input_name:": "/Concat_183_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_411_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_87", + "node_outputs:": [ + { + "output_dimension:": "36 1 192 ", + "output_name:": "/Slice_87_output_0" + } + ] + }, + { + "node_id:": 1652, + "node_inputs:": [ + { + "input_dimension:": "36 1 192 ", + "input_name:": "/Slice_87_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_443", + "node_outputs:": [ + { + "output_dimension:": "1 36 1 192 ", + "output_name:": "/Unsqueeze_443_output_0" + } + ] + }, + { + "node_id:": 1653, + "node_inputs:": [ + { + "input_dimension:": "1 36 1 192 ", + "input_name:": "/Unsqueeze_443_output_0" + }, + { + "input_dimension:": "1 36 1 192 ", + "input_name:": "/Unsqueeze_444_output_0" + } + ], + "node_name:": "/Concat_221", + "node_outputs:": [ + { + "output_dimension:": "2 36 1 192 ", + "output_name:": "new_cached_key_3" + } + ] + }, + { + "node_id:": 1654, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Mul_112_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_964_output_0" + } + ], + "node_name:": "/Gather_223", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Gather_223_output_0" + } + ] + }, + { + "node_id:": 1655, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Gather_223_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_442", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 ", + "output_name:": "/Unsqueeze_442_output_0" + } + ] + }, + { + "node_id:": 1656, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Mul_101_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_964_output_0" + } + ], + "node_name:": "/Gather_202", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Gather_202_output_0" + } + ] + }, + { + "node_id:": 1657, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Gather_202_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_441", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 ", + "output_name:": "/Unsqueeze_441_output_0" + } + ] + }, + { + "node_id:": 1658, + "node_inputs:": [ + { + "input_dimension:": "1 1 384 ", + "input_name:": "/Unsqueeze_441_output_0" + }, + { + "input_dimension:": "1 1 384 ", + "input_name:": "/Unsqueeze_442_output_0" + } + ], + "node_name:": "/Concat_220", + "node_outputs:": [ + { + "output_dimension:": "2 1 384 ", + "output_name:": "new_cached_avg_3" + } + ] + }, + { + "node_id:": 1659, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_214_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Gather_222_output_0" + } + ], + "node_name:": "/Add_184", + "node_outputs:": [ + { + "output_dimension:": "1 ", + "output_name:": "/Add_184_output_0" + } + ] + }, + { + "node_id:": 1660, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Add_184_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_440", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_440_output_0" + } + ] + }, + { + "node_id:": 1661, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_193_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Gather_201_output_0" + } + ], + "node_name:": "/Add_166", + "node_outputs:": [ + { + "output_dimension:": "1 ", + "output_name:": "/Add_166_output_0" + } + ] + }, + { + "node_id:": 1662, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Add_166_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_439", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_439_output_0" + } + ] + }, + { + "node_id:": 1663, + "node_inputs:": [ + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_439_output_0" + }, + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_440_output_0" + } + ], + "node_name:": "/Concat_219", + "node_outputs:": [ + { + "output_dimension:": "2 1 ", + "output_name:": "new_cached_len_3" + } + ] + }, + { + "node_id:": 1664, + "node_inputs:": [ + { + "input_dimension:": "1 384 48 ", + "input_name:": "/Concat_175_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_614_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_81", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Slice_81_output_0" + } + ] + }, + { + "node_id:": 1665, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Slice_81_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_369", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 30 ", + "output_name:": "/Unsqueeze_369_output_0" + } + ] + }, + { + "node_id:": 1666, + "node_inputs:": [ + { + "input_dimension:": "1 384 48 ", + "input_name:": "/Concat_157_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_614_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_72", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Slice_72_output_0" + } + ] + }, + { + "node_id:": 1667, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Slice_72_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_368", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 30 ", + "output_name:": "/Unsqueeze_368_output_0" + } + ] + }, + { + "node_id:": 1668, + "node_inputs:": [ + { + "input_dimension:": "1 384 48 ", + "input_name:": "/Concat_139_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_614_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_63", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Slice_63_output_0" + } + ] + }, + { + "node_id:": 1669, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Slice_63_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_367", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 30 ", + "output_name:": "/Unsqueeze_367_output_0" + } + ] + }, + { + "node_id:": 1670, + "node_inputs:": [ + { + "input_dimension:": "1 1 384 30 ", + "input_name:": "/Unsqueeze_367_output_0" + }, + { + "input_dimension:": "1 1 384 30 ", + "input_name:": "/Unsqueeze_368_output_0" + }, + { + "input_dimension:": "1 1 384 30 ", + "input_name:": "/Unsqueeze_369_output_0" + } + ], + "node_name:": "/Concat_182", + "node_outputs:": [ + { + "output_dimension:": "3 1 384 30 ", + "output_name:": "new_cached_conv2_2" + } + ] + }, + { + "node_id:": 1671, + "node_inputs:": [ + { + "input_dimension:": "1 384 48 ", + "input_name:": "/Concat_171_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_614_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_79", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Slice_79_output_0" + } + ] + }, + { + "node_id:": 1672, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Slice_79_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_366", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 30 ", + "output_name:": "/Unsqueeze_366_output_0" + } + ] + }, + { + "node_id:": 1673, + "node_inputs:": [ + { + "input_dimension:": "1 384 48 ", + "input_name:": "/Concat_153_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_614_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_70", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Slice_70_output_0" + } + ] + }, + { + "node_id:": 1674, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Slice_70_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_365", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 30 ", + "output_name:": "/Unsqueeze_365_output_0" + } + ] + }, + { + "node_id:": 1675, + "node_inputs:": [ + { + "input_dimension:": "1 384 48 ", + "input_name:": "/Concat_135_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_614_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_61", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Slice_61_output_0" + } + ] + }, + { + "node_id:": 1676, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Slice_61_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_364", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 30 ", + "output_name:": "/Unsqueeze_364_output_0" + } + ] + }, + { + "node_id:": 1677, + "node_inputs:": [ + { + "input_dimension:": "1 1 384 30 ", + "input_name:": "/Unsqueeze_364_output_0" + }, + { + "input_dimension:": "1 1 384 30 ", + "input_name:": "/Unsqueeze_365_output_0" + }, + { + "input_dimension:": "1 1 384 30 ", + "input_name:": "/Unsqueeze_366_output_0" + } + ], + "node_name:": "/Concat_181", + "node_outputs:": [ + { + "output_dimension:": "3 1 384 30 ", + "output_name:": "new_cached_conv1_2" + } + ] + }, + { + "node_id:": 1678, + "node_inputs:": [ + { + "input_dimension:": "90 1 96 ", + "input_name:": "/Concat_172_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_344_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_80", + "node_outputs:": [ + { + "output_dimension:": "72 1 96 ", + "output_name:": "/Slice_80_output_0" + } + ] + }, + { + "node_id:": 1679, + "node_inputs:": [ + { + "input_dimension:": "72 1 96 ", + "input_name:": "/Slice_80_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_363", + "node_outputs:": [ + { + "output_dimension:": "1 72 1 96 ", + "output_name:": "/Unsqueeze_363_output_0" + } + ] + }, + { + "node_id:": 1680, + "node_inputs:": [ + { + "input_dimension:": "90 1 96 ", + "input_name:": "/Concat_154_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_344_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_71", + "node_outputs:": [ + { + "output_dimension:": "72 1 96 ", + "output_name:": "/Slice_71_output_0" + } + ] + }, + { + "node_id:": 1681, + "node_inputs:": [ + { + "input_dimension:": "72 1 96 ", + "input_name:": "/Slice_71_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_362", + "node_outputs:": [ + { + "output_dimension:": "1 72 1 96 ", + "output_name:": "/Unsqueeze_362_output_0" + } + ] + }, + { + "node_id:": 1682, + "node_inputs:": [ + { + "input_dimension:": "90 1 96 ", + "input_name:": "/Concat_136_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_344_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_62", + "node_outputs:": [ + { + "output_dimension:": "72 1 96 ", + "output_name:": "/Slice_62_output_0" + } + ] + }, + { + "node_id:": 1683, + "node_inputs:": [ + { + "input_dimension:": "72 1 96 ", + "input_name:": "/Slice_62_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_361", + "node_outputs:": [ + { + "output_dimension:": "1 72 1 96 ", + "output_name:": "/Unsqueeze_361_output_0" + } + ] + }, + { + "node_id:": 1684, + "node_inputs:": [ + { + "input_dimension:": "1 72 1 96 ", + "input_name:": "/Unsqueeze_361_output_0" + }, + { + "input_dimension:": "1 72 1 96 ", + "input_name:": "/Unsqueeze_362_output_0" + }, + { + "input_dimension:": "1 72 1 96 ", + "input_name:": "/Unsqueeze_363_output_0" + } + ], + "node_name:": "/Concat_180", + "node_outputs:": [ + { + "output_dimension:": "3 72 1 96 ", + "output_name:": "new_cached_val2_2" + } + ] + }, + { + "node_id:": 1685, + "node_inputs:": [ + { + "input_dimension:": "90 1 96 ", + "input_name:": "/Concat_159_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_321_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_78", + "node_outputs:": [ + { + "output_dimension:": "72 1 96 ", + "output_name:": "/Slice_78_output_0" + } + ] + }, + { + "node_id:": 1686, + "node_inputs:": [ + { + "input_dimension:": "72 1 96 ", + "input_name:": "/Slice_78_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_360", + "node_outputs:": [ + { + "output_dimension:": "1 72 1 96 ", + "output_name:": "/Unsqueeze_360_output_0" + } + ] + }, + { + "node_id:": 1687, + "node_inputs:": [ + { + "input_dimension:": "90 1 96 ", + "input_name:": "/Concat_141_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_321_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_69", + "node_outputs:": [ + { + "output_dimension:": "72 1 96 ", + "output_name:": "/Slice_69_output_0" + } + ] + }, + { + "node_id:": 1688, + "node_inputs:": [ + { + "input_dimension:": "72 1 96 ", + "input_name:": "/Slice_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_359", + "node_outputs:": [ + { + "output_dimension:": "1 72 1 96 ", + "output_name:": "/Unsqueeze_359_output_0" + } + ] + }, + { + "node_id:": 1689, + "node_inputs:": [ + { + "input_dimension:": "90 1 96 ", + "input_name:": "/Concat_123_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_321_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_60", + "node_outputs:": [ + { + "output_dimension:": "72 1 96 ", + "output_name:": "/Slice_60_output_0" + } + ] + }, + { + "node_id:": 1690, + "node_inputs:": [ + { + "input_dimension:": "72 1 96 ", + "input_name:": "/Slice_60_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_358", + "node_outputs:": [ + { + "output_dimension:": "1 72 1 96 ", + "output_name:": "/Unsqueeze_358_output_0" + } + ] + }, + { + "node_id:": 1691, + "node_inputs:": [ + { + "input_dimension:": "1 72 1 96 ", + "input_name:": "/Unsqueeze_358_output_0" + }, + { + "input_dimension:": "1 72 1 96 ", + "input_name:": "/Unsqueeze_359_output_0" + }, + { + "input_dimension:": "1 72 1 96 ", + "input_name:": "/Unsqueeze_360_output_0" + } + ], + "node_name:": "/Concat_179", + "node_outputs:": [ + { + "output_dimension:": "3 72 1 96 ", + "output_name:": "new_cached_val_2" + } + ] + }, + { + "node_id:": 1692, + "node_inputs:": [ + { + "input_dimension:": "90 1 192 ", + "input_name:": "/Concat_158_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_321_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_77", + "node_outputs:": [ + { + "output_dimension:": "72 1 192 ", + "output_name:": "/Slice_77_output_0" + } + ] + }, + { + "node_id:": 1693, + "node_inputs:": [ + { + "input_dimension:": "72 1 192 ", + "input_name:": "/Slice_77_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_357", + "node_outputs:": [ + { + "output_dimension:": "1 72 1 192 ", + "output_name:": "/Unsqueeze_357_output_0" + } + ] + }, + { + "node_id:": 1694, + "node_inputs:": [ + { + "input_dimension:": "90 1 192 ", + "input_name:": "/Concat_140_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_321_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_68", + "node_outputs:": [ + { + "output_dimension:": "72 1 192 ", + "output_name:": "/Slice_68_output_0" + } + ] + }, + { + "node_id:": 1695, + "node_inputs:": [ + { + "input_dimension:": "72 1 192 ", + "input_name:": "/Slice_68_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_356", + "node_outputs:": [ + { + "output_dimension:": "1 72 1 192 ", + "output_name:": "/Unsqueeze_356_output_0" + } + ] + }, + { + "node_id:": 1696, + "node_inputs:": [ + { + "input_dimension:": "90 1 192 ", + "input_name:": "/Concat_122_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_321_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_59", + "node_outputs:": [ + { + "output_dimension:": "72 1 192 ", + "output_name:": "/Slice_59_output_0" + } + ] + }, + { + "node_id:": 1697, + "node_inputs:": [ + { + "input_dimension:": "72 1 192 ", + "input_name:": "/Slice_59_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_355", + "node_outputs:": [ + { + "output_dimension:": "1 72 1 192 ", + "output_name:": "/Unsqueeze_355_output_0" + } + ] + }, + { + "node_id:": 1698, + "node_inputs:": [ + { + "input_dimension:": "1 72 1 192 ", + "input_name:": "/Unsqueeze_355_output_0" + }, + { + "input_dimension:": "1 72 1 192 ", + "input_name:": "/Unsqueeze_356_output_0" + }, + { + "input_dimension:": "1 72 1 192 ", + "input_name:": "/Unsqueeze_357_output_0" + } + ], + "node_name:": "/Concat_178", + "node_outputs:": [ + { + "output_dimension:": "3 72 1 192 ", + "output_name:": "new_cached_key_2" + } + ] + }, + { + "node_id:": 1699, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Mul_90_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_964_output_0" + } + ], + "node_name:": "/Gather_180", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Gather_180_output_0" + } + ] + }, + { + "node_id:": 1700, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Gather_180_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_354", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 ", + "output_name:": "/Unsqueeze_354_output_0" + } + ] + }, + { + "node_id:": 1701, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Mul_79_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_964_output_0" + } + ], + "node_name:": "/Gather_159", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Gather_159_output_0" + } + ] + }, + { + "node_id:": 1702, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Gather_159_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_353", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 ", + "output_name:": "/Unsqueeze_353_output_0" + } + ] + }, + { + "node_id:": 1703, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Mul_68_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_964_output_0" + } + ], + "node_name:": "/Gather_138", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Gather_138_output_0" + } + ] + }, + { + "node_id:": 1704, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Gather_138_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_352", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 ", + "output_name:": "/Unsqueeze_352_output_0" + } + ] + }, + { + "node_id:": 1705, + "node_inputs:": [ + { + "input_dimension:": "1 1 384 ", + "input_name:": "/Unsqueeze_352_output_0" + }, + { + "input_dimension:": "1 1 384 ", + "input_name:": "/Unsqueeze_353_output_0" + }, + { + "input_dimension:": "1 1 384 ", + "input_name:": "/Unsqueeze_354_output_0" + } + ], + "node_name:": "/Concat_177", + "node_outputs:": [ + { + "output_dimension:": "3 1 384 ", + "output_name:": "new_cached_avg_2" + } + ] + }, + { + "node_id:": 1706, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_171_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Gather_179_output_0" + } + ], + "node_name:": "/Add_148", + "node_outputs:": [ + { + "output_dimension:": "1 ", + "output_name:": "/Add_148_output_0" + } + ] + }, + { + "node_id:": 1707, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Add_148_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_351", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_351_output_0" + } + ] + }, + { + "node_id:": 1708, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_150_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Gather_158_output_0" + } + ], + "node_name:": "/Add_130", + "node_outputs:": [ + { + "output_dimension:": "1 ", + "output_name:": "/Add_130_output_0" + } + ] + }, + { + "node_id:": 1709, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Add_130_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_350", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_350_output_0" + } + ] + }, + { + "node_id:": 1710, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_129_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Gather_137_output_0" + } + ], + "node_name:": "/Add_112", + "node_outputs:": [ + { + "output_dimension:": "1 ", + "output_name:": "/Add_112_output_0" + } + ] + }, + { + "node_id:": 1711, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Add_112_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_349", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_349_output_0" + } + ] + }, + { + "node_id:": 1712, + "node_inputs:": [ + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_349_output_0" + }, + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_350_output_0" + }, + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_351_output_0" + } + ], + "node_name:": "/Concat_176", + "node_outputs:": [ + { + "output_dimension:": "3 1 ", + "output_name:": "new_cached_len_2" + } + ] + }, + { + "node_id:": 1713, + "node_inputs:": [ + { + "input_dimension:": "1 384 66 ", + "input_name:": "/Concat_114_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_614_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_53", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Slice_53_output_0" + } + ] + }, + { + "node_id:": 1714, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Slice_53_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_245", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 30 ", + "output_name:": "/Unsqueeze_245_output_0" + } + ] + }, + { + "node_id:": 1715, + "node_inputs:": [ + { + "input_dimension:": "1 384 66 ", + "input_name:": "/Concat_96_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_614_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_44", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Slice_44_output_0" + } + ] + }, + { + "node_id:": 1716, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Slice_44_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_244", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 30 ", + "output_name:": "/Unsqueeze_244_output_0" + } + ] + }, + { + "node_id:": 1717, + "node_inputs:": [ + { + "input_dimension:": "1 384 66 ", + "input_name:": "/Concat_78_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_614_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_35", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Slice_35_output_0" + } + ] + }, + { + "node_id:": 1718, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Slice_35_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_243", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 30 ", + "output_name:": "/Unsqueeze_243_output_0" + } + ] + }, + { + "node_id:": 1719, + "node_inputs:": [ + { + "input_dimension:": "1 384 66 ", + "input_name:": "/Concat_60_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_614_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_26", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Slice_26_output_0" + } + ] + }, + { + "node_id:": 1720, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Slice_26_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_242", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 30 ", + "output_name:": "/Unsqueeze_242_output_0" + } + ] + }, + { + "node_id:": 1721, + "node_inputs:": [ + { + "input_dimension:": "1 1 384 30 ", + "input_name:": "/Unsqueeze_242_output_0" + }, + { + "input_dimension:": "1 1 384 30 ", + "input_name:": "/Unsqueeze_243_output_0" + }, + { + "input_dimension:": "1 1 384 30 ", + "input_name:": "/Unsqueeze_244_output_0" + }, + { + "input_dimension:": "1 1 384 30 ", + "input_name:": "/Unsqueeze_245_output_0" + } + ], + "node_name:": "/Concat_121", + "node_outputs:": [ + { + "output_dimension:": "4 1 384 30 ", + "output_name:": "new_cached_conv2_1" + } + ] + }, + { + "node_id:": 1722, + "node_inputs:": [ + { + "input_dimension:": "1 384 66 ", + "input_name:": "/Concat_110_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_614_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_51", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Slice_51_output_0" + } + ] + }, + { + "node_id:": 1723, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Slice_51_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_241", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 30 ", + "output_name:": "/Unsqueeze_241_output_0" + } + ] + }, + { + "node_id:": 1724, + "node_inputs:": [ + { + "input_dimension:": "1 384 66 ", + "input_name:": "/Concat_92_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_614_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_42", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Slice_42_output_0" + } + ] + }, + { + "node_id:": 1725, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Slice_42_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_240", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 30 ", + "output_name:": "/Unsqueeze_240_output_0" + } + ] + }, + { + "node_id:": 1726, + "node_inputs:": [ + { + "input_dimension:": "1 384 66 ", + "input_name:": "/Concat_74_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_614_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_33", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Slice_33_output_0" + } + ] + }, + { + "node_id:": 1727, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Slice_33_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_239", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 30 ", + "output_name:": "/Unsqueeze_239_output_0" + } + ] + }, + { + "node_id:": 1728, + "node_inputs:": [ + { + "input_dimension:": "1 384 66 ", + "input_name:": "/Concat_56_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_614_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_24", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Slice_24_output_0" + } + ] + }, + { + "node_id:": 1729, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Slice_24_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_238", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 30 ", + "output_name:": "/Unsqueeze_238_output_0" + } + ] + }, + { + "node_id:": 1730, + "node_inputs:": [ + { + "input_dimension:": "1 1 384 30 ", + "input_name:": "/Unsqueeze_238_output_0" + }, + { + "input_dimension:": "1 1 384 30 ", + "input_name:": "/Unsqueeze_239_output_0" + }, + { + "input_dimension:": "1 1 384 30 ", + "input_name:": "/Unsqueeze_240_output_0" + }, + { + "input_dimension:": "1 1 384 30 ", + "input_name:": "/Unsqueeze_241_output_0" + } + ], + "node_name:": "/Concat_120", + "node_outputs:": [ + { + "output_dimension:": "4 1 384 30 ", + "output_name:": "new_cached_conv1_1" + } + ] + }, + { + "node_id:": 1731, + "node_inputs:": [ + { + "input_dimension:": "180 1 96 ", + "input_name:": "/Concat_111_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_585_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_52", + "node_outputs:": [ + { + "output_dimension:": "144 1 96 ", + "output_name:": "/Slice_52_output_0" + } + ] + }, + { + "node_id:": 1732, + "node_inputs:": [ + { + "input_dimension:": "144 1 96 ", + "input_name:": "/Slice_52_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_237", + "node_outputs:": [ + { + "output_dimension:": "1 144 1 96 ", + "output_name:": "/Unsqueeze_237_output_0" + } + ] + }, + { + "node_id:": 1733, + "node_inputs:": [ + { + "input_dimension:": "180 1 96 ", + "input_name:": "/Concat_93_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_585_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_43", + "node_outputs:": [ + { + "output_dimension:": "144 1 96 ", + "output_name:": "/Slice_43_output_0" + } + ] + }, + { + "node_id:": 1734, + "node_inputs:": [ + { + "input_dimension:": "144 1 96 ", + "input_name:": "/Slice_43_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_236", + "node_outputs:": [ + { + "output_dimension:": "1 144 1 96 ", + "output_name:": "/Unsqueeze_236_output_0" + } + ] + }, + { + "node_id:": 1735, + "node_inputs:": [ + { + "input_dimension:": "180 1 96 ", + "input_name:": "/Concat_75_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_585_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_34", + "node_outputs:": [ + { + "output_dimension:": "144 1 96 ", + "output_name:": "/Slice_34_output_0" + } + ] + }, + { + "node_id:": 1736, + "node_inputs:": [ + { + "input_dimension:": "144 1 96 ", + "input_name:": "/Slice_34_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_235", + "node_outputs:": [ + { + "output_dimension:": "1 144 1 96 ", + "output_name:": "/Unsqueeze_235_output_0" + } + ] + }, + { + "node_id:": 1737, + "node_inputs:": [ + { + "input_dimension:": "180 1 96 ", + "input_name:": "/Concat_57_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_585_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_25", + "node_outputs:": [ + { + "output_dimension:": "144 1 96 ", + "output_name:": "/Slice_25_output_0" + } + ] + }, + { + "node_id:": 1738, + "node_inputs:": [ + { + "input_dimension:": "144 1 96 ", + "input_name:": "/Slice_25_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_234", + "node_outputs:": [ + { + "output_dimension:": "1 144 1 96 ", + "output_name:": "/Unsqueeze_234_output_0" + } + ] + }, + { + "node_id:": 1739, + "node_inputs:": [ + { + "input_dimension:": "1 144 1 96 ", + "input_name:": "/Unsqueeze_234_output_0" + }, + { + "input_dimension:": "1 144 1 96 ", + "input_name:": "/Unsqueeze_235_output_0" + }, + { + "input_dimension:": "1 144 1 96 ", + "input_name:": "/Unsqueeze_236_output_0" + }, + { + "input_dimension:": "1 144 1 96 ", + "input_name:": "/Unsqueeze_237_output_0" + } + ], + "node_name:": "/Concat_119", + "node_outputs:": [ + { + "output_dimension:": "4 144 1 96 ", + "output_name:": "new_cached_val2_1" + } + ] + }, + { + "node_id:": 1740, + "node_inputs:": [ + { + "input_dimension:": "180 1 96 ", + "input_name:": "/Concat_98_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_562_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_50", + "node_outputs:": [ + { + "output_dimension:": "144 1 96 ", + "output_name:": "/Slice_50_output_0" + } + ] + }, + { + "node_id:": 1741, + "node_inputs:": [ + { + "input_dimension:": "144 1 96 ", + "input_name:": "/Slice_50_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_233", + "node_outputs:": [ + { + "output_dimension:": "1 144 1 96 ", + "output_name:": "/Unsqueeze_233_output_0" + } + ] + }, + { + "node_id:": 1742, + "node_inputs:": [ + { + "input_dimension:": "180 1 96 ", + "input_name:": "/Concat_80_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_562_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_41", + "node_outputs:": [ + { + "output_dimension:": "144 1 96 ", + "output_name:": "/Slice_41_output_0" + } + ] + }, + { + "node_id:": 1743, + "node_inputs:": [ + { + "input_dimension:": "144 1 96 ", + "input_name:": "/Slice_41_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_232", + "node_outputs:": [ + { + "output_dimension:": "1 144 1 96 ", + "output_name:": "/Unsqueeze_232_output_0" + } + ] + }, + { + "node_id:": 1744, + "node_inputs:": [ + { + "input_dimension:": "180 1 96 ", + "input_name:": "/Concat_62_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_562_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_32", + "node_outputs:": [ + { + "output_dimension:": "144 1 96 ", + "output_name:": "/Slice_32_output_0" + } + ] + }, + { + "node_id:": 1745, + "node_inputs:": [ + { + "input_dimension:": "144 1 96 ", + "input_name:": "/Slice_32_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_231", + "node_outputs:": [ + { + "output_dimension:": "1 144 1 96 ", + "output_name:": "/Unsqueeze_231_output_0" + } + ] + }, + { + "node_id:": 1746, + "node_inputs:": [ + { + "input_dimension:": "180 1 96 ", + "input_name:": "/Concat_44_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_562_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_23", + "node_outputs:": [ + { + "output_dimension:": "144 1 96 ", + "output_name:": "/Slice_23_output_0" + } + ] + }, + { + "node_id:": 1747, + "node_inputs:": [ + { + "input_dimension:": "144 1 96 ", + "input_name:": "/Slice_23_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_230", + "node_outputs:": [ + { + "output_dimension:": "1 144 1 96 ", + "output_name:": "/Unsqueeze_230_output_0" + } + ] + }, + { + "node_id:": 1748, + "node_inputs:": [ + { + "input_dimension:": "1 144 1 96 ", + "input_name:": "/Unsqueeze_230_output_0" + }, + { + "input_dimension:": "1 144 1 96 ", + "input_name:": "/Unsqueeze_231_output_0" + }, + { + "input_dimension:": "1 144 1 96 ", + "input_name:": "/Unsqueeze_232_output_0" + }, + { + "input_dimension:": "1 144 1 96 ", + "input_name:": "/Unsqueeze_233_output_0" + } + ], + "node_name:": "/Concat_118", + "node_outputs:": [ + { + "output_dimension:": "4 144 1 96 ", + "output_name:": "new_cached_val_1" + } + ] + }, + { + "node_id:": 1749, + "node_inputs:": [ + { + "input_dimension:": "180 1 192 ", + "input_name:": "/Concat_97_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_562_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_49", + "node_outputs:": [ + { + "output_dimension:": "144 1 192 ", + "output_name:": "/Slice_49_output_0" + } + ] + }, + { + "node_id:": 1750, + "node_inputs:": [ + { + "input_dimension:": "144 1 192 ", + "input_name:": "/Slice_49_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_229", + "node_outputs:": [ + { + "output_dimension:": "1 144 1 192 ", + "output_name:": "/Unsqueeze_229_output_0" + } + ] + }, + { + "node_id:": 1751, + "node_inputs:": [ + { + "input_dimension:": "180 1 192 ", + "input_name:": "/Concat_79_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_562_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_40", + "node_outputs:": [ + { + "output_dimension:": "144 1 192 ", + "output_name:": "/Slice_40_output_0" + } + ] + }, + { + "node_id:": 1752, + "node_inputs:": [ + { + "input_dimension:": "144 1 192 ", + "input_name:": "/Slice_40_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_228", + "node_outputs:": [ + { + "output_dimension:": "1 144 1 192 ", + "output_name:": "/Unsqueeze_228_output_0" + } + ] + }, + { + "node_id:": 1753, + "node_inputs:": [ + { + "input_dimension:": "180 1 192 ", + "input_name:": "/Concat_61_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_562_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_31", + "node_outputs:": [ + { + "output_dimension:": "144 1 192 ", + "output_name:": "/Slice_31_output_0" + } + ] + }, + { + "node_id:": 1754, + "node_inputs:": [ + { + "input_dimension:": "144 1 192 ", + "input_name:": "/Slice_31_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_227", + "node_outputs:": [ + { + "output_dimension:": "1 144 1 192 ", + "output_name:": "/Unsqueeze_227_output_0" + } + ] + }, + { + "node_id:": 1755, + "node_inputs:": [ + { + "input_dimension:": "180 1 192 ", + "input_name:": "/Concat_43_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_562_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_22", + "node_outputs:": [ + { + "output_dimension:": "144 1 192 ", + "output_name:": "/Slice_22_output_0" + } + ] + }, + { + "node_id:": 1756, + "node_inputs:": [ + { + "input_dimension:": "144 1 192 ", + "input_name:": "/Slice_22_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_226", + "node_outputs:": [ + { + "output_dimension:": "1 144 1 192 ", + "output_name:": "/Unsqueeze_226_output_0" + } + ] + }, + { + "node_id:": 1757, + "node_inputs:": [ + { + "input_dimension:": "1 144 1 192 ", + "input_name:": "/Unsqueeze_226_output_0" + }, + { + "input_dimension:": "1 144 1 192 ", + "input_name:": "/Unsqueeze_227_output_0" + }, + { + "input_dimension:": "1 144 1 192 ", + "input_name:": "/Unsqueeze_228_output_0" + }, + { + "input_dimension:": "1 144 1 192 ", + "input_name:": "/Unsqueeze_229_output_0" + } + ], + "node_name:": "/Concat_117", + "node_outputs:": [ + { + "output_dimension:": "4 144 1 192 ", + "output_name:": "new_cached_key_1" + } + ] + }, + { + "node_id:": 1758, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Mul_57_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_964_output_0" + } + ], + "node_name:": "/Gather_116", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Gather_116_output_0" + } + ] + }, + { + "node_id:": 1759, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Gather_116_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_225", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 ", + "output_name:": "/Unsqueeze_225_output_0" + } + ] + }, + { + "node_id:": 1760, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Mul_46_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_964_output_0" + } + ], + "node_name:": "/Gather_95", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Gather_95_output_0" + } + ] + }, + { + "node_id:": 1761, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Gather_95_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_224", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 ", + "output_name:": "/Unsqueeze_224_output_0" + } + ] + }, + { + "node_id:": 1762, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Mul_35_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_964_output_0" + } + ], + "node_name:": "/Gather_74", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Gather_74_output_0" + } + ] + }, + { + "node_id:": 1763, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Gather_74_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_223", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 ", + "output_name:": "/Unsqueeze_223_output_0" + } + ] + }, + { + "node_id:": 1764, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Mul_24_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_964_output_0" + } + ], + "node_name:": "/Gather_53", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Gather_53_output_0" + } + ] + }, + { + "node_id:": 1765, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Gather_53_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_222", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 ", + "output_name:": "/Unsqueeze_222_output_0" + } + ] + }, + { + "node_id:": 1766, + "node_inputs:": [ + { + "input_dimension:": "1 1 384 ", + "input_name:": "/Unsqueeze_222_output_0" + }, + { + "input_dimension:": "1 1 384 ", + "input_name:": "/Unsqueeze_223_output_0" + }, + { + "input_dimension:": "1 1 384 ", + "input_name:": "/Unsqueeze_224_output_0" + }, + { + "input_dimension:": "1 1 384 ", + "input_name:": "/Unsqueeze_225_output_0" + } + ], + "node_name:": "/Concat_116", + "node_outputs:": [ + { + "output_dimension:": "4 1 384 ", + "output_name:": "new_cached_avg_1" + } + ] + }, + { + "node_id:": 1767, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_107_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Gather_115_output_0" + } + ], + "node_name:": "/Add_94", + "node_outputs:": [ + { + "output_dimension:": "1 ", + "output_name:": "/Add_94_output_0" + } + ] + }, + { + "node_id:": 1768, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Add_94_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_221", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_221_output_0" + } + ] + }, + { + "node_id:": 1769, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_86_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Gather_94_output_0" + } + ], + "node_name:": "/Add_76", + "node_outputs:": [ + { + "output_dimension:": "1 ", + "output_name:": "/Add_76_output_0" + } + ] + }, + { + "node_id:": 1770, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Add_76_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_220", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_220_output_0" + } + ] + }, + { + "node_id:": 1771, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_65_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Gather_73_output_0" + } + ], + "node_name:": "/Add_58", + "node_outputs:": [ + { + "output_dimension:": "1 ", + "output_name:": "/Add_58_output_0" + } + ] + }, + { + "node_id:": 1772, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Add_58_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_219", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_219_output_0" + } + ] + }, + { + "node_id:": 1773, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_44_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Gather_52_output_0" + } + ], + "node_name:": "/Add_40", + "node_outputs:": [ + { + "output_dimension:": "1 ", + "output_name:": "/Add_40_output_0" + } + ] + }, + { + "node_id:": 1774, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Add_40_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_218", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_218_output_0" + } + ] + }, + { + "node_id:": 1775, + "node_inputs:": [ + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_218_output_0" + }, + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_219_output_0" + }, + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_220_output_0" + }, + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_221_output_0" + } + ], + "node_name:": "/Concat_115", + "node_outputs:": [ + { + "output_dimension:": "4 1 ", + "output_name:": "new_cached_len_1" + } + ] + }, + { + "node_id:": 1776, + "node_inputs:": [ + { + "input_dimension:": "1 384 102 ", + "input_name:": "/Concat_35_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_614_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_17", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Slice_17_output_0" + } + ] + }, + { + "node_id:": 1777, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Slice_17_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_81", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 30 ", + "output_name:": "/Unsqueeze_81_output_0" + } + ] + }, + { + "node_id:": 1778, + "node_inputs:": [ + { + "input_dimension:": "1 384 102 ", + "input_name:": "/Concat_17_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_614_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_8", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Slice_8_output_0" + } + ] + }, + { + "node_id:": 1779, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Slice_8_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_80", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 30 ", + "output_name:": "/Unsqueeze_80_output_0" + } + ] + }, + { + "node_id:": 1780, + "node_inputs:": [ + { + "input_dimension:": "1 1 384 30 ", + "input_name:": "/Unsqueeze_80_output_0" + }, + { + "input_dimension:": "1 1 384 30 ", + "input_name:": "/Unsqueeze_81_output_0" + } + ], + "node_name:": "/Concat_42", + "node_outputs:": [ + { + "output_dimension:": "2 1 384 30 ", + "output_name:": "new_cached_conv2_0" + } + ] + }, + { + "node_id:": 1781, + "node_inputs:": [ + { + "input_dimension:": "1 384 102 ", + "input_name:": "/Concat_31_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_614_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_15", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Slice_15_output_0" + } + ] + }, + { + "node_id:": 1782, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Slice_15_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_79", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 30 ", + "output_name:": "/Unsqueeze_79_output_0" + } + ] + }, + { + "node_id:": 1783, + "node_inputs:": [ + { + "input_dimension:": "1 384 102 ", + "input_name:": "/Concat_13_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_614_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_1113_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_6", + "node_outputs:": [ + { + "output_dimension:": "1 384 30 ", + "output_name:": "/Slice_6_output_0" + } + ] + }, + { + "node_id:": 1784, + "node_inputs:": [ + { + "input_dimension:": "1 384 30 ", + "input_name:": "/Slice_6_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_78", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 30 ", + "output_name:": "/Unsqueeze_78_output_0" + } + ] + }, + { + "node_id:": 1785, + "node_inputs:": [ + { + "input_dimension:": "1 1 384 30 ", + "input_name:": "/Unsqueeze_78_output_0" + }, + { + "input_dimension:": "1 1 384 30 ", + "input_name:": "/Unsqueeze_79_output_0" + } + ], + "node_name:": "/Concat_41", + "node_outputs:": [ + { + "output_dimension:": "2 1 384 30 ", + "output_name:": "new_cached_conv1_0" + } + ] + }, + { + "node_id:": 1786, + "node_inputs:": [ + { + "input_dimension:": "360 1 96 ", + "input_name:": "/Concat_32_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_63_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_16", + "node_outputs:": [ + { + "output_dimension:": "288 1 96 ", + "output_name:": "/Slice_16_output_0" + } + ] + }, + { + "node_id:": 1787, + "node_inputs:": [ + { + "input_dimension:": "288 1 96 ", + "input_name:": "/Slice_16_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_77", + "node_outputs:": [ + { + "output_dimension:": "1 288 1 96 ", + "output_name:": "/Unsqueeze_77_output_0" + } + ] + }, + { + "node_id:": 1788, + "node_inputs:": [ + { + "input_dimension:": "360 1 96 ", + "input_name:": "/Concat_14_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_63_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_7", + "node_outputs:": [ + { + "output_dimension:": "288 1 96 ", + "output_name:": "/Slice_7_output_0" + } + ] + }, + { + "node_id:": 1789, + "node_inputs:": [ + { + "input_dimension:": "288 1 96 ", + "input_name:": "/Slice_7_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_76", + "node_outputs:": [ + { + "output_dimension:": "1 288 1 96 ", + "output_name:": "/Unsqueeze_76_output_0" + } + ] + }, + { + "node_id:": 1790, + "node_inputs:": [ + { + "input_dimension:": "1 288 1 96 ", + "input_name:": "/Unsqueeze_76_output_0" + }, + { + "input_dimension:": "1 288 1 96 ", + "input_name:": "/Unsqueeze_77_output_0" + } + ], + "node_name:": "/Concat_40", + "node_outputs:": [ + { + "output_dimension:": "2 288 1 96 ", + "output_name:": "new_cached_val2_0" + } + ] + }, + { + "node_id:": 1791, + "node_inputs:": [ + { + "input_dimension:": "360 1 96 ", + "input_name:": "/Concat_19_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_40_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_14", + "node_outputs:": [ + { + "output_dimension:": "288 1 96 ", + "output_name:": "/Slice_14_output_0" + } + ] + }, + { + "node_id:": 1792, + "node_inputs:": [ + { + "input_dimension:": "288 1 96 ", + "input_name:": "/Slice_14_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_75", + "node_outputs:": [ + { + "output_dimension:": "1 288 1 96 ", + "output_name:": "/Unsqueeze_75_output_0" + } + ] + }, + { + "node_id:": 1793, + "node_inputs:": [ + { + "input_dimension:": "360 1 96 ", + "input_name:": "/Concat_1_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_40_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_5", + "node_outputs:": [ + { + "output_dimension:": "288 1 96 ", + "output_name:": "/Slice_5_output_0" + } + ] + }, + { + "node_id:": 1794, + "node_inputs:": [ + { + "input_dimension:": "288 1 96 ", + "input_name:": "/Slice_5_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_74", + "node_outputs:": [ + { + "output_dimension:": "1 288 1 96 ", + "output_name:": "/Unsqueeze_74_output_0" + } + ] + }, + { + "node_id:": 1795, + "node_inputs:": [ + { + "input_dimension:": "1 288 1 96 ", + "input_name:": "/Unsqueeze_74_output_0" + }, + { + "input_dimension:": "1 288 1 96 ", + "input_name:": "/Unsqueeze_75_output_0" + } + ], + "node_name:": "/Concat_39", + "node_outputs:": [ + { + "output_dimension:": "2 288 1 96 ", + "output_name:": "new_cached_val_0" + } + ] + }, + { + "node_id:": 1796, + "node_inputs:": [ + { + "input_dimension:": "360 1 192 ", + "input_name:": "/Concat_18_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_40_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_13", + "node_outputs:": [ + { + "output_dimension:": "288 1 192 ", + "output_name:": "/Slice_13_output_0" + } + ] + }, + { + "node_id:": 1797, + "node_inputs:": [ + { + "input_dimension:": "288 1 192 ", + "input_name:": "/Slice_13_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_73", + "node_outputs:": [ + { + "output_dimension:": "1 288 1 192 ", + "output_name:": "/Unsqueeze_73_output_0" + } + ] + }, + { + "node_id:": 1798, + "node_inputs:": [ + { + "input_dimension:": "360 1 192 ", + "input_name:": "/Concat_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Unsqueeze_40_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_69_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + }, + { + "input_dimension:": "1 ", + "input_name:": "/Constant_830_output_0" + } + ], + "node_name:": "/Slice_4", + "node_outputs:": [ + { + "output_dimension:": "288 1 192 ", + "output_name:": "/Slice_4_output_0" + } + ] + }, + { + "node_id:": 1799, + "node_inputs:": [ + { + "input_dimension:": "288 1 192 ", + "input_name:": "/Slice_4_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_72", + "node_outputs:": [ + { + "output_dimension:": "1 288 1 192 ", + "output_name:": "/Unsqueeze_72_output_0" + } + ] + }, + { + "node_id:": 1800, + "node_inputs:": [ + { + "input_dimension:": "1 288 1 192 ", + "input_name:": "/Unsqueeze_72_output_0" + }, + { + "input_dimension:": "1 288 1 192 ", + "input_name:": "/Unsqueeze_73_output_0" + } + ], + "node_name:": "/Concat_38", + "node_outputs:": [ + { + "output_dimension:": "2 288 1 192 ", + "output_name:": "new_cached_key_0" + } + ] + }, + { + "node_id:": 1801, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Mul_13_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_964_output_0" + } + ], + "node_name:": "/Gather_31", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Gather_31_output_0" + } + ] + }, + { + "node_id:": 1802, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Gather_31_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_71", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 ", + "output_name:": "/Unsqueeze_71_output_0" + } + ] + }, + { + "node_id:": 1803, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Mul_2_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Constant_964_output_0" + } + ], + "node_name:": "/Gather_10", + "node_outputs:": [ + { + "output_dimension:": "1 384 ", + "output_name:": "/Gather_10_output_0" + } + ] + }, + { + "node_id:": 1804, + "node_inputs:": [ + { + "input_dimension:": "1 384 ", + "input_name:": "/Gather_10_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_70", + "node_outputs:": [ + { + "output_dimension:": "1 1 384 ", + "output_name:": "/Unsqueeze_70_output_0" + } + ] + }, + { + "node_id:": 1805, + "node_inputs:": [ + { + "input_dimension:": "1 1 384 ", + "input_name:": "/Unsqueeze_70_output_0" + }, + { + "input_dimension:": "1 1 384 ", + "input_name:": "/Unsqueeze_71_output_0" + } + ], + "node_name:": "/Concat_37", + "node_outputs:": [ + { + "output_dimension:": "2 1 384 ", + "output_name:": "new_cached_avg_0" + } + ] + }, + { + "node_id:": 1806, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_22_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Gather_30_output_0" + } + ], + "node_name:": "/Add_22", + "node_outputs:": [ + { + "output_dimension:": "1 ", + "output_name:": "/Add_22_output_0" + } + ] + }, + { + "node_id:": 1807, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Add_22_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_69", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_69_output_0" + } + ] + }, + { + "node_id:": 1808, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Gather_1_output_0" + }, + { + "input_dimension:": "", + "input_name:": "/Gather_9_output_0" + } + ], + "node_name:": "/Add_4", + "node_outputs:": [ + { + "output_dimension:": "1 ", + "output_name:": "/Add_4_output_0" + } + ] + }, + { + "node_id:": 1809, + "node_inputs:": [ + { + "input_dimension:": "1 ", + "input_name:": "/Add_4_output_0" + }, + { + "input_dimension:": "1 ", + "input_name:": "onnx::Unsqueeze_910" + } + ], + "node_name:": "/Unsqueeze_68", + "node_outputs:": [ + { + "output_dimension:": "1 1 ", + "output_name:": "/Unsqueeze_68_output_0" + } + ] + }, + { + "node_id:": 1810, + "node_inputs:": [ + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_68_output_0" + }, + { + "input_dimension:": "1 1 ", + "input_name:": "/Unsqueeze_69_output_0" + } + ], + "node_name:": "/Concat_36", + "node_outputs:": [ + { + "output_dimension:": "2 1 ", + "output_name:": "new_cached_len_0" + } + ] + }, + { + "node_id:": 1811, + "node_inputs:": [ + { + "input_dimension:": "72 384 ", + "input_name:": "gemm_output_reshape_arg" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape" + } + ], + "node_name:": "gemm_output_reshape", + "node_outputs:": [ + { + "output_dimension:": "1 72 384 ", + "output_name:": "/encoder_embed/out/Add_output_0" + } + ] + }, + { + "node_id:": 1812, + "node_inputs:": [ + { + "input_dimension:": "72 2432 ", + "input_name:": "gemm_input_reshape_arg" + }, + { + "input_dimension:": "2432 384 ", + "input_name:": "onnx::MatMul_7353" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoder_embed.out.bias" + } + ], + "node_name:": "/encoder_embed/out/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "72 384 ", + "output_name:": "gemm_output_reshape_arg" + } + ] + }, + { + "node_id:": 1813, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Transpose_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_0" + } + ], + "node_name:": "gemm_input_reshape_token_2", + "node_outputs:": [ + { + "output_dimension:": "72 384 ", + "output_name:": "gemm_input_reshape_arg_token_1" + } + ] + }, + { + "node_id:": 1814, + "node_inputs:": [ + { + "input_dimension:": "72 1024 ", + "input_name:": "gemm_output_reshape_arg_token_4" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_3" + } + ], + "node_name:": "gemm_output_reshape_token_5", + "node_outputs:": [ + { + "output_dimension:": "72 1 1024 ", + "output_name:": "/feed_forward1/in_proj/Add_output_0" + } + ] + }, + { + "node_id:": 1815, + "node_inputs:": [ + { + "input_dimension:": "72 384 ", + "input_name:": "gemm_input_reshape_arg_token_1" + }, + { + "input_dimension:": "384 1024 ", + "input_name:": "onnx::MatMul_7356" + }, + { + "input_dimension:": "1024 ", + "input_name:": "encoder.encoders.0.layers.0.feed_forward1.in_proj.bias" + } + ], + "node_name:": "/feed_forward1/in_proj/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "72 1024 ", + "output_name:": "gemm_output_reshape_arg_token_4" + } + ] + }, + { + "node_id:": 1816, + "node_inputs:": [ + { + "input_dimension:": "72 1 1024 ", + "input_name:": "/feed_forward1/activation/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_6" + } + ], + "node_name:": "gemm_input_reshape_token_8", + "node_outputs:": [ + { + "output_dimension:": "72 1024 ", + "output_name:": "gemm_input_reshape_arg_token_7" + } + ] + }, + { + "node_id:": 1817, + "node_inputs:": [ + { + "input_dimension:": "72 384 ", + "input_name:": "gemm_output_reshape_arg_token_10" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_9" + } + ], + "node_name:": "gemm_output_reshape_token_11", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/feed_forward1/out_proj/Add_output_0" + } + ] + }, + { + "node_id:": 1818, + "node_inputs:": [ + { + "input_dimension:": "72 1024 ", + "input_name:": "gemm_input_reshape_arg_token_7" + }, + { + "input_dimension:": "1024 384 ", + "input_name:": "onnx::MatMul_7357" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.0.layers.0.feed_forward1.out_proj.bias" + } + ], + "node_name:": "/feed_forward1/out_proj/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "72 384 ", + "output_name:": "gemm_output_reshape_arg_token_10" + } + ] + }, + { + "node_id:": 1819, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_5_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_12" + } + ], + "node_name:": "gemm_input_reshape_token_14", + "node_outputs:": [ + { + "output_dimension:": "72 384 ", + "output_name:": "gemm_input_reshape_arg_token_13" + } + ] + }, + { + "node_id:": 1820, + "node_inputs:": [ + { + "input_dimension:": "72 512 ", + "input_name:": "gemm_output_reshape_arg_token_16" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_15" + } + ], + "node_name:": "gemm_output_reshape_token_17", + "node_outputs:": [ + { + "output_dimension:": "72 1 512 ", + "output_name:": "/in_proj/Add_output_0" + } + ] + }, + { + "node_id:": 1821, + "node_inputs:": [ + { + "input_dimension:": "72 384 ", + "input_name:": "gemm_input_reshape_arg_token_13" + }, + { + "input_dimension:": "384 512 ", + "input_name:": "onnx::MatMul_7360" + }, + { + "input_dimension:": "512 ", + "input_name:": "encoder.encoders.0.layers.0.self_attn.in_proj.bias" + } + ], + "node_name:": "/in_proj/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "72 512 ", + "output_name:": "gemm_output_reshape_arg_token_16" + } + ] + }, + { + "node_id:": 1822, + "node_inputs:": [ + { + "input_dimension:": "72 384 ", + "input_name:": "gemm_output_reshape_arg_token_22" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_21" + } + ], + "node_name:": "gemm_output_reshape_token_23", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/Add_9_output_0" + } + ] + }, + { + "node_id:": 1823, + "node_inputs:": [ + { + "input_dimension:": "72 96 ", + "input_name:": "gemm_input_reshape_arg_token_19" + }, + { + "input_dimension:": "96 384 ", + "input_name:": "onnx::MatMul_7398" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.0.layers.0.self_attn.out_proj.bias" + } + ], + "node_name:": "/MatMul_3/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "72 384 ", + "output_name:": "gemm_output_reshape_arg_token_22" + } + ] + }, + { + "node_id:": 1824, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_11_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_24" + } + ], + "node_name:": "gemm_input_reshape_token_26", + "node_outputs:": [ + { + "output_dimension:": "72 384 ", + "output_name:": "gemm_input_reshape_arg_token_25" + } + ] + }, + { + "node_id:": 1825, + "node_inputs:": [ + { + "input_dimension:": "72 1024 ", + "input_name:": "gemm_output_reshape_arg_token_28" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_27" + } + ], + "node_name:": "gemm_output_reshape_token_29", + "node_outputs:": [ + { + "output_dimension:": "72 1 1024 ", + "output_name:": "/feed_forward2/in_proj/Add_output_0" + } + ] + }, + { + "node_id:": 1826, + "node_inputs:": [ + { + "input_dimension:": "72 384 ", + "input_name:": "gemm_input_reshape_arg_token_25" + }, + { + "input_dimension:": "384 1024 ", + "input_name:": "onnx::MatMul_7403" + }, + { + "input_dimension:": "1024 ", + "input_name:": "encoder.encoders.0.layers.0.feed_forward2.in_proj.bias" + } + ], + "node_name:": "/feed_forward2/in_proj/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "72 1024 ", + "output_name:": "gemm_output_reshape_arg_token_28" + } + ] + }, + { + "node_id:": 1827, + "node_inputs:": [ + { + "input_dimension:": "72 1 1024 ", + "input_name:": "/feed_forward2/activation/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_30" + } + ], + "node_name:": "gemm_input_reshape_token_32", + "node_outputs:": [ + { + "output_dimension:": "72 1024 ", + "output_name:": "gemm_input_reshape_arg_token_31" + } + ] + }, + { + "node_id:": 1828, + "node_inputs:": [ + { + "input_dimension:": "72 384 ", + "input_name:": "gemm_output_reshape_arg_token_34" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_33" + } + ], + "node_name:": "gemm_output_reshape_token_35", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/feed_forward2/out_proj/Add_output_0" + } + ] + }, + { + "node_id:": 1829, + "node_inputs:": [ + { + "input_dimension:": "72 1024 ", + "input_name:": "gemm_input_reshape_arg_token_31" + }, + { + "input_dimension:": "1024 384 ", + "input_name:": "onnx::MatMul_7404" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.0.layers.0.feed_forward2.out_proj.bias" + } + ], + "node_name:": "/feed_forward2/out_proj/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "72 384 ", + "output_name:": "gemm_output_reshape_arg_token_34" + } + ] + }, + { + "node_id:": 1830, + "node_inputs:": [ + { + "input_dimension:": "72 384 ", + "input_name:": "gemm_output_reshape_arg_token_40" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_39" + } + ], + "node_name:": "gemm_output_reshape_token_41", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/out_proj2/Add_output_0" + } + ] + }, + { + "node_id:": 1831, + "node_inputs:": [ + { + "input_dimension:": "72 96 ", + "input_name:": "gemm_input_reshape_arg_token_37" + }, + { + "input_dimension:": "96 384 ", + "input_name:": "onnx::MatMul_7411" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.0.layers.0.self_attn.out_proj2.bias" + } + ], + "node_name:": "/out_proj2/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "72 384 ", + "output_name:": "gemm_output_reshape_arg_token_40" + } + ] + }, + { + "node_id:": 1832, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_15_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_42" + } + ], + "node_name:": "gemm_input_reshape_token_44", + "node_outputs:": [ + { + "output_dimension:": "72 384 ", + "output_name:": "gemm_input_reshape_arg_token_43" + } + ] + }, + { + "node_id:": 1833, + "node_inputs:": [ + { + "input_dimension:": "72 1024 ", + "input_name:": "gemm_output_reshape_arg_token_46" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_45" + } + ], + "node_name:": "gemm_output_reshape_token_47", + "node_outputs:": [ + { + "output_dimension:": "72 1 1024 ", + "output_name:": "/feed_forward3/in_proj/Add_output_0" + } + ] + }, + { + "node_id:": 1834, + "node_inputs:": [ + { + "input_dimension:": "72 384 ", + "input_name:": "gemm_input_reshape_arg_token_43" + }, + { + "input_dimension:": "384 1024 ", + "input_name:": "onnx::MatMul_7416" + }, + { + "input_dimension:": "1024 ", + "input_name:": "encoder.encoders.0.layers.0.feed_forward3.in_proj.bias" + } + ], + "node_name:": "/feed_forward3/in_proj/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "72 1024 ", + "output_name:": "gemm_output_reshape_arg_token_46" + } + ] + }, + { + "node_id:": 1835, + "node_inputs:": [ + { + "input_dimension:": "72 1 1024 ", + "input_name:": "/feed_forward3/activation/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_48" + } + ], + "node_name:": "gemm_input_reshape_token_50", + "node_outputs:": [ + { + "output_dimension:": "72 1024 ", + "output_name:": "gemm_input_reshape_arg_token_49" + } + ] + }, + { + "node_id:": 1836, + "node_inputs:": [ + { + "input_dimension:": "72 384 ", + "input_name:": "gemm_output_reshape_arg_token_52" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_51" + } + ], + "node_name:": "gemm_output_reshape_token_53", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/feed_forward3/out_proj/Add_output_0" + } + ] + }, + { + "node_id:": 1837, + "node_inputs:": [ + { + "input_dimension:": "72 1024 ", + "input_name:": "gemm_input_reshape_arg_token_49" + }, + { + "input_dimension:": "1024 384 ", + "input_name:": "onnx::MatMul_7417" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.0.layers.0.feed_forward3.out_proj.bias" + } + ], + "node_name:": "/feed_forward3/out_proj/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "72 384 ", + "output_name:": "gemm_output_reshape_arg_token_52" + } + ] + }, + { + "node_id:": 1838, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_17_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_54" + } + ], + "node_name:": "gemm_input_reshape_token_56", + "node_outputs:": [ + { + "output_dimension:": "72 384 ", + "output_name:": "gemm_input_reshape_arg_token_55" + } + ] + }, + { + "node_id:": 1839, + "node_inputs:": [ + { + "input_dimension:": "72 1024 ", + "input_name:": "gemm_output_reshape_arg_token_58" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_57" + } + ], + "node_name:": "gemm_output_reshape_token_59", + "node_outputs:": [ + { + "output_dimension:": "72 1 1024 ", + "output_name:": "/feed_forward1/in_proj_1/Add_output_0" + } + ] + }, + { + "node_id:": 1840, + "node_inputs:": [ + { + "input_dimension:": "72 384 ", + "input_name:": "gemm_input_reshape_arg_token_55" + }, + { + "input_dimension:": "384 1024 ", + "input_name:": "onnx::MatMul_7418" + }, + { + "input_dimension:": "1024 ", + "input_name:": "encoder.encoders.0.layers.1.feed_forward1.in_proj.bias" + } + ], + "node_name:": "/feed_forward1/in_proj_1/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "72 1024 ", + "output_name:": "gemm_output_reshape_arg_token_58" + } + ] + }, + { + "node_id:": 1841, + "node_inputs:": [ + { + "input_dimension:": "72 1 1024 ", + "input_name:": "/feed_forward1/activation_1/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_60" + } + ], + "node_name:": "gemm_input_reshape_token_62", + "node_outputs:": [ + { + "output_dimension:": "72 1024 ", + "output_name:": "gemm_input_reshape_arg_token_61" + } + ] + }, + { + "node_id:": 1842, + "node_inputs:": [ + { + "input_dimension:": "72 384 ", + "input_name:": "gemm_output_reshape_arg_token_64" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_63" + } + ], + "node_name:": "gemm_output_reshape_token_65", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/feed_forward1/out_proj_1/Add_output_0" + } + ] + }, + { + "node_id:": 1843, + "node_inputs:": [ + { + "input_dimension:": "72 1024 ", + "input_name:": "gemm_input_reshape_arg_token_61" + }, + { + "input_dimension:": "1024 384 ", + "input_name:": "onnx::MatMul_7419" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.0.layers.1.feed_forward1.out_proj.bias" + } + ], + "node_name:": "/feed_forward1/out_proj_1/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "72 384 ", + "output_name:": "gemm_output_reshape_arg_token_64" + } + ] + }, + { + "node_id:": 1844, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_23_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_66" + } + ], + "node_name:": "gemm_input_reshape_token_68", + "node_outputs:": [ + { + "output_dimension:": "72 384 ", + "output_name:": "gemm_input_reshape_arg_token_67" + } + ] + }, + { + "node_id:": 1845, + "node_inputs:": [ + { + "input_dimension:": "72 512 ", + "input_name:": "gemm_output_reshape_arg_token_70" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_69" + } + ], + "node_name:": "gemm_output_reshape_token_71", + "node_outputs:": [ + { + "output_dimension:": "72 1 512 ", + "output_name:": "/in_proj_1/Add_output_0" + } + ] + }, + { + "node_id:": 1846, + "node_inputs:": [ + { + "input_dimension:": "72 384 ", + "input_name:": "gemm_input_reshape_arg_token_67" + }, + { + "input_dimension:": "384 512 ", + "input_name:": "onnx::MatMul_7422" + }, + { + "input_dimension:": "512 ", + "input_name:": "encoder.encoders.0.layers.1.self_attn.in_proj.bias" + } + ], + "node_name:": "/in_proj_1/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "72 512 ", + "output_name:": "gemm_output_reshape_arg_token_70" + } + ] + }, + { + "node_id:": 1847, + "node_inputs:": [ + { + "input_dimension:": "72 384 ", + "input_name:": "gemm_output_reshape_arg_token_76" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_75" + } + ], + "node_name:": "gemm_output_reshape_token_77", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/Add_27_output_0" + } + ] + }, + { + "node_id:": 1848, + "node_inputs:": [ + { + "input_dimension:": "72 96 ", + "input_name:": "gemm_input_reshape_arg_token_73" + }, + { + "input_dimension:": "96 384 ", + "input_name:": "onnx::MatMul_7460" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.0.layers.1.self_attn.out_proj.bias" + } + ], + "node_name:": "/MatMul_8/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "72 384 ", + "output_name:": "gemm_output_reshape_arg_token_76" + } + ] + }, + { + "node_id:": 1849, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_29_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_78" + } + ], + "node_name:": "gemm_input_reshape_token_80", + "node_outputs:": [ + { + "output_dimension:": "72 384 ", + "output_name:": "gemm_input_reshape_arg_token_79" + } + ] + }, + { + "node_id:": 1850, + "node_inputs:": [ + { + "input_dimension:": "72 1024 ", + "input_name:": "gemm_output_reshape_arg_token_82" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_81" + } + ], + "node_name:": "gemm_output_reshape_token_83", + "node_outputs:": [ + { + "output_dimension:": "72 1 1024 ", + "output_name:": "/feed_forward2/in_proj_1/Add_output_0" + } + ] + }, + { + "node_id:": 1851, + "node_inputs:": [ + { + "input_dimension:": "72 384 ", + "input_name:": "gemm_input_reshape_arg_token_79" + }, + { + "input_dimension:": "384 1024 ", + "input_name:": "onnx::MatMul_7465" + }, + { + "input_dimension:": "1024 ", + "input_name:": "encoder.encoders.0.layers.1.feed_forward2.in_proj.bias" + } + ], + "node_name:": "/feed_forward2/in_proj_1/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "72 1024 ", + "output_name:": "gemm_output_reshape_arg_token_82" + } + ] + }, + { + "node_id:": 1852, + "node_inputs:": [ + { + "input_dimension:": "72 1 1024 ", + "input_name:": "/feed_forward2/activation_1/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_84" + } + ], + "node_name:": "gemm_input_reshape_token_86", + "node_outputs:": [ + { + "output_dimension:": "72 1024 ", + "output_name:": "gemm_input_reshape_arg_token_85" + } + ] + }, + { + "node_id:": 1853, + "node_inputs:": [ + { + "input_dimension:": "72 384 ", + "input_name:": "gemm_output_reshape_arg_token_88" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_87" + } + ], + "node_name:": "gemm_output_reshape_token_89", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/feed_forward2/out_proj_1/Add_output_0" + } + ] + }, + { + "node_id:": 1854, + "node_inputs:": [ + { + "input_dimension:": "72 1024 ", + "input_name:": "gemm_input_reshape_arg_token_85" + }, + { + "input_dimension:": "1024 384 ", + "input_name:": "onnx::MatMul_7466" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.0.layers.1.feed_forward2.out_proj.bias" + } + ], + "node_name:": "/feed_forward2/out_proj_1/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "72 384 ", + "output_name:": "gemm_output_reshape_arg_token_88" + } + ] + }, + { + "node_id:": 1855, + "node_inputs:": [ + { + "input_dimension:": "72 384 ", + "input_name:": "gemm_output_reshape_arg_token_94" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_93" + } + ], + "node_name:": "gemm_output_reshape_token_95", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/out_proj2_1/Add_output_0" + } + ] + }, + { + "node_id:": 1856, + "node_inputs:": [ + { + "input_dimension:": "72 96 ", + "input_name:": "gemm_input_reshape_arg_token_91" + }, + { + "input_dimension:": "96 384 ", + "input_name:": "onnx::MatMul_7473" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.0.layers.1.self_attn.out_proj2.bias" + } + ], + "node_name:": "/out_proj2_1/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "72 384 ", + "output_name:": "gemm_output_reshape_arg_token_94" + } + ] + }, + { + "node_id:": 1857, + "node_inputs:": [ + { + "input_dimension:": "72 1 384 ", + "input_name:": "/Add_33_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_96" + } + ], + "node_name:": "gemm_input_reshape_token_98", + "node_outputs:": [ + { + "output_dimension:": "72 384 ", + "output_name:": "gemm_input_reshape_arg_token_97" + } + ] + }, + { + "node_id:": 1858, + "node_inputs:": [ + { + "input_dimension:": "72 1024 ", + "input_name:": "gemm_output_reshape_arg_token_100" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_99" + } + ], + "node_name:": "gemm_output_reshape_token_101", + "node_outputs:": [ + { + "output_dimension:": "72 1 1024 ", + "output_name:": "/feed_forward3/in_proj_1/Add_output_0" + } + ] + }, + { + "node_id:": 1859, + "node_inputs:": [ + { + "input_dimension:": "72 384 ", + "input_name:": "gemm_input_reshape_arg_token_97" + }, + { + "input_dimension:": "384 1024 ", + "input_name:": "onnx::MatMul_7478" + }, + { + "input_dimension:": "1024 ", + "input_name:": "encoder.encoders.0.layers.1.feed_forward3.in_proj.bias" + } + ], + "node_name:": "/feed_forward3/in_proj_1/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "72 1024 ", + "output_name:": "gemm_output_reshape_arg_token_100" + } + ] + }, + { + "node_id:": 1860, + "node_inputs:": [ + { + "input_dimension:": "72 1 1024 ", + "input_name:": "/feed_forward3/activation_1/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_102" + } + ], + "node_name:": "gemm_input_reshape_token_104", + "node_outputs:": [ + { + "output_dimension:": "72 1024 ", + "output_name:": "gemm_input_reshape_arg_token_103" + } + ] + }, + { + "node_id:": 1861, + "node_inputs:": [ + { + "input_dimension:": "72 384 ", + "input_name:": "gemm_output_reshape_arg_token_106" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_105" + } + ], + "node_name:": "gemm_output_reshape_token_107", + "node_outputs:": [ + { + "output_dimension:": "72 1 384 ", + "output_name:": "/feed_forward3/out_proj_1/Add_output_0" + } + ] + }, + { + "node_id:": 1862, + "node_inputs:": [ + { + "input_dimension:": "72 1024 ", + "input_name:": "gemm_input_reshape_arg_token_103" + }, + { + "input_dimension:": "1024 384 ", + "input_name:": "onnx::MatMul_7479" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.0.layers.1.feed_forward3.out_proj.bias" + } + ], + "node_name:": "/feed_forward3/out_proj_1/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "72 384 ", + "output_name:": "gemm_output_reshape_arg_token_106" + } + ] + }, + { + "node_id:": 1863, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/downsample/ReduceSum_1_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_108" + } + ], + "node_name:": "gemm_input_reshape_token_110", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_input_reshape_arg_token_109" + } + ] + }, + { + "node_id:": 1864, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_output_reshape_arg_token_112" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_111" + } + ], + "node_name:": "gemm_output_reshape_token_113", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward1/in_proj_2/Add_output_0" + } + ] + }, + { + "node_id:": 1865, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_input_reshape_arg_token_109" + }, + { + "input_dimension:": "384 1024 ", + "input_name:": "onnx::MatMul_7483" + }, + { + "input_dimension:": "1024 ", + "input_name:": "encoder.encoders.1.encoder.layers.0.feed_forward1.in_proj.bias" + } + ], + "node_name:": "/feed_forward1/in_proj_2/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_output_reshape_arg_token_112" + } + ] + }, + { + "node_id:": 1866, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward1/activation_2/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_114" + } + ], + "node_name:": "gemm_input_reshape_token_116", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_input_reshape_arg_token_115" + } + ] + }, + { + "node_id:": 1867, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_output_reshape_arg_token_118" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_117" + } + ], + "node_name:": "gemm_output_reshape_token_119", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/feed_forward1/out_proj_2/Add_output_0" + } + ] + }, + { + "node_id:": 1868, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_input_reshape_arg_token_115" + }, + { + "input_dimension:": "1024 384 ", + "input_name:": "onnx::MatMul_7484" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.1.encoder.layers.0.feed_forward1.out_proj.bias" + } + ], + "node_name:": "/feed_forward1/out_proj_2/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_output_reshape_arg_token_118" + } + ] + }, + { + "node_id:": 1869, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_41_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_120" + } + ], + "node_name:": "gemm_input_reshape_token_122", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_input_reshape_arg_token_121" + } + ] + }, + { + "node_id:": 1870, + "node_inputs:": [ + { + "input_dimension:": "36 512 ", + "input_name:": "gemm_output_reshape_arg_token_124" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_123" + } + ], + "node_name:": "gemm_output_reshape_token_125", + "node_outputs:": [ + { + "output_dimension:": "36 1 512 ", + "output_name:": "/in_proj_2/Add_output_0" + } + ] + }, + { + "node_id:": 1871, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_input_reshape_arg_token_121" + }, + { + "input_dimension:": "384 512 ", + "input_name:": "onnx::MatMul_7487" + }, + { + "input_dimension:": "512 ", + "input_name:": "encoder.encoders.1.encoder.layers.0.self_attn.in_proj.bias" + } + ], + "node_name:": "/in_proj_2/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 512 ", + "output_name:": "gemm_output_reshape_arg_token_124" + } + ] + }, + { + "node_id:": 1872, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_output_reshape_arg_token_130" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_129" + } + ], + "node_name:": "gemm_output_reshape_token_131", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_45_output_0" + } + ] + }, + { + "node_id:": 1873, + "node_inputs:": [ + { + "input_dimension:": "36 96 ", + "input_name:": "gemm_input_reshape_arg_token_127" + }, + { + "input_dimension:": "96 384 ", + "input_name:": "onnx::MatMul_7525" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.1.encoder.layers.0.self_attn.out_proj.bias" + } + ], + "node_name:": "/MatMul_13/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_output_reshape_arg_token_130" + } + ] + }, + { + "node_id:": 1874, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_47_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_132" + } + ], + "node_name:": "gemm_input_reshape_token_134", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_input_reshape_arg_token_133" + } + ] + }, + { + "node_id:": 1875, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_output_reshape_arg_token_136" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_135" + } + ], + "node_name:": "gemm_output_reshape_token_137", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward2/in_proj_2/Add_output_0" + } + ] + }, + { + "node_id:": 1876, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_input_reshape_arg_token_133" + }, + { + "input_dimension:": "384 1024 ", + "input_name:": "onnx::MatMul_7530" + }, + { + "input_dimension:": "1024 ", + "input_name:": "encoder.encoders.1.encoder.layers.0.feed_forward2.in_proj.bias" + } + ], + "node_name:": "/feed_forward2/in_proj_2/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_output_reshape_arg_token_136" + } + ] + }, + { + "node_id:": 1877, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward2/activation_2/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_138" + } + ], + "node_name:": "gemm_input_reshape_token_140", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_input_reshape_arg_token_139" + } + ] + }, + { + "node_id:": 1878, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_output_reshape_arg_token_142" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_141" + } + ], + "node_name:": "gemm_output_reshape_token_143", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/feed_forward2/out_proj_2/Add_output_0" + } + ] + }, + { + "node_id:": 1879, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_input_reshape_arg_token_139" + }, + { + "input_dimension:": "1024 384 ", + "input_name:": "onnx::MatMul_7531" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.1.encoder.layers.0.feed_forward2.out_proj.bias" + } + ], + "node_name:": "/feed_forward2/out_proj_2/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_output_reshape_arg_token_142" + } + ] + }, + { + "node_id:": 1880, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_output_reshape_arg_token_148" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_147" + } + ], + "node_name:": "gemm_output_reshape_token_149", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/out_proj2_2/Add_output_0" + } + ] + }, + { + "node_id:": 1881, + "node_inputs:": [ + { + "input_dimension:": "36 96 ", + "input_name:": "gemm_input_reshape_arg_token_145" + }, + { + "input_dimension:": "96 384 ", + "input_name:": "onnx::MatMul_7538" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.1.encoder.layers.0.self_attn.out_proj2.bias" + } + ], + "node_name:": "/out_proj2_2/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_output_reshape_arg_token_148" + } + ] + }, + { + "node_id:": 1882, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_51_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_150" + } + ], + "node_name:": "gemm_input_reshape_token_152", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_input_reshape_arg_token_151" + } + ] + }, + { + "node_id:": 1883, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_output_reshape_arg_token_154" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_153" + } + ], + "node_name:": "gemm_output_reshape_token_155", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward3/in_proj_2/Add_output_0" + } + ] + }, + { + "node_id:": 1884, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_input_reshape_arg_token_151" + }, + { + "input_dimension:": "384 1024 ", + "input_name:": "onnx::MatMul_7543" + }, + { + "input_dimension:": "1024 ", + "input_name:": "encoder.encoders.1.encoder.layers.0.feed_forward3.in_proj.bias" + } + ], + "node_name:": "/feed_forward3/in_proj_2/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_output_reshape_arg_token_154" + } + ] + }, + { + "node_id:": 1885, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward3/activation_2/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_156" + } + ], + "node_name:": "gemm_input_reshape_token_158", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_input_reshape_arg_token_157" + } + ] + }, + { + "node_id:": 1886, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_output_reshape_arg_token_160" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_159" + } + ], + "node_name:": "gemm_output_reshape_token_161", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/feed_forward3/out_proj_2/Add_output_0" + } + ] + }, + { + "node_id:": 1887, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_input_reshape_arg_token_157" + }, + { + "input_dimension:": "1024 384 ", + "input_name:": "onnx::MatMul_7544" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.1.encoder.layers.0.feed_forward3.out_proj.bias" + } + ], + "node_name:": "/feed_forward3/out_proj_2/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_output_reshape_arg_token_160" + } + ] + }, + { + "node_id:": 1888, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_53_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_162" + } + ], + "node_name:": "gemm_input_reshape_token_164", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_input_reshape_arg_token_163" + } + ] + }, + { + "node_id:": 1889, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_output_reshape_arg_token_166" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_165" + } + ], + "node_name:": "gemm_output_reshape_token_167", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward1/in_proj_3/Add_output_0" + } + ] + }, + { + "node_id:": 1890, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_input_reshape_arg_token_163" + }, + { + "input_dimension:": "384 1024 ", + "input_name:": "onnx::MatMul_7545" + }, + { + "input_dimension:": "1024 ", + "input_name:": "encoder.encoders.1.encoder.layers.1.feed_forward1.in_proj.bias" + } + ], + "node_name:": "/feed_forward1/in_proj_3/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_output_reshape_arg_token_166" + } + ] + }, + { + "node_id:": 1891, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward1/activation_3/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_168" + } + ], + "node_name:": "gemm_input_reshape_token_170", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_input_reshape_arg_token_169" + } + ] + }, + { + "node_id:": 1892, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_output_reshape_arg_token_172" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_171" + } + ], + "node_name:": "gemm_output_reshape_token_173", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/feed_forward1/out_proj_3/Add_output_0" + } + ] + }, + { + "node_id:": 1893, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_input_reshape_arg_token_169" + }, + { + "input_dimension:": "1024 384 ", + "input_name:": "onnx::MatMul_7546" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.bias" + } + ], + "node_name:": "/feed_forward1/out_proj_3/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_output_reshape_arg_token_172" + } + ] + }, + { + "node_id:": 1894, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_59_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_174" + } + ], + "node_name:": "gemm_input_reshape_token_176", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_input_reshape_arg_token_175" + } + ] + }, + { + "node_id:": 1895, + "node_inputs:": [ + { + "input_dimension:": "36 512 ", + "input_name:": "gemm_output_reshape_arg_token_178" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_177" + } + ], + "node_name:": "gemm_output_reshape_token_179", + "node_outputs:": [ + { + "output_dimension:": "36 1 512 ", + "output_name:": "/in_proj_3/Add_output_0" + } + ] + }, + { + "node_id:": 1896, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_input_reshape_arg_token_175" + }, + { + "input_dimension:": "384 512 ", + "input_name:": "onnx::MatMul_7549" + }, + { + "input_dimension:": "512 ", + "input_name:": "encoder.encoders.1.encoder.layers.1.self_attn.in_proj.bias" + } + ], + "node_name:": "/in_proj_3/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 512 ", + "output_name:": "gemm_output_reshape_arg_token_178" + } + ] + }, + { + "node_id:": 1897, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_output_reshape_arg_token_184" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_183" + } + ], + "node_name:": "gemm_output_reshape_token_185", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_63_output_0" + } + ] + }, + { + "node_id:": 1898, + "node_inputs:": [ + { + "input_dimension:": "36 96 ", + "input_name:": "gemm_input_reshape_arg_token_181" + }, + { + "input_dimension:": "96 384 ", + "input_name:": "onnx::MatMul_7587" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.1.encoder.layers.1.self_attn.out_proj.bias" + } + ], + "node_name:": "/MatMul_18/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_output_reshape_arg_token_184" + } + ] + }, + { + "node_id:": 1899, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_65_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_186" + } + ], + "node_name:": "gemm_input_reshape_token_188", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_input_reshape_arg_token_187" + } + ] + }, + { + "node_id:": 1900, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_output_reshape_arg_token_190" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_189" + } + ], + "node_name:": "gemm_output_reshape_token_191", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward2/in_proj_3/Add_output_0" + } + ] + }, + { + "node_id:": 1901, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_input_reshape_arg_token_187" + }, + { + "input_dimension:": "384 1024 ", + "input_name:": "onnx::MatMul_7592" + }, + { + "input_dimension:": "1024 ", + "input_name:": "encoder.encoders.1.encoder.layers.1.feed_forward2.in_proj.bias" + } + ], + "node_name:": "/feed_forward2/in_proj_3/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_output_reshape_arg_token_190" + } + ] + }, + { + "node_id:": 1902, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward2/activation_3/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_192" + } + ], + "node_name:": "gemm_input_reshape_token_194", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_input_reshape_arg_token_193" + } + ] + }, + { + "node_id:": 1903, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_output_reshape_arg_token_196" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_195" + } + ], + "node_name:": "gemm_output_reshape_token_197", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/feed_forward2/out_proj_3/Add_output_0" + } + ] + }, + { + "node_id:": 1904, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_input_reshape_arg_token_193" + }, + { + "input_dimension:": "1024 384 ", + "input_name:": "onnx::MatMul_7593" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.1.encoder.layers.1.feed_forward2.out_proj.bias" + } + ], + "node_name:": "/feed_forward2/out_proj_3/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_output_reshape_arg_token_196" + } + ] + }, + { + "node_id:": 1905, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_output_reshape_arg_token_202" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_201" + } + ], + "node_name:": "gemm_output_reshape_token_203", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/out_proj2_3/Add_output_0" + } + ] + }, + { + "node_id:": 1906, + "node_inputs:": [ + { + "input_dimension:": "36 96 ", + "input_name:": "gemm_input_reshape_arg_token_199" + }, + { + "input_dimension:": "96 384 ", + "input_name:": "onnx::MatMul_7600" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.1.encoder.layers.1.self_attn.out_proj2.bias" + } + ], + "node_name:": "/out_proj2_3/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_output_reshape_arg_token_202" + } + ] + }, + { + "node_id:": 1907, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_69_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_204" + } + ], + "node_name:": "gemm_input_reshape_token_206", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_input_reshape_arg_token_205" + } + ] + }, + { + "node_id:": 1908, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_output_reshape_arg_token_208" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_207" + } + ], + "node_name:": "gemm_output_reshape_token_209", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward3/in_proj_3/Add_output_0" + } + ] + }, + { + "node_id:": 1909, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_input_reshape_arg_token_205" + }, + { + "input_dimension:": "384 1024 ", + "input_name:": "onnx::MatMul_7605" + }, + { + "input_dimension:": "1024 ", + "input_name:": "encoder.encoders.1.encoder.layers.1.feed_forward3.in_proj.bias" + } + ], + "node_name:": "/feed_forward3/in_proj_3/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_output_reshape_arg_token_208" + } + ] + }, + { + "node_id:": 1910, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward3/activation_3/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_210" + } + ], + "node_name:": "gemm_input_reshape_token_212", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_input_reshape_arg_token_211" + } + ] + }, + { + "node_id:": 1911, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_output_reshape_arg_token_214" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_213" + } + ], + "node_name:": "gemm_output_reshape_token_215", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/feed_forward3/out_proj_3/Add_output_0" + } + ] + }, + { + "node_id:": 1912, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_input_reshape_arg_token_211" + }, + { + "input_dimension:": "1024 384 ", + "input_name:": "onnx::MatMul_7606" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.1.encoder.layers.1.feed_forward3.out_proj.bias" + } + ], + "node_name:": "/feed_forward3/out_proj_3/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_output_reshape_arg_token_214" + } + ] + }, + { + "node_id:": 1913, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_71_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_216" + } + ], + "node_name:": "gemm_input_reshape_token_218", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_input_reshape_arg_token_217" + } + ] + }, + { + "node_id:": 1914, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_output_reshape_arg_token_220" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_219" + } + ], + "node_name:": "gemm_output_reshape_token_221", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward1/in_proj_4/Add_output_0" + } + ] + }, + { + "node_id:": 1915, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_input_reshape_arg_token_217" + }, + { + "input_dimension:": "384 1024 ", + "input_name:": "onnx::MatMul_7607" + }, + { + "input_dimension:": "1024 ", + "input_name:": "encoder.encoders.1.encoder.layers.2.feed_forward1.in_proj.bias" + } + ], + "node_name:": "/feed_forward1/in_proj_4/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_output_reshape_arg_token_220" + } + ] + }, + { + "node_id:": 1916, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward1/activation_4/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_222" + } + ], + "node_name:": "gemm_input_reshape_token_224", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_input_reshape_arg_token_223" + } + ] + }, + { + "node_id:": 1917, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_output_reshape_arg_token_226" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_225" + } + ], + "node_name:": "gemm_output_reshape_token_227", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/feed_forward1/out_proj_4/Add_output_0" + } + ] + }, + { + "node_id:": 1918, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_input_reshape_arg_token_223" + }, + { + "input_dimension:": "1024 384 ", + "input_name:": "onnx::MatMul_7608" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.1.encoder.layers.2.feed_forward1.out_proj.bias" + } + ], + "node_name:": "/feed_forward1/out_proj_4/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_output_reshape_arg_token_226" + } + ] + }, + { + "node_id:": 1919, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_77_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_228" + } + ], + "node_name:": "gemm_input_reshape_token_230", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_input_reshape_arg_token_229" + } + ] + }, + { + "node_id:": 1920, + "node_inputs:": [ + { + "input_dimension:": "36 512 ", + "input_name:": "gemm_output_reshape_arg_token_232" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_231" + } + ], + "node_name:": "gemm_output_reshape_token_233", + "node_outputs:": [ + { + "output_dimension:": "36 1 512 ", + "output_name:": "/in_proj_4/Add_output_0" + } + ] + }, + { + "node_id:": 1921, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_input_reshape_arg_token_229" + }, + { + "input_dimension:": "384 512 ", + "input_name:": "onnx::MatMul_7611" + }, + { + "input_dimension:": "512 ", + "input_name:": "encoder.encoders.1.encoder.layers.2.self_attn.in_proj.bias" + } + ], + "node_name:": "/in_proj_4/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 512 ", + "output_name:": "gemm_output_reshape_arg_token_232" + } + ] + }, + { + "node_id:": 1922, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_output_reshape_arg_token_238" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_237" + } + ], + "node_name:": "gemm_output_reshape_token_239", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_81_output_0" + } + ] + }, + { + "node_id:": 1923, + "node_inputs:": [ + { + "input_dimension:": "36 96 ", + "input_name:": "gemm_input_reshape_arg_token_235" + }, + { + "input_dimension:": "96 384 ", + "input_name:": "onnx::MatMul_7649" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.1.encoder.layers.2.self_attn.out_proj.bias" + } + ], + "node_name:": "/MatMul_23/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_output_reshape_arg_token_238" + } + ] + }, + { + "node_id:": 1924, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_83_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_240" + } + ], + "node_name:": "gemm_input_reshape_token_242", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_input_reshape_arg_token_241" + } + ] + }, + { + "node_id:": 1925, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_output_reshape_arg_token_244" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_243" + } + ], + "node_name:": "gemm_output_reshape_token_245", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward2/in_proj_4/Add_output_0" + } + ] + }, + { + "node_id:": 1926, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_input_reshape_arg_token_241" + }, + { + "input_dimension:": "384 1024 ", + "input_name:": "onnx::MatMul_7654" + }, + { + "input_dimension:": "1024 ", + "input_name:": "encoder.encoders.1.encoder.layers.2.feed_forward2.in_proj.bias" + } + ], + "node_name:": "/feed_forward2/in_proj_4/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_output_reshape_arg_token_244" + } + ] + }, + { + "node_id:": 1927, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward2/activation_4/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_246" + } + ], + "node_name:": "gemm_input_reshape_token_248", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_input_reshape_arg_token_247" + } + ] + }, + { + "node_id:": 1928, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_output_reshape_arg_token_250" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_249" + } + ], + "node_name:": "gemm_output_reshape_token_251", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/feed_forward2/out_proj_4/Add_output_0" + } + ] + }, + { + "node_id:": 1929, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_input_reshape_arg_token_247" + }, + { + "input_dimension:": "1024 384 ", + "input_name:": "onnx::MatMul_7655" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.1.encoder.layers.2.feed_forward2.out_proj.bias" + } + ], + "node_name:": "/feed_forward2/out_proj_4/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_output_reshape_arg_token_250" + } + ] + }, + { + "node_id:": 1930, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_output_reshape_arg_token_256" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_255" + } + ], + "node_name:": "gemm_output_reshape_token_257", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/out_proj2_4/Add_output_0" + } + ] + }, + { + "node_id:": 1931, + "node_inputs:": [ + { + "input_dimension:": "36 96 ", + "input_name:": "gemm_input_reshape_arg_token_253" + }, + { + "input_dimension:": "96 384 ", + "input_name:": "onnx::MatMul_7662" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.1.encoder.layers.2.self_attn.out_proj2.bias" + } + ], + "node_name:": "/out_proj2_4/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_output_reshape_arg_token_256" + } + ] + }, + { + "node_id:": 1932, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_87_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_258" + } + ], + "node_name:": "gemm_input_reshape_token_260", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_input_reshape_arg_token_259" + } + ] + }, + { + "node_id:": 1933, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_output_reshape_arg_token_262" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_261" + } + ], + "node_name:": "gemm_output_reshape_token_263", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward3/in_proj_4/Add_output_0" + } + ] + }, + { + "node_id:": 1934, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_input_reshape_arg_token_259" + }, + { + "input_dimension:": "384 1024 ", + "input_name:": "onnx::MatMul_7667" + }, + { + "input_dimension:": "1024 ", + "input_name:": "encoder.encoders.1.encoder.layers.2.feed_forward3.in_proj.bias" + } + ], + "node_name:": "/feed_forward3/in_proj_4/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_output_reshape_arg_token_262" + } + ] + }, + { + "node_id:": 1935, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward3/activation_4/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_264" + } + ], + "node_name:": "gemm_input_reshape_token_266", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_input_reshape_arg_token_265" + } + ] + }, + { + "node_id:": 1936, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_output_reshape_arg_token_268" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_267" + } + ], + "node_name:": "gemm_output_reshape_token_269", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/feed_forward3/out_proj_4/Add_output_0" + } + ] + }, + { + "node_id:": 1937, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_input_reshape_arg_token_265" + }, + { + "input_dimension:": "1024 384 ", + "input_name:": "onnx::MatMul_7668" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.1.encoder.layers.2.feed_forward3.out_proj.bias" + } + ], + "node_name:": "/feed_forward3/out_proj_4/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_output_reshape_arg_token_268" + } + ] + }, + { + "node_id:": 1938, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_89_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_270" + } + ], + "node_name:": "gemm_input_reshape_token_272", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_input_reshape_arg_token_271" + } + ] + }, + { + "node_id:": 1939, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_output_reshape_arg_token_274" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_273" + } + ], + "node_name:": "gemm_output_reshape_token_275", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward1/in_proj_5/Add_output_0" + } + ] + }, + { + "node_id:": 1940, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_input_reshape_arg_token_271" + }, + { + "input_dimension:": "384 1024 ", + "input_name:": "onnx::MatMul_7669" + }, + { + "input_dimension:": "1024 ", + "input_name:": "encoder.encoders.1.encoder.layers.3.feed_forward1.in_proj.bias" + } + ], + "node_name:": "/feed_forward1/in_proj_5/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_output_reshape_arg_token_274" + } + ] + }, + { + "node_id:": 1941, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward1/activation_5/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_276" + } + ], + "node_name:": "gemm_input_reshape_token_278", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_input_reshape_arg_token_277" + } + ] + }, + { + "node_id:": 1942, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_output_reshape_arg_token_280" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_279" + } + ], + "node_name:": "gemm_output_reshape_token_281", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/feed_forward1/out_proj_5/Add_output_0" + } + ] + }, + { + "node_id:": 1943, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_input_reshape_arg_token_277" + }, + { + "input_dimension:": "1024 384 ", + "input_name:": "onnx::MatMul_7670" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.1.encoder.layers.3.feed_forward1.out_proj.bias" + } + ], + "node_name:": "/feed_forward1/out_proj_5/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_output_reshape_arg_token_280" + } + ] + }, + { + "node_id:": 1944, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_95_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_282" + } + ], + "node_name:": "gemm_input_reshape_token_284", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_input_reshape_arg_token_283" + } + ] + }, + { + "node_id:": 1945, + "node_inputs:": [ + { + "input_dimension:": "36 512 ", + "input_name:": "gemm_output_reshape_arg_token_286" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_285" + } + ], + "node_name:": "gemm_output_reshape_token_287", + "node_outputs:": [ + { + "output_dimension:": "36 1 512 ", + "output_name:": "/in_proj_5/Add_output_0" + } + ] + }, + { + "node_id:": 1946, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_input_reshape_arg_token_283" + }, + { + "input_dimension:": "384 512 ", + "input_name:": "onnx::MatMul_7673" + }, + { + "input_dimension:": "512 ", + "input_name:": "encoder.encoders.1.encoder.layers.3.self_attn.in_proj.bias" + } + ], + "node_name:": "/in_proj_5/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 512 ", + "output_name:": "gemm_output_reshape_arg_token_286" + } + ] + }, + { + "node_id:": 1947, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_output_reshape_arg_token_292" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_291" + } + ], + "node_name:": "gemm_output_reshape_token_293", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_99_output_0" + } + ] + }, + { + "node_id:": 1948, + "node_inputs:": [ + { + "input_dimension:": "36 96 ", + "input_name:": "gemm_input_reshape_arg_token_289" + }, + { + "input_dimension:": "96 384 ", + "input_name:": "onnx::MatMul_7711" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.1.encoder.layers.3.self_attn.out_proj.bias" + } + ], + "node_name:": "/MatMul_28/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_output_reshape_arg_token_292" + } + ] + }, + { + "node_id:": 1949, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_101_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_294" + } + ], + "node_name:": "gemm_input_reshape_token_296", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_input_reshape_arg_token_295" + } + ] + }, + { + "node_id:": 1950, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_output_reshape_arg_token_298" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_297" + } + ], + "node_name:": "gemm_output_reshape_token_299", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward2/in_proj_5/Add_output_0" + } + ] + }, + { + "node_id:": 1951, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_input_reshape_arg_token_295" + }, + { + "input_dimension:": "384 1024 ", + "input_name:": "onnx::MatMul_7716" + }, + { + "input_dimension:": "1024 ", + "input_name:": "encoder.encoders.1.encoder.layers.3.feed_forward2.in_proj.bias" + } + ], + "node_name:": "/feed_forward2/in_proj_5/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_output_reshape_arg_token_298" + } + ] + }, + { + "node_id:": 1952, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward2/activation_5/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_300" + } + ], + "node_name:": "gemm_input_reshape_token_302", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_input_reshape_arg_token_301" + } + ] + }, + { + "node_id:": 1953, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_output_reshape_arg_token_304" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_303" + } + ], + "node_name:": "gemm_output_reshape_token_305", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/feed_forward2/out_proj_5/Add_output_0" + } + ] + }, + { + "node_id:": 1954, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_input_reshape_arg_token_301" + }, + { + "input_dimension:": "1024 384 ", + "input_name:": "onnx::MatMul_7717" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.1.encoder.layers.3.feed_forward2.out_proj.bias" + } + ], + "node_name:": "/feed_forward2/out_proj_5/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_output_reshape_arg_token_304" + } + ] + }, + { + "node_id:": 1955, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_output_reshape_arg_token_310" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_309" + } + ], + "node_name:": "gemm_output_reshape_token_311", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/out_proj2_5/Add_output_0" + } + ] + }, + { + "node_id:": 1956, + "node_inputs:": [ + { + "input_dimension:": "36 96 ", + "input_name:": "gemm_input_reshape_arg_token_307" + }, + { + "input_dimension:": "96 384 ", + "input_name:": "onnx::MatMul_7724" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.1.encoder.layers.3.self_attn.out_proj2.bias" + } + ], + "node_name:": "/out_proj2_5/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_output_reshape_arg_token_310" + } + ] + }, + { + "node_id:": 1957, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_105_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_312" + } + ], + "node_name:": "gemm_input_reshape_token_314", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_input_reshape_arg_token_313" + } + ] + }, + { + "node_id:": 1958, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_output_reshape_arg_token_316" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_315" + } + ], + "node_name:": "gemm_output_reshape_token_317", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward3/in_proj_5/Add_output_0" + } + ] + }, + { + "node_id:": 1959, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_input_reshape_arg_token_313" + }, + { + "input_dimension:": "384 1024 ", + "input_name:": "onnx::MatMul_7729" + }, + { + "input_dimension:": "1024 ", + "input_name:": "encoder.encoders.1.encoder.layers.3.feed_forward3.in_proj.bias" + } + ], + "node_name:": "/feed_forward3/in_proj_5/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_output_reshape_arg_token_316" + } + ] + }, + { + "node_id:": 1960, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward3/activation_5/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_318" + } + ], + "node_name:": "gemm_input_reshape_token_320", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_input_reshape_arg_token_319" + } + ] + }, + { + "node_id:": 1961, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_output_reshape_arg_token_322" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_321" + } + ], + "node_name:": "gemm_output_reshape_token_323", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/feed_forward3/out_proj_5/Add_output_0" + } + ] + }, + { + "node_id:": 1962, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_input_reshape_arg_token_319" + }, + { + "input_dimension:": "1024 384 ", + "input_name:": "onnx::MatMul_7730" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.1.encoder.layers.3.feed_forward3.out_proj.bias" + } + ], + "node_name:": "/feed_forward3/out_proj_5/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_output_reshape_arg_token_322" + } + ] + }, + { + "node_id:": 1963, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/downsample_1/ReduceSum_1_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_324" + } + ], + "node_name:": "gemm_input_reshape_token_326", + "node_outputs:": [ + { + "output_dimension:": "18 384 ", + "output_name:": "gemm_input_reshape_arg_token_325" + } + ] + }, + { + "node_id:": 1964, + "node_inputs:": [ + { + "input_dimension:": "18 2048 ", + "input_name:": "gemm_output_reshape_arg_token_328" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_327" + } + ], + "node_name:": "gemm_output_reshape_token_329", + "node_outputs:": [ + { + "output_dimension:": "18 1 2048 ", + "output_name:": "/feed_forward1/in_proj_6/Add_output_0" + } + ] + }, + { + "node_id:": 1965, + "node_inputs:": [ + { + "input_dimension:": "18 384 ", + "input_name:": "gemm_input_reshape_arg_token_325" + }, + { + "input_dimension:": "384 2048 ", + "input_name:": "onnx::MatMul_7740" + }, + { + "input_dimension:": "2048 ", + "input_name:": "encoder.encoders.2.encoder.layers.0.feed_forward1.in_proj.bias" + } + ], + "node_name:": "/feed_forward1/in_proj_6/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "18 2048 ", + "output_name:": "gemm_output_reshape_arg_token_328" + } + ] + }, + { + "node_id:": 1966, + "node_inputs:": [ + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward1/activation_6/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_330" + } + ], + "node_name:": "gemm_input_reshape_token_332", + "node_outputs:": [ + { + "output_dimension:": "18 2048 ", + "output_name:": "gemm_input_reshape_arg_token_331" + } + ] + }, + { + "node_id:": 1967, + "node_inputs:": [ + { + "input_dimension:": "18 384 ", + "input_name:": "gemm_output_reshape_arg_token_334" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_333" + } + ], + "node_name:": "gemm_output_reshape_token_335", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/feed_forward1/out_proj_6/Add_output_0" + } + ] + }, + { + "node_id:": 1968, + "node_inputs:": [ + { + "input_dimension:": "18 2048 ", + "input_name:": "gemm_input_reshape_arg_token_331" + }, + { + "input_dimension:": "2048 384 ", + "input_name:": "onnx::MatMul_7741" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.2.encoder.layers.0.feed_forward1.out_proj.bias" + } + ], + "node_name:": "/feed_forward1/out_proj_6/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "18 384 ", + "output_name:": "gemm_output_reshape_arg_token_334" + } + ] + }, + { + "node_id:": 1969, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_113_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_336" + } + ], + "node_name:": "gemm_input_reshape_token_338", + "node_outputs:": [ + { + "output_dimension:": "18 384 ", + "output_name:": "gemm_input_reshape_arg_token_337" + } + ] + }, + { + "node_id:": 1970, + "node_inputs:": [ + { + "input_dimension:": "18 512 ", + "input_name:": "gemm_output_reshape_arg_token_340" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_339" + } + ], + "node_name:": "gemm_output_reshape_token_341", + "node_outputs:": [ + { + "output_dimension:": "18 1 512 ", + "output_name:": "/in_proj_6/Add_output_0" + } + ] + }, + { + "node_id:": 1971, + "node_inputs:": [ + { + "input_dimension:": "18 384 ", + "input_name:": "gemm_input_reshape_arg_token_337" + }, + { + "input_dimension:": "384 512 ", + "input_name:": "onnx::MatMul_7744" + }, + { + "input_dimension:": "512 ", + "input_name:": "encoder.encoders.2.encoder.layers.0.self_attn.in_proj.bias" + } + ], + "node_name:": "/in_proj_6/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "18 512 ", + "output_name:": "gemm_output_reshape_arg_token_340" + } + ] + }, + { + "node_id:": 1972, + "node_inputs:": [ + { + "input_dimension:": "18 384 ", + "input_name:": "gemm_output_reshape_arg_token_346" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_345" + } + ], + "node_name:": "gemm_output_reshape_token_347", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Add_117_output_0" + } + ] + }, + { + "node_id:": 1973, + "node_inputs:": [ + { + "input_dimension:": "18 96 ", + "input_name:": "gemm_input_reshape_arg_token_343" + }, + { + "input_dimension:": "96 384 ", + "input_name:": "onnx::MatMul_7782" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.2.encoder.layers.0.self_attn.out_proj.bias" + } + ], + "node_name:": "/MatMul_33/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "18 384 ", + "output_name:": "gemm_output_reshape_arg_token_346" + } + ] + }, + { + "node_id:": 1974, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_119_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_348" + } + ], + "node_name:": "gemm_input_reshape_token_350", + "node_outputs:": [ + { + "output_dimension:": "18 384 ", + "output_name:": "gemm_input_reshape_arg_token_349" + } + ] + }, + { + "node_id:": 1975, + "node_inputs:": [ + { + "input_dimension:": "18 2048 ", + "input_name:": "gemm_output_reshape_arg_token_352" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_351" + } + ], + "node_name:": "gemm_output_reshape_token_353", + "node_outputs:": [ + { + "output_dimension:": "18 1 2048 ", + "output_name:": "/feed_forward2/in_proj_6/Add_output_0" + } + ] + }, + { + "node_id:": 1976, + "node_inputs:": [ + { + "input_dimension:": "18 384 ", + "input_name:": "gemm_input_reshape_arg_token_349" + }, + { + "input_dimension:": "384 2048 ", + "input_name:": "onnx::MatMul_7787" + }, + { + "input_dimension:": "2048 ", + "input_name:": "encoder.encoders.2.encoder.layers.0.feed_forward2.in_proj.bias" + } + ], + "node_name:": "/feed_forward2/in_proj_6/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "18 2048 ", + "output_name:": "gemm_output_reshape_arg_token_352" + } + ] + }, + { + "node_id:": 1977, + "node_inputs:": [ + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward2/activation_6/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_354" + } + ], + "node_name:": "gemm_input_reshape_token_356", + "node_outputs:": [ + { + "output_dimension:": "18 2048 ", + "output_name:": "gemm_input_reshape_arg_token_355" + } + ] + }, + { + "node_id:": 1978, + "node_inputs:": [ + { + "input_dimension:": "18 384 ", + "input_name:": "gemm_output_reshape_arg_token_358" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_357" + } + ], + "node_name:": "gemm_output_reshape_token_359", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/feed_forward2/out_proj_6/Add_output_0" + } + ] + }, + { + "node_id:": 1979, + "node_inputs:": [ + { + "input_dimension:": "18 2048 ", + "input_name:": "gemm_input_reshape_arg_token_355" + }, + { + "input_dimension:": "2048 384 ", + "input_name:": "onnx::MatMul_7788" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.2.encoder.layers.0.feed_forward2.out_proj.bias" + } + ], + "node_name:": "/feed_forward2/out_proj_6/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "18 384 ", + "output_name:": "gemm_output_reshape_arg_token_358" + } + ] + }, + { + "node_id:": 1980, + "node_inputs:": [ + { + "input_dimension:": "18 384 ", + "input_name:": "gemm_output_reshape_arg_token_364" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_363" + } + ], + "node_name:": "gemm_output_reshape_token_365", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/out_proj2_6/Add_output_0" + } + ] + }, + { + "node_id:": 1981, + "node_inputs:": [ + { + "input_dimension:": "18 96 ", + "input_name:": "gemm_input_reshape_arg_token_361" + }, + { + "input_dimension:": "96 384 ", + "input_name:": "onnx::MatMul_7795" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.2.encoder.layers.0.self_attn.out_proj2.bias" + } + ], + "node_name:": "/out_proj2_6/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "18 384 ", + "output_name:": "gemm_output_reshape_arg_token_364" + } + ] + }, + { + "node_id:": 1982, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_123_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_366" + } + ], + "node_name:": "gemm_input_reshape_token_368", + "node_outputs:": [ + { + "output_dimension:": "18 384 ", + "output_name:": "gemm_input_reshape_arg_token_367" + } + ] + }, + { + "node_id:": 1983, + "node_inputs:": [ + { + "input_dimension:": "18 2048 ", + "input_name:": "gemm_output_reshape_arg_token_370" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_369" + } + ], + "node_name:": "gemm_output_reshape_token_371", + "node_outputs:": [ + { + "output_dimension:": "18 1 2048 ", + "output_name:": "/feed_forward3/in_proj_6/Add_output_0" + } + ] + }, + { + "node_id:": 1984, + "node_inputs:": [ + { + "input_dimension:": "18 384 ", + "input_name:": "gemm_input_reshape_arg_token_367" + }, + { + "input_dimension:": "384 2048 ", + "input_name:": "onnx::MatMul_7800" + }, + { + "input_dimension:": "2048 ", + "input_name:": "encoder.encoders.2.encoder.layers.0.feed_forward3.in_proj.bias" + } + ], + "node_name:": "/feed_forward3/in_proj_6/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "18 2048 ", + "output_name:": "gemm_output_reshape_arg_token_370" + } + ] + }, + { + "node_id:": 1985, + "node_inputs:": [ + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward3/activation_6/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_372" + } + ], + "node_name:": "gemm_input_reshape_token_374", + "node_outputs:": [ + { + "output_dimension:": "18 2048 ", + "output_name:": "gemm_input_reshape_arg_token_373" + } + ] + }, + { + "node_id:": 1986, + "node_inputs:": [ + { + "input_dimension:": "18 384 ", + "input_name:": "gemm_output_reshape_arg_token_376" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_375" + } + ], + "node_name:": "gemm_output_reshape_token_377", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/feed_forward3/out_proj_6/Add_output_0" + } + ] + }, + { + "node_id:": 1987, + "node_inputs:": [ + { + "input_dimension:": "18 2048 ", + "input_name:": "gemm_input_reshape_arg_token_373" + }, + { + "input_dimension:": "2048 384 ", + "input_name:": "onnx::MatMul_7801" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.2.encoder.layers.0.feed_forward3.out_proj.bias" + } + ], + "node_name:": "/feed_forward3/out_proj_6/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "18 384 ", + "output_name:": "gemm_output_reshape_arg_token_376" + } + ] + }, + { + "node_id:": 1988, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_125_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_378" + } + ], + "node_name:": "gemm_input_reshape_token_380", + "node_outputs:": [ + { + "output_dimension:": "18 384 ", + "output_name:": "gemm_input_reshape_arg_token_379" + } + ] + }, + { + "node_id:": 1989, + "node_inputs:": [ + { + "input_dimension:": "18 2048 ", + "input_name:": "gemm_output_reshape_arg_token_382" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_381" + } + ], + "node_name:": "gemm_output_reshape_token_383", + "node_outputs:": [ + { + "output_dimension:": "18 1 2048 ", + "output_name:": "/feed_forward1/in_proj_7/Add_output_0" + } + ] + }, + { + "node_id:": 1990, + "node_inputs:": [ + { + "input_dimension:": "18 384 ", + "input_name:": "gemm_input_reshape_arg_token_379" + }, + { + "input_dimension:": "384 2048 ", + "input_name:": "onnx::MatMul_7802" + }, + { + "input_dimension:": "2048 ", + "input_name:": "encoder.encoders.2.encoder.layers.1.feed_forward1.in_proj.bias" + } + ], + "node_name:": "/feed_forward1/in_proj_7/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "18 2048 ", + "output_name:": "gemm_output_reshape_arg_token_382" + } + ] + }, + { + "node_id:": 1991, + "node_inputs:": [ + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward1/activation_7/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_384" + } + ], + "node_name:": "gemm_input_reshape_token_386", + "node_outputs:": [ + { + "output_dimension:": "18 2048 ", + "output_name:": "gemm_input_reshape_arg_token_385" + } + ] + }, + { + "node_id:": 1992, + "node_inputs:": [ + { + "input_dimension:": "18 384 ", + "input_name:": "gemm_output_reshape_arg_token_388" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_387" + } + ], + "node_name:": "gemm_output_reshape_token_389", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/feed_forward1/out_proj_7/Add_output_0" + } + ] + }, + { + "node_id:": 1993, + "node_inputs:": [ + { + "input_dimension:": "18 2048 ", + "input_name:": "gemm_input_reshape_arg_token_385" + }, + { + "input_dimension:": "2048 384 ", + "input_name:": "onnx::MatMul_7803" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.bias" + } + ], + "node_name:": "/feed_forward1/out_proj_7/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "18 384 ", + "output_name:": "gemm_output_reshape_arg_token_388" + } + ] + }, + { + "node_id:": 1994, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_131_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_390" + } + ], + "node_name:": "gemm_input_reshape_token_392", + "node_outputs:": [ + { + "output_dimension:": "18 384 ", + "output_name:": "gemm_input_reshape_arg_token_391" + } + ] + }, + { + "node_id:": 1995, + "node_inputs:": [ + { + "input_dimension:": "18 512 ", + "input_name:": "gemm_output_reshape_arg_token_394" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_393" + } + ], + "node_name:": "gemm_output_reshape_token_395", + "node_outputs:": [ + { + "output_dimension:": "18 1 512 ", + "output_name:": "/in_proj_7/Add_output_0" + } + ] + }, + { + "node_id:": 1996, + "node_inputs:": [ + { + "input_dimension:": "18 384 ", + "input_name:": "gemm_input_reshape_arg_token_391" + }, + { + "input_dimension:": "384 512 ", + "input_name:": "onnx::MatMul_7806" + }, + { + "input_dimension:": "512 ", + "input_name:": "encoder.encoders.2.encoder.layers.1.self_attn.in_proj.bias" + } + ], + "node_name:": "/in_proj_7/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "18 512 ", + "output_name:": "gemm_output_reshape_arg_token_394" + } + ] + }, + { + "node_id:": 1997, + "node_inputs:": [ + { + "input_dimension:": "18 384 ", + "input_name:": "gemm_output_reshape_arg_token_400" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_399" + } + ], + "node_name:": "gemm_output_reshape_token_401", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Add_135_output_0" + } + ] + }, + { + "node_id:": 1998, + "node_inputs:": [ + { + "input_dimension:": "18 96 ", + "input_name:": "gemm_input_reshape_arg_token_397" + }, + { + "input_dimension:": "96 384 ", + "input_name:": "onnx::MatMul_7844" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.2.encoder.layers.1.self_attn.out_proj.bias" + } + ], + "node_name:": "/MatMul_38/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "18 384 ", + "output_name:": "gemm_output_reshape_arg_token_400" + } + ] + }, + { + "node_id:": 1999, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_137_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_402" + } + ], + "node_name:": "gemm_input_reshape_token_404", + "node_outputs:": [ + { + "output_dimension:": "18 384 ", + "output_name:": "gemm_input_reshape_arg_token_403" + } + ] + }, + { + "node_id:": 2000, + "node_inputs:": [ + { + "input_dimension:": "18 2048 ", + "input_name:": "gemm_output_reshape_arg_token_406" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_405" + } + ], + "node_name:": "gemm_output_reshape_token_407", + "node_outputs:": [ + { + "output_dimension:": "18 1 2048 ", + "output_name:": "/feed_forward2/in_proj_7/Add_output_0" + } + ] + }, + { + "node_id:": 2001, + "node_inputs:": [ + { + "input_dimension:": "18 384 ", + "input_name:": "gemm_input_reshape_arg_token_403" + }, + { + "input_dimension:": "384 2048 ", + "input_name:": "onnx::MatMul_7849" + }, + { + "input_dimension:": "2048 ", + "input_name:": "encoder.encoders.2.encoder.layers.1.feed_forward2.in_proj.bias" + } + ], + "node_name:": "/feed_forward2/in_proj_7/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "18 2048 ", + "output_name:": "gemm_output_reshape_arg_token_406" + } + ] + }, + { + "node_id:": 2002, + "node_inputs:": [ + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward2/activation_7/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_408" + } + ], + "node_name:": "gemm_input_reshape_token_410", + "node_outputs:": [ + { + "output_dimension:": "18 2048 ", + "output_name:": "gemm_input_reshape_arg_token_409" + } + ] + }, + { + "node_id:": 2003, + "node_inputs:": [ + { + "input_dimension:": "18 384 ", + "input_name:": "gemm_output_reshape_arg_token_412" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_411" + } + ], + "node_name:": "gemm_output_reshape_token_413", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/feed_forward2/out_proj_7/Add_output_0" + } + ] + }, + { + "node_id:": 2004, + "node_inputs:": [ + { + "input_dimension:": "18 2048 ", + "input_name:": "gemm_input_reshape_arg_token_409" + }, + { + "input_dimension:": "2048 384 ", + "input_name:": "onnx::MatMul_7850" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.2.encoder.layers.1.feed_forward2.out_proj.bias" + } + ], + "node_name:": "/feed_forward2/out_proj_7/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "18 384 ", + "output_name:": "gemm_output_reshape_arg_token_412" + } + ] + }, + { + "node_id:": 2005, + "node_inputs:": [ + { + "input_dimension:": "18 384 ", + "input_name:": "gemm_output_reshape_arg_token_418" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_417" + } + ], + "node_name:": "gemm_output_reshape_token_419", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/out_proj2_7/Add_output_0" + } + ] + }, + { + "node_id:": 2006, + "node_inputs:": [ + { + "input_dimension:": "18 96 ", + "input_name:": "gemm_input_reshape_arg_token_415" + }, + { + "input_dimension:": "96 384 ", + "input_name:": "onnx::MatMul_7857" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.2.encoder.layers.1.self_attn.out_proj2.bias" + } + ], + "node_name:": "/out_proj2_7/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "18 384 ", + "output_name:": "gemm_output_reshape_arg_token_418" + } + ] + }, + { + "node_id:": 2007, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_141_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_420" + } + ], + "node_name:": "gemm_input_reshape_token_422", + "node_outputs:": [ + { + "output_dimension:": "18 384 ", + "output_name:": "gemm_input_reshape_arg_token_421" + } + ] + }, + { + "node_id:": 2008, + "node_inputs:": [ + { + "input_dimension:": "18 2048 ", + "input_name:": "gemm_output_reshape_arg_token_424" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_423" + } + ], + "node_name:": "gemm_output_reshape_token_425", + "node_outputs:": [ + { + "output_dimension:": "18 1 2048 ", + "output_name:": "/feed_forward3/in_proj_7/Add_output_0" + } + ] + }, + { + "node_id:": 2009, + "node_inputs:": [ + { + "input_dimension:": "18 384 ", + "input_name:": "gemm_input_reshape_arg_token_421" + }, + { + "input_dimension:": "384 2048 ", + "input_name:": "onnx::MatMul_7862" + }, + { + "input_dimension:": "2048 ", + "input_name:": "encoder.encoders.2.encoder.layers.1.feed_forward3.in_proj.bias" + } + ], + "node_name:": "/feed_forward3/in_proj_7/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "18 2048 ", + "output_name:": "gemm_output_reshape_arg_token_424" + } + ] + }, + { + "node_id:": 2010, + "node_inputs:": [ + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward3/activation_7/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_426" + } + ], + "node_name:": "gemm_input_reshape_token_428", + "node_outputs:": [ + { + "output_dimension:": "18 2048 ", + "output_name:": "gemm_input_reshape_arg_token_427" + } + ] + }, + { + "node_id:": 2011, + "node_inputs:": [ + { + "input_dimension:": "18 384 ", + "input_name:": "gemm_output_reshape_arg_token_430" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_429" + } + ], + "node_name:": "gemm_output_reshape_token_431", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/feed_forward3/out_proj_7/Add_output_0" + } + ] + }, + { + "node_id:": 2012, + "node_inputs:": [ + { + "input_dimension:": "18 2048 ", + "input_name:": "gemm_input_reshape_arg_token_427" + }, + { + "input_dimension:": "2048 384 ", + "input_name:": "onnx::MatMul_7863" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.2.encoder.layers.1.feed_forward3.out_proj.bias" + } + ], + "node_name:": "/feed_forward3/out_proj_7/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "18 384 ", + "output_name:": "gemm_output_reshape_arg_token_430" + } + ] + }, + { + "node_id:": 2013, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_143_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_432" + } + ], + "node_name:": "gemm_input_reshape_token_434", + "node_outputs:": [ + { + "output_dimension:": "18 384 ", + "output_name:": "gemm_input_reshape_arg_token_433" + } + ] + }, + { + "node_id:": 2014, + "node_inputs:": [ + { + "input_dimension:": "18 2048 ", + "input_name:": "gemm_output_reshape_arg_token_436" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_435" + } + ], + "node_name:": "gemm_output_reshape_token_437", + "node_outputs:": [ + { + "output_dimension:": "18 1 2048 ", + "output_name:": "/feed_forward1/in_proj_8/Add_output_0" + } + ] + }, + { + "node_id:": 2015, + "node_inputs:": [ + { + "input_dimension:": "18 384 ", + "input_name:": "gemm_input_reshape_arg_token_433" + }, + { + "input_dimension:": "384 2048 ", + "input_name:": "onnx::MatMul_7864" + }, + { + "input_dimension:": "2048 ", + "input_name:": "encoder.encoders.2.encoder.layers.2.feed_forward1.in_proj.bias" + } + ], + "node_name:": "/feed_forward1/in_proj_8/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "18 2048 ", + "output_name:": "gemm_output_reshape_arg_token_436" + } + ] + }, + { + "node_id:": 2016, + "node_inputs:": [ + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward1/activation_8/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_438" + } + ], + "node_name:": "gemm_input_reshape_token_440", + "node_outputs:": [ + { + "output_dimension:": "18 2048 ", + "output_name:": "gemm_input_reshape_arg_token_439" + } + ] + }, + { + "node_id:": 2017, + "node_inputs:": [ + { + "input_dimension:": "18 384 ", + "input_name:": "gemm_output_reshape_arg_token_442" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_441" + } + ], + "node_name:": "gemm_output_reshape_token_443", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/feed_forward1/out_proj_8/Add_output_0" + } + ] + }, + { + "node_id:": 2018, + "node_inputs:": [ + { + "input_dimension:": "18 2048 ", + "input_name:": "gemm_input_reshape_arg_token_439" + }, + { + "input_dimension:": "2048 384 ", + "input_name:": "onnx::MatMul_7865" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.2.encoder.layers.2.feed_forward1.out_proj.bias" + } + ], + "node_name:": "/feed_forward1/out_proj_8/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "18 384 ", + "output_name:": "gemm_output_reshape_arg_token_442" + } + ] + }, + { + "node_id:": 2019, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_149_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_444" + } + ], + "node_name:": "gemm_input_reshape_token_446", + "node_outputs:": [ + { + "output_dimension:": "18 384 ", + "output_name:": "gemm_input_reshape_arg_token_445" + } + ] + }, + { + "node_id:": 2020, + "node_inputs:": [ + { + "input_dimension:": "18 512 ", + "input_name:": "gemm_output_reshape_arg_token_448" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_447" + } + ], + "node_name:": "gemm_output_reshape_token_449", + "node_outputs:": [ + { + "output_dimension:": "18 1 512 ", + "output_name:": "/in_proj_8/Add_output_0" + } + ] + }, + { + "node_id:": 2021, + "node_inputs:": [ + { + "input_dimension:": "18 384 ", + "input_name:": "gemm_input_reshape_arg_token_445" + }, + { + "input_dimension:": "384 512 ", + "input_name:": "onnx::MatMul_7868" + }, + { + "input_dimension:": "512 ", + "input_name:": "encoder.encoders.2.encoder.layers.2.self_attn.in_proj.bias" + } + ], + "node_name:": "/in_proj_8/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "18 512 ", + "output_name:": "gemm_output_reshape_arg_token_448" + } + ] + }, + { + "node_id:": 2022, + "node_inputs:": [ + { + "input_dimension:": "18 384 ", + "input_name:": "gemm_output_reshape_arg_token_454" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_453" + } + ], + "node_name:": "gemm_output_reshape_token_455", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/Add_153_output_0" + } + ] + }, + { + "node_id:": 2023, + "node_inputs:": [ + { + "input_dimension:": "18 96 ", + "input_name:": "gemm_input_reshape_arg_token_451" + }, + { + "input_dimension:": "96 384 ", + "input_name:": "onnx::MatMul_7906" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.2.encoder.layers.2.self_attn.out_proj.bias" + } + ], + "node_name:": "/MatMul_43/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "18 384 ", + "output_name:": "gemm_output_reshape_arg_token_454" + } + ] + }, + { + "node_id:": 2024, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_155_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_456" + } + ], + "node_name:": "gemm_input_reshape_token_458", + "node_outputs:": [ + { + "output_dimension:": "18 384 ", + "output_name:": "gemm_input_reshape_arg_token_457" + } + ] + }, + { + "node_id:": 2025, + "node_inputs:": [ + { + "input_dimension:": "18 2048 ", + "input_name:": "gemm_output_reshape_arg_token_460" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_459" + } + ], + "node_name:": "gemm_output_reshape_token_461", + "node_outputs:": [ + { + "output_dimension:": "18 1 2048 ", + "output_name:": "/feed_forward2/in_proj_8/Add_output_0" + } + ] + }, + { + "node_id:": 2026, + "node_inputs:": [ + { + "input_dimension:": "18 384 ", + "input_name:": "gemm_input_reshape_arg_token_457" + }, + { + "input_dimension:": "384 2048 ", + "input_name:": "onnx::MatMul_7911" + }, + { + "input_dimension:": "2048 ", + "input_name:": "encoder.encoders.2.encoder.layers.2.feed_forward2.in_proj.bias" + } + ], + "node_name:": "/feed_forward2/in_proj_8/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "18 2048 ", + "output_name:": "gemm_output_reshape_arg_token_460" + } + ] + }, + { + "node_id:": 2027, + "node_inputs:": [ + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward2/activation_8/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_462" + } + ], + "node_name:": "gemm_input_reshape_token_464", + "node_outputs:": [ + { + "output_dimension:": "18 2048 ", + "output_name:": "gemm_input_reshape_arg_token_463" + } + ] + }, + { + "node_id:": 2028, + "node_inputs:": [ + { + "input_dimension:": "18 384 ", + "input_name:": "gemm_output_reshape_arg_token_466" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_465" + } + ], + "node_name:": "gemm_output_reshape_token_467", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/feed_forward2/out_proj_8/Add_output_0" + } + ] + }, + { + "node_id:": 2029, + "node_inputs:": [ + { + "input_dimension:": "18 2048 ", + "input_name:": "gemm_input_reshape_arg_token_463" + }, + { + "input_dimension:": "2048 384 ", + "input_name:": "onnx::MatMul_7912" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.2.encoder.layers.2.feed_forward2.out_proj.bias" + } + ], + "node_name:": "/feed_forward2/out_proj_8/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "18 384 ", + "output_name:": "gemm_output_reshape_arg_token_466" + } + ] + }, + { + "node_id:": 2030, + "node_inputs:": [ + { + "input_dimension:": "18 384 ", + "input_name:": "gemm_output_reshape_arg_token_472" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_471" + } + ], + "node_name:": "gemm_output_reshape_token_473", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/out_proj2_8/Add_output_0" + } + ] + }, + { + "node_id:": 2031, + "node_inputs:": [ + { + "input_dimension:": "18 96 ", + "input_name:": "gemm_input_reshape_arg_token_469" + }, + { + "input_dimension:": "96 384 ", + "input_name:": "onnx::MatMul_7919" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.2.encoder.layers.2.self_attn.out_proj2.bias" + } + ], + "node_name:": "/out_proj2_8/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "18 384 ", + "output_name:": "gemm_output_reshape_arg_token_472" + } + ] + }, + { + "node_id:": 2032, + "node_inputs:": [ + { + "input_dimension:": "18 1 384 ", + "input_name:": "/Add_159_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_474" + } + ], + "node_name:": "gemm_input_reshape_token_476", + "node_outputs:": [ + { + "output_dimension:": "18 384 ", + "output_name:": "gemm_input_reshape_arg_token_475" + } + ] + }, + { + "node_id:": 2033, + "node_inputs:": [ + { + "input_dimension:": "18 2048 ", + "input_name:": "gemm_output_reshape_arg_token_478" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_477" + } + ], + "node_name:": "gemm_output_reshape_token_479", + "node_outputs:": [ + { + "output_dimension:": "18 1 2048 ", + "output_name:": "/feed_forward3/in_proj_8/Add_output_0" + } + ] + }, + { + "node_id:": 2034, + "node_inputs:": [ + { + "input_dimension:": "18 384 ", + "input_name:": "gemm_input_reshape_arg_token_475" + }, + { + "input_dimension:": "384 2048 ", + "input_name:": "onnx::MatMul_7924" + }, + { + "input_dimension:": "2048 ", + "input_name:": "encoder.encoders.2.encoder.layers.2.feed_forward3.in_proj.bias" + } + ], + "node_name:": "/feed_forward3/in_proj_8/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "18 2048 ", + "output_name:": "gemm_output_reshape_arg_token_478" + } + ] + }, + { + "node_id:": 2035, + "node_inputs:": [ + { + "input_dimension:": "18 1 2048 ", + "input_name:": "/feed_forward3/activation_8/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_480" + } + ], + "node_name:": "gemm_input_reshape_token_482", + "node_outputs:": [ + { + "output_dimension:": "18 2048 ", + "output_name:": "gemm_input_reshape_arg_token_481" + } + ] + }, + { + "node_id:": 2036, + "node_inputs:": [ + { + "input_dimension:": "18 384 ", + "input_name:": "gemm_output_reshape_arg_token_484" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_483" + } + ], + "node_name:": "gemm_output_reshape_token_485", + "node_outputs:": [ + { + "output_dimension:": "18 1 384 ", + "output_name:": "/feed_forward3/out_proj_8/Add_output_0" + } + ] + }, + { + "node_id:": 2037, + "node_inputs:": [ + { + "input_dimension:": "18 2048 ", + "input_name:": "gemm_input_reshape_arg_token_481" + }, + { + "input_dimension:": "2048 384 ", + "input_name:": "onnx::MatMul_7925" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.2.encoder.layers.2.feed_forward3.out_proj.bias" + } + ], + "node_name:": "/feed_forward3/out_proj_8/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "18 384 ", + "output_name:": "gemm_output_reshape_arg_token_484" + } + ] + }, + { + "node_id:": 2038, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/downsample_2/ReduceSum_1_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_486" + } + ], + "node_name:": "gemm_input_reshape_token_488", + "node_outputs:": [ + { + "output_dimension:": "9 384 ", + "output_name:": "gemm_input_reshape_arg_token_487" + } + ] + }, + { + "node_id:": 2039, + "node_inputs:": [ + { + "input_dimension:": "9 2048 ", + "input_name:": "gemm_output_reshape_arg_token_490" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_489" + } + ], + "node_name:": "gemm_output_reshape_token_491", + "node_outputs:": [ + { + "output_dimension:": "9 1 2048 ", + "output_name:": "/feed_forward1/in_proj_9/Add_output_0" + } + ] + }, + { + "node_id:": 2040, + "node_inputs:": [ + { + "input_dimension:": "9 384 ", + "input_name:": "gemm_input_reshape_arg_token_487" + }, + { + "input_dimension:": "384 2048 ", + "input_name:": "onnx::MatMul_7935" + }, + { + "input_dimension:": "2048 ", + "input_name:": "encoder.encoders.3.encoder.layers.0.feed_forward1.in_proj.bias" + } + ], + "node_name:": "/feed_forward1/in_proj_9/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "9 2048 ", + "output_name:": "gemm_output_reshape_arg_token_490" + } + ] + }, + { + "node_id:": 2041, + "node_inputs:": [ + { + "input_dimension:": "9 1 2048 ", + "input_name:": "/feed_forward1/activation_9/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_492" + } + ], + "node_name:": "gemm_input_reshape_token_494", + "node_outputs:": [ + { + "output_dimension:": "9 2048 ", + "output_name:": "gemm_input_reshape_arg_token_493" + } + ] + }, + { + "node_id:": 2042, + "node_inputs:": [ + { + "input_dimension:": "9 384 ", + "input_name:": "gemm_output_reshape_arg_token_496" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_495" + } + ], + "node_name:": "gemm_output_reshape_token_497", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/feed_forward1/out_proj_9/Add_output_0" + } + ] + }, + { + "node_id:": 2043, + "node_inputs:": [ + { + "input_dimension:": "9 2048 ", + "input_name:": "gemm_input_reshape_arg_token_493" + }, + { + "input_dimension:": "2048 384 ", + "input_name:": "onnx::MatMul_7936" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.3.encoder.layers.0.feed_forward1.out_proj.bias" + } + ], + "node_name:": "/feed_forward1/out_proj_9/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "9 384 ", + "output_name:": "gemm_output_reshape_arg_token_496" + } + ] + }, + { + "node_id:": 2044, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Add_167_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_498" + } + ], + "node_name:": "gemm_input_reshape_token_500", + "node_outputs:": [ + { + "output_dimension:": "9 384 ", + "output_name:": "gemm_input_reshape_arg_token_499" + } + ] + }, + { + "node_id:": 2045, + "node_inputs:": [ + { + "input_dimension:": "9 512 ", + "input_name:": "gemm_output_reshape_arg_token_502" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_501" + } + ], + "node_name:": "gemm_output_reshape_token_503", + "node_outputs:": [ + { + "output_dimension:": "9 1 512 ", + "output_name:": "/in_proj_9/Add_output_0" + } + ] + }, + { + "node_id:": 2046, + "node_inputs:": [ + { + "input_dimension:": "9 384 ", + "input_name:": "gemm_input_reshape_arg_token_499" + }, + { + "input_dimension:": "384 512 ", + "input_name:": "onnx::MatMul_7939" + }, + { + "input_dimension:": "512 ", + "input_name:": "encoder.encoders.3.encoder.layers.0.self_attn.in_proj.bias" + } + ], + "node_name:": "/in_proj_9/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "9 512 ", + "output_name:": "gemm_output_reshape_arg_token_502" + } + ] + }, + { + "node_id:": 2047, + "node_inputs:": [ + { + "input_dimension:": "9 384 ", + "input_name:": "gemm_output_reshape_arg_token_508" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_507" + } + ], + "node_name:": "gemm_output_reshape_token_509", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/Add_171_output_0" + } + ] + }, + { + "node_id:": 2048, + "node_inputs:": [ + { + "input_dimension:": "9 96 ", + "input_name:": "gemm_input_reshape_arg_token_505" + }, + { + "input_dimension:": "96 384 ", + "input_name:": "onnx::MatMul_7977" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.3.encoder.layers.0.self_attn.out_proj.bias" + } + ], + "node_name:": "/MatMul_48/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "9 384 ", + "output_name:": "gemm_output_reshape_arg_token_508" + } + ] + }, + { + "node_id:": 2049, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Add_173_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_510" + } + ], + "node_name:": "gemm_input_reshape_token_512", + "node_outputs:": [ + { + "output_dimension:": "9 384 ", + "output_name:": "gemm_input_reshape_arg_token_511" + } + ] + }, + { + "node_id:": 2050, + "node_inputs:": [ + { + "input_dimension:": "9 2048 ", + "input_name:": "gemm_output_reshape_arg_token_514" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_513" + } + ], + "node_name:": "gemm_output_reshape_token_515", + "node_outputs:": [ + { + "output_dimension:": "9 1 2048 ", + "output_name:": "/feed_forward2/in_proj_9/Add_output_0" + } + ] + }, + { + "node_id:": 2051, + "node_inputs:": [ + { + "input_dimension:": "9 384 ", + "input_name:": "gemm_input_reshape_arg_token_511" + }, + { + "input_dimension:": "384 2048 ", + "input_name:": "onnx::MatMul_7982" + }, + { + "input_dimension:": "2048 ", + "input_name:": "encoder.encoders.3.encoder.layers.0.feed_forward2.in_proj.bias" + } + ], + "node_name:": "/feed_forward2/in_proj_9/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "9 2048 ", + "output_name:": "gemm_output_reshape_arg_token_514" + } + ] + }, + { + "node_id:": 2052, + "node_inputs:": [ + { + "input_dimension:": "9 1 2048 ", + "input_name:": "/feed_forward2/activation_9/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_516" + } + ], + "node_name:": "gemm_input_reshape_token_518", + "node_outputs:": [ + { + "output_dimension:": "9 2048 ", + "output_name:": "gemm_input_reshape_arg_token_517" + } + ] + }, + { + "node_id:": 2053, + "node_inputs:": [ + { + "input_dimension:": "9 384 ", + "input_name:": "gemm_output_reshape_arg_token_520" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_519" + } + ], + "node_name:": "gemm_output_reshape_token_521", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/feed_forward2/out_proj_9/Add_output_0" + } + ] + }, + { + "node_id:": 2054, + "node_inputs:": [ + { + "input_dimension:": "9 2048 ", + "input_name:": "gemm_input_reshape_arg_token_517" + }, + { + "input_dimension:": "2048 384 ", + "input_name:": "onnx::MatMul_7983" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.3.encoder.layers.0.feed_forward2.out_proj.bias" + } + ], + "node_name:": "/feed_forward2/out_proj_9/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "9 384 ", + "output_name:": "gemm_output_reshape_arg_token_520" + } + ] + }, + { + "node_id:": 2055, + "node_inputs:": [ + { + "input_dimension:": "9 384 ", + "input_name:": "gemm_output_reshape_arg_token_526" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_525" + } + ], + "node_name:": "gemm_output_reshape_token_527", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/out_proj2_9/Add_output_0" + } + ] + }, + { + "node_id:": 2056, + "node_inputs:": [ + { + "input_dimension:": "9 96 ", + "input_name:": "gemm_input_reshape_arg_token_523" + }, + { + "input_dimension:": "96 384 ", + "input_name:": "onnx::MatMul_7990" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.3.encoder.layers.0.self_attn.out_proj2.bias" + } + ], + "node_name:": "/out_proj2_9/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "9 384 ", + "output_name:": "gemm_output_reshape_arg_token_526" + } + ] + }, + { + "node_id:": 2057, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Add_177_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_528" + } + ], + "node_name:": "gemm_input_reshape_token_530", + "node_outputs:": [ + { + "output_dimension:": "9 384 ", + "output_name:": "gemm_input_reshape_arg_token_529" + } + ] + }, + { + "node_id:": 2058, + "node_inputs:": [ + { + "input_dimension:": "9 2048 ", + "input_name:": "gemm_output_reshape_arg_token_532" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_531" + } + ], + "node_name:": "gemm_output_reshape_token_533", + "node_outputs:": [ + { + "output_dimension:": "9 1 2048 ", + "output_name:": "/feed_forward3/in_proj_9/Add_output_0" + } + ] + }, + { + "node_id:": 2059, + "node_inputs:": [ + { + "input_dimension:": "9 384 ", + "input_name:": "gemm_input_reshape_arg_token_529" + }, + { + "input_dimension:": "384 2048 ", + "input_name:": "onnx::MatMul_7995" + }, + { + "input_dimension:": "2048 ", + "input_name:": "encoder.encoders.3.encoder.layers.0.feed_forward3.in_proj.bias" + } + ], + "node_name:": "/feed_forward3/in_proj_9/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "9 2048 ", + "output_name:": "gemm_output_reshape_arg_token_532" + } + ] + }, + { + "node_id:": 2060, + "node_inputs:": [ + { + "input_dimension:": "9 1 2048 ", + "input_name:": "/feed_forward3/activation_9/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_534" + } + ], + "node_name:": "gemm_input_reshape_token_536", + "node_outputs:": [ + { + "output_dimension:": "9 2048 ", + "output_name:": "gemm_input_reshape_arg_token_535" + } + ] + }, + { + "node_id:": 2061, + "node_inputs:": [ + { + "input_dimension:": "9 384 ", + "input_name:": "gemm_output_reshape_arg_token_538" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_537" + } + ], + "node_name:": "gemm_output_reshape_token_539", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/feed_forward3/out_proj_9/Add_output_0" + } + ] + }, + { + "node_id:": 2062, + "node_inputs:": [ + { + "input_dimension:": "9 2048 ", + "input_name:": "gemm_input_reshape_arg_token_535" + }, + { + "input_dimension:": "2048 384 ", + "input_name:": "onnx::MatMul_7996" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.3.encoder.layers.0.feed_forward3.out_proj.bias" + } + ], + "node_name:": "/feed_forward3/out_proj_9/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "9 384 ", + "output_name:": "gemm_output_reshape_arg_token_538" + } + ] + }, + { + "node_id:": 2063, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Add_179_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_540" + } + ], + "node_name:": "gemm_input_reshape_token_542", + "node_outputs:": [ + { + "output_dimension:": "9 384 ", + "output_name:": "gemm_input_reshape_arg_token_541" + } + ] + }, + { + "node_id:": 2064, + "node_inputs:": [ + { + "input_dimension:": "9 2048 ", + "input_name:": "gemm_output_reshape_arg_token_544" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_543" + } + ], + "node_name:": "gemm_output_reshape_token_545", + "node_outputs:": [ + { + "output_dimension:": "9 1 2048 ", + "output_name:": "/feed_forward1/in_proj_10/Add_output_0" + } + ] + }, + { + "node_id:": 2065, + "node_inputs:": [ + { + "input_dimension:": "9 384 ", + "input_name:": "gemm_input_reshape_arg_token_541" + }, + { + "input_dimension:": "384 2048 ", + "input_name:": "onnx::MatMul_7997" + }, + { + "input_dimension:": "2048 ", + "input_name:": "encoder.encoders.3.encoder.layers.1.feed_forward1.in_proj.bias" + } + ], + "node_name:": "/feed_forward1/in_proj_10/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "9 2048 ", + "output_name:": "gemm_output_reshape_arg_token_544" + } + ] + }, + { + "node_id:": 2066, + "node_inputs:": [ + { + "input_dimension:": "9 1 2048 ", + "input_name:": "/feed_forward1/activation_10/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_546" + } + ], + "node_name:": "gemm_input_reshape_token_548", + "node_outputs:": [ + { + "output_dimension:": "9 2048 ", + "output_name:": "gemm_input_reshape_arg_token_547" + } + ] + }, + { + "node_id:": 2067, + "node_inputs:": [ + { + "input_dimension:": "9 384 ", + "input_name:": "gemm_output_reshape_arg_token_550" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_549" + } + ], + "node_name:": "gemm_output_reshape_token_551", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/feed_forward1/out_proj_10/Add_output_0" + } + ] + }, + { + "node_id:": 2068, + "node_inputs:": [ + { + "input_dimension:": "9 2048 ", + "input_name:": "gemm_input_reshape_arg_token_547" + }, + { + "input_dimension:": "2048 384 ", + "input_name:": "onnx::MatMul_7998" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.bias" + } + ], + "node_name:": "/feed_forward1/out_proj_10/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "9 384 ", + "output_name:": "gemm_output_reshape_arg_token_550" + } + ] + }, + { + "node_id:": 2069, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Add_185_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_552" + } + ], + "node_name:": "gemm_input_reshape_token_554", + "node_outputs:": [ + { + "output_dimension:": "9 384 ", + "output_name:": "gemm_input_reshape_arg_token_553" + } + ] + }, + { + "node_id:": 2070, + "node_inputs:": [ + { + "input_dimension:": "9 512 ", + "input_name:": "gemm_output_reshape_arg_token_556" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_555" + } + ], + "node_name:": "gemm_output_reshape_token_557", + "node_outputs:": [ + { + "output_dimension:": "9 1 512 ", + "output_name:": "/in_proj_10/Add_output_0" + } + ] + }, + { + "node_id:": 2071, + "node_inputs:": [ + { + "input_dimension:": "9 384 ", + "input_name:": "gemm_input_reshape_arg_token_553" + }, + { + "input_dimension:": "384 512 ", + "input_name:": "onnx::MatMul_8001" + }, + { + "input_dimension:": "512 ", + "input_name:": "encoder.encoders.3.encoder.layers.1.self_attn.in_proj.bias" + } + ], + "node_name:": "/in_proj_10/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "9 512 ", + "output_name:": "gemm_output_reshape_arg_token_556" + } + ] + }, + { + "node_id:": 2072, + "node_inputs:": [ + { + "input_dimension:": "9 384 ", + "input_name:": "gemm_output_reshape_arg_token_562" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_561" + } + ], + "node_name:": "gemm_output_reshape_token_563", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/Add_189_output_0" + } + ] + }, + { + "node_id:": 2073, + "node_inputs:": [ + { + "input_dimension:": "9 96 ", + "input_name:": "gemm_input_reshape_arg_token_559" + }, + { + "input_dimension:": "96 384 ", + "input_name:": "onnx::MatMul_8039" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.3.encoder.layers.1.self_attn.out_proj.bias" + } + ], + "node_name:": "/MatMul_53/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "9 384 ", + "output_name:": "gemm_output_reshape_arg_token_562" + } + ] + }, + { + "node_id:": 2074, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Add_191_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_564" + } + ], + "node_name:": "gemm_input_reshape_token_566", + "node_outputs:": [ + { + "output_dimension:": "9 384 ", + "output_name:": "gemm_input_reshape_arg_token_565" + } + ] + }, + { + "node_id:": 2075, + "node_inputs:": [ + { + "input_dimension:": "9 2048 ", + "input_name:": "gemm_output_reshape_arg_token_568" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_567" + } + ], + "node_name:": "gemm_output_reshape_token_569", + "node_outputs:": [ + { + "output_dimension:": "9 1 2048 ", + "output_name:": "/feed_forward2/in_proj_10/Add_output_0" + } + ] + }, + { + "node_id:": 2076, + "node_inputs:": [ + { + "input_dimension:": "9 384 ", + "input_name:": "gemm_input_reshape_arg_token_565" + }, + { + "input_dimension:": "384 2048 ", + "input_name:": "onnx::MatMul_8044" + }, + { + "input_dimension:": "2048 ", + "input_name:": "encoder.encoders.3.encoder.layers.1.feed_forward2.in_proj.bias" + } + ], + "node_name:": "/feed_forward2/in_proj_10/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "9 2048 ", + "output_name:": "gemm_output_reshape_arg_token_568" + } + ] + }, + { + "node_id:": 2077, + "node_inputs:": [ + { + "input_dimension:": "9 1 2048 ", + "input_name:": "/feed_forward2/activation_10/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_570" + } + ], + "node_name:": "gemm_input_reshape_token_572", + "node_outputs:": [ + { + "output_dimension:": "9 2048 ", + "output_name:": "gemm_input_reshape_arg_token_571" + } + ] + }, + { + "node_id:": 2078, + "node_inputs:": [ + { + "input_dimension:": "9 384 ", + "input_name:": "gemm_output_reshape_arg_token_574" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_573" + } + ], + "node_name:": "gemm_output_reshape_token_575", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/feed_forward2/out_proj_10/Add_output_0" + } + ] + }, + { + "node_id:": 2079, + "node_inputs:": [ + { + "input_dimension:": "9 2048 ", + "input_name:": "gemm_input_reshape_arg_token_571" + }, + { + "input_dimension:": "2048 384 ", + "input_name:": "onnx::MatMul_8045" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.3.encoder.layers.1.feed_forward2.out_proj.bias" + } + ], + "node_name:": "/feed_forward2/out_proj_10/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "9 384 ", + "output_name:": "gemm_output_reshape_arg_token_574" + } + ] + }, + { + "node_id:": 2080, + "node_inputs:": [ + { + "input_dimension:": "9 384 ", + "input_name:": "gemm_output_reshape_arg_token_580" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_579" + } + ], + "node_name:": "gemm_output_reshape_token_581", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/out_proj2_10/Add_output_0" + } + ] + }, + { + "node_id:": 2081, + "node_inputs:": [ + { + "input_dimension:": "9 96 ", + "input_name:": "gemm_input_reshape_arg_token_577" + }, + { + "input_dimension:": "96 384 ", + "input_name:": "onnx::MatMul_8052" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.3.encoder.layers.1.self_attn.out_proj2.bias" + } + ], + "node_name:": "/out_proj2_10/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "9 384 ", + "output_name:": "gemm_output_reshape_arg_token_580" + } + ] + }, + { + "node_id:": 2082, + "node_inputs:": [ + { + "input_dimension:": "9 1 384 ", + "input_name:": "/Add_195_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_582" + } + ], + "node_name:": "gemm_input_reshape_token_584", + "node_outputs:": [ + { + "output_dimension:": "9 384 ", + "output_name:": "gemm_input_reshape_arg_token_583" + } + ] + }, + { + "node_id:": 2083, + "node_inputs:": [ + { + "input_dimension:": "9 2048 ", + "input_name:": "gemm_output_reshape_arg_token_586" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_585" + } + ], + "node_name:": "gemm_output_reshape_token_587", + "node_outputs:": [ + { + "output_dimension:": "9 1 2048 ", + "output_name:": "/feed_forward3/in_proj_10/Add_output_0" + } + ] + }, + { + "node_id:": 2084, + "node_inputs:": [ + { + "input_dimension:": "9 384 ", + "input_name:": "gemm_input_reshape_arg_token_583" + }, + { + "input_dimension:": "384 2048 ", + "input_name:": "onnx::MatMul_8057" + }, + { + "input_dimension:": "2048 ", + "input_name:": "encoder.encoders.3.encoder.layers.1.feed_forward3.in_proj.bias" + } + ], + "node_name:": "/feed_forward3/in_proj_10/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "9 2048 ", + "output_name:": "gemm_output_reshape_arg_token_586" + } + ] + }, + { + "node_id:": 2085, + "node_inputs:": [ + { + "input_dimension:": "9 1 2048 ", + "input_name:": "/feed_forward3/activation_10/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_588" + } + ], + "node_name:": "gemm_input_reshape_token_590", + "node_outputs:": [ + { + "output_dimension:": "9 2048 ", + "output_name:": "gemm_input_reshape_arg_token_589" + } + ] + }, + { + "node_id:": 2086, + "node_inputs:": [ + { + "input_dimension:": "9 384 ", + "input_name:": "gemm_output_reshape_arg_token_592" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_591" + } + ], + "node_name:": "gemm_output_reshape_token_593", + "node_outputs:": [ + { + "output_dimension:": "9 1 384 ", + "output_name:": "/feed_forward3/out_proj_10/Add_output_0" + } + ] + }, + { + "node_id:": 2087, + "node_inputs:": [ + { + "input_dimension:": "9 2048 ", + "input_name:": "gemm_input_reshape_arg_token_589" + }, + { + "input_dimension:": "2048 384 ", + "input_name:": "onnx::MatMul_8058" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.3.encoder.layers.1.feed_forward3.out_proj.bias" + } + ], + "node_name:": "/feed_forward3/out_proj_10/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "9 384 ", + "output_name:": "gemm_output_reshape_arg_token_592" + } + ] + }, + { + "node_id:": 2088, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/downsample_3/ReduceSum_1_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_594" + } + ], + "node_name:": "gemm_input_reshape_token_596", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_input_reshape_arg_token_595" + } + ] + }, + { + "node_id:": 2089, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_output_reshape_arg_token_598" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_597" + } + ], + "node_name:": "gemm_output_reshape_token_599", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward1/in_proj_11/Add_output_0" + } + ] + }, + { + "node_id:": 2090, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_input_reshape_arg_token_595" + }, + { + "input_dimension:": "384 1024 ", + "input_name:": "onnx::MatMul_8069" + }, + { + "input_dimension:": "1024 ", + "input_name:": "encoder.encoders.4.encoder.layers.0.feed_forward1.in_proj.bias" + } + ], + "node_name:": "/feed_forward1/in_proj_11/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_output_reshape_arg_token_598" + } + ] + }, + { + "node_id:": 2091, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward1/activation_11/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_600" + } + ], + "node_name:": "gemm_input_reshape_token_602", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_input_reshape_arg_token_601" + } + ] + }, + { + "node_id:": 2092, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_output_reshape_arg_token_604" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_603" + } + ], + "node_name:": "gemm_output_reshape_token_605", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/feed_forward1/out_proj_11/Add_output_0" + } + ] + }, + { + "node_id:": 2093, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_input_reshape_arg_token_601" + }, + { + "input_dimension:": "1024 384 ", + "input_name:": "onnx::MatMul_8070" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.4.encoder.layers.0.feed_forward1.out_proj.bias" + } + ], + "node_name:": "/feed_forward1/out_proj_11/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_output_reshape_arg_token_604" + } + ] + }, + { + "node_id:": 2094, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_203_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_606" + } + ], + "node_name:": "gemm_input_reshape_token_608", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_input_reshape_arg_token_607" + } + ] + }, + { + "node_id:": 2095, + "node_inputs:": [ + { + "input_dimension:": "36 512 ", + "input_name:": "gemm_output_reshape_arg_token_610" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_609" + } + ], + "node_name:": "gemm_output_reshape_token_611", + "node_outputs:": [ + { + "output_dimension:": "36 1 512 ", + "output_name:": "/in_proj_11/Add_output_0" + } + ] + }, + { + "node_id:": 2096, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_input_reshape_arg_token_607" + }, + { + "input_dimension:": "384 512 ", + "input_name:": "onnx::MatMul_8073" + }, + { + "input_dimension:": "512 ", + "input_name:": "encoder.encoders.4.encoder.layers.0.self_attn.in_proj.bias" + } + ], + "node_name:": "/in_proj_11/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 512 ", + "output_name:": "gemm_output_reshape_arg_token_610" + } + ] + }, + { + "node_id:": 2097, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_output_reshape_arg_token_616" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_615" + } + ], + "node_name:": "gemm_output_reshape_token_617", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_207_output_0" + } + ] + }, + { + "node_id:": 2098, + "node_inputs:": [ + { + "input_dimension:": "36 96 ", + "input_name:": "gemm_input_reshape_arg_token_613" + }, + { + "input_dimension:": "96 384 ", + "input_name:": "onnx::MatMul_8111" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.4.encoder.layers.0.self_attn.out_proj.bias" + } + ], + "node_name:": "/MatMul_58/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_output_reshape_arg_token_616" + } + ] + }, + { + "node_id:": 2099, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_209_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_618" + } + ], + "node_name:": "gemm_input_reshape_token_620", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_input_reshape_arg_token_619" + } + ] + }, + { + "node_id:": 2100, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_output_reshape_arg_token_622" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_621" + } + ], + "node_name:": "gemm_output_reshape_token_623", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward2/in_proj_11/Add_output_0" + } + ] + }, + { + "node_id:": 2101, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_input_reshape_arg_token_619" + }, + { + "input_dimension:": "384 1024 ", + "input_name:": "onnx::MatMul_8116" + }, + { + "input_dimension:": "1024 ", + "input_name:": "encoder.encoders.4.encoder.layers.0.feed_forward2.in_proj.bias" + } + ], + "node_name:": "/feed_forward2/in_proj_11/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_output_reshape_arg_token_622" + } + ] + }, + { + "node_id:": 2102, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward2/activation_11/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_624" + } + ], + "node_name:": "gemm_input_reshape_token_626", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_input_reshape_arg_token_625" + } + ] + }, + { + "node_id:": 2103, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_output_reshape_arg_token_628" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_627" + } + ], + "node_name:": "gemm_output_reshape_token_629", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/feed_forward2/out_proj_11/Add_output_0" + } + ] + }, + { + "node_id:": 2104, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_input_reshape_arg_token_625" + }, + { + "input_dimension:": "1024 384 ", + "input_name:": "onnx::MatMul_8117" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.4.encoder.layers.0.feed_forward2.out_proj.bias" + } + ], + "node_name:": "/feed_forward2/out_proj_11/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_output_reshape_arg_token_628" + } + ] + }, + { + "node_id:": 2105, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_output_reshape_arg_token_634" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_633" + } + ], + "node_name:": "gemm_output_reshape_token_635", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/out_proj2_11/Add_output_0" + } + ] + }, + { + "node_id:": 2106, + "node_inputs:": [ + { + "input_dimension:": "36 96 ", + "input_name:": "gemm_input_reshape_arg_token_631" + }, + { + "input_dimension:": "96 384 ", + "input_name:": "onnx::MatMul_8124" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.4.encoder.layers.0.self_attn.out_proj2.bias" + } + ], + "node_name:": "/out_proj2_11/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_output_reshape_arg_token_634" + } + ] + }, + { + "node_id:": 2107, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_213_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_636" + } + ], + "node_name:": "gemm_input_reshape_token_638", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_input_reshape_arg_token_637" + } + ] + }, + { + "node_id:": 2108, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_output_reshape_arg_token_640" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_639" + } + ], + "node_name:": "gemm_output_reshape_token_641", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward3/in_proj_11/Add_output_0" + } + ] + }, + { + "node_id:": 2109, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_input_reshape_arg_token_637" + }, + { + "input_dimension:": "384 1024 ", + "input_name:": "onnx::MatMul_8129" + }, + { + "input_dimension:": "1024 ", + "input_name:": "encoder.encoders.4.encoder.layers.0.feed_forward3.in_proj.bias" + } + ], + "node_name:": "/feed_forward3/in_proj_11/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_output_reshape_arg_token_640" + } + ] + }, + { + "node_id:": 2110, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward3/activation_11/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_642" + } + ], + "node_name:": "gemm_input_reshape_token_644", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_input_reshape_arg_token_643" + } + ] + }, + { + "node_id:": 2111, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_output_reshape_arg_token_646" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_645" + } + ], + "node_name:": "gemm_output_reshape_token_647", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/feed_forward3/out_proj_11/Add_output_0" + } + ] + }, + { + "node_id:": 2112, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_input_reshape_arg_token_643" + }, + { + "input_dimension:": "1024 384 ", + "input_name:": "onnx::MatMul_8130" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.4.encoder.layers.0.feed_forward3.out_proj.bias" + } + ], + "node_name:": "/feed_forward3/out_proj_11/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_output_reshape_arg_token_646" + } + ] + }, + { + "node_id:": 2113, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_215_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_648" + } + ], + "node_name:": "gemm_input_reshape_token_650", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_input_reshape_arg_token_649" + } + ] + }, + { + "node_id:": 2114, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_output_reshape_arg_token_652" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_651" + } + ], + "node_name:": "gemm_output_reshape_token_653", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward1/in_proj_12/Add_output_0" + } + ] + }, + { + "node_id:": 2115, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_input_reshape_arg_token_649" + }, + { + "input_dimension:": "384 1024 ", + "input_name:": "onnx::MatMul_8131" + }, + { + "input_dimension:": "1024 ", + "input_name:": "encoder.encoders.4.encoder.layers.1.feed_forward1.in_proj.bias" + } + ], + "node_name:": "/feed_forward1/in_proj_12/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_output_reshape_arg_token_652" + } + ] + }, + { + "node_id:": 2116, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward1/activation_12/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_654" + } + ], + "node_name:": "gemm_input_reshape_token_656", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_input_reshape_arg_token_655" + } + ] + }, + { + "node_id:": 2117, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_output_reshape_arg_token_658" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_657" + } + ], + "node_name:": "gemm_output_reshape_token_659", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/feed_forward1/out_proj_12/Add_output_0" + } + ] + }, + { + "node_id:": 2118, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_input_reshape_arg_token_655" + }, + { + "input_dimension:": "1024 384 ", + "input_name:": "onnx::MatMul_8132" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.bias" + } + ], + "node_name:": "/feed_forward1/out_proj_12/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_output_reshape_arg_token_658" + } + ] + }, + { + "node_id:": 2119, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_221_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_660" + } + ], + "node_name:": "gemm_input_reshape_token_662", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_input_reshape_arg_token_661" + } + ] + }, + { + "node_id:": 2120, + "node_inputs:": [ + { + "input_dimension:": "36 512 ", + "input_name:": "gemm_output_reshape_arg_token_664" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_663" + } + ], + "node_name:": "gemm_output_reshape_token_665", + "node_outputs:": [ + { + "output_dimension:": "36 1 512 ", + "output_name:": "/in_proj_12/Add_output_0" + } + ] + }, + { + "node_id:": 2121, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_input_reshape_arg_token_661" + }, + { + "input_dimension:": "384 512 ", + "input_name:": "onnx::MatMul_8135" + }, + { + "input_dimension:": "512 ", + "input_name:": "encoder.encoders.4.encoder.layers.1.self_attn.in_proj.bias" + } + ], + "node_name:": "/in_proj_12/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 512 ", + "output_name:": "gemm_output_reshape_arg_token_664" + } + ] + }, + { + "node_id:": 2122, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_output_reshape_arg_token_670" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_669" + } + ], + "node_name:": "gemm_output_reshape_token_671", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_225_output_0" + } + ] + }, + { + "node_id:": 2123, + "node_inputs:": [ + { + "input_dimension:": "36 96 ", + "input_name:": "gemm_input_reshape_arg_token_667" + }, + { + "input_dimension:": "96 384 ", + "input_name:": "onnx::MatMul_8173" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.4.encoder.layers.1.self_attn.out_proj.bias" + } + ], + "node_name:": "/MatMul_63/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_output_reshape_arg_token_670" + } + ] + }, + { + "node_id:": 2124, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_227_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_672" + } + ], + "node_name:": "gemm_input_reshape_token_674", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_input_reshape_arg_token_673" + } + ] + }, + { + "node_id:": 2125, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_output_reshape_arg_token_676" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_675" + } + ], + "node_name:": "gemm_output_reshape_token_677", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward2/in_proj_12/Add_output_0" + } + ] + }, + { + "node_id:": 2126, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_input_reshape_arg_token_673" + }, + { + "input_dimension:": "384 1024 ", + "input_name:": "onnx::MatMul_8178" + }, + { + "input_dimension:": "1024 ", + "input_name:": "encoder.encoders.4.encoder.layers.1.feed_forward2.in_proj.bias" + } + ], + "node_name:": "/feed_forward2/in_proj_12/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_output_reshape_arg_token_676" + } + ] + }, + { + "node_id:": 2127, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward2/activation_12/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_678" + } + ], + "node_name:": "gemm_input_reshape_token_680", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_input_reshape_arg_token_679" + } + ] + }, + { + "node_id:": 2128, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_output_reshape_arg_token_682" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_681" + } + ], + "node_name:": "gemm_output_reshape_token_683", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/feed_forward2/out_proj_12/Add_output_0" + } + ] + }, + { + "node_id:": 2129, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_input_reshape_arg_token_679" + }, + { + "input_dimension:": "1024 384 ", + "input_name:": "onnx::MatMul_8179" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.4.encoder.layers.1.feed_forward2.out_proj.bias" + } + ], + "node_name:": "/feed_forward2/out_proj_12/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_output_reshape_arg_token_682" + } + ] + }, + { + "node_id:": 2130, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_output_reshape_arg_token_688" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_687" + } + ], + "node_name:": "gemm_output_reshape_token_689", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/out_proj2_12/Add_output_0" + } + ] + }, + { + "node_id:": 2131, + "node_inputs:": [ + { + "input_dimension:": "36 96 ", + "input_name:": "gemm_input_reshape_arg_token_685" + }, + { + "input_dimension:": "96 384 ", + "input_name:": "onnx::MatMul_8186" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.4.encoder.layers.1.self_attn.out_proj2.bias" + } + ], + "node_name:": "/out_proj2_12/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_output_reshape_arg_token_688" + } + ] + }, + { + "node_id:": 2132, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_231_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_690" + } + ], + "node_name:": "gemm_input_reshape_token_692", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_input_reshape_arg_token_691" + } + ] + }, + { + "node_id:": 2133, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_output_reshape_arg_token_694" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_693" + } + ], + "node_name:": "gemm_output_reshape_token_695", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward3/in_proj_12/Add_output_0" + } + ] + }, + { + "node_id:": 2134, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_input_reshape_arg_token_691" + }, + { + "input_dimension:": "384 1024 ", + "input_name:": "onnx::MatMul_8191" + }, + { + "input_dimension:": "1024 ", + "input_name:": "encoder.encoders.4.encoder.layers.1.feed_forward3.in_proj.bias" + } + ], + "node_name:": "/feed_forward3/in_proj_12/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_output_reshape_arg_token_694" + } + ] + }, + { + "node_id:": 2135, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward3/activation_12/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_696" + } + ], + "node_name:": "gemm_input_reshape_token_698", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_input_reshape_arg_token_697" + } + ] + }, + { + "node_id:": 2136, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_output_reshape_arg_token_700" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_699" + } + ], + "node_name:": "gemm_output_reshape_token_701", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/feed_forward3/out_proj_12/Add_output_0" + } + ] + }, + { + "node_id:": 2137, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_input_reshape_arg_token_697" + }, + { + "input_dimension:": "1024 384 ", + "input_name:": "onnx::MatMul_8192" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.4.encoder.layers.1.feed_forward3.out_proj.bias" + } + ], + "node_name:": "/feed_forward3/out_proj_12/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_output_reshape_arg_token_700" + } + ] + }, + { + "node_id:": 2138, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_233_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_702" + } + ], + "node_name:": "gemm_input_reshape_token_704", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_input_reshape_arg_token_703" + } + ] + }, + { + "node_id:": 2139, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_output_reshape_arg_token_706" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_705" + } + ], + "node_name:": "gemm_output_reshape_token_707", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward1/in_proj_13/Add_output_0" + } + ] + }, + { + "node_id:": 2140, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_input_reshape_arg_token_703" + }, + { + "input_dimension:": "384 1024 ", + "input_name:": "onnx::MatMul_8193" + }, + { + "input_dimension:": "1024 ", + "input_name:": "encoder.encoders.4.encoder.layers.2.feed_forward1.in_proj.bias" + } + ], + "node_name:": "/feed_forward1/in_proj_13/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_output_reshape_arg_token_706" + } + ] + }, + { + "node_id:": 2141, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward1/activation_13/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_708" + } + ], + "node_name:": "gemm_input_reshape_token_710", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_input_reshape_arg_token_709" + } + ] + }, + { + "node_id:": 2142, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_output_reshape_arg_token_712" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_711" + } + ], + "node_name:": "gemm_output_reshape_token_713", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/feed_forward1/out_proj_13/Add_output_0" + } + ] + }, + { + "node_id:": 2143, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_input_reshape_arg_token_709" + }, + { + "input_dimension:": "1024 384 ", + "input_name:": "onnx::MatMul_8194" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.4.encoder.layers.2.feed_forward1.out_proj.bias" + } + ], + "node_name:": "/feed_forward1/out_proj_13/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_output_reshape_arg_token_712" + } + ] + }, + { + "node_id:": 2144, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_239_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_714" + } + ], + "node_name:": "gemm_input_reshape_token_716", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_input_reshape_arg_token_715" + } + ] + }, + { + "node_id:": 2145, + "node_inputs:": [ + { + "input_dimension:": "36 512 ", + "input_name:": "gemm_output_reshape_arg_token_718" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_717" + } + ], + "node_name:": "gemm_output_reshape_token_719", + "node_outputs:": [ + { + "output_dimension:": "36 1 512 ", + "output_name:": "/in_proj_13/Add_output_0" + } + ] + }, + { + "node_id:": 2146, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_input_reshape_arg_token_715" + }, + { + "input_dimension:": "384 512 ", + "input_name:": "onnx::MatMul_8197" + }, + { + "input_dimension:": "512 ", + "input_name:": "encoder.encoders.4.encoder.layers.2.self_attn.in_proj.bias" + } + ], + "node_name:": "/in_proj_13/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 512 ", + "output_name:": "gemm_output_reshape_arg_token_718" + } + ] + }, + { + "node_id:": 2147, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_output_reshape_arg_token_724" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_723" + } + ], + "node_name:": "gemm_output_reshape_token_725", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_243_output_0" + } + ] + }, + { + "node_id:": 2148, + "node_inputs:": [ + { + "input_dimension:": "36 96 ", + "input_name:": "gemm_input_reshape_arg_token_721" + }, + { + "input_dimension:": "96 384 ", + "input_name:": "onnx::MatMul_8235" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.4.encoder.layers.2.self_attn.out_proj.bias" + } + ], + "node_name:": "/MatMul_68/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_output_reshape_arg_token_724" + } + ] + }, + { + "node_id:": 2149, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_245_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_726" + } + ], + "node_name:": "gemm_input_reshape_token_728", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_input_reshape_arg_token_727" + } + ] + }, + { + "node_id:": 2150, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_output_reshape_arg_token_730" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_729" + } + ], + "node_name:": "gemm_output_reshape_token_731", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward2/in_proj_13/Add_output_0" + } + ] + }, + { + "node_id:": 2151, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_input_reshape_arg_token_727" + }, + { + "input_dimension:": "384 1024 ", + "input_name:": "onnx::MatMul_8240" + }, + { + "input_dimension:": "1024 ", + "input_name:": "encoder.encoders.4.encoder.layers.2.feed_forward2.in_proj.bias" + } + ], + "node_name:": "/feed_forward2/in_proj_13/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_output_reshape_arg_token_730" + } + ] + }, + { + "node_id:": 2152, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward2/activation_13/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_732" + } + ], + "node_name:": "gemm_input_reshape_token_734", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_input_reshape_arg_token_733" + } + ] + }, + { + "node_id:": 2153, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_output_reshape_arg_token_736" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_735" + } + ], + "node_name:": "gemm_output_reshape_token_737", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/feed_forward2/out_proj_13/Add_output_0" + } + ] + }, + { + "node_id:": 2154, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_input_reshape_arg_token_733" + }, + { + "input_dimension:": "1024 384 ", + "input_name:": "onnx::MatMul_8241" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.4.encoder.layers.2.feed_forward2.out_proj.bias" + } + ], + "node_name:": "/feed_forward2/out_proj_13/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_output_reshape_arg_token_736" + } + ] + }, + { + "node_id:": 2155, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_output_reshape_arg_token_742" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_741" + } + ], + "node_name:": "gemm_output_reshape_token_743", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/out_proj2_13/Add_output_0" + } + ] + }, + { + "node_id:": 2156, + "node_inputs:": [ + { + "input_dimension:": "36 96 ", + "input_name:": "gemm_input_reshape_arg_token_739" + }, + { + "input_dimension:": "96 384 ", + "input_name:": "onnx::MatMul_8248" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.4.encoder.layers.2.self_attn.out_proj2.bias" + } + ], + "node_name:": "/out_proj2_13/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_output_reshape_arg_token_742" + } + ] + }, + { + "node_id:": 2157, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_249_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_744" + } + ], + "node_name:": "gemm_input_reshape_token_746", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_input_reshape_arg_token_745" + } + ] + }, + { + "node_id:": 2158, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_output_reshape_arg_token_748" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_747" + } + ], + "node_name:": "gemm_output_reshape_token_749", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward3/in_proj_13/Add_output_0" + } + ] + }, + { + "node_id:": 2159, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_input_reshape_arg_token_745" + }, + { + "input_dimension:": "384 1024 ", + "input_name:": "onnx::MatMul_8253" + }, + { + "input_dimension:": "1024 ", + "input_name:": "encoder.encoders.4.encoder.layers.2.feed_forward3.in_proj.bias" + } + ], + "node_name:": "/feed_forward3/in_proj_13/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_output_reshape_arg_token_748" + } + ] + }, + { + "node_id:": 2160, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward3/activation_13/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_750" + } + ], + "node_name:": "gemm_input_reshape_token_752", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_input_reshape_arg_token_751" + } + ] + }, + { + "node_id:": 2161, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_output_reshape_arg_token_754" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_753" + } + ], + "node_name:": "gemm_output_reshape_token_755", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/feed_forward3/out_proj_13/Add_output_0" + } + ] + }, + { + "node_id:": 2162, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_input_reshape_arg_token_751" + }, + { + "input_dimension:": "1024 384 ", + "input_name:": "onnx::MatMul_8254" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.4.encoder.layers.2.feed_forward3.out_proj.bias" + } + ], + "node_name:": "/feed_forward3/out_proj_13/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_output_reshape_arg_token_754" + } + ] + }, + { + "node_id:": 2163, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_251_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_756" + } + ], + "node_name:": "gemm_input_reshape_token_758", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_input_reshape_arg_token_757" + } + ] + }, + { + "node_id:": 2164, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_output_reshape_arg_token_760" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_759" + } + ], + "node_name:": "gemm_output_reshape_token_761", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward1/in_proj_14/Add_output_0" + } + ] + }, + { + "node_id:": 2165, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_input_reshape_arg_token_757" + }, + { + "input_dimension:": "384 1024 ", + "input_name:": "onnx::MatMul_8255" + }, + { + "input_dimension:": "1024 ", + "input_name:": "encoder.encoders.4.encoder.layers.3.feed_forward1.in_proj.bias" + } + ], + "node_name:": "/feed_forward1/in_proj_14/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_output_reshape_arg_token_760" + } + ] + }, + { + "node_id:": 2166, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward1/activation_14/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_762" + } + ], + "node_name:": "gemm_input_reshape_token_764", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_input_reshape_arg_token_763" + } + ] + }, + { + "node_id:": 2167, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_output_reshape_arg_token_766" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_765" + } + ], + "node_name:": "gemm_output_reshape_token_767", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/feed_forward1/out_proj_14/Add_output_0" + } + ] + }, + { + "node_id:": 2168, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_input_reshape_arg_token_763" + }, + { + "input_dimension:": "1024 384 ", + "input_name:": "onnx::MatMul_8256" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.4.encoder.layers.3.feed_forward1.out_proj.bias" + } + ], + "node_name:": "/feed_forward1/out_proj_14/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_output_reshape_arg_token_766" + } + ] + }, + { + "node_id:": 2169, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_257_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_768" + } + ], + "node_name:": "gemm_input_reshape_token_770", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_input_reshape_arg_token_769" + } + ] + }, + { + "node_id:": 2170, + "node_inputs:": [ + { + "input_dimension:": "36 512 ", + "input_name:": "gemm_output_reshape_arg_token_772" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_771" + } + ], + "node_name:": "gemm_output_reshape_token_773", + "node_outputs:": [ + { + "output_dimension:": "36 1 512 ", + "output_name:": "/in_proj_14/Add_output_0" + } + ] + }, + { + "node_id:": 2171, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_input_reshape_arg_token_769" + }, + { + "input_dimension:": "384 512 ", + "input_name:": "onnx::MatMul_8259" + }, + { + "input_dimension:": "512 ", + "input_name:": "encoder.encoders.4.encoder.layers.3.self_attn.in_proj.bias" + } + ], + "node_name:": "/in_proj_14/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 512 ", + "output_name:": "gemm_output_reshape_arg_token_772" + } + ] + }, + { + "node_id:": 2172, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_output_reshape_arg_token_778" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_777" + } + ], + "node_name:": "gemm_output_reshape_token_779", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/Add_261_output_0" + } + ] + }, + { + "node_id:": 2173, + "node_inputs:": [ + { + "input_dimension:": "36 96 ", + "input_name:": "gemm_input_reshape_arg_token_775" + }, + { + "input_dimension:": "96 384 ", + "input_name:": "onnx::MatMul_8297" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.4.encoder.layers.3.self_attn.out_proj.bias" + } + ], + "node_name:": "/MatMul_73/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_output_reshape_arg_token_778" + } + ] + }, + { + "node_id:": 2174, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_263_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_780" + } + ], + "node_name:": "gemm_input_reshape_token_782", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_input_reshape_arg_token_781" + } + ] + }, + { + "node_id:": 2175, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_output_reshape_arg_token_784" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_783" + } + ], + "node_name:": "gemm_output_reshape_token_785", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward2/in_proj_14/Add_output_0" + } + ] + }, + { + "node_id:": 2176, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_input_reshape_arg_token_781" + }, + { + "input_dimension:": "384 1024 ", + "input_name:": "onnx::MatMul_8302" + }, + { + "input_dimension:": "1024 ", + "input_name:": "encoder.encoders.4.encoder.layers.3.feed_forward2.in_proj.bias" + } + ], + "node_name:": "/feed_forward2/in_proj_14/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_output_reshape_arg_token_784" + } + ] + }, + { + "node_id:": 2177, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward2/activation_14/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_786" + } + ], + "node_name:": "gemm_input_reshape_token_788", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_input_reshape_arg_token_787" + } + ] + }, + { + "node_id:": 2178, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_output_reshape_arg_token_790" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_789" + } + ], + "node_name:": "gemm_output_reshape_token_791", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/feed_forward2/out_proj_14/Add_output_0" + } + ] + }, + { + "node_id:": 2179, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_input_reshape_arg_token_787" + }, + { + "input_dimension:": "1024 384 ", + "input_name:": "onnx::MatMul_8303" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.4.encoder.layers.3.feed_forward2.out_proj.bias" + } + ], + "node_name:": "/feed_forward2/out_proj_14/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_output_reshape_arg_token_790" + } + ] + }, + { + "node_id:": 2180, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_output_reshape_arg_token_796" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_795" + } + ], + "node_name:": "gemm_output_reshape_token_797", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/out_proj2_14/Add_output_0" + } + ] + }, + { + "node_id:": 2181, + "node_inputs:": [ + { + "input_dimension:": "36 96 ", + "input_name:": "gemm_input_reshape_arg_token_793" + }, + { + "input_dimension:": "96 384 ", + "input_name:": "onnx::MatMul_8310" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.4.encoder.layers.3.self_attn.out_proj2.bias" + } + ], + "node_name:": "/out_proj2_14/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_output_reshape_arg_token_796" + } + ] + }, + { + "node_id:": 2182, + "node_inputs:": [ + { + "input_dimension:": "36 1 384 ", + "input_name:": "/Add_267_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_798" + } + ], + "node_name:": "gemm_input_reshape_token_800", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_input_reshape_arg_token_799" + } + ] + }, + { + "node_id:": 2183, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_output_reshape_arg_token_802" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_801" + } + ], + "node_name:": "gemm_output_reshape_token_803", + "node_outputs:": [ + { + "output_dimension:": "36 1 1024 ", + "output_name:": "/feed_forward3/in_proj_14/Add_output_0" + } + ] + }, + { + "node_id:": 2184, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_input_reshape_arg_token_799" + }, + { + "input_dimension:": "384 1024 ", + "input_name:": "onnx::MatMul_8315" + }, + { + "input_dimension:": "1024 ", + "input_name:": "encoder.encoders.4.encoder.layers.3.feed_forward3.in_proj.bias" + } + ], + "node_name:": "/feed_forward3/in_proj_14/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_output_reshape_arg_token_802" + } + ] + }, + { + "node_id:": 2185, + "node_inputs:": [ + { + "input_dimension:": "36 1 1024 ", + "input_name:": "/feed_forward3/activation_14/Mul_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_804" + } + ], + "node_name:": "gemm_input_reshape_token_806", + "node_outputs:": [ + { + "output_dimension:": "36 1024 ", + "output_name:": "gemm_input_reshape_arg_token_805" + } + ] + }, + { + "node_id:": 2186, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_output_reshape_arg_token_808" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_807" + } + ], + "node_name:": "gemm_output_reshape_token_809", + "node_outputs:": [ + { + "output_dimension:": "36 1 384 ", + "output_name:": "/feed_forward3/out_proj_14/Add_output_0" + } + ] + }, + { + "node_id:": 2187, + "node_inputs:": [ + { + "input_dimension:": "36 1024 ", + "input_name:": "gemm_input_reshape_arg_token_805" + }, + { + "input_dimension:": "1024 384 ", + "input_name:": "onnx::MatMul_8316" + }, + { + "input_dimension:": "384 ", + "input_name:": "encoder.encoders.4.encoder.layers.3.feed_forward3.out_proj.bias" + } + ], + "node_name:": "/feed_forward3/out_proj_14/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_output_reshape_arg_token_808" + } + ] + }, + { + "node_id:": 2188, + "node_inputs:": [ + { + "input_dimension:": "1 36 384 ", + "input_name:": "/Transpose_181_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "gemm_input_shape_token_810" + } + ], + "node_name:": "gemm_input_reshape_token_812", + "node_outputs:": [ + { + "output_dimension:": "36 384 ", + "output_name:": "gemm_input_reshape_arg_token_811" + } + ] + }, + { + "node_id:": 2189, + "node_inputs:": [ + { + "input_dimension:": "36 512 ", + "input_name:": "gemm_output_reshape_arg_token_814" + }, + { + "input_dimension:": "3 ", + "input_name:": "gemm_output_shape_token_813" + } + ], + "node_name:": "gemm_output_reshape_token_815", + "node_outputs:": [ + { + "output_dimension:": "1 36 512 ", + "output_name:": "encoder_out" + } + ] + }, + { + "node_id:": 2190, + "node_inputs:": [ + { + "input_dimension:": "36 384 ", + "input_name:": "gemm_input_reshape_arg_token_811" + }, + { + "input_dimension:": "384 512 ", + "input_name:": "onnx::MatMul_8324" + }, + { + "input_dimension:": "512 ", + "input_name:": "encoder_proj.bias" + } + ], + "node_name:": "/encoder_proj/MatMul/MatMulAddFusion", + "node_outputs:": [ + { + "output_dimension:": "36 512 ", + "output_name:": "gemm_output_reshape_arg_token_814" + } + ] + }, + { + "node_id:": 2191, + "node_inputs:": [ + { + "input_dimension:": "1 72 128 19 ", + "input_name:": "/encoder_embed/Transpose_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/encoder_embed/Reshape_new_shape" + } + ], + "node_name:": "/encoder_embed/Reshape_new_reshape", + "node_outputs:": [ + { + "output_dimension:": "72 2432 ", + "output_name:": "gemm_input_reshape_arg" + } + ] + }, + { + "node_id:": 2192, + "node_inputs:": [ + { + "input_dimension:": "72 8 12 ", + "input_name:": "/Transpose_6_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Reshape_8_new_shape" + } + ], + "node_name:": "/Reshape_8_new_reshape", + "node_outputs:": [ + { + "output_dimension:": "72 96 ", + "output_name:": "gemm_input_reshape_arg_token_19" + } + ] + }, + { + "node_id:": 2193, + "node_inputs:": [ + { + "input_dimension:": "72 8 12 ", + "input_name:": "/Transpose_10_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Reshape_10_new_shape" + } + ], + "node_name:": "/Reshape_10_new_reshape", + "node_outputs:": [ + { + "output_dimension:": "72 96 ", + "output_name:": "gemm_input_reshape_arg_token_37" + } + ] + }, + { + "node_id:": 2194, + "node_inputs:": [ + { + "input_dimension:": "72 8 12 ", + "input_name:": "/Transpose_18_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Reshape_19_new_shape" + } + ], + "node_name:": "/Reshape_19_new_reshape", + "node_outputs:": [ + { + "output_dimension:": "72 96 ", + "output_name:": "gemm_input_reshape_arg_token_73" + } + ] + }, + { + "node_id:": 2195, + "node_inputs:": [ + { + "input_dimension:": "72 8 12 ", + "input_name:": "/Transpose_22_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Reshape_21_new_shape" + } + ], + "node_name:": "/Reshape_21_new_reshape", + "node_outputs:": [ + { + "output_dimension:": "72 96 ", + "output_name:": "gemm_input_reshape_arg_token_91" + } + ] + }, + { + "node_id:": 2196, + "node_inputs:": [ + { + "input_dimension:": "36 8 12 ", + "input_name:": "/Transpose_30_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Reshape_30_new_shape" + } + ], + "node_name:": "/Reshape_30_new_reshape", + "node_outputs:": [ + { + "output_dimension:": "36 96 ", + "output_name:": "gemm_input_reshape_arg_token_127" + } + ] + }, + { + "node_id:": 2197, + "node_inputs:": [ + { + "input_dimension:": "36 8 12 ", + "input_name:": "/Transpose_34_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Reshape_32_new_shape" + } + ], + "node_name:": "/Reshape_32_new_reshape", + "node_outputs:": [ + { + "output_dimension:": "36 96 ", + "output_name:": "gemm_input_reshape_arg_token_145" + } + ] + }, + { + "node_id:": 2198, + "node_inputs:": [ + { + "input_dimension:": "36 8 12 ", + "input_name:": "/Transpose_42_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Reshape_41_new_shape" + } + ], + "node_name:": "/Reshape_41_new_reshape", + "node_outputs:": [ + { + "output_dimension:": "36 96 ", + "output_name:": "gemm_input_reshape_arg_token_181" + } + ] + }, + { + "node_id:": 2199, + "node_inputs:": [ + { + "input_dimension:": "36 8 12 ", + "input_name:": "/Transpose_46_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Reshape_43_new_shape" + } + ], + "node_name:": "/Reshape_43_new_reshape", + "node_outputs:": [ + { + "output_dimension:": "36 96 ", + "output_name:": "gemm_input_reshape_arg_token_199" + } + ] + }, + { + "node_id:": 2200, + "node_inputs:": [ + { + "input_dimension:": "36 8 12 ", + "input_name:": "/Transpose_54_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Reshape_52_new_shape" + } + ], + "node_name:": "/Reshape_52_new_reshape", + "node_outputs:": [ + { + "output_dimension:": "36 96 ", + "output_name:": "gemm_input_reshape_arg_token_235" + } + ] + }, + { + "node_id:": 2201, + "node_inputs:": [ + { + "input_dimension:": "36 8 12 ", + "input_name:": "/Transpose_58_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Reshape_54_new_shape" + } + ], + "node_name:": "/Reshape_54_new_reshape", + "node_outputs:": [ + { + "output_dimension:": "36 96 ", + "output_name:": "gemm_input_reshape_arg_token_253" + } + ] + }, + { + "node_id:": 2202, + "node_inputs:": [ + { + "input_dimension:": "36 8 12 ", + "input_name:": "/Transpose_66_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Reshape_63_new_shape" + } + ], + "node_name:": "/Reshape_63_new_reshape", + "node_outputs:": [ + { + "output_dimension:": "36 96 ", + "output_name:": "gemm_input_reshape_arg_token_289" + } + ] + }, + { + "node_id:": 2203, + "node_inputs:": [ + { + "input_dimension:": "36 8 12 ", + "input_name:": "/Transpose_70_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Reshape_65_new_shape" + } + ], + "node_name:": "/Reshape_65_new_reshape", + "node_outputs:": [ + { + "output_dimension:": "36 96 ", + "output_name:": "gemm_input_reshape_arg_token_307" + } + ] + }, + { + "node_id:": 2204, + "node_inputs:": [ + { + "input_dimension:": "18 8 12 ", + "input_name:": "/Transpose_78_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Reshape_74_new_shape" + } + ], + "node_name:": "/Reshape_74_new_reshape", + "node_outputs:": [ + { + "output_dimension:": "18 96 ", + "output_name:": "gemm_input_reshape_arg_token_343" + } + ] + }, + { + "node_id:": 2205, + "node_inputs:": [ + { + "input_dimension:": "18 8 12 ", + "input_name:": "/Transpose_82_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Reshape_76_new_shape" + } + ], + "node_name:": "/Reshape_76_new_reshape", + "node_outputs:": [ + { + "output_dimension:": "18 96 ", + "output_name:": "gemm_input_reshape_arg_token_361" + } + ] + }, + { + "node_id:": 2206, + "node_inputs:": [ + { + "input_dimension:": "18 8 12 ", + "input_name:": "/Transpose_90_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Reshape_85_new_shape" + } + ], + "node_name:": "/Reshape_85_new_reshape", + "node_outputs:": [ + { + "output_dimension:": "18 96 ", + "output_name:": "gemm_input_reshape_arg_token_397" + } + ] + }, + { + "node_id:": 2207, + "node_inputs:": [ + { + "input_dimension:": "18 8 12 ", + "input_name:": "/Transpose_94_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Reshape_87_new_shape" + } + ], + "node_name:": "/Reshape_87_new_reshape", + "node_outputs:": [ + { + "output_dimension:": "18 96 ", + "output_name:": "gemm_input_reshape_arg_token_415" + } + ] + }, + { + "node_id:": 2208, + "node_inputs:": [ + { + "input_dimension:": "18 8 12 ", + "input_name:": "/Transpose_102_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Reshape_96_new_shape" + } + ], + "node_name:": "/Reshape_96_new_reshape", + "node_outputs:": [ + { + "output_dimension:": "18 96 ", + "output_name:": "gemm_input_reshape_arg_token_451" + } + ] + }, + { + "node_id:": 2209, + "node_inputs:": [ + { + "input_dimension:": "18 8 12 ", + "input_name:": "/Transpose_106_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Reshape_98_new_shape" + } + ], + "node_name:": "/Reshape_98_new_reshape", + "node_outputs:": [ + { + "output_dimension:": "18 96 ", + "output_name:": "gemm_input_reshape_arg_token_469" + } + ] + }, + { + "node_id:": 2210, + "node_inputs:": [ + { + "input_dimension:": "9 8 12 ", + "input_name:": "/Transpose_114_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Reshape_107_new_shape" + } + ], + "node_name:": "/Reshape_107_new_reshape", + "node_outputs:": [ + { + "output_dimension:": "9 96 ", + "output_name:": "gemm_input_reshape_arg_token_505" + } + ] + }, + { + "node_id:": 2211, + "node_inputs:": [ + { + "input_dimension:": "9 8 12 ", + "input_name:": "/Transpose_118_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Reshape_109_new_shape" + } + ], + "node_name:": "/Reshape_109_new_reshape", + "node_outputs:": [ + { + "output_dimension:": "9 96 ", + "output_name:": "gemm_input_reshape_arg_token_523" + } + ] + }, + { + "node_id:": 2212, + "node_inputs:": [ + { + "input_dimension:": "9 8 12 ", + "input_name:": "/Transpose_126_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Reshape_118_new_shape" + } + ], + "node_name:": "/Reshape_118_new_reshape", + "node_outputs:": [ + { + "output_dimension:": "9 96 ", + "output_name:": "gemm_input_reshape_arg_token_559" + } + ] + }, + { + "node_id:": 2213, + "node_inputs:": [ + { + "input_dimension:": "9 8 12 ", + "input_name:": "/Transpose_130_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Reshape_120_new_shape" + } + ], + "node_name:": "/Reshape_120_new_reshape", + "node_outputs:": [ + { + "output_dimension:": "9 96 ", + "output_name:": "gemm_input_reshape_arg_token_577" + } + ] + }, + { + "node_id:": 2214, + "node_inputs:": [ + { + "input_dimension:": "36 8 12 ", + "input_name:": "/Transpose_138_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Reshape_129_new_shape" + } + ], + "node_name:": "/Reshape_129_new_reshape", + "node_outputs:": [ + { + "output_dimension:": "36 96 ", + "output_name:": "gemm_input_reshape_arg_token_613" + } + ] + }, + { + "node_id:": 2215, + "node_inputs:": [ + { + "input_dimension:": "36 8 12 ", + "input_name:": "/Transpose_142_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Reshape_131_new_shape" + } + ], + "node_name:": "/Reshape_131_new_reshape", + "node_outputs:": [ + { + "output_dimension:": "36 96 ", + "output_name:": "gemm_input_reshape_arg_token_631" + } + ] + }, + { + "node_id:": 2216, + "node_inputs:": [ + { + "input_dimension:": "36 8 12 ", + "input_name:": "/Transpose_150_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Reshape_140_new_shape" + } + ], + "node_name:": "/Reshape_140_new_reshape", + "node_outputs:": [ + { + "output_dimension:": "36 96 ", + "output_name:": "gemm_input_reshape_arg_token_667" + } + ] + }, + { + "node_id:": 2217, + "node_inputs:": [ + { + "input_dimension:": "36 8 12 ", + "input_name:": "/Transpose_154_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Reshape_142_new_shape" + } + ], + "node_name:": "/Reshape_142_new_reshape", + "node_outputs:": [ + { + "output_dimension:": "36 96 ", + "output_name:": "gemm_input_reshape_arg_token_685" + } + ] + }, + { + "node_id:": 2218, + "node_inputs:": [ + { + "input_dimension:": "36 8 12 ", + "input_name:": "/Transpose_162_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Reshape_151_new_shape" + } + ], + "node_name:": "/Reshape_151_new_reshape", + "node_outputs:": [ + { + "output_dimension:": "36 96 ", + "output_name:": "gemm_input_reshape_arg_token_721" + } + ] + }, + { + "node_id:": 2219, + "node_inputs:": [ + { + "input_dimension:": "36 8 12 ", + "input_name:": "/Transpose_166_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Reshape_153_new_shape" + } + ], + "node_name:": "/Reshape_153_new_reshape", + "node_outputs:": [ + { + "output_dimension:": "36 96 ", + "output_name:": "gemm_input_reshape_arg_token_739" + } + ] + }, + { + "node_id:": 2220, + "node_inputs:": [ + { + "input_dimension:": "36 8 12 ", + "input_name:": "/Transpose_174_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Reshape_162_new_shape" + } + ], + "node_name:": "/Reshape_162_new_reshape", + "node_outputs:": [ + { + "output_dimension:": "36 96 ", + "output_name:": "gemm_input_reshape_arg_token_775" + } + ] + }, + { + "node_id:": 2221, + "node_inputs:": [ + { + "input_dimension:": "36 8 12 ", + "input_name:": "/Transpose_178_output_0" + }, + { + "input_dimension:": "2 ", + "input_name:": "/Reshape_164_new_shape" + } + ], + "node_name:": "/Reshape_164_new_reshape", + "node_outputs:": [ + { + "output_dimension:": "36 96 ", + "output_name:": "gemm_input_reshape_arg_token_793" + } + ] + } +] \ No newline at end of file diff --git a/graph_partition_trace.csv b/graph_partition_trace.csv new file mode 100644 index 0000000000000000000000000000000000000000..927d9a9679b54803449f07e74f839fcdecca371d --- /dev/null +++ b/graph_partition_trace.csv @@ -0,0 +1,2223 @@ +Node, Type, Subgraph/CustomOp, Status +/Add,,vaiml_par_0,Supported +/Add_1,,vaiml_par_0,Supported +/Add_10,,vaiml_par_0,Supported +/Add_100,,vaiml_par_0,Supported +/Add_101,,vaiml_par_0,Supported +/Add_102,,vaiml_par_0,Supported +/Add_104,,vaiml_par_0,Supported +/Add_105,,vaiml_par_0,Supported +/Add_106,,vaiml_par_0,Supported +/Add_107,,vaiml_par_0,Supported +/Add_108,,vaiml_par_0,Supported +/Add_109,,vaiml_par_0,Supported +/Add_11,,vaiml_par_0,Supported +/Add_111,,,Not supported. Check aie_unsupported_original_ops.json +/Add_112,,,Not supported. Check aie_unsupported_original_ops.json +/Add_113,,vaiml_par_0,Supported +/Add_116,,vaiml_par_0,Supported +/Add_118,,vaiml_par_0,Supported +/Add_119,,vaiml_par_0,Supported +/Add_12,,vaiml_par_0,Supported +/Add_120,,vaiml_par_0,Supported +/Add_122,,vaiml_par_0,Supported +/Add_123,,vaiml_par_0,Supported +/Add_124,,vaiml_par_0,Supported +/Add_125,,vaiml_par_0,Supported +/Add_126,,vaiml_par_0,Supported +/Add_127,,vaiml_par_0,Supported +/Add_129,,,Not supported. Check aie_unsupported_original_ops.json +/Add_130,,,Not supported. Check aie_unsupported_original_ops.json +/Add_131,,vaiml_par_0,Supported +/Add_134,,vaiml_par_0,Supported +/Add_136,,vaiml_par_0,Supported +/Add_137,,vaiml_par_0,Supported +/Add_138,,vaiml_par_0,Supported +/Add_14,,vaiml_par_0,Supported +/Add_140,,vaiml_par_0,Supported +/Add_141,,vaiml_par_0,Supported +/Add_142,,vaiml_par_0,Supported +/Add_143,,vaiml_par_0,Supported +/Add_144,,vaiml_par_0,Supported +/Add_145,,vaiml_par_0,Supported +/Add_147,,,Not supported. Check aie_unsupported_original_ops.json +/Add_148,,,Not supported. Check aie_unsupported_original_ops.json +/Add_149,,vaiml_par_0,Supported +/Add_15,,vaiml_par_0,Supported +/Add_152,,vaiml_par_0,Supported +/Add_154,,vaiml_par_0,Supported +/Add_155,,vaiml_par_0,Supported +/Add_156,,vaiml_par_0,Supported +/Add_158,,vaiml_par_0,Supported +/Add_159,,vaiml_par_0,Supported +/Add_16,,vaiml_par_0,Supported +/Add_160,,vaiml_par_0,Supported +/Add_161,,vaiml_par_0,Supported +/Add_162,,vaiml_par_0,Supported +/Add_163,,vaiml_par_0,Supported +/Add_165,,,Not supported. Check aie_unsupported_original_ops.json +/Add_166,,,Not supported. Check aie_unsupported_original_ops.json +/Add_167,,vaiml_par_0,Supported +/Add_17,,vaiml_par_0,Supported +/Add_170,,vaiml_par_0,Supported +/Add_172,,vaiml_par_0,Supported +/Add_173,,vaiml_par_0,Supported +/Add_174,,vaiml_par_0,Supported +/Add_176,,vaiml_par_0,Supported +/Add_177,,vaiml_par_0,Supported +/Add_178,,vaiml_par_0,Supported +/Add_179,,vaiml_par_0,Supported +/Add_18,,vaiml_par_0,Supported +/Add_180,,vaiml_par_0,Supported +/Add_181,,vaiml_par_0,Supported +/Add_183,,,Not supported. Check aie_unsupported_original_ops.json +/Add_184,,,Not supported. Check aie_unsupported_original_ops.json +/Add_185,,vaiml_par_0,Supported +/Add_188,,vaiml_par_0,Supported +/Add_19,,vaiml_par_0,Supported +/Add_190,,vaiml_par_0,Supported +/Add_191,,vaiml_par_0,Supported +/Add_192,,vaiml_par_0,Supported +/Add_194,,vaiml_par_0,Supported +/Add_195,,vaiml_par_0,Supported +/Add_196,,vaiml_par_0,Supported +/Add_197,,vaiml_par_0,Supported +/Add_198,,vaiml_par_0,Supported +/Add_199,,vaiml_par_0,Supported +/Add_201,,,Not supported. Check aie_unsupported_original_ops.json +/Add_202,,,Not supported. Check aie_unsupported_original_ops.json +/Add_203,,vaiml_par_0,Supported +/Add_206,,vaiml_par_0,Supported +/Add_208,,vaiml_par_0,Supported +/Add_209,,vaiml_par_0,Supported +/Add_21,,,Not supported. Check aie_unsupported_original_ops.json +/Add_210,,vaiml_par_0,Supported +/Add_212,,vaiml_par_0,Supported +/Add_213,,vaiml_par_0,Supported +/Add_214,,vaiml_par_0,Supported +/Add_215,,vaiml_par_0,Supported +/Add_216,,vaiml_par_0,Supported +/Add_217,,vaiml_par_0,Supported +/Add_219,,,Not supported. Check aie_unsupported_original_ops.json +/Add_22,,,Not supported. Check aie_unsupported_original_ops.json +/Add_220,,,Not supported. Check aie_unsupported_original_ops.json +/Add_221,,vaiml_par_0,Supported +/Add_224,,vaiml_par_0,Supported +/Add_226,,vaiml_par_0,Supported +/Add_227,,vaiml_par_0,Supported +/Add_228,,vaiml_par_0,Supported +/Add_23,,vaiml_par_0,Supported +/Add_230,,vaiml_par_0,Supported +/Add_231,,vaiml_par_0,Supported +/Add_232,,vaiml_par_0,Supported +/Add_233,,vaiml_par_0,Supported +/Add_234,,vaiml_par_0,Supported +/Add_235,,vaiml_par_0,Supported +/Add_237,,,Not supported. Check aie_unsupported_original_ops.json +/Add_238,,,Not supported. Check aie_unsupported_original_ops.json +/Add_239,,vaiml_par_0,Supported +/Add_242,,vaiml_par_0,Supported +/Add_244,,vaiml_par_0,Supported +/Add_245,,vaiml_par_0,Supported +/Add_246,,vaiml_par_0,Supported +/Add_248,,vaiml_par_0,Supported +/Add_249,,vaiml_par_0,Supported +/Add_250,,vaiml_par_0,Supported +/Add_251,,vaiml_par_0,Supported +/Add_252,,vaiml_par_0,Supported +/Add_253,,vaiml_par_0,Supported +/Add_255,,,Not supported. Check aie_unsupported_original_ops.json +/Add_256,,,Not supported. Check aie_unsupported_original_ops.json +/Add_257,,vaiml_par_0,Supported +/Add_26,,vaiml_par_0,Supported +/Add_260,,vaiml_par_0,Supported +/Add_262,,vaiml_par_0,Supported +/Add_263,,vaiml_par_0,Supported +/Add_264,,vaiml_par_0,Supported +/Add_266,,vaiml_par_0,Supported +/Add_267,,vaiml_par_0,Supported +/Add_268,,vaiml_par_0,Supported +/Add_269,,vaiml_par_0,Supported +/Add_28,,vaiml_par_0,Supported +/Add_29,,vaiml_par_0,Supported +/Add_3,,,Not supported. Check aie_unsupported_original_ops.json +/Add_30,,vaiml_par_0,Supported +/Add_32,,vaiml_par_0,Supported +/Add_33,,vaiml_par_0,Supported +/Add_34,,vaiml_par_0,Supported +/Add_35,,vaiml_par_0,Supported +/Add_36,,vaiml_par_0,Supported +/Add_37,,vaiml_par_0,Supported +/Add_39,,,Not supported. Check aie_unsupported_original_ops.json +/Add_4,,,Not supported. Check aie_unsupported_original_ops.json +/Add_40,,,Not supported. Check aie_unsupported_original_ops.json +/Add_41,,vaiml_par_0,Supported +/Add_44,,vaiml_par_0,Supported +/Add_46,,vaiml_par_0,Supported +/Add_47,,vaiml_par_0,Supported +/Add_48,,vaiml_par_0,Supported +/Add_5,,vaiml_par_0,Supported +/Add_50,,vaiml_par_0,Supported +/Add_51,,vaiml_par_0,Supported +/Add_52,,vaiml_par_0,Supported +/Add_53,,vaiml_par_0,Supported +/Add_54,,vaiml_par_0,Supported +/Add_55,,vaiml_par_0,Supported +/Add_57,,,Not supported. Check aie_unsupported_original_ops.json +/Add_58,,,Not supported. Check aie_unsupported_original_ops.json +/Add_59,,vaiml_par_0,Supported +/Add_62,,vaiml_par_0,Supported +/Add_64,,vaiml_par_0,Supported +/Add_65,,vaiml_par_0,Supported +/Add_66,,vaiml_par_0,Supported +/Add_68,,vaiml_par_0,Supported +/Add_69,,vaiml_par_0,Supported +/Add_70,,vaiml_par_0,Supported +/Add_71,,vaiml_par_0,Supported +/Add_72,,vaiml_par_0,Supported +/Add_73,,vaiml_par_0,Supported +/Add_75,,,Not supported. Check aie_unsupported_original_ops.json +/Add_76,,,Not supported. Check aie_unsupported_original_ops.json +/Add_77,,vaiml_par_0,Supported +/Add_8,,vaiml_par_0,Supported +/Add_80,,vaiml_par_0,Supported +/Add_82,,vaiml_par_0,Supported +/Add_83,,vaiml_par_0,Supported +/Add_84,,vaiml_par_0,Supported +/Add_86,,vaiml_par_0,Supported +/Add_87,,vaiml_par_0,Supported +/Add_88,,vaiml_par_0,Supported +/Add_89,,vaiml_par_0,Supported +/Add_90,,vaiml_par_0,Supported +/Add_91,,vaiml_par_0,Supported +/Add_93,,,Not supported. Check aie_unsupported_original_ops.json +/Add_94,,,Not supported. Check aie_unsupported_original_ops.json +/Add_95,,vaiml_par_0,Supported +/Add_98,,vaiml_par_0,Supported +/Cast,,,Not supported. Check aie_unsupported_original_ops.json +/Cast_10,,,Not supported. Check aie_unsupported_original_ops.json +/Cast_12,,,Not supported. Check aie_unsupported_original_ops.json +/Cast_15,,,Not supported. Check aie_unsupported_original_ops.json +/Cast_17,,,Not supported. Check aie_unsupported_original_ops.json +/Cast_2,,,Not supported. Check aie_unsupported_original_ops.json +/Cast_20,,,Not supported. Check aie_unsupported_original_ops.json +/Cast_22,,,Not supported. Check aie_unsupported_original_ops.json +/Cast_25,,,Not supported. Check aie_unsupported_original_ops.json +/Cast_27,,,Not supported. Check aie_unsupported_original_ops.json +/Cast_30,,,Not supported. Check aie_unsupported_original_ops.json +/Cast_32,,,Not supported. Check aie_unsupported_original_ops.json +/Cast_35,,,Not supported. Check aie_unsupported_original_ops.json +/Cast_37,,,Not supported. Check aie_unsupported_original_ops.json +/Cast_40,,,Not supported. Check aie_unsupported_original_ops.json +/Cast_42,,,Not supported. Check aie_unsupported_original_ops.json +/Cast_45,,,Not supported. Check aie_unsupported_original_ops.json +/Cast_47,,,Not supported. Check aie_unsupported_original_ops.json +/Cast_5,,,Not supported. Check aie_unsupported_original_ops.json +/Cast_50,,,Not supported. Check aie_unsupported_original_ops.json +/Cast_52,,,Not supported. Check aie_unsupported_original_ops.json +/Cast_55,,,Not supported. Check aie_unsupported_original_ops.json +/Cast_57,,,Not supported. Check aie_unsupported_original_ops.json +/Cast_60,,,Not supported. Check aie_unsupported_original_ops.json +/Cast_62,,,Not supported. Check aie_unsupported_original_ops.json +/Cast_65,,,Not supported. Check aie_unsupported_original_ops.json +/Cast_67,,,Not supported. Check aie_unsupported_original_ops.json +/Cast_7,,,Not supported. Check aie_unsupported_original_ops.json +/Cast_70,,,Not supported. Check aie_unsupported_original_ops.json +/Cast_72,,,Not supported. Check aie_unsupported_original_ops.json +/Concat,,vaiml_par_0,Supported +/Concat_1,,vaiml_par_0,Supported +/Concat_110,,vaiml_par_0,Supported +/Concat_111,,vaiml_par_0,Supported +/Concat_114,,vaiml_par_0,Supported +/Concat_115,,,Not supported. Check aie_unsupported_original_ops.json +/Concat_116,,vaiml_par_0,Supported +/Concat_117,,vaiml_par_0,Supported +/Concat_118,,vaiml_par_0,Supported +/Concat_119,,vaiml_par_0,Supported +/Concat_120,,vaiml_par_0,Supported +/Concat_121,,vaiml_par_0,Supported +/Concat_122,,vaiml_par_0,Supported +/Concat_123,,vaiml_par_0,Supported +/Concat_13,,vaiml_par_0,Supported +/Concat_135,,vaiml_par_0,Supported +/Concat_136,,vaiml_par_0,Supported +/Concat_139,,vaiml_par_0,Supported +/Concat_14,,vaiml_par_0,Supported +/Concat_140,,vaiml_par_0,Supported +/Concat_141,,vaiml_par_0,Supported +/Concat_153,,vaiml_par_0,Supported +/Concat_154,,vaiml_par_0,Supported +/Concat_157,,vaiml_par_0,Supported +/Concat_158,,vaiml_par_0,Supported +/Concat_159,,vaiml_par_0,Supported +/Concat_17,,vaiml_par_0,Supported +/Concat_171,,vaiml_par_0,Supported +/Concat_172,,vaiml_par_0,Supported +/Concat_175,,vaiml_par_0,Supported +/Concat_176,,,Not supported. Check aie_unsupported_original_ops.json +/Concat_177,,vaiml_par_0,Supported +/Concat_178,,vaiml_par_0,Supported +/Concat_179,,vaiml_par_0,Supported +/Concat_18,,vaiml_par_0,Supported +/Concat_180,,vaiml_par_0,Supported +/Concat_181,,vaiml_par_0,Supported +/Concat_182,,vaiml_par_0,Supported +/Concat_183,,vaiml_par_0,Supported +/Concat_184,,vaiml_par_0,Supported +/Concat_19,,vaiml_par_0,Supported +/Concat_196,,vaiml_par_0,Supported +/Concat_197,,vaiml_par_0,Supported +/Concat_200,,vaiml_par_0,Supported +/Concat_201,,vaiml_par_0,Supported +/Concat_202,,vaiml_par_0,Supported +/Concat_214,,vaiml_par_0,Supported +/Concat_215,,vaiml_par_0,Supported +/Concat_218,,vaiml_par_0,Supported +/Concat_219,,,Not supported. Check aie_unsupported_original_ops.json +/Concat_220,,vaiml_par_0,Supported +/Concat_221,,vaiml_par_0,Supported +/Concat_222,,vaiml_par_0,Supported +/Concat_223,,vaiml_par_0,Supported +/Concat_224,,vaiml_par_0,Supported +/Concat_225,,vaiml_par_0,Supported +/Concat_226,,vaiml_par_0,Supported +/Concat_227,,vaiml_par_0,Supported +/Concat_239,,vaiml_par_0,Supported +/Concat_240,,vaiml_par_0,Supported +/Concat_243,,vaiml_par_0,Supported +/Concat_244,,vaiml_par_0,Supported +/Concat_245,,vaiml_par_0,Supported +/Concat_257,,vaiml_par_0,Supported +/Concat_258,,vaiml_par_0,Supported +/Concat_261,,vaiml_par_0,Supported +/Concat_262,,vaiml_par_0,Supported +/Concat_263,,vaiml_par_0,Supported +/Concat_275,,vaiml_par_0,Supported +/Concat_276,,vaiml_par_0,Supported +/Concat_279,,vaiml_par_0,Supported +/Concat_280,,vaiml_par_0,Supported +/Concat_281,,vaiml_par_0,Supported +/Concat_293,,vaiml_par_0,Supported +/Concat_294,,vaiml_par_0,Supported +/Concat_297,,vaiml_par_0,Supported +/Concat_298,,,Not supported. Check aie_unsupported_original_ops.json +/Concat_299,,vaiml_par_0,Supported +/Concat_300,,vaiml_par_0,Supported +/Concat_301,,vaiml_par_0,Supported +/Concat_302,,vaiml_par_0,Supported +/Concat_303,,vaiml_par_0,Supported +/Concat_304,,vaiml_par_0,Supported +/Concat_31,,vaiml_par_0,Supported +/Concat_32,,vaiml_par_0,Supported +/Concat_35,,vaiml_par_0,Supported +/Concat_36,,,Not supported. Check aie_unsupported_original_ops.json +/Concat_37,,vaiml_par_0,Supported +/Concat_38,,vaiml_par_0,Supported +/Concat_39,,vaiml_par_0,Supported +/Concat_40,,vaiml_par_0,Supported +/Concat_41,,vaiml_par_0,Supported +/Concat_42,,vaiml_par_0,Supported +/Concat_43,,vaiml_par_0,Supported +/Concat_44,,vaiml_par_0,Supported +/Concat_56,,vaiml_par_0,Supported +/Concat_57,,vaiml_par_0,Supported +/Concat_60,,vaiml_par_0,Supported +/Concat_61,,vaiml_par_0,Supported +/Concat_62,,vaiml_par_0,Supported +/Concat_74,,vaiml_par_0,Supported +/Concat_75,,vaiml_par_0,Supported +/Concat_78,,vaiml_par_0,Supported +/Concat_79,,vaiml_par_0,Supported +/Concat_80,,vaiml_par_0,Supported +/Concat_92,,vaiml_par_0,Supported +/Concat_93,,vaiml_par_0,Supported +/Concat_96,,vaiml_par_0,Supported +/Concat_97,,vaiml_par_0,Supported +/Concat_98,,vaiml_par_0,Supported +/CumSum,,vaiml_par_0,Supported +/CumSum_1,,vaiml_par_0,Supported +/CumSum_10,,vaiml_par_0,Supported +/CumSum_11,,vaiml_par_0,Supported +/CumSum_12,,vaiml_par_0,Supported +/CumSum_13,,vaiml_par_0,Supported +/CumSum_14,,vaiml_par_0,Supported +/CumSum_2,,vaiml_par_0,Supported +/CumSum_3,,vaiml_par_0,Supported +/CumSum_4,,vaiml_par_0,Supported +/CumSum_5,,vaiml_par_0,Supported +/CumSum_6,,vaiml_par_0,Supported +/CumSum_7,,vaiml_par_0,Supported +/CumSum_8,,vaiml_par_0,Supported +/CumSum_9,,vaiml_par_0,Supported +/GatherElements,,vaiml_par_0,Supported +/GatherElements_1,,vaiml_par_0,Supported +/GatherElements_10,,vaiml_par_0,Supported +/GatherElements_11,,vaiml_par_0,Supported +/GatherElements_12,,vaiml_par_0,Supported +/GatherElements_13,,vaiml_par_0,Supported +/GatherElements_14,,vaiml_par_0,Supported +/GatherElements_2,,vaiml_par_0,Supported +/GatherElements_3,,vaiml_par_0,Supported +/GatherElements_4,,vaiml_par_0,Supported +/GatherElements_5,,vaiml_par_0,Supported +/GatherElements_6,,vaiml_par_0,Supported +/GatherElements_7,,vaiml_par_0,Supported +/GatherElements_8,,vaiml_par_0,Supported +/GatherElements_9,,vaiml_par_0,Supported +/Gather_1,,,Not supported. Check aie_unsupported_original_ops.json +/Gather_10,,vaiml_par_0,Supported +/Gather_107,,,Not supported. Check aie_unsupported_original_ops.json +/Gather_108,,vaiml_par_0,Supported +/Gather_109,,vaiml_par_0,Supported +/Gather_110,,vaiml_par_0,Supported +/Gather_111,,vaiml_par_0,Supported +/Gather_112,,vaiml_par_0,Supported +/Gather_113,,vaiml_par_0,Supported +/Gather_116,,vaiml_par_0,Supported +/Gather_129,,,Not supported. Check aie_unsupported_original_ops.json +/Gather_130,,vaiml_par_0,Supported +/Gather_131,,vaiml_par_0,Supported +/Gather_132,,vaiml_par_0,Supported +/Gather_133,,vaiml_par_0,Supported +/Gather_134,,vaiml_par_0,Supported +/Gather_135,,vaiml_par_0,Supported +/Gather_138,,vaiml_par_0,Supported +/Gather_150,,,Not supported. Check aie_unsupported_original_ops.json +/Gather_151,,vaiml_par_0,Supported +/Gather_152,,vaiml_par_0,Supported +/Gather_153,,vaiml_par_0,Supported +/Gather_154,,vaiml_par_0,Supported +/Gather_155,,vaiml_par_0,Supported +/Gather_156,,vaiml_par_0,Supported +/Gather_159,,vaiml_par_0,Supported +/Gather_171,,,Not supported. Check aie_unsupported_original_ops.json +/Gather_172,,vaiml_par_0,Supported +/Gather_173,,vaiml_par_0,Supported +/Gather_174,,vaiml_par_0,Supported +/Gather_175,,vaiml_par_0,Supported +/Gather_176,,vaiml_par_0,Supported +/Gather_177,,vaiml_par_0,Supported +/Gather_180,,vaiml_par_0,Supported +/Gather_193,,,Not supported. Check aie_unsupported_original_ops.json +/Gather_194,,vaiml_par_0,Supported +/Gather_195,,vaiml_par_0,Supported +/Gather_196,,vaiml_par_0,Supported +/Gather_197,,vaiml_par_0,Supported +/Gather_198,,vaiml_par_0,Supported +/Gather_199,,vaiml_par_0,Supported +/Gather_2,,vaiml_par_0,Supported +/Gather_202,,vaiml_par_0,Supported +/Gather_214,,,Not supported. Check aie_unsupported_original_ops.json +/Gather_215,,vaiml_par_0,Supported +/Gather_216,,vaiml_par_0,Supported +/Gather_217,,vaiml_par_0,Supported +/Gather_218,,vaiml_par_0,Supported +/Gather_219,,vaiml_par_0,Supported +/Gather_22,,,Not supported. Check aie_unsupported_original_ops.json +/Gather_220,,vaiml_par_0,Supported +/Gather_223,,vaiml_par_0,Supported +/Gather_23,,vaiml_par_0,Supported +/Gather_236,,,Not supported. Check aie_unsupported_original_ops.json +/Gather_237,,vaiml_par_0,Supported +/Gather_238,,vaiml_par_0,Supported +/Gather_239,,vaiml_par_0,Supported +/Gather_24,,vaiml_par_0,Supported +/Gather_240,,vaiml_par_0,Supported +/Gather_241,,vaiml_par_0,Supported +/Gather_242,,vaiml_par_0,Supported +/Gather_245,,vaiml_par_0,Supported +/Gather_25,,vaiml_par_0,Supported +/Gather_257,,,Not supported. Check aie_unsupported_original_ops.json +/Gather_258,,vaiml_par_0,Supported +/Gather_259,,vaiml_par_0,Supported +/Gather_26,,vaiml_par_0,Supported +/Gather_260,,vaiml_par_0,Supported +/Gather_261,,vaiml_par_0,Supported +/Gather_262,,vaiml_par_0,Supported +/Gather_263,,vaiml_par_0,Supported +/Gather_266,,vaiml_par_0,Supported +/Gather_27,,vaiml_par_0,Supported +/Gather_278,,,Not supported. Check aie_unsupported_original_ops.json +/Gather_279,,vaiml_par_0,Supported +/Gather_28,,vaiml_par_0,Supported +/Gather_280,,vaiml_par_0,Supported +/Gather_281,,vaiml_par_0,Supported +/Gather_282,,vaiml_par_0,Supported +/Gather_283,,vaiml_par_0,Supported +/Gather_284,,vaiml_par_0,Supported +/Gather_287,,vaiml_par_0,Supported +/Gather_299,,,Not supported. Check aie_unsupported_original_ops.json +/Gather_3,,vaiml_par_0,Supported +/Gather_300,,vaiml_par_0,Supported +/Gather_301,,vaiml_par_0,Supported +/Gather_302,,vaiml_par_0,Supported +/Gather_303,,vaiml_par_0,Supported +/Gather_304,,vaiml_par_0,Supported +/Gather_305,,vaiml_par_0,Supported +/Gather_308,,vaiml_par_0,Supported +/Gather_31,,vaiml_par_0,Supported +/Gather_4,,vaiml_par_0,Supported +/Gather_44,,,Not supported. Check aie_unsupported_original_ops.json +/Gather_45,,vaiml_par_0,Supported +/Gather_46,,vaiml_par_0,Supported +/Gather_47,,vaiml_par_0,Supported +/Gather_48,,vaiml_par_0,Supported +/Gather_49,,vaiml_par_0,Supported +/Gather_5,,vaiml_par_0,Supported +/Gather_50,,vaiml_par_0,Supported +/Gather_53,,vaiml_par_0,Supported +/Gather_6,,vaiml_par_0,Supported +/Gather_65,,,Not supported. Check aie_unsupported_original_ops.json +/Gather_66,,vaiml_par_0,Supported +/Gather_67,,vaiml_par_0,Supported +/Gather_68,,vaiml_par_0,Supported +/Gather_69,,vaiml_par_0,Supported +/Gather_7,,vaiml_par_0,Supported +/Gather_70,,vaiml_par_0,Supported +/Gather_71,,vaiml_par_0,Supported +/Gather_74,,vaiml_par_0,Supported +/Gather_86,,,Not supported. Check aie_unsupported_original_ops.json +/Gather_87,,vaiml_par_0,Supported +/Gather_88,,vaiml_par_0,Supported +/Gather_89,,vaiml_par_0,Supported +/Gather_90,,vaiml_par_0,Supported +/Gather_91,,vaiml_par_0,Supported +/Gather_92,,vaiml_par_0,Supported +/Gather_95,,vaiml_par_0,Supported +/MatMul,,vaiml_par_0,Supported +/MatMul_1,,vaiml_par_0,Supported +/MatMul_10,,vaiml_par_0,Supported +/MatMul_11,,vaiml_par_0,Supported +/MatMul_12,,vaiml_par_0,Supported +/MatMul_13/MatMulAddFusion,,vaiml_par_0,Supported +/MatMul_14,,vaiml_par_0,Supported +/MatMul_15,,vaiml_par_0,Supported +/MatMul_16,,vaiml_par_0,Supported +/MatMul_17,,vaiml_par_0,Supported +/MatMul_18/MatMulAddFusion,,vaiml_par_0,Supported +/MatMul_19,,vaiml_par_0,Supported +/MatMul_2,,vaiml_par_0,Supported +/MatMul_20,,vaiml_par_0,Supported +/MatMul_21,,vaiml_par_0,Supported +/MatMul_22,,vaiml_par_0,Supported +/MatMul_23/MatMulAddFusion,,vaiml_par_0,Supported +/MatMul_24,,vaiml_par_0,Supported +/MatMul_25,,vaiml_par_0,Supported +/MatMul_26,,vaiml_par_0,Supported +/MatMul_27,,vaiml_par_0,Supported +/MatMul_28/MatMulAddFusion,,vaiml_par_0,Supported +/MatMul_29,,vaiml_par_0,Supported +/MatMul_3/MatMulAddFusion,,vaiml_par_0,Supported +/MatMul_30,,vaiml_par_0,Supported +/MatMul_31,,vaiml_par_0,Supported +/MatMul_32,,vaiml_par_0,Supported +/MatMul_33/MatMulAddFusion,,vaiml_par_0,Supported +/MatMul_34,,vaiml_par_0,Supported +/MatMul_35,,vaiml_par_0,Supported +/MatMul_36,,vaiml_par_0,Supported +/MatMul_37,,vaiml_par_0,Supported +/MatMul_38/MatMulAddFusion,,vaiml_par_0,Supported +/MatMul_39,,vaiml_par_0,Supported +/MatMul_4,,vaiml_par_0,Supported +/MatMul_40,,vaiml_par_0,Supported +/MatMul_41,,vaiml_par_0,Supported +/MatMul_42,,vaiml_par_0,Supported +/MatMul_43/MatMulAddFusion,,vaiml_par_0,Supported +/MatMul_44,,vaiml_par_0,Supported +/MatMul_45,,vaiml_par_0,Supported +/MatMul_46,,vaiml_par_0,Supported +/MatMul_47,,vaiml_par_0,Supported +/MatMul_48/MatMulAddFusion,,vaiml_par_0,Supported +/MatMul_49,,vaiml_par_0,Supported +/MatMul_5,,vaiml_par_0,Supported +/MatMul_50,,vaiml_par_0,Supported +/MatMul_51,,vaiml_par_0,Supported +/MatMul_52,,vaiml_par_0,Supported +/MatMul_53/MatMulAddFusion,,vaiml_par_0,Supported +/MatMul_54,,vaiml_par_0,Supported +/MatMul_55,,vaiml_par_0,Supported +/MatMul_56,,vaiml_par_0,Supported +/MatMul_57,,vaiml_par_0,Supported +/MatMul_58/MatMulAddFusion,,vaiml_par_0,Supported +/MatMul_59,,vaiml_par_0,Supported +/MatMul_6,,vaiml_par_0,Supported +/MatMul_60,,vaiml_par_0,Supported +/MatMul_61,,vaiml_par_0,Supported +/MatMul_62,,vaiml_par_0,Supported +/MatMul_63/MatMulAddFusion,,vaiml_par_0,Supported +/MatMul_64,,vaiml_par_0,Supported +/MatMul_65,,vaiml_par_0,Supported +/MatMul_66,,vaiml_par_0,Supported +/MatMul_67,,vaiml_par_0,Supported +/MatMul_68/MatMulAddFusion,,vaiml_par_0,Supported +/MatMul_69,,vaiml_par_0,Supported +/MatMul_7,,vaiml_par_0,Supported +/MatMul_70,,vaiml_par_0,Supported +/MatMul_71,,vaiml_par_0,Supported +/MatMul_72,,vaiml_par_0,Supported +/MatMul_73/MatMulAddFusion,,vaiml_par_0,Supported +/MatMul_74,,vaiml_par_0,Supported +/MatMul_8/MatMulAddFusion,,vaiml_par_0,Supported +/MatMul_9,,vaiml_par_0,Supported +/Mul,,vaiml_par_0,Supported +/Mul_10,,vaiml_par_0,Supported +/Mul_101,,vaiml_par_0,Supported +/Mul_106,,vaiml_par_0,Supported +/Mul_108,,vaiml_par_0,Supported +/Mul_109,,vaiml_par_0,Supported +/Mul_11,,vaiml_par_0,Supported +/Mul_110,,vaiml_par_0,Supported +/Mul_112,,vaiml_par_0,Supported +/Mul_117,,vaiml_par_0,Supported +/Mul_119,,vaiml_par_0,Supported +/Mul_120,,vaiml_par_0,Supported +/Mul_121,,vaiml_par_0,Supported +/Mul_123,,vaiml_par_0,Supported +/Mul_128,,vaiml_par_0,Supported +/Mul_13,,vaiml_par_0,Supported +/Mul_130,,vaiml_par_0,Supported +/Mul_131,,vaiml_par_0,Supported +/Mul_132,,vaiml_par_0,Supported +/Mul_134,,vaiml_par_0,Supported +/Mul_139,,vaiml_par_0,Supported +/Mul_141,,vaiml_par_0,Supported +/Mul_142,,vaiml_par_0,Supported +/Mul_143,,vaiml_par_0,Supported +/Mul_145,,vaiml_par_0,Supported +/Mul_150,,vaiml_par_0,Supported +/Mul_152,,vaiml_par_0,Supported +/Mul_153,,vaiml_par_0,Supported +/Mul_154,,vaiml_par_0,Supported +/Mul_156,,vaiml_par_0,Supported +/Mul_161,,vaiml_par_0,Supported +/Mul_163,,vaiml_par_0,Supported +/Mul_164,,vaiml_par_0,Supported +/Mul_18,,vaiml_par_0,Supported +/Mul_2,,vaiml_par_0,Supported +/Mul_20,,vaiml_par_0,Supported +/Mul_21,,vaiml_par_0,Supported +/Mul_22,,vaiml_par_0,Supported +/Mul_24,,vaiml_par_0,Supported +/Mul_29,,vaiml_par_0,Supported +/Mul_31,,vaiml_par_0,Supported +/Mul_32,,vaiml_par_0,Supported +/Mul_33,,vaiml_par_0,Supported +/Mul_35,,vaiml_par_0,Supported +/Mul_40,,vaiml_par_0,Supported +/Mul_42,,vaiml_par_0,Supported +/Mul_43,,vaiml_par_0,Supported +/Mul_44,,vaiml_par_0,Supported +/Mul_46,,vaiml_par_0,Supported +/Mul_51,,vaiml_par_0,Supported +/Mul_53,,vaiml_par_0,Supported +/Mul_54,,vaiml_par_0,Supported +/Mul_55,,vaiml_par_0,Supported +/Mul_57,,vaiml_par_0,Supported +/Mul_62,,vaiml_par_0,Supported +/Mul_64,,vaiml_par_0,Supported +/Mul_65,,vaiml_par_0,Supported +/Mul_66,,vaiml_par_0,Supported +/Mul_68,,vaiml_par_0,Supported +/Mul_7,,vaiml_par_0,Supported +/Mul_73,,vaiml_par_0,Supported +/Mul_75,,vaiml_par_0,Supported +/Mul_76,,vaiml_par_0,Supported +/Mul_77,,vaiml_par_0,Supported +/Mul_79,,vaiml_par_0,Supported +/Mul_84,,vaiml_par_0,Supported +/Mul_86,,vaiml_par_0,Supported +/Mul_87,,vaiml_par_0,Supported +/Mul_88,,vaiml_par_0,Supported +/Mul_9,,vaiml_par_0,Supported +/Mul_90,,vaiml_par_0,Supported +/Mul_95,,vaiml_par_0,Supported +/Mul_97,,vaiml_par_0,Supported +/Mul_98,,vaiml_par_0,Supported +/Mul_99,,vaiml_par_0,Supported +/Reciprocal,,,Not supported. Check aie_unsupported_original_ops.json +/Reciprocal_1,,,Not supported. Check aie_unsupported_original_ops.json +/Reciprocal_10,,,Not supported. Check aie_unsupported_original_ops.json +/Reciprocal_11,,,Not supported. Check aie_unsupported_original_ops.json +/Reciprocal_12,,,Not supported. Check aie_unsupported_original_ops.json +/Reciprocal_13,,,Not supported. Check aie_unsupported_original_ops.json +/Reciprocal_14,,,Not supported. Check aie_unsupported_original_ops.json +/Reciprocal_2,,,Not supported. Check aie_unsupported_original_ops.json +/Reciprocal_3,,,Not supported. Check aie_unsupported_original_ops.json +/Reciprocal_4,,,Not supported. Check aie_unsupported_original_ops.json +/Reciprocal_5,,,Not supported. Check aie_unsupported_original_ops.json +/Reciprocal_6,,,Not supported. Check aie_unsupported_original_ops.json +/Reciprocal_7,,,Not supported. Check aie_unsupported_original_ops.json +/Reciprocal_8,,,Not supported. Check aie_unsupported_original_ops.json +/Reciprocal_9,,,Not supported. Check aie_unsupported_original_ops.json +/Reshape,,vaiml_par_0,Supported +/Reshape_1,,vaiml_par_0,Supported +/Reshape_100,,vaiml_par_0,Supported +/Reshape_101,,vaiml_par_0,Supported +/Reshape_102,,vaiml_par_0,Supported +/Reshape_104,,vaiml_par_0,Supported +/Reshape_105,,vaiml_par_0,Supported +/Reshape_106,,vaiml_par_0,Supported +/Reshape_107_new_reshape,,vaiml_par_0,Supported +/Reshape_108,,vaiml_par_0,Supported +/Reshape_109_new_reshape,,vaiml_par_0,Supported +/Reshape_10_new_reshape,,vaiml_par_0,Supported +/Reshape_11,,vaiml_par_0,Supported +/Reshape_110,,vaiml_par_0,Supported +/Reshape_111,,vaiml_par_0,Supported +/Reshape_112,,vaiml_par_0,Supported +/Reshape_113,,vaiml_par_0,Supported +/Reshape_115,,vaiml_par_0,Supported +/Reshape_116,,vaiml_par_0,Supported +/Reshape_117,,vaiml_par_0,Supported +/Reshape_118_new_reshape,,vaiml_par_0,Supported +/Reshape_119,,vaiml_par_0,Supported +/Reshape_12,,vaiml_par_0,Supported +/Reshape_120_new_reshape,,vaiml_par_0,Supported +/Reshape_121,,vaiml_par_0,Supported +/Reshape_122,,vaiml_par_0,Supported +/Reshape_123,,vaiml_par_0,Supported +/Reshape_124,,vaiml_par_0,Supported +/Reshape_126,,vaiml_par_0,Supported +/Reshape_127,,vaiml_par_0,Supported +/Reshape_128,,vaiml_par_0,Supported +/Reshape_129_new_reshape,,vaiml_par_0,Supported +/Reshape_13,,vaiml_par_0,Supported +/Reshape_130,,vaiml_par_0,Supported +/Reshape_131_new_reshape,,vaiml_par_0,Supported +/Reshape_132,,vaiml_par_0,Supported +/Reshape_133,,vaiml_par_0,Supported +/Reshape_134,,vaiml_par_0,Supported +/Reshape_135,,vaiml_par_0,Supported +/Reshape_137,,vaiml_par_0,Supported +/Reshape_138,,vaiml_par_0,Supported +/Reshape_139,,vaiml_par_0,Supported +/Reshape_14,,vaiml_par_0,Supported +/Reshape_140_new_reshape,,vaiml_par_0,Supported +/Reshape_141,,vaiml_par_0,Supported +/Reshape_142_new_reshape,,vaiml_par_0,Supported +/Reshape_143,,vaiml_par_0,Supported +/Reshape_144,,vaiml_par_0,Supported +/Reshape_145,,vaiml_par_0,Supported +/Reshape_146,,vaiml_par_0,Supported +/Reshape_148,,vaiml_par_0,Supported +/Reshape_149,,vaiml_par_0,Supported +/Reshape_150,,vaiml_par_0,Supported +/Reshape_151_new_reshape,,vaiml_par_0,Supported +/Reshape_152,,vaiml_par_0,Supported +/Reshape_153_new_reshape,,vaiml_par_0,Supported +/Reshape_154,,vaiml_par_0,Supported +/Reshape_155,,vaiml_par_0,Supported +/Reshape_156,,vaiml_par_0,Supported +/Reshape_157,,vaiml_par_0,Supported +/Reshape_159,,vaiml_par_0,Supported +/Reshape_16,,vaiml_par_0,Supported +/Reshape_160,,vaiml_par_0,Supported +/Reshape_161,,vaiml_par_0,Supported +/Reshape_162_new_reshape,,vaiml_par_0,Supported +/Reshape_163,,vaiml_par_0,Supported +/Reshape_164_new_reshape,,vaiml_par_0,Supported +/Reshape_17,,vaiml_par_0,Supported +/Reshape_18,,vaiml_par_0,Supported +/Reshape_19_new_reshape,,vaiml_par_0,Supported +/Reshape_2,,vaiml_par_0,Supported +/Reshape_20,,vaiml_par_0,Supported +/Reshape_21_new_reshape,,vaiml_par_0,Supported +/Reshape_22,,vaiml_par_0,Supported +/Reshape_23,,vaiml_par_0,Supported +/Reshape_24,,vaiml_par_0,Supported +/Reshape_25,,vaiml_par_0,Supported +/Reshape_27,,vaiml_par_0,Supported +/Reshape_28,,vaiml_par_0,Supported +/Reshape_29,,vaiml_par_0,Supported +/Reshape_3,,vaiml_par_0,Supported +/Reshape_30_new_reshape,,vaiml_par_0,Supported +/Reshape_31,,vaiml_par_0,Supported +/Reshape_32_new_reshape,,vaiml_par_0,Supported +/Reshape_33,,vaiml_par_0,Supported +/Reshape_34,,vaiml_par_0,Supported +/Reshape_35,,vaiml_par_0,Supported +/Reshape_36,,vaiml_par_0,Supported +/Reshape_38,,vaiml_par_0,Supported +/Reshape_39,,vaiml_par_0,Supported +/Reshape_40,,vaiml_par_0,Supported +/Reshape_41_new_reshape,,vaiml_par_0,Supported +/Reshape_42,,vaiml_par_0,Supported +/Reshape_43_new_reshape,,vaiml_par_0,Supported +/Reshape_44,,vaiml_par_0,Supported +/Reshape_45,,vaiml_par_0,Supported +/Reshape_46,,vaiml_par_0,Supported +/Reshape_47,,vaiml_par_0,Supported +/Reshape_49,,vaiml_par_0,Supported +/Reshape_5,,vaiml_par_0,Supported +/Reshape_50,,vaiml_par_0,Supported +/Reshape_51,,vaiml_par_0,Supported +/Reshape_52_new_reshape,,vaiml_par_0,Supported +/Reshape_53,,vaiml_par_0,Supported +/Reshape_54_new_reshape,,vaiml_par_0,Supported +/Reshape_55,,vaiml_par_0,Supported +/Reshape_56,,vaiml_par_0,Supported +/Reshape_57,,vaiml_par_0,Supported +/Reshape_58,,vaiml_par_0,Supported +/Reshape_6,,vaiml_par_0,Supported +/Reshape_60,,vaiml_par_0,Supported +/Reshape_61,,vaiml_par_0,Supported +/Reshape_62,,vaiml_par_0,Supported +/Reshape_63_new_reshape,,vaiml_par_0,Supported +/Reshape_64,,vaiml_par_0,Supported +/Reshape_65_new_reshape,,vaiml_par_0,Supported +/Reshape_66,,vaiml_par_0,Supported +/Reshape_67,,vaiml_par_0,Supported +/Reshape_68,,vaiml_par_0,Supported +/Reshape_69,,vaiml_par_0,Supported +/Reshape_7,,vaiml_par_0,Supported +/Reshape_71,,vaiml_par_0,Supported +/Reshape_72,,vaiml_par_0,Supported +/Reshape_73,,vaiml_par_0,Supported +/Reshape_74_new_reshape,,vaiml_par_0,Supported +/Reshape_75,,vaiml_par_0,Supported +/Reshape_76_new_reshape,,vaiml_par_0,Supported +/Reshape_77,,vaiml_par_0,Supported +/Reshape_78,,vaiml_par_0,Supported +/Reshape_79,,vaiml_par_0,Supported +/Reshape_80,,vaiml_par_0,Supported +/Reshape_82,,vaiml_par_0,Supported +/Reshape_83,,vaiml_par_0,Supported +/Reshape_84,,vaiml_par_0,Supported +/Reshape_85_new_reshape,,vaiml_par_0,Supported +/Reshape_86,,vaiml_par_0,Supported +/Reshape_87_new_reshape,,vaiml_par_0,Supported +/Reshape_88,,vaiml_par_0,Supported +/Reshape_89,,vaiml_par_0,Supported +/Reshape_8_new_reshape,,vaiml_par_0,Supported +/Reshape_9,,vaiml_par_0,Supported +/Reshape_90,,vaiml_par_0,Supported +/Reshape_91,,vaiml_par_0,Supported +/Reshape_93,,vaiml_par_0,Supported +/Reshape_94,,vaiml_par_0,Supported +/Reshape_95,,vaiml_par_0,Supported +/Reshape_96_new_reshape,,vaiml_par_0,Supported +/Reshape_97,,vaiml_par_0,Supported +/Reshape_98_new_reshape,,vaiml_par_0,Supported +/Reshape_99,,vaiml_par_0,Supported +/Sigmoid,,vaiml_par_0,Supported +/Sigmoid_1,,vaiml_par_0,Supported +/Sigmoid_10,,vaiml_par_0,Supported +/Sigmoid_11,,vaiml_par_0,Supported +/Sigmoid_12,,vaiml_par_0,Supported +/Sigmoid_13,,vaiml_par_0,Supported +/Sigmoid_14,,vaiml_par_0,Supported +/Sigmoid_15,,vaiml_par_0,Supported +/Sigmoid_16,,vaiml_par_0,Supported +/Sigmoid_17,,vaiml_par_0,Supported +/Sigmoid_18,,vaiml_par_0,Supported +/Sigmoid_19,,vaiml_par_0,Supported +/Sigmoid_2,,vaiml_par_0,Supported +/Sigmoid_20,,vaiml_par_0,Supported +/Sigmoid_21,,vaiml_par_0,Supported +/Sigmoid_22,,vaiml_par_0,Supported +/Sigmoid_23,,vaiml_par_0,Supported +/Sigmoid_24,,vaiml_par_0,Supported +/Sigmoid_25,,vaiml_par_0,Supported +/Sigmoid_26,,vaiml_par_0,Supported +/Sigmoid_27,,vaiml_par_0,Supported +/Sigmoid_28,,vaiml_par_0,Supported +/Sigmoid_29,,vaiml_par_0,Supported +/Sigmoid_3,,vaiml_par_0,Supported +/Sigmoid_4,,vaiml_par_0,Supported +/Sigmoid_5,,vaiml_par_0,Supported +/Sigmoid_6,,vaiml_par_0,Supported +/Sigmoid_7,,vaiml_par_0,Supported +/Sigmoid_8,,vaiml_par_0,Supported +/Sigmoid_9,,vaiml_par_0,Supported +/Slice,,vaiml_par_0,Supported +/Slice_1,,vaiml_par_0,Supported +/Slice_10,,vaiml_par_0,Supported +/Slice_100,,vaiml_par_0,Supported +/Slice_101,,vaiml_par_0,Supported +/Slice_102,,vaiml_par_0,Supported +/Slice_103,,vaiml_par_0,Supported +/Slice_104,,vaiml_par_0,Supported +/Slice_105,,vaiml_par_0,Supported +/Slice_106,,vaiml_par_0,Supported +/Slice_107,,vaiml_par_0,Supported +/Slice_108,,vaiml_par_0,Supported +/Slice_109,,vaiml_par_0,Supported +/Slice_11,,vaiml_par_0,Supported +/Slice_110,,vaiml_par_0,Supported +/Slice_111,,vaiml_par_0,Supported +/Slice_112,,vaiml_par_0,Supported +/Slice_113,,vaiml_par_0,Supported +/Slice_114,,vaiml_par_0,Supported +/Slice_115,,vaiml_par_0,Supported +/Slice_116,,vaiml_par_0,Supported +/Slice_117,,vaiml_par_0,Supported +/Slice_118,,vaiml_par_0,Supported +/Slice_119,,vaiml_par_0,Supported +/Slice_12,,vaiml_par_0,Supported +/Slice_120,,vaiml_par_0,Supported +/Slice_121,,vaiml_par_0,Supported +/Slice_122,,vaiml_par_0,Supported +/Slice_123,,vaiml_par_0,Supported +/Slice_124,,vaiml_par_0,Supported +/Slice_125,,vaiml_par_0,Supported +/Slice_126,,vaiml_par_0,Supported +/Slice_127,,vaiml_par_0,Supported +/Slice_128,,vaiml_par_0,Supported +/Slice_129,,vaiml_par_0,Supported +/Slice_13,,vaiml_par_0,Supported +/Slice_130,,vaiml_par_0,Supported +/Slice_131,,vaiml_par_0,Supported +/Slice_132,,vaiml_par_0,Supported +/Slice_133,,vaiml_par_0,Supported +/Slice_134,,vaiml_par_0,Supported +/Slice_135,,vaiml_par_0,Supported +/Slice_136,,vaiml_par_0,Supported +/Slice_137,,vaiml_par_0,Supported +/Slice_138,,vaiml_par_0,Supported +/Slice_14,,vaiml_par_0,Supported +/Slice_15,,vaiml_par_0,Supported +/Slice_16,,vaiml_par_0,Supported +/Slice_17,,vaiml_par_0,Supported +/Slice_18,,vaiml_par_0,Supported +/Slice_19,,vaiml_par_0,Supported +/Slice_2,,vaiml_par_0,Supported +/Slice_20,,vaiml_par_0,Supported +/Slice_21,,vaiml_par_0,Supported +/Slice_22,,vaiml_par_0,Supported +/Slice_23,,vaiml_par_0,Supported +/Slice_24,,vaiml_par_0,Supported +/Slice_25,,vaiml_par_0,Supported +/Slice_26,,vaiml_par_0,Supported +/Slice_27,,vaiml_par_0,Supported +/Slice_28,,vaiml_par_0,Supported +/Slice_29,,vaiml_par_0,Supported +/Slice_3,,vaiml_par_0,Supported +/Slice_30,,vaiml_par_0,Supported +/Slice_31,,vaiml_par_0,Supported +/Slice_32,,vaiml_par_0,Supported +/Slice_33,,vaiml_par_0,Supported +/Slice_34,,vaiml_par_0,Supported +/Slice_35,,vaiml_par_0,Supported +/Slice_36,,vaiml_par_0,Supported +/Slice_37,,vaiml_par_0,Supported +/Slice_38,,vaiml_par_0,Supported +/Slice_39,,vaiml_par_0,Supported +/Slice_4,,vaiml_par_0,Supported +/Slice_40,,vaiml_par_0,Supported +/Slice_41,,vaiml_par_0,Supported +/Slice_42,,vaiml_par_0,Supported +/Slice_43,,vaiml_par_0,Supported +/Slice_44,,vaiml_par_0,Supported +/Slice_45,,vaiml_par_0,Supported +/Slice_46,,vaiml_par_0,Supported +/Slice_47,,vaiml_par_0,Supported +/Slice_48,,vaiml_par_0,Supported +/Slice_49,,vaiml_par_0,Supported +/Slice_5,,vaiml_par_0,Supported +/Slice_50,,vaiml_par_0,Supported +/Slice_51,,vaiml_par_0,Supported +/Slice_52,,vaiml_par_0,Supported +/Slice_53,,vaiml_par_0,Supported +/Slice_54,,vaiml_par_0,Supported +/Slice_55,,vaiml_par_0,Supported +/Slice_56,,vaiml_par_0,Supported +/Slice_57,,vaiml_par_0,Supported +/Slice_58,,vaiml_par_0,Supported +/Slice_59,,vaiml_par_0,Supported +/Slice_6,,vaiml_par_0,Supported +/Slice_60,,vaiml_par_0,Supported +/Slice_61,,vaiml_par_0,Supported +/Slice_62,,vaiml_par_0,Supported +/Slice_63,,vaiml_par_0,Supported +/Slice_64,,vaiml_par_0,Supported +/Slice_65,,vaiml_par_0,Supported +/Slice_66,,vaiml_par_0,Supported +/Slice_67,,vaiml_par_0,Supported +/Slice_68,,vaiml_par_0,Supported +/Slice_69,,vaiml_par_0,Supported +/Slice_7,,vaiml_par_0,Supported +/Slice_70,,vaiml_par_0,Supported +/Slice_71,,vaiml_par_0,Supported +/Slice_72,,vaiml_par_0,Supported +/Slice_73,,vaiml_par_0,Supported +/Slice_74,,vaiml_par_0,Supported +/Slice_75,,vaiml_par_0,Supported +/Slice_76,,vaiml_par_0,Supported +/Slice_77,,vaiml_par_0,Supported +/Slice_78,,vaiml_par_0,Supported +/Slice_79,,vaiml_par_0,Supported +/Slice_8,,vaiml_par_0,Supported +/Slice_80,,vaiml_par_0,Supported +/Slice_81,,vaiml_par_0,Supported +/Slice_82,,vaiml_par_0,Supported +/Slice_83,,vaiml_par_0,Supported +/Slice_84,,vaiml_par_0,Supported +/Slice_85,,vaiml_par_0,Supported +/Slice_86,,vaiml_par_0,Supported +/Slice_87,,vaiml_par_0,Supported +/Slice_88,,vaiml_par_0,Supported +/Slice_89,,vaiml_par_0,Supported +/Slice_9,,vaiml_par_0,Supported +/Slice_90,,vaiml_par_0,Supported +/Slice_91,,vaiml_par_0,Supported +/Slice_92,,vaiml_par_0,Supported +/Slice_93,,vaiml_par_0,Supported +/Slice_94,,vaiml_par_0,Supported +/Slice_95,,vaiml_par_0,Supported +/Slice_96,,vaiml_par_0,Supported +/Slice_97,,vaiml_par_0,Supported +/Slice_98,,vaiml_par_0,Supported +/Slice_99,,vaiml_par_0,Supported +/Softmax,,vaiml_par_0,Supported +/Softmax_1,,vaiml_par_0,Supported +/Softmax_10,,vaiml_par_0,Supported +/Softmax_11,,vaiml_par_0,Supported +/Softmax_12,,vaiml_par_0,Supported +/Softmax_13,,vaiml_par_0,Supported +/Softmax_14,,vaiml_par_0,Supported +/Softmax_2,,vaiml_par_0,Supported +/Softmax_3,,vaiml_par_0,Supported +/Softmax_4,,vaiml_par_0,Supported +/Softmax_5,,vaiml_par_0,Supported +/Softmax_6,,vaiml_par_0,Supported +/Softmax_7,,vaiml_par_0,Supported +/Softmax_8,,vaiml_par_0,Supported +/Softmax_9,,vaiml_par_0,Supported +/Split,,vaiml_par_0,Supported +/Split_1,,vaiml_par_0,Supported +/Split_10,,vaiml_par_0,Supported +/Split_11,,vaiml_par_0,Supported +/Split_12,,vaiml_par_0,Supported +/Split_13,,vaiml_par_0,Supported +/Split_14,,vaiml_par_0,Supported +/Split_15,,vaiml_par_0,Supported +/Split_16,,vaiml_par_0,Supported +/Split_17,,vaiml_par_0,Supported +/Split_18,,vaiml_par_0,Supported +/Split_19,,vaiml_par_0,Supported +/Split_2,,vaiml_par_0,Supported +/Split_20,,vaiml_par_0,Supported +/Split_21,,vaiml_par_0,Supported +/Split_22,,vaiml_par_0,Supported +/Split_23,,vaiml_par_0,Supported +/Split_24,,vaiml_par_0,Supported +/Split_25,,vaiml_par_0,Supported +/Split_26,,vaiml_par_0,Supported +/Split_27,,vaiml_par_0,Supported +/Split_28,,vaiml_par_0,Supported +/Split_29,,vaiml_par_0,Supported +/Split_3,,vaiml_par_0,Supported +/Split_4,,vaiml_par_0,Supported +/Split_5,,vaiml_par_0,Supported +/Split_6,,vaiml_par_0,Supported +/Split_7,,vaiml_par_0,Supported +/Split_8,,vaiml_par_0,Supported +/Split_9,,vaiml_par_0,Supported +/Sub_11,,vaiml_par_0,Supported +/Sub_14,,vaiml_par_0,Supported +/Sub_17,,vaiml_par_0,Supported +/Sub_2,,vaiml_par_0,Supported +/Sub_20,,vaiml_par_0,Supported +/Sub_23,,vaiml_par_0,Supported +/Sub_26,,vaiml_par_0,Supported +/Sub_29,,vaiml_par_0,Supported +/Sub_32,,vaiml_par_0,Supported +/Sub_35,,vaiml_par_0,Supported +/Sub_38,,vaiml_par_0,Supported +/Sub_41,,vaiml_par_0,Supported +/Sub_44,,vaiml_par_0,Supported +/Sub_5,,vaiml_par_0,Supported +/Sub_8,,vaiml_par_0,Supported +/Transpose,,vaiml_par_0,Supported +/Transpose_1,,vaiml_par_0,Supported +/Transpose_10,,vaiml_par_0,Supported +/Transpose_100,,vaiml_par_0,Supported +/Transpose_102,,vaiml_par_0,Supported +/Transpose_103,,vaiml_par_0,Supported +/Transpose_104,,vaiml_par_0,Supported +/Transpose_105,,vaiml_par_0,Supported +/Transpose_106,,vaiml_par_0,Supported +/Transpose_107,,vaiml_par_0,Supported +/Transpose_108,,vaiml_par_0,Supported +/Transpose_109,,vaiml_par_0,Supported +/Transpose_11,,vaiml_par_0,Supported +/Transpose_110,,vaiml_par_0,Supported +/Transpose_111,,vaiml_par_0,Supported +/Transpose_112,,vaiml_par_0,Supported +/Transpose_114,,vaiml_par_0,Supported +/Transpose_115,,vaiml_par_0,Supported +/Transpose_116,,vaiml_par_0,Supported +/Transpose_117,,vaiml_par_0,Supported +/Transpose_118,,vaiml_par_0,Supported +/Transpose_119,,vaiml_par_0,Supported +/Transpose_12,,vaiml_par_0,Supported +/Transpose_120,,vaiml_par_0,Supported +/Transpose_121,,vaiml_par_0,Supported +/Transpose_122,,vaiml_par_0,Supported +/Transpose_123,,vaiml_par_0,Supported +/Transpose_124,,vaiml_par_0,Supported +/Transpose_126,,vaiml_par_0,Supported +/Transpose_127,,vaiml_par_0,Supported +/Transpose_128,,vaiml_par_0,Supported +/Transpose_129,,vaiml_par_0,Supported +/Transpose_13,,vaiml_par_0,Supported +/Transpose_130,,vaiml_par_0,Supported +/Transpose_131,,vaiml_par_0,Supported +/Transpose_132,,vaiml_par_0,Supported +/Transpose_133,,vaiml_par_0,Supported +/Transpose_134,,vaiml_par_0,Supported +/Transpose_135,,vaiml_par_0,Supported +/Transpose_136,,vaiml_par_0,Supported +/Transpose_138,,vaiml_par_0,Supported +/Transpose_139,,vaiml_par_0,Supported +/Transpose_14,,vaiml_par_0,Supported +/Transpose_140,,vaiml_par_0,Supported +/Transpose_141,,vaiml_par_0,Supported +/Transpose_142,,vaiml_par_0,Supported +/Transpose_143,,vaiml_par_0,Supported +/Transpose_144,,vaiml_par_0,Supported +/Transpose_145,,vaiml_par_0,Supported +/Transpose_146,,vaiml_par_0,Supported +/Transpose_147,,vaiml_par_0,Supported +/Transpose_148,,vaiml_par_0,Supported +/Transpose_15,,vaiml_par_0,Supported +/Transpose_150,,vaiml_par_0,Supported +/Transpose_151,,vaiml_par_0,Supported +/Transpose_152,,vaiml_par_0,Supported +/Transpose_153,,vaiml_par_0,Supported +/Transpose_154,,vaiml_par_0,Supported +/Transpose_155,,vaiml_par_0,Supported +/Transpose_156,,vaiml_par_0,Supported +/Transpose_157,,vaiml_par_0,Supported +/Transpose_158,,vaiml_par_0,Supported +/Transpose_159,,vaiml_par_0,Supported +/Transpose_16,,vaiml_par_0,Supported +/Transpose_160,,vaiml_par_0,Supported +/Transpose_162,,vaiml_par_0,Supported +/Transpose_163,,vaiml_par_0,Supported +/Transpose_164,,vaiml_par_0,Supported +/Transpose_165,,vaiml_par_0,Supported +/Transpose_166,,vaiml_par_0,Supported +/Transpose_167,,vaiml_par_0,Supported +/Transpose_168,,vaiml_par_0,Supported +/Transpose_169,,vaiml_par_0,Supported +/Transpose_170,,vaiml_par_0,Supported +/Transpose_171,,vaiml_par_0,Supported +/Transpose_172,,vaiml_par_0,Supported +/Transpose_174,,vaiml_par_0,Supported +/Transpose_175,,vaiml_par_0,Supported +/Transpose_176,,vaiml_par_0,Supported +/Transpose_177,,vaiml_par_0,Supported +/Transpose_178,,vaiml_par_0,Supported +/Transpose_179,,vaiml_par_0,Supported +/Transpose_18,,vaiml_par_0,Supported +/Transpose_180,,vaiml_par_0,Supported +/Transpose_181,,vaiml_par_0,Supported +/Transpose_19,,vaiml_par_0,Supported +/Transpose_2,,vaiml_par_0,Supported +/Transpose_20,,vaiml_par_0,Supported +/Transpose_21,,vaiml_par_0,Supported +/Transpose_22,,vaiml_par_0,Supported +/Transpose_23,,vaiml_par_0,Supported +/Transpose_24,,vaiml_par_0,Supported +/Transpose_25,,vaiml_par_0,Supported +/Transpose_26,,vaiml_par_0,Supported +/Transpose_27,,vaiml_par_0,Supported +/Transpose_28,,vaiml_par_0,Supported +/Transpose_3,,vaiml_par_0,Supported +/Transpose_30,,vaiml_par_0,Supported +/Transpose_31,,vaiml_par_0,Supported +/Transpose_32,,vaiml_par_0,Supported +/Transpose_33,,vaiml_par_0,Supported +/Transpose_34,,vaiml_par_0,Supported +/Transpose_35,,vaiml_par_0,Supported +/Transpose_36,,vaiml_par_0,Supported +/Transpose_37,,vaiml_par_0,Supported +/Transpose_38,,vaiml_par_0,Supported +/Transpose_39,,vaiml_par_0,Supported +/Transpose_4,,vaiml_par_0,Supported +/Transpose_40,,vaiml_par_0,Supported +/Transpose_42,,vaiml_par_0,Supported +/Transpose_43,,vaiml_par_0,Supported +/Transpose_44,,vaiml_par_0,Supported +/Transpose_45,,vaiml_par_0,Supported +/Transpose_46,,vaiml_par_0,Supported +/Transpose_47,,vaiml_par_0,Supported +/Transpose_48,,vaiml_par_0,Supported +/Transpose_49,,vaiml_par_0,Supported +/Transpose_50,,vaiml_par_0,Supported +/Transpose_51,,vaiml_par_0,Supported +/Transpose_52,,vaiml_par_0,Supported +/Transpose_54,,vaiml_par_0,Supported +/Transpose_55,,vaiml_par_0,Supported +/Transpose_56,,vaiml_par_0,Supported +/Transpose_57,,vaiml_par_0,Supported +/Transpose_58,,vaiml_par_0,Supported +/Transpose_59,,vaiml_par_0,Supported +/Transpose_6,,vaiml_par_0,Supported +/Transpose_60,,vaiml_par_0,Supported +/Transpose_61,,vaiml_par_0,Supported +/Transpose_62,,vaiml_par_0,Supported +/Transpose_63,,vaiml_par_0,Supported +/Transpose_64,,vaiml_par_0,Supported +/Transpose_66,,vaiml_par_0,Supported +/Transpose_67,,vaiml_par_0,Supported +/Transpose_68,,vaiml_par_0,Supported +/Transpose_69,,vaiml_par_0,Supported +/Transpose_7,,vaiml_par_0,Supported +/Transpose_70,,vaiml_par_0,Supported +/Transpose_71,,vaiml_par_0,Supported +/Transpose_72,,vaiml_par_0,Supported +/Transpose_73,,vaiml_par_0,Supported +/Transpose_74,,vaiml_par_0,Supported +/Transpose_75,,vaiml_par_0,Supported +/Transpose_76,,vaiml_par_0,Supported +/Transpose_78,,vaiml_par_0,Supported +/Transpose_79,,vaiml_par_0,Supported +/Transpose_8,,vaiml_par_0,Supported +/Transpose_80,,vaiml_par_0,Supported +/Transpose_81,,vaiml_par_0,Supported +/Transpose_82,,vaiml_par_0,Supported +/Transpose_83,,vaiml_par_0,Supported +/Transpose_84,,vaiml_par_0,Supported +/Transpose_85,,vaiml_par_0,Supported +/Transpose_86,,vaiml_par_0,Supported +/Transpose_87,,vaiml_par_0,Supported +/Transpose_88,,vaiml_par_0,Supported +/Transpose_9,,vaiml_par_0,Supported +/Transpose_90,,vaiml_par_0,Supported +/Transpose_91,,vaiml_par_0,Supported +/Transpose_92,,vaiml_par_0,Supported +/Transpose_93,,vaiml_par_0,Supported +/Transpose_94,,vaiml_par_0,Supported +/Transpose_95,,vaiml_par_0,Supported +/Transpose_96,,vaiml_par_0,Supported +/Transpose_97,,vaiml_par_0,Supported +/Transpose_98,,vaiml_par_0,Supported +/Transpose_99,,vaiml_par_0,Supported +/Unsqueeze,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_1,,vaiml_par_0,Supported +/Unsqueeze_116,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_117,,vaiml_par_0,Supported +/Unsqueeze_119,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_120,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_150,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_151,,vaiml_par_0,Supported +/Unsqueeze_153,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_154,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_184,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_185,,vaiml_par_0,Supported +/Unsqueeze_187,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_188,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_218,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_219,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_220,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_221,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_222,,vaiml_par_0,Supported +/Unsqueeze_223,,vaiml_par_0,Supported +/Unsqueeze_224,,vaiml_par_0,Supported +/Unsqueeze_225,,vaiml_par_0,Supported +/Unsqueeze_226,,vaiml_par_0,Supported +/Unsqueeze_227,,vaiml_par_0,Supported +/Unsqueeze_228,,vaiml_par_0,Supported +/Unsqueeze_229,,vaiml_par_0,Supported +/Unsqueeze_230,,vaiml_par_0,Supported +/Unsqueeze_231,,vaiml_par_0,Supported +/Unsqueeze_232,,vaiml_par_0,Supported +/Unsqueeze_233,,vaiml_par_0,Supported +/Unsqueeze_234,,vaiml_par_0,Supported +/Unsqueeze_235,,vaiml_par_0,Supported +/Unsqueeze_236,,vaiml_par_0,Supported +/Unsqueeze_237,,vaiml_par_0,Supported +/Unsqueeze_238,,vaiml_par_0,Supported +/Unsqueeze_239,,vaiml_par_0,Supported +/Unsqueeze_240,,vaiml_par_0,Supported +/Unsqueeze_241,,vaiml_par_0,Supported +/Unsqueeze_242,,vaiml_par_0,Supported +/Unsqueeze_243,,vaiml_par_0,Supported +/Unsqueeze_244,,vaiml_par_0,Supported +/Unsqueeze_245,,vaiml_par_0,Supported +/Unsqueeze_247,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_248,,vaiml_par_0,Supported +/Unsqueeze_250,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_251,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_281,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_282,,vaiml_par_0,Supported +/Unsqueeze_284,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_285,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_3,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_315,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_316,,vaiml_par_0,Supported +/Unsqueeze_318,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_319,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_34,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_349,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_35,,vaiml_par_0,Supported +/Unsqueeze_350,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_351,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_352,,vaiml_par_0,Supported +/Unsqueeze_353,,vaiml_par_0,Supported +/Unsqueeze_354,,vaiml_par_0,Supported +/Unsqueeze_355,,vaiml_par_0,Supported +/Unsqueeze_356,,vaiml_par_0,Supported +/Unsqueeze_357,,vaiml_par_0,Supported +/Unsqueeze_358,,vaiml_par_0,Supported +/Unsqueeze_359,,vaiml_par_0,Supported +/Unsqueeze_360,,vaiml_par_0,Supported +/Unsqueeze_361,,vaiml_par_0,Supported +/Unsqueeze_362,,vaiml_par_0,Supported +/Unsqueeze_363,,vaiml_par_0,Supported +/Unsqueeze_364,,vaiml_par_0,Supported +/Unsqueeze_365,,vaiml_par_0,Supported +/Unsqueeze_366,,vaiml_par_0,Supported +/Unsqueeze_367,,vaiml_par_0,Supported +/Unsqueeze_368,,vaiml_par_0,Supported +/Unsqueeze_369,,vaiml_par_0,Supported +/Unsqueeze_37,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_371,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_372,,vaiml_par_0,Supported +/Unsqueeze_374,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_375,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_38,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_4,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_405,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_406,,vaiml_par_0,Supported +/Unsqueeze_408,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_409,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_439,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_440,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_441,,vaiml_par_0,Supported +/Unsqueeze_442,,vaiml_par_0,Supported +/Unsqueeze_443,,vaiml_par_0,Supported +/Unsqueeze_444,,vaiml_par_0,Supported +/Unsqueeze_445,,vaiml_par_0,Supported +/Unsqueeze_446,,vaiml_par_0,Supported +/Unsqueeze_447,,vaiml_par_0,Supported +/Unsqueeze_448,,vaiml_par_0,Supported +/Unsqueeze_449,,vaiml_par_0,Supported +/Unsqueeze_450,,vaiml_par_0,Supported +/Unsqueeze_451,,vaiml_par_0,Supported +/Unsqueeze_452,,vaiml_par_0,Supported +/Unsqueeze_454,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_455,,vaiml_par_0,Supported +/Unsqueeze_457,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_458,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_488,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_489,,vaiml_par_0,Supported +/Unsqueeze_491,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_492,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_522,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_523,,vaiml_par_0,Supported +/Unsqueeze_525,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_526,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_556,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_557,,vaiml_par_0,Supported +/Unsqueeze_559,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_560,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_590,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_591,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_592,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_593,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_594,,vaiml_par_0,Supported +/Unsqueeze_595,,vaiml_par_0,Supported +/Unsqueeze_596,,vaiml_par_0,Supported +/Unsqueeze_597,,vaiml_par_0,Supported +/Unsqueeze_598,,vaiml_par_0,Supported +/Unsqueeze_599,,vaiml_par_0,Supported +/Unsqueeze_600,,vaiml_par_0,Supported +/Unsqueeze_601,,vaiml_par_0,Supported +/Unsqueeze_602,,vaiml_par_0,Supported +/Unsqueeze_603,,vaiml_par_0,Supported +/Unsqueeze_604,,vaiml_par_0,Supported +/Unsqueeze_605,,vaiml_par_0,Supported +/Unsqueeze_606,,vaiml_par_0,Supported +/Unsqueeze_607,,vaiml_par_0,Supported +/Unsqueeze_608,,vaiml_par_0,Supported +/Unsqueeze_609,,vaiml_par_0,Supported +/Unsqueeze_610,,vaiml_par_0,Supported +/Unsqueeze_611,,vaiml_par_0,Supported +/Unsqueeze_612,,vaiml_par_0,Supported +/Unsqueeze_613,,vaiml_par_0,Supported +/Unsqueeze_614,,vaiml_par_0,Supported +/Unsqueeze_615,,vaiml_par_0,Supported +/Unsqueeze_616,,vaiml_par_0,Supported +/Unsqueeze_617,,vaiml_par_0,Supported +/Unsqueeze_68,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_69,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_70,,vaiml_par_0,Supported +/Unsqueeze_71,,vaiml_par_0,Supported +/Unsqueeze_72,,vaiml_par_0,Supported +/Unsqueeze_73,,vaiml_par_0,Supported +/Unsqueeze_74,,vaiml_par_0,Supported +/Unsqueeze_75,,vaiml_par_0,Supported +/Unsqueeze_76,,vaiml_par_0,Supported +/Unsqueeze_77,,vaiml_par_0,Supported +/Unsqueeze_78,,vaiml_par_0,Supported +/Unsqueeze_79,,vaiml_par_0,Supported +/Unsqueeze_80,,vaiml_par_0,Supported +/Unsqueeze_81,,vaiml_par_0,Supported +/Unsqueeze_82,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_83,,vaiml_par_0,Supported +/Unsqueeze_85,,,Not supported. Check aie_unsupported_original_ops.json +/Unsqueeze_86,,,Not supported. Check aie_unsupported_original_ops.json +/activation/Mul,,vaiml_par_0,Supported +/activation/Sigmoid,,vaiml_par_0,Supported +/activation/Sub,,vaiml_par_0,Supported +/activation_1/Mul,,vaiml_par_0,Supported +/activation_1/Sigmoid,,vaiml_par_0,Supported +/activation_1/Sub,,vaiml_par_0,Supported +/activation_10/Mul,,vaiml_par_0,Supported +/activation_10/Sigmoid,,vaiml_par_0,Supported +/activation_10/Sub,,vaiml_par_0,Supported +/activation_11/Mul,,vaiml_par_0,Supported +/activation_11/Sigmoid,,vaiml_par_0,Supported +/activation_11/Sub,,vaiml_par_0,Supported +/activation_12/Mul,,vaiml_par_0,Supported +/activation_12/Sigmoid,,vaiml_par_0,Supported +/activation_12/Sub,,vaiml_par_0,Supported +/activation_13/Mul,,vaiml_par_0,Supported +/activation_13/Sigmoid,,vaiml_par_0,Supported +/activation_13/Sub,,vaiml_par_0,Supported +/activation_14/Mul,,vaiml_par_0,Supported +/activation_14/Sigmoid,,vaiml_par_0,Supported +/activation_14/Sub,,vaiml_par_0,Supported +/activation_15/Mul,,vaiml_par_0,Supported +/activation_15/Sigmoid,,vaiml_par_0,Supported +/activation_15/Sub,,vaiml_par_0,Supported +/activation_16/Mul,,vaiml_par_0,Supported +/activation_16/Sigmoid,,vaiml_par_0,Supported +/activation_16/Sub,,vaiml_par_0,Supported +/activation_17/Mul,,vaiml_par_0,Supported +/activation_17/Sigmoid,,vaiml_par_0,Supported +/activation_17/Sub,,vaiml_par_0,Supported +/activation_18/Mul,,vaiml_par_0,Supported +/activation_18/Sigmoid,,vaiml_par_0,Supported +/activation_18/Sub,,vaiml_par_0,Supported +/activation_19/Mul,,vaiml_par_0,Supported +/activation_19/Sigmoid,,vaiml_par_0,Supported +/activation_19/Sub,,vaiml_par_0,Supported +/activation_2/Mul,,vaiml_par_0,Supported +/activation_2/Sigmoid,,vaiml_par_0,Supported +/activation_2/Sub,,vaiml_par_0,Supported +/activation_20/Mul,,vaiml_par_0,Supported +/activation_20/Sigmoid,,vaiml_par_0,Supported +/activation_20/Sub,,vaiml_par_0,Supported +/activation_21/Mul,,vaiml_par_0,Supported +/activation_21/Sigmoid,,vaiml_par_0,Supported +/activation_21/Sub,,vaiml_par_0,Supported +/activation_22/Mul,,vaiml_par_0,Supported +/activation_22/Sigmoid,,vaiml_par_0,Supported +/activation_22/Sub,,vaiml_par_0,Supported +/activation_23/Mul,,vaiml_par_0,Supported +/activation_23/Sigmoid,,vaiml_par_0,Supported +/activation_23/Sub,,vaiml_par_0,Supported +/activation_24/Mul,,vaiml_par_0,Supported +/activation_24/Sigmoid,,vaiml_par_0,Supported +/activation_24/Sub,,vaiml_par_0,Supported +/activation_25/Mul,,vaiml_par_0,Supported +/activation_25/Sigmoid,,vaiml_par_0,Supported +/activation_25/Sub,,vaiml_par_0,Supported +/activation_26/Mul,,vaiml_par_0,Supported +/activation_26/Sigmoid,,vaiml_par_0,Supported +/activation_26/Sub,,vaiml_par_0,Supported +/activation_27/Mul,,vaiml_par_0,Supported +/activation_27/Sigmoid,,vaiml_par_0,Supported +/activation_27/Sub,,vaiml_par_0,Supported +/activation_28/Mul,,vaiml_par_0,Supported +/activation_28/Sigmoid,,vaiml_par_0,Supported +/activation_28/Sub,,vaiml_par_0,Supported +/activation_29/Mul,,vaiml_par_0,Supported +/activation_29/Sigmoid,,vaiml_par_0,Supported +/activation_29/Sub,,vaiml_par_0,Supported +/activation_3/Mul,,vaiml_par_0,Supported +/activation_3/Sigmoid,,vaiml_par_0,Supported +/activation_3/Sub,,vaiml_par_0,Supported +/activation_4/Mul,,vaiml_par_0,Supported +/activation_4/Sigmoid,,vaiml_par_0,Supported +/activation_4/Sub,,vaiml_par_0,Supported +/activation_5/Mul,,vaiml_par_0,Supported +/activation_5/Sigmoid,,vaiml_par_0,Supported +/activation_5/Sub,,vaiml_par_0,Supported +/activation_6/Mul,,vaiml_par_0,Supported +/activation_6/Sigmoid,,vaiml_par_0,Supported +/activation_6/Sub,,vaiml_par_0,Supported +/activation_7/Mul,,vaiml_par_0,Supported +/activation_7/Sigmoid,,vaiml_par_0,Supported +/activation_7/Sub,,vaiml_par_0,Supported +/activation_8/Mul,,vaiml_par_0,Supported +/activation_8/Sigmoid,,vaiml_par_0,Supported +/activation_8/Sub,,vaiml_par_0,Supported +/activation_9/Mul,,vaiml_par_0,Supported +/activation_9/Sigmoid,,vaiml_par_0,Supported +/activation_9/Sub,,vaiml_par_0,Supported +/depthwise_conv/Conv,,vaiml_par_0,Supported +/depthwise_conv_1/Conv,,vaiml_par_0,Supported +/depthwise_conv_10/Conv,,vaiml_par_0,Supported +/depthwise_conv_11/Conv,,vaiml_par_0,Supported +/depthwise_conv_12/Conv,,vaiml_par_0,Supported +/depthwise_conv_13/Conv,,vaiml_par_0,Supported +/depthwise_conv_14/Conv,,vaiml_par_0,Supported +/depthwise_conv_15/Conv,,vaiml_par_0,Supported +/depthwise_conv_16/Conv,,vaiml_par_0,Supported +/depthwise_conv_17/Conv,,vaiml_par_0,Supported +/depthwise_conv_18/Conv,,vaiml_par_0,Supported +/depthwise_conv_19/Conv,,vaiml_par_0,Supported +/depthwise_conv_2/Conv,,vaiml_par_0,Supported +/depthwise_conv_20/Conv,,vaiml_par_0,Supported +/depthwise_conv_21/Conv,,vaiml_par_0,Supported +/depthwise_conv_22/Conv,,vaiml_par_0,Supported +/depthwise_conv_23/Conv,,vaiml_par_0,Supported +/depthwise_conv_24/Conv,,vaiml_par_0,Supported +/depthwise_conv_25/Conv,,vaiml_par_0,Supported +/depthwise_conv_26/Conv,,vaiml_par_0,Supported +/depthwise_conv_27/Conv,,vaiml_par_0,Supported +/depthwise_conv_28/Conv,,vaiml_par_0,Supported +/depthwise_conv_29/Conv,,vaiml_par_0,Supported +/depthwise_conv_3/Conv,,vaiml_par_0,Supported +/depthwise_conv_4/Conv,,vaiml_par_0,Supported +/depthwise_conv_5/Conv,,vaiml_par_0,Supported +/depthwise_conv_6/Conv,,vaiml_par_0,Supported +/depthwise_conv_7/Conv,,vaiml_par_0,Supported +/depthwise_conv_8/Conv,,vaiml_par_0,Supported +/depthwise_conv_9/Conv,,vaiml_par_0,Supported +/downsample/Mul,,vaiml_par_0,Supported +/downsample/Mul_1,,vaiml_par_0,Supported +/downsample/ReduceSum,,vaiml_par_0,Supported +/downsample/ReduceSum_1,,vaiml_par_0,Supported +/downsample/Reshape,,vaiml_par_0,Supported +/downsample/Softmax,,vaiml_par_0,Supported +/downsample_1/Mul,,vaiml_par_0,Supported +/downsample_1/Mul_1,,vaiml_par_0,Supported +/downsample_1/ReduceSum,,vaiml_par_0,Supported +/downsample_1/ReduceSum_1,,vaiml_par_0,Supported +/downsample_1/Reshape,,vaiml_par_0,Supported +/downsample_1/Softmax,,vaiml_par_0,Supported +/downsample_2/Mul,,vaiml_par_0,Supported +/downsample_2/Mul_1,,vaiml_par_0,Supported +/downsample_2/ReduceSum,,vaiml_par_0,Supported +/downsample_2/ReduceSum_1,,vaiml_par_0,Supported +/downsample_2/Reshape,,vaiml_par_0,Supported +/downsample_2/Softmax,,vaiml_par_0,Supported +/downsample_3/Mul,,vaiml_par_0,Supported +/downsample_3/Mul_1,,vaiml_par_0,Supported +/downsample_3/ReduceSum,,vaiml_par_0,Supported +/downsample_3/ReduceSum_1,,vaiml_par_0,Supported +/downsample_3/Reshape,,vaiml_par_0,Supported +/downsample_3/Softmax,,vaiml_par_0,Supported +/downsample_output/Mul,,vaiml_par_0,Supported +/downsample_output/Mul_1,,vaiml_par_0,Supported +/downsample_output/ReduceSum,,vaiml_par_0,Supported +/downsample_output/ReduceSum_1,,vaiml_par_0,Supported +/downsample_output/Reshape,,vaiml_par_0,Supported +/downsample_output/Softmax,,vaiml_par_0,Supported +/encoder_embed/Reshape_new_reshape,,vaiml_par_0,Supported +/encoder_embed/Transpose,,vaiml_par_0,Supported +/encoder_embed/Unsqueeze,,vaiml_par_0,Supported +/encoder_embed/conv/conv.0/Conv,,vaiml_par_0,Supported +/encoder_embed/conv/conv.2/Mul,,vaiml_par_0,Supported +/encoder_embed/conv/conv.2/Sigmoid,,vaiml_par_0,Supported +/encoder_embed/conv/conv.2/Sub,,vaiml_par_0,Supported +/encoder_embed/conv/conv.3/Conv,,vaiml_par_0,Supported +/encoder_embed/conv/conv.5/Mul,,vaiml_par_0,Supported +/encoder_embed/conv/conv.5/Sigmoid,,vaiml_par_0,Supported +/encoder_embed/conv/conv.5/Sub,,vaiml_par_0,Supported +/encoder_embed/conv/conv.6/Conv,,vaiml_par_0,Supported +/encoder_embed/conv/conv.8/Mul,,vaiml_par_0,Supported +/encoder_embed/conv/conv.8/Sigmoid,,vaiml_par_0,Supported +/encoder_embed/conv/conv.8/Sub,,vaiml_par_0,Supported +/encoder_embed/out/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/encoder_proj/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward1/activation/Mul,,vaiml_par_0,Supported +/feed_forward1/activation/Sigmoid,,vaiml_par_0,Supported +/feed_forward1/activation/Sub,,vaiml_par_0,Supported +/feed_forward1/activation_1/Mul,,vaiml_par_0,Supported +/feed_forward1/activation_1/Sigmoid,,vaiml_par_0,Supported +/feed_forward1/activation_1/Sub,,vaiml_par_0,Supported +/feed_forward1/activation_10/Mul,,vaiml_par_0,Supported +/feed_forward1/activation_10/Sigmoid,,vaiml_par_0,Supported +/feed_forward1/activation_10/Sub,,vaiml_par_0,Supported +/feed_forward1/activation_11/Mul,,vaiml_par_0,Supported +/feed_forward1/activation_11/Sigmoid,,vaiml_par_0,Supported +/feed_forward1/activation_11/Sub,,vaiml_par_0,Supported +/feed_forward1/activation_12/Mul,,vaiml_par_0,Supported +/feed_forward1/activation_12/Sigmoid,,vaiml_par_0,Supported +/feed_forward1/activation_12/Sub,,vaiml_par_0,Supported +/feed_forward1/activation_13/Mul,,vaiml_par_0,Supported +/feed_forward1/activation_13/Sigmoid,,vaiml_par_0,Supported +/feed_forward1/activation_13/Sub,,vaiml_par_0,Supported +/feed_forward1/activation_14/Mul,,vaiml_par_0,Supported +/feed_forward1/activation_14/Sigmoid,,vaiml_par_0,Supported +/feed_forward1/activation_14/Sub,,vaiml_par_0,Supported +/feed_forward1/activation_2/Mul,,vaiml_par_0,Supported +/feed_forward1/activation_2/Sigmoid,,vaiml_par_0,Supported +/feed_forward1/activation_2/Sub,,vaiml_par_0,Supported +/feed_forward1/activation_3/Mul,,vaiml_par_0,Supported +/feed_forward1/activation_3/Sigmoid,,vaiml_par_0,Supported +/feed_forward1/activation_3/Sub,,vaiml_par_0,Supported +/feed_forward1/activation_4/Mul,,vaiml_par_0,Supported +/feed_forward1/activation_4/Sigmoid,,vaiml_par_0,Supported +/feed_forward1/activation_4/Sub,,vaiml_par_0,Supported +/feed_forward1/activation_5/Mul,,vaiml_par_0,Supported +/feed_forward1/activation_5/Sigmoid,,vaiml_par_0,Supported +/feed_forward1/activation_5/Sub,,vaiml_par_0,Supported +/feed_forward1/activation_6/Mul,,vaiml_par_0,Supported +/feed_forward1/activation_6/Sigmoid,,vaiml_par_0,Supported +/feed_forward1/activation_6/Sub,,vaiml_par_0,Supported +/feed_forward1/activation_7/Mul,,vaiml_par_0,Supported +/feed_forward1/activation_7/Sigmoid,,vaiml_par_0,Supported +/feed_forward1/activation_7/Sub,,vaiml_par_0,Supported +/feed_forward1/activation_8/Mul,,vaiml_par_0,Supported +/feed_forward1/activation_8/Sigmoid,,vaiml_par_0,Supported +/feed_forward1/activation_8/Sub,,vaiml_par_0,Supported +/feed_forward1/activation_9/Mul,,vaiml_par_0,Supported +/feed_forward1/activation_9/Sigmoid,,vaiml_par_0,Supported +/feed_forward1/activation_9/Sub,,vaiml_par_0,Supported +/feed_forward1/in_proj/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward1/in_proj_1/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward1/in_proj_10/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward1/in_proj_11/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward1/in_proj_12/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward1/in_proj_13/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward1/in_proj_14/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward1/in_proj_2/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward1/in_proj_3/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward1/in_proj_4/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward1/in_proj_5/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward1/in_proj_6/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward1/in_proj_7/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward1/in_proj_8/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward1/in_proj_9/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward1/out_proj/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward1/out_proj_1/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward1/out_proj_10/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward1/out_proj_11/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward1/out_proj_12/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward1/out_proj_13/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward1/out_proj_14/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward1/out_proj_2/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward1/out_proj_3/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward1/out_proj_4/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward1/out_proj_5/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward1/out_proj_6/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward1/out_proj_7/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward1/out_proj_8/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward1/out_proj_9/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward2/activation/Mul,,vaiml_par_0,Supported +/feed_forward2/activation/Sigmoid,,vaiml_par_0,Supported +/feed_forward2/activation/Sub,,vaiml_par_0,Supported +/feed_forward2/activation_1/Mul,,vaiml_par_0,Supported +/feed_forward2/activation_1/Sigmoid,,vaiml_par_0,Supported +/feed_forward2/activation_1/Sub,,vaiml_par_0,Supported +/feed_forward2/activation_10/Mul,,vaiml_par_0,Supported +/feed_forward2/activation_10/Sigmoid,,vaiml_par_0,Supported +/feed_forward2/activation_10/Sub,,vaiml_par_0,Supported +/feed_forward2/activation_11/Mul,,vaiml_par_0,Supported +/feed_forward2/activation_11/Sigmoid,,vaiml_par_0,Supported +/feed_forward2/activation_11/Sub,,vaiml_par_0,Supported +/feed_forward2/activation_12/Mul,,vaiml_par_0,Supported +/feed_forward2/activation_12/Sigmoid,,vaiml_par_0,Supported +/feed_forward2/activation_12/Sub,,vaiml_par_0,Supported +/feed_forward2/activation_13/Mul,,vaiml_par_0,Supported +/feed_forward2/activation_13/Sigmoid,,vaiml_par_0,Supported +/feed_forward2/activation_13/Sub,,vaiml_par_0,Supported +/feed_forward2/activation_14/Mul,,vaiml_par_0,Supported +/feed_forward2/activation_14/Sigmoid,,vaiml_par_0,Supported +/feed_forward2/activation_14/Sub,,vaiml_par_0,Supported +/feed_forward2/activation_2/Mul,,vaiml_par_0,Supported +/feed_forward2/activation_2/Sigmoid,,vaiml_par_0,Supported +/feed_forward2/activation_2/Sub,,vaiml_par_0,Supported +/feed_forward2/activation_3/Mul,,vaiml_par_0,Supported +/feed_forward2/activation_3/Sigmoid,,vaiml_par_0,Supported +/feed_forward2/activation_3/Sub,,vaiml_par_0,Supported +/feed_forward2/activation_4/Mul,,vaiml_par_0,Supported +/feed_forward2/activation_4/Sigmoid,,vaiml_par_0,Supported +/feed_forward2/activation_4/Sub,,vaiml_par_0,Supported +/feed_forward2/activation_5/Mul,,vaiml_par_0,Supported +/feed_forward2/activation_5/Sigmoid,,vaiml_par_0,Supported +/feed_forward2/activation_5/Sub,,vaiml_par_0,Supported +/feed_forward2/activation_6/Mul,,vaiml_par_0,Supported +/feed_forward2/activation_6/Sigmoid,,vaiml_par_0,Supported +/feed_forward2/activation_6/Sub,,vaiml_par_0,Supported +/feed_forward2/activation_7/Mul,,vaiml_par_0,Supported +/feed_forward2/activation_7/Sigmoid,,vaiml_par_0,Supported +/feed_forward2/activation_7/Sub,,vaiml_par_0,Supported +/feed_forward2/activation_8/Mul,,vaiml_par_0,Supported +/feed_forward2/activation_8/Sigmoid,,vaiml_par_0,Supported +/feed_forward2/activation_8/Sub,,vaiml_par_0,Supported +/feed_forward2/activation_9/Mul,,vaiml_par_0,Supported +/feed_forward2/activation_9/Sigmoid,,vaiml_par_0,Supported +/feed_forward2/activation_9/Sub,,vaiml_par_0,Supported +/feed_forward2/in_proj/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward2/in_proj_1/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward2/in_proj_10/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward2/in_proj_11/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward2/in_proj_12/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward2/in_proj_13/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward2/in_proj_14/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward2/in_proj_2/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward2/in_proj_3/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward2/in_proj_4/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward2/in_proj_5/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward2/in_proj_6/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward2/in_proj_7/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward2/in_proj_8/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward2/in_proj_9/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward2/out_proj/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward2/out_proj_1/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward2/out_proj_10/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward2/out_proj_11/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward2/out_proj_12/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward2/out_proj_13/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward2/out_proj_14/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward2/out_proj_2/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward2/out_proj_3/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward2/out_proj_4/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward2/out_proj_5/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward2/out_proj_6/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward2/out_proj_7/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward2/out_proj_8/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward2/out_proj_9/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward3/activation/Mul,,vaiml_par_0,Supported +/feed_forward3/activation/Sigmoid,,vaiml_par_0,Supported +/feed_forward3/activation/Sub,,vaiml_par_0,Supported +/feed_forward3/activation_1/Mul,,vaiml_par_0,Supported +/feed_forward3/activation_1/Sigmoid,,vaiml_par_0,Supported +/feed_forward3/activation_1/Sub,,vaiml_par_0,Supported +/feed_forward3/activation_10/Mul,,vaiml_par_0,Supported +/feed_forward3/activation_10/Sigmoid,,vaiml_par_0,Supported +/feed_forward3/activation_10/Sub,,vaiml_par_0,Supported +/feed_forward3/activation_11/Mul,,vaiml_par_0,Supported +/feed_forward3/activation_11/Sigmoid,,vaiml_par_0,Supported +/feed_forward3/activation_11/Sub,,vaiml_par_0,Supported +/feed_forward3/activation_12/Mul,,vaiml_par_0,Supported +/feed_forward3/activation_12/Sigmoid,,vaiml_par_0,Supported +/feed_forward3/activation_12/Sub,,vaiml_par_0,Supported +/feed_forward3/activation_13/Mul,,vaiml_par_0,Supported +/feed_forward3/activation_13/Sigmoid,,vaiml_par_0,Supported +/feed_forward3/activation_13/Sub,,vaiml_par_0,Supported +/feed_forward3/activation_14/Mul,,vaiml_par_0,Supported +/feed_forward3/activation_14/Sigmoid,,vaiml_par_0,Supported +/feed_forward3/activation_14/Sub,,vaiml_par_0,Supported +/feed_forward3/activation_2/Mul,,vaiml_par_0,Supported +/feed_forward3/activation_2/Sigmoid,,vaiml_par_0,Supported +/feed_forward3/activation_2/Sub,,vaiml_par_0,Supported +/feed_forward3/activation_3/Mul,,vaiml_par_0,Supported +/feed_forward3/activation_3/Sigmoid,,vaiml_par_0,Supported +/feed_forward3/activation_3/Sub,,vaiml_par_0,Supported +/feed_forward3/activation_4/Mul,,vaiml_par_0,Supported +/feed_forward3/activation_4/Sigmoid,,vaiml_par_0,Supported +/feed_forward3/activation_4/Sub,,vaiml_par_0,Supported +/feed_forward3/activation_5/Mul,,vaiml_par_0,Supported +/feed_forward3/activation_5/Sigmoid,,vaiml_par_0,Supported +/feed_forward3/activation_5/Sub,,vaiml_par_0,Supported +/feed_forward3/activation_6/Mul,,vaiml_par_0,Supported +/feed_forward3/activation_6/Sigmoid,,vaiml_par_0,Supported +/feed_forward3/activation_6/Sub,,vaiml_par_0,Supported +/feed_forward3/activation_7/Mul,,vaiml_par_0,Supported +/feed_forward3/activation_7/Sigmoid,,vaiml_par_0,Supported +/feed_forward3/activation_7/Sub,,vaiml_par_0,Supported +/feed_forward3/activation_8/Mul,,vaiml_par_0,Supported +/feed_forward3/activation_8/Sigmoid,,vaiml_par_0,Supported +/feed_forward3/activation_8/Sub,,vaiml_par_0,Supported +/feed_forward3/activation_9/Mul,,vaiml_par_0,Supported +/feed_forward3/activation_9/Sigmoid,,vaiml_par_0,Supported +/feed_forward3/activation_9/Sub,,vaiml_par_0,Supported +/feed_forward3/in_proj/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward3/in_proj_1/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward3/in_proj_10/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward3/in_proj_11/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward3/in_proj_12/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward3/in_proj_13/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward3/in_proj_14/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward3/in_proj_2/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward3/in_proj_3/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward3/in_proj_4/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward3/in_proj_5/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward3/in_proj_6/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward3/in_proj_7/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward3/in_proj_8/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward3/in_proj_9/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward3/out_proj/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward3/out_proj_1/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward3/out_proj_10/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward3/out_proj_11/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward3/out_proj_12/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward3/out_proj_13/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward3/out_proj_14/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward3/out_proj_2/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward3/out_proj_3/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward3/out_proj_4/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward3/out_proj_5/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward3/out_proj_6/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward3/out_proj_7/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward3/out_proj_8/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/feed_forward3/out_proj_9/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/in_proj/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/in_proj2/MatMul,,vaiml_par_0,Supported +/in_proj2_1/MatMul,,vaiml_par_0,Supported +/in_proj2_10/MatMul,,vaiml_par_0,Supported +/in_proj2_11/MatMul,,vaiml_par_0,Supported +/in_proj2_12/MatMul,,vaiml_par_0,Supported +/in_proj2_13/MatMul,,vaiml_par_0,Supported +/in_proj2_14/MatMul,,vaiml_par_0,Supported +/in_proj2_2/MatMul,,vaiml_par_0,Supported +/in_proj2_3/MatMul,,vaiml_par_0,Supported +/in_proj2_4/MatMul,,vaiml_par_0,Supported +/in_proj2_5/MatMul,,vaiml_par_0,Supported +/in_proj2_6/MatMul,,vaiml_par_0,Supported +/in_proj2_7/MatMul,,vaiml_par_0,Supported +/in_proj2_8/MatMul,,vaiml_par_0,Supported +/in_proj2_9/MatMul,,vaiml_par_0,Supported +/in_proj_1/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/in_proj_10/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/in_proj_11/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/in_proj_12/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/in_proj_13/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/in_proj_14/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/in_proj_2/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/in_proj_3/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/in_proj_4/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/in_proj_5/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/in_proj_6/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/in_proj_7/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/in_proj_8/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/in_proj_9/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/norm_final/Add,,vaiml_par_0,Supported +/norm_final/Mul,,vaiml_par_0,Supported +/norm_final/Mul_1,,vaiml_par_0,Supported +/norm_final/Pow,,vaiml_par_0,Supported +/norm_final/ReduceMean,,vaiml_par_0,Supported +/norm_final_1/Add,,vaiml_par_0,Supported +/norm_final_1/Mul,,vaiml_par_0,Supported +/norm_final_1/Mul_1,,vaiml_par_0,Supported +/norm_final_1/Pow,,vaiml_par_0,Supported +/norm_final_1/ReduceMean,,vaiml_par_0,Supported +/norm_final_10/Add,,vaiml_par_0,Supported +/norm_final_10/Mul,,vaiml_par_0,Supported +/norm_final_10/Mul_1,,vaiml_par_0,Supported +/norm_final_10/Pow,,vaiml_par_0,Supported +/norm_final_10/ReduceMean,,vaiml_par_0,Supported +/norm_final_11/Add,,vaiml_par_0,Supported +/norm_final_11/Mul,,vaiml_par_0,Supported +/norm_final_11/Mul_1,,vaiml_par_0,Supported +/norm_final_11/Pow,,vaiml_par_0,Supported +/norm_final_11/ReduceMean,,vaiml_par_0,Supported +/norm_final_12/Add,,vaiml_par_0,Supported +/norm_final_12/Mul,,vaiml_par_0,Supported +/norm_final_12/Mul_1,,vaiml_par_0,Supported +/norm_final_12/Pow,,vaiml_par_0,Supported +/norm_final_12/ReduceMean,,vaiml_par_0,Supported +/norm_final_13/Add,,vaiml_par_0,Supported +/norm_final_13/Mul,,vaiml_par_0,Supported +/norm_final_13/Mul_1,,vaiml_par_0,Supported +/norm_final_13/Pow,,vaiml_par_0,Supported +/norm_final_13/ReduceMean,,vaiml_par_0,Supported +/norm_final_14/Add,,vaiml_par_0,Supported +/norm_final_14/Mul,,vaiml_par_0,Supported +/norm_final_14/Mul_1,,vaiml_par_0,Supported +/norm_final_14/Pow,,vaiml_par_0,Supported +/norm_final_14/ReduceMean,,vaiml_par_0,Supported +/norm_final_2/Add,,vaiml_par_0,Supported +/norm_final_2/Mul,,vaiml_par_0,Supported +/norm_final_2/Mul_1,,vaiml_par_0,Supported +/norm_final_2/Pow,,vaiml_par_0,Supported +/norm_final_2/ReduceMean,,vaiml_par_0,Supported +/norm_final_3/Add,,vaiml_par_0,Supported +/norm_final_3/Mul,,vaiml_par_0,Supported +/norm_final_3/Mul_1,,vaiml_par_0,Supported +/norm_final_3/Pow,,vaiml_par_0,Supported +/norm_final_3/ReduceMean,,vaiml_par_0,Supported +/norm_final_4/Add,,vaiml_par_0,Supported +/norm_final_4/Mul,,vaiml_par_0,Supported +/norm_final_4/Mul_1,,vaiml_par_0,Supported +/norm_final_4/Pow,,vaiml_par_0,Supported +/norm_final_4/ReduceMean,,vaiml_par_0,Supported +/norm_final_5/Add,,vaiml_par_0,Supported +/norm_final_5/Mul,,vaiml_par_0,Supported +/norm_final_5/Mul_1,,vaiml_par_0,Supported +/norm_final_5/Pow,,vaiml_par_0,Supported +/norm_final_5/ReduceMean,,vaiml_par_0,Supported +/norm_final_6/Add,,vaiml_par_0,Supported +/norm_final_6/Mul,,vaiml_par_0,Supported +/norm_final_6/Mul_1,,vaiml_par_0,Supported +/norm_final_6/Pow,,vaiml_par_0,Supported +/norm_final_6/ReduceMean,,vaiml_par_0,Supported +/norm_final_7/Add,,vaiml_par_0,Supported +/norm_final_7/Mul,,vaiml_par_0,Supported +/norm_final_7/Mul_1,,vaiml_par_0,Supported +/norm_final_7/Pow,,vaiml_par_0,Supported +/norm_final_7/ReduceMean,,vaiml_par_0,Supported +/norm_final_8/Add,,vaiml_par_0,Supported +/norm_final_8/Mul,,vaiml_par_0,Supported +/norm_final_8/Mul_1,,vaiml_par_0,Supported +/norm_final_8/Pow,,vaiml_par_0,Supported +/norm_final_8/ReduceMean,,vaiml_par_0,Supported +/norm_final_9/Add,,vaiml_par_0,Supported +/norm_final_9/Mul,,vaiml_par_0,Supported +/norm_final_9/Mul_1,,vaiml_par_0,Supported +/norm_final_9/Pow,,vaiml_par_0,Supported +/norm_final_9/ReduceMean,,vaiml_par_0,Supported +/out_combiner/Add,,vaiml_par_0,Supported +/out_combiner/Mul,,vaiml_par_0,Supported +/out_combiner/Mul_1,,vaiml_par_0,Supported +/out_combiner_1/Add,,vaiml_par_0,Supported +/out_combiner_1/Mul,,vaiml_par_0,Supported +/out_combiner_1/Mul_1,,vaiml_par_0,Supported +/out_combiner_2/Add,,vaiml_par_0,Supported +/out_combiner_2/Mul,,vaiml_par_0,Supported +/out_combiner_2/Mul_1,,vaiml_par_0,Supported +/out_combiner_3/Add,,vaiml_par_0,Supported +/out_combiner_3/Mul,,vaiml_par_0,Supported +/out_combiner_3/Mul_1,,vaiml_par_0,Supported +/out_proj2/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/out_proj2_1/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/out_proj2_10/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/out_proj2_11/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/out_proj2_12/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/out_proj2_13/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/out_proj2_14/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/out_proj2_2/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/out_proj2_3/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/out_proj2_4/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/out_proj2_5/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/out_proj2_6/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/out_proj2_7/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/out_proj2_8/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/out_proj2_9/MatMul/MatMulAddFusion,,vaiml_par_0,Supported +/pointwise_conv1/Conv,,vaiml_par_0,Supported +/pointwise_conv1_1/Conv,,vaiml_par_0,Supported +/pointwise_conv1_10/Conv,,vaiml_par_0,Supported +/pointwise_conv1_11/Conv,,vaiml_par_0,Supported +/pointwise_conv1_12/Conv,,vaiml_par_0,Supported +/pointwise_conv1_13/Conv,,vaiml_par_0,Supported +/pointwise_conv1_14/Conv,,vaiml_par_0,Supported +/pointwise_conv1_15/Conv,,vaiml_par_0,Supported +/pointwise_conv1_16/Conv,,vaiml_par_0,Supported +/pointwise_conv1_17/Conv,,vaiml_par_0,Supported +/pointwise_conv1_18/Conv,,vaiml_par_0,Supported +/pointwise_conv1_19/Conv,,vaiml_par_0,Supported +/pointwise_conv1_2/Conv,,vaiml_par_0,Supported +/pointwise_conv1_20/Conv,,vaiml_par_0,Supported +/pointwise_conv1_21/Conv,,vaiml_par_0,Supported +/pointwise_conv1_22/Conv,,vaiml_par_0,Supported +/pointwise_conv1_23/Conv,,vaiml_par_0,Supported +/pointwise_conv1_24/Conv,,vaiml_par_0,Supported +/pointwise_conv1_25/Conv,,vaiml_par_0,Supported +/pointwise_conv1_26/Conv,,vaiml_par_0,Supported +/pointwise_conv1_27/Conv,,vaiml_par_0,Supported +/pointwise_conv1_28/Conv,,vaiml_par_0,Supported +/pointwise_conv1_29/Conv,,vaiml_par_0,Supported +/pointwise_conv1_3/Conv,,vaiml_par_0,Supported +/pointwise_conv1_4/Conv,,vaiml_par_0,Supported +/pointwise_conv1_5/Conv,,vaiml_par_0,Supported +/pointwise_conv1_6/Conv,,vaiml_par_0,Supported +/pointwise_conv1_7/Conv,,vaiml_par_0,Supported +/pointwise_conv1_8/Conv,,vaiml_par_0,Supported +/pointwise_conv1_9/Conv,,vaiml_par_0,Supported +/pointwise_conv2/Conv,,vaiml_par_0,Supported +/pointwise_conv2_1/Conv,,vaiml_par_0,Supported +/pointwise_conv2_10/Conv,,vaiml_par_0,Supported +/pointwise_conv2_11/Conv,,vaiml_par_0,Supported +/pointwise_conv2_12/Conv,,vaiml_par_0,Supported +/pointwise_conv2_13/Conv,,vaiml_par_0,Supported +/pointwise_conv2_14/Conv,,vaiml_par_0,Supported +/pointwise_conv2_15/Conv,,vaiml_par_0,Supported +/pointwise_conv2_16/Conv,,vaiml_par_0,Supported +/pointwise_conv2_17/Conv,,vaiml_par_0,Supported +/pointwise_conv2_18/Conv,,vaiml_par_0,Supported +/pointwise_conv2_19/Conv,,vaiml_par_0,Supported +/pointwise_conv2_2/Conv,,vaiml_par_0,Supported +/pointwise_conv2_20/Conv,,vaiml_par_0,Supported +/pointwise_conv2_21/Conv,,vaiml_par_0,Supported +/pointwise_conv2_22/Conv,,vaiml_par_0,Supported +/pointwise_conv2_23/Conv,,vaiml_par_0,Supported +/pointwise_conv2_24/Conv,,vaiml_par_0,Supported +/pointwise_conv2_25/Conv,,vaiml_par_0,Supported +/pointwise_conv2_26/Conv,,vaiml_par_0,Supported +/pointwise_conv2_27/Conv,,vaiml_par_0,Supported +/pointwise_conv2_28/Conv,,vaiml_par_0,Supported +/pointwise_conv2_29/Conv,,vaiml_par_0,Supported +/pointwise_conv2_3/Conv,,vaiml_par_0,Supported +/pointwise_conv2_4/Conv,,vaiml_par_0,Supported +/pointwise_conv2_5/Conv,,vaiml_par_0,Supported +/pointwise_conv2_6/Conv,,vaiml_par_0,Supported +/pointwise_conv2_7/Conv,,vaiml_par_0,Supported +/pointwise_conv2_8/Conv,,vaiml_par_0,Supported +/pointwise_conv2_9/Conv,,vaiml_par_0,Supported +/proj/MatMul,,vaiml_par_0,Supported +/proj_1/MatMul,,vaiml_par_0,Supported +/proj_10/MatMul,,vaiml_par_0,Supported +/proj_11/MatMul,,vaiml_par_0,Supported +/proj_12/MatMul,,vaiml_par_0,Supported +/proj_13/MatMul,,vaiml_par_0,Supported +/proj_14/MatMul,,vaiml_par_0,Supported +/proj_2/MatMul,,vaiml_par_0,Supported +/proj_3/MatMul,,vaiml_par_0,Supported +/proj_4/MatMul,,vaiml_par_0,Supported +/proj_5/MatMul,,vaiml_par_0,Supported +/proj_6/MatMul,,vaiml_par_0,Supported +/proj_7/MatMul,,vaiml_par_0,Supported +/proj_8/MatMul,,vaiml_par_0,Supported +/proj_9/MatMul,,vaiml_par_0,Supported +/skip_modules.4/Add,,vaiml_par_0,Supported +/skip_modules.4/Mul,,vaiml_par_0,Supported +/skip_modules.4/Mul_1,,vaiml_par_0,Supported +/upsample/Add,,vaiml_par_0,Supported +/upsample/Expand,,vaiml_par_0,Supported +/upsample/Reshape_1,,vaiml_par_0,Supported +/upsample/Unsqueeze,,vaiml_par_0,Supported +/upsample_1/Add,,vaiml_par_0,Supported +/upsample_1/Expand,,vaiml_par_0,Supported +/upsample_1/Reshape_1,,vaiml_par_0,Supported +/upsample_1/Unsqueeze,,vaiml_par_0,Supported +/upsample_2/Add,,vaiml_par_0,Supported +/upsample_2/Expand,,vaiml_par_0,Supported +/upsample_2/Reshape_1,,vaiml_par_0,Supported +/upsample_2/Unsqueeze,,vaiml_par_0,Supported +/upsample_3/Add,,vaiml_par_0,Supported +/upsample_3/Expand,,vaiml_par_0,Supported +/upsample_3/Reshape_1,,vaiml_par_0,Supported +/upsample_3/Unsqueeze,,vaiml_par_0,Supported +gemm_input_reshape_token_104,,vaiml_par_0,Supported +gemm_input_reshape_token_110,,vaiml_par_0,Supported +gemm_input_reshape_token_116,,vaiml_par_0,Supported +gemm_input_reshape_token_122,,vaiml_par_0,Supported +gemm_input_reshape_token_134,,vaiml_par_0,Supported +gemm_input_reshape_token_14,,vaiml_par_0,Supported +gemm_input_reshape_token_140,,vaiml_par_0,Supported +gemm_input_reshape_token_152,,vaiml_par_0,Supported +gemm_input_reshape_token_158,,vaiml_par_0,Supported +gemm_input_reshape_token_164,,vaiml_par_0,Supported +gemm_input_reshape_token_170,,vaiml_par_0,Supported +gemm_input_reshape_token_176,,vaiml_par_0,Supported +gemm_input_reshape_token_188,,vaiml_par_0,Supported +gemm_input_reshape_token_194,,vaiml_par_0,Supported +gemm_input_reshape_token_2,,vaiml_par_0,Supported +gemm_input_reshape_token_206,,vaiml_par_0,Supported +gemm_input_reshape_token_212,,vaiml_par_0,Supported +gemm_input_reshape_token_218,,vaiml_par_0,Supported +gemm_input_reshape_token_224,,vaiml_par_0,Supported +gemm_input_reshape_token_230,,vaiml_par_0,Supported +gemm_input_reshape_token_242,,vaiml_par_0,Supported +gemm_input_reshape_token_248,,vaiml_par_0,Supported +gemm_input_reshape_token_26,,vaiml_par_0,Supported +gemm_input_reshape_token_260,,vaiml_par_0,Supported +gemm_input_reshape_token_266,,vaiml_par_0,Supported +gemm_input_reshape_token_272,,vaiml_par_0,Supported +gemm_input_reshape_token_278,,vaiml_par_0,Supported +gemm_input_reshape_token_284,,vaiml_par_0,Supported +gemm_input_reshape_token_296,,vaiml_par_0,Supported +gemm_input_reshape_token_302,,vaiml_par_0,Supported +gemm_input_reshape_token_314,,vaiml_par_0,Supported +gemm_input_reshape_token_32,,vaiml_par_0,Supported +gemm_input_reshape_token_320,,vaiml_par_0,Supported +gemm_input_reshape_token_326,,vaiml_par_0,Supported +gemm_input_reshape_token_332,,vaiml_par_0,Supported +gemm_input_reshape_token_338,,vaiml_par_0,Supported +gemm_input_reshape_token_350,,vaiml_par_0,Supported +gemm_input_reshape_token_356,,vaiml_par_0,Supported +gemm_input_reshape_token_368,,vaiml_par_0,Supported +gemm_input_reshape_token_374,,vaiml_par_0,Supported +gemm_input_reshape_token_380,,vaiml_par_0,Supported +gemm_input_reshape_token_386,,vaiml_par_0,Supported +gemm_input_reshape_token_392,,vaiml_par_0,Supported +gemm_input_reshape_token_404,,vaiml_par_0,Supported +gemm_input_reshape_token_410,,vaiml_par_0,Supported +gemm_input_reshape_token_422,,vaiml_par_0,Supported +gemm_input_reshape_token_428,,vaiml_par_0,Supported +gemm_input_reshape_token_434,,vaiml_par_0,Supported +gemm_input_reshape_token_44,,vaiml_par_0,Supported +gemm_input_reshape_token_440,,vaiml_par_0,Supported +gemm_input_reshape_token_446,,vaiml_par_0,Supported +gemm_input_reshape_token_458,,vaiml_par_0,Supported +gemm_input_reshape_token_464,,vaiml_par_0,Supported +gemm_input_reshape_token_476,,vaiml_par_0,Supported +gemm_input_reshape_token_482,,vaiml_par_0,Supported +gemm_input_reshape_token_488,,vaiml_par_0,Supported +gemm_input_reshape_token_494,,vaiml_par_0,Supported +gemm_input_reshape_token_50,,vaiml_par_0,Supported +gemm_input_reshape_token_500,,vaiml_par_0,Supported +gemm_input_reshape_token_512,,vaiml_par_0,Supported +gemm_input_reshape_token_518,,vaiml_par_0,Supported +gemm_input_reshape_token_530,,vaiml_par_0,Supported +gemm_input_reshape_token_536,,vaiml_par_0,Supported +gemm_input_reshape_token_542,,vaiml_par_0,Supported +gemm_input_reshape_token_548,,vaiml_par_0,Supported +gemm_input_reshape_token_554,,vaiml_par_0,Supported +gemm_input_reshape_token_56,,vaiml_par_0,Supported +gemm_input_reshape_token_566,,vaiml_par_0,Supported +gemm_input_reshape_token_572,,vaiml_par_0,Supported +gemm_input_reshape_token_584,,vaiml_par_0,Supported +gemm_input_reshape_token_590,,vaiml_par_0,Supported +gemm_input_reshape_token_596,,vaiml_par_0,Supported +gemm_input_reshape_token_602,,vaiml_par_0,Supported +gemm_input_reshape_token_608,,vaiml_par_0,Supported +gemm_input_reshape_token_62,,vaiml_par_0,Supported +gemm_input_reshape_token_620,,vaiml_par_0,Supported +gemm_input_reshape_token_626,,vaiml_par_0,Supported +gemm_input_reshape_token_638,,vaiml_par_0,Supported +gemm_input_reshape_token_644,,vaiml_par_0,Supported +gemm_input_reshape_token_650,,vaiml_par_0,Supported +gemm_input_reshape_token_656,,vaiml_par_0,Supported +gemm_input_reshape_token_662,,vaiml_par_0,Supported +gemm_input_reshape_token_674,,vaiml_par_0,Supported +gemm_input_reshape_token_68,,vaiml_par_0,Supported +gemm_input_reshape_token_680,,vaiml_par_0,Supported +gemm_input_reshape_token_692,,vaiml_par_0,Supported +gemm_input_reshape_token_698,,vaiml_par_0,Supported +gemm_input_reshape_token_704,,vaiml_par_0,Supported +gemm_input_reshape_token_710,,vaiml_par_0,Supported +gemm_input_reshape_token_716,,vaiml_par_0,Supported +gemm_input_reshape_token_728,,vaiml_par_0,Supported +gemm_input_reshape_token_734,,vaiml_par_0,Supported +gemm_input_reshape_token_746,,vaiml_par_0,Supported +gemm_input_reshape_token_752,,vaiml_par_0,Supported +gemm_input_reshape_token_758,,vaiml_par_0,Supported +gemm_input_reshape_token_764,,vaiml_par_0,Supported +gemm_input_reshape_token_770,,vaiml_par_0,Supported +gemm_input_reshape_token_782,,vaiml_par_0,Supported +gemm_input_reshape_token_788,,vaiml_par_0,Supported +gemm_input_reshape_token_8,,vaiml_par_0,Supported +gemm_input_reshape_token_80,,vaiml_par_0,Supported +gemm_input_reshape_token_800,,vaiml_par_0,Supported +gemm_input_reshape_token_806,,vaiml_par_0,Supported +gemm_input_reshape_token_812,,vaiml_par_0,Supported +gemm_input_reshape_token_86,,vaiml_par_0,Supported +gemm_input_reshape_token_98,,vaiml_par_0,Supported +gemm_output_reshape,,vaiml_par_0,Supported +gemm_output_reshape_token_101,,vaiml_par_0,Supported +gemm_output_reshape_token_107,,vaiml_par_0,Supported +gemm_output_reshape_token_11,,vaiml_par_0,Supported +gemm_output_reshape_token_113,,vaiml_par_0,Supported +gemm_output_reshape_token_119,,vaiml_par_0,Supported +gemm_output_reshape_token_125,,vaiml_par_0,Supported +gemm_output_reshape_token_131,,vaiml_par_0,Supported +gemm_output_reshape_token_137,,vaiml_par_0,Supported +gemm_output_reshape_token_143,,vaiml_par_0,Supported +gemm_output_reshape_token_149,,vaiml_par_0,Supported +gemm_output_reshape_token_155,,vaiml_par_0,Supported +gemm_output_reshape_token_161,,vaiml_par_0,Supported +gemm_output_reshape_token_167,,vaiml_par_0,Supported +gemm_output_reshape_token_17,,vaiml_par_0,Supported +gemm_output_reshape_token_173,,vaiml_par_0,Supported +gemm_output_reshape_token_179,,vaiml_par_0,Supported +gemm_output_reshape_token_185,,vaiml_par_0,Supported +gemm_output_reshape_token_191,,vaiml_par_0,Supported +gemm_output_reshape_token_197,,vaiml_par_0,Supported +gemm_output_reshape_token_203,,vaiml_par_0,Supported +gemm_output_reshape_token_209,,vaiml_par_0,Supported +gemm_output_reshape_token_215,,vaiml_par_0,Supported +gemm_output_reshape_token_221,,vaiml_par_0,Supported +gemm_output_reshape_token_227,,vaiml_par_0,Supported +gemm_output_reshape_token_23,,vaiml_par_0,Supported +gemm_output_reshape_token_233,,vaiml_par_0,Supported +gemm_output_reshape_token_239,,vaiml_par_0,Supported +gemm_output_reshape_token_245,,vaiml_par_0,Supported +gemm_output_reshape_token_251,,vaiml_par_0,Supported +gemm_output_reshape_token_257,,vaiml_par_0,Supported +gemm_output_reshape_token_263,,vaiml_par_0,Supported +gemm_output_reshape_token_269,,vaiml_par_0,Supported +gemm_output_reshape_token_275,,vaiml_par_0,Supported +gemm_output_reshape_token_281,,vaiml_par_0,Supported +gemm_output_reshape_token_287,,vaiml_par_0,Supported +gemm_output_reshape_token_29,,vaiml_par_0,Supported +gemm_output_reshape_token_293,,vaiml_par_0,Supported +gemm_output_reshape_token_299,,vaiml_par_0,Supported +gemm_output_reshape_token_305,,vaiml_par_0,Supported +gemm_output_reshape_token_311,,vaiml_par_0,Supported +gemm_output_reshape_token_317,,vaiml_par_0,Supported +gemm_output_reshape_token_323,,vaiml_par_0,Supported +gemm_output_reshape_token_329,,vaiml_par_0,Supported +gemm_output_reshape_token_335,,vaiml_par_0,Supported +gemm_output_reshape_token_341,,vaiml_par_0,Supported +gemm_output_reshape_token_347,,vaiml_par_0,Supported +gemm_output_reshape_token_35,,vaiml_par_0,Supported +gemm_output_reshape_token_353,,vaiml_par_0,Supported +gemm_output_reshape_token_359,,vaiml_par_0,Supported +gemm_output_reshape_token_365,,vaiml_par_0,Supported +gemm_output_reshape_token_371,,vaiml_par_0,Supported +gemm_output_reshape_token_377,,vaiml_par_0,Supported +gemm_output_reshape_token_383,,vaiml_par_0,Supported +gemm_output_reshape_token_389,,vaiml_par_0,Supported +gemm_output_reshape_token_395,,vaiml_par_0,Supported +gemm_output_reshape_token_401,,vaiml_par_0,Supported +gemm_output_reshape_token_407,,vaiml_par_0,Supported +gemm_output_reshape_token_41,,vaiml_par_0,Supported +gemm_output_reshape_token_413,,vaiml_par_0,Supported +gemm_output_reshape_token_419,,vaiml_par_0,Supported +gemm_output_reshape_token_425,,vaiml_par_0,Supported +gemm_output_reshape_token_431,,vaiml_par_0,Supported +gemm_output_reshape_token_437,,vaiml_par_0,Supported +gemm_output_reshape_token_443,,vaiml_par_0,Supported +gemm_output_reshape_token_449,,vaiml_par_0,Supported +gemm_output_reshape_token_455,,vaiml_par_0,Supported +gemm_output_reshape_token_461,,vaiml_par_0,Supported +gemm_output_reshape_token_467,,vaiml_par_0,Supported +gemm_output_reshape_token_47,,vaiml_par_0,Supported +gemm_output_reshape_token_473,,vaiml_par_0,Supported +gemm_output_reshape_token_479,,vaiml_par_0,Supported +gemm_output_reshape_token_485,,vaiml_par_0,Supported +gemm_output_reshape_token_491,,vaiml_par_0,Supported +gemm_output_reshape_token_497,,vaiml_par_0,Supported +gemm_output_reshape_token_5,,vaiml_par_0,Supported +gemm_output_reshape_token_503,,vaiml_par_0,Supported +gemm_output_reshape_token_509,,vaiml_par_0,Supported +gemm_output_reshape_token_515,,vaiml_par_0,Supported +gemm_output_reshape_token_521,,vaiml_par_0,Supported +gemm_output_reshape_token_527,,vaiml_par_0,Supported +gemm_output_reshape_token_53,,vaiml_par_0,Supported +gemm_output_reshape_token_533,,vaiml_par_0,Supported +gemm_output_reshape_token_539,,vaiml_par_0,Supported +gemm_output_reshape_token_545,,vaiml_par_0,Supported +gemm_output_reshape_token_551,,vaiml_par_0,Supported +gemm_output_reshape_token_557,,vaiml_par_0,Supported +gemm_output_reshape_token_563,,vaiml_par_0,Supported +gemm_output_reshape_token_569,,vaiml_par_0,Supported +gemm_output_reshape_token_575,,vaiml_par_0,Supported +gemm_output_reshape_token_581,,vaiml_par_0,Supported +gemm_output_reshape_token_587,,vaiml_par_0,Supported +gemm_output_reshape_token_59,,vaiml_par_0,Supported +gemm_output_reshape_token_593,,vaiml_par_0,Supported +gemm_output_reshape_token_599,,vaiml_par_0,Supported +gemm_output_reshape_token_605,,vaiml_par_0,Supported +gemm_output_reshape_token_611,,vaiml_par_0,Supported +gemm_output_reshape_token_617,,vaiml_par_0,Supported +gemm_output_reshape_token_623,,vaiml_par_0,Supported +gemm_output_reshape_token_629,,vaiml_par_0,Supported +gemm_output_reshape_token_635,,vaiml_par_0,Supported +gemm_output_reshape_token_641,,vaiml_par_0,Supported +gemm_output_reshape_token_647,,vaiml_par_0,Supported +gemm_output_reshape_token_65,,vaiml_par_0,Supported +gemm_output_reshape_token_653,,vaiml_par_0,Supported +gemm_output_reshape_token_659,,vaiml_par_0,Supported +gemm_output_reshape_token_665,,vaiml_par_0,Supported +gemm_output_reshape_token_671,,vaiml_par_0,Supported +gemm_output_reshape_token_677,,vaiml_par_0,Supported +gemm_output_reshape_token_683,,vaiml_par_0,Supported +gemm_output_reshape_token_689,,vaiml_par_0,Supported +gemm_output_reshape_token_695,,vaiml_par_0,Supported +gemm_output_reshape_token_701,,vaiml_par_0,Supported +gemm_output_reshape_token_707,,vaiml_par_0,Supported +gemm_output_reshape_token_71,,vaiml_par_0,Supported +gemm_output_reshape_token_713,,vaiml_par_0,Supported +gemm_output_reshape_token_719,,vaiml_par_0,Supported +gemm_output_reshape_token_725,,vaiml_par_0,Supported +gemm_output_reshape_token_731,,vaiml_par_0,Supported +gemm_output_reshape_token_737,,vaiml_par_0,Supported +gemm_output_reshape_token_743,,vaiml_par_0,Supported +gemm_output_reshape_token_749,,vaiml_par_0,Supported +gemm_output_reshape_token_755,,vaiml_par_0,Supported +gemm_output_reshape_token_761,,vaiml_par_0,Supported +gemm_output_reshape_token_767,,vaiml_par_0,Supported +gemm_output_reshape_token_77,,vaiml_par_0,Supported +gemm_output_reshape_token_773,,vaiml_par_0,Supported +gemm_output_reshape_token_779,,vaiml_par_0,Supported +gemm_output_reshape_token_785,,vaiml_par_0,Supported +gemm_output_reshape_token_791,,vaiml_par_0,Supported +gemm_output_reshape_token_797,,vaiml_par_0,Supported +gemm_output_reshape_token_803,,vaiml_par_0,Supported +gemm_output_reshape_token_809,,vaiml_par_0,Supported +gemm_output_reshape_token_815,,,Not supported. Check aie_unsupported_original_ops.json +gemm_output_reshape_token_83,,vaiml_par_0,Supported +gemm_output_reshape_token_89,,vaiml_par_0,Supported +gemm_output_reshape_token_95,,vaiml_par_0,Supported diff --git a/original-info-signature.txt b/original-info-signature.txt new file mode 100644 index 0000000000000000000000000000000000000000..4262119974eb805ab03f9c534be3592fbbdd1a54 --- /dev/null +++ b/original-info-signature.txt @@ -0,0 +1 @@ +4f00fb244983f7c2158dc9333522f122 \ No newline at end of file diff --git a/original-model-signature.txt b/original-model-signature.txt new file mode 100644 index 0000000000000000000000000000000000000000..a140fbdd51635fddbbb7d2aed4a544fc5a9518b8 --- /dev/null +++ b/original-model-signature.txt @@ -0,0 +1 @@ +54bcb1ebe11c2810ff44b6aa9d673d26 \ No newline at end of file diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1024.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1024.bin new file mode 100644 index 0000000000000000000000000000000000000000..9846bb0b8dd472bed5ecb5313dcaea13a8fe8cdb --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1024.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c1cbb7bc810aea157194ce162fb59a3bb9d1cb5067385cf99fb8dacf8c14652 +size 376 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1025.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1025.bin new file mode 100644 index 0000000000000000000000000000000000000000..9846bb0b8dd472bed5ecb5313dcaea13a8fe8cdb --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1025.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c1cbb7bc810aea157194ce162fb59a3bb9d1cb5067385cf99fb8dacf8c14652 +size 376 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1026.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1026.bin new file mode 100644 index 0000000000000000000000000000000000000000..9846bb0b8dd472bed5ecb5313dcaea13a8fe8cdb --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1026.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c1cbb7bc810aea157194ce162fb59a3bb9d1cb5067385cf99fb8dacf8c14652 +size 376 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1027.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1027.bin new file mode 100644 index 0000000000000000000000000000000000000000..9846bb0b8dd472bed5ecb5313dcaea13a8fe8cdb --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1027.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c1cbb7bc810aea157194ce162fb59a3bb9d1cb5067385cf99fb8dacf8c14652 +size 376 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1028.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1028.bin new file mode 100644 index 0000000000000000000000000000000000000000..9846bb0b8dd472bed5ecb5313dcaea13a8fe8cdb --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1028.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c1cbb7bc810aea157194ce162fb59a3bb9d1cb5067385cf99fb8dacf8c14652 +size 376 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1029.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1029.bin new file mode 100644 index 0000000000000000000000000000000000000000..9846bb0b8dd472bed5ecb5313dcaea13a8fe8cdb --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1029.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c1cbb7bc810aea157194ce162fb59a3bb9d1cb5067385cf99fb8dacf8c14652 +size 376 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_103.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_103.bin new file mode 100644 index 0000000000000000000000000000000000000000..eec64fe3df7f6488af0a0410cc7fce5daa2d9985 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_103.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2b3e29fe5bd251988feea615a1ccf5ed2e381dc2986da327f06e26ae3efd6bd +size 348 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1030.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1030.bin new file mode 100644 index 0000000000000000000000000000000000000000..9846bb0b8dd472bed5ecb5313dcaea13a8fe8cdb --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1030.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c1cbb7bc810aea157194ce162fb59a3bb9d1cb5067385cf99fb8dacf8c14652 +size 376 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1031.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1031.bin new file mode 100644 index 0000000000000000000000000000000000000000..ecc2b9e57e9036ac767bb86c843432f5ddc9b836 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1031.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b39b4626db932ba0b1e7c77800c3421c7d0200251c00618a40cab340cffc09b1 +size 244 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1032.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1032.bin new file mode 100644 index 0000000000000000000000000000000000000000..5fe7449257f268c9add8e67948550b56342fd703 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1032.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d50045548d162b49aeb0480d930ecdcb5750f3fee8514a06144442eaea49548f +size 376 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1102.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1102.bin new file mode 100644 index 0000000000000000000000000000000000000000..799fe8e5480ba5a78bdd1bf91ff0366d9dd0d7d5 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1102.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac36f7378bb8b4a68e1d26174d0ead04f5c5b594f66da1e055b492647b6ba794 +size 364 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1105.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1105.bin new file mode 100644 index 0000000000000000000000000000000000000000..424c5fb015e7acbc8ce27d74d0b4c5a8b0acf34f --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1105.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7539b4f67550b84095642c5f934384bbff82a890ac0cb4c3f5188c1d0b42502e +size 296 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1106.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1106.bin new file mode 100644 index 0000000000000000000000000000000000000000..b498ddea698c51e7769f52b626bf39258f9e0922 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1106.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2aca22a8c9a5024d2ab41656a520ecb52b0fa0765770ac67ad948dcea38c196 +size 376 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1107.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1107.bin new file mode 100644 index 0000000000000000000000000000000000000000..a1f5d94661462002ffe2d09484298b3b5d504b35 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1107.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:868f58edd7cc2f467919106b8dd145be94c84930f7ec456b5c5fadfda0b0a877 +size 376 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1108.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1108.bin new file mode 100644 index 0000000000000000000000000000000000000000..91c8c1c0388f605fc2062ae4b411fa2dbfe16558 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1108.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccaed9efcff1fc4870983995ee7bbb5b05d386cd38feec04bb73d043d2cf644a +size 296 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1109.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1109.bin new file mode 100644 index 0000000000000000000000000000000000000000..1a8da13aa00544ef142c96f4894f75eb9501632b --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1109.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c420d1316352cd5ab94c939c35ec4d543900a6cc46b32f99db5006264b2ded8c +size 376 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_111.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_111.bin new file mode 100644 index 0000000000000000000000000000000000000000..78a2a396fedc378f5395d5017b513e95f88e74e8 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_111.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10a37deaed87eefa12752fa02dd8dddafb23e6077f24099c77f7524c4cbcf368 +size 376 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1110.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1110.bin new file mode 100644 index 0000000000000000000000000000000000000000..5cb65b3807f9060f6343fa771697e0e75f1cf1d9 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1110.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81ebb528872d631b65a20c4406eb831c621b38ac80ac73ecc631550cbf81e437 +size 376 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1111.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1111.bin new file mode 100644 index 0000000000000000000000000000000000000000..7adb78d226833b5e161d15821f18b46457a1ace2 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1111.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbd1669b8942afcccfcfb0b03f84bc738f7b8e9cc1b170d36baa50b3d8b0b02b +size 304 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1112.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1112.bin new file mode 100644 index 0000000000000000000000000000000000000000..97f38ea320cb71d360ea2f6c79fdf3189576141a --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1112.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22ecfb3fe9d45e789b0bc4cb4064c821642958f20639afbe6bf297ab4b059f61 +size 264 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1113.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1113.bin new file mode 100644 index 0000000000000000000000000000000000000000..858c75ada81985fe5e68ab58c37cbe030e0e67fb --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1113.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1a37c428efbbf29b26c3941b2a52ea9fa94be0268906355a4a47849d9cb50e5 +size 244 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1114.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1114.bin new file mode 100644 index 0000000000000000000000000000000000000000..5cb65b3807f9060f6343fa771697e0e75f1cf1d9 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1114.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81ebb528872d631b65a20c4406eb831c621b38ac80ac73ecc631550cbf81e437 +size 376 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1115.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1115.bin new file mode 100644 index 0000000000000000000000000000000000000000..4a7b4257d1cde412d0fb4899d6f9cd128b8f6e82 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1115.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0f4d83261c199cbc10ac8dbfd48342be3abd7e0fdd1a68e3297d0ce23e9f17e +size 304 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1116.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1116.bin new file mode 100644 index 0000000000000000000000000000000000000000..29a6357d3d064e437dbb6301f43787b0a03a4e3f --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1116.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2331f715707154b3f908f9a7c7a5694d98422cb369d753bb12ffc234c33c5abc +size 348 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1117.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1117.bin new file mode 100644 index 0000000000000000000000000000000000000000..2c2b3fa62dfa9c5d7c3c9b0af296d70e941df149 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1117.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f160f4118b232a4e9c458f3d7291cd4e5b39d0c5f5537a5a515b9522394134a +size 296 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1118.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1118.bin new file mode 100644 index 0000000000000000000000000000000000000000..31d9655369c31d66a36c302bd91f08988d93cc02 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1118.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70bab8648a7161b2ab962e42de0e18d5e302d26ea6f799847f8092a03d5ae283 +size 296 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1119.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1119.bin new file mode 100644 index 0000000000000000000000000000000000000000..2a9f2625544884a065caf0141ee8a3eb13e73837 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1119.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98c830af89b8ef3f6b0b3970ee7832552100b77370e87948a686dc2f7a69abe2 +size 376 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_112.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_112.bin new file mode 100644 index 0000000000000000000000000000000000000000..63c803f8227086902f47f059cb12066f7d0bd5f6 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_112.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5d3ae82e0737ab097af2842451462ef2165086ba0afdce55cbd120212226054 +size 260 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1120.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1120.bin new file mode 100644 index 0000000000000000000000000000000000000000..91540d2d6371a1952163c0deffed44727475c5b7 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1120.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03f9d7a5cf8fcb05d02512769d5adcc92e581898f870ba0a1fc7369fc57cd001 +size 348 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1121.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1121.bin new file mode 100644 index 0000000000000000000000000000000000000000..47c9418a83484d7c1e7bf7e1c09888a14e563ad2 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1121.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7f00b76f5e28e60530d6c92411b6a35f3b4c910909cc4ead94d7c43307ba807 +size 376 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1122.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1122.bin new file mode 100644 index 0000000000000000000000000000000000000000..47c9418a83484d7c1e7bf7e1c09888a14e563ad2 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1122.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7f00b76f5e28e60530d6c92411b6a35f3b4c910909cc4ead94d7c43307ba807 +size 376 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1123.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1123.bin new file mode 100644 index 0000000000000000000000000000000000000000..47c9418a83484d7c1e7bf7e1c09888a14e563ad2 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1123.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7f00b76f5e28e60530d6c92411b6a35f3b4c910909cc4ead94d7c43307ba807 +size 376 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1124.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1124.bin new file mode 100644 index 0000000000000000000000000000000000000000..47c9418a83484d7c1e7bf7e1c09888a14e563ad2 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1124.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7f00b76f5e28e60530d6c92411b6a35f3b4c910909cc4ead94d7c43307ba807 +size 376 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1125.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1125.bin new file mode 100644 index 0000000000000000000000000000000000000000..47c9418a83484d7c1e7bf7e1c09888a14e563ad2 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1125.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7f00b76f5e28e60530d6c92411b6a35f3b4c910909cc4ead94d7c43307ba807 +size 376 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1126.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1126.bin new file mode 100644 index 0000000000000000000000000000000000000000..47c9418a83484d7c1e7bf7e1c09888a14e563ad2 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1126.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7f00b76f5e28e60530d6c92411b6a35f3b4c910909cc4ead94d7c43307ba807 +size 376 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1127.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1127.bin new file mode 100644 index 0000000000000000000000000000000000000000..47c9418a83484d7c1e7bf7e1c09888a14e563ad2 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1127.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7f00b76f5e28e60530d6c92411b6a35f3b4c910909cc4ead94d7c43307ba807 +size 376 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1128.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1128.bin new file mode 100644 index 0000000000000000000000000000000000000000..47c9418a83484d7c1e7bf7e1c09888a14e563ad2 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1128.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7f00b76f5e28e60530d6c92411b6a35f3b4c910909cc4ead94d7c43307ba807 +size 376 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1129.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1129.bin new file mode 100644 index 0000000000000000000000000000000000000000..47c9418a83484d7c1e7bf7e1c09888a14e563ad2 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1129.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7f00b76f5e28e60530d6c92411b6a35f3b4c910909cc4ead94d7c43307ba807 +size 376 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_113.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_113.bin new file mode 100644 index 0000000000000000000000000000000000000000..8515c5cafb38f0bdc0b26c585790cb90a0185c03 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_113.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8127a0d21d25204b9c10830349fd30f22c5e96ff909ca11c5e20fd0ec3d8bf67 +size 244 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1130.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1130.bin new file mode 100644 index 0000000000000000000000000000000000000000..5f5f4c0f330fadfcfe5fa244fc85a8bfd45c9f79 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1130.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60c09b090ee9d003316781d1771c1671ef951fd9e5d65db1feca793040555c58 +size 244 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1131.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1131.bin new file mode 100644 index 0000000000000000000000000000000000000000..79b96248e16ff7d324b0edaef8c611e57390d9e1 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1131.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa6a0042441b4e84f291eccfaeed01f4659bf1ee1c7131c64087b644d28cb8f3 +size 376 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1132.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1132.bin new file mode 100644 index 0000000000000000000000000000000000000000..47c9418a83484d7c1e7bf7e1c09888a14e563ad2 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1132.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7f00b76f5e28e60530d6c92411b6a35f3b4c910909cc4ead94d7c43307ba807 +size 376 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1133.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1133.bin new file mode 100644 index 0000000000000000000000000000000000000000..5f5f4c0f330fadfcfe5fa244fc85a8bfd45c9f79 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1133.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60c09b090ee9d003316781d1771c1671ef951fd9e5d65db1feca793040555c58 +size 244 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1134.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1134.bin new file mode 100644 index 0000000000000000000000000000000000000000..79b96248e16ff7d324b0edaef8c611e57390d9e1 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1134.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa6a0042441b4e84f291eccfaeed01f4659bf1ee1c7131c64087b644d28cb8f3 +size 376 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1141.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1141.bin new file mode 100644 index 0000000000000000000000000000000000000000..47c9418a83484d7c1e7bf7e1c09888a14e563ad2 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1141.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7f00b76f5e28e60530d6c92411b6a35f3b4c910909cc4ead94d7c43307ba807 +size 376 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1142.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1142.bin new file mode 100644 index 0000000000000000000000000000000000000000..db3948ee89050231b40abedb18967a5f99a7e9d3 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1142.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f243b0114b3fa5773e2110edf5e02304317690ab9a03766b2a2ba24012da49e6 +size 264 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1143.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1143.bin new file mode 100644 index 0000000000000000000000000000000000000000..2483023185f17075474a7852de0a00dddb88459b --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1143.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:341011e0805fd5adda89f79a0e89b971788fe9961565fe5ba8d4941c44f36723 +size 348 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1144.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1144.bin new file mode 100644 index 0000000000000000000000000000000000000000..116a5909a1e258c645f08ffe9a1d3529fc466740 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1144.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a6c6818bb19d4da41de2b94fdb7dffc012778bafe1f8557e0d6705384b90485 +size 348 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1145.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1145.bin new file mode 100644 index 0000000000000000000000000000000000000000..47c9418a83484d7c1e7bf7e1c09888a14e563ad2 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1145.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7f00b76f5e28e60530d6c92411b6a35f3b4c910909cc4ead94d7c43307ba807 +size 376 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1146.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1146.bin new file mode 100644 index 0000000000000000000000000000000000000000..87d3c9ee8973548460f328d173e6cbe836bf2635 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1146.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:279270629383c318a666e5f5f11493a692bd9b0dc7d58a55c4dcae7abdafda3a +size 348 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1147.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1147.bin new file mode 100644 index 0000000000000000000000000000000000000000..c8f51213d6379890c374140a86cf79a5e4fe38ec --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1147.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:315872a5e09d4af2f2f056c5ea4bc958a3c9a4e1cce1483847a70fe008b75f92 +size 296 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1148.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1148.bin new file mode 100644 index 0000000000000000000000000000000000000000..79b96248e16ff7d324b0edaef8c611e57390d9e1 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1148.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa6a0042441b4e84f291eccfaeed01f4659bf1ee1c7131c64087b644d28cb8f3 +size 376 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1149.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1149.bin new file mode 100644 index 0000000000000000000000000000000000000000..76915eeea10b9e00980727ca4e9f7ef124ba351b --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1149.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d26b51b66c3c50c0aa79638926cd06e1b1755de685279ef4ca9f0c52658d5bbd +size 244 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_115.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_115.bin new file mode 100644 index 0000000000000000000000000000000000000000..f74db97213a7de5945b254805ced34158edd9d83 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_115.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e29993a9695b5b05949b6ba6389eb3be36ddf6fcdd24f736b4f4beab43b2cc6 +size 296 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1150.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1150.bin new file mode 100644 index 0000000000000000000000000000000000000000..cfb48919c38eb77e0fb4fe8d2261de21680ac01a --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1150.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:395a59cf502a0631fe19439910e27d4784cc492b1a91b34d6822ae98d78364b4 +size 260 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1151.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1151.bin new file mode 100644 index 0000000000000000000000000000000000000000..56c0cba4119cf4ff6947a71608618094b27014aa --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1151.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:102afedca8ba44ec0b23a40bc69249da2a8861d1c20f3454f158afb6a38356ae +size 244 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1152.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1152.bin new file mode 100644 index 0000000000000000000000000000000000000000..552cf2f85da9b6cfae01bc897ed77df05b5253ce --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1152.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3ee364f3c5e4fda28e3f0713de1320d08d11043b6bf13f93b71bf090fdcbdf5 +size 296 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1153.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1153.bin new file mode 100644 index 0000000000000000000000000000000000000000..2fa1e954eeba2e6023b635b1b8a9c2c9efdd49c0 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1153.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8673ddd81067ad0f4205f4f396545b0adc76afe32b69704adbe7983b051f89c +size 260 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1154.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1154.bin new file mode 100644 index 0000000000000000000000000000000000000000..d4b09f072e051a7ff3b05d304eb1f22df4e4d7fc --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1154.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:313cc0289a931ce7ca4990835925a31afc1d81f2ab689ce816d2cc7cdbf80d87 +size 260 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1155.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1155.bin new file mode 100644 index 0000000000000000000000000000000000000000..633b8c990a97a14977c475c9b7da3372c49558d8 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1155.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7df423ee0ace7dbd14c5254b40d715d1958f5b02e82ad3181ca793c9e3c862d4 +size 384 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1156.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1156.bin new file mode 100644 index 0000000000000000000000000000000000000000..ccb2c49f19262c68dbb7f12dea469167cf17328e --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1156.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5aa5573c0b24c403fea96422cf71912820890f723b5db6b7106a7883053d204 +size 260 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1157.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1157.bin new file mode 100644 index 0000000000000000000000000000000000000000..57b3d445d6ae79f85356093522d93a127b8c20fc --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1157.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc69b30c5dba4e53cb0680516d5b5f10b7222ba3eba2685d4fe5c3e7f8c477fd +size 244 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1158.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1158.bin new file mode 100644 index 0000000000000000000000000000000000000000..0caeb75292ed9503c25be2d1a6bc90a08d8d1e0f --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1158.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5505a7855256893c4e80615c70301695930bb7ddae716ed3b050d79d918bf4e2 +size 244 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1159.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1159.bin new file mode 100644 index 0000000000000000000000000000000000000000..c37ef6058dd6ffca88919914a1fc92c638b87c2c --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1159.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c7b10c3d91645c2b5b567614dc71b3fea0d318d19940b577ce71f688b15c6c8 +size 296 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1169.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1169.bin new file mode 100644 index 0000000000000000000000000000000000000000..82de4e8d57cb75e652110922c570fbae1d13ba65 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1169.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e93c779a779bc344a25eac207fee8fcff358c83742b9642cdb2ff03d16ecf7d +size 376 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_117.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_117.bin new file mode 100644 index 0000000000000000000000000000000000000000..eefae214ca14814d88c53aff874eda8bbe3f5fc3 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_117.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff3ac39993d11200d9c91c31b67820dfd7596fa01fa3b2a140513250b9739c57 +size 244 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1170.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1170.bin new file mode 100644 index 0000000000000000000000000000000000000000..91540d2d6371a1952163c0deffed44727475c5b7 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1170.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03f9d7a5cf8fcb05d02512769d5adcc92e581898f870ba0a1fc7369fc57cd001 +size 348 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1171.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1171.bin new file mode 100644 index 0000000000000000000000000000000000000000..3c74b9089fcbac828f3616201f3ceacc9b1e0b7a --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1171.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a660e1503f8ab46484d766d870c1cc9392d958a6dc82b1c7cffdb731a9192770 +size 376 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1172.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1172.bin new file mode 100644 index 0000000000000000000000000000000000000000..37b409b2159a82afa268956b22683d1084451f62 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1172.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bfcedaa4ce36d4c093949f3217ebf6ba0fecff8b96c1985f7d5a95400b93549 +size 348 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1173.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1173.bin new file mode 100644 index 0000000000000000000000000000000000000000..2483023185f17075474a7852de0a00dddb88459b --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1173.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:341011e0805fd5adda89f79a0e89b971788fe9961565fe5ba8d4941c44f36723 +size 348 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1174.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1174.bin new file mode 100644 index 0000000000000000000000000000000000000000..116a5909a1e258c645f08ffe9a1d3529fc466740 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1174.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a6c6818bb19d4da41de2b94fdb7dffc012778bafe1f8557e0d6705384b90485 +size 348 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1175.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1175.bin new file mode 100644 index 0000000000000000000000000000000000000000..3c74b9089fcbac828f3616201f3ceacc9b1e0b7a --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1175.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a660e1503f8ab46484d766d870c1cc9392d958a6dc82b1c7cffdb731a9192770 +size 376 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1176.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1176.bin new file mode 100644 index 0000000000000000000000000000000000000000..87d3c9ee8973548460f328d173e6cbe836bf2635 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1176.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:279270629383c318a666e5f5f11493a692bd9b0dc7d58a55c4dcae7abdafda3a +size 348 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1177.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1177.bin new file mode 100644 index 0000000000000000000000000000000000000000..ac846a619a621143621bf60739e0026596e9098c --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1177.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2650cfa825df13aa90fdd8ae56d36f2730a7fe8bff6d50e44978c33fd6504f8a +size 296 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1178.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1178.bin new file mode 100644 index 0000000000000000000000000000000000000000..297cc7695d7d7bb4fcdfaba1d5c1adaa6d5f270f --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1178.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53442df314cfbd6111f1e966ac0f04705c588e6718cc3968dc032a5276894f1f +size 376 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1179.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1179.bin new file mode 100644 index 0000000000000000000000000000000000000000..2071f0a3758487bdb1824efab7769c98993b8e0f --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1179.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b882ff848dccb3d7105d7a8340970ec2a42b2d00a3e7ceac96c57cb661f965c8 +size 244 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_118.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_118.bin new file mode 100644 index 0000000000000000000000000000000000000000..c6d48cb4034e76eff342056319a813d645a8906d --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_118.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a7613956f391c7059e6659ff22e51a2fb680e3e8405c6336027b2290fd0b7a5 +size 296 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1180.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1180.bin new file mode 100644 index 0000000000000000000000000000000000000000..12a0fb80c4a5578ca1ca5b0cfc6409a434292bdd --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1180.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c73f4f3d53a70a5e70c1de7f75aaa3d440ad9978579ffea5d153c38c2a37aa44 +size 260 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1181.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1181.bin new file mode 100644 index 0000000000000000000000000000000000000000..b75a41305dde01942f2d57132f961eda13598a02 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1181.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:153668bec357b59459b7ab4ceb1cc53dc33df482ceb52c3280ff3f54700e20a3 +size 244 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1182.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1182.bin new file mode 100644 index 0000000000000000000000000000000000000000..a65acb0a0553e07fb87bfc7efbe73308ef5434fc --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1182.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:569bb6a456dd91cf223a7efd4db4c9603ccb466e887bdaa9036a317aea70d48d +size 296 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1183.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1183.bin new file mode 100644 index 0000000000000000000000000000000000000000..00308c768c4558a5566266f5e52eba7a7b97472a --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1183.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f07f3741cf8e7985f5f677a4e21f18d24c7c948cb293816cb77139971a06083c +size 260 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1184.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1184.bin new file mode 100644 index 0000000000000000000000000000000000000000..631b4dc3153d215ca3ff2130d5a2c2348fc96951 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1184.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ea1b8069d32d4817aa6d5a8767b858d4962a978931edecb53eb9939faa3fbcb +size 260 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1185.bin b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1185.bin new file mode 100644 index 0000000000000000000000000000000000000000..141219296ec6d455dba4e5e5c62ceb07323ffe4a --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_1185.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba98585fdc728096c2f365e1b9985c3a349799ebd733a7d9cb68581577dff84a +size 384 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable77/Release/0_0_reloadable77.# b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable77/Release/0_0_reloadable77.# new file mode 100644 index 0000000000000000000000000000000000000000..6cca2809fc0754d65b3e39ce3bde8d6a5a2429ff --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable77/Release/0_0_reloadable77.# @@ -0,0 +1,2 @@ +3e5793ea7630d339bf6d1c92777c78cd48557be0 +1a735f496f1284ab86135ede88939a8aeeb375a3 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable77/Release/0_0_reloadable77.## b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable77/Release/0_0_reloadable77.## new file mode 100644 index 0000000000000000000000000000000000000000..ceb3ad8d50a7c5a1d3e24745c0083475deb3f554 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable77/Release/0_0_reloadable77.## @@ -0,0 +1,2 @@ +6e4529911c095f683458acd363249f4973916e38 +92e103875c6a39bc8cdbe4a21c02c5a94ba3cc96 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable77/Release/0_0_reloadable77.calltree b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable77/Release/0_0_reloadable77.calltree new file mode 100644 index 0000000000000000000000000000000000000000..4ef50c51e6b0fc95a6d005f03e3bad10d6289608 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable77/Release/0_0_reloadable77.calltree @@ -0,0 +1,96 @@ + +// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri May 30 11:35:25 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// bridge -o../Release/0_0_reloadable77 ../Release/0_0_reloadable77.o -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/isg -g -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable77.bcf -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/softfloat/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork3591677 -pme + + +// Release: ipp V-2024.06-TGT-241219 + +_Z13kernelWrapperPPvjjjj + _Z15_b13749_wrapperPPv (referenced text) + _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E + _Z14_b8148_wrapperPPv (referenced text) + _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj + memset + _Z14_b8170_wrapperPPv (referenced text) + _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj + _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj + _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj + _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params + _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params + _Z14_b7835_wrapperPPv (referenced text) + _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj + _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj + _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params + _Z15_b14160_wrapperPPv (referenced text) + _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh + _ZN12me_primitive10udiv_dstepEjjRjS0_ + _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params + _Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params + _Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params + _Z15_b13739_wrapperPPv (referenced text) + _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv + _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv + _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E + _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E + _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E + _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E + _Z15_b13744_wrapperPPv (referenced text) + _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv + _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E + + +Call tree stack and functions sizes: + +stack stack stack call func func function name + desc level level desc +----- ----- ----- ----- ----- ----- -------------------------------------------------------------- + 64 256 0 0 546 9978 _Z13kernelWrapperPPvjjjj + 0 128 1 1 36 1124 _Z15_b13749_wrapperPPv + 64 128 1 2 602 1088 _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE + 64 64 2 3 154 178 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv + 0 0 3 4 24 24 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E + 0 0 2 3 308 308 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E + 0 64 1 1 32 690 _Z14_b8148_wrapperPPv + 64 64 1 2 484 658 _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj + 0 0 2 3 174 174 memset + 0 128 1 1 32 988 _Z14_b8170_wrapperPPv + 128 128 1 2 178 956 _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj + 0 0 2 3 52 298 _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj + 0 0 3 4 162 162 _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj + 0 0 2 4 84 84 _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params + 0 0 2 3 480 480 _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params + 0 64 1 1 32 1646 _Z14_b7835_wrapperPPv + 64 64 1 2 202 1614 _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj + 0 0 2 3 262 262 _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj + 0 0 2 3 1150 1150 _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params + 0 192 1 1 36 2736 _Z15_b14160_wrapperPPv + 128 192 1 2 478 2700 _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + 64 64 2 3 792 934 _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh + 0 0 3 4 142 142 _ZN12me_primitive10udiv_dstepEjjRjS0_ + 64 64 2 3 360 1288 _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params + 0 0 3 4 674 674 _Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params + 0 0 2 4 254 254 _Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params + 0 192 1 1 32 1282 _Z15_b13739_wrapperPPv + 64 192 1 2 488 1250 _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + 64 128 2 3 60 318 _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv + 64 64 3 4 178 202 _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv + 0 0 4 5 24 24 _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E + 0 0 2 4 56 56 _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E + 128 128 2 3 114 444 _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E + 0 0 3 4 330 330 _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E + 0 64 1 1 32 966 _Z15_b13744_wrapperPPv + 64 64 1 2 488 934 _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + 0 0 2 3 68 68 _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv + 0 0 2 3 378 378 _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E + + +Maximum call level : 5 +Maximum stack level: 4 +Maximum stack size : 256 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable77/Release/0_0_reloadable77.cmic2 b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable77/Release/0_0_reloadable77.cmic2 new file mode 100644 index 0000000000000000000000000000000000000000..c39b88024c290b0f0812064e84e67893e8bfa529 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable77/Release/0_0_reloadable77.cmic2 @@ -0,0 +1,14042 @@ + +// File generated by darts version V-2024.06#84922c0d9f#241219, Fri May 30 11:35:27 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// darts -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -d -h -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable77 me + +// Release: ipp V-2024.06-TGT-241219 +.label __Z13kernelWrapperPPvjjjj___func_begin0 +.label _Z13kernelWrapperPPvjjjj +.function kernelWrapper _Z13kernelWrapperPPvjjjj +.src_ref 0 "0_0_reloadable77.cc" 91 first +.src_ref 0 "0_0_reloadable77.cc" 93 60 +.src_ref 0 "0_0_reloadable77.cc" 93 110 first +.function_start + 2528 "00101100" // LDA r16, [p0]; NEZ r26, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2529 "11100000" // /* MW 5 */ + 2530 "11101001" // /* MW 4 */ + 2531 "11010000" // /* MW 3 */ + 2532 "11000010" // /* MW 2 */ + 2533 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 91 + 2534 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2535 "00000001" // /* MW 5 */ + 2536 "00000000" // /* MW 4 */ + 2537 "00000000" // /* MW 3 */ + 2538 "00001000" // /* MW 2 */ + 2539 "00000000" // /* MW 1 */ + 2540 "10011000" // ST p6, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2541 "00011101" // /* MW 3 */ + 2542 "11101111" // /* MW 2 */ + 2543 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 0 "0_0_reloadable77.cc" 98 112 + 2544 "00000010" // ST r14, [sp, #-16]; MOV r14, r3 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2545 "01110000" // /* MW 7 */ + 2546 "11010000" // /* MW 6 */ + 2547 "11001000" // /* MW 5 */ + 2548 "00000001" // /* MW 4 */ + 2549 "10110000" // /* MW 3 */ + 2550 "00111010" // /* MW 2 */ + 2551 "11111110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 0 "0_0_reloadable77.cc" 95 110 + 2552 "00000010" // ST r15, [sp, #-8]; MOV r15, r1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2553 "01110000" // /* MW 7 */ + 2554 "01010000" // /* MW 6 */ + 2555 "11101000" // /* MW 5 */ + 2556 "00000001" // /* MW 4 */ + 2557 "10110000" // /* MW 3 */ + 2558 "00111110" // /* MW 2 */ + 2559 "11111111" // /* MW 1 */ + 2560 "10011000" // ST p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2561 "10011101" // /* MW 3 */ + 2562 "11110111" // /* MW 2 */ + 2563 "00001111" // /* MW 1 */ + 2564 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2565 "00111101" // /* MW 3 */ + 2566 "11111100" // /* MW 2 */ + 2567 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 first + 2568 "00011000" // ADD.NC p6, r16, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2569 "00000010" // /* MW 3 */ + 2570 "01101000" // /* MW 2 */ + 2571 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 + 2572 "10011000" // LDA r16, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2573 "00010110" // /* MW 3 */ + 2574 "00011110" // /* MW 2 */ + 2575 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 36 + 2576 "10011000" // LDA r18, [p6], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2577 "01010110" // /* MW 3 */ + 2578 "00111110" // /* MW 2 */ + 2579 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 46 + 2580 "10011000" // LDA r17, [p6], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2581 "00110110" // /* MW 3 */ + 2582 "11101110" // /* MW 2 */ + 2583 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 60 + 2584 "10011000" // LDA r27, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2585 "01110110" // /* MW 3 */ + 2586 "00000111" // /* MW 2 */ + 2587 "00000110" // /* MW 1 */ + 2588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2589 "00000000" // /* MW 1 */ + 2590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2591 "00000000" // /* MW 1 */ + 2592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2593 "00000000" // /* MW 1 */ + 2594 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2595 "00000000" // /* MW 1 */ + 2596 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2597 "00000000" // /* MW 1 */ + 2598 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2599 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 219 30 first +.src_ref 1 "io_buffer_compiler.h" 219 37 first + 2600 "00011000" // SEL.EQZ r16, r16, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2601 "00100010" // /* MW 3 */ + 2602 "00100001" // /* MW 2 */ + 2603 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 218 23 first + 2604 "10011000" // ST r16, [p6, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2605 "00010001" // /* MW 3 */ + 2606 "11010110" // /* MW 2 */ + 2607 "00001110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 +.src_ref 1 "io_buffer_main.h" 434 8 +.src_ref 1 "io_buffer_main.h" 434 8 + 2608 "11100100" // MOVX r16, #-1; MOV el0, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2609 "00111001" // /* MW 5 */ + 2610 "00110101" // /* MW 4 */ + 2611 "10100000" // /* MW 3 */ + 2612 "00011111" // /* MW 2 */ + 2613 "11111100" // /* MW 1 */ + 2614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2615 "00000000" // /* MW 1 */ + 2616 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2617 "00000000" // /* MW 1 */ + 2618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2619 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 first + 2620 "00011000" // ACQ.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2621 "00001000" // /* MW 3 */ + 2622 "01010111" // /* MW 2 */ + 2623 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 95 60 +.src_ref 0 "0_0_reloadable77.cc" 95 110 +.src_ref 0 "0_0_reloadable77.cc" 98 60 +.src_ref 0 "0_0_reloadable77.cc" 101 7 + 2624 "01100100" // MOVX r17, #2; MOV r19, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2625 "00000101" // /* MW 5 */ + 2626 "10100000" // /* MW 4 */ + 2627 "00101001" // /* MW 3 */ + 2628 "01000001" // /* MW 2 */ + 2629 "00000100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 95 60 +.src_ref 0 "0_0_reloadable77.cc" 95 60 first + 2630 "11100100" // LSHL r20, r26, r17; MOV r18, p0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2631 "10000001" // /* MW 5 */ + 2632 "00100001" // /* MW 4 */ + 2633 "10111001" // /* MW 3 */ + 2634 "00100011" // /* MW 2 */ + 2635 "11010101" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 95 60 +.src_ref 0 "0_0_reloadable77.cc" 95 110 + 2636 "10100100" // LTU r18, r19, r15; ADD.NC p6, r18, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2637 "10100010" // /* MW 5 */ + 2638 "11010010" // /* MW 4 */ + 2639 "10011100" // /* MW 3 */ + 2640 "10011111" // /* MW 2 */ + 2641 "10011100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 95 60 +.src_ref 0 "0_0_reloadable77.cc" 98 60 + 2642 "10111010" // LDA r20, [p6]; ST r20, [sp, #-28]; MOV r19, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2643 "01110010" // /* MW 9 */ + 2644 "01100000" // /* MW 8 */ + 2645 "01101110" // /* MW 7 */ + 2646 "10000010" // /* MW 6 */ + 2647 "10010101" // /* MW 5 */ + 2648 "11100110" // /* MW 4 */ + 2649 "11010111" // /* MW 3 */ + 2650 "11010010" // /* MW 2 */ + 2651 "11000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 + 2652 "00000010" // ST r18, [sp, #-24]; MOV r26, r18 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2653 "01110000" // /* MW 7 */ + 2654 "10010000" // /* MW 6 */ + 2655 "01001100" // /* MW 5 */ + 2656 "00000011" // /* MW 4 */ + 2657 "10110000" // /* MW 3 */ + 2658 "01001010" // /* MW 2 */ + 2659 "11111101" // /* MW 1 */ + 2660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2661 "00000000" // /* MW 1 */ + 2662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2663 "00000000" // /* MW 1 */ + 2664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2665 "00000000" // /* MW 1 */ + 2666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2667 "00000000" // /* MW 1 */ + 2668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2669 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 first + 2670 "00011000" // ADD.NC p6, r20, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2671 "00000010" // /* MW 3 */ + 2672 "01101010" // /* MW 2 */ + 2673 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 + 2674 "10011000" // LDA r20, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2675 "10010110" // /* MW 3 */ + 2676 "00011110" // /* MW 2 */ + 2677 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 36 + 2678 "10011000" // LDA r22, [p6], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2679 "11010110" // /* MW 3 */ + 2680 "00111110" // /* MW 2 */ + 2681 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 46 + 2682 "10011000" // LDA r21, [p6], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2683 "10110110" // /* MW 3 */ + 2684 "11101110" // /* MW 2 */ + 2685 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 60 + 2686 "10011000" // LDA r27, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2687 "01110110" // /* MW 3 */ + 2688 "00000111" // /* MW 2 */ + 2689 "00000110" // /* MW 1 */ + 2690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2691 "00000000" // /* MW 1 */ + 2692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2693 "00000000" // /* MW 1 */ + 2694 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2695 "00000000" // /* MW 1 */ + 2696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2697 "00000000" // /* MW 1 */ + 2698 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2699 "00000000" // /* MW 1 */ + 2700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2701 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 219 30 first +.src_ref 1 "io_buffer_compiler.h" 219 37 first + 2702 "00011000" // SEL.EQZ r20, r20, r22, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2703 "01100010" // /* MW 3 */ + 2704 "00101001" // /* MW 2 */ + 2705 "00010101" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 218 23 first + 2706 "10011000" // ST r20, [p6, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2707 "10010001" // /* MW 3 */ + 2708 "11010110" // /* MW 2 */ + 2709 "00001110" // /* MW 1 */ + 2710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2711 "00000000" // /* MW 1 */ + 2712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2713 "00000000" // /* MW 1 */ + 2714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2715 "00000000" // /* MW 1 */ + 2716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2717 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 first + 2718 "00011000" // ACQ.COND r21, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2719 "00001000" // /* MW 3 */ + 2720 "01010111" // /* MW 2 */ + 2721 "00010101" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 98 60 first + 2722 "10011000" // LSHL r18, r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2723 "00011101" // /* MW 3 */ + 2724 "10100101" // /* MW 2 */ + 2725 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 98 60 +.src_ref 0 "0_0_reloadable77.cc" 98 60 + 2726 "10100100" // LSHL r18, r2, r17; ADD.NC r19, r19, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2727 "10010010" // /* MW 5 */ + 2728 "10110011" // /* MW 4 */ + 2729 "10111001" // /* MW 3 */ + 2730 "10100011" // /* MW 2 */ + 2731 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 98 60 +.src_ref 0 "0_0_reloadable77.cc" 98 112 + 2732 "10100100" // NEZ r26, r14; ADD.NC p6, r19, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2733 "10010010" // /* MW 5 */ + 2734 "11010011" // /* MW 4 */ + 2735 "00001100" // /* MW 3 */ + 2736 "10011110" // /* MW 2 */ + 2737 "01110110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 98 60 + 2738 "00001100" // LDA r18, [p6]; ST r26, [sp, #-32] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2739 "10101011" // /* MW 5 */ + 2740 "11000110" // /* MW 4 */ + 2741 "11011111" // /* MW 3 */ + 2742 "11001010" // /* MW 2 */ + 2743 "11000000" // /* MW 1 */ + 2744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2745 "00000000" // /* MW 1 */ + 2746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2747 "00000000" // /* MW 1 */ + 2748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2749 "00000000" // /* MW 1 */ + 2750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2751 "00000000" // /* MW 1 */ + 2752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2753 "00000000" // /* MW 1 */ + 2754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2755 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 first + 2756 "00011000" // ADD.NC p7, r18, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2757 "00000010" // /* MW 3 */ + 2758 "01101001" // /* MW 2 */ + 2759 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 + 2760 "10011000" // LDA r19, [p7], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2761 "01110110" // /* MW 3 */ + 2762 "00111110" // /* MW 2 */ + 2763 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 23 + 2764 "10011000" // LDA r18, [p7], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2765 "01010110" // /* MW 3 */ + 2766 "11101110" // /* MW 2 */ + 2767 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 36 + 2768 "10011000" // LDA r20, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2769 "10010110" // /* MW 3 */ + 2770 "00011110" // /* MW 2 */ + 2771 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 60 + 2772 "10011000" // LDA r27, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2773 "01110110" // /* MW 3 */ + 2774 "00000111" // /* MW 2 */ + 2775 "00000111" // /* MW 1 */ + 2776 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2777 "00000000" // /* MW 1 */ + 2778 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2779 "00000000" // /* MW 1 */ + 2780 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2781 "00000000" // /* MW 1 */ + 2782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2783 "00000000" // /* MW 1 */ + 2784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2785 "00000000" // /* MW 1 */ + 2786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2787 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 219 30 first +.src_ref 1 "io_buffer_compiler.h" 219 37 first + 2788 "00011000" // SEL.EQZ r19, r19, r20, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2789 "01000010" // /* MW 3 */ + 2790 "11100111" // /* MW 2 */ + 2791 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 218 23 first + 2792 "10011000" // ST r19, [p7, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2793 "01110001" // /* MW 3 */ + 2794 "11010110" // /* MW 2 */ + 2795 "00001111" // /* MW 1 */ + 2796 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2797 "00000000" // /* MW 1 */ + 2798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2799 "00000000" // /* MW 1 */ + 2800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2801 "00000000" // /* MW 1 */ + 2802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2803 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 first + 2804 "00011000" // ACQ.COND r18, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2805 "00001000" // /* MW 3 */ + 2806 "10010111" // /* MW 2 */ + 2807 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 101 7 first + 2808 "10011000" // LSHL r16, r0, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2809 "00011101" // /* MW 3 */ + 2810 "00100001" // /* MW 2 */ + 2811 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 101 7 + 2812 "11111000" // MOV dj0, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2813 "00100000" // /* MW 3 */ + 2814 "10001000" // /* MW 2 */ + 2815 "00011000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 101 7 + 2816 "01000100" // MOVXM p7, #508480 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2817 "10000000" // /* MW 5 */ + 2818 "11000100" // /* MW 4 */ + 2819 "11001110" // /* MW 3 */ + 2820 "00000111" // /* MW 2 */ + 2821 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 101 7 + 2822 "00001100" // LDA p1, [p7, dj0]; ST el0, [sp, #-36] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2823 "01011011" // /* MW 5 */ + 2824 "10111000" // /* MW 4 */ + 2825 "11011111" // /* MW 3 */ + 2826 "00010011" // /* MW 2 */ + 2827 "11100000" // /* MW 1 */ + 2828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2829 "00000000" // /* MW 1 */ + 2830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2831 "00000000" // /* MW 1 */ + 2832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2833 "00000000" // /* MW 1 */ + 2834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2835 "00000000" // /* MW 1 */ + 2836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2837 "00000000" // /* MW 1 */ + 2838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2839 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 101 4 +.no_stack_arguments + 2840 "00011000" // JL p1 /* MW 4 */ /* control_operation: words=4 call unconditional cycles_taken=1 indirect absolute delay_slots=5 */ + 2841 "01000000" // /* MW 3 */ + 2842 "00110000" // /* MW 2 */ + 2843 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 104 60 +.src_ref 0 "0_0_reloadable77.cc" 106 60 +.delay_slot + 2844 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2845 "11000000" // /* MW 3 */ + 2846 "01100000" // /* MW 2 */ + 2847 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2849 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2851 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2853 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2854 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2855 "01111110" // /* MW 9 */ + 2856 "10100101" // /* MW 8 */ + 2857 "00000001" // /* MW 7 */ + 2858 "00000000" // /* MW 6 */ + 2859 "00010000" // /* MW 5 */ + 2860 "00000000" // /* MW 4 */ + 2861 "11110000" // /* MW 3 */ + 2862 "00101100" // /* MW 2 */ + 2863 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_main.h" 464 8 +.src_ref 1 "io_buffer_main.h" 464 8 +.src_ref 1 "io_buffer_main.h" 464 8 +.src_ref 0 "0_0_reloadable77.cc" 104 60 first +.return_address + 2864 "00101100" // LDA r17, [p7]; MOVX r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2865 "00001010" // /* MW 5 */ + 2866 "01000000" // /* MW 4 */ + 2867 "11010000" // /* MW 3 */ + 2868 "11000110" // /* MW 2 */ + 2869 "11100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 + 2870 "00011000" // LDA r26, [sp, #-36] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2871 "01010001" // /* MW 3 */ + 2872 "11011111" // /* MW 2 */ + 2873 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 106 60 + 2874 "00011000" // LDA dj0, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2875 "01000001" // /* MW 3 */ + 2876 "11100100" // /* MW 2 */ + 2877 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_main.h" 464 8 + 2878 "00011000" // LDA el0, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2879 "00101001" // /* MW 3 */ + 2880 "11101000" // /* MW 2 */ + 2881 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 + 2882 "00011000" // LDA eh0, [sp, #-32] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2883 "00001001" // /* MW 3 */ + 2884 "11100000" // /* MW 2 */ + 2885 "00000111" // /* MW 1 */ + 2886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2887 "00000000" // /* MW 1 */ + 2888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2889 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 12 first + 2890 "00011000" // ADD.NC p0, r17, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2891 "10001000" // /* MW 3 */ + 2892 "01101000" // /* MW 2 */ + 2893 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 12 + 2894 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2895 "00110110" // /* MW 3 */ + 2896 "00000110" // /* MW 2 */ + 2897 "00000000" // /* MW 1 */ + 2898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2899 "00000000" // /* MW 1 */ + 2900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2901 "00000000" // /* MW 1 */ + 2902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2903 "00000000" // /* MW 1 */ + 2904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2905 "00000000" // /* MW 1 */ + 2906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2907 "00000000" // /* MW 1 */ + 2908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2909 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 first + 2910 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2911 "00001000" // /* MW 3 */ + 2912 "01010101" // /* MW 2 */ + 2913 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 2914 "11010100" // LDA r17, [p0, #-4]; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2915 "01000001" // /* MW 5 */ + 2916 "10101111" // /* MW 4 */ + 2917 "11011101" // /* MW 3 */ + 2918 "11000110" // /* MW 2 */ + 2919 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 +.src_ref 0 "0_0_reloadable77.cc" 106 60 first + 2920 "11010100" // LDA r18, [p7, dj0]; MOV r26, el0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2921 "00111001" // /* MW 5 */ + 2922 "01000000" // /* MW 4 */ + 2923 "11011101" // /* MW 3 */ + 2924 "01001010" // /* MW 2 */ + 2925 "11100000" // /* MW 1 */ + 2926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2927 "00000000" // /* MW 1 */ + 2928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2929 "00000000" // /* MW 1 */ + 2930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2931 "00000000" // /* MW 1 */ + 2932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2933 "00000000" // /* MW 1 */ + 2934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2935 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 2936 "10011000" // SUB r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2937 "00010001" // /* MW 3 */ + 2938 "00100111" // /* MW 2 */ + 2939 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 12 first +.src_ref 1 "io_buffer_compiler.h" 630 24 + 2940 "00100100" // SEL.EQZ r17, r17, r19, r27; ADD.NC p7, r18, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2941 "00010000" // /* MW 5 */ + 2942 "11010010" // /* MW 4 */ + 2943 "01001110" // /* MW 3 */ + 2944 "01100110" // /* MW 2 */ + 2945 "10001100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 12 +.src_ref 1 "io_buffer_compiler.h" 630 22 first + 2946 "00001100" // LDA r17, [p7]; ST r17, [p0, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2947 "01100011" // /* MW 5 */ + 2948 "11101100" // /* MW 4 */ + 2949 "11010001" // /* MW 3 */ + 2950 "11000110" // /* MW 2 */ + 2951 "11100000" // /* MW 1 */ + 2952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2953 "00000000" // /* MW 1 */ + 2954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2955 "00000000" // /* MW 1 */ + 2956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2957 "00000000" // /* MW 1 */ + 2958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2959 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 2960 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2961 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 2962 "11111000" // MOV r26, eh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2963 "00011100" // /* MW 3 */ + 2964 "10100001" // /* MW 2 */ + 2965 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2966 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2967 "00001000" // /* MW 3 */ + 2968 "01010101" // /* MW 2 */ + 2969 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 2970 "11010100" // LDA r17, [p7, #-4]; MOV r27, el0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2971 "00111001" // /* MW 5 */ + 2972 "11000000" // /* MW 4 */ + 2973 "11011101" // /* MW 3 */ + 2974 "11000110" // /* MW 2 */ + 2975 "11111110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 109 60 first + 2976 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2977 "01010110" // /* MW 3 */ + 2978 "00000110" // /* MW 2 */ + 2979 "00000110" // /* MW 1 */ + 2980 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2981 "00000000" // /* MW 1 */ + 2982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2983 "00000000" // /* MW 1 */ + 2984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2985 "00000000" // /* MW 1 */ + 2986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2987 "00000000" // /* MW 1 */ + 2988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2989 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 2990 "10011000" // SUB r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2991 "00010001" // /* MW 3 */ + 2992 "00100111" // /* MW 2 */ + 2993 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 25 first +.src_ref 1 "io_buffer_compiler.h" 630 24 + 2994 "00100100" // SEL.EQZ r17, r17, r19, r27; ADD.NC p0, r18, #20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2995 "00010100" // /* MW 5 */ + 2996 "11010010" // /* MW 4 */ + 2997 "01000000" // /* MW 3 */ + 2998 "01100110" // /* MW 2 */ + 2999 "10001100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 25 +.src_ref 1 "io_buffer_compiler.h" 630 22 first + 3000 "00001100" // LDA r17, [p0]; ST r17, [p7, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3001 "01100011" // /* MW 5 */ + 3002 "11101100" // /* MW 4 */ + 3003 "11011111" // /* MW 3 */ + 3004 "11000110" // /* MW 2 */ + 3005 "00000000" // /* MW 1 */ + 3006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3007 "00000000" // /* MW 1 */ + 3008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3009 "00000000" // /* MW 1 */ + 3010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3011 "00000000" // /* MW 1 */ + 3012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3013 "00000000" // /* MW 1 */ + 3014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3015 "00000000" // /* MW 1 */ + 3016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3017 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 first + 3018 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3019 "00001000" // /* MW 3 */ + 3020 "01010101" // /* MW 2 */ + 3021 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 111 + 3022 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3023 "00111001" // /* MW 3 */ + 3024 "11111100" // /* MW 2 */ + 3025 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 3026 "10011000" // LDA r17, [p0, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3027 "00110110" // /* MW 3 */ + 3028 "11100110" // /* MW 2 */ + 3029 "00000000" // /* MW 1 */ + 3030 "00011000" // LDA p6, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3031 "00011001" // /* MW 3 */ + 3032 "11101111" // /* MW 2 */ + 3033 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 3034 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3035 "10011001" // /* MW 3 */ + 3036 "11110111" // /* MW 2 */ + 3037 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 3038 "00011000" // LDA r14, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3039 "11010001" // /* MW 3 */ + 3040 "11110001" // /* MW 2 */ + 3041 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3042 "00011000" // LDA r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3043 "11110001" // /* MW 3 */ + 3044 "11111001" // /* MW 2 */ + 3045 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 111 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3046 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3047 "00000001" // /* MW 5 */ + 3048 "00000000" // /* MW 4 */ + 3049 "00000000" // /* MW 3 */ + 3050 "11111000" // /* MW 2 */ + 3051 "11111111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 111 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3052 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3053 "00000000" // /* MW 3 */ + 3054 "00101000" // /* MW 2 */ + 3055 "00010000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 first +.delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3056 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3057 "00010001" // /* MW 3 */ + 3058 "00100001" // /* MW 2 */ + 3059 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3061 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3062 "11111000" // MOV r27, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3063 "00100000" // /* MW 3 */ + 3064 "11010111" // /* MW 2 */ + 3065 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.delay_slot + 3066 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3067 "00000010" // /* MW 3 */ + 3068 "01100001" // /* MW 2 */ + 3069 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 22 +.delay_slot + 3070 "10011000" // ST r16, [p0, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3071 "00010001" // /* MW 3 */ + 3072 "11100110" // /* MW 2 */ +.label _Z13kernelWrapperPPvjjjj__end +.label __Z13kernelWrapperPPvjjjj___func_end0 + 3073 "00001000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E +.function setup _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E +.src_ref 2 "elementwise_binary.h" 100 first +.src_ref 2 "elementwise_binary.h" 103 4 first +.function_start + 3088 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3089 "00000000" // /* MW 3 */ + 3090 "00101000" // /* MW 2 */ + 3091 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 3092 "01000100" // MOVXM p0, #508704 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3093 "01000000" // /* MW 5 */ + 3094 "11000110" // /* MW 4 */ + 3095 "11000000" // /* MW 3 */ + 3096 "00000111" // /* MW 2 */ + 3097 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 3098 "10111000" // MOV m0, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3099 "10000000" // /* MW 3 */ + 3100 "00000000" // /* MW 2 */ + 3101 "00011000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 first +.delay_slot + 3102 "10011000" // ST m0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3103 "00000001" // /* MW 3 */ + 3104 "00000100" // /* MW 2 */ + 3105 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 3106 "10011000" // ST m0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3107 "00000001" // /* MW 3 */ + 3108 "00010100" // /* MW 2 */ + 3109 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_end0 + 3111 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv +.function setup _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv +.src_ref 2 "elementwise_binary.h" 89 first +.src_ref 2 "elementwise_binary.h" 92 22 +.src_ref 2 "elementwise_binary.h" 92 24 first +.function_start + 3120 "10111010" // LDA el0, [p1], #4; MOVXM p0, #508672 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3121 "00010000" // /* MW 9 */ + 3122 "10000000" // /* MW 8 */ + 3123 "00110001" // /* MW 7 */ + 3124 "11110000" // /* MW 6 */ + 3125 "00000001" // /* MW 5 */ + 3126 "00000000" // /* MW 4 */ + 3127 "11010000" // /* MW 3 */ + 3128 "10000101" // /* MW 2 */ + 3129 "00100011" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 89 + 3130 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3131 "00000001" // /* MW 5 */ + 3132 "00000000" // /* MW 4 */ + 3133 "00000000" // /* MW 3 */ + 3134 "00001000" // /* MW 2 */ + 3135 "00000000" // /* MW 1 */ + 3136 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3137 "00111101" // /* MW 3 */ + 3138 "11111000" // /* MW 2 */ + 3139 "00001111" // /* MW 1 */ + 3140 "10011000" // ST r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3141 "11110101" // /* MW 3 */ + 3142 "11111101" // /* MW 2 */ + 3143 "00001111" // /* MW 1 */ + 3144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3145 "00000000" // /* MW 1 */ + 3146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3147 "00000000" // /* MW 1 */ + 3148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3149 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 92 22 first + 3150 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3151 "00101001" // /* MW 3 */ + 3152 "00011100" // /* MW 2 */ + 3153 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 93 24 first + 3154 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3155 "00101110" // /* MW 3 */ + 3156 "00011100" // /* MW 2 */ + 3157 "00000001" // /* MW 1 */ + 3158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3159 "00000000" // /* MW 1 */ + 3160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3161 "00000000" // /* MW 1 */ + 3162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3163 "00000000" // /* MW 1 */ + 3164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3165 "00000000" // /* MW 1 */ + 3166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3167 "00000000" // /* MW 1 */ + 3168 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3169 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 93 22 + 3170 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3171 "00101001" // /* MW 3 */ + 3172 "00011100" // /* MW 2 */ + 3173 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 94 24 first + 3174 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3175 "00101110" // /* MW 3 */ + 3176 "00000100" // /* MW 2 */ + 3177 "00000001" // /* MW 1 */ + 3178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3179 "00000000" // /* MW 1 */ + 3180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3181 "00000000" // /* MW 1 */ + 3182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3183 "00000000" // /* MW 1 */ + 3184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3185 "00000000" // /* MW 1 */ + 3186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3187 "00000000" // /* MW 1 */ + 3188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3189 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 94 22 + 3190 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3191 "00101001" // /* MW 3 */ + 3192 "00011100" // /* MW 2 */ + 3193 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 95 24 first + 3194 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3195 "00101110" // /* MW 3 */ + 3196 "00010100" // /* MW 2 */ + 3197 "00000001" // /* MW 1 */ + 3198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3199 "00000000" // /* MW 1 */ + 3200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3201 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 96 8 first +.no_stack_arguments + 3202 "00000100" // JL #3088 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3088 delay_slots=5 */ + 3203 "00000001" // /* MW 5 */ + 3204 "00000000" // /* MW 4 */ + 3205 "00001000" // /* MW 3 */ + 3206 "00000110" // /* MW 2 */ + 3207 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3209 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3211 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3212 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3213 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 95 22 first +.delay_slot + 3214 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3215 "00101001" // /* MW 3 */ + 3216 "11011100" // /* MW 2 */ + 3217 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 96 8 +.delay_slot + 3218 "00101110" // NOPA; NOPS; MOV r15, p0; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 3219 "00011100" // /* MW 13 */ + 3220 "00000000" // /* MW 12 */ + 3221 "00000000" // /* MW 11 */ + 3222 "00000111" // /* MW 10 */ + 3223 "10000110" // /* MW 9 */ + 3224 "01011110" // /* MW 8 */ + 3225 "00000000" // /* MW 7 */ + 3226 "00000000" // /* MW 6 */ + 3227 "10110110" // /* MW 5 */ + 3228 "00000010" // /* MW 4 */ + 3229 "11110000" // /* MW 3 */ + 3230 "00101100" // /* MW 2 */ + 3231 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 96 8 +.src_ref 2 "elementwise_binary.h" 98 4 +.src_ref 3 "add_impl.h" 105 29 +.return_address + 3232 "10111010" // LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3233 "00001000" // /* MW 9 */ + 3234 "11000100" // /* MW 8 */ + 3235 "00110011" // /* MW 7 */ + 3236 "01101000" // /* MW 6 */ + 3237 "00000000" // /* MW 5 */ + 3238 "00000001" // /* MW 4 */ + 3239 "00100000" // /* MW 3 */ + 3240 "00000111" // /* MW 2 */ + 3241 "11111111" // /* MW 1 */ +.src_ref 3 "add_impl.h" 105 29 +.src_ref 3 "add_impl.h" 106 37 +.src_ref 3 "add_impl.h" 106 39 + 3242 "10111010" // MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3243 "01011000" // /* MW 9 */ + 3244 "11111101" // /* MW 8 */ + 3245 "00000111" // /* MW 7 */ + 3246 "00001000" // /* MW 6 */ + 3247 "10000000" // /* MW 5 */ + 3248 "00000001" // /* MW 4 */ + 3249 "10000000" // /* MW 3 */ + 3250 "11100010" // /* MW 2 */ + 3251 "00000001" // /* MW 1 */ +.src_ref 3 "add_impl.h" 105 29 first +.src_ref 3 "add_impl.h" 106 39 + 3252 "01111010" // LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3253 "00000001" // /* MW 9 */ + 3254 "10100000" // /* MW 8 */ + 3255 "00000111" // /* MW 7 */ + 3256 "10000000" // /* MW 6 */ + 3257 "00010001" // /* MW 5 */ + 3258 "00001010" // /* MW 4 */ + 3259 "00100000" // /* MW 3 */ + 3260 "10111110" // /* MW 2 */ + 3261 "11111111" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 50 first + 3262 "10011000" // LDA.u8 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3263 "01001010" // /* MW 3 */ + 3264 "00000110" // /* MW 2 */ + 3265 "00000000" // /* MW 1 */ + 3266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3267 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3269 "00000000" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 37 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3270 "00011000" // ST.s16 r16, [p0, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3271 "00010111" // /* MW 3 */ + 3272 "00000010" // /* MW 2 */ + 3273 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 98 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3274 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3275 "00000000" // /* MW 3 */ + 3276 "00101000" // /* MW 2 */ + 3277 "00010000" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 54 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3278 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3279 "00000101" // /* MW 3 */ + 3280 "00100010" // /* MW 2 */ + 3281 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 98 4 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3282 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3283 "00000001" // /* MW 5 */ + 3284 "00000000" // /* MW 4 */ + 3285 "00000000" // /* MW 3 */ + 3286 "11111000" // /* MW 2 */ + 3287 "11111111" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 54 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3288 "10011000" // EQ r27, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3289 "00100111" // /* MW 3 */ + 3290 "01110111" // /* MW 2 */ + 3291 "00010100" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 39 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3292 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3293 "10000010" // /* MW 3 */ + 3294 "00100001" // /* MW 2 */ + 3295 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + 3297 "00000000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E +.function setup _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E +.src_ref 2 "elementwise_binary_broadcasting.h" 40 first +.src_ref 2 "elementwise_binary_broadcasting.h" 41 33 +.src_ref 2 "elementwise_binary_broadcasting.h" 41 33 +.function_start + 3312 "10111010" // MOVA m0, #20; MOVXM p0, #508684 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3313 "00010000" // /* MW 9 */ + 3314 "10000110" // /* MW 8 */ + 3315 "00110001" // /* MW 7 */ + 3316 "11110000" // /* MW 6 */ + 3317 "00000001" // /* MW 5 */ + 3318 "00000000" // /* MW 4 */ + 3319 "10000000" // /* MW 3 */ + 3320 "10000000" // /* MW 2 */ + 3321 "00000010" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 41 25 +.src_ref 2 "elementwise_binary_broadcasting.h" 41 33 +.src_ref 2 "elementwise_binary_broadcasting.h" 41 33 first +.src_ref 2 "elementwise_binary_broadcasting.h" 42 25 + 3322 "10111010" // LDA.u8 r0, [p0], m0; MOVX r2, #1; MOV r1, #6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3323 "01011000" // /* MW 9 */ + 3324 "00000110" // /* MW 8 */ + 3325 "00101000" // /* MW 7 */ + 3326 "00101000" // /* MW 6 */ + 3327 "00100000" // /* MW 5 */ + 3328 "00000000" // /* MW 4 */ + 3329 "01010000" // /* MW 3 */ + 3330 "00000001" // /* MW 2 */ + 3331 "00000001" // /* MW 1 */ + 3332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3333 "00000000" // /* MW 1 */ + 3334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3335 "00000000" // /* MW 1 */ + 3336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3337 "00000000" // /* MW 1 */ + 3338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3339 "00000000" // /* MW 1 */ + 3340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3341 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 43 4 first + 3342 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3343 "00000000" // /* MW 3 */ + 3344 "00101000" // /* MW 2 */ + 3345 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 41 33 first +.delay_slot + 3346 "00011000" // NEZ r3, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3347 "11110000" // /* MW 3 */ + 3348 "00000110" // /* MW 2 */ + 3349 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 41 33 +.delay_slot + 3350 "10011000" // NE r0, r2, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3351 "00001000" // /* MW 3 */ + 3352 "10000000" // /* MW 2 */ + 3353 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 42 25 first +.delay_slot + 3354 "10011000" // LSHL r0, r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3355 "00011101" // /* MW 3 */ + 3356 "00000000" // /* MW 2 */ + 3357 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 41 25 first +.src_ref 2 "elementwise_binary_broadcasting.h" 42 23 +.delay_slot + 3358 "01011100" // ST r0, [p0, #4]; LSHL r2, r3, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3359 "00111011" // /* MW 5 */ + 3360 "10001000" // /* MW 4 */ + 3361 "00110001" // /* MW 3 */ + 3362 "10000010" // /* MW 2 */ + 3363 "00000010" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 41 23 +.delay_slot + 3364 "10011000" // ST r2, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3365 "01010001" // /* MW 3 */ + 3366 "00000100" // /* MW 2 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_end0 + 3367 "00001000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv +.function setup _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv +.src_ref 2 "elementwise_binary_broadcasting.h" 35 +.src_ref 2 "elementwise_binary_broadcasting.h" 35 first +.function_start + 3376 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3377 "00000001" // /* MW 5 */ + 3378 "00000000" // /* MW 4 */ + 3379 "00000000" // /* MW 3 */ + 3380 "00001000" // /* MW 2 */ + 3381 "00000000" // /* MW 1 */ + 3382 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3383 "00111101" // /* MW 3 */ + 3384 "11111100" // /* MW 2 */ + 3385 "00001111" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 36 8 first +.no_stack_arguments + 3386 "00000100" // JL #3120 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3120 delay_slots=5 */ + 3387 "00000001" // /* MW 5 */ + 3388 "00000000" // /* MW 4 */ + 3389 "00011000" // /* MW 3 */ + 3390 "00000110" // /* MW 2 */ + 3391 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 36 8 +.delay_slot + 3392 "01000100" // MOVXM p0, #508672 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3393 "00000000" // /* MW 5 */ + 3394 "11000110" // /* MW 4 */ + 3395 "11000000" // /* MW 3 */ + 3396 "00000111" // /* MW 2 */ + 3397 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3399 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3401 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3403 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3404 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3405 "01100111" // /* MW 3 */ + 3406 "00000001" // /* MW 2 */ + 3407 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 37 8 +.return_address + 3408 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3409 "00111001" // /* MW 3 */ + 3410 "11111100" // /* MW 2 */ + 3411 "00000111" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 37 8 first +.tail_call + 3412 "10000100" // J #3312 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=3312 delay_slots=5 */ + 3413 "00000000" // /* MW 5 */ + 3414 "00000000" // /* MW 4 */ + 3415 "01111000" // /* MW 3 */ + 3416 "00000110" // /* MW 2 */ + 3417 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 37 8 +.delay_slot + 3418 "01000100" // MOVXM p0, #508672 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3419 "00000000" // /* MW 5 */ + 3420 "11000110" // /* MW 4 */ + 3421 "11000000" // /* MW 3 */ + 3422 "00000111" // /* MW 2 */ + 3423 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 38 4 first +.delay_slot + 3424 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3425 "00000001" // /* MW 5 */ + 3426 "00000000" // /* MW 4 */ + 3427 "00000000" // /* MW 3 */ + 3428 "11111000" // /* MW 2 */ + 3429 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3431 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3433 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + 3435 "00000000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.function run _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.src_ref 2 "elementwise_binary_broadcasting.h" 48 first +.src_ref 2 "elementwise_binary_broadcasting.h" 55 37 +.src_ref 2 "elementwise_binary_broadcasting.h" 61 19 +.function_start + 3440 "10111010" // MOVA m0, #20; MOVXM p3, #508672 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3441 "00010000" // /* MW 9 */ + 3442 "10000000" // /* MW 8 */ + 3443 "10110001" // /* MW 7 */ + 3444 "11110001" // /* MW 6 */ + 3445 "00000001" // /* MW 5 */ + 3446 "00000000" // /* MW 4 */ + 3447 "10000000" // /* MW 3 */ + 3448 "10000000" // /* MW 2 */ + 3449 "00000010" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 55 37 first + 3450 "10011000" // LDA r0, [p3], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3451 "00010110" // /* MW 3 */ + 3452 "00111100" // /* MW 2 */ + 3453 "00000011" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 61 19 first +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 + 3454 "11010100" // LDA.u8 r1, [p3], m0; MOV p4, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3455 "10000001" // /* MW 5 */ + 3456 "11001101" // /* MW 4 */ + 3457 "01011000" // /* MW 3 */ + 3458 "00000101" // /* MW 2 */ + 3459 "01100001" // /* MW 1 */ + 3460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3461 "00000000" // /* MW 1 */ + 3462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3463 "00000000" // /* MW 1 */ + 3464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3465 "00000000" // /* MW 1 */ + 3466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3467 "00000000" // /* MW 1 */ + 3468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3469 "00000000" // /* MW 1 */ + 3470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3471 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 61 12 +.src_ref 2 "elementwise_binary_broadcasting.h" 61 35 + 3472 "10000100" // JNZ r1, #3536 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3536 delay_slots=5 */ + 3473 "00000001" // /* MW 5 */ + 3474 "01000000" // /* MW 4 */ + 3475 "11101000" // /* MW 3 */ + 3476 "00000110" // /* MW 2 */ + 3477 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 55 78 +.delay_slot + 3478 "00011000" // MOVX r2, #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3479 "11101001" // /* MW 3 */ + 3480 "11000100" // /* MW 2 */ + 3481 "00010111" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 55 78 first +.delay_slot + 3482 "10011000" // LSHL r0, r0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3483 "00101101" // /* MW 3 */ + 3484 "00000000" // /* MW 2 */ + 3485 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3487 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3489 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3490 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3491 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 62 28 first + 3492 "10011000" // LDA.s16 r1, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3493 "00110010" // /* MW 3 */ + 3494 "00000100" // /* MW 2 */ + 3495 "00000000" // /* MW 1 */ + 3496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3497 "00000000" // /* MW 1 */ + 3498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3499 "00000000" // /* MW 1 */ + 3500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3501 "00000000" // /* MW 1 */ + 3502 "10000100" // J #3568 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3568 delay_slots=5 */ + 3503 "00000000" // /* MW 5 */ + 3504 "00000000" // /* MW 4 */ + 3505 "11111000" // /* MW 3 */ + 3506 "00000110" // /* MW 2 */ + 3507 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3509 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3511 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 first +.delay_slot + 3512 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3513 "01110010" // /* MW 3 */ + 3514 "00000101" // /* MW 2 */ + 3515 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3516 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3517 "01100111" // /* MW 3 */ + 3518 "00000001" // /* MW 2 */ + 3519 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.delay_slot + 3520 "11100001" // NOPA; NOPB; VST x0, [p0]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3521 "00000000" // /* MW 15 */ + 3522 "00000000" // /* MW 14 */ + 3523 "01111000" // /* MW 13 */ + 3524 "10100101" // /* MW 12 */ + 3525 "00000001" // /* MW 11 */ + 3526 "00000000" // /* MW 10 */ + 3527 "00000000" // /* MW 9 */ + 3528 "00000000" // /* MW 8 */ + 3529 "00010011" // /* MW 7 */ + 3530 "00000100" // /* MW 6 */ + 3531 "00100000" // /* MW 5 */ + 3532 "00000000" // /* MW 4 */ + 3533 "11110000" // /* MW 3 */ + 3534 "00101100" // /* MW 2 */ + 3535 "00000000" // /* MW 1 */ +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_96 +.src_ref 2 "elementwise_binary_broadcasting.h" 65 28 first + 3536 "10011000" // LDA.s16 r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3537 "00110010" // /* MW 3 */ + 3538 "00000100" // /* MW 2 */ + 3539 "00000001" // /* MW 1 */ + 3540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3541 "00000000" // /* MW 1 */ + 3542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3543 "00000000" // /* MW 1 */ + 3544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3545 "00000000" // /* MW 1 */ + 3546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3547 "00000000" // /* MW 1 */ + 3548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3549 "00000000" // /* MW 1 */ + 3550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3551 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 first + 3552 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3553 "01110010" // /* MW 3 */ + 3554 "00000101" // /* MW 2 */ + 3555 "00011000" // /* MW 1 */ + 3556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3557 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first + 3558 "01111010" // NOPA; VST x0, [p1]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3559 "00000000" // /* MW 9 */ + 3560 "00000000" // /* MW 8 */ + 3561 "00000000" // /* MW 7 */ + 3562 "00000000" // /* MW 6 */ + 3563 "00010011" // /* MW 5 */ + 3564 "00000100" // /* MW 4 */ + 3565 "11110001" // /* MW 3 */ + 3566 "00101100" // /* MW 2 */ + 3567 "00000000" // /* MW 1 */ +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_128 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 first +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 first + 3568 "10111010" // LDA m0, [p4, #20]; MOVX r0, #60; ADD.NC lc, r0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3569 "01001000" // /* MW 9 */ + 3570 "00111111" // /* MW 8 */ + 3571 "10111000" // /* MW 7 */ + 3572 "10001010" // /* MW 6 */ + 3573 "00000111" // /* MW 5 */ + 3574 "00000000" // /* MW 4 */ + 3575 "11010000" // /* MW 3 */ + 3576 "10000000" // /* MW 2 */ + 3577 "10001010" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 + 3578 "10111010" // LDA m1, [p3, #4]; MOVXM ls, #3680 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3579 "00010000" // /* MW 9 */ + 3580 "00110000" // /* MW 8 */ + 3581 "01111111" // /* MW 7 */ + 3582 "00000000" // /* MW 6 */ + 3583 "00000000" // /* MW 5 */ + 3584 "00000000" // /* MW 4 */ + 3585 "11010000" // /* MW 3 */ + 3586 "10010000" // /* MW 2 */ + 3587 "01100010" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 + 3588 "01000100" // MOVXM le, #3712 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3589 "00000000" // /* MW 5 */ + 3590 "11111101" // /* MW 4 */ + 3591 "00000110" // /* MW 3 */ + 3592 "00000000" // /* MW 2 */ + 3593 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 + 3594 "01000100" // MOVXM p4, #508448 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3595 "01000000" // /* MW 5 */ + 3596 "11000100" // /* MW 4 */ + 3597 "11001000" // /* MW 3 */ + 3598 "00000111" // /* MW 2 */ + 3599 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 + 3600 "10011000" // LDA.s8 r1, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3601 "00100010" // /* MW 3 */ + 3602 "00000100" // /* MW 2 */ + 3603 "00000100" // /* MW 1 */ + 3604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3605 "00000000" // /* MW 1 */ + 3606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3607 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 146 20 first + 3608 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3609 "10101011" // /* MW 3 */ + 3610 "00001000" // /* MW 2 */ + 3611 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 2 "elementwise_binary.h" 148 20 first + 3612 "10011000" // VLDA.CONV.fp32.bf16 cml2, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3613 "00101011" // /* MW 3 */ + 3614 "00101001" // /* MW 2 */ + 3615 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 170 20 first + 3616 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3617 "00101011" // /* MW 3 */ + 3618 "00001000" // /* MW 2 */ + 3619 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3620 "10011000" // VLDA.CONV.fp32.bf16 cml4, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3621 "00101011" // /* MW 3 */ + 3622 "00101010" // /* MW 2 */ + 3623 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 154 20 +.src_ref 2 "elementwise_binary.h" 177 20 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3624 "00101100" // VLDA.CONV.fp32.bf16 cml1, [p0], m0; MOVX crRnd, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3625 "00000000" // /* MW 5 */ + 3626 "11110101" // /* MW 4 */ + 3627 "01110000" // /* MW 3 */ + 3628 "00010101" // /* MW 2 */ + 3629 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3630 "01100010" // VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3631 "00111101" // /* MW 7 */ + 3632 "00101000" // /* MW 6 */ + 3633 "00000011" // /* MW 5 */ + 3634 "00000100" // /* MW 4 */ + 3635 "01110000" // /* MW 3 */ + 3636 "00100101" // /* MW 2 */ + 3637 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3638 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3639 "00101011" // /* MW 3 */ + 3640 "00001000" // /* MW 2 */ + 3641 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3642 "01100010" // VLDA.CONV.fp32.bf16 cml4, [p1], m1; VADD.f dm4, dm0, dm4, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3643 "00111101" // /* MW 7 */ + 3644 "00010000" // /* MW 6 */ + 3645 "00000100" // /* MW 5 */ + 3646 "00000100" // /* MW 4 */ + 3647 "01110000" // /* MW 3 */ + 3648 "01000101" // /* MW 2 */ + 3649 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3650 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3651 "10101011" // /* MW 3 */ + 3652 "00001000" // /* MW 2 */ + 3653 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3654 "01100010" // VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3655 "00111101" // /* MW 7 */ + 3656 "00101000" // /* MW 6 */ + 3657 "00000011" // /* MW 5 */ + 3658 "00000100" // /* MW 4 */ + 3659 "01110000" // /* MW 3 */ + 3660 "00100101" // /* MW 2 */ + 3661 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3662 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3663 "00101011" // /* MW 3 */ + 3664 "00001000" // /* MW 2 */ + 3665 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3666 "01101110" // VLDA.CONV.fp32.bf16 cml4, [p1], m1; VST.CONV.bf16.fp32 cml3, [p2], #64; NOPM; VADD.f dm4, dm0, dm4, r0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 3667 "00111101" // /* MW 13 */ + 3668 "00010000" // /* MW 12 */ + 3669 "00000100" // /* MW 11 */ + 3670 "01010111" // /* MW 10 */ + 3671 "00011010" // /* MW 9 */ + 3672 "01000000" // /* MW 8 */ + 3673 "00000000" // /* MW 7 */ + 3674 "00000000" // /* MW 6 */ + 3675 "01000110" // /* MW 5 */ + 3676 "00111011" // /* MW 4 */ + 3677 "01110100" // /* MW 3 */ + 3678 "01000101" // /* MW 2 */ + 3679 "00100101" // /* MW 1 */ +.label ZLS_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_240 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 3680 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3681 "10101011" // /* MW 3 */ + 3682 "00001000" // /* MW 2 */ + 3683 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3684 "01100110" // VLDA.CONV.fp32.bf16 cml2, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3685 "00111101" // /* MW 11 */ + 3686 "00101000" // /* MW 10 */ + 3687 "00000011" // /* MW 9 */ + 3688 "10001110" // /* MW 8 */ + 3689 "00010001" // /* MW 7 */ + 3690 "00001111" // /* MW 6 */ + 3691 "00100001" // /* MW 5 */ + 3692 "00000000" // /* MW 4 */ + 3693 "01110000" // /* MW 3 */ + 3694 "00100101" // /* MW 2 */ + 3695 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3696 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3697 "00000000" // /* MW 15 */ + 3698 "00000000" // /* MW 14 */ + 3699 "01111000" // /* MW 13 */ + 3700 "10100101" // /* MW 12 */ + 3701 "00000001" // /* MW 11 */ + 3702 "00000000" // /* MW 10 */ + 3703 "00000000" // /* MW 9 */ + 3704 "00000000" // /* MW 8 */ + 3705 "01011011" // /* MW 7 */ + 3706 "00000001" // /* MW 6 */ + 3707 "00100000" // /* MW 5 */ + 3708 "00000000" // /* MW 4 */ + 3709 "01110000" // /* MW 3 */ + 3710 "00000101" // /* MW 2 */ + 3711 "00000001" // /* MW 1 */ +.label ZLE_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_272 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3712 "11101011" // VLDA.CONV.fp32.bf16 cml4, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml3, [p2], #64;NOPX; NOPM; VADD.f dm4, dm0, dm4, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3713 "10000001" // /* MW 15 */ + 3714 "00100000" // /* MW 14 */ + 3715 "01111000" // /* MW 13 */ + 3716 "10100101" // /* MW 12 */ + 3717 "00000001" // /* MW 11 */ + 3718 "00000000" // /* MW 10 */ + 3719 "00000000" // /* MW 9 */ + 3720 "00000000" // /* MW 8 */ + 3721 "10100011" // /* MW 7 */ + 3722 "00011101" // /* MW 6 */ + 3723 "00100010" // /* MW 5 */ + 3724 "00000000" // /* MW 4 */ + 3725 "01110000" // /* MW 3 */ + 3726 "01000101" // /* MW 2 */ + 3727 "00100101" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 3728 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3729 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3730 "01100010" // VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3731 "00111101" // /* MW 7 */ + 3732 "00101000" // /* MW 6 */ + 3733 "00000011" // /* MW 5 */ + 3734 "00000010" // /* MW 4 */ + 3735 "01100000" // /* MW 3 */ + 3736 "11000100" // /* MW 2 */ + 3737 "01000011" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3738 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3739 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3740 "01100010" // VST.CONV.bf16.fp32 cml3, [p2], #64; VADD.f dm4, dm0, dm4, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3741 "00111101" // /* MW 7 */ + 3742 "00010000" // /* MW 6 */ + 3743 "00000100" // /* MW 5 */ + 3744 "00000010" // /* MW 4 */ + 3745 "01100000" // /* MW 3 */ + 3746 "10110100" // /* MW 2 */ + 3747 "01000011" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3749 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 154 20 first +.src_ref 2 "elementwise_binary_broadcasting.h" 80 4 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3750 "01011100" // VST.CONV.bf16.fp32 cml4, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 3751 "00000000" // /* MW 5 */ + 3752 "01010000" // /* MW 4 */ + 3753 "01100000" // /* MW 3 */ + 3754 "11000100" // /* MW 2 */ + 3755 "01000011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3756 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3757 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.delay_slot + 3758 "00011000" // VST.CONV.bf16.fp32 cml3, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3759 "10100011" // /* MW 3 */ + 3760 "00011101" // /* MW 2 */ + 3761 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3762 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3763 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 154 20 first +.delay_slot + 3764 "00011000" // VST.CONV.bf16.fp32 cml4, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3765 "00100011" // /* MW 3 */ + 3766 "00011110" // /* MW 2 */ + 3767 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0 + 3769 "00000000" // /* MW 1 */ +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E +.function run _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 41 +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 41 first +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 76 8 +.function_start + 3776 "00111010" // MOVS p2, p1; PADDXM [sp], #128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3777 "01110001" // /* MW 9 */ + 3778 "00000000" // /* MW 8 */ + 3779 "00000000" // /* MW 7 */ + 3780 "00000000" // /* MW 6 */ + 3781 "00000100" // /* MW 5 */ + 3782 "00000000" // /* MW 4 */ + 3783 "01100000" // /* MW 3 */ + 3784 "10010001" // /* MW 2 */ + 3785 "01010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 51 35 + 3786 "00000010" // ST lr, [sp, #-4]; MOV r16, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3787 "01110000" // /* MW 7 */ + 3788 "01100000" // /* MW 6 */ + 3789 "00001000" // /* MW 5 */ + 3790 "00000010" // /* MW 4 */ + 3791 "10110000" // /* MW 3 */ + 3792 "10000111" // /* MW 2 */ + 3793 "11111111" // /* MW 1 */ + 3794 "11111000" // MOV r17, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3795 "11100000" // /* MW 3 */ + 3796 "01010101" // /* MW 2 */ + 3797 "00011100" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 51 19 + 3798 "01000100" // MOVXM p3, #508684 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3799 "00011000" // /* MW 5 */ + 3800 "11000110" // /* MW 4 */ + 3801 "11000110" // /* MW 3 */ + 3802 "00000111" // /* MW 2 */ + 3803 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 51 19 first + 3804 "00010100" // LDA.u8 r27, [p3], #2; ADD.NC p0, r17, #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3805 "10000000" // /* MW 5 */ + 3806 "11010001" // /* MW 4 */ + 3807 "01010000" // /* MW 3 */ + 3808 "11101101" // /* MW 2 */ + 3809 "01100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 538 13 first +.src_ref 4 "vector_native_types.hpp" 374 137 first + 3810 "00001100" // LDA.s16 r18, [p3], #-14; VST sfh, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3811 "01010110" // /* MW 5 */ + 3812 "00001110" // /* MW 4 */ + 3813 "01010000" // /* MW 3 */ + 3814 "11001010" // /* MW 2 */ + 3815 "01110011" // /* MW 1 */ + 3816 "00011000" // ST.s16 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3817 "01010111" // /* MW 3 */ + 3818 "00000110" // /* MW 2 */ + 3819 "00000000" // /* MW 1 */ + 3820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3821 "00000000" // /* MW 1 */ + 3822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3823 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 76 8 first +.no_stack_arguments + 3824 "00000100" // JL #3440 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3440 delay_slots=5 */ + 3825 "00000001" // /* MW 5 */ + 3826 "00000000" // /* MW 4 */ + 3827 "10111000" // /* MW 3 */ + 3828 "00000110" // /* MW 2 */ + 3829 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 51 35 +.delay_slot + 3830 "11111000" // MOV r17, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3831 "11000000" // /* MW 3 */ + 3832 "01010000" // /* MW 2 */ + 3833 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3835 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 51 35 first +.delay_slot + 3836 "00011000" // SEL.EQZ r18, r16, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3837 "00010010" // /* MW 3 */ + 3838 "00100101" // /* MW 2 */ + 3839 "00010100" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 51 35 +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 76 8 +.delay_slot + 3840 "11100100" // SEL.EQZ r16, r17, r16, r27; MOV p1, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3841 "01000001" // /* MW 5 */ + 3842 "11010010" // /* MW 4 */ + 3843 "01000010" // /* MW 3 */ + 3844 "00100000" // /* MW 2 */ + 3845 "10001100" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 76 8 +.delay_slot + 3846 "10111010" // NOPA; NOPB; MOV p0, r16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3847 "01111110" // /* MW 9 */ + 3848 "00010000" // /* MW 8 */ + 3849 "00110100" // /* MW 7 */ + 3850 "00000000" // /* MW 6 */ + 3851 "00010000" // /* MW 5 */ + 3852 "00000000" // /* MW 4 */ + 3853 "11110000" // /* MW 3 */ + 3854 "00101100" // /* MW 2 */ + 3855 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 77 4 +.return_address + 3856 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3857 "00111001" // /* MW 3 */ + 3858 "11111100" // /* MW 2 */ + 3859 "00000111" // /* MW 1 */ + 3860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3861 "00000000" // /* MW 1 */ + 3862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3863 "00000000" // /* MW 1 */ + 3864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3865 "00000000" // /* MW 1 */ + 3866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3867 "00000000" // /* MW 1 */ + 3868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3869 "00000000" // /* MW 1 */ + 3870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3871 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 77 4 first + 3872 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3873 "00000000" // /* MW 3 */ + 3874 "00101000" // /* MW 2 */ + 3875 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 77 4 +.delay_slot + 3876 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3877 "00000001" // /* MW 5 */ + 3878 "00000000" // /* MW 4 */ + 3879 "00000000" // /* MW 3 */ + 3880 "11110000" // /* MW 2 */ + 3881 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3883 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3885 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3887 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_end0 + 3889 "00000000" // /* MW 1 */ +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_add1d_attribute_broadcasting _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 6 "superkernels.cpp" 152 first +.src_ref 6 "superkernels.cpp" 157 6 +.function_start + 3904 "01000100" // MOVXM p3, #508416 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3905 "00000000" // /* MW 5 */ + 3906 "11000100" // /* MW 4 */ + 3907 "11000110" // /* MW 3 */ + 3908 "00000111" // /* MW 2 */ + 3909 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 157 6 first + 3910 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3911 "11000001" // /* MW 5 */ + 3912 "10110101" // /* MW 4 */ + 3913 "11011000" // /* MW 3 */ + 3914 "11000010" // /* MW 2 */ + 3915 "01100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 152 + 3916 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3917 "00000001" // /* MW 5 */ + 3918 "00000000" // /* MW 4 */ + 3919 "00000000" // /* MW 3 */ + 3920 "00001000" // /* MW 2 */ + 3921 "00000000" // /* MW 1 */ + 3922 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3923 "01110000" // /* MW 7 */ + 3924 "11010000" // /* MW 6 */ + 3925 "00001011" // /* MW 5 */ + 3926 "00000000" // /* MW 4 */ + 3927 "10110000" // /* MW 3 */ + 3928 "01100011" // /* MW 2 */ + 3929 "11111111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 154 11 + 3930 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #508420 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3931 "00010001" // /* MW 9 */ + 3932 "00000010" // /* MW 8 */ + 3933 "00110001" // /* MW 7 */ + 3934 "11110011" // /* MW 6 */ + 3935 "00000001" // /* MW 5 */ + 3936 "00000000" // /* MW 4 */ + 3937 "10110000" // /* MW 3 */ + 3938 "10000010" // /* MW 2 */ + 3939 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 + 3940 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3941 "11000000" // /* MW 3 */ + 3942 "11010100" // /* MW 2 */ + 3943 "00011011" // /* MW 1 */ + 3944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3945 "00000000" // /* MW 1 */ + 3946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3947 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 157 6 +.src_ref 6 "superkernels.cpp" 157 16 + 3948 "10000100" // JNZ r16, #4112 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4112 delay_slots=5 */ + 3949 "00000001" // /* MW 5 */ + 3950 "01000000" // /* MW 4 */ + 3951 "00001000" // /* MW 3 */ + 3952 "00001000" // /* MW 2 */ + 3953 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 154 22 first +.delay_slot + 3954 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3955 "10010000" // /* MW 3 */ + 3956 "01100010" // /* MW 2 */ + 3957 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 154 30 +.delay_slot + 3958 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3959 "11111011" // /* MW 3 */ + 3960 "01100011" // /* MW 2 */ + 3961 "00010100" // /* MW 1 */ +.delay_slot + 3962 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3963 "00111101" // /* MW 3 */ + 3964 "11110100" // /* MW 2 */ + 3965 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 6 "superkernels.cpp" 154 11 +.delay_slot + 3966 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3967 "01110000" // /* MW 7 */ + 3968 "01100000" // /* MW 6 */ + 3969 "00110000" // /* MW 5 */ + 3970 "00000011" // /* MW 4 */ + 3971 "00110000" // /* MW 3 */ + 3972 "11000110" // /* MW 2 */ + 3973 "11000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 160 4 +.src_ref 6 "superkernels.cpp" 171 2 +.delay_slot + 3974 "01000100" // MOVXM p0, #508672 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3975 "00000000" // /* MW 5 */ + 3976 "11000110" // /* MW 4 */ + 3977 "11000000" // /* MW 3 */ + 3978 "00000111" // /* MW 2 */ + 3979 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3980 "01000100" // MOVXM p2, #508448 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3981 "01000000" // /* MW 5 */ + 3982 "11000100" // /* MW 4 */ + 3983 "11000100" // /* MW 3 */ + 3984 "00000111" // /* MW 2 */ + 3985 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3986 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #508444 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3987 "00010000" // /* MW 9 */ + 3988 "00001110" // /* MW 8 */ + 3989 "00110001" // /* MW 7 */ + 3990 "11110001" // /* MW 6 */ + 3991 "00000001" // /* MW 5 */ + 3992 "00000000" // /* MW 4 */ + 3993 "11100000" // /* MW 3 */ + 3994 "11000000" // /* MW 2 */ + 3995 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3997 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 160 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 3998 "00000100" // JL #3376 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3376 delay_slots=5 */ + 3999 "00000001" // /* MW 5 */ + 4000 "00000000" // /* MW 4 */ + 4001 "10011000" // /* MW 3 */ + 4002 "00000110" // /* MW 2 */ + 4003 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4005 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4007 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4008 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4009 "00110001" // /* MW 3 */ + 4010 "00100000" // /* MW 2 */ + 4011 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 4012 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4013 "00000101" // /* MW 3 */ + 4014 "00100000" // /* MW 2 */ + 4015 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 4016 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4017 "00000000" // /* MW 15 */ + 4018 "00000000" // /* MW 14 */ + 4019 "01111000" // /* MW 13 */ + 4020 "10100101" // /* MW 12 */ + 4021 "00000001" // /* MW 11 */ + 4022 "00000000" // /* MW 10 */ + 4023 "00000000" // /* MW 9 */ + 4024 "10000000" // /* MW 8 */ + 4025 "00010001" // /* MW 7 */ + 4026 "00000110" // /* MW 6 */ + 4027 "00100010" // /* MW 5 */ + 4028 "00000000" // /* MW 4 */ + 4029 "11110000" // /* MW 3 */ + 4030 "00101100" // /* MW 2 */ + 4031 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 164 18 +.return_address + 4032 "01000100" // MOVXM p2, #508420 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4033 "00001000" // /* MW 5 */ + 4034 "11000100" // /* MW 4 */ + 4035 "11000100" // /* MW 3 */ + 4036 "00000111" // /* MW 2 */ + 4037 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 164 18 first +.src_ref 6 "superkernels.cpp" 164 65 + 4038 "10111010" // LDA r16, [p2]; MOVXM p2, #508672 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4039 "00010000" // /* MW 9 */ + 4040 "10000000" // /* MW 8 */ + 4041 "00110001" // /* MW 7 */ + 4042 "11110001" // /* MW 6 */ + 4043 "00000001" // /* MW 5 */ + 4044 "00000000" // /* MW 4 */ + 4045 "11010000" // /* MW 3 */ + 4046 "11000010" // /* MW 2 */ + 4047 "01000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 162 51 +.src_ref 6 "superkernels.cpp" 164 65 +.src_ref 6 "superkernels.cpp" 171 2 + 4048 "10111010" // LDA r17, [p2]; MOVXM p2, #508672 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4049 "00010000" // /* MW 9 */ + 4050 "10000000" // /* MW 8 */ + 4051 "00110001" // /* MW 7 */ + 4052 "11110001" // /* MW 6 */ + 4053 "00000001" // /* MW 5 */ + 4054 "00000000" // /* MW 4 */ + 4055 "11010000" // /* MW 3 */ + 4056 "11000110" // /* MW 2 */ + 4057 "01000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 162 51 first +.src_ref 6 "superkernels.cpp" 164 16 +.src_ref 6 "superkernels.cpp" 169 47 + 4058 "10111010" // LDA.u16 r18, [p2, #10]; MOVXM p1, #508424 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4059 "00010000" // /* MW 9 */ + 4060 "00000100" // /* MW 8 */ + 4061 "10110001" // /* MW 7 */ + 4062 "11110000" // /* MW 6 */ + 4063 "00000001" // /* MW 5 */ + 4064 "00000000" // /* MW 4 */ + 4065 "01010000" // /* MW 3 */ + 4066 "11001011" // /* MW 2 */ + 4067 "01001010" // /* MW 1 */ + 4068 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4069 "00000000" // /* MW 1 */ + 4070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4071 "00000000" // /* MW 1 */ + 4072 "10000100" // J #4128 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=4128 delay_slots=5 */ + 4073 "00000000" // /* MW 5 */ + 4074 "00000000" // /* MW 4 */ + 4075 "00010000" // /* MW 3 */ + 4076 "00001000" // /* MW 2 */ + 4077 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 162 13 +.delay_slot + 4078 "01000100" // MOVXM p0, #508440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4079 "00110000" // /* MW 5 */ + 4080 "11000100" // /* MW 4 */ + 4081 "11000000" // /* MW 3 */ + 4082 "00000111" // /* MW 2 */ + 4083 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4085 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 164 27 first +.delay_slot + 4086 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4087 "00001111" // /* MW 3 */ + 4088 "01100001" // /* MW 2 */ + 4089 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 162 13 first +.delay_slot + 4090 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4091 "10100011" // /* MW 5 */ + 4092 "00001100" // /* MW 4 */ + 4093 "11110000" // /* MW 3 */ + 4094 "00101100" // /* MW 2 */ + 4095 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 164 16 first +.delay_slot + 4096 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4097 "00000000" // /* MW 15 */ + 4098 "00000000" // /* MW 14 */ + 4099 "01111000" // /* MW 13 */ + 4100 "10100101" // /* MW 12 */ + 4101 "00000001" // /* MW 11 */ + 4102 "00000000" // /* MW 10 */ + 4103 "00000000" // /* MW 9 */ + 4104 "10000000" // /* MW 8 */ + 4105 "00010001" // /* MW 7 */ + 4106 "00000110" // /* MW 6 */ + 4107 "00100001" // /* MW 5 */ + 4108 "00000000" // /* MW 4 */ + 4109 "11110000" // /* MW 3 */ + 4110 "00101100" // /* MW 2 */ + 4111 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 6 "superkernels.cpp" 169 47 +.src_ref 6 "superkernels.cpp" 171 2 + 4112 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #508424; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4113 "00000000" // /* MW 15 */ + 4114 "00000000" // /* MW 14 */ + 4115 "00010000" // /* MW 13 */ + 4116 "00000100" // /* MW 12 */ + 4117 "10110001" // /* MW 11 */ + 4118 "11110000" // /* MW 10 */ + 4119 "00000001" // /* MW 9 */ + 4120 "00000000" // /* MW 8 */ + 4121 "10001011" // /* MW 7 */ + 4122 "10000000" // /* MW 6 */ + 4123 "00100010" // /* MW 5 */ + 4124 "00000000" // /* MW 4 */ + 4125 "11110000" // /* MW 3 */ + 4126 "00101100" // /* MW 2 */ + 4127 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 +.src_ref 1 "io_buffer_main.h" 242 49 first + 4128 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4129 "00000000" // /* MW 7 */ + 4130 "11000011" // /* MW 6 */ + 4131 "10110011" // /* MW 5 */ + 4132 "00000011" // /* MW 4 */ + 4133 "01100000" // /* MW 3 */ + 4134 "10010001" // /* MW 2 */ + 4135 "01110011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 168 2 + 4136 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4137 "00010000" // /* MW 9 */ + 4138 "00000000" // /* MW 8 */ + 4139 "00110001" // /* MW 7 */ + 4140 "11110000" // /* MW 6 */ + 4141 "00000001" // /* MW 5 */ + 4142 "00000000" // /* MW 4 */ + 4143 "11010000" // /* MW 3 */ + 4144 "11101110" // /* MW 2 */ + 4145 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 4146 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4147 "00010110" // /* MW 3 */ + 4148 "11111110" // /* MW 2 */ + 4149 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 4150 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4151 "00110110" // /* MW 3 */ + 4152 "11111110" // /* MW 2 */ + 4153 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first + 4154 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4155 "01010110" // /* MW 3 */ + 4156 "01000110" // /* MW 2 */ + 4157 "00000111" // /* MW 1 */ + 4158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4159 "00000000" // /* MW 1 */ + 4160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4161 "00000000" // /* MW 1 */ + 4162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4163 "00000000" // /* MW 1 */ + 4164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4165 "00000000" // /* MW 1 */ + 4166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4167 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 4168 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4169 "00000010" // /* MW 3 */ + 4170 "01100001" // /* MW 2 */ + 4171 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 + 4172 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4173 "00010001" // /* MW 3 */ + 4174 "00000110" // /* MW 2 */ + 4175 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 + 4176 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4177 "11111101" // /* MW 3 */ + 4178 "11100000" // /* MW 2 */ + 4179 "00010111" // /* MW 1 */ + 4180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4181 "00000000" // /* MW 1 */ + 4182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4183 "00000000" // /* MW 1 */ + 4184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4185 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 4186 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4187 "00001000" // /* MW 3 */ + 4188 "10010011" // /* MW 2 */ + 4189 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 169 45 + 4190 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4191 "10000001" // /* MW 5 */ + 4192 "10101101" // /* MW 4 */ + 4193 "10100111" // /* MW 3 */ + 4194 "00000000" // /* MW 2 */ + 4195 "00000100" // /* MW 1 */ + 4196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4197 "00000000" // /* MW 1 */ + 4198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4199 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 168 2 first + 4200 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4201 "00110110" // /* MW 3 */ + 4202 "00000110" // /* MW 2 */ + 4203 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 1 "io_buffer_main.h" 348 51 + 4204 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4205 "10000001" // /* MW 5 */ + 4206 "11011101" // /* MW 4 */ + 4207 "11011100" // /* MW 3 */ + 4208 "11001010" // /* MW 2 */ + 4209 "11000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 169 47 first + 4210 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4211 "01110110" // /* MW 3 */ + 4212 "00000110" // /* MW 2 */ + 4213 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 4214 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4215 "10011110" // /* MW 3 */ + 4216 "01011100" // /* MW 2 */ + 4217 "00000111" // /* MW 1 */ + 4218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4219 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 171 2 first +.no_stack_arguments + 4220 "00000100" // JL #3776 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3776 delay_slots=5 */ + 4221 "00000001" // /* MW 5 */ + 4222 "00000000" // /* MW 4 */ + 4223 "01100000" // /* MW 3 */ + 4224 "00000111" // /* MW 2 */ + 4225 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4226 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4227 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 168 2 first +.delay_slot + 4228 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4229 "00000111" // /* MW 3 */ + 4230 "01100010" // /* MW 2 */ + 4231 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 168 2 +.delay_slot + 4232 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4233 "00110001" // /* MW 3 */ + 4234 "00000110" // /* MW 2 */ + 4235 "00001000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 169 45 first +.delay_slot + 4236 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4237 "00001101" // /* MW 3 */ + 4238 "11100001" // /* MW 2 */ + 4239 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 169 45 +.delay_slot + 4240 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4241 "00000000" // /* MW 15 */ + 4242 "00000000" // /* MW 14 */ + 4243 "10101000" // /* MW 13 */ + 4244 "10100000" // /* MW 12 */ + 4245 "00110100" // /* MW 11 */ + 4246 "00000000" // /* MW 10 */ + 4247 "00000000" // /* MW 9 */ + 4248 "00000000" // /* MW 8 */ + 4249 "01011011" // /* MW 7 */ + 4250 "00000001" // /* MW 6 */ + 4251 "00100000" // /* MW 5 */ + 4252 "00000000" // /* MW 4 */ + 4253 "11110000" // /* MW 3 */ + 4254 "00101100" // /* MW 2 */ + 4255 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first +.src_ref 6 "superkernels.cpp" 173 6 +.src_ref 6 "superkernels.cpp" 174 14 +.return_address + 4256 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4257 "00010000" // /* MW 9 */ + 4258 "00000000" // /* MW 8 */ + 4259 "00110001" // /* MW 7 */ + 4260 "11110011" // /* MW 6 */ + 4261 "00000001" // /* MW 5 */ + 4262 "00000000" // /* MW 4 */ + 4263 "11010000" // /* MW 3 */ + 4264 "11000110" // /* MW 2 */ + 4265 "11001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 449 8 + 4266 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4267 "00000101" // /* MW 3 */ + 4268 "00100000" // /* MW 2 */ + 4269 "00010000" // /* MW 1 */ + 4270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4271 "00000000" // /* MW 1 */ + 4272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4273 "00000000" // /* MW 1 */ + 4274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4275 "00000000" // /* MW 1 */ + 4276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4277 "00000000" // /* MW 1 */ + 4278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4279 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 4280 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4281 "00001000" // /* MW 3 */ + 4282 "01010001" // /* MW 2 */ + 4283 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first +.src_ref 6 "superkernels.cpp" 173 19 + 4284 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #508440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4285 "00010000" // /* MW 9 */ + 4286 "00001100" // /* MW 8 */ + 4287 "00110001" // /* MW 7 */ + 4288 "11110001" // /* MW 6 */ + 4289 "00000001" // /* MW 5 */ + 4290 "00000000" // /* MW 4 */ + 4291 "11010000" // /* MW 3 */ + 4292 "11001110" // /* MW 2 */ + 4293 "11111100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 173 6 first + 4294 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4295 "00110110" // /* MW 3 */ + 4296 "00000110" // /* MW 2 */ + 4297 "00000110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 173 19 + 4298 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4299 "01010110" // /* MW 3 */ + 4300 "00000110" // /* MW 2 */ + 4301 "00000010" // /* MW 1 */ + 4302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4303 "00000000" // /* MW 1 */ + 4304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4305 "00000000" // /* MW 1 */ + 4306 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4307 "00000000" // /* MW 1 */ + 4308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4309 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 first + 4310 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4311 "00110001" // /* MW 3 */ + 4312 "00100001" // /* MW 2 */ + 4313 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 4314 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4315 "00010001" // /* MW 3 */ + 4316 "11100110" // /* MW 2 */ + 4317 "00001111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 173 16 first + 4318 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4319 "00101000" // /* MW 3 */ + 4320 "01100001" // /* MW 2 */ + 4321 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 173 6 + 4322 "10000100" // JNZ r16, #4352 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4352 delay_slots=5 */ + 4323 "00000001" // /* MW 5 */ + 4324 "01000000" // /* MW 4 */ + 4325 "10000000" // /* MW 3 */ + 4326 "00001000" // /* MW 2 */ + 4327 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4329 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4331 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4333 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4335 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4337 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 174 14 + 4338 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4339 "00000001" // /* MW 3 */ + 4340 "00100000" // /* MW 2 */ + 4341 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 174 14 first + 4342 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4343 "00000000" // /* MW 9 */ + 4344 "00000000" // /* MW 8 */ + 4345 "00000000" // /* MW 7 */ + 4346 "10000000" // /* MW 6 */ + 4347 "00010001" // /* MW 5 */ + 4348 "00000110" // /* MW 4 */ + 4349 "11110110" // /* MW 3 */ + 4350 "00101100" // /* MW 2 */ + 4351 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 6 "superkernels.cpp" 176 + 4352 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4353 "00111001" // /* MW 3 */ + 4354 "11110100" // /* MW 2 */ + 4355 "00000111" // /* MW 1 */ + 4356 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4357 "00011001" // /* MW 3 */ + 4358 "11111011" // /* MW 2 */ + 4359 "00000111" // /* MW 1 */ + 4360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4361 "00000000" // /* MW 1 */ + 4362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4363 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 4364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4365 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4366 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4367 "11110001" // /* MW 3 */ + 4368 "11111101" // /* MW 2 */ + 4369 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4371 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 176 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4372 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 4373 "00000000" // /* MW 3 */ + 4374 "00101000" // /* MW 2 */ + 4375 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4376 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4377 "10100000" // /* MW 3 */ + 4378 "01100111" // /* MW 2 */ + 4379 "00011111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 176 +.delay_slot + 4380 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4381 "00000001" // /* MW 5 */ + 4382 "00000000" // /* MW 4 */ + 4383 "00000000" // /* MW 3 */ + 4384 "11111000" // /* MW 2 */ + 4385 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4387 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4389 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 4391 "00000000" // /* MW 1 */ +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv +.function setup _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv +.src_ref 2 "elementwise_unary.h" 95 first +.src_ref 2 "elementwise_unary.h" 97 22 +.src_ref 2 "elementwise_unary.h" 97 24 first +.function_start + 4400 "10111010" // LDA el0, [p1], #4; MOVXM p0, #508800 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4401 "00010000" // /* MW 9 */ + 4402 "11000000" // /* MW 8 */ + 4403 "00110001" // /* MW 7 */ + 4404 "11110000" // /* MW 6 */ + 4405 "00000001" // /* MW 5 */ + 4406 "00000000" // /* MW 4 */ + 4407 "11010000" // /* MW 3 */ + 4408 "10000101" // /* MW 2 */ + 4409 "00100011" // /* MW 1 */ + 4410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4411 "00000000" // /* MW 1 */ + 4412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4413 "00000000" // /* MW 1 */ + 4414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4415 "00000000" // /* MW 1 */ + 4416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4417 "00000000" // /* MW 1 */ + 4418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4419 "00000000" // /* MW 1 */ + 4420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4421 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 97 22 first + 4422 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4423 "00101001" // /* MW 3 */ + 4424 "00011100" // /* MW 2 */ + 4425 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 98 24 first + 4426 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4427 "00101110" // /* MW 3 */ + 4428 "00000100" // /* MW 2 */ + 4429 "00000001" // /* MW 1 */ + 4430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4431 "00000000" // /* MW 1 */ + 4432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4433 "00000000" // /* MW 1 */ + 4434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4435 "00000000" // /* MW 1 */ + 4436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4437 "00000000" // /* MW 1 */ + 4438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4439 "00000000" // /* MW 1 */ + 4440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4441 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 98 22 + 4442 "10011000" // ST el0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4443 "00101001" // /* MW 3 */ + 4444 "00000100" // /* MW 2 */ + 4445 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 99 24 first + 4446 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4447 "00101110" // /* MW 3 */ + 4448 "00010100" // /* MW 2 */ + 4449 "00000001" // /* MW 1 */ + 4450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4451 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 101 4 first + 4452 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 4453 "00000000" // /* MW 3 */ + 4454 "00101000" // /* MW 2 */ + 4455 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4457 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4459 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4461 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4463 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 99 22 first +.delay_slot + 4464 "10011000" // ST el0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4465 "00101001" // /* MW 3 */ + 4466 "00010100" // /* MW 2 */ +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv__end +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_end0 + 4467 "00001000" // /* MW 1 */ +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E +.function run _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_unary.h" 107 first +.src_ref 2 "elementwise_unary.h" 113 37 +.src_ref 2 "elementwise_unary.h" 113 78 +.src_ref 2 "elementwise_unary.h" 142 19 +.function_start + 4480 "10110110" // MOVA r0, #-6; VLDB x10, [p0], #64; MOVXM p2, #508800 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4481 "00010000" // /* MW 11 */ + 4482 "11000000" // /* MW 10 */ + 4483 "00110001" // /* MW 9 */ + 4484 "11110001" // /* MW 8 */ + 4485 "00000001" // /* MW 7 */ + 4486 "00000000" // /* MW 6 */ + 4487 "01101000" // /* MW 5 */ + 4488 "00111101" // /* MW 4 */ + 4489 "00000000" // /* MW 3 */ + 4490 "01000000" // /* MW 2 */ + 4491 "11111111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_unary.h" 113 37 first +.src_ref 2 "elementwise_unary.h" 161 19 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4492 "10110110" // LDA r2, [p2]; VLDB x7, [p0], #64; MOVXM p2, #508448 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4493 "00010000" // /* MW 11 */ + 4494 "00010000" // /* MW 10 */ + 4495 "00110001" // /* MW 9 */ + 4496 "11110001" // /* MW 8 */ + 4497 "00000001" // /* MW 7 */ + 4498 "00000000" // /* MW 6 */ + 4499 "11101000" // /* MW 5 */ + 4500 "00111011" // /* MW 4 */ + 4501 "11010000" // /* MW 3 */ + 4502 "10001010" // /* MW 2 */ + 4503 "01000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_unary.h" 142 19 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4504 "10110110" // LDA.s8 r1, [p2]; VLDB x10, [p0], #64; MOVXM r6, #16512 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4505 "00010000" // /* MW 11 */ + 4506 "01000000" // /* MW 10 */ + 4507 "11001000" // /* MW 9 */ + 4508 "00010000" // /* MW 8 */ + 4509 "00000000" // /* MW 7 */ + 4510 "00000000" // /* MW 6 */ + 4511 "01101000" // /* MW 5 */ + 4512 "00111101" // /* MW 4 */ + 4513 "01010000" // /* MW 3 */ + 4514 "10000100" // /* MW 2 */ + 4515 "01000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_unary.h" 161 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4516 "11110100" // VLDB x7, [p0], #64; VBCST.16 x0, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4517 "11100101" // /* MW 5 */ + 4518 "00110010" // /* MW 4 */ + 4519 "10000000" // /* MW 3 */ + 4520 "10111110" // /* MW 2 */ + 4521 "00000011" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4522 "01000100" // MOVXM r4, #49280 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4523 "00000000" // /* MW 5 */ + 4524 "00100001" // /* MW 4 */ + 4525 "11000010" // /* MW 3 */ + 4526 "00000000" // /* MW 2 */ + 4527 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4528 "11111000" // VBCST.16 x1, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4529 "01110010" // /* MW 3 */ + 4530 "10010001" // /* MW 2 */ + 4531 "00011000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4532 "01000100" // MOVXM r3, #32767 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4533 "11111110" // /* MW 5 */ + 4534 "10111111" // /* MW 4 */ + 4535 "01110001" // /* MW 3 */ + 4536 "00000000" // /* MW 2 */ + 4537 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4538 "11111000" // VMIN_GE.bf16 x8, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4539 "00101100" // /* MW 3 */ + 4540 "01010000" // /* MW 2 */ + 4541 "00011100" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 2 "elementwise_unary.h" 113 78 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4542 "11100100" // LSHL r0, r2, r0; VMAX_LT.bf16 x6, r16, x8, x1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4543 "11011001" // /* MW 5 */ + 4544 "10000001" // /* MW 4 */ + 4545 "10110110" // /* MW 3 */ + 4546 "00000001" // /* MW 2 */ + 4547 "00010000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_unary.h" 147 20 +.src_ref 2 "elementwise_unary.h" 166 20 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4548 "11100100" // MOVX crRnd, r1; VMIN_GE.bf16 x8, r16, x7, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4549 "01011001" // /* MW 5 */ + 4550 "01110000" // /* MW 4 */ + 4551 "00001000" // /* MW 3 */ + 4552 "01010000" // /* MW 2 */ + 4553 "00001111" // /* MW 1 */ + 4554 "11111000" // VBCST.16 x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4555 "01110010" // /* MW 3 */ + 4556 "00001101" // /* MW 2 */ + 4557 "00011001" // /* MW 1 */ + 4558 "01000100" // MOVXM r5, #15616 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4559 "00000000" // /* MW 5 */ + 4560 "10111010" // /* MW 4 */ + 4561 "00110010" // /* MW 3 */ + 4562 "00000000" // /* MW 2 */ + 4563 "00000000" // /* MW 1 */ + 4564 "11111000" // VBCST.16 x3, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4565 "01110010" // /* MW 3 */ + 4566 "10010101" // /* MW 2 */ + 4567 "00011001" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 + 4568 "01000100" // MOVXM r17, #16128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4569 "00000000" // /* MW 5 */ + 4570 "10111110" // /* MW 4 */ + 4571 "00111000" // /* MW 3 */ + 4572 "00000000" // /* MW 2 */ + 4573 "00000000" // /* MW 1 */ +.src_ref 4 "abs.hpp" 32 22 first + 4574 "01111000" // VBAND x11, x6, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4575 "00101011" // /* MW 3 */ + 4576 "10110001" // /* MW 2 */ + 4577 "00011101" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 5 "mul_acc32_fp.hpp" 38 105 +.src_ref 5 "mul_acc32_fp.hpp" 38 105 +.src_ref 5 "mul_acc32_fp.hpp" 39 105 +.src_ref 5 "mul_acc32_fp.hpp" 39 105 + 4578 "11100100" // MOVX r17, #828; VBCST.16 x5, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4579 "11100101" // /* MW 5 */ + 4580 "10001010" // /* MW 4 */ + 4581 "00100101" // /* MW 3 */ + 4582 "01011110" // /* MW 2 */ + 4583 "01100100" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first + 4584 "01100010" // VMAX_LT.bf16 x9, r16, x8, x1; VMUL.f dm4, x3, x11, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4585 "01100001" // /* MW 7 */ + 4586 "11100111" // /* MW 6 */ + 4587 "10001100" // /* MW 5 */ + 4588 "11100110" // /* MW 4 */ + 4589 "11101100" // /* MW 3 */ + 4590 "11000000" // /* MW 2 */ + 4591 "00000100" // /* MW 1 */ +.src_ref 4 "abs.hpp" 32 22 first + 4592 "01111000" // VBAND x8, x9, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4593 "00101011" // /* MW 3 */ + 4594 "01001001" // /* MW 2 */ + 4595 "00011100" // /* MW 1 */ + 4596 "01000100" // MOVXM r2, #16000 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4597 "00000000" // /* MW 5 */ + 4598 "00111101" // /* MW 4 */ + 4599 "00110001" // /* MW 3 */ + 4600 "00000000" // /* MW 2 */ + 4601 "00000000" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first + 4602 "01100010" // VBCST.16 x4, r2; VMUL.f dm2, x3, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4603 "00000001" // /* MW 7 */ + 4604 "11100111" // /* MW 6 */ + 4605 "10001010" // /* MW 5 */ + 4606 "11100110" // /* MW 4 */ + 4607 "01110010" // /* MW 3 */ + 4608 "00001001" // /* MW 2 */ + 4609 "00000010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 940 83 first + 4610 "11111000" // VCONV.fp32.bf16 cml0, x5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4611 "10001010" // /* MW 3 */ + 4612 "00001011" // /* MW 2 */ + 4613 "00011000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 5 "mul_acc32_fp.hpp" 38 105 first + 4614 "01100010" // VMIN_GE.bf16 x8, r16, x10, x0; VMAC.f dm3, dm0, x6, x4, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4615 "10000001" // /* MW 7 */ + 4616 "00001100" // /* MW 6 */ + 4617 "10001011" // /* MW 5 */ + 4618 "11100110" // /* MW 4 */ + 4619 "00101100" // /* MW 3 */ + 4620 "01010000" // /* MW 2 */ + 4621 "00000100" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 38 105 +.src_ref 2 "elementwise_unary.h" 125 8 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 4622 "01010110" // VCONV.bf16.fp32 x11, cml4; MOVXM ls, #4672; VMAC.f dm1, dm0, x9, x4, r17 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4623 "10000001" // /* MW 11 */ + 4624 "00010010" // /* MW 10 */ + 4625 "10001001" // /* MW 9 */ + 4626 "00000010" // /* MW 8 */ + 4627 "00100100" // /* MW 7 */ + 4628 "10001111" // /* MW 6 */ + 4629 "00000000" // /* MW 5 */ + 4630 "00000000" // /* MW 4 */ + 4631 "11000000" // /* MW 3 */ + 4632 "01000010" // /* MW 2 */ + 4633 "10110010" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4634 "11111000" // VMAX_LT.bf16 x6, r16, x8, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4635 "11101100" // /* MW 3 */ + 4636 "01000000" // /* MW 2 */ + 4637 "00011011" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 39 105 first +.src_ref 2 "elementwise_unary.h" 125 8 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4638 "01011010" // MOVXM le, #4768; VMSC.f dm2, dm3, x11, x6, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4639 "11000011" // /* MW 9 */ + 4640 "01110110" // /* MW 8 */ + 4641 "10001010" // /* MW 7 */ + 4642 "00000010" // /* MW 6 */ + 4643 "00101010" // /* MW 5 */ + 4644 "10110111" // /* MW 4 */ + 4645 "00000000" // /* MW 3 */ + 4646 "00000000" // /* MW 2 */ + 4647 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_unary.h" 125 8 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4648 "00000010" // VCONV.bf16.fp32 x5, cml2; ADD.NC lc, r0, #-2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4649 "10000000" // /* MW 7 */ + 4650 "00111111" // /* MW 6 */ + 4651 "10111000" // /* MW 5 */ + 4652 "00000010" // /* MW 4 */ + 4653 "11000000" // /* MW 3 */ + 4654 "00100010" // /* MW 2 */ + 4655 "01010010" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first + 4656 "11111000" // VMIN_GE.bf16 x8, r16, x7, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4657 "00101100" // /* MW 3 */ + 4658 "00111000" // /* MW 2 */ + 4659 "00011100" // /* MW 1 */ +.src_ref 4 "abs.hpp" 32 22 first + 4660 "11110110" // NOPA; NOPB; NOPS; VBAND x11, x6, x2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4661 "10110000" // /* MW 11 */ + 4662 "10010101" // /* MW 10 */ + 4663 "11011000" // /* MW 9 */ + 4664 "00000010" // /* MW 8 */ + 4665 "01011011" // /* MW 7 */ + 4666 "00000001" // /* MW 6 */ + 4667 "00100000" // /* MW 5 */ + 4668 "00000000" // /* MW 4 */ + 4669 "11110000" // /* MW 3 */ + 4670 "00101100" // /* MW 2 */ + 4671 "00000000" // /* MW 1 */ +.label ZLS_F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_192 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 5 "mul_acc32_fp.hpp" 39 105 first +.src_ref 2 "elementwise_unary.h" 142 19 first +.begin_of_loop +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first +.loop_nesting 1 + 4672 "01001010" // VLDB x10, [p0], #64; VMAX_LT.bf16 x9, r16, x8, x1; VMSC.f dm4, dm1, x5, x9, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4673 "00100011" // /* MW 9 */ + 4674 "00101011" // /* MW 8 */ + 4675 "10001100" // /* MW 7 */ + 4676 "11100110" // /* MW 6 */ + 4677 "11101100" // /* MW 5 */ + 4678 "11000000" // /* MW 4 */ + 4679 "01101100" // /* MW 3 */ + 4680 "00111101" // /* MW 2 */ + 4681 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "abs.hpp" 32 22 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_unary.h" 161 19 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 4682 "01001010" // VLDB x7, [p0], #64; VBAND x8, x9, x2; VMUL.f dm4, x3, x11, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4683 "01100001" // /* MW 9 */ + 4684 "11100111" // /* MW 8 */ + 4685 "10001100" // /* MW 7 */ + 4686 "01100110" // /* MW 6 */ + 4687 "00101011" // /* MW 5 */ + 4688 "01001001" // /* MW 4 */ + 4689 "11101100" // /* MW 3 */ + 4690 "00111011" // /* MW 2 */ + 4691 "00000000" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 38 105 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4692 "01001000" // VMAC.f dm3, dm0, x6, x4, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4693 "10000001" // /* MW 3 */ + 4694 "00001100" // /* MW 2 */ + 4695 "10001011" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4696 "01001000" // VMUL.f dm2, x3, x8, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4697 "00000001" // /* MW 3 */ + 4698 "11100111" // /* MW 2 */ + 4699 "10001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 38 105 first +.src_ref 2 "elementwise_unary.h" 166 20 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4700 "01100010" // VST.CONV.bf16.fp32 cml2, [p1], #64; VMAC.f dm1, dm0, x9, x4, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4701 "10000001" // /* MW 7 */ + 4702 "00010010" // /* MW 6 */ + 4703 "10001001" // /* MW 5 */ + 4704 "00000010" // /* MW 4 */ + 4705 "01100000" // /* MW 3 */ + 4706 "10100100" // /* MW 2 */ + 4707 "00100011" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4709 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_unary.h" 147 20 first +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4710 "01111010" // NOPA; VST.CONV.bf16.fp32 cml4, [p1], #64;NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4711 "00000000" // /* MW 9 */ + 4712 "00000000" // /* MW 8 */ + 4713 "00000000" // /* MW 7 */ + 4714 "00000000" // /* MW 6 */ + 4715 "00100011" // /* MW 5 */ + 4716 "00011110" // /* MW 4 */ + 4717 "11110001" // /* MW 3 */ + 4718 "00101100" // /* MW 2 */ + 4719 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 5 "accum.hpp" 1119 102 first +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 4720 "11100001" // NOPA; NOPB; VCONV.bf16.fp32 x11, cml4; NOPX; VMIN_GE.bf16 x8, r16, x10, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4721 "00000000" // /* MW 15 */ + 4722 "00000000" // /* MW 14 */ + 4723 "01111000" // /* MW 13 */ + 4724 "00010110" // /* MW 12 */ + 4725 "00101000" // /* MW 11 */ + 4726 "00000010" // /* MW 10 */ + 4727 "00000000" // /* MW 9 */ + 4728 "00000000" // /* MW 8 */ + 4729 "00010110" // /* MW 7 */ + 4730 "10010010" // /* MW 6 */ + 4731 "00100101" // /* MW 5 */ + 4732 "00000000" // /* MW 4 */ + 4733 "11110000" // /* MW 3 */ + 4734 "00101100" // /* MW 2 */ + 4735 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.aggressive_scheduled_block_id 4 +.noswbrkpt + 4736 "11100001" // NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x8, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4737 "00000000" // /* MW 15 */ + 4738 "00000000" // /* MW 14 */ + 4739 "01111000" // /* MW 13 */ + 4740 "01110110" // /* MW 12 */ + 4741 "10100000" // /* MW 11 */ + 4742 "00000001" // /* MW 10 */ + 4743 "00000000" // /* MW 9 */ + 4744 "00000000" // /* MW 8 */ + 4745 "01011011" // /* MW 7 */ + 4746 "00000001" // /* MW 6 */ + 4747 "00100000" // /* MW 5 */ + 4748 "00000000" // /* MW 4 */ + 4749 "11110000" // /* MW 3 */ + 4750 "00101100" // /* MW 2 */ + 4751 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 39 105 first +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4752 "00011011" // NOPA; NOPB; VCONV.bf16.fp32 x5, cml2; NOPX; VMIN_GE.bf16 x8, r16, x7, x0; VMSC.f dm2, dm3, x11, x6, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4753 "10110110" // /* MW 15 */ + 4754 "01010011" // /* MW 14 */ + 4755 "01111100" // /* MW 13 */ + 4756 "00010110" // /* MW 12 */ + 4757 "00011100" // /* MW 11 */ + 4758 "00000010" // /* MW 10 */ + 4759 "00000000" // /* MW 9 */ + 4760 "00000000" // /* MW 8 */ + 4761 "00010110" // /* MW 7 */ + 4762 "10010001" // /* MW 6 */ + 4763 "00100010" // /* MW 5 */ + 4764 "00000000" // /* MW 4 */ + 4765 "11110000" // /* MW 3 */ + 4766 "00101100" // /* MW 2 */ + 4767 "00000000" // /* MW 1 */ +.label ZLE_F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_288 +.src_ref 4 "abs.hpp" 32 22 first +.end_of_loop + 4768 "11100001" // NOPA; NOPB; NOPS; NOPX; VBAND x11, x6, x2; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4769 "00000000" // /* MW 15 */ + 4770 "00000000" // /* MW 14 */ + 4771 "10111000" // /* MW 13 */ + 4772 "10010101" // /* MW 12 */ + 4773 "11011000" // /* MW 11 */ + 4774 "00000010" // /* MW 10 */ + 4775 "00000000" // /* MW 9 */ + 4776 "00000000" // /* MW 8 */ + 4777 "01011011" // /* MW 7 */ + 4778 "00000001" // /* MW 6 */ + 4779 "00100000" // /* MW 5 */ + 4780 "00000000" // /* MW 4 */ + 4781 "11110000" // /* MW 3 */ + 4782 "00101100" // /* MW 2 */ + 4783 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 5 "mul_acc32_fp.hpp" 39 105 first +.loop_nesting 0 + 4784 "01100010" // VMAX_LT.bf16 x9, r16, x8, x1; VMSC.f dm4, dm1, x5, x9, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4785 "00100011" // /* MW 7 */ + 4786 "00101011" // /* MW 6 */ + 4787 "10001100" // /* MW 5 */ + 4788 "11100110" // /* MW 4 */ + 4789 "11101100" // /* MW 3 */ + 4790 "11000000" // /* MW 2 */ + 4791 "00000100" // /* MW 1 */ +.src_ref 4 "abs.hpp" 32 22 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 4792 "01111000" // VBAND x8, x9, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4793 "00101011" // /* MW 3 */ + 4794 "01001001" // /* MW 2 */ + 4795 "00011100" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.aggressive_scheduled_block_id 5 +.noswbrkpt + 4796 "01001000" // VMUL.f dm4, x3, x11, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4797 "01100001" // /* MW 3 */ + 4798 "11100111" // /* MW 2 */ + 4799 "10001100" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 4800 "01001000" // VMUL.f dm2, x3, x8, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4801 "00000001" // /* MW 3 */ + 4802 "11100111" // /* MW 2 */ + 4803 "10001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_unary.h" 166 20 first +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 4804 "00011000" // VST.CONV.bf16.fp32 cml2, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4805 "00100011" // /* MW 3 */ + 4806 "00011101" // /* MW 2 */ + 4807 "00001001" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 4808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4809 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_unary.h" 147 20 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4810 "00011000" // VST.CONV.bf16.fp32 cml4, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4811 "00100011" // /* MW 3 */ + 4812 "00011110" // /* MW 2 */ + 4813 "00001001" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 38 105 first + 4814 "01001000" // VMAC.f dm3, dm0, x6, x4, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4815 "10000001" // /* MW 3 */ + 4816 "00001100" // /* MW 2 */ + 4817 "10001011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 38 105 + 4818 "01100010" // VCONV.bf16.fp32 x11, cml4; VMAC.f dm1, dm0, x9, x4, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4819 "10000001" // /* MW 7 */ + 4820 "00010010" // /* MW 6 */ + 4821 "10001001" // /* MW 5 */ + 4822 "00000010" // /* MW 4 */ + 4823 "11000000" // /* MW 3 */ + 4824 "01000010" // /* MW 2 */ + 4825 "10110010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 + 4826 "00011000" // VCONV.bf16.fp32 x5, cml2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4827 "00010110" // /* MW 3 */ + 4828 "10010001" // /* MW 2 */ + 4829 "00001010" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 39 105 first + 4830 "01001000" // VMSC.f dm2, dm3, x11, x6, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4831 "11000011" // /* MW 3 */ + 4832 "01110110" // /* MW 2 */ + 4833 "10001010" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 39 105 + 4834 "01001000" // VMSC.f dm4, dm1, x5, x9, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4835 "00100011" // /* MW 3 */ + 4836 "00101011" // /* MW 2 */ + 4837 "10001100" // /* MW 1 */ + 4838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4839 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 129 4 first + 4840 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 4841 "00000000" // /* MW 3 */ + 4842 "00101000" // /* MW 2 */ + 4843 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4845 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4846 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4847 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_unary.h" 166 20 first +.delay_slot + 4848 "00011000" // VST.CONV.bf16.fp32 cml2, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4849 "00100011" // /* MW 3 */ + 4850 "00011101" // /* MW 2 */ + 4851 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_unary.h" 147 20 first +.delay_slot + 4852 "00011000" // VST.CONV.bf16.fp32 cml4, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4853 "00100011" // /* MW 3 */ + 4854 "00011110" // /* MW 2 */ + 4855 "00001001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4856 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E__end +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_end0 + 4857 "00000000" // /* MW 1 */ +.label __Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_sigmoid1d _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 6 "superkernels.cpp" 210 first +.src_ref 6 "superkernels.cpp" 215 6 +.function_start + 4864 "01000100" // MOVXM p3, #508416 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4865 "00000000" // /* MW 5 */ + 4866 "11000100" // /* MW 4 */ + 4867 "11000110" // /* MW 3 */ + 4868 "00000111" // /* MW 2 */ + 4869 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 215 6 first + 4870 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4871 "11000001" // /* MW 5 */ + 4872 "10110101" // /* MW 4 */ + 4873 "11011000" // /* MW 3 */ + 4874 "11000010" // /* MW 2 */ + 4875 "01100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 210 + 4876 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4877 "00000001" // /* MW 5 */ + 4878 "00000000" // /* MW 4 */ + 4879 "00000000" // /* MW 3 */ + 4880 "00001000" // /* MW 2 */ + 4881 "00000000" // /* MW 1 */ + 4882 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4883 "01110000" // /* MW 7 */ + 4884 "11010000" // /* MW 6 */ + 4885 "00001011" // /* MW 5 */ + 4886 "00000000" // /* MW 4 */ + 4887 "10110000" // /* MW 3 */ + 4888 "01100011" // /* MW 2 */ + 4889 "11111111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 212 11 + 4890 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #508420 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4891 "00010001" // /* MW 9 */ + 4892 "00000010" // /* MW 8 */ + 4893 "00110001" // /* MW 7 */ + 4894 "11110011" // /* MW 6 */ + 4895 "00000001" // /* MW 5 */ + 4896 "00000000" // /* MW 4 */ + 4897 "10110000" // /* MW 3 */ + 4898 "10000010" // /* MW 2 */ + 4899 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 + 4900 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4901 "11000000" // /* MW 3 */ + 4902 "11010100" // /* MW 2 */ + 4903 "00011011" // /* MW 1 */ + 4904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4905 "00000000" // /* MW 1 */ + 4906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4907 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 215 6 +.src_ref 6 "superkernels.cpp" 215 16 + 4908 "10000100" // JNZ r16, #5072 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5072 delay_slots=5 */ + 4909 "00000001" // /* MW 5 */ + 4910 "01000000" // /* MW 4 */ + 4911 "11101000" // /* MW 3 */ + 4912 "00001001" // /* MW 2 */ + 4913 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 212 22 first +.delay_slot + 4914 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4915 "10010000" // /* MW 3 */ + 4916 "01100010" // /* MW 2 */ + 4917 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 212 30 +.delay_slot + 4918 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4919 "11111011" // /* MW 3 */ + 4920 "01100011" // /* MW 2 */ + 4921 "00010100" // /* MW 1 */ +.delay_slot + 4922 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4923 "00111101" // /* MW 3 */ + 4924 "11110100" // /* MW 2 */ + 4925 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 6 "superkernels.cpp" 212 11 +.delay_slot + 4926 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4927 "01110000" // /* MW 7 */ + 4928 "01100000" // /* MW 6 */ + 4929 "00110000" // /* MW 5 */ + 4930 "00000011" // /* MW 4 */ + 4931 "00110000" // /* MW 3 */ + 4932 "11000110" // /* MW 2 */ + 4933 "11000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 218 4 +.src_ref 6 "superkernels.cpp" 229 2 +.delay_slot + 4934 "01000100" // MOVXM p0, #508800 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4935 "00000000" // /* MW 5 */ + 4936 "11000111" // /* MW 4 */ + 4937 "11000000" // /* MW 3 */ + 4938 "00000111" // /* MW 2 */ + 4939 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4940 "01000100" // MOVXM p2, #508448 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4941 "01000000" // /* MW 5 */ + 4942 "11000100" // /* MW 4 */ + 4943 "11000100" // /* MW 3 */ + 4944 "00000111" // /* MW 2 */ + 4945 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4946 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #508444 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4947 "00010000" // /* MW 9 */ + 4948 "00001110" // /* MW 8 */ + 4949 "00110001" // /* MW 7 */ + 4950 "11110001" // /* MW 6 */ + 4951 "00000001" // /* MW 5 */ + 4952 "00000000" // /* MW 4 */ + 4953 "11100000" // /* MW 3 */ + 4954 "11000000" // /* MW 2 */ + 4955 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4957 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 218 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 4958 "00000100" // JL #4400 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4400 delay_slots=5 */ + 4959 "00000001" // /* MW 5 */ + 4960 "00000000" // /* MW 4 */ + 4961 "10011000" // /* MW 3 */ + 4962 "00001000" // /* MW 2 */ + 4963 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4964 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4965 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4967 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4968 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4969 "00110001" // /* MW 3 */ + 4970 "00100000" // /* MW 2 */ + 4971 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 4972 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4973 "00000101" // /* MW 3 */ + 4974 "00100000" // /* MW 2 */ + 4975 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 4976 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4977 "00000000" // /* MW 15 */ + 4978 "00000000" // /* MW 14 */ + 4979 "01111000" // /* MW 13 */ + 4980 "10100101" // /* MW 12 */ + 4981 "00000001" // /* MW 11 */ + 4982 "00000000" // /* MW 10 */ + 4983 "00000000" // /* MW 9 */ + 4984 "10000000" // /* MW 8 */ + 4985 "00010001" // /* MW 7 */ + 4986 "00000110" // /* MW 6 */ + 4987 "00100010" // /* MW 5 */ + 4988 "00000000" // /* MW 4 */ + 4989 "11110000" // /* MW 3 */ + 4990 "00101100" // /* MW 2 */ + 4991 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 222 18 +.return_address + 4992 "01000100" // MOVXM p2, #508420 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4993 "00001000" // /* MW 5 */ + 4994 "11000100" // /* MW 4 */ + 4995 "11000100" // /* MW 3 */ + 4996 "00000111" // /* MW 2 */ + 4997 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 222 18 first +.src_ref 6 "superkernels.cpp" 222 46 + 4998 "10111010" // LDA r16, [p2]; MOVXM p2, #508800 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4999 "00010000" // /* MW 9 */ + 5000 "11000000" // /* MW 8 */ + 5001 "00110001" // /* MW 7 */ + 5002 "11110001" // /* MW 6 */ + 5003 "00000001" // /* MW 5 */ + 5004 "00000000" // /* MW 4 */ + 5005 "11010000" // /* MW 3 */ + 5006 "11000010" // /* MW 2 */ + 5007 "01000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 220 32 +.src_ref 6 "superkernels.cpp" 222 46 +.src_ref 6 "superkernels.cpp" 229 2 + 5008 "10111010" // LDA r17, [p2]; MOVXM p2, #508800 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5009 "00010000" // /* MW 9 */ + 5010 "11000000" // /* MW 8 */ + 5011 "00110001" // /* MW 7 */ + 5012 "11110001" // /* MW 6 */ + 5013 "00000001" // /* MW 5 */ + 5014 "00000000" // /* MW 4 */ + 5015 "11010000" // /* MW 3 */ + 5016 "11000110" // /* MW 2 */ + 5017 "01000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 220 32 first +.src_ref 6 "superkernels.cpp" 222 16 +.src_ref 6 "superkernels.cpp" 227 47 + 5018 "10111010" // LDA.u16 r18, [p2, #8]; MOVXM p1, #508424 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5019 "00010000" // /* MW 9 */ + 5020 "00000100" // /* MW 8 */ + 5021 "10110001" // /* MW 7 */ + 5022 "11110000" // /* MW 6 */ + 5023 "00000001" // /* MW 5 */ + 5024 "00000000" // /* MW 4 */ + 5025 "01010000" // /* MW 3 */ + 5026 "11001011" // /* MW 2 */ + 5027 "01001000" // /* MW 1 */ + 5028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5029 "00000000" // /* MW 1 */ + 5030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5031 "00000000" // /* MW 1 */ + 5032 "10000100" // J #5088 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=5088 delay_slots=5 */ + 5033 "00000000" // /* MW 5 */ + 5034 "00000000" // /* MW 4 */ + 5035 "11110000" // /* MW 3 */ + 5036 "00001001" // /* MW 2 */ + 5037 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 220 13 +.delay_slot + 5038 "01000100" // MOVXM p0, #508440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5039 "00110000" // /* MW 5 */ + 5040 "11000100" // /* MW 4 */ + 5041 "11000000" // /* MW 3 */ + 5042 "00000111" // /* MW 2 */ + 5043 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5045 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 222 27 first +.delay_slot + 5046 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5047 "00001111" // /* MW 3 */ + 5048 "01100001" // /* MW 2 */ + 5049 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 220 13 first +.delay_slot + 5050 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5051 "10100011" // /* MW 5 */ + 5052 "00001100" // /* MW 4 */ + 5053 "11110000" // /* MW 3 */ + 5054 "00101100" // /* MW 2 */ + 5055 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 222 16 first +.delay_slot + 5056 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5057 "00000000" // /* MW 15 */ + 5058 "00000000" // /* MW 14 */ + 5059 "01111000" // /* MW 13 */ + 5060 "10100101" // /* MW 12 */ + 5061 "00000001" // /* MW 11 */ + 5062 "00000000" // /* MW 10 */ + 5063 "00000000" // /* MW 9 */ + 5064 "10000000" // /* MW 8 */ + 5065 "00010001" // /* MW 7 */ + 5066 "00000110" // /* MW 6 */ + 5067 "00100001" // /* MW 5 */ + 5068 "00000000" // /* MW 4 */ + 5069 "11110000" // /* MW 3 */ + 5070 "00101100" // /* MW 2 */ + 5071 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 6 "superkernels.cpp" 227 47 +.src_ref 6 "superkernels.cpp" 229 2 + 5072 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #508424; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5073 "00000000" // /* MW 15 */ + 5074 "00000000" // /* MW 14 */ + 5075 "00010000" // /* MW 13 */ + 5076 "00000100" // /* MW 12 */ + 5077 "10110001" // /* MW 11 */ + 5078 "11110000" // /* MW 10 */ + 5079 "00000001" // /* MW 9 */ + 5080 "00000000" // /* MW 8 */ + 5081 "10001011" // /* MW 7 */ + 5082 "10000000" // /* MW 6 */ + 5083 "00100010" // /* MW 5 */ + 5084 "00000000" // /* MW 4 */ + 5085 "11110000" // /* MW 3 */ + 5086 "00101100" // /* MW 2 */ + 5087 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 +.src_ref 1 "io_buffer_main.h" 242 49 first + 5088 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5089 "00000000" // /* MW 7 */ + 5090 "11000011" // /* MW 6 */ + 5091 "10110011" // /* MW 5 */ + 5092 "00000011" // /* MW 4 */ + 5093 "01100000" // /* MW 3 */ + 5094 "10010001" // /* MW 2 */ + 5095 "01110011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 226 2 + 5096 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5097 "00010000" // /* MW 9 */ + 5098 "00000000" // /* MW 8 */ + 5099 "00110001" // /* MW 7 */ + 5100 "11110000" // /* MW 6 */ + 5101 "00000001" // /* MW 5 */ + 5102 "00000000" // /* MW 4 */ + 5103 "11010000" // /* MW 3 */ + 5104 "11101110" // /* MW 2 */ + 5105 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 5106 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5107 "00010110" // /* MW 3 */ + 5108 "11111110" // /* MW 2 */ + 5109 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 5110 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5111 "00110110" // /* MW 3 */ + 5112 "11111110" // /* MW 2 */ + 5113 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first + 5114 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5115 "01010110" // /* MW 3 */ + 5116 "01000110" // /* MW 2 */ + 5117 "00000111" // /* MW 1 */ + 5118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5119 "00000000" // /* MW 1 */ + 5120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5121 "00000000" // /* MW 1 */ + 5122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5123 "00000000" // /* MW 1 */ + 5124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5125 "00000000" // /* MW 1 */ + 5126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5127 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 5128 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5129 "00000010" // /* MW 3 */ + 5130 "01100001" // /* MW 2 */ + 5131 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 + 5132 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5133 "00010001" // /* MW 3 */ + 5134 "00000110" // /* MW 2 */ + 5135 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 + 5136 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5137 "11111101" // /* MW 3 */ + 5138 "11100000" // /* MW 2 */ + 5139 "00010111" // /* MW 1 */ + 5140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5141 "00000000" // /* MW 1 */ + 5142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5143 "00000000" // /* MW 1 */ + 5144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5145 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 5146 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5147 "00001000" // /* MW 3 */ + 5148 "10010011" // /* MW 2 */ + 5149 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 227 45 + 5150 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5151 "10000001" // /* MW 5 */ + 5152 "10101101" // /* MW 4 */ + 5153 "10100111" // /* MW 3 */ + 5154 "00000000" // /* MW 2 */ + 5155 "00000100" // /* MW 1 */ + 5156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5157 "00000000" // /* MW 1 */ + 5158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5159 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 226 2 first + 5160 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5161 "00110110" // /* MW 3 */ + 5162 "00000110" // /* MW 2 */ + 5163 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 1 "io_buffer_main.h" 348 51 + 5164 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5165 "10000001" // /* MW 5 */ + 5166 "11011101" // /* MW 4 */ + 5167 "11011100" // /* MW 3 */ + 5168 "11001010" // /* MW 2 */ + 5169 "11000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 227 47 first + 5170 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5171 "01110110" // /* MW 3 */ + 5172 "00000110" // /* MW 2 */ + 5173 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 5174 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5175 "10011110" // /* MW 3 */ + 5176 "01011100" // /* MW 2 */ + 5177 "00000111" // /* MW 1 */ + 5178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5179 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 229 2 first +.no_stack_arguments + 5180 "00000100" // JL #4480 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4480 delay_slots=5 */ + 5181 "00000001" // /* MW 5 */ + 5182 "00000000" // /* MW 4 */ + 5183 "11000000" // /* MW 3 */ + 5184 "00001000" // /* MW 2 */ + 5185 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5187 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 226 2 first +.delay_slot + 5188 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5189 "00000111" // /* MW 3 */ + 5190 "01100010" // /* MW 2 */ + 5191 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 226 2 +.delay_slot + 5192 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5193 "00110001" // /* MW 3 */ + 5194 "00000110" // /* MW 2 */ + 5195 "00001000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 227 45 first +.delay_slot + 5196 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5197 "00001101" // /* MW 3 */ + 5198 "11100001" // /* MW 2 */ + 5199 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 227 45 +.delay_slot + 5200 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5201 "00000000" // /* MW 15 */ + 5202 "00000000" // /* MW 14 */ + 5203 "10101000" // /* MW 13 */ + 5204 "10100000" // /* MW 12 */ + 5205 "00110100" // /* MW 11 */ + 5206 "00000000" // /* MW 10 */ + 5207 "00000000" // /* MW 9 */ + 5208 "00000000" // /* MW 8 */ + 5209 "01011011" // /* MW 7 */ + 5210 "00000001" // /* MW 6 */ + 5211 "00100000" // /* MW 5 */ + 5212 "00000000" // /* MW 4 */ + 5213 "11110000" // /* MW 3 */ + 5214 "00101100" // /* MW 2 */ + 5215 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first +.src_ref 6 "superkernels.cpp" 231 6 +.src_ref 6 "superkernels.cpp" 232 14 +.return_address + 5216 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5217 "00010000" // /* MW 9 */ + 5218 "00000000" // /* MW 8 */ + 5219 "00110001" // /* MW 7 */ + 5220 "11110011" // /* MW 6 */ + 5221 "00000001" // /* MW 5 */ + 5222 "00000000" // /* MW 4 */ + 5223 "11010000" // /* MW 3 */ + 5224 "11000110" // /* MW 2 */ + 5225 "11001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 449 8 + 5226 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5227 "00000101" // /* MW 3 */ + 5228 "00100000" // /* MW 2 */ + 5229 "00010000" // /* MW 1 */ + 5230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5231 "00000000" // /* MW 1 */ + 5232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5233 "00000000" // /* MW 1 */ + 5234 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5235 "00000000" // /* MW 1 */ + 5236 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5237 "00000000" // /* MW 1 */ + 5238 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5239 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 5240 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5241 "00001000" // /* MW 3 */ + 5242 "01010001" // /* MW 2 */ + 5243 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first +.src_ref 6 "superkernels.cpp" 231 19 + 5244 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #508440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5245 "00010000" // /* MW 9 */ + 5246 "00001100" // /* MW 8 */ + 5247 "00110001" // /* MW 7 */ + 5248 "11110001" // /* MW 6 */ + 5249 "00000001" // /* MW 5 */ + 5250 "00000000" // /* MW 4 */ + 5251 "11010000" // /* MW 3 */ + 5252 "11001110" // /* MW 2 */ + 5253 "11111100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 231 6 first + 5254 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5255 "00110110" // /* MW 3 */ + 5256 "00000110" // /* MW 2 */ + 5257 "00000110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 231 19 + 5258 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5259 "01010110" // /* MW 3 */ + 5260 "00000110" // /* MW 2 */ + 5261 "00000010" // /* MW 1 */ + 5262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5263 "00000000" // /* MW 1 */ + 5264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5265 "00000000" // /* MW 1 */ + 5266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5267 "00000000" // /* MW 1 */ + 5268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5269 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 first + 5270 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5271 "00110001" // /* MW 3 */ + 5272 "00100001" // /* MW 2 */ + 5273 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 5274 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5275 "00010001" // /* MW 3 */ + 5276 "11100110" // /* MW 2 */ + 5277 "00001111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 231 16 first + 5278 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5279 "00101000" // /* MW 3 */ + 5280 "01100001" // /* MW 2 */ + 5281 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 231 6 + 5282 "10000100" // JNZ r16, #5312 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5312 delay_slots=5 */ + 5283 "00000001" // /* MW 5 */ + 5284 "01000000" // /* MW 4 */ + 5285 "01100000" // /* MW 3 */ + 5286 "00001010" // /* MW 2 */ + 5287 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5289 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5291 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5293 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5294 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5295 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5297 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 232 14 + 5298 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5299 "00000001" // /* MW 3 */ + 5300 "00100000" // /* MW 2 */ + 5301 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 232 14 first + 5302 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5303 "00000000" // /* MW 9 */ + 5304 "00000000" // /* MW 8 */ + 5305 "00000000" // /* MW 7 */ + 5306 "10000000" // /* MW 6 */ + 5307 "00010001" // /* MW 5 */ + 5308 "00000110" // /* MW 4 */ + 5309 "11110110" // /* MW 3 */ + 5310 "00101100" // /* MW 2 */ + 5311 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 6 "superkernels.cpp" 234 + 5312 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5313 "00111001" // /* MW 3 */ + 5314 "11110100" // /* MW 2 */ + 5315 "00000111" // /* MW 1 */ + 5316 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5317 "00011001" // /* MW 3 */ + 5318 "11111011" // /* MW 2 */ + 5319 "00000111" // /* MW 1 */ + 5320 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5321 "00000000" // /* MW 1 */ + 5322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5323 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 5324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5325 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 5326 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5327 "11110001" // /* MW 3 */ + 5328 "11111101" // /* MW 2 */ + 5329 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5331 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 234 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5332 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 5333 "00000000" // /* MW 3 */ + 5334 "00101000" // /* MW 2 */ + 5335 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5336 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5337 "10100000" // /* MW 3 */ + 5338 "01100111" // /* MW 2 */ + 5339 "00011111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 234 +.delay_slot + 5340 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5341 "00000001" // /* MW 5 */ + 5342 "00000000" // /* MW 4 */ + 5343 "00000000" // /* MW 3 */ + 5344 "11111000" // /* MW 2 */ + 5345 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5347 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5349 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 5351 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.src_ref 2 "elementwise_binary.h" 100 first +.src_ref 2 "elementwise_binary.h" 103 4 first +.function_start + 5360 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 5361 "00000000" // /* MW 3 */ + 5362 "00101000" // /* MW 2 */ + 5363 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 5364 "01000100" // MOVXM p0, #508768 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5365 "11000000" // /* MW 5 */ + 5366 "11000110" // /* MW 4 */ + 5367 "11000000" // /* MW 3 */ + 5368 "00000111" // /* MW 2 */ + 5369 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 5370 "10111000" // MOV m0, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5371 "10000000" // /* MW 3 */ + 5372 "00000000" // /* MW 2 */ + 5373 "00011000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 first +.delay_slot + 5374 "10011000" // ST m0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5375 "00000001" // /* MW 3 */ + 5376 "00000100" // /* MW 2 */ + 5377 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 5378 "10011000" // ST m0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5379 "00000001" // /* MW 3 */ + 5380 "00010100" // /* MW 2 */ + 5381 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_end0 + 5383 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.src_ref 2 "elementwise_binary.h" 89 first +.src_ref 2 "elementwise_binary.h" 92 22 +.src_ref 2 "elementwise_binary.h" 92 24 first +.function_start + 5392 "10111010" // LDA el0, [p1], #4; MOVXM p0, #508736 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5393 "00010000" // /* MW 9 */ + 5394 "10100000" // /* MW 8 */ + 5395 "00110001" // /* MW 7 */ + 5396 "11110000" // /* MW 6 */ + 5397 "00000001" // /* MW 5 */ + 5398 "00000000" // /* MW 4 */ + 5399 "11010000" // /* MW 3 */ + 5400 "10000101" // /* MW 2 */ + 5401 "00100011" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 89 + 5402 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5403 "00000001" // /* MW 5 */ + 5404 "00000000" // /* MW 4 */ + 5405 "00000000" // /* MW 3 */ + 5406 "00001000" // /* MW 2 */ + 5407 "00000000" // /* MW 1 */ + 5408 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5409 "00111101" // /* MW 3 */ + 5410 "11111100" // /* MW 2 */ + 5411 "00001111" // /* MW 1 */ + 5412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5413 "00000000" // /* MW 1 */ + 5414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5415 "00000000" // /* MW 1 */ + 5416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5417 "00000000" // /* MW 1 */ + 5418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5419 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 92 22 first + 5420 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5421 "00101001" // /* MW 3 */ + 5422 "00011100" // /* MW 2 */ + 5423 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 93 24 first + 5424 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5425 "00101110" // /* MW 3 */ + 5426 "00011100" // /* MW 2 */ + 5427 "00000001" // /* MW 1 */ + 5428 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5429 "00000000" // /* MW 1 */ + 5430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5431 "00000000" // /* MW 1 */ + 5432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5433 "00000000" // /* MW 1 */ + 5434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5435 "00000000" // /* MW 1 */ + 5436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5437 "00000000" // /* MW 1 */ + 5438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5439 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 93 22 + 5440 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5441 "00101001" // /* MW 3 */ + 5442 "00011100" // /* MW 2 */ + 5443 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 94 24 first + 5444 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5445 "00101110" // /* MW 3 */ + 5446 "00000100" // /* MW 2 */ + 5447 "00000001" // /* MW 1 */ + 5448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5449 "00000000" // /* MW 1 */ + 5450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5451 "00000000" // /* MW 1 */ + 5452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5453 "00000000" // /* MW 1 */ + 5454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5455 "00000000" // /* MW 1 */ + 5456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5457 "00000000" // /* MW 1 */ + 5458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5459 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 94 22 + 5460 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5461 "00101001" // /* MW 3 */ + 5462 "00011100" // /* MW 2 */ + 5463 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 95 24 first + 5464 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5465 "00101110" // /* MW 3 */ + 5466 "00010100" // /* MW 2 */ + 5467 "00000001" // /* MW 1 */ + 5468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5469 "00000000" // /* MW 1 */ + 5470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5471 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 96 8 first +.no_stack_arguments + 5472 "00000100" // JL #5360 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=5360 delay_slots=5 */ + 5473 "00000001" // /* MW 5 */ + 5474 "00000000" // /* MW 4 */ + 5475 "01111000" // /* MW 3 */ + 5476 "00001010" // /* MW 2 */ + 5477 "00000000" // /* MW 1 */ +.delay_slot + 5478 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5479 "10011101" // /* MW 3 */ + 5480 "11111011" // /* MW 2 */ + 5481 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5483 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5485 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 95 22 first +.delay_slot + 5486 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5487 "00101001" // /* MW 3 */ + 5488 "11011100" // /* MW 2 */ + 5489 "00001000" // /* MW 1 */ +.src_ref 3 "mul_impl.h" 93 25 +.delay_slot + 5490 "00101110" // NOPA; NOPS; MOV p7, p0; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5491 "00011100" // /* MW 13 */ + 5492 "00000000" // /* MW 12 */ + 5493 "00000000" // /* MW 11 */ + 5494 "00000111" // /* MW 10 */ + 5495 "00000110" // /* MW 9 */ + 5496 "01111011" // /* MW 8 */ + 5497 "00000000" // /* MW 7 */ + 5498 "00000000" // /* MW 6 */ + 5499 "10110110" // /* MW 5 */ + 5500 "00000010" // /* MW 4 */ + 5501 "11110000" // /* MW 3 */ + 5502 "00101100" // /* MW 2 */ + 5503 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 98 4 +.return_address + 5504 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5505 "00111001" // /* MW 3 */ + 5506 "11111100" // /* MW 2 */ + 5507 "00000111" // /* MW 1 */ + 5508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5509 "00000000" // /* MW 1 */ + 5510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5511 "00000000" // /* MW 1 */ + 5512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5513 "00000000" // /* MW 1 */ + 5514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5515 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5517 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5518 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5519 "10011001" // /* MW 3 */ + 5520 "11111011" // /* MW 2 */ + 5521 "00000111" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 98 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5522 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 5523 "00000000" // /* MW 3 */ + 5524 "00101000" // /* MW 2 */ + 5525 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5527 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5529 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5531 "00000000" // /* MW 1 */ +.src_ref 3 "mul_impl.h" 93 25 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5532 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5533 "00000001" // /* MW 3 */ + 5534 "00100000" // /* MW 2 */ + 5535 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 98 4 +.src_ref 3 "mul_impl.h" 93 25 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5536 "00111010" // ST r16, [p7, #16]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5537 "01110001" // /* MW 9 */ + 5538 "00000000" // /* MW 8 */ + 5539 "00000000" // /* MW 7 */ + 5540 "00000000" // /* MW 6 */ + 5541 "11111110" // /* MW 5 */ + 5542 "00111111" // /* MW 4 */ + 5543 "00110000" // /* MW 3 */ + 5544 "11000010" // /* MW 2 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + 5545 "11101000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.function run _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.src_ref 2 "elementwise_binary.h" 108 first +.src_ref 2 "elementwise_binary.h" 115 37 +.src_ref 2 "elementwise_binary.h" 115 37 +.function_start + 5552 "10111010" // MOVA m0, #32; MOVXM p3, #508736 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5553 "00010000" // /* MW 9 */ + 5554 "10100000" // /* MW 8 */ + 5555 "10110001" // /* MW 7 */ + 5556 "11110001" // /* MW 6 */ + 5557 "00000001" // /* MW 5 */ + 5558 "00000000" // /* MW 4 */ + 5559 "10000000" // /* MW 3 */ + 5560 "00000000" // /* MW 2 */ + 5561 "00000100" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 115 37 first +.src_ref 2 "elementwise_binary.h" 115 78 + 5562 "10111010" // LDA r1, [p3], m0; MOVXM p4, #508448 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5563 "00010000" // /* MW 9 */ + 5564 "00010000" // /* MW 8 */ + 5565 "00110001" // /* MW 7 */ + 5566 "11110010" // /* MW 6 */ + 5567 "00000001" // /* MW 5 */ + 5568 "00000000" // /* MW 4 */ + 5569 "11010000" // /* MW 3 */ + 5570 "00000110" // /* MW 2 */ + 5571 "01100001" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 115 78 +.src_ref 2 "elementwise_binary.h" 115 78 + 5572 "10111010" // LDA m1, [p3]; MOVX r0, #828; MOV r3, #-6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5573 "01011000" // /* MW 9 */ + 5574 "11111010" // /* MW 8 */ + 5575 "01101111" // /* MW 7 */ + 5576 "10001000" // /* MW 6 */ + 5577 "00000111" // /* MW 5 */ + 5578 "00011000" // /* MW 4 */ + 5579 "11010000" // /* MW 3 */ + 5580 "10010000" // /* MW 2 */ + 5581 "01100000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 115 78 +.src_ref 2 "elementwise_binary.h" 127 8 first + 5582 "10111010" // LDA m0, [p3, #4]; MOVXM ls, #5744 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5583 "00010000" // /* MW 9 */ + 5584 "00111000" // /* MW 8 */ + 5585 "01111011" // /* MW 7 */ + 5586 "00000100" // /* MW 6 */ + 5587 "00000000" // /* MW 5 */ + 5588 "00000000" // /* MW 4 */ + 5589 "11010000" // /* MW 3 */ + 5590 "10000000" // /* MW 2 */ + 5591 "01100010" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 115 78 first +.src_ref 2 "elementwise_binary.h" 127 8 + 5592 "10111010" // LDA.s8 r2, [p4]; MOVXM le, #5760 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5593 "00010000" // /* MW 9 */ + 5594 "01000000" // /* MW 8 */ + 5595 "10111011" // /* MW 7 */ + 5596 "00000101" // /* MW 6 */ + 5597 "00000000" // /* MW 5 */ + 5598 "00000000" // /* MW 4 */ + 5599 "01010000" // /* MW 3 */ + 5600 "10001000" // /* MW 2 */ + 5601 "10000000" // /* MW 1 */ + 5602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5603 "00000000" // /* MW 1 */ + 5604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5605 "00000000" // /* MW 1 */ + 5606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5607 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 115 78 + 5608 "10011000" // LSHL r1, r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5609 "00111101" // /* MW 3 */ + 5610 "01000010" // /* MW 2 */ + 5611 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 127 8 first + 5612 "10011000" // ADD.NC lc, r1, #-7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5613 "11111100" // /* MW 3 */ + 5614 "01110000" // /* MW 2 */ + 5615 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first + 5616 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5617 "11101000" // /* MW 5 */ + 5618 "01010000" // /* MW 4 */ + 5619 "01110000" // /* MW 3 */ + 5620 "00010011" // /* MW 2 */ + 5621 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 154 20 +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.src_ref 2 "elementwise_binary.h" 177 20 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5622 "00010010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; MOVX crRnd, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5623 "10000000" // /* MW 7 */ + 5624 "10111010" // /* MW 6 */ + 5625 "01101000" // /* MW 5 */ + 5626 "01010000" // /* MW 4 */ + 5627 "01110000" // /* MW 3 */ + 5628 "00011011" // /* MW 2 */ + 5629 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5630 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5631 "11101000" // /* MW 5 */ + 5632 "01010000" // /* MW 4 */ + 5633 "01110000" // /* MW 3 */ + 5634 "00010011" // /* MW 2 */ + 5635 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5636 "00111100" // VLDA x3, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5637 "01101000" // /* MW 5 */ + 5638 "01010000" // /* MW 4 */ + 5639 "01110000" // /* MW 3 */ + 5640 "00011011" // /* MW 2 */ + 5641 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5642 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5643 "11101000" // /* MW 5 */ + 5644 "01010000" // /* MW 4 */ + 5645 "01110000" // /* MW 3 */ + 5646 "00010011" // /* MW 2 */ + 5647 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5648 "00111100" // VLDA x3, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5649 "01101000" // /* MW 5 */ + 5650 "01010000" // /* MW 4 */ + 5651 "01110000" // /* MW 3 */ + 5652 "00011011" // /* MW 2 */ + 5653 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5654 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5655 "11101000" // /* MW 5 */ + 5656 "01010000" // /* MW 4 */ + 5657 "01110000" // /* MW 3 */ + 5658 "00010011" // /* MW 2 */ + 5659 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5660 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5661 "01000001" // /* MW 9 */ + 5662 "11100010" // /* MW 8 */ + 5663 "00000000" // /* MW 7 */ + 5664 "00011101" // /* MW 6 */ + 5665 "00110100" // /* MW 5 */ + 5666 "00101000" // /* MW 4 */ + 5667 "01110000" // /* MW 3 */ + 5668 "00011011" // /* MW 2 */ + 5669 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5670 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5671 "01100001" // /* MW 9 */ + 5672 "11100000" // /* MW 8 */ + 5673 "00000001" // /* MW 7 */ + 5674 "00011101" // /* MW 6 */ + 5675 "01110100" // /* MW 5 */ + 5676 "00101000" // /* MW 4 */ + 5677 "01110000" // /* MW 3 */ + 5678 "00010011" // /* MW 2 */ + 5679 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5680 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5681 "01000001" // /* MW 9 */ + 5682 "11100010" // /* MW 8 */ + 5683 "00000000" // /* MW 7 */ + 5684 "00011101" // /* MW 6 */ + 5685 "00110100" // /* MW 5 */ + 5686 "00101000" // /* MW 4 */ + 5687 "01110000" // /* MW 3 */ + 5688 "00011011" // /* MW 2 */ + 5689 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5690 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5691 "01100001" // /* MW 9 */ + 5692 "11100000" // /* MW 8 */ + 5693 "00000001" // /* MW 7 */ + 5694 "00011101" // /* MW 6 */ + 5695 "01110100" // /* MW 5 */ + 5696 "00101000" // /* MW 4 */ + 5697 "01110000" // /* MW 3 */ + 5698 "00010011" // /* MW 2 */ + 5699 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5700 "01100110" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; NOPS; VMUL.f dm0, x1, x2, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5701 "01000001" // /* MW 11 */ + 5702 "11100010" // /* MW 10 */ + 5703 "00000000" // /* MW 9 */ + 5704 "10001110" // /* MW 8 */ + 5705 "10101101" // /* MW 7 */ + 5706 "00000000" // /* MW 6 */ + 5707 "01101000" // /* MW 5 */ + 5708 "01010000" // /* MW 4 */ + 5709 "01110000" // /* MW 3 */ + 5710 "00011011" // /* MW 2 */ + 5711 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5712 "00001011" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; NOPS; NOPX; NOPM; VMUL.f dm1, x0, x3, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5713 "00000011" // /* MW 15 */ + 5714 "00001111" // /* MW 14 */ + 5715 "01111000" // /* MW 13 */ + 5716 "10100101" // /* MW 12 */ + 5717 "00000001" // /* MW 11 */ + 5718 "00000000" // /* MW 10 */ + 5719 "00000000" // /* MW 9 */ + 5720 "00000000" // /* MW 8 */ + 5721 "01011011" // /* MW 7 */ + 5722 "00000001" // /* MW 6 */ + 5723 "11101000" // /* MW 5 */ + 5724 "01010000" // /* MW 4 */ + 5725 "01110000" // /* MW 3 */ + 5726 "00010011" // /* MW 2 */ + 5727 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5728 "00001011" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5729 "00010010" // /* MW 15 */ + 5730 "00000111" // /* MW 14 */ + 5731 "01111000" // /* MW 13 */ + 5732 "10100101" // /* MW 12 */ + 5733 "00000001" // /* MW 11 */ + 5734 "00000000" // /* MW 10 */ + 5735 "00000000" // /* MW 9 */ + 5736 "00000000" // /* MW 8 */ + 5737 "00100011" // /* MW 7 */ + 5738 "00011100" // /* MW 6 */ + 5739 "01101010" // /* MW 5 */ + 5740 "01010000" // /* MW 4 */ + 5741 "01110000" // /* MW 3 */ + 5742 "00011011" // /* MW 2 */ + 5743 "00100001" // /* MW 1 */ +.label ZLS_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_192 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.src_ref 2 "elementwise_binary.h" 154 20 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 5744 "00001011" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; VMUL.f dm1, x0, x3, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5745 "00000011" // /* MW 15 */ + 5746 "00001111" // /* MW 14 */ + 5747 "01111000" // /* MW 13 */ + 5748 "10100101" // /* MW 12 */ + 5749 "00000001" // /* MW 11 */ + 5750 "00000000" // /* MW 10 */ + 5751 "00000000" // /* MW 9 */ + 5752 "00000000" // /* MW 8 */ + 5753 "10100011" // /* MW 7 */ + 5754 "00011100" // /* MW 6 */ + 5755 "11101010" // /* MW 5 */ + 5756 "01010000" // /* MW 4 */ + 5757 "01110000" // /* MW 3 */ + 5758 "00010011" // /* MW 2 */ + 5759 "00100001" // /* MW 1 */ +.label ZLE_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_208 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5760 "00001011" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5761 "00010010" // /* MW 15 */ + 5762 "00000111" // /* MW 14 */ + 5763 "01111000" // /* MW 13 */ + 5764 "10100101" // /* MW 12 */ + 5765 "00000001" // /* MW 11 */ + 5766 "00000000" // /* MW 10 */ + 5767 "00000000" // /* MW 9 */ + 5768 "00000000" // /* MW 8 */ + 5769 "00100011" // /* MW 7 */ + 5770 "00011100" // /* MW 6 */ + 5771 "01101010" // /* MW 5 */ + 5772 "01010000" // /* MW 4 */ + 5773 "01110000" // /* MW 3 */ + 5774 "00011011" // /* MW 2 */ + 5775 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 5776 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5777 "01100001" // /* MW 7 */ + 5778 "11100000" // /* MW 6 */ + 5779 "00000001" // /* MW 5 */ + 5780 "00000010" // /* MW 4 */ + 5781 "01100000" // /* MW 3 */ + 5782 "10010100" // /* MW 2 */ + 5783 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5784 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5785 "01000001" // /* MW 7 */ + 5786 "11100010" // /* MW 6 */ + 5787 "00000000" // /* MW 5 */ + 5788 "00000010" // /* MW 4 */ + 5789 "01100000" // /* MW 3 */ + 5790 "10000100" // /* MW 2 */ + 5791 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5792 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5793 "01100001" // /* MW 7 */ + 5794 "11100000" // /* MW 6 */ + 5795 "00000001" // /* MW 5 */ + 5796 "00000010" // /* MW 4 */ + 5797 "01100000" // /* MW 3 */ + 5798 "10010100" // /* MW 2 */ + 5799 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5800 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5801 "01000001" // /* MW 7 */ + 5802 "11100010" // /* MW 6 */ + 5803 "00000000" // /* MW 5 */ + 5804 "00000010" // /* MW 4 */ + 5805 "01100000" // /* MW 3 */ + 5806 "10000100" // /* MW 2 */ + 5807 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5808 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5809 "01100001" // /* MW 7 */ + 5810 "11100000" // /* MW 6 */ + 5811 "00000001" // /* MW 5 */ + 5812 "00000010" // /* MW 4 */ + 5813 "01100000" // /* MW 3 */ + 5814 "10010100" // /* MW 2 */ + 5815 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5816 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5817 "01000001" // /* MW 7 */ + 5818 "11100010" // /* MW 6 */ + 5819 "00000000" // /* MW 5 */ + 5820 "00000010" // /* MW 4 */ + 5821 "01100000" // /* MW 3 */ + 5822 "10000100" // /* MW 2 */ + 5823 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5824 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5825 "01100001" // /* MW 7 */ + 5826 "11100000" // /* MW 6 */ + 5827 "00000001" // /* MW 5 */ + 5828 "00000010" // /* MW 4 */ + 5829 "01100000" // /* MW 3 */ + 5830 "10010100" // /* MW 2 */ + 5831 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5832 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5833 "00100011" // /* MW 3 */ + 5834 "00011100" // /* MW 2 */ + 5835 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 131 4 first +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5836 "01011100" // VST.CONV.bf16.fp32 cml1, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 5837 "00000000" // /* MW 5 */ + 5838 "01010000" // /* MW 4 */ + 5839 "01100000" // /* MW 3 */ + 5840 "10010100" // /* MW 2 */ + 5841 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5842 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5843 "00100011" // /* MW 3 */ + 5844 "00011100" // /* MW 2 */ + 5845 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 154 20 first +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5846 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5847 "10100011" // /* MW 3 */ + 5848 "00011100" // /* MW 2 */ + 5849 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.delay_slot + 5850 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5851 "00100011" // /* MW 3 */ + 5852 "00011100" // /* MW 2 */ + 5853 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 154 20 first +.delay_slot + 5854 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5855 "10100011" // /* MW 3 */ + 5856 "00011100" // /* MW 2 */ + 5857 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_end0 + 5859 "00000000" // /* MW 1 */ +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0 +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.function superkernel_mul1d _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.src_ref 6 "superkernels.cpp" 277 first +.src_ref 6 "superkernels.cpp" 282 6 +.function_start + 5872 "01000100" // MOVXM p4, #508416 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5873 "00000000" // /* MW 5 */ + 5874 "11000100" // /* MW 4 */ + 5875 "11001000" // /* MW 3 */ + 5876 "00000111" // /* MW 2 */ + 5877 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 282 6 first + 5878 "11010100" // LDA r16, [p4]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5879 "11000001" // /* MW 5 */ + 5880 "10110101" // /* MW 4 */ + 5881 "11011000" // /* MW 3 */ + 5882 "11000010" // /* MW 2 */ + 5883 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 277 + 5884 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5885 "00000001" // /* MW 5 */ + 5886 "00000000" // /* MW 4 */ + 5887 "00000000" // /* MW 3 */ + 5888 "00001000" // /* MW 2 */ + 5889 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 279 22 first + 5890 "00111010" // ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5891 "01111001" // /* MW 9 */ + 5892 "01100000" // /* MW 8 */ + 5893 "11001010" // /* MW 7 */ + 5894 "10000001" // /* MW 6 */ + 5895 "00010100" // /* MW 5 */ + 5896 "00100011" // /* MW 4 */ + 5897 "10110000" // /* MW 3 */ + 5898 "00111010" // /* MW 2 */ + 5899 "11111111" // /* MW 1 */ + 5900 "00000010" // ST p0, [sp, #-20]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5901 "01110000" // /* MW 7 */ + 5902 "11010000" // /* MW 6 */ + 5903 "00001011" // /* MW 5 */ + 5904 "00000000" // /* MW 4 */ + 5905 "10110000" // /* MW 3 */ + 5906 "10000011" // /* MW 2 */ + 5907 "11111101" // /* MW 1 */ + 5908 "10011000" // ST r0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5909 "00010101" // /* MW 3 */ + 5910 "11111100" // /* MW 2 */ + 5911 "00001111" // /* MW 1 */ + 5912 "10011000" // ST lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5913 "00111101" // /* MW 3 */ + 5914 "11110000" // /* MW 2 */ + 5915 "00001111" // /* MW 1 */ + 5916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5917 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 282 6 first +.src_ref 6 "superkernels.cpp" 282 16 first + 5918 "10000100" // JNZ r16, #6064 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6064 delay_slots=5 */ + 5919 "00000001" // /* MW 5 */ + 5920 "01000000" // /* MW 4 */ + 5921 "11011000" // /* MW 3 */ + 5922 "00001011" // /* MW 2 */ + 5923 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 279 30 first +.delay_slot + 5924 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5925 "11111011" // /* MW 3 */ + 5926 "01100011" // /* MW 2 */ + 5927 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 279 11 +.delay_slot + 5928 "01000100" // MOVXM p2, #508420 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5929 "00001000" // /* MW 5 */ + 5930 "11000100" // /* MW 4 */ + 5931 "11000100" // /* MW 3 */ + 5932 "00000111" // /* MW 2 */ + 5933 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 279 11 +.delay_slot + 5934 "00000010" // ST r17, [p2]; MOV p2, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5935 "01110000" // /* MW 7 */ + 5936 "01100000" // /* MW 6 */ + 5937 "00110111" // /* MW 5 */ + 5938 "00000001" // /* MW 4 */ + 5939 "00110000" // /* MW 3 */ + 5940 "11000110" // /* MW 2 */ + 5941 "01000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.delay_slot + 5942 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5943 "11000000" // /* MW 3 */ + 5944 "11010110" // /* MW 2 */ + 5945 "00011011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 285 4 +.src_ref 6 "superkernels.cpp" 287 28 +.src_ref 6 "superkernels.cpp" 289 42 +.src_ref 6 "superkernels.cpp" 301 2 +.delay_slot + 5946 "00111010" // ST p2, [sp, #-12]; MOVXM p7, #508736 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5947 "00010001" // /* MW 9 */ + 5948 "10100000" // /* MW 8 */ + 5949 "10110001" // /* MW 7 */ + 5950 "11110011" // /* MW 6 */ + 5951 "00000001" // /* MW 5 */ + 5952 "00000000" // /* MW 4 */ + 5953 "10110000" // /* MW 3 */ + 5954 "10100011" // /* MW 2 */ + 5955 "11111110" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.src_ref 6 "superkernels.cpp" 285 4 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5956 "00111010" // MOVS p0, p7; MOVXM p2, #508448 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5957 "00010001" // /* MW 9 */ + 5958 "00010000" // /* MW 8 */ + 5959 "00110001" // /* MW 7 */ + 5960 "11110001" // /* MW 6 */ + 5961 "00000001" // /* MW 5 */ + 5962 "00000000" // /* MW 4 */ + 5963 "01100000" // /* MW 3 */ + 5964 "10010001" // /* MW 2 */ + 5965 "00010011" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5966 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #508444 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5967 "00010000" // /* MW 9 */ + 5968 "00001110" // /* MW 8 */ + 5969 "00110001" // /* MW 7 */ + 5970 "11110001" // /* MW 6 */ + 5971 "00000001" // /* MW 5 */ + 5972 "00000000" // /* MW 4 */ + 5973 "11100000" // /* MW 3 */ + 5974 "11000000" // /* MW 2 */ + 5975 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5977 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 285 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 5978 "00000100" // JL #5392 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=5392 delay_slots=5 */ + 5979 "00000001" // /* MW 5 */ + 5980 "00000000" // /* MW 4 */ + 5981 "10001000" // /* MW 3 */ + 5982 "00001010" // /* MW 2 */ + 5983 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5985 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5987 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5988 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5989 "00110001" // /* MW 3 */ + 5990 "00100000" // /* MW 2 */ + 5991 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 5992 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5993 "00000101" // /* MW 3 */ + 5994 "00100000" // /* MW 2 */ + 5995 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 5996 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5997 "00010001" // /* MW 3 */ + 5998 "00000110" // /* MW 2 */ + 5999 "00001010" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 289 18 +.src_ref 6 "superkernels.cpp" 289 42 first +.return_address + 6000 "10111010" // LDA r16, [p7]; MOVXM p1, #508420 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6001 "00010000" // /* MW 9 */ + 6002 "00000010" // /* MW 8 */ + 6003 "10110001" // /* MW 7 */ + 6004 "11110000" // /* MW 6 */ + 6005 "00000001" // /* MW 5 */ + 6006 "00000000" // /* MW 4 */ + 6007 "11010000" // /* MW 3 */ + 6008 "11000010" // /* MW 2 */ + 6009 "11100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 289 16 +.src_ref 6 "superkernels.cpp" 289 18 +.src_ref 6 "superkernels.cpp" 298 48 + 6010 "10111010" // LDA r17, [p1]; MOVXM p3, #508424 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6011 "00010000" // /* MW 9 */ + 6012 "00000100" // /* MW 8 */ + 6013 "10110001" // /* MW 7 */ + 6014 "11110001" // /* MW 6 */ + 6015 "00000001" // /* MW 5 */ + 6016 "00000000" // /* MW 4 */ + 6017 "11010000" // /* MW 3 */ + 6018 "11000110" // /* MW 2 */ + 6019 "00100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 287 28 first +.src_ref 6 "superkernels.cpp" 290 16 +.src_ref 6 "superkernels.cpp" 299 48 + 6020 "10111010" // LDA.u16 r18, [p7, #10]; MOVXM p1, #508428 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6021 "00010000" // /* MW 9 */ + 6022 "00000110" // /* MW 8 */ + 6023 "10110001" // /* MW 7 */ + 6024 "11110000" // /* MW 6 */ + 6025 "00000001" // /* MW 5 */ + 6026 "00000000" // /* MW 4 */ + 6027 "01010000" // /* MW 3 */ + 6028 "11001011" // /* MW 2 */ + 6029 "11101010" // /* MW 1 */ + 6030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6031 "00000000" // /* MW 1 */ + 6032 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6033 "00000000" // /* MW 1 */ + 6034 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6035 "00000000" // /* MW 1 */ + 6036 "10000100" // J #6080 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6080 delay_slots=5 */ + 6037 "00000000" // /* MW 5 */ + 6038 "00000000" // /* MW 4 */ + 6039 "11100000" // /* MW 3 */ + 6040 "00001011" // /* MW 2 */ + 6041 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 287 13 +.delay_slot + 6042 "01000100" // MOVXM p2, #508440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6043 "00110000" // /* MW 5 */ + 6044 "11000100" // /* MW 4 */ + 6045 "11000100" // /* MW 3 */ + 6046 "00000111" // /* MW 2 */ + 6047 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 289 27 first +.delay_slot + 6048 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6049 "00001111" // /* MW 3 */ + 6050 "01100001" // /* MW 2 */ + 6051 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 287 13 first +.delay_slot + 6052 "10011000" // ST r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6053 "01010001" // /* MW 3 */ + 6054 "00000110" // /* MW 2 */ + 6055 "00001010" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 289 16 first +.delay_slot + 6056 "10011000" // ST r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6057 "00010001" // /* MW 3 */ + 6058 "00000110" // /* MW 2 */ + 6059 "00001011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 290 16 first +.delay_slot + 6060 "10011000" // ST r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6061 "00010001" // /* MW 3 */ + 6062 "00000110" // /* MW 2 */ + 6063 "00001001" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 +.src_ref 6 "superkernels.cpp" 298 48 + 6064 "01000100" // MOVXM p3, #508424 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6065 "00010000" // /* MW 5 */ + 6066 "11000100" // /* MW 4 */ + 6067 "11000110" // /* MW 3 */ + 6068 "00000111" // /* MW 2 */ + 6069 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 299 48 + 6070 "10111010" // NOPA; MOVXM p1, #508428 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6071 "00010000" // /* MW 9 */ + 6072 "00000110" // /* MW 8 */ + 6073 "10110001" // /* MW 7 */ + 6074 "11110000" // /* MW 6 */ + 6075 "00000001" // /* MW 5 */ + 6076 "00000000" // /* MW 4 */ + 6077 "11110000" // /* MW 3 */ + 6078 "00101100" // /* MW 2 */ + 6079 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 +.src_ref 1 "io_buffer_main.h" 242 49 first + 6080 "00011000" // ADD.NC p0, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6081 "10000110" // /* MW 3 */ + 6082 "01100111" // /* MW 2 */ + 6083 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 293 2 + 6084 "10111010" // LDA r27, [p0], #-4; MOVXM p2, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6085 "00010000" // /* MW 9 */ + 6086 "00000000" // /* MW 8 */ + 6087 "00110001" // /* MW 7 */ + 6088 "11110001" // /* MW 6 */ + 6089 "00000001" // /* MW 5 */ + 6090 "00000000" // /* MW 4 */ + 6091 "11010000" // /* MW 3 */ + 6092 "11101110" // /* MW 2 */ + 6093 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 6094 "10011000" // LDA r16, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6095 "00010110" // /* MW 3 */ + 6096 "11111110" // /* MW 2 */ + 6097 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 6098 "10011000" // LDA r17, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6099 "00110110" // /* MW 3 */ + 6100 "11111110" // /* MW 2 */ + 6101 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 293 2 first + 6102 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6103 "01010110" // /* MW 3 */ + 6104 "00000110" // /* MW 2 */ + 6105 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first + 6106 "10011000" // LDA r19, [p0, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6107 "01110110" // /* MW 3 */ + 6108 "01000110" // /* MW 2 */ + 6109 "00000000" // /* MW 1 */ + 6110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6111 "00000000" // /* MW 1 */ + 6112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6113 "00000000" // /* MW 1 */ + 6114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6115 "00000000" // /* MW 1 */ + 6116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6117 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 6118 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6119 "00000010" // /* MW 3 */ + 6120 "01100001" // /* MW 2 */ + 6121 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 +.src_ref 6 "superkernels.cpp" 293 2 first + 6122 "01011100" // ST r16, [p0]; ADD r16, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6123 "00001110" // /* MW 5 */ + 6124 "01000000" // /* MW 4 */ + 6125 "00111001" // /* MW 3 */ + 6126 "11000010" // /* MW 2 */ + 6127 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 293 2 + 6128 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6129 "00010001" // /* MW 3 */ + 6130 "00000110" // /* MW 2 */ + 6131 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 +.src_ref 1 "io_buffer_main.h" 419 8 + 6132 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6133 "11111101" // /* MW 3 */ + 6134 "11100000" // /* MW 2 */ + 6135 "00010111" // /* MW 1 */ + 6136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6137 "00000000" // /* MW 1 */ + 6138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6139 "00000000" // /* MW 1 */ + 6140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6141 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 6142 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6143 "00001000" // /* MW 3 */ + 6144 "11010011" // /* MW 2 */ + 6145 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 first + 6146 "00011000" // ADD.NC p2, r14, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6147 "00000110" // /* MW 3 */ + 6148 "01100111" // /* MW 2 */ + 6149 "00011010" // /* MW 1 */ + 6150 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6151 "00000000" // /* MW 1 */ + 6152 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6153 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 + 6154 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6155 "01110110" // /* MW 3 */ + 6156 "11111111" // /* MW 2 */ + 6157 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 6158 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6159 "00110110" // /* MW 3 */ + 6160 "11111110" // /* MW 2 */ + 6161 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 6162 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6163 "01010110" // /* MW 3 */ + 6164 "11111110" // /* MW 2 */ + 6165 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 47 first + 6166 "10011000" // LDA r19, [p2, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6167 "01110110" // /* MW 3 */ + 6168 "01010110" // /* MW 2 */ + 6169 "00000010" // /* MW 1 */ + 6170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6171 "00000000" // /* MW 1 */ + 6172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6173 "00000000" // /* MW 1 */ + 6174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6175 "00000000" // /* MW 1 */ + 6176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6177 "00000000" // /* MW 1 */ + 6178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6179 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 6180 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6181 "00010010" // /* MW 3 */ + 6182 "10100011" // /* MW 2 */ + 6183 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 + 6184 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6185 "00110001" // /* MW 3 */ + 6186 "00000110" // /* MW 2 */ + 6187 "00001010" // /* MW 1 */ + 6188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6189 "00000000" // /* MW 1 */ + 6190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6191 "00000000" // /* MW 1 */ + 6192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6193 "00000000" // /* MW 1 */ + 6194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6195 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 6196 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6197 "00001000" // /* MW 3 */ + 6198 "11010011" // /* MW 2 */ + 6199 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 32 +.src_ref 6 "superkernels.cpp" 298 46 +.src_ref 6 "superkernels.cpp" 299 46 + 6200 "00111010" // MOVS p6, p2; MOVX r16, #1; MOV r14, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6201 "01111001" // /* MW 9 */ + 6202 "01100000" // /* MW 8 */ + 6203 "11001110" // /* MW 7 */ + 6204 "00101001" // /* MW 6 */ + 6205 "00000000" // /* MW 5 */ + 6206 "00000001" // /* MW 4 */ + 6207 "01100000" // /* MW 3 */ + 6208 "00010001" // /* MW 2 */ + 6209 "11010001" // /* MW 1 */ + 6210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6211 "00000000" // /* MW 1 */ + 6212 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6213 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 + 6214 "00011000" // LDA p4, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6215 "00011001" // /* MW 3 */ + 6216 "11101110" // /* MW 2 */ + 6217 "00000111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 298 48 first + 6218 "00001100" // LDA r17, [p3]; ST p0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6219 "00111011" // /* MW 5 */ + 6220 "11011000" // /* MW 4 */ + 6221 "11011111" // /* MW 3 */ + 6222 "11000110" // /* MW 2 */ + 6223 "01100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 299 48 first +.src_ref 6 "superkernels.cpp" 301 2 + 6224 "11010100" // LDA r20, [p1]; MOV p3, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6225 "10000001" // /* MW 5 */ + 6226 "11011101" // /* MW 4 */ + 6227 "11010110" // /* MW 3 */ + 6228 "11010010" // /* MW 2 */ + 6229 "00100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 6230 "10011000" // LDA r18, [p2], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6231 "01010110" // /* MW 3 */ + 6232 "01001110" // /* MW 2 */ + 6233 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 6234 "10011000" // LDA p2, [p0], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6235 "00011110" // /* MW 3 */ + 6236 "01011101" // /* MW 2 */ + 6237 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 40 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6238 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6239 "11000000" // /* MW 3 */ + 6240 "01100000" // /* MW 2 */ + 6241 "00011111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6243 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6244 "10011000" // LDA r19, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6245 "01110110" // /* MW 3 */ + 6246 "00000110" // /* MW 2 */ + 6247 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6248 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6249 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 301 2 first +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 6250 "00000100" // JL #5552 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=5552 delay_slots=5 */ + 6251 "00000001" // /* MW 5 */ + 6252 "00000000" // /* MW 4 */ + 6253 "11011000" // /* MW 3 */ + 6254 "00001010" // /* MW 2 */ + 6255 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 40 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6256 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6257 "11000000" // /* MW 3 */ + 6258 "11010100" // /* MW 2 */ + 6259 "00011011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 298 46 first +.delay_slot + 6260 "10011000" // LSHL r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6261 "00001101" // /* MW 3 */ + 6262 "01100011" // /* MW 2 */ + 6263 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 299 46 first +.delay_slot + 6264 "10011000" // LSHL r16, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6265 "00001101" // /* MW 3 */ + 6266 "00100001" // /* MW 2 */ + 6267 "00010101" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 299 46 +.delay_slot + 6268 "01011000" // ADD.NC p1, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6269 "01000001" // /* MW 3 */ + 6270 "01101001" // /* MW 2 */ + 6271 "00011001" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 298 46 first +.delay_slot + 6272 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6273 "00000000" // /* MW 15 */ + 6274 "00000000" // /* MW 14 */ + 6275 "10101000" // /* MW 13 */ + 6276 "11100010" // /* MW 12 */ + 6277 "00110100" // /* MW 11 */ + 6278 "00000000" // /* MW 10 */ + 6279 "00000000" // /* MW 9 */ + 6280 "00000000" // /* MW 8 */ + 6281 "01011011" // /* MW 7 */ + 6282 "00000001" // /* MW 6 */ + 6283 "00100000" // /* MW 5 */ + 6284 "00000000" // /* MW 4 */ + 6285 "11110000" // /* MW 3 */ + 6286 "00101100" // /* MW 2 */ + 6287 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 32 first +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 351 40 +.src_ref 1 "io_buffer_main.h" 449 8 +.src_ref 1 "io_buffer_main.h" 449 8 +.return_address + 6288 "10111010" // LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6289 "01111000" // /* MW 9 */ + 6290 "11010000" // /* MW 8 */ + 6291 "10110011" // /* MW 7 */ + 6292 "00101000" // /* MW 6 */ + 6293 "00000000" // /* MW 5 */ + 6294 "00000001" // /* MW 4 */ + 6295 "11010000" // /* MW 3 */ + 6296 "11000110" // /* MW 2 */ + 6297 "11001000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 19 + 6298 "01000100" // MOVXM p6, #508440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6299 "00110000" // /* MW 5 */ + 6300 "11000100" // /* MW 4 */ + 6301 "11001100" // /* MW 3 */ + 6302 "00000111" // /* MW 2 */ + 6303 "00000000" // /* MW 1 */ + 6304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6305 "00000000" // /* MW 1 */ + 6306 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6307 "00000000" // /* MW 1 */ + 6308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6309 "00000000" // /* MW 1 */ + 6310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6311 "00000000" // /* MW 1 */ + 6312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6313 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 6314 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6315 "00001000" // /* MW 3 */ + 6316 "01010001" // /* MW 2 */ + 6317 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first + 6318 "10011000" // LDA r17, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6319 "00110110" // /* MW 3 */ + 6320 "11110110" // /* MW 2 */ + 6321 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 + 6322 "00011000" // LDA p2, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6323 "00011001" // /* MW 3 */ + 6324 "11101101" // /* MW 2 */ + 6325 "00000111" // /* MW 1 */ + 6326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6327 "00000000" // /* MW 1 */ + 6328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6329 "00000000" // /* MW 1 */ + 6330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6331 "00000000" // /* MW 1 */ + 6332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6333 "00000000" // /* MW 1 */ + 6334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6335 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 + 6336 "10011000" // SUB r17, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6337 "00010001" // /* MW 3 */ + 6338 "00100011" // /* MW 2 */ + 6339 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first +.src_ref 1 "io_buffer_main.h" 351 28 + 6340 "00001100" // LDA r17, [p2, #20]; ST r17, [p1, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6341 "01100011" // /* MW 5 */ + 6342 "11101100" // /* MW 4 */ + 6343 "11010011" // /* MW 3 */ + 6344 "11000110" // /* MW 2 */ + 6345 "01001010" // /* MW 1 */ + 6346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6347 "00000000" // /* MW 1 */ + 6348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6349 "00000000" // /* MW 1 */ + 6350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6351 "00000000" // /* MW 1 */ + 6352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6353 "00000000" // /* MW 1 */ + 6354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6355 "00000000" // /* MW 1 */ + 6356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6357 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 6358 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6359 "00001000" // /* MW 3 */ + 6360 "01010001" // /* MW 2 */ + 6361 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first +.src_ref 6 "superkernels.cpp" 305 6 +.src_ref 6 "superkernels.cpp" 306 14 + 6362 "10111010" // LDA r19, [p7, #-8]; MOVXM p1, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6363 "00010000" // /* MW 9 */ + 6364 "00000000" // /* MW 8 */ + 6365 "10110001" // /* MW 7 */ + 6366 "11110000" // /* MW 6 */ + 6367 "00000001" // /* MW 5 */ + 6368 "00000000" // /* MW 4 */ + 6369 "11010000" // /* MW 3 */ + 6370 "11001110" // /* MW 2 */ + 6371 "11111100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 19 first + 6372 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6373 "01010110" // /* MW 3 */ + 6374 "00000110" // /* MW 2 */ + 6375 "00000110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 6 + 6376 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6377 "00110110" // /* MW 3 */ + 6378 "00000110" // /* MW 2 */ + 6379 "00000001" // /* MW 1 */ + 6380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6381 "00000000" // /* MW 1 */ + 6382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6383 "00000000" // /* MW 1 */ + 6384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6385 "00000000" // /* MW 1 */ + 6386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6387 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 first + 6388 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6389 "00110001" // /* MW 3 */ + 6390 "00100001" // /* MW 2 */ + 6391 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 6392 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6393 "00010001" // /* MW 3 */ + 6394 "11100110" // /* MW 2 */ + 6395 "00001111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 16 first + 6396 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6397 "00101000" // /* MW 3 */ + 6398 "01100001" // /* MW 2 */ + 6399 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 6 + 6400 "10000100" // JNZ r16, #6432 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6432 delay_slots=5 */ + 6401 "00000001" // /* MW 5 */ + 6402 "01000000" // /* MW 4 */ + 6403 "10010000" // /* MW 3 */ + 6404 "00001100" // /* MW 2 */ + 6405 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6407 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6408 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6409 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6411 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6413 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6415 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 306 14 + 6416 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6417 "00000001" // /* MW 3 */ + 6418 "00100000" // /* MW 2 */ + 6419 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 306 14 first + 6420 "00110110" // NOPA; NOPB; ST r16, [p1]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6421 "11000001" // /* MW 11 */ + 6422 "00001000" // /* MW 10 */ + 6423 "10000011" // /* MW 9 */ + 6424 "00000000" // /* MW 8 */ + 6425 "00000000" // /* MW 7 */ + 6426 "00000000" // /* MW 6 */ + 6427 "00100000" // /* MW 5 */ + 6428 "00000000" // /* MW 4 */ + 6429 "11110000" // /* MW 3 */ + 6430 "00101100" // /* MW 2 */ + 6431 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 +.src_ref 6 "superkernels.cpp" 308 + 6432 "00011000" // LDA lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6433 "00111001" // /* MW 3 */ + 6434 "11110000" // /* MW 2 */ + 6435 "00000111" // /* MW 1 */ + 6436 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6437 "11110001" // /* MW 3 */ + 6438 "11111101" // /* MW 2 */ + 6439 "00000111" // /* MW 1 */ + 6440 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6441 "10011001" // /* MW 3 */ + 6442 "11110111" // /* MW 2 */ + 6443 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 6444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6445 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.noswbrkpt + 6446 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6447 "11010001" // /* MW 3 */ + 6448 "11111001" // /* MW 2 */ + 6449 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 6450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6451 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 6452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6453 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 308 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 6454 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 6455 "00000000" // /* MW 3 */ + 6456 "00101000" // /* MW 2 */ + 6457 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6458 "00011000" // MOVS p6, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6459 "00001011" // /* MW 3 */ + 6460 "10001110" // /* MW 2 */ + 6461 "00001110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 308 +.delay_slot + 6462 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6463 "00000001" // /* MW 5 */ + 6464 "00000000" // /* MW 4 */ + 6465 "00000000" // /* MW 3 */ + 6466 "11111000" // /* MW 2 */ + 6467 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6469 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6471 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0 + 6473 "00000000" // /* MW 1 */ +.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh +.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_begin0 +.function setup_conv2d_dw_params_bf16 _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh +.src_ref 7 "conv2d_dw_bf16_params.h" 177 first +.src_ref 7 "conv2d_dw_bf16_params.h" 181 15 +.src_ref 7 "conv2d_dw_bf16_params.h" 181 17 first +.function_start + 6480 "10111010" // LDA el0, [p0], #4; MOVXM p1, #508864 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6481 "00010000" // /* MW 9 */ + 6482 "11100000" // /* MW 8 */ + 6483 "10110001" // /* MW 7 */ + 6484 "11110000" // /* MW 6 */ + 6485 "00000001" // /* MW 5 */ + 6486 "00000000" // /* MW 4 */ + 6487 "11010000" // /* MW 3 */ + 6488 "10000101" // /* MW 2 */ + 6489 "00000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 181 17 first +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.src_ref 7 "conv2d_dw_bf16_params.h" 184 80 + 6490 "10111010" // LDA eh0, [p0], #4; MOVX r16, #2; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6491 "01011000" // /* MW 9 */ + 6492 "00000000" // /* MW 8 */ + 6493 "00001000" // /* MW 7 */ + 6494 "01001011" // /* MW 6 */ + 6495 "00000000" // /* MW 5 */ + 6496 "00000001" // /* MW 4 */ + 6497 "11010000" // /* MW 3 */ + 6498 "10000001" // /* MW 2 */ + 6499 "00000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 177 + 6500 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6501 "00000001" // /* MW 5 */ + 6502 "00000000" // /* MW 4 */ + 6503 "00000000" // /* MW 3 */ + 6504 "00001000" // /* MW 2 */ + 6505 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 32 + 6506 "00111010" // ST p7, [sp, #-16]; MOVXM p7, #508864 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6507 "00010001" // /* MW 9 */ + 6508 "11100000" // /* MW 8 */ + 6509 "10110001" // /* MW 7 */ + 6510 "11110011" // /* MW 6 */ + 6511 "00000001" // /* MW 5 */ + 6512 "00000000" // /* MW 4 */ + 6513 "10110000" // /* MW 3 */ + 6514 "01110011" // /* MW 2 */ + 6515 "11111110" // /* MW 1 */ + 6516 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6517 "00111101" // /* MW 3 */ + 6518 "11111100" // /* MW 2 */ + 6519 "00001111" // /* MW 1 */ + 6520 "10011000" // ST r14, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6521 "11010101" // /* MW 3 */ + 6522 "11110101" // /* MW 2 */ + 6523 "00001111" // /* MW 1 */ + 6524 "10011000" // ST r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6525 "11110101" // /* MW 3 */ + 6526 "11111001" // /* MW 2 */ + 6527 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 181 15 + 6528 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6529 "00101001" // /* MW 3 */ + 6530 "00011100" // /* MW 2 */ + 6531 "00001001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 181 15 + 6532 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6533 "00001001" // /* MW 3 */ + 6534 "00011100" // /* MW 2 */ + 6535 "00001001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 181 17 + 6536 "10011000" // LDA el0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6537 "00101110" // /* MW 3 */ + 6538 "00000100" // /* MW 2 */ + 6539 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 181 17 + 6540 "10011000" // LDA eh0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6541 "00001110" // /* MW 3 */ + 6542 "00010100" // /* MW 2 */ + 6543 "00000000" // /* MW 1 */ + 6544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6545 "00000000" // /* MW 1 */ + 6546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6547 "00000000" // /* MW 1 */ + 6548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6549 "00000000" // /* MW 1 */ + 6550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6551 "00000000" // /* MW 1 */ + 6552 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6553 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 181 15 + 6554 "10011000" // ST el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6555 "00101001" // /* MW 3 */ + 6556 "00000100" // /* MW 2 */ + 6557 "00001001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 181 15 + 6558 "10011000" // ST eh0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6559 "00001001" // /* MW 3 */ + 6560 "00010100" // /* MW 2 */ + 6561 "00001001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 32 first + 6562 "10011000" // LDA.u8 r17, [p7], #5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6563 "00101010" // /* MW 3 */ + 6564 "01011110" // /* MW 2 */ + 6565 "00000111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 52 + 6566 "10011000" // LDA.u8 r18, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6567 "01001010" // /* MW 3 */ + 6568 "11101110" // /* MW 2 */ + 6569 "00000111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 80 + 6570 "10011000" // LDA.u8 r1, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6571 "00101010" // /* MW 3 */ + 6572 "11101100" // /* MW 2 */ + 6573 "00000111" // /* MW 1 */ + 6574 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6575 "00000000" // /* MW 1 */ + 6576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6577 "00000000" // /* MW 1 */ + 6578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6579 "00000000" // /* MW 1 */ + 6580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6581 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.no_stack_arguments + 6582 "00000100" // JL #12464 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12464 delay_slots=5 */ + 6583 "00000001" // /* MW 5 */ + 6584 "00000000" // /* MW 4 */ + 6585 "01011000" // /* MW 3 */ + 6586 "00011000" // /* MW 2 */ + 6587 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 38 +.delay_slot + 6588 "01011100" // ST r18, [sp, #-20]; SUB r14, r17, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6589 "01000011" // /* MW 5 */ + 6590 "10111010" // /* MW 4 */ + 6591 "10111000" // /* MW 3 */ + 6592 "11001010" // /* MW 2 */ + 6593 "11111101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.src_ref 7 "conv2d_dw_bf16_params.h" 184 80 +.delay_slot + 6594 "00111010" // ST r1, [sp, #-28]; NE r16, r1, r16; MOV r15, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6595 "01111001" // /* MW 9 */ + 6596 "01010000" // /* MW 8 */ + 6597 "11101000" // /* MW 7 */ + 6598 "01000101" // /* MW 6 */ + 6599 "00001000" // /* MW 5 */ + 6600 "00000011" // /* MW 4 */ + 6601 "10110000" // /* MW 3 */ + 6602 "10000110" // /* MW 2 */ + 6603 "11111100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.delay_slot + 6604 "01011100" // ST r16, [sp, #-24]; LT r27, r14, r24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6605 "00010101" // /* MW 5 */ + 6606 "01101111" // /* MW 4 */ + 6607 "10110111" // /* MW 3 */ + 6608 "01000010" // /* MW 2 */ + 6609 "11111101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.delay_slot + 6610 "10011000" // SUB r17, r24, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6611 "11100001" // /* MW 3 */ + 6612 "00100010" // /* MW 2 */ + 6613 "00010110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.delay_slot + 6614 "01111010" // NOPA; NOPS; SEL.EQZ r0, r14, r17, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6615 "00010010" // /* MW 9 */ + 6616 "10000001" // /* MW 8 */ + 6617 "00000011" // /* MW 7 */ + 6618 "00000000" // /* MW 6 */ + 6619 "01011011" // /* MW 5 */ + 6620 "00000001" // /* MW 4 */ + 6621 "11110000" // /* MW 3 */ + 6622 "00101100" // /* MW 2 */ + 6623 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.src_ref 7 "conv2d_dw_bf16_params.h" 185 32 first +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 +.return_address + 6624 "10111010" // LDA.u8 r17, [p7], #3; XOR r20, r15, r14; MOV r16, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6625 "01011000" // /* MW 9 */ + 6626 "00000000" // /* MW 8 */ + 6627 "00001000" // /* MW 7 */ + 6628 "00110110" // /* MW 6 */ + 6629 "01000111" // /* MW 5 */ + 6630 "00011111" // /* MW 4 */ + 6631 "01010000" // /* MW 3 */ + 6632 "11000101" // /* MW 2 */ + 6633 "11100111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 first +.src_ref 7 "conv2d_dw_bf16_params.h" 185 52 + 6634 "00101100" // LDA.u8 r18, [p7], #-2; SUB r19, r16, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6635 "01000011" // /* MW 5 */ + 6636 "01001100" // /* MW 4 */ + 6637 "01011000" // /* MW 3 */ + 6638 "11001001" // /* MW 2 */ + 6639 "11111101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 + 6640 "00101100" // LDA r1, [sp, #-28]; LT r27, r20, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6641 "00010101" // /* MW 5 */ + 6642 "01101110" // /* MW 4 */ + 6643 "00101010" // /* MW 3 */ + 6644 "10000110" // /* MW 2 */ + 6645 "11111100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 + 6646 "00011000" // SEL.EQZ r19, r2, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6647 "00110010" // /* MW 3 */ + 6648 "10100111" // /* MW 2 */ + 6649 "00010000" // /* MW 1 */ + 6650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6651 "00000000" // /* MW 1 */ + 6652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6653 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 first +.no_stack_arguments + 6654 "00000100" // JL #12464 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12464 delay_slots=5 */ + 6655 "00000001" // /* MW 5 */ + 6656 "00000000" // /* MW 4 */ + 6657 "01011000" // /* MW 3 */ + 6658 "00011000" // /* MW 2 */ + 6659 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 first +.delay_slot + 6660 "00011000" // EXTEND.s16 r19, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6661 "01110000" // /* MW 3 */ + 6662 "11100110" // /* MW 2 */ + 6663 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 87 +.src_ref 7 "conv2d_dw_bf16_params.h" 185 38 first +.delay_slot + 6664 "00111010" // ST r18, [sp, #-32]; SUB r14, r17, r18; ADD.NC r15, r19, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6665 "01001001" // /* MW 9 */ + 6666 "11000000" // /* MW 8 */ + 6667 "11101100" // /* MW 7 */ + 6668 "00001101" // /* MW 6 */ + 6669 "11101001" // /* MW 5 */ + 6670 "00100010" // /* MW 4 */ + 6671 "10110000" // /* MW 3 */ + 6672 "01001010" // /* MW 2 */ + 6673 "11111100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 +.delay_slot + 6674 "10011000" // LT r27, r14, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6675 "00001010" // /* MW 3 */ + 6676 "10110111" // /* MW 2 */ + 6677 "00010011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 +.delay_slot + 6678 "10011000" // SUB r17, r16, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6679 "11100001" // /* MW 3 */ + 6680 "00100010" // /* MW 2 */ + 6681 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 +.delay_slot + 6682 "00101100" // NOPA; SEL.EQZ r0, r14, r17, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6683 "00100100" // /* MW 5 */ + 6684 "00000010" // /* MW 4 */ + 6685 "11110111" // /* MW 3 */ + 6686 "00101100" // /* MW 2 */ + 6687 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 +.src_ref 7 "conv2d_dw_bf16_params.h" 186 23 +.src_ref 7 "conv2d_dw_bf16_params.h" 192 50 +.src_ref 7 "conv2d_dw_bf16_params.h" 198 45 +.src_ref 7 "conv2d_dw_bf16_params.h" 200 64 +.src_ref 7 "conv2d_dw_bf16_params.h" 200 64 +.src_ref 7 "conv2d_dw_bf16_params.h" 216 53 +.src_ref 7 "conv2d_dw_bf16_params.h" 235 59 +.src_ref 7 "conv2d_dw_bf16_params.h" 237 57 +.src_ref 7 "conv2d_dw_bf16_params.h" 239 139 +.return_address + 6688 "10111010" // LDA r1, [sp, #-28]; MOVX r19, #2; MOV m0, #66 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6689 "01011000" // /* MW 9 */ + 6690 "01000010" // /* MW 8 */ + 6691 "00000000" // /* MW 7 */ + 6692 "01001000" // /* MW 6 */ + 6693 "00110000" // /* MW 5 */ + 6694 "00000001" // /* MW 4 */ + 6695 "00100000" // /* MW 3 */ + 6696 "10000110" // /* MW 2 */ + 6697 "11111100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 +.src_ref 7 "conv2d_dw_bf16_params.h" 192 50 first +.src_ref 7 "conv2d_dw_bf16_params.h" 200 45 +.src_ref 7 "conv2d_dw_bf16_params.h" 200 45 +.src_ref 7 "conv2d_dw_bf16_params.h" 209 100 +.src_ref 7 "conv2d_dw_bf16_params.h" 219 63 +.src_ref 7 "conv2d_dw_bf16_params.h" 226 49 +.src_ref 7 "conv2d_dw_bf16_params.h" 230 47 +.src_ref 7 "conv2d_dw_bf16_params.h" 237 41 + 6698 "10111010" // LDA.u8 r20, [p7], m0; MOVX r24, #0; MOV r18, #8 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6699 "01011000" // /* MW 9 */ + 6700 "00001000" // /* MW 8 */ + 6701 "01001000" // /* MW 7 */ + 6702 "00001010" // /* MW 6 */ + 6703 "10000000" // /* MW 5 */ + 6704 "00000001" // /* MW 4 */ + 6705 "01010000" // /* MW 3 */ + 6706 "01010001" // /* MW 2 */ + 6707 "11100001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 197 78 +.src_ref 7 "conv2d_dw_bf16_params.h" 198 76 +.src_ref 7 "conv2d_dw_bf16_params.h" 200 45 +.src_ref 7 "conv2d_dw_bf16_params.h" 234 81 + 6708 "10111010" // LDA r28, [sp, #-32]; MOVX r16, #-6; MOV r31, #23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6709 "01011000" // /* MW 9 */ + 6710 "00010111" // /* MW 8 */ + 6711 "11101000" // /* MW 7 */ + 6712 "01001011" // /* MW 6 */ + 6713 "00000111" // /* MW 5 */ + 6714 "00111111" // /* MW 4 */ + 6715 "00100000" // /* MW 3 */ + 6716 "01110010" // /* MW 2 */ + 6717 "11111100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 186 74 +.src_ref 7 "conv2d_dw_bf16_params.h" 197 68 +.src_ref 7 "conv2d_dw_bf16_params.h" 201 50 +.src_ref 7 "conv2d_dw_bf16_params.h" 208 63 +.src_ref 7 "conv2d_dw_bf16_params.h" 226 49 +.src_ref 7 "conv2d_dw_bf16_params.h" 230 47 +.src_ref 7 "conv2d_dw_bf16_params.h" 239 51 +.src_ref 7 "conv2d_dw_bf16_params.h" 239 74 + 6718 "10111010" // LDA r22, [sp, #-20]; MOVX r26, #-2; MOV r21, #6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6719 "01011000" // /* MW 9 */ + 6720 "00000110" // /* MW 8 */ + 6721 "10101000" // /* MW 7 */ + 6722 "11001010" // /* MW 6 */ + 6723 "10100111" // /* MW 5 */ + 6724 "00111111" // /* MW 4 */ + 6725 "00100000" // /* MW 3 */ + 6726 "11011010" // /* MW 2 */ + 6727 "11111101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 186 84 +.src_ref 7 "conv2d_dw_bf16_params.h" 198 39 +.src_ref 7 "conv2d_dw_bf16_params.h" 201 50 +.src_ref 7 "conv2d_dw_bf16_params.h" 234 77 + 6728 "10111010" // LDA r3, [sp, #-24]; MOVX r29, #508; MOV m2, #32 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6729 "01011000" // /* MW 9 */ + 6730 "00100000" // /* MW 8 */ + 6731 "00000000" // /* MW 7 */ + 6732 "10001001" // /* MW 6 */ + 6733 "11010111" // /* MW 5 */ + 6734 "00001111" // /* MW 4 */ + 6735 "00100000" // /* MW 3 */ + 6736 "00001110" // /* MW 2 */ + 6737 "11111101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 208 16 +.src_ref 7 "conv2d_dw_bf16_params.h" 208 71 +.src_ref 7 "conv2d_dw_bf16_params.h" 209 93 +.src_ref 7 "conv2d_dw_bf16_params.h" 214 72 +.src_ref 7 "conv2d_dw_bf16_params.h" 216 53 +.src_ref 7 "conv2d_dw_bf16_params.h" 221 73 +.src_ref 7 "conv2d_dw_bf16_params.h" 230 41 +.src_ref 7 "conv2d_dw_bf16_params.h" 237 41 + 6738 "10111010" // MOVA m0, #-178; MOVX r23, #1; MOV r0, #128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6739 "01011000" // /* MW 9 */ + 6740 "10000000" // /* MW 8 */ + 6741 "00001000" // /* MW 7 */ + 6742 "00101000" // /* MW 6 */ + 6743 "01110000" // /* MW 5 */ + 6744 "00000001" // /* MW 4 */ + 6745 "10000000" // /* MW 3 */ + 6746 "11000000" // /* MW 2 */ + 6747 "11101001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 first +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 +.src_ref 7 "conv2d_dw_bf16_params.h" 232 54 + 6748 "10111010" // MOVA m1, #186; SUB r17, r24, r2; MOV vaddSign0, crMCDEn /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6749 "01111000" // /* MW 9 */ + 6750 "10110000" // /* MW 8 */ + 6751 "10011101" // /* MW 7 */ + 6752 "00001100" // /* MW 6 */ + 6753 "00010001" // /* MW 5 */ + 6754 "00110001" // /* MW 4 */ + 6755 "10000000" // /* MW 3 */ + 6756 "01000100" // /* MW 2 */ + 6757 "00010111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 + 6758 "10011000" // XOR r30, r1, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6759 "11100110" // /* MW 3 */ + 6760 "01111100" // /* MW 2 */ + 6761 "00010000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 + 6762 "10011000" // LT r27, r30, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6763 "10001010" // /* MW 3 */ + 6764 "10110111" // /* MW 2 */ + 6765 "00010111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 +.src_ref 7 "conv2d_dw_bf16_params.h" 206 70 + 6766 "00100100" // SEL.EQZ r17, r2, r17, r27; ADD.NC r7, r28, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6767 "11111111" // /* MW 5 */ + 6768 "10111100" // /* MW 4 */ + 6769 "01000011" // /* MW 3 */ + 6770 "01100010" // /* MW 2 */ + 6771 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 +.src_ref 7 "conv2d_dw_bf16_params.h" 186 74 + 6772 "00100100" // EXTEND.s16 r30, r17; ADD.NC r22, r22, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6773 "00000010" // /* MW 5 */ + 6774 "00110110" // /* MW 4 */ + 6775 "00001011" // /* MW 3 */ + 6776 "10001110" // /* MW 2 */ + 6777 "10001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 88 +.src_ref 7 "conv2d_dw_bf16_params.h" 192 22 first + 6778 "00100100" // MUL r30, r15, r20; ADD.NC r14, r30, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6779 "00000001" // /* MW 5 */ + 6780 "00111110" // /* MW 4 */ + 6781 "11110111" // /* MW 3 */ + 6782 "10101001" // /* MW 2 */ + 6783 "01111111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 197 68 +.src_ref 7 "conv2d_dw_bf16_params.h" 198 45 first + 6784 "00100100" // MUL r2, r1, r14; ADD.NC r17, r22, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6785 "00000001" // /* MW 5 */ + 6786 "10110110" // /* MW 4 */ + 6787 "11111000" // /* MW 3 */ + 6788 "10011101" // /* MW 2 */ + 6789 "00001000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 200 64 first + 6790 "10011000" // EQ r27, r19, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6791 "00010111" // /* MW 3 */ + 6792 "11110110" // /* MW 2 */ + 6793 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 198 55 first + 6794 "10011000" // MUL r2, r30, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6795 "00101111" // /* MW 3 */ + 6796 "10000100" // /* MW 2 */ + 6797 "00010111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 200 45 first +.src_ref 7 "conv2d_dw_bf16_params.h" 209 42 +.src_ref 7 "conv2d_dw_bf16_params.h" 213 69 + 6798 "01100100" // SEL.EQZ r31, r31, r18, r27; MOV r18, #-3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6799 "11110101" // /* MW 5 */ + 6800 "00111111" // /* MW 4 */ + 6801 "01001001" // /* MW 3 */ + 6802 "11100100" // /* MW 2 */ + 6803 "11111111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 198 76 first + 6804 "10011000" // LSHL r16, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6805 "00001101" // /* MW 3 */ + 6806 "10100001" // /* MW 2 */ + 6807 "00010000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 197 68 first + 6808 "10011000" // LSHL r2, r17, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6809 "10101101" // /* MW 3 */ + 6810 "01000101" // /* MW 2 */ + 6811 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 197 78 +.src_ref 7 "conv2d_dw_bf16_params.h" 204 79 + 6812 "00100100" // MUL r2, r2, r28; ADD.NC r4, r2, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6813 "11111111" // /* MW 5 */ + 6814 "00100010" // /* MW 4 */ + 6815 "11110010" // /* MW 3 */ + 6816 "10111001" // /* MW 2 */ + 6817 "00010000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 201 50 first + 6818 "10011000" // LSHL r3, r3, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6819 "01011101" // /* MW 3 */ + 6820 "11000111" // /* MW 2 */ + 6821 "00010000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 186 23 first +.src_ref 7 "conv2d_dw_bf16_params.h" 197 39 first + 6822 "01011100" // ST r2, [p7], #-4; MUL r5, r15, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6823 "00111111" // /* MW 5 */ + 6824 "10010100" // /* MW 4 */ + 6825 "00110111" // /* MW 3 */ + 6826 "10001010" // /* MW 2 */ + 6827 "11111111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 186 84 +.src_ref 7 "conv2d_dw_bf16_params.h" 198 39 first +.src_ref 7 "conv2d_dw_bf16_params.h" 212 52 +.src_ref 7 "conv2d_dw_bf16_params.h" 219 53 + 6828 "00111010" // ST r16, [p7], m2; AND r22, r29, r22; MOV r16, #4 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6829 "01011001" // /* MW 9 */ + 6830 "00000100" // /* MW 8 */ + 6831 "00001000" // /* MW 7 */ + 6832 "00100110" // /* MW 6 */ + 6833 "01101011" // /* MW 5 */ + 6834 "00111011" // /* MW 4 */ + 6835 "00110000" // /* MW 3 */ + 6836 "01000010" // /* MW 2 */ + 6837 "11101001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 186 44 first +.src_ref 7 "conv2d_dw_bf16_params.h" 200 42 first + 6838 "01011100" // ST r31, [p7], #-16; ADD r22, r5, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6839 "11000001" // /* MW 5 */ + 6840 "11011010" // /* MW 4 */ + 6841 "00110010" // /* MW 3 */ + 6842 "11111110" // /* MW 2 */ + 6843 "11111001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 193 22 first +.src_ref 7 "conv2d_dw_bf16_params.h" 201 47 first + 6844 "01011100" // ST r3, [p7], #24; MUL r31, r22, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6845 "10011111" // /* MW 5 */ + 6846 "01111110" // /* MW 4 */ + 6847 "00111011" // /* MW 3 */ + 6848 "10001110" // /* MW 2 */ + 6849 "11101101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 204 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 212 52 first + 6850 "01011100" // ST r4, [p7], #4; LSHL r22, r22, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6851 "00011011" // /* MW 5 */ + 6852 "01011010" // /* MW 4 */ + 6853 "00111011" // /* MW 3 */ + 6854 "10010010" // /* MW 2 */ + 6855 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 208 16 first + 6856 "10011000" // LSHL r3, r31, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6857 "01111101" // /* MW 3 */ + 6858 "11000111" // /* MW 2 */ + 6859 "00010111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 208 63 + 6860 "10011000" // LSHL r4, r4, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6861 "01011101" // /* MW 3 */ + 6862 "00001001" // /* MW 2 */ + 6863 "00010001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 208 71 +.src_ref 7 "conv2d_dw_bf16_params.h" 214 72 first + 6864 "10100100" // SUB r25, r22, r3; ADD.NC r4, r4, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6865 "00000010" // /* MW 5 */ + 6866 "00100100" // /* MW 4 */ + 6867 "00110010" // /* MW 3 */ + 6868 "01000110" // /* MW 2 */ + 6869 "10110110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 200 45 +.src_ref 7 "conv2d_dw_bf16_params.h" 205 42 +.src_ref 7 "conv2d_dw_bf16_params.h" 208 23 first +.src_ref 7 "conv2d_dw_bf16_params.h" 214 72 +.src_ref 7 "conv2d_dw_bf16_params.h" 219 63 + 6870 "10111010" // MOVA r0, #-64; SUB r6, r3, r4; ADD.NC r25, r25, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6871 "10101000" // /* MW 9 */ + 6872 "01000000" // /* MW 8 */ + 6873 "00101110" // /* MW 7 */ + 6874 "00001111" // /* MW 6 */ + 6875 "01100010" // /* MW 5 */ + 6876 "00000110" // /* MW 4 */ + 6877 "00000000" // /* MW 3 */ + 6878 "00000000" // /* MW 2 */ + 6879 "11111000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 205 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 216 53 first + 6880 "01011100" // ST r0, [p7], #4; MUL r1, r31, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6881 "00111111" // /* MW 5 */ + 6882 "10000100" // /* MW 4 */ + 6883 "00111111" // /* MW 3 */ + 6884 "10000010" // /* MW 2 */ + 6885 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 206 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 209 53 first + 6886 "01011100" // ST r7, [p7], #4; MUL r31, r31, r7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6887 "11111111" // /* MW 5 */ + 6888 "11111100" // /* MW 4 */ + 6889 "00111111" // /* MW 3 */ + 6890 "10011110" // /* MW 2 */ + 6891 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 207 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 213 69 first + 6892 "01011100" // ST r6, [p7], #4; LSHL r5, r5, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6893 "01011011" // /* MW 5 */ + 6894 "10010110" // /* MW 4 */ + 6895 "00110010" // /* MW 3 */ + 6896 "10011010" // /* MW 2 */ + 6897 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 209 93 first +.src_ref 7 "conv2d_dw_bf16_params.h" 213 73 + 6898 "00100100" // LSHL r6, r31, r23; ADD.NC r31, r5, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6899 "11111111" // /* MW 5 */ + 6900 "10100101" // /* MW 4 */ + 6901 "10111111" // /* MW 3 */ + 6902 "10101111" // /* MW 2 */ + 6903 "11111001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 209 42 +.src_ref 7 "conv2d_dw_bf16_params.h" 209 100 +.src_ref 7 "conv2d_dw_bf16_params.h" 216 61 + 6904 "10111010" // MOVA r4, #7; LSHL r5, r20, r18; ADD.NC r18, r6, r4 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6905 "10101000" // /* MW 9 */ + 6906 "10001000" // /* MW 8 */ + 6907 "01001001" // /* MW 7 */ + 6908 "01101110" // /* MW 6 */ + 6909 "01011001" // /* MW 5 */ + 6910 "00101000" // /* MW 4 */ + 6911 "00000000" // /* MW 3 */ + 6912 "11100100" // /* MW 2 */ + 6913 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 209 100 + 6914 "10011000" // SUB r18, r24, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6915 "00100001" // /* MW 3 */ + 6916 "00100101" // /* MW 2 */ + 6917 "00010110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 209 42 +.src_ref 7 "conv2d_dw_bf16_params.h" 211 77 first +.src_ref 7 "conv2d_dw_bf16_params.h" 216 53 first + 6918 "00111010" // ST r18, [p7], #4; LSHL r1, r1, r23; ADD.NC r18, r5, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6919 "11001001" // /* MW 9 */ + 6920 "01111111" // /* MW 8 */ + 6921 "01001001" // /* MW 7 */ + 6922 "11101110" // /* MW 6 */ + 6923 "00011011" // /* MW 5 */ + 6924 "00000010" // /* MW 4 */ + 6925 "00110000" // /* MW 3 */ + 6926 "11001010" // /* MW 2 */ + 6927 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 211 42 +.src_ref 7 "conv2d_dw_bf16_params.h" 216 61 + 6928 "01011100" // ST r18, [p7], #4; ADD r6, r1, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6929 "11000001" // /* MW 5 */ + 6930 "10011010" // /* MW 4 */ + 6931 "00110000" // /* MW 3 */ + 6932 "11001010" // /* MW 2 */ + 6933 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 212 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 216 61 + 6934 "01011100" // ST r22, [p7], #4; LSHL r1, r31, r4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6935 "10011011" // /* MW 5 */ + 6936 "10000100" // /* MW 4 */ + 6937 "00111111" // /* MW 3 */ + 6938 "11011010" // /* MW 2 */ + 6939 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 213 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 216 61 first +.src_ref 7 "conv2d_dw_bf16_params.h" 224 116 +.src_ref 7 "conv2d_dw_bf16_params.h" 224 140 + 6940 "00111010" // ST r31, [p7], #4; ADD r22, r3, r1; MOV r1, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6941 "01011001" // /* MW 9 */ + 6942 "11111111" // /* MW 8 */ + 6943 "00101111" // /* MW 7 */ + 6944 "10000100" // /* MW 6 */ + 6945 "01100000" // /* MW 5 */ + 6946 "00000111" // /* MW 4 */ + 6947 "00110000" // /* MW 3 */ + 6948 "11111110" // /* MW 2 */ + 6949 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 214 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 216 61 +.src_ref 7 "conv2d_dw_bf16_params.h" 222 43 +.src_ref 7 "conv2d_dw_bf16_params.h" 225 43 +.src_ref 7 "conv2d_dw_bf16_params.h" 229 41 +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 + 6950 "00111010" // ST r25, [p7], #4; SUB r3, r6, r22; MOV r22, #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6951 "01011001" // /* MW 9 */ + 6952 "01000000" // /* MW 8 */ + 6953 "11001000" // /* MW 7 */ + 6954 "00001110" // /* MW 6 */ + 6955 "00111011" // /* MW 5 */ + 6956 "00001100" // /* MW 4 */ + 6957 "00110000" // /* MW 3 */ + 6958 "11100110" // /* MW 2 */ + 6959 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 215 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 219 53 first + 6960 "01011100" // ST r3, [p7], #4; LSHL r16, r15, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6961 "00011011" // /* MW 5 */ + 6962 "11000010" // /* MW 4 */ + 6963 "00110111" // /* MW 3 */ + 6964 "10001110" // /* MW 2 */ + 6965 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 200 45 first +.src_ref 7 "conv2d_dw_bf16_params.h" 218 43 first +.src_ref 7 "conv2d_dw_bf16_params.h" 219 63 + 6966 "01011100" // ST r18, [p7], #4; SEL.EQZ r0, r0, r24, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6967 "00000100" // /* MW 5 */ + 6968 "00000011" // /* MW 4 */ + 6969 "00110000" // /* MW 3 */ + 6970 "11001010" // /* MW 2 */ + 6971 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 219 60 first +.src_ref 7 "conv2d_dw_bf16_params.h" 221 73 first + 6972 "10100100" // LSHL r3, r30, r23; ADD.NC r0, r16, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6973 "00000010" // /* MW 5 */ + 6974 "00110000" // /* MW 4 */ + 6975 "10110000" // /* MW 3 */ + 6976 "11101111" // /* MW 2 */ + 6977 "11110000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 219 43 +.src_ref 7 "conv2d_dw_bf16_params.h" 221 73 + 6978 "01011100" // ST r0, [p7], #4; SUB r16, r16, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6979 "01100011" // /* MW 5 */ + 6980 "01000000" // /* MW 4 */ + 6981 "00111000" // /* MW 3 */ + 6982 "10000010" // /* MW 2 */ + 6983 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 220 43 first +.src_ref 7 "conv2d_dw_bf16_params.h" 221 73 +.src_ref 7 "conv2d_dw_bf16_params.h" 224 116 first +.src_ref 7 "conv2d_dw_bf16_params.h" 224 140 first + 6984 "00111010" // ST r31, [p7], #4; MAC r1, r1, r5, r2; ADD.NC r31, r16, #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6985 "00001001" // /* MW 9 */ + 6986 "00010000" // /* MW 8 */ + 6987 "11101100" // /* MW 7 */ + 6988 "00110011" // /* MW 6 */ + 6989 "00010001" // /* MW 5 */ + 6990 "00001010" // /* MW 4 */ + 6991 "00110000" // /* MW 3 */ + 6992 "11111110" // /* MW 2 */ + 6993 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 221 43 first +.src_ref 7 "conv2d_dw_bf16_params.h" 230 47 first + 6994 "01011100" // ST r31, [p7], #4; LSHL r31, r18, r21 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6995 "10111011" // /* MW 5 */ + 6996 "01111110" // /* MW 4 */ + 6997 "00111001" // /* MW 3 */ + 6998 "11111110" // /* MW 2 */ + 6999 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 222 43 first +.src_ref 7 "conv2d_dw_bf16_params.h" 226 49 first + 7000 "01011100" // ST r22, [p7], #4; LSHL r2, r1, r21 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7001 "10111011" // /* MW 5 */ + 7002 "10001010" // /* MW 4 */ + 7003 "00110000" // /* MW 3 */ + 7004 "11011010" // /* MW 2 */ + 7005 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 224 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 226 49 + 7006 "01011100" // ST r1, [p7], #4; SUB r1, r24, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7007 "01000011" // /* MW 5 */ + 7008 "00000100" // /* MW 4 */ + 7009 "00111100" // /* MW 3 */ + 7010 "10000110" // /* MW 2 */ + 7011 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 225 43 first +.src_ref 7 "conv2d_dw_bf16_params.h" 230 47 first + 7012 "01011100" // ST r22, [p7], #4; SUB r2, r24, r31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7013 "11100011" // /* MW 5 */ + 7014 "00001011" // /* MW 4 */ + 7015 "00111100" // /* MW 3 */ + 7016 "11011010" // /* MW 2 */ + 7017 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 226 43 first + 7018 "10011000" // ST r1, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7019 "00110001" // /* MW 3 */ + 7020 "00011100" // /* MW 2 */ + 7021 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 228 40 first + 7022 "10011000" // ST r18, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7023 "01010001" // /* MW 3 */ + 7024 "00011110" // /* MW 2 */ + 7025 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 229 41 first + 7026 "10011000" // ST r22, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7027 "11010001" // /* MW 3 */ + 7028 "00011110" // /* MW 2 */ + 7029 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 230 41 first + 7030 "10011000" // ST r2, [p7], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7031 "01010001" // /* MW 3 */ + 7032 "00001000" // /* MW 2 */ + 7033 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 54 first + 7034 "10011000" // LDA.u8 r1, [p7], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7035 "00101010" // /* MW 3 */ + 7036 "00101000" // /* MW 2 */ + 7037 "00000111" // /* MW 1 */ + 7038 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7039 "00000000" // /* MW 1 */ + 7040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7041 "00000000" // /* MW 1 */ + 7042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7043 "00000000" // /* MW 1 */ + 7044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7045 "00000000" // /* MW 1 */ + 7046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7047 "00000000" // /* MW 1 */ + 7048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7049 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 +.src_ref 7 "conv2d_dw_bf16_params.h" 232 58 + 7050 "10000100" // JZ r1, #7088 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7088 delay_slots=5 */ + 7051 "00000001" // /* MW 5 */ + 7052 "00000000" // /* MW 4 */ + 7053 "11011000" // /* MW 3 */ + 7054 "00001101" // /* MW 2 */ + 7055 "00001000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 190 20 +.delay_slot + 7056 "00011000" // MOVX r16, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7057 "00001101" // /* MW 3 */ + 7058 "00100000" // /* MW 2 */ + 7059 "00010000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 190 20 first +.delay_slot + 7060 "10011000" // LSHL r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7061 "00001101" // /* MW 3 */ + 7062 "11100001" // /* MW 2 */ + 7063 "00010011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 +.delay_slot + 7064 "01000100" // MOVXM r31, #-8454144 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7065 "00000000" // /* MW 5 */ + 7066 "10100000" // /* MW 4 */ + 7067 "00001111" // /* MW 3 */ + 7068 "01111111" // /* MW 2 */ + 7069 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7071 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7073 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 + 7074 "01111110" // NOPA; NOPB; NOPS; MOVX r31, #0; NOPM /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 7075 "01100000" // /* MW 13 */ + 7076 "00101011" // /* MW 12 */ + 7077 "00000000" // /* MW 11 */ + 7078 "10101111" // /* MW 10 */ + 7079 "00110100" // /* MW 9 */ + 7080 "00000000" // /* MW 8 */ + 7081 "00000001" // /* MW 7 */ + 7082 "00111110" // /* MW 6 */ + 7083 "00100000" // /* MW 5 */ + 7084 "00000000" // /* MW 4 */ + 7085 "11110000" // /* MW 3 */ + 7086 "00101100" // /* MW 2 */ + 7087 "00000000" // /* MW 1 */ +.label TGT_F_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh_608 +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 +.src_ref 7 "conv2d_dw_bf16_params.h" 236 40 + 7088 "10111010" // MOVA m0, #-197; MOVXM p0, #508448 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7089 "00010000" // /* MW 9 */ + 7090 "00010000" // /* MW 8 */ + 7091 "00110001" // /* MW 7 */ + 7092 "11110000" // /* MW 6 */ + 7093 "00000001" // /* MW 5 */ + 7094 "00000000" // /* MW 4 */ + 7095 "10000000" // /* MW 3 */ + 7096 "01100000" // /* MW 2 */ + 7097 "11100111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 first +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 first +.src_ref 7 "conv2d_dw_bf16_params.h" 234 77 first + 7098 "10111010" // LDA.s8 r17, [p0]; AND r29, r29, r17; VINSERT.32 x0, x0, #0, r31 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7099 "10111000" // /* MW 9 */ + 7100 "11111000" // /* MW 8 */ + 7101 "00000001" // /* MW 7 */ + 7102 "10100100" // /* MW 6 */ + 7103 "11011000" // /* MW 5 */ + 7104 "00111011" // /* MW 4 */ + 7105 "01010000" // /* MW 3 */ + 7106 "11000100" // /* MW 2 */ + 7107 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 +.src_ref 7 "conv2d_dw_bf16_params.h" 239 122 + 7108 "10111010" // LDA r1, [sp, #-8]; MOVX r31, #5; VMOV bmll0, x0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7109 "01111000" // /* MW 9 */ + 7110 "01001001" // /* MW 8 */ + 7111 "00000000" // /* MW 7 */ + 7112 "10101000" // /* MW 6 */ + 7113 "11110000" // /* MW 5 */ + 7114 "00000001" // /* MW 4 */ + 7115 "00100000" // /* MW 3 */ + 7116 "00000110" // /* MW 2 */ + 7117 "11111111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 237 53 +.src_ref 7 "conv2d_dw_bf16_params.h" 239 122 first +.src_ref 7 "conv2d_dw_bf16_params.h" 240 + 7118 "10111010" // LDA lr, [sp, #-4]; LSHL r31, r15, r31; MOV m1, #201 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7119 "01011000" // /* MW 9 */ + 7120 "11001001" // /* MW 8 */ + 7121 "10000000" // /* MW 7 */ + 7122 "11101100" // /* MW 6 */ + 7123 "11111111" // /* MW 5 */ + 7124 "00011111" // /* MW 4 */ + 7125 "00100000" // /* MW 3 */ + 7126 "10000111" // /* MW 2 */ + 7127 "11111111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 + 7128 "00101100" // LDA p0, [sp, #-16]; MOVX r25, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7129 "00000010" // /* MW 5 */ + 7130 "01100100" // /* MW 4 */ + 7131 "00100000" // /* MW 3 */ + 7132 "00000011" // /* MW 2 */ + 7133 "11111110" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7134 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7135 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7136 "00011000" // LDA r14, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7137 "11010001" // /* MW 3 */ + 7138 "11110101" // /* MW 2 */ + 7139 "00000111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 39 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7140 "00011000" // ST.s16 r1, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7141 "00110111" // /* MW 3 */ + 7142 "00101100" // /* MW 2 */ + 7143 "00000111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 +.src_ref 7 "conv2d_dw_bf16_params.h" 234 81 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7144 "11100100" // MUL r28, r29, r28; MOV crRnd, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7145 "01000001" // /* MW 5 */ + 7146 "01110001" // /* MW 4 */ + 7147 "11111111" // /* MW 3 */ + 7148 "00111001" // /* MW 2 */ + 7149 "11101111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 first +.src_ref 7 "conv2d_dw_bf16_params.h" 239 74 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7150 "00111010" // VCONV.bf16.fp32 wl0, bmll0; LSHL r17, r15, r26; MOV r15, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7151 "01111001" // /* MW 9 */ + 7152 "01010000" // /* MW 8 */ + 7153 "11101000" // /* MW 7 */ + 7154 "01101101" // /* MW 6 */ + 7155 "00011101" // /* MW 5 */ + 7156 "00011111" // /* MW 4 */ + 7157 "11000000" // /* MW 3 */ + 7158 "00000010" // /* MW 2 */ + 7159 "00001000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 234 109 first +.src_ref 7 "conv2d_dw_bf16_params.h" 239 51 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7160 "00100100" // MUL r20, r28, r20; ADD.NC r17, r17, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7161 "11111111" // /* MW 5 */ + 7162 "10110001" // /* MW 4 */ + 7163 "11111000" // /* MW 3 */ + 7164 "00101001" // /* MW 2 */ + 7165 "11100101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 first +.src_ref 7 "conv2d_dw_bf16_params.h" 235 59 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7166 "01100100" // LSHL r29, r20, r19; VEXTRACT.16 r1, x0, #0, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7167 "00000011" // /* MW 5 */ + 7168 "10000010" // /* MW 4 */ + 7169 "10110000" // /* MW 3 */ + 7170 "01100111" // /* MW 2 */ + 7171 "10100111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 236 52 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7172 "10011000" // MUL r28, r30, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7173 "11101111" // /* MW 3 */ + 7174 "10111000" // /* MW 2 */ + 7175 "00010111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 51 first + 7176 "10011000" // LSHL r21, r17, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7177 "01011101" // /* MW 3 */ + 7178 "01101011" // /* MW 2 */ + 7179 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 234 40 first +.src_ref 7 "conv2d_dw_bf16_params.h" 237 41 first + 7180 "01011100" // ST r20, [p7], #4; LSHL r23, r28, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7181 "11111011" // /* MW 5 */ + 7182 "01011110" // /* MW 4 */ + 7183 "00111110" // /* MW 3 */ + 7184 "11010010" // /* MW 2 */ + 7185 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 235 41 first +.src_ref 7 "conv2d_dw_bf16_params.h" 239 51 first + 7186 "01011100" // ST r29, [p7], #4; SUB r26, r31, r21 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7187 "10100011" // /* MW 5 */ + 7188 "11101010" // /* MW 4 */ + 7189 "00111111" // /* MW 3 */ + 7190 "11110110" // /* MW 2 */ + 7191 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 236 40 first +.src_ref 7 "conv2d_dw_bf16_params.h" 239 51 + 7192 "01011100" // ST r28, [p7], m0; MAC r21, r21, r31, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7193 "01001100" // /* MW 5 */ + 7194 "11010110" // /* MW 4 */ + 7195 "00111111" // /* MW 3 */ + 7196 "01110010" // /* MW 2 */ + 7197 "11100001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 237 53 first + 7198 "10011000" // LDA.u8 r20, [p7], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7199 "10001010" // /* MW 3 */ + 7200 "00101010" // /* MW 2 */ + 7201 "00000111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 240 first + 7202 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7203 "00000001" // /* MW 5 */ + 7204 "00000000" // /* MW 4 */ + 7205 "00000000" // /* MW 3 */ + 7206 "11111000" // /* MW 2 */ + 7207 "11111111" // /* MW 1 */ + 7208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7209 "00000000" // /* MW 1 */ + 7210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7211 "00000000" // /* MW 1 */ + 7212 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7213 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 139 first + 7214 "10011000" // LSHL r30, r30, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7215 "00111101" // /* MW 3 */ + 7216 "10111101" // /* MW 2 */ + 7217 "00010111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 51 + 7218 "10011000" // SUB r21, r30, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7219 "01010001" // /* MW 3 */ + 7220 "10101011" // /* MW 2 */ + 7221 "00010111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 237 57 first + 7222 "10011000" // EQ r27, r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7223 "01000111" // /* MW 3 */ + 7224 "11110111" // /* MW 2 */ + 7225 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 237 41 + 7226 "00011000" // SEL.EQZ r19, r24, r23, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7227 "01110010" // /* MW 3 */ + 7228 "00100111" // /* MW 2 */ + 7229 "00010110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 237 39 + 7230 "10011000" // ST r19, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7231 "01110001" // /* MW 3 */ + 7232 "00011110" // /* MW 2 */ + 7233 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 238 39 first + 7234 "10011000" // ST r16, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7235 "00010001" // /* MW 3 */ + 7236 "00011110" // /* MW 2 */ + 7237 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 first + 7238 "10011000" // ST r17, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7239 "00110001" // /* MW 3 */ + 7240 "00011110" // /* MW 2 */ + 7241 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 +.src_ref 7 "conv2d_dw_bf16_params.h" 240 first + 7242 "01011100" // ST r22, [p7], #4; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 7243 "00000000" // /* MW 5 */ + 7244 "01010000" // /* MW 4 */ + 7245 "00110000" // /* MW 3 */ + 7246 "11011010" // /* MW 2 */ + 7247 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 first +.delay_slot + 7248 "10011000" // ST r18, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7249 "01010001" // /* MW 3 */ + 7250 "00011110" // /* MW 2 */ + 7251 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 +.delay_slot + 7252 "10011000" // ST r26, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7253 "01010001" // /* MW 3 */ + 7254 "00011111" // /* MW 2 */ + 7255 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 +.delay_slot + 7256 "10011000" // ST r21, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7257 "10110001" // /* MW 3 */ + 7258 "00011110" // /* MW 2 */ + 7259 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 +.delay_slot + 7260 "10011000" // ST r25, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7261 "00110001" // /* MW 3 */ + 7262 "00000111" // /* MW 2 */ + 7263 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 +.delay_slot + 7264 "00000010" // ST r25, [p7, #4]; MOV p7, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7265 "01110000" // /* MW 7 */ + 7266 "01100000" // /* MW 6 */ + 7267 "10110000" // /* MW 5 */ + 7268 "00000011" // /* MW 4 */ + 7269 "00110000" // /* MW 3 */ + 7270 "11100110" // /* MW 2 */ +.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh__end +.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_end0 + 7271 "11100010" // /* MW 1 */ +.label __Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params___func_begin0 +.label _Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params +.function conv2d_dw_core _Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 7 "conv2d_dw_bf16.h" 158 first +.src_ref 7 "conv2d_dw_bf16.h" 179 4 +.src_ref 7 "conv2d_dw_bf16.h" 183 4 +.function_start + 7280 "10110110" // MOVA m6, #-120; VLDB x6, [p0], #64; MOVXM p4, #508972 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7281 "00010000" // /* MW 11 */ + 7282 "00010110" // /* MW 10 */ + 7283 "00110010" // /* MW 9 */ + 7284 "11110010" // /* MW 8 */ + 7285 "00000001" // /* MW 7 */ + 7286 "00000000" // /* MW 6 */ + 7287 "01101000" // /* MW 5 */ + 7288 "00111011" // /* MW 4 */ + 7289 "10000000" // /* MW 3 */ + 7290 "00011000" // /* MW 2 */ + 7291 "11110001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 153 18 +.src_ref 4 "shuffle.hpp" 153 18 +.src_ref 7 "conv2d_dw_bf16.h" 179 4 first +.src_ref 7 "conv2d_dw_bf16.h" 202 56 + 7292 "10110110" // LDA dj2, [p4], #-4; VLDB x1, [p0], #64; MOVX r1, #16; MOV m5, #128 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7293 "01011000" // /* MW 11 */ + 7294 "10000000" // /* MW 10 */ + 7295 "10000000" // /* MW 9 */ + 7296 "00001010" // /* MW 8 */ + 7297 "00010010" // /* MW 7 */ + 7298 "00000000" // /* MW 6 */ + 7299 "11101000" // /* MW 5 */ + 7300 "00111000" // /* MW 4 */ + 7301 "11010000" // /* MW 3 */ + 7302 "10101000" // /* MW 2 */ + 7303 "10011111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 179 4 +.src_ref 7 "conv2d_dw_bf16.h" 202 56 +.src_ref 7 "conv2d_dw_bf16.h" 228 12 +.src_ref 7 "conv2d_dw_bf16.h" 229 12 +.src_ref 7 "conv2d_dw_bf16.h" 230 12 +.src_ref 7 "conv2d_dw_bf16.h" 231 12 +.src_ref 7 "conv2d_dw_bf16.h" 232 12 +.src_ref 7 "conv2d_dw_bf16.h" 233 12 +.src_ref 7 "conv2d_dw_bf16.h" 234 12 +.src_ref 7 "conv2d_dw_bf16.h" 235 12 + 7304 "10111010" // LDA dn2, [p4], #12; MOVX r0, #60; MOV m4, #-112 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7305 "01011000" // /* MW 9 */ + 7306 "10010000" // /* MW 8 */ + 7307 "00000111" // /* MW 7 */ + 7308 "10001010" // /* MW 6 */ + 7309 "00000111" // /* MW 5 */ + 7310 "00000000" // /* MW 4 */ + 7311 "11010000" // /* MW 3 */ + 7312 "10100100" // /* MW 2 */ + 7313 "10000111" // /* MW 1 */ +.src_ref 8 "aie_core.h" 81 15 +.src_ref 8 "aie_core.h" 81 15 +.src_ref 8 "aie_core.h" 100 15 +.src_ref 8 "aie_core.h" 100 15 +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "shuffle.hpp" 153 18 +.src_ref 4 "shuffle.hpp" 153 18 +.src_ref 7 "conv2d_dw_bf16.h" 179 4 + 7314 "10111010" // LDA dj6, [p4], #-4; MOVX r2, #32; MOV dc4, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7315 "01011000" // /* MW 9 */ + 7316 "00000000" // /* MW 8 */ + 7317 "01100000" // /* MW 7 */ + 7318 "00001010" // /* MW 6 */ + 7319 "00100100" // /* MW 5 */ + 7320 "00000000" // /* MW 4 */ + 7321 "11010000" // /* MW 3 */ + 7322 "11101000" // /* MW 2 */ + 7323 "10011111" // /* MW 1 */ +.src_ref 8 "aie_core.h" 81 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 7 "conv2d_dw_bf16.h" 179 4 +.src_ref 7 "conv2d_dw_bf16.h" 208 8 first + 7324 "01110110" // LDA dn6, [p4], #8; MOVS dc7, dc4; MOVXM ls, #7520 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7325 "00010000" // /* MW 11 */ + 7326 "10110000" // /* MW 10 */ + 7327 "01111110" // /* MW 9 */ + 7328 "00000100" // /* MW 8 */ + 7329 "00000000" // /* MW 7 */ + 7330 "00000000" // /* MW 6 */ + 7331 "01001011" // /* MW 5 */ + 7332 "00010000" // /* MW 4 */ + 7333 "11010111" // /* MW 3 */ + 7334 "11100100" // /* MW 2 */ + 7335 "10000101" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 7 "conv2d_dw_bf16.h" 179 4 first +.src_ref 7 "conv2d_dw_bf16.h" 208 8 + 7336 "01110110" // LDA m2, [p4], #8; MOVS dc2, dc4; MOVXM le, #7600 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7337 "00010000" // /* MW 11 */ + 7338 "11011000" // /* MW 10 */ + 7339 "10111110" // /* MW 9 */ + 7340 "00000101" // /* MW 8 */ + 7341 "00000000" // /* MW 7 */ + 7342 "00000000" // /* MW 6 */ + 7343 "01001011" // /* MW 5 */ + 7344 "00010000" // /* MW 4 */ + 7345 "11010010" // /* MW 3 */ + 7346 "10100000" // /* MW 2 */ + 7347 "10000101" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 7 "conv2d_dw_bf16.h" 180 4 first +.src_ref 7 "conv2d_dw_bf16.h" 202 56 + 7348 "01110110" // LDA dj0, [p4], #-4; MOVS dc6, dc4; MOVXM p5, #508448 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7349 "00010000" // /* MW 11 */ + 7350 "00010000" // /* MW 10 */ + 7351 "10110001" // /* MW 9 */ + 7352 "11110010" // /* MW 8 */ + 7353 "00000001" // /* MW 7 */ + 7354 "00000000" // /* MW 6 */ + 7355 "01001011" // /* MW 5 */ + 7356 "00010000" // /* MW 4 */ + 7357 "11010110" // /* MW 3 */ + 7358 "10001000" // /* MW 2 */ + 7359 "10011111" // /* MW 1 */ +.src_ref 8 "aie_core.h" 81 15 +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "shuffle.hpp" 153 18 +.src_ref 4 "shuffle.hpp" 153 18 +.src_ref 7 "conv2d_dw_bf16.h" 180 4 + 7360 "01110110" // LDA dn0, [p4], #12; MOVS dc1, dc4; MOVX r3, #48; MOV dc3, dc4 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7361 "01111000" // /* MW 11 */ + 7362 "11000000" // /* MW 10 */ + 7363 "11100100" // /* MW 9 */ + 7364 "00001001" // /* MW 8 */ + 7365 "00110110" // /* MW 7 */ + 7366 "00000000" // /* MW 6 */ + 7367 "01001011" // /* MW 5 */ + 7368 "00010000" // /* MW 4 */ + 7369 "11010001" // /* MW 3 */ + 7370 "10000100" // /* MW 2 */ + 7371 "10000111" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 180 4 + 7372 "10111010" // LDA dj4, [p4], #-4; MOVS dc0, dc4; VSHIFT x4, x6, x1, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7373 "01100010" // /* MW 9 */ + 7374 "01000011" // /* MW 8 */ + 7375 "00011000" // /* MW 7 */ + 7376 "00000001" // /* MW 6 */ + 7377 "01001011" // /* MW 5 */ + 7378 "00010000" // /* MW 4 */ + 7379 "11010000" // /* MW 3 */ + 7380 "11001000" // /* MW 2 */ + 7381 "10011111" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 7 "conv2d_dw_bf16.h" 180 4 first + 7382 "11010100" // LDA dn4, [p4], #8; MOV dc5, dc4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7383 "00000001" // /* MW 5 */ + 7384 "10010011" // /* MW 4 */ + 7385 "11011011" // /* MW 3 */ + 7386 "11000100" // /* MW 2 */ + 7387 "10000101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 180 4 + 7388 "10011000" // LDA m0, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7389 "00000110" // /* MW 3 */ + 7390 "00101100" // /* MW 2 */ + 7391 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 181 4 first + 7392 "10011000" // LDA dj1, [p4], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7393 "11000110" // /* MW 3 */ + 7394 "11111100" // /* MW 2 */ + 7395 "00000100" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 7 "conv2d_dw_bf16.h" 181 4 + 7396 "00111100" // LDA dn1, [p4], #12; VLDB.3D x2, [p0], d2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7397 "01101000" // /* MW 5 */ + 7398 "10110001" // /* MW 4 */ + 7399 "11010000" // /* MW 3 */ + 7400 "10010100" // /* MW 2 */ + 7401 "10000111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 181 4 first + 7402 "10011000" // LDA dj5, [p4], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7403 "11000110" // /* MW 3 */ + 7404 "11111110" // /* MW 2 */ + 7405 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 181 4 + 7406 "10011000" // LDA dn5, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7407 "10100110" // /* MW 3 */ + 7408 "00101110" // /* MW 2 */ + 7409 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 181 4 + 7410 "10011000" // LDA m1, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7411 "10000110" // /* MW 3 */ + 7412 "00101100" // /* MW 2 */ + 7413 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 182 4 first + 7414 "10011000" // LDA dj7, [p4], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7415 "11000110" // /* MW 3 */ + 7416 "11111111" // /* MW 2 */ + 7417 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 182 4 + 7418 "10011000" // LDA dn7, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7419 "10100110" // /* MW 3 */ + 7420 "00101111" // /* MW 2 */ + 7421 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 182 4 + 7422 "10011000" // LDA m7, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7423 "10000110" // /* MW 3 */ + 7424 "00101111" // /* MW 2 */ + 7425 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 183 4 first + 7426 "10011000" // LDA dj3, [p4], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7427 "11000110" // /* MW 3 */ + 7428 "11111101" // /* MW 2 */ + 7429 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 183 4 + 7430 "10011000" // LDA dn3, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7431 "10100110" // /* MW 3 */ + 7432 "00101101" // /* MW 2 */ + 7433 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 183 4 + 7434 "10011000" // LDA m3, [p4], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7435 "10000110" // /* MW 3 */ + 7436 "11001001" // /* MW 2 */ + 7437 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 202 56 first + 7438 "10011000" // LDA r4, [p4], m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7439 "10010110" // /* MW 3 */ + 7440 "10101000" // /* MW 2 */ + 7441 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 202 56 + 7442 "10011000" // LDA.s16 r7, [p4], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7443 "11110010" // /* MW 3 */ + 7444 "10001000" // /* MW 2 */ + 7445 "00000100" // /* MW 1 */ + 7446 "10011000" // LDA m4, [p4], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7447 "00000110" // /* MW 3 */ + 7448 "01001110" // /* MW 2 */ + 7449 "00000100" // /* MW 1 */ +.src_ref 8 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 7 "conv2d_dw_bf16.h" 202 56 first + 7450 "00111100" // LDA r5, [p4, #-28]; VLDB.2D x3, [p1], d7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7451 "11101000" // /* MW 5 */ + 7452 "11100001" // /* MW 4 */ + 7453 "11010011" // /* MW 3 */ + 7454 "10010110" // /* MW 2 */ + 7455 "10010010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 first + 7456 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7457 "00101011" // /* MW 3 */ + 7458 "00000100" // /* MW 2 */ + 7459 "00000010" // /* MW 1 */ + 7460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7461 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 202 56 first + 7462 "10011000" // LDA.s8 r6, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7463 "11000010" // /* MW 3 */ + 7464 "00000100" // /* MW 2 */ + 7465 "00000101" // /* MW 1 */ + 7466 "00011000" // ADD r4, r4, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7467 "11111011" // /* MW 3 */ + 7468 "00001001" // /* MW 2 */ + 7469 "00010001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 202 4 + 7470 "10111010" // LDA r17, [p4]; MOVXM p4, #7664 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7471 "00010000" // /* MW 9 */ + 7472 "11111000" // /* MW 8 */ + 7473 "00110110" // /* MW 7 */ + 7474 "00000110" // /* MW 6 */ + 7475 "00000000" // /* MW 5 */ + 7476 "00000000" // /* MW 4 */ + 7477 "11010000" // /* MW 3 */ + 7478 "11000110" // /* MW 2 */ + 7479 "10000000" // /* MW 1 */ + 7480 "11111000" // VBCST.16 x0, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7481 "01110010" // /* MW 3 */ + 7482 "00011101" // /* MW 2 */ + 7483 "00011000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 226 12 first + 7484 "01011000" // VEXTBCST.128 x10, x3, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7485 "00000011" // /* MW 3 */ + 7486 "00011100" // /* MW 2 */ + 7487 "00011101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 208 8 first + 7488 "10011000" // ADD.NC lc, r5, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7489 "11111111" // /* MW 3 */ + 7490 "01110010" // /* MW 2 */ + 7491 "00011101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 228 12 +.src_ref 7 "conv2d_dw_bf16.h" 228 12 first + 7492 "01100110" // NOPA; NOPB; VMOV cml3, cml0; VMAC.f dm4, dm3, x6, x10, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7493 "01000001" // /* MW 11 */ + 7494 "01101101" // /* MW 10 */ + 7495 "00000100" // /* MW 9 */ + 7496 "11100010" // /* MW 8 */ + 7497 "10001010" // /* MW 7 */ + 7498 "00000000" // /* MW 6 */ + 7499 "00100011" // /* MW 5 */ + 7500 "00000000" // /* MW 4 */ + 7501 "11110000" // /* MW 3 */ + 7502 "00101100" // /* MW 2 */ + 7503 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 7 "conv2d_dw_bf16.h" 223 12 first +.src_ref 7 "conv2d_dw_bf16.h" 232 12 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7504 "00001011" // NOPA; NOPB; NOPS; MOVX crRnd, r6; VEXTBCST.128 x8, x3, #1; VMAC.f dm1, dm0, x1, x10, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7505 "00011010" // /* MW 15 */ + 7506 "00001000" // /* MW 14 */ + 7507 "10101000" // /* MW 13 */ + 7508 "00000011" // /* MW 12 */ + 7509 "00001110" // /* MW 11 */ + 7510 "00000010" // /* MW 10 */ + 7511 "11010100" // /* MW 9 */ + 7512 "00001101" // /* MW 8 */ + 7513 "01011011" // /* MW 7 */ + 7514 "00000001" // /* MW 6 */ + 7515 "00100000" // /* MW 5 */ + 7516 "00000000" // /* MW 4 */ + 7517 "11110000" // /* MW 3 */ + 7518 "00101100" // /* MW 2 */ + 7519 "00000000" // /* MW 1 */ +.label ZLS_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_240 +.src_ref 8 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 153 18 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.noswbrkpt +.loop_nesting 1 + 7520 "10111010" // VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7521 "01101110" // /* MW 9 */ + 7522 "10000011" // /* MW 8 */ + 7523 "10000100" // /* MW 7 */ + 7524 "00000010" // /* MW 6 */ + 7525 "11110100" // /* MW 5 */ + 7526 "11110000" // /* MW 4 */ + 7527 "01110001" // /* MW 3 */ + 7528 "10110011" // /* MW 2 */ + 7529 "00000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 7 "conv2d_dw_bf16.h" 224 12 first +.src_ref 7 "conv2d_dw_bf16.h" 229 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7530 "01001010" // VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7531 "00000001" // /* MW 9 */ + 7532 "10001001" // /* MW 8 */ + 7533 "00000010" // /* MW 7 */ + 7534 "01000110" // /* MW 6 */ + 7535 "00001011" // /* MW 5 */ + 7536 "10011100" // /* MW 4 */ + 7537 "11101010" // /* MW 3 */ + 7538 "00111000" // /* MW 2 */ + 7539 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 233 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7540 "01001010" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r2; VMAC.f dm1, dm1, x10, x8, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7541 "00000001" // /* MW 9 */ + 7542 "00110101" // /* MW 8 */ + 7543 "00000001" // /* MW 7 */ + 7544 "11000110" // /* MW 6 */ + 7545 "10001010" // /* MW 5 */ + 7546 "00110000" // /* MW 4 */ + 7547 "01101010" // /* MW 3 */ + 7548 "10110001" // /* MW 2 */ + 7549 "00000000" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7550 "11011000" // VSHIFT x11, x1, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7551 "00001010" // /* MW 3 */ + 7552 "10001001" // /* MW 2 */ + 7553 "00011101" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 +.src_ref 7 "conv2d_dw_bf16.h" 230 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7554 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7555 "10100001" // /* MW 7 */ + 7556 "01001000" // /* MW 6 */ + 7557 "00000100" // /* MW 5 */ + 7558 "11000110" // /* MW 4 */ + 7559 "10001110" // /* MW 3 */ + 7560 "10110000" // /* MW 2 */ + 7561 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 225 12 first +.src_ref 7 "conv2d_dw_bf16.h" 234 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7562 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7563 "10100001" // /* MW 7 */ + 7564 "00110110" // /* MW 6 */ + 7565 "00000010" // /* MW 5 */ + 7566 "01000110" // /* MW 4 */ + 7567 "00001111" // /* MW 3 */ + 7568 "10011100" // /* MW 2 */ + 7569 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7570 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7571 "00001110" // /* MW 3 */ + 7572 "10001001" // /* MW 2 */ + 7573 "00011101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 226 12 first +.src_ref 7 "conv2d_dw_bf16.h" 231 12 first + 7574 "01100010" // VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7575 "11100001" // /* MW 7 */ + 7576 "10010010" // /* MW 6 */ + 7577 "00000011" // /* MW 5 */ + 7578 "01000110" // /* MW 4 */ + 7579 "00000011" // /* MW 3 */ + 7580 "00011100" // /* MW 2 */ + 7581 "00000101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 223 12 first +.src_ref 7 "conv2d_dw_bf16.h" 235 12 first + 7582 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7583 "11100001" // /* MW 7 */ + 7584 "01010110" // /* MW 6 */ + 7585 "00000000" // /* MW 5 */ + 7586 "01000110" // /* MW 4 */ + 7587 "00000111" // /* MW 3 */ + 7588 "00011100" // /* MW 2 */ + 7589 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first + 7590 "10010100" // NOPA; VSHIFT x4, x6, x1, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7591 "00001101" // /* MW 5 */ + 7592 "01100001" // /* MW 4 */ + 7593 "11110100" // /* MW 3 */ + 7594 "00101100" // /* MW 2 */ + 7595 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 228 12 first + 7596 "01001000" // VMAC.f dm4, dm3, x6, x10, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7597 "01000001" // /* MW 3 */ + 7598 "01101101" // /* MW 2 */ + 7599 "00000100" // /* MW 1 */ +.label ZLE_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_320 +.src_ref 7 "conv2d_dw_bf16.h" 232 12 first +.end_of_loop +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 7600 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7601 "00011010" // /* MW 15 */ + 7602 "00001000" // /* MW 14 */ + 7603 "01111000" // /* MW 13 */ + 7604 "10100101" // /* MW 12 */ + 7605 "00000001" // /* MW 11 */ + 7606 "00000000" // /* MW 10 */ + 7607 "00000000" // /* MW 9 */ + 7608 "00000000" // /* MW 8 */ + 7609 "01011011" // /* MW 7 */ + 7610 "00000001" // /* MW 6 */ + 7611 "00100000" // /* MW 5 */ + 7612 "00000000" // /* MW 4 */ + 7613 "11110000" // /* MW 3 */ + 7614 "00101100" // /* MW 2 */ + 7615 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 81 15 first +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "shuffle.hpp" 153 18 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 7616 "10111010" // PADDA.3D [p0], d0; PADDB.2D [p2], d3; VSHIFT x10, x1, x2, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7617 "01101110" // /* MW 9 */ + 7618 "10000011" // /* MW 8 */ + 7619 "10000100" // /* MW 7 */ + 7620 "00000010" // /* MW 6 */ + 7621 "10010000" // /* MW 5 */ + 7622 "01110011" // /* MW 4 */ + 7623 "11110010" // /* MW 3 */ + 7624 "00001100" // /* MW 2 */ + 7625 "00000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 224 12 first +.src_ref 7 "conv2d_dw_bf16.h" 229 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7626 "01100010" // VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7627 "00000001" // /* MW 7 */ + 7628 "10001001" // /* MW 6 */ + 7629 "00000010" // /* MW 5 */ + 7630 "01000110" // /* MW 4 */ + 7631 "00001011" // /* MW 3 */ + 7632 "10011100" // /* MW 2 */ + 7633 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 233 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7634 "01100010" // VSHIFT x4, x6, x1, r2; VMAC.f dm1, dm1, x10, x8, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7635 "00000001" // /* MW 7 */ + 7636 "00110101" // /* MW 6 */ + 7637 "00000001" // /* MW 5 */ + 7638 "11000110" // /* MW 4 */ + 7639 "10001010" // /* MW 3 */ + 7640 "00110000" // /* MW 2 */ + 7641 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7642 "11011000" // VSHIFT x11, x1, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7643 "00001010" // /* MW 3 */ + 7644 "10001001" // /* MW 2 */ + 7645 "00011101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 225 12 first +.src_ref 7 "conv2d_dw_bf16.h" 230 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7646 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7647 "10100001" // /* MW 7 */ + 7648 "01001000" // /* MW 6 */ + 7649 "00000100" // /* MW 5 */ + 7650 "01000110" // /* MW 4 */ + 7651 "00001111" // /* MW 3 */ + 7652 "10011100" // /* MW 2 */ + 7653 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 234 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7654 "01001010" // NOPA; VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7655 "10100001" // /* MW 9 */ + 7656 "00110110" // /* MW 8 */ + 7657 "00000010" // /* MW 7 */ + 7658 "11000010" // /* MW 6 */ + 7659 "10001110" // /* MW 5 */ + 7660 "10110000" // /* MW 4 */ + 7661 "11110100" // /* MW 3 */ + 7662 "00101100" // /* MW 2 */ + 7663 "00000000" // /* MW 1 */ +.label TGT_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_384 +.src_ref 8 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 153 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 7664 "10110100" // VLDB.2D x3, [p1], d7; VSHIFT x11, x1, x2, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7665 "00011101" // /* MW 5 */ + 7666 "00010010" // /* MW 4 */ + 7667 "10001011" // /* MW 3 */ + 7668 "00011110" // /* MW 2 */ + 7669 "00111110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 208 8 first +.src_ref 7 "conv2d_dw_bf16.h" 231 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7670 "01011010" // MOVXM le, #7840; VMAC.f dm3, dm4, x9, x7, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7671 "11100001" // /* MW 9 */ + 7672 "10010010" // /* MW 8 */ + 7673 "00000011" // /* MW 7 */ + 7674 "00000010" // /* MW 6 */ + 7675 "11101010" // /* MW 5 */ + 7676 "10110111" // /* MW 4 */ + 7677 "00000000" // /* MW 3 */ + 7678 "00000000" // /* MW 2 */ + 7679 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 7 "conv2d_dw_bf16.h" 208 8 +.src_ref 7 "conv2d_dw_bf16.h" 235 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7680 "01000110" // VLDA.CONV.fp32.bf16 cml0, [p2]; MOVXM ls, #7760; VMAC.f dm0, dm2, x11, x7, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7681 "11100001" // /* MW 11 */ + 7682 "01010110" // /* MW 10 */ + 7683 "00000000" // /* MW 9 */ + 7684 "00000010" // /* MW 8 */ + 7685 "11100101" // /* MW 7 */ + 7686 "10001111" // /* MW 6 */ + 7687 "00000000" // /* MW 5 */ + 7688 "00000000" // /* MW 4 */ + 7689 "01110000" // /* MW 3 */ + 7690 "10000101" // /* MW 2 */ + 7691 "01000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 208 8 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7692 "10011000" // ADD.NC lc, r5, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7693 "11111111" // /* MW 3 */ + 7694 "01110010" // /* MW 2 */ + 7695 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7696 "10011000" // VLDA x6, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7697 "10011011" // /* MW 3 */ + 7698 "00011101" // /* MW 2 */ + 7699 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7700 "00011000" // VLDB x1, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7701 "01110100" // /* MW 3 */ + 7702 "00011100" // /* MW 2 */ + 7703 "00111000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7704 "00011000" // VLDB.3D x2, [p0], d2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7705 "10110100" // /* MW 3 */ + 7706 "01011000" // /* MW 2 */ + 7707 "00111000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7708 "00011000" // VCONV.bf16.fp32 x10, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7709 "10010110" // /* MW 3 */ + 7710 "00010001" // /* MW 2 */ + 7711 "00001101" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7712 "00011000" // VCONV.bf16.fp32 x6, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7713 "00010110" // /* MW 3 */ + 7714 "00010000" // /* MW 2 */ + 7715 "00001011" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7716 "11111000" // VMAX_LT.bf16 x8, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7717 "01101100" // /* MW 3 */ + 7718 "01010000" // /* MW 2 */ + 7719 "00011100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 244 17 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7720 "01111000" // VSHUFFLE x10, x10, x6, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7721 "01000100" // /* MW 3 */ + 7722 "01010011" // /* MW 2 */ + 7723 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 7 "conv2d_dw_bf16.h" 243 16 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7724 "00000010" // VST x8, [p3], m4; VMAX_LT.bf16 x10, r16, x10, x0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7725 "01110000" // /* MW 7 */ + 7726 "00110110" // /* MW 6 */ + 7727 "10101000" // /* MW 5 */ + 7728 "00000010" // /* MW 4 */ + 7729 "01100000" // /* MW 3 */ + 7730 "01000010" // /* MW 2 */ + 7731 "01110001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 226 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7732 "01011000" // VEXTBCST.128 x10, x3, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7733 "00000011" // /* MW 3 */ + 7734 "00011100" // /* MW 2 */ + 7735 "00011101" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 7 "conv2d_dw_bf16.h" 228 12 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7736 "00000010" // VST.3D x10, [p3], d1; VMOV cml3, cml0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7737 "01110000" // /* MW 7 */ + 7738 "01000101" // /* MW 6 */ + 7739 "10000000" // /* MW 5 */ + 7740 "00000001" // /* MW 4 */ + 7741 "01100000" // /* MW 3 */ + 7742 "01010010" // /* MW 2 */ + 7743 "01100111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 223 12 first +.src_ref 7 "conv2d_dw_bf16.h" 228 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7744 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7745 "01000001" // /* MW 7 */ + 7746 "01101101" // /* MW 6 */ + 7747 "00000100" // /* MW 5 */ + 7748 "01000110" // /* MW 4 */ + 7749 "00000111" // /* MW 3 */ + 7750 "00011100" // /* MW 2 */ + 7751 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 232 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7752 "01100010" // VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm0, x1, x10, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7753 "01000001" // /* MW 7 */ + 7754 "00000011" // /* MW 6 */ + 7755 "00000001" // /* MW 5 */ + 7756 "11000110" // /* MW 4 */ + 7757 "10000110" // /* MW 3 */ + 7758 "00110000" // /* MW 2 */ + 7759 "00000010" // /* MW 1 */ +.label ZLS_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_480 +.src_ref 8 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 153 18 +.begin_of_loop +.aggressive_scheduled_block_id 2 +.noswbrkpt +.loop_nesting 2 + 7760 "10111010" // VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7761 "01101110" // /* MW 9 */ + 7762 "10000011" // /* MW 8 */ + 7763 "10000100" // /* MW 7 */ + 7764 "00000010" // /* MW 6 */ + 7765 "11110100" // /* MW 5 */ + 7766 "11110000" // /* MW 4 */ + 7767 "01110001" // /* MW 3 */ + 7768 "10110011" // /* MW 2 */ + 7769 "00000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 7 "conv2d_dw_bf16.h" 224 12 first +.src_ref 7 "conv2d_dw_bf16.h" 229 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7770 "01001010" // VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7771 "00000001" // /* MW 9 */ + 7772 "10001001" // /* MW 8 */ + 7773 "00000010" // /* MW 7 */ + 7774 "01000110" // /* MW 6 */ + 7775 "00001011" // /* MW 5 */ + 7776 "10011100" // /* MW 4 */ + 7777 "11101010" // /* MW 3 */ + 7778 "00111000" // /* MW 2 */ + 7779 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 233 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7780 "01001010" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r2; VMAC.f dm1, dm1, x10, x8, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7781 "00000001" // /* MW 9 */ + 7782 "00110101" // /* MW 8 */ + 7783 "00000001" // /* MW 7 */ + 7784 "11000110" // /* MW 6 */ + 7785 "10001010" // /* MW 5 */ + 7786 "00110000" // /* MW 4 */ + 7787 "01101010" // /* MW 3 */ + 7788 "10110001" // /* MW 2 */ + 7789 "00000000" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7790 "11011000" // VSHIFT x11, x1, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7791 "00001010" // /* MW 3 */ + 7792 "10001001" // /* MW 2 */ + 7793 "00011101" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 +.src_ref 7 "conv2d_dw_bf16.h" 230 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7794 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7795 "10100001" // /* MW 7 */ + 7796 "01001000" // /* MW 6 */ + 7797 "00000100" // /* MW 5 */ + 7798 "11000110" // /* MW 4 */ + 7799 "10001110" // /* MW 3 */ + 7800 "10110000" // /* MW 2 */ + 7801 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 225 12 first +.src_ref 7 "conv2d_dw_bf16.h" 234 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7802 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7803 "10100001" // /* MW 7 */ + 7804 "00110110" // /* MW 6 */ + 7805 "00000010" // /* MW 5 */ + 7806 "01000110" // /* MW 4 */ + 7807 "00001111" // /* MW 3 */ + 7808 "10011100" // /* MW 2 */ + 7809 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7810 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7811 "00001110" // /* MW 3 */ + 7812 "10001001" // /* MW 2 */ + 7813 "00011101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 226 12 first +.src_ref 7 "conv2d_dw_bf16.h" 231 12 first + 7814 "01100010" // VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7815 "11100001" // /* MW 7 */ + 7816 "10010010" // /* MW 6 */ + 7817 "00000011" // /* MW 5 */ + 7818 "01000110" // /* MW 4 */ + 7819 "00000011" // /* MW 3 */ + 7820 "00011100" // /* MW 2 */ + 7821 "00000101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 223 12 first +.src_ref 7 "conv2d_dw_bf16.h" 235 12 first + 7822 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7823 "11100001" // /* MW 7 */ + 7824 "01010110" // /* MW 6 */ + 7825 "00000000" // /* MW 5 */ + 7826 "01000110" // /* MW 4 */ + 7827 "00000111" // /* MW 3 */ + 7828 "00011100" // /* MW 2 */ + 7829 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first + 7830 "10010100" // NOPA; VSHIFT x4, x6, x1, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7831 "00001101" // /* MW 5 */ + 7832 "01100001" // /* MW 4 */ + 7833 "11110100" // /* MW 3 */ + 7834 "00101100" // /* MW 2 */ + 7835 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 228 12 first + 7836 "01001000" // VMAC.f dm4, dm3, x6, x10, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7837 "01000001" // /* MW 3 */ + 7838 "01101101" // /* MW 2 */ + 7839 "00000100" // /* MW 1 */ +.label ZLE_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_560 +.src_ref 7 "conv2d_dw_bf16.h" 232 12 first +.end_of_loop +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 7840 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7841 "00011010" // /* MW 15 */ + 7842 "00001000" // /* MW 14 */ + 7843 "01111000" // /* MW 13 */ + 7844 "10100101" // /* MW 12 */ + 7845 "00000001" // /* MW 11 */ + 7846 "00000000" // /* MW 10 */ + 7847 "00000000" // /* MW 9 */ + 7848 "00000000" // /* MW 8 */ + 7849 "01011011" // /* MW 7 */ + 7850 "00000001" // /* MW 6 */ + 7851 "00100000" // /* MW 5 */ + 7852 "00000000" // /* MW 4 */ + 7853 "11110000" // /* MW 3 */ + 7854 "00101100" // /* MW 2 */ + 7855 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 81 15 first +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 202 4 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 7856 "10110110" // PADDA.3D [p0], d0; PADDB.2D [p2], d3; JNZD r4, r4, p4; VSHIFT x10, x1, x2, r1 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 7857 "01101000" // /* MW 11 */ + 7858 "10000011" // /* MW 10 */ + 7859 "10000100" // /* MW 9 */ + 7860 "00000010" // /* MW 8 */ + 7861 "01001001" // /* MW 7 */ + 7862 "00001000" // /* MW 6 */ + 7863 "00100000" // /* MW 5 */ + 7864 "11100111" // /* MW 4 */ + 7865 "11110100" // /* MW 3 */ + 7866 "00001100" // /* MW 2 */ + 7867 "00000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 224 12 first +.src_ref 7 "conv2d_dw_bf16.h" 229 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7868 "01100010" // VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7869 "00000001" // /* MW 7 */ + 7870 "10001001" // /* MW 6 */ + 7871 "00000010" // /* MW 5 */ + 7872 "01000110" // /* MW 4 */ + 7873 "00001011" // /* MW 3 */ + 7874 "10011100" // /* MW 2 */ + 7875 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 233 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7876 "01100010" // VSHIFT x4, x6, x1, r2; VMAC.f dm1, dm1, x10, x8, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7877 "00000001" // /* MW 7 */ + 7878 "00110101" // /* MW 6 */ + 7879 "00000001" // /* MW 5 */ + 7880 "11000110" // /* MW 4 */ + 7881 "10001010" // /* MW 3 */ + 7882 "00110000" // /* MW 2 */ + 7883 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7884 "11011000" // VSHIFT x11, x1, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7885 "00001010" // /* MW 3 */ + 7886 "10001001" // /* MW 2 */ + 7887 "00011101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 225 12 first +.src_ref 7 "conv2d_dw_bf16.h" 230 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7888 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7889 "10100001" // /* MW 7 */ + 7890 "01001000" // /* MW 6 */ + 7891 "00000100" // /* MW 5 */ + 7892 "01000110" // /* MW 4 */ + 7893 "00001111" // /* MW 3 */ + 7894 "10011100" // /* MW 2 */ + 7895 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 234 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7896 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7897 "10100001" // /* MW 7 */ + 7898 "00110110" // /* MW 6 */ + 7899 "00000010" // /* MW 5 */ + 7900 "11000110" // /* MW 4 */ + 7901 "10001110" // /* MW 3 */ + 7902 "10110000" // /* MW 2 */ + 7903 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 7904 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7905 "00001110" // /* MW 3 */ + 7906 "10001001" // /* MW 2 */ + 7907 "00011101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 231 12 first + 7908 "01001000" // VMAC.f dm3, dm4, x9, x7, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7909 "11100001" // /* MW 3 */ + 7910 "10010010" // /* MW 2 */ + 7911 "00000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 235 12 first + 7912 "01001000" // VMAC.f dm0, dm2, x11, x7, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7913 "11100001" // /* MW 3 */ + 7914 "01010110" // /* MW 2 */ + 7915 "00000000" // /* MW 1 */ + 7916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7917 "00000000" // /* MW 1 */ + 7918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7919 "00000000" // /* MW 1 */ + 7920 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7921 "00000000" // /* MW 1 */ + 7922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7923 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 first + 7924 "00011000" // VCONV.bf16.fp32 x10, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7925 "10010110" // /* MW 3 */ + 7926 "00010001" // /* MW 2 */ + 7927 "00001101" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 7 "conv2d_dw_bf16.h" 248 first + 7928 "01011100" // VCONV.bf16.fp32 x6, cml0; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 7929 "00000000" // /* MW 5 */ + 7930 "01010000" // /* MW 4 */ + 7931 "11000000" // /* MW 3 */ + 7932 "00000010" // /* MW 2 */ + 7933 "01100010" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 7934 "11111000" // VMAX_LT.bf16 x8, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7935 "01101100" // /* MW 3 */ + 7936 "01010000" // /* MW 2 */ + 7937 "00011100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 244 17 first +.delay_slot + 7938 "01111000" // VSHUFFLE x10, x10, x6, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7939 "01000100" // /* MW 3 */ + 7940 "01010011" // /* MW 2 */ + 7941 "00011101" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 7942 "11111000" // VMAX_LT.bf16 x10, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7943 "01101100" // /* MW 3 */ + 7944 "01010000" // /* MW 2 */ + 7945 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 7 "conv2d_dw_bf16.h" 243 16 first +.delay_slot + 7946 "00011000" // VST x8, [p3], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7947 "00010011" // /* MW 3 */ + 7948 "10001010" // /* MW 2 */ + 7949 "00001011" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1159 33 +.delay_slot + 7950 "00011000" // VST.3D x10, [p3], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7951 "10010011" // /* MW 3 */ + 7952 "00111010" // /* MW 2 */ +.label _Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params__end +.label __Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params___func_end0 + 7953 "00001011" // /* MW 1 */ +.label __Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params___func_begin0 +.label _Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params +.function conv2d_dw_shuffle _Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 7 "conv2d_dw_bf16.h" 254 first +.src_ref 7 "conv2d_dw_bf16.h" 255 22 +.src_ref 7 "conv2d_dw_bf16.h" 255 22 +.src_ref 7 "conv2d_dw_bf16.h" 261 15 +.function_start + 7968 "10110110" // MOVA m4, #-36; VLDB x2, [p1], #64; MOVXM p3, #509080 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7969 "00010000" // /* MW 11 */ + 7970 "01001100" // /* MW 10 */ + 7971 "10110010" // /* MW 9 */ + 7972 "11110001" // /* MW 8 */ + 7973 "00000001" // /* MW 7 */ + 7974 "00000000" // /* MW 6 */ + 7975 "01101000" // /* MW 5 */ + 7976 "00111001" // /* MW 4 */ + 7977 "10000010" // /* MW 3 */ + 7978 "10010000" // /* MW 2 */ + 7979 "11111011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 7 "conv2d_dw_bf16.h" 255 22 first +.src_ref 7 "conv2d_dw_bf16.h" 262 15 first +.src_ref 7 "conv2d_dw_bf16.h" 270 25 + 7980 "10110110" // LDA dn0, [p3], #4; VLDB x0, [p2], #64; MOVX r4, #-5; MOV r3, #1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7981 "01011000" // /* MW 11 */ + 7982 "00000001" // /* MW 10 */ + 7983 "01101000" // /* MW 9 */ + 7984 "01101000" // /* MW 8 */ + 7985 "01000111" // /* MW 7 */ + 7986 "00111110" // /* MW 6 */ + 7987 "01101000" // /* MW 5 */ + 7988 "00111000" // /* MW 4 */ + 7989 "11010100" // /* MW 3 */ + 7990 "10000100" // /* MW 2 */ + 7991 "01100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 255 22 +.src_ref 7 "conv2d_dw_bf16.h" 258 4 first + 7992 "10111010" // LDA dj0, [p3], #4; MOVXM ls, #8080 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7993 "00010000" // /* MW 9 */ + 7994 "11001000" // /* MW 8 */ + 7995 "01111111" // /* MW 7 */ + 7996 "00000100" // /* MW 6 */ + 7997 "00000000" // /* MW 5 */ + 7998 "00000000" // /* MW 4 */ + 7999 "11010000" // /* MW 3 */ + 8000 "10001000" // /* MW 2 */ + 8001 "01100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 255 22 first +.src_ref 7 "conv2d_dw_bf16.h" 258 4 + 8002 "10111010" // LDA dn4, [p3], #4; MOVXM le, #8176 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8003 "00010000" // /* MW 9 */ + 8004 "11111000" // /* MW 8 */ + 8005 "10111111" // /* MW 7 */ + 8006 "00000101" // /* MW 6 */ + 8007 "00000000" // /* MW 5 */ + 8008 "00000000" // /* MW 4 */ + 8009 "11010000" // /* MW 3 */ + 8010 "11000100" // /* MW 2 */ + 8011 "01100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 255 22 +.src_ref 7 "conv2d_dw_bf16.h" 263 16 +.src_ref 7 "conv2d_dw_bf16.h" 264 16 +.src_ref 7 "conv2d_dw_bf16.h" 266 47 + 8012 "10111010" // LDA dj4, [p3], #4; MOVX r1, #50; MOV r0, #18 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8013 "01011000" // /* MW 9 */ + 8014 "00010010" // /* MW 8 */ + 8015 "00001000" // /* MW 7 */ + 8016 "01001000" // /* MW 6 */ + 8017 "00010110" // /* MW 5 */ + 8018 "00000000" // /* MW 4 */ + 8019 "11010000" // /* MW 3 */ + 8020 "11001000" // /* MW 2 */ + 8021 "01100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 255 22 +.src_ref 7 "conv2d_dw_bf16.h" 267 47 + 8022 "00101100" // LDA m0, [p3], #4; MOVX r2, #19 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8023 "10011010" // /* MW 5 */ + 8024 "00001000" // /* MW 4 */ + 8025 "11010000" // /* MW 3 */ + 8026 "10000000" // /* MW 2 */ + 8027 "01100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 255 22 + 8028 "10011000" // LDA dc0, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8029 "01100110" // /* MW 3 */ + 8030 "00011100" // /* MW 2 */ + 8031 "00000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 255 22 + 8032 "10011000" // LDA dc4, [p3], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8033 "01100110" // /* MW 3 */ + 8034 "10001010" // /* MW 2 */ + 8035 "00000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 258 51 first + 8036 "10011000" // LDA r5, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8037 "10110110" // /* MW 3 */ + 8038 "00000100" // /* MW 2 */ + 8039 "00000011" // /* MW 1 */ + 8040 "10011000" // LDA r6, [p3, #8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8041 "11010110" // /* MW 3 */ + 8042 "00100100" // /* MW 2 */ + 8043 "00000011" // /* MW 1 */ + 8044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8045 "00000000" // /* MW 1 */ + 8046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8047 "00000000" // /* MW 1 */ + 8048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8049 "00000000" // /* MW 1 */ + 8050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8051 "00000000" // /* MW 1 */ + 8052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8053 "00000000" // /* MW 1 */ + 8054 "10011000" // LSHL r4, r5, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8055 "01001101" // /* MW 3 */ + 8056 "01001000" // /* MW 2 */ + 8057 "00010001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 258 4 first +.src_ref 7 "conv2d_dw_bf16.h" 270 25 first + 8058 "00100100" // LSHL r3, r6, r3; ADD.NC lc, r4, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8059 "11111111" // /* MW 5 */ + 8060 "11100100" // /* MW 4 */ + 8061 "10111010" // /* MW 3 */ + 8062 "11000111" // /* MW 2 */ + 8063 "00110000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 7 "conv2d_dw_bf16.h" 270 25 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8064 "11100001" // NOPA; NOPB; NOPS; NOPX; MOV dj1, r3; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8065 "00000000" // /* MW 15 */ + 8066 "00000000" // /* MW 14 */ + 8067 "01111000" // /* MW 13 */ + 8068 "11010000" // /* MW 12 */ + 8069 "11000000" // /* MW 11 */ + 8070 "00000000" // /* MW 10 */ + 8071 "00000000" // /* MW 9 */ + 8072 "00000000" // /* MW 8 */ + 8073 "01011011" // /* MW 7 */ + 8074 "00000001" // /* MW 6 */ + 8075 "00100000" // /* MW 5 */ + 8076 "00000000" // /* MW 4 */ + 8077 "11110000" // /* MW 3 */ + 8078 "00101100" // /* MW 2 */ + 8079 "00000000" // /* MW 1 */ +.label ZLS_F_Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params_112 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 7 "conv2d_dw_bf16.h" 261 15 first +.src_ref 7 "conv2d_dw_bf16.h" 262 15 first +.src_ref 7 "conv2d_dw_bf16.h" 263 16 first +.src_ref 7 "conv2d_dw_bf16.h" 270 25 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.noswbrkpt +.loop_nesting 1 + 8080 "11100001" // VLDA x0, [p2], #64; VLDB x2, [p1], #64; MOVS p4, p0; NOPX; VSHUFFLE x1, x2, x0, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8081 "00000000" // /* MW 15 */ + 8082 "00000000" // /* MW 14 */ + 8083 "00111000" // /* MW 13 */ + 8084 "00000010" // /* MW 12 */ + 8085 "01001000" // /* MW 11 */ + 8086 "00000000" // /* MW 10 */ + 8087 "00000000" // /* MW 9 */ + 8088 "00000000" // /* MW 8 */ + 8089 "10001011" // /* MW 7 */ + 8090 "10000000" // /* MW 6 */ + 8091 "01101100" // /* MW 5 */ + 8092 "00111001" // /* MW 4 */ + 8093 "01110010" // /* MW 3 */ + 8094 "10000011" // /* MW 2 */ + 8095 "01000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 264 16 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8096 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE x3, x0, x0, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8097 "00000000" // /* MW 15 */ + 8098 "00000000" // /* MW 14 */ + 8099 "00111000" // /* MW 13 */ + 8100 "00000010" // /* MW 12 */ + 8101 "11000000" // /* MW 11 */ + 8102 "00000000" // /* MW 10 */ + 8103 "00000000" // /* MW 9 */ + 8104 "00000000" // /* MW 8 */ + 8105 "01011011" // /* MW 7 */ + 8106 "00000001" // /* MW 6 */ + 8107 "00100000" // /* MW 5 */ + 8108 "00000000" // /* MW 4 */ + 8109 "11110000" // /* MW 3 */ + 8110 "00101100" // /* MW 2 */ + 8111 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 266 47 first + 8112 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmlh0, x1, x3, r0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8113 "00000000" // /* MW 15 */ + 8114 "00000000" // /* MW 14 */ + 8115 "11101000" // /* MW 13 */ + 8116 "11000000" // /* MW 12 */ + 8117 "01000100" // /* MW 11 */ + 8118 "00000000" // /* MW 10 */ + 8119 "00000000" // /* MW 9 */ + 8120 "00000000" // /* MW 8 */ + 8121 "01011011" // /* MW 7 */ + 8122 "00000001" // /* MW 6 */ + 8123 "00100000" // /* MW 5 */ + 8124 "00000000" // /* MW 4 */ + 8125 "11110000" // /* MW 3 */ + 8126 "00101100" // /* MW 2 */ + 8127 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 267 47 first + 8128 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x1, x3, r2; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8129 "00000000" // /* MW 15 */ + 8130 "00000000" // /* MW 14 */ + 8131 "11101000" // /* MW 13 */ + 8132 "11000100" // /* MW 12 */ + 8133 "00000100" // /* MW 11 */ + 8134 "00000000" // /* MW 10 */ + 8135 "00000000" // /* MW 9 */ + 8136 "00000000" // /* MW 8 */ + 8137 "01011011" // /* MW 7 */ + 8138 "00000001" // /* MW 6 */ + 8139 "00100000" // /* MW 5 */ + 8140 "00000000" // /* MW 4 */ + 8141 "11110000" // /* MW 3 */ + 8142 "00101100" // /* MW 2 */ + 8143 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 143 15 first +.src_ref 4 "vector.hpp" 1159 33 first + 8144 "11100001" // NOPA; NOPB; VST.3D bmlh0, [p0], d0; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8145 "00000000" // /* MW 15 */ + 8146 "00000000" // /* MW 14 */ + 8147 "01111000" // /* MW 13 */ + 8148 "10100101" // /* MW 12 */ + 8149 "00000001" // /* MW 11 */ + 8150 "00000000" // /* MW 10 */ + 8151 "00000000" // /* MW 9 */ + 8152 "10000000" // /* MW 8 */ + 8153 "00100110" // /* MW 7 */ + 8154 "00011000" // /* MW 6 */ + 8155 "00100000" // /* MW 5 */ + 8156 "00000000" // /* MW 4 */ + 8157 "11110000" // /* MW 3 */ + 8158 "00101100" // /* MW 2 */ + 8159 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 7 "conv2d_dw_bf16.h" 270 25 first + 8160 "11100001" // NOPA; NOPB; VST bmll0, [p4, dj1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8161 "00000000" // /* MW 15 */ + 8162 "00000000" // /* MW 14 */ + 8163 "01111000" // /* MW 13 */ + 8164 "10100101" // /* MW 12 */ + 8165 "00000001" // /* MW 11 */ + 8166 "00000000" // /* MW 10 */ + 8167 "00000000" // /* MW 9 */ + 8168 "10000000" // /* MW 8 */ + 8169 "00000110" // /* MW 7 */ + 8170 "00100000" // /* MW 6 */ + 8171 "00100100" // /* MW 5 */ + 8172 "00000000" // /* MW 4 */ + 8173 "11110000" // /* MW 3 */ + 8174 "00101100" // /* MW 2 */ + 8175 "00000000" // /* MW 1 */ +.label ZLE_F_Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params_208 +.end_of_loop + 8176 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8177 "00000000" // /* MW 15 */ + 8178 "00000000" // /* MW 14 */ + 8179 "01111000" // /* MW 13 */ + 8180 "10100101" // /* MW 12 */ + 8181 "00000001" // /* MW 11 */ + 8182 "00000000" // /* MW 10 */ + 8183 "00000000" // /* MW 9 */ + 8184 "00000000" // /* MW 8 */ + 8185 "01011011" // /* MW 7 */ + 8186 "00000001" // /* MW 6 */ + 8187 "00100000" // /* MW 5 */ + 8188 "00000000" // /* MW 4 */ + 8189 "11110000" // /* MW 3 */ + 8190 "00101100" // /* MW 2 */ + 8191 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 7 "conv2d_dw_bf16.h" 263 16 first +.src_ref 7 "conv2d_dw_bf16.h" 270 25 +.src_ref 7 "conv2d_dw_bf16.h" 274 first +.loop_nesting 0 + 8192 "00111010" // MOVS p4, p0; RET lr; VSHUFFLE x1, x2, x0, r1 /* MW 10 */ /* control_operation: words=10 rts unconditional cycles_taken=1 delay_slots=5 */ + 8193 "00111001" // /* MW 9 */ + 8194 "00000010" // /* MW 8 */ + 8195 "01001000" // /* MW 7 */ + 8196 "00000000" // /* MW 6 */ + 8197 "01000000" // /* MW 5 */ + 8198 "00000001" // /* MW 4 */ + 8199 "01100000" // /* MW 3 */ + 8200 "00010001" // /* MW 2 */ + 8201 "10010000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 264 16 first +.delay_slot + 8202 "01111000" // VSHUFFLE x3, x0, x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8203 "00000100" // /* MW 3 */ + 8204 "10000000" // /* MW 2 */ + 8205 "00011001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 266 47 first +.delay_slot + 8206 "11011000" // VSHUFFLE bmlh0, x1, x3, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8207 "10000001" // /* MW 3 */ + 8208 "10001001" // /* MW 2 */ + 8209 "00011000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 267 47 first +.delay_slot + 8210 "11011000" // VSHUFFLE bmll0, x1, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8211 "10001001" // /* MW 3 */ + 8212 "00001001" // /* MW 2 */ + 8213 "00011000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 143 15 first +.src_ref 4 "vector.hpp" 1159 33 first +.delay_slot + 8214 "10011000" // VST.3D bmlh0, [p0], d0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8215 "00100110" // /* MW 3 */ + 8216 "00011000" // /* MW 2 */ + 8217 "00001000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 7 "conv2d_dw_bf16.h" 270 25 first +.delay_slot + 8218 "10011000" // VST bmll0, [p4, dj1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8219 "00000110" // /* MW 3 */ + 8220 "00100000" // /* MW 2 */ +.label _Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params__end +.label __Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params___func_end0 + 8221 "00001100" // /* MW 1 */ +.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_begin0 +.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params +.function conv2d_dw<(unsigned char)'\x01', bfloat16, bfloat16, bfloat16, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::async, adf::addressing::linear, adf::margin<0U> > > _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 7 "conv2d_dw_bf16.h" 282 first +.src_ref 7 "conv2d_dw_bf16.h" 285 49 +.src_ref 7 "conv2d_dw_bf16.h" 285 79 +.src_ref 7 "conv2d_dw_bf16.h" 287 48 +.function_start + 8224 "10111010" // LDA p1, [p1]; MOVX r17, #1; MOV m0, #-193 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8225 "01011000" // /* MW 9 */ + 8226 "00111111" // /* MW 8 */ + 8227 "00000111" // /* MW 7 */ + 8228 "00101000" // /* MW 6 */ + 8229 "00010000" // /* MW 5 */ + 8230 "00000001" // /* MW 4 */ + 8231 "11010000" // /* MW 3 */ + 8232 "10010011" // /* MW 2 */ + 8233 "00100000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 285 79 +.src_ref 7 "conv2d_dw_bf16.h" 287 77 + 8234 "10111010" // MOVA m1, #-208; MOVXM p4, #509064 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8235 "00010000" // /* MW 9 */ + 8236 "01000100" // /* MW 8 */ + 8237 "00110010" // /* MW 7 */ + 8238 "11110010" // /* MW 6 */ + 8239 "00000001" // /* MW 5 */ + 8240 "00000000" // /* MW 4 */ + 8241 "10000000" // /* MW 3 */ + 8242 "00000100" // /* MW 2 */ + 8243 "11100110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 285 79 first +.src_ref 7 "conv2d_dw_bf16.h" 285 103 + 8244 "01010100" // LDA r16, [p4], m0; MOV m0, #201 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8245 "00100101" // /* MW 5 */ + 8246 "00000011" // /* MW 4 */ + 8247 "11010000" // /* MW 3 */ + 8248 "01000010" // /* MW 2 */ + 8249 "10000001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 285 103 + 8250 "10011000" // LDA.u8 r19, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8251 "01101010" // /* MW 3 */ + 8252 "00001010" // /* MW 2 */ + 8253 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 287 77 first + 8254 "10011000" // LDA r18, [p4], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8255 "01010110" // /* MW 3 */ + 8256 "00101010" // /* MW 2 */ + 8257 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 282 + 8258 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8259 "00000001" // /* MW 5 */ + 8260 "00000000" // /* MW 4 */ + 8261 "00000000" // /* MW 3 */ + 8262 "00001000" // /* MW 2 */ + 8263 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8264 "00001100" // LDA p0, [p0]; ST lr, [sp, #-8] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8265 "01111011" // /* MW 5 */ + 8266 "11110000" // /* MW 4 */ + 8267 "11011111" // /* MW 3 */ + 8268 "10000011" // /* MW 2 */ + 8269 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8270 "00001100" // LDA r15, [p2]; ST p2, [sp, #-16] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8271 "00111011" // /* MW 5 */ + 8272 "11100010" // /* MW 4 */ + 8273 "11011111" // /* MW 3 */ + 8274 "10111110" // /* MW 2 */ + 8275 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8276 "10011000" // ST p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8277 "10011101" // /* MW 3 */ + 8278 "11111111" // /* MW 2 */ + 8279 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 284 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 8280 "00111010" // ST p6, [sp, #-20]; JL #7280 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=7280 delay_slots=5 */ + 8281 "01000001" // /* MW 9 */ + 8282 "00000000" // /* MW 8 */ + 8283 "00000000" // /* MW 7 */ + 8284 "10001110" // /* MW 6 */ + 8285 "00000011" // /* MW 5 */ + 8286 "00000000" // /* MW 4 */ + 8287 "10110000" // /* MW 3 */ + 8288 "11100011" // /* MW 2 */ + 8289 "11111101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 284 4 +.src_ref 7 "conv2d_dw_bf16.h" 285 89 first +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8290 "00111010" // ST r15, [sp, #-12]; MUL r16, r19, r16; MOV p2, p1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8291 "01111001" // /* MW 9 */ + 8292 "01100000" // /* MW 8 */ + 8293 "00110001" // /* MW 7 */ + 8294 "01111101" // /* MW 6 */ + 8295 "00001000" // /* MW 5 */ + 8296 "00100111" // /* MW 4 */ + 8297 "10110000" // /* MW 3 */ + 8298 "10111110" // /* MW 2 */ + 8299 "11111110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 290 20 +.delay_slot + 8300 "11111000" // MOV p6, p4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8301 "11000000" // /* MW 3 */ + 8302 "01101000" // /* MW 2 */ + 8303 "00011110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 285 49 +.delay_slot + 8304 "01011100" // ST p1, [sp, #-24]; LSHL r16, r16, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8305 "00111011" // /* MW 5 */ + 8306 "01000010" // /* MW 4 */ + 8307 "10111000" // /* MW 3 */ + 8308 "00010011" // /* MW 2 */ + 8309 "11111101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 285 49 +.src_ref 7 "conv2d_dw_bf16.h" 287 48 first +.src_ref 7 "conv2d_dw_bf16.h" 291 8 +.delay_slot + 8310 "00111010" // MOVS p7, p0; LSHL r16, r18, r17; MOV m0, r16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8311 "01111001" // /* MW 9 */ + 8312 "00010000" // /* MW 8 */ + 8313 "00000100" // /* MW 7 */ + 8314 "11101100" // /* MW 6 */ + 8315 "00001000" // /* MW 5 */ + 8316 "00100101" // /* MW 4 */ + 8317 "01100000" // /* MW 3 */ + 8318 "00010001" // /* MW 2 */ + 8319 "11110000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 285 49 first +.src_ref 7 "conv2d_dw_bf16.h" 287 48 +.delay_slot + 8320 "11100001" // NOPA; PADDB [p1], m0; NOPS; NOPX; ADD.NC p3, r15, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8321 "00000000" // /* MW 15 */ + 8322 "00000000" // /* MW 14 */ + 8323 "10101000" // /* MW 13 */ + 8324 "11100000" // /* MW 12 */ + 8325 "10110011" // /* MW 11 */ + 8326 "00000001" // /* MW 10 */ + 8327 "00000000" // /* MW 9 */ + 8328 "00000000" // /* MW 8 */ + 8329 "01011011" // /* MW 7 */ + 8330 "00000001" // /* MW 6 */ + 8331 "00100000" // /* MW 5 */ + 8332 "00010111" // /* MW 4 */ + 8333 "11110010" // /* MW 3 */ + 8334 "00101100" // /* MW 2 */ + 8335 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 290 20 first +.src_ref 7 "conv2d_dw_bf16.h" 290 24 +.src_ref 7 "conv2d_dw_bf16.h" 292 31 +.return_address + 8336 "00101100" // LDA.u8 r16, [p6, #7]; MOVX r17, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8337 "00010010" // /* MW 5 */ + 8338 "01000100" // /* MW 4 */ + 8339 "01010000" // /* MW 3 */ + 8340 "11000001" // /* MW 2 */ + 8341 "11001110" // /* MW 1 */ + 8342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8343 "00000000" // /* MW 1 */ + 8344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8345 "00000000" // /* MW 1 */ + 8346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8347 "00000000" // /* MW 1 */ + 8348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8349 "00000000" // /* MW 1 */ + 8350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8351 "00000000" // /* MW 1 */ + 8352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8353 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 290 24 + 8354 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8355 "00001000" // /* MW 3 */ + 8356 "01100001" // /* MW 2 */ + 8357 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 290 8 + 8358 "10000100" // JNZ r16, #8544 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8544 delay_slots=5 */ + 8359 "00000001" // /* MW 5 */ + 8360 "01000000" // /* MW 4 */ + 8361 "10110000" // /* MW 3 */ + 8362 "00010000" // /* MW 2 */ + 8363 "10000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 61 +.delay_slot + 8364 "01000100" // MOVXM p4, #509064 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8365 "00010000" // /* MW 5 */ + 8366 "11001001" // /* MW 4 */ + 8367 "11001000" // /* MW 3 */ + 8368 "00000111" // /* MW 2 */ + 8369 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8371 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8373 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8375 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8377 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 31 +.src_ref 7 "conv2d_dw_bf16.h" 292 61 first +.src_ref 7 "conv2d_dw_bf16.h" 292 71 +.src_ref 7 "conv2d_dw_bf16.h" 294 30 +.src_ref 7 "conv2d_dw_bf16.h" 294 68 +.src_ref 7 "conv2d_dw_bf16.h" 294 97 + 8378 "10111010" // LDA r16, [p4], #-4; MOVX r0, #1; MOV m0, #-208 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8379 "01011000" // /* MW 9 */ + 8380 "00110000" // /* MW 8 */ + 8381 "00000111" // /* MW 7 */ + 8382 "00101000" // /* MW 6 */ + 8383 "00000000" // /* MW 5 */ + 8384 "00000000" // /* MW 4 */ + 8385 "11010000" // /* MW 3 */ + 8386 "11000010" // /* MW 2 */ + 8387 "10011111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 118 + 8388 "10011000" // LDA r21, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8389 "10110110" // /* MW 3 */ + 8390 "00101110" // /* MW 2 */ + 8391 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 294 59 first + 8392 "10011000" // LDA r18, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8393 "01010110" // /* MW 3 */ + 8394 "00011110" // /* MW 2 */ + 8395 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 31 +.src_ref 7 "conv2d_dw_bf16.h" 293 31 +.src_ref 7 "conv2d_dw_bf16.h" 297 84 + 8396 "11010100" // LDA r19, [sp, #-24]; MOV p0, p4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8397 "10000001" // /* MW 5 */ + 8398 "11010001" // /* MW 4 */ + 8399 "00100000" // /* MW 3 */ + 8400 "01001110" // /* MW 2 */ + 8401 "11111101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 291 8 +.src_ref 7 "conv2d_dw_bf16.h" 294 97 +.src_ref 7 "conv2d_dw_bf16.h" 297 84 + 8402 "10111010" // LDA r20, [p4], m0; MOVS p0, p7; MOV p7, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8403 "01110010" // /* MW 9 */ + 8404 "01100000" // /* MW 8 */ + 8405 "10110000" // /* MW 7 */ + 8406 "00000011" // /* MW 6 */ + 8407 "10001011" // /* MW 5 */ + 8408 "10011100" // /* MW 4 */ + 8409 "11010000" // /* MW 3 */ + 8410 "01010010" // /* MW 2 */ + 8411 "10000001" // /* MW 1 */ + 8412 "00011000" // LDA p6, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8413 "00011001" // /* MW 3 */ + 8414 "11101111" // /* MW 2 */ + 8415 "00000111" // /* MW 1 */ + 8416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8417 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 31 first + 8418 "10011000" // LSHL r17, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8419 "00011101" // /* MW 3 */ + 8420 "00100011" // /* MW 2 */ + 8421 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 71 + 8422 "10011000" // LSHL r16, r16, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8423 "00001101" // /* MW 3 */ + 8424 "00100000" // /* MW 2 */ + 8425 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 291 8 first +.no_stack_arguments + 8426 "00000100" // JL #7280 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7280 delay_slots=5 */ + 8427 "00000001" // /* MW 5 */ + 8428 "00000000" // /* MW 4 */ + 8429 "00111000" // /* MW 3 */ + 8430 "00001110" // /* MW 2 */ + 8431 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 31 first +.src_ref 7 "conv2d_dw_bf16.h" 294 30 first +.delay_slot + 8432 "10100100" // LSHL r18, r18, r0; ADD.NC r22, r19, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8433 "10001010" // /* MW 5 */ + 8434 "00110011" // /* MW 4 */ + 8435 "10111011" // /* MW 3 */ + 8436 "10000001" // /* MW 2 */ + 8437 "10010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 31 +.src_ref 7 "conv2d_dw_bf16.h" 294 30 +.delay_slot + 8438 "10100100" // LSHL r17, r21, r0; ADD.NC r21, r15, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8439 "10010010" // /* MW 5 */ + 8440 "10101111" // /* MW 4 */ + 8441 "10111010" // /* MW 3 */ + 8442 "01000001" // /* MW 2 */ + 8443 "10101100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 31 +.src_ref 7 "conv2d_dw_bf16.h" 294 68 +.delay_slot + 8444 "10100100" // LSHL r18, r20, r0; ADD.NC p1, r17, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8445 "10110010" // /* MW 5 */ + 8446 "11010001" // /* MW 4 */ + 8447 "10110010" // /* MW 3 */ + 8448 "10000001" // /* MW 2 */ + 8449 "10100100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 293 31 first +.delay_slot + 8450 "01011000" // ADD.NC p2, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8451 "11000001" // /* MW 3 */ + 8452 "01101001" // /* MW 2 */ + 8453 "00011010" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 294 68 first +.delay_slot + 8454 "10111010" // NOPA; NOPB; ADD.NC p3, r21, r18 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8455 "10101110" // /* MW 9 */ + 8456 "01100100" // /* MW 8 */ + 8457 "10110101" // /* MW 7 */ + 8458 "00000001" // /* MW 6 */ + 8459 "00010000" // /* MW 5 */ + 8460 "00000000" // /* MW 4 */ + 8461 "11110000" // /* MW 3 */ + 8462 "00101100" // /* MW 2 */ + 8463 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 7 "conv2d_dw_bf16.h" 297 55 +.src_ref 7 "conv2d_dw_bf16.h" 298 55 +.src_ref 7 "conv2d_dw_bf16.h" 298 84 +.return_address + 8464 "10111010" // LDA p4, [sp, #-16]; MOVX r0, #1; MOV m0, #-204 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8465 "01011000" // /* MW 9 */ + 8466 "00110100" // /* MW 8 */ + 8467 "00000111" // /* MW 7 */ + 8468 "00101000" // /* MW 6 */ + 8469 "00000000" // /* MW 5 */ + 8470 "00000000" // /* MW 4 */ + 8471 "00100000" // /* MW 3 */ + 8472 "01000011" // /* MW 2 */ + 8473 "11111110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 297 84 first + 8474 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8475 "00010110" // /* MW 3 */ + 8476 "11111110" // /* MW 2 */ + 8477 "00000111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 298 84 + 8478 "11010100" // LDA p7, [sp, #-4]; MOV p3, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8479 "10000001" // /* MW 5 */ + 8480 "11011101" // /* MW 4 */ + 8481 "00100110" // /* MW 3 */ + 8482 "11110011" // /* MW 2 */ + 8483 "11111111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 298 84 first + 8484 "10011000" // LDA r17, [p3], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8485 "00110110" // /* MW 3 */ + 8486 "00001010" // /* MW 2 */ + 8487 "00000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 296 8 + 8488 "00011000" // LDA lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8489 "00111001" // /* MW 3 */ + 8490 "11111000" // /* MW 2 */ + 8491 "00000111" // /* MW 1 */ + 8492 "00011000" // LDA r15, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8493 "11110001" // /* MW 3 */ + 8494 "11110101" // /* MW 2 */ + 8495 "00000111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 301 first + 8496 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8497 "00000001" // /* MW 5 */ + 8498 "00000000" // /* MW 4 */ + 8499 "00000000" // /* MW 3 */ + 8500 "11111000" // /* MW 2 */ + 8501 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 8502 "10011000" // LDA r18, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8503 "01010110" // /* MW 3 */ + 8504 "00000110" // /* MW 2 */ + 8505 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 297 55 first + 8506 "10011000" // LSHL r16, r16, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8507 "00001101" // /* MW 3 */ + 8508 "00100000" // /* MW 2 */ + 8509 "00010100" // /* MW 1 */ + 8510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8511 "00000000" // /* MW 1 */ + 8512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8513 "00000000" // /* MW 1 */ + 8514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8515 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 296 8 first +.tail_call + 8516 "10000100" // J #7968 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=7968 delay_slots=5 */ + 8517 "00000000" // /* MW 5 */ + 8518 "00000000" // /* MW 4 */ + 8519 "10010000" // /* MW 3 */ + 8520 "00001111" // /* MW 2 */ + 8521 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 298 55 first +.delay_slot + 8522 "10011000" // LSHL r17, r17, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8523 "00001101" // /* MW 3 */ + 8524 "01100010" // /* MW 2 */ + 8525 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 298 55 +.delay_slot + 8526 "01011000" // ADD.NC r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8527 "11001001" // /* MW 3 */ + 8528 "01011000" // /* MW 2 */ + 8529 "00011100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 297 55 first +.delay_slot + 8530 "01011000" // ADD.NC p1, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8531 "01000001" // /* MW 3 */ + 8532 "01101001" // /* MW 2 */ + 8533 "00011001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 296 8 +.delay_slot + 8534 "11111000" // MOV p0, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8535 "00100000" // /* MW 3 */ + 8536 "01101001" // /* MW 2 */ + 8537 "00011000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 298 93 first +.delay_slot + 8538 "10010100" // NOPA; ADD.NC p2, r17, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8539 "10000010" // /* MW 5 */ + 8540 "11010001" // /* MW 4 */ + 8541 "11110100" // /* MW 3 */ + 8542 "00101100" // /* MW 2 */ + 8543 "00000000" // /* MW 1 */ +.label TGT_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_320 +.src_ref 7 "conv2d_dw_bf16.h" 301 +.return_address + 8544 "00011000" // LDA lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8545 "00111001" // /* MW 3 */ + 8546 "11111000" // /* MW 2 */ + 8547 "00000111" // /* MW 1 */ + 8548 "00011000" // LDA r15, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8549 "11110001" // /* MW 3 */ + 8550 "11110101" // /* MW 2 */ + 8551 "00000111" // /* MW 1 */ + 8552 "00011000" // LDA p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8553 "10011001" // /* MW 3 */ + 8554 "11111111" // /* MW 2 */ + 8555 "00000111" // /* MW 1 */ + 8556 "00011000" // LDA p6, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8557 "00011001" // /* MW 3 */ + 8558 "11101111" // /* MW 2 */ + 8559 "00000111" // /* MW 1 */ + 8560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8561 "00000000" // /* MW 1 */ + 8562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8563 "00000000" // /* MW 1 */ + 8564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8565 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 301 first + 8566 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 8567 "00000000" // /* MW 3 */ + 8568 "00101000" // /* MW 2 */ + 8569 "00010000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 301 +.delay_slot + 8570 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8571 "00000001" // /* MW 5 */ + 8572 "00000000" // /* MW 4 */ + 8573 "00000000" // /* MW 3 */ + 8574 "11111000" // /* MW 2 */ + 8575 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8577 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8579 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8581 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8582 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params__end +.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_end0 + 8583 "00000000" // /* MW 1 */ +.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_conv2d_dwc _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 6 "superkernels.cpp" 352 first +.src_ref 6 "superkernels.cpp" 357 6 +.function_start + 8592 "01000100" // MOVXM p4, #508416 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8593 "00000000" // /* MW 5 */ + 8594 "11000100" // /* MW 4 */ + 8595 "11001000" // /* MW 3 */ + 8596 "00000111" // /* MW 2 */ + 8597 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 357 6 first + 8598 "11010100" // LDA r16, [p4]; MOV r0, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8599 "01000001" // /* MW 5 */ + 8600 "00101111" // /* MW 4 */ + 8601 "11010000" // /* MW 3 */ + 8602 "11000010" // /* MW 2 */ + 8603 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 352 + 8604 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8605 "00000001" // /* MW 5 */ + 8606 "00000000" // /* MW 4 */ + 8607 "00000000" // /* MW 3 */ + 8608 "00010000" // /* MW 2 */ + 8609 "00000000" // /* MW 1 */ + 8610 "00000010" // ST r14, [sp, #-8]; MOV r17, CORE_ID /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8611 "01110000" // /* MW 7 */ + 8612 "01110000" // /* MW 6 */ + 8613 "00101101" // /* MW 5 */ + 8614 "00000010" // /* MW 4 */ + 8615 "10110000" // /* MW 3 */ + 8616 "00111010" // /* MW 2 */ + 8617 "11111111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 375 + 8618 "00000010" // ST r13, [sp, #-4]; MOV r13, lr /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8619 "01110000" // /* MW 7 */ + 8620 "11110000" // /* MW 6 */ + 8621 "10101000" // /* MW 5 */ + 8622 "00000001" // /* MW 4 */ + 8623 "10110000" // /* MW 3 */ + 8624 "10110110" // /* MW 2 */ + 8625 "11111111" // /* MW 1 */ + 8626 "10011000" // ST p0, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8627 "00011101" // /* MW 3 */ + 8628 "11101100" // /* MW 2 */ + 8629 "00001111" // /* MW 1 */ + 8630 "10011000" // ST p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8631 "10011101" // /* MW 3 */ + 8632 "11110111" // /* MW 2 */ + 8633 "00001111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 360 44 + 8634 "00000010" // ST r0, [sp, #-16]; MOV r14, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8635 "01110000" // /* MW 7 */ + 8636 "01100000" // /* MW 6 */ + 8637 "11001010" // /* MW 5 */ + 8638 "00000001" // /* MW 4 */ + 8639 "10110000" // /* MW 3 */ + 8640 "00000010" // /* MW 2 */ + 8641 "11111110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 357 6 +.src_ref 6 "superkernels.cpp" 357 16 + 8642 "10000100" // JNZ r16, #8768 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8768 delay_slots=5 */ + 8643 "00000001" // /* MW 5 */ + 8644 "01000000" // /* MW 4 */ + 8645 "00100000" // /* MW 3 */ + 8646 "00010001" // /* MW 2 */ + 8647 "10000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.delay_slot + 8648 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8649 "11000000" // /* MW 3 */ + 8650 "11010110" // /* MW 2 */ + 8651 "00011011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 354 22 first +.delay_slot + 8652 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8653 "10010000" // /* MW 3 */ + 8654 "01100010" // /* MW 2 */ + 8655 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 354 30 +.delay_slot + 8656 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8657 "11111011" // /* MW 3 */ + 8658 "01100011" // /* MW 2 */ + 8659 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 354 11 +.delay_slot + 8660 "01000100" // MOVXM p3, #508420 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8661 "00001000" // /* MW 5 */ + 8662 "11000100" // /* MW 4 */ + 8663 "11000110" // /* MW 3 */ + 8664 "00000111" // /* MW 2 */ + 8665 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 354 11 +.delay_slot + 8666 "10011000" // ST r17, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8667 "00110001" // /* MW 3 */ + 8668 "00000110" // /* MW 2 */ + 8669 "00001011" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.src_ref 6 "superkernels.cpp" 369 2 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8670 "00111010" // MOVS p7, p1; MOVXM p1, #508448 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8671 "00010001" // /* MW 9 */ + 8672 "00010000" // /* MW 8 */ + 8673 "10110001" // /* MW 7 */ + 8674 "11110000" // /* MW 6 */ + 8675 "00000001" // /* MW 5 */ + 8676 "00000000" // /* MW 4 */ + 8677 "01100000" // /* MW 3 */ + 8678 "10010001" // /* MW 2 */ + 8679 "11110000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.src_ref 6 "superkernels.cpp" 359 4 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8680 "01110110" // ST.s8 r16, [p1]; MOVS p0, p2; MOVXM p1, #508444 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8681 "00010000" // /* MW 11 */ + 8682 "00001110" // /* MW 10 */ + 8683 "10110001" // /* MW 9 */ + 8684 "11110000" // /* MW 8 */ + 8685 "00000001" // /* MW 7 */ + 8686 "00000000" // /* MW 6 */ + 8687 "10001011" // /* MW 5 */ + 8688 "10001000" // /* MW 4 */ + 8689 "11100000" // /* MW 3 */ + 8690 "11000000" // /* MW 2 */ + 8691 "00100000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8693 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 359 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 8694 "00000100" // JL #6480 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=6480 delay_slots=5 */ + 8695 "00000001" // /* MW 5 */ + 8696 "00000000" // /* MW 4 */ + 8697 "10101000" // /* MW 3 */ + 8698 "00001100" // /* MW 2 */ + 8699 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8701 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8702 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8703 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8704 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8705 "00110001" // /* MW 3 */ + 8706 "00100000" // /* MW 2 */ + 8707 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 8708 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8709 "00000101" // /* MW 3 */ + 8710 "00100000" // /* MW 2 */ + 8711 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 8712 "00000010" // ST r16, [p1]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8713 "01110000" // /* MW 7 */ + 8714 "10100101" // /* MW 6 */ + 8715 "00000001" // /* MW 5 */ + 8716 "00000000" // /* MW 4 */ + 8717 "00110000" // /* MW 3 */ + 8718 "11000010" // /* MW 2 */ + 8719 "00100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 360 44 +.src_ref 6 "superkernels.cpp" 369 2 +.return_address + 8720 "00000010" // MOVS p1, p7; ADD.NC p2, r14, #8 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8721 "00000000" // /* MW 7 */ + 8722 "10000010" // /* MW 6 */ + 8723 "00110011" // /* MW 5 */ + 8724 "00000001" // /* MW 4 */ + 8725 "01100000" // /* MW 3 */ + 8726 "10010001" // /* MW 2 */ + 8727 "00110011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 360 17 first + 8728 "10011000" // LDA.u16 r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8729 "00111010" // /* MW 3 */ + 8730 "00000110" // /* MW 2 */ + 8731 "00000010" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 361 13 +.src_ref 6 "superkernels.cpp" 361 15 first + 8732 "10111010" // LDA.u16 r16, [p2, #4]; MOVXM p2, #508440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8733 "00010000" // /* MW 9 */ + 8734 "00001100" // /* MW 8 */ + 8735 "00110001" // /* MW 7 */ + 8736 "11110001" // /* MW 6 */ + 8737 "00000001" // /* MW 5 */ + 8738 "00000000" // /* MW 4 */ + 8739 "01010000" // /* MW 3 */ + 8740 "11000011" // /* MW 2 */ + 8741 "01000100" // /* MW 1 */ + 8742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8743 "00000000" // /* MW 1 */ + 8744 "10000100" // J #8784 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8784 delay_slots=5 */ + 8745 "00000000" // /* MW 5 */ + 8746 "00000000" // /* MW 4 */ + 8747 "00101000" // /* MW 3 */ + 8748 "00010001" // /* MW 2 */ + 8749 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 360 15 +.src_ref 6 "superkernels.cpp" 365 26 +.delay_slot + 8750 "01000100" // MOVXM p3, #508432 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8751 "00100000" // /* MW 5 */ + 8752 "11000100" // /* MW 4 */ + 8753 "11000110" // /* MW 3 */ + 8754 "00000111" // /* MW 2 */ + 8755 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8756 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8757 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8758 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8759 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 360 15 first +.delay_slot + 8760 "10011000" // ST r17, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8761 "00110001" // /* MW 3 */ + 8762 "00000110" // /* MW 2 */ + 8763 "00001011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 361 13 first +.delay_slot + 8764 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8765 "00010001" // /* MW 3 */ + 8766 "00000110" // /* MW 2 */ + 8767 "00001010" // /* MW 1 */ +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_176 +.src_ref 6 "superkernels.cpp" 365 26 + 8768 "11100001" // NOPA; NOPB; NOPS; MOVXM p3, #508432; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8769 "00000000" // /* MW 15 */ + 8770 "00000000" // /* MW 14 */ + 8771 "00010000" // /* MW 13 */ + 8772 "00001000" // /* MW 12 */ + 8773 "10110001" // /* MW 11 */ + 8774 "11110001" // /* MW 10 */ + 8775 "00000001" // /* MW 9 */ + 8776 "00000000" // /* MW 8 */ + 8777 "01011011" // /* MW 7 */ + 8778 "00000001" // /* MW 6 */ + 8779 "00100000" // /* MW 5 */ + 8780 "00000000" // /* MW 4 */ + 8781 "11110000" // /* MW 3 */ + 8782 "00101100" // /* MW 2 */ + 8783 "00000000" // /* MW 1 */ +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_192 +.src_ref 1 "io_buffer_main.h" 242 49 first + 8784 "00011000" // ADD.NC p2, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8785 "10000110" // /* MW 3 */ + 8786 "01100111" // /* MW 2 */ + 8787 "00011010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 365 15 + 8788 "10111010" // LDA r27, [p2], #-4; MOVXM p4, #508420 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8789 "00010000" // /* MW 9 */ + 8790 "00000010" // /* MW 8 */ + 8791 "00110001" // /* MW 7 */ + 8792 "11110010" // /* MW 6 */ + 8793 "00000001" // /* MW 5 */ + 8794 "00000000" // /* MW 4 */ + 8795 "11010000" // /* MW 3 */ + 8796 "11101110" // /* MW 2 */ + 8797 "01011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 8798 "10011000" // LDA r16, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8799 "00010110" // /* MW 3 */ + 8800 "11111110" // /* MW 2 */ + 8801 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 8802 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8803 "00110110" // /* MW 3 */ + 8804 "11111110" // /* MW 2 */ + 8805 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first + 8806 "10011000" // LDA r18, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8807 "01010110" // /* MW 3 */ + 8808 "01000110" // /* MW 2 */ + 8809 "00000010" // /* MW 1 */ + 8810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8811 "00000000" // /* MW 1 */ + 8812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8813 "00000000" // /* MW 1 */ + 8814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8815 "00000000" // /* MW 1 */ + 8816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8817 "00000000" // /* MW 1 */ + 8818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8819 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 8820 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8821 "00000010" // /* MW 3 */ + 8822 "01100001" // /* MW 2 */ + 8823 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 + 8824 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8825 "00010001" // /* MW 3 */ + 8826 "00000110" // /* MW 2 */ + 8827 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 + 8828 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8829 "11111101" // /* MW 3 */ + 8830 "11100000" // /* MW 2 */ + 8831 "00010111" // /* MW 1 */ + 8832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8833 "00000000" // /* MW 1 */ + 8834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8835 "00000000" // /* MW 1 */ + 8836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8837 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 8838 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8839 "00001000" // /* MW 3 */ + 8840 "10010011" // /* MW 2 */ + 8841 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 449 8 +.src_ref 6 "superkernels.cpp" 364 11 +.src_ref 6 "superkernels.cpp" 367 47 +.src_ref 6 "superkernels.cpp" 372 6 +.src_ref 6 "superkernels.cpp" 373 16 + 8842 "10111010" // MOVA r15, #1; MOVXM p7, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8843 "00010000" // /* MW 9 */ + 8844 "00000000" // /* MW 8 */ + 8845 "10110001" // /* MW 7 */ + 8846 "11110011" // /* MW 6 */ + 8847 "00000001" // /* MW 5 */ + 8848 "00000000" // /* MW 4 */ + 8849 "00000000" // /* MW 3 */ + 8850 "00101111" // /* MW 2 */ + 8851 "00000000" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 76 16 + 8852 "11100100" // MOVX r24, #0; MOV r16, sp /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8853 "11000001" // /* MW 5 */ + 8854 "00101011" // /* MW 4 */ + 8855 "00101000" // /* MW 3 */ + 8856 "00000000" // /* MW 2 */ + 8857 "00000110" // /* MW 1 */ + 8858 "00011000" // ADD.NC p0, r16, #-76 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8859 "01011010" // /* MW 3 */ + 8860 "01101000" // /* MW 2 */ + 8861 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 1 "io_buffer_main.h" 348 51 + 8862 "11010100" // LDA p5, [sp, #-20]; MOV r14, p2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8863 "10000001" // /* MW 5 */ + 8864 "00101001" // /* MW 4 */ + 8865 "00100111" // /* MW 3 */ + 8866 "11010011" // /* MW 2 */ + 8867 "11111101" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 365 15 first + 8868 "10011000" // LDA r17, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8869 "00110110" // /* MW 3 */ + 8870 "00000110" // /* MW 2 */ + 8871 "00000100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 365 26 +.src_ref 6 "superkernels.cpp" 369 2 + 8872 "10111010" // LDA r16, [p3]; MOVXM p3, #508864 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8873 "00010000" // /* MW 9 */ + 8874 "11100000" // /* MW 8 */ + 8875 "10110001" // /* MW 7 */ + 8876 "11110001" // /* MW 6 */ + 8877 "00000001" // /* MW 5 */ + 8878 "00000000" // /* MW 4 */ + 8879 "11010000" // /* MW 3 */ + 8880 "11000010" // /* MW 2 */ + 8881 "01100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 364 11 first + 8882 "10011000" // LDA r18, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8883 "01010110" // /* MW 3 */ + 8884 "00000110" // /* MW 2 */ + 8885 "00000111" // /* MW 1 */ + 8886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8887 "00000000" // /* MW 1 */ + 8888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8889 "00000000" // /* MW 1 */ + 8890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8891 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 8892 "10011000" // LDA r19, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8893 "01110110" // /* MW 3 */ + 8894 "00000110" // /* MW 2 */ + 8895 "00000101" // /* MW 1 */ + 8896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8897 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 365 24 first + 8898 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8899 "00001111" // /* MW 3 */ + 8900 "01100001" // /* MW 2 */ + 8901 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 364 11 first + 8902 "00011000" // ADD r17, r18, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8903 "00000111" // /* MW 3 */ + 8904 "10100010" // /* MW 2 */ + 8905 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 367 47 first + 8906 "10011000" // LSHL r16, r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8907 "11111101" // /* MW 3 */ + 8908 "00100000" // /* MW 2 */ + 8909 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 369 2 first +.no_stack_arguments + 8910 "00000100" // JL #8224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8224 delay_slots=5 */ + 8911 "00000001" // /* MW 5 */ + 8912 "00000000" // /* MW 4 */ + 8913 "00010000" // /* MW 3 */ + 8914 "00010000" // /* MW 2 */ + 8915 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 364 11 first +.delay_slot + 8916 "10011000" // ST r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8917 "00110001" // /* MW 3 */ + 8918 "00000110" // /* MW 2 */ + 8919 "00001111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 367 47 first +.delay_slot + 8920 "01011000" // ADD.NC dn0, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8921 "11000001" // /* MW 3 */ + 8922 "01001001" // /* MW 2 */ + 8923 "00011000" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 225 10 first +.delay_slot + 8924 "10011000" // ST dn0, [sp, #-76] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8925 "00100101" // /* MW 3 */ + 8926 "10110100" // /* MW 2 */ + 8927 "00001111" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 76 16 first +.delay_slot + 8928 "10011000" // ST r24, [sp, #-72] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8929 "00010101" // /* MW 3 */ + 8930 "10111011" // /* MW 2 */ + 8931 "00001111" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 76 16 +.delay_slot + 8932 "00110110" // NOPA; NOPB; ST r24, [sp, #-68]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8933 "11000001" // /* MW 11 */ + 8934 "10001010" // /* MW 10 */ + 8935 "11011111" // /* MW 9 */ + 8936 "00000011" // /* MW 8 */ + 8937 "00000000" // /* MW 7 */ + 8938 "00000000" // /* MW 6 */ + 8939 "00100000" // /* MW 5 */ + 8940 "00000000" // /* MW 4 */ + 8941 "11110000" // /* MW 3 */ + 8942 "00101100" // /* MW 2 */ + 8943 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 +.return_address + 8944 "00011000" // ADD.NC p2, r14, #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8945 "00001010" // /* MW 3 */ + 8946 "01100111" // /* MW 2 */ + 8947 "00011010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first + 8948 "10011000" // LDA r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8949 "00010110" // /* MW 3 */ + 8950 "00000110" // /* MW 2 */ + 8951 "00000010" // /* MW 1 */ + 8952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8953 "00000000" // /* MW 1 */ + 8954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8955 "00000000" // /* MW 1 */ + 8956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8957 "00000000" // /* MW 1 */ + 8958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8959 "00000000" // /* MW 1 */ + 8960 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8961 "00000000" // /* MW 1 */ + 8962 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8963 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 8964 "00011000" // REL r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8965 "11111000" // /* MW 3 */ + 8966 "00010000" // /* MW 2 */ + 8967 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first +.src_ref 6 "superkernels.cpp" 372 19 + 8968 "10111010" // LDA r16, [p2, #-8]; MOVXM p1, #508440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8969 "00010000" // /* MW 9 */ + 8970 "00001100" // /* MW 8 */ + 8971 "10110001" // /* MW 7 */ + 8972 "11110000" // /* MW 6 */ + 8973 "00000001" // /* MW 5 */ + 8974 "00000000" // /* MW 4 */ + 8975 "11010000" // /* MW 3 */ + 8976 "11000010" // /* MW 2 */ + 8977 "01011100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 372 19 first + 8978 "10011000" // LDA r18, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8979 "01010110" // /* MW 3 */ + 8980 "00000110" // /* MW 2 */ + 8981 "00000001" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 372 6 + 8982 "10011000" // LDA r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8983 "00110110" // /* MW 3 */ + 8984 "00000110" // /* MW 2 */ + 8985 "00000111" // /* MW 1 */ + 8986 "00011000" // LDA p1, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8987 "10011001" // /* MW 3 */ + 8988 "11110100" // /* MW 2 */ + 8989 "00000111" // /* MW 1 */ + 8990 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8991 "11010001" // /* MW 3 */ + 8992 "11111001" // /* MW 2 */ + 8993 "00000111" // /* MW 1 */ + 8994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8995 "00000000" // /* MW 1 */ + 8996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8997 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 first + 8998 "10011000" // SUB r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8999 "00000001" // /* MW 3 */ + 9000 "11100001" // /* MW 2 */ + 9001 "00010011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 9002 "10011000" // ST r16, [p2, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9003 "00010001" // /* MW 3 */ + 9004 "11100110" // /* MW 2 */ + 9005 "00001010" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 372 16 first + 9006 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9007 "00101000" // /* MW 3 */ + 9008 "01100001" // /* MW 2 */ + 9009 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 372 6 + 9010 "10000100" // JNZ r16, #9040 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9040 delay_slots=5 */ + 9011 "00000001" // /* MW 5 */ + 9012 "01000000" // /* MW 4 */ + 9013 "10101000" // /* MW 3 */ + 9014 "00010001" // /* MW 2 */ + 9015 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 373 16 +.delay_slot + 9016 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9017 "00000001" // /* MW 3 */ + 9018 "00110000" // /* MW 2 */ + 9019 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9021 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9023 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9025 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9027 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 373 16 first + 9028 "00110110" // NOPA; NOPB; ST r24, [p7]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9029 "11000001" // /* MW 11 */ + 9030 "10001000" // /* MW 10 */ + 9031 "10000011" // /* MW 9 */ + 9032 "00000011" // /* MW 8 */ + 9033 "00000000" // /* MW 7 */ + 9034 "00000000" // /* MW 6 */ + 9035 "00100000" // /* MW 5 */ + 9036 "00000000" // /* MW 4 */ + 9037 "11110000" // /* MW 3 */ + 9038 "00101100" // /* MW 2 */ + 9039 "00000000" // /* MW 1 */ +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 6 "superkernels.cpp" 375 + 9040 "11010100" // LDA r13, [sp, #-4]; MOV lr, r13 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9041 "01000001" // /* MW 5 */ + 9042 "11101101" // /* MW 4 */ + 9043 "00101110" // /* MW 3 */ + 9044 "10110110" // /* MW 2 */ + 9045 "11111111" // /* MW 1 */ + 9046 "00011000" // LDA r15, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9047 "11110001" // /* MW 3 */ + 9048 "11110001" // /* MW 2 */ + 9049 "00000111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 375 first + 9050 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9051 "00000000" // /* MW 3 */ + 9052 "00101000" // /* MW 2 */ + 9053 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 375 +.delay_slot + 9054 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9055 "00000001" // /* MW 5 */ + 9056 "00000000" // /* MW 4 */ + 9057 "00000000" // /* MW 3 */ + 9058 "11110000" // /* MW 2 */ + 9059 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9061 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9063 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9065 "00000000" // /* MW 1 */ +.delay_slot + 9066 "11111000" // MOV p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9067 "11000000" // /* MW 3 */ + 9068 "01100010" // /* MW 2 */ +.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 9069 "00011111" // /* MW 1 */ +.label __Z15_b13749_wrapperPPv___func_begin0 +.label _Z15_b13749_wrapperPPv +.function _b13749_wrapper _Z15_b13749_wrapperPPv +.src_ref 0 "0_0_reloadable77.cc" 21 first +.src_ref 0 "0_0_reloadable77.cc" 23 79 +.function_start + 9072 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9073 "11000000" // /* MW 3 */ + 9074 "01100000" // /* MW 2 */ + 9075 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 23 79 first + 9076 "10011000" // LDA p0, [p2], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9077 "00011110" // /* MW 3 */ + 9078 "00111100" // /* MW 2 */ + 9079 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 24 47 first + 9080 "10011000" // LDA p1, [p2], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9081 "10011110" // /* MW 3 */ + 9082 "11101100" // /* MW 2 */ + 9083 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 26 81 first + 9084 "10011000" // LDA p3, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9085 "10011110" // /* MW 3 */ + 9086 "00010101" // /* MW 2 */ + 9087 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 25 80 first + 9088 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9089 "00011110" // /* MW 3 */ + 9090 "00000101" // /* MW 2 */ + 9091 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 22 4 first +.tail_call + 9092 "10000100" // J #5872 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=5872 delay_slots=5 */ + 9093 "00000000" // /* MW 5 */ + 9094 "00000000" // /* MW 4 */ + 9095 "01111000" // /* MW 3 */ + 9096 "00001011" // /* MW 2 */ + 9097 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9099 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9100 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9101 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9103 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9105 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z15_b13749_wrapperPPv__end +.label __Z15_b13749_wrapperPPv___func_end0 + 9107 "00000000" // /* MW 1 */ +.label __ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj___func_begin0 +.label _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj +.function buffer_pad_adf_wrapper, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> > > _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 24 first +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 26 25 +.function_start + 9120 "11010100" // LDA el0, [p1]; MOV r17, p2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9121 "10000001" // /* MW 5 */ + 9122 "10101001" // /* MW 4 */ + 9123 "11011000" // /* MW 3 */ + 9124 "10000101" // /* MW 2 */ + 9125 "00100000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 26 25 first + 9126 "00011000" // ADD.NC p1, r17, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9127 "10000010" // /* MW 3 */ + 9128 "01101000" // /* MW 2 */ + 9129 "00011001" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 26 25 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9130 "10011000" // LDA r18, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9131 "01010110" // /* MW 3 */ + 9132 "00011110" // /* MW 2 */ + 9133 "00000001" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 27 33 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9134 "10011000" // LDA r15, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9135 "11110110" // /* MW 3 */ + 9136 "00000101" // /* MW 2 */ + 9137 "00000001" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9139 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9141 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9143 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9145 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9146 "11111000" // MOV r0, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9147 "10100000" // /* MW 3 */ + 9148 "00010111" // /* MW 2 */ + 9149 "00011000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 24 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 9150 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9151 "00000001" // /* MW 5 */ + 9152 "00000000" // /* MW 4 */ + 9153 "00000000" // /* MW 3 */ + 9154 "00001000" // /* MW 2 */ + 9155 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 43 first +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 72 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 9156 "01100100" // MUL r18, r15, r18; MOV r16, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9157 "11111101" // /* MW 5 */ + 9158 "00111111" // /* MW 4 */ + 9159 "11111000" // /* MW 3 */ + 9160 "10100101" // /* MW 2 */ + 9161 "01111100" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 72 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9162 "00111010" // ST r18, [sp, #-20]; MOVXM r17, #1073741823 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9163 "10010001" // /* MW 9 */ + 9164 "11111111" // /* MW 8 */ + 9165 "00101111" // /* MW 7 */ + 9166 "11111110" // /* MW 6 */ + 9167 "11111111" // /* MW 5 */ + 9168 "00001111" // /* MW 4 */ + 9169 "10110000" // /* MW 3 */ + 9170 "11001010" // /* MW 2 */ + 9171 "11111101" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 72 + 9172 "10011000" // LSHL r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9173 "00001101" // /* MW 3 */ + 9174 "10100001" // /* MW 2 */ + 9175 "00010100" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 72 + 9176 "10011000" // AND r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9177 "00000100" // /* MW 3 */ + 9178 "01100001" // /* MW 2 */ + 9179 "00010100" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 4 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 22 + 9180 "10000100" // JZ r16, #9264 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9264 delay_slots=5 */ + 9181 "00000001" // /* MW 5 */ + 9182 "00000000" // /* MW 4 */ + 9183 "00011000" // /* MW 3 */ + 9184 "00010010" // /* MW 2 */ + 9185 "10000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.delay_slot + 9186 "11010100" // LDA p7, [p0]; MOV p0, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9187 "10000001" // /* MW 5 */ + 9188 "11011101" // /* MW 4 */ + 9189 "11010000" // /* MW 3 */ + 9190 "11110011" // /* MW 2 */ + 9191 "00000000" // /* MW 1 */ +.delay_slot + 9192 "10011000" // ST p0, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9193 "00011101" // /* MW 3 */ + 9194 "11111000" // /* MW 2 */ + 9195 "00001111" // /* MW 1 */ +.delay_slot + 9196 "10011000" // ST r14, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9197 "11010101" // /* MW 3 */ + 9198 "11110101" // /* MW 2 */ + 9199 "00001111" // /* MW 1 */ +.delay_slot + 9200 "10011000" // ST lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9201 "00111101" // /* MW 3 */ + 9202 "11110000" // /* MW 2 */ + 9203 "00001111" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 25 24 first +.delay_slot + 9204 "00001100" // LDA r14, [p1, #-8]; ST r0, [sp, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9205 "00101011" // /* MW 5 */ + 9206 "11111000" // /* MW 4 */ + 9207 "11011111" // /* MW 3 */ + 9208 "10111010" // /* MW 2 */ + 9209 "00111100" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 37 31 + 9210 "01011100" // ST el0, [sp, #-24]; MOVX r0, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9211 "00000010" // /* MW 5 */ + 9212 "00000000" // /* MW 4 */ + 9213 "10110000" // /* MW 3 */ + 9214 "00000101" // /* MW 2 */ + 9215 "11111101" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 37 31 + 9216 "00011000" // LDA p1, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9217 "10011001" // /* MW 3 */ + 9218 "11101000" // /* MW 2 */ + 9219 "00000111" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 37 31 first +.no_stack_arguments + 9220 "00000100" // JL #12608 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12608 delay_slots=5 */ + 9221 "00000001" // /* MW 5 */ + 9222 "00000000" // /* MW 4 */ + 9223 "10100000" // /* MW 3 */ + 9224 "00011000" // /* MW 2 */ + 9225 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 4 +.delay_slot + 9226 "00011000" // MOVX r17, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9227 "00001001" // /* MW 3 */ + 9228 "00100010" // /* MW 2 */ + 9229 "00010000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 4 first +.delay_slot + 9230 "10011000" // LSHL r1, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9231 "00011101" // /* MW 3 */ + 9232 "00000011" // /* MW 2 */ + 9233 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9234 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9235 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9236 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9237 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9238 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9239 "01111110" // /* MW 9 */ + 9240 "10100101" // /* MW 8 */ + 9241 "00000001" // /* MW 7 */ + 9242 "00000000" // /* MW 6 */ + 9243 "00010000" // /* MW 5 */ + 9244 "00000000" // /* MW 4 */ + 9245 "11110000" // /* MW 3 */ + 9246 "00101100" // /* MW 2 */ + 9247 "00000000" // /* MW 1 */ +.return_address + 9248 "10000100" // J #9280 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9280 delay_slots=5 */ + 9249 "00000000" // /* MW 5 */ + 9250 "00000000" // /* MW 4 */ + 9251 "00100000" // /* MW 3 */ + 9252 "00010010" // /* MW 2 */ + 9253 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9255 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9256 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9257 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9259 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9261 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9263 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_144 + 9264 "11100001" // NOPA; NOPB; ST el0, [sp, #-24]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9265 "00000000" // /* MW 15 */ + 9266 "00000000" // /* MW 14 */ + 9267 "01111000" // /* MW 13 */ + 9268 "10100101" // /* MW 12 */ + 9269 "00000001" // /* MW 11 */ + 9270 "00000000" // /* MW 10 */ + 9271 "00000000" // /* MW 9 */ + 9272 "10000000" // /* MW 8 */ + 9273 "00101101" // /* MW 7 */ + 9274 "11101000" // /* MW 6 */ + 9275 "00100111" // /* MW 5 */ + 9276 "00000000" // /* MW 4 */ + 9277 "11110000" // /* MW 3 */ + 9278 "00101100" // /* MW 2 */ + 9279 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_160 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 40 4 first +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 40 22 first + 9280 "10000100" // JZ r15, #9552 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9552 delay_slots=5 */ + 9281 "00000001" // /* MW 5 */ + 9282 "00000000" // /* MW 4 */ + 9283 "10101000" // /* MW 3 */ + 9284 "00010010" // /* MW 2 */ + 9285 "01111000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9287 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9289 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9291 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9293 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9294 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9295 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 41 8 first + 9296 "10111010" // LDA r17, [sp, #-20]; MOVXM ls, #9392 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9297 "00010000" // /* MW 9 */ + 9298 "01011000" // /* MW 8 */ + 9299 "01111010" // /* MW 7 */ + 9300 "00001000" // /* MW 6 */ + 9301 "00000000" // /* MW 5 */ + 9302 "00000000" // /* MW 4 */ + 9303 "00100000" // /* MW 3 */ + 9304 "11000110" // /* MW 2 */ + 9305 "11111101" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 41 8 + 9306 "10111010" // MOVA r19, #1; MOVXM le, #9488 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9307 "00010000" // /* MW 9 */ + 9308 "10001000" // /* MW 8 */ + 9309 "10111010" // /* MW 7 */ + 9310 "00001001" // /* MW 6 */ + 9311 "00000000" // /* MW 5 */ + 9312 "00000000" // /* MW 4 */ + 9313 "00000000" // /* MW 3 */ + 9314 "00110011" // /* MW 2 */ + 9315 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 42 36 + 9316 "10111010" // LDA r18, [sp, #-24]; LSHL r20, r14, r19; ADD.NC r16, r15, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9317 "11001000" // /* MW 9 */ + 9318 "11111111" // /* MW 8 */ + 9319 "00001011" // /* MW 7 */ + 9320 "11101110" // /* MW 6 */ + 9321 "01001001" // /* MW 5 */ + 9322 "00011101" // /* MW 4 */ + 9323 "00100000" // /* MW 3 */ + 9324 "01001010" // /* MW 2 */ + 9325 "11111101" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 40 4 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 45 + 9326 "10111010" // LDA lr, [sp, #-16]; MOVXM p0, #9360 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9327 "00010000" // /* MW 9 */ + 9328 "01001000" // /* MW 8 */ + 9329 "00110010" // /* MW 7 */ + 9330 "00001000" // /* MW 6 */ + 9331 "00000000" // /* MW 5 */ + 9332 "00000000" // /* MW 4 */ + 9333 "00100000" // /* MW 3 */ + 9334 "00000111" // /* MW 2 */ + 9335 "11111110" // /* MW 1 */ + 9336 "11111000" // MOV m0, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9337 "00100000" // /* MW 3 */ + 9338 "00001010" // /* MW 2 */ + 9339 "00011000" // /* MW 1 */ + 9340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9341 "00000000" // /* MW 1 */ + 9342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9343 "00000000" // /* MW 1 */ + 9344 "11100001" // NOPA; NOPB; NOPS; LSHL r17, r17, r19; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9345 "00000000" // /* MW 15 */ + 9346 "00000000" // /* MW 14 */ + 9347 "01111000" // /* MW 13 */ + 9348 "10100101" // /* MW 12 */ + 9349 "00000001" // /* MW 11 */ + 9350 "11101100" // /* MW 10 */ + 9351 "00011001" // /* MW 9 */ + 9352 "00100011" // /* MW 8 */ + 9353 "01011011" // /* MW 7 */ + 9354 "00000001" // /* MW 6 */ + 9355 "00100000" // /* MW 5 */ + 9356 "00000000" // /* MW 4 */ + 9357 "11110000" // /* MW 3 */ + 9358 "00101100" // /* MW 2 */ + 9359 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_240 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 41 8 +.loop_nesting 1 + 9360 "10000100" // JZ r14, #9504 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9504 delay_slots=5 */ + 9361 "00000001" // /* MW 5 */ + 9362 "00000000" // /* MW 4 */ + 9363 "10010000" // /* MW 3 */ + 9364 "00010010" // /* MW 2 */ + 9365 "01110000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9367 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9369 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9371 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9373 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9375 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 41 8 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 42 36 + 9376 "00000010" // MOVS p2, p7; MOV lc, r14 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9377 "01110000" // /* MW 7 */ + 9378 "10010000" // /* MW 6 */ + 9379 "10111011" // /* MW 5 */ + 9380 "00000010" // /* MW 4 */ + 9381 "01100000" // /* MW 3 */ + 9382 "10010001" // /* MW 2 */ + 9383 "01010011" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 42 36 + 9384 "00000010" // NOPS; MOV p1, r18 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9385 "01110000" // /* MW 7 */ + 9386 "10010000" // /* MW 6 */ + 9387 "10110100" // /* MW 5 */ + 9388 "00000000" // /* MW 4 */ + 9389 "01100000" // /* MW 3 */ + 9390 "00101011" // /* MW 2 */ + 9391 "00000000" // /* MW 1 */ +.label ZLS_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_272 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 42 36 first +.begin_of_loop +.loop_nesting 2 + 9392 "11100001" // LDA.s16 r19, [p2], #2; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9393 "00000000" // /* MW 15 */ + 9394 "00000000" // /* MW 14 */ + 9395 "01111000" // /* MW 13 */ + 9396 "10100101" // /* MW 12 */ + 9397 "00000001" // /* MW 11 */ + 9398 "00000000" // /* MW 10 */ + 9399 "00000000" // /* MW 9 */ + 9400 "00000000" // /* MW 8 */ + 9401 "01011011" // /* MW 7 */ + 9402 "00000001" // /* MW 6 */ + 9403 "00100000" // /* MW 5 */ + 9404 "00000000" // /* MW 4 */ + 9405 "01010000" // /* MW 3 */ + 9406 "11001110" // /* MW 2 */ + 9407 "01000011" // /* MW 1 */ + 9408 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9409 "00000000" // /* MW 15 */ + 9410 "00000000" // /* MW 14 */ + 9411 "01111000" // /* MW 13 */ + 9412 "10100101" // /* MW 12 */ + 9413 "00000001" // /* MW 11 */ + 9414 "00000000" // /* MW 10 */ + 9415 "00000000" // /* MW 9 */ + 9416 "00000000" // /* MW 8 */ + 9417 "01011011" // /* MW 7 */ + 9418 "00000001" // /* MW 6 */ + 9419 "00100000" // /* MW 5 */ + 9420 "00000000" // /* MW 4 */ + 9421 "11110000" // /* MW 3 */ + 9422 "00101100" // /* MW 2 */ + 9423 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 42 36 + 9424 "11100001" // ST.s16 r19, [p1], #2; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9425 "00000000" // /* MW 15 */ + 9426 "00000000" // /* MW 14 */ + 9427 "01111000" // /* MW 13 */ + 9428 "10100101" // /* MW 12 */ + 9429 "00000001" // /* MW 11 */ + 9430 "00000000" // /* MW 10 */ + 9431 "00000000" // /* MW 9 */ + 9432 "00000000" // /* MW 8 */ + 9433 "01011011" // /* MW 7 */ + 9434 "00000001" // /* MW 6 */ + 9435 "00100000" // /* MW 5 */ + 9436 "00000000" // /* MW 4 */ + 9437 "11100000" // /* MW 3 */ + 9438 "11001110" // /* MW 2 */ + 9439 "00100011" // /* MW 1 */ + 9440 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9441 "00000000" // /* MW 15 */ + 9442 "00000000" // /* MW 14 */ + 9443 "01111000" // /* MW 13 */ + 9444 "10100101" // /* MW 12 */ + 9445 "00000001" // /* MW 11 */ + 9446 "00000000" // /* MW 10 */ + 9447 "00000000" // /* MW 9 */ + 9448 "00000000" // /* MW 8 */ + 9449 "01011011" // /* MW 7 */ + 9450 "00000001" // /* MW 6 */ + 9451 "00100000" // /* MW 5 */ + 9452 "00000000" // /* MW 4 */ + 9453 "11110000" // /* MW 3 */ + 9454 "00101100" // /* MW 2 */ + 9455 "00000000" // /* MW 1 */ + 9456 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9457 "00000000" // /* MW 15 */ + 9458 "00000000" // /* MW 14 */ + 9459 "01111000" // /* MW 13 */ + 9460 "10100101" // /* MW 12 */ + 9461 "00000001" // /* MW 11 */ + 9462 "00000000" // /* MW 10 */ + 9463 "00000000" // /* MW 9 */ + 9464 "00000000" // /* MW 8 */ + 9465 "01011011" // /* MW 7 */ + 9466 "00000001" // /* MW 6 */ + 9467 "00100000" // /* MW 5 */ + 9468 "00000000" // /* MW 4 */ + 9469 "11110000" // /* MW 3 */ + 9470 "00101100" // /* MW 2 */ + 9471 "00000000" // /* MW 1 */ + 9472 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9473 "00000000" // /* MW 15 */ + 9474 "00000000" // /* MW 14 */ + 9475 "01111000" // /* MW 13 */ + 9476 "10100101" // /* MW 12 */ + 9477 "00000001" // /* MW 11 */ + 9478 "00000000" // /* MW 10 */ + 9479 "00000000" // /* MW 9 */ + 9480 "00000000" // /* MW 8 */ + 9481 "01011011" // /* MW 7 */ + 9482 "00000001" // /* MW 6 */ + 9483 "00100000" // /* MW 5 */ + 9484 "00000000" // /* MW 4 */ + 9485 "11110000" // /* MW 3 */ + 9486 "00101100" // /* MW 2 */ + 9487 "00000000" // /* MW 1 */ +.label ZLE_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_368 +.end_of_loop + 9488 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9489 "00000000" // /* MW 15 */ + 9490 "00000000" // /* MW 14 */ + 9491 "01111000" // /* MW 13 */ + 9492 "10100101" // /* MW 12 */ + 9493 "00000001" // /* MW 11 */ + 9494 "00000000" // /* MW 10 */ + 9495 "00000000" // /* MW 9 */ + 9496 "00000000" // /* MW 8 */ + 9497 "01011011" // /* MW 7 */ + 9498 "00000001" // /* MW 6 */ + 9499 "00100000" // /* MW 5 */ + 9500 "00000000" // /* MW 4 */ + 9501 "11110000" // /* MW 3 */ + 9502 "00101100" // /* MW 2 */ + 9503 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_384 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 40 4 first +.loop_nesting 1 + 9504 "00011100" // PADDB [p7], m0; JNZD r16, r16, p0 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 9505 "01000000" // /* MW 5 */ + 9506 "01000000" // /* MW 4 */ + 9507 "00001000" // /* MW 3 */ + 9508 "01110010" // /* MW 2 */ + 9509 "11100001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9511 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9513 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9515 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9517 "00000000" // /* MW 1 */ +.delay_slot + 9518 "01011000" // ADD.NC r18, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9519 "11001001" // /* MW 3 */ + 9520 "10011000" // /* MW 2 */ + 9521 "00011100" // /* MW 1 */ +.loop_nesting 0 + 9522 "10000100" // J #9568 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9568 delay_slots=5 */ + 9523 "00000000" // /* MW 5 */ + 9524 "00000000" // /* MW 4 */ + 9525 "10110000" // /* MW 3 */ + 9526 "00010010" // /* MW 2 */ + 9527 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9529 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9531 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9533 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9535 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9536 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9537 "00000000" // /* MW 15 */ + 9538 "00000000" // /* MW 14 */ + 9539 "01111000" // /* MW 13 */ + 9540 "10100101" // /* MW 12 */ + 9541 "00000001" // /* MW 11 */ + 9542 "00000000" // /* MW 10 */ + 9543 "00000000" // /* MW 9 */ + 9544 "00000000" // /* MW 8 */ + 9545 "01011011" // /* MW 7 */ + 9546 "00000001" // /* MW 6 */ + 9547 "00100000" // /* MW 5 */ + 9548 "00000000" // /* MW 4 */ + 9549 "11110000" // /* MW 3 */ + 9550 "00101100" // /* MW 2 */ + 9551 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_432 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 45 + 9552 "11100001" // LDA lr, [sp, #-16]; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9553 "00000000" // /* MW 15 */ + 9554 "00000000" // /* MW 14 */ + 9555 "01111000" // /* MW 13 */ + 9556 "10100101" // /* MW 12 */ + 9557 "00000001" // /* MW 11 */ + 9558 "00000000" // /* MW 10 */ + 9559 "00000000" // /* MW 9 */ + 9560 "00000000" // /* MW 8 */ + 9561 "01011011" // /* MW 7 */ + 9562 "00000001" // /* MW 6 */ + 9563 "00100000" // /* MW 5 */ + 9564 "00000000" // /* MW 4 */ + 9565 "00100000" // /* MW 3 */ + 9566 "00000111" // /* MW 2 */ + 9567 "11111110" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_448 + 9568 "00011000" // LDA r14, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9569 "11010001" // /* MW 3 */ + 9570 "11110101" // /* MW 2 */ + 9571 "00000111" // /* MW 1 */ + 9572 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9573 "10011001" // /* MW 3 */ + 9574 "11111011" // /* MW 2 */ + 9575 "00000111" // /* MW 1 */ + 9576 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9577 "11110001" // /* MW 3 */ + 9578 "11111101" // /* MW 2 */ + 9579 "00000111" // /* MW 1 */ + 9580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9581 "00000000" // /* MW 1 */ + 9582 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9583 "00000000" // /* MW 1 */ + 9584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9585 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 45 first + 9586 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9587 "00000000" // /* MW 3 */ + 9588 "00101000" // /* MW 2 */ + 9589 "00010000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 45 +.delay_slot + 9590 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9591 "00000001" // /* MW 5 */ + 9592 "00000000" // /* MW 4 */ + 9593 "00000000" // /* MW 3 */ + 9594 "11111000" // /* MW 2 */ + 9595 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9596 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9597 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9598 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9599 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9601 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj__end +.label __ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj___func_end0 + 9603 "00000000" // /* MW 1 */ +.label __Z14_b8148_wrapperPPv___func_begin0 +.label _Z14_b8148_wrapperPPv +.function _b8148_wrapper _Z14_b8148_wrapperPPv +.src_ref 0 "0_0_reloadable77.cc" 30 first +.src_ref 0 "0_0_reloadable77.cc" 32 79 +.function_start + 9616 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9617 "11000000" // /* MW 3 */ + 9618 "01100000" // /* MW 2 */ + 9619 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 32 79 first + 9620 "10011000" // LDA p0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9621 "00011110" // /* MW 3 */ + 9622 "00011100" // /* MW 2 */ + 9623 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 34 46 first + 9624 "10011000" // LDA p2, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9625 "00011110" // /* MW 3 */ + 9626 "00010101" // /* MW 2 */ + 9627 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 33 80 first + 9628 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9629 "10011110" // /* MW 3 */ + 9630 "00000100" // /* MW 2 */ + 9631 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 31 4 first +.tail_call + 9632 "10000100" // J #9120 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=9120 delay_slots=5 */ + 9633 "00000000" // /* MW 5 */ + 9634 "00000000" // /* MW 4 */ + 9635 "11010000" // /* MW 3 */ + 9636 "00010001" // /* MW 2 */ + 9637 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9639 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9641 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9643 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9645 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z14_b8148_wrapperPPv__end +.label __Z14_b8148_wrapperPPv___func_end0 + 9647 "00000000" // /* MW 1 */ +.label _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj +.label __ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj___func_begin0 +.function load_slice_generic_innermost_rtp _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj +.src_ref 3 "slice_generic_innermost_params.h" 40 first +.src_ref 3 "slice_generic_innermost_params.h" 41 19 first +.function_start + 9648 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9649 "00101110" // /* MW 3 */ + 9650 "00011100" // /* MW 2 */ + 9651 "00000001" // /* MW 1 */ + 9652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9653 "00000000" // /* MW 1 */ + 9654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9655 "00000000" // /* MW 1 */ + 9656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9657 "00000000" // /* MW 1 */ + 9658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9659 "00000000" // /* MW 1 */ + 9660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9661 "00000000" // /* MW 1 */ + 9662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9663 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 41 17 first + 9664 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9665 "00101001" // /* MW 3 */ + 9666 "00011100" // /* MW 2 */ + 9667 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 42 19 first + 9668 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9669 "00101110" // /* MW 3 */ + 9670 "00011100" // /* MW 2 */ + 9671 "00000001" // /* MW 1 */ + 9672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9673 "00000000" // /* MW 1 */ + 9674 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9675 "00000000" // /* MW 1 */ + 9676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9677 "00000000" // /* MW 1 */ + 9678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9679 "00000000" // /* MW 1 */ + 9680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9681 "00000000" // /* MW 1 */ + 9682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9683 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 42 17 + 9684 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9685 "00101001" // /* MW 3 */ + 9686 "00011100" // /* MW 2 */ + 9687 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 43 19 first + 9688 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9689 "00101110" // /* MW 3 */ + 9690 "00011100" // /* MW 2 */ + 9691 "00000001" // /* MW 1 */ + 9692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9693 "00000000" // /* MW 1 */ + 9694 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9695 "00000000" // /* MW 1 */ + 9696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9697 "00000000" // /* MW 1 */ + 9698 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9699 "00000000" // /* MW 1 */ + 9700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9701 "00000000" // /* MW 1 */ + 9702 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9703 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 43 17 + 9704 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9705 "00101001" // /* MW 3 */ + 9706 "00011100" // /* MW 2 */ + 9707 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 44 19 first + 9708 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9709 "00101110" // /* MW 3 */ + 9710 "00011100" // /* MW 2 */ + 9711 "00000001" // /* MW 1 */ + 9712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9713 "00000000" // /* MW 1 */ + 9714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9715 "00000000" // /* MW 1 */ + 9716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9717 "00000000" // /* MW 1 */ + 9718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9719 "00000000" // /* MW 1 */ + 9720 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9721 "00000000" // /* MW 1 */ + 9722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9723 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 44 17 + 9724 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9725 "00101001" // /* MW 3 */ + 9726 "00011100" // /* MW 2 */ + 9727 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 45 19 first + 9728 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9729 "00101110" // /* MW 3 */ + 9730 "00011100" // /* MW 2 */ + 9731 "00000001" // /* MW 1 */ + 9732 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9733 "00000000" // /* MW 1 */ + 9734 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9735 "00000000" // /* MW 1 */ + 9736 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9737 "00000000" // /* MW 1 */ + 9738 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9739 "00000000" // /* MW 1 */ + 9740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9741 "00000000" // /* MW 1 */ + 9742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9743 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 45 17 + 9744 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9745 "00101001" // /* MW 3 */ + 9746 "00011100" // /* MW 2 */ + 9747 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 46 17 first + 9748 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9749 "00101110" // /* MW 3 */ + 9750 "00011100" // /* MW 2 */ + 9751 "00000001" // /* MW 1 */ + 9752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9753 "00000000" // /* MW 1 */ + 9754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9755 "00000000" // /* MW 1 */ + 9756 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9757 "00000000" // /* MW 1 */ + 9758 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9759 "00000000" // /* MW 1 */ + 9760 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9761 "00000000" // /* MW 1 */ + 9762 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9763 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 46 15 + 9764 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9765 "00101001" // /* MW 3 */ + 9766 "00011100" // /* MW 2 */ + 9767 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 47 18 first + 9768 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9769 "00101110" // /* MW 3 */ + 9770 "00000100" // /* MW 2 */ + 9771 "00000001" // /* MW 1 */ + 9772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9773 "00000000" // /* MW 1 */ + 9774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9775 "00000000" // /* MW 1 */ + 9776 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9777 "00000000" // /* MW 1 */ + 9778 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9779 "00000000" // /* MW 1 */ + 9780 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9781 "00000000" // /* MW 1 */ + 9782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9783 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 47 16 + 9784 "10011000" // ST el0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9785 "00101001" // /* MW 3 */ + 9786 "00000100" // /* MW 2 */ + 9787 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 48 18 first + 9788 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9789 "00101110" // /* MW 3 */ + 9790 "00010100" // /* MW 2 */ + 9791 "00000001" // /* MW 1 */ + 9792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9793 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 49 first + 9794 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9795 "00000000" // /* MW 3 */ + 9796 "00101000" // /* MW 2 */ + 9797 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9799 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9801 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9803 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9805 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 48 16 first +.delay_slot + 9806 "10011000" // ST el0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9807 "00101001" // /* MW 3 */ + 9808 "00010100" // /* MW 2 */ +.label _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj__end +.label __ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj___func_end0 + 9809 "00001000" // /* MW 1 */ +.label _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params +.label __ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params___func_begin0 +.function setup_slice_generic_innermost _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params +.src_ref 3 "slice_generic_innermost_params.h" 52 first +.src_ref 3 "slice_generic_innermost_params.h" 53 25 first +.src_ref 3 "slice_generic_innermost_params.h" 55 42 +.src_ref 3 "slice_generic_innermost_params.h" 58 40 +.function_start + 9824 "10111010" // LDA r1, [p0], #4; MOVX r0, #1; MOV m1, #32 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9825 "01011000" // /* MW 9 */ + 9826 "00100000" // /* MW 8 */ + 9827 "10000000" // /* MW 7 */ + 9828 "00101000" // /* MW 6 */ + 9829 "00000000" // /* MW 5 */ + 9830 "00000000" // /* MW 4 */ + 9831 "11010000" // /* MW 3 */ + 9832 "10000110" // /* MW 2 */ + 9833 "00000011" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 53 38 first +.src_ref 3 "slice_generic_innermost_params.h" 58 30 +.src_ref 3 "slice_generic_innermost_params.h" 59 31 + 9834 "10111010" // LDA r5, [p0], #4; MOVX r3, #2; MOV r2, #-6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9835 "01011000" // /* MW 9 */ + 9836 "11111010" // /* MW 8 */ + 9837 "01001111" // /* MW 7 */ + 9838 "01001000" // /* MW 6 */ + 9839 "00110000" // /* MW 5 */ + 9840 "00000000" // /* MW 4 */ + 9841 "11010000" // /* MW 3 */ + 9842 "10010110" // /* MW 2 */ + 9843 "00000011" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 53 51 +.src_ref 3 "slice_generic_innermost_params.h" 60 27 +.src_ref 3 "slice_generic_innermost_params.h" 62 27 + 9844 "01010100" // LDA r4, [p0], #8; MOV m0, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9845 "00000001" // /* MW 5 */ + 9846 "00000001" // /* MW 4 */ + 9847 "11010000" // /* MW 3 */ + 9848 "10010010" // /* MW 2 */ + 9849 "00000101" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 55 42 first +.src_ref 3 "slice_generic_innermost_params.h" 60 27 + 9850 "01010100" // LDA r6, [p0], m1; MOV dj0, #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9851 "00000001" // /* MW 5 */ + 9852 "00000010" // /* MW 4 */ + 9853 "11010001" // /* MW 3 */ + 9854 "00011010" // /* MW 2 */ + 9855 "00000101" // /* MW 1 */ + 9856 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9857 "00000000" // /* MW 1 */ + 9858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9859 "00000000" // /* MW 1 */ + 9860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9861 "00000000" // /* MW 1 */ + 9862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9863 "00000000" // /* MW 1 */ + 9864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9865 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 53 30 first + 9866 "10011000" // MUL r1, r5, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9867 "00011111" // /* MW 3 */ + 9868 "01000010" // /* MW 2 */ + 9869 "00010001" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 58 40 first + 9870 "10011000" // AND r0, r6, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9871 "00000100" // /* MW 3 */ + 9872 "10000000" // /* MW 2 */ + 9873 "00010001" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 58 30 + 9874 "10011000" // OR r0, r3, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9875 "00000101" // /* MW 3 */ + 9876 "11000000" // /* MW 2 */ + 9877 "00010000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 53 43 first +.src_ref 3 "slice_generic_innermost_params.h" 58 28 + 9878 "01011100" // ST r0, [p0], #-16; MUL r1, r1, r4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9879 "10011111" // /* MW 5 */ + 9880 "10000100" // /* MW 4 */ + 9881 "00110000" // /* MW 3 */ + 9882 "10000010" // /* MW 2 */ + 9883 "00011001" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 75 first + 9884 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9885 "00000000" // /* MW 3 */ + 9886 "00101000" // /* MW 2 */ + 9887 "00010000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 59 31 first +.delay_slot + 9888 "10011000" // LSHL r0, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9889 "00101101" // /* MW 3 */ + 9890 "01000000" // /* MW 2 */ + 9891 "00010000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 59 25 +.delay_slot + 9892 "10011000" // ST r0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9893 "00010001" // /* MW 3 */ + 9894 "00011100" // /* MW 2 */ + 9895 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 60 27 first +.delay_slot + 9896 "10011000" // ST m0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9897 "00000001" // /* MW 3 */ + 9898 "00011100" // /* MW 2 */ + 9899 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 60 27 +.delay_slot + 9900 "10011000" // ST dj0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9901 "01000001" // /* MW 3 */ + 9902 "00000100" // /* MW 2 */ + 9903 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 62 27 first +.delay_slot + 9904 "10011000" // ST m0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9905 "00000001" // /* MW 3 */ + 9906 "00010100" // /* MW 2 */ +.label _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params__end +.label __ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params___func_end0 + 9907 "00001000" // /* MW 1 */ +.label _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj +.label __ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj___func_begin0 +.function setup_slice_generic_innermost_params _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj +.src_ref 3 "slice_generic_innermost_params.h" 79 first +.src_ref 3 "slice_generic_innermost_params.h" 80 4 first +.function_start +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.noswbrkpt + 9920 "00000100" // JL #9648 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9648 delay_slots=5 */ + 9921 "00000001" // /* MW 5 */ + 9922 "00000000" // /* MW 4 */ + 9923 "11011000" // /* MW 3 */ + 9924 "00010010" // /* MW 2 */ + 9925 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 81 4 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9926 "11111000" // MOV dc0, lr /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9927 "11100000" // /* MW 3 */ + 9928 "11000001" // /* MW 2 */ + 9929 "00011000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 81 4 +.delay_slot + 9930 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9931 "11000000" // /* MW 3 */ + 9932 "01100000" // /* MW 2 */ + 9933 "00011010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9935 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9937 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9938 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 9939 "00011100" // /* MW 13 */ + 9940 "00000000" // /* MW 12 */ + 9941 "00000000" // /* MW 11 */ + 9942 "01010111" // /* MW 10 */ + 9943 "00011010" // /* MW 9 */ + 9944 "01000000" // /* MW 8 */ + 9945 "00000000" // /* MW 7 */ + 9946 "00000000" // /* MW 6 */ + 9947 "10110110" // /* MW 5 */ + 9948 "00000010" // /* MW 4 */ + 9949 "11110000" // /* MW 3 */ + 9950 "00101100" // /* MW 2 */ + 9951 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 81 4 first +.tail_call +.return_address + 9952 "10000100" // J #9824 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=9824 delay_slots=5 */ + 9953 "00000000" // /* MW 5 */ + 9954 "00000000" // /* MW 4 */ + 9955 "00110000" // /* MW 3 */ + 9956 "00010011" // /* MW 2 */ + 9957 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 81 4 +.delay_slot + 9958 "11111000" // MOV lr, dc0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9959 "10000000" // /* MW 3 */ + 9960 "01110001" // /* MW 2 */ + 9961 "00011111" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 81 4 +.delay_slot + 9962 "11111000" // MOV p0, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9963 "11000000" // /* MW 3 */ + 9964 "01100100" // /* MW 2 */ + 9965 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9967 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9969 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj__end +.label __ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj___func_end0 + 9971 "00000000" // /* MW 1 */ +.label __Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params___func_begin0 +.label _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params +.function slice_generic_innermost _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "slice_generic_innermost.h" 25 first +.src_ref 3 "slice_generic_innermost.h" 35 60 +.src_ref 3 "slice_generic_innermost.h" 54 19 +.function_start + 9984 "00000010" // MOVS p5, p1; MOV r0, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9985 "01110000" // /* MW 7 */ + 9986 "01100000" // /* MW 6 */ + 9987 "00001010" // /* MW 5 */ + 9988 "00000000" // /* MW 4 */ + 9989 "01100000" // /* MW 3 */ + 9990 "10010001" // /* MW 2 */ + 9991 "10110000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 35 60 first + 9992 "00011000" // ADD.NC p3, r0, #36 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9993 "00010010" // /* MW 3 */ + 9994 "01100000" // /* MW 2 */ + 9995 "00011011" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 35 60 + 9996 "11010100" // LDA m2, [p3], #4; MOV r0, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9997 "10000001" // /* MW 5 */ + 9998 "00111101" // /* MW 4 */ + 9999 "11010000" // /* MW 3 */ + 10000 "10100000" // /* MW 2 */ + 10001 "01100011" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 36 61 first + 10002 "10011000" // LDA m0, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10003 "00000110" // /* MW 3 */ + 10004 "00011100" // /* MW 2 */ + 10005 "00000011" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 40 35 first + 10006 "10011000" // LDA r2, [p3, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10007 "01010110" // /* MW 3 */ + 10008 "11010100" // /* MW 2 */ + 10009 "00000011" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 38 59 first + 10010 "10011000" // LDA m1, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10011 "10000110" // /* MW 3 */ + 10012 "00000100" // /* MW 2 */ + 10013 "00000011" // /* MW 1 */ + 10014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10015 "00000000" // /* MW 1 */ + 10016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10017 "00000000" // /* MW 1 */ + 10018 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10019 "00000000" // /* MW 1 */ + 10020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10021 "00000000" // /* MW 1 */ + 10022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10023 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 40 8 first +.src_ref 3 "slice_generic_innermost.h" 40 26 first + 10024 "10000100" // JZ r2, #10448 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10448 delay_slots=5 */ + 10025 "00000001" // /* MW 5 */ + 10026 "00000000" // /* MW 4 */ + 10027 "01101000" // /* MW 3 */ + 10028 "00010100" // /* MW 2 */ + 10029 "00010000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "slice_generic_innermost.h" 36 39 +.src_ref 3 "slice_generic_innermost.h" 50 19 +.delay_slot + 10030 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10031 "11000000" // /* MW 3 */ + 10032 "01100000" // /* MW 2 */ + 10033 "00011111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "slice_generic_innermost.h" 36 39 first +.src_ref 3 "slice_generic_innermost.h" 50 19 +.delay_slot + 10034 "11110100" // PADDB [p7], m0; MOV p3, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10035 "10000001" // /* MW 5 */ + 10036 "11011101" // /* MW 4 */ + 10037 "00000110" // /* MW 3 */ + 10038 "01110010" // /* MW 2 */ + 10039 "11100001" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 35 38 first +.delay_slot + 10040 "00011000" // PADDB [p0], m2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10041 "10010000" // /* MW 3 */ + 10042 "01001011" // /* MW 2 */ + 10043 "00111000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "slice_generic_innermost.h" 37 39 first +.src_ref 3 "slice_generic_innermost.h" 52 20 +.delay_slot + 10044 "11110100" // PADDB [p0], m0; MOV p4, p0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10045 "10000001" // /* MW 5 */ + 10046 "11000001" // /* MW 4 */ + 10047 "00001000" // /* MW 3 */ + 10048 "01110010" // /* MW 2 */ + 10049 "00000001" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 38 37 first +.delay_slot + 10050 "00011000" // PADDB [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10051 "10010000" // /* MW 3 */ + 10052 "00101011" // /* MW 2 */ + 10053 "00111001" // /* MW 1 */ + 10054 "00011000" // MOVX r1, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10055 "00010001" // /* MW 3 */ + 10056 "00000010" // /* MW 2 */ + 10057 "00010000" // /* MW 1 */ + 10058 "10011000" // LTU r3, r2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10059 "00011100" // /* MW 3 */ + 10060 "10000110" // /* MW 2 */ + 10061 "00010000" // /* MW 1 */ + 10062 "10000100" // JNZ r3, #10304 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10304 delay_slots=5 */ + 10063 "00000001" // /* MW 5 */ + 10064 "01000000" // /* MW 4 */ + 10065 "00100000" // /* MW 3 */ + 10066 "00010100" // /* MW 2 */ + 10067 "00011000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 40 8 +.delay_slot + 10068 "10111000" // MOV dj0, #48 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10069 "01100000" // /* MW 3 */ + 10070 "10000000" // /* MW 2 */ + 10071 "00011000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 40 8 first +.delay_slot + 10072 "10011000" // LDA r1, [p2, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10073 "00110110" // /* MW 3 */ + 10074 "00000000" // /* MW 2 */ + 10075 "00000010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10077 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10078 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10079 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10080 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10081 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 40 8 +.src_ref 3 "slice_generic_innermost.h" 50 19 first +.src_ref 3 "slice_generic_innermost.h" 52 20 first + 10082 "10110110" // VLDA x2, [p4], m0; VLDB x1, [p3], m0; MOVXM ls, #10176 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10083 "00010000" // /* MW 11 */ + 10084 "11100000" // /* MW 10 */ + 10085 "01111011" // /* MW 9 */ + 10086 "00001000" // /* MW 8 */ + 10087 "00000000" // /* MW 7 */ + 10088 "00000000" // /* MW 6 */ + 10089 "11101000" // /* MW 5 */ + 10090 "00010000" // /* MW 4 */ + 10091 "01110110" // /* MW 3 */ + 10092 "00010011" // /* MW 2 */ + 10093 "10000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "slice_generic_innermost.h" 40 8 first +.src_ref 3 "slice_generic_innermost.h" 51 19 first +.src_ref 3 "slice_generic_innermost.h" 53 20 first +.src_ref 3 "slice_generic_innermost.h" 56 21 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 10094 "01111110" // PADDA [p4], m0; VLDB x0, [p7], m0; PADDS [p3], m0; MOVXM le, #10224 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 10095 "01100000" // /* MW 13 */ + 10096 "00001011" // /* MW 12 */ + 10097 "01100001" // /* MW 11 */ + 10098 "00000010" // /* MW 10 */ + 10099 "01111111" // /* MW 9 */ + 10100 "00110111" // /* MW 8 */ + 10101 "00000001" // /* MW 7 */ + 10102 "00000000" // /* MW 6 */ + 10103 "01101000" // /* MW 5 */ + 10104 "00010000" // /* MW 4 */ + 10105 "11111110" // /* MW 3 */ + 10106 "00001100" // /* MW 2 */ + 10107 "10000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 40 8 +.src_ref 3 "slice_generic_innermost.h" 50 19 first +.src_ref 3 "slice_generic_innermost.h" 57 21 first +.src_ref 3 "slice_generic_innermost.h" 58 21 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10108 "11110110" // VLDA x1, [p3], m0; VLDB x3, [p0], m0; PADDS [p7], m0; ADD.NC lc, r2, #-3 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10109 "01000000" // /* MW 11 */ + 10110 "10111111" // /* MW 10 */ + 10111 "10111000" // /* MW 9 */ + 10112 "00000010" // /* MW 8 */ + 10113 "01011011" // /* MW 7 */ + 10114 "00001000" // /* MW 6 */ + 10115 "11101111" // /* MW 5 */ + 10116 "00010001" // /* MW 4 */ + 10117 "01110000" // /* MW 3 */ + 10118 "00001011" // /* MW 2 */ + 10119 "01100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "slice_generic_innermost.h" 51 19 first +.src_ref 3 "slice_generic_innermost.h" 52 20 first +.src_ref 3 "slice_generic_innermost.h" 59 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10120 "00110010" // PADDA [p0], m0; VLDB x2, [p4], m0; PADDS [p3], m0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10121 "01011011" // /* MW 7 */ + 10122 "00001000" // /* MW 6 */ + 10123 "01101011" // /* MW 5 */ + 10124 "00010001" // /* MW 4 */ + 10125 "11111000" // /* MW 3 */ + 10126 "00001100" // /* MW 2 */ + 10127 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 53 20 first +.src_ref 3 "slice_generic_innermost.h" 56 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10128 "00111100" // PADDA [p4], m0; VLDB x0, [p7], m0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10129 "01101000" // /* MW 5 */ + 10130 "00010000" // /* MW 4 */ + 10131 "11111110" // /* MW 3 */ + 10132 "00001100" // /* MW 2 */ + 10133 "10000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "slice_generic_innermost.h" 57 21 first +.src_ref 3 "slice_generic_innermost.h" 58 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10134 "01001100" // VLDB x3, [p0], m0; PADDS [p7], m0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10135 "10110110" // /* MW 5 */ + 10136 "00010000" // /* MW 4 */ + 10137 "10001110" // /* MW 3 */ + 10138 "00011110" // /* MW 2 */ + 10139 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 50 19 first +.src_ref 3 "slice_generic_innermost.h" 59 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10140 "00111100" // PADDA [p0], m0; VLDB x1, [p3], m0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10141 "11101000" // /* MW 5 */ + 10142 "00010000" // /* MW 4 */ + 10143 "11110110" // /* MW 3 */ + 10144 "00001100" // /* MW 2 */ + 10145 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "slice_generic_innermost.h" 46 17 first +.src_ref 3 "slice_generic_innermost.h" 52 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10146 "10110100" // VLDB x2, [p4], m0; VSHUFFLE bmll0, x1, x2, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10147 "00001011" // /* MW 5 */ + 10148 "00010010" // /* MW 4 */ + 10149 "10000000" // /* MW 3 */ + 10150 "00010110" // /* MW 2 */ + 10151 "10000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 51 19 first +.src_ref 3 "slice_generic_innermost.h" 56 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10152 "00110010" // NOPA; VLDB x0, [p7], m0; PADDS [p3], m0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10153 "01011011" // /* MW 7 */ + 10154 "00001000" // /* MW 6 */ + 10155 "01101011" // /* MW 5 */ + 10156 "00010000" // /* MW 4 */ + 10157 "11111110" // /* MW 3 */ + 10158 "00101100" // /* MW 2 */ + 10159 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "slice_generic_innermost.h" 47 18 first +.src_ref 3 "slice_generic_innermost.h" 54 19 first +.src_ref 3 "slice_generic_innermost.h" 58 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10160 "11100001" // NOPA; VLDB x3, [p0], m0; VST bmll0, [p5], m1; NOPX; VSHUFFLE bmlh0, x0, x3, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10161 "00000000" // /* MW 15 */ + 10162 "00000000" // /* MW 14 */ + 10163 "11101000" // /* MW 13 */ + 10164 "11000010" // /* MW 12 */ + 10165 "01000000" // /* MW 11 */ + 10166 "00000000" // /* MW 10 */ + 10167 "00000000" // /* MW 9 */ + 10168 "10000000" // /* MW 8 */ + 10169 "00000110" // /* MW 7 */ + 10170 "00101000" // /* MW 6 */ + 10171 "11101101" // /* MW 5 */ + 10172 "00010001" // /* MW 4 */ + 10173 "11110000" // /* MW 3 */ + 10174 "00101100" // /* MW 2 */ + 10175 "00000000" // /* MW 1 */ +.label ZLS_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_192 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 46 17 first +.src_ref 3 "slice_generic_innermost.h" 50 19 first +.src_ref 3 "slice_generic_innermost.h" 53 20 first +.src_ref 3 "slice_generic_innermost.h" 57 21 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 10176 "11100001" // PADDA [p4], m0; VLDB x1, [p3], m0; PADDS [p7], m0; NOPX; VSHUFFLE bmll0, x1, x2, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10177 "00000000" // /* MW 15 */ + 10178 "00000000" // /* MW 14 */ + 10179 "11101000" // /* MW 13 */ + 10180 "10000010" // /* MW 12 */ + 10181 "00000100" // /* MW 11 */ + 10182 "00000000" // /* MW 10 */ + 10183 "00000000" // /* MW 9 */ + 10184 "00000000" // /* MW 8 */ + 10185 "01011011" // /* MW 7 */ + 10186 "00001000" // /* MW 6 */ + 10187 "11101111" // /* MW 5 */ + 10188 "00010000" // /* MW 4 */ + 10189 "11110110" // /* MW 3 */ + 10190 "00001100" // /* MW 2 */ + 10191 "10000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "slice_generic_innermost.h" 52 20 first +.src_ref 3 "slice_generic_innermost.h" 59 21 first +.src_ref 3 "slice_generic_innermost.h" 60 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10192 "11100001" // PADDA [p0], m0; VLDB x2, [p4], m0; VST bmlh0, [p1], m1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10193 "00000000" // /* MW 15 */ + 10194 "00000000" // /* MW 14 */ + 10195 "01111000" // /* MW 13 */ + 10196 "10100101" // /* MW 12 */ + 10197 "00000001" // /* MW 11 */ + 10198 "00000000" // /* MW 10 */ + 10199 "00000000" // /* MW 9 */ + 10200 "10000000" // /* MW 8 */ + 10201 "00100110" // /* MW 7 */ + 10202 "00101000" // /* MW 6 */ + 10203 "01101001" // /* MW 5 */ + 10204 "00010001" // /* MW 4 */ + 10205 "11111000" // /* MW 3 */ + 10206 "00001100" // /* MW 2 */ + 10207 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 47 18 first +.src_ref 3 "slice_generic_innermost.h" 51 19 first +.src_ref 3 "slice_generic_innermost.h" 55 19 first +.src_ref 3 "slice_generic_innermost.h" 56 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10208 "11100001" // PADDA [p5], m1; VLDB x0, [p7], m0; PADDS [p3], m0; NOPX; VSHUFFLE bmlh0, x0, x3, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10209 "00000000" // /* MW 15 */ + 10210 "00000000" // /* MW 14 */ + 10211 "11101000" // /* MW 13 */ + 10212 "11000010" // /* MW 12 */ + 10213 "01000000" // /* MW 11 */ + 10214 "00000000" // /* MW 10 */ + 10215 "00000000" // /* MW 9 */ + 10216 "00000000" // /* MW 8 */ + 10217 "01011011" // /* MW 7 */ + 10218 "00001000" // /* MW 6 */ + 10219 "01101011" // /* MW 5 */ + 10220 "00010000" // /* MW 4 */ + 10221 "11111110" // /* MW 3 */ + 10222 "00001100" // /* MW 2 */ + 10223 "10100101" // /* MW 1 */ +.label ZLE_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_240 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "slice_generic_innermost.h" 54 19 first +.src_ref 3 "slice_generic_innermost.h" 58 21 first +.src_ref 3 "slice_generic_innermost.h" 61 19 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10224 "11100001" // PADDA [p1], m1; VLDB x3, [p0], m0; VST bmll0, [p5], m1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10225 "00000000" // /* MW 15 */ + 10226 "00000000" // /* MW 14 */ + 10227 "01111000" // /* MW 13 */ + 10228 "10100101" // /* MW 12 */ + 10229 "00000001" // /* MW 11 */ + 10230 "00000000" // /* MW 10 */ + 10231 "00000000" // /* MW 9 */ + 10232 "10000000" // /* MW 8 */ + 10233 "00000110" // /* MW 7 */ + 10234 "00101000" // /* MW 6 */ + 10235 "11101101" // /* MW 5 */ + 10236 "00010001" // /* MW 4 */ + 10237 "11110000" // /* MW 3 */ + 10238 "00001100" // /* MW 2 */ + 10239 "00100101" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 46 17 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 10240 "11011000" // VSHUFFLE bmll0, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10241 "00000101" // /* MW 3 */ + 10242 "00001001" // /* MW 2 */ + 10243 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "slice_generic_innermost.h" 60 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10244 "10011000" // VST bmlh0, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10245 "00100110" // /* MW 3 */ + 10246 "00101000" // /* MW 2 */ + 10247 "00001001" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 47 18 first +.src_ref 3 "slice_generic_innermost.h" 61 19 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10248 "10010100" // PADDA [p1], m1; VSHUFFLE bmlh0, x0, x3, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10249 "00001011" // /* MW 5 */ + 10250 "00000011" // /* MW 4 */ + 10251 "11110001" // /* MW 3 */ + 10252 "00001100" // /* MW 2 */ + 10253 "00100101" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 10254 "10000100" // J #10448 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10448 delay_slots=5 */ + 10255 "00000000" // /* MW 5 */ + 10256 "00000000" // /* MW 4 */ + 10257 "01101000" // /* MW 3 */ + 10258 "00010100" // /* MW 2 */ + 10259 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "slice_generic_innermost.h" 46 17 first +.src_ref 3 "slice_generic_innermost.h" 55 19 first +.src_ref 3 "slice_generic_innermost.h" 60 19 first +.delay_slot +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10260 "10111010" // PADDA [p5], m1; VST bmlh0, [p1], m1; VSHUFFLE bmll0, x1, x2, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10261 "11100010" // /* MW 9 */ + 10262 "10000010" // /* MW 8 */ + 10263 "00000100" // /* MW 7 */ + 10264 "10000000" // /* MW 6 */ + 10265 "00100110" // /* MW 5 */ + 10266 "00101000" // /* MW 4 */ + 10267 "11110001" // /* MW 3 */ + 10268 "00001100" // /* MW 2 */ + 10269 "10100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "slice_generic_innermost.h" 54 19 first +.src_ref 3 "slice_generic_innermost.h" 61 19 first +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10270 "00001100" // PADDA [p1], m1; VST bmll0, [p5], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10271 "00001101" // /* MW 5 */ + 10272 "01010000" // /* MW 4 */ + 10273 "11111010" // /* MW 3 */ + 10274 "00001100" // /* MW 2 */ + 10275 "00100101" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 47 18 first +.src_ref 3 "slice_generic_innermost.h" 55 19 first +.delay_slot + 10276 "10010100" // PADDA [p5], m1; VSHUFFLE bmlh0, x0, x3, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10277 "00001011" // /* MW 5 */ + 10278 "00000011" // /* MW 4 */ + 10279 "11110001" // /* MW 3 */ + 10280 "00001100" // /* MW 2 */ + 10281 "10100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "slice_generic_innermost.h" 54 19 first +.delay_slot + 10282 "00001100" // NOPA; VST bmll0, [p5], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10283 "00001101" // /* MW 5 */ + 10284 "01010000" // /* MW 4 */ + 10285 "11111010" // /* MW 3 */ + 10286 "00101100" // /* MW 2 */ + 10287 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "slice_generic_innermost.h" 60 19 first +.delay_slot + 10288 "11100001" // NOPA; NOPB; VST bmlh0, [p1], m1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10289 "00000000" // /* MW 15 */ + 10290 "00000000" // /* MW 14 */ + 10291 "01111000" // /* MW 13 */ + 10292 "10100101" // /* MW 12 */ + 10293 "00000001" // /* MW 11 */ + 10294 "00000000" // /* MW 10 */ + 10295 "00000000" // /* MW 9 */ + 10296 "10000000" // /* MW 8 */ + 10297 "00100110" // /* MW 7 */ + 10298 "00101000" // /* MW 6 */ + 10299 "00100001" // /* MW 5 */ + 10300 "00000000" // /* MW 4 */ + 10301 "11110000" // /* MW 3 */ + 10302 "00101100" // /* MW 2 */ + 10303 "00000000" // /* MW 1 */ +.label TGT_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_320 +.src_ref 3 "slice_generic_innermost.h" 40 8 first + 10304 "11111000" // MOV lc, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10305 "00100000" // /* MW 3 */ + 10306 "01110001" // /* MW 2 */ + 10307 "00011101" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 40 8 + 10308 "01000100" // MOVXM ls, #10320 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10309 "10100000" // /* MW 5 */ + 10310 "11110000" // /* MW 4 */ + 10311 "00100001" // /* MW 3 */ + 10312 "00000000" // /* MW 2 */ + 10313 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 40 8 + 10314 "01000100" // MOVXM le, #10432 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10315 "10000000" // /* MW 5 */ + 10316 "11110001" // /* MW 4 */ + 10317 "00100110" // /* MW 3 */ + 10318 "00000000" // /* MW 2 */ + 10319 "00000000" // /* MW 1 */ +.label ZLS_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_336 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 50 19 first +.src_ref 3 "slice_generic_innermost.h" 52 20 first +.begin_of_loop +.loop_nesting 1 + 10320 "00111100" // VLDA x1, [p4], m0; VLDB x2, [p3], m0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10321 "01101000" // /* MW 5 */ + 10322 "00010001" // /* MW 4 */ + 10323 "01110110" // /* MW 3 */ + 10324 "00001011" // /* MW 2 */ + 10325 "10000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "slice_generic_innermost.h" 51 19 first +.src_ref 3 "slice_generic_innermost.h" 53 20 first +.src_ref 3 "slice_generic_innermost.h" 56 21 first + 10326 "00110010" // PADDA [p3], m0; VLDB x0, [p7], m0; PADDS [p4], m0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10327 "01011011" // /* MW 7 */ + 10328 "00001000" // /* MW 6 */ + 10329 "01101100" // /* MW 5 */ + 10330 "00010000" // /* MW 4 */ + 10331 "11111110" // /* MW 3 */ + 10332 "00001100" // /* MW 2 */ + 10333 "01100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 57 21 first +.src_ref 3 "slice_generic_innermost.h" 58 21 first + 10334 "00111100" // PADDA [p7], m0; VLDB x3, [p0], m0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10335 "11101000" // /* MW 5 */ + 10336 "00010001" // /* MW 4 */ + 10337 "11110000" // /* MW 3 */ + 10338 "00001100" // /* MW 2 */ + 10339 "11100001" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 59 21 first + 10340 "00011000" // PADDB [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10341 "10010000" // /* MW 3 */ + 10342 "00001011" // /* MW 2 */ + 10343 "00111000" // /* MW 1 */ + 10344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10345 "00000000" // /* MW 1 */ + 10346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10347 "00000000" // /* MW 1 */ + 10348 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10349 "01100111" // /* MW 3 */ + 10350 "00000001" // /* MW 2 */ + 10351 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 46 17 first + 10352 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x2, x1, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10353 "00000000" // /* MW 15 */ + 10354 "00000000" // /* MW 14 */ + 10355 "11101000" // /* MW 13 */ + 10356 "01000010" // /* MW 12 */ + 10357 "00001000" // /* MW 11 */ + 10358 "00000000" // /* MW 10 */ + 10359 "00000000" // /* MW 9 */ + 10360 "00000000" // /* MW 8 */ + 10361 "01011011" // /* MW 7 */ + 10362 "00000001" // /* MW 6 */ + 10363 "00100000" // /* MW 5 */ + 10364 "00000000" // /* MW 4 */ + 10365 "11110000" // /* MW 3 */ + 10366 "00101100" // /* MW 2 */ + 10367 "00000000" // /* MW 1 */ + 10368 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10369 "00000000" // /* MW 15 */ + 10370 "00000000" // /* MW 14 */ + 10371 "01111000" // /* MW 13 */ + 10372 "10100101" // /* MW 12 */ + 10373 "00000001" // /* MW 11 */ + 10374 "00000000" // /* MW 10 */ + 10375 "00000000" // /* MW 9 */ + 10376 "00000000" // /* MW 8 */ + 10377 "01011011" // /* MW 7 */ + 10378 "00000001" // /* MW 6 */ + 10379 "00100000" // /* MW 5 */ + 10380 "00000000" // /* MW 4 */ + 10381 "11110000" // /* MW 3 */ + 10382 "00101100" // /* MW 2 */ + 10383 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "slice_generic_innermost.h" 47 18 first +.src_ref 3 "slice_generic_innermost.h" 54 19 first + 10384 "11100001" // NOPA; NOPB; VST bmll0, [p5], m1; NOPX; VSHUFFLE bmlh0, x0, x3, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10385 "00000000" // /* MW 15 */ + 10386 "00000000" // /* MW 14 */ + 10387 "11101000" // /* MW 13 */ + 10388 "11000010" // /* MW 12 */ + 10389 "01000000" // /* MW 11 */ + 10390 "00000000" // /* MW 10 */ + 10391 "00000000" // /* MW 9 */ + 10392 "10000000" // /* MW 8 */ + 10393 "00000110" // /* MW 7 */ + 10394 "00101000" // /* MW 6 */ + 10395 "00100101" // /* MW 5 */ + 10396 "00000000" // /* MW 4 */ + 10397 "11110000" // /* MW 3 */ + 10398 "00101100" // /* MW 2 */ + 10399 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 55 19 first + 10400 "11100001" // NOPA; PADDB [p5], m1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10401 "00000000" // /* MW 15 */ + 10402 "00000000" // /* MW 14 */ + 10403 "01111000" // /* MW 13 */ + 10404 "10100101" // /* MW 12 */ + 10405 "00000001" // /* MW 11 */ + 10406 "00000000" // /* MW 10 */ + 10407 "00000000" // /* MW 9 */ + 10408 "00000000" // /* MW 8 */ + 10409 "01011011" // /* MW 7 */ + 10410 "00000001" // /* MW 6 */ + 10411 "00100000" // /* MW 5 */ + 10412 "01010111" // /* MW 4 */ + 10413 "11111010" // /* MW 3 */ + 10414 "00101100" // /* MW 2 */ + 10415 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "slice_generic_innermost.h" 60 19 first + 10416 "11100001" // NOPA; NOPB; VST bmlh0, [p1], m1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10417 "00000000" // /* MW 15 */ + 10418 "00000000" // /* MW 14 */ + 10419 "01111000" // /* MW 13 */ + 10420 "10100101" // /* MW 12 */ + 10421 "00000001" // /* MW 11 */ + 10422 "00000000" // /* MW 10 */ + 10423 "00000000" // /* MW 9 */ + 10424 "10000000" // /* MW 8 */ + 10425 "00100110" // /* MW 7 */ + 10426 "00101000" // /* MW 6 */ + 10427 "00100001" // /* MW 5 */ + 10428 "00000000" // /* MW 4 */ + 10429 "11110000" // /* MW 3 */ + 10430 "00101100" // /* MW 2 */ + 10431 "00000000" // /* MW 1 */ +.label ZLE_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_448 +.src_ref 3 "slice_generic_innermost.h" 61 19 first +.end_of_loop + 10432 "11100001" // NOPA; PADDB [p1], m1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10433 "00000000" // /* MW 15 */ + 10434 "00000000" // /* MW 14 */ + 10435 "01111000" // /* MW 13 */ + 10436 "10100101" // /* MW 12 */ + 10437 "00000001" // /* MW 11 */ + 10438 "00000000" // /* MW 10 */ + 10439 "00000000" // /* MW 9 */ + 10440 "00000000" // /* MW 8 */ + 10441 "01011011" // /* MW 7 */ + 10442 "00000001" // /* MW 6 */ + 10443 "00100000" // /* MW 5 */ + 10444 "01010111" // /* MW 4 */ + 10445 "11110010" // /* MW 3 */ + 10446 "00101100" // /* MW 2 */ + 10447 "00000000" // /* MW 1 */ +.label TGT_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_464 +.src_ref 3 "slice_generic_innermost.h" 76 first +.loop_nesting 0 + 10448 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10449 "00000000" // /* MW 3 */ + 10450 "00101000" // /* MW 2 */ + 10451 "00010000" // /* MW 1 */ +.delay_slot + 10452 "11111000" // MOV p7, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10453 "00100000" // /* MW 3 */ + 10454 "01100000" // /* MW 2 */ + 10455 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10457 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10459 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10461 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params__end +.label __Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params___func_end0 + 10463 "00000000" // /* MW 1 */ +.label __ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj___func_begin0 +.label _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj +.function slice_generic_innermost_adf_wrapper, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> > > _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 29 +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 29 first +.function_start + 10464 "00111010" // MOVS p5, p0; PADDXM [sp], #128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10465 "01110001" // /* MW 9 */ + 10466 "00000000" // /* MW 8 */ + 10467 "00000000" // /* MW 7 */ + 10468 "00000000" // /* MW 6 */ + 10469 "00000100" // /* MW 5 */ + 10470 "00000000" // /* MW 4 */ + 10471 "01100000" // /* MW 3 */ + 10472 "00010001" // /* MW 2 */ + 10473 "10110000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 + 10474 "00000010" // ST lr, [sp, #-4]; MOV p3, p1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10475 "01110000" // /* MW 7 */ + 10476 "01100000" // /* MW 6 */ + 10477 "10110001" // /* MW 5 */ + 10478 "00000001" // /* MW 4 */ + 10479 "10110000" // /* MW 3 */ + 10480 "10000111" // /* MW 2 */ + 10481 "11111111" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 33 4 +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 33 4 first +.no_stack_arguments + 10482 "00111010" // MOVS p1, p2; JL #9920 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=9920 delay_slots=5 */ + 10483 "01000001" // /* MW 9 */ + 10484 "00000000" // /* MW 8 */ + 10485 "00000000" // /* MW 7 */ + 10486 "11011000" // /* MW 6 */ + 10487 "00000100" // /* MW 5 */ + 10488 "00000000" // /* MW 4 */ + 10489 "01100000" // /* MW 3 */ + 10490 "00010001" // /* MW 2 */ + 10491 "00110001" // /* MW 1 */ +.delay_slot + 10492 "11111000" // MOV p0, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10493 "11100000" // /* MW 3 */ + 10494 "01100101" // /* MW 2 */ + 10495 "00011000" // /* MW 1 */ +.delay_slot + 10496 "00011000" // PADDB [p0], #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10497 "10010000" // /* MW 3 */ + 10498 "11101111" // /* MW 2 */ + 10499 "00111000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 4 +.delay_slot + 10500 "11111000" // MOV p4, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10501 "11000000" // /* MW 3 */ + 10502 "01100000" // /* MW 2 */ + 10503 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10505 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10506 "00111100" // NOPA; NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10507 "00100000" // /* MW 5 */ + 10508 "00000000" // /* MW 4 */ + 10509 "11110000" // /* MW 3 */ + 10510 "00101100" // /* MW 2 */ + 10511 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 31 first +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 4 +.return_address + 10512 "10111010" // LDA r18, [sp, #-128]; MOVS p2, p4; MOV r17, CORE_ID /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10513 "01110010" // /* MW 9 */ + 10514 "01110000" // /* MW 8 */ + 10515 "00101101" // /* MW 7 */ + 10516 "00000010" // /* MW 6 */ + 10517 "10001011" // /* MW 5 */ + 10518 "10010000" // /* MW 4 */ + 10519 "00100010" // /* MW 3 */ + 10520 "01001010" // /* MW 2 */ + 10521 "11110000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 34 35 first +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 44 + 10522 "00101100" // LDA r22, [sp, #-124]; EXTEND.u8 r17, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10523 "00100000" // /* MW 5 */ + 10524 "11000101" // /* MW 4 */ + 10525 "00101000" // /* MW 3 */ + 10526 "11011010" // /* MW 2 */ + 10527 "11110000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 34 35 +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 57 first + 10528 "10111010" // LDA r20, [sp, #-120]; MOVXM r19, #65534 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10529 "00010000" // /* MW 9 */ + 10530 "11111111" // /* MW 8 */ + 10531 "01101111" // /* MW 7 */ + 10532 "00111110" // /* MW 6 */ + 10533 "00000000" // /* MW 5 */ + 10534 "00000000" // /* MW 4 */ + 10535 "00100000" // /* MW 3 */ + 10536 "01010010" // /* MW 2 */ + 10537 "11110001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 34 35 first + 10538 "00101100" // LDA p1, [p3]; ADD r17, r19, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10539 "00100001" // /* MW 5 */ + 10540 "11000110" // /* MW 4 */ + 10541 "11011001" // /* MW 3 */ + 10542 "10010011" // /* MW 2 */ + 10543 "01100000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 70 first +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 59 first + 10544 "00101100" // LDA r19, [sp, #-116]; EXTEND.u16 r21, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10545 "01100000" // /* MW 5 */ + 10546 "11010101" // /* MW 4 */ + 10547 "00101000" // /* MW 3 */ + 10548 "11001110" // /* MW 2 */ + 10549 "11110001" // /* MW 1 */ + 10550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10551 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 10552 "10011000" // LDA r17, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10553 "00110110" // /* MW 3 */ + 10554 "00000110" // /* MW 2 */ + 10555 "00000101" // /* MW 1 */ + 10556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10557 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 36 first + 10558 "10011000" // MUL r18, r22, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10559 "00101111" // /* MW 3 */ + 10560 "10100101" // /* MW 2 */ + 10561 "00010101" // /* MW 1 */ + 10562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10563 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 49 + 10564 "10011000" // MUL r18, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10565 "01001111" // /* MW 3 */ + 10566 "10100101" // /* MW 2 */ + 10567 "00010100" // /* MW 1 */ + 10568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10569 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 62 + 10570 "10011000" // MUL r18, r21, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10571 "00101111" // /* MW 3 */ + 10572 "01100101" // /* MW 2 */ + 10573 "00010101" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 4 first +.no_stack_arguments + 10574 "00000100" // JL #9984 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9984 delay_slots=5 */ + 10575 "00000001" // /* MW 5 */ + 10576 "00000000" // /* MW 4 */ + 10577 "10000000" // /* MW 3 */ + 10578 "00010011" // /* MW 2 */ + 10579 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 57 +.delay_slot + 10580 "10011000" // MUL r18, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10581 "00101111" // /* MW 3 */ + 10582 "11100101" // /* MW 2 */ + 10583 "00010100" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 45 +.delay_slot + 10584 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10585 "00000101" // /* MW 3 */ + 10586 "00100000" // /* MW 2 */ + 10587 "00010000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 45 +.delay_slot + 10588 "10011000" // LSHL r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10589 "00001101" // /* MW 3 */ + 10590 "10100001" // /* MW 2 */ + 10591 "00010100" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 45 +.delay_slot + 10592 "01011000" // ADD.NC p0, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10593 "11000001" // /* MW 3 */ + 10594 "01101000" // /* MW 2 */ + 10595 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10596 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10597 "10000001" // /* MW 11 */ + 10598 "10101101" // /* MW 10 */ + 10599 "00000000" // /* MW 9 */ + 10600 "00000000" // /* MW 8 */ + 10601 "00000000" // /* MW 7 */ + 10602 "00000000" // /* MW 6 */ + 10603 "00100000" // /* MW 5 */ + 10604 "00000000" // /* MW 4 */ + 10605 "11110000" // /* MW 3 */ + 10606 "00101100" // /* MW 2 */ + 10607 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 39 +.return_address + 10608 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10609 "00111001" // /* MW 3 */ + 10610 "11111100" // /* MW 2 */ + 10611 "00000111" // /* MW 1 */ + 10612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10613 "00000000" // /* MW 1 */ + 10614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10615 "00000000" // /* MW 1 */ + 10616 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10617 "00000000" // /* MW 1 */ + 10618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10619 "00000000" // /* MW 1 */ + 10620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10621 "00000000" // /* MW 1 */ + 10622 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10623 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 39 first + 10624 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10625 "00000000" // /* MW 3 */ + 10626 "00101000" // /* MW 2 */ + 10627 "00010000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 39 +.delay_slot + 10628 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10629 "00000001" // /* MW 5 */ + 10630 "00000000" // /* MW 4 */ + 10631 "00000000" // /* MW 3 */ + 10632 "11110000" // /* MW 2 */ + 10633 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10634 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10635 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10636 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10637 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10639 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj__end +.label __ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj___func_end0 + 10641 "00000000" // /* MW 1 */ +.label __Z14_b8170_wrapperPPv___func_begin0 +.label _Z14_b8170_wrapperPPv +.function _b8170_wrapper _Z14_b8170_wrapperPPv +.src_ref 0 "0_0_reloadable77.cc" 38 first +.src_ref 0 "0_0_reloadable77.cc" 40 79 +.function_start + 10656 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10657 "11000000" // /* MW 3 */ + 10658 "01100000" // /* MW 2 */ + 10659 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 40 79 first + 10660 "10011000" // LDA p0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10661 "00011110" // /* MW 3 */ + 10662 "00011100" // /* MW 2 */ + 10663 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 42 47 first + 10664 "10011000" // LDA p2, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10665 "00011110" // /* MW 3 */ + 10666 "00010101" // /* MW 2 */ + 10667 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 41 80 first + 10668 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10669 "10011110" // /* MW 3 */ + 10670 "00000100" // /* MW 2 */ + 10671 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 39 4 first +.tail_call + 10672 "10000100" // J #10464 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10464 delay_slots=5 */ + 10673 "00000000" // /* MW 5 */ + 10674 "00000000" // /* MW 4 */ + 10675 "01110000" // /* MW 3 */ + 10676 "00010100" // /* MW 2 */ + 10677 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10679 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10681 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10683 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10685 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z14_b8170_wrapperPPv__end +.label __Z14_b8170_wrapperPPv___func_end0 + 10687 "00000000" // /* MW 1 */ +.label __Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj___func_begin0 +.label _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj +.function setup_transposeshuffle_params _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj +.src_ref 3 "transposeshuffle_params.h" 71 first +.src_ref 3 "transposeshuffle_params.h" 76 16 +.src_ref 3 "transposeshuffle_params.h" 76 18 first +.function_start + 10688 "10111010" // LDA el0, [p1], #4; MOVXM r0, #508544 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10689 "00010000" // /* MW 9 */ + 10690 "01000000" // /* MW 8 */ + 10691 "00001001" // /* MW 7 */ + 10692 "11110000" // /* MW 6 */ + 10693 "00000001" // /* MW 5 */ + 10694 "00000000" // /* MW 4 */ + 10695 "11010000" // /* MW 3 */ + 10696 "10000101" // /* MW 2 */ + 10697 "00100011" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 9 +.src_ref 3 "transposeshuffle_params.h" 76 16 +.src_ref 3 "transposeshuffle_params.h" 80 28 +.src_ref 3 "transposeshuffle_params.h" 80 36 +.src_ref 3 "transposeshuffle_params.h" 81 28 +.src_ref 3 "transposeshuffle_params.h" 81 36 + 10698 "01110110" // MOVA m0, #64; MOVS p0, r0; MOVX r2, #-3; ADD.NC p2, r0, #4 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10699 "00001000" // /* MW 11 */ + 10700 "00000001" // /* MW 10 */ + 10701 "00110000" // /* MW 9 */ + 10702 "10101001" // /* MW 8 */ + 10703 "00100111" // /* MW 7 */ + 10704 "00111110" // /* MW 6 */ + 10705 "00001011" // /* MW 5 */ + 10706 "10000000" // /* MW 4 */ + 10707 "10000000" // /* MW 3 */ + 10708 "00000000" // /* MW 2 */ + 10709 "00001000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 85 22 +.src_ref 3 "transposeshuffle_params.h" 86 17 +.src_ref 3 "transposeshuffle_params.h" 89 43 +.src_ref 3 "transposeshuffle_params.h" 91 18 +.src_ref 3 "transposeshuffle_params.h" 93 4 +.src_ref 3 "transposeshuffle_params.h" 94 4 + 10710 "01100100" // MOVX r1, #4; MOV r0, #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10711 "00000001" // /* MW 5 */ + 10712 "00100010" // /* MW 4 */ + 10713 "00100000" // /* MW 3 */ + 10714 "01000010" // /* MW 2 */ + 10715 "00000000" // /* MW 1 */ + 10716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10717 "00000000" // /* MW 1 */ + 10718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10719 "00000000" // /* MW 1 */ + 10720 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10721 "00000000" // /* MW 1 */ + 10722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10723 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 16 first + 10724 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10725 "00101001" // /* MW 3 */ + 10726 "00011100" // /* MW 2 */ + 10727 "00001000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 18 + 10728 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10729 "00101110" // /* MW 3 */ + 10730 "00011100" // /* MW 2 */ + 10731 "00000001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 18 + 10732 "10011000" // LDA eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10733 "00001110" // /* MW 3 */ + 10734 "00011100" // /* MW 2 */ + 10735 "00000001" // /* MW 1 */ + 10736 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10737 "00000000" // /* MW 1 */ + 10738 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10739 "00000000" // /* MW 1 */ + 10740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10741 "00000000" // /* MW 1 */ + 10742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10743 "00000000" // /* MW 1 */ + 10744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10745 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 16 + 10746 "10011000" // ST el0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10747 "00101001" // /* MW 3 */ + 10748 "00011100" // /* MW 2 */ + 10749 "00001010" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 16 + 10750 "10011000" // ST eh0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10751 "00001001" // /* MW 3 */ + 10752 "00011100" // /* MW 2 */ + 10753 "00001010" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 18 + 10754 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10755 "00101110" // /* MW 3 */ + 10756 "00011100" // /* MW 2 */ + 10757 "00000001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 18 + 10758 "10011000" // LDA eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10759 "00001110" // /* MW 3 */ + 10760 "00011100" // /* MW 2 */ + 10761 "00000001" // /* MW 1 */ + 10762 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10763 "00000000" // /* MW 1 */ + 10764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10765 "00000000" // /* MW 1 */ + 10766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10767 "00000000" // /* MW 1 */ + 10768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10769 "00000000" // /* MW 1 */ + 10770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10771 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 16 + 10772 "10011000" // ST el0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10773 "00101001" // /* MW 3 */ + 10774 "00011100" // /* MW 2 */ + 10775 "00001010" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 16 + 10776 "10011000" // ST eh0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10777 "00001001" // /* MW 3 */ + 10778 "00011100" // /* MW 2 */ + 10779 "00001010" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 18 + 10780 "10011000" // LDA eh0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10781 "00001110" // /* MW 3 */ + 10782 "00000100" // /* MW 2 */ + 10783 "00000001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 18 + 10784 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10785 "00101110" // /* MW 3 */ + 10786 "00010100" // /* MW 2 */ + 10787 "00000001" // /* MW 1 */ + 10788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10789 "00000000" // /* MW 1 */ + 10790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10791 "00000000" // /* MW 1 */ + 10792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10793 "00000000" // /* MW 1 */ + 10794 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10795 "00000000" // /* MW 1 */ + 10796 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10797 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 16 + 10798 "10011000" // ST eh0, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10799 "00001001" // /* MW 3 */ + 10800 "00000100" // /* MW 2 */ + 10801 "00001010" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 16 + 10802 "10011000" // ST el0, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10803 "00101001" // /* MW 3 */ + 10804 "00010100" // /* MW 2 */ + 10805 "00001010" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 80 28 first + 10806 "10011000" // LDA r3, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10807 "01110110" // /* MW 3 */ + 10808 "00001000" // /* MW 2 */ + 10809 "00000000" // /* MW 1 */ + 10810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10811 "00000000" // /* MW 1 */ + 10812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10813 "00000000" // /* MW 1 */ + 10814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10815 "00000000" // /* MW 1 */ + 10816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10817 "00000000" // /* MW 1 */ + 10818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10819 "00000000" // /* MW 1 */ + 10820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10821 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 80 36 + 10822 "10011000" // LSHL r4, r3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10823 "00101101" // /* MW 3 */ + 10824 "11001000" // /* MW 2 */ + 10825 "00010000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 80 42 +.src_ref 3 "transposeshuffle_params.h" 89 43 first + 10826 "00100100" // LSHL r3, r3, r1; ADD.NC r1, r4, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10827 "11111111" // /* MW 5 */ + 10828 "10100100" // /* MW 4 */ + 10829 "10110000" // /* MW 3 */ + 10830 "11000011" // /* MW 2 */ + 10831 "00011000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 80 19 +.src_ref 3 "transposeshuffle_params.h" 80 19 first + 10832 "00000010" // ST r1, [p0]; MOV r4, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10833 "01110000" // /* MW 7 */ + 10834 "01100000" // /* MW 6 */ + 10835 "10001000" // /* MW 5 */ + 10836 "00000000" // /* MW 4 */ + 10837 "00110000" // /* MW 3 */ + 10838 "10000110" // /* MW 2 */ + 10839 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 80 19 + 10840 "00011000" // ADD.NC p1, r4, #-60 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10841 "01100010" // /* MW 3 */ + 10842 "01100010" // /* MW 2 */ + 10843 "00011001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 81 28 first + 10844 "10011000" // LDA r4, [p1], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10845 "10010110" // /* MW 3 */ + 10846 "00001000" // /* MW 2 */ + 10847 "00000001" // /* MW 1 */ + 10848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10849 "00000000" // /* MW 1 */ + 10850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10851 "00000000" // /* MW 1 */ + 10852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10853 "00000000" // /* MW 1 */ + 10854 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10855 "00000000" // /* MW 1 */ + 10856 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10857 "00000000" // /* MW 1 */ + 10858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10859 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 81 36 +.src_ref 3 "transposeshuffle_params.h" 90 77 + 10860 "01100100" // LSHL r2, r4, r2; MOV r4, #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10861 "00000001" // /* MW 5 */ + 10862 "00100010" // /* MW 4 */ + 10863 "10110010" // /* MW 3 */ + 10864 "10000101" // /* MW 2 */ + 10865 "00100000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 81 42 + 10866 "00011000" // ADD r2, r2, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10867 "11111111" // /* MW 3 */ + 10868 "10000101" // /* MW 2 */ + 10869 "00010000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 81 19 +.src_ref 3 "transposeshuffle_params.h" 90 77 first + 10870 "01011100" // ST r2, [p1], #4; MSC r4, r4, r3, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10871 "01011100" // /* MW 5 */ + 10872 "10010000" // /* MW 4 */ + 10873 "00110001" // /* MW 3 */ + 10874 "10001010" // /* MW 2 */ + 10875 "00100011" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 85 22 first + 10876 "10011000" // ST r0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10877 "00010001" // /* MW 3 */ + 10878 "00011100" // /* MW 2 */ + 10879 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 85 22 + 10880 "10011000" // ST r0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10881 "00010001" // /* MW 3 */ + 10882 "00011100" // /* MW 2 */ + 10883 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 86 17 first + 10884 "10011000" // ST r0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10885 "00010001" // /* MW 3 */ + 10886 "00101100" // /* MW 2 */ + 10887 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 89 23 first + 10888 "10011000" // ST r3, [p1], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10889 "01110001" // /* MW 3 */ + 10890 "11111100" // /* MW 2 */ + 10891 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 90 23 first + 10892 "10011000" // ST r4, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10893 "10010001" // /* MW 3 */ + 10894 "00101100" // /* MW 2 */ + 10895 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 91 18 +.src_ref 3 "transposeshuffle_params.h" 91 18 first + 10896 "00000010" // ST r0, [p1]; MOV r5, p1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10897 "01110000" // /* MW 7 */ + 10898 "01100000" // /* MW 6 */ + 10899 "10101001" // /* MW 5 */ + 10900 "00000000" // /* MW 4 */ + 10901 "00110000" // /* MW 3 */ + 10902 "10000010" // /* MW 2 */ + 10903 "00100000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 91 18 + 10904 "00011000" // ADD.NC p1, r5, #-68 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10905 "11011110" // /* MW 3 */ + 10906 "01100010" // /* MW 2 */ + 10907 "00011001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 93 4 first + 10908 "10011000" // ST r0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10909 "00010001" // /* MW 3 */ + 10910 "00011100" // /* MW 2 */ + 10911 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 93 4 + 10912 "10011000" // ST r0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10913 "00010001" // /* MW 3 */ + 10914 "00011100" // /* MW 2 */ + 10915 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 93 4 + 10916 "10011000" // ST r2, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10917 "01010001" // /* MW 3 */ + 10918 "00011100" // /* MW 2 */ + 10919 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 93 4 + 10920 "10011000" // ST r1, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10921 "00110001" // /* MW 3 */ + 10922 "00011100" // /* MW 2 */ + 10923 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 93 4 +.src_ref 3 "transposeshuffle_params.h" 95 first + 10924 "01011100" // ST r0, [p1], #4; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 10925 "00000000" // /* MW 5 */ + 10926 "01010000" // /* MW 4 */ + 10927 "00110000" // /* MW 3 */ + 10928 "10000010" // /* MW 2 */ + 10929 "00100011" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 94 4 first +.delay_slot + 10930 "10011000" // ST r3, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10931 "01110001" // /* MW 3 */ + 10932 "00101100" // /* MW 2 */ + 10933 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 94 4 +.delay_slot + 10934 "10011000" // ST r2, [p1], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10935 "01010001" // /* MW 3 */ + 10936 "11111100" // /* MW 2 */ + 10937 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 94 4 +.delay_slot + 10938 "10011000" // ST r4, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10939 "10010001" // /* MW 3 */ + 10940 "00101100" // /* MW 2 */ + 10941 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 94 4 +.delay_slot + 10942 "10011000" // ST r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10943 "00110001" // /* MW 3 */ + 10944 "00000100" // /* MW 2 */ + 10945 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 94 4 +.delay_slot + 10946 "10011000" // ST r0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10947 "00010001" // /* MW 3 */ + 10948 "00010100" // /* MW 2 */ +.label _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj__end +.label __Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj___func_end0 + 10949 "00001001" // /* MW 1 */ +.label __Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params___func_begin0 +.label _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params +.function transposeshuffle _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params +.src_ref 3 "transposeshuffle.h" 38 first +.src_ref 3 "transposeshuffle.h" 72 14 +.src_ref 3 "transposeshuffle.h" 79 23 +.function_start + 10960 "10111010" // MOVA r1, #2; MOVXM p2, #508556 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10961 "00010000" // /* MW 9 */ + 10962 "01000110" // /* MW 8 */ + 10963 "00110001" // /* MW 7 */ + 10964 "11110001" // /* MW 6 */ + 10965 "00000001" // /* MW 5 */ + 10966 "00000000" // /* MW 4 */ + 10967 "00000000" // /* MW 3 */ + 10968 "01000001" // /* MW 2 */ + 10969 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 72 8 +.src_ref 3 "transposeshuffle.h" 72 14 first +.src_ref 3 "transposeshuffle.h" 72 23 + 10970 "00101100" // LDA r27, [p2]; MOVX r0, #22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10971 "10110010" // /* MW 5 */ + 10972 "00000000" // /* MW 4 */ + 10973 "11010000" // /* MW 3 */ + 10974 "11101110" // /* MW 2 */ + 10975 "01000000" // /* MW 1 */ + 10976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10977 "00000000" // /* MW 1 */ + 10978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10979 "00000000" // /* MW 1 */ + 10980 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10981 "00000000" // /* MW 1 */ + 10982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10983 "00000000" // /* MW 1 */ + 10984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10985 "00000000" // /* MW 1 */ + 10986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10987 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 79 23 first + 10988 "10011000" // EQ r1, r27, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10989 "00010111" // /* MW 3 */ + 10990 "11000010" // /* MW 2 */ + 10991 "00010110" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 79 8 + 10992 "10000100" // JNZ r1, #11456 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11456 delay_slots=5 */ + 10993 "00000001" // /* MW 5 */ + 10994 "01000000" // /* MW 4 */ + 10995 "01100000" // /* MW 3 */ + 10996 "00010110" // /* MW 2 */ + 10997 "00001000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 72 8 +.src_ref 3 "transposeshuffle.h" 72 23 +.delay_slot + 10998 "00011000" // MOVX r2, #29 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10999 "01110101" // /* MW 3 */ + 11000 "00000100" // /* MW 2 */ + 11001 "00010000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 72 8 first +.src_ref 3 "transposeshuffle.h" 72 23 first +.delay_slot + 11002 "00011000" // SEL.EQZ r0, r0, r2, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11003 "00100010" // /* MW 3 */ + 11004 "00000000" // /* MW 2 */ + 11005 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11007 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11009 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11011 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 116 34 + 11012 "01000100" // MOVXM p2, #508560 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11013 "00100000" // /* MW 5 */ + 11014 "11000101" // /* MW 4 */ + 11015 "11000100" // /* MW 3 */ + 11016 "00000111" // /* MW 2 */ + 11017 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 116 34 first + 11018 "10011000" // LDA r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11019 "00110110" // /* MW 3 */ + 11020 "00000100" // /* MW 2 */ + 11021 "00000010" // /* MW 1 */ + 11022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11023 "00000000" // /* MW 1 */ + 11024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11025 "00000000" // /* MW 1 */ + 11026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11027 "00000000" // /* MW 1 */ + 11028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11029 "00000000" // /* MW 1 */ + 11030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11031 "00000000" // /* MW 1 */ + 11032 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11033 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 116 8 +.src_ref 3 "transposeshuffle.h" 116 26 + 11034 "10000100" // JZ r1, #12096 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12096 delay_slots=5 */ + 11035 "00000001" // /* MW 5 */ + 11036 "00000000" // /* MW 4 */ + 11037 "10100000" // /* MW 3 */ + 11038 "00010111" // /* MW 2 */ + 11039 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11041 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11043 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11045 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11047 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11049 "00000000" // /* MW 1 */ + 11050 "00011000" // MOVX r2, #10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11051 "00101001" // /* MW 3 */ + 11052 "00000100" // /* MW 2 */ + 11053 "00010000" // /* MW 1 */ + 11054 "10011000" // LTU r2, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11055 "00101100" // /* MW 3 */ + 11056 "01000100" // /* MW 2 */ + 11057 "00010000" // /* MW 1 */ + 11058 "10000100" // JNZ r2, #11296 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11296 delay_slots=5 */ + 11059 "00000001" // /* MW 5 */ + 11060 "01000000" // /* MW 4 */ + 11061 "00010000" // /* MW 3 */ + 11062 "00010110" // /* MW 2 */ + 11063 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11065 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11066 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11067 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11068 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11069 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11071 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11073 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "transposeshuffle.h" 116 8 +.src_ref 3 "transposeshuffle.h" 119 21 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11074 "00111010" // VLDB x0, [p0], #64; MOVXM ls, #11200 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11075 "00010000" // /* MW 9 */ + 11076 "11100000" // /* MW 8 */ + 11077 "01111101" // /* MW 7 */ + 11078 "00001000" // /* MW 6 */ + 11079 "00000000" // /* MW 5 */ + 11080 "00000000" // /* MW 4 */ + 11081 "01101000" // /* MW 3 */ + 11082 "00111000" // /* MW 2 */ + 11083 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "transposeshuffle.h" 116 8 first +.src_ref 3 "transposeshuffle.h" 119 21 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11084 "00111010" // VLDB x0, [p0], #64; MOVXM le, #11200 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11085 "00010000" // /* MW 9 */ + 11086 "11100000" // /* MW 8 */ + 11087 "10111101" // /* MW 7 */ + 11088 "00001001" // /* MW 6 */ + 11089 "00000000" // /* MW 5 */ + 11090 "00000000" // /* MW 4 */ + 11091 "01101000" // /* MW 3 */ + 11092 "00111000" // /* MW 2 */ + 11093 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "transposeshuffle.h" 116 8 +.src_ref 3 "transposeshuffle.h" 119 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11094 "10111010" // NOPA; VLDB x0, [p0], #64; ADD.NC lc, r1, #-9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11095 "11001110" // /* MW 9 */ + 11096 "01111101" // /* MW 8 */ + 11097 "10111000" // /* MW 7 */ + 11098 "00000010" // /* MW 6 */ + 11099 "00110100" // /* MW 5 */ + 11100 "00011100" // /* MW 4 */ + 11101 "11110000" // /* MW 3 */ + 11102 "00101100" // /* MW 2 */ + 11103 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "transposeshuffle.h" 119 21 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11104 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11105 "00000000" // /* MW 15 */ + 11106 "00000000" // /* MW 14 */ + 11107 "01111000" // /* MW 13 */ + 11108 "10100101" // /* MW 12 */ + 11109 "00000001" // /* MW 11 */ + 11110 "00000000" // /* MW 10 */ + 11111 "00000000" // /* MW 9 */ + 11112 "00000000" // /* MW 8 */ + 11113 "01011011" // /* MW 7 */ + 11114 "00000001" // /* MW 6 */ + 11115 "01101000" // /* MW 5 */ + 11116 "00111000" // /* MW 4 */ + 11117 "11110000" // /* MW 3 */ + 11118 "00101100" // /* MW 2 */ + 11119 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "transposeshuffle.h" 119 21 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11120 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11121 "00000000" // /* MW 15 */ + 11122 "00000000" // /* MW 14 */ + 11123 "01111000" // /* MW 13 */ + 11124 "10100101" // /* MW 12 */ + 11125 "00000001" // /* MW 11 */ + 11126 "00000000" // /* MW 10 */ + 11127 "00000000" // /* MW 9 */ + 11128 "00000000" // /* MW 8 */ + 11129 "01011011" // /* MW 7 */ + 11130 "00000001" // /* MW 6 */ + 11131 "01101000" // /* MW 5 */ + 11132 "00111000" // /* MW 4 */ + 11133 "11110000" // /* MW 3 */ + 11134 "00101100" // /* MW 2 */ + 11135 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "transposeshuffle.h" 119 21 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11136 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11137 "00000000" // /* MW 15 */ + 11138 "00000000" // /* MW 14 */ + 11139 "01111000" // /* MW 13 */ + 11140 "10100101" // /* MW 12 */ + 11141 "00000001" // /* MW 11 */ + 11142 "00000000" // /* MW 10 */ + 11143 "00000000" // /* MW 9 */ + 11144 "00000000" // /* MW 8 */ + 11145 "01011011" // /* MW 7 */ + 11146 "00000001" // /* MW 6 */ + 11147 "01101000" // /* MW 5 */ + 11148 "00111000" // /* MW 4 */ + 11149 "11110000" // /* MW 3 */ + 11150 "00101100" // /* MW 2 */ + 11151 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "transposeshuffle.h" 119 21 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11152 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11153 "00000000" // /* MW 15 */ + 11154 "00000000" // /* MW 14 */ + 11155 "01111000" // /* MW 13 */ + 11156 "10100101" // /* MW 12 */ + 11157 "00000001" // /* MW 11 */ + 11158 "00000000" // /* MW 10 */ + 11159 "00000000" // /* MW 9 */ + 11160 "00000000" // /* MW 8 */ + 11161 "01011011" // /* MW 7 */ + 11162 "00000001" // /* MW 6 */ + 11163 "01101000" // /* MW 5 */ + 11164 "00111000" // /* MW 4 */ + 11165 "11110000" // /* MW 3 */ + 11166 "00101100" // /* MW 2 */ + 11167 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "transposeshuffle.h" 119 21 +.src_ref 3 "transposeshuffle.h" 120 17 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11168 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11169 "00000000" // /* MW 15 */ + 11170 "00000000" // /* MW 14 */ + 11171 "11101000" // /* MW 13 */ + 11172 "00000000" // /* MW 12 */ + 11173 "00000000" // /* MW 11 */ + 11174 "00000000" // /* MW 10 */ + 11175 "00000000" // /* MW 9 */ + 11176 "00000000" // /* MW 8 */ + 11177 "01011011" // /* MW 7 */ + 11178 "00000001" // /* MW 6 */ + 11179 "01101000" // /* MW 5 */ + 11180 "00111000" // /* MW 4 */ + 11181 "11110000" // /* MW 3 */ + 11182 "00101100" // /* MW 2 */ + 11183 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "transposeshuffle.h" 119 21 first +.src_ref 3 "transposeshuffle.h" 120 17 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11184 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11185 "00000000" // /* MW 15 */ + 11186 "00000000" // /* MW 14 */ + 11187 "11101000" // /* MW 13 */ + 11188 "00000000" // /* MW 12 */ + 11189 "00000000" // /* MW 11 */ + 11190 "00000000" // /* MW 10 */ + 11191 "00000000" // /* MW 9 */ + 11192 "00000000" // /* MW 8 */ + 11193 "01011011" // /* MW 7 */ + 11194 "00000001" // /* MW 6 */ + 11195 "01101000" // /* MW 5 */ + 11196 "00111000" // /* MW 4 */ + 11197 "11110000" // /* MW 3 */ + 11198 "00101100" // /* MW 2 */ + 11199 "00000000" // /* MW 1 */ +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_240 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "transposeshuffle.h" 119 21 +.src_ref 3 "transposeshuffle.h" 120 17 first +.src_ref 3 "transposeshuffle.h" 122 22 first +.begin_of_loop +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 11200 "11100001" // NOPA; VLDB x0, [p0], #64; VST bmll0, [p1], #64; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11201 "00000000" // /* MW 15 */ + 11202 "00000000" // /* MW 14 */ + 11203 "11101000" // /* MW 13 */ + 11204 "00000000" // /* MW 12 */ + 11205 "00000000" // /* MW 11 */ + 11206 "00000000" // /* MW 10 */ + 11207 "00000000" // /* MW 9 */ + 11208 "10000000" // /* MW 8 */ + 11209 "00000110" // /* MW 7 */ + 11210 "00011100" // /* MW 6 */ + 11211 "01101001" // /* MW 5 */ + 11212 "00111000" // /* MW 4 */ + 11213 "11110000" // /* MW 3 */ + 11214 "00101100" // /* MW 2 */ + 11215 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "transposeshuffle.h" 120 17 +.src_ref 3 "transposeshuffle.h" 122 22 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 11216 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11217 "11100000" // /* MW 7 */ + 11218 "00000000" // /* MW 6 */ + 11219 "00000000" // /* MW 5 */ + 11220 "00000000" // /* MW 4 */ + 11221 "11010000" // /* MW 3 */ + 11222 "10000000" // /* MW 2 */ + 11223 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "transposeshuffle.h" 120 17 +.src_ref 3 "transposeshuffle.h" 122 22 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11224 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11225 "11100000" // /* MW 7 */ + 11226 "00000000" // /* MW 6 */ + 11227 "00000000" // /* MW 5 */ + 11228 "00000000" // /* MW 4 */ + 11229 "11010000" // /* MW 3 */ + 11230 "10000000" // /* MW 2 */ + 11231 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "transposeshuffle.h" 120 17 +.src_ref 3 "transposeshuffle.h" 122 22 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11232 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11233 "11100000" // /* MW 7 */ + 11234 "00000000" // /* MW 6 */ + 11235 "00000000" // /* MW 5 */ + 11236 "00000000" // /* MW 4 */ + 11237 "11010000" // /* MW 3 */ + 11238 "10000000" // /* MW 2 */ + 11239 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "transposeshuffle.h" 120 17 +.src_ref 3 "transposeshuffle.h" 122 22 +.src_ref 3 "transposeshuffle.h" 126 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11240 "00111010" // VST bmll0, [p1], #64; RET lr; VSHUFFLE bmll0, x0, x0, r0 /* MW 10 */ /* control_operation: words=10 rts unconditional cycles_taken=1 delay_slots=5 */ + 11241 "11101001" // /* MW 9 */ + 11242 "00000000" // /* MW 8 */ + 11243 "00000000" // /* MW 7 */ + 11244 "00000000" // /* MW 6 */ + 11245 "01000000" // /* MW 5 */ + 11246 "00000001" // /* MW 4 */ + 11247 "11010000" // /* MW 3 */ + 11248 "10000000" // /* MW 2 */ + 11249 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "transposeshuffle.h" 120 17 first +.src_ref 3 "transposeshuffle.h" 122 22 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11250 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11251 "11100000" // /* MW 7 */ + 11252 "00000000" // /* MW 6 */ + 11253 "00000000" // /* MW 5 */ + 11254 "00000000" // /* MW 4 */ + 11255 "11010000" // /* MW 3 */ + 11256 "10000000" // /* MW 2 */ + 11257 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "transposeshuffle.h" 120 17 +.src_ref 3 "transposeshuffle.h" 122 22 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11258 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11259 "11100000" // /* MW 7 */ + 11260 "00000000" // /* MW 6 */ + 11261 "00000000" // /* MW 5 */ + 11262 "00000000" // /* MW 4 */ + 11263 "11010000" // /* MW 3 */ + 11264 "10000000" // /* MW 2 */ + 11265 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "transposeshuffle.h" 120 17 +.src_ref 3 "transposeshuffle.h" 122 22 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11266 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11267 "11100000" // /* MW 7 */ + 11268 "00000000" // /* MW 6 */ + 11269 "00000000" // /* MW 5 */ + 11270 "00000000" // /* MW 4 */ + 11271 "11010000" // /* MW 3 */ + 11272 "10000000" // /* MW 2 */ + 11273 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "transposeshuffle.h" 122 22 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11274 "00001100" // NOPA; VST bmll0, [p1], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11275 "00001101" // /* MW 5 */ + 11276 "00111000" // /* MW 4 */ + 11277 "11110010" // /* MW 3 */ + 11278 "00101100" // /* MW 2 */ + 11279 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "transposeshuffle.h" 122 22 +.delay_slot + 11280 "11100001" // NOPA; NOPB; VST bmll0, [p1], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11281 "00000000" // /* MW 15 */ + 11282 "00000000" // /* MW 14 */ + 11283 "01111000" // /* MW 13 */ + 11284 "10100101" // /* MW 12 */ + 11285 "00000001" // /* MW 11 */ + 11286 "00000000" // /* MW 10 */ + 11287 "00000000" // /* MW 9 */ + 11288 "10000000" // /* MW 8 */ + 11289 "00000110" // /* MW 7 */ + 11290 "00011100" // /* MW 6 */ + 11291 "00100001" // /* MW 5 */ + 11292 "00000000" // /* MW 4 */ + 11293 "11110000" // /* MW 3 */ + 11294 "00101100" // /* MW 2 */ + 11295 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_336 +.src_ref 3 "transposeshuffle.h" 116 8 first + 11296 "11111000" // MOV lc, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11297 "10100000" // /* MW 3 */ + 11298 "01110000" // /* MW 2 */ + 11299 "00011101" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 116 8 + 11300 "01000100" // MOVXM ls, #11312 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11301 "01100000" // /* MW 5 */ + 11302 "11111000" // /* MW 4 */ + 11303 "00100001" // /* MW 3 */ + 11304 "00000000" // /* MW 2 */ + 11305 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 116 8 + 11306 "01000100" // MOVXM le, #11424 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11307 "01000000" // /* MW 5 */ + 11308 "11111001" // /* MW 4 */ + 11309 "00100110" // /* MW 3 */ + 11310 "00000000" // /* MW 2 */ + 11311 "00000000" // /* MW 1 */ +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_352 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "transposeshuffle.h" 119 21 first +.begin_of_loop +.loop_nesting 1 + 11312 "00011000" // VLDB x0, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11313 "00110100" // /* MW 3 */ + 11314 "00011100" // /* MW 2 */ + 11315 "00111000" // /* MW 1 */ + 11316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11317 "00000000" // /* MW 1 */ + 11318 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11319 "01111110" // /* MW 9 */ + 11320 "10100101" // /* MW 8 */ + 11321 "00000001" // /* MW 7 */ + 11322 "00000000" // /* MW 6 */ + 11323 "00010000" // /* MW 5 */ + 11324 "00000000" // /* MW 4 */ + 11325 "11110000" // /* MW 3 */ + 11326 "00101100" // /* MW 2 */ + 11327 "00000000" // /* MW 1 */ + 11328 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11329 "00000000" // /* MW 15 */ + 11330 "00000000" // /* MW 14 */ + 11331 "01111000" // /* MW 13 */ + 11332 "10100101" // /* MW 12 */ + 11333 "00000001" // /* MW 11 */ + 11334 "00000000" // /* MW 10 */ + 11335 "00000000" // /* MW 9 */ + 11336 "00000000" // /* MW 8 */ + 11337 "01011011" // /* MW 7 */ + 11338 "00000001" // /* MW 6 */ + 11339 "00100000" // /* MW 5 */ + 11340 "00000000" // /* MW 4 */ + 11341 "11110000" // /* MW 3 */ + 11342 "00101100" // /* MW 2 */ + 11343 "00000000" // /* MW 1 */ + 11344 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11345 "00000000" // /* MW 15 */ + 11346 "00000000" // /* MW 14 */ + 11347 "01111000" // /* MW 13 */ + 11348 "10100101" // /* MW 12 */ + 11349 "00000001" // /* MW 11 */ + 11350 "00000000" // /* MW 10 */ + 11351 "00000000" // /* MW 9 */ + 11352 "00000000" // /* MW 8 */ + 11353 "01011011" // /* MW 7 */ + 11354 "00000001" // /* MW 6 */ + 11355 "00100000" // /* MW 5 */ + 11356 "00000000" // /* MW 4 */ + 11357 "11110000" // /* MW 3 */ + 11358 "00101100" // /* MW 2 */ + 11359 "00000000" // /* MW 1 */ + 11360 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11361 "00000000" // /* MW 15 */ + 11362 "00000000" // /* MW 14 */ + 11363 "01111000" // /* MW 13 */ + 11364 "10100101" // /* MW 12 */ + 11365 "00000001" // /* MW 11 */ + 11366 "00000000" // /* MW 10 */ + 11367 "00000000" // /* MW 9 */ + 11368 "00000000" // /* MW 8 */ + 11369 "01011011" // /* MW 7 */ + 11370 "00000001" // /* MW 6 */ + 11371 "00100000" // /* MW 5 */ + 11372 "00000000" // /* MW 4 */ + 11373 "11110000" // /* MW 3 */ + 11374 "00101100" // /* MW 2 */ + 11375 "00000000" // /* MW 1 */ + 11376 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11377 "00000000" // /* MW 15 */ + 11378 "00000000" // /* MW 14 */ + 11379 "01111000" // /* MW 13 */ + 11380 "10100101" // /* MW 12 */ + 11381 "00000001" // /* MW 11 */ + 11382 "00000000" // /* MW 10 */ + 11383 "00000000" // /* MW 9 */ + 11384 "00000000" // /* MW 8 */ + 11385 "01011011" // /* MW 7 */ + 11386 "00000001" // /* MW 6 */ + 11387 "00100000" // /* MW 5 */ + 11388 "00000000" // /* MW 4 */ + 11389 "11110000" // /* MW 3 */ + 11390 "00101100" // /* MW 2 */ + 11391 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 120 17 first + 11392 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11393 "00000000" // /* MW 15 */ + 11394 "00000000" // /* MW 14 */ + 11395 "11101000" // /* MW 13 */ + 11396 "00000000" // /* MW 12 */ + 11397 "00000000" // /* MW 11 */ + 11398 "00000000" // /* MW 10 */ + 11399 "00000000" // /* MW 9 */ + 11400 "00000000" // /* MW 8 */ + 11401 "01011011" // /* MW 7 */ + 11402 "00000001" // /* MW 6 */ + 11403 "00100000" // /* MW 5 */ + 11404 "00000000" // /* MW 4 */ + 11405 "11110000" // /* MW 3 */ + 11406 "00101100" // /* MW 2 */ + 11407 "00000000" // /* MW 1 */ + 11408 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11409 "00000000" // /* MW 15 */ + 11410 "00000000" // /* MW 14 */ + 11411 "01111000" // /* MW 13 */ + 11412 "10100101" // /* MW 12 */ + 11413 "00000001" // /* MW 11 */ + 11414 "00000000" // /* MW 10 */ + 11415 "00000000" // /* MW 9 */ + 11416 "00000000" // /* MW 8 */ + 11417 "01011011" // /* MW 7 */ + 11418 "00000001" // /* MW 6 */ + 11419 "00100000" // /* MW 5 */ + 11420 "00000000" // /* MW 4 */ + 11421 "11110000" // /* MW 3 */ + 11422 "00101100" // /* MW 2 */ + 11423 "00000000" // /* MW 1 */ +.label ZLE_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_464 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "transposeshuffle.h" 122 22 first +.end_of_loop + 11424 "11100001" // NOPA; NOPB; VST bmll0, [p1], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11425 "00000000" // /* MW 15 */ + 11426 "00000000" // /* MW 14 */ + 11427 "01111000" // /* MW 13 */ + 11428 "10100101" // /* MW 12 */ + 11429 "00000001" // /* MW 11 */ + 11430 "00000000" // /* MW 10 */ + 11431 "00000000" // /* MW 9 */ + 11432 "10000000" // /* MW 8 */ + 11433 "00000110" // /* MW 7 */ + 11434 "00011100" // /* MW 6 */ + 11435 "00100001" // /* MW 5 */ + 11436 "00000000" // /* MW 4 */ + 11437 "11110000" // /* MW 3 */ + 11438 "00101100" // /* MW 2 */ + 11439 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 126 first +.loop_nesting 0 + 11440 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11441 "00000000" // /* MW 3 */ + 11442 "00101000" // /* MW 2 */ + 11443 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11445 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11447 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11449 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11451 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11452 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11453 "01100111" // /* MW 3 */ + 11454 "00000001" // /* MW 2 */ + 11455 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_496 +.src_ref 3 "transposeshuffle.h" 86 34 + 11456 "01000100" // MOVXM p2, #508544 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11457 "00000000" // /* MW 5 */ + 11458 "11000101" // /* MW 4 */ + 11459 "11000100" // /* MW 3 */ + 11460 "00000111" // /* MW 2 */ + 11461 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 34 first + 11462 "10011000" // LDA r0, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11463 "00010110" // /* MW 3 */ + 11464 "00000100" // /* MW 2 */ + 11465 "00000010" // /* MW 1 */ + 11466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11467 "00000000" // /* MW 1 */ + 11468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11469 "00000000" // /* MW 1 */ + 11470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11471 "00000000" // /* MW 1 */ + 11472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11473 "00000000" // /* MW 1 */ + 11474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11475 "00000000" // /* MW 1 */ + 11476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11477 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 86 26 + 11478 "10000100" // JZ r0, #12096 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12096 delay_slots=5 */ + 11479 "00000001" // /* MW 5 */ + 11480 "00000000" // /* MW 4 */ + 11481 "10100000" // /* MW 3 */ + 11482 "00010111" // /* MW 2 */ + 11483 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11485 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11487 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11489 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11490 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11491 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11492 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11493 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 86 8 + 11494 "10111010" // MOVA m5, #36; MOVXM p4, #508548 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11495 "00010000" // /* MW 9 */ + 11496 "01000010" // /* MW 8 */ + 11497 "00110001" // /* MW 7 */ + 11498 "11110010" // /* MW 6 */ + 11499 "00000001" // /* MW 5 */ + 11500 "00000000" // /* MW 4 */ + 11501 "10000000" // /* MW 3 */ + 11502 "10010100" // /* MW 2 */ + 11503 "00000100" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 87 35 + 11504 "10111010" // LDA r1, [p4], #4; MOVX r5, #8; MOV r2, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11505 "01011000" // /* MW 9 */ + 11506 "11111101" // /* MW 8 */ + 11507 "01001111" // /* MW 7 */ + 11508 "00001000" // /* MW 6 */ + 11509 "01010001" // /* MW 5 */ + 11510 "00000000" // /* MW 4 */ + 11511 "11010000" // /* MW 3 */ + 11512 "10000110" // /* MW 2 */ + 11513 "10000011" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1132 26 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 3 "transposeshuffle.h" 86 8 + 11514 "10111010" // LDA r4, [p4], m5; MOVX r3, #1; MOV dc4, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11515 "01011000" // /* MW 9 */ + 11516 "00000000" // /* MW 8 */ + 11517 "01100000" // /* MW 7 */ + 11518 "00101010" // /* MW 6 */ + 11519 "00110000" // /* MW 5 */ + 11520 "00000000" // /* MW 4 */ + 11521 "11010000" // /* MW 3 */ + 11522 "00010010" // /* MW 2 */ + 11523 "10010101" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 86 8 + 11524 "01110110" // LDA m1, [p4], #-8; MOVS dc0, dc4; MOVX r6, #4; MOV m4, #32 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 11525 "01011000" // /* MW 11 */ + 11526 "00100000" // /* MW 10 */ + 11527 "00000000" // /* MW 9 */ + 11528 "10001010" // /* MW 8 */ + 11529 "01100000" // /* MW 7 */ + 11530 "00000000" // /* MW 6 */ + 11531 "01001011" // /* MW 5 */ + 11532 "00010000" // /* MW 4 */ + 11533 "11010000" // /* MW 3 */ + 11534 "10010000" // /* MW 2 */ + 11535 "10011101" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1132 26 +.src_ref 4 "transpose.hpp" 224 15 +.src_ref 3 "transposeshuffle.h" 86 8 + 11536 "01110110" // LDA dn1, [p4], #-8; MOVS dc1, dc4; ADD r0, r0, #-1; MOV r7, #52 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 11537 "01011000" // /* MW 11 */ + 11538 "00110100" // /* MW 10 */ + 11539 "11101000" // /* MW 9 */ + 11540 "11111000" // /* MW 8 */ + 11541 "00001111" // /* MW 7 */ + 11542 "00000000" // /* MW 6 */ + 11543 "01001011" // /* MW 5 */ + 11544 "00010000" // /* MW 4 */ + 11545 "11010001" // /* MW 3 */ + 11546 "10010100" // /* MW 2 */ + 11547 "10011101" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1132 26 +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 86 8 + 11548 "01110110" // LDA dj1, [p4], #12; MOVS dc5, dc4; MOVXM p2, #11632 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 11549 "00010000" // /* MW 11 */ + 11550 "10111000" // /* MW 10 */ + 11551 "00110110" // /* MW 9 */ + 11552 "00001001" // /* MW 8 */ + 11553 "00000000" // /* MW 7 */ + 11554 "00000000" // /* MW 6 */ + 11555 "01001011" // /* MW 5 */ + 11556 "00010000" // /* MW 4 */ + 11557 "11010101" // /* MW 3 */ + 11558 "10011000" // /* MW 2 */ + 11559 "10000111" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 87 12 + 11560 "10111010" // LDA dn5, [p4], #-8; MOVXM p3, #11664 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11561 "00010000" // /* MW 9 */ + 11562 "11001000" // /* MW 8 */ + 11563 "10110110" // /* MW 7 */ + 11564 "00001001" // /* MW 6 */ + 11565 "00000000" // /* MW 5 */ + 11566 "00000000" // /* MW 4 */ + 11567 "11010000" // /* MW 3 */ + 11568 "11010100" // /* MW 2 */ + 11569 "10011101" // /* MW 1 */ +.src_ref 4 "transpose.hpp" 225 15 +.src_ref 3 "transposeshuffle.h" 86 8 + 11570 "00101100" // LDA dj5, [p4], m4; MOVX r16, #53 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11571 "10101010" // /* MW 5 */ + 11572 "01000001" // /* MW 4 */ + 11573 "11010000" // /* MW 3 */ + 11574 "01011000" // /* MW 2 */ + 11575 "10010001" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 87 35 first + 11576 "10111010" // LDA m0, [p4], #-8; LTU r27, r5, r1; ADD.NC r5, r1, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11577 "11001000" // /* MW 9 */ + 11578 "01111111" // /* MW 8 */ + 11579 "10101000" // /* MW 7 */ + 11580 "11100100" // /* MW 6 */ + 11581 "10110000" // /* MW 5 */ + 11582 "00001011" // /* MW 4 */ + 11583 "11010000" // /* MW 3 */ + 11584 "10000000" // /* MW 2 */ + 11585 "10011101" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 first +.src_ref 3 "transposeshuffle.h" 86 8 first + 11586 "10111010" // LDA dn0, [p4], #-8; LSHL r17, r5, r2; ADD.NC r5, r4, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11587 "11001000" // /* MW 9 */ + 11588 "00111111" // /* MW 8 */ + 11589 "10101001" // /* MW 7 */ + 11590 "01101100" // /* MW 6 */ + 11591 "00010001" // /* MW 5 */ + 11592 "00001011" // /* MW 4 */ + 11593 "11010000" // /* MW 3 */ + 11594 "10000100" // /* MW 2 */ + 11595 "10011101" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 86 8 + 11596 "10111010" // LDA dj0, [p4], #12; LSHL r2, r5, r2; ADD.NC r5, r17, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11597 "01001000" // /* MW 9 */ + 11598 "01000000" // /* MW 8 */ + 11599 "10101100" // /* MW 7 */ + 11600 "01101100" // /* MW 6 */ + 11601 "00100001" // /* MW 5 */ + 11602 "00001010" // /* MW 4 */ + 11603 "11010000" // /* MW 3 */ + 11604 "10001000" // /* MW 2 */ + 11605 "10000111" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 + 11606 "10111010" // LDA dn4, [p4]; SEL.EQZ r5, r3, r5, r27; ADD.NC r3, r2, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11607 "01001000" // /* MW 9 */ + 11608 "10000000" // /* MW 8 */ + 11609 "01101000" // /* MW 7 */ + 11610 "10010000" // /* MW 6 */ + 11611 "01010010" // /* MW 5 */ + 11612 "00000110" // /* MW 4 */ + 11613 "11010000" // /* MW 3 */ + 11614 "11000100" // /* MW 2 */ + 11615 "10000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 + 11616 "11100001" // LDA dj4, [p4, #-8]; NOPB; NOPS; ADD r5, r5, #-1; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11617 "00000000" // /* MW 15 */ + 11618 "00000000" // /* MW 14 */ + 11619 "01111000" // /* MW 13 */ + 11620 "10100101" // /* MW 12 */ + 11621 "00000001" // /* MW 11 */ + 11622 "11111000" // /* MW 10 */ + 11623 "01011111" // /* MW 9 */ + 11624 "00001010" // /* MW 8 */ + 11625 "01011011" // /* MW 7 */ + 11626 "00000001" // /* MW 6 */ + 11627 "00100000" // /* MW 5 */ + 11628 "00000000" // /* MW 4 */ + 11629 "11010000" // /* MW 3 */ + 11630 "11001000" // /* MW 2 */ + 11631 "10011100" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_672 +.src_ref 3 "transposeshuffle.h" 87 12 first +.loop_nesting 1 + 11632 "10000100" // JZ r1, #12080 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12080 delay_slots=5 */ + 11633 "00000001" // /* MW 5 */ + 11634 "00000000" // /* MW 4 */ + 11635 "10011000" // /* MW 3 */ + 11636 "00010111" // /* MW 2 */ + 11637 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11639 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11641 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11643 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11645 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11647 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 87 12 + 11648 "11100001" // NOPA; NOPB; NOPS; NOPX; MOV r17, r5; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11649 "00000000" // /* MW 15 */ + 11650 "00000000" // /* MW 14 */ + 11651 "01111000" // /* MW 13 */ + 11652 "01010000" // /* MW 12 */ + 11653 "00101001" // /* MW 11 */ + 11654 "00000010" // /* MW 10 */ + 11655 "00000000" // /* MW 9 */ + 11656 "00000000" // /* MW 8 */ + 11657 "01011011" // /* MW 7 */ + 11658 "00000001" // /* MW 6 */ + 11659 "00100000" // /* MW 5 */ + 11660 "00000000" // /* MW 4 */ + 11661 "11110000" // /* MW 3 */ + 11662 "00101100" // /* MW 2 */ + 11663 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_704 +.src_ref 3 "transposeshuffle.h" 88 16 first +.loop_nesting 2 + 11664 "10000100" // JZ r4, #12064 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12064 delay_slots=5 */ + 11665 "00000001" // /* MW 5 */ + 11666 "00000000" // /* MW 4 */ + 11667 "10010000" // /* MW 3 */ + 11668 "00010111" // /* MW 2 */ + 11669 "00100000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11671 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11673 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11674 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11675 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11677 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11679 "00000000" // /* MW 1 */ + 11680 "10011000" // LTU r18, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11681 "01101100" // /* MW 3 */ + 11682 "11100100" // /* MW 2 */ + 11683 "00010000" // /* MW 1 */ + 11684 "10000100" // JNZ r18, #11920 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11920 delay_slots=5 */ + 11685 "00000001" // /* MW 5 */ + 11686 "01000000" // /* MW 4 */ + 11687 "01001000" // /* MW 3 */ + 11688 "00010111" // /* MW 2 */ + 11689 "10010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11691 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11693 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11694 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11695 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11697 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11698 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11699 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1132 26 first +.src_ref 3 "transposeshuffle.h" 88 16 + 11700 "00111010" // VLDB x0, [p0, #64]; MOVXM ls, #11808 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11701 "00010000" // /* MW 9 */ + 11702 "00010000" // /* MW 8 */ + 11703 "01111111" // /* MW 7 */ + 11704 "00001000" // /* MW 6 */ + 11705 "00000000" // /* MW 5 */ + 11706 "00000000" // /* MW 4 */ + 11707 "01101000" // /* MW 3 */ + 11708 "00101000" // /* MW 2 */ + 11709 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1132 26 +.src_ref 3 "transposeshuffle.h" 88 16 first + 11710 "00111010" // VLDB.3D x1, [p0], d1; MOVXM le, #11840 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11711 "00010000" // /* MW 9 */ + 11712 "00100000" // /* MW 8 */ + 11713 "10111111" // /* MW 7 */ + 11714 "00001001" // /* MW 6 */ + 11715 "00000000" // /* MW 5 */ + 11716 "00000000" // /* MW 4 */ + 11717 "11101000" // /* MW 3 */ + 11718 "01110000" // /* MW 2 */ + 11719 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 88 16 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 11720 "10011000" // ADD.NC lc, r3, #-3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11721 "11111110" // /* MW 3 */ + 11722 "01110001" // /* MW 2 */ + 11723 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1132 26 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 11724 "00011000" // VLDB x0, [p0, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11725 "00110100" // /* MW 3 */ + 11726 "00010100" // /* MW 2 */ + 11727 "00111000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1132 26 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11728 "11100001" // NOPA; VLDB.3D x1, [p0], d1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11729 "00000000" // /* MW 15 */ + 11730 "00000000" // /* MW 14 */ + 11731 "01111000" // /* MW 13 */ + 11732 "10100101" // /* MW 12 */ + 11733 "00000001" // /* MW 11 */ + 11734 "00000000" // /* MW 10 */ + 11735 "00000000" // /* MW 9 */ + 11736 "00000000" // /* MW 8 */ + 11737 "01011011" // /* MW 7 */ + 11738 "00000001" // /* MW 6 */ + 11739 "11101000" // /* MW 5 */ + 11740 "01110000" // /* MW 4 */ + 11741 "11110000" // /* MW 3 */ + 11742 "00101100" // /* MW 2 */ + 11743 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11744 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11745 "00000000" // /* MW 15 */ + 11746 "00000000" // /* MW 14 */ + 11747 "01111000" // /* MW 13 */ + 11748 "10100101" // /* MW 12 */ + 11749 "00000001" // /* MW 11 */ + 11750 "00000000" // /* MW 10 */ + 11751 "00000000" // /* MW 9 */ + 11752 "00000000" // /* MW 8 */ + 11753 "01011011" // /* MW 7 */ + 11754 "00000001" // /* MW 6 */ + 11755 "00100000" // /* MW 5 */ + 11756 "00000000" // /* MW 4 */ + 11757 "11110000" // /* MW 3 */ + 11758 "00101100" // /* MW 2 */ + 11759 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1132 26 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11760 "11100001" // NOPA; VLDB x0, [p0, #64]; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11761 "00000000" // /* MW 15 */ + 11762 "00000000" // /* MW 14 */ + 11763 "01111000" // /* MW 13 */ + 11764 "10100101" // /* MW 12 */ + 11765 "00000001" // /* MW 11 */ + 11766 "00000000" // /* MW 10 */ + 11767 "00000000" // /* MW 9 */ + 11768 "00000000" // /* MW 8 */ + 11769 "01011011" // /* MW 7 */ + 11770 "00000001" // /* MW 6 */ + 11771 "01101000" // /* MW 5 */ + 11772 "00101000" // /* MW 4 */ + 11773 "11110000" // /* MW 3 */ + 11774 "00101100" // /* MW 2 */ + 11775 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1132 26 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11776 "11100001" // NOPA; VLDB.3D x1, [p0], d1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11777 "00000000" // /* MW 15 */ + 11778 "00000000" // /* MW 14 */ + 11779 "01111000" // /* MW 13 */ + 11780 "10100101" // /* MW 12 */ + 11781 "00000001" // /* MW 11 */ + 11782 "00000000" // /* MW 10 */ + 11783 "00000000" // /* MW 9 */ + 11784 "00000000" // /* MW 8 */ + 11785 "01011011" // /* MW 7 */ + 11786 "00000001" // /* MW 6 */ + 11787 "11101000" // /* MW 5 */ + 11788 "01110000" // /* MW 4 */ + 11789 "11110000" // /* MW 3 */ + 11790 "00101100" // /* MW 2 */ + 11791 "00000000" // /* MW 1 */ +.src_ref 4 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11792 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmlh0, x1, x0, r7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11793 "00000000" // /* MW 15 */ + 11794 "00000000" // /* MW 14 */ + 11795 "11101000" // /* MW 13 */ + 11796 "00001110" // /* MW 12 */ + 11797 "01000100" // /* MW 11 */ + 11798 "00000000" // /* MW 10 */ + 11799 "00000000" // /* MW 9 */ + 11800 "00000000" // /* MW 8 */ + 11801 "01011011" // /* MW 7 */ + 11802 "00000001" // /* MW 6 */ + 11803 "00100000" // /* MW 5 */ + 11804 "00000000" // /* MW 4 */ + 11805 "11110000" // /* MW 3 */ + 11806 "00101100" // /* MW 2 */ + 11807 "00000000" // /* MW 1 */ +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_848 +.src_ref 4 "vector.hpp" 1132 26 first +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "transpose.hpp" 225 15 first +.begin_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 3 + 11808 "11100001" // NOPA; VLDB x0, [p0, #64]; MOVS p4, p1; NOPX; VSHUFFLE bmll0, x1, x0, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11809 "00000000" // /* MW 15 */ + 11810 "00000000" // /* MW 14 */ + 11811 "11101000" // /* MW 13 */ + 11812 "00100000" // /* MW 12 */ + 11813 "00000100" // /* MW 11 */ + 11814 "00000000" // /* MW 10 */ + 11815 "00000000" // /* MW 9 */ + 11816 "00000000" // /* MW 8 */ + 11817 "10001011" // /* MW 7 */ + 11818 "10000100" // /* MW 6 */ + 11819 "01101100" // /* MW 5 */ + 11820 "00101000" // /* MW 4 */ + 11821 "11110000" // /* MW 3 */ + 11822 "00101100" // /* MW 2 */ + 11823 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1132 26 +.src_ref 4 "vector.hpp" 1152 43 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11824 "11100001" // NOPA; VLDB.3D x1, [p0], d1; VST.3D bmlh0, [p1], d0; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11825 "00000000" // /* MW 15 */ + 11826 "00000000" // /* MW 14 */ + 11827 "01111000" // /* MW 13 */ + 11828 "10100101" // /* MW 12 */ + 11829 "00000001" // /* MW 11 */ + 11830 "00000000" // /* MW 10 */ + 11831 "00000000" // /* MW 9 */ + 11832 "10000000" // /* MW 8 */ + 11833 "00100110" // /* MW 7 */ + 11834 "00011000" // /* MW 6 */ + 11835 "11101001" // /* MW 5 */ + 11836 "01110000" // /* MW 4 */ + 11837 "11110000" // /* MW 3 */ + 11838 "00101100" // /* MW 2 */ + 11839 "00000000" // /* MW 1 */ +.label ZLE_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_880 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "transpose.hpp" 224 15 first +.end_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11840 "11100001" // NOPA; NOPB; VST bmll0, [p4, #64]; NOPX; VSHUFFLE bmlh0, x1, x0, r7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11841 "00000000" // /* MW 15 */ + 11842 "00000000" // /* MW 14 */ + 11843 "11101000" // /* MW 13 */ + 11844 "00001110" // /* MW 12 */ + 11845 "01000100" // /* MW 11 */ + 11846 "00000000" // /* MW 10 */ + 11847 "00000000" // /* MW 9 */ + 11848 "10000000" // /* MW 8 */ + 11849 "00000110" // /* MW 7 */ + 11850 "00010100" // /* MW 6 */ + 11851 "00100100" // /* MW 5 */ + 11852 "00000000" // /* MW 4 */ + 11853 "11110000" // /* MW 3 */ + 11854 "00101100" // /* MW 2 */ + 11855 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 2 + 11856 "00000010" // MOVS p4, p1; VSHUFFLE bmll0, x1, x0, r16 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11857 "11100000" // /* MW 7 */ + 11858 "00100000" // /* MW 6 */ + 11859 "00000100" // /* MW 5 */ + 11860 "00000000" // /* MW 4 */ + 11861 "01100000" // /* MW 3 */ + 11862 "10010001" // /* MW 2 */ + 11863 "10010000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1152 43 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11864 "10011000" // VST.3D bmlh0, [p1], d0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11865 "00100110" // /* MW 3 */ + 11866 "00011000" // /* MW 2 */ + 11867 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11868 "00000010" // VST bmll0, [p4, #64]; VSHUFFLE bmlh0, x1, x0, r7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11869 "11100000" // /* MW 7 */ + 11870 "00001110" // /* MW 6 */ + 11871 "01000100" // /* MW 5 */ + 11872 "00000000" // /* MW 4 */ + 11873 "11010000" // /* MW 3 */ + 11874 "10000000" // /* MW 2 */ + 11875 "10000010" // /* MW 1 */ +.src_ref 4 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11876 "11011000" // VSHUFFLE bmll0, x1, x0, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11877 "01000001" // /* MW 3 */ + 11878 "00001000" // /* MW 2 */ + 11879 "00011000" // /* MW 1 */ + 11880 "10000100" // J #12064 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=12064 delay_slots=5 */ + 11881 "00000000" // /* MW 5 */ + 11882 "00000000" // /* MW 4 */ + 11883 "10010000" // /* MW 3 */ + 11884 "00010111" // /* MW 2 */ + 11885 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 first +.delay_slot + 11886 "00000010" // VST.3D bmlh0, [p1], d0; MOV p4, p1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11887 "01110000" // /* MW 7 */ + 11888 "01100000" // /* MW 6 */ + 11889 "00110001" // /* MW 5 */ + 11890 "00000010" // /* MW 4 */ + 11891 "11010000" // /* MW 3 */ + 11892 "00000100" // /* MW 2 */ + 11893 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "transpose.hpp" 224 15 first +.delay_slot + 11894 "00000010" // VST bmll0, [p4, #64]; VSHUFFLE bmlh0, x1, x0, r7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11895 "11100000" // /* MW 7 */ + 11896 "00001110" // /* MW 6 */ + 11897 "01000100" // /* MW 5 */ + 11898 "00000000" // /* MW 4 */ + 11899 "11010000" // /* MW 3 */ + 11900 "10000000" // /* MW 2 */ + 11901 "10000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "transpose.hpp" 225 15 first +.delay_slot + 11902 "00000010" // MOVS p4, p1; VSHUFFLE bmll0, x1, x0, r16 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11903 "11100000" // /* MW 7 */ + 11904 "00100000" // /* MW 6 */ + 11905 "00000100" // /* MW 5 */ + 11906 "00000000" // /* MW 4 */ + 11907 "01100000" // /* MW 3 */ + 11908 "10010001" // /* MW 2 */ + 11909 "10010000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1152 43 first +.delay_slot + 11910 "10011000" // VST.3D bmlh0, [p1], d0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11911 "00100110" // /* MW 3 */ + 11912 "00011000" // /* MW 2 */ + 11913 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.delay_slot + 11914 "00001100" // NOPA; VST bmll0, [p4, #64] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11915 "00001101" // /* MW 5 */ + 11916 "00101000" // /* MW 4 */ + 11917 "11111000" // /* MW 3 */ + 11918 "00101100" // /* MW 2 */ + 11919 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_960 +.src_ref 3 "transposeshuffle.h" 88 16 first + 11920 "01000100" // MOVXM ls, #11936 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11921 "01000000" // /* MW 5 */ + 11922 "11111101" // /* MW 4 */ + 11923 "00100001" // /* MW 3 */ + 11924 "00000000" // /* MW 2 */ + 11925 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 88 16 + 11926 "01000100" // MOVXM le, #12048 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11927 "00100000" // /* MW 5 */ + 11928 "11111110" // /* MW 4 */ + 11929 "00100110" // /* MW 3 */ + 11930 "00000000" // /* MW 2 */ + 11931 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 88 16 + 11932 "10011000" // ADD.NC lc, r2, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11933 "00000000" // /* MW 3 */ + 11934 "01110001" // /* MW 2 */ + 11935 "00011101" // /* MW 1 */ +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_976 +.src_ref 4 "vector.hpp" 1132 26 first +.src_ref 4 "vector.hpp" 1152 43 +.begin_of_loop +.loop_nesting 3 + 11936 "11110100" // VLDB x0, [p0, #64]; MOV p4, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11937 "10000001" // /* MW 5 */ + 11938 "11000101" // /* MW 4 */ + 11939 "10001000" // /* MW 3 */ + 11940 "10000110" // /* MW 2 */ + 11941 "00000010" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1132 26 + 11942 "00011000" // VLDB.3D x1, [p0], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11943 "01110100" // /* MW 3 */ + 11944 "00111000" // /* MW 2 */ + 11945 "00111000" // /* MW 1 */ + 11946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11947 "00000000" // /* MW 1 */ + 11948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11949 "00000000" // /* MW 1 */ + 11950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11951 "00000000" // /* MW 1 */ + 11952 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11953 "00000000" // /* MW 15 */ + 11954 "00000000" // /* MW 14 */ + 11955 "01111000" // /* MW 13 */ + 11956 "10100101" // /* MW 12 */ + 11957 "00000001" // /* MW 11 */ + 11958 "00000000" // /* MW 10 */ + 11959 "00000000" // /* MW 9 */ + 11960 "00000000" // /* MW 8 */ + 11961 "01011011" // /* MW 7 */ + 11962 "00000001" // /* MW 6 */ + 11963 "00100000" // /* MW 5 */ + 11964 "00000000" // /* MW 4 */ + 11965 "11110000" // /* MW 3 */ + 11966 "00101100" // /* MW 2 */ + 11967 "00000000" // /* MW 1 */ + 11968 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11969 "00000000" // /* MW 15 */ + 11970 "00000000" // /* MW 14 */ + 11971 "01111000" // /* MW 13 */ + 11972 "10100101" // /* MW 12 */ + 11973 "00000001" // /* MW 11 */ + 11974 "00000000" // /* MW 10 */ + 11975 "00000000" // /* MW 9 */ + 11976 "00000000" // /* MW 8 */ + 11977 "01011011" // /* MW 7 */ + 11978 "00000001" // /* MW 6 */ + 11979 "00100000" // /* MW 5 */ + 11980 "00000000" // /* MW 4 */ + 11981 "11110000" // /* MW 3 */ + 11982 "00101100" // /* MW 2 */ + 11983 "00000000" // /* MW 1 */ + 11984 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11985 "00000000" // /* MW 15 */ + 11986 "00000000" // /* MW 14 */ + 11987 "01111000" // /* MW 13 */ + 11988 "10100101" // /* MW 12 */ + 11989 "00000001" // /* MW 11 */ + 11990 "00000000" // /* MW 10 */ + 11991 "00000000" // /* MW 9 */ + 11992 "00000000" // /* MW 8 */ + 11993 "01011011" // /* MW 7 */ + 11994 "00000001" // /* MW 6 */ + 11995 "00100000" // /* MW 5 */ + 11996 "00000000" // /* MW 4 */ + 11997 "11110000" // /* MW 3 */ + 11998 "00101100" // /* MW 2 */ + 11999 "00000000" // /* MW 1 */ +.src_ref 4 "transpose.hpp" 224 15 first + 12000 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmlh0, x1, x0, r7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12001 "00000000" // /* MW 15 */ + 12002 "00000000" // /* MW 14 */ + 12003 "11101000" // /* MW 13 */ + 12004 "00001110" // /* MW 12 */ + 12005 "01000100" // /* MW 11 */ + 12006 "00000000" // /* MW 10 */ + 12007 "00000000" // /* MW 9 */ + 12008 "00000000" // /* MW 8 */ + 12009 "01011011" // /* MW 7 */ + 12010 "00000001" // /* MW 6 */ + 12011 "00100000" // /* MW 5 */ + 12012 "00000000" // /* MW 4 */ + 12013 "11110000" // /* MW 3 */ + 12014 "00101100" // /* MW 2 */ + 12015 "00000000" // /* MW 1 */ +.src_ref 4 "transpose.hpp" 225 15 first + 12016 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x1, x0, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12017 "00000000" // /* MW 15 */ + 12018 "00000000" // /* MW 14 */ + 12019 "11101000" // /* MW 13 */ + 12020 "00100000" // /* MW 12 */ + 12021 "00000100" // /* MW 11 */ + 12022 "00000000" // /* MW 10 */ + 12023 "00000000" // /* MW 9 */ + 12024 "00000000" // /* MW 8 */ + 12025 "01011011" // /* MW 7 */ + 12026 "00000001" // /* MW 6 */ + 12027 "00100000" // /* MW 5 */ + 12028 "00000000" // /* MW 4 */ + 12029 "11110000" // /* MW 3 */ + 12030 "00101100" // /* MW 2 */ + 12031 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1152 43 first + 12032 "11100001" // NOPA; NOPB; VST.3D bmlh0, [p1], d0; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12033 "00000000" // /* MW 15 */ + 12034 "00000000" // /* MW 14 */ + 12035 "01111000" // /* MW 13 */ + 12036 "10100101" // /* MW 12 */ + 12037 "00000001" // /* MW 11 */ + 12038 "00000000" // /* MW 10 */ + 12039 "00000000" // /* MW 9 */ + 12040 "10000000" // /* MW 8 */ + 12041 "00100110" // /* MW 7 */ + 12042 "00011000" // /* MW 6 */ + 12043 "00100001" // /* MW 5 */ + 12044 "00000000" // /* MW 4 */ + 12045 "11110000" // /* MW 3 */ + 12046 "00101100" // /* MW 2 */ + 12047 "00000000" // /* MW 1 */ +.label ZLE_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1088 +.src_ref 4 "vector.hpp" 1152 43 +.end_of_loop + 12048 "11100001" // NOPA; NOPB; VST bmll0, [p4, #64]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12049 "00000000" // /* MW 15 */ + 12050 "00000000" // /* MW 14 */ + 12051 "01111000" // /* MW 13 */ + 12052 "10100101" // /* MW 12 */ + 12053 "00000001" // /* MW 11 */ + 12054 "00000000" // /* MW 10 */ + 12055 "00000000" // /* MW 9 */ + 12056 "10000000" // /* MW 8 */ + 12057 "00000110" // /* MW 7 */ + 12058 "00010100" // /* MW 6 */ + 12059 "00100100" // /* MW 5 */ + 12060 "00000000" // /* MW 4 */ + 12061 "11110000" // /* MW 3 */ + 12062 "00101100" // /* MW 2 */ + 12063 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1104 +.src_ref 3 "transposeshuffle.h" 87 12 first +.loop_nesting 2 + 12064 "00011000" // JNZD r17, r17, p3 /* MW 4 */ /* control_operation: words=4 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 12065 "11100000" // /* MW 3 */ + 12066 "01100010" // /* MW 2 */ + 12067 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12068 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12069 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12071 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12073 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12075 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12076 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12077 "01100111" // /* MW 3 */ + 12078 "00000001" // /* MW 2 */ + 12079 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1120 +.src_ref 3 "transposeshuffle.h" 86 8 first +.loop_nesting 1 + 12080 "00011000" // JNZD r0, r0, p2 /* MW 4 */ /* control_operation: words=4 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 12081 "10100000" // /* MW 3 */ + 12082 "00000000" // /* MW 2 */ + 12083 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12085 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12087 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12089 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12091 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12092 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12093 "01100111" // /* MW 3 */ + 12094 "00000001" // /* MW 2 */ + 12095 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1136 +.src_ref 3 "transposeshuffle.h" 126 first +.loop_nesting 0 + 12096 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12097 "00000000" // /* MW 3 */ + 12098 "00101000" // /* MW 2 */ + 12099 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12100 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12101 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12103 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12105 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12107 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12108 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params__end +.label __Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params___func_end0 + 12109 "00000000" // /* MW 1 */ +.label __ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj___func_begin0 +.label _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj +.function transpose4d_adf_wrapper, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> > > _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj +.src_ref 10 "transpose4d_adf_wrapper.cpp" 33 first +.function_start + 12112 "11111000" // MOV p3, p6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12113 "11000000" // /* MW 3 */ + 12114 "01101100" // /* MW 2 */ + 12115 "00011011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 10 "transpose4d_adf_wrapper.cpp" 37 8 + 12116 "00111010" // MOVS p6, p1; MOVXM p1, #508436 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12117 "00010001" // /* MW 9 */ + 12118 "00001010" // /* MW 8 */ + 12119 "10110001" // /* MW 7 */ + 12120 "11110000" // /* MW 6 */ + 12121 "00000001" // /* MW 5 */ + 12122 "00000000" // /* MW 4 */ + 12123 "01100000" // /* MW 3 */ + 12124 "10010001" // /* MW 2 */ + 12125 "11010000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 37 8 first + 12126 "10011000" // LDA r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12127 "00010110" // /* MW 3 */ + 12128 "00000110" // /* MW 2 */ + 12129 "00000001" // /* MW 1 */ + 12130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12131 "00000000" // /* MW 1 */ + 12132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12133 "00000000" // /* MW 1 */ + 12134 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12135 "00000000" // /* MW 1 */ + 12136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12137 "00000000" // /* MW 1 */ + 12138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12139 "00000000" // /* MW 1 */ + 12140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12141 "00000000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 37 8 +.src_ref 10 "transpose4d_adf_wrapper.cpp" 37 15 + 12142 "10000100" // JNZ r16, #12208 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12208 delay_slots=5 */ + 12143 "00000001" // /* MW 5 */ + 12144 "01000000" // /* MW 4 */ + 12145 "11011000" // /* MW 3 */ + 12146 "00010111" // /* MW 2 */ + 12147 "10000000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 33 +.delay_slot + 12148 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12149 "00000001" // /* MW 5 */ + 12150 "00000000" // /* MW 4 */ + 12151 "00000000" // /* MW 3 */ + 12152 "00001000" // /* MW 2 */ + 12153 "00000000" // /* MW 1 */ +.delay_slot + 12154 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12155 "00111101" // /* MW 3 */ + 12156 "11110100" // /* MW 2 */ + 12157 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.delay_slot + 12158 "00000010" // MOVS p7, p0; MOV p1, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12159 "01110000" // /* MW 7 */ + 12160 "01100000" // /* MW 6 */ + 12161 "10110111" // /* MW 5 */ + 12162 "00000000" // /* MW 4 */ + 12163 "01100000" // /* MW 3 */ + 12164 "00010001" // /* MW 2 */ + 12165 "11110000" // /* MW 1 */ +.delay_slot + 12166 "10011000" // ST p3, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12167 "10011101" // /* MW 3 */ + 12168 "11111001" // /* MW 2 */ + 12169 "00001111" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 38 8 +.delay_slot + 12170 "00111010" // ST p1, [sp, #-4]; MOVXM p0, #508544 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12171 "00010001" // /* MW 9 */ + 12172 "01000000" // /* MW 8 */ + 12173 "00110001" // /* MW 7 */ + 12174 "11110000" // /* MW 6 */ + 12175 "00000001" // /* MW 5 */ + 12176 "00000000" // /* MW 4 */ + 12177 "10110000" // /* MW 3 */ + 12178 "10010011" // /* MW 2 */ + 12179 "11111111" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 38 8 first +.no_stack_arguments + 12180 "00000100" // JL #10688 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10688 delay_slots=5 */ + 12181 "00000001" // /* MW 5 */ + 12182 "00000000" // /* MW 4 */ + 12183 "11100000" // /* MW 3 */ + 12184 "00010100" // /* MW 2 */ + 12185 "00000000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 38 8 +.delay_slot + 12186 "11111000" // MOV p1, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12187 "11000000" // /* MW 3 */ + 12188 "01100100" // /* MW 2 */ + 12189 "00011001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12191 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12193 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12195 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12196 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12197 "10000001" // /* MW 11 */ + 12198 "10101101" // /* MW 10 */ + 12199 "00000000" // /* MW 9 */ + 12200 "00000000" // /* MW 8 */ + 12201 "00000000" // /* MW 7 */ + 12202 "00000000" // /* MW 6 */ + 12203 "00100000" // /* MW 5 */ + 12204 "00000000" // /* MW 4 */ + 12205 "11110000" // /* MW 3 */ + 12206 "00101100" // /* MW 2 */ + 12207 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj_96 +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 3 "transposeshuffle.h" 137 72 +.return_address + 12208 "10111010" // LDA r16, [p7]; MOVXM p7, #508564 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12209 "00010000" // /* MW 9 */ + 12210 "01001010" // /* MW 8 */ + 12211 "10110001" // /* MW 7 */ + 12212 "11110011" // /* MW 6 */ + 12213 "00000001" // /* MW 5 */ + 12214 "00000000" // /* MW 4 */ + 12215 "11010000" // /* MW 3 */ + 12216 "11000010" // /* MW 2 */ + 12217 "11100000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 137 72 first + 12218 "10011000" // LDA r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12219 "00110110" // /* MW 3 */ + 12220 "00000110" // /* MW 2 */ + 12221 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 12222 "10011000" // LDA p1, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12223 "10011110" // /* MW 3 */ + 12224 "00000100" // /* MW 2 */ + 12225 "00000110" // /* MW 1 */ + 12226 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12227 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 137 11 first +.no_stack_arguments + 12228 "00000100" // JL #10960 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10960 delay_slots=5 */ + 12229 "00000001" // /* MW 5 */ + 12230 "00000000" // /* MW 4 */ + 12231 "01101000" // /* MW 3 */ + 12232 "00010101" // /* MW 2 */ + 12233 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 137 64 +.delay_slot + 12234 "00011000" // MOVX r18, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12235 "00000101" // /* MW 3 */ + 12236 "00100100" // /* MW 2 */ + 12237 "00010000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 137 11 +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 27 +.delay_slot + 12238 "01000100" // MOVXM p2, #508544 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12239 "00000000" // /* MW 5 */ + 12240 "11000101" // /* MW 4 */ + 12241 "11000100" // /* MW 3 */ + 12242 "00000111" // /* MW 2 */ + 12243 "00000000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 27 +.delay_slot + 12244 "11111000" // MOV p6, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12245 "11000000" // /* MW 3 */ + 12246 "01100100" // /* MW 2 */ + 12247 "00011110" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 137 64 +.delay_slot + 12248 "10011000" // LSHL r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12249 "00101101" // /* MW 3 */ + 12250 "01100011" // /* MW 2 */ + 12251 "00010100" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 137 64 +.delay_slot + 12252 "01011000" // ADD.NC p0, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12253 "11000001" // /* MW 3 */ + 12254 "01101000" // /* MW 2 */ + 12255 "00011000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 8 +.src_ref 10 "transpose4d_adf_wrapper.cpp" 46 +.return_address + 12256 "10111010" // LDA lr, [sp, #-12]; MOVXM p2, #508436 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12257 "00010000" // /* MW 9 */ + 12258 "00001010" // /* MW 8 */ + 12259 "00110001" // /* MW 7 */ + 12260 "11110001" // /* MW 6 */ + 12261 "00000001" // /* MW 5 */ + 12262 "00000000" // /* MW 4 */ + 12263 "00100000" // /* MW 3 */ + 12264 "10000111" // /* MW 2 */ + 12265 "11111110" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 8 +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 8 first + 12266 "00101100" // LDA r16, [p2]; MOVX r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12267 "00000010" // /* MW 5 */ + 12268 "01100000" // /* MW 4 */ + 12269 "11010000" // /* MW 3 */ + 12270 "11000010" // /* MW 2 */ + 12271 "01000000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 27 + 12272 "10011000" // LDA r17, [p6, #24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12273 "00110110" // /* MW 3 */ + 12274 "01100110" // /* MW 2 */ + 12275 "00000110" // /* MW 1 */ + 12276 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12277 "00011001" // /* MW 3 */ + 12278 "11111011" // /* MW 2 */ + 12279 "00000111" // /* MW 1 */ + 12280 "00011000" // LDA p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12281 "10011001" // /* MW 3 */ + 12282 "11111111" // /* MW 2 */ + 12283 "00000111" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 46 first + 12284 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12285 "00000001" // /* MW 5 */ + 12286 "00000000" // /* MW 4 */ + 12287 "00000000" // /* MW 3 */ + 12288 "11111000" // /* MW 2 */ + 12289 "11111111" // /* MW 1 */ + 12290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12291 "00000000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 46 + 12292 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12293 "00000000" // /* MW 3 */ + 12294 "00101000" // /* MW 2 */ + 12295 "00010000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 8 first +.delay_slot + 12296 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12297 "00000111" // /* MW 3 */ + 12298 "00100000" // /* MW 2 */ + 12299 "00010100" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 17 +.delay_slot + 12300 "10011000" // EQ r27, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12301 "00000111" // /* MW 3 */ + 12302 "01110111" // /* MW 2 */ + 12303 "00010100" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 8 +.delay_slot + 12304 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12305 "10000010" // /* MW 3 */ + 12306 "00100001" // /* MW 2 */ + 12307 "00010100" // /* MW 1 */ +.delay_slot + 12308 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12309 "00010001" // /* MW 3 */ + 12310 "00000110" // /* MW 2 */ + 12311 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj__end +.label __ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj___func_end0 + 12313 "00000000" // /* MW 1 */ +.label __Z14_b7835_wrapperPPv___func_begin0 +.label _Z14_b7835_wrapperPPv +.function _b7835_wrapper _Z14_b7835_wrapperPPv +.src_ref 0 "0_0_reloadable77.cc" 46 first +.src_ref 0 "0_0_reloadable77.cc" 48 79 +.function_start + 12320 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12321 "11000000" // /* MW 3 */ + 12322 "01100000" // /* MW 2 */ + 12323 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 48 79 first + 12324 "10011000" // LDA p0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12325 "00011110" // /* MW 3 */ + 12326 "00011100" // /* MW 2 */ + 12327 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 50 46 first + 12328 "10011000" // LDA p2, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12329 "00011110" // /* MW 3 */ + 12330 "00010101" // /* MW 2 */ + 12331 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 49 80 first + 12332 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12333 "10011110" // /* MW 3 */ + 12334 "00000100" // /* MW 2 */ + 12335 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 47 4 first +.tail_call + 12336 "10000100" // J #12112 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=12112 delay_slots=5 */ + 12337 "00000000" // /* MW 5 */ + 12338 "00000000" // /* MW 4 */ + 12339 "10101000" // /* MW 3 */ + 12340 "00010111" // /* MW 2 */ + 12341 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12343 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12345 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12347 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12349 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z14_b7835_wrapperPPv__end +.label __Z14_b7835_wrapperPPv___func_end0 + 12351 "00000000" // /* MW 1 */ +.label __Z15_b14160_wrapperPPv___func_begin0 +.label _Z15_b14160_wrapperPPv +.function _b14160_wrapper _Z15_b14160_wrapperPPv +.src_ref 0 "0_0_reloadable77.cc" 54 first +.src_ref 0 "0_0_reloadable77.cc" 56 79 +.function_start + 12352 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12353 "11000000" // /* MW 3 */ + 12354 "01100000" // /* MW 2 */ + 12355 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 56 79 first + 12356 "10011000" // LDA p0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12357 "00011110" // /* MW 3 */ + 12358 "00011100" // /* MW 2 */ + 12359 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 57 79 first + 12360 "10011000" // LDA p1, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12361 "10011110" // /* MW 3 */ + 12362 "00101100" // /* MW 2 */ + 12363 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 59 81 first + 12364 "10011000" // LDA p3, [p2, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12365 "10011110" // /* MW 3 */ + 12366 "11110101" // /* MW 2 */ + 12367 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 58 47 first + 12368 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12369 "00011110" // /* MW 3 */ + 12370 "00000101" // /* MW 2 */ + 12371 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 55 4 first +.tail_call + 12372 "10000100" // J #8592 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=8592 delay_slots=5 */ + 12373 "00000000" // /* MW 5 */ + 12374 "00000000" // /* MW 4 */ + 12375 "11001000" // /* MW 3 */ + 12376 "00010000" // /* MW 2 */ + 12377 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12379 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12381 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12383 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12385 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z15_b14160_wrapperPPv__end +.label __Z15_b14160_wrapperPPv___func_end0 + 12387 "00000000" // /* MW 1 */ +.label __Z15_b13739_wrapperPPv___func_begin0 +.label _Z15_b13739_wrapperPPv +.function _b13739_wrapper _Z15_b13739_wrapperPPv +.src_ref 0 "0_0_reloadable77.cc" 63 first +.src_ref 0 "0_0_reloadable77.cc" 65 79 +.function_start + 12400 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12401 "11000000" // /* MW 3 */ + 12402 "01100000" // /* MW 2 */ + 12403 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 65 79 first + 12404 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12405 "00011110" // /* MW 3 */ + 12406 "00101100" // /* MW 2 */ + 12407 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 67 81 first + 12408 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12409 "00011110" // /* MW 3 */ + 12410 "11110101" // /* MW 2 */ + 12411 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 66 47 first + 12412 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12413 "10011110" // /* MW 3 */ + 12414 "00000100" // /* MW 2 */ + 12415 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 64 4 first +.tail_call + 12416 "10000100" // J #3904 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=3904 delay_slots=5 */ + 12417 "00000000" // /* MW 5 */ + 12418 "00000000" // /* MW 4 */ + 12419 "10100000" // /* MW 3 */ + 12420 "00000111" // /* MW 2 */ + 12421 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12423 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12425 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12426 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12427 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12428 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12429 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z15_b13739_wrapperPPv__end +.label __Z15_b13739_wrapperPPv___func_end0 + 12431 "00000000" // /* MW 1 */ +.label __Z15_b13744_wrapperPPv___func_begin0 +.label _Z15_b13744_wrapperPPv +.function _b13744_wrapper _Z15_b13744_wrapperPPv +.src_ref 0 "0_0_reloadable77.cc" 71 first +.src_ref 0 "0_0_reloadable77.cc" 73 79 +.function_start + 12432 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12433 "11000000" // /* MW 3 */ + 12434 "01100000" // /* MW 2 */ + 12435 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 73 79 first + 12436 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12437 "00011110" // /* MW 3 */ + 12438 "00101100" // /* MW 2 */ + 12439 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 75 81 first + 12440 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12441 "00011110" // /* MW 3 */ + 12442 "11110101" // /* MW 2 */ + 12443 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 74 47 first + 12444 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12445 "10011110" // /* MW 3 */ + 12446 "00000100" // /* MW 2 */ + 12447 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable77.cc" 72 4 first +.tail_call + 12448 "10000100" // J #4864 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=4864 delay_slots=5 */ + 12449 "00000000" // /* MW 5 */ + 12450 "00000000" // /* MW 4 */ + 12451 "10000000" // /* MW 3 */ + 12452 "00001001" // /* MW 2 */ + 12453 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12455 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12457 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12459 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12461 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z15_b13744_wrapperPPv__end +.label __Z15_b13744_wrapperPPv___func_end0 + 12463 "00000000" // /* MW 1 */ +.label _ZN12me_primitive10udiv_dstepEjjRjS0_ +.function udiv_dstep _ZN12me_primitive10udiv_dstepEjjRjS0_ +.src_ref 11 "me_div.c" 108 19 +.src_ref 11 "me_div.c" 108 19 +.src_ref 11 "me_div.c" 115 4 first +.function_start + 12464 "11100100" // MOVX r3, #0; MOV r31, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12465 "01000001" // /* MW 5 */ + 12466 "10100000" // /* MW 4 */ + 12467 "00101111" // /* MW 3 */ + 12468 "11000000" // /* MW 2 */ + 12469 "00000000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12470 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12471 "00011100" // /* MW 3 */ + 12472 "11000110" // /* MW 2 */ + 12473 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12474 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12475 "00011100" // /* MW 3 */ + 12476 "11000110" // /* MW 2 */ + 12477 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12478 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12479 "00011100" // /* MW 3 */ + 12480 "11000110" // /* MW 2 */ + 12481 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12482 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12483 "00011100" // /* MW 3 */ + 12484 "11000110" // /* MW 2 */ + 12485 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12486 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12487 "00011100" // /* MW 3 */ + 12488 "11000110" // /* MW 2 */ + 12489 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12490 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12491 "00011100" // /* MW 3 */ + 12492 "11000110" // /* MW 2 */ + 12493 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12494 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12495 "00011100" // /* MW 3 */ + 12496 "11000110" // /* MW 2 */ + 12497 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12498 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12499 "00011100" // /* MW 3 */ + 12500 "11000110" // /* MW 2 */ + 12501 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12502 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12503 "00011100" // /* MW 3 */ + 12504 "11000110" // /* MW 2 */ + 12505 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12506 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12507 "00011100" // /* MW 3 */ + 12508 "11000110" // /* MW 2 */ + 12509 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12510 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12511 "00011100" // /* MW 3 */ + 12512 "11000110" // /* MW 2 */ + 12513 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12514 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12515 "00011100" // /* MW 3 */ + 12516 "11000110" // /* MW 2 */ + 12517 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12518 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12519 "00011100" // /* MW 3 */ + 12520 "11000110" // /* MW 2 */ + 12521 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12522 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12523 "00011100" // /* MW 3 */ + 12524 "11000110" // /* MW 2 */ + 12525 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12526 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12527 "00011100" // /* MW 3 */ + 12528 "11000110" // /* MW 2 */ + 12529 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12530 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12531 "00011100" // /* MW 3 */ + 12532 "11000110" // /* MW 2 */ + 12533 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12534 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12535 "00011100" // /* MW 3 */ + 12536 "11000110" // /* MW 2 */ + 12537 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12538 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12539 "00011100" // /* MW 3 */ + 12540 "11000110" // /* MW 2 */ + 12541 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12542 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12543 "00011100" // /* MW 3 */ + 12544 "11000110" // /* MW 2 */ + 12545 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12546 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12547 "00011100" // /* MW 3 */ + 12548 "11000110" // /* MW 2 */ + 12549 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12550 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12551 "00011100" // /* MW 3 */ + 12552 "11000110" // /* MW 2 */ + 12553 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12554 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12555 "00011100" // /* MW 3 */ + 12556 "11000110" // /* MW 2 */ + 12557 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12558 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12559 "00011100" // /* MW 3 */ + 12560 "11000110" // /* MW 2 */ + 12561 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12562 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12563 "00011100" // /* MW 3 */ + 12564 "11000110" // /* MW 2 */ + 12565 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12566 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12567 "00011100" // /* MW 3 */ + 12568 "11000110" // /* MW 2 */ + 12569 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12570 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12571 "00011100" // /* MW 3 */ + 12572 "11000110" // /* MW 2 */ + 12573 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12574 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12575 "00011100" // /* MW 3 */ + 12576 "11000110" // /* MW 2 */ + 12577 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12578 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12579 "00011100" // /* MW 3 */ + 12580 "11000110" // /* MW 2 */ + 12581 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 119 first + 12582 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12583 "00000000" // /* MW 3 */ + 12584 "00101000" // /* MW 2 */ + 12585 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 first +.delay_slot + 12586 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12587 "00011100" // /* MW 3 */ + 12588 "11000110" // /* MW 2 */ + 12589 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 +.delay_slot + 12590 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12591 "00011100" // /* MW 3 */ + 12592 "11000110" // /* MW 2 */ + 12593 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 +.delay_slot + 12594 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12595 "00011100" // /* MW 3 */ + 12596 "11000110" // /* MW 2 */ + 12597 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 +.delay_slot + 12598 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12599 "00011100" // /* MW 3 */ + 12600 "11000110" // /* MW 2 */ + 12601 "00010000" // /* MW 1 */ +.delay_slot + 12602 "11111000" // MOV r2, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12603 "10100000" // /* MW 3 */ + 12604 "10011111" // /* MW 2 */ +.label _ZN12me_primitive10udiv_dstepEjjRjS0___end + 12605 "00011000" // /* MW 1 */ +.label memset +.function memset memset +.src_ref 12 "string.c" 325 first +.src_ref 12 "string.c" 328 4 first +.function_start + 12608 "10000100" // JZ r1, #12768 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12768 delay_slots=5 */ + 12609 "00000001" // /* MW 5 */ + 12610 "00000000" // /* MW 4 */ + 12611 "11110000" // /* MW 3 */ + 12612 "00011000" // /* MW 2 */ + 12613 "00001000" // /* MW 1 */ +.src_ref 12 "string.c" 329 3 +.delay_slot + 12614 "11111000" // MOV p0, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12615 "11000000" // /* MW 3 */ + 12616 "01100010" // /* MW 2 */ + 12617 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12619 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12621 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12622 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12623 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12625 "00000000" // /* MW 1 */ +.src_ref 12 "string.c" 328 4 first +.src_ref 12 "string.c" 329 3 + 12626 "00000010" // MOVS p1, p0; MOV lc, r1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12627 "01110000" // /* MW 7 */ + 12628 "01010000" // /* MW 6 */ + 12629 "10111000" // /* MW 5 */ + 12630 "00000010" // /* MW 4 */ + 12631 "01100000" // /* MW 3 */ + 12632 "00010001" // /* MW 2 */ + 12633 "00110000" // /* MW 1 */ +.src_ref 12 "string.c" 328 4 + 12634 "01000100" // MOVXM ls, #12656 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12635 "11100000" // /* MW 5 */ + 12636 "11100010" // /* MW 4 */ + 12637 "00110001" // /* MW 3 */ + 12638 "00000000" // /* MW 2 */ + 12639 "00000000" // /* MW 1 */ +.src_ref 12 "string.c" 328 4 + 12640 "11100001" // NOPA; NOPB; NOPS; MOVXM le, #12752; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12641 "00000000" // /* MW 15 */ + 12642 "00000000" // /* MW 14 */ + 12643 "00010000" // /* MW 13 */ + 12644 "11101000" // /* MW 12 */ + 12645 "10111000" // /* MW 11 */ + 12646 "00001101" // /* MW 10 */ + 12647 "00000000" // /* MW 9 */ + 12648 "00000000" // /* MW 8 */ + 12649 "01011011" // /* MW 7 */ + 12650 "00000001" // /* MW 6 */ + 12651 "00100000" // /* MW 5 */ + 12652 "00000000" // /* MW 4 */ + 12653 "11110000" // /* MW 3 */ + 12654 "00101100" // /* MW 2 */ + 12655 "00000000" // /* MW 1 */ +.label ZLS_Fmemset_48 +.src_ref 12 "string.c" 329 3 first +.begin_of_loop +.loop_nesting 1 + 12656 "11100001" // ST.s8 r0, [p1], #1; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12657 "00000000" // /* MW 15 */ + 12658 "00000000" // /* MW 14 */ + 12659 "01111000" // /* MW 13 */ + 12660 "10100101" // /* MW 12 */ + 12661 "00000001" // /* MW 11 */ + 12662 "00000000" // /* MW 10 */ + 12663 "00000000" // /* MW 9 */ + 12664 "00000000" // /* MW 8 */ + 12665 "01011011" // /* MW 7 */ + 12666 "00000001" // /* MW 6 */ + 12667 "00100000" // /* MW 5 */ + 12668 "00000000" // /* MW 4 */ + 12669 "11100000" // /* MW 3 */ + 12670 "10000000" // /* MW 2 */ + 12671 "00100011" // /* MW 1 */ + 12672 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12673 "00000000" // /* MW 15 */ + 12674 "00000000" // /* MW 14 */ + 12675 "01111000" // /* MW 13 */ + 12676 "10100101" // /* MW 12 */ + 12677 "00000001" // /* MW 11 */ + 12678 "00000000" // /* MW 10 */ + 12679 "00000000" // /* MW 9 */ + 12680 "00000000" // /* MW 8 */ + 12681 "01011011" // /* MW 7 */ + 12682 "00000001" // /* MW 6 */ + 12683 "00100000" // /* MW 5 */ + 12684 "00000000" // /* MW 4 */ + 12685 "11110000" // /* MW 3 */ + 12686 "00101100" // /* MW 2 */ + 12687 "00000000" // /* MW 1 */ + 12688 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12689 "00000000" // /* MW 15 */ + 12690 "00000000" // /* MW 14 */ + 12691 "01111000" // /* MW 13 */ + 12692 "10100101" // /* MW 12 */ + 12693 "00000001" // /* MW 11 */ + 12694 "00000000" // /* MW 10 */ + 12695 "00000000" // /* MW 9 */ + 12696 "00000000" // /* MW 8 */ + 12697 "01011011" // /* MW 7 */ + 12698 "00000001" // /* MW 6 */ + 12699 "00100000" // /* MW 5 */ + 12700 "00000000" // /* MW 4 */ + 12701 "11110000" // /* MW 3 */ + 12702 "00101100" // /* MW 2 */ + 12703 "00000000" // /* MW 1 */ + 12704 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12705 "00000000" // /* MW 15 */ + 12706 "00000000" // /* MW 14 */ + 12707 "01111000" // /* MW 13 */ + 12708 "10100101" // /* MW 12 */ + 12709 "00000001" // /* MW 11 */ + 12710 "00000000" // /* MW 10 */ + 12711 "00000000" // /* MW 9 */ + 12712 "00000000" // /* MW 8 */ + 12713 "01011011" // /* MW 7 */ + 12714 "00000001" // /* MW 6 */ + 12715 "00100000" // /* MW 5 */ + 12716 "00000000" // /* MW 4 */ + 12717 "11110000" // /* MW 3 */ + 12718 "00101100" // /* MW 2 */ + 12719 "00000000" // /* MW 1 */ + 12720 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12721 "00000000" // /* MW 15 */ + 12722 "00000000" // /* MW 14 */ + 12723 "01111000" // /* MW 13 */ + 12724 "10100101" // /* MW 12 */ + 12725 "00000001" // /* MW 11 */ + 12726 "00000000" // /* MW 10 */ + 12727 "00000000" // /* MW 9 */ + 12728 "00000000" // /* MW 8 */ + 12729 "01011011" // /* MW 7 */ + 12730 "00000001" // /* MW 6 */ + 12731 "00100000" // /* MW 5 */ + 12732 "00000000" // /* MW 4 */ + 12733 "11110000" // /* MW 3 */ + 12734 "00101100" // /* MW 2 */ + 12735 "00000000" // /* MW 1 */ + 12736 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12737 "00000000" // /* MW 15 */ + 12738 "00000000" // /* MW 14 */ + 12739 "01111000" // /* MW 13 */ + 12740 "10100101" // /* MW 12 */ + 12741 "00000001" // /* MW 11 */ + 12742 "00000000" // /* MW 10 */ + 12743 "00000000" // /* MW 9 */ + 12744 "00000000" // /* MW 8 */ + 12745 "01011011" // /* MW 7 */ + 12746 "00000001" // /* MW 6 */ + 12747 "00100000" // /* MW 5 */ + 12748 "00000000" // /* MW 4 */ + 12749 "11110000" // /* MW 3 */ + 12750 "00101100" // /* MW 2 */ + 12751 "00000000" // /* MW 1 */ +.label ZLE_Fmemset_144 +.end_of_loop + 12752 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12753 "00000000" // /* MW 15 */ + 12754 "00000000" // /* MW 14 */ + 12755 "01111000" // /* MW 13 */ + 12756 "10100101" // /* MW 12 */ + 12757 "00000001" // /* MW 11 */ + 12758 "00000000" // /* MW 10 */ + 12759 "00000000" // /* MW 9 */ + 12760 "00000000" // /* MW 8 */ + 12761 "01011011" // /* MW 7 */ + 12762 "00000001" // /* MW 6 */ + 12763 "00100000" // /* MW 5 */ + 12764 "00000000" // /* MW 4 */ + 12765 "11110000" // /* MW 3 */ + 12766 "00101100" // /* MW 2 */ + 12767 "00000000" // /* MW 1 */ +.label TGT_Fmemset_160 +.src_ref 12 "string.c" 330 4 first +.loop_nesting 0 + 12768 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12769 "00000000" // /* MW 3 */ + 12770 "00101000" // /* MW 2 */ + 12771 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12773 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12775 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12776 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12777 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12778 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12779 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12780 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label memset__end + 12781 "00000000" // /* MW 1 */ +.dir 0 "/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable77/src" +.dir 1 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer" +.dir 2 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common" +.dir 3 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc" +.dir 4 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2" +.dir 5 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p" +.dir 6 "/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend" +.dir 7 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/conv" +.dir 8 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib" +.dir 9 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/detail" +.dir 10 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf" +.dir 11 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21990/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib" +.dir 12 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21990/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib/runtime/src" diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable77/Release/0_0_reloadable77.cmico b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable77/Release/0_0_reloadable77.cmico new file mode 100644 index 0000000000000000000000000000000000000000..f377058758269f564988080a1597f499edc1b997 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable77/Release/0_0_reloadable77.cmico @@ -0,0 +1 @@ ++Mdec diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable77/Release/0_0_reloadable77.lst b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable77/Release/0_0_reloadable77.lst new file mode 100644 index 0000000000000000000000000000000000000000..75b45df169e1e61bc2ffc965ef7c71f63a16feee --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable77/Release/0_0_reloadable77.lst @@ -0,0 +1,4100 @@ + +// File generated by darts version V-2024.06#84922c0d9f#241219, Fri May 30 11:35:26 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// darts -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -d -h -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable77 me + +// Release: ipp V-2024.06-TGT-241219 + +.text_segment PM 2528 +.entry_point +.label __Z13kernelWrapperPPvjjjj___func_begin0 +.label _Z13kernelWrapperPPvjjjj +.function_start + 2528 0x00 0xc2 0xd0 0xe9 0xe0 0x2c LDA r16, [p0]; NEZ r26, r1 + 2534 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 2540 0x0f 0xef 0x1d 0x98 ST p6, [sp, #-20] + 2544 0xfe 0x3a 0xb0 0x01 0xc8 0xd0 0x70 0x02 ST r14, [sp, #-16]; MOV r14, r3 + 2552 0xff 0x3e 0xb0 0x01 0xe8 0x50 0x70 0x02 ST r15, [sp, #-8]; MOV r15, r1 + 2560 0x0f 0xf7 0x9d 0x98 ST p7, [sp, #-12] + 2564 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4] + 2568 0x1e 0x68 0x02 0x18 ADD.NC p6, r16, #4 + 2572 0x06 0x1e 0x16 0x98 LDA r16, [p6], #4 + 2576 0x06 0x3e 0x56 0x98 LDA r18, [p6], #12 + 2580 0x06 0xee 0x36 0x98 LDA r17, [p6], #-8 + 2584 0x06 0x07 0x76 0x98 LDA r27, [p6] + 2588 0x00 0x00 NOPX + 2590 0x00 0x00 NOPX + 2592 0x00 0x00 NOPX + 2594 0x00 0x00 NOPX + 2596 0x00 0x00 NOPX + 2598 0x00 0x00 NOPX + 2600 0x14 0x21 0x22 0x18 SEL.EQZ r16, r16, r18, r27 + 2604 0x0e 0xd6 0x11 0x98 ST r16, [p6, #-12] + 2608 0xfc 0x1f 0xa0 0x35 0x39 0xe4 MOVX r16, #-1; MOV el0, r26 + 2614 0x00 0x00 NOPX + 2616 0x00 0x00 NOPX + 2618 0x00 0x00 NOPX + 2620 0x14 0x57 0x08 0x18 ACQ.COND r17, r16, r26 + 2624 0x04 0x41 0x29 0xa0 0x05 0x64 MOVX r17, #2; MOV r19, #1 + 2630 0xd5 0x23 0xb9 0x21 0x81 0xe4 LSHL r20, r26, r17; MOV r18, p0 + 2636 0x9c 0x9f 0x9c 0xd2 0xa2 0xa4 LTU r18, r19, r15; ADD.NC p6, r18, r20 + 2642 0xc0 0xd2 0xd7 0xe6 0x95 0x82 0x6e 0x60 0x72 0xba LDA r20, [p6]; ST r20, [sp, #-28]; MOV r19, p6 + 2652 0xfd 0x4a 0xb0 0x03 0x4c 0x90 0x70 0x02 ST r18, [sp, #-24]; MOV r26, r18 + 2660 0x00 0x00 NOPX + 2662 0x00 0x00 NOPX + 2664 0x00 0x00 NOPX + 2666 0x00 0x00 NOPX + 2668 0x00 0x00 NOPX + 2670 0x1e 0x6a 0x02 0x18 ADD.NC p6, r20, #4 + 2674 0x06 0x1e 0x96 0x98 LDA r20, [p6], #4 + 2678 0x06 0x3e 0xd6 0x98 LDA r22, [p6], #12 + 2682 0x06 0xee 0xb6 0x98 LDA r21, [p6], #-8 + 2686 0x06 0x07 0x76 0x98 LDA r27, [p6] + 2690 0x00 0x00 NOPX + 2692 0x00 0x00 NOPX + 2694 0x00 0x00 NOPX + 2696 0x00 0x00 NOPX + 2698 0x00 0x00 NOPX + 2700 0x00 0x00 NOPX + 2702 0x15 0x29 0x62 0x18 SEL.EQZ r20, r20, r22, r27 + 2706 0x0e 0xd6 0x91 0x98 ST r20, [p6, #-12] + 2710 0x00 0x00 NOPX + 2712 0x00 0x00 NOPX + 2714 0x00 0x00 NOPX + 2716 0x00 0x00 NOPX + 2718 0x15 0x57 0x08 0x18 ACQ.COND r21, r16, r26 + 2722 0x14 0xa5 0x1d 0x98 LSHL r18, r18, r17 + 2726 0x14 0xa3 0xb9 0xb3 0x92 0xa4 LSHL r18, r2, r17; ADD.NC r19, r19, r18 + 2732 0x76 0x9e 0x0c 0xd3 0x92 0xa4 NEZ r26, r14; ADD.NC p6, r19, r18 + 2738 0xc0 0xca 0xdf 0xc6 0xab 0x0c LDA r18, [p6]; ST r26, [sp, #-32] + 2744 0x00 0x00 NOPX + 2746 0x00 0x00 NOPX + 2748 0x00 0x00 NOPX + 2750 0x00 0x00 NOPX + 2752 0x00 0x00 NOPX + 2754 0x00 0x00 NOPX + 2756 0x1f 0x69 0x02 0x18 ADD.NC p7, r18, #4 + 2760 0x07 0x3e 0x76 0x98 LDA r19, [p7], #12 + 2764 0x07 0xee 0x56 0x98 LDA r18, [p7], #-8 + 2768 0x07 0x1e 0x96 0x98 LDA r20, [p7], #4 + 2772 0x07 0x07 0x76 0x98 LDA r27, [p7] + 2776 0x00 0x00 NOPX + 2778 0x00 0x00 NOPX + 2780 0x00 0x00 NOPX + 2782 0x00 0x00 NOPX + 2784 0x00 0x00 NOPX + 2786 0x00 0x00 NOPX + 2788 0x14 0xe7 0x42 0x18 SEL.EQZ r19, r19, r20, r27 + 2792 0x0f 0xd6 0x71 0x98 ST r19, [p7, #-12] + 2796 0x00 0x00 NOPX + 2798 0x00 0x00 NOPX + 2800 0x00 0x00 NOPX + 2802 0x00 0x00 NOPX + 2804 0x14 0x97 0x08 0x18 ACQ.COND r18, r16, r26 + 2808 0x10 0x21 0x1d 0x98 LSHL r16, r0, r17 + 2812 0x18 0x88 0x20 0xf8 MOV dj0, r16 + 2816 0x00 0x07 0xce 0xc4 0x80 0x44 MOVXM p7, #508480 + 2822 0xe0 0x13 0xdf 0xb8 0x5b 0x0c LDA p1, [p7, dj0]; ST el0, [sp, #-36] + 2828 0x00 0x00 NOPX + 2830 0x00 0x00 NOPX + 2832 0x00 0x00 NOPX + 2834 0x00 0x00 NOPX + 2836 0x00 0x00 NOPX + 2838 0x00 0x00 NOPX +.no_stack_arguments + 2840 0x10 0x30 0x40 0x18 JL p1 +.delay_slot + 2844 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.delay_slot +.swstall delay_slot + 2848 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2850 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2852 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2854 0x00 0x2c 0xf0 0x00 0x10 0x00 0x01 0xa5 0x7e 0xba NOPA; NOPB; NOPM +.return_address + 2864 0xe0 0xc6 0xd0 0x40 0x0a 0x2c LDA r17, [p7]; MOVX r16, #1 + 2870 0x07 0xdf 0x51 0x18 LDA r26, [sp, #-36] + 2874 0x07 0xe4 0x41 0x18 LDA dj0, [sp, #-28] + 2878 0x07 0xe8 0x29 0x18 LDA el0, [sp, #-24] + 2882 0x07 0xe0 0x09 0x18 LDA eh0, [sp, #-32] + 2886 0x00 0x00 NOPX + 2888 0x00 0x00 NOPX + 2890 0x18 0x68 0x88 0x18 ADD.NC p0, r17, #16 + 2894 0x00 0x06 0x36 0x98 LDA r17, [p0] + 2898 0x00 0x00 NOPX + 2900 0x00 0x00 NOPX + 2902 0x00 0x00 NOPX + 2904 0x00 0x00 NOPX + 2906 0x00 0x00 NOPX + 2908 0x00 0x00 NOPX + 2910 0x14 0x55 0x08 0x18 REL.COND r17, r16, r26 + 2914 0x1e 0xc6 0xdd 0xaf 0x41 0xd4 LDA r17, [p0, #-4]; MOV r27, r15 + 2920 0xe0 0x4a 0xdd 0x40 0x39 0xd4 LDA r18, [p7, dj0]; MOV r26, el0 + 2926 0x00 0x00 NOPX + 2928 0x00 0x00 NOPX + 2930 0x00 0x00 NOPX + 2932 0x00 0x00 NOPX + 2934 0x00 0x00 NOPX + 2936 0x14 0x27 0x11 0x98 SUB r19, r16, r17 + 2940 0x8c 0x66 0x4e 0xd2 0x10 0x24 SEL.EQZ r17, r17, r19, r27; ADD.NC p7, r18, #16 + 2946 0xe0 0xc6 0xd1 0xec 0x63 0x0c LDA r17, [p7]; ST r17, [p0, #-4] + 2952 0x00 0x00 NOPX + 2954 0x00 0x00 NOPX + 2956 0x00 0x00 NOPX + 2958 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 2960 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 2962 0x1e 0xa1 0x1c 0xf8 MOV r26, eh0 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2966 0x14 0x55 0x08 0x18 REL.COND r17, r16, r26 + 2970 0xfe 0xc6 0xdd 0xc0 0x39 0xd4 LDA r17, [p7, #-4]; MOV r27, el0 + 2976 0x06 0x06 0x56 0x98 LDA r18, [p6] + 2980 0x00 0x00 NOPX + 2982 0x00 0x00 NOPX + 2984 0x00 0x00 NOPX + 2986 0x00 0x00 NOPX + 2988 0x00 0x00 NOPX + 2990 0x14 0x27 0x11 0x98 SUB r19, r16, r17 + 2994 0x8c 0x66 0x40 0xd2 0x14 0x24 SEL.EQZ r17, r17, r19, r27; ADD.NC p0, r18, #20 + 3000 0x00 0xc6 0xdf 0xec 0x63 0x0c LDA r17, [p0]; ST r17, [p7, #-4] + 3006 0x00 0x00 NOPX + 3008 0x00 0x00 NOPX + 3010 0x00 0x00 NOPX + 3012 0x00 0x00 NOPX + 3014 0x00 0x00 NOPX + 3016 0x00 0x00 NOPX + 3018 0x14 0x55 0x08 0x18 REL.COND r17, r16, r26 + 3022 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] + 3026 0x00 0xe6 0x36 0x98 LDA r17, [p0, #-8] + 3030 0x07 0xef 0x19 0x18 LDA p6, [sp, #-20] +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 3034 0x07 0xf7 0x99 0x18 LDA p7, [sp, #-12] +.aggressive_scheduled_block_id 2 +.noswbrkpt + 3038 0x07 0xf1 0xd1 0x18 LDA r14, [sp, #-16] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3042 0x07 0xf9 0xf1 0x18 LDA r15, [sp, #-8] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3046 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3052 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3056 0x14 0x21 0x11 0x98 SUB r16, r16, r17 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3060 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3062 0x1e 0xd7 0x20 0xf8 MOV r27, r14 +.delay_slot + 3066 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 +.delay_slot + 3070 0x08 0xe6 0x11 0x98 ST r16, [p0, #-8] +.label _Z13kernelWrapperPPvjjjj__end +.label __Z13kernelWrapperPPvjjjj___func_end0 + +.text_segment PM 3088 +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E +.function_start + 3088 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 3092 0x00 0x07 0xc0 0xc6 0x40 0x44 MOVXM p0, #508704 +.delay_slot + 3098 0x18 0x00 0x80 0xb8 MOV m0, #64 +.delay_slot + 3102 0x08 0x04 0x01 0x98 ST m0, [p0] +.delay_slot + 3106 0x08 0x14 0x01 0x98 ST m0, [p0, #4] +.delay_slot +.swstall delay_slot + 3110 0x00 0x00 NOPX +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_end0 + +.text_segment PM 3120 +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv +.function_start + 3120 0x23 0x85 0xd0 0x00 0x01 0xf0 0x31 0x80 0x10 0xba LDA el0, [p1], #4; MOVXM p0, #508672 + 3130 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 3136 0x0f 0xf8 0x3d 0x98 ST lr, [sp, #-8] + 3140 0x0f 0xfd 0xf5 0x98 ST r15, [sp, #-4] + 3144 0x00 0x00 NOPX + 3146 0x00 0x00 NOPX + 3148 0x00 0x00 NOPX + 3150 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 3154 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 3158 0x00 0x00 NOPX + 3160 0x00 0x00 NOPX + 3162 0x00 0x00 NOPX + 3164 0x00 0x00 NOPX + 3166 0x00 0x00 NOPX + 3168 0x00 0x00 NOPX + 3170 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 3174 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 3178 0x00 0x00 NOPX + 3180 0x00 0x00 NOPX + 3182 0x00 0x00 NOPX + 3184 0x00 0x00 NOPX + 3186 0x00 0x00 NOPX + 3188 0x00 0x00 NOPX + 3190 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 3194 0x01 0x14 0x2e 0x98 LDA el0, [p1, #4] + 3198 0x00 0x00 NOPX + 3200 0x00 0x00 NOPX +.no_stack_arguments + 3202 0x00 0x06 0x08 0x00 0x01 0x04 JL #3088 +.delay_slot +.swstall delay_slot + 3208 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3210 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3212 0x00 0x00 NOPX +.delay_slot + 3214 0x08 0xdc 0x29 0x98 ST el0, [p0], #-12 +.delay_slot + 3218 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x5e 0x86 0x07 0x00 0x00 0x1c 0x2e NOPA; NOPS; MOV r15, p0; NOPV +.return_address + 3232 0xff 0x07 0x20 0x01 0x00 0x68 0x33 0xc4 0x08 0xba LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16 + 3242 0x01 0xe2 0x80 0x01 0x80 0x08 0x07 0xfd 0x58 0xba MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 + 3252 0xff 0xbe 0x20 0x0a 0x11 0x80 0x07 0xa0 0x01 0x7a LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128 + 3262 0x00 0x06 0x4a 0x98 LDA.u8 r18, [p0] + 3266 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3268 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3270 0x00 0x02 0x17 0x18 ST.s16 r16, [p0, dj0] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3274 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3278 0x10 0x22 0x05 0x18 MOVX r17, #1 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3282 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3288 0x14 0x77 0x27 0x98 EQ r27, r17, r18 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3292 0x14 0x21 0x82 0x18 SEL.EQZ r16, r16, r24, r27 +.delay_slot +.swstall delay_slot + 3296 0x00 0x00 NOPX +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + +.text_segment PM 3312 +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E +.function_start + 3312 0x02 0x80 0x80 0x00 0x01 0xf0 0x31 0x86 0x10 0xba MOVA m0, #20; MOVXM p0, #508684 + 3322 0x01 0x01 0x50 0x00 0x20 0x28 0x28 0x06 0x58 0xba LDA.u8 r0, [p0], m0; MOVX r2, #1; MOV r1, #6 + 3332 0x00 0x00 NOPX + 3334 0x00 0x00 NOPX + 3336 0x00 0x00 NOPX + 3338 0x00 0x00 NOPX + 3340 0x00 0x00 NOPX + 3342 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 3346 0x10 0x06 0xf0 0x18 NEZ r3, r0 +.delay_slot + 3350 0x10 0x80 0x08 0x98 NE r0, r2, r0 +.delay_slot + 3354 0x10 0x00 0x1d 0x98 LSHL r0, r0, r1 +.delay_slot + 3358 0x02 0x82 0x31 0x88 0x3b 0x5c ST r0, [p0, #4]; LSHL r2, r3, r1 +.delay_slot + 3364 0x08 0x04 0x51 0x98 ST r2, [p0] +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_end0 + +.text_segment PM 3376 +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv +.function_start + 3376 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 3382 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4] +.no_stack_arguments + 3386 0x00 0x06 0x18 0x00 0x01 0x04 JL #3120 +.delay_slot + 3392 0x00 0x07 0xc0 0xc6 0x00 0x44 MOVXM p0, #508672 +.delay_slot +.swstall delay_slot + 3398 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3400 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3402 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3404 0x00 0x01 0x67 0x98 NOPA +.return_address + 3408 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] +.tail_call + 3412 0x00 0x06 0x78 0x00 0x00 0x84 J #3312 +.delay_slot + 3418 0x00 0x07 0xc0 0xc6 0x00 0x44 MOVXM p0, #508672 +.delay_slot + 3424 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 3430 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3432 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3434 0x00 0x00 NOPX +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + +.text_segment PM 3440 +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.function_start + 3440 0x02 0x80 0x80 0x00 0x01 0xf1 0xb1 0x80 0x10 0xba MOVA m0, #20; MOVXM p3, #508672 + 3450 0x03 0x3c 0x16 0x98 LDA r0, [p3], #12 + 3454 0x61 0x05 0x58 0xcd 0x81 0xd4 LDA.u8 r1, [p3], m0; MOV p4, p3 + 3460 0x00 0x00 NOPX + 3462 0x00 0x00 NOPX + 3464 0x00 0x00 NOPX + 3466 0x00 0x00 NOPX + 3468 0x00 0x00 NOPX + 3470 0x00 0x00 NOPX + 3472 0x08 0x06 0xe8 0x40 0x01 0x84 JNZ r1, #3536 +.delay_slot + 3478 0x17 0xc4 0xe9 0x18 MOVX r2, #-6 +.delay_slot + 3482 0x10 0x00 0x2d 0x98 LSHL r0, r0, r2 +.delay_slot +.swstall delay_slot + 3486 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3488 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3490 0x00 0x00 NOPX + 3492 0x00 0x04 0x32 0x98 LDA.s16 r1, [p0] + 3496 0x00 0x00 NOPX + 3498 0x00 0x00 NOPX + 3500 0x00 0x00 NOPX + 3502 0x00 0x06 0xf8 0x00 0x00 0x84 J #3568 +.delay_slot +.swstall delay_slot + 3508 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3510 0x00 0x00 NOPX +.delay_slot + 3512 0x18 0x05 0x72 0xf8 VBCST.16 x0, r1 +.delay_slot +.swstall delay_slot + 3516 0x00 0x01 0x67 0x98 NOPA +.delay_slot + 3520 0x00 0x2c 0xf0 0x00 0x20 0x04 0x13 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST x0, [p0]; NOPX; NOPM; NOPV +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_96 + 3536 0x01 0x04 0x32 0x98 LDA.s16 r1, [p1] + 3540 0x00 0x00 NOPX + 3542 0x00 0x00 NOPX + 3544 0x00 0x00 NOPX + 3546 0x00 0x00 NOPX + 3548 0x00 0x00 NOPX + 3550 0x00 0x00 NOPX + 3552 0x18 0x05 0x72 0xf8 VBCST.16 x0, r1 + 3556 0x00 0x00 NOPX + 3558 0x00 0x2c 0xf1 0x04 0x13 0x00 0x00 0x00 0x00 0x7a NOPA; VST x0, [p1]; NOPX +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_128 + 3568 0x8a 0x80 0xd0 0x00 0x07 0x8a 0xb8 0x3f 0x48 0xba LDA m0, [p4, #20]; MOVX r0, #60; ADD.NC lc, r0, #-3 + 3578 0x62 0x90 0xd0 0x00 0x00 0x00 0x7f 0x30 0x10 0xba LDA m1, [p3, #4]; MOVXM ls, #3680 + 3588 0x00 0x00 0x06 0xfd 0x00 0x44 MOVXM le, #3712 + 3594 0x00 0x07 0xc8 0xc4 0x40 0x44 MOVXM p4, #508448 + 3600 0x04 0x04 0x22 0x98 LDA.s8 r1, [p4] + 3604 0x00 0x00 NOPX + 3606 0x00 0x00 NOPX + 3608 0x00 0x08 0xab 0x98 VLDA.CONV.fp32.bf16 cml1, [p0], m0 + 3612 0x01 0x29 0x2b 0x98 VLDA.CONV.fp32.bf16 cml2, [p1], m1 + 3616 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3620 0x01 0x2a 0x2b 0x98 VLDA.CONV.fp32.bf16 cml4, [p1], m1 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3624 0x01 0x15 0x70 0xf5 0x00 0x2c VLDA.CONV.fp32.bf16 cml1, [p0], m0; MOVX crRnd, r1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3630 0x25 0x25 0x70 0x04 0x03 0x28 0x3d 0x62 VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3638 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3642 0x25 0x45 0x70 0x04 0x04 0x10 0x3d 0x62 VLDA.CONV.fp32.bf16 cml4, [p1], m1; VADD.f dm4, dm0, dm4, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3650 0x00 0x08 0xab 0x98 VLDA.CONV.fp32.bf16 cml1, [p0], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3654 0x25 0x25 0x70 0x04 0x03 0x28 0x3d 0x62 VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3662 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3666 0x25 0x45 0x74 0x3b 0x46 0x00 0x00 0x40 0x1a 0x57 0x04 0x10 0x3d 0x6e VLDA.CONV.fp32.bf16 cml4, [p1], m1; VST.CONV.bf16.fp32 cml3, [p2], #64; NOPM; VADD.f dm4, dm0, dm4, r0 +.label ZLS_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_240 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3680 0x00 0x08 0xab 0x98 VLDA.CONV.fp32.bf16 cml1, [p0], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3684 0x25 0x25 0x70 0x00 0x21 0x0f 0x11 0x8e 0x03 0x28 0x3d 0x66 VLDA.CONV.fp32.bf16 cml2, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3696 0x01 0x05 0x70 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLE_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_272 +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3712 0x25 0x45 0x70 0x00 0x22 0x1d 0xa3 0x00 0x00 0x00 0x01 0xa5 0x78 0x20 0x81 0xeb VLDA.CONV.fp32.bf16 cml4, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml3, [p2], #64;NOPX; NOPM; VADD.f dm4, dm0, dm4, r0 +.loop_nesting 0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3728 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3730 0x43 0xc4 0x60 0x02 0x03 0x28 0x3d 0x62 VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3738 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3740 0x43 0xb4 0x60 0x02 0x04 0x10 0x3d 0x62 VST.CONV.bf16.fp32 cml3, [p2], #64; VADD.f dm4, dm0, dm4, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3748 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3750 0x43 0xc4 0x60 0x50 0x00 0x5c VST.CONV.bf16.fp32 cml4, [p2], #64;RET lr +.delay_slot +.swstall delay_slot + 3756 0x00 0x00 NOPX +.delay_slot + 3758 0x0a 0x1d 0xa3 0x18 VST.CONV.bf16.fp32 cml3, [p2], #64 +.delay_slot +.swstall delay_slot + 3762 0x00 0x00 NOPX +.delay_slot + 3764 0x0a 0x1e 0x23 0x18 VST.CONV.bf16.fp32 cml4, [p2], #64 +.delay_slot +.swstall delay_slot + 3768 0x00 0x00 NOPX +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0 + +.text_segment PM 3776 +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E +.function_start + 3776 0x50 0x91 0x60 0x00 0x04 0x00 0x00 0x00 0x71 0x3a MOVS p2, p1; PADDXM [sp], #128 + 3786 0xff 0x87 0xb0 0x02 0x08 0x60 0x70 0x02 ST lr, [sp, #-4]; MOV r16, p0 + 3794 0x1c 0x55 0xe0 0xf8 MOV r17, sp + 3798 0x00 0x07 0xc6 0xc6 0x18 0x44 MOVXM p3, #508684 + 3804 0x65 0xed 0x50 0xd1 0x80 0x14 LDA.u8 r27, [p3], #2; ADD.NC p0, r17, #-128 + 3810 0x73 0xca 0x50 0x0e 0x56 0x0c LDA.s16 r18, [p3], #-14; VST sfh, [p0] + 3816 0x00 0x06 0x57 0x18 ST.s16 r18, [p0] + 3820 0x00 0x00 NOPX + 3822 0x00 0x00 NOPX +.no_stack_arguments + 3824 0x00 0x06 0xb8 0x00 0x01 0x04 JL #3440 +.delay_slot + 3830 0x1c 0x50 0xc0 0xf8 MOV r17, p0 +.delay_slot +.swstall delay_slot + 3834 0x00 0x00 NOPX +.delay_slot + 3836 0x14 0x25 0x12 0x18 SEL.EQZ r18, r16, r17, r27 +.delay_slot + 3840 0x8c 0x20 0x42 0xd2 0x41 0xe4 SEL.EQZ r16, r17, r16, r27; MOV p1, r18 +.delay_slot + 3846 0x00 0x2c 0xf0 0x00 0x10 0x00 0x34 0x10 0x7e 0xba NOPA; NOPB; MOV p0, r16 +.return_address + 3856 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] + 3860 0x00 0x00 NOPX + 3862 0x00 0x00 NOPX + 3864 0x00 0x00 NOPX + 3866 0x00 0x00 NOPX + 3868 0x00 0x00 NOPX + 3870 0x00 0x00 NOPX + 3872 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 3876 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128 +.delay_slot +.swstall delay_slot + 3882 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3884 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3886 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3888 0x00 0x00 NOPX +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_end0 + +.text_segment PM 3904 +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function_start + 3904 0x00 0x07 0xc6 0xc4 0x00 0x44 MOVXM p3, #508416 + 3910 0x60 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p3]; MOV r17, CORE_ID + 3916 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 3922 0xff 0x63 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p6, [sp, #-8]; MOV r0, r15 + 3930 0xff 0x82 0xb0 0x00 0x01 0xf3 0x31 0x02 0x11 0x3a ST r0, [sp, #-4]; MOVXM p6, #508420 + 3940 0x1b 0xd4 0xc0 0xf8 MOV r15, p2 + 3944 0x00 0x00 NOPX + 3946 0x00 0x00 NOPX + 3948 0x80 0x08 0x08 0x40 0x01 0x84 JNZ r16, #4112 +.delay_slot + 3954 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17 +.delay_slot + 3958 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 3962 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12] +.delay_slot + 3966 0xc0 0xc6 0x30 0x03 0x30 0x60 0x70 0x02 ST r17, [p6]; MOV p6, p0 +.delay_slot + 3974 0x00 0x07 0xc0 0xc6 0x00 0x44 MOVXM p0, #508672 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3980 0x00 0x07 0xc4 0xc4 0x40 0x44 MOVXM p2, #508448 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3986 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x31 0x0e 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #508444 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3996 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 3998 0x00 0x06 0x98 0x00 0x01 0x04 JL #3376 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4004 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4006 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4008 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 4012 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 4016 0x00 0x2c 0xf0 0x00 0x22 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV +.return_address + 4032 0x00 0x07 0xc4 0xc4 0x08 0x44 MOVXM p2, #508420 + 4038 0x40 0xc2 0xd0 0x00 0x01 0xf1 0x31 0x80 0x10 0xba LDA r16, [p2]; MOVXM p2, #508672 + 4048 0x40 0xc6 0xd0 0x00 0x01 0xf1 0x31 0x80 0x10 0xba LDA r17, [p2]; MOVXM p2, #508672 + 4058 0x4a 0xcb 0x50 0x00 0x01 0xf0 0xb1 0x04 0x10 0xba LDA.u16 r18, [p2, #10]; MOVXM p1, #508424 + 4068 0x00 0x00 NOPX + 4070 0x00 0x00 NOPX + 4072 0x00 0x08 0x10 0x00 0x00 0x84 J #4128 +.delay_slot + 4078 0x00 0x07 0xc0 0xc4 0x30 0x44 MOVXM p0, #508440 +.delay_slot +.swstall delay_slot + 4084 0x00 0x00 NOPX +.delay_slot + 4086 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 +.delay_slot + 4090 0x00 0x2c 0xf0 0x0c 0xa3 0x0c NOPA; ST r18, [p0] +.delay_slot + 4096 0x00 0x2c 0xf0 0x00 0x21 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 + 4112 0x00 0x2c 0xf0 0x00 0x22 0x80 0x8b 0x00 0x01 0xf0 0xb1 0x04 0x10 0x00 0x00 0xe1 NOPA; NOPB; MOVS p2, p0; MOVXM p1, #508424; NOPV +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 + 4128 0x73 0x91 0x60 0x03 0xb3 0xc3 0x00 0x02 MOVS p3, p7; ADD.NC p7, r15, #12 + 4136 0xff 0xee 0xd0 0x00 0x01 0xf0 0x31 0x00 0x10 0xba LDA r27, [p7], #-4; MOVXM p0, #508416 + 4146 0x07 0xfe 0x16 0x98 LDA r16, [p7], #-4 + 4150 0x07 0xfe 0x36 0x98 LDA r17, [p7], #-4 + 4154 0x07 0x46 0x56 0x98 LDA r18, [p7, #16] + 4158 0x00 0x00 NOPX + 4160 0x00 0x00 NOPX + 4162 0x00 0x00 NOPX + 4164 0x00 0x00 NOPX + 4166 0x00 0x00 NOPX + 4168 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 4172 0x0f 0x06 0x11 0x98 ST r16, [p7] + 4176 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 4180 0x00 0x00 NOPX + 4182 0x00 0x00 NOPX + 4184 0x00 0x00 NOPX + 4186 0x14 0x93 0x08 0x18 ACQ r18, r16 + 4190 0x04 0x00 0xa7 0xad 0x81 0xe4 MOVX r16, #1; MOV r15, p3 + 4196 0x00 0x00 NOPX + 4198 0x00 0x00 NOPX + 4200 0x00 0x06 0x36 0x98 LDA r17, [p0] + 4204 0xc0 0xca 0xdc 0xdd 0x81 0xd4 LDA r18, [p6]; MOV p6, p7 + 4210 0x01 0x06 0x76 0x98 LDA r19, [p1] + 4214 0x07 0x5c 0x9e 0x98 LDA p1, [p7], #20 + 4218 0x00 0x00 NOPX +.no_stack_arguments + 4220 0x00 0x07 0x60 0x00 0x01 0x04 JL #3776 +.delay_slot +.swstall delay_slot + 4226 0x00 0x00 NOPX +.delay_slot + 4228 0x14 0x62 0x07 0x18 ADD r17, r17, #1 +.delay_slot + 4232 0x08 0x06 0x31 0x98 ST r17, [p0] +.delay_slot + 4236 0x14 0xe1 0x0d 0x98 LSHL r16, r19, r16 +.delay_slot + 4240 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xa0 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV +.return_address + 4256 0xca 0xc6 0xd0 0x00 0x01 0xf3 0x31 0x00 0x10 0xba LDA r17, [p6, #20]; MOVXM p6, #508416 + 4266 0x10 0x20 0x05 0x18 MOVX r16, #1 + 4270 0x00 0x00 NOPX + 4272 0x00 0x00 NOPX + 4274 0x00 0x00 NOPX + 4276 0x00 0x00 NOPX + 4278 0x00 0x00 NOPX + 4280 0x14 0x51 0x08 0x18 REL r17, r16 + 4284 0xfc 0xce 0xd0 0x00 0x01 0xf1 0x31 0x0c 0x10 0xba LDA r19, [p7, #-8]; MOVXM p2, #508440 + 4294 0x06 0x06 0x36 0x98 LDA r17, [p6] + 4298 0x02 0x06 0x56 0x98 LDA r18, [p2] + 4302 0x00 0x00 NOPX + 4304 0x00 0x00 NOPX + 4306 0x00 0x00 NOPX + 4308 0x00 0x00 NOPX + 4310 0x14 0x21 0x31 0x98 SUB r16, r16, r19 + 4314 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8] + 4318 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 4322 0x80 0x08 0x80 0x40 0x01 0x84 JNZ r16, #4352 +.delay_slot +.swstall delay_slot + 4328 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4330 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4332 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4334 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4336 0x00 0x00 NOPX + 4338 0x10 0x20 0x01 0x18 MOVX r16, #0 + 4342 0x00 0x2c 0xf6 0x06 0x11 0x80 0x00 0x00 0x00 0x7a NOPA; ST r16, [p6]; NOPX +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 + 4352 0x07 0xf4 0x39 0x18 LDA lr, [sp, #-12] + 4356 0x07 0xfb 0x19 0x18 LDA p6, [sp, #-8] + 4360 0x00 0x00 NOPX + 4362 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 4364 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4366 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4370 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4372 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4376 0x1f 0x67 0xa0 0xf8 MOV p7, r15 +.delay_slot + 4380 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 4386 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4388 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4390 0x00 0x00 NOPX +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + +.text_segment PM 4400 +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv +.function_start + 4400 0x23 0x85 0xd0 0x00 0x01 0xf0 0x31 0xc0 0x10 0xba LDA el0, [p1], #4; MOVXM p0, #508800 + 4410 0x00 0x00 NOPX + 4412 0x00 0x00 NOPX + 4414 0x00 0x00 NOPX + 4416 0x00 0x00 NOPX + 4418 0x00 0x00 NOPX + 4420 0x00 0x00 NOPX + 4422 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 4426 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 4430 0x00 0x00 NOPX + 4432 0x00 0x00 NOPX + 4434 0x00 0x00 NOPX + 4436 0x00 0x00 NOPX + 4438 0x00 0x00 NOPX + 4440 0x00 0x00 NOPX + 4442 0x08 0x04 0x29 0x98 ST el0, [p0] + 4446 0x01 0x14 0x2e 0x98 LDA el0, [p1, #4] + 4450 0x00 0x00 NOPX + 4452 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot + 4456 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4458 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4460 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4462 0x00 0x00 NOPX +.delay_slot + 4464 0x08 0x14 0x29 0x98 ST el0, [p0, #4] +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv__end +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_end0 + +.text_segment PM 4480 +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E +.function_start + 4480 0xff 0x40 0x00 0x3d 0x68 0x00 0x01 0xf1 0x31 0xc0 0x10 0xb6 MOVA r0, #-6; VLDB x10, [p0], #64; MOVXM p2, #508800 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4492 0x40 0x8a 0xd0 0x3b 0xe8 0x00 0x01 0xf1 0x31 0x10 0x10 0xb6 LDA r2, [p2]; VLDB x7, [p0], #64; MOVXM p2, #508448 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4504 0x40 0x84 0x50 0x3d 0x68 0x00 0x00 0x10 0xc8 0x40 0x10 0xb6 LDA.s8 r1, [p2]; VLDB x10, [p0], #64; MOVXM r6, #16512 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4516 0x03 0xbe 0x80 0x32 0xe5 0xf4 VLDB x7, [p0], #64; VBCST.16 x0, r6 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4522 0x00 0x00 0xc2 0x21 0x00 0x44 MOVXM r4, #49280 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4528 0x18 0x91 0x72 0xf8 VBCST.16 x1, r4 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4532 0x00 0x00 0x71 0xbf 0xfe 0x44 MOVXM r3, #32767 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4538 0x1c 0x50 0x2c 0xf8 VMIN_GE.bf16 x8, r16, x10, x0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4542 0x10 0x01 0xb6 0x81 0xd9 0xe4 LSHL r0, r2, r0; VMAX_LT.bf16 x6, r16, x8, x1 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4548 0x0f 0x50 0x08 0x70 0x59 0xe4 MOVX crRnd, r1; VMIN_GE.bf16 x8, r16, x7, x0 + 4554 0x19 0x0d 0x72 0xf8 VBCST.16 x2, r3 + 4558 0x00 0x00 0x32 0xba 0x00 0x44 MOVXM r5, #15616 + 4564 0x19 0x95 0x72 0xf8 VBCST.16 x3, r5 + 4568 0x00 0x00 0x38 0xbe 0x00 0x44 MOVXM r17, #16128 + 4574 0x1d 0xb1 0x2b 0x78 VBAND x11, x6, x2 + 4578 0x64 0x5e 0x25 0x8a 0xe5 0xe4 MOVX r17, #828; VBCST.16 x5, r17 + 4584 0x04 0xc0 0xec 0xe6 0x8c 0xe7 0x61 0x62 VMAX_LT.bf16 x9, r16, x8, x1; VMUL.f dm4, x3, x11, r17 + 4592 0x1c 0x49 0x2b 0x78 VBAND x8, x9, x2 + 4596 0x00 0x00 0x31 0x3d 0x00 0x44 MOVXM r2, #16000 + 4602 0x02 0x09 0x72 0xe6 0x8a 0xe7 0x01 0x62 VBCST.16 x4, r2; VMUL.f dm2, x3, x8, r17 + 4610 0x18 0x0b 0x8a 0xf8 VCONV.fp32.bf16 cml0, x5 + 4614 0x04 0x50 0x2c 0xe6 0x8b 0x0c 0x81 0x62 VMIN_GE.bf16 x8, r16, x10, x0; VMAC.f dm3, dm0, x6, x4, r17 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 4622 0xb2 0x42 0xc0 0x00 0x00 0x8f 0x24 0x02 0x89 0x12 0x81 0x56 VCONV.bf16.fp32 x11, cml4; MOVXM ls, #4672; VMAC.f dm1, dm0, x9, x4, r17 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4634 0x1b 0x40 0xec 0xf8 VMAX_LT.bf16 x6, r16, x8, x1 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4638 0x00 0x00 0x00 0xb7 0x2a 0x02 0x8a 0x76 0xc3 0x5a MOVXM le, #4768; VMSC.f dm2, dm3, x11, x6, r17 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4648 0x52 0x22 0xc0 0x02 0xb8 0x3f 0x80 0x02 VCONV.bf16.fp32 x5, cml2; ADD.NC lc, r0, #-2 + 4656 0x1c 0x38 0x2c 0xf8 VMIN_GE.bf16 x8, r16, x7, x0 + 4660 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x02 0xd8 0x95 0xb0 0xf6 NOPA; NOPB; NOPS; VBAND x11, x6, x2 +.label ZLS_F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_192 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 4672 0x00 0x3d 0x6c 0xc0 0xec 0xe6 0x8c 0x2b 0x23 0x4a VLDB x10, [p0], #64; VMAX_LT.bf16 x9, r16, x8, x1; VMSC.f dm4, dm1, x5, x9, r17 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 4682 0x00 0x3b 0xec 0x49 0x2b 0x66 0x8c 0xe7 0x61 0x4a VLDB x7, [p0], #64; VBAND x8, x9, x2; VMUL.f dm4, x3, x11, r17 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4692 0x8b 0x0c 0x81 0x48 VMAC.f dm3, dm0, x6, x4, r17 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4696 0x8a 0xe7 0x01 0x48 VMUL.f dm2, x3, x8, r17 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4700 0x23 0xa4 0x60 0x02 0x89 0x12 0x81 0x62 VST.CONV.bf16.fp32 cml2, [p1], #64; VMAC.f dm1, dm0, x9, x4, r17 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4708 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4710 0x00 0x2c 0xf1 0x1e 0x23 0x00 0x00 0x00 0x00 0x7a NOPA; VST.CONV.bf16.fp32 cml4, [p1], #64;NOPX +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 4720 0x00 0x2c 0xf0 0x00 0x25 0x92 0x16 0x00 0x00 0x02 0x28 0x16 0x78 0x00 0x00 0xe1 NOPA; NOPB; VCONV.bf16.fp32 x11, cml4; NOPX; VMIN_GE.bf16 x8, r16, x10, x0; NOPV +.aggressive_scheduled_block_id 4 +.noswbrkpt + 4736 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x01 0xa0 0x76 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x8, x1; NOPV +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4752 0x00 0x2c 0xf0 0x00 0x22 0x91 0x16 0x00 0x00 0x02 0x1c 0x16 0x7c 0x53 0xb6 0x1b NOPA; NOPB; VCONV.bf16.fp32 x5, cml2; NOPX; VMIN_GE.bf16 x8, r16, x7, x0; VMSC.f dm2, dm3, x11, x6, r17 +.label ZLE_F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_288 +.end_of_loop + 4768 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x02 0xd8 0x95 0xb8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VBAND x11, x6, x2; NOPV +.loop_nesting 0 + 4784 0x04 0xc0 0xec 0xe6 0x8c 0x2b 0x23 0x62 VMAX_LT.bf16 x9, r16, x8, x1; VMSC.f dm4, dm1, x5, x9, r17 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 4792 0x1c 0x49 0x2b 0x78 VBAND x8, x9, x2 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 4796 0x8c 0xe7 0x61 0x48 VMUL.f dm4, x3, x11, r17 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 4800 0x8a 0xe7 0x01 0x48 VMUL.f dm2, x3, x8, r17 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 4804 0x09 0x1d 0x23 0x18 VST.CONV.bf16.fp32 cml2, [p1], #64 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 4808 0x00 0x00 NOPX +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4810 0x09 0x1e 0x23 0x18 VST.CONV.bf16.fp32 cml4, [p1], #64 + 4814 0x8b 0x0c 0x81 0x48 VMAC.f dm3, dm0, x6, x4, r17 + 4818 0xb2 0x42 0xc0 0x02 0x89 0x12 0x81 0x62 VCONV.bf16.fp32 x11, cml4; VMAC.f dm1, dm0, x9, x4, r17 + 4826 0x0a 0x91 0x16 0x18 VCONV.bf16.fp32 x5, cml2 + 4830 0x8a 0x76 0xc3 0x48 VMSC.f dm2, dm3, x11, x6, r17 + 4834 0x8c 0x2b 0x23 0x48 VMSC.f dm4, dm1, x5, x9, r17 + 4838 0x00 0x00 NOPX + 4840 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot + 4844 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4846 0x00 0x00 NOPX +.delay_slot + 4848 0x09 0x1d 0x23 0x18 VST.CONV.bf16.fp32 cml2, [p1], #64 +.delay_slot + 4852 0x09 0x1e 0x23 0x18 VST.CONV.bf16.fp32 cml4, [p1], #64 +.delay_slot +.swstall delay_slot + 4856 0x00 0x00 NOPX +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E__end +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_end0 + +.text_segment PM 4864 +.label __Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function_start + 4864 0x00 0x07 0xc6 0xc4 0x00 0x44 MOVXM p3, #508416 + 4870 0x60 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p3]; MOV r17, CORE_ID + 4876 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 4882 0xff 0x63 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p6, [sp, #-8]; MOV r0, r15 + 4890 0xff 0x82 0xb0 0x00 0x01 0xf3 0x31 0x02 0x11 0x3a ST r0, [sp, #-4]; MOVXM p6, #508420 + 4900 0x1b 0xd4 0xc0 0xf8 MOV r15, p2 + 4904 0x00 0x00 NOPX + 4906 0x00 0x00 NOPX + 4908 0x80 0x09 0xe8 0x40 0x01 0x84 JNZ r16, #5072 +.delay_slot + 4914 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17 +.delay_slot + 4918 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 4922 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12] +.delay_slot + 4926 0xc0 0xc6 0x30 0x03 0x30 0x60 0x70 0x02 ST r17, [p6]; MOV p6, p0 +.delay_slot + 4934 0x00 0x07 0xc0 0xc7 0x00 0x44 MOVXM p0, #508800 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4940 0x00 0x07 0xc4 0xc4 0x40 0x44 MOVXM p2, #508448 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4946 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x31 0x0e 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #508444 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4956 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 4958 0x00 0x08 0x98 0x00 0x01 0x04 JL #4400 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4964 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4966 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4968 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 4972 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 4976 0x00 0x2c 0xf0 0x00 0x22 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV +.return_address + 4992 0x00 0x07 0xc4 0xc4 0x08 0x44 MOVXM p2, #508420 + 4998 0x40 0xc2 0xd0 0x00 0x01 0xf1 0x31 0xc0 0x10 0xba LDA r16, [p2]; MOVXM p2, #508800 + 5008 0x40 0xc6 0xd0 0x00 0x01 0xf1 0x31 0xc0 0x10 0xba LDA r17, [p2]; MOVXM p2, #508800 + 5018 0x48 0xcb 0x50 0x00 0x01 0xf0 0xb1 0x04 0x10 0xba LDA.u16 r18, [p2, #8]; MOVXM p1, #508424 + 5028 0x00 0x00 NOPX + 5030 0x00 0x00 NOPX + 5032 0x00 0x09 0xf0 0x00 0x00 0x84 J #5088 +.delay_slot + 5038 0x00 0x07 0xc0 0xc4 0x30 0x44 MOVXM p0, #508440 +.delay_slot +.swstall delay_slot + 5044 0x00 0x00 NOPX +.delay_slot + 5046 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 +.delay_slot + 5050 0x00 0x2c 0xf0 0x0c 0xa3 0x0c NOPA; ST r18, [p0] +.delay_slot + 5056 0x00 0x2c 0xf0 0x00 0x21 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV +.label TGT_F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 + 5072 0x00 0x2c 0xf0 0x00 0x22 0x80 0x8b 0x00 0x01 0xf0 0xb1 0x04 0x10 0x00 0x00 0xe1 NOPA; NOPB; MOVS p2, p0; MOVXM p1, #508424; NOPV +.label TGT_F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 + 5088 0x73 0x91 0x60 0x03 0xb3 0xc3 0x00 0x02 MOVS p3, p7; ADD.NC p7, r15, #12 + 5096 0xff 0xee 0xd0 0x00 0x01 0xf0 0x31 0x00 0x10 0xba LDA r27, [p7], #-4; MOVXM p0, #508416 + 5106 0x07 0xfe 0x16 0x98 LDA r16, [p7], #-4 + 5110 0x07 0xfe 0x36 0x98 LDA r17, [p7], #-4 + 5114 0x07 0x46 0x56 0x98 LDA r18, [p7, #16] + 5118 0x00 0x00 NOPX + 5120 0x00 0x00 NOPX + 5122 0x00 0x00 NOPX + 5124 0x00 0x00 NOPX + 5126 0x00 0x00 NOPX + 5128 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 5132 0x0f 0x06 0x11 0x98 ST r16, [p7] + 5136 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 5140 0x00 0x00 NOPX + 5142 0x00 0x00 NOPX + 5144 0x00 0x00 NOPX + 5146 0x14 0x93 0x08 0x18 ACQ r18, r16 + 5150 0x04 0x00 0xa7 0xad 0x81 0xe4 MOVX r16, #1; MOV r15, p3 + 5156 0x00 0x00 NOPX + 5158 0x00 0x00 NOPX + 5160 0x00 0x06 0x36 0x98 LDA r17, [p0] + 5164 0xc0 0xca 0xdc 0xdd 0x81 0xd4 LDA r18, [p6]; MOV p6, p7 + 5170 0x01 0x06 0x76 0x98 LDA r19, [p1] + 5174 0x07 0x5c 0x9e 0x98 LDA p1, [p7], #20 + 5178 0x00 0x00 NOPX +.no_stack_arguments + 5180 0x00 0x08 0xc0 0x00 0x01 0x04 JL #4480 +.delay_slot +.swstall delay_slot + 5186 0x00 0x00 NOPX +.delay_slot + 5188 0x14 0x62 0x07 0x18 ADD r17, r17, #1 +.delay_slot + 5192 0x08 0x06 0x31 0x98 ST r17, [p0] +.delay_slot + 5196 0x14 0xe1 0x0d 0x98 LSHL r16, r19, r16 +.delay_slot + 5200 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xa0 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV +.return_address + 5216 0xca 0xc6 0xd0 0x00 0x01 0xf3 0x31 0x00 0x10 0xba LDA r17, [p6, #20]; MOVXM p6, #508416 + 5226 0x10 0x20 0x05 0x18 MOVX r16, #1 + 5230 0x00 0x00 NOPX + 5232 0x00 0x00 NOPX + 5234 0x00 0x00 NOPX + 5236 0x00 0x00 NOPX + 5238 0x00 0x00 NOPX + 5240 0x14 0x51 0x08 0x18 REL r17, r16 + 5244 0xfc 0xce 0xd0 0x00 0x01 0xf1 0x31 0x0c 0x10 0xba LDA r19, [p7, #-8]; MOVXM p2, #508440 + 5254 0x06 0x06 0x36 0x98 LDA r17, [p6] + 5258 0x02 0x06 0x56 0x98 LDA r18, [p2] + 5262 0x00 0x00 NOPX + 5264 0x00 0x00 NOPX + 5266 0x00 0x00 NOPX + 5268 0x00 0x00 NOPX + 5270 0x14 0x21 0x31 0x98 SUB r16, r16, r19 + 5274 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8] + 5278 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 5282 0x80 0x0a 0x60 0x40 0x01 0x84 JNZ r16, #5312 +.delay_slot +.swstall delay_slot + 5288 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5290 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5292 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5294 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5296 0x00 0x00 NOPX + 5298 0x10 0x20 0x01 0x18 MOVX r16, #0 + 5302 0x00 0x2c 0xf6 0x06 0x11 0x80 0x00 0x00 0x00 0x7a NOPA; ST r16, [p6]; NOPX +.label TGT_F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 + 5312 0x07 0xf4 0x39 0x18 LDA lr, [sp, #-12] + 5316 0x07 0xfb 0x19 0x18 LDA p6, [sp, #-8] + 5320 0x00 0x00 NOPX + 5322 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 5324 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.noswbrkpt + 5326 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5330 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5332 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5336 0x1f 0x67 0xa0 0xf8 MOV p7, r15 +.delay_slot + 5340 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 5346 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5348 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5350 0x00 0x00 NOPX +.label _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + +.text_segment PM 5360 +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.function_start + 5360 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 5364 0x00 0x07 0xc0 0xc6 0xc0 0x44 MOVXM p0, #508768 +.delay_slot + 5370 0x18 0x00 0x80 0xb8 MOV m0, #64 +.delay_slot + 5374 0x08 0x04 0x01 0x98 ST m0, [p0] +.delay_slot + 5378 0x08 0x14 0x01 0x98 ST m0, [p0, #4] +.delay_slot +.swstall delay_slot + 5382 0x00 0x00 NOPX +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_end0 + +.text_segment PM 5392 +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.function_start + 5392 0x23 0x85 0xd0 0x00 0x01 0xf0 0x31 0xa0 0x10 0xba LDA el0, [p1], #4; MOVXM p0, #508736 + 5402 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 5408 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4] + 5412 0x00 0x00 NOPX + 5414 0x00 0x00 NOPX + 5416 0x00 0x00 NOPX + 5418 0x00 0x00 NOPX + 5420 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 5424 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 5428 0x00 0x00 NOPX + 5430 0x00 0x00 NOPX + 5432 0x00 0x00 NOPX + 5434 0x00 0x00 NOPX + 5436 0x00 0x00 NOPX + 5438 0x00 0x00 NOPX + 5440 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 5444 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 5448 0x00 0x00 NOPX + 5450 0x00 0x00 NOPX + 5452 0x00 0x00 NOPX + 5454 0x00 0x00 NOPX + 5456 0x00 0x00 NOPX + 5458 0x00 0x00 NOPX + 5460 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 5464 0x01 0x14 0x2e 0x98 LDA el0, [p1, #4] + 5468 0x00 0x00 NOPX + 5470 0x00 0x00 NOPX +.no_stack_arguments + 5472 0x00 0x0a 0x78 0x00 0x01 0x04 JL #5360 +.delay_slot + 5478 0x0f 0xfb 0x9d 0x98 ST p7, [sp, #-8] +.delay_slot +.swstall delay_slot + 5482 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5484 0x00 0x00 NOPX +.delay_slot + 5486 0x08 0xdc 0x29 0x98 ST el0, [p0], #-12 +.delay_slot + 5490 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x7b 0x06 0x07 0x00 0x00 0x1c 0x2e NOPA; NOPS; MOV p7, p0; NOPV +.return_address + 5504 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] + 5508 0x00 0x00 NOPX + 5510 0x00 0x00 NOPX + 5512 0x00 0x00 NOPX + 5514 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5516 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5518 0x07 0xfb 0x99 0x18 LDA p7, [sp, #-8] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5522 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5526 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5528 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5530 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5532 0x10 0x20 0x01 0x18 MOVX r16, #0 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5536 0xe8 0xc2 0x30 0x3f 0xfe 0x00 0x00 0x00 0x71 0x3a ST r16, [p7, #16]; PADDXM [sp], #-64 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + +.text_segment PM 5552 +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.function_start + 5552 0x04 0x00 0x80 0x00 0x01 0xf1 0xb1 0xa0 0x10 0xba MOVA m0, #32; MOVXM p3, #508736 + 5562 0x61 0x06 0xd0 0x00 0x01 0xf2 0x31 0x10 0x10 0xba LDA r1, [p3], m0; MOVXM p4, #508448 + 5572 0x60 0x90 0xd0 0x18 0x07 0x88 0x6f 0xfa 0x58 0xba LDA m1, [p3]; MOVX r0, #828; MOV r3, #-6 + 5582 0x62 0x80 0xd0 0x00 0x00 0x04 0x7b 0x38 0x10 0xba LDA m0, [p3, #4]; MOVXM ls, #5744 + 5592 0x80 0x88 0x50 0x00 0x00 0x05 0xbb 0x40 0x10 0xba LDA.s8 r2, [p4]; MOVXM le, #5760 + 5602 0x00 0x00 NOPX + 5604 0x00 0x00 NOPX + 5606 0x00 0x00 NOPX + 5608 0x10 0x42 0x3d 0x98 LSHL r1, r1, r3 + 5612 0x1d 0x70 0xfc 0x98 ADD.NC lc, r1, #-7 + 5616 0x21 0x13 0x70 0x50 0xe8 0x3c VLDA x2, [p1], m0; VLDB x1, [p0], m1 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5622 0x21 0x1b 0x70 0x50 0x68 0xba 0x80 0x12 VLDA x3, [p1], m0; VLDB x0, [p0], m1; MOVX crRnd, r2 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5630 0x21 0x13 0x70 0x50 0xe8 0x3c VLDA x2, [p1], m0; VLDB x1, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5636 0x21 0x1b 0x70 0x50 0x68 0x3c VLDA x3, [p1], m0; VLDB x0, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5642 0x21 0x13 0x70 0x50 0xe8 0x3c VLDA x2, [p1], m0; VLDB x1, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5648 0x21 0x1b 0x70 0x50 0x68 0x3c VLDA x3, [p1], m0; VLDB x0, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5654 0x21 0x13 0x70 0x50 0xe8 0x3c VLDA x2, [p1], m0; VLDB x1, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5660 0x21 0x1b 0x70 0x28 0x34 0x1d 0x00 0xe2 0x41 0x4a VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5670 0x21 0x13 0x70 0x28 0x74 0x1d 0x01 0xe0 0x61 0x4a VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5680 0x21 0x1b 0x70 0x28 0x34 0x1d 0x00 0xe2 0x41 0x4a VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5690 0x21 0x13 0x70 0x28 0x74 0x1d 0x01 0xe0 0x61 0x4a VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5700 0x21 0x1b 0x70 0x50 0x68 0x00 0xad 0x8e 0x00 0xe2 0x41 0x66 VLDA x3, [p1], m0; VLDB x0, [p0], m1; NOPS; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5712 0x21 0x13 0x70 0x50 0xe8 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x0f 0x03 0x0b VLDA x2, [p1], m0; VLDB x1, [p0], m1; NOPS; NOPX; NOPM; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5728 0x21 0x1b 0x70 0x50 0x6a 0x1c 0x23 0x00 0x00 0x00 0x01 0xa5 0x78 0x07 0x12 0x0b VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 +.label ZLS_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_192 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5744 0x21 0x13 0x70 0x50 0xea 0x1c 0xa3 0x00 0x00 0x00 0x01 0xa5 0x78 0x0f 0x03 0x0b VLDA x2, [p1], m0; VLDB x1, [p0], m1; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; VMUL.f dm1, x0, x3, r0 +.label ZLE_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_208 +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5760 0x21 0x1b 0x70 0x50 0x6a 0x1c 0x23 0x00 0x00 0x00 0x01 0xa5 0x78 0x07 0x12 0x0b VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 +.loop_nesting 0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5776 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5784 0x43 0x84 0x60 0x02 0x00 0xe2 0x41 0x62 VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5792 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5800 0x43 0x84 0x60 0x02 0x00 0xe2 0x41 0x62 VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5808 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5816 0x43 0x84 0x60 0x02 0x00 0xe2 0x41 0x62 VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5824 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5832 0x0a 0x1c 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p2], #64 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5836 0x43 0x94 0x60 0x50 0x00 0x5c VST.CONV.bf16.fp32 cml1, [p2], #64;RET lr +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5842 0x0a 0x1c 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p2], #64 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5846 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64 +.delay_slot + 5850 0x0a 0x1c 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p2], #64 +.delay_slot + 5854 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64 +.delay_slot +.swstall delay_slot + 5858 0x00 0x00 NOPX +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_end0 + +.text_segment PM 5872 +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0 +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.function_start + 5872 0x00 0x07 0xc8 0xc4 0x00 0x44 MOVXM p4, #508416 + 5878 0x80 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p4]; MOV r17, CORE_ID + 5884 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 5890 0xff 0x3a 0xb0 0x23 0x14 0x81 0xca 0x60 0x79 0x3a ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 + 5900 0xfd 0x83 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p0, [sp, #-20]; MOV r0, r15 + 5908 0x0f 0xfc 0x15 0x98 ST r0, [sp, #-4] + 5912 0x0f 0xf0 0x3d 0x98 ST lr, [sp, #-16] + 5916 0x00 0x00 NOPX + 5918 0x80 0x0b 0xd8 0x40 0x01 0x84 JNZ r16, #6064 +.delay_slot + 5924 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 5928 0x00 0x07 0xc4 0xc4 0x08 0x44 MOVXM p2, #508420 +.delay_slot + 5934 0x40 0xc6 0x30 0x01 0x37 0x60 0x70 0x02 ST r17, [p2]; MOV p2, p7 +.delay_slot + 5942 0x1b 0xd6 0xc0 0xf8 MOV r15, p3 +.delay_slot + 5946 0xfe 0xa3 0xb0 0x00 0x01 0xf3 0xb1 0xa0 0x11 0x3a ST p2, [sp, #-12]; MOVXM p7, #508736 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5956 0x13 0x91 0x60 0x00 0x01 0xf1 0x31 0x10 0x11 0x3a MOVS p0, p7; MOVXM p2, #508448 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5966 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x31 0x0e 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #508444 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5976 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 5978 0x00 0x0a 0x88 0x00 0x01 0x04 JL #5392 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5984 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5986 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5988 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 5992 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 5996 0x0a 0x06 0x11 0x98 ST r16, [p2] +.return_address + 6000 0xe0 0xc2 0xd0 0x00 0x01 0xf0 0xb1 0x02 0x10 0xba LDA r16, [p7]; MOVXM p1, #508420 + 6010 0x20 0xc6 0xd0 0x00 0x01 0xf1 0xb1 0x04 0x10 0xba LDA r17, [p1]; MOVXM p3, #508424 + 6020 0xea 0xcb 0x50 0x00 0x01 0xf0 0xb1 0x06 0x10 0xba LDA.u16 r18, [p7, #10]; MOVXM p1, #508428 + 6030 0x00 0x00 NOPX + 6032 0x00 0x00 NOPX + 6034 0x00 0x00 NOPX + 6036 0x00 0x0b 0xe0 0x00 0x00 0x84 J #6080 +.delay_slot + 6042 0x00 0x07 0xc4 0xc4 0x30 0x44 MOVXM p2, #508440 +.delay_slot + 6048 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 +.delay_slot + 6052 0x0a 0x06 0x51 0x98 ST r18, [p2] +.delay_slot + 6056 0x0b 0x06 0x11 0x98 ST r16, [p3] +.delay_slot + 6060 0x09 0x06 0x11 0x98 ST r16, [p1] +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 + 6064 0x00 0x07 0xc6 0xc4 0x10 0x44 MOVXM p3, #508424 + 6070 0x00 0x2c 0xf0 0x00 0x01 0xf0 0xb1 0x06 0x10 0xba NOPA; MOVXM p1, #508428 +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 + 6080 0x18 0x67 0x86 0x18 ADD.NC p0, r15, #12 + 6084 0x1f 0xee 0xd0 0x00 0x01 0xf1 0x31 0x00 0x10 0xba LDA r27, [p0], #-4; MOVXM p2, #508416 + 6094 0x00 0xfe 0x16 0x98 LDA r16, [p0], #-4 + 6098 0x00 0xfe 0x36 0x98 LDA r17, [p0], #-4 + 6102 0x02 0x06 0x56 0x98 LDA r18, [p2] + 6106 0x00 0x46 0x76 0x98 LDA r19, [p0, #16] + 6110 0x00 0x00 NOPX + 6112 0x00 0x00 NOPX + 6114 0x00 0x00 NOPX + 6116 0x00 0x00 NOPX + 6118 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 6122 0x00 0xc2 0x39 0x40 0x0e 0x5c ST r16, [p0]; ADD r16, r18, #1 + 6128 0x0a 0x06 0x11 0x98 ST r16, [p2] + 6132 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 6136 0x00 0x00 NOPX + 6138 0x00 0x00 NOPX + 6140 0x00 0x00 NOPX + 6142 0x14 0xd3 0x08 0x18 ACQ r19, r16 + 6146 0x1a 0x67 0x06 0x18 ADD.NC p2, r14, #12 + 6150 0x00 0x00 NOPX + 6152 0x00 0x00 NOPX + 6154 0x02 0xff 0x76 0x98 LDA r27, [p2], #-4 + 6158 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4 + 6162 0x02 0xfe 0x56 0x98 LDA r18, [p2], #-4 + 6166 0x02 0x56 0x76 0x98 LDA r19, [p2, #20] + 6170 0x00 0x00 NOPX + 6172 0x00 0x00 NOPX + 6174 0x00 0x00 NOPX + 6176 0x00 0x00 NOPX + 6178 0x00 0x00 NOPX + 6180 0x14 0xa3 0x12 0x18 SEL.EQZ r17, r18, r17, r27 + 6184 0x0a 0x06 0x31 0x98 ST r17, [p2] + 6188 0x00 0x00 NOPX + 6190 0x00 0x00 NOPX + 6192 0x00 0x00 NOPX + 6194 0x00 0x00 NOPX + 6196 0x14 0xd3 0x08 0x18 ACQ r19, r16 + 6200 0xd1 0x11 0x60 0x01 0x00 0x29 0xce 0x60 0x79 0x3a MOVS p6, p2; MOVX r16, #1; MOV r14, p6 + 6210 0x00 0x00 NOPX + 6212 0x00 0x00 NOPX + 6214 0x07 0xee 0x19 0x18 LDA p4, [sp, #-20] + 6218 0x60 0xc6 0xdf 0xd8 0x3b 0x0c LDA r17, [p3]; ST p0, [sp, #-20] + 6224 0x20 0xd2 0xd6 0xdd 0x81 0xd4 LDA r20, [p1]; MOV p3, p7 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 6230 0x02 0x4e 0x56 0x98 LDA r18, [p2], #16 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 6234 0x00 0x5d 0x1e 0x98 LDA p2, [p0], #20 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6238 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6242 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6244 0x04 0x06 0x76 0x98 LDA r19, [p4] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6248 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 6250 0x00 0x0a 0xd8 0x00 0x01 0x04 JL #5552 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6256 0x1b 0xd4 0xc0 0xf8 MOV r15, p2 +.delay_slot + 6260 0x14 0x63 0x0d 0x98 LSHL r17, r17, r16 +.delay_slot + 6264 0x15 0x21 0x0d 0x98 LSHL r16, r20, r16 +.delay_slot + 6268 0x19 0x69 0x41 0x58 ADD.NC p1, r18, r16 +.delay_slot + 6272 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xe2 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV +.return_address + 6288 0xc8 0xc6 0xd0 0x01 0x00 0x28 0xb3 0xd0 0x78 0xba LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 + 6298 0x00 0x07 0xcc 0xc4 0x30 0x44 MOVXM p6, #508440 + 6304 0x00 0x00 NOPX + 6306 0x00 0x00 NOPX + 6308 0x00 0x00 NOPX + 6310 0x00 0x00 NOPX + 6312 0x00 0x00 NOPX + 6314 0x14 0x51 0x08 0x18 REL r17, r16 + 6318 0x01 0xf6 0x36 0x98 LDA r17, [p1, #-4] + 6322 0x07 0xed 0x19 0x18 LDA p2, [sp, #-20] + 6326 0x00 0x00 NOPX + 6328 0x00 0x00 NOPX + 6330 0x00 0x00 NOPX + 6332 0x00 0x00 NOPX + 6334 0x00 0x00 NOPX + 6336 0x14 0x23 0x11 0x98 SUB r17, r16, r17 + 6340 0x4a 0xc6 0xd3 0xec 0x63 0x0c LDA r17, [p2, #20]; ST r17, [p1, #-4] + 6346 0x00 0x00 NOPX + 6348 0x00 0x00 NOPX + 6350 0x00 0x00 NOPX + 6352 0x00 0x00 NOPX + 6354 0x00 0x00 NOPX + 6356 0x00 0x00 NOPX + 6358 0x14 0x51 0x08 0x18 REL r17, r16 + 6362 0xfc 0xce 0xd0 0x00 0x01 0xf0 0xb1 0x00 0x10 0xba LDA r19, [p7, #-8]; MOVXM p1, #508416 + 6372 0x06 0x06 0x56 0x98 LDA r18, [p6] + 6376 0x01 0x06 0x36 0x98 LDA r17, [p1] + 6380 0x00 0x00 NOPX + 6382 0x00 0x00 NOPX + 6384 0x00 0x00 NOPX + 6386 0x00 0x00 NOPX + 6388 0x14 0x21 0x31 0x98 SUB r16, r16, r19 + 6392 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8] + 6396 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 6400 0x80 0x0c 0x90 0x40 0x01 0x84 JNZ r16, #6432 +.delay_slot +.swstall delay_slot + 6406 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6408 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6410 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6412 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6414 0x00 0x00 NOPX + 6416 0x10 0x20 0x01 0x18 MOVX r16, #0 + 6420 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x83 0x08 0xc1 0x36 NOPA; NOPB; ST r16, [p1]; NOPX +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 + 6432 0x07 0xf0 0x39 0x18 LDA lr, [sp, #-16] + 6436 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] + 6440 0x07 0xf7 0x99 0x18 LDA p7, [sp, #-12] +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 6444 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.noswbrkpt + 6446 0x07 0xf9 0xd1 0x18 LDA r14, [sp, #-8] +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 6450 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 6452 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 6454 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6458 0x0e 0x8e 0x0b 0x18 MOVS p6, r14 +.delay_slot + 6462 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 6468 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6470 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6472 0x00 0x00 NOPX +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0 + +.text_segment PM 6480 +.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh +.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_begin0 +.function_start + 6480 0x03 0x85 0xd0 0x00 0x01 0xf0 0xb1 0xe0 0x10 0xba LDA el0, [p0], #4; MOVXM p1, #508864 + 6490 0x03 0x81 0xd0 0x01 0x00 0x4b 0x08 0x00 0x58 0xba LDA eh0, [p0], #4; MOVX r16, #2; MOV r24, #0 + 6500 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 6506 0xfe 0x73 0xb0 0x00 0x01 0xf3 0xb1 0xe0 0x11 0x3a ST p7, [sp, #-16]; MOVXM p7, #508864 + 6516 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4] + 6520 0x0f 0xf5 0xd5 0x98 ST r14, [sp, #-12] + 6524 0x0f 0xf9 0xf5 0x98 ST r15, [sp, #-8] + 6528 0x09 0x1c 0x29 0x98 ST el0, [p1], #4 + 6532 0x09 0x1c 0x09 0x98 ST eh0, [p1], #4 + 6536 0x00 0x04 0x2e 0x98 LDA el0, [p0] + 6540 0x00 0x14 0x0e 0x98 LDA eh0, [p0, #4] + 6544 0x00 0x00 NOPX + 6546 0x00 0x00 NOPX + 6548 0x00 0x00 NOPX + 6550 0x00 0x00 NOPX + 6552 0x00 0x00 NOPX + 6554 0x09 0x04 0x29 0x98 ST el0, [p1] + 6558 0x09 0x14 0x09 0x98 ST eh0, [p1, #4] + 6562 0x07 0x5e 0x2a 0x98 LDA.u8 r17, [p7], #5 + 6566 0x07 0xee 0x4a 0x98 LDA.u8 r18, [p7], #-2 + 6570 0x07 0xec 0x2a 0x98 LDA.u8 r1, [p7], #-2 + 6574 0x00 0x00 NOPX + 6576 0x00 0x00 NOPX + 6578 0x00 0x00 NOPX + 6580 0x00 0x00 NOPX +.no_stack_arguments + 6582 0x00 0x18 0x58 0x00 0x01 0x04 JL #12464 +.delay_slot + 6588 0xfd 0xca 0xb8 0xba 0x43 0x5c ST r18, [sp, #-20]; SUB r14, r17, r18 +.delay_slot + 6594 0xfc 0x86 0xb0 0x03 0x08 0x45 0xe8 0x50 0x79 0x3a ST r1, [sp, #-28]; NE r16, r1, r16; MOV r15, r1 +.delay_slot + 6604 0xfd 0x42 0xb7 0x6f 0x15 0x5c ST r16, [sp, #-24]; LT r27, r14, r24 +.delay_slot + 6610 0x16 0x22 0xe1 0x98 SUB r17, r24, r14 +.delay_slot + 6614 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x03 0x81 0x12 0x7a NOPA; NOPS; SEL.EQZ r0, r14, r17, r27 +.return_address + 6624 0xe7 0xc5 0x50 0x1f 0x47 0x36 0x08 0x00 0x58 0xba LDA.u8 r17, [p7], #3; XOR r20, r15, r14; MOV r16, #0 + 6634 0xfd 0xc9 0x58 0x4c 0x43 0x2c LDA.u8 r18, [p7], #-2; SUB r19, r16, r2 + 6640 0xfc 0x86 0x2a 0x6e 0x15 0x2c LDA r1, [sp, #-28]; LT r27, r20, r16 + 6646 0x10 0xa7 0x32 0x18 SEL.EQZ r19, r2, r19, r27 + 6650 0x00 0x00 NOPX + 6652 0x00 0x00 NOPX +.no_stack_arguments + 6654 0x00 0x18 0x58 0x00 0x01 0x04 JL #12464 +.delay_slot + 6660 0x14 0xe6 0x70 0x18 EXTEND.s16 r19, r19 +.delay_slot + 6664 0xfc 0x4a 0xb0 0x22 0xe9 0x0d 0xec 0xc0 0x49 0x3a ST r18, [sp, #-32]; SUB r14, r17, r18; ADD.NC r15, r19, #1 +.delay_slot + 6674 0x13 0xb7 0x0a 0x98 LT r27, r14, r16 +.delay_slot + 6678 0x14 0x22 0xe1 0x98 SUB r17, r16, r14 +.delay_slot + 6682 0x00 0x2c 0xf7 0x02 0x24 0x2c NOPA; SEL.EQZ r0, r14, r17, r27 +.return_address + 6688 0xfc 0x86 0x20 0x01 0x30 0x48 0x00 0x42 0x58 0xba LDA r1, [sp, #-28]; MOVX r19, #2; MOV m0, #66 + 6698 0xe1 0x51 0x50 0x01 0x80 0x0a 0x48 0x08 0x58 0xba LDA.u8 r20, [p7], m0; MOVX r24, #0; MOV r18, #8 + 6708 0xfc 0x72 0x20 0x3f 0x07 0x4b 0xe8 0x17 0x58 0xba LDA r28, [sp, #-32]; MOVX r16, #-6; MOV r31, #23 + 6718 0xfd 0xda 0x20 0x3f 0xa7 0xca 0xa8 0x06 0x58 0xba LDA r22, [sp, #-20]; MOVX r26, #-2; MOV r21, #6 + 6728 0xfd 0x0e 0x20 0x0f 0xd7 0x89 0x00 0x20 0x58 0xba LDA r3, [sp, #-24]; MOVX r29, #508; MOV m2, #32 + 6738 0xe9 0xc0 0x80 0x01 0x70 0x28 0x08 0x80 0x58 0xba MOVA m0, #-178; MOVX r23, #1; MOV r0, #128 + 6748 0x17 0x44 0x80 0x31 0x11 0x0c 0x9d 0xb0 0x78 0xba MOVA m1, #186; SUB r17, r24, r2; MOV vaddSign0, crMCDEn + 6758 0x10 0x7c 0xe6 0x98 XOR r30, r1, r14 + 6762 0x17 0xb7 0x8a 0x98 LT r27, r30, r24 + 6766 0x14 0x62 0x43 0xbc 0xff 0x24 SEL.EQZ r17, r2, r17, r27; ADD.NC r7, r28, #-1 + 6772 0x8f 0x8e 0x0b 0x36 0x02 0x24 EXTEND.s16 r30, r17; ADD.NC r22, r22, #2 + 6778 0x7f 0xa9 0xf7 0x3e 0x01 0x24 MUL r30, r15, r20; ADD.NC r14, r30, #1 + 6784 0x08 0x9d 0xf8 0xb6 0x01 0x24 MUL r2, r1, r14; ADD.NC r17, r22, #1 + 6790 0x14 0xf6 0x17 0x98 EQ r27, r19, r1 + 6794 0x17 0x84 0x2f 0x98 MUL r2, r30, r2 + 6798 0xff 0xe4 0x49 0x3f 0xf5 0x64 SEL.EQZ r31, r31, r18, r27; MOV r18, #-3 + 6804 0x10 0xa1 0x0d 0x98 LSHL r16, r2, r16 + 6808 0x14 0x45 0xad 0x98 LSHL r2, r17, r26 + 6812 0x10 0xb9 0xf2 0x22 0xff 0x24 MUL r2, r2, r28; ADD.NC r4, r2, #-1 + 6818 0x10 0xc7 0x5d 0x98 LSHL r3, r3, r21 + 6822 0xff 0x8a 0x37 0x94 0x3f 0x5c ST r2, [p7], #-4; MUL r5, r15, r1 + 6828 0xe9 0x42 0x30 0x3b 0x6b 0x26 0x08 0x04 0x59 0x3a ST r16, [p7], m2; AND r22, r29, r22; MOV r16, #4 + 6838 0xf9 0xfe 0x32 0xda 0xc1 0x5c ST r31, [p7], #-16; ADD r22, r5, r22 + 6844 0xed 0x8e 0x3b 0x7e 0x9f 0x5c ST r3, [p7], #24; MUL r31, r22, r20 + 6850 0xe3 0x92 0x3b 0x5a 0x1b 0x5c ST r4, [p7], #4; LSHL r22, r22, r16 + 6856 0x17 0xc7 0x7d 0x98 LSHL r3, r31, r23 + 6860 0x11 0x09 0x5d 0x98 LSHL r4, r4, r21 + 6864 0xb6 0x46 0x32 0x24 0x02 0xa4 SUB r25, r22, r3; ADD.NC r4, r4, r0 + 6870 0xf8 0x00 0x00 0x06 0x62 0x0f 0x2e 0x40 0xa8 0xba MOVA r0, #-64; SUB r6, r3, r4; ADD.NC r25, r25, r0 + 6880 0xe3 0x82 0x3f 0x84 0x3f 0x5c ST r0, [p7], #4; MUL r1, r31, r1 + 6886 0xe3 0x9e 0x3f 0xfc 0xff 0x5c ST r7, [p7], #4; MUL r31, r31, r7 + 6892 0xe3 0x9a 0x32 0x96 0x5b 0x5c ST r6, [p7], #4; LSHL r5, r5, r18 + 6898 0xf9 0xaf 0xbf 0xa5 0xff 0x24 LSHL r6, r31, r23; ADD.NC r31, r5, #-1 + 6904 0x00 0xe4 0x00 0x28 0x59 0x6e 0x49 0x88 0xa8 0xba MOVA r4, #7; LSHL r5, r20, r18; ADD.NC r18, r6, r4 + 6914 0x16 0x25 0x21 0x98 SUB r18, r24, r18 + 6918 0xe3 0xca 0x30 0x02 0x1b 0xee 0x49 0x7f 0xc9 0x3a ST r18, [p7], #4; LSHL r1, r1, r23; ADD.NC r18, r5, #-1 + 6928 0xe3 0xca 0x30 0x9a 0xc1 0x5c ST r18, [p7], #4; ADD r6, r1, r22 + 6934 0xe3 0xda 0x3f 0x84 0x9b 0x5c ST r22, [p7], #4; LSHL r1, r31, r4 + 6940 0xe3 0xfe 0x30 0x07 0x60 0x84 0x2f 0xff 0x59 0x3a ST r31, [p7], #4; ADD r22, r3, r1; MOV r1, #-1 + 6950 0xe3 0xe6 0x30 0x0c 0x3b 0x0e 0xc8 0x40 0x59 0x3a ST r25, [p7], #4; SUB r3, r6, r22; MOV r22, #64 + 6960 0xe3 0x8e 0x37 0xc2 0x1b 0x5c ST r3, [p7], #4; LSHL r16, r15, r16 + 6966 0xe3 0xca 0x30 0x03 0x04 0x5c ST r18, [p7], #4; SEL.EQZ r0, r0, r24, r27 + 6972 0xf0 0xef 0xb0 0x30 0x02 0xa4 LSHL r3, r30, r23; ADD.NC r0, r16, r0 + 6978 0xe3 0x82 0x38 0x40 0x63 0x5c ST r0, [p7], #4; SUB r16, r16, r3 + 6984 0xe3 0xfe 0x30 0x0a 0x11 0x33 0xec 0x10 0x09 0x3a ST r31, [p7], #4; MAC r1, r1, r5, r2; ADD.NC r31, r16, #64 + 6994 0xe3 0xfe 0x39 0x7e 0xbb 0x5c ST r31, [p7], #4; LSHL r31, r18, r21 + 7000 0xe3 0xda 0x30 0x8a 0xbb 0x5c ST r22, [p7], #4; LSHL r2, r1, r21 + 7006 0xe3 0x86 0x3c 0x04 0x43 0x5c ST r1, [p7], #4; SUB r1, r24, r2 + 7012 0xe3 0xda 0x3c 0x0b 0xe3 0x5c ST r22, [p7], #4; SUB r2, r24, r31 + 7018 0x0f 0x1c 0x31 0x98 ST r1, [p7], #4 + 7022 0x0f 0x1e 0x51 0x98 ST r18, [p7], #4 + 7026 0x0f 0x1e 0xd1 0x98 ST r22, [p7], #4 + 7030 0x0f 0x08 0x51 0x98 ST r2, [p7], m0 + 7034 0x07 0x28 0x2a 0x98 LDA.u8 r1, [p7], m1 + 7038 0x00 0x00 NOPX + 7040 0x00 0x00 NOPX + 7042 0x00 0x00 NOPX + 7044 0x00 0x00 NOPX + 7046 0x00 0x00 NOPX + 7048 0x00 0x00 NOPX + 7050 0x08 0x0d 0xd8 0x00 0x01 0x84 JZ r1, #7088 +.delay_slot + 7056 0x10 0x20 0x0d 0x18 MOVX r16, #3 +.delay_slot + 7060 0x13 0xe1 0x0d 0x98 LSHL r16, r15, r16 +.delay_slot + 7064 0xff 0x7f 0x0f 0xa0 0x00 0x44 MOVXM r31, #-8454144 +.delay_slot +.swstall delay_slot + 7070 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7072 0x00 0x00 NOPX + 7074 0x00 0x2c 0xf0 0x00 0x20 0x3e 0x01 0x00 0x34 0xaf 0x00 0x2b 0x60 0x7e NOPA; NOPB; NOPS; MOVX r31, #0; NOPM +.label TGT_F_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh_608 + 7088 0xe7 0x60 0x80 0x00 0x01 0xf0 0x31 0x10 0x10 0xba MOVA m0, #-197; MOVXM p0, #508448 + 7098 0x00 0xc4 0x50 0x3b 0xd8 0xa4 0x01 0xf8 0xb8 0xba LDA.s8 r17, [p0]; AND r29, r29, r17; VINSERT.32 x0, x0, #0, r31 + 7108 0xff 0x06 0x20 0x01 0xf0 0xa8 0x00 0x49 0x78 0xba LDA r1, [sp, #-8]; MOVX r31, #5; VMOV bmll0, x0 + 7118 0xff 0x87 0x20 0x1f 0xff 0xec 0x80 0xc9 0x58 0xba LDA lr, [sp, #-4]; LSHL r31, r15, r31; MOV m1, #201 + 7128 0xfe 0x03 0x20 0x64 0x02 0x2c LDA p0, [sp, #-16]; MOVX r25, #0 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7134 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7136 0x07 0xf5 0xd1 0x18 LDA r14, [sp, #-12] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7140 0x07 0x2c 0x37 0x18 ST.s16 r1, [p7], #4 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7144 0xef 0x39 0xff 0x71 0x41 0xe4 MUL r28, r29, r28; MOV crRnd, r17 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7150 0x08 0x02 0xc0 0x1f 0x1d 0x6d 0xe8 0x50 0x79 0x3a VCONV.bf16.fp32 wl0, bmll0; LSHL r17, r15, r26; MOV r15, r1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7160 0xe5 0x29 0xf8 0xb1 0xff 0x24 MUL r20, r28, r20; ADD.NC r17, r17, #-1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7166 0xa7 0x67 0xb0 0x82 0x03 0x64 LSHL r29, r20, r19; VEXTRACT.16 r1, x0, #0, vaddSign0 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7172 0x17 0xb8 0xef 0x98 MUL r28, r30, r14 + 7176 0x14 0x6b 0x5d 0x98 LSHL r21, r17, r21 + 7180 0xe3 0xd2 0x3e 0x5e 0xfb 0x5c ST r20, [p7], #4; LSHL r23, r28, r23 + 7186 0xe3 0xf6 0x3f 0xea 0xa3 0x5c ST r29, [p7], #4; SUB r26, r31, r21 + 7192 0xe1 0x72 0x3f 0xd6 0x4c 0x5c ST r28, [p7], m0; MAC r21, r21, r31, r18 + 7198 0x07 0x2a 0x8a 0x98 LDA.u8 r20, [p7], m1 + 7202 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 + 7208 0x00 0x00 NOPX + 7210 0x00 0x00 NOPX + 7212 0x00 0x00 NOPX + 7214 0x17 0xbd 0x3d 0x98 LSHL r30, r30, r19 + 7218 0x17 0xab 0x51 0x98 SUB r21, r30, r21 + 7222 0x14 0xf7 0x47 0x98 EQ r27, r19, r20 + 7226 0x16 0x27 0x72 0x18 SEL.EQZ r19, r24, r23, r27 + 7230 0x0f 0x1e 0x71 0x98 ST r19, [p7], #4 + 7234 0x0f 0x1e 0x11 0x98 ST r16, [p7], #4 + 7238 0x0f 0x1e 0x31 0x98 ST r17, [p7], #4 + 7242 0xe3 0xda 0x30 0x50 0x00 0x5c ST r22, [p7], #4; RET lr +.delay_slot + 7248 0x0f 0x1e 0x51 0x98 ST r18, [p7], #4 +.delay_slot + 7252 0x0f 0x1f 0x51 0x98 ST r26, [p7], #4 +.delay_slot + 7256 0x0f 0x1e 0xb1 0x98 ST r21, [p7], #4 +.delay_slot + 7260 0x0f 0x07 0x31 0x98 ST r25, [p7] +.delay_slot + 7264 0xe2 0xe6 0x30 0x03 0xb0 0x60 0x70 0x02 ST r25, [p7, #4]; MOV p7, p0 +.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh__end +.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_end0 + +.text_segment PM 7280 +.label __Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params___func_begin0 +.label _Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params +.function_start + 7280 0xf1 0x18 0x80 0x3b 0x68 0x00 0x01 0xf2 0x32 0x16 0x10 0xb6 MOVA m6, #-120; VLDB x6, [p0], #64; MOVXM p4, #508972 + 7292 0x9f 0xa8 0xd0 0x38 0xe8 0x00 0x12 0x0a 0x80 0x80 0x58 0xb6 LDA dj2, [p4], #-4; VLDB x1, [p0], #64; MOVX r1, #16; MOV m5, #128 + 7304 0x87 0xa4 0xd0 0x00 0x07 0x8a 0x07 0x90 0x58 0xba LDA dn2, [p4], #12; MOVX r0, #60; MOV m4, #-112 + 7314 0x9f 0xe8 0xd0 0x00 0x24 0x0a 0x60 0x00 0x58 0xba LDA dj6, [p4], #-4; MOVX r2, #32; MOV dc4, #0 + 7324 0x85 0xe4 0xd7 0x10 0x4b 0x00 0x00 0x04 0x7e 0xb0 0x10 0x76 LDA dn6, [p4], #8; MOVS dc7, dc4; MOVXM ls, #7520 + 7336 0x85 0xa0 0xd2 0x10 0x4b 0x00 0x00 0x05 0xbe 0xd8 0x10 0x76 LDA m2, [p4], #8; MOVS dc2, dc4; MOVXM le, #7600 + 7348 0x9f 0x88 0xd6 0x10 0x4b 0x00 0x01 0xf2 0xb1 0x10 0x10 0x76 LDA dj0, [p4], #-4; MOVS dc6, dc4; MOVXM p5, #508448 + 7360 0x87 0x84 0xd1 0x10 0x4b 0x00 0x36 0x09 0xe4 0xc0 0x78 0x76 LDA dn0, [p4], #12; MOVS dc1, dc4; MOVX r3, #48; MOV dc3, dc4 + 7372 0x9f 0xc8 0xd0 0x10 0x4b 0x01 0x18 0x43 0x62 0xba LDA dj4, [p4], #-4; MOVS dc0, dc4; VSHIFT x4, x6, x1, r1 + 7382 0x85 0xc4 0xdb 0x93 0x01 0xd4 LDA dn4, [p4], #8; MOV dc5, dc4 + 7388 0x04 0x2c 0x06 0x98 LDA m0, [p4], #8 + 7392 0x04 0xfc 0xc6 0x98 LDA dj1, [p4], #-4 + 7396 0x87 0x94 0xd0 0xb1 0x68 0x3c LDA dn1, [p4], #12; VLDB.3D x2, [p0], d2 + 7402 0x04 0xfe 0xc6 0x98 LDA dj5, [p4], #-4 + 7406 0x04 0x2e 0xa6 0x98 LDA dn5, [p4], #8 + 7410 0x04 0x2c 0x86 0x98 LDA m1, [p4], #8 + 7414 0x04 0xff 0xc6 0x98 LDA dj7, [p4], #-4 + 7418 0x04 0x2f 0xa6 0x98 LDA dn7, [p4], #8 + 7422 0x04 0x2f 0x86 0x98 LDA m7, [p4], #8 + 7426 0x04 0xfd 0xc6 0x98 LDA dj3, [p4], #-4 + 7430 0x04 0x2d 0xa6 0x98 LDA dn3, [p4], #8 + 7434 0x04 0xc9 0x86 0x98 LDA m3, [p4], m6 + 7438 0x04 0xa8 0x96 0x98 LDA r4, [p4], m5 + 7442 0x04 0x88 0xf2 0x98 LDA.s16 r7, [p4], m4 + 7446 0x04 0x4e 0x06 0x98 LDA m4, [p4], #16 + 7450 0x92 0x96 0xd3 0xe1 0xe8 0x3c LDA r5, [p4, #-28]; VLDB.2D x3, [p1], d7 + 7456 0x02 0x04 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p2] + 7460 0x00 0x00 NOPX + 7462 0x05 0x04 0xc2 0x98 LDA.s8 r6, [p5] + 7466 0x11 0x09 0xfb 0x18 ADD r4, r4, #-2 + 7470 0x80 0xc6 0xd0 0x00 0x00 0x06 0x36 0xf8 0x10 0xba LDA r17, [p4]; MOVXM p4, #7664 + 7480 0x18 0x1d 0x72 0xf8 VBCST.16 x0, r7 + 7484 0x1d 0x1c 0x03 0x58 VEXTBCST.128 x10, x3, #0 + 7488 0x1d 0x72 0xff 0x98 ADD.NC lc, r5, #-1 + 7492 0x00 0x2c 0xf0 0x00 0x23 0x00 0x8a 0xe2 0x04 0x6d 0x41 0x66 NOPA; NOPB; VMOV cml3, cml0; VMAC.f dm4, dm3, x6, x10, r0 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7504 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x0d 0xd4 0x02 0x0e 0x03 0xa8 0x08 0x1a 0x0b NOPA; NOPB; NOPS; MOVX crRnd, r6; VEXTBCST.128 x8, x3, #1; VMAC.f dm1, dm0, x1, x10, r0 +.label ZLS_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_240 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7520 0x03 0xb3 0x71 0xf0 0xf4 0x02 0x84 0x83 0x6e 0xba VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7530 0x00 0x38 0xea 0x9c 0x0b 0x46 0x02 0x89 0x01 0x4a VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7540 0x00 0xb1 0x6a 0x30 0x8a 0xc6 0x01 0x35 0x01 0x4a VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r2; VMAC.f dm1, dm1, x10, x8, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7550 0x1d 0x89 0x0a 0xd8 VSHIFT x11, x1, x2, r2 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7554 0x04 0xb0 0x8e 0xc6 0x04 0x48 0xa1 0x62 VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7562 0x03 0x9c 0x0f 0x46 0x02 0x36 0xa1 0x62 VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r0 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7570 0x1d 0x89 0x0e 0xd8 VSHIFT x11, x1, x2, r3 + 7574 0x05 0x1c 0x03 0x46 0x03 0x92 0xe1 0x62 VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r0 + 7582 0x04 0x1c 0x07 0x46 0x00 0x56 0xe1 0x62 VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r0 + 7590 0x00 0x2c 0xf4 0x61 0x0d 0x94 NOPA; VSHIFT x4, x6, x1, r1 + 7596 0x04 0x6d 0x41 0x48 VMAC.f dm4, dm3, x6, x10, r0 +.label ZLE_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_320 +.end_of_loop +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 7600 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x08 0x1a 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r0 +.loop_nesting 0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7616 0x03 0x0c 0xf2 0x73 0x90 0x02 0x84 0x83 0x6e 0xba PADDA.3D [p0], d0; PADDB.2D [p2], d3; VSHIFT x10, x1, x2, r1 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7626 0x02 0x9c 0x0b 0x46 0x02 0x89 0x01 0x62 VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7634 0x02 0x30 0x8a 0xc6 0x01 0x35 0x01 0x62 VSHIFT x4, x6, x1, r2; VMAC.f dm1, dm1, x10, x8, r0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7642 0x1d 0x89 0x0a 0xd8 VSHIFT x11, x1, x2, r2 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7646 0x03 0x9c 0x0f 0x46 0x04 0x48 0xa1 0x62 VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7654 0x00 0x2c 0xf4 0xb0 0x8e 0xc2 0x02 0x36 0xa1 0x4a NOPA; VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r0 +.label TGT_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_384 +.loop_nesting 1 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7664 0x3e 0x1e 0x8b 0x12 0x1d 0xb4 VLDB.2D x3, [p1], d7; VSHIFT x11, x1, x2, r3 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7670 0x00 0x00 0x00 0xb7 0xea 0x02 0x03 0x92 0xe1 0x5a MOVXM le, #7840; VMAC.f dm3, dm4, x9, x7, r0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7680 0x40 0x85 0x70 0x00 0x00 0x8f 0xe5 0x02 0x00 0x56 0xe1 0x46 VLDA.CONV.fp32.bf16 cml0, [p2]; MOVXM ls, #7760; VMAC.f dm0, dm2, x11, x7, r0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7692 0x1d 0x72 0xff 0x98 ADD.NC lc, r5, #-1 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7696 0x00 0x1d 0x9b 0x98 VLDA x6, [p0], #64 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7700 0x38 0x1c 0x74 0x18 VLDB x1, [p0], #64 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7704 0x38 0x58 0xb4 0x18 VLDB.3D x2, [p0], d2 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7708 0x0d 0x11 0x96 0x18 VCONV.bf16.fp32 x10, cml3 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7712 0x0b 0x10 0x16 0x18 VCONV.bf16.fp32 x6, cml0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7716 0x1c 0x50 0x6c 0xf8 VMAX_LT.bf16 x8, r16, x10, x0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7720 0x1d 0x53 0x44 0x78 VSHUFFLE x10, x10, x6, r17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7724 0x71 0x42 0x60 0x02 0xa8 0x36 0x70 0x02 VST x8, [p3], m4; VMAX_LT.bf16 x10, r16, x10, x0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7732 0x1d 0x1c 0x03 0x58 VEXTBCST.128 x10, x3, #0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7736 0x67 0x52 0x60 0x01 0x80 0x45 0x70 0x02 VST.3D x10, [p3], d1; VMOV cml3, cml0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7744 0x04 0x1c 0x07 0x46 0x04 0x6d 0x41 0x62 VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7752 0x02 0x30 0x86 0xc6 0x01 0x03 0x41 0x62 VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm0, x1, x10, r0 +.label ZLS_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_480 +.loop_nesting 2 +.begin_of_loop +.aggressive_scheduled_block_id 2 +.noswbrkpt + 7760 0x03 0xb3 0x71 0xf0 0xf4 0x02 0x84 0x83 0x6e 0xba VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r1 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7770 0x00 0x38 0xea 0x9c 0x0b 0x46 0x02 0x89 0x01 0x4a VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7780 0x00 0xb1 0x6a 0x30 0x8a 0xc6 0x01 0x35 0x01 0x4a VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r2; VMAC.f dm1, dm1, x10, x8, r0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7790 0x1d 0x89 0x0a 0xd8 VSHIFT x11, x1, x2, r2 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7794 0x04 0xb0 0x8e 0xc6 0x04 0x48 0xa1 0x62 VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7802 0x03 0x9c 0x0f 0x46 0x02 0x36 0xa1 0x62 VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r0 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7810 0x1d 0x89 0x0e 0xd8 VSHIFT x11, x1, x2, r3 + 7814 0x05 0x1c 0x03 0x46 0x03 0x92 0xe1 0x62 VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r0 + 7822 0x04 0x1c 0x07 0x46 0x00 0x56 0xe1 0x62 VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r0 + 7830 0x00 0x2c 0xf4 0x61 0x0d 0x94 NOPA; VSHIFT x4, x6, x1, r1 + 7836 0x04 0x6d 0x41 0x48 VMAC.f dm4, dm3, x6, x10, r0 +.label ZLE_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_560 +.end_of_loop +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 7840 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x08 0x1a 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r0 +.loop_nesting 1 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7856 0x03 0x0c 0xf4 0xe7 0x20 0x08 0x49 0x02 0x84 0x83 0x68 0xb6 PADDA.3D [p0], d0; PADDB.2D [p2], d3; JNZD r4, r4, p4; VSHIFT x10, x1, x2, r1 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7868 0x02 0x9c 0x0b 0x46 0x02 0x89 0x01 0x62 VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r0 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7876 0x02 0x30 0x8a 0xc6 0x01 0x35 0x01 0x62 VSHIFT x4, x6, x1, r2; VMAC.f dm1, dm1, x10, x8, r0 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7884 0x1d 0x89 0x0a 0xd8 VSHIFT x11, x1, x2, r2 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7888 0x03 0x9c 0x0f 0x46 0x04 0x48 0xa1 0x62 VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r0 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7896 0x04 0xb0 0x8e 0xc6 0x02 0x36 0xa1 0x62 VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r0 +.loop_nesting 0 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7904 0x1d 0x89 0x0e 0xd8 VSHIFT x11, x1, x2, r3 + 7908 0x03 0x92 0xe1 0x48 VMAC.f dm3, dm4, x9, x7, r0 + 7912 0x00 0x56 0xe1 0x48 VMAC.f dm0, dm2, x11, x7, r0 + 7916 0x00 0x00 NOPX + 7918 0x00 0x00 NOPX + 7920 0x00 0x00 NOPX + 7922 0x00 0x00 NOPX + 7924 0x0d 0x11 0x96 0x18 VCONV.bf16.fp32 x10, cml3 + 7928 0x62 0x02 0xc0 0x50 0x00 0x5c VCONV.bf16.fp32 x6, cml0; RET lr +.delay_slot + 7934 0x1c 0x50 0x6c 0xf8 VMAX_LT.bf16 x8, r16, x10, x0 +.delay_slot + 7938 0x1d 0x53 0x44 0x78 VSHUFFLE x10, x10, x6, r17 +.delay_slot + 7942 0x1d 0x50 0x6c 0xf8 VMAX_LT.bf16 x10, r16, x10, x0 +.delay_slot + 7946 0x0b 0x8a 0x13 0x18 VST x8, [p3], m4 +.delay_slot + 7950 0x0b 0x3a 0x93 0x18 VST.3D x10, [p3], d1 +.label _Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params__end +.label __Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params___func_end0 + +.text_segment PM 7968 +.label __Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params___func_begin0 +.label _Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params +.function_start + 7968 0xfb 0x90 0x82 0x39 0x68 0x00 0x01 0xf1 0xb2 0x4c 0x10 0xb6 MOVA m4, #-36; VLDB x2, [p1], #64; MOVXM p3, #509080 + 7980 0x63 0x84 0xd4 0x38 0x68 0x3e 0x47 0x68 0x68 0x01 0x58 0xb6 LDA dn0, [p3], #4; VLDB x0, [p2], #64; MOVX r4, #-5; MOV r3, #1 + 7992 0x63 0x88 0xd0 0x00 0x00 0x04 0x7f 0xc8 0x10 0xba LDA dj0, [p3], #4; MOVXM ls, #8080 + 8002 0x63 0xc4 0xd0 0x00 0x00 0x05 0xbf 0xf8 0x10 0xba LDA dn4, [p3], #4; MOVXM le, #8176 + 8012 0x63 0xc8 0xd0 0x00 0x16 0x48 0x08 0x12 0x58 0xba LDA dj4, [p3], #4; MOVX r1, #50; MOV r0, #18 + 8022 0x63 0x80 0xd0 0x08 0x9a 0x2c LDA m0, [p3], #4; MOVX r2, #19 + 8028 0x03 0x1c 0x66 0x98 LDA dc0, [p3], #4 + 8032 0x03 0x8a 0x66 0x98 LDA dc4, [p3], m4 + 8036 0x03 0x04 0xb6 0x98 LDA r5, [p3] + 8040 0x03 0x24 0xd6 0x98 LDA r6, [p3, #8] + 8044 0x00 0x00 NOPX + 8046 0x00 0x00 NOPX + 8048 0x00 0x00 NOPX + 8050 0x00 0x00 NOPX + 8052 0x00 0x00 NOPX + 8054 0x11 0x48 0x4d 0x98 LSHL r4, r5, r4 + 8058 0x30 0xc7 0xba 0xe4 0xff 0x24 LSHL r3, r6, r3; ADD.NC lc, r4, #-1 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8064 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0xc0 0xd0 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; MOV dj1, r3; NOPV +.label ZLS_F_Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params_112 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8080 0x43 0x83 0x72 0x39 0x6c 0x80 0x8b 0x00 0x00 0x00 0x48 0x02 0x38 0x00 0x00 0xe1 VLDA x0, [p2], #64; VLDB x2, [p1], #64; MOVS p4, p0; NOPX; VSHUFFLE x1, x2, x0, r1; NOPV +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8096 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0xc0 0x02 0x38 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VSHUFFLE x3, x0, x0, r1; NOPV + 8112 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x44 0xc0 0xe8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmlh0, x1, x3, r0; NOPV + 8128 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x04 0xc4 0xe8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x1, x3, r2; NOPV + 8144 0x00 0x2c 0xf0 0x00 0x20 0x18 0x26 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST.3D bmlh0, [p0], d0; NOPX; NOPM; NOPV + 8160 0x00 0x2c 0xf0 0x00 0x24 0x20 0x06 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmll0, [p4, dj1]; NOPX; NOPM; NOPV +.label ZLE_F_Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params_208 +.end_of_loop + 8176 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.loop_nesting 0 + 8192 0x90 0x11 0x60 0x01 0x40 0x00 0x48 0x02 0x39 0x3a MOVS p4, p0; RET lr; VSHUFFLE x1, x2, x0, r1 +.delay_slot + 8202 0x19 0x80 0x04 0x78 VSHUFFLE x3, x0, x0, r1 +.delay_slot + 8206 0x18 0x89 0x81 0xd8 VSHUFFLE bmlh0, x1, x3, r0 +.delay_slot + 8210 0x18 0x09 0x89 0xd8 VSHUFFLE bmll0, x1, x3, r2 +.delay_slot + 8214 0x08 0x18 0x26 0x98 VST.3D bmlh0, [p0], d0 +.delay_slot + 8218 0x0c 0x20 0x06 0x98 VST bmll0, [p4, dj1] +.label _Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params__end +.label __Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params___func_end0 + +.text_segment PM 8224 +.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_begin0 +.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params +.function_start + 8224 0x20 0x93 0xd0 0x01 0x10 0x28 0x07 0x3f 0x58 0xba LDA p1, [p1]; MOVX r17, #1; MOV m0, #-193 + 8234 0xe6 0x04 0x80 0x00 0x01 0xf2 0x32 0x44 0x10 0xba MOVA m1, #-208; MOVXM p4, #509064 + 8244 0x81 0x42 0xd0 0x03 0x25 0x54 LDA r16, [p4], m0; MOV m0, #201 + 8250 0x04 0x0a 0x6a 0x98 LDA.u8 r19, [p4], m0 + 8254 0x04 0x2a 0x56 0x98 LDA r18, [p4], m1 + 8258 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8264 0x00 0x83 0xdf 0xf0 0x7b 0x0c LDA p0, [p0]; ST lr, [sp, #-8] +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8270 0x40 0xbe 0xdf 0xe2 0x3b 0x0c LDA r15, [p2]; ST p2, [sp, #-16] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8276 0x0f 0xff 0x9d 0x98 ST p7, [sp, #-4] +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 8280 0xfd 0xe3 0xb0 0x00 0x03 0x8e 0x00 0x00 0x41 0x3a ST p6, [sp, #-20]; JL #7280 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8290 0xfe 0xbe 0xb0 0x27 0x08 0x7d 0x31 0x60 0x79 0x3a ST r15, [sp, #-12]; MUL r16, r19, r16; MOV p2, p1 +.delay_slot + 8300 0x1e 0x68 0xc0 0xf8 MOV p6, p4 +.delay_slot + 8304 0xfd 0x13 0xb8 0x42 0x3b 0x5c ST p1, [sp, #-24]; LSHL r16, r16, r17 +.delay_slot + 8310 0xf0 0x11 0x60 0x25 0x08 0xec 0x04 0x10 0x79 0x3a MOVS p7, p0; LSHL r16, r18, r17; MOV m0, r16 +.delay_slot + 8320 0x00 0x2c 0xf2 0x17 0x20 0x01 0x5b 0x00 0x00 0x01 0xb3 0xe0 0xa8 0x00 0x00 0xe1 NOPA; PADDB [p1], m0; NOPS; NOPX; ADD.NC p3, r15, r16; NOPV +.return_address + 8336 0xce 0xc1 0x50 0x44 0x12 0x2c LDA.u8 r16, [p6, #7]; MOVX r17, #2 + 8342 0x00 0x00 NOPX + 8344 0x00 0x00 NOPX + 8346 0x00 0x00 NOPX + 8348 0x00 0x00 NOPX + 8350 0x00 0x00 NOPX + 8352 0x00 0x00 NOPX + 8354 0x14 0x61 0x08 0x98 NE r16, r17, r16 + 8358 0x80 0x10 0xb0 0x40 0x01 0x84 JNZ r16, #8544 +.delay_slot + 8364 0x00 0x07 0xc8 0xc9 0x10 0x44 MOVXM p4, #509064 +.delay_slot +.swstall delay_slot + 8370 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8372 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8374 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8376 0x00 0x00 NOPX + 8378 0x9f 0xc2 0xd0 0x00 0x00 0x28 0x07 0x30 0x58 0xba LDA r16, [p4], #-4; MOVX r0, #1; MOV m0, #-208 + 8388 0x04 0x2e 0xb6 0x98 LDA r21, [p4], #8 + 8392 0x04 0x1e 0x56 0x98 LDA r18, [p4], #4 + 8396 0xfd 0x4e 0x20 0xd1 0x81 0xd4 LDA r19, [sp, #-24]; MOV p0, p4 + 8402 0x81 0x52 0xd0 0x9c 0x8b 0x03 0xb0 0x60 0x72 0xba LDA r20, [p4], m0; MOVS p0, p7; MOV p7, p0 + 8412 0x07 0xef 0x19 0x18 LDA p6, [sp, #-20] + 8416 0x00 0x00 NOPX + 8418 0x14 0x23 0x1d 0x98 LSHL r17, r16, r17 + 8422 0x14 0x20 0x0d 0x98 LSHL r16, r16, r0 +.no_stack_arguments + 8426 0x00 0x0e 0x38 0x00 0x01 0x04 JL #7280 +.delay_slot + 8432 0x94 0x81 0xbb 0x33 0x8a 0xa4 LSHL r18, r18, r0; ADD.NC r22, r19, r17 +.delay_slot + 8438 0xac 0x41 0xba 0xaf 0x92 0xa4 LSHL r17, r21, r0; ADD.NC r21, r15, r18 +.delay_slot + 8444 0xa4 0x81 0xb2 0xd1 0xb2 0xa4 LSHL r18, r20, r0; ADD.NC p1, r17, r22 +.delay_slot + 8450 0x1a 0x69 0xc1 0x58 ADD.NC p2, r19, r16 +.delay_slot + 8454 0x00 0x2c 0xf0 0x00 0x10 0x01 0xb5 0x64 0xae 0xba NOPA; NOPB; ADD.NC p3, r21, r18 +.return_address + 8464 0xfe 0x43 0x20 0x00 0x00 0x28 0x07 0x34 0x58 0xba LDA p4, [sp, #-16]; MOVX r0, #1; MOV m0, #-204 + 8474 0x07 0xfe 0x16 0x98 LDA r16, [p7], #-4 + 8478 0xff 0xf3 0x26 0xdd 0x81 0xd4 LDA p7, [sp, #-4]; MOV p3, p7 + 8484 0x03 0x0a 0x36 0x98 LDA r17, [p3], m0 + 8488 0x07 0xf8 0x39 0x18 LDA lr, [sp, #-8] + 8492 0x07 0xf5 0xf1 0x18 LDA r15, [sp, #-12] + 8496 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 + 8502 0x04 0x06 0x56 0x98 LDA r18, [p4] + 8506 0x14 0x20 0x0d 0x98 LSHL r16, r16, r0 + 8510 0x00 0x00 NOPX + 8512 0x00 0x00 NOPX + 8514 0x00 0x00 NOPX +.tail_call + 8516 0x00 0x0f 0x90 0x00 0x00 0x84 J #7968 +.delay_slot + 8522 0x14 0x62 0x0d 0x98 LSHL r17, r17, r0 +.delay_slot + 8526 0x1c 0x58 0xc9 0x58 ADD.NC r17, r17, r18 +.delay_slot + 8530 0x19 0x69 0x41 0x58 ADD.NC p1, r18, r16 +.delay_slot + 8534 0x18 0x69 0x20 0xf8 MOV p0, r18 +.delay_slot + 8538 0x00 0x2c 0xf4 0xd1 0x82 0x94 NOPA; ADD.NC p2, r17, r16 +.label TGT_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_320 +.return_address + 8544 0x07 0xf8 0x39 0x18 LDA lr, [sp, #-8] + 8548 0x07 0xf5 0xf1 0x18 LDA r15, [sp, #-12] + 8552 0x07 0xff 0x99 0x18 LDA p7, [sp, #-4] + 8556 0x07 0xef 0x19 0x18 LDA p6, [sp, #-20] + 8560 0x00 0x00 NOPX + 8562 0x00 0x00 NOPX + 8564 0x00 0x00 NOPX + 8566 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 8570 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 8576 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8578 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8580 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8582 0x00 0x00 NOPX +.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params__end +.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_end0 + +.text_segment PM 8592 +.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function_start + 8592 0x00 0x07 0xc8 0xc4 0x00 0x44 MOVXM p4, #508416 + 8598 0x80 0xc2 0xd0 0x2f 0x41 0xd4 LDA r16, [p4]; MOV r0, r15 + 8604 0x00 0x10 0x00 0x00 0x01 0xc4 PADDXM [sp], #128 + 8610 0xff 0x3a 0xb0 0x02 0x2d 0x70 0x70 0x02 ST r14, [sp, #-8]; MOV r17, CORE_ID + 8618 0xff 0xb6 0xb0 0x01 0xa8 0xf0 0x70 0x02 ST r13, [sp, #-4]; MOV r13, lr + 8626 0x0f 0xec 0x1d 0x98 ST p0, [sp, #-20] + 8630 0x0f 0xf7 0x9d 0x98 ST p7, [sp, #-12] + 8634 0xfe 0x02 0xb0 0x01 0xca 0x60 0x70 0x02 ST r0, [sp, #-16]; MOV r14, p2 + 8642 0x80 0x11 0x20 0x40 0x01 0x84 JNZ r16, #8768 +.delay_slot + 8648 0x1b 0xd6 0xc0 0xf8 MOV r15, p3 +.delay_slot + 8652 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17 +.delay_slot + 8656 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 8660 0x00 0x07 0xc6 0xc4 0x08 0x44 MOVXM p3, #508420 +.delay_slot + 8666 0x0b 0x06 0x31 0x98 ST r17, [p3] +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8670 0xf0 0x91 0x60 0x00 0x01 0xf0 0xb1 0x10 0x11 0x3a MOVS p7, p1; MOVXM p1, #508448 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8680 0x20 0xc0 0xe0 0x88 0x8b 0x00 0x01 0xf0 0xb1 0x0e 0x10 0x76 ST.s8 r16, [p1]; MOVS p0, p2; MOVXM p1, #508444 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8692 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 8694 0x00 0x0c 0xa8 0x00 0x01 0x04 JL #6480 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8700 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8702 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8704 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 8708 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 8712 0x20 0xc2 0x30 0x00 0x01 0xa5 0x70 0x02 ST r16, [p1]; NOPM +.return_address + 8720 0x33 0x91 0x60 0x01 0x33 0x82 0x00 0x02 MOVS p1, p7; ADD.NC p2, r14, #8 + 8728 0x02 0x06 0x3a 0x98 LDA.u16 r17, [p2] + 8732 0x44 0xc3 0x50 0x00 0x01 0xf1 0x31 0x0c 0x10 0xba LDA.u16 r16, [p2, #4]; MOVXM p2, #508440 + 8742 0x00 0x00 NOPX + 8744 0x00 0x11 0x28 0x00 0x00 0x84 J #8784 +.delay_slot + 8750 0x00 0x07 0xc6 0xc4 0x20 0x44 MOVXM p3, #508432 +.delay_slot +.swstall delay_slot + 8756 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8758 0x00 0x00 NOPX +.delay_slot + 8760 0x0b 0x06 0x31 0x98 ST r17, [p3] +.delay_slot + 8764 0x0a 0x06 0x11 0x98 ST r16, [p2] +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_176 + 8768 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x01 0xf1 0xb1 0x08 0x10 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVXM p3, #508432; NOPV +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_192 + 8784 0x1a 0x67 0x86 0x18 ADD.NC p2, r15, #12 + 8788 0x5f 0xee 0xd0 0x00 0x01 0xf2 0x31 0x02 0x10 0xba LDA r27, [p2], #-4; MOVXM p4, #508420 + 8798 0x02 0xfe 0x16 0x98 LDA r16, [p2], #-4 + 8802 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4 + 8806 0x02 0x46 0x56 0x98 LDA r18, [p2, #16] + 8810 0x00 0x00 NOPX + 8812 0x00 0x00 NOPX + 8814 0x00 0x00 NOPX + 8816 0x00 0x00 NOPX + 8818 0x00 0x00 NOPX + 8820 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 8824 0x0a 0x06 0x11 0x98 ST r16, [p2] + 8828 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 8832 0x00 0x00 NOPX + 8834 0x00 0x00 NOPX + 8836 0x00 0x00 NOPX + 8838 0x14 0x93 0x08 0x18 ACQ r18, r16 + 8842 0x00 0x2f 0x00 0x00 0x01 0xf3 0xb1 0x00 0x10 0xba MOVA r15, #1; MOVXM p7, #508416 + 8852 0x06 0x00 0x28 0x2b 0xc1 0xe4 MOVX r24, #0; MOV r16, sp + 8858 0x18 0x68 0x5a 0x18 ADD.NC p0, r16, #-76 + 8862 0xfd 0xd3 0x27 0x29 0x81 0xd4 LDA p5, [sp, #-20]; MOV r14, p2 + 8868 0x04 0x06 0x36 0x98 LDA r17, [p4] + 8872 0x60 0xc2 0xd0 0x00 0x01 0xf1 0xb1 0xe0 0x10 0xba LDA r16, [p3]; MOVXM p3, #508864 + 8882 0x07 0x06 0x56 0x98 LDA r18, [p7] + 8886 0x00 0x00 NOPX + 8888 0x00 0x00 NOPX + 8890 0x00 0x00 NOPX + 8892 0x05 0x06 0x76 0x98 LDA r19, [p5] + 8896 0x00 0x00 NOPX + 8898 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 + 8902 0x14 0xa2 0x07 0x18 ADD r17, r18, #1 + 8906 0x14 0x20 0xfd 0x98 LSHL r16, r16, r15 +.no_stack_arguments + 8910 0x00 0x10 0x10 0x00 0x01 0x04 JL #8224 +.delay_slot + 8916 0x0f 0x06 0x31 0x98 ST r17, [p7] +.delay_slot + 8920 0x18 0x49 0xc1 0x58 ADD.NC dn0, r19, r16 +.delay_slot + 8924 0x0f 0xb4 0x25 0x98 ST dn0, [sp, #-76] +.delay_slot + 8928 0x0f 0xbb 0x15 0x98 ST r24, [sp, #-72] +.delay_slot + 8932 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x03 0xdf 0x8a 0xc1 0x36 NOPA; NOPB; ST r24, [sp, #-68]; NOPX +.return_address + 8944 0x1a 0x67 0x0a 0x18 ADD.NC p2, r14, #20 + 8948 0x02 0x06 0x16 0x98 LDA r16, [p2] + 8952 0x00 0x00 NOPX + 8954 0x00 0x00 NOPX + 8956 0x00 0x00 NOPX + 8958 0x00 0x00 NOPX + 8960 0x00 0x00 NOPX + 8962 0x00 0x00 NOPX + 8964 0x14 0x10 0xf8 0x18 REL r16, r15 + 8968 0x5c 0xc2 0xd0 0x00 0x01 0xf0 0xb1 0x0c 0x10 0xba LDA r16, [p2, #-8]; MOVXM p1, #508440 + 8978 0x01 0x06 0x56 0x98 LDA r18, [p1] + 8982 0x07 0x06 0x36 0x98 LDA r17, [p7] + 8986 0x07 0xf4 0x99 0x18 LDA p1, [sp, #-12] + 8990 0x07 0xf9 0xd1 0x18 LDA r14, [sp, #-8] + 8994 0x00 0x00 NOPX + 8996 0x00 0x00 NOPX + 8998 0x13 0xe1 0x01 0x98 SUB r16, r15, r16 + 9002 0x0a 0xe6 0x11 0x98 ST r16, [p2, #-8] + 9006 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 9010 0x80 0x11 0xa8 0x40 0x01 0x84 JNZ r16, #9040 +.delay_slot + 9016 0x10 0x30 0x01 0x18 MOVX r24, #0 +.delay_slot +.swstall delay_slot + 9020 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9022 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9024 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9026 0x00 0x00 NOPX + 9028 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x03 0x83 0x88 0xc1 0x36 NOPA; NOPB; ST r24, [p7]; NOPX +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 + 9040 0xff 0xb6 0x2e 0xed 0x41 0xd4 LDA r13, [sp, #-4]; MOV lr, r13 + 9046 0x07 0xf1 0xf1 0x18 LDA r15, [sp, #-16] + 9050 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 9054 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128 +.delay_slot +.swstall delay_slot + 9060 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9062 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9064 0x00 0x00 NOPX +.delay_slot + 9066 0x1f 0x62 0xc0 0xf8 MOV p7, p1 +.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + +.text_segment PM 9072 +.label __Z15_b13749_wrapperPPv___func_begin0 +.label _Z15_b13749_wrapperPPv +.function_start + 9072 0x1a 0x60 0xc0 0xf8 MOV p2, p0 + 9076 0x02 0x3c 0x1e 0x98 LDA p0, [p2], #12 + 9080 0x02 0xec 0x9e 0x98 LDA p1, [p2], #-8 + 9084 0x02 0x15 0x9e 0x98 LDA p3, [p2, #4] + 9088 0x02 0x05 0x1e 0x98 LDA p2, [p2] +.tail_call + 9092 0x00 0x0b 0x78 0x00 0x00 0x84 J #5872 +.delay_slot +.swstall delay_slot + 9098 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9100 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9102 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9104 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9106 0x00 0x00 NOPX +.label _Z15_b13749_wrapperPPv__end +.label __Z15_b13749_wrapperPPv___func_end0 + +.text_segment PM 9120 +.label __ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj___func_begin0 +.label _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj +.function_start + 9120 0x20 0x85 0xd8 0xa9 0x81 0xd4 LDA el0, [p1]; MOV r17, p2 + 9126 0x19 0x68 0x82 0x18 ADD.NC p1, r17, #4 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9130 0x01 0x1e 0x56 0x98 LDA r18, [p1], #4 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9134 0x01 0x05 0xf6 0x98 LDA r15, [p1] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9138 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9140 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9142 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9144 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9146 0x18 0x17 0xa0 0xf8 MOV r0, r15 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 9150 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 9156 0x7c 0xa5 0xf8 0x3f 0xfd 0x64 MUL r18, r15, r18; MOV r16, #-1 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9162 0xfd 0xca 0xb0 0x0f 0xff 0xfe 0x2f 0xff 0x91 0x3a ST r18, [sp, #-20]; MOVXM r17, #1073741823 + 9172 0x14 0xa1 0x0d 0x98 LSHL r16, r18, r16 + 9176 0x14 0x61 0x04 0x98 AND r16, r17, r16 + 9180 0x80 0x12 0x18 0x00 0x01 0x84 JZ r16, #9264 +.delay_slot + 9186 0x00 0xf3 0xd0 0xdd 0x81 0xd4 LDA p7, [p0]; MOV p0, p7 +.delay_slot + 9192 0x0f 0xf8 0x1d 0x98 ST p0, [sp, #-8] +.delay_slot + 9196 0x0f 0xf5 0xd5 0x98 ST r14, [sp, #-12] +.delay_slot + 9200 0x0f 0xf0 0x3d 0x98 ST lr, [sp, #-16] +.delay_slot + 9204 0x3c 0xba 0xdf 0xf8 0x2b 0x0c LDA r14, [p1, #-8]; ST r0, [sp, #-4] + 9210 0xfd 0x05 0xb0 0x00 0x02 0x5c ST el0, [sp, #-24]; MOVX r0, #0 + 9216 0x07 0xe8 0x99 0x18 LDA p1, [sp, #-24] +.no_stack_arguments + 9220 0x00 0x18 0xa0 0x00 0x01 0x04 JL #12608 +.delay_slot + 9226 0x10 0x22 0x09 0x18 MOVX r17, #2 +.delay_slot + 9230 0x14 0x03 0x1d 0x98 LSHL r1, r16, r17 +.delay_slot +.swstall delay_slot + 9234 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9236 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9238 0x00 0x2c 0xf0 0x00 0x10 0x00 0x01 0xa5 0x7e 0xba NOPA; NOPB; NOPM +.return_address + 9248 0x00 0x12 0x20 0x00 0x00 0x84 J #9280 +.delay_slot +.swstall delay_slot + 9254 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9256 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9258 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9260 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9262 0x00 0x00 NOPX +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_144 + 9264 0x00 0x2c 0xf0 0x00 0x27 0xe8 0x2d 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST el0, [sp, #-24]; NOPX; NOPM; NOPV +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_160 + 9280 0x78 0x12 0xa8 0x00 0x01 0x84 JZ r15, #9552 +.delay_slot +.swstall delay_slot + 9286 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9288 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9290 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9292 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9294 0x00 0x00 NOPX + 9296 0xfd 0xc6 0x20 0x00 0x00 0x08 0x7a 0x58 0x10 0xba LDA r17, [sp, #-20]; MOVXM ls, #9392 + 9306 0x00 0x33 0x00 0x00 0x00 0x09 0xba 0x88 0x10 0xba MOVA r19, #1; MOVXM le, #9488 + 9316 0xfd 0x4a 0x20 0x1d 0x49 0xee 0x0b 0xff 0xc8 0xba LDA r18, [sp, #-24]; LSHL r20, r14, r19; ADD.NC r16, r15, #-1 + 9326 0xfe 0x07 0x20 0x00 0x00 0x08 0x32 0x48 0x10 0xba LDA lr, [sp, #-16]; MOVXM p0, #9360 + 9336 0x18 0x0a 0x20 0xf8 MOV m0, r20 + 9340 0x00 0x00 NOPX + 9342 0x00 0x00 NOPX + 9344 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x23 0x19 0xec 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; LSHL r17, r17, r19; NOPM; NOPV +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_240 +.loop_nesting 1 + 9360 0x70 0x12 0x90 0x00 0x01 0x84 JZ r14, #9504 +.delay_slot +.swstall delay_slot + 9366 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9368 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9370 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9372 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9374 0x00 0x00 NOPX + 9376 0x53 0x91 0x60 0x02 0xbb 0x90 0x70 0x02 MOVS p2, p7; MOV lc, r14 + 9384 0x00 0x2b 0x60 0x00 0xb4 0x90 0x70 0x02 NOPS; MOV p1, r18 +.label ZLS_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_272 +.loop_nesting 2 +.begin_of_loop + 9392 0x43 0xce 0x50 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 LDA.s16 r19, [p2], #2; NOPB; NOPS; NOPX; NOPM; NOPV + 9408 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 9424 0x23 0xce 0xe0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 ST.s16 r19, [p1], #2; NOPB; NOPS; NOPX; NOPM; NOPV + 9440 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 9456 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 9472 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLE_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_368 +.end_of_loop + 9488 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_384 +.loop_nesting 1 + 9504 0xe1 0x72 0x08 0x40 0x40 0x1c PADDB [p7], m0; JNZD r16, r16, p0 +.delay_slot +.swstall delay_slot + 9510 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9512 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9514 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9516 0x00 0x00 NOPX +.delay_slot + 9518 0x1c 0x98 0xc9 0x58 ADD.NC r18, r17, r18 +.loop_nesting 0 + 9522 0x00 0x12 0xb0 0x00 0x00 0x84 J #9568 +.delay_slot +.swstall delay_slot + 9528 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9530 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9532 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9534 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9536 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_432 + 9552 0xfe 0x07 0x20 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 LDA lr, [sp, #-16]; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_448 + 9568 0x07 0xf5 0xd1 0x18 LDA r14, [sp, #-12] + 9572 0x07 0xfb 0x99 0x18 LDA p7, [sp, #-8] + 9576 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] + 9580 0x00 0x00 NOPX + 9582 0x00 0x00 NOPX + 9584 0x00 0x00 NOPX + 9586 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 9590 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 9596 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9598 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9600 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9602 0x00 0x00 NOPX +.label _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj__end +.label __ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj___func_end0 + +.text_segment PM 9616 +.label __Z14_b8148_wrapperPPv___func_begin0 +.label _Z14_b8148_wrapperPPv +.function_start + 9616 0x19 0x60 0xc0 0xf8 MOV p1, p0 + 9620 0x01 0x1c 0x1e 0x98 LDA p0, [p1], #4 + 9624 0x01 0x15 0x1e 0x98 LDA p2, [p1, #4] + 9628 0x01 0x04 0x9e 0x98 LDA p1, [p1] +.tail_call + 9632 0x00 0x11 0xd0 0x00 0x00 0x84 J #9120 +.delay_slot +.swstall delay_slot + 9638 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9640 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9642 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9644 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9646 0x00 0x00 NOPX +.label _Z14_b8148_wrapperPPv__end +.label __Z14_b8148_wrapperPPv___func_end0 +.label _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj +.label __ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj___func_begin0 +.function_start + 9648 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 9652 0x00 0x00 NOPX + 9654 0x00 0x00 NOPX + 9656 0x00 0x00 NOPX + 9658 0x00 0x00 NOPX + 9660 0x00 0x00 NOPX + 9662 0x00 0x00 NOPX + 9664 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 9668 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 9672 0x00 0x00 NOPX + 9674 0x00 0x00 NOPX + 9676 0x00 0x00 NOPX + 9678 0x00 0x00 NOPX + 9680 0x00 0x00 NOPX + 9682 0x00 0x00 NOPX + 9684 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 9688 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 9692 0x00 0x00 NOPX + 9694 0x00 0x00 NOPX + 9696 0x00 0x00 NOPX + 9698 0x00 0x00 NOPX + 9700 0x00 0x00 NOPX + 9702 0x00 0x00 NOPX + 9704 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 9708 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 9712 0x00 0x00 NOPX + 9714 0x00 0x00 NOPX + 9716 0x00 0x00 NOPX + 9718 0x00 0x00 NOPX + 9720 0x00 0x00 NOPX + 9722 0x00 0x00 NOPX + 9724 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 9728 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 9732 0x00 0x00 NOPX + 9734 0x00 0x00 NOPX + 9736 0x00 0x00 NOPX + 9738 0x00 0x00 NOPX + 9740 0x00 0x00 NOPX + 9742 0x00 0x00 NOPX + 9744 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 9748 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 9752 0x00 0x00 NOPX + 9754 0x00 0x00 NOPX + 9756 0x00 0x00 NOPX + 9758 0x00 0x00 NOPX + 9760 0x00 0x00 NOPX + 9762 0x00 0x00 NOPX + 9764 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 9768 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 9772 0x00 0x00 NOPX + 9774 0x00 0x00 NOPX + 9776 0x00 0x00 NOPX + 9778 0x00 0x00 NOPX + 9780 0x00 0x00 NOPX + 9782 0x00 0x00 NOPX + 9784 0x08 0x04 0x29 0x98 ST el0, [p0] + 9788 0x01 0x14 0x2e 0x98 LDA el0, [p1, #4] + 9792 0x00 0x00 NOPX + 9794 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot + 9798 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9800 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9802 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9804 0x00 0x00 NOPX +.delay_slot + 9806 0x08 0x14 0x29 0x98 ST el0, [p0, #4] +.label _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj__end +.label __ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj___func_end0 + +.text_segment PM 9824 +.label _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params +.label __ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params___func_begin0 +.function_start + 9824 0x03 0x86 0xd0 0x00 0x00 0x28 0x80 0x20 0x58 0xba LDA r1, [p0], #4; MOVX r0, #1; MOV m1, #32 + 9834 0x03 0x96 0xd0 0x00 0x30 0x48 0x4f 0xfa 0x58 0xba LDA r5, [p0], #4; MOVX r3, #2; MOV r2, #-6 + 9844 0x05 0x92 0xd0 0x01 0x01 0x54 LDA r4, [p0], #8; MOV m0, #64 + 9850 0x05 0x1a 0xd1 0x02 0x01 0x54 LDA r6, [p0], m1; MOV dj0, #128 + 9856 0x00 0x00 NOPX + 9858 0x00 0x00 NOPX + 9860 0x00 0x00 NOPX + 9862 0x00 0x00 NOPX + 9864 0x00 0x00 NOPX + 9866 0x11 0x42 0x1f 0x98 MUL r1, r5, r1 + 9870 0x11 0x80 0x04 0x98 AND r0, r6, r0 + 9874 0x10 0xc0 0x05 0x98 OR r0, r3, r0 + 9878 0x19 0x82 0x30 0x84 0x9f 0x5c ST r0, [p0], #-16; MUL r1, r1, r4 + 9884 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 9888 0x10 0x40 0x2d 0x98 LSHL r0, r1, r2 +.delay_slot + 9892 0x08 0x1c 0x11 0x98 ST r0, [p0], #4 +.delay_slot + 9896 0x08 0x1c 0x01 0x98 ST m0, [p0], #4 +.delay_slot + 9900 0x08 0x04 0x41 0x98 ST dj0, [p0] +.delay_slot + 9904 0x08 0x14 0x01 0x98 ST m0, [p0, #4] +.label _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params__end +.label __ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params___func_end0 + +.text_segment PM 9920 +.label _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj +.label __ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj___func_begin0 +.function_start +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.noswbrkpt + 9920 0x00 0x12 0xd8 0x00 0x01 0x04 JL #9648 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9926 0x18 0xc1 0xe0 0xf8 MOV dc0, lr +.delay_slot + 9930 0x1a 0x60 0xc0 0xf8 MOV p2, p0 +.delay_slot +.swstall delay_slot + 9934 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9936 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9938 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV +.tail_call +.return_address + 9952 0x00 0x13 0x30 0x00 0x00 0x84 J #9824 +.delay_slot + 9958 0x1f 0x71 0x80 0xf8 MOV lr, dc0 +.delay_slot + 9962 0x18 0x64 0xc0 0xf8 MOV p0, p2 +.delay_slot +.swstall delay_slot + 9966 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9968 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9970 0x00 0x00 NOPX +.label _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj__end +.label __ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj___func_end0 + +.text_segment PM 9984 +.label __Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params___func_begin0 +.label _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params +.function_start + 9984 0xb0 0x91 0x60 0x00 0x0a 0x60 0x70 0x02 MOVS p5, p1; MOV r0, p2 + 9992 0x1b 0x60 0x12 0x18 ADD.NC p3, r0, #36 + 9996 0x63 0xa0 0xd0 0x3d 0x81 0xd4 LDA m2, [p3], #4; MOV r0, p7 + 10002 0x03 0x1c 0x06 0x98 LDA m0, [p3], #4 + 10006 0x03 0xd4 0x56 0x98 LDA r2, [p3, #-12] + 10010 0x03 0x04 0x86 0x98 LDA m1, [p3] + 10014 0x00 0x00 NOPX + 10016 0x00 0x00 NOPX + 10018 0x00 0x00 NOPX + 10020 0x00 0x00 NOPX + 10022 0x00 0x00 NOPX + 10024 0x10 0x14 0x68 0x00 0x01 0x84 JZ r2, #10448 +.delay_slot + 10030 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.delay_slot + 10034 0xe1 0x72 0x06 0xdd 0x81 0xf4 PADDB [p7], m0; MOV p3, p7 +.delay_slot + 10040 0x38 0x4b 0x90 0x18 PADDB [p0], m2 +.delay_slot + 10044 0x01 0x72 0x08 0xc1 0x81 0xf4 PADDB [p0], m0; MOV p4, p0 +.delay_slot + 10050 0x39 0x2b 0x90 0x18 PADDB [p1], m1 + 10054 0x10 0x02 0x11 0x18 MOVX r1, #4 + 10058 0x10 0x86 0x1c 0x98 LTU r3, r2, r1 + 10062 0x18 0x14 0x20 0x40 0x01 0x84 JNZ r3, #10304 +.delay_slot + 10068 0x18 0x80 0x60 0xb8 MOV dj0, #48 +.delay_slot + 10072 0x02 0x00 0x36 0x98 LDA r1, [p2, dj0] +.delay_slot +.swstall delay_slot + 10076 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10078 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10080 0x00 0x00 NOPX + 10082 0x81 0x13 0x76 0x10 0xe8 0x00 0x00 0x08 0x7b 0xe0 0x10 0xb6 VLDA x2, [p4], m0; VLDB x1, [p3], m0; MOVXM ls, #10176 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 10094 0x81 0x0c 0xfe 0x10 0x68 0x00 0x01 0x37 0x7f 0x02 0x61 0x0b 0x60 0x7e PADDA [p4], m0; VLDB x0, [p7], m0; PADDS [p3], m0; MOVXM le, #10224 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10108 0x61 0x0b 0x70 0x11 0xef 0x08 0x5b 0x02 0xb8 0xbf 0x40 0xf6 VLDA x1, [p3], m0; VLDB x3, [p0], m0; PADDS [p7], m0; ADD.NC lc, r2, #-3 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10120 0x01 0x0c 0xf8 0x11 0x6b 0x08 0x5b 0x32 PADDA [p0], m0; VLDB x2, [p4], m0; PADDS [p3], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10128 0x81 0x0c 0xfe 0x10 0x68 0x3c PADDA [p4], m0; VLDB x0, [p7], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10134 0x01 0x1e 0x8e 0x10 0xb6 0x4c VLDB x3, [p0], m0; PADDS [p7], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10140 0x01 0x0c 0xf6 0x10 0xe8 0x3c PADDA [p0], m0; VLDB x1, [p3], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10146 0x81 0x16 0x80 0x12 0x0b 0xb4 VLDB x2, [p4], m0; VSHUFFLE bmll0, x1, x2, r1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10152 0x00 0x2c 0xfe 0x10 0x6b 0x08 0x5b 0x32 NOPA; VLDB x0, [p7], m0; PADDS [p3], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10160 0x00 0x2c 0xf0 0x11 0xed 0x28 0x06 0x80 0x00 0x00 0x40 0xc2 0xe8 0x00 0x00 0xe1 NOPA; VLDB x3, [p0], m0; VST bmll0, [p5], m1; NOPX; VSHUFFLE bmlh0, x0, x3, r1; NOPV +.label ZLS_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_192 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10176 0x81 0x0c 0xf6 0x10 0xef 0x08 0x5b 0x00 0x00 0x00 0x04 0x82 0xe8 0x00 0x00 0xe1 PADDA [p4], m0; VLDB x1, [p3], m0; PADDS [p7], m0; NOPX; VSHUFFLE bmll0, x1, x2, r1; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10192 0x01 0x0c 0xf8 0x11 0x69 0x28 0x26 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 PADDA [p0], m0; VLDB x2, [p4], m0; VST bmlh0, [p1], m1; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10208 0xa5 0x0c 0xfe 0x10 0x6b 0x08 0x5b 0x00 0x00 0x00 0x40 0xc2 0xe8 0x00 0x00 0xe1 PADDA [p5], m1; VLDB x0, [p7], m0; PADDS [p3], m0; NOPX; VSHUFFLE bmlh0, x0, x3, r1; NOPV +.label ZLE_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_240 +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10224 0x25 0x0c 0xf0 0x11 0xed 0x28 0x06 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 PADDA [p1], m1; VLDB x3, [p0], m0; VST bmll0, [p5], m1; NOPX; NOPM; NOPV +.loop_nesting 0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10240 0x18 0x09 0x05 0xd8 VSHUFFLE bmll0, x1, x2, r1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10244 0x09 0x28 0x26 0x98 VST bmlh0, [p1], m1 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10248 0x25 0x0c 0xf1 0x03 0x0b 0x94 PADDA [p1], m1; VSHUFFLE bmlh0, x0, x3, r1 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 10254 0x00 0x14 0x68 0x00 0x00 0x84 J #10448 +.delay_slot +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10260 0xa5 0x0c 0xf1 0x28 0x26 0x80 0x04 0x82 0xe2 0xba PADDA [p5], m1; VST bmlh0, [p1], m1; VSHUFFLE bmll0, x1, x2, r1 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10270 0x25 0x0c 0xfa 0x50 0x0d 0x0c PADDA [p1], m1; VST bmll0, [p5], m1 +.delay_slot + 10276 0xa5 0x0c 0xf1 0x03 0x0b 0x94 PADDA [p5], m1; VSHUFFLE bmlh0, x0, x3, r1 +.delay_slot + 10282 0x00 0x2c 0xfa 0x50 0x0d 0x0c NOPA; VST bmll0, [p5], m1 +.delay_slot + 10288 0x00 0x2c 0xf0 0x00 0x21 0x28 0x26 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmlh0, [p1], m1; NOPX; NOPM; NOPV +.label TGT_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_320 + 10304 0x1d 0x71 0x20 0xf8 MOV lc, r2 + 10308 0x00 0x00 0x21 0xf0 0xa0 0x44 MOVXM ls, #10320 + 10314 0x00 0x00 0x26 0xf1 0x80 0x44 MOVXM le, #10432 +.label ZLS_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_336 +.loop_nesting 1 +.begin_of_loop + 10320 0x81 0x0b 0x76 0x11 0x68 0x3c VLDA x1, [p4], m0; VLDB x2, [p3], m0 + 10326 0x61 0x0c 0xfe 0x10 0x6c 0x08 0x5b 0x32 PADDA [p3], m0; VLDB x0, [p7], m0; PADDS [p4], m0 + 10334 0xe1 0x0c 0xf0 0x11 0xe8 0x3c PADDA [p7], m0; VLDB x3, [p0], m0 + 10340 0x38 0x0b 0x90 0x18 PADDB [p0], m0 + 10344 0x00 0x00 NOPX + 10346 0x00 0x00 NOPX + 10348 0x00 0x01 0x67 0x98 NOPA + 10352 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x08 0x42 0xe8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x2, x1, r1; NOPV + 10368 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 10384 0x00 0x2c 0xf0 0x00 0x25 0x28 0x06 0x80 0x00 0x00 0x40 0xc2 0xe8 0x00 0x00 0xe1 NOPA; NOPB; VST bmll0, [p5], m1; NOPX; VSHUFFLE bmlh0, x0, x3, r1; NOPV + 10400 0x00 0x2c 0xfa 0x57 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; PADDB [p5], m1; NOPS; NOPX; NOPM; NOPV + 10416 0x00 0x2c 0xf0 0x00 0x21 0x28 0x26 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmlh0, [p1], m1; NOPX; NOPM; NOPV +.label ZLE_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_448 +.end_of_loop + 10432 0x00 0x2c 0xf2 0x57 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; PADDB [p1], m1; NOPS; NOPX; NOPM; NOPV +.label TGT_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_464 +.loop_nesting 0 + 10448 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 10452 0x1f 0x60 0x20 0xf8 MOV p7, r0 +.delay_slot +.swstall delay_slot + 10456 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10458 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10460 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10462 0x00 0x00 NOPX +.label _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params__end +.label __Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params___func_end0 +.label __ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj___func_begin0 +.label _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj +.function_start + 10464 0xb0 0x11 0x60 0x00 0x04 0x00 0x00 0x00 0x71 0x3a MOVS p5, p0; PADDXM [sp], #128 + 10474 0xff 0x87 0xb0 0x01 0xb1 0x60 0x70 0x02 ST lr, [sp, #-4]; MOV p3, p1 +.no_stack_arguments + 10482 0x31 0x11 0x60 0x00 0x04 0xd8 0x00 0x00 0x41 0x3a MOVS p1, p2; JL #9920 +.delay_slot + 10492 0x18 0x65 0xe0 0xf8 MOV p0, sp +.delay_slot + 10496 0x38 0xef 0x90 0x18 PADDB [p0], #-128 +.delay_slot + 10500 0x1c 0x60 0xc0 0xf8 MOV p4, p0 +.delay_slot +.swstall delay_slot + 10504 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10506 0x00 0x2c 0xf0 0x00 0x20 0x3c NOPA; NOPB +.return_address + 10512 0xf0 0x4a 0x22 0x90 0x8b 0x02 0x2d 0x70 0x72 0xba LDA r18, [sp, #-128]; MOVS p2, p4; MOV r17, CORE_ID + 10522 0xf0 0xda 0x28 0xc5 0x20 0x2c LDA r22, [sp, #-124]; EXTEND.u8 r17, r17 + 10528 0xf1 0x52 0x20 0x00 0x00 0x3e 0x6f 0xff 0x10 0xba LDA r20, [sp, #-120]; MOVXM r19, #65534 + 10538 0x60 0x93 0xd9 0xc6 0x21 0x2c LDA p1, [p3]; ADD r17, r19, r17 + 10544 0xf1 0xce 0x28 0xd5 0x60 0x2c LDA r19, [sp, #-116]; EXTEND.u16 r21, r17 + 10550 0x00 0x00 NOPX + 10552 0x05 0x06 0x36 0x98 LDA r17, [p5] + 10556 0x00 0x00 NOPX + 10558 0x15 0xa5 0x2f 0x98 MUL r18, r22, r18 + 10562 0x00 0x00 NOPX + 10564 0x14 0xa5 0x4f 0x98 MUL r18, r18, r20 + 10568 0x00 0x00 NOPX + 10570 0x15 0x65 0x2f 0x98 MUL r18, r21, r18 +.no_stack_arguments + 10574 0x00 0x13 0x80 0x00 0x01 0x04 JL #9984 +.delay_slot + 10580 0x14 0xe5 0x2f 0x98 MUL r18, r19, r18 +.delay_slot + 10584 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 10588 0x14 0xa1 0x0d 0x98 LSHL r16, r18, r16 +.delay_slot + 10592 0x18 0x68 0xc1 0x58 ADD.NC p0, r17, r16 +.delay_slot +.swstall delay_slot + 10596 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.return_address + 10608 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] + 10612 0x00 0x00 NOPX + 10614 0x00 0x00 NOPX + 10616 0x00 0x00 NOPX + 10618 0x00 0x00 NOPX + 10620 0x00 0x00 NOPX + 10622 0x00 0x00 NOPX + 10624 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 10628 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128 +.delay_slot +.swstall delay_slot + 10634 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10636 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10638 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10640 0x00 0x00 NOPX +.label _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj__end +.label __ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj___func_end0 + +.text_segment PM 10656 +.label __Z14_b8170_wrapperPPv___func_begin0 +.label _Z14_b8170_wrapperPPv +.function_start + 10656 0x19 0x60 0xc0 0xf8 MOV p1, p0 + 10660 0x01 0x1c 0x1e 0x98 LDA p0, [p1], #4 + 10664 0x01 0x15 0x1e 0x98 LDA p2, [p1, #4] + 10668 0x01 0x04 0x9e 0x98 LDA p1, [p1] +.tail_call + 10672 0x00 0x14 0x70 0x00 0x00 0x84 J #10464 +.delay_slot +.swstall delay_slot + 10678 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10680 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10682 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10684 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10686 0x00 0x00 NOPX +.label _Z14_b8170_wrapperPPv__end +.label __Z14_b8170_wrapperPPv___func_end0 +.label __Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj___func_begin0 +.label _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj +.function_start + 10688 0x23 0x85 0xd0 0x00 0x01 0xf0 0x09 0x40 0x10 0xba LDA el0, [p1], #4; MOVXM r0, #508544 + 10698 0x08 0x00 0x80 0x80 0x0b 0x3e 0x27 0xa9 0x30 0x01 0x08 0x76 MOVA m0, #64; MOVS p0, r0; MOVX r2, #-3; ADD.NC p2, r0, #4 + 10710 0x00 0x42 0x20 0x22 0x01 0x64 MOVX r1, #4; MOV r0, #128 + 10716 0x00 0x00 NOPX + 10718 0x00 0x00 NOPX + 10720 0x00 0x00 NOPX + 10722 0x00 0x00 NOPX + 10724 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 10728 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 10732 0x01 0x1c 0x0e 0x98 LDA eh0, [p1], #4 + 10736 0x00 0x00 NOPX + 10738 0x00 0x00 NOPX + 10740 0x00 0x00 NOPX + 10742 0x00 0x00 NOPX + 10744 0x00 0x00 NOPX + 10746 0x0a 0x1c 0x29 0x98 ST el0, [p2], #4 + 10750 0x0a 0x1c 0x09 0x98 ST eh0, [p2], #4 + 10754 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 10758 0x01 0x1c 0x0e 0x98 LDA eh0, [p1], #4 + 10762 0x00 0x00 NOPX + 10764 0x00 0x00 NOPX + 10766 0x00 0x00 NOPX + 10768 0x00 0x00 NOPX + 10770 0x00 0x00 NOPX + 10772 0x0a 0x1c 0x29 0x98 ST el0, [p2], #4 + 10776 0x0a 0x1c 0x09 0x98 ST eh0, [p2], #4 + 10780 0x01 0x04 0x0e 0x98 LDA eh0, [p1] + 10784 0x01 0x14 0x2e 0x98 LDA el0, [p1, #4] + 10788 0x00 0x00 NOPX + 10790 0x00 0x00 NOPX + 10792 0x00 0x00 NOPX + 10794 0x00 0x00 NOPX + 10796 0x00 0x00 NOPX + 10798 0x0a 0x04 0x09 0x98 ST eh0, [p2] + 10802 0x0a 0x14 0x29 0x98 ST el0, [p2, #4] + 10806 0x00 0x08 0x76 0x98 LDA r3, [p0], m0 + 10810 0x00 0x00 NOPX + 10812 0x00 0x00 NOPX + 10814 0x00 0x00 NOPX + 10816 0x00 0x00 NOPX + 10818 0x00 0x00 NOPX + 10820 0x00 0x00 NOPX + 10822 0x10 0xc8 0x2d 0x98 LSHL r4, r3, r2 + 10826 0x18 0xc3 0xb0 0xa4 0xff 0x24 LSHL r3, r3, r1; ADD.NC r1, r4, #-1 + 10832 0x00 0x86 0x30 0x00 0x88 0x60 0x70 0x02 ST r1, [p0]; MOV r4, p0 + 10840 0x19 0x62 0x62 0x18 ADD.NC p1, r4, #-60 + 10844 0x01 0x08 0x96 0x98 LDA r4, [p1], m0 + 10848 0x00 0x00 NOPX + 10850 0x00 0x00 NOPX + 10852 0x00 0x00 NOPX + 10854 0x00 0x00 NOPX + 10856 0x00 0x00 NOPX + 10858 0x00 0x00 NOPX + 10860 0x20 0x85 0xb2 0x22 0x01 0x64 LSHL r2, r4, r2; MOV r4, #128 + 10866 0x10 0x85 0xff 0x18 ADD r2, r2, #-1 + 10870 0x23 0x8a 0x31 0x90 0x5c 0x5c ST r2, [p1], #4; MSC r4, r4, r3, r2 + 10876 0x09 0x1c 0x11 0x98 ST r0, [p1], #4 + 10880 0x09 0x1c 0x11 0x98 ST r0, [p1], #4 + 10884 0x09 0x2c 0x11 0x98 ST r0, [p1], #8 + 10888 0x09 0xfc 0x71 0x98 ST r3, [p1], #-4 + 10892 0x09 0x2c 0x91 0x98 ST r4, [p1], #8 + 10896 0x20 0x82 0x30 0x00 0xa9 0x60 0x70 0x02 ST r0, [p1]; MOV r5, p1 + 10904 0x19 0x62 0xde 0x18 ADD.NC p1, r5, #-68 + 10908 0x09 0x1c 0x11 0x98 ST r0, [p1], #4 + 10912 0x09 0x1c 0x11 0x98 ST r0, [p1], #4 + 10916 0x09 0x1c 0x51 0x98 ST r2, [p1], #4 + 10920 0x09 0x1c 0x31 0x98 ST r1, [p1], #4 + 10924 0x23 0x82 0x30 0x50 0x00 0x5c ST r0, [p1], #4; RET lr +.delay_slot + 10930 0x09 0x2c 0x71 0x98 ST r3, [p1], #8 +.delay_slot + 10934 0x09 0xfc 0x51 0x98 ST r2, [p1], #-4 +.delay_slot + 10938 0x09 0x2c 0x91 0x98 ST r4, [p1], #8 +.delay_slot + 10942 0x09 0x04 0x31 0x98 ST r1, [p1] +.delay_slot + 10946 0x09 0x14 0x11 0x98 ST r0, [p1, #4] +.label _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj__end +.label __Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj___func_end0 + +.text_segment PM 10960 +.label __Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params___func_begin0 +.label _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params +.function_start + 10960 0x00 0x41 0x00 0x00 0x01 0xf1 0x31 0x46 0x10 0xba MOVA r1, #2; MOVXM p2, #508556 + 10970 0x40 0xee 0xd0 0x00 0xb2 0x2c LDA r27, [p2]; MOVX r0, #22 + 10976 0x00 0x00 NOPX + 10978 0x00 0x00 NOPX + 10980 0x00 0x00 NOPX + 10982 0x00 0x00 NOPX + 10984 0x00 0x00 NOPX + 10986 0x00 0x00 NOPX + 10988 0x16 0xc2 0x17 0x98 EQ r1, r27, r1 + 10992 0x08 0x16 0x60 0x40 0x01 0x84 JNZ r1, #11456 +.delay_slot + 10998 0x10 0x04 0x75 0x18 MOVX r2, #29 +.delay_slot + 11002 0x10 0x00 0x22 0x18 SEL.EQZ r0, r0, r2, r27 +.delay_slot +.swstall delay_slot + 11006 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11008 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11010 0x00 0x00 NOPX + 11012 0x00 0x07 0xc4 0xc5 0x20 0x44 MOVXM p2, #508560 + 11018 0x02 0x04 0x36 0x98 LDA r1, [p2] + 11022 0x00 0x00 NOPX + 11024 0x00 0x00 NOPX + 11026 0x00 0x00 NOPX + 11028 0x00 0x00 NOPX + 11030 0x00 0x00 NOPX + 11032 0x00 0x00 NOPX + 11034 0x08 0x17 0xa0 0x00 0x01 0x84 JZ r1, #12096 +.delay_slot +.swstall delay_slot + 11040 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11042 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11044 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11046 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11048 0x00 0x00 NOPX + 11050 0x10 0x04 0x29 0x18 MOVX r2, #10 + 11054 0x10 0x44 0x2c 0x98 LTU r2, r1, r2 + 11058 0x10 0x16 0x10 0x40 0x01 0x84 JNZ r2, #11296 +.delay_slot +.swstall delay_slot + 11064 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11066 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11068 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11070 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11072 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11074 0x00 0x38 0x68 0x00 0x00 0x08 0x7d 0xe0 0x10 0x3a VLDB x0, [p0], #64; MOVXM ls, #11200 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11084 0x00 0x38 0x68 0x00 0x00 0x09 0xbd 0xe0 0x10 0x3a VLDB x0, [p0], #64; MOVXM le, #11200 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11094 0x00 0x2c 0xf0 0x1c 0x34 0x02 0xb8 0x7d 0xce 0xba NOPA; VLDB x0, [p0], #64; ADD.NC lc, r1, #-9 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11104 0x00 0x2c 0xf0 0x38 0x68 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11120 0x00 0x2c 0xf0 0x38 0x68 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11136 0x00 0x2c 0xf0 0x38 0x68 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11152 0x00 0x2c 0xf0 0x38 0x68 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11168 0x00 0x2c 0xf0 0x38 0x68 0x01 0x5b 0x00 0x00 0x00 0x00 0x00 0xe8 0x00 0x00 0xe1 NOPA; VLDB x0, [p0], #64; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11184 0x00 0x2c 0xf0 0x38 0x68 0x01 0x5b 0x00 0x00 0x00 0x00 0x00 0xe8 0x00 0x00 0xe1 NOPA; VLDB x0, [p0], #64; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_240 +.loop_nesting 1 +.begin_of_loop +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11200 0x00 0x2c 0xf0 0x38 0x69 0x1c 0x06 0x80 0x00 0x00 0x00 0x00 0xe8 0x00 0x00 0xe1 NOPA; VLDB x0, [p0], #64; VST bmll0, [p1], #64; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV +.loop_nesting 0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11216 0x23 0x80 0xd0 0x00 0x00 0x00 0xe0 0x02 VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11224 0x23 0x80 0xd0 0x00 0x00 0x00 0xe0 0x02 VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11232 0x23 0x80 0xd0 0x00 0x00 0x00 0xe0 0x02 VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11240 0x23 0x80 0xd0 0x01 0x40 0x00 0x00 0x00 0xe9 0x3a VST bmll0, [p1], #64; RET lr; VSHUFFLE bmll0, x0, x0, r0 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11250 0x23 0x80 0xd0 0x00 0x00 0x00 0xe0 0x02 VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11258 0x23 0x80 0xd0 0x00 0x00 0x00 0xe0 0x02 VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11266 0x23 0x80 0xd0 0x00 0x00 0x00 0xe0 0x02 VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11274 0x00 0x2c 0xf2 0x38 0x0d 0x0c NOPA; VST bmll0, [p1], #64 +.delay_slot + 11280 0x00 0x2c 0xf0 0x00 0x21 0x1c 0x06 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmll0, [p1], #64; NOPX; NOPM; NOPV +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_336 + 11296 0x1d 0x70 0xa0 0xf8 MOV lc, r1 + 11300 0x00 0x00 0x21 0xf8 0x60 0x44 MOVXM ls, #11312 + 11306 0x00 0x00 0x26 0xf9 0x40 0x44 MOVXM le, #11424 +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_352 +.loop_nesting 1 +.begin_of_loop + 11312 0x38 0x1c 0x34 0x18 VLDB x0, [p0], #64 + 11316 0x00 0x00 NOPX + 11318 0x00 0x2c 0xf0 0x00 0x10 0x00 0x01 0xa5 0x7e 0xba NOPA; NOPB; NOPM + 11328 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 11344 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 11360 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 11376 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 11392 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x00 0x00 0xe8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV + 11408 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLE_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_464 +.end_of_loop + 11424 0x00 0x2c 0xf0 0x00 0x21 0x1c 0x06 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmll0, [p1], #64; NOPX; NOPM; NOPV +.loop_nesting 0 + 11440 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot + 11444 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11446 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11448 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11450 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11452 0x00 0x01 0x67 0x98 NOPA +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_496 + 11456 0x00 0x07 0xc4 0xc5 0x00 0x44 MOVXM p2, #508544 + 11462 0x02 0x04 0x16 0x98 LDA r0, [p2] + 11466 0x00 0x00 NOPX + 11468 0x00 0x00 NOPX + 11470 0x00 0x00 NOPX + 11472 0x00 0x00 NOPX + 11474 0x00 0x00 NOPX + 11476 0x00 0x00 NOPX + 11478 0x00 0x17 0xa0 0x00 0x01 0x84 JZ r0, #12096 +.delay_slot +.swstall delay_slot + 11484 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11486 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11488 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11490 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11492 0x00 0x00 NOPX + 11494 0x04 0x94 0x80 0x00 0x01 0xf2 0x31 0x42 0x10 0xba MOVA m5, #36; MOVXM p4, #508548 + 11504 0x83 0x86 0xd0 0x00 0x51 0x08 0x4f 0xfd 0x58 0xba LDA r1, [p4], #4; MOVX r5, #8; MOV r2, #-3 + 11514 0x95 0x12 0xd0 0x00 0x30 0x2a 0x60 0x00 0x58 0xba LDA r4, [p4], m5; MOVX r3, #1; MOV dc4, #0 + 11524 0x9d 0x90 0xd0 0x10 0x4b 0x00 0x60 0x8a 0x00 0x20 0x58 0x76 LDA m1, [p4], #-8; MOVS dc0, dc4; MOVX r6, #4; MOV m4, #32 + 11536 0x9d 0x94 0xd1 0x10 0x4b 0x00 0x0f 0xf8 0xe8 0x34 0x58 0x76 LDA dn1, [p4], #-8; MOVS dc1, dc4; ADD r0, r0, #-1; MOV r7, #52 + 11548 0x87 0x98 0xd5 0x10 0x4b 0x00 0x00 0x09 0x36 0xb8 0x10 0x76 LDA dj1, [p4], #12; MOVS dc5, dc4; MOVXM p2, #11632 + 11560 0x9d 0xd4 0xd0 0x00 0x00 0x09 0xb6 0xc8 0x10 0xba LDA dn5, [p4], #-8; MOVXM p3, #11664 + 11570 0x91 0x58 0xd0 0x41 0xaa 0x2c LDA dj5, [p4], m4; MOVX r16, #53 + 11576 0x9d 0x80 0xd0 0x0b 0xb0 0xe4 0xa8 0x7f 0xc8 0xba LDA m0, [p4], #-8; LTU r27, r5, r1; ADD.NC r5, r1, #-1 + 11586 0x9d 0x84 0xd0 0x0b 0x11 0x6c 0xa9 0x3f 0xc8 0xba LDA dn0, [p4], #-8; LSHL r17, r5, r2; ADD.NC r5, r4, #-1 + 11596 0x87 0x88 0xd0 0x0a 0x21 0x6c 0xac 0x40 0x48 0xba LDA dj0, [p4], #12; LSHL r2, r5, r2; ADD.NC r5, r17, #1 + 11606 0x80 0xc4 0xd0 0x06 0x52 0x90 0x68 0x80 0x48 0xba LDA dn4, [p4]; SEL.EQZ r5, r3, r5, r27; ADD.NC r3, r2, #1 + 11616 0x9c 0xc8 0xd0 0x00 0x20 0x01 0x5b 0x0a 0x5f 0xf8 0x01 0xa5 0x78 0x00 0x00 0xe1 LDA dj4, [p4, #-8]; NOPB; NOPS; ADD r5, r5, #-1; NOPM; NOPV +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_672 +.loop_nesting 1 + 11632 0x08 0x17 0x98 0x00 0x01 0x84 JZ r1, #12080 +.delay_slot +.swstall delay_slot + 11638 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11640 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11642 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11644 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11646 0x00 0x00 NOPX + 11648 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x02 0x29 0x50 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; MOV r17, r5; NOPV +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_704 +.loop_nesting 2 + 11664 0x20 0x17 0x90 0x00 0x01 0x84 JZ r4, #12064 +.delay_slot +.swstall delay_slot + 11670 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11672 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11674 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11676 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11678 0x00 0x00 NOPX + 11680 0x10 0xe4 0x6c 0x98 LTU r18, r3, r6 + 11684 0x90 0x17 0x48 0x40 0x01 0x84 JNZ r18, #11920 +.delay_slot +.swstall delay_slot + 11690 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11692 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11694 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11696 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11698 0x00 0x00 NOPX + 11700 0x00 0x28 0x68 0x00 0x00 0x08 0x7f 0x10 0x10 0x3a VLDB x0, [p0, #64]; MOVXM ls, #11808 + 11710 0x00 0x70 0xe8 0x00 0x00 0x09 0xbf 0x20 0x10 0x3a VLDB.3D x1, [p0], d1; MOVXM le, #11840 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 11720 0x1d 0x71 0xfe 0x98 ADD.NC lc, r3, #-3 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 11724 0x38 0x14 0x34 0x18 VLDB x0, [p0, #64] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11728 0x00 0x2c 0xf0 0x70 0xe8 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB.3D x1, [p0], d1; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11744 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11760 0x00 0x2c 0xf0 0x28 0x68 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB x0, [p0, #64]; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11776 0x00 0x2c 0xf0 0x70 0xe8 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB.3D x1, [p0], d1; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11792 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x44 0x0e 0xe8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmlh0, x1, x0, r7; NOPV +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_848 +.loop_nesting 3 +.begin_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11808 0x00 0x2c 0xf0 0x28 0x6c 0x84 0x8b 0x00 0x00 0x00 0x04 0x20 0xe8 0x00 0x00 0xe1 NOPA; VLDB x0, [p0, #64]; MOVS p4, p1; NOPX; VSHUFFLE bmll0, x1, x0, r16; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11824 0x00 0x2c 0xf0 0x70 0xe9 0x18 0x26 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB.3D x1, [p0], d1; VST.3D bmlh0, [p1], d0; NOPX; NOPM; NOPV +.label ZLE_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_880 +.end_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11840 0x00 0x2c 0xf0 0x00 0x24 0x14 0x06 0x80 0x00 0x00 0x44 0x0e 0xe8 0x00 0x00 0xe1 NOPA; NOPB; VST bmll0, [p4, #64]; NOPX; VSHUFFLE bmlh0, x1, x0, r7; NOPV +.loop_nesting 2 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11856 0x90 0x91 0x60 0x00 0x04 0x20 0xe0 0x02 MOVS p4, p1; VSHUFFLE bmll0, x1, x0, r16 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11864 0x09 0x18 0x26 0x98 VST.3D bmlh0, [p1], d0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11868 0x82 0x80 0xd0 0x00 0x44 0x0e 0xe0 0x02 VST bmll0, [p4, #64]; VSHUFFLE bmlh0, x1, x0, r7 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11876 0x18 0x08 0x41 0xd8 VSHUFFLE bmll0, x1, x0, r16 + 11880 0x00 0x17 0x90 0x00 0x00 0x84 J #12064 +.delay_slot + 11886 0x23 0x04 0xd0 0x02 0x31 0x60 0x70 0x02 VST.3D bmlh0, [p1], d0; MOV p4, p1 +.delay_slot + 11894 0x82 0x80 0xd0 0x00 0x44 0x0e 0xe0 0x02 VST bmll0, [p4, #64]; VSHUFFLE bmlh0, x1, x0, r7 +.delay_slot + 11902 0x90 0x91 0x60 0x00 0x04 0x20 0xe0 0x02 MOVS p4, p1; VSHUFFLE bmll0, x1, x0, r16 +.delay_slot + 11910 0x09 0x18 0x26 0x98 VST.3D bmlh0, [p1], d0 +.delay_slot + 11914 0x00 0x2c 0xf8 0x28 0x0d 0x0c NOPA; VST bmll0, [p4, #64] +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_960 + 11920 0x00 0x00 0x21 0xfd 0x40 0x44 MOVXM ls, #11936 + 11926 0x00 0x00 0x26 0xfe 0x20 0x44 MOVXM le, #12048 + 11932 0x1d 0x71 0x00 0x98 ADD.NC lc, r2, #1 +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_976 +.loop_nesting 3 +.begin_of_loop + 11936 0x02 0x86 0x88 0xc5 0x81 0xf4 VLDB x0, [p0, #64]; MOV p4, p1 + 11942 0x38 0x38 0x74 0x18 VLDB.3D x1, [p0], d1 + 11946 0x00 0x00 NOPX + 11948 0x00 0x00 NOPX + 11950 0x00 0x00 NOPX + 11952 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 11968 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 11984 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 12000 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x44 0x0e 0xe8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmlh0, x1, x0, r7; NOPV + 12016 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x04 0x20 0xe8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x1, x0, r16; NOPV + 12032 0x00 0x2c 0xf0 0x00 0x21 0x18 0x26 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST.3D bmlh0, [p1], d0; NOPX; NOPM; NOPV +.label ZLE_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1088 +.end_of_loop + 12048 0x00 0x2c 0xf0 0x00 0x24 0x14 0x06 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmll0, [p4, #64]; NOPX; NOPM; NOPV +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1104 +.loop_nesting 2 + 12064 0x14 0x62 0xe0 0x18 JNZD r17, r17, p3 +.delay_slot +.swstall delay_slot + 12068 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12070 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12072 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12074 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12076 0x00 0x01 0x67 0x98 NOPA +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1120 +.loop_nesting 1 + 12080 0x10 0x00 0xa0 0x18 JNZD r0, r0, p2 +.delay_slot +.swstall delay_slot + 12084 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12086 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12088 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12090 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12092 0x00 0x01 0x67 0x98 NOPA +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1136 +.loop_nesting 0 + 12096 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot + 12100 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12102 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12104 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12106 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12108 0x00 0x00 NOPX +.label _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params__end +.label __Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params___func_end0 + +.text_segment PM 12112 +.label __ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj___func_begin0 +.label _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj +.function_start + 12112 0x1b 0x6c 0xc0 0xf8 MOV p3, p6 + 12116 0xd0 0x91 0x60 0x00 0x01 0xf0 0xb1 0x0a 0x11 0x3a MOVS p6, p1; MOVXM p1, #508436 + 12126 0x01 0x06 0x16 0x98 LDA r16, [p1] + 12130 0x00 0x00 NOPX + 12132 0x00 0x00 NOPX + 12134 0x00 0x00 NOPX + 12136 0x00 0x00 NOPX + 12138 0x00 0x00 NOPX + 12140 0x00 0x00 NOPX + 12142 0x80 0x17 0xd8 0x40 0x01 0x84 JNZ r16, #12208 +.delay_slot + 12148 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 +.delay_slot + 12154 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12] +.delay_slot + 12158 0xf0 0x11 0x60 0x00 0xb7 0x60 0x70 0x02 MOVS p7, p0; MOV p1, p7 +.delay_slot + 12166 0x0f 0xf9 0x9d 0x98 ST p3, [sp, #-8] +.delay_slot + 12170 0xff 0x93 0xb0 0x00 0x01 0xf0 0x31 0x40 0x11 0x3a ST p1, [sp, #-4]; MOVXM p0, #508544 +.no_stack_arguments + 12180 0x00 0x14 0xe0 0x00 0x01 0x04 JL #10688 +.delay_slot + 12186 0x19 0x64 0xc0 0xf8 MOV p1, p2 +.delay_slot +.swstall delay_slot + 12190 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12192 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12194 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12196 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.label TGT_F_ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj_96 +.return_address + 12208 0xe0 0xc2 0xd0 0x00 0x01 0xf3 0xb1 0x4a 0x10 0xba LDA r16, [p7]; MOVXM p7, #508564 + 12218 0x07 0x06 0x36 0x98 LDA r17, [p7] + 12222 0x06 0x04 0x9e 0x98 LDA p1, [p6] + 12226 0x00 0x00 NOPX +.no_stack_arguments + 12228 0x00 0x15 0x68 0x00 0x01 0x04 JL #10960 +.delay_slot + 12234 0x10 0x24 0x05 0x18 MOVX r18, #1 +.delay_slot + 12238 0x00 0x07 0xc4 0xc5 0x00 0x44 MOVXM p2, #508544 +.delay_slot + 12244 0x1e 0x64 0xc0 0xf8 MOV p6, p2 +.delay_slot + 12248 0x14 0x63 0x2d 0x98 LSHL r17, r17, r18 +.delay_slot + 12252 0x18 0x68 0xc1 0x58 ADD.NC p0, r17, r16 +.return_address + 12256 0xfe 0x87 0x20 0x00 0x01 0xf1 0x31 0x0a 0x10 0xba LDA lr, [sp, #-12]; MOVXM p2, #508436 + 12266 0x40 0xc2 0xd0 0x60 0x02 0x2c LDA r16, [p2]; MOVX r24, #0 + 12272 0x06 0x66 0x36 0x98 LDA r17, [p6, #24] + 12276 0x07 0xfb 0x19 0x18 LDA p6, [sp, #-8] + 12280 0x07 0xff 0x99 0x18 LDA p7, [sp, #-4] + 12284 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 + 12290 0x00 0x00 NOPX + 12292 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 12296 0x14 0x20 0x07 0x18 ADD r16, r16, #1 +.delay_slot + 12300 0x14 0x77 0x07 0x98 EQ r27, r17, r16 +.delay_slot + 12304 0x14 0x21 0x82 0x18 SEL.EQZ r16, r16, r24, r27 +.delay_slot + 12308 0x0a 0x06 0x11 0x98 ST r16, [p2] +.delay_slot +.swstall delay_slot + 12312 0x00 0x00 NOPX +.label _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj__end +.label __ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj___func_end0 + +.text_segment PM 12320 +.label __Z14_b7835_wrapperPPv___func_begin0 +.label _Z14_b7835_wrapperPPv +.function_start + 12320 0x19 0x60 0xc0 0xf8 MOV p1, p0 + 12324 0x01 0x1c 0x1e 0x98 LDA p0, [p1], #4 + 12328 0x01 0x15 0x1e 0x98 LDA p2, [p1, #4] + 12332 0x01 0x04 0x9e 0x98 LDA p1, [p1] +.tail_call + 12336 0x00 0x17 0xa8 0x00 0x00 0x84 J #12112 +.delay_slot +.swstall delay_slot + 12342 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12344 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12346 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12348 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12350 0x00 0x00 NOPX +.label _Z14_b7835_wrapperPPv__end +.label __Z14_b7835_wrapperPPv___func_end0 +.label __Z15_b14160_wrapperPPv___func_begin0 +.label _Z15_b14160_wrapperPPv +.function_start + 12352 0x1a 0x60 0xc0 0xf8 MOV p2, p0 + 12356 0x02 0x1c 0x1e 0x98 LDA p0, [p2], #4 + 12360 0x02 0x2c 0x9e 0x98 LDA p1, [p2], #8 + 12364 0x02 0xf5 0x9e 0x98 LDA p3, [p2, #-4] + 12368 0x02 0x05 0x1e 0x98 LDA p2, [p2] +.tail_call + 12372 0x00 0x10 0xc8 0x00 0x00 0x84 J #8592 +.delay_slot +.swstall delay_slot + 12378 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12380 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12382 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12384 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12386 0x00 0x00 NOPX +.label _Z15_b14160_wrapperPPv__end +.label __Z15_b14160_wrapperPPv___func_end0 + +.text_segment PM 12400 +.label __Z15_b13739_wrapperPPv___func_begin0 +.label _Z15_b13739_wrapperPPv +.function_start + 12400 0x19 0x60 0xc0 0xf8 MOV p1, p0 + 12404 0x01 0x2c 0x1e 0x98 LDA p0, [p1], #8 + 12408 0x01 0xf5 0x1e 0x98 LDA p2, [p1, #-4] + 12412 0x01 0x04 0x9e 0x98 LDA p1, [p1] +.tail_call + 12416 0x00 0x07 0xa0 0x00 0x00 0x84 J #3904 +.delay_slot +.swstall delay_slot + 12422 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12424 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12426 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12428 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12430 0x00 0x00 NOPX +.label _Z15_b13739_wrapperPPv__end +.label __Z15_b13739_wrapperPPv___func_end0 +.label __Z15_b13744_wrapperPPv___func_begin0 +.label _Z15_b13744_wrapperPPv +.function_start + 12432 0x19 0x60 0xc0 0xf8 MOV p1, p0 + 12436 0x01 0x2c 0x1e 0x98 LDA p0, [p1], #8 + 12440 0x01 0xf5 0x1e 0x98 LDA p2, [p1, #-4] + 12444 0x01 0x04 0x9e 0x98 LDA p1, [p1] +.tail_call + 12448 0x00 0x09 0x80 0x00 0x00 0x84 J #4864 +.delay_slot +.swstall delay_slot + 12454 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12456 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12458 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12460 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12462 0x00 0x00 NOPX +.label _Z15_b13744_wrapperPPv__end +.label __Z15_b13744_wrapperPPv___func_end0 +.label _ZN12me_primitive10udiv_dstepEjjRjS0_ +.function_start + 12464 0x00 0xc0 0x2f 0xa0 0x41 0xe4 MOVX r3, #0; MOV r31, r0 + 12470 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12474 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12478 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12482 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12486 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12490 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12494 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12498 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12502 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12506 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12510 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12514 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12518 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12522 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12526 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12530 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12534 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12538 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12542 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12546 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12550 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12554 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12558 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12562 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12566 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12570 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12574 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12578 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12582 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 12586 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 +.delay_slot + 12590 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 +.delay_slot + 12594 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 +.delay_slot + 12598 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 +.delay_slot + 12602 0x18 0x9f 0xa0 0xf8 MOV r2, r31 +.label _ZN12me_primitive10udiv_dstepEjjRjS0___end + +.text_segment PM 12608 +.label memset +.function_start + 12608 0x08 0x18 0xf0 0x00 0x01 0x84 JZ r1, #12768 +.delay_slot + 12614 0x18 0x62 0xc0 0xf8 MOV p0, p1 +.delay_slot +.swstall delay_slot + 12618 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12620 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12622 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12624 0x00 0x00 NOPX + 12626 0x30 0x11 0x60 0x02 0xb8 0x50 0x70 0x02 MOVS p1, p0; MOV lc, r1 + 12634 0x00 0x00 0x31 0xe2 0xe0 0x44 MOVXM ls, #12656 + 12640 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x0d 0xb8 0xe8 0x10 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVXM le, #12752; NOPV +.label ZLS_Fmemset_48 +.loop_nesting 1 +.begin_of_loop + 12656 0x23 0x80 0xe0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 ST.s8 r0, [p1], #1; NOPB; NOPS; NOPX; NOPM; NOPV + 12672 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 12688 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 12704 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 12720 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 12736 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLE_Fmemset_144 +.end_of_loop + 12752 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_Fmemset_160 +.loop_nesting 0 + 12768 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot + 12772 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12774 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12776 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12778 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12780 0x00 0x00 NOPX +.label memset__end + +.bss_segment DMb 508416 24 + +.data_segment DMb 508440 +.label _ZL8num_iter + 0x1 + 0x0 + 0x0 + 0x0 + +.bss_segment DMb 508444 4 + +.bss_segment DMb 508448 1 + +.rodata_segment DMb 508480 +.label _ZL20g_uniformKernelFuncs + 0x70 + 0x23 + 0x0 + 0x0 + 0x90 + 0x25 + 0x0 + 0x0 + 0xa0 + 0x29 + 0x0 + 0x0 + 0x20 + 0x30 + 0x0 + 0x0 + 0x40 + 0x30 + 0x0 + 0x0 + 0x70 + 0x30 + 0x0 + 0x0 + 0x90 + 0x30 + 0x0 + 0x0 + +.bss_segment DMb 508544 576 + +.stack DM_stack 507264 508352 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable77/Release/0_0_reloadable77.map b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable77/Release/0_0_reloadable77.map new file mode 100644 index 0000000000000000000000000000000000000000..296f44b64d7be238f24615c125631fb5e9cf8857 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable77/Release/0_0_reloadable77.map @@ -0,0 +1,314 @@ + +// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri May 30 11:35:26 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// bridge -o../Release/0_0_reloadable77 ../Release/0_0_reloadable77.o -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/isg -g -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable77.bcf -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/softfloat/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork3591677 -pme + +// Release: ipp V-2024.06-TGT-241219 + +Memory map for memory 'DM_stack': + + Size = 1048576 + Width = 8 bits + Offset = 0 + Used = 1088 + + 0x0007bd80..0x0007c1bf ( 1088 items) : Stack + +Memory map for memory 'DMb': + + Size = 1048576 + Width = 8 bits + Offset = 0 + Used = 1725 + + 0x00000000..0x0007bd7f ( 507264 items) : Reserved + 0x0007bd80..0x0007c1bf ( 1088 items) : Stack + 0x0007c1c0..0x0007c1ff ( 64 items) : Reserved + 0x0007c200..0x0007c203 ( 4 items) : ../Release/0_0_reloadable77.o::_ZL9curr_iter (Data, Local, .bss.DMb.4) + 0x0007c204..0x0007c207 ( 4 items) : ../Release/0_0_reloadable77.o::_ZL8core_row (Data, Local, .bss.DMb.4) + 0x0007c208..0x0007c20b ( 4 items) : ../Release/0_0_reloadable77.o::_ZL11ifm1_offset (Data, Local, .bss.DMb.4) + 0x0007c20c..0x0007c20f ( 4 items) : ../Release/0_0_reloadable77.o::_ZL11ifm2_offset (Data, Local, .bss.DMb.4) + 0x0007c210..0x0007c213 ( 4 items) : ../Release/0_0_reloadable77.o::_ZL10ifmsv_size (Data, Local, .bss.DMb.4) + 0x0007c214..0x0007c217 ( 4 items) : ../Release/0_0_reloadable77.o::_ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6kn_rep (Data, Weak, .bss.DMb.4) + 0x0007c218..0x0007c21b ( 4 items) : ../Release/0_0_reloadable77.o::_ZL8num_iter (Data, Local, .data.DMb.4) + 0x0007c21c..0x0007c21f ( 4 items) : me_defs.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive11control_satE (Data, Global, .bss.DMb.4) + 0x0007c220..0x0007c220 ( 1 items) : me_defs.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive11control_rndE (Data, Global, .bss.DMb.1) + 0x0007c240..0x0007c25b ( 28 items) : ../Release/0_0_reloadable77.o::_ZL20g_uniformKernelFuncs (Data, Local, .rodata.DMb.64) + + Called functions : _Z15_b13749_wrapperPPv + _Z14_b8148_wrapperPPv + _Z14_b8170_wrapperPPv + _Z14_b7835_wrapperPPv + _Z15_b14160_wrapperPPv + _Z15_b13739_wrapperPPv + _Z15_b13744_wrapperPPv + + 0x0007c280..0x0007c2ff ( 128 items) : ../Release/0_0_reloadable77.o::_ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6params (Data, Weak, .bss.DMb.64) + 0x0007c300..0x0007c33f ( 64 items) : ../Release/0_0_reloadable77.o::add1d_attribute_broadcasting_params (Data, Global, .bss.DMb.64) + 0x0007c340..0x0007c37f ( 64 items) : ../Release/0_0_reloadable77.o::mul1d_params (Data, Global, .bss.DMb.64) + 0x0007c380..0x0007c3bf ( 64 items) : ../Release/0_0_reloadable77.o::sigmoid1d_params (Data, Global, .bss.DMb.64) + 0x0007c3c0..0x0007c4bf ( 256 items) : ../Release/0_0_reloadable77.o::conv2d_dw_params (Data, Global, .bss.DMb.64) + 0x0007ca00..0x000fffff ( 538112 items) : Reserved + +Memory map for memory 'PM': + + Size = 1048576 + Width = 8 bits + Offset = 0 + Used = 9978 + + 0x00000000..0x000009df ( 2528 items) : Reserved + 0x000009e0..0x00000c01 ( 546 items) : ../Release/0_0_reloadable77.o::_Z13kernelWrapperPPvjjjj (Function, Global, .text) (stack frame size = 64) + + Referenced symbols: _ZL20g_uniformKernelFuncs + + 0x00000c10..0x00000c27 ( 24 items) : ../Release/0_0_reloadable77.o::_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: add1d_attribute_broadcasting_params + + 0x00000c30..0x00000ce1 ( 178 items) : ../Release/0_0_reloadable77.o::_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv (Function, Weak, .text) (stack frame size = 64) + + Called functions : _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E + + Referenced symbols: add1d_attribute_broadcasting_params + + 0x00000cf0..0x00000d27 ( 56 items) : ../Release/0_0_reloadable77.o::_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: add1d_attribute_broadcasting_params + + 0x00000d30..0x00000d6b ( 60 items) : ../Release/0_0_reloadable77.o::_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv (Function, Weak, .text) (stack frame size = 64) + + Called functions : _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv + _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E + + Referenced symbols: add1d_attribute_broadcasting_params + + 0x00000d70..0x00000eb9 ( 330 items) : ../Release/0_0_reloadable77.o::_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: add1d_attribute_broadcasting_params + _ZN12me_primitive11control_rndE + + 0x00000ec0..0x00000f31 ( 114 items) : ../Release/0_0_reloadable77.o::_ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 128) + + Called functions : _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E + + Referenced symbols: add1d_attribute_broadcasting_params + + 0x00000f40..0x00001127 ( 488 items) : ../Release/0_0_reloadable77.o::_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 64) + + Called functions : _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv + _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + add1d_attribute_broadcasting_params + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL11ifm1_offset + _ZL8num_iter + + 0x00001130..0x00001173 ( 68 items) : ../Release/0_0_reloadable77.o::_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: sigmoid1d_params + + 0x00001180..0x000012f9 ( 378 items) : ../Release/0_0_reloadable77.o::_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: sigmoid1d_params + _ZN12me_primitive11control_rndE + + 0x00001300..0x000014e7 ( 488 items) : ../Release/0_0_reloadable77.o::_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 64) + + Called functions : _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv + _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + sigmoid1d_params + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL11ifm1_offset + _ZL8num_iter + + 0x000014f0..0x00001507 ( 24 items) : ../Release/0_0_reloadable77.o::_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: mul1d_params + + 0x00001510..0x000015a9 ( 154 items) : ../Release/0_0_reloadable77.o::_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 64) + + Called functions : _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E + + Referenced symbols: mul1d_params + + 0x000015b0..0x000016e3 ( 308 items) : ../Release/0_0_reloadable77.o::_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: mul1d_params + _ZN12me_primitive11control_rndE + + 0x000016f0..0x00001949 ( 602 items) : ../Release/0_0_reloadable77.o::_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE (Function, Global, .text) (stack frame size = 64) + + Called functions : _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + mul1d_params + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL11ifm1_offset + _ZL11ifm2_offset + _ZL8num_iter + + 0x00001950..0x00001c67 ( 792 items) : ../Release/0_0_reloadable77.o::_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh (Function, Local, .text) (stack frame size = 64) + + Called functions : _ZN12me_primitive10udiv_dstepEjjRjS0_ + + Referenced symbols: conv2d_dw_params + _ZN12me_primitive11control_rndE + + 0x00001c70..0x00001f11 ( 674 items) : ../Release/0_0_reloadable77.o::_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: conv2d_dw_params + _ZN12me_primitive11control_rndE + + 0x00001f20..0x0000201d ( 254 items) : ../Release/0_0_reloadable77.o::_Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: conv2d_dw_params + + 0x00002020..0x00002187 ( 360 items) : ../Release/0_0_reloadable77.o::_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params (Function, Weak, .text) (stack frame size = 64) + + Called functions : _Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params + _Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params + + Referenced symbols: conv2d_dw_params + + 0x00002190..0x0000236d ( 478 items) : ../Release/0_0_reloadable77.o::_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 128) + + Called functions : _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh + _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL8num_iter + _ZL10ifmsv_size + conv2d_dw_params + + 0x00002370..0x00002393 ( 36 items) : ../Release/0_0_reloadable77.o::_Z15_b13749_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE + + 0x000023a0..0x00002583 ( 484 items) : ../Release/0_0_reloadable77.o::_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj (Function, Weak, .text) (stack frame size = 64) + + Called functions : memset + + 0x00002590..0x000025af ( 32 items) : ../Release/0_0_reloadable77.o::_Z14_b8148_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj + + 0x000025b0..0x00002651 ( 162 items) : ../Release/0_0_reloadable77.o::_ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj (Function, Local, .text) (stack frame size = 0) + 0x00002660..0x000026b3 ( 84 items) : ../Release/0_0_reloadable77.o::_ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params (Function, Local, .text) (stack frame size = 0) + 0x000026c0..0x000026f3 ( 52 items) : ../Release/0_0_reloadable77.o::_ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj (Function, Local, .text) (stack frame size = 0) + + Called functions : _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj + _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params + + 0x00002700..0x000028df ( 480 items) : ../Release/0_0_reloadable77.o::_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params (Function, Weak, .text) (stack frame size = 0) + 0x000028e0..0x00002991 ( 178 items) : ../Release/0_0_reloadable77.o::_ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj (Function, Weak, .text) (stack frame size = 128) + + Called functions : _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj + _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params + + 0x000029a0..0x000029bf ( 32 items) : ../Release/0_0_reloadable77.o::_Z14_b8170_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj + + 0x000029c0..0x00002ac5 ( 262 items) : ../Release/0_0_reloadable77.o::_Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: _ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6params + + 0x00002ad0..0x00002f4d ( 1150 items) : ../Release/0_0_reloadable77.o::_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: _ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6params + + 0x00002f50..0x00003019 ( 202 items) : ../Release/0_0_reloadable77.o::_ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj (Function, Weak, .text) (stack frame size = 64) + + Called functions : _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj + _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params + + Referenced symbols: _ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6kn_rep + _ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6params + + 0x00003020..0x0000303f ( 32 items) : ../Release/0_0_reloadable77.o::_Z14_b7835_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj + + 0x00003040..0x00003063 ( 36 items) : ../Release/0_0_reloadable77.o::_Z15_b14160_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + + 0x00003070..0x0000308f ( 32 items) : ../Release/0_0_reloadable77.o::_Z15_b13739_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + + 0x00003090..0x000030af ( 32 items) : ../Release/0_0_reloadable77.o::_Z15_b13744_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + + 0x000030b0..0x0000313d ( 142 items) : me_div.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive10udiv_dstepEjjRjS0_ (Function, Global, .text) (stack frame size = 0) + 0x00003140..0x000031ed ( 174 items) : string.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/lib/Release/libc.a)::memset (Function, Global, .text) (stack frame size = 0) + +External symbols: + + __dso_handle = 0x0 + _ctors_end = 0x0 + _ctors_start = 0x0 + _dtors_end = 0x0 + _dtors_start = 0x0 + _pc_end = 0x31ee + _pc_start = 0x9e0 + _sp_end_DM_stack = 0x7c1c0 + _sp_start_DM_stack = 0x7bd80 + +Section summary for memory 'DM_stack': + + .stack File + ---------- ---------- + 1088 + ---------- ---------- + 1088 Total + +Section summary for memory 'DMb': + + .bss .data .rodata File + ---------- ---------- ---------- ---------- + 600 4 28 ../Release/0_0_reloadable77.o + 5 0 0 me_defs.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release/libme.a) + ---------- ---------- ---------- ---------- + 605 4 28 Total + +Section summary for memory 'PM': + + .text File + ---------- ---------- + 9662 ../Release/0_0_reloadable77.o + 142 me_div.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release/libme.a) + 174 string.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/lib/Release/libc.a) + ---------- ---------- + 9978 Total + +File summary: + +../Release/0_0_reloadable77.o + DMb 632 + PM 9662 + +me_defs.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release/libme.a) + DMb 5 + +me_div.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release/libme.a) + PM 142 + +string.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/lib/Release/libc.a) + PM 174 + diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable77/xlopt.log b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable77/xlopt.log new file mode 100644 index 0000000000000000000000000000000000000000..62897a4bf385fd8652ab8542743948ead9124df5 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable77/xlopt.log @@ -0,0 +1,461 @@ + + +--------------- FILTER ANALYSIS INFO LOG --------------- + +Reading Header IR from ir/_header.ll + +-------------------------------------------------------- + +Add module pass *1*{anonymous}::GuidancePass +Add module pass *1*{anonymous}::ChessOptionsPass +Add module pass *1*{anonymous}::DisableInliningInMainPass +Add module pass *1*cdno::xlopt::AIEMergeSubWordStoresOpt +Add module pass *1*{anonymous}::XLModuleAdaptor +Add module pass *1*{anonymous}::IpConstPropPass +Add module pass *1*{anonymous}::XLModuleAdaptor +Add module pass *1*{anonymous}::XLModuleAdaptor +Add module pass *1*{anonymous}::XLModuleAdaptor +Add module pass *1*{anonymous}::XLModuleAdaptor<{anonymous}::AIELoopInfoPass> +Add module pass *1*cdno::xlopt::AIEAnnotatePragmaPass +Add module pass *1*{anonymous}::XLModuleAdaptor<{anonymous}::AIELoopPeelPass> +Add module pass *1*{anonymous}::AIEAliasAnalysisPass + + +--------------- MEMORY MANAGEMENT GUIDANCE LOG --------------- + +ALIGNMENT_HINT: Alignment of global array g_uniformKernelFuncs is 4 bytes; automatically aligning it to 64 bytes. +ALIGNMENT_HINT: Alignment of global array aie::detail::transpose_bits_impl<16u, bfloat16, 64u>::shuffle_modes is 4 bytes; automatically aligning it to 64 bytes. +SIZE_HINT: Global array sigmoid_lut<0u, 256u>::data_ab is 1024 bytes. Consider making it mapper-managed LUT or memory buffer. +SIZE_HINT: Global array sigmoid_lut<0u, 256u>::data_cd is 1024 bytes. Consider making it mapper-managed LUT or memory buffer. +SIZE_HINT: Global array sigmoid_lut_fp16<0u, 256u>::data_ab is 1024 bytes. Consider making it mapper-managed LUT or memory buffer. +SIZE_HINT: Global array sigmoid_lut_fp16<0u, 256u>::data_cd is 1024 bytes. Consider making it mapper-managed LUT or memory buffer. +SIZE_HINT: Global array gelu_lut_32<0u, 512u>::data_ab is 2048 bytes. Consider making it mapper-managed LUT or memory buffer. +SIZE_HINT: Global array gelu_lut_32<0u, 512u>::data_cd is 2048 bytes. Consider making it mapper-managed LUT or memory buffer. +SIZE_HINT: Global array tanh_lut<0u, 512u>::data_ab is 2048 bytes. Consider making it mapper-managed LUT or memory buffer. +SIZE_HINT: Global array tanh_lut<0u, 512u>::data_cd is 2048 bytes. Consider making it mapper-managed LUT or memory buffer. +SIZE_HINT: Global array log_f32_lut<256u>::fraction_table_ab_f32 is 1024 bytes. Consider making it mapper-managed LUT or memory buffer. +SIZE_HINT: Global array log_f32_lut<256u>::fraction_table_cd_f32 is 1024 bytes. Consider making it mapper-managed LUT or memory buffer. +SIZE_HINT: Global array exp2_lut<512u>::exp2_table_ab is 1024 bytes. Consider making it mapper-managed LUT or memory buffer. +SIZE_HINT: Global array exp2_lut<512u>::exp2_table_cd is 1024 bytes. Consider making it mapper-managed LUT or memory buffer. + +-------------------------------------------------------------- + + + +--------------- MERGING SUBWORD STORES OPT LOG --------------- + + + +--------------- LOOP STATISTICS : _ZN18conv2d_bf16_paramsC2Ev --------------- + +Total loops = 1 +Loops with prepare for pipelining pragma = 0 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 0 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +----------------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh --------------- + +Total loops = 1 +Loops with prepare for pipelining pragma = 0 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 0 +Loops with max range pragma = 0 +Loops with known trip count = 1 + +------------------------------------------------------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _Z14conv2d_genericILh1EL5act_t0ELb0ELb1ELb0E8bfloat16EvPS1_S2_S2_S2_R18conv2d_bf16_params10out_mode_t --------------- + +Total loops = 4 +Loops with prepare for pipelining pragma = 4 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 4 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +------------------------------------------------------------------------------------------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams --------------- + +Total loops = 1 +Loops with prepare for pipelining pragma = 1 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 1 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +------------------------------------------------------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E --------------- + +Total loops = 1 +Loops with prepare for pipelining pragma = 1 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 1 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + + + +--------------- LOOP STATISTICS : _ZNSt3__25arrayIN3aie6vectorI8bfloat16Lj32EEELj2EEC2Ev --------------- + +Total loops = 1 +Loops with prepare for pipelining pragma = 0 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 0 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +-------------------------------------------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE --------------- + +Total loops = 1 +Loops with prepare for pipelining pragma = 1 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 1 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +--------------------------------------------------------------------------------------------------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E --------------- + +Total loops = 1 +Loops with prepare for pipelining pragma = 1 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 1 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E --------------- + +Total loops = 1 +Loops with prepare for pipelining pragma = 1 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 1 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh --------------- + +Total loops = 1 +Loops with prepare for pipelining pragma = 0 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 0 +Loops with max range pragma = 0 +Loops with known trip count = 1 + +-------------------------------------------------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params --------------- + +Total loops = 2 +Loops with prepare for pipelining pragma = 2 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 2 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +-------------------------------------------------------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params --------------- + +Total loops = 1 +Loops with prepare for pipelining pragma = 1 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 1 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +--------------------------------------------------------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _Z8init_accILt1EEvPaS0_iii --------------- + +Total loops = 2 +Loops with prepare for pipelining pragma = 0 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 2 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +---------------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _ZL12gemm_bf16x16ILj2ELj2ELj1EEvP8bfloat16S1_PaR10MMultIncrsb --------------- + +Total loops = 2 +Loops with prepare for pipelining pragma = 2 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 2 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +--------------------------------------------------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _Z12post_processPai --------------- + +Total loops = 1 +Loops with prepare for pipelining pragma = 0 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 1 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +--------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _ZL14transpose_bf16P8bfloat16S0_j --------------- + +Total loops = 1 +Loops with prepare for pipelining pragma = 1 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 1 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +----------------------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _ZNSt3__25arrayIN3aie4mmulILj8ELj8ELj8E8bfloat16S3_7accautoEELj4EEC2Ev --------------- + +Total loops = 1 +Loops with prepare for pipelining pragma = 0 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 0 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +------------------------------------------------------------------------------------------------------------------------ + + + +--------------- LOOP STATISTICS : _ZNSt3__25arrayIN3aie6vectorI8bfloat16Lj64EEELj2EEC2Ev --------------- + +Total loops = 1 +Loops with prepare for pipelining pragma = 0 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 0 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +-------------------------------------------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _ZNSt3__25arrayIN3aie6detail10accum_baseILNS2_10AccumClassE2ELj32ELj32EEELj2EEC2Ev --------------- + +Total loops = 1 +Loops with prepare for pipelining pragma = 0 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 0 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +------------------------------------------------------------------------------------------------------------------------------------ + + + +--------------- LOOP STATISTICS : _ZN3aie17tensor_descriptorILj4E8bfloat16Lj64ENSt3__25tupleIJNS_6detail6dim_3dEiEEEE14steps_to_incrsERKNS2_5arrayINS_10tensor_dimELj4EEEb --------------- + +Total loops = 1 +Loops with prepare for pipelining pragma = 0 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 0 +Loops with max range pragma = 0 +Loops with known trip count = 1 + +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + + + +--------------- LOOP STATISTICS : _ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E --------------- + +Total loops = 7 +Loops with prepare for pipelining pragma = 3 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 3 +Loops with max range pragma = 0 +Loops with known trip count = 3 + +------------------------------------------------------------------------------------------------------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _ZNSt3__25arrayIN3aie6vectorI8bfloat16Lj32EEELj1EEC2Ev --------------- + +Total loops = 1 +Loops with prepare for pipelining pragma = 0 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 0 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +-------------------------------------------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _ZNSt3__28__fill_nB7v160003IP6addr_tjS1_EET_S3_T0_RKT1_ --------------- + +Total loops = 1 +Loops with prepare for pipelining pragma = 0 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 0 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +--------------------------------------------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj --------------- + +Total loops = 3 +Loops with prepare for pipelining pragma = 0 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 0 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params --------------- + +Total loops = 1 +Loops with prepare for pipelining pragma = 1 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 0 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +-------------------------------------------------------------------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj --------------- + +Total loops = 1 +Loops with prepare for pipelining pragma = 0 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 0 +Loops with max range pragma = 0 +Loops with known trip count = 1 + +------------------------------------------------------------------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params --------------- + +Total loops = 4 +Loops with prepare for pipelining pragma = 0 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 0 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +---------------------------------------------------------------------------------------------------------------------------------------------------------------- + + + +--------------- PRAGMA INSERTION LOG (unroll threshold = 3000, max unroll factor = 1) --------------- + +Adding pragma to function _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh : + Pragma added to loop at line 398 in file aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h : chess_loop_range(8,8) +Adding pragma to function _ZNSt3__25arrayIN3aie6vectorI8bfloat16Lj32EEELj2EEC2Ev : + Pragma added to loop at line 158 in file aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include-lite/../include/array : chess_prepare_for_pipelining +Adding pragma to function _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh : + Pragma added to loop at line 180 in file aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16_params.h : chess_loop_range(4,4) +Adding pragma to function _ZN3aie17tensor_descriptorILj4E8bfloat16Lj64ENSt3__25tupleIJNS_6detail6dim_3dEiEEEE14steps_to_incrsERKNS2_5arrayINS_10tensor_dimELj4EEEb : + Pragma added to loop at line 7052 in file aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp : chess_loop_range(4,4) +Adding pragma to function _Z8init_accILt1EEvPaS0_iii : + Pragma added to loop at line 68 in file aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h : chess_prepare_for_pipelining + Pragma added to loop at line 53 in file aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h : chess_prepare_for_pipelining +Adding pragma to function _ZNSt3__25arrayIN3aie4mmulILj8ELj8ELj8E8bfloat16S3_7accautoEELj4EEC2Ev : + Pragma added to loop at line 158 in file aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include-lite/../include/array : chess_prepare_for_pipelining +Adding pragma to function _ZNSt3__25arrayIN3aie6detail10accum_baseILNS2_10AccumClassE2ELj32ELj32EEELj2EEC2Ev : + Pragma added to loop at line 158 in file aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include-lite/../include/array : chess_prepare_for_pipelining +Adding pragma to function _Z12post_processPai : + Pragma added to loop at line 92 in file aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h : chess_prepare_for_pipelining +Adding pragma to function _ZNSt3__25arrayIN3aie6vectorI8bfloat16Lj32EEELj1EEC2Ev : + Pragma added to loop at line 158 in file aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include-lite/../include/array : chess_prepare_for_pipelining +Adding pragma to function _ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E : + Pragma added to loop at line 280 in file aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h : chess_loop_range(32,32) + Pragma added to loop at line 268 in file aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h : chess_loop_range(1,1) + Pragma added to loop at line 268 in file aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h : chess_flatten_loop + Pragma added to loop at line 258 in file aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h : chess_loop_range(1,1) + Pragma added to loop at line 258 in file aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h : chess_flatten_loop +Adding pragma to function _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj : + Pragma added to loop at line 75 in file aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle_params.h : chess_loop_range(7,7) +Adding pragma to function _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params : + Pragma added to loop at line 88 in file aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h : chess_prepare_for_pipelining + Pragma added to loop at line 116 in file aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h : chess_prepare_for_pipelining + +----------------------------------------------------------------------------------------------------- + diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable79/Release/0_0_reloadable79.calltree b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable79/Release/0_0_reloadable79.calltree new file mode 100644 index 0000000000000000000000000000000000000000..d14fb6d090940167cca43c8c6a97196883ac208d --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable79/Release/0_0_reloadable79.calltree @@ -0,0 +1,96 @@ + +// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri May 30 11:30:05 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// bridge -o../Release/0_0_reloadable4 ../Release/0_0_reloadable4.o -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/isg -g -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable4.bcf -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/softfloat/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork3577691 -pme + + +// Release: ipp V-2024.06-TGT-241219 + +_Z13kernelWrapperPPvjjjj + _Z15_b14160_wrapperPPv (referenced text) + _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh + _ZN12me_primitive10udiv_dstepEjjRjS0_ + _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params + _Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params + _Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params + _Z14_b7835_wrapperPPv (referenced text) + _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj + _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj + _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params + _Z14_b8148_wrapperPPv (referenced text) + _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj + memset + _Z15_b13739_wrapperPPv (referenced text) + _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv + _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv + _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E + _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E + _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E + _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E + _Z15_b13744_wrapperPPv (referenced text) + _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv + _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E + _Z15_b13749_wrapperPPv (referenced text) + _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E + _Z14_b8170_wrapperPPv (referenced text) + _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj + _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj + _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj + _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params + _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params + + +Call tree stack and functions sizes: + +stack stack stack call func func function name + desc level level desc +----- ----- ----- ----- ----- ----- -------------------------------------------------------------- + 64 256 0 0 546 9978 _Z13kernelWrapperPPvjjjj + 0 192 1 1 36 2736 _Z15_b14160_wrapperPPv + 128 192 1 2 478 2700 _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + 64 64 2 3 792 934 _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh + 0 0 3 4 142 142 _ZN12me_primitive10udiv_dstepEjjRjS0_ + 64 64 2 3 360 1288 _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params + 0 0 3 4 674 674 _Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params + 0 0 2 4 254 254 _Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params + 0 64 1 1 32 1646 _Z14_b7835_wrapperPPv + 64 64 1 2 202 1614 _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj + 0 0 2 3 262 262 _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj + 0 0 2 3 1150 1150 _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params + 0 64 1 1 32 690 _Z14_b8148_wrapperPPv + 64 64 1 2 484 658 _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj + 0 0 2 3 174 174 memset + 0 192 1 1 32 1282 _Z15_b13739_wrapperPPv + 64 192 1 2 488 1250 _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + 64 128 2 3 60 318 _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv + 64 64 3 4 178 202 _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv + 0 0 4 5 24 24 _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E + 0 0 2 4 56 56 _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E + 128 128 2 3 114 444 _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E + 0 0 3 4 330 330 _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E + 0 64 1 1 32 966 _Z15_b13744_wrapperPPv + 64 64 1 2 488 934 _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + 0 0 2 3 68 68 _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv + 0 0 2 3 378 378 _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E + 0 128 1 1 36 1124 _Z15_b13749_wrapperPPv + 64 128 1 2 602 1088 _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE + 64 64 2 3 154 178 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv + 0 0 3 4 24 24 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E + 0 0 2 3 308 308 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E + 0 128 1 1 32 988 _Z14_b8170_wrapperPPv + 128 128 1 2 178 956 _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj + 0 0 2 3 52 298 _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj + 0 0 3 4 162 162 _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj + 0 0 2 4 84 84 _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params + 0 0 2 3 480 480 _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params + + +Maximum call level : 5 +Maximum stack level: 4 +Maximum stack size : 256 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable79/Release/0_0_reloadable79.cmic2 b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable79/Release/0_0_reloadable79.cmic2 new file mode 100644 index 0000000000000000000000000000000000000000..f5e6b3e5828701d92c85b709bbc3b7c45a16ad8f --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable79/Release/0_0_reloadable79.cmic2 @@ -0,0 +1,14042 @@ + +// File generated by darts version V-2024.06#84922c0d9f#241219, Fri May 30 11:30:06 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// darts -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -d -h -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable4 me + +// Release: ipp V-2024.06-TGT-241219 +.label __Z13kernelWrapperPPvjjjj___func_begin0 +.label _Z13kernelWrapperPPvjjjj +.function kernelWrapper _Z13kernelWrapperPPvjjjj +.src_ref 0 "0_0_reloadable4.cc" 91 first +.src_ref 0 "0_0_reloadable4.cc" 93 60 +.src_ref 0 "0_0_reloadable4.cc" 93 110 first +.function_start + 2528 "00101100" // LDA r16, [p0]; NEZ r26, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2529 "11100000" // /* MW 5 */ + 2530 "11101001" // /* MW 4 */ + 2531 "11010000" // /* MW 3 */ + 2532 "11000010" // /* MW 2 */ + 2533 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 91 + 2534 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2535 "00000001" // /* MW 5 */ + 2536 "00000000" // /* MW 4 */ + 2537 "00000000" // /* MW 3 */ + 2538 "00001000" // /* MW 2 */ + 2539 "00000000" // /* MW 1 */ + 2540 "10011000" // ST p6, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2541 "00011101" // /* MW 3 */ + 2542 "11101111" // /* MW 2 */ + 2543 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 0 "0_0_reloadable4.cc" 98 112 + 2544 "00000010" // ST r14, [sp, #-16]; MOV r14, r3 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2545 "01110000" // /* MW 7 */ + 2546 "11010000" // /* MW 6 */ + 2547 "11001000" // /* MW 5 */ + 2548 "00000001" // /* MW 4 */ + 2549 "10110000" // /* MW 3 */ + 2550 "00111010" // /* MW 2 */ + 2551 "11111110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 0 "0_0_reloadable4.cc" 95 110 + 2552 "00000010" // ST r15, [sp, #-8]; MOV r15, r1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2553 "01110000" // /* MW 7 */ + 2554 "01010000" // /* MW 6 */ + 2555 "11101000" // /* MW 5 */ + 2556 "00000001" // /* MW 4 */ + 2557 "10110000" // /* MW 3 */ + 2558 "00111110" // /* MW 2 */ + 2559 "11111111" // /* MW 1 */ + 2560 "10011000" // ST p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2561 "10011101" // /* MW 3 */ + 2562 "11110111" // /* MW 2 */ + 2563 "00001111" // /* MW 1 */ + 2564 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2565 "00111101" // /* MW 3 */ + 2566 "11111100" // /* MW 2 */ + 2567 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 first + 2568 "00011000" // ADD.NC p6, r16, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2569 "00000010" // /* MW 3 */ + 2570 "01101000" // /* MW 2 */ + 2571 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 + 2572 "10011000" // LDA r16, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2573 "00010110" // /* MW 3 */ + 2574 "00011110" // /* MW 2 */ + 2575 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 36 + 2576 "10011000" // LDA r18, [p6], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2577 "01010110" // /* MW 3 */ + 2578 "00111110" // /* MW 2 */ + 2579 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 46 + 2580 "10011000" // LDA r17, [p6], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2581 "00110110" // /* MW 3 */ + 2582 "11101110" // /* MW 2 */ + 2583 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 60 + 2584 "10011000" // LDA r27, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2585 "01110110" // /* MW 3 */ + 2586 "00000111" // /* MW 2 */ + 2587 "00000110" // /* MW 1 */ + 2588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2589 "00000000" // /* MW 1 */ + 2590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2591 "00000000" // /* MW 1 */ + 2592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2593 "00000000" // /* MW 1 */ + 2594 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2595 "00000000" // /* MW 1 */ + 2596 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2597 "00000000" // /* MW 1 */ + 2598 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2599 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 219 30 first +.src_ref 1 "io_buffer_compiler.h" 219 37 first + 2600 "00011000" // SEL.EQZ r16, r16, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2601 "00100010" // /* MW 3 */ + 2602 "00100001" // /* MW 2 */ + 2603 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 218 23 first + 2604 "10011000" // ST r16, [p6, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2605 "00010001" // /* MW 3 */ + 2606 "11010110" // /* MW 2 */ + 2607 "00001110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 +.src_ref 1 "io_buffer_main.h" 434 8 +.src_ref 1 "io_buffer_main.h" 434 8 + 2608 "11100100" // MOVX r16, #-1; MOV el0, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2609 "00111001" // /* MW 5 */ + 2610 "00110101" // /* MW 4 */ + 2611 "10100000" // /* MW 3 */ + 2612 "00011111" // /* MW 2 */ + 2613 "11111100" // /* MW 1 */ + 2614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2615 "00000000" // /* MW 1 */ + 2616 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2617 "00000000" // /* MW 1 */ + 2618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2619 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 first + 2620 "00011000" // ACQ.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2621 "00001000" // /* MW 3 */ + 2622 "01010111" // /* MW 2 */ + 2623 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 95 60 +.src_ref 0 "0_0_reloadable4.cc" 95 110 +.src_ref 0 "0_0_reloadable4.cc" 98 60 +.src_ref 0 "0_0_reloadable4.cc" 101 7 + 2624 "01100100" // MOVX r17, #2; MOV r19, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2625 "00000101" // /* MW 5 */ + 2626 "10100000" // /* MW 4 */ + 2627 "00101001" // /* MW 3 */ + 2628 "01000001" // /* MW 2 */ + 2629 "00000100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 95 60 +.src_ref 0 "0_0_reloadable4.cc" 95 60 first + 2630 "11100100" // LSHL r20, r26, r17; MOV r18, p0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2631 "10000001" // /* MW 5 */ + 2632 "00100001" // /* MW 4 */ + 2633 "10111001" // /* MW 3 */ + 2634 "00100011" // /* MW 2 */ + 2635 "11010101" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 95 60 +.src_ref 0 "0_0_reloadable4.cc" 95 110 + 2636 "10100100" // LTU r18, r19, r15; ADD.NC p6, r18, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2637 "10100010" // /* MW 5 */ + 2638 "11010010" // /* MW 4 */ + 2639 "10011100" // /* MW 3 */ + 2640 "10011111" // /* MW 2 */ + 2641 "10011100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 95 60 +.src_ref 0 "0_0_reloadable4.cc" 98 60 + 2642 "10111010" // LDA r20, [p6]; ST r20, [sp, #-28]; MOV r19, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2643 "01110010" // /* MW 9 */ + 2644 "01100000" // /* MW 8 */ + 2645 "01101110" // /* MW 7 */ + 2646 "10000010" // /* MW 6 */ + 2647 "10010101" // /* MW 5 */ + 2648 "11100110" // /* MW 4 */ + 2649 "11010111" // /* MW 3 */ + 2650 "11010010" // /* MW 2 */ + 2651 "11000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 + 2652 "00000010" // ST r18, [sp, #-24]; MOV r26, r18 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2653 "01110000" // /* MW 7 */ + 2654 "10010000" // /* MW 6 */ + 2655 "01001100" // /* MW 5 */ + 2656 "00000011" // /* MW 4 */ + 2657 "10110000" // /* MW 3 */ + 2658 "01001010" // /* MW 2 */ + 2659 "11111101" // /* MW 1 */ + 2660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2661 "00000000" // /* MW 1 */ + 2662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2663 "00000000" // /* MW 1 */ + 2664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2665 "00000000" // /* MW 1 */ + 2666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2667 "00000000" // /* MW 1 */ + 2668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2669 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 first + 2670 "00011000" // ADD.NC p6, r20, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2671 "00000010" // /* MW 3 */ + 2672 "01101010" // /* MW 2 */ + 2673 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 + 2674 "10011000" // LDA r20, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2675 "10010110" // /* MW 3 */ + 2676 "00011110" // /* MW 2 */ + 2677 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 36 + 2678 "10011000" // LDA r22, [p6], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2679 "11010110" // /* MW 3 */ + 2680 "00111110" // /* MW 2 */ + 2681 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 46 + 2682 "10011000" // LDA r21, [p6], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2683 "10110110" // /* MW 3 */ + 2684 "11101110" // /* MW 2 */ + 2685 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 60 + 2686 "10011000" // LDA r27, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2687 "01110110" // /* MW 3 */ + 2688 "00000111" // /* MW 2 */ + 2689 "00000110" // /* MW 1 */ + 2690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2691 "00000000" // /* MW 1 */ + 2692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2693 "00000000" // /* MW 1 */ + 2694 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2695 "00000000" // /* MW 1 */ + 2696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2697 "00000000" // /* MW 1 */ + 2698 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2699 "00000000" // /* MW 1 */ + 2700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2701 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 219 30 first +.src_ref 1 "io_buffer_compiler.h" 219 37 first + 2702 "00011000" // SEL.EQZ r20, r20, r22, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2703 "01100010" // /* MW 3 */ + 2704 "00101001" // /* MW 2 */ + 2705 "00010101" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 218 23 first + 2706 "10011000" // ST r20, [p6, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2707 "10010001" // /* MW 3 */ + 2708 "11010110" // /* MW 2 */ + 2709 "00001110" // /* MW 1 */ + 2710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2711 "00000000" // /* MW 1 */ + 2712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2713 "00000000" // /* MW 1 */ + 2714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2715 "00000000" // /* MW 1 */ + 2716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2717 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 first + 2718 "00011000" // ACQ.COND r21, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2719 "00001000" // /* MW 3 */ + 2720 "01010111" // /* MW 2 */ + 2721 "00010101" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 98 60 first + 2722 "10011000" // LSHL r18, r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2723 "00011101" // /* MW 3 */ + 2724 "10100101" // /* MW 2 */ + 2725 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 98 60 +.src_ref 0 "0_0_reloadable4.cc" 98 60 + 2726 "10100100" // LSHL r18, r2, r17; ADD.NC r19, r19, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2727 "10010010" // /* MW 5 */ + 2728 "10110011" // /* MW 4 */ + 2729 "10111001" // /* MW 3 */ + 2730 "10100011" // /* MW 2 */ + 2731 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 98 60 +.src_ref 0 "0_0_reloadable4.cc" 98 112 + 2732 "10100100" // NEZ r26, r14; ADD.NC p6, r19, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2733 "10010010" // /* MW 5 */ + 2734 "11010011" // /* MW 4 */ + 2735 "00001100" // /* MW 3 */ + 2736 "10011110" // /* MW 2 */ + 2737 "01110110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 98 60 + 2738 "00001100" // LDA r18, [p6]; ST r26, [sp, #-32] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2739 "10101011" // /* MW 5 */ + 2740 "11000110" // /* MW 4 */ + 2741 "11011111" // /* MW 3 */ + 2742 "11001010" // /* MW 2 */ + 2743 "11000000" // /* MW 1 */ + 2744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2745 "00000000" // /* MW 1 */ + 2746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2747 "00000000" // /* MW 1 */ + 2748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2749 "00000000" // /* MW 1 */ + 2750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2751 "00000000" // /* MW 1 */ + 2752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2753 "00000000" // /* MW 1 */ + 2754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2755 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 first + 2756 "00011000" // ADD.NC p7, r18, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2757 "00000010" // /* MW 3 */ + 2758 "01101001" // /* MW 2 */ + 2759 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 + 2760 "10011000" // LDA r19, [p7], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2761 "01110110" // /* MW 3 */ + 2762 "00111110" // /* MW 2 */ + 2763 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 23 + 2764 "10011000" // LDA r18, [p7], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2765 "01010110" // /* MW 3 */ + 2766 "11101110" // /* MW 2 */ + 2767 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 36 + 2768 "10011000" // LDA r20, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2769 "10010110" // /* MW 3 */ + 2770 "00011110" // /* MW 2 */ + 2771 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 60 + 2772 "10011000" // LDA r27, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2773 "01110110" // /* MW 3 */ + 2774 "00000111" // /* MW 2 */ + 2775 "00000111" // /* MW 1 */ + 2776 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2777 "00000000" // /* MW 1 */ + 2778 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2779 "00000000" // /* MW 1 */ + 2780 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2781 "00000000" // /* MW 1 */ + 2782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2783 "00000000" // /* MW 1 */ + 2784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2785 "00000000" // /* MW 1 */ + 2786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2787 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 219 30 first +.src_ref 1 "io_buffer_compiler.h" 219 37 first + 2788 "00011000" // SEL.EQZ r19, r19, r20, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2789 "01000010" // /* MW 3 */ + 2790 "11100111" // /* MW 2 */ + 2791 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 218 23 first + 2792 "10011000" // ST r19, [p7, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2793 "01110001" // /* MW 3 */ + 2794 "11010110" // /* MW 2 */ + 2795 "00001111" // /* MW 1 */ + 2796 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2797 "00000000" // /* MW 1 */ + 2798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2799 "00000000" // /* MW 1 */ + 2800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2801 "00000000" // /* MW 1 */ + 2802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2803 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 first + 2804 "00011000" // ACQ.COND r18, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2805 "00001000" // /* MW 3 */ + 2806 "10010111" // /* MW 2 */ + 2807 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 101 7 first + 2808 "10011000" // LSHL r16, r0, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2809 "00011101" // /* MW 3 */ + 2810 "00100001" // /* MW 2 */ + 2811 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 101 7 + 2812 "11111000" // MOV dj0, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2813 "00100000" // /* MW 3 */ + 2814 "10001000" // /* MW 2 */ + 2815 "00011000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 101 7 + 2816 "01000100" // MOVXM p7, #508480 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2817 "10000000" // /* MW 5 */ + 2818 "11000100" // /* MW 4 */ + 2819 "11001110" // /* MW 3 */ + 2820 "00000111" // /* MW 2 */ + 2821 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 101 7 + 2822 "00001100" // LDA p1, [p7, dj0]; ST el0, [sp, #-36] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2823 "01011011" // /* MW 5 */ + 2824 "10111000" // /* MW 4 */ + 2825 "11011111" // /* MW 3 */ + 2826 "00010011" // /* MW 2 */ + 2827 "11100000" // /* MW 1 */ + 2828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2829 "00000000" // /* MW 1 */ + 2830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2831 "00000000" // /* MW 1 */ + 2832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2833 "00000000" // /* MW 1 */ + 2834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2835 "00000000" // /* MW 1 */ + 2836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2837 "00000000" // /* MW 1 */ + 2838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2839 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 101 4 +.no_stack_arguments + 2840 "00011000" // JL p1 /* MW 4 */ /* control_operation: words=4 call unconditional cycles_taken=1 indirect absolute delay_slots=5 */ + 2841 "01000000" // /* MW 3 */ + 2842 "00110000" // /* MW 2 */ + 2843 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 104 60 +.src_ref 0 "0_0_reloadable4.cc" 106 60 +.delay_slot + 2844 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2845 "11000000" // /* MW 3 */ + 2846 "01100000" // /* MW 2 */ + 2847 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2849 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2851 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2853 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2854 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2855 "01111110" // /* MW 9 */ + 2856 "10100101" // /* MW 8 */ + 2857 "00000001" // /* MW 7 */ + 2858 "00000000" // /* MW 6 */ + 2859 "00010000" // /* MW 5 */ + 2860 "00000000" // /* MW 4 */ + 2861 "11110000" // /* MW 3 */ + 2862 "00101100" // /* MW 2 */ + 2863 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_main.h" 464 8 +.src_ref 1 "io_buffer_main.h" 464 8 +.src_ref 1 "io_buffer_main.h" 464 8 +.src_ref 0 "0_0_reloadable4.cc" 104 60 first +.return_address + 2864 "00101100" // LDA r17, [p7]; MOVX r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2865 "00001010" // /* MW 5 */ + 2866 "01000000" // /* MW 4 */ + 2867 "11010000" // /* MW 3 */ + 2868 "11000110" // /* MW 2 */ + 2869 "11100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 + 2870 "00011000" // LDA r26, [sp, #-36] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2871 "01010001" // /* MW 3 */ + 2872 "11011111" // /* MW 2 */ + 2873 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 106 60 + 2874 "00011000" // LDA dj0, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2875 "01000001" // /* MW 3 */ + 2876 "11100100" // /* MW 2 */ + 2877 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_main.h" 464 8 + 2878 "00011000" // LDA el0, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2879 "00101001" // /* MW 3 */ + 2880 "11101000" // /* MW 2 */ + 2881 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 + 2882 "00011000" // LDA eh0, [sp, #-32] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2883 "00001001" // /* MW 3 */ + 2884 "11100000" // /* MW 2 */ + 2885 "00000111" // /* MW 1 */ + 2886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2887 "00000000" // /* MW 1 */ + 2888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2889 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 12 first + 2890 "00011000" // ADD.NC p0, r17, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2891 "10001000" // /* MW 3 */ + 2892 "01101000" // /* MW 2 */ + 2893 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 12 + 2894 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2895 "00110110" // /* MW 3 */ + 2896 "00000110" // /* MW 2 */ + 2897 "00000000" // /* MW 1 */ + 2898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2899 "00000000" // /* MW 1 */ + 2900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2901 "00000000" // /* MW 1 */ + 2902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2903 "00000000" // /* MW 1 */ + 2904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2905 "00000000" // /* MW 1 */ + 2906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2907 "00000000" // /* MW 1 */ + 2908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2909 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 first + 2910 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2911 "00001000" // /* MW 3 */ + 2912 "01010101" // /* MW 2 */ + 2913 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 2914 "11010100" // LDA r17, [p0, #-4]; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2915 "01000001" // /* MW 5 */ + 2916 "10101111" // /* MW 4 */ + 2917 "11011101" // /* MW 3 */ + 2918 "11000110" // /* MW 2 */ + 2919 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 +.src_ref 0 "0_0_reloadable4.cc" 106 60 first + 2920 "11010100" // LDA r18, [p7, dj0]; MOV r26, el0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2921 "00111001" // /* MW 5 */ + 2922 "01000000" // /* MW 4 */ + 2923 "11011101" // /* MW 3 */ + 2924 "01001010" // /* MW 2 */ + 2925 "11100000" // /* MW 1 */ + 2926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2927 "00000000" // /* MW 1 */ + 2928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2929 "00000000" // /* MW 1 */ + 2930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2931 "00000000" // /* MW 1 */ + 2932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2933 "00000000" // /* MW 1 */ + 2934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2935 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 2936 "10011000" // SUB r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2937 "00010001" // /* MW 3 */ + 2938 "00100111" // /* MW 2 */ + 2939 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 12 first +.src_ref 1 "io_buffer_compiler.h" 630 24 + 2940 "00100100" // SEL.EQZ r17, r17, r19, r27; ADD.NC p7, r18, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2941 "00010000" // /* MW 5 */ + 2942 "11010010" // /* MW 4 */ + 2943 "01001110" // /* MW 3 */ + 2944 "01100110" // /* MW 2 */ + 2945 "10001100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 12 +.src_ref 1 "io_buffer_compiler.h" 630 22 first + 2946 "00001100" // LDA r17, [p7]; ST r17, [p0, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2947 "01100011" // /* MW 5 */ + 2948 "11101100" // /* MW 4 */ + 2949 "11010001" // /* MW 3 */ + 2950 "11000110" // /* MW 2 */ + 2951 "11100000" // /* MW 1 */ + 2952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2953 "00000000" // /* MW 1 */ + 2954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2955 "00000000" // /* MW 1 */ + 2956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2957 "00000000" // /* MW 1 */ + 2958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2959 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 2960 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2961 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 2962 "11111000" // MOV r26, eh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2963 "00011100" // /* MW 3 */ + 2964 "10100001" // /* MW 2 */ + 2965 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2966 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2967 "00001000" // /* MW 3 */ + 2968 "01010101" // /* MW 2 */ + 2969 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 2970 "11010100" // LDA r17, [p7, #-4]; MOV r27, el0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2971 "00111001" // /* MW 5 */ + 2972 "11000000" // /* MW 4 */ + 2973 "11011101" // /* MW 3 */ + 2974 "11000110" // /* MW 2 */ + 2975 "11111110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 109 60 first + 2976 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2977 "01010110" // /* MW 3 */ + 2978 "00000110" // /* MW 2 */ + 2979 "00000110" // /* MW 1 */ + 2980 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2981 "00000000" // /* MW 1 */ + 2982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2983 "00000000" // /* MW 1 */ + 2984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2985 "00000000" // /* MW 1 */ + 2986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2987 "00000000" // /* MW 1 */ + 2988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2989 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 2990 "10011000" // SUB r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2991 "00010001" // /* MW 3 */ + 2992 "00100111" // /* MW 2 */ + 2993 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 25 first +.src_ref 1 "io_buffer_compiler.h" 630 24 + 2994 "00100100" // SEL.EQZ r17, r17, r19, r27; ADD.NC p0, r18, #20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2995 "00010100" // /* MW 5 */ + 2996 "11010010" // /* MW 4 */ + 2997 "01000000" // /* MW 3 */ + 2998 "01100110" // /* MW 2 */ + 2999 "10001100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 25 +.src_ref 1 "io_buffer_compiler.h" 630 22 first + 3000 "00001100" // LDA r17, [p0]; ST r17, [p7, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3001 "01100011" // /* MW 5 */ + 3002 "11101100" // /* MW 4 */ + 3003 "11011111" // /* MW 3 */ + 3004 "11000110" // /* MW 2 */ + 3005 "00000000" // /* MW 1 */ + 3006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3007 "00000000" // /* MW 1 */ + 3008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3009 "00000000" // /* MW 1 */ + 3010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3011 "00000000" // /* MW 1 */ + 3012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3013 "00000000" // /* MW 1 */ + 3014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3015 "00000000" // /* MW 1 */ + 3016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3017 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 first + 3018 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3019 "00001000" // /* MW 3 */ + 3020 "01010101" // /* MW 2 */ + 3021 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 111 + 3022 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3023 "00111001" // /* MW 3 */ + 3024 "11111100" // /* MW 2 */ + 3025 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 3026 "10011000" // LDA r17, [p0, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3027 "00110110" // /* MW 3 */ + 3028 "11100110" // /* MW 2 */ + 3029 "00000000" // /* MW 1 */ + 3030 "00011000" // LDA p6, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3031 "00011001" // /* MW 3 */ + 3032 "11101111" // /* MW 2 */ + 3033 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 3034 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3035 "10011001" // /* MW 3 */ + 3036 "11110111" // /* MW 2 */ + 3037 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 3038 "00011000" // LDA r14, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3039 "11010001" // /* MW 3 */ + 3040 "11110001" // /* MW 2 */ + 3041 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3042 "00011000" // LDA r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3043 "11110001" // /* MW 3 */ + 3044 "11111001" // /* MW 2 */ + 3045 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 111 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3046 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3047 "00000001" // /* MW 5 */ + 3048 "00000000" // /* MW 4 */ + 3049 "00000000" // /* MW 3 */ + 3050 "11111000" // /* MW 2 */ + 3051 "11111111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 111 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3052 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3053 "00000000" // /* MW 3 */ + 3054 "00101000" // /* MW 2 */ + 3055 "00010000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 first +.delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3056 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3057 "00010001" // /* MW 3 */ + 3058 "00100001" // /* MW 2 */ + 3059 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3061 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3062 "11111000" // MOV r27, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3063 "00100000" // /* MW 3 */ + 3064 "11010111" // /* MW 2 */ + 3065 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.delay_slot + 3066 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3067 "00000010" // /* MW 3 */ + 3068 "01100001" // /* MW 2 */ + 3069 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 22 +.delay_slot + 3070 "10011000" // ST r16, [p0, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3071 "00010001" // /* MW 3 */ + 3072 "11100110" // /* MW 2 */ +.label _Z13kernelWrapperPPvjjjj__end +.label __Z13kernelWrapperPPvjjjj___func_end0 + 3073 "00001000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E +.function setup _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E +.src_ref 2 "elementwise_binary.h" 100 first +.src_ref 2 "elementwise_binary.h" 103 4 first +.function_start + 3088 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3089 "00000000" // /* MW 3 */ + 3090 "00101000" // /* MW 2 */ + 3091 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 3092 "01000100" // MOVXM p0, #508704 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3093 "01000000" // /* MW 5 */ + 3094 "11000110" // /* MW 4 */ + 3095 "11000000" // /* MW 3 */ + 3096 "00000111" // /* MW 2 */ + 3097 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 3098 "10111000" // MOV m0, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3099 "10000000" // /* MW 3 */ + 3100 "00000000" // /* MW 2 */ + 3101 "00011000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 first +.delay_slot + 3102 "10011000" // ST m0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3103 "00000001" // /* MW 3 */ + 3104 "00000100" // /* MW 2 */ + 3105 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 3106 "10011000" // ST m0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3107 "00000001" // /* MW 3 */ + 3108 "00010100" // /* MW 2 */ + 3109 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_end0 + 3111 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv +.function setup _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv +.src_ref 2 "elementwise_binary.h" 89 first +.src_ref 2 "elementwise_binary.h" 92 22 +.src_ref 2 "elementwise_binary.h" 92 24 first +.function_start + 3120 "10111010" // LDA el0, [p1], #4; MOVXM p0, #508672 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3121 "00010000" // /* MW 9 */ + 3122 "10000000" // /* MW 8 */ + 3123 "00110001" // /* MW 7 */ + 3124 "11110000" // /* MW 6 */ + 3125 "00000001" // /* MW 5 */ + 3126 "00000000" // /* MW 4 */ + 3127 "11010000" // /* MW 3 */ + 3128 "10000101" // /* MW 2 */ + 3129 "00100011" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 89 + 3130 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3131 "00000001" // /* MW 5 */ + 3132 "00000000" // /* MW 4 */ + 3133 "00000000" // /* MW 3 */ + 3134 "00001000" // /* MW 2 */ + 3135 "00000000" // /* MW 1 */ + 3136 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3137 "00111101" // /* MW 3 */ + 3138 "11111000" // /* MW 2 */ + 3139 "00001111" // /* MW 1 */ + 3140 "10011000" // ST r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3141 "11110101" // /* MW 3 */ + 3142 "11111101" // /* MW 2 */ + 3143 "00001111" // /* MW 1 */ + 3144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3145 "00000000" // /* MW 1 */ + 3146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3147 "00000000" // /* MW 1 */ + 3148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3149 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 92 22 first + 3150 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3151 "00101001" // /* MW 3 */ + 3152 "00011100" // /* MW 2 */ + 3153 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 93 24 first + 3154 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3155 "00101110" // /* MW 3 */ + 3156 "00011100" // /* MW 2 */ + 3157 "00000001" // /* MW 1 */ + 3158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3159 "00000000" // /* MW 1 */ + 3160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3161 "00000000" // /* MW 1 */ + 3162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3163 "00000000" // /* MW 1 */ + 3164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3165 "00000000" // /* MW 1 */ + 3166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3167 "00000000" // /* MW 1 */ + 3168 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3169 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 93 22 + 3170 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3171 "00101001" // /* MW 3 */ + 3172 "00011100" // /* MW 2 */ + 3173 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 94 24 first + 3174 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3175 "00101110" // /* MW 3 */ + 3176 "00000100" // /* MW 2 */ + 3177 "00000001" // /* MW 1 */ + 3178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3179 "00000000" // /* MW 1 */ + 3180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3181 "00000000" // /* MW 1 */ + 3182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3183 "00000000" // /* MW 1 */ + 3184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3185 "00000000" // /* MW 1 */ + 3186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3187 "00000000" // /* MW 1 */ + 3188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3189 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 94 22 + 3190 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3191 "00101001" // /* MW 3 */ + 3192 "00011100" // /* MW 2 */ + 3193 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 95 24 first + 3194 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3195 "00101110" // /* MW 3 */ + 3196 "00010100" // /* MW 2 */ + 3197 "00000001" // /* MW 1 */ + 3198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3199 "00000000" // /* MW 1 */ + 3200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3201 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 96 8 first +.no_stack_arguments + 3202 "00000100" // JL #3088 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3088 delay_slots=5 */ + 3203 "00000001" // /* MW 5 */ + 3204 "00000000" // /* MW 4 */ + 3205 "00001000" // /* MW 3 */ + 3206 "00000110" // /* MW 2 */ + 3207 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3209 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3211 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3212 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3213 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 95 22 first +.delay_slot + 3214 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3215 "00101001" // /* MW 3 */ + 3216 "11011100" // /* MW 2 */ + 3217 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 96 8 +.delay_slot + 3218 "00101110" // NOPA; NOPS; MOV r15, p0; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 3219 "00011100" // /* MW 13 */ + 3220 "00000000" // /* MW 12 */ + 3221 "00000000" // /* MW 11 */ + 3222 "00000111" // /* MW 10 */ + 3223 "10000110" // /* MW 9 */ + 3224 "01011110" // /* MW 8 */ + 3225 "00000000" // /* MW 7 */ + 3226 "00000000" // /* MW 6 */ + 3227 "10110110" // /* MW 5 */ + 3228 "00000010" // /* MW 4 */ + 3229 "11110000" // /* MW 3 */ + 3230 "00101100" // /* MW 2 */ + 3231 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 96 8 +.src_ref 2 "elementwise_binary.h" 98 4 +.src_ref 3 "add_impl.h" 105 29 +.return_address + 3232 "10111010" // LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3233 "00001000" // /* MW 9 */ + 3234 "11000100" // /* MW 8 */ + 3235 "00110011" // /* MW 7 */ + 3236 "01101000" // /* MW 6 */ + 3237 "00000000" // /* MW 5 */ + 3238 "00000001" // /* MW 4 */ + 3239 "00100000" // /* MW 3 */ + 3240 "00000111" // /* MW 2 */ + 3241 "11111111" // /* MW 1 */ +.src_ref 3 "add_impl.h" 105 29 +.src_ref 3 "add_impl.h" 106 37 +.src_ref 3 "add_impl.h" 106 39 + 3242 "10111010" // MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3243 "01011000" // /* MW 9 */ + 3244 "11111101" // /* MW 8 */ + 3245 "00000111" // /* MW 7 */ + 3246 "00001000" // /* MW 6 */ + 3247 "10000000" // /* MW 5 */ + 3248 "00000001" // /* MW 4 */ + 3249 "10000000" // /* MW 3 */ + 3250 "11100010" // /* MW 2 */ + 3251 "00000001" // /* MW 1 */ +.src_ref 3 "add_impl.h" 105 29 first +.src_ref 3 "add_impl.h" 106 39 + 3252 "01111010" // LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3253 "00000001" // /* MW 9 */ + 3254 "10100000" // /* MW 8 */ + 3255 "00000111" // /* MW 7 */ + 3256 "10000000" // /* MW 6 */ + 3257 "00010001" // /* MW 5 */ + 3258 "00001010" // /* MW 4 */ + 3259 "00100000" // /* MW 3 */ + 3260 "10111110" // /* MW 2 */ + 3261 "11111111" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 50 first + 3262 "10011000" // LDA.u8 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3263 "01001010" // /* MW 3 */ + 3264 "00000110" // /* MW 2 */ + 3265 "00000000" // /* MW 1 */ + 3266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3267 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3269 "00000000" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 37 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3270 "00011000" // ST.s16 r16, [p0, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3271 "00010111" // /* MW 3 */ + 3272 "00000010" // /* MW 2 */ + 3273 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 98 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3274 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3275 "00000000" // /* MW 3 */ + 3276 "00101000" // /* MW 2 */ + 3277 "00010000" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 54 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3278 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3279 "00000101" // /* MW 3 */ + 3280 "00100010" // /* MW 2 */ + 3281 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 98 4 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3282 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3283 "00000001" // /* MW 5 */ + 3284 "00000000" // /* MW 4 */ + 3285 "00000000" // /* MW 3 */ + 3286 "11111000" // /* MW 2 */ + 3287 "11111111" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 54 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3288 "10011000" // EQ r27, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3289 "00100111" // /* MW 3 */ + 3290 "01110111" // /* MW 2 */ + 3291 "00010100" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 39 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3292 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3293 "10000010" // /* MW 3 */ + 3294 "00100001" // /* MW 2 */ + 3295 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + 3297 "00000000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E +.function setup _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E +.src_ref 2 "elementwise_binary_broadcasting.h" 40 first +.src_ref 2 "elementwise_binary_broadcasting.h" 41 33 +.src_ref 2 "elementwise_binary_broadcasting.h" 41 33 +.function_start + 3312 "10111010" // MOVA m0, #20; MOVXM p0, #508684 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3313 "00010000" // /* MW 9 */ + 3314 "10000110" // /* MW 8 */ + 3315 "00110001" // /* MW 7 */ + 3316 "11110000" // /* MW 6 */ + 3317 "00000001" // /* MW 5 */ + 3318 "00000000" // /* MW 4 */ + 3319 "10000000" // /* MW 3 */ + 3320 "10000000" // /* MW 2 */ + 3321 "00000010" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 41 25 +.src_ref 2 "elementwise_binary_broadcasting.h" 41 33 +.src_ref 2 "elementwise_binary_broadcasting.h" 41 33 first +.src_ref 2 "elementwise_binary_broadcasting.h" 42 25 + 3322 "10111010" // LDA.u8 r0, [p0], m0; MOVX r2, #1; MOV r1, #6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3323 "01011000" // /* MW 9 */ + 3324 "00000110" // /* MW 8 */ + 3325 "00101000" // /* MW 7 */ + 3326 "00101000" // /* MW 6 */ + 3327 "00100000" // /* MW 5 */ + 3328 "00000000" // /* MW 4 */ + 3329 "01010000" // /* MW 3 */ + 3330 "00000001" // /* MW 2 */ + 3331 "00000001" // /* MW 1 */ + 3332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3333 "00000000" // /* MW 1 */ + 3334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3335 "00000000" // /* MW 1 */ + 3336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3337 "00000000" // /* MW 1 */ + 3338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3339 "00000000" // /* MW 1 */ + 3340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3341 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 43 4 first + 3342 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3343 "00000000" // /* MW 3 */ + 3344 "00101000" // /* MW 2 */ + 3345 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 41 33 first +.delay_slot + 3346 "00011000" // NEZ r3, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3347 "11110000" // /* MW 3 */ + 3348 "00000110" // /* MW 2 */ + 3349 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 41 33 +.delay_slot + 3350 "10011000" // NE r0, r2, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3351 "00001000" // /* MW 3 */ + 3352 "10000000" // /* MW 2 */ + 3353 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 42 25 first +.delay_slot + 3354 "10011000" // LSHL r0, r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3355 "00011101" // /* MW 3 */ + 3356 "00000000" // /* MW 2 */ + 3357 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 41 25 first +.src_ref 2 "elementwise_binary_broadcasting.h" 42 23 +.delay_slot + 3358 "01011100" // ST r0, [p0, #4]; LSHL r2, r3, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3359 "00111011" // /* MW 5 */ + 3360 "10001000" // /* MW 4 */ + 3361 "00110001" // /* MW 3 */ + 3362 "10000010" // /* MW 2 */ + 3363 "00000010" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 41 23 +.delay_slot + 3364 "10011000" // ST r2, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3365 "01010001" // /* MW 3 */ + 3366 "00000100" // /* MW 2 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_end0 + 3367 "00001000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv +.function setup _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv +.src_ref 2 "elementwise_binary_broadcasting.h" 35 +.src_ref 2 "elementwise_binary_broadcasting.h" 35 first +.function_start + 3376 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3377 "00000001" // /* MW 5 */ + 3378 "00000000" // /* MW 4 */ + 3379 "00000000" // /* MW 3 */ + 3380 "00001000" // /* MW 2 */ + 3381 "00000000" // /* MW 1 */ + 3382 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3383 "00111101" // /* MW 3 */ + 3384 "11111100" // /* MW 2 */ + 3385 "00001111" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 36 8 first +.no_stack_arguments + 3386 "00000100" // JL #3120 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3120 delay_slots=5 */ + 3387 "00000001" // /* MW 5 */ + 3388 "00000000" // /* MW 4 */ + 3389 "00011000" // /* MW 3 */ + 3390 "00000110" // /* MW 2 */ + 3391 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 36 8 +.delay_slot + 3392 "01000100" // MOVXM p0, #508672 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3393 "00000000" // /* MW 5 */ + 3394 "11000110" // /* MW 4 */ + 3395 "11000000" // /* MW 3 */ + 3396 "00000111" // /* MW 2 */ + 3397 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3399 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3401 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3403 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3404 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3405 "01100111" // /* MW 3 */ + 3406 "00000001" // /* MW 2 */ + 3407 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 37 8 +.return_address + 3408 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3409 "00111001" // /* MW 3 */ + 3410 "11111100" // /* MW 2 */ + 3411 "00000111" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 37 8 first +.tail_call + 3412 "10000100" // J #3312 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=3312 delay_slots=5 */ + 3413 "00000000" // /* MW 5 */ + 3414 "00000000" // /* MW 4 */ + 3415 "01111000" // /* MW 3 */ + 3416 "00000110" // /* MW 2 */ + 3417 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 37 8 +.delay_slot + 3418 "01000100" // MOVXM p0, #508672 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3419 "00000000" // /* MW 5 */ + 3420 "11000110" // /* MW 4 */ + 3421 "11000000" // /* MW 3 */ + 3422 "00000111" // /* MW 2 */ + 3423 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 38 4 first +.delay_slot + 3424 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3425 "00000001" // /* MW 5 */ + 3426 "00000000" // /* MW 4 */ + 3427 "00000000" // /* MW 3 */ + 3428 "11111000" // /* MW 2 */ + 3429 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3431 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3433 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + 3435 "00000000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.function run _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.src_ref 2 "elementwise_binary_broadcasting.h" 48 first +.src_ref 2 "elementwise_binary_broadcasting.h" 55 37 +.src_ref 2 "elementwise_binary_broadcasting.h" 61 19 +.function_start + 3440 "10111010" // MOVA m0, #20; MOVXM p3, #508672 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3441 "00010000" // /* MW 9 */ + 3442 "10000000" // /* MW 8 */ + 3443 "10110001" // /* MW 7 */ + 3444 "11110001" // /* MW 6 */ + 3445 "00000001" // /* MW 5 */ + 3446 "00000000" // /* MW 4 */ + 3447 "10000000" // /* MW 3 */ + 3448 "10000000" // /* MW 2 */ + 3449 "00000010" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 55 37 first + 3450 "10011000" // LDA r0, [p3], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3451 "00010110" // /* MW 3 */ + 3452 "00111100" // /* MW 2 */ + 3453 "00000011" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 61 19 first +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 + 3454 "11010100" // LDA.u8 r1, [p3], m0; MOV p4, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3455 "10000001" // /* MW 5 */ + 3456 "11001101" // /* MW 4 */ + 3457 "01011000" // /* MW 3 */ + 3458 "00000101" // /* MW 2 */ + 3459 "01100001" // /* MW 1 */ + 3460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3461 "00000000" // /* MW 1 */ + 3462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3463 "00000000" // /* MW 1 */ + 3464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3465 "00000000" // /* MW 1 */ + 3466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3467 "00000000" // /* MW 1 */ + 3468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3469 "00000000" // /* MW 1 */ + 3470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3471 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 61 12 +.src_ref 2 "elementwise_binary_broadcasting.h" 61 35 + 3472 "10000100" // JNZ r1, #3536 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3536 delay_slots=5 */ + 3473 "00000001" // /* MW 5 */ + 3474 "01000000" // /* MW 4 */ + 3475 "11101000" // /* MW 3 */ + 3476 "00000110" // /* MW 2 */ + 3477 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 55 78 +.delay_slot + 3478 "00011000" // MOVX r2, #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3479 "11101001" // /* MW 3 */ + 3480 "11000100" // /* MW 2 */ + 3481 "00010111" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 55 78 first +.delay_slot + 3482 "10011000" // LSHL r0, r0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3483 "00101101" // /* MW 3 */ + 3484 "00000000" // /* MW 2 */ + 3485 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3487 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3489 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3490 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3491 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 62 28 first + 3492 "10011000" // LDA.s16 r1, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3493 "00110010" // /* MW 3 */ + 3494 "00000100" // /* MW 2 */ + 3495 "00000000" // /* MW 1 */ + 3496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3497 "00000000" // /* MW 1 */ + 3498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3499 "00000000" // /* MW 1 */ + 3500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3501 "00000000" // /* MW 1 */ + 3502 "10000100" // J #3568 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3568 delay_slots=5 */ + 3503 "00000000" // /* MW 5 */ + 3504 "00000000" // /* MW 4 */ + 3505 "11111000" // /* MW 3 */ + 3506 "00000110" // /* MW 2 */ + 3507 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3509 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3511 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 first +.delay_slot + 3512 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3513 "01110010" // /* MW 3 */ + 3514 "00000101" // /* MW 2 */ + 3515 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3516 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3517 "01100111" // /* MW 3 */ + 3518 "00000001" // /* MW 2 */ + 3519 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.delay_slot + 3520 "11100001" // NOPA; NOPB; VST x0, [p0]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3521 "00000000" // /* MW 15 */ + 3522 "00000000" // /* MW 14 */ + 3523 "01111000" // /* MW 13 */ + 3524 "10100101" // /* MW 12 */ + 3525 "00000001" // /* MW 11 */ + 3526 "00000000" // /* MW 10 */ + 3527 "00000000" // /* MW 9 */ + 3528 "00000000" // /* MW 8 */ + 3529 "00010011" // /* MW 7 */ + 3530 "00000100" // /* MW 6 */ + 3531 "00100000" // /* MW 5 */ + 3532 "00000000" // /* MW 4 */ + 3533 "11110000" // /* MW 3 */ + 3534 "00101100" // /* MW 2 */ + 3535 "00000000" // /* MW 1 */ +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_96 +.src_ref 2 "elementwise_binary_broadcasting.h" 65 28 first + 3536 "10011000" // LDA.s16 r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3537 "00110010" // /* MW 3 */ + 3538 "00000100" // /* MW 2 */ + 3539 "00000001" // /* MW 1 */ + 3540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3541 "00000000" // /* MW 1 */ + 3542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3543 "00000000" // /* MW 1 */ + 3544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3545 "00000000" // /* MW 1 */ + 3546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3547 "00000000" // /* MW 1 */ + 3548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3549 "00000000" // /* MW 1 */ + 3550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3551 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 first + 3552 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3553 "01110010" // /* MW 3 */ + 3554 "00000101" // /* MW 2 */ + 3555 "00011000" // /* MW 1 */ + 3556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3557 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first + 3558 "01111010" // NOPA; VST x0, [p1]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3559 "00000000" // /* MW 9 */ + 3560 "00000000" // /* MW 8 */ + 3561 "00000000" // /* MW 7 */ + 3562 "00000000" // /* MW 6 */ + 3563 "00010011" // /* MW 5 */ + 3564 "00000100" // /* MW 4 */ + 3565 "11110001" // /* MW 3 */ + 3566 "00101100" // /* MW 2 */ + 3567 "00000000" // /* MW 1 */ +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_128 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 first +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 first + 3568 "10111010" // LDA m0, [p4, #20]; MOVX r0, #60; ADD.NC lc, r0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3569 "01001000" // /* MW 9 */ + 3570 "00111111" // /* MW 8 */ + 3571 "10111000" // /* MW 7 */ + 3572 "10001010" // /* MW 6 */ + 3573 "00000111" // /* MW 5 */ + 3574 "00000000" // /* MW 4 */ + 3575 "11010000" // /* MW 3 */ + 3576 "10000000" // /* MW 2 */ + 3577 "10001010" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 + 3578 "10111010" // LDA m1, [p3, #4]; MOVXM ls, #3680 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3579 "00010000" // /* MW 9 */ + 3580 "00110000" // /* MW 8 */ + 3581 "01111111" // /* MW 7 */ + 3582 "00000000" // /* MW 6 */ + 3583 "00000000" // /* MW 5 */ + 3584 "00000000" // /* MW 4 */ + 3585 "11010000" // /* MW 3 */ + 3586 "10010000" // /* MW 2 */ + 3587 "01100010" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 + 3588 "01000100" // MOVXM le, #3712 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3589 "00000000" // /* MW 5 */ + 3590 "11111101" // /* MW 4 */ + 3591 "00000110" // /* MW 3 */ + 3592 "00000000" // /* MW 2 */ + 3593 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 + 3594 "01000100" // MOVXM p4, #508448 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3595 "01000000" // /* MW 5 */ + 3596 "11000100" // /* MW 4 */ + 3597 "11001000" // /* MW 3 */ + 3598 "00000111" // /* MW 2 */ + 3599 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 + 3600 "10011000" // LDA.s8 r1, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3601 "00100010" // /* MW 3 */ + 3602 "00000100" // /* MW 2 */ + 3603 "00000100" // /* MW 1 */ + 3604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3605 "00000000" // /* MW 1 */ + 3606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3607 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 146 20 first + 3608 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3609 "10101011" // /* MW 3 */ + 3610 "00001000" // /* MW 2 */ + 3611 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 2 "elementwise_binary.h" 148 20 first + 3612 "10011000" // VLDA.CONV.fp32.bf16 cml2, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3613 "00101011" // /* MW 3 */ + 3614 "00101001" // /* MW 2 */ + 3615 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 170 20 first + 3616 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3617 "00101011" // /* MW 3 */ + 3618 "00001000" // /* MW 2 */ + 3619 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3620 "10011000" // VLDA.CONV.fp32.bf16 cml4, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3621 "00101011" // /* MW 3 */ + 3622 "00101010" // /* MW 2 */ + 3623 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 154 20 +.src_ref 2 "elementwise_binary.h" 177 20 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3624 "00101100" // VLDA.CONV.fp32.bf16 cml1, [p0], m0; MOVX crRnd, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3625 "00000000" // /* MW 5 */ + 3626 "11110101" // /* MW 4 */ + 3627 "01110000" // /* MW 3 */ + 3628 "00010101" // /* MW 2 */ + 3629 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3630 "01100010" // VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3631 "00111101" // /* MW 7 */ + 3632 "00101000" // /* MW 6 */ + 3633 "00000011" // /* MW 5 */ + 3634 "00000100" // /* MW 4 */ + 3635 "01110000" // /* MW 3 */ + 3636 "00100101" // /* MW 2 */ + 3637 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3638 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3639 "00101011" // /* MW 3 */ + 3640 "00001000" // /* MW 2 */ + 3641 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3642 "01100010" // VLDA.CONV.fp32.bf16 cml4, [p1], m1; VADD.f dm4, dm0, dm4, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3643 "00111101" // /* MW 7 */ + 3644 "00010000" // /* MW 6 */ + 3645 "00000100" // /* MW 5 */ + 3646 "00000100" // /* MW 4 */ + 3647 "01110000" // /* MW 3 */ + 3648 "01000101" // /* MW 2 */ + 3649 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3650 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3651 "10101011" // /* MW 3 */ + 3652 "00001000" // /* MW 2 */ + 3653 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3654 "01100010" // VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3655 "00111101" // /* MW 7 */ + 3656 "00101000" // /* MW 6 */ + 3657 "00000011" // /* MW 5 */ + 3658 "00000100" // /* MW 4 */ + 3659 "01110000" // /* MW 3 */ + 3660 "00100101" // /* MW 2 */ + 3661 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3662 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3663 "00101011" // /* MW 3 */ + 3664 "00001000" // /* MW 2 */ + 3665 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3666 "01101110" // VLDA.CONV.fp32.bf16 cml4, [p1], m1; VST.CONV.bf16.fp32 cml3, [p2], #64; NOPM; VADD.f dm4, dm0, dm4, r0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 3667 "00111101" // /* MW 13 */ + 3668 "00010000" // /* MW 12 */ + 3669 "00000100" // /* MW 11 */ + 3670 "01010111" // /* MW 10 */ + 3671 "00011010" // /* MW 9 */ + 3672 "01000000" // /* MW 8 */ + 3673 "00000000" // /* MW 7 */ + 3674 "00000000" // /* MW 6 */ + 3675 "01000110" // /* MW 5 */ + 3676 "00111011" // /* MW 4 */ + 3677 "01110100" // /* MW 3 */ + 3678 "01000101" // /* MW 2 */ + 3679 "00100101" // /* MW 1 */ +.label ZLS_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_240 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 3680 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3681 "10101011" // /* MW 3 */ + 3682 "00001000" // /* MW 2 */ + 3683 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3684 "01100110" // VLDA.CONV.fp32.bf16 cml2, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3685 "00111101" // /* MW 11 */ + 3686 "00101000" // /* MW 10 */ + 3687 "00000011" // /* MW 9 */ + 3688 "10001110" // /* MW 8 */ + 3689 "00010001" // /* MW 7 */ + 3690 "00001111" // /* MW 6 */ + 3691 "00100001" // /* MW 5 */ + 3692 "00000000" // /* MW 4 */ + 3693 "01110000" // /* MW 3 */ + 3694 "00100101" // /* MW 2 */ + 3695 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3696 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3697 "00000000" // /* MW 15 */ + 3698 "00000000" // /* MW 14 */ + 3699 "01111000" // /* MW 13 */ + 3700 "10100101" // /* MW 12 */ + 3701 "00000001" // /* MW 11 */ + 3702 "00000000" // /* MW 10 */ + 3703 "00000000" // /* MW 9 */ + 3704 "00000000" // /* MW 8 */ + 3705 "01011011" // /* MW 7 */ + 3706 "00000001" // /* MW 6 */ + 3707 "00100000" // /* MW 5 */ + 3708 "00000000" // /* MW 4 */ + 3709 "01110000" // /* MW 3 */ + 3710 "00000101" // /* MW 2 */ + 3711 "00000001" // /* MW 1 */ +.label ZLE_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_272 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3712 "11101011" // VLDA.CONV.fp32.bf16 cml4, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml3, [p2], #64;NOPX; NOPM; VADD.f dm4, dm0, dm4, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3713 "10000001" // /* MW 15 */ + 3714 "00100000" // /* MW 14 */ + 3715 "01111000" // /* MW 13 */ + 3716 "10100101" // /* MW 12 */ + 3717 "00000001" // /* MW 11 */ + 3718 "00000000" // /* MW 10 */ + 3719 "00000000" // /* MW 9 */ + 3720 "00000000" // /* MW 8 */ + 3721 "10100011" // /* MW 7 */ + 3722 "00011101" // /* MW 6 */ + 3723 "00100010" // /* MW 5 */ + 3724 "00000000" // /* MW 4 */ + 3725 "01110000" // /* MW 3 */ + 3726 "01000101" // /* MW 2 */ + 3727 "00100101" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 3728 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3729 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3730 "01100010" // VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3731 "00111101" // /* MW 7 */ + 3732 "00101000" // /* MW 6 */ + 3733 "00000011" // /* MW 5 */ + 3734 "00000010" // /* MW 4 */ + 3735 "01100000" // /* MW 3 */ + 3736 "11000100" // /* MW 2 */ + 3737 "01000011" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3738 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3739 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3740 "01100010" // VST.CONV.bf16.fp32 cml3, [p2], #64; VADD.f dm4, dm0, dm4, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3741 "00111101" // /* MW 7 */ + 3742 "00010000" // /* MW 6 */ + 3743 "00000100" // /* MW 5 */ + 3744 "00000010" // /* MW 4 */ + 3745 "01100000" // /* MW 3 */ + 3746 "10110100" // /* MW 2 */ + 3747 "01000011" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3749 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 154 20 first +.src_ref 2 "elementwise_binary_broadcasting.h" 80 4 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3750 "01011100" // VST.CONV.bf16.fp32 cml4, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 3751 "00000000" // /* MW 5 */ + 3752 "01010000" // /* MW 4 */ + 3753 "01100000" // /* MW 3 */ + 3754 "11000100" // /* MW 2 */ + 3755 "01000011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3756 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3757 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.delay_slot + 3758 "00011000" // VST.CONV.bf16.fp32 cml3, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3759 "10100011" // /* MW 3 */ + 3760 "00011101" // /* MW 2 */ + 3761 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3762 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3763 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 154 20 first +.delay_slot + 3764 "00011000" // VST.CONV.bf16.fp32 cml4, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3765 "00100011" // /* MW 3 */ + 3766 "00011110" // /* MW 2 */ + 3767 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0 + 3769 "00000000" // /* MW 1 */ +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E +.function run _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 41 +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 41 first +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 76 8 +.function_start + 3776 "00111010" // MOVS p2, p1; PADDXM [sp], #128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3777 "01110001" // /* MW 9 */ + 3778 "00000000" // /* MW 8 */ + 3779 "00000000" // /* MW 7 */ + 3780 "00000000" // /* MW 6 */ + 3781 "00000100" // /* MW 5 */ + 3782 "00000000" // /* MW 4 */ + 3783 "01100000" // /* MW 3 */ + 3784 "10010001" // /* MW 2 */ + 3785 "01010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 51 35 + 3786 "00000010" // ST lr, [sp, #-4]; MOV r16, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3787 "01110000" // /* MW 7 */ + 3788 "01100000" // /* MW 6 */ + 3789 "00001000" // /* MW 5 */ + 3790 "00000010" // /* MW 4 */ + 3791 "10110000" // /* MW 3 */ + 3792 "10000111" // /* MW 2 */ + 3793 "11111111" // /* MW 1 */ + 3794 "11111000" // MOV r17, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3795 "11100000" // /* MW 3 */ + 3796 "01010101" // /* MW 2 */ + 3797 "00011100" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 51 19 + 3798 "01000100" // MOVXM p3, #508684 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3799 "00011000" // /* MW 5 */ + 3800 "11000110" // /* MW 4 */ + 3801 "11000110" // /* MW 3 */ + 3802 "00000111" // /* MW 2 */ + 3803 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 51 19 first + 3804 "00010100" // LDA.u8 r27, [p3], #2; ADD.NC p0, r17, #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3805 "10000000" // /* MW 5 */ + 3806 "11010001" // /* MW 4 */ + 3807 "01010000" // /* MW 3 */ + 3808 "11101101" // /* MW 2 */ + 3809 "01100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 538 13 first +.src_ref 4 "vector_native_types.hpp" 374 137 first + 3810 "00001100" // LDA.s16 r18, [p3], #-14; VST sfh, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3811 "01010110" // /* MW 5 */ + 3812 "00001110" // /* MW 4 */ + 3813 "01010000" // /* MW 3 */ + 3814 "11001010" // /* MW 2 */ + 3815 "01110011" // /* MW 1 */ + 3816 "00011000" // ST.s16 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3817 "01010111" // /* MW 3 */ + 3818 "00000110" // /* MW 2 */ + 3819 "00000000" // /* MW 1 */ + 3820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3821 "00000000" // /* MW 1 */ + 3822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3823 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 76 8 first +.no_stack_arguments + 3824 "00000100" // JL #3440 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3440 delay_slots=5 */ + 3825 "00000001" // /* MW 5 */ + 3826 "00000000" // /* MW 4 */ + 3827 "10111000" // /* MW 3 */ + 3828 "00000110" // /* MW 2 */ + 3829 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 51 35 +.delay_slot + 3830 "11111000" // MOV r17, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3831 "11000000" // /* MW 3 */ + 3832 "01010000" // /* MW 2 */ + 3833 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3835 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 51 35 first +.delay_slot + 3836 "00011000" // SEL.EQZ r18, r16, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3837 "00010010" // /* MW 3 */ + 3838 "00100101" // /* MW 2 */ + 3839 "00010100" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 51 35 +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 76 8 +.delay_slot + 3840 "11100100" // SEL.EQZ r16, r17, r16, r27; MOV p1, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3841 "01000001" // /* MW 5 */ + 3842 "11010010" // /* MW 4 */ + 3843 "01000010" // /* MW 3 */ + 3844 "00100000" // /* MW 2 */ + 3845 "10001100" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 76 8 +.delay_slot + 3846 "10111010" // NOPA; NOPB; MOV p0, r16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3847 "01111110" // /* MW 9 */ + 3848 "00010000" // /* MW 8 */ + 3849 "00110100" // /* MW 7 */ + 3850 "00000000" // /* MW 6 */ + 3851 "00010000" // /* MW 5 */ + 3852 "00000000" // /* MW 4 */ + 3853 "11110000" // /* MW 3 */ + 3854 "00101100" // /* MW 2 */ + 3855 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 77 4 +.return_address + 3856 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3857 "00111001" // /* MW 3 */ + 3858 "11111100" // /* MW 2 */ + 3859 "00000111" // /* MW 1 */ + 3860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3861 "00000000" // /* MW 1 */ + 3862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3863 "00000000" // /* MW 1 */ + 3864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3865 "00000000" // /* MW 1 */ + 3866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3867 "00000000" // /* MW 1 */ + 3868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3869 "00000000" // /* MW 1 */ + 3870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3871 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 77 4 first + 3872 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3873 "00000000" // /* MW 3 */ + 3874 "00101000" // /* MW 2 */ + 3875 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 77 4 +.delay_slot + 3876 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3877 "00000001" // /* MW 5 */ + 3878 "00000000" // /* MW 4 */ + 3879 "00000000" // /* MW 3 */ + 3880 "11110000" // /* MW 2 */ + 3881 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3883 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3885 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3887 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_end0 + 3889 "00000000" // /* MW 1 */ +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_add1d_attribute_broadcasting _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 6 "superkernels.cpp" 152 first +.src_ref 6 "superkernels.cpp" 157 6 +.function_start + 3904 "01000100" // MOVXM p3, #508416 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3905 "00000000" // /* MW 5 */ + 3906 "11000100" // /* MW 4 */ + 3907 "11000110" // /* MW 3 */ + 3908 "00000111" // /* MW 2 */ + 3909 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 157 6 first + 3910 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3911 "11000001" // /* MW 5 */ + 3912 "10110101" // /* MW 4 */ + 3913 "11011000" // /* MW 3 */ + 3914 "11000010" // /* MW 2 */ + 3915 "01100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 152 + 3916 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3917 "00000001" // /* MW 5 */ + 3918 "00000000" // /* MW 4 */ + 3919 "00000000" // /* MW 3 */ + 3920 "00001000" // /* MW 2 */ + 3921 "00000000" // /* MW 1 */ + 3922 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3923 "01110000" // /* MW 7 */ + 3924 "11010000" // /* MW 6 */ + 3925 "00001011" // /* MW 5 */ + 3926 "00000000" // /* MW 4 */ + 3927 "10110000" // /* MW 3 */ + 3928 "01100011" // /* MW 2 */ + 3929 "11111111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 154 11 + 3930 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #508420 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3931 "00010001" // /* MW 9 */ + 3932 "00000010" // /* MW 8 */ + 3933 "00110001" // /* MW 7 */ + 3934 "11110011" // /* MW 6 */ + 3935 "00000001" // /* MW 5 */ + 3936 "00000000" // /* MW 4 */ + 3937 "10110000" // /* MW 3 */ + 3938 "10000010" // /* MW 2 */ + 3939 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 + 3940 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3941 "11000000" // /* MW 3 */ + 3942 "11010100" // /* MW 2 */ + 3943 "00011011" // /* MW 1 */ + 3944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3945 "00000000" // /* MW 1 */ + 3946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3947 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 157 6 +.src_ref 6 "superkernels.cpp" 157 16 + 3948 "10000100" // JNZ r16, #4112 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4112 delay_slots=5 */ + 3949 "00000001" // /* MW 5 */ + 3950 "01000000" // /* MW 4 */ + 3951 "00001000" // /* MW 3 */ + 3952 "00001000" // /* MW 2 */ + 3953 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 154 22 first +.delay_slot + 3954 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3955 "10010000" // /* MW 3 */ + 3956 "01100010" // /* MW 2 */ + 3957 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 154 30 +.delay_slot + 3958 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3959 "11111011" // /* MW 3 */ + 3960 "01100011" // /* MW 2 */ + 3961 "00010100" // /* MW 1 */ +.delay_slot + 3962 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3963 "00111101" // /* MW 3 */ + 3964 "11110100" // /* MW 2 */ + 3965 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 6 "superkernels.cpp" 154 11 +.delay_slot + 3966 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3967 "01110000" // /* MW 7 */ + 3968 "01100000" // /* MW 6 */ + 3969 "00110000" // /* MW 5 */ + 3970 "00000011" // /* MW 4 */ + 3971 "00110000" // /* MW 3 */ + 3972 "11000110" // /* MW 2 */ + 3973 "11000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 160 4 +.src_ref 6 "superkernels.cpp" 171 2 +.delay_slot + 3974 "01000100" // MOVXM p0, #508672 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3975 "00000000" // /* MW 5 */ + 3976 "11000110" // /* MW 4 */ + 3977 "11000000" // /* MW 3 */ + 3978 "00000111" // /* MW 2 */ + 3979 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3980 "01000100" // MOVXM p2, #508448 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3981 "01000000" // /* MW 5 */ + 3982 "11000100" // /* MW 4 */ + 3983 "11000100" // /* MW 3 */ + 3984 "00000111" // /* MW 2 */ + 3985 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3986 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #508444 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3987 "00010000" // /* MW 9 */ + 3988 "00001110" // /* MW 8 */ + 3989 "00110001" // /* MW 7 */ + 3990 "11110001" // /* MW 6 */ + 3991 "00000001" // /* MW 5 */ + 3992 "00000000" // /* MW 4 */ + 3993 "11100000" // /* MW 3 */ + 3994 "11000000" // /* MW 2 */ + 3995 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3997 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 160 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 3998 "00000100" // JL #3376 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3376 delay_slots=5 */ + 3999 "00000001" // /* MW 5 */ + 4000 "00000000" // /* MW 4 */ + 4001 "10011000" // /* MW 3 */ + 4002 "00000110" // /* MW 2 */ + 4003 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4005 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4007 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4008 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4009 "00110001" // /* MW 3 */ + 4010 "00100000" // /* MW 2 */ + 4011 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 4012 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4013 "00000101" // /* MW 3 */ + 4014 "00100000" // /* MW 2 */ + 4015 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 4016 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4017 "00000000" // /* MW 15 */ + 4018 "00000000" // /* MW 14 */ + 4019 "01111000" // /* MW 13 */ + 4020 "10100101" // /* MW 12 */ + 4021 "00000001" // /* MW 11 */ + 4022 "00000000" // /* MW 10 */ + 4023 "00000000" // /* MW 9 */ + 4024 "10000000" // /* MW 8 */ + 4025 "00010001" // /* MW 7 */ + 4026 "00000110" // /* MW 6 */ + 4027 "00100010" // /* MW 5 */ + 4028 "00000000" // /* MW 4 */ + 4029 "11110000" // /* MW 3 */ + 4030 "00101100" // /* MW 2 */ + 4031 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 164 18 +.return_address + 4032 "01000100" // MOVXM p2, #508420 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4033 "00001000" // /* MW 5 */ + 4034 "11000100" // /* MW 4 */ + 4035 "11000100" // /* MW 3 */ + 4036 "00000111" // /* MW 2 */ + 4037 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 164 18 first +.src_ref 6 "superkernels.cpp" 164 65 + 4038 "10111010" // LDA r16, [p2]; MOVXM p2, #508672 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4039 "00010000" // /* MW 9 */ + 4040 "10000000" // /* MW 8 */ + 4041 "00110001" // /* MW 7 */ + 4042 "11110001" // /* MW 6 */ + 4043 "00000001" // /* MW 5 */ + 4044 "00000000" // /* MW 4 */ + 4045 "11010000" // /* MW 3 */ + 4046 "11000010" // /* MW 2 */ + 4047 "01000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 162 51 +.src_ref 6 "superkernels.cpp" 164 65 +.src_ref 6 "superkernels.cpp" 171 2 + 4048 "10111010" // LDA r17, [p2]; MOVXM p2, #508672 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4049 "00010000" // /* MW 9 */ + 4050 "10000000" // /* MW 8 */ + 4051 "00110001" // /* MW 7 */ + 4052 "11110001" // /* MW 6 */ + 4053 "00000001" // /* MW 5 */ + 4054 "00000000" // /* MW 4 */ + 4055 "11010000" // /* MW 3 */ + 4056 "11000110" // /* MW 2 */ + 4057 "01000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 162 51 first +.src_ref 6 "superkernels.cpp" 164 16 +.src_ref 6 "superkernels.cpp" 169 47 + 4058 "10111010" // LDA.u16 r18, [p2, #10]; MOVXM p1, #508424 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4059 "00010000" // /* MW 9 */ + 4060 "00000100" // /* MW 8 */ + 4061 "10110001" // /* MW 7 */ + 4062 "11110000" // /* MW 6 */ + 4063 "00000001" // /* MW 5 */ + 4064 "00000000" // /* MW 4 */ + 4065 "01010000" // /* MW 3 */ + 4066 "11001011" // /* MW 2 */ + 4067 "01001010" // /* MW 1 */ + 4068 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4069 "00000000" // /* MW 1 */ + 4070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4071 "00000000" // /* MW 1 */ + 4072 "10000100" // J #4128 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=4128 delay_slots=5 */ + 4073 "00000000" // /* MW 5 */ + 4074 "00000000" // /* MW 4 */ + 4075 "00010000" // /* MW 3 */ + 4076 "00001000" // /* MW 2 */ + 4077 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 162 13 +.delay_slot + 4078 "01000100" // MOVXM p0, #508440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4079 "00110000" // /* MW 5 */ + 4080 "11000100" // /* MW 4 */ + 4081 "11000000" // /* MW 3 */ + 4082 "00000111" // /* MW 2 */ + 4083 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4085 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 164 27 first +.delay_slot + 4086 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4087 "00001111" // /* MW 3 */ + 4088 "01100001" // /* MW 2 */ + 4089 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 162 13 first +.delay_slot + 4090 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4091 "10100011" // /* MW 5 */ + 4092 "00001100" // /* MW 4 */ + 4093 "11110000" // /* MW 3 */ + 4094 "00101100" // /* MW 2 */ + 4095 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 164 16 first +.delay_slot + 4096 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4097 "00000000" // /* MW 15 */ + 4098 "00000000" // /* MW 14 */ + 4099 "01111000" // /* MW 13 */ + 4100 "10100101" // /* MW 12 */ + 4101 "00000001" // /* MW 11 */ + 4102 "00000000" // /* MW 10 */ + 4103 "00000000" // /* MW 9 */ + 4104 "10000000" // /* MW 8 */ + 4105 "00010001" // /* MW 7 */ + 4106 "00000110" // /* MW 6 */ + 4107 "00100001" // /* MW 5 */ + 4108 "00000000" // /* MW 4 */ + 4109 "11110000" // /* MW 3 */ + 4110 "00101100" // /* MW 2 */ + 4111 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 6 "superkernels.cpp" 169 47 +.src_ref 6 "superkernels.cpp" 171 2 + 4112 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #508424; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4113 "00000000" // /* MW 15 */ + 4114 "00000000" // /* MW 14 */ + 4115 "00010000" // /* MW 13 */ + 4116 "00000100" // /* MW 12 */ + 4117 "10110001" // /* MW 11 */ + 4118 "11110000" // /* MW 10 */ + 4119 "00000001" // /* MW 9 */ + 4120 "00000000" // /* MW 8 */ + 4121 "10001011" // /* MW 7 */ + 4122 "10000000" // /* MW 6 */ + 4123 "00100010" // /* MW 5 */ + 4124 "00000000" // /* MW 4 */ + 4125 "11110000" // /* MW 3 */ + 4126 "00101100" // /* MW 2 */ + 4127 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 +.src_ref 1 "io_buffer_main.h" 242 49 first + 4128 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4129 "00000000" // /* MW 7 */ + 4130 "11000011" // /* MW 6 */ + 4131 "10110011" // /* MW 5 */ + 4132 "00000011" // /* MW 4 */ + 4133 "01100000" // /* MW 3 */ + 4134 "10010001" // /* MW 2 */ + 4135 "01110011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 168 2 + 4136 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4137 "00010000" // /* MW 9 */ + 4138 "00000000" // /* MW 8 */ + 4139 "00110001" // /* MW 7 */ + 4140 "11110000" // /* MW 6 */ + 4141 "00000001" // /* MW 5 */ + 4142 "00000000" // /* MW 4 */ + 4143 "11010000" // /* MW 3 */ + 4144 "11101110" // /* MW 2 */ + 4145 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 4146 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4147 "00010110" // /* MW 3 */ + 4148 "11111110" // /* MW 2 */ + 4149 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 4150 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4151 "00110110" // /* MW 3 */ + 4152 "11111110" // /* MW 2 */ + 4153 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first + 4154 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4155 "01010110" // /* MW 3 */ + 4156 "01000110" // /* MW 2 */ + 4157 "00000111" // /* MW 1 */ + 4158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4159 "00000000" // /* MW 1 */ + 4160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4161 "00000000" // /* MW 1 */ + 4162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4163 "00000000" // /* MW 1 */ + 4164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4165 "00000000" // /* MW 1 */ + 4166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4167 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 4168 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4169 "00000010" // /* MW 3 */ + 4170 "01100001" // /* MW 2 */ + 4171 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 + 4172 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4173 "00010001" // /* MW 3 */ + 4174 "00000110" // /* MW 2 */ + 4175 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 + 4176 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4177 "11111101" // /* MW 3 */ + 4178 "11100000" // /* MW 2 */ + 4179 "00010111" // /* MW 1 */ + 4180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4181 "00000000" // /* MW 1 */ + 4182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4183 "00000000" // /* MW 1 */ + 4184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4185 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 4186 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4187 "00001000" // /* MW 3 */ + 4188 "10010011" // /* MW 2 */ + 4189 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 169 45 + 4190 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4191 "10000001" // /* MW 5 */ + 4192 "10101101" // /* MW 4 */ + 4193 "10100111" // /* MW 3 */ + 4194 "00000000" // /* MW 2 */ + 4195 "00000100" // /* MW 1 */ + 4196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4197 "00000000" // /* MW 1 */ + 4198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4199 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 168 2 first + 4200 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4201 "00110110" // /* MW 3 */ + 4202 "00000110" // /* MW 2 */ + 4203 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 1 "io_buffer_main.h" 348 51 + 4204 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4205 "10000001" // /* MW 5 */ + 4206 "11011101" // /* MW 4 */ + 4207 "11011100" // /* MW 3 */ + 4208 "11001010" // /* MW 2 */ + 4209 "11000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 169 47 first + 4210 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4211 "01110110" // /* MW 3 */ + 4212 "00000110" // /* MW 2 */ + 4213 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 4214 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4215 "10011110" // /* MW 3 */ + 4216 "01011100" // /* MW 2 */ + 4217 "00000111" // /* MW 1 */ + 4218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4219 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 171 2 first +.no_stack_arguments + 4220 "00000100" // JL #3776 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3776 delay_slots=5 */ + 4221 "00000001" // /* MW 5 */ + 4222 "00000000" // /* MW 4 */ + 4223 "01100000" // /* MW 3 */ + 4224 "00000111" // /* MW 2 */ + 4225 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4226 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4227 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 168 2 first +.delay_slot + 4228 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4229 "00000111" // /* MW 3 */ + 4230 "01100010" // /* MW 2 */ + 4231 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 168 2 +.delay_slot + 4232 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4233 "00110001" // /* MW 3 */ + 4234 "00000110" // /* MW 2 */ + 4235 "00001000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 169 45 first +.delay_slot + 4236 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4237 "00001101" // /* MW 3 */ + 4238 "11100001" // /* MW 2 */ + 4239 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 169 45 +.delay_slot + 4240 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4241 "00000000" // /* MW 15 */ + 4242 "00000000" // /* MW 14 */ + 4243 "10101000" // /* MW 13 */ + 4244 "10100000" // /* MW 12 */ + 4245 "00110100" // /* MW 11 */ + 4246 "00000000" // /* MW 10 */ + 4247 "00000000" // /* MW 9 */ + 4248 "00000000" // /* MW 8 */ + 4249 "01011011" // /* MW 7 */ + 4250 "00000001" // /* MW 6 */ + 4251 "00100000" // /* MW 5 */ + 4252 "00000000" // /* MW 4 */ + 4253 "11110000" // /* MW 3 */ + 4254 "00101100" // /* MW 2 */ + 4255 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first +.src_ref 6 "superkernels.cpp" 173 6 +.src_ref 6 "superkernels.cpp" 174 14 +.return_address + 4256 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4257 "00010000" // /* MW 9 */ + 4258 "00000000" // /* MW 8 */ + 4259 "00110001" // /* MW 7 */ + 4260 "11110011" // /* MW 6 */ + 4261 "00000001" // /* MW 5 */ + 4262 "00000000" // /* MW 4 */ + 4263 "11010000" // /* MW 3 */ + 4264 "11000110" // /* MW 2 */ + 4265 "11001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 449 8 + 4266 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4267 "00000101" // /* MW 3 */ + 4268 "00100000" // /* MW 2 */ + 4269 "00010000" // /* MW 1 */ + 4270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4271 "00000000" // /* MW 1 */ + 4272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4273 "00000000" // /* MW 1 */ + 4274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4275 "00000000" // /* MW 1 */ + 4276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4277 "00000000" // /* MW 1 */ + 4278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4279 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 4280 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4281 "00001000" // /* MW 3 */ + 4282 "01010001" // /* MW 2 */ + 4283 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first +.src_ref 6 "superkernels.cpp" 173 19 + 4284 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #508440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4285 "00010000" // /* MW 9 */ + 4286 "00001100" // /* MW 8 */ + 4287 "00110001" // /* MW 7 */ + 4288 "11110001" // /* MW 6 */ + 4289 "00000001" // /* MW 5 */ + 4290 "00000000" // /* MW 4 */ + 4291 "11010000" // /* MW 3 */ + 4292 "11001110" // /* MW 2 */ + 4293 "11111100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 173 6 first + 4294 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4295 "00110110" // /* MW 3 */ + 4296 "00000110" // /* MW 2 */ + 4297 "00000110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 173 19 + 4298 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4299 "01010110" // /* MW 3 */ + 4300 "00000110" // /* MW 2 */ + 4301 "00000010" // /* MW 1 */ + 4302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4303 "00000000" // /* MW 1 */ + 4304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4305 "00000000" // /* MW 1 */ + 4306 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4307 "00000000" // /* MW 1 */ + 4308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4309 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 first + 4310 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4311 "00110001" // /* MW 3 */ + 4312 "00100001" // /* MW 2 */ + 4313 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 4314 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4315 "00010001" // /* MW 3 */ + 4316 "11100110" // /* MW 2 */ + 4317 "00001111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 173 16 first + 4318 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4319 "00101000" // /* MW 3 */ + 4320 "01100001" // /* MW 2 */ + 4321 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 173 6 + 4322 "10000100" // JNZ r16, #4352 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4352 delay_slots=5 */ + 4323 "00000001" // /* MW 5 */ + 4324 "01000000" // /* MW 4 */ + 4325 "10000000" // /* MW 3 */ + 4326 "00001000" // /* MW 2 */ + 4327 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4329 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4331 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4333 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4335 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4337 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 174 14 + 4338 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4339 "00000001" // /* MW 3 */ + 4340 "00100000" // /* MW 2 */ + 4341 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 174 14 first + 4342 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4343 "00000000" // /* MW 9 */ + 4344 "00000000" // /* MW 8 */ + 4345 "00000000" // /* MW 7 */ + 4346 "10000000" // /* MW 6 */ + 4347 "00010001" // /* MW 5 */ + 4348 "00000110" // /* MW 4 */ + 4349 "11110110" // /* MW 3 */ + 4350 "00101100" // /* MW 2 */ + 4351 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 6 "superkernels.cpp" 176 + 4352 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4353 "00111001" // /* MW 3 */ + 4354 "11110100" // /* MW 2 */ + 4355 "00000111" // /* MW 1 */ + 4356 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4357 "00011001" // /* MW 3 */ + 4358 "11111011" // /* MW 2 */ + 4359 "00000111" // /* MW 1 */ + 4360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4361 "00000000" // /* MW 1 */ + 4362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4363 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 4364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4365 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4366 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4367 "11110001" // /* MW 3 */ + 4368 "11111101" // /* MW 2 */ + 4369 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4371 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 176 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4372 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 4373 "00000000" // /* MW 3 */ + 4374 "00101000" // /* MW 2 */ + 4375 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4376 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4377 "10100000" // /* MW 3 */ + 4378 "01100111" // /* MW 2 */ + 4379 "00011111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 176 +.delay_slot + 4380 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4381 "00000001" // /* MW 5 */ + 4382 "00000000" // /* MW 4 */ + 4383 "00000000" // /* MW 3 */ + 4384 "11111000" // /* MW 2 */ + 4385 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4387 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4389 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 4391 "00000000" // /* MW 1 */ +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv +.function setup _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv +.src_ref 2 "elementwise_unary.h" 95 first +.src_ref 2 "elementwise_unary.h" 97 22 +.src_ref 2 "elementwise_unary.h" 97 24 first +.function_start + 4400 "10111010" // LDA el0, [p1], #4; MOVXM p0, #508800 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4401 "00010000" // /* MW 9 */ + 4402 "11000000" // /* MW 8 */ + 4403 "00110001" // /* MW 7 */ + 4404 "11110000" // /* MW 6 */ + 4405 "00000001" // /* MW 5 */ + 4406 "00000000" // /* MW 4 */ + 4407 "11010000" // /* MW 3 */ + 4408 "10000101" // /* MW 2 */ + 4409 "00100011" // /* MW 1 */ + 4410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4411 "00000000" // /* MW 1 */ + 4412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4413 "00000000" // /* MW 1 */ + 4414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4415 "00000000" // /* MW 1 */ + 4416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4417 "00000000" // /* MW 1 */ + 4418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4419 "00000000" // /* MW 1 */ + 4420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4421 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 97 22 first + 4422 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4423 "00101001" // /* MW 3 */ + 4424 "00011100" // /* MW 2 */ + 4425 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 98 24 first + 4426 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4427 "00101110" // /* MW 3 */ + 4428 "00000100" // /* MW 2 */ + 4429 "00000001" // /* MW 1 */ + 4430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4431 "00000000" // /* MW 1 */ + 4432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4433 "00000000" // /* MW 1 */ + 4434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4435 "00000000" // /* MW 1 */ + 4436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4437 "00000000" // /* MW 1 */ + 4438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4439 "00000000" // /* MW 1 */ + 4440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4441 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 98 22 + 4442 "10011000" // ST el0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4443 "00101001" // /* MW 3 */ + 4444 "00000100" // /* MW 2 */ + 4445 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 99 24 first + 4446 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4447 "00101110" // /* MW 3 */ + 4448 "00010100" // /* MW 2 */ + 4449 "00000001" // /* MW 1 */ + 4450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4451 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 101 4 first + 4452 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 4453 "00000000" // /* MW 3 */ + 4454 "00101000" // /* MW 2 */ + 4455 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4457 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4459 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4461 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4463 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 99 22 first +.delay_slot + 4464 "10011000" // ST el0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4465 "00101001" // /* MW 3 */ + 4466 "00010100" // /* MW 2 */ +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv__end +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_end0 + 4467 "00001000" // /* MW 1 */ +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E +.function run _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_unary.h" 107 first +.src_ref 2 "elementwise_unary.h" 113 37 +.src_ref 2 "elementwise_unary.h" 113 78 +.src_ref 2 "elementwise_unary.h" 142 19 +.function_start + 4480 "10110110" // MOVA r0, #-6; VLDB x10, [p0], #64; MOVXM p2, #508800 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4481 "00010000" // /* MW 11 */ + 4482 "11000000" // /* MW 10 */ + 4483 "00110001" // /* MW 9 */ + 4484 "11110001" // /* MW 8 */ + 4485 "00000001" // /* MW 7 */ + 4486 "00000000" // /* MW 6 */ + 4487 "01101000" // /* MW 5 */ + 4488 "00111101" // /* MW 4 */ + 4489 "00000000" // /* MW 3 */ + 4490 "01000000" // /* MW 2 */ + 4491 "11111111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_unary.h" 113 37 first +.src_ref 2 "elementwise_unary.h" 161 19 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4492 "10110110" // LDA r2, [p2]; VLDB x7, [p0], #64; MOVXM p2, #508448 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4493 "00010000" // /* MW 11 */ + 4494 "00010000" // /* MW 10 */ + 4495 "00110001" // /* MW 9 */ + 4496 "11110001" // /* MW 8 */ + 4497 "00000001" // /* MW 7 */ + 4498 "00000000" // /* MW 6 */ + 4499 "11101000" // /* MW 5 */ + 4500 "00111011" // /* MW 4 */ + 4501 "11010000" // /* MW 3 */ + 4502 "10001010" // /* MW 2 */ + 4503 "01000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_unary.h" 142 19 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4504 "10110110" // LDA.s8 r1, [p2]; VLDB x10, [p0], #64; MOVXM r6, #16512 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4505 "00010000" // /* MW 11 */ + 4506 "01000000" // /* MW 10 */ + 4507 "11001000" // /* MW 9 */ + 4508 "00010000" // /* MW 8 */ + 4509 "00000000" // /* MW 7 */ + 4510 "00000000" // /* MW 6 */ + 4511 "01101000" // /* MW 5 */ + 4512 "00111101" // /* MW 4 */ + 4513 "01010000" // /* MW 3 */ + 4514 "10000100" // /* MW 2 */ + 4515 "01000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_unary.h" 161 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4516 "11110100" // VLDB x7, [p0], #64; VBCST.16 x0, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4517 "11100101" // /* MW 5 */ + 4518 "00110010" // /* MW 4 */ + 4519 "10000000" // /* MW 3 */ + 4520 "10111110" // /* MW 2 */ + 4521 "00000011" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4522 "01000100" // MOVXM r4, #49280 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4523 "00000000" // /* MW 5 */ + 4524 "00100001" // /* MW 4 */ + 4525 "11000010" // /* MW 3 */ + 4526 "00000000" // /* MW 2 */ + 4527 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4528 "11111000" // VBCST.16 x1, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4529 "01110010" // /* MW 3 */ + 4530 "10010001" // /* MW 2 */ + 4531 "00011000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4532 "01000100" // MOVXM r3, #32767 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4533 "11111110" // /* MW 5 */ + 4534 "10111111" // /* MW 4 */ + 4535 "01110001" // /* MW 3 */ + 4536 "00000000" // /* MW 2 */ + 4537 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4538 "11111000" // VMIN_GE.bf16 x8, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4539 "00101100" // /* MW 3 */ + 4540 "01010000" // /* MW 2 */ + 4541 "00011100" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 2 "elementwise_unary.h" 113 78 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4542 "11100100" // LSHL r0, r2, r0; VMAX_LT.bf16 x6, r16, x8, x1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4543 "11011001" // /* MW 5 */ + 4544 "10000001" // /* MW 4 */ + 4545 "10110110" // /* MW 3 */ + 4546 "00000001" // /* MW 2 */ + 4547 "00010000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_unary.h" 147 20 +.src_ref 2 "elementwise_unary.h" 166 20 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4548 "11100100" // MOVX crRnd, r1; VMIN_GE.bf16 x8, r16, x7, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4549 "01011001" // /* MW 5 */ + 4550 "01110000" // /* MW 4 */ + 4551 "00001000" // /* MW 3 */ + 4552 "01010000" // /* MW 2 */ + 4553 "00001111" // /* MW 1 */ + 4554 "11111000" // VBCST.16 x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4555 "01110010" // /* MW 3 */ + 4556 "00001101" // /* MW 2 */ + 4557 "00011001" // /* MW 1 */ + 4558 "01000100" // MOVXM r5, #15616 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4559 "00000000" // /* MW 5 */ + 4560 "10111010" // /* MW 4 */ + 4561 "00110010" // /* MW 3 */ + 4562 "00000000" // /* MW 2 */ + 4563 "00000000" // /* MW 1 */ + 4564 "11111000" // VBCST.16 x3, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4565 "01110010" // /* MW 3 */ + 4566 "10010101" // /* MW 2 */ + 4567 "00011001" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 + 4568 "01000100" // MOVXM r17, #16128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4569 "00000000" // /* MW 5 */ + 4570 "10111110" // /* MW 4 */ + 4571 "00111000" // /* MW 3 */ + 4572 "00000000" // /* MW 2 */ + 4573 "00000000" // /* MW 1 */ +.src_ref 4 "abs.hpp" 32 22 first + 4574 "01111000" // VBAND x11, x6, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4575 "00101011" // /* MW 3 */ + 4576 "10110001" // /* MW 2 */ + 4577 "00011101" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 5 "mul_acc32_fp.hpp" 38 105 +.src_ref 5 "mul_acc32_fp.hpp" 38 105 +.src_ref 5 "mul_acc32_fp.hpp" 39 105 +.src_ref 5 "mul_acc32_fp.hpp" 39 105 + 4578 "11100100" // MOVX r17, #828; VBCST.16 x5, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4579 "11100101" // /* MW 5 */ + 4580 "10001010" // /* MW 4 */ + 4581 "00100101" // /* MW 3 */ + 4582 "01011110" // /* MW 2 */ + 4583 "01100100" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first + 4584 "01100010" // VMAX_LT.bf16 x9, r16, x8, x1; VMUL.f dm4, x3, x11, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4585 "01100001" // /* MW 7 */ + 4586 "11100111" // /* MW 6 */ + 4587 "10001100" // /* MW 5 */ + 4588 "11100110" // /* MW 4 */ + 4589 "11101100" // /* MW 3 */ + 4590 "11000000" // /* MW 2 */ + 4591 "00000100" // /* MW 1 */ +.src_ref 4 "abs.hpp" 32 22 first + 4592 "01111000" // VBAND x8, x9, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4593 "00101011" // /* MW 3 */ + 4594 "01001001" // /* MW 2 */ + 4595 "00011100" // /* MW 1 */ + 4596 "01000100" // MOVXM r2, #16000 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4597 "00000000" // /* MW 5 */ + 4598 "00111101" // /* MW 4 */ + 4599 "00110001" // /* MW 3 */ + 4600 "00000000" // /* MW 2 */ + 4601 "00000000" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first + 4602 "01100010" // VBCST.16 x4, r2; VMUL.f dm2, x3, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4603 "00000001" // /* MW 7 */ + 4604 "11100111" // /* MW 6 */ + 4605 "10001010" // /* MW 5 */ + 4606 "11100110" // /* MW 4 */ + 4607 "01110010" // /* MW 3 */ + 4608 "00001001" // /* MW 2 */ + 4609 "00000010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 940 83 first + 4610 "11111000" // VCONV.fp32.bf16 cml0, x5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4611 "10001010" // /* MW 3 */ + 4612 "00001011" // /* MW 2 */ + 4613 "00011000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 5 "mul_acc32_fp.hpp" 38 105 first + 4614 "01100010" // VMIN_GE.bf16 x8, r16, x10, x0; VMAC.f dm3, dm0, x6, x4, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4615 "10000001" // /* MW 7 */ + 4616 "00001100" // /* MW 6 */ + 4617 "10001011" // /* MW 5 */ + 4618 "11100110" // /* MW 4 */ + 4619 "00101100" // /* MW 3 */ + 4620 "01010000" // /* MW 2 */ + 4621 "00000100" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 38 105 +.src_ref 2 "elementwise_unary.h" 125 8 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 4622 "01010110" // VCONV.bf16.fp32 x11, cml4; MOVXM ls, #4672; VMAC.f dm1, dm0, x9, x4, r17 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4623 "10000001" // /* MW 11 */ + 4624 "00010010" // /* MW 10 */ + 4625 "10001001" // /* MW 9 */ + 4626 "00000010" // /* MW 8 */ + 4627 "00100100" // /* MW 7 */ + 4628 "10001111" // /* MW 6 */ + 4629 "00000000" // /* MW 5 */ + 4630 "00000000" // /* MW 4 */ + 4631 "11000000" // /* MW 3 */ + 4632 "01000010" // /* MW 2 */ + 4633 "10110010" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4634 "11111000" // VMAX_LT.bf16 x6, r16, x8, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4635 "11101100" // /* MW 3 */ + 4636 "01000000" // /* MW 2 */ + 4637 "00011011" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 39 105 first +.src_ref 2 "elementwise_unary.h" 125 8 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4638 "01011010" // MOVXM le, #4768; VMSC.f dm2, dm3, x11, x6, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4639 "11000011" // /* MW 9 */ + 4640 "01110110" // /* MW 8 */ + 4641 "10001010" // /* MW 7 */ + 4642 "00000010" // /* MW 6 */ + 4643 "00101010" // /* MW 5 */ + 4644 "10110111" // /* MW 4 */ + 4645 "00000000" // /* MW 3 */ + 4646 "00000000" // /* MW 2 */ + 4647 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_unary.h" 125 8 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4648 "00000010" // VCONV.bf16.fp32 x5, cml2; ADD.NC lc, r0, #-2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4649 "10000000" // /* MW 7 */ + 4650 "00111111" // /* MW 6 */ + 4651 "10111000" // /* MW 5 */ + 4652 "00000010" // /* MW 4 */ + 4653 "11000000" // /* MW 3 */ + 4654 "00100010" // /* MW 2 */ + 4655 "01010010" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first + 4656 "11111000" // VMIN_GE.bf16 x8, r16, x7, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4657 "00101100" // /* MW 3 */ + 4658 "00111000" // /* MW 2 */ + 4659 "00011100" // /* MW 1 */ +.src_ref 4 "abs.hpp" 32 22 first + 4660 "11110110" // NOPA; NOPB; NOPS; VBAND x11, x6, x2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4661 "10110000" // /* MW 11 */ + 4662 "10010101" // /* MW 10 */ + 4663 "11011000" // /* MW 9 */ + 4664 "00000010" // /* MW 8 */ + 4665 "01011011" // /* MW 7 */ + 4666 "00000001" // /* MW 6 */ + 4667 "00100000" // /* MW 5 */ + 4668 "00000000" // /* MW 4 */ + 4669 "11110000" // /* MW 3 */ + 4670 "00101100" // /* MW 2 */ + 4671 "00000000" // /* MW 1 */ +.label ZLS_F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_192 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 5 "mul_acc32_fp.hpp" 39 105 first +.src_ref 2 "elementwise_unary.h" 142 19 first +.begin_of_loop +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first +.loop_nesting 1 + 4672 "01001010" // VLDB x10, [p0], #64; VMAX_LT.bf16 x9, r16, x8, x1; VMSC.f dm4, dm1, x5, x9, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4673 "00100011" // /* MW 9 */ + 4674 "00101011" // /* MW 8 */ + 4675 "10001100" // /* MW 7 */ + 4676 "11100110" // /* MW 6 */ + 4677 "11101100" // /* MW 5 */ + 4678 "11000000" // /* MW 4 */ + 4679 "01101100" // /* MW 3 */ + 4680 "00111101" // /* MW 2 */ + 4681 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "abs.hpp" 32 22 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_unary.h" 161 19 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 4682 "01001010" // VLDB x7, [p0], #64; VBAND x8, x9, x2; VMUL.f dm4, x3, x11, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4683 "01100001" // /* MW 9 */ + 4684 "11100111" // /* MW 8 */ + 4685 "10001100" // /* MW 7 */ + 4686 "01100110" // /* MW 6 */ + 4687 "00101011" // /* MW 5 */ + 4688 "01001001" // /* MW 4 */ + 4689 "11101100" // /* MW 3 */ + 4690 "00111011" // /* MW 2 */ + 4691 "00000000" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 38 105 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4692 "01001000" // VMAC.f dm3, dm0, x6, x4, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4693 "10000001" // /* MW 3 */ + 4694 "00001100" // /* MW 2 */ + 4695 "10001011" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4696 "01001000" // VMUL.f dm2, x3, x8, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4697 "00000001" // /* MW 3 */ + 4698 "11100111" // /* MW 2 */ + 4699 "10001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 38 105 first +.src_ref 2 "elementwise_unary.h" 166 20 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4700 "01100010" // VST.CONV.bf16.fp32 cml2, [p1], #64; VMAC.f dm1, dm0, x9, x4, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4701 "10000001" // /* MW 7 */ + 4702 "00010010" // /* MW 6 */ + 4703 "10001001" // /* MW 5 */ + 4704 "00000010" // /* MW 4 */ + 4705 "01100000" // /* MW 3 */ + 4706 "10100100" // /* MW 2 */ + 4707 "00100011" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4709 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_unary.h" 147 20 first +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4710 "01111010" // NOPA; VST.CONV.bf16.fp32 cml4, [p1], #64;NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4711 "00000000" // /* MW 9 */ + 4712 "00000000" // /* MW 8 */ + 4713 "00000000" // /* MW 7 */ + 4714 "00000000" // /* MW 6 */ + 4715 "00100011" // /* MW 5 */ + 4716 "00011110" // /* MW 4 */ + 4717 "11110001" // /* MW 3 */ + 4718 "00101100" // /* MW 2 */ + 4719 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 5 "accum.hpp" 1119 102 first +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 4720 "11100001" // NOPA; NOPB; VCONV.bf16.fp32 x11, cml4; NOPX; VMIN_GE.bf16 x8, r16, x10, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4721 "00000000" // /* MW 15 */ + 4722 "00000000" // /* MW 14 */ + 4723 "01111000" // /* MW 13 */ + 4724 "00010110" // /* MW 12 */ + 4725 "00101000" // /* MW 11 */ + 4726 "00000010" // /* MW 10 */ + 4727 "00000000" // /* MW 9 */ + 4728 "00000000" // /* MW 8 */ + 4729 "00010110" // /* MW 7 */ + 4730 "10010010" // /* MW 6 */ + 4731 "00100101" // /* MW 5 */ + 4732 "00000000" // /* MW 4 */ + 4733 "11110000" // /* MW 3 */ + 4734 "00101100" // /* MW 2 */ + 4735 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.aggressive_scheduled_block_id 4 +.noswbrkpt + 4736 "11100001" // NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x8, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4737 "00000000" // /* MW 15 */ + 4738 "00000000" // /* MW 14 */ + 4739 "01111000" // /* MW 13 */ + 4740 "01110110" // /* MW 12 */ + 4741 "10100000" // /* MW 11 */ + 4742 "00000001" // /* MW 10 */ + 4743 "00000000" // /* MW 9 */ + 4744 "00000000" // /* MW 8 */ + 4745 "01011011" // /* MW 7 */ + 4746 "00000001" // /* MW 6 */ + 4747 "00100000" // /* MW 5 */ + 4748 "00000000" // /* MW 4 */ + 4749 "11110000" // /* MW 3 */ + 4750 "00101100" // /* MW 2 */ + 4751 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 39 105 first +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4752 "00011011" // NOPA; NOPB; VCONV.bf16.fp32 x5, cml2; NOPX; VMIN_GE.bf16 x8, r16, x7, x0; VMSC.f dm2, dm3, x11, x6, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4753 "10110110" // /* MW 15 */ + 4754 "01010011" // /* MW 14 */ + 4755 "01111100" // /* MW 13 */ + 4756 "00010110" // /* MW 12 */ + 4757 "00011100" // /* MW 11 */ + 4758 "00000010" // /* MW 10 */ + 4759 "00000000" // /* MW 9 */ + 4760 "00000000" // /* MW 8 */ + 4761 "00010110" // /* MW 7 */ + 4762 "10010001" // /* MW 6 */ + 4763 "00100010" // /* MW 5 */ + 4764 "00000000" // /* MW 4 */ + 4765 "11110000" // /* MW 3 */ + 4766 "00101100" // /* MW 2 */ + 4767 "00000000" // /* MW 1 */ +.label ZLE_F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_288 +.src_ref 4 "abs.hpp" 32 22 first +.end_of_loop + 4768 "11100001" // NOPA; NOPB; NOPS; NOPX; VBAND x11, x6, x2; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4769 "00000000" // /* MW 15 */ + 4770 "00000000" // /* MW 14 */ + 4771 "10111000" // /* MW 13 */ + 4772 "10010101" // /* MW 12 */ + 4773 "11011000" // /* MW 11 */ + 4774 "00000010" // /* MW 10 */ + 4775 "00000000" // /* MW 9 */ + 4776 "00000000" // /* MW 8 */ + 4777 "01011011" // /* MW 7 */ + 4778 "00000001" // /* MW 6 */ + 4779 "00100000" // /* MW 5 */ + 4780 "00000000" // /* MW 4 */ + 4781 "11110000" // /* MW 3 */ + 4782 "00101100" // /* MW 2 */ + 4783 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 5 "mul_acc32_fp.hpp" 39 105 first +.loop_nesting 0 + 4784 "01100010" // VMAX_LT.bf16 x9, r16, x8, x1; VMSC.f dm4, dm1, x5, x9, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4785 "00100011" // /* MW 7 */ + 4786 "00101011" // /* MW 6 */ + 4787 "10001100" // /* MW 5 */ + 4788 "11100110" // /* MW 4 */ + 4789 "11101100" // /* MW 3 */ + 4790 "11000000" // /* MW 2 */ + 4791 "00000100" // /* MW 1 */ +.src_ref 4 "abs.hpp" 32 22 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 4792 "01111000" // VBAND x8, x9, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4793 "00101011" // /* MW 3 */ + 4794 "01001001" // /* MW 2 */ + 4795 "00011100" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.aggressive_scheduled_block_id 5 +.noswbrkpt + 4796 "01001000" // VMUL.f dm4, x3, x11, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4797 "01100001" // /* MW 3 */ + 4798 "11100111" // /* MW 2 */ + 4799 "10001100" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 4800 "01001000" // VMUL.f dm2, x3, x8, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4801 "00000001" // /* MW 3 */ + 4802 "11100111" // /* MW 2 */ + 4803 "10001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_unary.h" 166 20 first +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 4804 "00011000" // VST.CONV.bf16.fp32 cml2, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4805 "00100011" // /* MW 3 */ + 4806 "00011101" // /* MW 2 */ + 4807 "00001001" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 4808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4809 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_unary.h" 147 20 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4810 "00011000" // VST.CONV.bf16.fp32 cml4, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4811 "00100011" // /* MW 3 */ + 4812 "00011110" // /* MW 2 */ + 4813 "00001001" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 38 105 first + 4814 "01001000" // VMAC.f dm3, dm0, x6, x4, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4815 "10000001" // /* MW 3 */ + 4816 "00001100" // /* MW 2 */ + 4817 "10001011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 38 105 + 4818 "01100010" // VCONV.bf16.fp32 x11, cml4; VMAC.f dm1, dm0, x9, x4, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4819 "10000001" // /* MW 7 */ + 4820 "00010010" // /* MW 6 */ + 4821 "10001001" // /* MW 5 */ + 4822 "00000010" // /* MW 4 */ + 4823 "11000000" // /* MW 3 */ + 4824 "01000010" // /* MW 2 */ + 4825 "10110010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 + 4826 "00011000" // VCONV.bf16.fp32 x5, cml2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4827 "00010110" // /* MW 3 */ + 4828 "10010001" // /* MW 2 */ + 4829 "00001010" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 39 105 first + 4830 "01001000" // VMSC.f dm2, dm3, x11, x6, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4831 "11000011" // /* MW 3 */ + 4832 "01110110" // /* MW 2 */ + 4833 "10001010" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 39 105 + 4834 "01001000" // VMSC.f dm4, dm1, x5, x9, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4835 "00100011" // /* MW 3 */ + 4836 "00101011" // /* MW 2 */ + 4837 "10001100" // /* MW 1 */ + 4838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4839 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 129 4 first + 4840 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 4841 "00000000" // /* MW 3 */ + 4842 "00101000" // /* MW 2 */ + 4843 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4845 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4846 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4847 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_unary.h" 166 20 first +.delay_slot + 4848 "00011000" // VST.CONV.bf16.fp32 cml2, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4849 "00100011" // /* MW 3 */ + 4850 "00011101" // /* MW 2 */ + 4851 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_unary.h" 147 20 first +.delay_slot + 4852 "00011000" // VST.CONV.bf16.fp32 cml4, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4853 "00100011" // /* MW 3 */ + 4854 "00011110" // /* MW 2 */ + 4855 "00001001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4856 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E__end +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_end0 + 4857 "00000000" // /* MW 1 */ +.label __Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_sigmoid1d _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 6 "superkernels.cpp" 210 first +.src_ref 6 "superkernels.cpp" 215 6 +.function_start + 4864 "01000100" // MOVXM p3, #508416 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4865 "00000000" // /* MW 5 */ + 4866 "11000100" // /* MW 4 */ + 4867 "11000110" // /* MW 3 */ + 4868 "00000111" // /* MW 2 */ + 4869 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 215 6 first + 4870 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4871 "11000001" // /* MW 5 */ + 4872 "10110101" // /* MW 4 */ + 4873 "11011000" // /* MW 3 */ + 4874 "11000010" // /* MW 2 */ + 4875 "01100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 210 + 4876 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4877 "00000001" // /* MW 5 */ + 4878 "00000000" // /* MW 4 */ + 4879 "00000000" // /* MW 3 */ + 4880 "00001000" // /* MW 2 */ + 4881 "00000000" // /* MW 1 */ + 4882 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4883 "01110000" // /* MW 7 */ + 4884 "11010000" // /* MW 6 */ + 4885 "00001011" // /* MW 5 */ + 4886 "00000000" // /* MW 4 */ + 4887 "10110000" // /* MW 3 */ + 4888 "01100011" // /* MW 2 */ + 4889 "11111111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 212 11 + 4890 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #508420 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4891 "00010001" // /* MW 9 */ + 4892 "00000010" // /* MW 8 */ + 4893 "00110001" // /* MW 7 */ + 4894 "11110011" // /* MW 6 */ + 4895 "00000001" // /* MW 5 */ + 4896 "00000000" // /* MW 4 */ + 4897 "10110000" // /* MW 3 */ + 4898 "10000010" // /* MW 2 */ + 4899 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 + 4900 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4901 "11000000" // /* MW 3 */ + 4902 "11010100" // /* MW 2 */ + 4903 "00011011" // /* MW 1 */ + 4904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4905 "00000000" // /* MW 1 */ + 4906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4907 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 215 6 +.src_ref 6 "superkernels.cpp" 215 16 + 4908 "10000100" // JNZ r16, #5072 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5072 delay_slots=5 */ + 4909 "00000001" // /* MW 5 */ + 4910 "01000000" // /* MW 4 */ + 4911 "11101000" // /* MW 3 */ + 4912 "00001001" // /* MW 2 */ + 4913 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 212 22 first +.delay_slot + 4914 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4915 "10010000" // /* MW 3 */ + 4916 "01100010" // /* MW 2 */ + 4917 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 212 30 +.delay_slot + 4918 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4919 "11111011" // /* MW 3 */ + 4920 "01100011" // /* MW 2 */ + 4921 "00010100" // /* MW 1 */ +.delay_slot + 4922 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4923 "00111101" // /* MW 3 */ + 4924 "11110100" // /* MW 2 */ + 4925 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 6 "superkernels.cpp" 212 11 +.delay_slot + 4926 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4927 "01110000" // /* MW 7 */ + 4928 "01100000" // /* MW 6 */ + 4929 "00110000" // /* MW 5 */ + 4930 "00000011" // /* MW 4 */ + 4931 "00110000" // /* MW 3 */ + 4932 "11000110" // /* MW 2 */ + 4933 "11000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 218 4 +.src_ref 6 "superkernels.cpp" 229 2 +.delay_slot + 4934 "01000100" // MOVXM p0, #508800 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4935 "00000000" // /* MW 5 */ + 4936 "11000111" // /* MW 4 */ + 4937 "11000000" // /* MW 3 */ + 4938 "00000111" // /* MW 2 */ + 4939 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4940 "01000100" // MOVXM p2, #508448 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4941 "01000000" // /* MW 5 */ + 4942 "11000100" // /* MW 4 */ + 4943 "11000100" // /* MW 3 */ + 4944 "00000111" // /* MW 2 */ + 4945 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4946 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #508444 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4947 "00010000" // /* MW 9 */ + 4948 "00001110" // /* MW 8 */ + 4949 "00110001" // /* MW 7 */ + 4950 "11110001" // /* MW 6 */ + 4951 "00000001" // /* MW 5 */ + 4952 "00000000" // /* MW 4 */ + 4953 "11100000" // /* MW 3 */ + 4954 "11000000" // /* MW 2 */ + 4955 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4957 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 218 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 4958 "00000100" // JL #4400 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4400 delay_slots=5 */ + 4959 "00000001" // /* MW 5 */ + 4960 "00000000" // /* MW 4 */ + 4961 "10011000" // /* MW 3 */ + 4962 "00001000" // /* MW 2 */ + 4963 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4964 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4965 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4967 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4968 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4969 "00110001" // /* MW 3 */ + 4970 "00100000" // /* MW 2 */ + 4971 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 4972 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4973 "00000101" // /* MW 3 */ + 4974 "00100000" // /* MW 2 */ + 4975 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 4976 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4977 "00000000" // /* MW 15 */ + 4978 "00000000" // /* MW 14 */ + 4979 "01111000" // /* MW 13 */ + 4980 "10100101" // /* MW 12 */ + 4981 "00000001" // /* MW 11 */ + 4982 "00000000" // /* MW 10 */ + 4983 "00000000" // /* MW 9 */ + 4984 "10000000" // /* MW 8 */ + 4985 "00010001" // /* MW 7 */ + 4986 "00000110" // /* MW 6 */ + 4987 "00100010" // /* MW 5 */ + 4988 "00000000" // /* MW 4 */ + 4989 "11110000" // /* MW 3 */ + 4990 "00101100" // /* MW 2 */ + 4991 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 222 18 +.return_address + 4992 "01000100" // MOVXM p2, #508420 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4993 "00001000" // /* MW 5 */ + 4994 "11000100" // /* MW 4 */ + 4995 "11000100" // /* MW 3 */ + 4996 "00000111" // /* MW 2 */ + 4997 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 222 18 first +.src_ref 6 "superkernels.cpp" 222 46 + 4998 "10111010" // LDA r16, [p2]; MOVXM p2, #508800 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4999 "00010000" // /* MW 9 */ + 5000 "11000000" // /* MW 8 */ + 5001 "00110001" // /* MW 7 */ + 5002 "11110001" // /* MW 6 */ + 5003 "00000001" // /* MW 5 */ + 5004 "00000000" // /* MW 4 */ + 5005 "11010000" // /* MW 3 */ + 5006 "11000010" // /* MW 2 */ + 5007 "01000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 220 32 +.src_ref 6 "superkernels.cpp" 222 46 +.src_ref 6 "superkernels.cpp" 229 2 + 5008 "10111010" // LDA r17, [p2]; MOVXM p2, #508800 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5009 "00010000" // /* MW 9 */ + 5010 "11000000" // /* MW 8 */ + 5011 "00110001" // /* MW 7 */ + 5012 "11110001" // /* MW 6 */ + 5013 "00000001" // /* MW 5 */ + 5014 "00000000" // /* MW 4 */ + 5015 "11010000" // /* MW 3 */ + 5016 "11000110" // /* MW 2 */ + 5017 "01000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 220 32 first +.src_ref 6 "superkernels.cpp" 222 16 +.src_ref 6 "superkernels.cpp" 227 47 + 5018 "10111010" // LDA.u16 r18, [p2, #8]; MOVXM p1, #508424 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5019 "00010000" // /* MW 9 */ + 5020 "00000100" // /* MW 8 */ + 5021 "10110001" // /* MW 7 */ + 5022 "11110000" // /* MW 6 */ + 5023 "00000001" // /* MW 5 */ + 5024 "00000000" // /* MW 4 */ + 5025 "01010000" // /* MW 3 */ + 5026 "11001011" // /* MW 2 */ + 5027 "01001000" // /* MW 1 */ + 5028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5029 "00000000" // /* MW 1 */ + 5030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5031 "00000000" // /* MW 1 */ + 5032 "10000100" // J #5088 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=5088 delay_slots=5 */ + 5033 "00000000" // /* MW 5 */ + 5034 "00000000" // /* MW 4 */ + 5035 "11110000" // /* MW 3 */ + 5036 "00001001" // /* MW 2 */ + 5037 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 220 13 +.delay_slot + 5038 "01000100" // MOVXM p0, #508440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5039 "00110000" // /* MW 5 */ + 5040 "11000100" // /* MW 4 */ + 5041 "11000000" // /* MW 3 */ + 5042 "00000111" // /* MW 2 */ + 5043 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5045 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 222 27 first +.delay_slot + 5046 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5047 "00001111" // /* MW 3 */ + 5048 "01100001" // /* MW 2 */ + 5049 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 220 13 first +.delay_slot + 5050 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5051 "10100011" // /* MW 5 */ + 5052 "00001100" // /* MW 4 */ + 5053 "11110000" // /* MW 3 */ + 5054 "00101100" // /* MW 2 */ + 5055 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 222 16 first +.delay_slot + 5056 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5057 "00000000" // /* MW 15 */ + 5058 "00000000" // /* MW 14 */ + 5059 "01111000" // /* MW 13 */ + 5060 "10100101" // /* MW 12 */ + 5061 "00000001" // /* MW 11 */ + 5062 "00000000" // /* MW 10 */ + 5063 "00000000" // /* MW 9 */ + 5064 "10000000" // /* MW 8 */ + 5065 "00010001" // /* MW 7 */ + 5066 "00000110" // /* MW 6 */ + 5067 "00100001" // /* MW 5 */ + 5068 "00000000" // /* MW 4 */ + 5069 "11110000" // /* MW 3 */ + 5070 "00101100" // /* MW 2 */ + 5071 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 6 "superkernels.cpp" 227 47 +.src_ref 6 "superkernels.cpp" 229 2 + 5072 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #508424; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5073 "00000000" // /* MW 15 */ + 5074 "00000000" // /* MW 14 */ + 5075 "00010000" // /* MW 13 */ + 5076 "00000100" // /* MW 12 */ + 5077 "10110001" // /* MW 11 */ + 5078 "11110000" // /* MW 10 */ + 5079 "00000001" // /* MW 9 */ + 5080 "00000000" // /* MW 8 */ + 5081 "10001011" // /* MW 7 */ + 5082 "10000000" // /* MW 6 */ + 5083 "00100010" // /* MW 5 */ + 5084 "00000000" // /* MW 4 */ + 5085 "11110000" // /* MW 3 */ + 5086 "00101100" // /* MW 2 */ + 5087 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 +.src_ref 1 "io_buffer_main.h" 242 49 first + 5088 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5089 "00000000" // /* MW 7 */ + 5090 "11000011" // /* MW 6 */ + 5091 "10110011" // /* MW 5 */ + 5092 "00000011" // /* MW 4 */ + 5093 "01100000" // /* MW 3 */ + 5094 "10010001" // /* MW 2 */ + 5095 "01110011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 226 2 + 5096 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5097 "00010000" // /* MW 9 */ + 5098 "00000000" // /* MW 8 */ + 5099 "00110001" // /* MW 7 */ + 5100 "11110000" // /* MW 6 */ + 5101 "00000001" // /* MW 5 */ + 5102 "00000000" // /* MW 4 */ + 5103 "11010000" // /* MW 3 */ + 5104 "11101110" // /* MW 2 */ + 5105 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 5106 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5107 "00010110" // /* MW 3 */ + 5108 "11111110" // /* MW 2 */ + 5109 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 5110 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5111 "00110110" // /* MW 3 */ + 5112 "11111110" // /* MW 2 */ + 5113 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first + 5114 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5115 "01010110" // /* MW 3 */ + 5116 "01000110" // /* MW 2 */ + 5117 "00000111" // /* MW 1 */ + 5118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5119 "00000000" // /* MW 1 */ + 5120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5121 "00000000" // /* MW 1 */ + 5122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5123 "00000000" // /* MW 1 */ + 5124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5125 "00000000" // /* MW 1 */ + 5126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5127 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 5128 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5129 "00000010" // /* MW 3 */ + 5130 "01100001" // /* MW 2 */ + 5131 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 + 5132 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5133 "00010001" // /* MW 3 */ + 5134 "00000110" // /* MW 2 */ + 5135 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 + 5136 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5137 "11111101" // /* MW 3 */ + 5138 "11100000" // /* MW 2 */ + 5139 "00010111" // /* MW 1 */ + 5140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5141 "00000000" // /* MW 1 */ + 5142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5143 "00000000" // /* MW 1 */ + 5144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5145 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 5146 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5147 "00001000" // /* MW 3 */ + 5148 "10010011" // /* MW 2 */ + 5149 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 227 45 + 5150 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5151 "10000001" // /* MW 5 */ + 5152 "10101101" // /* MW 4 */ + 5153 "10100111" // /* MW 3 */ + 5154 "00000000" // /* MW 2 */ + 5155 "00000100" // /* MW 1 */ + 5156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5157 "00000000" // /* MW 1 */ + 5158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5159 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 226 2 first + 5160 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5161 "00110110" // /* MW 3 */ + 5162 "00000110" // /* MW 2 */ + 5163 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 1 "io_buffer_main.h" 348 51 + 5164 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5165 "10000001" // /* MW 5 */ + 5166 "11011101" // /* MW 4 */ + 5167 "11011100" // /* MW 3 */ + 5168 "11001010" // /* MW 2 */ + 5169 "11000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 227 47 first + 5170 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5171 "01110110" // /* MW 3 */ + 5172 "00000110" // /* MW 2 */ + 5173 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 5174 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5175 "10011110" // /* MW 3 */ + 5176 "01011100" // /* MW 2 */ + 5177 "00000111" // /* MW 1 */ + 5178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5179 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 229 2 first +.no_stack_arguments + 5180 "00000100" // JL #4480 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4480 delay_slots=5 */ + 5181 "00000001" // /* MW 5 */ + 5182 "00000000" // /* MW 4 */ + 5183 "11000000" // /* MW 3 */ + 5184 "00001000" // /* MW 2 */ + 5185 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5187 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 226 2 first +.delay_slot + 5188 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5189 "00000111" // /* MW 3 */ + 5190 "01100010" // /* MW 2 */ + 5191 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 226 2 +.delay_slot + 5192 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5193 "00110001" // /* MW 3 */ + 5194 "00000110" // /* MW 2 */ + 5195 "00001000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 227 45 first +.delay_slot + 5196 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5197 "00001101" // /* MW 3 */ + 5198 "11100001" // /* MW 2 */ + 5199 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 227 45 +.delay_slot + 5200 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5201 "00000000" // /* MW 15 */ + 5202 "00000000" // /* MW 14 */ + 5203 "10101000" // /* MW 13 */ + 5204 "10100000" // /* MW 12 */ + 5205 "00110100" // /* MW 11 */ + 5206 "00000000" // /* MW 10 */ + 5207 "00000000" // /* MW 9 */ + 5208 "00000000" // /* MW 8 */ + 5209 "01011011" // /* MW 7 */ + 5210 "00000001" // /* MW 6 */ + 5211 "00100000" // /* MW 5 */ + 5212 "00000000" // /* MW 4 */ + 5213 "11110000" // /* MW 3 */ + 5214 "00101100" // /* MW 2 */ + 5215 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first +.src_ref 6 "superkernels.cpp" 231 6 +.src_ref 6 "superkernels.cpp" 232 14 +.return_address + 5216 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5217 "00010000" // /* MW 9 */ + 5218 "00000000" // /* MW 8 */ + 5219 "00110001" // /* MW 7 */ + 5220 "11110011" // /* MW 6 */ + 5221 "00000001" // /* MW 5 */ + 5222 "00000000" // /* MW 4 */ + 5223 "11010000" // /* MW 3 */ + 5224 "11000110" // /* MW 2 */ + 5225 "11001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 449 8 + 5226 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5227 "00000101" // /* MW 3 */ + 5228 "00100000" // /* MW 2 */ + 5229 "00010000" // /* MW 1 */ + 5230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5231 "00000000" // /* MW 1 */ + 5232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5233 "00000000" // /* MW 1 */ + 5234 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5235 "00000000" // /* MW 1 */ + 5236 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5237 "00000000" // /* MW 1 */ + 5238 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5239 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 5240 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5241 "00001000" // /* MW 3 */ + 5242 "01010001" // /* MW 2 */ + 5243 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first +.src_ref 6 "superkernels.cpp" 231 19 + 5244 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #508440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5245 "00010000" // /* MW 9 */ + 5246 "00001100" // /* MW 8 */ + 5247 "00110001" // /* MW 7 */ + 5248 "11110001" // /* MW 6 */ + 5249 "00000001" // /* MW 5 */ + 5250 "00000000" // /* MW 4 */ + 5251 "11010000" // /* MW 3 */ + 5252 "11001110" // /* MW 2 */ + 5253 "11111100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 231 6 first + 5254 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5255 "00110110" // /* MW 3 */ + 5256 "00000110" // /* MW 2 */ + 5257 "00000110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 231 19 + 5258 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5259 "01010110" // /* MW 3 */ + 5260 "00000110" // /* MW 2 */ + 5261 "00000010" // /* MW 1 */ + 5262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5263 "00000000" // /* MW 1 */ + 5264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5265 "00000000" // /* MW 1 */ + 5266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5267 "00000000" // /* MW 1 */ + 5268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5269 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 first + 5270 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5271 "00110001" // /* MW 3 */ + 5272 "00100001" // /* MW 2 */ + 5273 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 5274 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5275 "00010001" // /* MW 3 */ + 5276 "11100110" // /* MW 2 */ + 5277 "00001111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 231 16 first + 5278 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5279 "00101000" // /* MW 3 */ + 5280 "01100001" // /* MW 2 */ + 5281 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 231 6 + 5282 "10000100" // JNZ r16, #5312 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5312 delay_slots=5 */ + 5283 "00000001" // /* MW 5 */ + 5284 "01000000" // /* MW 4 */ + 5285 "01100000" // /* MW 3 */ + 5286 "00001010" // /* MW 2 */ + 5287 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5289 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5291 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5293 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5294 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5295 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5297 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 232 14 + 5298 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5299 "00000001" // /* MW 3 */ + 5300 "00100000" // /* MW 2 */ + 5301 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 232 14 first + 5302 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5303 "00000000" // /* MW 9 */ + 5304 "00000000" // /* MW 8 */ + 5305 "00000000" // /* MW 7 */ + 5306 "10000000" // /* MW 6 */ + 5307 "00010001" // /* MW 5 */ + 5308 "00000110" // /* MW 4 */ + 5309 "11110110" // /* MW 3 */ + 5310 "00101100" // /* MW 2 */ + 5311 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 6 "superkernels.cpp" 234 + 5312 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5313 "00111001" // /* MW 3 */ + 5314 "11110100" // /* MW 2 */ + 5315 "00000111" // /* MW 1 */ + 5316 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5317 "00011001" // /* MW 3 */ + 5318 "11111011" // /* MW 2 */ + 5319 "00000111" // /* MW 1 */ + 5320 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5321 "00000000" // /* MW 1 */ + 5322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5323 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 5324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5325 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 5326 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5327 "11110001" // /* MW 3 */ + 5328 "11111101" // /* MW 2 */ + 5329 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5331 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 234 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5332 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 5333 "00000000" // /* MW 3 */ + 5334 "00101000" // /* MW 2 */ + 5335 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5336 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5337 "10100000" // /* MW 3 */ + 5338 "01100111" // /* MW 2 */ + 5339 "00011111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 234 +.delay_slot + 5340 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5341 "00000001" // /* MW 5 */ + 5342 "00000000" // /* MW 4 */ + 5343 "00000000" // /* MW 3 */ + 5344 "11111000" // /* MW 2 */ + 5345 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5347 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5349 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 5351 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.src_ref 2 "elementwise_binary.h" 100 first +.src_ref 2 "elementwise_binary.h" 103 4 first +.function_start + 5360 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 5361 "00000000" // /* MW 3 */ + 5362 "00101000" // /* MW 2 */ + 5363 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 5364 "01000100" // MOVXM p0, #508768 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5365 "11000000" // /* MW 5 */ + 5366 "11000110" // /* MW 4 */ + 5367 "11000000" // /* MW 3 */ + 5368 "00000111" // /* MW 2 */ + 5369 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 5370 "10111000" // MOV m0, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5371 "10000000" // /* MW 3 */ + 5372 "00000000" // /* MW 2 */ + 5373 "00011000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 first +.delay_slot + 5374 "10011000" // ST m0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5375 "00000001" // /* MW 3 */ + 5376 "00000100" // /* MW 2 */ + 5377 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 5378 "10011000" // ST m0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5379 "00000001" // /* MW 3 */ + 5380 "00010100" // /* MW 2 */ + 5381 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_end0 + 5383 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.src_ref 2 "elementwise_binary.h" 89 first +.src_ref 2 "elementwise_binary.h" 92 22 +.src_ref 2 "elementwise_binary.h" 92 24 first +.function_start + 5392 "10111010" // LDA el0, [p1], #4; MOVXM p0, #508736 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5393 "00010000" // /* MW 9 */ + 5394 "10100000" // /* MW 8 */ + 5395 "00110001" // /* MW 7 */ + 5396 "11110000" // /* MW 6 */ + 5397 "00000001" // /* MW 5 */ + 5398 "00000000" // /* MW 4 */ + 5399 "11010000" // /* MW 3 */ + 5400 "10000101" // /* MW 2 */ + 5401 "00100011" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 89 + 5402 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5403 "00000001" // /* MW 5 */ + 5404 "00000000" // /* MW 4 */ + 5405 "00000000" // /* MW 3 */ + 5406 "00001000" // /* MW 2 */ + 5407 "00000000" // /* MW 1 */ + 5408 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5409 "00111101" // /* MW 3 */ + 5410 "11111100" // /* MW 2 */ + 5411 "00001111" // /* MW 1 */ + 5412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5413 "00000000" // /* MW 1 */ + 5414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5415 "00000000" // /* MW 1 */ + 5416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5417 "00000000" // /* MW 1 */ + 5418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5419 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 92 22 first + 5420 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5421 "00101001" // /* MW 3 */ + 5422 "00011100" // /* MW 2 */ + 5423 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 93 24 first + 5424 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5425 "00101110" // /* MW 3 */ + 5426 "00011100" // /* MW 2 */ + 5427 "00000001" // /* MW 1 */ + 5428 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5429 "00000000" // /* MW 1 */ + 5430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5431 "00000000" // /* MW 1 */ + 5432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5433 "00000000" // /* MW 1 */ + 5434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5435 "00000000" // /* MW 1 */ + 5436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5437 "00000000" // /* MW 1 */ + 5438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5439 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 93 22 + 5440 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5441 "00101001" // /* MW 3 */ + 5442 "00011100" // /* MW 2 */ + 5443 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 94 24 first + 5444 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5445 "00101110" // /* MW 3 */ + 5446 "00000100" // /* MW 2 */ + 5447 "00000001" // /* MW 1 */ + 5448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5449 "00000000" // /* MW 1 */ + 5450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5451 "00000000" // /* MW 1 */ + 5452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5453 "00000000" // /* MW 1 */ + 5454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5455 "00000000" // /* MW 1 */ + 5456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5457 "00000000" // /* MW 1 */ + 5458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5459 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 94 22 + 5460 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5461 "00101001" // /* MW 3 */ + 5462 "00011100" // /* MW 2 */ + 5463 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 95 24 first + 5464 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5465 "00101110" // /* MW 3 */ + 5466 "00010100" // /* MW 2 */ + 5467 "00000001" // /* MW 1 */ + 5468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5469 "00000000" // /* MW 1 */ + 5470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5471 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 96 8 first +.no_stack_arguments + 5472 "00000100" // JL #5360 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=5360 delay_slots=5 */ + 5473 "00000001" // /* MW 5 */ + 5474 "00000000" // /* MW 4 */ + 5475 "01111000" // /* MW 3 */ + 5476 "00001010" // /* MW 2 */ + 5477 "00000000" // /* MW 1 */ +.delay_slot + 5478 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5479 "10011101" // /* MW 3 */ + 5480 "11111011" // /* MW 2 */ + 5481 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5483 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5485 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 95 22 first +.delay_slot + 5486 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5487 "00101001" // /* MW 3 */ + 5488 "11011100" // /* MW 2 */ + 5489 "00001000" // /* MW 1 */ +.src_ref 3 "mul_impl.h" 93 25 +.delay_slot + 5490 "00101110" // NOPA; NOPS; MOV p7, p0; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5491 "00011100" // /* MW 13 */ + 5492 "00000000" // /* MW 12 */ + 5493 "00000000" // /* MW 11 */ + 5494 "00000111" // /* MW 10 */ + 5495 "00000110" // /* MW 9 */ + 5496 "01111011" // /* MW 8 */ + 5497 "00000000" // /* MW 7 */ + 5498 "00000000" // /* MW 6 */ + 5499 "10110110" // /* MW 5 */ + 5500 "00000010" // /* MW 4 */ + 5501 "11110000" // /* MW 3 */ + 5502 "00101100" // /* MW 2 */ + 5503 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 98 4 +.return_address + 5504 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5505 "00111001" // /* MW 3 */ + 5506 "11111100" // /* MW 2 */ + 5507 "00000111" // /* MW 1 */ + 5508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5509 "00000000" // /* MW 1 */ + 5510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5511 "00000000" // /* MW 1 */ + 5512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5513 "00000000" // /* MW 1 */ + 5514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5515 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5517 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5518 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5519 "10011001" // /* MW 3 */ + 5520 "11111011" // /* MW 2 */ + 5521 "00000111" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 98 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5522 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 5523 "00000000" // /* MW 3 */ + 5524 "00101000" // /* MW 2 */ + 5525 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5527 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5529 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5531 "00000000" // /* MW 1 */ +.src_ref 3 "mul_impl.h" 93 25 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5532 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5533 "00000001" // /* MW 3 */ + 5534 "00100000" // /* MW 2 */ + 5535 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 98 4 +.src_ref 3 "mul_impl.h" 93 25 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5536 "00111010" // ST r16, [p7, #16]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5537 "01110001" // /* MW 9 */ + 5538 "00000000" // /* MW 8 */ + 5539 "00000000" // /* MW 7 */ + 5540 "00000000" // /* MW 6 */ + 5541 "11111110" // /* MW 5 */ + 5542 "00111111" // /* MW 4 */ + 5543 "00110000" // /* MW 3 */ + 5544 "11000010" // /* MW 2 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + 5545 "11101000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.function run _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.src_ref 2 "elementwise_binary.h" 108 first +.src_ref 2 "elementwise_binary.h" 115 37 +.src_ref 2 "elementwise_binary.h" 115 37 +.function_start + 5552 "10111010" // MOVA m0, #32; MOVXM p3, #508736 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5553 "00010000" // /* MW 9 */ + 5554 "10100000" // /* MW 8 */ + 5555 "10110001" // /* MW 7 */ + 5556 "11110001" // /* MW 6 */ + 5557 "00000001" // /* MW 5 */ + 5558 "00000000" // /* MW 4 */ + 5559 "10000000" // /* MW 3 */ + 5560 "00000000" // /* MW 2 */ + 5561 "00000100" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 115 37 first +.src_ref 2 "elementwise_binary.h" 115 78 + 5562 "10111010" // LDA r1, [p3], m0; MOVXM p4, #508448 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5563 "00010000" // /* MW 9 */ + 5564 "00010000" // /* MW 8 */ + 5565 "00110001" // /* MW 7 */ + 5566 "11110010" // /* MW 6 */ + 5567 "00000001" // /* MW 5 */ + 5568 "00000000" // /* MW 4 */ + 5569 "11010000" // /* MW 3 */ + 5570 "00000110" // /* MW 2 */ + 5571 "01100001" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 115 78 +.src_ref 2 "elementwise_binary.h" 115 78 + 5572 "10111010" // LDA m1, [p3]; MOVX r0, #828; MOV r3, #-6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5573 "01011000" // /* MW 9 */ + 5574 "11111010" // /* MW 8 */ + 5575 "01101111" // /* MW 7 */ + 5576 "10001000" // /* MW 6 */ + 5577 "00000111" // /* MW 5 */ + 5578 "00011000" // /* MW 4 */ + 5579 "11010000" // /* MW 3 */ + 5580 "10010000" // /* MW 2 */ + 5581 "01100000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 115 78 +.src_ref 2 "elementwise_binary.h" 127 8 first + 5582 "10111010" // LDA m0, [p3, #4]; MOVXM ls, #5744 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5583 "00010000" // /* MW 9 */ + 5584 "00111000" // /* MW 8 */ + 5585 "01111011" // /* MW 7 */ + 5586 "00000100" // /* MW 6 */ + 5587 "00000000" // /* MW 5 */ + 5588 "00000000" // /* MW 4 */ + 5589 "11010000" // /* MW 3 */ + 5590 "10000000" // /* MW 2 */ + 5591 "01100010" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 115 78 first +.src_ref 2 "elementwise_binary.h" 127 8 + 5592 "10111010" // LDA.s8 r2, [p4]; MOVXM le, #5760 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5593 "00010000" // /* MW 9 */ + 5594 "01000000" // /* MW 8 */ + 5595 "10111011" // /* MW 7 */ + 5596 "00000101" // /* MW 6 */ + 5597 "00000000" // /* MW 5 */ + 5598 "00000000" // /* MW 4 */ + 5599 "01010000" // /* MW 3 */ + 5600 "10001000" // /* MW 2 */ + 5601 "10000000" // /* MW 1 */ + 5602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5603 "00000000" // /* MW 1 */ + 5604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5605 "00000000" // /* MW 1 */ + 5606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5607 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 115 78 + 5608 "10011000" // LSHL r1, r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5609 "00111101" // /* MW 3 */ + 5610 "01000010" // /* MW 2 */ + 5611 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 127 8 first + 5612 "10011000" // ADD.NC lc, r1, #-7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5613 "11111100" // /* MW 3 */ + 5614 "01110000" // /* MW 2 */ + 5615 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first + 5616 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5617 "11101000" // /* MW 5 */ + 5618 "01010000" // /* MW 4 */ + 5619 "01110000" // /* MW 3 */ + 5620 "00010011" // /* MW 2 */ + 5621 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 154 20 +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.src_ref 2 "elementwise_binary.h" 177 20 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5622 "00010010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; MOVX crRnd, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5623 "10000000" // /* MW 7 */ + 5624 "10111010" // /* MW 6 */ + 5625 "01101000" // /* MW 5 */ + 5626 "01010000" // /* MW 4 */ + 5627 "01110000" // /* MW 3 */ + 5628 "00011011" // /* MW 2 */ + 5629 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5630 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5631 "11101000" // /* MW 5 */ + 5632 "01010000" // /* MW 4 */ + 5633 "01110000" // /* MW 3 */ + 5634 "00010011" // /* MW 2 */ + 5635 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5636 "00111100" // VLDA x3, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5637 "01101000" // /* MW 5 */ + 5638 "01010000" // /* MW 4 */ + 5639 "01110000" // /* MW 3 */ + 5640 "00011011" // /* MW 2 */ + 5641 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5642 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5643 "11101000" // /* MW 5 */ + 5644 "01010000" // /* MW 4 */ + 5645 "01110000" // /* MW 3 */ + 5646 "00010011" // /* MW 2 */ + 5647 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5648 "00111100" // VLDA x3, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5649 "01101000" // /* MW 5 */ + 5650 "01010000" // /* MW 4 */ + 5651 "01110000" // /* MW 3 */ + 5652 "00011011" // /* MW 2 */ + 5653 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5654 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5655 "11101000" // /* MW 5 */ + 5656 "01010000" // /* MW 4 */ + 5657 "01110000" // /* MW 3 */ + 5658 "00010011" // /* MW 2 */ + 5659 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5660 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5661 "01000001" // /* MW 9 */ + 5662 "11100010" // /* MW 8 */ + 5663 "00000000" // /* MW 7 */ + 5664 "00011101" // /* MW 6 */ + 5665 "00110100" // /* MW 5 */ + 5666 "00101000" // /* MW 4 */ + 5667 "01110000" // /* MW 3 */ + 5668 "00011011" // /* MW 2 */ + 5669 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5670 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5671 "01100001" // /* MW 9 */ + 5672 "11100000" // /* MW 8 */ + 5673 "00000001" // /* MW 7 */ + 5674 "00011101" // /* MW 6 */ + 5675 "01110100" // /* MW 5 */ + 5676 "00101000" // /* MW 4 */ + 5677 "01110000" // /* MW 3 */ + 5678 "00010011" // /* MW 2 */ + 5679 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5680 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5681 "01000001" // /* MW 9 */ + 5682 "11100010" // /* MW 8 */ + 5683 "00000000" // /* MW 7 */ + 5684 "00011101" // /* MW 6 */ + 5685 "00110100" // /* MW 5 */ + 5686 "00101000" // /* MW 4 */ + 5687 "01110000" // /* MW 3 */ + 5688 "00011011" // /* MW 2 */ + 5689 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5690 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5691 "01100001" // /* MW 9 */ + 5692 "11100000" // /* MW 8 */ + 5693 "00000001" // /* MW 7 */ + 5694 "00011101" // /* MW 6 */ + 5695 "01110100" // /* MW 5 */ + 5696 "00101000" // /* MW 4 */ + 5697 "01110000" // /* MW 3 */ + 5698 "00010011" // /* MW 2 */ + 5699 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5700 "01100110" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; NOPS; VMUL.f dm0, x1, x2, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5701 "01000001" // /* MW 11 */ + 5702 "11100010" // /* MW 10 */ + 5703 "00000000" // /* MW 9 */ + 5704 "10001110" // /* MW 8 */ + 5705 "10101101" // /* MW 7 */ + 5706 "00000000" // /* MW 6 */ + 5707 "01101000" // /* MW 5 */ + 5708 "01010000" // /* MW 4 */ + 5709 "01110000" // /* MW 3 */ + 5710 "00011011" // /* MW 2 */ + 5711 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5712 "00001011" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; NOPS; NOPX; NOPM; VMUL.f dm1, x0, x3, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5713 "00000011" // /* MW 15 */ + 5714 "00001111" // /* MW 14 */ + 5715 "01111000" // /* MW 13 */ + 5716 "10100101" // /* MW 12 */ + 5717 "00000001" // /* MW 11 */ + 5718 "00000000" // /* MW 10 */ + 5719 "00000000" // /* MW 9 */ + 5720 "00000000" // /* MW 8 */ + 5721 "01011011" // /* MW 7 */ + 5722 "00000001" // /* MW 6 */ + 5723 "11101000" // /* MW 5 */ + 5724 "01010000" // /* MW 4 */ + 5725 "01110000" // /* MW 3 */ + 5726 "00010011" // /* MW 2 */ + 5727 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5728 "00001011" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5729 "00010010" // /* MW 15 */ + 5730 "00000111" // /* MW 14 */ + 5731 "01111000" // /* MW 13 */ + 5732 "10100101" // /* MW 12 */ + 5733 "00000001" // /* MW 11 */ + 5734 "00000000" // /* MW 10 */ + 5735 "00000000" // /* MW 9 */ + 5736 "00000000" // /* MW 8 */ + 5737 "00100011" // /* MW 7 */ + 5738 "00011100" // /* MW 6 */ + 5739 "01101010" // /* MW 5 */ + 5740 "01010000" // /* MW 4 */ + 5741 "01110000" // /* MW 3 */ + 5742 "00011011" // /* MW 2 */ + 5743 "00100001" // /* MW 1 */ +.label ZLS_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_192 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.src_ref 2 "elementwise_binary.h" 154 20 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 5744 "00001011" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; VMUL.f dm1, x0, x3, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5745 "00000011" // /* MW 15 */ + 5746 "00001111" // /* MW 14 */ + 5747 "01111000" // /* MW 13 */ + 5748 "10100101" // /* MW 12 */ + 5749 "00000001" // /* MW 11 */ + 5750 "00000000" // /* MW 10 */ + 5751 "00000000" // /* MW 9 */ + 5752 "00000000" // /* MW 8 */ + 5753 "10100011" // /* MW 7 */ + 5754 "00011100" // /* MW 6 */ + 5755 "11101010" // /* MW 5 */ + 5756 "01010000" // /* MW 4 */ + 5757 "01110000" // /* MW 3 */ + 5758 "00010011" // /* MW 2 */ + 5759 "00100001" // /* MW 1 */ +.label ZLE_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_208 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5760 "00001011" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5761 "00010010" // /* MW 15 */ + 5762 "00000111" // /* MW 14 */ + 5763 "01111000" // /* MW 13 */ + 5764 "10100101" // /* MW 12 */ + 5765 "00000001" // /* MW 11 */ + 5766 "00000000" // /* MW 10 */ + 5767 "00000000" // /* MW 9 */ + 5768 "00000000" // /* MW 8 */ + 5769 "00100011" // /* MW 7 */ + 5770 "00011100" // /* MW 6 */ + 5771 "01101010" // /* MW 5 */ + 5772 "01010000" // /* MW 4 */ + 5773 "01110000" // /* MW 3 */ + 5774 "00011011" // /* MW 2 */ + 5775 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 5776 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5777 "01100001" // /* MW 7 */ + 5778 "11100000" // /* MW 6 */ + 5779 "00000001" // /* MW 5 */ + 5780 "00000010" // /* MW 4 */ + 5781 "01100000" // /* MW 3 */ + 5782 "10010100" // /* MW 2 */ + 5783 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5784 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5785 "01000001" // /* MW 7 */ + 5786 "11100010" // /* MW 6 */ + 5787 "00000000" // /* MW 5 */ + 5788 "00000010" // /* MW 4 */ + 5789 "01100000" // /* MW 3 */ + 5790 "10000100" // /* MW 2 */ + 5791 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5792 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5793 "01100001" // /* MW 7 */ + 5794 "11100000" // /* MW 6 */ + 5795 "00000001" // /* MW 5 */ + 5796 "00000010" // /* MW 4 */ + 5797 "01100000" // /* MW 3 */ + 5798 "10010100" // /* MW 2 */ + 5799 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5800 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5801 "01000001" // /* MW 7 */ + 5802 "11100010" // /* MW 6 */ + 5803 "00000000" // /* MW 5 */ + 5804 "00000010" // /* MW 4 */ + 5805 "01100000" // /* MW 3 */ + 5806 "10000100" // /* MW 2 */ + 5807 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5808 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5809 "01100001" // /* MW 7 */ + 5810 "11100000" // /* MW 6 */ + 5811 "00000001" // /* MW 5 */ + 5812 "00000010" // /* MW 4 */ + 5813 "01100000" // /* MW 3 */ + 5814 "10010100" // /* MW 2 */ + 5815 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5816 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5817 "01000001" // /* MW 7 */ + 5818 "11100010" // /* MW 6 */ + 5819 "00000000" // /* MW 5 */ + 5820 "00000010" // /* MW 4 */ + 5821 "01100000" // /* MW 3 */ + 5822 "10000100" // /* MW 2 */ + 5823 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5824 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5825 "01100001" // /* MW 7 */ + 5826 "11100000" // /* MW 6 */ + 5827 "00000001" // /* MW 5 */ + 5828 "00000010" // /* MW 4 */ + 5829 "01100000" // /* MW 3 */ + 5830 "10010100" // /* MW 2 */ + 5831 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5832 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5833 "00100011" // /* MW 3 */ + 5834 "00011100" // /* MW 2 */ + 5835 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 131 4 first +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5836 "01011100" // VST.CONV.bf16.fp32 cml1, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 5837 "00000000" // /* MW 5 */ + 5838 "01010000" // /* MW 4 */ + 5839 "01100000" // /* MW 3 */ + 5840 "10010100" // /* MW 2 */ + 5841 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5842 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5843 "00100011" // /* MW 3 */ + 5844 "00011100" // /* MW 2 */ + 5845 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 154 20 first +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5846 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5847 "10100011" // /* MW 3 */ + 5848 "00011100" // /* MW 2 */ + 5849 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.delay_slot + 5850 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5851 "00100011" // /* MW 3 */ + 5852 "00011100" // /* MW 2 */ + 5853 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 154 20 first +.delay_slot + 5854 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5855 "10100011" // /* MW 3 */ + 5856 "00011100" // /* MW 2 */ + 5857 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_end0 + 5859 "00000000" // /* MW 1 */ +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0 +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.function superkernel_mul1d _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.src_ref 6 "superkernels.cpp" 277 first +.src_ref 6 "superkernels.cpp" 282 6 +.function_start + 5872 "01000100" // MOVXM p4, #508416 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5873 "00000000" // /* MW 5 */ + 5874 "11000100" // /* MW 4 */ + 5875 "11001000" // /* MW 3 */ + 5876 "00000111" // /* MW 2 */ + 5877 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 282 6 first + 5878 "11010100" // LDA r16, [p4]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5879 "11000001" // /* MW 5 */ + 5880 "10110101" // /* MW 4 */ + 5881 "11011000" // /* MW 3 */ + 5882 "11000010" // /* MW 2 */ + 5883 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 277 + 5884 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5885 "00000001" // /* MW 5 */ + 5886 "00000000" // /* MW 4 */ + 5887 "00000000" // /* MW 3 */ + 5888 "00001000" // /* MW 2 */ + 5889 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 279 22 first + 5890 "00111010" // ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5891 "01111001" // /* MW 9 */ + 5892 "01100000" // /* MW 8 */ + 5893 "11001010" // /* MW 7 */ + 5894 "10000001" // /* MW 6 */ + 5895 "00010100" // /* MW 5 */ + 5896 "00100011" // /* MW 4 */ + 5897 "10110000" // /* MW 3 */ + 5898 "00111010" // /* MW 2 */ + 5899 "11111111" // /* MW 1 */ + 5900 "00000010" // ST p0, [sp, #-20]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5901 "01110000" // /* MW 7 */ + 5902 "11010000" // /* MW 6 */ + 5903 "00001011" // /* MW 5 */ + 5904 "00000000" // /* MW 4 */ + 5905 "10110000" // /* MW 3 */ + 5906 "10000011" // /* MW 2 */ + 5907 "11111101" // /* MW 1 */ + 5908 "10011000" // ST r0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5909 "00010101" // /* MW 3 */ + 5910 "11111100" // /* MW 2 */ + 5911 "00001111" // /* MW 1 */ + 5912 "10011000" // ST lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5913 "00111101" // /* MW 3 */ + 5914 "11110000" // /* MW 2 */ + 5915 "00001111" // /* MW 1 */ + 5916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5917 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 282 6 first +.src_ref 6 "superkernels.cpp" 282 16 first + 5918 "10000100" // JNZ r16, #6064 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6064 delay_slots=5 */ + 5919 "00000001" // /* MW 5 */ + 5920 "01000000" // /* MW 4 */ + 5921 "11011000" // /* MW 3 */ + 5922 "00001011" // /* MW 2 */ + 5923 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 279 30 first +.delay_slot + 5924 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5925 "11111011" // /* MW 3 */ + 5926 "01100011" // /* MW 2 */ + 5927 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 279 11 +.delay_slot + 5928 "01000100" // MOVXM p2, #508420 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5929 "00001000" // /* MW 5 */ + 5930 "11000100" // /* MW 4 */ + 5931 "11000100" // /* MW 3 */ + 5932 "00000111" // /* MW 2 */ + 5933 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 279 11 +.delay_slot + 5934 "00000010" // ST r17, [p2]; MOV p2, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5935 "01110000" // /* MW 7 */ + 5936 "01100000" // /* MW 6 */ + 5937 "00110111" // /* MW 5 */ + 5938 "00000001" // /* MW 4 */ + 5939 "00110000" // /* MW 3 */ + 5940 "11000110" // /* MW 2 */ + 5941 "01000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.delay_slot + 5942 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5943 "11000000" // /* MW 3 */ + 5944 "11010110" // /* MW 2 */ + 5945 "00011011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 285 4 +.src_ref 6 "superkernels.cpp" 287 28 +.src_ref 6 "superkernels.cpp" 289 42 +.src_ref 6 "superkernels.cpp" 301 2 +.delay_slot + 5946 "00111010" // ST p2, [sp, #-12]; MOVXM p7, #508736 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5947 "00010001" // /* MW 9 */ + 5948 "10100000" // /* MW 8 */ + 5949 "10110001" // /* MW 7 */ + 5950 "11110011" // /* MW 6 */ + 5951 "00000001" // /* MW 5 */ + 5952 "00000000" // /* MW 4 */ + 5953 "10110000" // /* MW 3 */ + 5954 "10100011" // /* MW 2 */ + 5955 "11111110" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.src_ref 6 "superkernels.cpp" 285 4 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5956 "00111010" // MOVS p0, p7; MOVXM p2, #508448 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5957 "00010001" // /* MW 9 */ + 5958 "00010000" // /* MW 8 */ + 5959 "00110001" // /* MW 7 */ + 5960 "11110001" // /* MW 6 */ + 5961 "00000001" // /* MW 5 */ + 5962 "00000000" // /* MW 4 */ + 5963 "01100000" // /* MW 3 */ + 5964 "10010001" // /* MW 2 */ + 5965 "00010011" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5966 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #508444 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5967 "00010000" // /* MW 9 */ + 5968 "00001110" // /* MW 8 */ + 5969 "00110001" // /* MW 7 */ + 5970 "11110001" // /* MW 6 */ + 5971 "00000001" // /* MW 5 */ + 5972 "00000000" // /* MW 4 */ + 5973 "11100000" // /* MW 3 */ + 5974 "11000000" // /* MW 2 */ + 5975 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5977 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 285 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 5978 "00000100" // JL #5392 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=5392 delay_slots=5 */ + 5979 "00000001" // /* MW 5 */ + 5980 "00000000" // /* MW 4 */ + 5981 "10001000" // /* MW 3 */ + 5982 "00001010" // /* MW 2 */ + 5983 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5985 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5987 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5988 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5989 "00110001" // /* MW 3 */ + 5990 "00100000" // /* MW 2 */ + 5991 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 5992 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5993 "00000101" // /* MW 3 */ + 5994 "00100000" // /* MW 2 */ + 5995 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 5996 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5997 "00010001" // /* MW 3 */ + 5998 "00000110" // /* MW 2 */ + 5999 "00001010" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 289 18 +.src_ref 6 "superkernels.cpp" 289 42 first +.return_address + 6000 "10111010" // LDA r16, [p7]; MOVXM p1, #508420 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6001 "00010000" // /* MW 9 */ + 6002 "00000010" // /* MW 8 */ + 6003 "10110001" // /* MW 7 */ + 6004 "11110000" // /* MW 6 */ + 6005 "00000001" // /* MW 5 */ + 6006 "00000000" // /* MW 4 */ + 6007 "11010000" // /* MW 3 */ + 6008 "11000010" // /* MW 2 */ + 6009 "11100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 289 16 +.src_ref 6 "superkernels.cpp" 289 18 +.src_ref 6 "superkernels.cpp" 298 48 + 6010 "10111010" // LDA r17, [p1]; MOVXM p3, #508424 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6011 "00010000" // /* MW 9 */ + 6012 "00000100" // /* MW 8 */ + 6013 "10110001" // /* MW 7 */ + 6014 "11110001" // /* MW 6 */ + 6015 "00000001" // /* MW 5 */ + 6016 "00000000" // /* MW 4 */ + 6017 "11010000" // /* MW 3 */ + 6018 "11000110" // /* MW 2 */ + 6019 "00100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 287 28 first +.src_ref 6 "superkernels.cpp" 290 16 +.src_ref 6 "superkernels.cpp" 299 48 + 6020 "10111010" // LDA.u16 r18, [p7, #10]; MOVXM p1, #508428 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6021 "00010000" // /* MW 9 */ + 6022 "00000110" // /* MW 8 */ + 6023 "10110001" // /* MW 7 */ + 6024 "11110000" // /* MW 6 */ + 6025 "00000001" // /* MW 5 */ + 6026 "00000000" // /* MW 4 */ + 6027 "01010000" // /* MW 3 */ + 6028 "11001011" // /* MW 2 */ + 6029 "11101010" // /* MW 1 */ + 6030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6031 "00000000" // /* MW 1 */ + 6032 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6033 "00000000" // /* MW 1 */ + 6034 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6035 "00000000" // /* MW 1 */ + 6036 "10000100" // J #6080 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6080 delay_slots=5 */ + 6037 "00000000" // /* MW 5 */ + 6038 "00000000" // /* MW 4 */ + 6039 "11100000" // /* MW 3 */ + 6040 "00001011" // /* MW 2 */ + 6041 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 287 13 +.delay_slot + 6042 "01000100" // MOVXM p2, #508440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6043 "00110000" // /* MW 5 */ + 6044 "11000100" // /* MW 4 */ + 6045 "11000100" // /* MW 3 */ + 6046 "00000111" // /* MW 2 */ + 6047 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 289 27 first +.delay_slot + 6048 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6049 "00001111" // /* MW 3 */ + 6050 "01100001" // /* MW 2 */ + 6051 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 287 13 first +.delay_slot + 6052 "10011000" // ST r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6053 "01010001" // /* MW 3 */ + 6054 "00000110" // /* MW 2 */ + 6055 "00001010" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 289 16 first +.delay_slot + 6056 "10011000" // ST r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6057 "00010001" // /* MW 3 */ + 6058 "00000110" // /* MW 2 */ + 6059 "00001011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 290 16 first +.delay_slot + 6060 "10011000" // ST r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6061 "00010001" // /* MW 3 */ + 6062 "00000110" // /* MW 2 */ + 6063 "00001001" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 +.src_ref 6 "superkernels.cpp" 298 48 + 6064 "01000100" // MOVXM p3, #508424 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6065 "00010000" // /* MW 5 */ + 6066 "11000100" // /* MW 4 */ + 6067 "11000110" // /* MW 3 */ + 6068 "00000111" // /* MW 2 */ + 6069 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 299 48 + 6070 "10111010" // NOPA; MOVXM p1, #508428 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6071 "00010000" // /* MW 9 */ + 6072 "00000110" // /* MW 8 */ + 6073 "10110001" // /* MW 7 */ + 6074 "11110000" // /* MW 6 */ + 6075 "00000001" // /* MW 5 */ + 6076 "00000000" // /* MW 4 */ + 6077 "11110000" // /* MW 3 */ + 6078 "00101100" // /* MW 2 */ + 6079 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 +.src_ref 1 "io_buffer_main.h" 242 49 first + 6080 "00011000" // ADD.NC p0, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6081 "10000110" // /* MW 3 */ + 6082 "01100111" // /* MW 2 */ + 6083 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 293 2 + 6084 "10111010" // LDA r27, [p0], #-4; MOVXM p2, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6085 "00010000" // /* MW 9 */ + 6086 "00000000" // /* MW 8 */ + 6087 "00110001" // /* MW 7 */ + 6088 "11110001" // /* MW 6 */ + 6089 "00000001" // /* MW 5 */ + 6090 "00000000" // /* MW 4 */ + 6091 "11010000" // /* MW 3 */ + 6092 "11101110" // /* MW 2 */ + 6093 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 6094 "10011000" // LDA r16, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6095 "00010110" // /* MW 3 */ + 6096 "11111110" // /* MW 2 */ + 6097 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 6098 "10011000" // LDA r17, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6099 "00110110" // /* MW 3 */ + 6100 "11111110" // /* MW 2 */ + 6101 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 293 2 first + 6102 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6103 "01010110" // /* MW 3 */ + 6104 "00000110" // /* MW 2 */ + 6105 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first + 6106 "10011000" // LDA r19, [p0, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6107 "01110110" // /* MW 3 */ + 6108 "01000110" // /* MW 2 */ + 6109 "00000000" // /* MW 1 */ + 6110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6111 "00000000" // /* MW 1 */ + 6112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6113 "00000000" // /* MW 1 */ + 6114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6115 "00000000" // /* MW 1 */ + 6116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6117 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 6118 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6119 "00000010" // /* MW 3 */ + 6120 "01100001" // /* MW 2 */ + 6121 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 +.src_ref 6 "superkernels.cpp" 293 2 first + 6122 "01011100" // ST r16, [p0]; ADD r16, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6123 "00001110" // /* MW 5 */ + 6124 "01000000" // /* MW 4 */ + 6125 "00111001" // /* MW 3 */ + 6126 "11000010" // /* MW 2 */ + 6127 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 293 2 + 6128 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6129 "00010001" // /* MW 3 */ + 6130 "00000110" // /* MW 2 */ + 6131 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 +.src_ref 1 "io_buffer_main.h" 419 8 + 6132 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6133 "11111101" // /* MW 3 */ + 6134 "11100000" // /* MW 2 */ + 6135 "00010111" // /* MW 1 */ + 6136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6137 "00000000" // /* MW 1 */ + 6138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6139 "00000000" // /* MW 1 */ + 6140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6141 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 6142 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6143 "00001000" // /* MW 3 */ + 6144 "11010011" // /* MW 2 */ + 6145 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 first + 6146 "00011000" // ADD.NC p2, r14, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6147 "00000110" // /* MW 3 */ + 6148 "01100111" // /* MW 2 */ + 6149 "00011010" // /* MW 1 */ + 6150 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6151 "00000000" // /* MW 1 */ + 6152 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6153 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 + 6154 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6155 "01110110" // /* MW 3 */ + 6156 "11111111" // /* MW 2 */ + 6157 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 6158 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6159 "00110110" // /* MW 3 */ + 6160 "11111110" // /* MW 2 */ + 6161 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 6162 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6163 "01010110" // /* MW 3 */ + 6164 "11111110" // /* MW 2 */ + 6165 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 47 first + 6166 "10011000" // LDA r19, [p2, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6167 "01110110" // /* MW 3 */ + 6168 "01010110" // /* MW 2 */ + 6169 "00000010" // /* MW 1 */ + 6170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6171 "00000000" // /* MW 1 */ + 6172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6173 "00000000" // /* MW 1 */ + 6174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6175 "00000000" // /* MW 1 */ + 6176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6177 "00000000" // /* MW 1 */ + 6178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6179 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 6180 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6181 "00010010" // /* MW 3 */ + 6182 "10100011" // /* MW 2 */ + 6183 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 + 6184 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6185 "00110001" // /* MW 3 */ + 6186 "00000110" // /* MW 2 */ + 6187 "00001010" // /* MW 1 */ + 6188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6189 "00000000" // /* MW 1 */ + 6190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6191 "00000000" // /* MW 1 */ + 6192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6193 "00000000" // /* MW 1 */ + 6194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6195 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 6196 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6197 "00001000" // /* MW 3 */ + 6198 "11010011" // /* MW 2 */ + 6199 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 32 +.src_ref 6 "superkernels.cpp" 298 46 +.src_ref 6 "superkernels.cpp" 299 46 + 6200 "00111010" // MOVS p6, p2; MOVX r16, #1; MOV r14, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6201 "01111001" // /* MW 9 */ + 6202 "01100000" // /* MW 8 */ + 6203 "11001110" // /* MW 7 */ + 6204 "00101001" // /* MW 6 */ + 6205 "00000000" // /* MW 5 */ + 6206 "00000001" // /* MW 4 */ + 6207 "01100000" // /* MW 3 */ + 6208 "00010001" // /* MW 2 */ + 6209 "11010001" // /* MW 1 */ + 6210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6211 "00000000" // /* MW 1 */ + 6212 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6213 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 + 6214 "00011000" // LDA p4, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6215 "00011001" // /* MW 3 */ + 6216 "11101110" // /* MW 2 */ + 6217 "00000111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 298 48 first + 6218 "00001100" // LDA r17, [p3]; ST p0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6219 "00111011" // /* MW 5 */ + 6220 "11011000" // /* MW 4 */ + 6221 "11011111" // /* MW 3 */ + 6222 "11000110" // /* MW 2 */ + 6223 "01100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 299 48 first +.src_ref 6 "superkernels.cpp" 301 2 + 6224 "11010100" // LDA r20, [p1]; MOV p3, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6225 "10000001" // /* MW 5 */ + 6226 "11011101" // /* MW 4 */ + 6227 "11010110" // /* MW 3 */ + 6228 "11010010" // /* MW 2 */ + 6229 "00100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 6230 "10011000" // LDA r18, [p2], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6231 "01010110" // /* MW 3 */ + 6232 "01001110" // /* MW 2 */ + 6233 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 6234 "10011000" // LDA p2, [p0], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6235 "00011110" // /* MW 3 */ + 6236 "01011101" // /* MW 2 */ + 6237 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 40 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6238 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6239 "11000000" // /* MW 3 */ + 6240 "01100000" // /* MW 2 */ + 6241 "00011111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6243 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6244 "10011000" // LDA r19, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6245 "01110110" // /* MW 3 */ + 6246 "00000110" // /* MW 2 */ + 6247 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6248 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6249 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 301 2 first +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 6250 "00000100" // JL #5552 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=5552 delay_slots=5 */ + 6251 "00000001" // /* MW 5 */ + 6252 "00000000" // /* MW 4 */ + 6253 "11011000" // /* MW 3 */ + 6254 "00001010" // /* MW 2 */ + 6255 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 40 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6256 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6257 "11000000" // /* MW 3 */ + 6258 "11010100" // /* MW 2 */ + 6259 "00011011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 298 46 first +.delay_slot + 6260 "10011000" // LSHL r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6261 "00001101" // /* MW 3 */ + 6262 "01100011" // /* MW 2 */ + 6263 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 299 46 first +.delay_slot + 6264 "10011000" // LSHL r16, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6265 "00001101" // /* MW 3 */ + 6266 "00100001" // /* MW 2 */ + 6267 "00010101" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 299 46 +.delay_slot + 6268 "01011000" // ADD.NC p1, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6269 "01000001" // /* MW 3 */ + 6270 "01101001" // /* MW 2 */ + 6271 "00011001" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 298 46 first +.delay_slot + 6272 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6273 "00000000" // /* MW 15 */ + 6274 "00000000" // /* MW 14 */ + 6275 "10101000" // /* MW 13 */ + 6276 "11100010" // /* MW 12 */ + 6277 "00110100" // /* MW 11 */ + 6278 "00000000" // /* MW 10 */ + 6279 "00000000" // /* MW 9 */ + 6280 "00000000" // /* MW 8 */ + 6281 "01011011" // /* MW 7 */ + 6282 "00000001" // /* MW 6 */ + 6283 "00100000" // /* MW 5 */ + 6284 "00000000" // /* MW 4 */ + 6285 "11110000" // /* MW 3 */ + 6286 "00101100" // /* MW 2 */ + 6287 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 32 first +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 351 40 +.src_ref 1 "io_buffer_main.h" 449 8 +.src_ref 1 "io_buffer_main.h" 449 8 +.return_address + 6288 "10111010" // LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6289 "01111000" // /* MW 9 */ + 6290 "11010000" // /* MW 8 */ + 6291 "10110011" // /* MW 7 */ + 6292 "00101000" // /* MW 6 */ + 6293 "00000000" // /* MW 5 */ + 6294 "00000001" // /* MW 4 */ + 6295 "11010000" // /* MW 3 */ + 6296 "11000110" // /* MW 2 */ + 6297 "11001000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 19 + 6298 "01000100" // MOVXM p6, #508440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6299 "00110000" // /* MW 5 */ + 6300 "11000100" // /* MW 4 */ + 6301 "11001100" // /* MW 3 */ + 6302 "00000111" // /* MW 2 */ + 6303 "00000000" // /* MW 1 */ + 6304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6305 "00000000" // /* MW 1 */ + 6306 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6307 "00000000" // /* MW 1 */ + 6308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6309 "00000000" // /* MW 1 */ + 6310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6311 "00000000" // /* MW 1 */ + 6312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6313 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 6314 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6315 "00001000" // /* MW 3 */ + 6316 "01010001" // /* MW 2 */ + 6317 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first + 6318 "10011000" // LDA r17, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6319 "00110110" // /* MW 3 */ + 6320 "11110110" // /* MW 2 */ + 6321 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 + 6322 "00011000" // LDA p2, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6323 "00011001" // /* MW 3 */ + 6324 "11101101" // /* MW 2 */ + 6325 "00000111" // /* MW 1 */ + 6326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6327 "00000000" // /* MW 1 */ + 6328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6329 "00000000" // /* MW 1 */ + 6330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6331 "00000000" // /* MW 1 */ + 6332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6333 "00000000" // /* MW 1 */ + 6334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6335 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 + 6336 "10011000" // SUB r17, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6337 "00010001" // /* MW 3 */ + 6338 "00100011" // /* MW 2 */ + 6339 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first +.src_ref 1 "io_buffer_main.h" 351 28 + 6340 "00001100" // LDA r17, [p2, #20]; ST r17, [p1, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6341 "01100011" // /* MW 5 */ + 6342 "11101100" // /* MW 4 */ + 6343 "11010011" // /* MW 3 */ + 6344 "11000110" // /* MW 2 */ + 6345 "01001010" // /* MW 1 */ + 6346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6347 "00000000" // /* MW 1 */ + 6348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6349 "00000000" // /* MW 1 */ + 6350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6351 "00000000" // /* MW 1 */ + 6352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6353 "00000000" // /* MW 1 */ + 6354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6355 "00000000" // /* MW 1 */ + 6356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6357 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 6358 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6359 "00001000" // /* MW 3 */ + 6360 "01010001" // /* MW 2 */ + 6361 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first +.src_ref 6 "superkernels.cpp" 305 6 +.src_ref 6 "superkernels.cpp" 306 14 + 6362 "10111010" // LDA r19, [p7, #-8]; MOVXM p1, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6363 "00010000" // /* MW 9 */ + 6364 "00000000" // /* MW 8 */ + 6365 "10110001" // /* MW 7 */ + 6366 "11110000" // /* MW 6 */ + 6367 "00000001" // /* MW 5 */ + 6368 "00000000" // /* MW 4 */ + 6369 "11010000" // /* MW 3 */ + 6370 "11001110" // /* MW 2 */ + 6371 "11111100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 19 first + 6372 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6373 "01010110" // /* MW 3 */ + 6374 "00000110" // /* MW 2 */ + 6375 "00000110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 6 + 6376 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6377 "00110110" // /* MW 3 */ + 6378 "00000110" // /* MW 2 */ + 6379 "00000001" // /* MW 1 */ + 6380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6381 "00000000" // /* MW 1 */ + 6382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6383 "00000000" // /* MW 1 */ + 6384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6385 "00000000" // /* MW 1 */ + 6386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6387 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 first + 6388 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6389 "00110001" // /* MW 3 */ + 6390 "00100001" // /* MW 2 */ + 6391 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 6392 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6393 "00010001" // /* MW 3 */ + 6394 "11100110" // /* MW 2 */ + 6395 "00001111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 16 first + 6396 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6397 "00101000" // /* MW 3 */ + 6398 "01100001" // /* MW 2 */ + 6399 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 6 + 6400 "10000100" // JNZ r16, #6432 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6432 delay_slots=5 */ + 6401 "00000001" // /* MW 5 */ + 6402 "01000000" // /* MW 4 */ + 6403 "10010000" // /* MW 3 */ + 6404 "00001100" // /* MW 2 */ + 6405 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6407 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6408 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6409 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6411 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6413 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6415 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 306 14 + 6416 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6417 "00000001" // /* MW 3 */ + 6418 "00100000" // /* MW 2 */ + 6419 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 306 14 first + 6420 "00110110" // NOPA; NOPB; ST r16, [p1]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6421 "11000001" // /* MW 11 */ + 6422 "00001000" // /* MW 10 */ + 6423 "10000011" // /* MW 9 */ + 6424 "00000000" // /* MW 8 */ + 6425 "00000000" // /* MW 7 */ + 6426 "00000000" // /* MW 6 */ + 6427 "00100000" // /* MW 5 */ + 6428 "00000000" // /* MW 4 */ + 6429 "11110000" // /* MW 3 */ + 6430 "00101100" // /* MW 2 */ + 6431 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 +.src_ref 6 "superkernels.cpp" 308 + 6432 "00011000" // LDA lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6433 "00111001" // /* MW 3 */ + 6434 "11110000" // /* MW 2 */ + 6435 "00000111" // /* MW 1 */ + 6436 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6437 "11110001" // /* MW 3 */ + 6438 "11111101" // /* MW 2 */ + 6439 "00000111" // /* MW 1 */ + 6440 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6441 "10011001" // /* MW 3 */ + 6442 "11110111" // /* MW 2 */ + 6443 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 6444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6445 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.noswbrkpt + 6446 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6447 "11010001" // /* MW 3 */ + 6448 "11111001" // /* MW 2 */ + 6449 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 6450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6451 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 6452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6453 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 308 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 6454 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 6455 "00000000" // /* MW 3 */ + 6456 "00101000" // /* MW 2 */ + 6457 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6458 "00011000" // MOVS p6, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6459 "00001011" // /* MW 3 */ + 6460 "10001110" // /* MW 2 */ + 6461 "00001110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 308 +.delay_slot + 6462 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6463 "00000001" // /* MW 5 */ + 6464 "00000000" // /* MW 4 */ + 6465 "00000000" // /* MW 3 */ + 6466 "11111000" // /* MW 2 */ + 6467 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6469 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6471 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0 + 6473 "00000000" // /* MW 1 */ +.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh +.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_begin0 +.function setup_conv2d_dw_params_bf16 _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh +.src_ref 7 "conv2d_dw_bf16_params.h" 177 first +.src_ref 7 "conv2d_dw_bf16_params.h" 181 15 +.src_ref 7 "conv2d_dw_bf16_params.h" 181 17 first +.function_start + 6480 "10111010" // LDA el0, [p0], #4; MOVXM p1, #508864 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6481 "00010000" // /* MW 9 */ + 6482 "11100000" // /* MW 8 */ + 6483 "10110001" // /* MW 7 */ + 6484 "11110000" // /* MW 6 */ + 6485 "00000001" // /* MW 5 */ + 6486 "00000000" // /* MW 4 */ + 6487 "11010000" // /* MW 3 */ + 6488 "10000101" // /* MW 2 */ + 6489 "00000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 181 17 first +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.src_ref 7 "conv2d_dw_bf16_params.h" 184 80 + 6490 "10111010" // LDA eh0, [p0], #4; MOVX r16, #2; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6491 "01011000" // /* MW 9 */ + 6492 "00000000" // /* MW 8 */ + 6493 "00001000" // /* MW 7 */ + 6494 "01001011" // /* MW 6 */ + 6495 "00000000" // /* MW 5 */ + 6496 "00000001" // /* MW 4 */ + 6497 "11010000" // /* MW 3 */ + 6498 "10000001" // /* MW 2 */ + 6499 "00000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 177 + 6500 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6501 "00000001" // /* MW 5 */ + 6502 "00000000" // /* MW 4 */ + 6503 "00000000" // /* MW 3 */ + 6504 "00001000" // /* MW 2 */ + 6505 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 32 + 6506 "00111010" // ST p7, [sp, #-16]; MOVXM p7, #508864 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6507 "00010001" // /* MW 9 */ + 6508 "11100000" // /* MW 8 */ + 6509 "10110001" // /* MW 7 */ + 6510 "11110011" // /* MW 6 */ + 6511 "00000001" // /* MW 5 */ + 6512 "00000000" // /* MW 4 */ + 6513 "10110000" // /* MW 3 */ + 6514 "01110011" // /* MW 2 */ + 6515 "11111110" // /* MW 1 */ + 6516 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6517 "00111101" // /* MW 3 */ + 6518 "11111100" // /* MW 2 */ + 6519 "00001111" // /* MW 1 */ + 6520 "10011000" // ST r14, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6521 "11010101" // /* MW 3 */ + 6522 "11110101" // /* MW 2 */ + 6523 "00001111" // /* MW 1 */ + 6524 "10011000" // ST r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6525 "11110101" // /* MW 3 */ + 6526 "11111001" // /* MW 2 */ + 6527 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 181 15 + 6528 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6529 "00101001" // /* MW 3 */ + 6530 "00011100" // /* MW 2 */ + 6531 "00001001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 181 15 + 6532 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6533 "00001001" // /* MW 3 */ + 6534 "00011100" // /* MW 2 */ + 6535 "00001001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 181 17 + 6536 "10011000" // LDA el0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6537 "00101110" // /* MW 3 */ + 6538 "00000100" // /* MW 2 */ + 6539 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 181 17 + 6540 "10011000" // LDA eh0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6541 "00001110" // /* MW 3 */ + 6542 "00010100" // /* MW 2 */ + 6543 "00000000" // /* MW 1 */ + 6544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6545 "00000000" // /* MW 1 */ + 6546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6547 "00000000" // /* MW 1 */ + 6548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6549 "00000000" // /* MW 1 */ + 6550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6551 "00000000" // /* MW 1 */ + 6552 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6553 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 181 15 + 6554 "10011000" // ST el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6555 "00101001" // /* MW 3 */ + 6556 "00000100" // /* MW 2 */ + 6557 "00001001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 181 15 + 6558 "10011000" // ST eh0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6559 "00001001" // /* MW 3 */ + 6560 "00010100" // /* MW 2 */ + 6561 "00001001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 32 first + 6562 "10011000" // LDA.u8 r17, [p7], #5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6563 "00101010" // /* MW 3 */ + 6564 "01011110" // /* MW 2 */ + 6565 "00000111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 52 + 6566 "10011000" // LDA.u8 r18, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6567 "01001010" // /* MW 3 */ + 6568 "11101110" // /* MW 2 */ + 6569 "00000111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 80 + 6570 "10011000" // LDA.u8 r1, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6571 "00101010" // /* MW 3 */ + 6572 "11101100" // /* MW 2 */ + 6573 "00000111" // /* MW 1 */ + 6574 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6575 "00000000" // /* MW 1 */ + 6576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6577 "00000000" // /* MW 1 */ + 6578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6579 "00000000" // /* MW 1 */ + 6580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6581 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.no_stack_arguments + 6582 "00000100" // JL #12464 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12464 delay_slots=5 */ + 6583 "00000001" // /* MW 5 */ + 6584 "00000000" // /* MW 4 */ + 6585 "01011000" // /* MW 3 */ + 6586 "00011000" // /* MW 2 */ + 6587 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 38 +.delay_slot + 6588 "01011100" // ST r18, [sp, #-20]; SUB r14, r17, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6589 "01000011" // /* MW 5 */ + 6590 "10111010" // /* MW 4 */ + 6591 "10111000" // /* MW 3 */ + 6592 "11001010" // /* MW 2 */ + 6593 "11111101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.src_ref 7 "conv2d_dw_bf16_params.h" 184 80 +.delay_slot + 6594 "00111010" // ST r1, [sp, #-28]; NE r16, r1, r16; MOV r15, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6595 "01111001" // /* MW 9 */ + 6596 "01010000" // /* MW 8 */ + 6597 "11101000" // /* MW 7 */ + 6598 "01000101" // /* MW 6 */ + 6599 "00001000" // /* MW 5 */ + 6600 "00000011" // /* MW 4 */ + 6601 "10110000" // /* MW 3 */ + 6602 "10000110" // /* MW 2 */ + 6603 "11111100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.delay_slot + 6604 "01011100" // ST r16, [sp, #-24]; LT r27, r14, r24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6605 "00010101" // /* MW 5 */ + 6606 "01101111" // /* MW 4 */ + 6607 "10110111" // /* MW 3 */ + 6608 "01000010" // /* MW 2 */ + 6609 "11111101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.delay_slot + 6610 "10011000" // SUB r17, r24, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6611 "11100001" // /* MW 3 */ + 6612 "00100010" // /* MW 2 */ + 6613 "00010110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.delay_slot + 6614 "01111010" // NOPA; NOPS; SEL.EQZ r0, r14, r17, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6615 "00010010" // /* MW 9 */ + 6616 "10000001" // /* MW 8 */ + 6617 "00000011" // /* MW 7 */ + 6618 "00000000" // /* MW 6 */ + 6619 "01011011" // /* MW 5 */ + 6620 "00000001" // /* MW 4 */ + 6621 "11110000" // /* MW 3 */ + 6622 "00101100" // /* MW 2 */ + 6623 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.src_ref 7 "conv2d_dw_bf16_params.h" 185 32 first +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 +.return_address + 6624 "10111010" // LDA.u8 r17, [p7], #3; XOR r20, r15, r14; MOV r16, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6625 "01011000" // /* MW 9 */ + 6626 "00000000" // /* MW 8 */ + 6627 "00001000" // /* MW 7 */ + 6628 "00110110" // /* MW 6 */ + 6629 "01000111" // /* MW 5 */ + 6630 "00011111" // /* MW 4 */ + 6631 "01010000" // /* MW 3 */ + 6632 "11000101" // /* MW 2 */ + 6633 "11100111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 first +.src_ref 7 "conv2d_dw_bf16_params.h" 185 52 + 6634 "00101100" // LDA.u8 r18, [p7], #-2; SUB r19, r16, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6635 "01000011" // /* MW 5 */ + 6636 "01001100" // /* MW 4 */ + 6637 "01011000" // /* MW 3 */ + 6638 "11001001" // /* MW 2 */ + 6639 "11111101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 + 6640 "00101100" // LDA r1, [sp, #-28]; LT r27, r20, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6641 "00010101" // /* MW 5 */ + 6642 "01101110" // /* MW 4 */ + 6643 "00101010" // /* MW 3 */ + 6644 "10000110" // /* MW 2 */ + 6645 "11111100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 + 6646 "00011000" // SEL.EQZ r19, r2, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6647 "00110010" // /* MW 3 */ + 6648 "10100111" // /* MW 2 */ + 6649 "00010000" // /* MW 1 */ + 6650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6651 "00000000" // /* MW 1 */ + 6652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6653 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 first +.no_stack_arguments + 6654 "00000100" // JL #12464 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12464 delay_slots=5 */ + 6655 "00000001" // /* MW 5 */ + 6656 "00000000" // /* MW 4 */ + 6657 "01011000" // /* MW 3 */ + 6658 "00011000" // /* MW 2 */ + 6659 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 first +.delay_slot + 6660 "00011000" // EXTEND.s16 r19, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6661 "01110000" // /* MW 3 */ + 6662 "11100110" // /* MW 2 */ + 6663 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 87 +.src_ref 7 "conv2d_dw_bf16_params.h" 185 38 first +.delay_slot + 6664 "00111010" // ST r18, [sp, #-32]; SUB r14, r17, r18; ADD.NC r15, r19, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6665 "01001001" // /* MW 9 */ + 6666 "11000000" // /* MW 8 */ + 6667 "11101100" // /* MW 7 */ + 6668 "00001101" // /* MW 6 */ + 6669 "11101001" // /* MW 5 */ + 6670 "00100010" // /* MW 4 */ + 6671 "10110000" // /* MW 3 */ + 6672 "01001010" // /* MW 2 */ + 6673 "11111100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 +.delay_slot + 6674 "10011000" // LT r27, r14, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6675 "00001010" // /* MW 3 */ + 6676 "10110111" // /* MW 2 */ + 6677 "00010011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 +.delay_slot + 6678 "10011000" // SUB r17, r16, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6679 "11100001" // /* MW 3 */ + 6680 "00100010" // /* MW 2 */ + 6681 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 +.delay_slot + 6682 "00101100" // NOPA; SEL.EQZ r0, r14, r17, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6683 "00100100" // /* MW 5 */ + 6684 "00000010" // /* MW 4 */ + 6685 "11110111" // /* MW 3 */ + 6686 "00101100" // /* MW 2 */ + 6687 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 +.src_ref 7 "conv2d_dw_bf16_params.h" 186 23 +.src_ref 7 "conv2d_dw_bf16_params.h" 192 50 +.src_ref 7 "conv2d_dw_bf16_params.h" 198 45 +.src_ref 7 "conv2d_dw_bf16_params.h" 200 64 +.src_ref 7 "conv2d_dw_bf16_params.h" 200 64 +.src_ref 7 "conv2d_dw_bf16_params.h" 216 53 +.src_ref 7 "conv2d_dw_bf16_params.h" 235 59 +.src_ref 7 "conv2d_dw_bf16_params.h" 237 57 +.src_ref 7 "conv2d_dw_bf16_params.h" 239 139 +.return_address + 6688 "10111010" // LDA r1, [sp, #-28]; MOVX r19, #2; MOV m0, #66 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6689 "01011000" // /* MW 9 */ + 6690 "01000010" // /* MW 8 */ + 6691 "00000000" // /* MW 7 */ + 6692 "01001000" // /* MW 6 */ + 6693 "00110000" // /* MW 5 */ + 6694 "00000001" // /* MW 4 */ + 6695 "00100000" // /* MW 3 */ + 6696 "10000110" // /* MW 2 */ + 6697 "11111100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 +.src_ref 7 "conv2d_dw_bf16_params.h" 192 50 first +.src_ref 7 "conv2d_dw_bf16_params.h" 200 45 +.src_ref 7 "conv2d_dw_bf16_params.h" 200 45 +.src_ref 7 "conv2d_dw_bf16_params.h" 209 100 +.src_ref 7 "conv2d_dw_bf16_params.h" 219 63 +.src_ref 7 "conv2d_dw_bf16_params.h" 226 49 +.src_ref 7 "conv2d_dw_bf16_params.h" 230 47 +.src_ref 7 "conv2d_dw_bf16_params.h" 237 41 + 6698 "10111010" // LDA.u8 r20, [p7], m0; MOVX r24, #0; MOV r18, #8 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6699 "01011000" // /* MW 9 */ + 6700 "00001000" // /* MW 8 */ + 6701 "01001000" // /* MW 7 */ + 6702 "00001010" // /* MW 6 */ + 6703 "10000000" // /* MW 5 */ + 6704 "00000001" // /* MW 4 */ + 6705 "01010000" // /* MW 3 */ + 6706 "01010001" // /* MW 2 */ + 6707 "11100001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 197 78 +.src_ref 7 "conv2d_dw_bf16_params.h" 198 76 +.src_ref 7 "conv2d_dw_bf16_params.h" 200 45 +.src_ref 7 "conv2d_dw_bf16_params.h" 234 81 + 6708 "10111010" // LDA r28, [sp, #-32]; MOVX r16, #-6; MOV r31, #23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6709 "01011000" // /* MW 9 */ + 6710 "00010111" // /* MW 8 */ + 6711 "11101000" // /* MW 7 */ + 6712 "01001011" // /* MW 6 */ + 6713 "00000111" // /* MW 5 */ + 6714 "00111111" // /* MW 4 */ + 6715 "00100000" // /* MW 3 */ + 6716 "01110010" // /* MW 2 */ + 6717 "11111100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 186 74 +.src_ref 7 "conv2d_dw_bf16_params.h" 197 68 +.src_ref 7 "conv2d_dw_bf16_params.h" 201 50 +.src_ref 7 "conv2d_dw_bf16_params.h" 208 63 +.src_ref 7 "conv2d_dw_bf16_params.h" 226 49 +.src_ref 7 "conv2d_dw_bf16_params.h" 230 47 +.src_ref 7 "conv2d_dw_bf16_params.h" 239 51 +.src_ref 7 "conv2d_dw_bf16_params.h" 239 74 + 6718 "10111010" // LDA r22, [sp, #-20]; MOVX r26, #-2; MOV r21, #6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6719 "01011000" // /* MW 9 */ + 6720 "00000110" // /* MW 8 */ + 6721 "10101000" // /* MW 7 */ + 6722 "11001010" // /* MW 6 */ + 6723 "10100111" // /* MW 5 */ + 6724 "00111111" // /* MW 4 */ + 6725 "00100000" // /* MW 3 */ + 6726 "11011010" // /* MW 2 */ + 6727 "11111101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 186 84 +.src_ref 7 "conv2d_dw_bf16_params.h" 198 39 +.src_ref 7 "conv2d_dw_bf16_params.h" 201 50 +.src_ref 7 "conv2d_dw_bf16_params.h" 234 77 + 6728 "10111010" // LDA r3, [sp, #-24]; MOVX r29, #508; MOV m2, #32 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6729 "01011000" // /* MW 9 */ + 6730 "00100000" // /* MW 8 */ + 6731 "00000000" // /* MW 7 */ + 6732 "10001001" // /* MW 6 */ + 6733 "11010111" // /* MW 5 */ + 6734 "00001111" // /* MW 4 */ + 6735 "00100000" // /* MW 3 */ + 6736 "00001110" // /* MW 2 */ + 6737 "11111101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 208 16 +.src_ref 7 "conv2d_dw_bf16_params.h" 208 71 +.src_ref 7 "conv2d_dw_bf16_params.h" 209 93 +.src_ref 7 "conv2d_dw_bf16_params.h" 214 72 +.src_ref 7 "conv2d_dw_bf16_params.h" 216 53 +.src_ref 7 "conv2d_dw_bf16_params.h" 221 73 +.src_ref 7 "conv2d_dw_bf16_params.h" 230 41 +.src_ref 7 "conv2d_dw_bf16_params.h" 237 41 + 6738 "10111010" // MOVA m0, #-178; MOVX r23, #1; MOV r0, #128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6739 "01011000" // /* MW 9 */ + 6740 "10000000" // /* MW 8 */ + 6741 "00001000" // /* MW 7 */ + 6742 "00101000" // /* MW 6 */ + 6743 "01110000" // /* MW 5 */ + 6744 "00000001" // /* MW 4 */ + 6745 "10000000" // /* MW 3 */ + 6746 "11000000" // /* MW 2 */ + 6747 "11101001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 first +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 +.src_ref 7 "conv2d_dw_bf16_params.h" 232 54 + 6748 "10111010" // MOVA m1, #186; SUB r17, r24, r2; MOV vaddSign0, crMCDEn /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6749 "01111000" // /* MW 9 */ + 6750 "10110000" // /* MW 8 */ + 6751 "10011101" // /* MW 7 */ + 6752 "00001100" // /* MW 6 */ + 6753 "00010001" // /* MW 5 */ + 6754 "00110001" // /* MW 4 */ + 6755 "10000000" // /* MW 3 */ + 6756 "01000100" // /* MW 2 */ + 6757 "00010111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 + 6758 "10011000" // XOR r30, r1, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6759 "11100110" // /* MW 3 */ + 6760 "01111100" // /* MW 2 */ + 6761 "00010000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 + 6762 "10011000" // LT r27, r30, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6763 "10001010" // /* MW 3 */ + 6764 "10110111" // /* MW 2 */ + 6765 "00010111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 +.src_ref 7 "conv2d_dw_bf16_params.h" 206 70 + 6766 "00100100" // SEL.EQZ r17, r2, r17, r27; ADD.NC r7, r28, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6767 "11111111" // /* MW 5 */ + 6768 "10111100" // /* MW 4 */ + 6769 "01000011" // /* MW 3 */ + 6770 "01100010" // /* MW 2 */ + 6771 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 +.src_ref 7 "conv2d_dw_bf16_params.h" 186 74 + 6772 "00100100" // EXTEND.s16 r30, r17; ADD.NC r22, r22, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6773 "00000010" // /* MW 5 */ + 6774 "00110110" // /* MW 4 */ + 6775 "00001011" // /* MW 3 */ + 6776 "10001110" // /* MW 2 */ + 6777 "10001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 88 +.src_ref 7 "conv2d_dw_bf16_params.h" 192 22 first + 6778 "00100100" // MUL r30, r15, r20; ADD.NC r14, r30, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6779 "00000001" // /* MW 5 */ + 6780 "00111110" // /* MW 4 */ + 6781 "11110111" // /* MW 3 */ + 6782 "10101001" // /* MW 2 */ + 6783 "01111111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 197 68 +.src_ref 7 "conv2d_dw_bf16_params.h" 198 45 first + 6784 "00100100" // MUL r2, r1, r14; ADD.NC r17, r22, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6785 "00000001" // /* MW 5 */ + 6786 "10110110" // /* MW 4 */ + 6787 "11111000" // /* MW 3 */ + 6788 "10011101" // /* MW 2 */ + 6789 "00001000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 200 64 first + 6790 "10011000" // EQ r27, r19, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6791 "00010111" // /* MW 3 */ + 6792 "11110110" // /* MW 2 */ + 6793 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 198 55 first + 6794 "10011000" // MUL r2, r30, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6795 "00101111" // /* MW 3 */ + 6796 "10000100" // /* MW 2 */ + 6797 "00010111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 200 45 first +.src_ref 7 "conv2d_dw_bf16_params.h" 209 42 +.src_ref 7 "conv2d_dw_bf16_params.h" 213 69 + 6798 "01100100" // SEL.EQZ r31, r31, r18, r27; MOV r18, #-3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6799 "11110101" // /* MW 5 */ + 6800 "00111111" // /* MW 4 */ + 6801 "01001001" // /* MW 3 */ + 6802 "11100100" // /* MW 2 */ + 6803 "11111111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 198 76 first + 6804 "10011000" // LSHL r16, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6805 "00001101" // /* MW 3 */ + 6806 "10100001" // /* MW 2 */ + 6807 "00010000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 197 68 first + 6808 "10011000" // LSHL r2, r17, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6809 "10101101" // /* MW 3 */ + 6810 "01000101" // /* MW 2 */ + 6811 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 197 78 +.src_ref 7 "conv2d_dw_bf16_params.h" 204 79 + 6812 "00100100" // MUL r2, r2, r28; ADD.NC r4, r2, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6813 "11111111" // /* MW 5 */ + 6814 "00100010" // /* MW 4 */ + 6815 "11110010" // /* MW 3 */ + 6816 "10111001" // /* MW 2 */ + 6817 "00010000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 201 50 first + 6818 "10011000" // LSHL r3, r3, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6819 "01011101" // /* MW 3 */ + 6820 "11000111" // /* MW 2 */ + 6821 "00010000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 186 23 first +.src_ref 7 "conv2d_dw_bf16_params.h" 197 39 first + 6822 "01011100" // ST r2, [p7], #-4; MUL r5, r15, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6823 "00111111" // /* MW 5 */ + 6824 "10010100" // /* MW 4 */ + 6825 "00110111" // /* MW 3 */ + 6826 "10001010" // /* MW 2 */ + 6827 "11111111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 186 84 +.src_ref 7 "conv2d_dw_bf16_params.h" 198 39 first +.src_ref 7 "conv2d_dw_bf16_params.h" 212 52 +.src_ref 7 "conv2d_dw_bf16_params.h" 219 53 + 6828 "00111010" // ST r16, [p7], m2; AND r22, r29, r22; MOV r16, #4 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6829 "01011001" // /* MW 9 */ + 6830 "00000100" // /* MW 8 */ + 6831 "00001000" // /* MW 7 */ + 6832 "00100110" // /* MW 6 */ + 6833 "01101011" // /* MW 5 */ + 6834 "00111011" // /* MW 4 */ + 6835 "00110000" // /* MW 3 */ + 6836 "01000010" // /* MW 2 */ + 6837 "11101001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 186 44 first +.src_ref 7 "conv2d_dw_bf16_params.h" 200 42 first + 6838 "01011100" // ST r31, [p7], #-16; ADD r22, r5, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6839 "11000001" // /* MW 5 */ + 6840 "11011010" // /* MW 4 */ + 6841 "00110010" // /* MW 3 */ + 6842 "11111110" // /* MW 2 */ + 6843 "11111001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 193 22 first +.src_ref 7 "conv2d_dw_bf16_params.h" 201 47 first + 6844 "01011100" // ST r3, [p7], #24; MUL r31, r22, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6845 "10011111" // /* MW 5 */ + 6846 "01111110" // /* MW 4 */ + 6847 "00111011" // /* MW 3 */ + 6848 "10001110" // /* MW 2 */ + 6849 "11101101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 204 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 212 52 first + 6850 "01011100" // ST r4, [p7], #4; LSHL r22, r22, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6851 "00011011" // /* MW 5 */ + 6852 "01011010" // /* MW 4 */ + 6853 "00111011" // /* MW 3 */ + 6854 "10010010" // /* MW 2 */ + 6855 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 208 16 first + 6856 "10011000" // LSHL r3, r31, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6857 "01111101" // /* MW 3 */ + 6858 "11000111" // /* MW 2 */ + 6859 "00010111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 208 63 + 6860 "10011000" // LSHL r4, r4, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6861 "01011101" // /* MW 3 */ + 6862 "00001001" // /* MW 2 */ + 6863 "00010001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 208 71 +.src_ref 7 "conv2d_dw_bf16_params.h" 214 72 first + 6864 "10100100" // SUB r25, r22, r3; ADD.NC r4, r4, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6865 "00000010" // /* MW 5 */ + 6866 "00100100" // /* MW 4 */ + 6867 "00110010" // /* MW 3 */ + 6868 "01000110" // /* MW 2 */ + 6869 "10110110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 200 45 +.src_ref 7 "conv2d_dw_bf16_params.h" 205 42 +.src_ref 7 "conv2d_dw_bf16_params.h" 208 23 first +.src_ref 7 "conv2d_dw_bf16_params.h" 214 72 +.src_ref 7 "conv2d_dw_bf16_params.h" 219 63 + 6870 "10111010" // MOVA r0, #-64; SUB r6, r3, r4; ADD.NC r25, r25, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6871 "10101000" // /* MW 9 */ + 6872 "01000000" // /* MW 8 */ + 6873 "00101110" // /* MW 7 */ + 6874 "00001111" // /* MW 6 */ + 6875 "01100010" // /* MW 5 */ + 6876 "00000110" // /* MW 4 */ + 6877 "00000000" // /* MW 3 */ + 6878 "00000000" // /* MW 2 */ + 6879 "11111000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 205 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 216 53 first + 6880 "01011100" // ST r0, [p7], #4; MUL r1, r31, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6881 "00111111" // /* MW 5 */ + 6882 "10000100" // /* MW 4 */ + 6883 "00111111" // /* MW 3 */ + 6884 "10000010" // /* MW 2 */ + 6885 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 206 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 209 53 first + 6886 "01011100" // ST r7, [p7], #4; MUL r31, r31, r7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6887 "11111111" // /* MW 5 */ + 6888 "11111100" // /* MW 4 */ + 6889 "00111111" // /* MW 3 */ + 6890 "10011110" // /* MW 2 */ + 6891 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 207 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 213 69 first + 6892 "01011100" // ST r6, [p7], #4; LSHL r5, r5, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6893 "01011011" // /* MW 5 */ + 6894 "10010110" // /* MW 4 */ + 6895 "00110010" // /* MW 3 */ + 6896 "10011010" // /* MW 2 */ + 6897 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 209 93 first +.src_ref 7 "conv2d_dw_bf16_params.h" 213 73 + 6898 "00100100" // LSHL r6, r31, r23; ADD.NC r31, r5, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6899 "11111111" // /* MW 5 */ + 6900 "10100101" // /* MW 4 */ + 6901 "10111111" // /* MW 3 */ + 6902 "10101111" // /* MW 2 */ + 6903 "11111001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 209 42 +.src_ref 7 "conv2d_dw_bf16_params.h" 209 100 +.src_ref 7 "conv2d_dw_bf16_params.h" 216 61 + 6904 "10111010" // MOVA r4, #7; LSHL r5, r20, r18; ADD.NC r18, r6, r4 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6905 "10101000" // /* MW 9 */ + 6906 "10001000" // /* MW 8 */ + 6907 "01001001" // /* MW 7 */ + 6908 "01101110" // /* MW 6 */ + 6909 "01011001" // /* MW 5 */ + 6910 "00101000" // /* MW 4 */ + 6911 "00000000" // /* MW 3 */ + 6912 "11100100" // /* MW 2 */ + 6913 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 209 100 + 6914 "10011000" // SUB r18, r24, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6915 "00100001" // /* MW 3 */ + 6916 "00100101" // /* MW 2 */ + 6917 "00010110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 209 42 +.src_ref 7 "conv2d_dw_bf16_params.h" 211 77 first +.src_ref 7 "conv2d_dw_bf16_params.h" 216 53 first + 6918 "00111010" // ST r18, [p7], #4; LSHL r1, r1, r23; ADD.NC r18, r5, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6919 "11001001" // /* MW 9 */ + 6920 "01111111" // /* MW 8 */ + 6921 "01001001" // /* MW 7 */ + 6922 "11101110" // /* MW 6 */ + 6923 "00011011" // /* MW 5 */ + 6924 "00000010" // /* MW 4 */ + 6925 "00110000" // /* MW 3 */ + 6926 "11001010" // /* MW 2 */ + 6927 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 211 42 +.src_ref 7 "conv2d_dw_bf16_params.h" 216 61 + 6928 "01011100" // ST r18, [p7], #4; ADD r6, r1, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6929 "11000001" // /* MW 5 */ + 6930 "10011010" // /* MW 4 */ + 6931 "00110000" // /* MW 3 */ + 6932 "11001010" // /* MW 2 */ + 6933 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 212 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 216 61 + 6934 "01011100" // ST r22, [p7], #4; LSHL r1, r31, r4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6935 "10011011" // /* MW 5 */ + 6936 "10000100" // /* MW 4 */ + 6937 "00111111" // /* MW 3 */ + 6938 "11011010" // /* MW 2 */ + 6939 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 213 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 216 61 first +.src_ref 7 "conv2d_dw_bf16_params.h" 224 116 +.src_ref 7 "conv2d_dw_bf16_params.h" 224 140 + 6940 "00111010" // ST r31, [p7], #4; ADD r22, r3, r1; MOV r1, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6941 "01011001" // /* MW 9 */ + 6942 "11111111" // /* MW 8 */ + 6943 "00101111" // /* MW 7 */ + 6944 "10000100" // /* MW 6 */ + 6945 "01100000" // /* MW 5 */ + 6946 "00000111" // /* MW 4 */ + 6947 "00110000" // /* MW 3 */ + 6948 "11111110" // /* MW 2 */ + 6949 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 214 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 216 61 +.src_ref 7 "conv2d_dw_bf16_params.h" 222 43 +.src_ref 7 "conv2d_dw_bf16_params.h" 225 43 +.src_ref 7 "conv2d_dw_bf16_params.h" 229 41 +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 + 6950 "00111010" // ST r25, [p7], #4; SUB r3, r6, r22; MOV r22, #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6951 "01011001" // /* MW 9 */ + 6952 "01000000" // /* MW 8 */ + 6953 "11001000" // /* MW 7 */ + 6954 "00001110" // /* MW 6 */ + 6955 "00111011" // /* MW 5 */ + 6956 "00001100" // /* MW 4 */ + 6957 "00110000" // /* MW 3 */ + 6958 "11100110" // /* MW 2 */ + 6959 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 215 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 219 53 first + 6960 "01011100" // ST r3, [p7], #4; LSHL r16, r15, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6961 "00011011" // /* MW 5 */ + 6962 "11000010" // /* MW 4 */ + 6963 "00110111" // /* MW 3 */ + 6964 "10001110" // /* MW 2 */ + 6965 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 200 45 first +.src_ref 7 "conv2d_dw_bf16_params.h" 218 43 first +.src_ref 7 "conv2d_dw_bf16_params.h" 219 63 + 6966 "01011100" // ST r18, [p7], #4; SEL.EQZ r0, r0, r24, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6967 "00000100" // /* MW 5 */ + 6968 "00000011" // /* MW 4 */ + 6969 "00110000" // /* MW 3 */ + 6970 "11001010" // /* MW 2 */ + 6971 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 219 60 first +.src_ref 7 "conv2d_dw_bf16_params.h" 221 73 first + 6972 "10100100" // LSHL r3, r30, r23; ADD.NC r0, r16, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6973 "00000010" // /* MW 5 */ + 6974 "00110000" // /* MW 4 */ + 6975 "10110000" // /* MW 3 */ + 6976 "11101111" // /* MW 2 */ + 6977 "11110000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 219 43 +.src_ref 7 "conv2d_dw_bf16_params.h" 221 73 + 6978 "01011100" // ST r0, [p7], #4; SUB r16, r16, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6979 "01100011" // /* MW 5 */ + 6980 "01000000" // /* MW 4 */ + 6981 "00111000" // /* MW 3 */ + 6982 "10000010" // /* MW 2 */ + 6983 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 220 43 first +.src_ref 7 "conv2d_dw_bf16_params.h" 221 73 +.src_ref 7 "conv2d_dw_bf16_params.h" 224 116 first +.src_ref 7 "conv2d_dw_bf16_params.h" 224 140 first + 6984 "00111010" // ST r31, [p7], #4; MAC r1, r1, r5, r2; ADD.NC r31, r16, #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6985 "00001001" // /* MW 9 */ + 6986 "00010000" // /* MW 8 */ + 6987 "11101100" // /* MW 7 */ + 6988 "00110011" // /* MW 6 */ + 6989 "00010001" // /* MW 5 */ + 6990 "00001010" // /* MW 4 */ + 6991 "00110000" // /* MW 3 */ + 6992 "11111110" // /* MW 2 */ + 6993 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 221 43 first +.src_ref 7 "conv2d_dw_bf16_params.h" 230 47 first + 6994 "01011100" // ST r31, [p7], #4; LSHL r31, r18, r21 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6995 "10111011" // /* MW 5 */ + 6996 "01111110" // /* MW 4 */ + 6997 "00111001" // /* MW 3 */ + 6998 "11111110" // /* MW 2 */ + 6999 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 222 43 first +.src_ref 7 "conv2d_dw_bf16_params.h" 226 49 first + 7000 "01011100" // ST r22, [p7], #4; LSHL r2, r1, r21 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7001 "10111011" // /* MW 5 */ + 7002 "10001010" // /* MW 4 */ + 7003 "00110000" // /* MW 3 */ + 7004 "11011010" // /* MW 2 */ + 7005 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 224 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 226 49 + 7006 "01011100" // ST r1, [p7], #4; SUB r1, r24, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7007 "01000011" // /* MW 5 */ + 7008 "00000100" // /* MW 4 */ + 7009 "00111100" // /* MW 3 */ + 7010 "10000110" // /* MW 2 */ + 7011 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 225 43 first +.src_ref 7 "conv2d_dw_bf16_params.h" 230 47 first + 7012 "01011100" // ST r22, [p7], #4; SUB r2, r24, r31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7013 "11100011" // /* MW 5 */ + 7014 "00001011" // /* MW 4 */ + 7015 "00111100" // /* MW 3 */ + 7016 "11011010" // /* MW 2 */ + 7017 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 226 43 first + 7018 "10011000" // ST r1, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7019 "00110001" // /* MW 3 */ + 7020 "00011100" // /* MW 2 */ + 7021 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 228 40 first + 7022 "10011000" // ST r18, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7023 "01010001" // /* MW 3 */ + 7024 "00011110" // /* MW 2 */ + 7025 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 229 41 first + 7026 "10011000" // ST r22, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7027 "11010001" // /* MW 3 */ + 7028 "00011110" // /* MW 2 */ + 7029 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 230 41 first + 7030 "10011000" // ST r2, [p7], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7031 "01010001" // /* MW 3 */ + 7032 "00001000" // /* MW 2 */ + 7033 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 54 first + 7034 "10011000" // LDA.u8 r1, [p7], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7035 "00101010" // /* MW 3 */ + 7036 "00101000" // /* MW 2 */ + 7037 "00000111" // /* MW 1 */ + 7038 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7039 "00000000" // /* MW 1 */ + 7040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7041 "00000000" // /* MW 1 */ + 7042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7043 "00000000" // /* MW 1 */ + 7044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7045 "00000000" // /* MW 1 */ + 7046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7047 "00000000" // /* MW 1 */ + 7048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7049 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 +.src_ref 7 "conv2d_dw_bf16_params.h" 232 58 + 7050 "10000100" // JZ r1, #7088 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7088 delay_slots=5 */ + 7051 "00000001" // /* MW 5 */ + 7052 "00000000" // /* MW 4 */ + 7053 "11011000" // /* MW 3 */ + 7054 "00001101" // /* MW 2 */ + 7055 "00001000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 190 20 +.delay_slot + 7056 "00011000" // MOVX r16, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7057 "00001101" // /* MW 3 */ + 7058 "00100000" // /* MW 2 */ + 7059 "00010000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 190 20 first +.delay_slot + 7060 "10011000" // LSHL r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7061 "00001101" // /* MW 3 */ + 7062 "11100001" // /* MW 2 */ + 7063 "00010011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 +.delay_slot + 7064 "01000100" // MOVXM r31, #-8454144 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7065 "00000000" // /* MW 5 */ + 7066 "10100000" // /* MW 4 */ + 7067 "00001111" // /* MW 3 */ + 7068 "01111111" // /* MW 2 */ + 7069 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7071 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7073 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 + 7074 "01111110" // NOPA; NOPB; NOPS; MOVX r31, #0; NOPM /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 7075 "01100000" // /* MW 13 */ + 7076 "00101011" // /* MW 12 */ + 7077 "00000000" // /* MW 11 */ + 7078 "10101111" // /* MW 10 */ + 7079 "00110100" // /* MW 9 */ + 7080 "00000000" // /* MW 8 */ + 7081 "00000001" // /* MW 7 */ + 7082 "00111110" // /* MW 6 */ + 7083 "00100000" // /* MW 5 */ + 7084 "00000000" // /* MW 4 */ + 7085 "11110000" // /* MW 3 */ + 7086 "00101100" // /* MW 2 */ + 7087 "00000000" // /* MW 1 */ +.label TGT_F_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh_608 +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 +.src_ref 7 "conv2d_dw_bf16_params.h" 236 40 + 7088 "10111010" // MOVA m0, #-197; MOVXM p0, #508448 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7089 "00010000" // /* MW 9 */ + 7090 "00010000" // /* MW 8 */ + 7091 "00110001" // /* MW 7 */ + 7092 "11110000" // /* MW 6 */ + 7093 "00000001" // /* MW 5 */ + 7094 "00000000" // /* MW 4 */ + 7095 "10000000" // /* MW 3 */ + 7096 "01100000" // /* MW 2 */ + 7097 "11100111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 first +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 first +.src_ref 7 "conv2d_dw_bf16_params.h" 234 77 first + 7098 "10111010" // LDA.s8 r17, [p0]; AND r29, r29, r17; VINSERT.32 x0, x0, #0, r31 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7099 "10111000" // /* MW 9 */ + 7100 "11111000" // /* MW 8 */ + 7101 "00000001" // /* MW 7 */ + 7102 "10100100" // /* MW 6 */ + 7103 "11011000" // /* MW 5 */ + 7104 "00111011" // /* MW 4 */ + 7105 "01010000" // /* MW 3 */ + 7106 "11000100" // /* MW 2 */ + 7107 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 +.src_ref 7 "conv2d_dw_bf16_params.h" 239 122 + 7108 "10111010" // LDA r1, [sp, #-8]; MOVX r31, #5; VMOV bmll0, x0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7109 "01111000" // /* MW 9 */ + 7110 "01001001" // /* MW 8 */ + 7111 "00000000" // /* MW 7 */ + 7112 "10101000" // /* MW 6 */ + 7113 "11110000" // /* MW 5 */ + 7114 "00000001" // /* MW 4 */ + 7115 "00100000" // /* MW 3 */ + 7116 "00000110" // /* MW 2 */ + 7117 "11111111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 237 53 +.src_ref 7 "conv2d_dw_bf16_params.h" 239 122 first +.src_ref 7 "conv2d_dw_bf16_params.h" 240 + 7118 "10111010" // LDA lr, [sp, #-4]; LSHL r31, r15, r31; MOV m1, #201 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7119 "01011000" // /* MW 9 */ + 7120 "11001001" // /* MW 8 */ + 7121 "10000000" // /* MW 7 */ + 7122 "11101100" // /* MW 6 */ + 7123 "11111111" // /* MW 5 */ + 7124 "00011111" // /* MW 4 */ + 7125 "00100000" // /* MW 3 */ + 7126 "10000111" // /* MW 2 */ + 7127 "11111111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 + 7128 "00101100" // LDA p0, [sp, #-16]; MOVX r25, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7129 "00000010" // /* MW 5 */ + 7130 "01100100" // /* MW 4 */ + 7131 "00100000" // /* MW 3 */ + 7132 "00000011" // /* MW 2 */ + 7133 "11111110" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7134 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7135 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7136 "00011000" // LDA r14, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7137 "11010001" // /* MW 3 */ + 7138 "11110101" // /* MW 2 */ + 7139 "00000111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 39 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7140 "00011000" // ST.s16 r1, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7141 "00110111" // /* MW 3 */ + 7142 "00101100" // /* MW 2 */ + 7143 "00000111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 +.src_ref 7 "conv2d_dw_bf16_params.h" 234 81 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7144 "11100100" // MUL r28, r29, r28; MOV crRnd, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7145 "01000001" // /* MW 5 */ + 7146 "01110001" // /* MW 4 */ + 7147 "11111111" // /* MW 3 */ + 7148 "00111001" // /* MW 2 */ + 7149 "11101111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 first +.src_ref 7 "conv2d_dw_bf16_params.h" 239 74 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7150 "00111010" // VCONV.bf16.fp32 wl0, bmll0; LSHL r17, r15, r26; MOV r15, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7151 "01111001" // /* MW 9 */ + 7152 "01010000" // /* MW 8 */ + 7153 "11101000" // /* MW 7 */ + 7154 "01101101" // /* MW 6 */ + 7155 "00011101" // /* MW 5 */ + 7156 "00011111" // /* MW 4 */ + 7157 "11000000" // /* MW 3 */ + 7158 "00000010" // /* MW 2 */ + 7159 "00001000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 234 109 first +.src_ref 7 "conv2d_dw_bf16_params.h" 239 51 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7160 "00100100" // MUL r20, r28, r20; ADD.NC r17, r17, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7161 "11111111" // /* MW 5 */ + 7162 "10110001" // /* MW 4 */ + 7163 "11111000" // /* MW 3 */ + 7164 "00101001" // /* MW 2 */ + 7165 "11100101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 first +.src_ref 7 "conv2d_dw_bf16_params.h" 235 59 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7166 "01100100" // LSHL r29, r20, r19; VEXTRACT.16 r1, x0, #0, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7167 "00000011" // /* MW 5 */ + 7168 "10000010" // /* MW 4 */ + 7169 "10110000" // /* MW 3 */ + 7170 "01100111" // /* MW 2 */ + 7171 "10100111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 236 52 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7172 "10011000" // MUL r28, r30, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7173 "11101111" // /* MW 3 */ + 7174 "10111000" // /* MW 2 */ + 7175 "00010111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 51 first + 7176 "10011000" // LSHL r21, r17, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7177 "01011101" // /* MW 3 */ + 7178 "01101011" // /* MW 2 */ + 7179 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 234 40 first +.src_ref 7 "conv2d_dw_bf16_params.h" 237 41 first + 7180 "01011100" // ST r20, [p7], #4; LSHL r23, r28, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7181 "11111011" // /* MW 5 */ + 7182 "01011110" // /* MW 4 */ + 7183 "00111110" // /* MW 3 */ + 7184 "11010010" // /* MW 2 */ + 7185 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 235 41 first +.src_ref 7 "conv2d_dw_bf16_params.h" 239 51 first + 7186 "01011100" // ST r29, [p7], #4; SUB r26, r31, r21 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7187 "10100011" // /* MW 5 */ + 7188 "11101010" // /* MW 4 */ + 7189 "00111111" // /* MW 3 */ + 7190 "11110110" // /* MW 2 */ + 7191 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 236 40 first +.src_ref 7 "conv2d_dw_bf16_params.h" 239 51 + 7192 "01011100" // ST r28, [p7], m0; MAC r21, r21, r31, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7193 "01001100" // /* MW 5 */ + 7194 "11010110" // /* MW 4 */ + 7195 "00111111" // /* MW 3 */ + 7196 "01110010" // /* MW 2 */ + 7197 "11100001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 237 53 first + 7198 "10011000" // LDA.u8 r20, [p7], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7199 "10001010" // /* MW 3 */ + 7200 "00101010" // /* MW 2 */ + 7201 "00000111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 240 first + 7202 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7203 "00000001" // /* MW 5 */ + 7204 "00000000" // /* MW 4 */ + 7205 "00000000" // /* MW 3 */ + 7206 "11111000" // /* MW 2 */ + 7207 "11111111" // /* MW 1 */ + 7208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7209 "00000000" // /* MW 1 */ + 7210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7211 "00000000" // /* MW 1 */ + 7212 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7213 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 139 first + 7214 "10011000" // LSHL r30, r30, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7215 "00111101" // /* MW 3 */ + 7216 "10111101" // /* MW 2 */ + 7217 "00010111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 51 + 7218 "10011000" // SUB r21, r30, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7219 "01010001" // /* MW 3 */ + 7220 "10101011" // /* MW 2 */ + 7221 "00010111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 237 57 first + 7222 "10011000" // EQ r27, r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7223 "01000111" // /* MW 3 */ + 7224 "11110111" // /* MW 2 */ + 7225 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 237 41 + 7226 "00011000" // SEL.EQZ r19, r24, r23, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7227 "01110010" // /* MW 3 */ + 7228 "00100111" // /* MW 2 */ + 7229 "00010110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 237 39 + 7230 "10011000" // ST r19, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7231 "01110001" // /* MW 3 */ + 7232 "00011110" // /* MW 2 */ + 7233 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 238 39 first + 7234 "10011000" // ST r16, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7235 "00010001" // /* MW 3 */ + 7236 "00011110" // /* MW 2 */ + 7237 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 first + 7238 "10011000" // ST r17, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7239 "00110001" // /* MW 3 */ + 7240 "00011110" // /* MW 2 */ + 7241 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 +.src_ref 7 "conv2d_dw_bf16_params.h" 240 first + 7242 "01011100" // ST r22, [p7], #4; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 7243 "00000000" // /* MW 5 */ + 7244 "01010000" // /* MW 4 */ + 7245 "00110000" // /* MW 3 */ + 7246 "11011010" // /* MW 2 */ + 7247 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 first +.delay_slot + 7248 "10011000" // ST r18, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7249 "01010001" // /* MW 3 */ + 7250 "00011110" // /* MW 2 */ + 7251 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 +.delay_slot + 7252 "10011000" // ST r26, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7253 "01010001" // /* MW 3 */ + 7254 "00011111" // /* MW 2 */ + 7255 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 +.delay_slot + 7256 "10011000" // ST r21, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7257 "10110001" // /* MW 3 */ + 7258 "00011110" // /* MW 2 */ + 7259 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 +.delay_slot + 7260 "10011000" // ST r25, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7261 "00110001" // /* MW 3 */ + 7262 "00000111" // /* MW 2 */ + 7263 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 +.delay_slot + 7264 "00000010" // ST r25, [p7, #4]; MOV p7, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7265 "01110000" // /* MW 7 */ + 7266 "01100000" // /* MW 6 */ + 7267 "10110000" // /* MW 5 */ + 7268 "00000011" // /* MW 4 */ + 7269 "00110000" // /* MW 3 */ + 7270 "11100110" // /* MW 2 */ +.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh__end +.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_end0 + 7271 "11100010" // /* MW 1 */ +.label __Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params___func_begin0 +.label _Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params +.function conv2d_dw_core _Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 7 "conv2d_dw_bf16.h" 158 first +.src_ref 7 "conv2d_dw_bf16.h" 179 4 +.src_ref 7 "conv2d_dw_bf16.h" 183 4 +.function_start + 7280 "10110110" // MOVA m6, #-120; VLDB x6, [p0], #64; MOVXM p4, #508972 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7281 "00010000" // /* MW 11 */ + 7282 "00010110" // /* MW 10 */ + 7283 "00110010" // /* MW 9 */ + 7284 "11110010" // /* MW 8 */ + 7285 "00000001" // /* MW 7 */ + 7286 "00000000" // /* MW 6 */ + 7287 "01101000" // /* MW 5 */ + 7288 "00111011" // /* MW 4 */ + 7289 "10000000" // /* MW 3 */ + 7290 "00011000" // /* MW 2 */ + 7291 "11110001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 153 18 +.src_ref 4 "shuffle.hpp" 153 18 +.src_ref 7 "conv2d_dw_bf16.h" 179 4 first +.src_ref 7 "conv2d_dw_bf16.h" 202 56 + 7292 "10110110" // LDA dj2, [p4], #-4; VLDB x1, [p0], #64; MOVX r1, #16; MOV m5, #128 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7293 "01011000" // /* MW 11 */ + 7294 "10000000" // /* MW 10 */ + 7295 "10000000" // /* MW 9 */ + 7296 "00001010" // /* MW 8 */ + 7297 "00010010" // /* MW 7 */ + 7298 "00000000" // /* MW 6 */ + 7299 "11101000" // /* MW 5 */ + 7300 "00111000" // /* MW 4 */ + 7301 "11010000" // /* MW 3 */ + 7302 "10101000" // /* MW 2 */ + 7303 "10011111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 179 4 +.src_ref 7 "conv2d_dw_bf16.h" 202 56 +.src_ref 7 "conv2d_dw_bf16.h" 228 12 +.src_ref 7 "conv2d_dw_bf16.h" 229 12 +.src_ref 7 "conv2d_dw_bf16.h" 230 12 +.src_ref 7 "conv2d_dw_bf16.h" 231 12 +.src_ref 7 "conv2d_dw_bf16.h" 232 12 +.src_ref 7 "conv2d_dw_bf16.h" 233 12 +.src_ref 7 "conv2d_dw_bf16.h" 234 12 +.src_ref 7 "conv2d_dw_bf16.h" 235 12 + 7304 "10111010" // LDA dn2, [p4], #12; MOVX r0, #60; MOV m4, #-112 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7305 "01011000" // /* MW 9 */ + 7306 "10010000" // /* MW 8 */ + 7307 "00000111" // /* MW 7 */ + 7308 "10001010" // /* MW 6 */ + 7309 "00000111" // /* MW 5 */ + 7310 "00000000" // /* MW 4 */ + 7311 "11010000" // /* MW 3 */ + 7312 "10100100" // /* MW 2 */ + 7313 "10000111" // /* MW 1 */ +.src_ref 8 "aie_core.h" 81 15 +.src_ref 8 "aie_core.h" 81 15 +.src_ref 8 "aie_core.h" 100 15 +.src_ref 8 "aie_core.h" 100 15 +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "shuffle.hpp" 153 18 +.src_ref 4 "shuffle.hpp" 153 18 +.src_ref 7 "conv2d_dw_bf16.h" 179 4 + 7314 "10111010" // LDA dj6, [p4], #-4; MOVX r2, #32; MOV dc4, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7315 "01011000" // /* MW 9 */ + 7316 "00000000" // /* MW 8 */ + 7317 "01100000" // /* MW 7 */ + 7318 "00001010" // /* MW 6 */ + 7319 "00100100" // /* MW 5 */ + 7320 "00000000" // /* MW 4 */ + 7321 "11010000" // /* MW 3 */ + 7322 "11101000" // /* MW 2 */ + 7323 "10011111" // /* MW 1 */ +.src_ref 8 "aie_core.h" 81 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 7 "conv2d_dw_bf16.h" 179 4 +.src_ref 7 "conv2d_dw_bf16.h" 208 8 first + 7324 "01110110" // LDA dn6, [p4], #8; MOVS dc7, dc4; MOVXM ls, #7520 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7325 "00010000" // /* MW 11 */ + 7326 "10110000" // /* MW 10 */ + 7327 "01111110" // /* MW 9 */ + 7328 "00000100" // /* MW 8 */ + 7329 "00000000" // /* MW 7 */ + 7330 "00000000" // /* MW 6 */ + 7331 "01001011" // /* MW 5 */ + 7332 "00010000" // /* MW 4 */ + 7333 "11010111" // /* MW 3 */ + 7334 "11100100" // /* MW 2 */ + 7335 "10000101" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 7 "conv2d_dw_bf16.h" 179 4 first +.src_ref 7 "conv2d_dw_bf16.h" 208 8 + 7336 "01110110" // LDA m2, [p4], #8; MOVS dc2, dc4; MOVXM le, #7600 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7337 "00010000" // /* MW 11 */ + 7338 "11011000" // /* MW 10 */ + 7339 "10111110" // /* MW 9 */ + 7340 "00000101" // /* MW 8 */ + 7341 "00000000" // /* MW 7 */ + 7342 "00000000" // /* MW 6 */ + 7343 "01001011" // /* MW 5 */ + 7344 "00010000" // /* MW 4 */ + 7345 "11010010" // /* MW 3 */ + 7346 "10100000" // /* MW 2 */ + 7347 "10000101" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 7 "conv2d_dw_bf16.h" 180 4 first +.src_ref 7 "conv2d_dw_bf16.h" 202 56 + 7348 "01110110" // LDA dj0, [p4], #-4; MOVS dc6, dc4; MOVXM p5, #508448 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7349 "00010000" // /* MW 11 */ + 7350 "00010000" // /* MW 10 */ + 7351 "10110001" // /* MW 9 */ + 7352 "11110010" // /* MW 8 */ + 7353 "00000001" // /* MW 7 */ + 7354 "00000000" // /* MW 6 */ + 7355 "01001011" // /* MW 5 */ + 7356 "00010000" // /* MW 4 */ + 7357 "11010110" // /* MW 3 */ + 7358 "10001000" // /* MW 2 */ + 7359 "10011111" // /* MW 1 */ +.src_ref 8 "aie_core.h" 81 15 +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "shuffle.hpp" 153 18 +.src_ref 4 "shuffle.hpp" 153 18 +.src_ref 7 "conv2d_dw_bf16.h" 180 4 + 7360 "01110110" // LDA dn0, [p4], #12; MOVS dc1, dc4; MOVX r3, #48; MOV dc3, dc4 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7361 "01111000" // /* MW 11 */ + 7362 "11000000" // /* MW 10 */ + 7363 "11100100" // /* MW 9 */ + 7364 "00001001" // /* MW 8 */ + 7365 "00110110" // /* MW 7 */ + 7366 "00000000" // /* MW 6 */ + 7367 "01001011" // /* MW 5 */ + 7368 "00010000" // /* MW 4 */ + 7369 "11010001" // /* MW 3 */ + 7370 "10000100" // /* MW 2 */ + 7371 "10000111" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 180 4 + 7372 "10111010" // LDA dj4, [p4], #-4; MOVS dc0, dc4; VSHIFT x4, x6, x1, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7373 "01100010" // /* MW 9 */ + 7374 "01000011" // /* MW 8 */ + 7375 "00011000" // /* MW 7 */ + 7376 "00000001" // /* MW 6 */ + 7377 "01001011" // /* MW 5 */ + 7378 "00010000" // /* MW 4 */ + 7379 "11010000" // /* MW 3 */ + 7380 "11001000" // /* MW 2 */ + 7381 "10011111" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 7 "conv2d_dw_bf16.h" 180 4 first + 7382 "11010100" // LDA dn4, [p4], #8; MOV dc5, dc4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7383 "00000001" // /* MW 5 */ + 7384 "10010011" // /* MW 4 */ + 7385 "11011011" // /* MW 3 */ + 7386 "11000100" // /* MW 2 */ + 7387 "10000101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 180 4 + 7388 "10011000" // LDA m0, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7389 "00000110" // /* MW 3 */ + 7390 "00101100" // /* MW 2 */ + 7391 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 181 4 first + 7392 "10011000" // LDA dj1, [p4], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7393 "11000110" // /* MW 3 */ + 7394 "11111100" // /* MW 2 */ + 7395 "00000100" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 7 "conv2d_dw_bf16.h" 181 4 + 7396 "00111100" // LDA dn1, [p4], #12; VLDB.3D x2, [p0], d2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7397 "01101000" // /* MW 5 */ + 7398 "10110001" // /* MW 4 */ + 7399 "11010000" // /* MW 3 */ + 7400 "10010100" // /* MW 2 */ + 7401 "10000111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 181 4 first + 7402 "10011000" // LDA dj5, [p4], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7403 "11000110" // /* MW 3 */ + 7404 "11111110" // /* MW 2 */ + 7405 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 181 4 + 7406 "10011000" // LDA dn5, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7407 "10100110" // /* MW 3 */ + 7408 "00101110" // /* MW 2 */ + 7409 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 181 4 + 7410 "10011000" // LDA m1, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7411 "10000110" // /* MW 3 */ + 7412 "00101100" // /* MW 2 */ + 7413 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 182 4 first + 7414 "10011000" // LDA dj7, [p4], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7415 "11000110" // /* MW 3 */ + 7416 "11111111" // /* MW 2 */ + 7417 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 182 4 + 7418 "10011000" // LDA dn7, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7419 "10100110" // /* MW 3 */ + 7420 "00101111" // /* MW 2 */ + 7421 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 182 4 + 7422 "10011000" // LDA m7, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7423 "10000110" // /* MW 3 */ + 7424 "00101111" // /* MW 2 */ + 7425 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 183 4 first + 7426 "10011000" // LDA dj3, [p4], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7427 "11000110" // /* MW 3 */ + 7428 "11111101" // /* MW 2 */ + 7429 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 183 4 + 7430 "10011000" // LDA dn3, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7431 "10100110" // /* MW 3 */ + 7432 "00101101" // /* MW 2 */ + 7433 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 183 4 + 7434 "10011000" // LDA m3, [p4], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7435 "10000110" // /* MW 3 */ + 7436 "11001001" // /* MW 2 */ + 7437 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 202 56 first + 7438 "10011000" // LDA r4, [p4], m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7439 "10010110" // /* MW 3 */ + 7440 "10101000" // /* MW 2 */ + 7441 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 202 56 + 7442 "10011000" // LDA.s16 r7, [p4], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7443 "11110010" // /* MW 3 */ + 7444 "10001000" // /* MW 2 */ + 7445 "00000100" // /* MW 1 */ + 7446 "10011000" // LDA m4, [p4], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7447 "00000110" // /* MW 3 */ + 7448 "01001110" // /* MW 2 */ + 7449 "00000100" // /* MW 1 */ +.src_ref 8 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 7 "conv2d_dw_bf16.h" 202 56 first + 7450 "00111100" // LDA r5, [p4, #-28]; VLDB.2D x3, [p1], d7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7451 "11101000" // /* MW 5 */ + 7452 "11100001" // /* MW 4 */ + 7453 "11010011" // /* MW 3 */ + 7454 "10010110" // /* MW 2 */ + 7455 "10010010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 first + 7456 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7457 "00101011" // /* MW 3 */ + 7458 "00000100" // /* MW 2 */ + 7459 "00000010" // /* MW 1 */ + 7460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7461 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 202 56 first + 7462 "10011000" // LDA.s8 r6, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7463 "11000010" // /* MW 3 */ + 7464 "00000100" // /* MW 2 */ + 7465 "00000101" // /* MW 1 */ + 7466 "00011000" // ADD r4, r4, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7467 "11111011" // /* MW 3 */ + 7468 "00001001" // /* MW 2 */ + 7469 "00010001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 202 4 + 7470 "10111010" // LDA r17, [p4]; MOVXM p4, #7664 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7471 "00010000" // /* MW 9 */ + 7472 "11111000" // /* MW 8 */ + 7473 "00110110" // /* MW 7 */ + 7474 "00000110" // /* MW 6 */ + 7475 "00000000" // /* MW 5 */ + 7476 "00000000" // /* MW 4 */ + 7477 "11010000" // /* MW 3 */ + 7478 "11000110" // /* MW 2 */ + 7479 "10000000" // /* MW 1 */ + 7480 "11111000" // VBCST.16 x0, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7481 "01110010" // /* MW 3 */ + 7482 "00011101" // /* MW 2 */ + 7483 "00011000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 226 12 first + 7484 "01011000" // VEXTBCST.128 x10, x3, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7485 "00000011" // /* MW 3 */ + 7486 "00011100" // /* MW 2 */ + 7487 "00011101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 208 8 first + 7488 "10011000" // ADD.NC lc, r5, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7489 "11111111" // /* MW 3 */ + 7490 "01110010" // /* MW 2 */ + 7491 "00011101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 228 12 +.src_ref 7 "conv2d_dw_bf16.h" 228 12 first + 7492 "01100110" // NOPA; NOPB; VMOV cml3, cml0; VMAC.f dm4, dm3, x6, x10, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7493 "01000001" // /* MW 11 */ + 7494 "01101101" // /* MW 10 */ + 7495 "00000100" // /* MW 9 */ + 7496 "11100010" // /* MW 8 */ + 7497 "10001010" // /* MW 7 */ + 7498 "00000000" // /* MW 6 */ + 7499 "00100011" // /* MW 5 */ + 7500 "00000000" // /* MW 4 */ + 7501 "11110000" // /* MW 3 */ + 7502 "00101100" // /* MW 2 */ + 7503 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 7 "conv2d_dw_bf16.h" 223 12 first +.src_ref 7 "conv2d_dw_bf16.h" 232 12 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7504 "00001011" // NOPA; NOPB; NOPS; MOVX crRnd, r6; VEXTBCST.128 x8, x3, #1; VMAC.f dm1, dm0, x1, x10, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7505 "00011010" // /* MW 15 */ + 7506 "00001000" // /* MW 14 */ + 7507 "10101000" // /* MW 13 */ + 7508 "00000011" // /* MW 12 */ + 7509 "00001110" // /* MW 11 */ + 7510 "00000010" // /* MW 10 */ + 7511 "11010100" // /* MW 9 */ + 7512 "00001101" // /* MW 8 */ + 7513 "01011011" // /* MW 7 */ + 7514 "00000001" // /* MW 6 */ + 7515 "00100000" // /* MW 5 */ + 7516 "00000000" // /* MW 4 */ + 7517 "11110000" // /* MW 3 */ + 7518 "00101100" // /* MW 2 */ + 7519 "00000000" // /* MW 1 */ +.label ZLS_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_240 +.src_ref 8 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 153 18 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.noswbrkpt +.loop_nesting 1 + 7520 "10111010" // VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7521 "01101110" // /* MW 9 */ + 7522 "10000011" // /* MW 8 */ + 7523 "10000100" // /* MW 7 */ + 7524 "00000010" // /* MW 6 */ + 7525 "11110100" // /* MW 5 */ + 7526 "11110000" // /* MW 4 */ + 7527 "01110001" // /* MW 3 */ + 7528 "10110011" // /* MW 2 */ + 7529 "00000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 7 "conv2d_dw_bf16.h" 224 12 first +.src_ref 7 "conv2d_dw_bf16.h" 229 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7530 "01001010" // VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7531 "00000001" // /* MW 9 */ + 7532 "10001001" // /* MW 8 */ + 7533 "00000010" // /* MW 7 */ + 7534 "01000110" // /* MW 6 */ + 7535 "00001011" // /* MW 5 */ + 7536 "10011100" // /* MW 4 */ + 7537 "11101010" // /* MW 3 */ + 7538 "00111000" // /* MW 2 */ + 7539 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 233 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7540 "01001010" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r2; VMAC.f dm1, dm1, x10, x8, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7541 "00000001" // /* MW 9 */ + 7542 "00110101" // /* MW 8 */ + 7543 "00000001" // /* MW 7 */ + 7544 "11000110" // /* MW 6 */ + 7545 "10001010" // /* MW 5 */ + 7546 "00110000" // /* MW 4 */ + 7547 "01101010" // /* MW 3 */ + 7548 "10110001" // /* MW 2 */ + 7549 "00000000" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7550 "11011000" // VSHIFT x11, x1, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7551 "00001010" // /* MW 3 */ + 7552 "10001001" // /* MW 2 */ + 7553 "00011101" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 +.src_ref 7 "conv2d_dw_bf16.h" 230 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7554 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7555 "10100001" // /* MW 7 */ + 7556 "01001000" // /* MW 6 */ + 7557 "00000100" // /* MW 5 */ + 7558 "11000110" // /* MW 4 */ + 7559 "10001110" // /* MW 3 */ + 7560 "10110000" // /* MW 2 */ + 7561 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 225 12 first +.src_ref 7 "conv2d_dw_bf16.h" 234 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7562 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7563 "10100001" // /* MW 7 */ + 7564 "00110110" // /* MW 6 */ + 7565 "00000010" // /* MW 5 */ + 7566 "01000110" // /* MW 4 */ + 7567 "00001111" // /* MW 3 */ + 7568 "10011100" // /* MW 2 */ + 7569 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7570 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7571 "00001110" // /* MW 3 */ + 7572 "10001001" // /* MW 2 */ + 7573 "00011101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 226 12 first +.src_ref 7 "conv2d_dw_bf16.h" 231 12 first + 7574 "01100010" // VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7575 "11100001" // /* MW 7 */ + 7576 "10010010" // /* MW 6 */ + 7577 "00000011" // /* MW 5 */ + 7578 "01000110" // /* MW 4 */ + 7579 "00000011" // /* MW 3 */ + 7580 "00011100" // /* MW 2 */ + 7581 "00000101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 223 12 first +.src_ref 7 "conv2d_dw_bf16.h" 235 12 first + 7582 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7583 "11100001" // /* MW 7 */ + 7584 "01010110" // /* MW 6 */ + 7585 "00000000" // /* MW 5 */ + 7586 "01000110" // /* MW 4 */ + 7587 "00000111" // /* MW 3 */ + 7588 "00011100" // /* MW 2 */ + 7589 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first + 7590 "10010100" // NOPA; VSHIFT x4, x6, x1, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7591 "00001101" // /* MW 5 */ + 7592 "01100001" // /* MW 4 */ + 7593 "11110100" // /* MW 3 */ + 7594 "00101100" // /* MW 2 */ + 7595 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 228 12 first + 7596 "01001000" // VMAC.f dm4, dm3, x6, x10, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7597 "01000001" // /* MW 3 */ + 7598 "01101101" // /* MW 2 */ + 7599 "00000100" // /* MW 1 */ +.label ZLE_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_320 +.src_ref 7 "conv2d_dw_bf16.h" 232 12 first +.end_of_loop +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 7600 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7601 "00011010" // /* MW 15 */ + 7602 "00001000" // /* MW 14 */ + 7603 "01111000" // /* MW 13 */ + 7604 "10100101" // /* MW 12 */ + 7605 "00000001" // /* MW 11 */ + 7606 "00000000" // /* MW 10 */ + 7607 "00000000" // /* MW 9 */ + 7608 "00000000" // /* MW 8 */ + 7609 "01011011" // /* MW 7 */ + 7610 "00000001" // /* MW 6 */ + 7611 "00100000" // /* MW 5 */ + 7612 "00000000" // /* MW 4 */ + 7613 "11110000" // /* MW 3 */ + 7614 "00101100" // /* MW 2 */ + 7615 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 81 15 first +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "shuffle.hpp" 153 18 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 7616 "10111010" // PADDA.3D [p0], d0; PADDB.2D [p2], d3; VSHIFT x10, x1, x2, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7617 "01101110" // /* MW 9 */ + 7618 "10000011" // /* MW 8 */ + 7619 "10000100" // /* MW 7 */ + 7620 "00000010" // /* MW 6 */ + 7621 "10010000" // /* MW 5 */ + 7622 "01110011" // /* MW 4 */ + 7623 "11110010" // /* MW 3 */ + 7624 "00001100" // /* MW 2 */ + 7625 "00000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 224 12 first +.src_ref 7 "conv2d_dw_bf16.h" 229 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7626 "01100010" // VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7627 "00000001" // /* MW 7 */ + 7628 "10001001" // /* MW 6 */ + 7629 "00000010" // /* MW 5 */ + 7630 "01000110" // /* MW 4 */ + 7631 "00001011" // /* MW 3 */ + 7632 "10011100" // /* MW 2 */ + 7633 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 233 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7634 "01100010" // VSHIFT x4, x6, x1, r2; VMAC.f dm1, dm1, x10, x8, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7635 "00000001" // /* MW 7 */ + 7636 "00110101" // /* MW 6 */ + 7637 "00000001" // /* MW 5 */ + 7638 "11000110" // /* MW 4 */ + 7639 "10001010" // /* MW 3 */ + 7640 "00110000" // /* MW 2 */ + 7641 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7642 "11011000" // VSHIFT x11, x1, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7643 "00001010" // /* MW 3 */ + 7644 "10001001" // /* MW 2 */ + 7645 "00011101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 225 12 first +.src_ref 7 "conv2d_dw_bf16.h" 230 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7646 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7647 "10100001" // /* MW 7 */ + 7648 "01001000" // /* MW 6 */ + 7649 "00000100" // /* MW 5 */ + 7650 "01000110" // /* MW 4 */ + 7651 "00001111" // /* MW 3 */ + 7652 "10011100" // /* MW 2 */ + 7653 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 234 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7654 "01001010" // NOPA; VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7655 "10100001" // /* MW 9 */ + 7656 "00110110" // /* MW 8 */ + 7657 "00000010" // /* MW 7 */ + 7658 "11000010" // /* MW 6 */ + 7659 "10001110" // /* MW 5 */ + 7660 "10110000" // /* MW 4 */ + 7661 "11110100" // /* MW 3 */ + 7662 "00101100" // /* MW 2 */ + 7663 "00000000" // /* MW 1 */ +.label TGT_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_384 +.src_ref 8 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 153 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 7664 "10110100" // VLDB.2D x3, [p1], d7; VSHIFT x11, x1, x2, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7665 "00011101" // /* MW 5 */ + 7666 "00010010" // /* MW 4 */ + 7667 "10001011" // /* MW 3 */ + 7668 "00011110" // /* MW 2 */ + 7669 "00111110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 208 8 first +.src_ref 7 "conv2d_dw_bf16.h" 231 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7670 "01011010" // MOVXM le, #7840; VMAC.f dm3, dm4, x9, x7, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7671 "11100001" // /* MW 9 */ + 7672 "10010010" // /* MW 8 */ + 7673 "00000011" // /* MW 7 */ + 7674 "00000010" // /* MW 6 */ + 7675 "11101010" // /* MW 5 */ + 7676 "10110111" // /* MW 4 */ + 7677 "00000000" // /* MW 3 */ + 7678 "00000000" // /* MW 2 */ + 7679 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 7 "conv2d_dw_bf16.h" 208 8 +.src_ref 7 "conv2d_dw_bf16.h" 235 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7680 "01000110" // VLDA.CONV.fp32.bf16 cml0, [p2]; MOVXM ls, #7760; VMAC.f dm0, dm2, x11, x7, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7681 "11100001" // /* MW 11 */ + 7682 "01010110" // /* MW 10 */ + 7683 "00000000" // /* MW 9 */ + 7684 "00000010" // /* MW 8 */ + 7685 "11100101" // /* MW 7 */ + 7686 "10001111" // /* MW 6 */ + 7687 "00000000" // /* MW 5 */ + 7688 "00000000" // /* MW 4 */ + 7689 "01110000" // /* MW 3 */ + 7690 "10000101" // /* MW 2 */ + 7691 "01000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 208 8 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7692 "10011000" // ADD.NC lc, r5, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7693 "11111111" // /* MW 3 */ + 7694 "01110010" // /* MW 2 */ + 7695 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7696 "10011000" // VLDA x6, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7697 "10011011" // /* MW 3 */ + 7698 "00011101" // /* MW 2 */ + 7699 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7700 "00011000" // VLDB x1, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7701 "01110100" // /* MW 3 */ + 7702 "00011100" // /* MW 2 */ + 7703 "00111000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7704 "00011000" // VLDB.3D x2, [p0], d2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7705 "10110100" // /* MW 3 */ + 7706 "01011000" // /* MW 2 */ + 7707 "00111000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7708 "00011000" // VCONV.bf16.fp32 x10, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7709 "10010110" // /* MW 3 */ + 7710 "00010001" // /* MW 2 */ + 7711 "00001101" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7712 "00011000" // VCONV.bf16.fp32 x6, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7713 "00010110" // /* MW 3 */ + 7714 "00010000" // /* MW 2 */ + 7715 "00001011" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7716 "11111000" // VMAX_LT.bf16 x8, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7717 "01101100" // /* MW 3 */ + 7718 "01010000" // /* MW 2 */ + 7719 "00011100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 244 17 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7720 "01111000" // VSHUFFLE x10, x10, x6, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7721 "01000100" // /* MW 3 */ + 7722 "01010011" // /* MW 2 */ + 7723 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 7 "conv2d_dw_bf16.h" 243 16 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7724 "00000010" // VST x8, [p3], m4; VMAX_LT.bf16 x10, r16, x10, x0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7725 "01110000" // /* MW 7 */ + 7726 "00110110" // /* MW 6 */ + 7727 "10101000" // /* MW 5 */ + 7728 "00000010" // /* MW 4 */ + 7729 "01100000" // /* MW 3 */ + 7730 "01000010" // /* MW 2 */ + 7731 "01110001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 226 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7732 "01011000" // VEXTBCST.128 x10, x3, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7733 "00000011" // /* MW 3 */ + 7734 "00011100" // /* MW 2 */ + 7735 "00011101" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 7 "conv2d_dw_bf16.h" 228 12 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7736 "00000010" // VST.3D x10, [p3], d1; VMOV cml3, cml0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7737 "01110000" // /* MW 7 */ + 7738 "01000101" // /* MW 6 */ + 7739 "10000000" // /* MW 5 */ + 7740 "00000001" // /* MW 4 */ + 7741 "01100000" // /* MW 3 */ + 7742 "01010010" // /* MW 2 */ + 7743 "01100111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 223 12 first +.src_ref 7 "conv2d_dw_bf16.h" 228 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7744 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7745 "01000001" // /* MW 7 */ + 7746 "01101101" // /* MW 6 */ + 7747 "00000100" // /* MW 5 */ + 7748 "01000110" // /* MW 4 */ + 7749 "00000111" // /* MW 3 */ + 7750 "00011100" // /* MW 2 */ + 7751 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 232 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7752 "01100010" // VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm0, x1, x10, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7753 "01000001" // /* MW 7 */ + 7754 "00000011" // /* MW 6 */ + 7755 "00000001" // /* MW 5 */ + 7756 "11000110" // /* MW 4 */ + 7757 "10000110" // /* MW 3 */ + 7758 "00110000" // /* MW 2 */ + 7759 "00000010" // /* MW 1 */ +.label ZLS_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_480 +.src_ref 8 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 153 18 +.begin_of_loop +.aggressive_scheduled_block_id 2 +.noswbrkpt +.loop_nesting 2 + 7760 "10111010" // VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7761 "01101110" // /* MW 9 */ + 7762 "10000011" // /* MW 8 */ + 7763 "10000100" // /* MW 7 */ + 7764 "00000010" // /* MW 6 */ + 7765 "11110100" // /* MW 5 */ + 7766 "11110000" // /* MW 4 */ + 7767 "01110001" // /* MW 3 */ + 7768 "10110011" // /* MW 2 */ + 7769 "00000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 7 "conv2d_dw_bf16.h" 224 12 first +.src_ref 7 "conv2d_dw_bf16.h" 229 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7770 "01001010" // VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7771 "00000001" // /* MW 9 */ + 7772 "10001001" // /* MW 8 */ + 7773 "00000010" // /* MW 7 */ + 7774 "01000110" // /* MW 6 */ + 7775 "00001011" // /* MW 5 */ + 7776 "10011100" // /* MW 4 */ + 7777 "11101010" // /* MW 3 */ + 7778 "00111000" // /* MW 2 */ + 7779 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 233 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7780 "01001010" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r2; VMAC.f dm1, dm1, x10, x8, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7781 "00000001" // /* MW 9 */ + 7782 "00110101" // /* MW 8 */ + 7783 "00000001" // /* MW 7 */ + 7784 "11000110" // /* MW 6 */ + 7785 "10001010" // /* MW 5 */ + 7786 "00110000" // /* MW 4 */ + 7787 "01101010" // /* MW 3 */ + 7788 "10110001" // /* MW 2 */ + 7789 "00000000" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7790 "11011000" // VSHIFT x11, x1, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7791 "00001010" // /* MW 3 */ + 7792 "10001001" // /* MW 2 */ + 7793 "00011101" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 +.src_ref 7 "conv2d_dw_bf16.h" 230 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7794 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7795 "10100001" // /* MW 7 */ + 7796 "01001000" // /* MW 6 */ + 7797 "00000100" // /* MW 5 */ + 7798 "11000110" // /* MW 4 */ + 7799 "10001110" // /* MW 3 */ + 7800 "10110000" // /* MW 2 */ + 7801 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 225 12 first +.src_ref 7 "conv2d_dw_bf16.h" 234 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7802 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7803 "10100001" // /* MW 7 */ + 7804 "00110110" // /* MW 6 */ + 7805 "00000010" // /* MW 5 */ + 7806 "01000110" // /* MW 4 */ + 7807 "00001111" // /* MW 3 */ + 7808 "10011100" // /* MW 2 */ + 7809 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7810 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7811 "00001110" // /* MW 3 */ + 7812 "10001001" // /* MW 2 */ + 7813 "00011101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 226 12 first +.src_ref 7 "conv2d_dw_bf16.h" 231 12 first + 7814 "01100010" // VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7815 "11100001" // /* MW 7 */ + 7816 "10010010" // /* MW 6 */ + 7817 "00000011" // /* MW 5 */ + 7818 "01000110" // /* MW 4 */ + 7819 "00000011" // /* MW 3 */ + 7820 "00011100" // /* MW 2 */ + 7821 "00000101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 223 12 first +.src_ref 7 "conv2d_dw_bf16.h" 235 12 first + 7822 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7823 "11100001" // /* MW 7 */ + 7824 "01010110" // /* MW 6 */ + 7825 "00000000" // /* MW 5 */ + 7826 "01000110" // /* MW 4 */ + 7827 "00000111" // /* MW 3 */ + 7828 "00011100" // /* MW 2 */ + 7829 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first + 7830 "10010100" // NOPA; VSHIFT x4, x6, x1, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7831 "00001101" // /* MW 5 */ + 7832 "01100001" // /* MW 4 */ + 7833 "11110100" // /* MW 3 */ + 7834 "00101100" // /* MW 2 */ + 7835 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 228 12 first + 7836 "01001000" // VMAC.f dm4, dm3, x6, x10, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7837 "01000001" // /* MW 3 */ + 7838 "01101101" // /* MW 2 */ + 7839 "00000100" // /* MW 1 */ +.label ZLE_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_560 +.src_ref 7 "conv2d_dw_bf16.h" 232 12 first +.end_of_loop +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 7840 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7841 "00011010" // /* MW 15 */ + 7842 "00001000" // /* MW 14 */ + 7843 "01111000" // /* MW 13 */ + 7844 "10100101" // /* MW 12 */ + 7845 "00000001" // /* MW 11 */ + 7846 "00000000" // /* MW 10 */ + 7847 "00000000" // /* MW 9 */ + 7848 "00000000" // /* MW 8 */ + 7849 "01011011" // /* MW 7 */ + 7850 "00000001" // /* MW 6 */ + 7851 "00100000" // /* MW 5 */ + 7852 "00000000" // /* MW 4 */ + 7853 "11110000" // /* MW 3 */ + 7854 "00101100" // /* MW 2 */ + 7855 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 81 15 first +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 202 4 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 7856 "10110110" // PADDA.3D [p0], d0; PADDB.2D [p2], d3; JNZD r4, r4, p4; VSHIFT x10, x1, x2, r1 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 7857 "01101000" // /* MW 11 */ + 7858 "10000011" // /* MW 10 */ + 7859 "10000100" // /* MW 9 */ + 7860 "00000010" // /* MW 8 */ + 7861 "01001001" // /* MW 7 */ + 7862 "00001000" // /* MW 6 */ + 7863 "00100000" // /* MW 5 */ + 7864 "11100111" // /* MW 4 */ + 7865 "11110100" // /* MW 3 */ + 7866 "00001100" // /* MW 2 */ + 7867 "00000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 224 12 first +.src_ref 7 "conv2d_dw_bf16.h" 229 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7868 "01100010" // VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7869 "00000001" // /* MW 7 */ + 7870 "10001001" // /* MW 6 */ + 7871 "00000010" // /* MW 5 */ + 7872 "01000110" // /* MW 4 */ + 7873 "00001011" // /* MW 3 */ + 7874 "10011100" // /* MW 2 */ + 7875 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 233 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7876 "01100010" // VSHIFT x4, x6, x1, r2; VMAC.f dm1, dm1, x10, x8, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7877 "00000001" // /* MW 7 */ + 7878 "00110101" // /* MW 6 */ + 7879 "00000001" // /* MW 5 */ + 7880 "11000110" // /* MW 4 */ + 7881 "10001010" // /* MW 3 */ + 7882 "00110000" // /* MW 2 */ + 7883 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7884 "11011000" // VSHIFT x11, x1, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7885 "00001010" // /* MW 3 */ + 7886 "10001001" // /* MW 2 */ + 7887 "00011101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 225 12 first +.src_ref 7 "conv2d_dw_bf16.h" 230 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7888 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7889 "10100001" // /* MW 7 */ + 7890 "01001000" // /* MW 6 */ + 7891 "00000100" // /* MW 5 */ + 7892 "01000110" // /* MW 4 */ + 7893 "00001111" // /* MW 3 */ + 7894 "10011100" // /* MW 2 */ + 7895 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 234 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7896 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7897 "10100001" // /* MW 7 */ + 7898 "00110110" // /* MW 6 */ + 7899 "00000010" // /* MW 5 */ + 7900 "11000110" // /* MW 4 */ + 7901 "10001110" // /* MW 3 */ + 7902 "10110000" // /* MW 2 */ + 7903 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 7904 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7905 "00001110" // /* MW 3 */ + 7906 "10001001" // /* MW 2 */ + 7907 "00011101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 231 12 first + 7908 "01001000" // VMAC.f dm3, dm4, x9, x7, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7909 "11100001" // /* MW 3 */ + 7910 "10010010" // /* MW 2 */ + 7911 "00000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 235 12 first + 7912 "01001000" // VMAC.f dm0, dm2, x11, x7, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7913 "11100001" // /* MW 3 */ + 7914 "01010110" // /* MW 2 */ + 7915 "00000000" // /* MW 1 */ + 7916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7917 "00000000" // /* MW 1 */ + 7918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7919 "00000000" // /* MW 1 */ + 7920 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7921 "00000000" // /* MW 1 */ + 7922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7923 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 first + 7924 "00011000" // VCONV.bf16.fp32 x10, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7925 "10010110" // /* MW 3 */ + 7926 "00010001" // /* MW 2 */ + 7927 "00001101" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 7 "conv2d_dw_bf16.h" 248 first + 7928 "01011100" // VCONV.bf16.fp32 x6, cml0; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 7929 "00000000" // /* MW 5 */ + 7930 "01010000" // /* MW 4 */ + 7931 "11000000" // /* MW 3 */ + 7932 "00000010" // /* MW 2 */ + 7933 "01100010" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 7934 "11111000" // VMAX_LT.bf16 x8, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7935 "01101100" // /* MW 3 */ + 7936 "01010000" // /* MW 2 */ + 7937 "00011100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 244 17 first +.delay_slot + 7938 "01111000" // VSHUFFLE x10, x10, x6, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7939 "01000100" // /* MW 3 */ + 7940 "01010011" // /* MW 2 */ + 7941 "00011101" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 7942 "11111000" // VMAX_LT.bf16 x10, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7943 "01101100" // /* MW 3 */ + 7944 "01010000" // /* MW 2 */ + 7945 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 7 "conv2d_dw_bf16.h" 243 16 first +.delay_slot + 7946 "00011000" // VST x8, [p3], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7947 "00010011" // /* MW 3 */ + 7948 "10001010" // /* MW 2 */ + 7949 "00001011" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1159 33 +.delay_slot + 7950 "00011000" // VST.3D x10, [p3], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7951 "10010011" // /* MW 3 */ + 7952 "00111010" // /* MW 2 */ +.label _Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params__end +.label __Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params___func_end0 + 7953 "00001011" // /* MW 1 */ +.label __Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params___func_begin0 +.label _Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params +.function conv2d_dw_shuffle _Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 7 "conv2d_dw_bf16.h" 254 first +.src_ref 7 "conv2d_dw_bf16.h" 255 22 +.src_ref 7 "conv2d_dw_bf16.h" 255 22 +.src_ref 7 "conv2d_dw_bf16.h" 261 15 +.function_start + 7968 "10110110" // MOVA m4, #-36; VLDB x2, [p1], #64; MOVXM p3, #509080 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7969 "00010000" // /* MW 11 */ + 7970 "01001100" // /* MW 10 */ + 7971 "10110010" // /* MW 9 */ + 7972 "11110001" // /* MW 8 */ + 7973 "00000001" // /* MW 7 */ + 7974 "00000000" // /* MW 6 */ + 7975 "01101000" // /* MW 5 */ + 7976 "00111001" // /* MW 4 */ + 7977 "10000010" // /* MW 3 */ + 7978 "10010000" // /* MW 2 */ + 7979 "11111011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 7 "conv2d_dw_bf16.h" 255 22 first +.src_ref 7 "conv2d_dw_bf16.h" 262 15 first +.src_ref 7 "conv2d_dw_bf16.h" 270 25 + 7980 "10110110" // LDA dn0, [p3], #4; VLDB x0, [p2], #64; MOVX r4, #-5; MOV r3, #1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7981 "01011000" // /* MW 11 */ + 7982 "00000001" // /* MW 10 */ + 7983 "01101000" // /* MW 9 */ + 7984 "01101000" // /* MW 8 */ + 7985 "01000111" // /* MW 7 */ + 7986 "00111110" // /* MW 6 */ + 7987 "01101000" // /* MW 5 */ + 7988 "00111000" // /* MW 4 */ + 7989 "11010100" // /* MW 3 */ + 7990 "10000100" // /* MW 2 */ + 7991 "01100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 255 22 +.src_ref 7 "conv2d_dw_bf16.h" 258 4 first + 7992 "10111010" // LDA dj0, [p3], #4; MOVXM ls, #8080 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7993 "00010000" // /* MW 9 */ + 7994 "11001000" // /* MW 8 */ + 7995 "01111111" // /* MW 7 */ + 7996 "00000100" // /* MW 6 */ + 7997 "00000000" // /* MW 5 */ + 7998 "00000000" // /* MW 4 */ + 7999 "11010000" // /* MW 3 */ + 8000 "10001000" // /* MW 2 */ + 8001 "01100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 255 22 first +.src_ref 7 "conv2d_dw_bf16.h" 258 4 + 8002 "10111010" // LDA dn4, [p3], #4; MOVXM le, #8176 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8003 "00010000" // /* MW 9 */ + 8004 "11111000" // /* MW 8 */ + 8005 "10111111" // /* MW 7 */ + 8006 "00000101" // /* MW 6 */ + 8007 "00000000" // /* MW 5 */ + 8008 "00000000" // /* MW 4 */ + 8009 "11010000" // /* MW 3 */ + 8010 "11000100" // /* MW 2 */ + 8011 "01100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 255 22 +.src_ref 7 "conv2d_dw_bf16.h" 263 16 +.src_ref 7 "conv2d_dw_bf16.h" 264 16 +.src_ref 7 "conv2d_dw_bf16.h" 266 47 + 8012 "10111010" // LDA dj4, [p3], #4; MOVX r1, #50; MOV r0, #18 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8013 "01011000" // /* MW 9 */ + 8014 "00010010" // /* MW 8 */ + 8015 "00001000" // /* MW 7 */ + 8016 "01001000" // /* MW 6 */ + 8017 "00010110" // /* MW 5 */ + 8018 "00000000" // /* MW 4 */ + 8019 "11010000" // /* MW 3 */ + 8020 "11001000" // /* MW 2 */ + 8021 "01100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 255 22 +.src_ref 7 "conv2d_dw_bf16.h" 267 47 + 8022 "00101100" // LDA m0, [p3], #4; MOVX r2, #19 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8023 "10011010" // /* MW 5 */ + 8024 "00001000" // /* MW 4 */ + 8025 "11010000" // /* MW 3 */ + 8026 "10000000" // /* MW 2 */ + 8027 "01100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 255 22 + 8028 "10011000" // LDA dc0, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8029 "01100110" // /* MW 3 */ + 8030 "00011100" // /* MW 2 */ + 8031 "00000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 255 22 + 8032 "10011000" // LDA dc4, [p3], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8033 "01100110" // /* MW 3 */ + 8034 "10001010" // /* MW 2 */ + 8035 "00000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 258 51 first + 8036 "10011000" // LDA r5, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8037 "10110110" // /* MW 3 */ + 8038 "00000100" // /* MW 2 */ + 8039 "00000011" // /* MW 1 */ + 8040 "10011000" // LDA r6, [p3, #8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8041 "11010110" // /* MW 3 */ + 8042 "00100100" // /* MW 2 */ + 8043 "00000011" // /* MW 1 */ + 8044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8045 "00000000" // /* MW 1 */ + 8046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8047 "00000000" // /* MW 1 */ + 8048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8049 "00000000" // /* MW 1 */ + 8050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8051 "00000000" // /* MW 1 */ + 8052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8053 "00000000" // /* MW 1 */ + 8054 "10011000" // LSHL r4, r5, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8055 "01001101" // /* MW 3 */ + 8056 "01001000" // /* MW 2 */ + 8057 "00010001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 258 4 first +.src_ref 7 "conv2d_dw_bf16.h" 270 25 first + 8058 "00100100" // LSHL r3, r6, r3; ADD.NC lc, r4, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8059 "11111111" // /* MW 5 */ + 8060 "11100100" // /* MW 4 */ + 8061 "10111010" // /* MW 3 */ + 8062 "11000111" // /* MW 2 */ + 8063 "00110000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 7 "conv2d_dw_bf16.h" 270 25 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8064 "11100001" // NOPA; NOPB; NOPS; NOPX; MOV dj1, r3; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8065 "00000000" // /* MW 15 */ + 8066 "00000000" // /* MW 14 */ + 8067 "01111000" // /* MW 13 */ + 8068 "11010000" // /* MW 12 */ + 8069 "11000000" // /* MW 11 */ + 8070 "00000000" // /* MW 10 */ + 8071 "00000000" // /* MW 9 */ + 8072 "00000000" // /* MW 8 */ + 8073 "01011011" // /* MW 7 */ + 8074 "00000001" // /* MW 6 */ + 8075 "00100000" // /* MW 5 */ + 8076 "00000000" // /* MW 4 */ + 8077 "11110000" // /* MW 3 */ + 8078 "00101100" // /* MW 2 */ + 8079 "00000000" // /* MW 1 */ +.label ZLS_F_Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params_112 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 7 "conv2d_dw_bf16.h" 261 15 first +.src_ref 7 "conv2d_dw_bf16.h" 262 15 first +.src_ref 7 "conv2d_dw_bf16.h" 263 16 first +.src_ref 7 "conv2d_dw_bf16.h" 270 25 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.noswbrkpt +.loop_nesting 1 + 8080 "11100001" // VLDA x0, [p2], #64; VLDB x2, [p1], #64; MOVS p4, p0; NOPX; VSHUFFLE x1, x2, x0, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8081 "00000000" // /* MW 15 */ + 8082 "00000000" // /* MW 14 */ + 8083 "00111000" // /* MW 13 */ + 8084 "00000010" // /* MW 12 */ + 8085 "01001000" // /* MW 11 */ + 8086 "00000000" // /* MW 10 */ + 8087 "00000000" // /* MW 9 */ + 8088 "00000000" // /* MW 8 */ + 8089 "10001011" // /* MW 7 */ + 8090 "10000000" // /* MW 6 */ + 8091 "01101100" // /* MW 5 */ + 8092 "00111001" // /* MW 4 */ + 8093 "01110010" // /* MW 3 */ + 8094 "10000011" // /* MW 2 */ + 8095 "01000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 264 16 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8096 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE x3, x0, x0, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8097 "00000000" // /* MW 15 */ + 8098 "00000000" // /* MW 14 */ + 8099 "00111000" // /* MW 13 */ + 8100 "00000010" // /* MW 12 */ + 8101 "11000000" // /* MW 11 */ + 8102 "00000000" // /* MW 10 */ + 8103 "00000000" // /* MW 9 */ + 8104 "00000000" // /* MW 8 */ + 8105 "01011011" // /* MW 7 */ + 8106 "00000001" // /* MW 6 */ + 8107 "00100000" // /* MW 5 */ + 8108 "00000000" // /* MW 4 */ + 8109 "11110000" // /* MW 3 */ + 8110 "00101100" // /* MW 2 */ + 8111 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 266 47 first + 8112 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmlh0, x1, x3, r0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8113 "00000000" // /* MW 15 */ + 8114 "00000000" // /* MW 14 */ + 8115 "11101000" // /* MW 13 */ + 8116 "11000000" // /* MW 12 */ + 8117 "01000100" // /* MW 11 */ + 8118 "00000000" // /* MW 10 */ + 8119 "00000000" // /* MW 9 */ + 8120 "00000000" // /* MW 8 */ + 8121 "01011011" // /* MW 7 */ + 8122 "00000001" // /* MW 6 */ + 8123 "00100000" // /* MW 5 */ + 8124 "00000000" // /* MW 4 */ + 8125 "11110000" // /* MW 3 */ + 8126 "00101100" // /* MW 2 */ + 8127 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 267 47 first + 8128 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x1, x3, r2; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8129 "00000000" // /* MW 15 */ + 8130 "00000000" // /* MW 14 */ + 8131 "11101000" // /* MW 13 */ + 8132 "11000100" // /* MW 12 */ + 8133 "00000100" // /* MW 11 */ + 8134 "00000000" // /* MW 10 */ + 8135 "00000000" // /* MW 9 */ + 8136 "00000000" // /* MW 8 */ + 8137 "01011011" // /* MW 7 */ + 8138 "00000001" // /* MW 6 */ + 8139 "00100000" // /* MW 5 */ + 8140 "00000000" // /* MW 4 */ + 8141 "11110000" // /* MW 3 */ + 8142 "00101100" // /* MW 2 */ + 8143 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 143 15 first +.src_ref 4 "vector.hpp" 1159 33 first + 8144 "11100001" // NOPA; NOPB; VST.3D bmlh0, [p0], d0; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8145 "00000000" // /* MW 15 */ + 8146 "00000000" // /* MW 14 */ + 8147 "01111000" // /* MW 13 */ + 8148 "10100101" // /* MW 12 */ + 8149 "00000001" // /* MW 11 */ + 8150 "00000000" // /* MW 10 */ + 8151 "00000000" // /* MW 9 */ + 8152 "10000000" // /* MW 8 */ + 8153 "00100110" // /* MW 7 */ + 8154 "00011000" // /* MW 6 */ + 8155 "00100000" // /* MW 5 */ + 8156 "00000000" // /* MW 4 */ + 8157 "11110000" // /* MW 3 */ + 8158 "00101100" // /* MW 2 */ + 8159 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 7 "conv2d_dw_bf16.h" 270 25 first + 8160 "11100001" // NOPA; NOPB; VST bmll0, [p4, dj1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8161 "00000000" // /* MW 15 */ + 8162 "00000000" // /* MW 14 */ + 8163 "01111000" // /* MW 13 */ + 8164 "10100101" // /* MW 12 */ + 8165 "00000001" // /* MW 11 */ + 8166 "00000000" // /* MW 10 */ + 8167 "00000000" // /* MW 9 */ + 8168 "10000000" // /* MW 8 */ + 8169 "00000110" // /* MW 7 */ + 8170 "00100000" // /* MW 6 */ + 8171 "00100100" // /* MW 5 */ + 8172 "00000000" // /* MW 4 */ + 8173 "11110000" // /* MW 3 */ + 8174 "00101100" // /* MW 2 */ + 8175 "00000000" // /* MW 1 */ +.label ZLE_F_Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params_208 +.end_of_loop + 8176 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8177 "00000000" // /* MW 15 */ + 8178 "00000000" // /* MW 14 */ + 8179 "01111000" // /* MW 13 */ + 8180 "10100101" // /* MW 12 */ + 8181 "00000001" // /* MW 11 */ + 8182 "00000000" // /* MW 10 */ + 8183 "00000000" // /* MW 9 */ + 8184 "00000000" // /* MW 8 */ + 8185 "01011011" // /* MW 7 */ + 8186 "00000001" // /* MW 6 */ + 8187 "00100000" // /* MW 5 */ + 8188 "00000000" // /* MW 4 */ + 8189 "11110000" // /* MW 3 */ + 8190 "00101100" // /* MW 2 */ + 8191 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 7 "conv2d_dw_bf16.h" 263 16 first +.src_ref 7 "conv2d_dw_bf16.h" 270 25 +.src_ref 7 "conv2d_dw_bf16.h" 274 first +.loop_nesting 0 + 8192 "00111010" // MOVS p4, p0; RET lr; VSHUFFLE x1, x2, x0, r1 /* MW 10 */ /* control_operation: words=10 rts unconditional cycles_taken=1 delay_slots=5 */ + 8193 "00111001" // /* MW 9 */ + 8194 "00000010" // /* MW 8 */ + 8195 "01001000" // /* MW 7 */ + 8196 "00000000" // /* MW 6 */ + 8197 "01000000" // /* MW 5 */ + 8198 "00000001" // /* MW 4 */ + 8199 "01100000" // /* MW 3 */ + 8200 "00010001" // /* MW 2 */ + 8201 "10010000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 264 16 first +.delay_slot + 8202 "01111000" // VSHUFFLE x3, x0, x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8203 "00000100" // /* MW 3 */ + 8204 "10000000" // /* MW 2 */ + 8205 "00011001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 266 47 first +.delay_slot + 8206 "11011000" // VSHUFFLE bmlh0, x1, x3, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8207 "10000001" // /* MW 3 */ + 8208 "10001001" // /* MW 2 */ + 8209 "00011000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 267 47 first +.delay_slot + 8210 "11011000" // VSHUFFLE bmll0, x1, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8211 "10001001" // /* MW 3 */ + 8212 "00001001" // /* MW 2 */ + 8213 "00011000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 143 15 first +.src_ref 4 "vector.hpp" 1159 33 first +.delay_slot + 8214 "10011000" // VST.3D bmlh0, [p0], d0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8215 "00100110" // /* MW 3 */ + 8216 "00011000" // /* MW 2 */ + 8217 "00001000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 7 "conv2d_dw_bf16.h" 270 25 first +.delay_slot + 8218 "10011000" // VST bmll0, [p4, dj1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8219 "00000110" // /* MW 3 */ + 8220 "00100000" // /* MW 2 */ +.label _Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params__end +.label __Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params___func_end0 + 8221 "00001100" // /* MW 1 */ +.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_begin0 +.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params +.function conv2d_dw<(unsigned char)'\x01', bfloat16, bfloat16, bfloat16, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::async, adf::addressing::linear, adf::margin<0U> > > _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 7 "conv2d_dw_bf16.h" 282 first +.src_ref 7 "conv2d_dw_bf16.h" 285 49 +.src_ref 7 "conv2d_dw_bf16.h" 285 79 +.src_ref 7 "conv2d_dw_bf16.h" 287 48 +.function_start + 8224 "10111010" // LDA p1, [p1]; MOVX r17, #1; MOV m0, #-193 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8225 "01011000" // /* MW 9 */ + 8226 "00111111" // /* MW 8 */ + 8227 "00000111" // /* MW 7 */ + 8228 "00101000" // /* MW 6 */ + 8229 "00010000" // /* MW 5 */ + 8230 "00000001" // /* MW 4 */ + 8231 "11010000" // /* MW 3 */ + 8232 "10010011" // /* MW 2 */ + 8233 "00100000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 285 79 +.src_ref 7 "conv2d_dw_bf16.h" 287 77 + 8234 "10111010" // MOVA m1, #-208; MOVXM p4, #509064 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8235 "00010000" // /* MW 9 */ + 8236 "01000100" // /* MW 8 */ + 8237 "00110010" // /* MW 7 */ + 8238 "11110010" // /* MW 6 */ + 8239 "00000001" // /* MW 5 */ + 8240 "00000000" // /* MW 4 */ + 8241 "10000000" // /* MW 3 */ + 8242 "00000100" // /* MW 2 */ + 8243 "11100110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 285 79 first +.src_ref 7 "conv2d_dw_bf16.h" 285 103 + 8244 "01010100" // LDA r16, [p4], m0; MOV m0, #201 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8245 "00100101" // /* MW 5 */ + 8246 "00000011" // /* MW 4 */ + 8247 "11010000" // /* MW 3 */ + 8248 "01000010" // /* MW 2 */ + 8249 "10000001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 285 103 + 8250 "10011000" // LDA.u8 r19, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8251 "01101010" // /* MW 3 */ + 8252 "00001010" // /* MW 2 */ + 8253 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 287 77 first + 8254 "10011000" // LDA r18, [p4], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8255 "01010110" // /* MW 3 */ + 8256 "00101010" // /* MW 2 */ + 8257 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 282 + 8258 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8259 "00000001" // /* MW 5 */ + 8260 "00000000" // /* MW 4 */ + 8261 "00000000" // /* MW 3 */ + 8262 "00001000" // /* MW 2 */ + 8263 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8264 "00001100" // LDA p0, [p0]; ST lr, [sp, #-8] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8265 "01111011" // /* MW 5 */ + 8266 "11110000" // /* MW 4 */ + 8267 "11011111" // /* MW 3 */ + 8268 "10000011" // /* MW 2 */ + 8269 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8270 "00001100" // LDA r15, [p2]; ST p2, [sp, #-16] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8271 "00111011" // /* MW 5 */ + 8272 "11100010" // /* MW 4 */ + 8273 "11011111" // /* MW 3 */ + 8274 "10111110" // /* MW 2 */ + 8275 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8276 "10011000" // ST p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8277 "10011101" // /* MW 3 */ + 8278 "11111111" // /* MW 2 */ + 8279 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 284 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 8280 "00111010" // ST p6, [sp, #-20]; JL #7280 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=7280 delay_slots=5 */ + 8281 "01000001" // /* MW 9 */ + 8282 "00000000" // /* MW 8 */ + 8283 "00000000" // /* MW 7 */ + 8284 "10001110" // /* MW 6 */ + 8285 "00000011" // /* MW 5 */ + 8286 "00000000" // /* MW 4 */ + 8287 "10110000" // /* MW 3 */ + 8288 "11100011" // /* MW 2 */ + 8289 "11111101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 284 4 +.src_ref 7 "conv2d_dw_bf16.h" 285 89 first +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8290 "00111010" // ST r15, [sp, #-12]; MUL r16, r19, r16; MOV p2, p1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8291 "01111001" // /* MW 9 */ + 8292 "01100000" // /* MW 8 */ + 8293 "00110001" // /* MW 7 */ + 8294 "01111101" // /* MW 6 */ + 8295 "00001000" // /* MW 5 */ + 8296 "00100111" // /* MW 4 */ + 8297 "10110000" // /* MW 3 */ + 8298 "10111110" // /* MW 2 */ + 8299 "11111110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 290 20 +.delay_slot + 8300 "11111000" // MOV p6, p4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8301 "11000000" // /* MW 3 */ + 8302 "01101000" // /* MW 2 */ + 8303 "00011110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 285 49 +.delay_slot + 8304 "01011100" // ST p1, [sp, #-24]; LSHL r16, r16, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8305 "00111011" // /* MW 5 */ + 8306 "01000010" // /* MW 4 */ + 8307 "10111000" // /* MW 3 */ + 8308 "00010011" // /* MW 2 */ + 8309 "11111101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 285 49 +.src_ref 7 "conv2d_dw_bf16.h" 287 48 first +.src_ref 7 "conv2d_dw_bf16.h" 291 8 +.delay_slot + 8310 "00111010" // MOVS p7, p0; LSHL r16, r18, r17; MOV m0, r16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8311 "01111001" // /* MW 9 */ + 8312 "00010000" // /* MW 8 */ + 8313 "00000100" // /* MW 7 */ + 8314 "11101100" // /* MW 6 */ + 8315 "00001000" // /* MW 5 */ + 8316 "00100101" // /* MW 4 */ + 8317 "01100000" // /* MW 3 */ + 8318 "00010001" // /* MW 2 */ + 8319 "11110000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 285 49 first +.src_ref 7 "conv2d_dw_bf16.h" 287 48 +.delay_slot + 8320 "11100001" // NOPA; PADDB [p1], m0; NOPS; NOPX; ADD.NC p3, r15, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8321 "00000000" // /* MW 15 */ + 8322 "00000000" // /* MW 14 */ + 8323 "10101000" // /* MW 13 */ + 8324 "11100000" // /* MW 12 */ + 8325 "10110011" // /* MW 11 */ + 8326 "00000001" // /* MW 10 */ + 8327 "00000000" // /* MW 9 */ + 8328 "00000000" // /* MW 8 */ + 8329 "01011011" // /* MW 7 */ + 8330 "00000001" // /* MW 6 */ + 8331 "00100000" // /* MW 5 */ + 8332 "00010111" // /* MW 4 */ + 8333 "11110010" // /* MW 3 */ + 8334 "00101100" // /* MW 2 */ + 8335 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 290 20 first +.src_ref 7 "conv2d_dw_bf16.h" 290 24 +.src_ref 7 "conv2d_dw_bf16.h" 292 31 +.return_address + 8336 "00101100" // LDA.u8 r16, [p6, #7]; MOVX r17, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8337 "00010010" // /* MW 5 */ + 8338 "01000100" // /* MW 4 */ + 8339 "01010000" // /* MW 3 */ + 8340 "11000001" // /* MW 2 */ + 8341 "11001110" // /* MW 1 */ + 8342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8343 "00000000" // /* MW 1 */ + 8344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8345 "00000000" // /* MW 1 */ + 8346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8347 "00000000" // /* MW 1 */ + 8348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8349 "00000000" // /* MW 1 */ + 8350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8351 "00000000" // /* MW 1 */ + 8352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8353 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 290 24 + 8354 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8355 "00001000" // /* MW 3 */ + 8356 "01100001" // /* MW 2 */ + 8357 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 290 8 + 8358 "10000100" // JNZ r16, #8544 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8544 delay_slots=5 */ + 8359 "00000001" // /* MW 5 */ + 8360 "01000000" // /* MW 4 */ + 8361 "10110000" // /* MW 3 */ + 8362 "00010000" // /* MW 2 */ + 8363 "10000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 61 +.delay_slot + 8364 "01000100" // MOVXM p4, #509064 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8365 "00010000" // /* MW 5 */ + 8366 "11001001" // /* MW 4 */ + 8367 "11001000" // /* MW 3 */ + 8368 "00000111" // /* MW 2 */ + 8369 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8371 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8373 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8375 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8377 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 31 +.src_ref 7 "conv2d_dw_bf16.h" 292 61 first +.src_ref 7 "conv2d_dw_bf16.h" 292 71 +.src_ref 7 "conv2d_dw_bf16.h" 294 30 +.src_ref 7 "conv2d_dw_bf16.h" 294 68 +.src_ref 7 "conv2d_dw_bf16.h" 294 97 + 8378 "10111010" // LDA r16, [p4], #-4; MOVX r0, #1; MOV m0, #-208 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8379 "01011000" // /* MW 9 */ + 8380 "00110000" // /* MW 8 */ + 8381 "00000111" // /* MW 7 */ + 8382 "00101000" // /* MW 6 */ + 8383 "00000000" // /* MW 5 */ + 8384 "00000000" // /* MW 4 */ + 8385 "11010000" // /* MW 3 */ + 8386 "11000010" // /* MW 2 */ + 8387 "10011111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 118 + 8388 "10011000" // LDA r21, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8389 "10110110" // /* MW 3 */ + 8390 "00101110" // /* MW 2 */ + 8391 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 294 59 first + 8392 "10011000" // LDA r18, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8393 "01010110" // /* MW 3 */ + 8394 "00011110" // /* MW 2 */ + 8395 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 31 +.src_ref 7 "conv2d_dw_bf16.h" 293 31 +.src_ref 7 "conv2d_dw_bf16.h" 297 84 + 8396 "11010100" // LDA r19, [sp, #-24]; MOV p0, p4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8397 "10000001" // /* MW 5 */ + 8398 "11010001" // /* MW 4 */ + 8399 "00100000" // /* MW 3 */ + 8400 "01001110" // /* MW 2 */ + 8401 "11111101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 291 8 +.src_ref 7 "conv2d_dw_bf16.h" 294 97 +.src_ref 7 "conv2d_dw_bf16.h" 297 84 + 8402 "10111010" // LDA r20, [p4], m0; MOVS p0, p7; MOV p7, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8403 "01110010" // /* MW 9 */ + 8404 "01100000" // /* MW 8 */ + 8405 "10110000" // /* MW 7 */ + 8406 "00000011" // /* MW 6 */ + 8407 "10001011" // /* MW 5 */ + 8408 "10011100" // /* MW 4 */ + 8409 "11010000" // /* MW 3 */ + 8410 "01010010" // /* MW 2 */ + 8411 "10000001" // /* MW 1 */ + 8412 "00011000" // LDA p6, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8413 "00011001" // /* MW 3 */ + 8414 "11101111" // /* MW 2 */ + 8415 "00000111" // /* MW 1 */ + 8416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8417 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 31 first + 8418 "10011000" // LSHL r17, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8419 "00011101" // /* MW 3 */ + 8420 "00100011" // /* MW 2 */ + 8421 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 71 + 8422 "10011000" // LSHL r16, r16, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8423 "00001101" // /* MW 3 */ + 8424 "00100000" // /* MW 2 */ + 8425 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 291 8 first +.no_stack_arguments + 8426 "00000100" // JL #7280 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7280 delay_slots=5 */ + 8427 "00000001" // /* MW 5 */ + 8428 "00000000" // /* MW 4 */ + 8429 "00111000" // /* MW 3 */ + 8430 "00001110" // /* MW 2 */ + 8431 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 31 first +.src_ref 7 "conv2d_dw_bf16.h" 294 30 first +.delay_slot + 8432 "10100100" // LSHL r18, r18, r0; ADD.NC r22, r19, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8433 "10001010" // /* MW 5 */ + 8434 "00110011" // /* MW 4 */ + 8435 "10111011" // /* MW 3 */ + 8436 "10000001" // /* MW 2 */ + 8437 "10010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 31 +.src_ref 7 "conv2d_dw_bf16.h" 294 30 +.delay_slot + 8438 "10100100" // LSHL r17, r21, r0; ADD.NC r21, r15, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8439 "10010010" // /* MW 5 */ + 8440 "10101111" // /* MW 4 */ + 8441 "10111010" // /* MW 3 */ + 8442 "01000001" // /* MW 2 */ + 8443 "10101100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 31 +.src_ref 7 "conv2d_dw_bf16.h" 294 68 +.delay_slot + 8444 "10100100" // LSHL r18, r20, r0; ADD.NC p1, r17, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8445 "10110010" // /* MW 5 */ + 8446 "11010001" // /* MW 4 */ + 8447 "10110010" // /* MW 3 */ + 8448 "10000001" // /* MW 2 */ + 8449 "10100100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 293 31 first +.delay_slot + 8450 "01011000" // ADD.NC p2, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8451 "11000001" // /* MW 3 */ + 8452 "01101001" // /* MW 2 */ + 8453 "00011010" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 294 68 first +.delay_slot + 8454 "10111010" // NOPA; NOPB; ADD.NC p3, r21, r18 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8455 "10101110" // /* MW 9 */ + 8456 "01100100" // /* MW 8 */ + 8457 "10110101" // /* MW 7 */ + 8458 "00000001" // /* MW 6 */ + 8459 "00010000" // /* MW 5 */ + 8460 "00000000" // /* MW 4 */ + 8461 "11110000" // /* MW 3 */ + 8462 "00101100" // /* MW 2 */ + 8463 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 7 "conv2d_dw_bf16.h" 297 55 +.src_ref 7 "conv2d_dw_bf16.h" 298 55 +.src_ref 7 "conv2d_dw_bf16.h" 298 84 +.return_address + 8464 "10111010" // LDA p4, [sp, #-16]; MOVX r0, #1; MOV m0, #-204 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8465 "01011000" // /* MW 9 */ + 8466 "00110100" // /* MW 8 */ + 8467 "00000111" // /* MW 7 */ + 8468 "00101000" // /* MW 6 */ + 8469 "00000000" // /* MW 5 */ + 8470 "00000000" // /* MW 4 */ + 8471 "00100000" // /* MW 3 */ + 8472 "01000011" // /* MW 2 */ + 8473 "11111110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 297 84 first + 8474 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8475 "00010110" // /* MW 3 */ + 8476 "11111110" // /* MW 2 */ + 8477 "00000111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 298 84 + 8478 "11010100" // LDA p7, [sp, #-4]; MOV p3, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8479 "10000001" // /* MW 5 */ + 8480 "11011101" // /* MW 4 */ + 8481 "00100110" // /* MW 3 */ + 8482 "11110011" // /* MW 2 */ + 8483 "11111111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 298 84 first + 8484 "10011000" // LDA r17, [p3], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8485 "00110110" // /* MW 3 */ + 8486 "00001010" // /* MW 2 */ + 8487 "00000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 296 8 + 8488 "00011000" // LDA lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8489 "00111001" // /* MW 3 */ + 8490 "11111000" // /* MW 2 */ + 8491 "00000111" // /* MW 1 */ + 8492 "00011000" // LDA r15, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8493 "11110001" // /* MW 3 */ + 8494 "11110101" // /* MW 2 */ + 8495 "00000111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 301 first + 8496 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8497 "00000001" // /* MW 5 */ + 8498 "00000000" // /* MW 4 */ + 8499 "00000000" // /* MW 3 */ + 8500 "11111000" // /* MW 2 */ + 8501 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 8502 "10011000" // LDA r18, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8503 "01010110" // /* MW 3 */ + 8504 "00000110" // /* MW 2 */ + 8505 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 297 55 first + 8506 "10011000" // LSHL r16, r16, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8507 "00001101" // /* MW 3 */ + 8508 "00100000" // /* MW 2 */ + 8509 "00010100" // /* MW 1 */ + 8510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8511 "00000000" // /* MW 1 */ + 8512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8513 "00000000" // /* MW 1 */ + 8514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8515 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 296 8 first +.tail_call + 8516 "10000100" // J #7968 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=7968 delay_slots=5 */ + 8517 "00000000" // /* MW 5 */ + 8518 "00000000" // /* MW 4 */ + 8519 "10010000" // /* MW 3 */ + 8520 "00001111" // /* MW 2 */ + 8521 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 298 55 first +.delay_slot + 8522 "10011000" // LSHL r17, r17, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8523 "00001101" // /* MW 3 */ + 8524 "01100010" // /* MW 2 */ + 8525 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 298 55 +.delay_slot + 8526 "01011000" // ADD.NC r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8527 "11001001" // /* MW 3 */ + 8528 "01011000" // /* MW 2 */ + 8529 "00011100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 297 55 first +.delay_slot + 8530 "01011000" // ADD.NC p1, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8531 "01000001" // /* MW 3 */ + 8532 "01101001" // /* MW 2 */ + 8533 "00011001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 296 8 +.delay_slot + 8534 "11111000" // MOV p0, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8535 "00100000" // /* MW 3 */ + 8536 "01101001" // /* MW 2 */ + 8537 "00011000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 298 93 first +.delay_slot + 8538 "10010100" // NOPA; ADD.NC p2, r17, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8539 "10000010" // /* MW 5 */ + 8540 "11010001" // /* MW 4 */ + 8541 "11110100" // /* MW 3 */ + 8542 "00101100" // /* MW 2 */ + 8543 "00000000" // /* MW 1 */ +.label TGT_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_320 +.src_ref 7 "conv2d_dw_bf16.h" 301 +.return_address + 8544 "00011000" // LDA lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8545 "00111001" // /* MW 3 */ + 8546 "11111000" // /* MW 2 */ + 8547 "00000111" // /* MW 1 */ + 8548 "00011000" // LDA r15, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8549 "11110001" // /* MW 3 */ + 8550 "11110101" // /* MW 2 */ + 8551 "00000111" // /* MW 1 */ + 8552 "00011000" // LDA p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8553 "10011001" // /* MW 3 */ + 8554 "11111111" // /* MW 2 */ + 8555 "00000111" // /* MW 1 */ + 8556 "00011000" // LDA p6, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8557 "00011001" // /* MW 3 */ + 8558 "11101111" // /* MW 2 */ + 8559 "00000111" // /* MW 1 */ + 8560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8561 "00000000" // /* MW 1 */ + 8562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8563 "00000000" // /* MW 1 */ + 8564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8565 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 301 first + 8566 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 8567 "00000000" // /* MW 3 */ + 8568 "00101000" // /* MW 2 */ + 8569 "00010000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 301 +.delay_slot + 8570 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8571 "00000001" // /* MW 5 */ + 8572 "00000000" // /* MW 4 */ + 8573 "00000000" // /* MW 3 */ + 8574 "11111000" // /* MW 2 */ + 8575 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8577 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8579 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8581 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8582 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params__end +.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_end0 + 8583 "00000000" // /* MW 1 */ +.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_conv2d_dwc _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 6 "superkernels.cpp" 352 first +.src_ref 6 "superkernels.cpp" 357 6 +.function_start + 8592 "01000100" // MOVXM p4, #508416 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8593 "00000000" // /* MW 5 */ + 8594 "11000100" // /* MW 4 */ + 8595 "11001000" // /* MW 3 */ + 8596 "00000111" // /* MW 2 */ + 8597 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 357 6 first + 8598 "11010100" // LDA r16, [p4]; MOV r0, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8599 "01000001" // /* MW 5 */ + 8600 "00101111" // /* MW 4 */ + 8601 "11010000" // /* MW 3 */ + 8602 "11000010" // /* MW 2 */ + 8603 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 352 + 8604 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8605 "00000001" // /* MW 5 */ + 8606 "00000000" // /* MW 4 */ + 8607 "00000000" // /* MW 3 */ + 8608 "00010000" // /* MW 2 */ + 8609 "00000000" // /* MW 1 */ + 8610 "00000010" // ST r14, [sp, #-8]; MOV r17, CORE_ID /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8611 "01110000" // /* MW 7 */ + 8612 "01110000" // /* MW 6 */ + 8613 "00101101" // /* MW 5 */ + 8614 "00000010" // /* MW 4 */ + 8615 "10110000" // /* MW 3 */ + 8616 "00111010" // /* MW 2 */ + 8617 "11111111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 375 + 8618 "00000010" // ST r13, [sp, #-4]; MOV r13, lr /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8619 "01110000" // /* MW 7 */ + 8620 "11110000" // /* MW 6 */ + 8621 "10101000" // /* MW 5 */ + 8622 "00000001" // /* MW 4 */ + 8623 "10110000" // /* MW 3 */ + 8624 "10110110" // /* MW 2 */ + 8625 "11111111" // /* MW 1 */ + 8626 "10011000" // ST p0, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8627 "00011101" // /* MW 3 */ + 8628 "11101100" // /* MW 2 */ + 8629 "00001111" // /* MW 1 */ + 8630 "10011000" // ST p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8631 "10011101" // /* MW 3 */ + 8632 "11110111" // /* MW 2 */ + 8633 "00001111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 360 44 + 8634 "00000010" // ST r0, [sp, #-16]; MOV r14, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8635 "01110000" // /* MW 7 */ + 8636 "01100000" // /* MW 6 */ + 8637 "11001010" // /* MW 5 */ + 8638 "00000001" // /* MW 4 */ + 8639 "10110000" // /* MW 3 */ + 8640 "00000010" // /* MW 2 */ + 8641 "11111110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 357 6 +.src_ref 6 "superkernels.cpp" 357 16 + 8642 "10000100" // JNZ r16, #8768 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8768 delay_slots=5 */ + 8643 "00000001" // /* MW 5 */ + 8644 "01000000" // /* MW 4 */ + 8645 "00100000" // /* MW 3 */ + 8646 "00010001" // /* MW 2 */ + 8647 "10000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.delay_slot + 8648 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8649 "11000000" // /* MW 3 */ + 8650 "11010110" // /* MW 2 */ + 8651 "00011011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 354 22 first +.delay_slot + 8652 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8653 "10010000" // /* MW 3 */ + 8654 "01100010" // /* MW 2 */ + 8655 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 354 30 +.delay_slot + 8656 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8657 "11111011" // /* MW 3 */ + 8658 "01100011" // /* MW 2 */ + 8659 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 354 11 +.delay_slot + 8660 "01000100" // MOVXM p3, #508420 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8661 "00001000" // /* MW 5 */ + 8662 "11000100" // /* MW 4 */ + 8663 "11000110" // /* MW 3 */ + 8664 "00000111" // /* MW 2 */ + 8665 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 354 11 +.delay_slot + 8666 "10011000" // ST r17, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8667 "00110001" // /* MW 3 */ + 8668 "00000110" // /* MW 2 */ + 8669 "00001011" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.src_ref 6 "superkernels.cpp" 369 2 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8670 "00111010" // MOVS p7, p1; MOVXM p1, #508448 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8671 "00010001" // /* MW 9 */ + 8672 "00010000" // /* MW 8 */ + 8673 "10110001" // /* MW 7 */ + 8674 "11110000" // /* MW 6 */ + 8675 "00000001" // /* MW 5 */ + 8676 "00000000" // /* MW 4 */ + 8677 "01100000" // /* MW 3 */ + 8678 "10010001" // /* MW 2 */ + 8679 "11110000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.src_ref 6 "superkernels.cpp" 359 4 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8680 "01110110" // ST.s8 r16, [p1]; MOVS p0, p2; MOVXM p1, #508444 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8681 "00010000" // /* MW 11 */ + 8682 "00001110" // /* MW 10 */ + 8683 "10110001" // /* MW 9 */ + 8684 "11110000" // /* MW 8 */ + 8685 "00000001" // /* MW 7 */ + 8686 "00000000" // /* MW 6 */ + 8687 "10001011" // /* MW 5 */ + 8688 "10001000" // /* MW 4 */ + 8689 "11100000" // /* MW 3 */ + 8690 "11000000" // /* MW 2 */ + 8691 "00100000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8693 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 359 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 8694 "00000100" // JL #6480 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=6480 delay_slots=5 */ + 8695 "00000001" // /* MW 5 */ + 8696 "00000000" // /* MW 4 */ + 8697 "10101000" // /* MW 3 */ + 8698 "00001100" // /* MW 2 */ + 8699 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8701 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8702 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8703 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8704 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8705 "00110001" // /* MW 3 */ + 8706 "00100000" // /* MW 2 */ + 8707 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 8708 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8709 "00000101" // /* MW 3 */ + 8710 "00100000" // /* MW 2 */ + 8711 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 8712 "00000010" // ST r16, [p1]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8713 "01110000" // /* MW 7 */ + 8714 "10100101" // /* MW 6 */ + 8715 "00000001" // /* MW 5 */ + 8716 "00000000" // /* MW 4 */ + 8717 "00110000" // /* MW 3 */ + 8718 "11000010" // /* MW 2 */ + 8719 "00100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 360 44 +.src_ref 6 "superkernels.cpp" 369 2 +.return_address + 8720 "00000010" // MOVS p1, p7; ADD.NC p2, r14, #8 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8721 "00000000" // /* MW 7 */ + 8722 "10000010" // /* MW 6 */ + 8723 "00110011" // /* MW 5 */ + 8724 "00000001" // /* MW 4 */ + 8725 "01100000" // /* MW 3 */ + 8726 "10010001" // /* MW 2 */ + 8727 "00110011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 360 17 first + 8728 "10011000" // LDA.u16 r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8729 "00111010" // /* MW 3 */ + 8730 "00000110" // /* MW 2 */ + 8731 "00000010" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 361 13 +.src_ref 6 "superkernels.cpp" 361 15 first + 8732 "10111010" // LDA.u16 r16, [p2, #4]; MOVXM p2, #508440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8733 "00010000" // /* MW 9 */ + 8734 "00001100" // /* MW 8 */ + 8735 "00110001" // /* MW 7 */ + 8736 "11110001" // /* MW 6 */ + 8737 "00000001" // /* MW 5 */ + 8738 "00000000" // /* MW 4 */ + 8739 "01010000" // /* MW 3 */ + 8740 "11000011" // /* MW 2 */ + 8741 "01000100" // /* MW 1 */ + 8742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8743 "00000000" // /* MW 1 */ + 8744 "10000100" // J #8784 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8784 delay_slots=5 */ + 8745 "00000000" // /* MW 5 */ + 8746 "00000000" // /* MW 4 */ + 8747 "00101000" // /* MW 3 */ + 8748 "00010001" // /* MW 2 */ + 8749 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 360 15 +.src_ref 6 "superkernels.cpp" 365 26 +.delay_slot + 8750 "01000100" // MOVXM p3, #508432 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8751 "00100000" // /* MW 5 */ + 8752 "11000100" // /* MW 4 */ + 8753 "11000110" // /* MW 3 */ + 8754 "00000111" // /* MW 2 */ + 8755 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8756 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8757 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8758 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8759 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 360 15 first +.delay_slot + 8760 "10011000" // ST r17, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8761 "00110001" // /* MW 3 */ + 8762 "00000110" // /* MW 2 */ + 8763 "00001011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 361 13 first +.delay_slot + 8764 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8765 "00010001" // /* MW 3 */ + 8766 "00000110" // /* MW 2 */ + 8767 "00001010" // /* MW 1 */ +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_176 +.src_ref 6 "superkernels.cpp" 365 26 + 8768 "11100001" // NOPA; NOPB; NOPS; MOVXM p3, #508432; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8769 "00000000" // /* MW 15 */ + 8770 "00000000" // /* MW 14 */ + 8771 "00010000" // /* MW 13 */ + 8772 "00001000" // /* MW 12 */ + 8773 "10110001" // /* MW 11 */ + 8774 "11110001" // /* MW 10 */ + 8775 "00000001" // /* MW 9 */ + 8776 "00000000" // /* MW 8 */ + 8777 "01011011" // /* MW 7 */ + 8778 "00000001" // /* MW 6 */ + 8779 "00100000" // /* MW 5 */ + 8780 "00000000" // /* MW 4 */ + 8781 "11110000" // /* MW 3 */ + 8782 "00101100" // /* MW 2 */ + 8783 "00000000" // /* MW 1 */ +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_192 +.src_ref 1 "io_buffer_main.h" 242 49 first + 8784 "00011000" // ADD.NC p2, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8785 "10000110" // /* MW 3 */ + 8786 "01100111" // /* MW 2 */ + 8787 "00011010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 365 15 + 8788 "10111010" // LDA r27, [p2], #-4; MOVXM p4, #508420 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8789 "00010000" // /* MW 9 */ + 8790 "00000010" // /* MW 8 */ + 8791 "00110001" // /* MW 7 */ + 8792 "11110010" // /* MW 6 */ + 8793 "00000001" // /* MW 5 */ + 8794 "00000000" // /* MW 4 */ + 8795 "11010000" // /* MW 3 */ + 8796 "11101110" // /* MW 2 */ + 8797 "01011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 8798 "10011000" // LDA r16, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8799 "00010110" // /* MW 3 */ + 8800 "11111110" // /* MW 2 */ + 8801 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 8802 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8803 "00110110" // /* MW 3 */ + 8804 "11111110" // /* MW 2 */ + 8805 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first + 8806 "10011000" // LDA r18, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8807 "01010110" // /* MW 3 */ + 8808 "01000110" // /* MW 2 */ + 8809 "00000010" // /* MW 1 */ + 8810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8811 "00000000" // /* MW 1 */ + 8812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8813 "00000000" // /* MW 1 */ + 8814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8815 "00000000" // /* MW 1 */ + 8816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8817 "00000000" // /* MW 1 */ + 8818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8819 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 8820 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8821 "00000010" // /* MW 3 */ + 8822 "01100001" // /* MW 2 */ + 8823 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 + 8824 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8825 "00010001" // /* MW 3 */ + 8826 "00000110" // /* MW 2 */ + 8827 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 + 8828 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8829 "11111101" // /* MW 3 */ + 8830 "11100000" // /* MW 2 */ + 8831 "00010111" // /* MW 1 */ + 8832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8833 "00000000" // /* MW 1 */ + 8834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8835 "00000000" // /* MW 1 */ + 8836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8837 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 8838 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8839 "00001000" // /* MW 3 */ + 8840 "10010011" // /* MW 2 */ + 8841 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 449 8 +.src_ref 6 "superkernels.cpp" 364 11 +.src_ref 6 "superkernels.cpp" 367 47 +.src_ref 6 "superkernels.cpp" 372 6 +.src_ref 6 "superkernels.cpp" 373 16 + 8842 "10111010" // MOVA r15, #1; MOVXM p7, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8843 "00010000" // /* MW 9 */ + 8844 "00000000" // /* MW 8 */ + 8845 "10110001" // /* MW 7 */ + 8846 "11110011" // /* MW 6 */ + 8847 "00000001" // /* MW 5 */ + 8848 "00000000" // /* MW 4 */ + 8849 "00000000" // /* MW 3 */ + 8850 "00101111" // /* MW 2 */ + 8851 "00000000" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 76 16 + 8852 "11100100" // MOVX r24, #0; MOV r16, sp /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8853 "11000001" // /* MW 5 */ + 8854 "00101011" // /* MW 4 */ + 8855 "00101000" // /* MW 3 */ + 8856 "00000000" // /* MW 2 */ + 8857 "00000110" // /* MW 1 */ + 8858 "00011000" // ADD.NC p0, r16, #-76 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8859 "01011010" // /* MW 3 */ + 8860 "01101000" // /* MW 2 */ + 8861 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 1 "io_buffer_main.h" 348 51 + 8862 "11010100" // LDA p5, [sp, #-20]; MOV r14, p2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8863 "10000001" // /* MW 5 */ + 8864 "00101001" // /* MW 4 */ + 8865 "00100111" // /* MW 3 */ + 8866 "11010011" // /* MW 2 */ + 8867 "11111101" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 365 15 first + 8868 "10011000" // LDA r17, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8869 "00110110" // /* MW 3 */ + 8870 "00000110" // /* MW 2 */ + 8871 "00000100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 365 26 +.src_ref 6 "superkernels.cpp" 369 2 + 8872 "10111010" // LDA r16, [p3]; MOVXM p3, #508864 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8873 "00010000" // /* MW 9 */ + 8874 "11100000" // /* MW 8 */ + 8875 "10110001" // /* MW 7 */ + 8876 "11110001" // /* MW 6 */ + 8877 "00000001" // /* MW 5 */ + 8878 "00000000" // /* MW 4 */ + 8879 "11010000" // /* MW 3 */ + 8880 "11000010" // /* MW 2 */ + 8881 "01100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 364 11 first + 8882 "10011000" // LDA r18, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8883 "01010110" // /* MW 3 */ + 8884 "00000110" // /* MW 2 */ + 8885 "00000111" // /* MW 1 */ + 8886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8887 "00000000" // /* MW 1 */ + 8888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8889 "00000000" // /* MW 1 */ + 8890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8891 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 8892 "10011000" // LDA r19, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8893 "01110110" // /* MW 3 */ + 8894 "00000110" // /* MW 2 */ + 8895 "00000101" // /* MW 1 */ + 8896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8897 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 365 24 first + 8898 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8899 "00001111" // /* MW 3 */ + 8900 "01100001" // /* MW 2 */ + 8901 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 364 11 first + 8902 "00011000" // ADD r17, r18, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8903 "00000111" // /* MW 3 */ + 8904 "10100010" // /* MW 2 */ + 8905 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 367 47 first + 8906 "10011000" // LSHL r16, r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8907 "11111101" // /* MW 3 */ + 8908 "00100000" // /* MW 2 */ + 8909 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 369 2 first +.no_stack_arguments + 8910 "00000100" // JL #8224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8224 delay_slots=5 */ + 8911 "00000001" // /* MW 5 */ + 8912 "00000000" // /* MW 4 */ + 8913 "00010000" // /* MW 3 */ + 8914 "00010000" // /* MW 2 */ + 8915 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 364 11 first +.delay_slot + 8916 "10011000" // ST r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8917 "00110001" // /* MW 3 */ + 8918 "00000110" // /* MW 2 */ + 8919 "00001111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 367 47 first +.delay_slot + 8920 "01011000" // ADD.NC dn0, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8921 "11000001" // /* MW 3 */ + 8922 "01001001" // /* MW 2 */ + 8923 "00011000" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 225 10 first +.delay_slot + 8924 "10011000" // ST dn0, [sp, #-76] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8925 "00100101" // /* MW 3 */ + 8926 "10110100" // /* MW 2 */ + 8927 "00001111" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 76 16 first +.delay_slot + 8928 "10011000" // ST r24, [sp, #-72] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8929 "00010101" // /* MW 3 */ + 8930 "10111011" // /* MW 2 */ + 8931 "00001111" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 76 16 +.delay_slot + 8932 "00110110" // NOPA; NOPB; ST r24, [sp, #-68]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8933 "11000001" // /* MW 11 */ + 8934 "10001010" // /* MW 10 */ + 8935 "11011111" // /* MW 9 */ + 8936 "00000011" // /* MW 8 */ + 8937 "00000000" // /* MW 7 */ + 8938 "00000000" // /* MW 6 */ + 8939 "00100000" // /* MW 5 */ + 8940 "00000000" // /* MW 4 */ + 8941 "11110000" // /* MW 3 */ + 8942 "00101100" // /* MW 2 */ + 8943 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 +.return_address + 8944 "00011000" // ADD.NC p2, r14, #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8945 "00001010" // /* MW 3 */ + 8946 "01100111" // /* MW 2 */ + 8947 "00011010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first + 8948 "10011000" // LDA r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8949 "00010110" // /* MW 3 */ + 8950 "00000110" // /* MW 2 */ + 8951 "00000010" // /* MW 1 */ + 8952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8953 "00000000" // /* MW 1 */ + 8954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8955 "00000000" // /* MW 1 */ + 8956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8957 "00000000" // /* MW 1 */ + 8958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8959 "00000000" // /* MW 1 */ + 8960 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8961 "00000000" // /* MW 1 */ + 8962 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8963 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 8964 "00011000" // REL r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8965 "11111000" // /* MW 3 */ + 8966 "00010000" // /* MW 2 */ + 8967 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first +.src_ref 6 "superkernels.cpp" 372 19 + 8968 "10111010" // LDA r16, [p2, #-8]; MOVXM p1, #508440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8969 "00010000" // /* MW 9 */ + 8970 "00001100" // /* MW 8 */ + 8971 "10110001" // /* MW 7 */ + 8972 "11110000" // /* MW 6 */ + 8973 "00000001" // /* MW 5 */ + 8974 "00000000" // /* MW 4 */ + 8975 "11010000" // /* MW 3 */ + 8976 "11000010" // /* MW 2 */ + 8977 "01011100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 372 19 first + 8978 "10011000" // LDA r18, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8979 "01010110" // /* MW 3 */ + 8980 "00000110" // /* MW 2 */ + 8981 "00000001" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 372 6 + 8982 "10011000" // LDA r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8983 "00110110" // /* MW 3 */ + 8984 "00000110" // /* MW 2 */ + 8985 "00000111" // /* MW 1 */ + 8986 "00011000" // LDA p1, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8987 "10011001" // /* MW 3 */ + 8988 "11110100" // /* MW 2 */ + 8989 "00000111" // /* MW 1 */ + 8990 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8991 "11010001" // /* MW 3 */ + 8992 "11111001" // /* MW 2 */ + 8993 "00000111" // /* MW 1 */ + 8994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8995 "00000000" // /* MW 1 */ + 8996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8997 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 first + 8998 "10011000" // SUB r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8999 "00000001" // /* MW 3 */ + 9000 "11100001" // /* MW 2 */ + 9001 "00010011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 9002 "10011000" // ST r16, [p2, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9003 "00010001" // /* MW 3 */ + 9004 "11100110" // /* MW 2 */ + 9005 "00001010" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 372 16 first + 9006 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9007 "00101000" // /* MW 3 */ + 9008 "01100001" // /* MW 2 */ + 9009 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 372 6 + 9010 "10000100" // JNZ r16, #9040 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9040 delay_slots=5 */ + 9011 "00000001" // /* MW 5 */ + 9012 "01000000" // /* MW 4 */ + 9013 "10101000" // /* MW 3 */ + 9014 "00010001" // /* MW 2 */ + 9015 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 373 16 +.delay_slot + 9016 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9017 "00000001" // /* MW 3 */ + 9018 "00110000" // /* MW 2 */ + 9019 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9021 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9023 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9025 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9027 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 373 16 first + 9028 "00110110" // NOPA; NOPB; ST r24, [p7]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9029 "11000001" // /* MW 11 */ + 9030 "10001000" // /* MW 10 */ + 9031 "10000011" // /* MW 9 */ + 9032 "00000011" // /* MW 8 */ + 9033 "00000000" // /* MW 7 */ + 9034 "00000000" // /* MW 6 */ + 9035 "00100000" // /* MW 5 */ + 9036 "00000000" // /* MW 4 */ + 9037 "11110000" // /* MW 3 */ + 9038 "00101100" // /* MW 2 */ + 9039 "00000000" // /* MW 1 */ +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 6 "superkernels.cpp" 375 + 9040 "11010100" // LDA r13, [sp, #-4]; MOV lr, r13 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9041 "01000001" // /* MW 5 */ + 9042 "11101101" // /* MW 4 */ + 9043 "00101110" // /* MW 3 */ + 9044 "10110110" // /* MW 2 */ + 9045 "11111111" // /* MW 1 */ + 9046 "00011000" // LDA r15, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9047 "11110001" // /* MW 3 */ + 9048 "11110001" // /* MW 2 */ + 9049 "00000111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 375 first + 9050 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9051 "00000000" // /* MW 3 */ + 9052 "00101000" // /* MW 2 */ + 9053 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 375 +.delay_slot + 9054 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9055 "00000001" // /* MW 5 */ + 9056 "00000000" // /* MW 4 */ + 9057 "00000000" // /* MW 3 */ + 9058 "11110000" // /* MW 2 */ + 9059 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9061 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9063 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9065 "00000000" // /* MW 1 */ +.delay_slot + 9066 "11111000" // MOV p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9067 "11000000" // /* MW 3 */ + 9068 "01100010" // /* MW 2 */ +.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 9069 "00011111" // /* MW 1 */ +.label __Z15_b14160_wrapperPPv___func_begin0 +.label _Z15_b14160_wrapperPPv +.function _b14160_wrapper _Z15_b14160_wrapperPPv +.src_ref 0 "0_0_reloadable4.cc" 21 first +.src_ref 0 "0_0_reloadable4.cc" 23 79 +.function_start + 9072 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9073 "11000000" // /* MW 3 */ + 9074 "01100000" // /* MW 2 */ + 9075 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 23 79 first + 9076 "10011000" // LDA p0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9077 "00011110" // /* MW 3 */ + 9078 "00011100" // /* MW 2 */ + 9079 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 24 79 first + 9080 "10011000" // LDA p1, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9081 "10011110" // /* MW 3 */ + 9082 "00101100" // /* MW 2 */ + 9083 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 26 81 first + 9084 "10011000" // LDA p3, [p2, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9085 "10011110" // /* MW 3 */ + 9086 "11110101" // /* MW 2 */ + 9087 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 25 47 first + 9088 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9089 "00011110" // /* MW 3 */ + 9090 "00000101" // /* MW 2 */ + 9091 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 22 4 first +.tail_call + 9092 "10000100" // J #8592 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=8592 delay_slots=5 */ + 9093 "00000000" // /* MW 5 */ + 9094 "00000000" // /* MW 4 */ + 9095 "11001000" // /* MW 3 */ + 9096 "00010000" // /* MW 2 */ + 9097 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9099 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9100 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9101 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9103 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9105 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z15_b14160_wrapperPPv__end +.label __Z15_b14160_wrapperPPv___func_end0 + 9107 "00000000" // /* MW 1 */ +.label __Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj___func_begin0 +.label _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj +.function setup_transposeshuffle_params _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj +.src_ref 3 "transposeshuffle_params.h" 71 first +.src_ref 3 "transposeshuffle_params.h" 76 16 +.src_ref 3 "transposeshuffle_params.h" 76 18 first +.function_start + 9120 "10111010" // LDA el0, [p1], #4; MOVXM r0, #508544 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9121 "00010000" // /* MW 9 */ + 9122 "01000000" // /* MW 8 */ + 9123 "00001001" // /* MW 7 */ + 9124 "11110000" // /* MW 6 */ + 9125 "00000001" // /* MW 5 */ + 9126 "00000000" // /* MW 4 */ + 9127 "11010000" // /* MW 3 */ + 9128 "10000101" // /* MW 2 */ + 9129 "00100011" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 9 +.src_ref 3 "transposeshuffle_params.h" 76 16 +.src_ref 3 "transposeshuffle_params.h" 80 28 +.src_ref 3 "transposeshuffle_params.h" 80 36 +.src_ref 3 "transposeshuffle_params.h" 81 28 +.src_ref 3 "transposeshuffle_params.h" 81 36 + 9130 "01110110" // MOVA m0, #64; MOVS p0, r0; MOVX r2, #-3; ADD.NC p2, r0, #4 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9131 "00001000" // /* MW 11 */ + 9132 "00000001" // /* MW 10 */ + 9133 "00110000" // /* MW 9 */ + 9134 "10101001" // /* MW 8 */ + 9135 "00100111" // /* MW 7 */ + 9136 "00111110" // /* MW 6 */ + 9137 "00001011" // /* MW 5 */ + 9138 "10000000" // /* MW 4 */ + 9139 "10000000" // /* MW 3 */ + 9140 "00000000" // /* MW 2 */ + 9141 "00001000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 85 22 +.src_ref 3 "transposeshuffle_params.h" 86 17 +.src_ref 3 "transposeshuffle_params.h" 89 43 +.src_ref 3 "transposeshuffle_params.h" 91 18 +.src_ref 3 "transposeshuffle_params.h" 93 4 +.src_ref 3 "transposeshuffle_params.h" 94 4 + 9142 "01100100" // MOVX r1, #4; MOV r0, #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9143 "00000001" // /* MW 5 */ + 9144 "00100010" // /* MW 4 */ + 9145 "00100000" // /* MW 3 */ + 9146 "01000010" // /* MW 2 */ + 9147 "00000000" // /* MW 1 */ + 9148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9149 "00000000" // /* MW 1 */ + 9150 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9151 "00000000" // /* MW 1 */ + 9152 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9153 "00000000" // /* MW 1 */ + 9154 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9155 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 16 first + 9156 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9157 "00101001" // /* MW 3 */ + 9158 "00011100" // /* MW 2 */ + 9159 "00001000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 18 + 9160 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9161 "00101110" // /* MW 3 */ + 9162 "00011100" // /* MW 2 */ + 9163 "00000001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 18 + 9164 "10011000" // LDA eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9165 "00001110" // /* MW 3 */ + 9166 "00011100" // /* MW 2 */ + 9167 "00000001" // /* MW 1 */ + 9168 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9169 "00000000" // /* MW 1 */ + 9170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9171 "00000000" // /* MW 1 */ + 9172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9173 "00000000" // /* MW 1 */ + 9174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9175 "00000000" // /* MW 1 */ + 9176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9177 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 16 + 9178 "10011000" // ST el0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9179 "00101001" // /* MW 3 */ + 9180 "00011100" // /* MW 2 */ + 9181 "00001010" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 16 + 9182 "10011000" // ST eh0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9183 "00001001" // /* MW 3 */ + 9184 "00011100" // /* MW 2 */ + 9185 "00001010" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 18 + 9186 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9187 "00101110" // /* MW 3 */ + 9188 "00011100" // /* MW 2 */ + 9189 "00000001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 18 + 9190 "10011000" // LDA eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9191 "00001110" // /* MW 3 */ + 9192 "00011100" // /* MW 2 */ + 9193 "00000001" // /* MW 1 */ + 9194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9195 "00000000" // /* MW 1 */ + 9196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9197 "00000000" // /* MW 1 */ + 9198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9199 "00000000" // /* MW 1 */ + 9200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9201 "00000000" // /* MW 1 */ + 9202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9203 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 16 + 9204 "10011000" // ST el0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9205 "00101001" // /* MW 3 */ + 9206 "00011100" // /* MW 2 */ + 9207 "00001010" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 16 + 9208 "10011000" // ST eh0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9209 "00001001" // /* MW 3 */ + 9210 "00011100" // /* MW 2 */ + 9211 "00001010" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 18 + 9212 "10011000" // LDA eh0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9213 "00001110" // /* MW 3 */ + 9214 "00000100" // /* MW 2 */ + 9215 "00000001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 18 + 9216 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9217 "00101110" // /* MW 3 */ + 9218 "00010100" // /* MW 2 */ + 9219 "00000001" // /* MW 1 */ + 9220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9221 "00000000" // /* MW 1 */ + 9222 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9223 "00000000" // /* MW 1 */ + 9224 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9225 "00000000" // /* MW 1 */ + 9226 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9227 "00000000" // /* MW 1 */ + 9228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9229 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 16 + 9230 "10011000" // ST eh0, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9231 "00001001" // /* MW 3 */ + 9232 "00000100" // /* MW 2 */ + 9233 "00001010" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 16 + 9234 "10011000" // ST el0, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9235 "00101001" // /* MW 3 */ + 9236 "00010100" // /* MW 2 */ + 9237 "00001010" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 80 28 first + 9238 "10011000" // LDA r3, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9239 "01110110" // /* MW 3 */ + 9240 "00001000" // /* MW 2 */ + 9241 "00000000" // /* MW 1 */ + 9242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9243 "00000000" // /* MW 1 */ + 9244 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9245 "00000000" // /* MW 1 */ + 9246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9247 "00000000" // /* MW 1 */ + 9248 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9249 "00000000" // /* MW 1 */ + 9250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9251 "00000000" // /* MW 1 */ + 9252 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9253 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 80 36 + 9254 "10011000" // LSHL r4, r3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9255 "00101101" // /* MW 3 */ + 9256 "11001000" // /* MW 2 */ + 9257 "00010000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 80 42 +.src_ref 3 "transposeshuffle_params.h" 89 43 first + 9258 "00100100" // LSHL r3, r3, r1; ADD.NC r1, r4, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9259 "11111111" // /* MW 5 */ + 9260 "10100100" // /* MW 4 */ + 9261 "10110000" // /* MW 3 */ + 9262 "11000011" // /* MW 2 */ + 9263 "00011000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 80 19 +.src_ref 3 "transposeshuffle_params.h" 80 19 first + 9264 "00000010" // ST r1, [p0]; MOV r4, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9265 "01110000" // /* MW 7 */ + 9266 "01100000" // /* MW 6 */ + 9267 "10001000" // /* MW 5 */ + 9268 "00000000" // /* MW 4 */ + 9269 "00110000" // /* MW 3 */ + 9270 "10000110" // /* MW 2 */ + 9271 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 80 19 + 9272 "00011000" // ADD.NC p1, r4, #-60 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9273 "01100010" // /* MW 3 */ + 9274 "01100010" // /* MW 2 */ + 9275 "00011001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 81 28 first + 9276 "10011000" // LDA r4, [p1], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9277 "10010110" // /* MW 3 */ + 9278 "00001000" // /* MW 2 */ + 9279 "00000001" // /* MW 1 */ + 9280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9281 "00000000" // /* MW 1 */ + 9282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9283 "00000000" // /* MW 1 */ + 9284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9285 "00000000" // /* MW 1 */ + 9286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9287 "00000000" // /* MW 1 */ + 9288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9289 "00000000" // /* MW 1 */ + 9290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9291 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 81 36 +.src_ref 3 "transposeshuffle_params.h" 90 77 + 9292 "01100100" // LSHL r2, r4, r2; MOV r4, #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9293 "00000001" // /* MW 5 */ + 9294 "00100010" // /* MW 4 */ + 9295 "10110010" // /* MW 3 */ + 9296 "10000101" // /* MW 2 */ + 9297 "00100000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 81 42 + 9298 "00011000" // ADD r2, r2, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9299 "11111111" // /* MW 3 */ + 9300 "10000101" // /* MW 2 */ + 9301 "00010000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 81 19 +.src_ref 3 "transposeshuffle_params.h" 90 77 first + 9302 "01011100" // ST r2, [p1], #4; MSC r4, r4, r3, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9303 "01011100" // /* MW 5 */ + 9304 "10010000" // /* MW 4 */ + 9305 "00110001" // /* MW 3 */ + 9306 "10001010" // /* MW 2 */ + 9307 "00100011" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 85 22 first + 9308 "10011000" // ST r0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9309 "00010001" // /* MW 3 */ + 9310 "00011100" // /* MW 2 */ + 9311 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 85 22 + 9312 "10011000" // ST r0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9313 "00010001" // /* MW 3 */ + 9314 "00011100" // /* MW 2 */ + 9315 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 86 17 first + 9316 "10011000" // ST r0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9317 "00010001" // /* MW 3 */ + 9318 "00101100" // /* MW 2 */ + 9319 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 89 23 first + 9320 "10011000" // ST r3, [p1], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9321 "01110001" // /* MW 3 */ + 9322 "11111100" // /* MW 2 */ + 9323 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 90 23 first + 9324 "10011000" // ST r4, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9325 "10010001" // /* MW 3 */ + 9326 "00101100" // /* MW 2 */ + 9327 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 91 18 +.src_ref 3 "transposeshuffle_params.h" 91 18 first + 9328 "00000010" // ST r0, [p1]; MOV r5, p1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9329 "01110000" // /* MW 7 */ + 9330 "01100000" // /* MW 6 */ + 9331 "10101001" // /* MW 5 */ + 9332 "00000000" // /* MW 4 */ + 9333 "00110000" // /* MW 3 */ + 9334 "10000010" // /* MW 2 */ + 9335 "00100000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 91 18 + 9336 "00011000" // ADD.NC p1, r5, #-68 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9337 "11011110" // /* MW 3 */ + 9338 "01100010" // /* MW 2 */ + 9339 "00011001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 93 4 first + 9340 "10011000" // ST r0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9341 "00010001" // /* MW 3 */ + 9342 "00011100" // /* MW 2 */ + 9343 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 93 4 + 9344 "10011000" // ST r0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9345 "00010001" // /* MW 3 */ + 9346 "00011100" // /* MW 2 */ + 9347 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 93 4 + 9348 "10011000" // ST r2, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9349 "01010001" // /* MW 3 */ + 9350 "00011100" // /* MW 2 */ + 9351 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 93 4 + 9352 "10011000" // ST r1, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9353 "00110001" // /* MW 3 */ + 9354 "00011100" // /* MW 2 */ + 9355 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 93 4 +.src_ref 3 "transposeshuffle_params.h" 95 first + 9356 "01011100" // ST r0, [p1], #4; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 9357 "00000000" // /* MW 5 */ + 9358 "01010000" // /* MW 4 */ + 9359 "00110000" // /* MW 3 */ + 9360 "10000010" // /* MW 2 */ + 9361 "00100011" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 94 4 first +.delay_slot + 9362 "10011000" // ST r3, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9363 "01110001" // /* MW 3 */ + 9364 "00101100" // /* MW 2 */ + 9365 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 94 4 +.delay_slot + 9366 "10011000" // ST r2, [p1], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9367 "01010001" // /* MW 3 */ + 9368 "11111100" // /* MW 2 */ + 9369 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 94 4 +.delay_slot + 9370 "10011000" // ST r4, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9371 "10010001" // /* MW 3 */ + 9372 "00101100" // /* MW 2 */ + 9373 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 94 4 +.delay_slot + 9374 "10011000" // ST r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9375 "00110001" // /* MW 3 */ + 9376 "00000100" // /* MW 2 */ + 9377 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 94 4 +.delay_slot + 9378 "10011000" // ST r0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9379 "00010001" // /* MW 3 */ + 9380 "00010100" // /* MW 2 */ +.label _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj__end +.label __Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj___func_end0 + 9381 "00001001" // /* MW 1 */ +.label __Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params___func_begin0 +.label _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params +.function transposeshuffle _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params +.src_ref 3 "transposeshuffle.h" 38 first +.src_ref 3 "transposeshuffle.h" 72 14 +.src_ref 3 "transposeshuffle.h" 79 23 +.function_start + 9392 "10111010" // MOVA r1, #2; MOVXM p2, #508556 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9393 "00010000" // /* MW 9 */ + 9394 "01000110" // /* MW 8 */ + 9395 "00110001" // /* MW 7 */ + 9396 "11110001" // /* MW 6 */ + 9397 "00000001" // /* MW 5 */ + 9398 "00000000" // /* MW 4 */ + 9399 "00000000" // /* MW 3 */ + 9400 "01000001" // /* MW 2 */ + 9401 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 72 8 +.src_ref 3 "transposeshuffle.h" 72 14 first +.src_ref 3 "transposeshuffle.h" 72 23 + 9402 "00101100" // LDA r27, [p2]; MOVX r0, #22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9403 "10110010" // /* MW 5 */ + 9404 "00000000" // /* MW 4 */ + 9405 "11010000" // /* MW 3 */ + 9406 "11101110" // /* MW 2 */ + 9407 "01000000" // /* MW 1 */ + 9408 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9409 "00000000" // /* MW 1 */ + 9410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9411 "00000000" // /* MW 1 */ + 9412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9413 "00000000" // /* MW 1 */ + 9414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9415 "00000000" // /* MW 1 */ + 9416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9417 "00000000" // /* MW 1 */ + 9418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9419 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 79 23 first + 9420 "10011000" // EQ r1, r27, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9421 "00010111" // /* MW 3 */ + 9422 "11000010" // /* MW 2 */ + 9423 "00010110" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 79 8 + 9424 "10000100" // JNZ r1, #9888 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9888 delay_slots=5 */ + 9425 "00000001" // /* MW 5 */ + 9426 "01000000" // /* MW 4 */ + 9427 "01010000" // /* MW 3 */ + 9428 "00010011" // /* MW 2 */ + 9429 "00001000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 72 8 +.src_ref 3 "transposeshuffle.h" 72 23 +.delay_slot + 9430 "00011000" // MOVX r2, #29 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9431 "01110101" // /* MW 3 */ + 9432 "00000100" // /* MW 2 */ + 9433 "00010000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 72 8 first +.src_ref 3 "transposeshuffle.h" 72 23 first +.delay_slot + 9434 "00011000" // SEL.EQZ r0, r0, r2, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9435 "00100010" // /* MW 3 */ + 9436 "00000000" // /* MW 2 */ + 9437 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9439 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9441 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9443 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 116 34 + 9444 "01000100" // MOVXM p2, #508560 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9445 "00100000" // /* MW 5 */ + 9446 "11000101" // /* MW 4 */ + 9447 "11000100" // /* MW 3 */ + 9448 "00000111" // /* MW 2 */ + 9449 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 116 34 first + 9450 "10011000" // LDA r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9451 "00110110" // /* MW 3 */ + 9452 "00000100" // /* MW 2 */ + 9453 "00000010" // /* MW 1 */ + 9454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9455 "00000000" // /* MW 1 */ + 9456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9457 "00000000" // /* MW 1 */ + 9458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9459 "00000000" // /* MW 1 */ + 9460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9461 "00000000" // /* MW 1 */ + 9462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9463 "00000000" // /* MW 1 */ + 9464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9465 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 116 8 +.src_ref 3 "transposeshuffle.h" 116 26 + 9466 "10000100" // JZ r1, #10528 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10528 delay_slots=5 */ + 9467 "00000001" // /* MW 5 */ + 9468 "00000000" // /* MW 4 */ + 9469 "10010000" // /* MW 3 */ + 9470 "00010100" // /* MW 2 */ + 9471 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9473 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9475 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9477 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9479 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9481 "00000000" // /* MW 1 */ + 9482 "00011000" // MOVX r2, #10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9483 "00101001" // /* MW 3 */ + 9484 "00000100" // /* MW 2 */ + 9485 "00010000" // /* MW 1 */ + 9486 "10011000" // LTU r2, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9487 "00101100" // /* MW 3 */ + 9488 "01000100" // /* MW 2 */ + 9489 "00010000" // /* MW 1 */ + 9490 "10000100" // JNZ r2, #9728 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9728 delay_slots=5 */ + 9491 "00000001" // /* MW 5 */ + 9492 "01000000" // /* MW 4 */ + 9493 "00000000" // /* MW 3 */ + 9494 "00010011" // /* MW 2 */ + 9495 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9497 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9499 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9501 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9503 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9505 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "transposeshuffle.h" 116 8 +.src_ref 3 "transposeshuffle.h" 119 21 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9506 "00111010" // VLDB x0, [p0], #64; MOVXM ls, #9632 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9507 "00010000" // /* MW 9 */ + 9508 "11010000" // /* MW 8 */ + 9509 "01111010" // /* MW 7 */ + 9510 "00001000" // /* MW 6 */ + 9511 "00000000" // /* MW 5 */ + 9512 "00000000" // /* MW 4 */ + 9513 "01101000" // /* MW 3 */ + 9514 "00111000" // /* MW 2 */ + 9515 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "transposeshuffle.h" 116 8 first +.src_ref 3 "transposeshuffle.h" 119 21 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9516 "00111010" // VLDB x0, [p0], #64; MOVXM le, #9632 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9517 "00010000" // /* MW 9 */ + 9518 "11010000" // /* MW 8 */ + 9519 "10111010" // /* MW 7 */ + 9520 "00001001" // /* MW 6 */ + 9521 "00000000" // /* MW 5 */ + 9522 "00000000" // /* MW 4 */ + 9523 "01101000" // /* MW 3 */ + 9524 "00111000" // /* MW 2 */ + 9525 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "transposeshuffle.h" 116 8 +.src_ref 3 "transposeshuffle.h" 119 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9526 "10111010" // NOPA; VLDB x0, [p0], #64; ADD.NC lc, r1, #-9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9527 "11001110" // /* MW 9 */ + 9528 "01111101" // /* MW 8 */ + 9529 "10111000" // /* MW 7 */ + 9530 "00000010" // /* MW 6 */ + 9531 "00110100" // /* MW 5 */ + 9532 "00011100" // /* MW 4 */ + 9533 "11110000" // /* MW 3 */ + 9534 "00101100" // /* MW 2 */ + 9535 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "transposeshuffle.h" 119 21 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9536 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9537 "00000000" // /* MW 15 */ + 9538 "00000000" // /* MW 14 */ + 9539 "01111000" // /* MW 13 */ + 9540 "10100101" // /* MW 12 */ + 9541 "00000001" // /* MW 11 */ + 9542 "00000000" // /* MW 10 */ + 9543 "00000000" // /* MW 9 */ + 9544 "00000000" // /* MW 8 */ + 9545 "01011011" // /* MW 7 */ + 9546 "00000001" // /* MW 6 */ + 9547 "01101000" // /* MW 5 */ + 9548 "00111000" // /* MW 4 */ + 9549 "11110000" // /* MW 3 */ + 9550 "00101100" // /* MW 2 */ + 9551 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "transposeshuffle.h" 119 21 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9552 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9553 "00000000" // /* MW 15 */ + 9554 "00000000" // /* MW 14 */ + 9555 "01111000" // /* MW 13 */ + 9556 "10100101" // /* MW 12 */ + 9557 "00000001" // /* MW 11 */ + 9558 "00000000" // /* MW 10 */ + 9559 "00000000" // /* MW 9 */ + 9560 "00000000" // /* MW 8 */ + 9561 "01011011" // /* MW 7 */ + 9562 "00000001" // /* MW 6 */ + 9563 "01101000" // /* MW 5 */ + 9564 "00111000" // /* MW 4 */ + 9565 "11110000" // /* MW 3 */ + 9566 "00101100" // /* MW 2 */ + 9567 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "transposeshuffle.h" 119 21 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9568 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9569 "00000000" // /* MW 15 */ + 9570 "00000000" // /* MW 14 */ + 9571 "01111000" // /* MW 13 */ + 9572 "10100101" // /* MW 12 */ + 9573 "00000001" // /* MW 11 */ + 9574 "00000000" // /* MW 10 */ + 9575 "00000000" // /* MW 9 */ + 9576 "00000000" // /* MW 8 */ + 9577 "01011011" // /* MW 7 */ + 9578 "00000001" // /* MW 6 */ + 9579 "01101000" // /* MW 5 */ + 9580 "00111000" // /* MW 4 */ + 9581 "11110000" // /* MW 3 */ + 9582 "00101100" // /* MW 2 */ + 9583 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "transposeshuffle.h" 119 21 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9584 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9585 "00000000" // /* MW 15 */ + 9586 "00000000" // /* MW 14 */ + 9587 "01111000" // /* MW 13 */ + 9588 "10100101" // /* MW 12 */ + 9589 "00000001" // /* MW 11 */ + 9590 "00000000" // /* MW 10 */ + 9591 "00000000" // /* MW 9 */ + 9592 "00000000" // /* MW 8 */ + 9593 "01011011" // /* MW 7 */ + 9594 "00000001" // /* MW 6 */ + 9595 "01101000" // /* MW 5 */ + 9596 "00111000" // /* MW 4 */ + 9597 "11110000" // /* MW 3 */ + 9598 "00101100" // /* MW 2 */ + 9599 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "transposeshuffle.h" 119 21 +.src_ref 3 "transposeshuffle.h" 120 17 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9600 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9601 "00000000" // /* MW 15 */ + 9602 "00000000" // /* MW 14 */ + 9603 "11101000" // /* MW 13 */ + 9604 "00000000" // /* MW 12 */ + 9605 "00000000" // /* MW 11 */ + 9606 "00000000" // /* MW 10 */ + 9607 "00000000" // /* MW 9 */ + 9608 "00000000" // /* MW 8 */ + 9609 "01011011" // /* MW 7 */ + 9610 "00000001" // /* MW 6 */ + 9611 "01101000" // /* MW 5 */ + 9612 "00111000" // /* MW 4 */ + 9613 "11110000" // /* MW 3 */ + 9614 "00101100" // /* MW 2 */ + 9615 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "transposeshuffle.h" 119 21 first +.src_ref 3 "transposeshuffle.h" 120 17 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9616 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9617 "00000000" // /* MW 15 */ + 9618 "00000000" // /* MW 14 */ + 9619 "11101000" // /* MW 13 */ + 9620 "00000000" // /* MW 12 */ + 9621 "00000000" // /* MW 11 */ + 9622 "00000000" // /* MW 10 */ + 9623 "00000000" // /* MW 9 */ + 9624 "00000000" // /* MW 8 */ + 9625 "01011011" // /* MW 7 */ + 9626 "00000001" // /* MW 6 */ + 9627 "01101000" // /* MW 5 */ + 9628 "00111000" // /* MW 4 */ + 9629 "11110000" // /* MW 3 */ + 9630 "00101100" // /* MW 2 */ + 9631 "00000000" // /* MW 1 */ +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_240 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "transposeshuffle.h" 119 21 +.src_ref 3 "transposeshuffle.h" 120 17 first +.src_ref 3 "transposeshuffle.h" 122 22 first +.begin_of_loop +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 9632 "11100001" // NOPA; VLDB x0, [p0], #64; VST bmll0, [p1], #64; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9633 "00000000" // /* MW 15 */ + 9634 "00000000" // /* MW 14 */ + 9635 "11101000" // /* MW 13 */ + 9636 "00000000" // /* MW 12 */ + 9637 "00000000" // /* MW 11 */ + 9638 "00000000" // /* MW 10 */ + 9639 "00000000" // /* MW 9 */ + 9640 "10000000" // /* MW 8 */ + 9641 "00000110" // /* MW 7 */ + 9642 "00011100" // /* MW 6 */ + 9643 "01101001" // /* MW 5 */ + 9644 "00111000" // /* MW 4 */ + 9645 "11110000" // /* MW 3 */ + 9646 "00101100" // /* MW 2 */ + 9647 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "transposeshuffle.h" 120 17 +.src_ref 3 "transposeshuffle.h" 122 22 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 9648 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9649 "11100000" // /* MW 7 */ + 9650 "00000000" // /* MW 6 */ + 9651 "00000000" // /* MW 5 */ + 9652 "00000000" // /* MW 4 */ + 9653 "11010000" // /* MW 3 */ + 9654 "10000000" // /* MW 2 */ + 9655 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "transposeshuffle.h" 120 17 +.src_ref 3 "transposeshuffle.h" 122 22 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9656 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9657 "11100000" // /* MW 7 */ + 9658 "00000000" // /* MW 6 */ + 9659 "00000000" // /* MW 5 */ + 9660 "00000000" // /* MW 4 */ + 9661 "11010000" // /* MW 3 */ + 9662 "10000000" // /* MW 2 */ + 9663 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "transposeshuffle.h" 120 17 +.src_ref 3 "transposeshuffle.h" 122 22 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9664 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9665 "11100000" // /* MW 7 */ + 9666 "00000000" // /* MW 6 */ + 9667 "00000000" // /* MW 5 */ + 9668 "00000000" // /* MW 4 */ + 9669 "11010000" // /* MW 3 */ + 9670 "10000000" // /* MW 2 */ + 9671 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "transposeshuffle.h" 120 17 +.src_ref 3 "transposeshuffle.h" 122 22 +.src_ref 3 "transposeshuffle.h" 126 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9672 "00111010" // VST bmll0, [p1], #64; RET lr; VSHUFFLE bmll0, x0, x0, r0 /* MW 10 */ /* control_operation: words=10 rts unconditional cycles_taken=1 delay_slots=5 */ + 9673 "11101001" // /* MW 9 */ + 9674 "00000000" // /* MW 8 */ + 9675 "00000000" // /* MW 7 */ + 9676 "00000000" // /* MW 6 */ + 9677 "01000000" // /* MW 5 */ + 9678 "00000001" // /* MW 4 */ + 9679 "11010000" // /* MW 3 */ + 9680 "10000000" // /* MW 2 */ + 9681 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "transposeshuffle.h" 120 17 first +.src_ref 3 "transposeshuffle.h" 122 22 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9682 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9683 "11100000" // /* MW 7 */ + 9684 "00000000" // /* MW 6 */ + 9685 "00000000" // /* MW 5 */ + 9686 "00000000" // /* MW 4 */ + 9687 "11010000" // /* MW 3 */ + 9688 "10000000" // /* MW 2 */ + 9689 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "transposeshuffle.h" 120 17 +.src_ref 3 "transposeshuffle.h" 122 22 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9690 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9691 "11100000" // /* MW 7 */ + 9692 "00000000" // /* MW 6 */ + 9693 "00000000" // /* MW 5 */ + 9694 "00000000" // /* MW 4 */ + 9695 "11010000" // /* MW 3 */ + 9696 "10000000" // /* MW 2 */ + 9697 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "transposeshuffle.h" 120 17 +.src_ref 3 "transposeshuffle.h" 122 22 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9698 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9699 "11100000" // /* MW 7 */ + 9700 "00000000" // /* MW 6 */ + 9701 "00000000" // /* MW 5 */ + 9702 "00000000" // /* MW 4 */ + 9703 "11010000" // /* MW 3 */ + 9704 "10000000" // /* MW 2 */ + 9705 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "transposeshuffle.h" 122 22 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9706 "00001100" // NOPA; VST bmll0, [p1], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9707 "00001101" // /* MW 5 */ + 9708 "00111000" // /* MW 4 */ + 9709 "11110010" // /* MW 3 */ + 9710 "00101100" // /* MW 2 */ + 9711 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "transposeshuffle.h" 122 22 +.delay_slot + 9712 "11100001" // NOPA; NOPB; VST bmll0, [p1], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9713 "00000000" // /* MW 15 */ + 9714 "00000000" // /* MW 14 */ + 9715 "01111000" // /* MW 13 */ + 9716 "10100101" // /* MW 12 */ + 9717 "00000001" // /* MW 11 */ + 9718 "00000000" // /* MW 10 */ + 9719 "00000000" // /* MW 9 */ + 9720 "10000000" // /* MW 8 */ + 9721 "00000110" // /* MW 7 */ + 9722 "00011100" // /* MW 6 */ + 9723 "00100001" // /* MW 5 */ + 9724 "00000000" // /* MW 4 */ + 9725 "11110000" // /* MW 3 */ + 9726 "00101100" // /* MW 2 */ + 9727 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_336 +.src_ref 3 "transposeshuffle.h" 116 8 first + 9728 "11111000" // MOV lc, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9729 "10100000" // /* MW 3 */ + 9730 "01110000" // /* MW 2 */ + 9731 "00011101" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 116 8 + 9732 "01000100" // MOVXM ls, #9744 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9733 "00100000" // /* MW 5 */ + 9734 "11101100" // /* MW 4 */ + 9735 "00100001" // /* MW 3 */ + 9736 "00000000" // /* MW 2 */ + 9737 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 116 8 + 9738 "01000100" // MOVXM le, #9856 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9739 "00000000" // /* MW 5 */ + 9740 "11101101" // /* MW 4 */ + 9741 "00100110" // /* MW 3 */ + 9742 "00000000" // /* MW 2 */ + 9743 "00000000" // /* MW 1 */ +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_352 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "transposeshuffle.h" 119 21 first +.begin_of_loop +.loop_nesting 1 + 9744 "00011000" // VLDB x0, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9745 "00110100" // /* MW 3 */ + 9746 "00011100" // /* MW 2 */ + 9747 "00111000" // /* MW 1 */ + 9748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9749 "00000000" // /* MW 1 */ + 9750 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9751 "01111110" // /* MW 9 */ + 9752 "10100101" // /* MW 8 */ + 9753 "00000001" // /* MW 7 */ + 9754 "00000000" // /* MW 6 */ + 9755 "00010000" // /* MW 5 */ + 9756 "00000000" // /* MW 4 */ + 9757 "11110000" // /* MW 3 */ + 9758 "00101100" // /* MW 2 */ + 9759 "00000000" // /* MW 1 */ + 9760 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9761 "00000000" // /* MW 15 */ + 9762 "00000000" // /* MW 14 */ + 9763 "01111000" // /* MW 13 */ + 9764 "10100101" // /* MW 12 */ + 9765 "00000001" // /* MW 11 */ + 9766 "00000000" // /* MW 10 */ + 9767 "00000000" // /* MW 9 */ + 9768 "00000000" // /* MW 8 */ + 9769 "01011011" // /* MW 7 */ + 9770 "00000001" // /* MW 6 */ + 9771 "00100000" // /* MW 5 */ + 9772 "00000000" // /* MW 4 */ + 9773 "11110000" // /* MW 3 */ + 9774 "00101100" // /* MW 2 */ + 9775 "00000000" // /* MW 1 */ + 9776 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9777 "00000000" // /* MW 15 */ + 9778 "00000000" // /* MW 14 */ + 9779 "01111000" // /* MW 13 */ + 9780 "10100101" // /* MW 12 */ + 9781 "00000001" // /* MW 11 */ + 9782 "00000000" // /* MW 10 */ + 9783 "00000000" // /* MW 9 */ + 9784 "00000000" // /* MW 8 */ + 9785 "01011011" // /* MW 7 */ + 9786 "00000001" // /* MW 6 */ + 9787 "00100000" // /* MW 5 */ + 9788 "00000000" // /* MW 4 */ + 9789 "11110000" // /* MW 3 */ + 9790 "00101100" // /* MW 2 */ + 9791 "00000000" // /* MW 1 */ + 9792 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9793 "00000000" // /* MW 15 */ + 9794 "00000000" // /* MW 14 */ + 9795 "01111000" // /* MW 13 */ + 9796 "10100101" // /* MW 12 */ + 9797 "00000001" // /* MW 11 */ + 9798 "00000000" // /* MW 10 */ + 9799 "00000000" // /* MW 9 */ + 9800 "00000000" // /* MW 8 */ + 9801 "01011011" // /* MW 7 */ + 9802 "00000001" // /* MW 6 */ + 9803 "00100000" // /* MW 5 */ + 9804 "00000000" // /* MW 4 */ + 9805 "11110000" // /* MW 3 */ + 9806 "00101100" // /* MW 2 */ + 9807 "00000000" // /* MW 1 */ + 9808 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9809 "00000000" // /* MW 15 */ + 9810 "00000000" // /* MW 14 */ + 9811 "01111000" // /* MW 13 */ + 9812 "10100101" // /* MW 12 */ + 9813 "00000001" // /* MW 11 */ + 9814 "00000000" // /* MW 10 */ + 9815 "00000000" // /* MW 9 */ + 9816 "00000000" // /* MW 8 */ + 9817 "01011011" // /* MW 7 */ + 9818 "00000001" // /* MW 6 */ + 9819 "00100000" // /* MW 5 */ + 9820 "00000000" // /* MW 4 */ + 9821 "11110000" // /* MW 3 */ + 9822 "00101100" // /* MW 2 */ + 9823 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 120 17 first + 9824 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9825 "00000000" // /* MW 15 */ + 9826 "00000000" // /* MW 14 */ + 9827 "11101000" // /* MW 13 */ + 9828 "00000000" // /* MW 12 */ + 9829 "00000000" // /* MW 11 */ + 9830 "00000000" // /* MW 10 */ + 9831 "00000000" // /* MW 9 */ + 9832 "00000000" // /* MW 8 */ + 9833 "01011011" // /* MW 7 */ + 9834 "00000001" // /* MW 6 */ + 9835 "00100000" // /* MW 5 */ + 9836 "00000000" // /* MW 4 */ + 9837 "11110000" // /* MW 3 */ + 9838 "00101100" // /* MW 2 */ + 9839 "00000000" // /* MW 1 */ + 9840 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9841 "00000000" // /* MW 15 */ + 9842 "00000000" // /* MW 14 */ + 9843 "01111000" // /* MW 13 */ + 9844 "10100101" // /* MW 12 */ + 9845 "00000001" // /* MW 11 */ + 9846 "00000000" // /* MW 10 */ + 9847 "00000000" // /* MW 9 */ + 9848 "00000000" // /* MW 8 */ + 9849 "01011011" // /* MW 7 */ + 9850 "00000001" // /* MW 6 */ + 9851 "00100000" // /* MW 5 */ + 9852 "00000000" // /* MW 4 */ + 9853 "11110000" // /* MW 3 */ + 9854 "00101100" // /* MW 2 */ + 9855 "00000000" // /* MW 1 */ +.label ZLE_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_464 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "transposeshuffle.h" 122 22 first +.end_of_loop + 9856 "11100001" // NOPA; NOPB; VST bmll0, [p1], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9857 "00000000" // /* MW 15 */ + 9858 "00000000" // /* MW 14 */ + 9859 "01111000" // /* MW 13 */ + 9860 "10100101" // /* MW 12 */ + 9861 "00000001" // /* MW 11 */ + 9862 "00000000" // /* MW 10 */ + 9863 "00000000" // /* MW 9 */ + 9864 "10000000" // /* MW 8 */ + 9865 "00000110" // /* MW 7 */ + 9866 "00011100" // /* MW 6 */ + 9867 "00100001" // /* MW 5 */ + 9868 "00000000" // /* MW 4 */ + 9869 "11110000" // /* MW 3 */ + 9870 "00101100" // /* MW 2 */ + 9871 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 126 first +.loop_nesting 0 + 9872 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9873 "00000000" // /* MW 3 */ + 9874 "00101000" // /* MW 2 */ + 9875 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9877 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9879 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9881 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9883 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9884 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9885 "01100111" // /* MW 3 */ + 9886 "00000001" // /* MW 2 */ + 9887 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_496 +.src_ref 3 "transposeshuffle.h" 86 34 + 9888 "01000100" // MOVXM p2, #508544 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9889 "00000000" // /* MW 5 */ + 9890 "11000101" // /* MW 4 */ + 9891 "11000100" // /* MW 3 */ + 9892 "00000111" // /* MW 2 */ + 9893 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 34 first + 9894 "10011000" // LDA r0, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9895 "00010110" // /* MW 3 */ + 9896 "00000100" // /* MW 2 */ + 9897 "00000010" // /* MW 1 */ + 9898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9899 "00000000" // /* MW 1 */ + 9900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9901 "00000000" // /* MW 1 */ + 9902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9903 "00000000" // /* MW 1 */ + 9904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9905 "00000000" // /* MW 1 */ + 9906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9907 "00000000" // /* MW 1 */ + 9908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9909 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 86 26 + 9910 "10000100" // JZ r0, #10528 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10528 delay_slots=5 */ + 9911 "00000001" // /* MW 5 */ + 9912 "00000000" // /* MW 4 */ + 9913 "10010000" // /* MW 3 */ + 9914 "00010100" // /* MW 2 */ + 9915 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9917 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9919 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9920 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9921 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9923 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9925 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 86 8 + 9926 "10111010" // MOVA m5, #36; MOVXM p4, #508548 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9927 "00010000" // /* MW 9 */ + 9928 "01000010" // /* MW 8 */ + 9929 "00110001" // /* MW 7 */ + 9930 "11110010" // /* MW 6 */ + 9931 "00000001" // /* MW 5 */ + 9932 "00000000" // /* MW 4 */ + 9933 "10000000" // /* MW 3 */ + 9934 "10010100" // /* MW 2 */ + 9935 "00000100" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 87 35 + 9936 "10111010" // LDA r1, [p4], #4; MOVX r5, #8; MOV r2, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9937 "01011000" // /* MW 9 */ + 9938 "11111101" // /* MW 8 */ + 9939 "01001111" // /* MW 7 */ + 9940 "00001000" // /* MW 6 */ + 9941 "01010001" // /* MW 5 */ + 9942 "00000000" // /* MW 4 */ + 9943 "11010000" // /* MW 3 */ + 9944 "10000110" // /* MW 2 */ + 9945 "10000011" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1132 26 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 3 "transposeshuffle.h" 86 8 + 9946 "10111010" // LDA r4, [p4], m5; MOVX r3, #1; MOV dc4, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9947 "01011000" // /* MW 9 */ + 9948 "00000000" // /* MW 8 */ + 9949 "01100000" // /* MW 7 */ + 9950 "00101010" // /* MW 6 */ + 9951 "00110000" // /* MW 5 */ + 9952 "00000000" // /* MW 4 */ + 9953 "11010000" // /* MW 3 */ + 9954 "00010010" // /* MW 2 */ + 9955 "10010101" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 86 8 + 9956 "01110110" // LDA m1, [p4], #-8; MOVS dc0, dc4; MOVX r6, #4; MOV m4, #32 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9957 "01011000" // /* MW 11 */ + 9958 "00100000" // /* MW 10 */ + 9959 "00000000" // /* MW 9 */ + 9960 "10001010" // /* MW 8 */ + 9961 "01100000" // /* MW 7 */ + 9962 "00000000" // /* MW 6 */ + 9963 "01001011" // /* MW 5 */ + 9964 "00010000" // /* MW 4 */ + 9965 "11010000" // /* MW 3 */ + 9966 "10010000" // /* MW 2 */ + 9967 "10011101" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1132 26 +.src_ref 4 "transpose.hpp" 224 15 +.src_ref 3 "transposeshuffle.h" 86 8 + 9968 "01110110" // LDA dn1, [p4], #-8; MOVS dc1, dc4; ADD r0, r0, #-1; MOV r7, #52 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9969 "01011000" // /* MW 11 */ + 9970 "00110100" // /* MW 10 */ + 9971 "11101000" // /* MW 9 */ + 9972 "11111000" // /* MW 8 */ + 9973 "00001111" // /* MW 7 */ + 9974 "00000000" // /* MW 6 */ + 9975 "01001011" // /* MW 5 */ + 9976 "00010000" // /* MW 4 */ + 9977 "11010001" // /* MW 3 */ + 9978 "10010100" // /* MW 2 */ + 9979 "10011101" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1132 26 +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 86 8 + 9980 "01110110" // LDA dj1, [p4], #12; MOVS dc5, dc4; MOVXM p2, #10064 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9981 "00010000" // /* MW 11 */ + 9982 "10101000" // /* MW 10 */ + 9983 "00110011" // /* MW 9 */ + 9984 "00001001" // /* MW 8 */ + 9985 "00000000" // /* MW 7 */ + 9986 "00000000" // /* MW 6 */ + 9987 "01001011" // /* MW 5 */ + 9988 "00010000" // /* MW 4 */ + 9989 "11010101" // /* MW 3 */ + 9990 "10011000" // /* MW 2 */ + 9991 "10000111" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 87 12 + 9992 "10111010" // LDA dn5, [p4], #-8; MOVXM p3, #10096 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9993 "00010000" // /* MW 9 */ + 9994 "10111000" // /* MW 8 */ + 9995 "10110011" // /* MW 7 */ + 9996 "00001001" // /* MW 6 */ + 9997 "00000000" // /* MW 5 */ + 9998 "00000000" // /* MW 4 */ + 9999 "11010000" // /* MW 3 */ + 10000 "11010100" // /* MW 2 */ + 10001 "10011101" // /* MW 1 */ +.src_ref 4 "transpose.hpp" 225 15 +.src_ref 3 "transposeshuffle.h" 86 8 + 10002 "00101100" // LDA dj5, [p4], m4; MOVX r16, #53 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10003 "10101010" // /* MW 5 */ + 10004 "01000001" // /* MW 4 */ + 10005 "11010000" // /* MW 3 */ + 10006 "01011000" // /* MW 2 */ + 10007 "10010001" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 87 35 first + 10008 "10111010" // LDA m0, [p4], #-8; LTU r27, r5, r1; ADD.NC r5, r1, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10009 "11001000" // /* MW 9 */ + 10010 "01111111" // /* MW 8 */ + 10011 "10101000" // /* MW 7 */ + 10012 "11100100" // /* MW 6 */ + 10013 "10110000" // /* MW 5 */ + 10014 "00001011" // /* MW 4 */ + 10015 "11010000" // /* MW 3 */ + 10016 "10000000" // /* MW 2 */ + 10017 "10011101" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 first +.src_ref 3 "transposeshuffle.h" 86 8 first + 10018 "10111010" // LDA dn0, [p4], #-8; LSHL r17, r5, r2; ADD.NC r5, r4, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10019 "11001000" // /* MW 9 */ + 10020 "00111111" // /* MW 8 */ + 10021 "10101001" // /* MW 7 */ + 10022 "01101100" // /* MW 6 */ + 10023 "00010001" // /* MW 5 */ + 10024 "00001011" // /* MW 4 */ + 10025 "11010000" // /* MW 3 */ + 10026 "10000100" // /* MW 2 */ + 10027 "10011101" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 86 8 + 10028 "10111010" // LDA dj0, [p4], #12; LSHL r2, r5, r2; ADD.NC r5, r17, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10029 "01001000" // /* MW 9 */ + 10030 "01000000" // /* MW 8 */ + 10031 "10101100" // /* MW 7 */ + 10032 "01101100" // /* MW 6 */ + 10033 "00100001" // /* MW 5 */ + 10034 "00001010" // /* MW 4 */ + 10035 "11010000" // /* MW 3 */ + 10036 "10001000" // /* MW 2 */ + 10037 "10000111" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 + 10038 "10111010" // LDA dn4, [p4]; SEL.EQZ r5, r3, r5, r27; ADD.NC r3, r2, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10039 "01001000" // /* MW 9 */ + 10040 "10000000" // /* MW 8 */ + 10041 "01101000" // /* MW 7 */ + 10042 "10010000" // /* MW 6 */ + 10043 "01010010" // /* MW 5 */ + 10044 "00000110" // /* MW 4 */ + 10045 "11010000" // /* MW 3 */ + 10046 "11000100" // /* MW 2 */ + 10047 "10000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 + 10048 "11100001" // LDA dj4, [p4, #-8]; NOPB; NOPS; ADD r5, r5, #-1; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10049 "00000000" // /* MW 15 */ + 10050 "00000000" // /* MW 14 */ + 10051 "01111000" // /* MW 13 */ + 10052 "10100101" // /* MW 12 */ + 10053 "00000001" // /* MW 11 */ + 10054 "11111000" // /* MW 10 */ + 10055 "01011111" // /* MW 9 */ + 10056 "00001010" // /* MW 8 */ + 10057 "01011011" // /* MW 7 */ + 10058 "00000001" // /* MW 6 */ + 10059 "00100000" // /* MW 5 */ + 10060 "00000000" // /* MW 4 */ + 10061 "11010000" // /* MW 3 */ + 10062 "11001000" // /* MW 2 */ + 10063 "10011100" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_672 +.src_ref 3 "transposeshuffle.h" 87 12 first +.loop_nesting 1 + 10064 "10000100" // JZ r1, #10512 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10512 delay_slots=5 */ + 10065 "00000001" // /* MW 5 */ + 10066 "00000000" // /* MW 4 */ + 10067 "10001000" // /* MW 3 */ + 10068 "00010100" // /* MW 2 */ + 10069 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10071 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10073 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10075 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10077 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10078 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10079 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 87 12 + 10080 "11100001" // NOPA; NOPB; NOPS; NOPX; MOV r17, r5; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10081 "00000000" // /* MW 15 */ + 10082 "00000000" // /* MW 14 */ + 10083 "01111000" // /* MW 13 */ + 10084 "01010000" // /* MW 12 */ + 10085 "00101001" // /* MW 11 */ + 10086 "00000010" // /* MW 10 */ + 10087 "00000000" // /* MW 9 */ + 10088 "00000000" // /* MW 8 */ + 10089 "01011011" // /* MW 7 */ + 10090 "00000001" // /* MW 6 */ + 10091 "00100000" // /* MW 5 */ + 10092 "00000000" // /* MW 4 */ + 10093 "11110000" // /* MW 3 */ + 10094 "00101100" // /* MW 2 */ + 10095 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_704 +.src_ref 3 "transposeshuffle.h" 88 16 first +.loop_nesting 2 + 10096 "10000100" // JZ r4, #10496 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10496 delay_slots=5 */ + 10097 "00000001" // /* MW 5 */ + 10098 "00000000" // /* MW 4 */ + 10099 "10000000" // /* MW 3 */ + 10100 "00010100" // /* MW 2 */ + 10101 "00100000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10103 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10105 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10107 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10108 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10109 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10111 "00000000" // /* MW 1 */ + 10112 "10011000" // LTU r18, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10113 "01101100" // /* MW 3 */ + 10114 "11100100" // /* MW 2 */ + 10115 "00010000" // /* MW 1 */ + 10116 "10000100" // JNZ r18, #10352 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10352 delay_slots=5 */ + 10117 "00000001" // /* MW 5 */ + 10118 "01000000" // /* MW 4 */ + 10119 "00111000" // /* MW 3 */ + 10120 "00010100" // /* MW 2 */ + 10121 "10010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10123 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10125 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10127 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10129 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10131 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1132 26 first +.src_ref 3 "transposeshuffle.h" 88 16 + 10132 "00111010" // VLDB x0, [p0, #64]; MOVXM ls, #10240 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10133 "00010000" // /* MW 9 */ + 10134 "00000000" // /* MW 8 */ + 10135 "01111100" // /* MW 7 */ + 10136 "00001000" // /* MW 6 */ + 10137 "00000000" // /* MW 5 */ + 10138 "00000000" // /* MW 4 */ + 10139 "01101000" // /* MW 3 */ + 10140 "00101000" // /* MW 2 */ + 10141 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1132 26 +.src_ref 3 "transposeshuffle.h" 88 16 first + 10142 "00111010" // VLDB.3D x1, [p0], d1; MOVXM le, #10272 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10143 "00010000" // /* MW 9 */ + 10144 "00010000" // /* MW 8 */ + 10145 "10111100" // /* MW 7 */ + 10146 "00001001" // /* MW 6 */ + 10147 "00000000" // /* MW 5 */ + 10148 "00000000" // /* MW 4 */ + 10149 "11101000" // /* MW 3 */ + 10150 "01110000" // /* MW 2 */ + 10151 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 88 16 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 10152 "10011000" // ADD.NC lc, r3, #-3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10153 "11111110" // /* MW 3 */ + 10154 "01110001" // /* MW 2 */ + 10155 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1132 26 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10156 "00011000" // VLDB x0, [p0, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10157 "00110100" // /* MW 3 */ + 10158 "00010100" // /* MW 2 */ + 10159 "00111000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1132 26 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10160 "11100001" // NOPA; VLDB.3D x1, [p0], d1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10161 "00000000" // /* MW 15 */ + 10162 "00000000" // /* MW 14 */ + 10163 "01111000" // /* MW 13 */ + 10164 "10100101" // /* MW 12 */ + 10165 "00000001" // /* MW 11 */ + 10166 "00000000" // /* MW 10 */ + 10167 "00000000" // /* MW 9 */ + 10168 "00000000" // /* MW 8 */ + 10169 "01011011" // /* MW 7 */ + 10170 "00000001" // /* MW 6 */ + 10171 "11101000" // /* MW 5 */ + 10172 "01110000" // /* MW 4 */ + 10173 "11110000" // /* MW 3 */ + 10174 "00101100" // /* MW 2 */ + 10175 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10176 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10177 "00000000" // /* MW 15 */ + 10178 "00000000" // /* MW 14 */ + 10179 "01111000" // /* MW 13 */ + 10180 "10100101" // /* MW 12 */ + 10181 "00000001" // /* MW 11 */ + 10182 "00000000" // /* MW 10 */ + 10183 "00000000" // /* MW 9 */ + 10184 "00000000" // /* MW 8 */ + 10185 "01011011" // /* MW 7 */ + 10186 "00000001" // /* MW 6 */ + 10187 "00100000" // /* MW 5 */ + 10188 "00000000" // /* MW 4 */ + 10189 "11110000" // /* MW 3 */ + 10190 "00101100" // /* MW 2 */ + 10191 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1132 26 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10192 "11100001" // NOPA; VLDB x0, [p0, #64]; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10193 "00000000" // /* MW 15 */ + 10194 "00000000" // /* MW 14 */ + 10195 "01111000" // /* MW 13 */ + 10196 "10100101" // /* MW 12 */ + 10197 "00000001" // /* MW 11 */ + 10198 "00000000" // /* MW 10 */ + 10199 "00000000" // /* MW 9 */ + 10200 "00000000" // /* MW 8 */ + 10201 "01011011" // /* MW 7 */ + 10202 "00000001" // /* MW 6 */ + 10203 "01101000" // /* MW 5 */ + 10204 "00101000" // /* MW 4 */ + 10205 "11110000" // /* MW 3 */ + 10206 "00101100" // /* MW 2 */ + 10207 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1132 26 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10208 "11100001" // NOPA; VLDB.3D x1, [p0], d1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10209 "00000000" // /* MW 15 */ + 10210 "00000000" // /* MW 14 */ + 10211 "01111000" // /* MW 13 */ + 10212 "10100101" // /* MW 12 */ + 10213 "00000001" // /* MW 11 */ + 10214 "00000000" // /* MW 10 */ + 10215 "00000000" // /* MW 9 */ + 10216 "00000000" // /* MW 8 */ + 10217 "01011011" // /* MW 7 */ + 10218 "00000001" // /* MW 6 */ + 10219 "11101000" // /* MW 5 */ + 10220 "01110000" // /* MW 4 */ + 10221 "11110000" // /* MW 3 */ + 10222 "00101100" // /* MW 2 */ + 10223 "00000000" // /* MW 1 */ +.src_ref 4 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10224 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmlh0, x1, x0, r7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10225 "00000000" // /* MW 15 */ + 10226 "00000000" // /* MW 14 */ + 10227 "11101000" // /* MW 13 */ + 10228 "00001110" // /* MW 12 */ + 10229 "01000100" // /* MW 11 */ + 10230 "00000000" // /* MW 10 */ + 10231 "00000000" // /* MW 9 */ + 10232 "00000000" // /* MW 8 */ + 10233 "01011011" // /* MW 7 */ + 10234 "00000001" // /* MW 6 */ + 10235 "00100000" // /* MW 5 */ + 10236 "00000000" // /* MW 4 */ + 10237 "11110000" // /* MW 3 */ + 10238 "00101100" // /* MW 2 */ + 10239 "00000000" // /* MW 1 */ +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_848 +.src_ref 4 "vector.hpp" 1132 26 first +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "transpose.hpp" 225 15 first +.begin_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 3 + 10240 "11100001" // NOPA; VLDB x0, [p0, #64]; MOVS p4, p1; NOPX; VSHUFFLE bmll0, x1, x0, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10241 "00000000" // /* MW 15 */ + 10242 "00000000" // /* MW 14 */ + 10243 "11101000" // /* MW 13 */ + 10244 "00100000" // /* MW 12 */ + 10245 "00000100" // /* MW 11 */ + 10246 "00000000" // /* MW 10 */ + 10247 "00000000" // /* MW 9 */ + 10248 "00000000" // /* MW 8 */ + 10249 "10001011" // /* MW 7 */ + 10250 "10000100" // /* MW 6 */ + 10251 "01101100" // /* MW 5 */ + 10252 "00101000" // /* MW 4 */ + 10253 "11110000" // /* MW 3 */ + 10254 "00101100" // /* MW 2 */ + 10255 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1132 26 +.src_ref 4 "vector.hpp" 1152 43 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10256 "11100001" // NOPA; VLDB.3D x1, [p0], d1; VST.3D bmlh0, [p1], d0; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10257 "00000000" // /* MW 15 */ + 10258 "00000000" // /* MW 14 */ + 10259 "01111000" // /* MW 13 */ + 10260 "10100101" // /* MW 12 */ + 10261 "00000001" // /* MW 11 */ + 10262 "00000000" // /* MW 10 */ + 10263 "00000000" // /* MW 9 */ + 10264 "10000000" // /* MW 8 */ + 10265 "00100110" // /* MW 7 */ + 10266 "00011000" // /* MW 6 */ + 10267 "11101001" // /* MW 5 */ + 10268 "01110000" // /* MW 4 */ + 10269 "11110000" // /* MW 3 */ + 10270 "00101100" // /* MW 2 */ + 10271 "00000000" // /* MW 1 */ +.label ZLE_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_880 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "transpose.hpp" 224 15 first +.end_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10272 "11100001" // NOPA; NOPB; VST bmll0, [p4, #64]; NOPX; VSHUFFLE bmlh0, x1, x0, r7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10273 "00000000" // /* MW 15 */ + 10274 "00000000" // /* MW 14 */ + 10275 "11101000" // /* MW 13 */ + 10276 "00001110" // /* MW 12 */ + 10277 "01000100" // /* MW 11 */ + 10278 "00000000" // /* MW 10 */ + 10279 "00000000" // /* MW 9 */ + 10280 "10000000" // /* MW 8 */ + 10281 "00000110" // /* MW 7 */ + 10282 "00010100" // /* MW 6 */ + 10283 "00100100" // /* MW 5 */ + 10284 "00000000" // /* MW 4 */ + 10285 "11110000" // /* MW 3 */ + 10286 "00101100" // /* MW 2 */ + 10287 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 2 + 10288 "00000010" // MOVS p4, p1; VSHUFFLE bmll0, x1, x0, r16 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10289 "11100000" // /* MW 7 */ + 10290 "00100000" // /* MW 6 */ + 10291 "00000100" // /* MW 5 */ + 10292 "00000000" // /* MW 4 */ + 10293 "01100000" // /* MW 3 */ + 10294 "10010001" // /* MW 2 */ + 10295 "10010000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1152 43 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10296 "10011000" // VST.3D bmlh0, [p1], d0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10297 "00100110" // /* MW 3 */ + 10298 "00011000" // /* MW 2 */ + 10299 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10300 "00000010" // VST bmll0, [p4, #64]; VSHUFFLE bmlh0, x1, x0, r7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10301 "11100000" // /* MW 7 */ + 10302 "00001110" // /* MW 6 */ + 10303 "01000100" // /* MW 5 */ + 10304 "00000000" // /* MW 4 */ + 10305 "11010000" // /* MW 3 */ + 10306 "10000000" // /* MW 2 */ + 10307 "10000010" // /* MW 1 */ +.src_ref 4 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10308 "11011000" // VSHUFFLE bmll0, x1, x0, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10309 "01000001" // /* MW 3 */ + 10310 "00001000" // /* MW 2 */ + 10311 "00011000" // /* MW 1 */ + 10312 "10000100" // J #10496 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10496 delay_slots=5 */ + 10313 "00000000" // /* MW 5 */ + 10314 "00000000" // /* MW 4 */ + 10315 "10000000" // /* MW 3 */ + 10316 "00010100" // /* MW 2 */ + 10317 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 first +.delay_slot + 10318 "00000010" // VST.3D bmlh0, [p1], d0; MOV p4, p1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10319 "01110000" // /* MW 7 */ + 10320 "01100000" // /* MW 6 */ + 10321 "00110001" // /* MW 5 */ + 10322 "00000010" // /* MW 4 */ + 10323 "11010000" // /* MW 3 */ + 10324 "00000100" // /* MW 2 */ + 10325 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "transpose.hpp" 224 15 first +.delay_slot + 10326 "00000010" // VST bmll0, [p4, #64]; VSHUFFLE bmlh0, x1, x0, r7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10327 "11100000" // /* MW 7 */ + 10328 "00001110" // /* MW 6 */ + 10329 "01000100" // /* MW 5 */ + 10330 "00000000" // /* MW 4 */ + 10331 "11010000" // /* MW 3 */ + 10332 "10000000" // /* MW 2 */ + 10333 "10000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "transpose.hpp" 225 15 first +.delay_slot + 10334 "00000010" // MOVS p4, p1; VSHUFFLE bmll0, x1, x0, r16 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10335 "11100000" // /* MW 7 */ + 10336 "00100000" // /* MW 6 */ + 10337 "00000100" // /* MW 5 */ + 10338 "00000000" // /* MW 4 */ + 10339 "01100000" // /* MW 3 */ + 10340 "10010001" // /* MW 2 */ + 10341 "10010000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1152 43 first +.delay_slot + 10342 "10011000" // VST.3D bmlh0, [p1], d0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10343 "00100110" // /* MW 3 */ + 10344 "00011000" // /* MW 2 */ + 10345 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.delay_slot + 10346 "00001100" // NOPA; VST bmll0, [p4, #64] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10347 "00001101" // /* MW 5 */ + 10348 "00101000" // /* MW 4 */ + 10349 "11111000" // /* MW 3 */ + 10350 "00101100" // /* MW 2 */ + 10351 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_960 +.src_ref 3 "transposeshuffle.h" 88 16 first + 10352 "01000100" // MOVXM ls, #10368 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10353 "00000000" // /* MW 5 */ + 10354 "11110001" // /* MW 4 */ + 10355 "00100001" // /* MW 3 */ + 10356 "00000000" // /* MW 2 */ + 10357 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 88 16 + 10358 "01000100" // MOVXM le, #10480 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10359 "11100000" // /* MW 5 */ + 10360 "11110001" // /* MW 4 */ + 10361 "00100110" // /* MW 3 */ + 10362 "00000000" // /* MW 2 */ + 10363 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 88 16 + 10364 "10011000" // ADD.NC lc, r2, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10365 "00000000" // /* MW 3 */ + 10366 "01110001" // /* MW 2 */ + 10367 "00011101" // /* MW 1 */ +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_976 +.src_ref 4 "vector.hpp" 1132 26 first +.src_ref 4 "vector.hpp" 1152 43 +.begin_of_loop +.loop_nesting 3 + 10368 "11110100" // VLDB x0, [p0, #64]; MOV p4, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10369 "10000001" // /* MW 5 */ + 10370 "11000101" // /* MW 4 */ + 10371 "10001000" // /* MW 3 */ + 10372 "10000110" // /* MW 2 */ + 10373 "00000010" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1132 26 + 10374 "00011000" // VLDB.3D x1, [p0], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10375 "01110100" // /* MW 3 */ + 10376 "00111000" // /* MW 2 */ + 10377 "00111000" // /* MW 1 */ + 10378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10379 "00000000" // /* MW 1 */ + 10380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10381 "00000000" // /* MW 1 */ + 10382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10383 "00000000" // /* MW 1 */ + 10384 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10385 "00000000" // /* MW 15 */ + 10386 "00000000" // /* MW 14 */ + 10387 "01111000" // /* MW 13 */ + 10388 "10100101" // /* MW 12 */ + 10389 "00000001" // /* MW 11 */ + 10390 "00000000" // /* MW 10 */ + 10391 "00000000" // /* MW 9 */ + 10392 "00000000" // /* MW 8 */ + 10393 "01011011" // /* MW 7 */ + 10394 "00000001" // /* MW 6 */ + 10395 "00100000" // /* MW 5 */ + 10396 "00000000" // /* MW 4 */ + 10397 "11110000" // /* MW 3 */ + 10398 "00101100" // /* MW 2 */ + 10399 "00000000" // /* MW 1 */ + 10400 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10401 "00000000" // /* MW 15 */ + 10402 "00000000" // /* MW 14 */ + 10403 "01111000" // /* MW 13 */ + 10404 "10100101" // /* MW 12 */ + 10405 "00000001" // /* MW 11 */ + 10406 "00000000" // /* MW 10 */ + 10407 "00000000" // /* MW 9 */ + 10408 "00000000" // /* MW 8 */ + 10409 "01011011" // /* MW 7 */ + 10410 "00000001" // /* MW 6 */ + 10411 "00100000" // /* MW 5 */ + 10412 "00000000" // /* MW 4 */ + 10413 "11110000" // /* MW 3 */ + 10414 "00101100" // /* MW 2 */ + 10415 "00000000" // /* MW 1 */ + 10416 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10417 "00000000" // /* MW 15 */ + 10418 "00000000" // /* MW 14 */ + 10419 "01111000" // /* MW 13 */ + 10420 "10100101" // /* MW 12 */ + 10421 "00000001" // /* MW 11 */ + 10422 "00000000" // /* MW 10 */ + 10423 "00000000" // /* MW 9 */ + 10424 "00000000" // /* MW 8 */ + 10425 "01011011" // /* MW 7 */ + 10426 "00000001" // /* MW 6 */ + 10427 "00100000" // /* MW 5 */ + 10428 "00000000" // /* MW 4 */ + 10429 "11110000" // /* MW 3 */ + 10430 "00101100" // /* MW 2 */ + 10431 "00000000" // /* MW 1 */ +.src_ref 4 "transpose.hpp" 224 15 first + 10432 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmlh0, x1, x0, r7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10433 "00000000" // /* MW 15 */ + 10434 "00000000" // /* MW 14 */ + 10435 "11101000" // /* MW 13 */ + 10436 "00001110" // /* MW 12 */ + 10437 "01000100" // /* MW 11 */ + 10438 "00000000" // /* MW 10 */ + 10439 "00000000" // /* MW 9 */ + 10440 "00000000" // /* MW 8 */ + 10441 "01011011" // /* MW 7 */ + 10442 "00000001" // /* MW 6 */ + 10443 "00100000" // /* MW 5 */ + 10444 "00000000" // /* MW 4 */ + 10445 "11110000" // /* MW 3 */ + 10446 "00101100" // /* MW 2 */ + 10447 "00000000" // /* MW 1 */ +.src_ref 4 "transpose.hpp" 225 15 first + 10448 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x1, x0, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10449 "00000000" // /* MW 15 */ + 10450 "00000000" // /* MW 14 */ + 10451 "11101000" // /* MW 13 */ + 10452 "00100000" // /* MW 12 */ + 10453 "00000100" // /* MW 11 */ + 10454 "00000000" // /* MW 10 */ + 10455 "00000000" // /* MW 9 */ + 10456 "00000000" // /* MW 8 */ + 10457 "01011011" // /* MW 7 */ + 10458 "00000001" // /* MW 6 */ + 10459 "00100000" // /* MW 5 */ + 10460 "00000000" // /* MW 4 */ + 10461 "11110000" // /* MW 3 */ + 10462 "00101100" // /* MW 2 */ + 10463 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1152 43 first + 10464 "11100001" // NOPA; NOPB; VST.3D bmlh0, [p1], d0; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10465 "00000000" // /* MW 15 */ + 10466 "00000000" // /* MW 14 */ + 10467 "01111000" // /* MW 13 */ + 10468 "10100101" // /* MW 12 */ + 10469 "00000001" // /* MW 11 */ + 10470 "00000000" // /* MW 10 */ + 10471 "00000000" // /* MW 9 */ + 10472 "10000000" // /* MW 8 */ + 10473 "00100110" // /* MW 7 */ + 10474 "00011000" // /* MW 6 */ + 10475 "00100001" // /* MW 5 */ + 10476 "00000000" // /* MW 4 */ + 10477 "11110000" // /* MW 3 */ + 10478 "00101100" // /* MW 2 */ + 10479 "00000000" // /* MW 1 */ +.label ZLE_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1088 +.src_ref 4 "vector.hpp" 1152 43 +.end_of_loop + 10480 "11100001" // NOPA; NOPB; VST bmll0, [p4, #64]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10481 "00000000" // /* MW 15 */ + 10482 "00000000" // /* MW 14 */ + 10483 "01111000" // /* MW 13 */ + 10484 "10100101" // /* MW 12 */ + 10485 "00000001" // /* MW 11 */ + 10486 "00000000" // /* MW 10 */ + 10487 "00000000" // /* MW 9 */ + 10488 "10000000" // /* MW 8 */ + 10489 "00000110" // /* MW 7 */ + 10490 "00010100" // /* MW 6 */ + 10491 "00100100" // /* MW 5 */ + 10492 "00000000" // /* MW 4 */ + 10493 "11110000" // /* MW 3 */ + 10494 "00101100" // /* MW 2 */ + 10495 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1104 +.src_ref 3 "transposeshuffle.h" 87 12 first +.loop_nesting 2 + 10496 "00011000" // JNZD r17, r17, p3 /* MW 4 */ /* control_operation: words=4 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 10497 "11100000" // /* MW 3 */ + 10498 "01100010" // /* MW 2 */ + 10499 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10501 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10503 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10505 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10507 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10508 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10509 "01100111" // /* MW 3 */ + 10510 "00000001" // /* MW 2 */ + 10511 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1120 +.src_ref 3 "transposeshuffle.h" 86 8 first +.loop_nesting 1 + 10512 "00011000" // JNZD r0, r0, p2 /* MW 4 */ /* control_operation: words=4 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 10513 "10100000" // /* MW 3 */ + 10514 "00000000" // /* MW 2 */ + 10515 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10517 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10519 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10521 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10523 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10524 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10525 "01100111" // /* MW 3 */ + 10526 "00000001" // /* MW 2 */ + 10527 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1136 +.src_ref 3 "transposeshuffle.h" 126 first +.loop_nesting 0 + 10528 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10529 "00000000" // /* MW 3 */ + 10530 "00101000" // /* MW 2 */ + 10531 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10533 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10535 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10536 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10537 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10538 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10539 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params__end +.label __Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params___func_end0 + 10541 "00000000" // /* MW 1 */ +.label __ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj___func_begin0 +.label _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj +.function transpose4d_adf_wrapper, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> > > _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj +.src_ref 10 "transpose4d_adf_wrapper.cpp" 33 first +.function_start + 10544 "11111000" // MOV p3, p6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10545 "11000000" // /* MW 3 */ + 10546 "01101100" // /* MW 2 */ + 10547 "00011011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 10 "transpose4d_adf_wrapper.cpp" 37 8 + 10548 "00111010" // MOVS p6, p1; MOVXM p1, #508436 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10549 "00010001" // /* MW 9 */ + 10550 "00001010" // /* MW 8 */ + 10551 "10110001" // /* MW 7 */ + 10552 "11110000" // /* MW 6 */ + 10553 "00000001" // /* MW 5 */ + 10554 "00000000" // /* MW 4 */ + 10555 "01100000" // /* MW 3 */ + 10556 "10010001" // /* MW 2 */ + 10557 "11010000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 37 8 first + 10558 "10011000" // LDA r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10559 "00010110" // /* MW 3 */ + 10560 "00000110" // /* MW 2 */ + 10561 "00000001" // /* MW 1 */ + 10562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10563 "00000000" // /* MW 1 */ + 10564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10565 "00000000" // /* MW 1 */ + 10566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10567 "00000000" // /* MW 1 */ + 10568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10569 "00000000" // /* MW 1 */ + 10570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10571 "00000000" // /* MW 1 */ + 10572 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10573 "00000000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 37 8 +.src_ref 10 "transpose4d_adf_wrapper.cpp" 37 15 + 10574 "10000100" // JNZ r16, #10640 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10640 delay_slots=5 */ + 10575 "00000001" // /* MW 5 */ + 10576 "01000000" // /* MW 4 */ + 10577 "11001000" // /* MW 3 */ + 10578 "00010100" // /* MW 2 */ + 10579 "10000000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 33 +.delay_slot + 10580 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10581 "00000001" // /* MW 5 */ + 10582 "00000000" // /* MW 4 */ + 10583 "00000000" // /* MW 3 */ + 10584 "00001000" // /* MW 2 */ + 10585 "00000000" // /* MW 1 */ +.delay_slot + 10586 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10587 "00111101" // /* MW 3 */ + 10588 "11110100" // /* MW 2 */ + 10589 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.delay_slot + 10590 "00000010" // MOVS p7, p0; MOV p1, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10591 "01110000" // /* MW 7 */ + 10592 "01100000" // /* MW 6 */ + 10593 "10110111" // /* MW 5 */ + 10594 "00000000" // /* MW 4 */ + 10595 "01100000" // /* MW 3 */ + 10596 "00010001" // /* MW 2 */ + 10597 "11110000" // /* MW 1 */ +.delay_slot + 10598 "10011000" // ST p3, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10599 "10011101" // /* MW 3 */ + 10600 "11111001" // /* MW 2 */ + 10601 "00001111" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 38 8 +.delay_slot + 10602 "00111010" // ST p1, [sp, #-4]; MOVXM p0, #508544 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10603 "00010001" // /* MW 9 */ + 10604 "01000000" // /* MW 8 */ + 10605 "00110001" // /* MW 7 */ + 10606 "11110000" // /* MW 6 */ + 10607 "00000001" // /* MW 5 */ + 10608 "00000000" // /* MW 4 */ + 10609 "10110000" // /* MW 3 */ + 10610 "10010011" // /* MW 2 */ + 10611 "11111111" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 38 8 first +.no_stack_arguments + 10612 "00000100" // JL #9120 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9120 delay_slots=5 */ + 10613 "00000001" // /* MW 5 */ + 10614 "00000000" // /* MW 4 */ + 10615 "11010000" // /* MW 3 */ + 10616 "00010001" // /* MW 2 */ + 10617 "00000000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 38 8 +.delay_slot + 10618 "11111000" // MOV p1, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10619 "11000000" // /* MW 3 */ + 10620 "01100100" // /* MW 2 */ + 10621 "00011001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10622 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10623 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10625 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10627 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10628 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10629 "10000001" // /* MW 11 */ + 10630 "10101101" // /* MW 10 */ + 10631 "00000000" // /* MW 9 */ + 10632 "00000000" // /* MW 8 */ + 10633 "00000000" // /* MW 7 */ + 10634 "00000000" // /* MW 6 */ + 10635 "00100000" // /* MW 5 */ + 10636 "00000000" // /* MW 4 */ + 10637 "11110000" // /* MW 3 */ + 10638 "00101100" // /* MW 2 */ + 10639 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj_96 +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 3 "transposeshuffle.h" 137 72 +.return_address + 10640 "10111010" // LDA r16, [p7]; MOVXM p7, #508564 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10641 "00010000" // /* MW 9 */ + 10642 "01001010" // /* MW 8 */ + 10643 "10110001" // /* MW 7 */ + 10644 "11110011" // /* MW 6 */ + 10645 "00000001" // /* MW 5 */ + 10646 "00000000" // /* MW 4 */ + 10647 "11010000" // /* MW 3 */ + 10648 "11000010" // /* MW 2 */ + 10649 "11100000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 137 72 first + 10650 "10011000" // LDA r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10651 "00110110" // /* MW 3 */ + 10652 "00000110" // /* MW 2 */ + 10653 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 10654 "10011000" // LDA p1, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10655 "10011110" // /* MW 3 */ + 10656 "00000100" // /* MW 2 */ + 10657 "00000110" // /* MW 1 */ + 10658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10659 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 137 11 first +.no_stack_arguments + 10660 "00000100" // JL #9392 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9392 delay_slots=5 */ + 10661 "00000001" // /* MW 5 */ + 10662 "00000000" // /* MW 4 */ + 10663 "01011000" // /* MW 3 */ + 10664 "00010010" // /* MW 2 */ + 10665 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 137 64 +.delay_slot + 10666 "00011000" // MOVX r18, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10667 "00000101" // /* MW 3 */ + 10668 "00100100" // /* MW 2 */ + 10669 "00010000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 137 11 +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 27 +.delay_slot + 10670 "01000100" // MOVXM p2, #508544 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10671 "00000000" // /* MW 5 */ + 10672 "11000101" // /* MW 4 */ + 10673 "11000100" // /* MW 3 */ + 10674 "00000111" // /* MW 2 */ + 10675 "00000000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 27 +.delay_slot + 10676 "11111000" // MOV p6, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10677 "11000000" // /* MW 3 */ + 10678 "01100100" // /* MW 2 */ + 10679 "00011110" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 137 64 +.delay_slot + 10680 "10011000" // LSHL r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10681 "00101101" // /* MW 3 */ + 10682 "01100011" // /* MW 2 */ + 10683 "00010100" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 137 64 +.delay_slot + 10684 "01011000" // ADD.NC p0, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10685 "11000001" // /* MW 3 */ + 10686 "01101000" // /* MW 2 */ + 10687 "00011000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 8 +.src_ref 10 "transpose4d_adf_wrapper.cpp" 46 +.return_address + 10688 "10111010" // LDA lr, [sp, #-12]; MOVXM p2, #508436 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10689 "00010000" // /* MW 9 */ + 10690 "00001010" // /* MW 8 */ + 10691 "00110001" // /* MW 7 */ + 10692 "11110001" // /* MW 6 */ + 10693 "00000001" // /* MW 5 */ + 10694 "00000000" // /* MW 4 */ + 10695 "00100000" // /* MW 3 */ + 10696 "10000111" // /* MW 2 */ + 10697 "11111110" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 8 +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 8 first + 10698 "00101100" // LDA r16, [p2]; MOVX r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10699 "00000010" // /* MW 5 */ + 10700 "01100000" // /* MW 4 */ + 10701 "11010000" // /* MW 3 */ + 10702 "11000010" // /* MW 2 */ + 10703 "01000000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 27 + 10704 "10011000" // LDA r17, [p6, #24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10705 "00110110" // /* MW 3 */ + 10706 "01100110" // /* MW 2 */ + 10707 "00000110" // /* MW 1 */ + 10708 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10709 "00011001" // /* MW 3 */ + 10710 "11111011" // /* MW 2 */ + 10711 "00000111" // /* MW 1 */ + 10712 "00011000" // LDA p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10713 "10011001" // /* MW 3 */ + 10714 "11111111" // /* MW 2 */ + 10715 "00000111" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 46 first + 10716 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10717 "00000001" // /* MW 5 */ + 10718 "00000000" // /* MW 4 */ + 10719 "00000000" // /* MW 3 */ + 10720 "11111000" // /* MW 2 */ + 10721 "11111111" // /* MW 1 */ + 10722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10723 "00000000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 46 + 10724 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10725 "00000000" // /* MW 3 */ + 10726 "00101000" // /* MW 2 */ + 10727 "00010000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 8 first +.delay_slot + 10728 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10729 "00000111" // /* MW 3 */ + 10730 "00100000" // /* MW 2 */ + 10731 "00010100" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 17 +.delay_slot + 10732 "10011000" // EQ r27, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10733 "00000111" // /* MW 3 */ + 10734 "01110111" // /* MW 2 */ + 10735 "00010100" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 8 +.delay_slot + 10736 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10737 "10000010" // /* MW 3 */ + 10738 "00100001" // /* MW 2 */ + 10739 "00010100" // /* MW 1 */ +.delay_slot + 10740 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10741 "00010001" // /* MW 3 */ + 10742 "00000110" // /* MW 2 */ + 10743 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj__end +.label __ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj___func_end0 + 10745 "00000000" // /* MW 1 */ +.label __Z14_b7835_wrapperPPv___func_begin0 +.label _Z14_b7835_wrapperPPv +.function _b7835_wrapper _Z14_b7835_wrapperPPv +.src_ref 0 "0_0_reloadable4.cc" 30 first +.src_ref 0 "0_0_reloadable4.cc" 32 79 +.function_start + 10752 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10753 "11000000" // /* MW 3 */ + 10754 "01100000" // /* MW 2 */ + 10755 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 32 79 first + 10756 "10011000" // LDA p0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10757 "00011110" // /* MW 3 */ + 10758 "00011100" // /* MW 2 */ + 10759 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 34 46 first + 10760 "10011000" // LDA p2, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10761 "00011110" // /* MW 3 */ + 10762 "00010101" // /* MW 2 */ + 10763 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 33 80 first + 10764 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10765 "10011110" // /* MW 3 */ + 10766 "00000100" // /* MW 2 */ + 10767 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 31 4 first +.tail_call + 10768 "10000100" // J #10544 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10544 delay_slots=5 */ + 10769 "00000000" // /* MW 5 */ + 10770 "00000000" // /* MW 4 */ + 10771 "10011000" // /* MW 3 */ + 10772 "00010100" // /* MW 2 */ + 10773 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10775 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10776 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10777 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10778 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10779 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10780 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10781 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z14_b7835_wrapperPPv__end +.label __Z14_b7835_wrapperPPv___func_end0 + 10783 "00000000" // /* MW 1 */ +.label __ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj___func_begin0 +.label _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj +.function buffer_pad_adf_wrapper, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> > > _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 24 first +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 26 25 +.function_start + 10784 "11010100" // LDA el0, [p1]; MOV r17, p2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10785 "10000001" // /* MW 5 */ + 10786 "10101001" // /* MW 4 */ + 10787 "11011000" // /* MW 3 */ + 10788 "10000101" // /* MW 2 */ + 10789 "00100000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 26 25 first + 10790 "00011000" // ADD.NC p1, r17, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10791 "10000010" // /* MW 3 */ + 10792 "01101000" // /* MW 2 */ + 10793 "00011001" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 26 25 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 10794 "10011000" // LDA r18, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10795 "01010110" // /* MW 3 */ + 10796 "00011110" // /* MW 2 */ + 10797 "00000001" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 27 33 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10798 "10011000" // LDA r15, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10799 "11110110" // /* MW 3 */ + 10800 "00000101" // /* MW 2 */ + 10801 "00000001" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10803 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10805 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10807 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10809 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10810 "11111000" // MOV r0, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10811 "10100000" // /* MW 3 */ + 10812 "00010111" // /* MW 2 */ + 10813 "00011000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 24 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 10814 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10815 "00000001" // /* MW 5 */ + 10816 "00000000" // /* MW 4 */ + 10817 "00000000" // /* MW 3 */ + 10818 "00001000" // /* MW 2 */ + 10819 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 43 first +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 72 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10820 "01100100" // MUL r18, r15, r18; MOV r16, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10821 "11111101" // /* MW 5 */ + 10822 "00111111" // /* MW 4 */ + 10823 "11111000" // /* MW 3 */ + 10824 "10100101" // /* MW 2 */ + 10825 "01111100" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 72 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10826 "00111010" // ST r18, [sp, #-20]; MOVXM r17, #1073741823 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10827 "10010001" // /* MW 9 */ + 10828 "11111111" // /* MW 8 */ + 10829 "00101111" // /* MW 7 */ + 10830 "11111110" // /* MW 6 */ + 10831 "11111111" // /* MW 5 */ + 10832 "00001111" // /* MW 4 */ + 10833 "10110000" // /* MW 3 */ + 10834 "11001010" // /* MW 2 */ + 10835 "11111101" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 72 + 10836 "10011000" // LSHL r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10837 "00001101" // /* MW 3 */ + 10838 "10100001" // /* MW 2 */ + 10839 "00010100" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 72 + 10840 "10011000" // AND r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10841 "00000100" // /* MW 3 */ + 10842 "01100001" // /* MW 2 */ + 10843 "00010100" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 4 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 22 + 10844 "10000100" // JZ r16, #10928 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10928 delay_slots=5 */ + 10845 "00000001" // /* MW 5 */ + 10846 "00000000" // /* MW 4 */ + 10847 "01011000" // /* MW 3 */ + 10848 "00010101" // /* MW 2 */ + 10849 "10000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.delay_slot + 10850 "11010100" // LDA p7, [p0]; MOV p0, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10851 "10000001" // /* MW 5 */ + 10852 "11011101" // /* MW 4 */ + 10853 "11010000" // /* MW 3 */ + 10854 "11110011" // /* MW 2 */ + 10855 "00000000" // /* MW 1 */ +.delay_slot + 10856 "10011000" // ST p0, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10857 "00011101" // /* MW 3 */ + 10858 "11111000" // /* MW 2 */ + 10859 "00001111" // /* MW 1 */ +.delay_slot + 10860 "10011000" // ST r14, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10861 "11010101" // /* MW 3 */ + 10862 "11110101" // /* MW 2 */ + 10863 "00001111" // /* MW 1 */ +.delay_slot + 10864 "10011000" // ST lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10865 "00111101" // /* MW 3 */ + 10866 "11110000" // /* MW 2 */ + 10867 "00001111" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 25 24 first +.delay_slot + 10868 "00001100" // LDA r14, [p1, #-8]; ST r0, [sp, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10869 "00101011" // /* MW 5 */ + 10870 "11111000" // /* MW 4 */ + 10871 "11011111" // /* MW 3 */ + 10872 "10111010" // /* MW 2 */ + 10873 "00111100" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 37 31 + 10874 "01011100" // ST el0, [sp, #-24]; MOVX r0, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10875 "00000010" // /* MW 5 */ + 10876 "00000000" // /* MW 4 */ + 10877 "10110000" // /* MW 3 */ + 10878 "00000101" // /* MW 2 */ + 10879 "11111101" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 37 31 + 10880 "00011000" // LDA p1, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10881 "10011001" // /* MW 3 */ + 10882 "11101000" // /* MW 2 */ + 10883 "00000111" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 37 31 first +.no_stack_arguments + 10884 "00000100" // JL #12608 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12608 delay_slots=5 */ + 10885 "00000001" // /* MW 5 */ + 10886 "00000000" // /* MW 4 */ + 10887 "10100000" // /* MW 3 */ + 10888 "00011000" // /* MW 2 */ + 10889 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 4 +.delay_slot + 10890 "00011000" // MOVX r17, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10891 "00001001" // /* MW 3 */ + 10892 "00100010" // /* MW 2 */ + 10893 "00010000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 4 first +.delay_slot + 10894 "10011000" // LSHL r1, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10895 "00011101" // /* MW 3 */ + 10896 "00000011" // /* MW 2 */ + 10897 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10899 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10901 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10902 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10903 "01111110" // /* MW 9 */ + 10904 "10100101" // /* MW 8 */ + 10905 "00000001" // /* MW 7 */ + 10906 "00000000" // /* MW 6 */ + 10907 "00010000" // /* MW 5 */ + 10908 "00000000" // /* MW 4 */ + 10909 "11110000" // /* MW 3 */ + 10910 "00101100" // /* MW 2 */ + 10911 "00000000" // /* MW 1 */ +.return_address + 10912 "10000100" // J #10944 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10944 delay_slots=5 */ + 10913 "00000000" // /* MW 5 */ + 10914 "00000000" // /* MW 4 */ + 10915 "01100000" // /* MW 3 */ + 10916 "00010101" // /* MW 2 */ + 10917 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10919 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10920 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10921 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10923 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10925 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10927 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_144 + 10928 "11100001" // NOPA; NOPB; ST el0, [sp, #-24]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10929 "00000000" // /* MW 15 */ + 10930 "00000000" // /* MW 14 */ + 10931 "01111000" // /* MW 13 */ + 10932 "10100101" // /* MW 12 */ + 10933 "00000001" // /* MW 11 */ + 10934 "00000000" // /* MW 10 */ + 10935 "00000000" // /* MW 9 */ + 10936 "10000000" // /* MW 8 */ + 10937 "00101101" // /* MW 7 */ + 10938 "11101000" // /* MW 6 */ + 10939 "00100111" // /* MW 5 */ + 10940 "00000000" // /* MW 4 */ + 10941 "11110000" // /* MW 3 */ + 10942 "00101100" // /* MW 2 */ + 10943 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_160 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 40 4 first +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 40 22 first + 10944 "10000100" // JZ r15, #11216 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11216 delay_slots=5 */ + 10945 "00000001" // /* MW 5 */ + 10946 "00000000" // /* MW 4 */ + 10947 "11101000" // /* MW 3 */ + 10948 "00010101" // /* MW 2 */ + 10949 "01111000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10951 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10953 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10955 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10957 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10959 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 41 8 first + 10960 "10111010" // LDA r17, [sp, #-20]; MOVXM ls, #11056 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10961 "00010000" // /* MW 9 */ + 10962 "10011000" // /* MW 8 */ + 10963 "01111101" // /* MW 7 */ + 10964 "00001000" // /* MW 6 */ + 10965 "00000000" // /* MW 5 */ + 10966 "00000000" // /* MW 4 */ + 10967 "00100000" // /* MW 3 */ + 10968 "11000110" // /* MW 2 */ + 10969 "11111101" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 41 8 + 10970 "10111010" // MOVA r19, #1; MOVXM le, #11152 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10971 "00010000" // /* MW 9 */ + 10972 "11001000" // /* MW 8 */ + 10973 "10111101" // /* MW 7 */ + 10974 "00001001" // /* MW 6 */ + 10975 "00000000" // /* MW 5 */ + 10976 "00000000" // /* MW 4 */ + 10977 "00000000" // /* MW 3 */ + 10978 "00110011" // /* MW 2 */ + 10979 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 42 36 + 10980 "10111010" // LDA r18, [sp, #-24]; LSHL r20, r14, r19; ADD.NC r16, r15, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10981 "11001000" // /* MW 9 */ + 10982 "11111111" // /* MW 8 */ + 10983 "00001011" // /* MW 7 */ + 10984 "11101110" // /* MW 6 */ + 10985 "01001001" // /* MW 5 */ + 10986 "00011101" // /* MW 4 */ + 10987 "00100000" // /* MW 3 */ + 10988 "01001010" // /* MW 2 */ + 10989 "11111101" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 40 4 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 45 + 10990 "10111010" // LDA lr, [sp, #-16]; MOVXM p0, #11024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10991 "00010000" // /* MW 9 */ + 10992 "10001000" // /* MW 8 */ + 10993 "00110101" // /* MW 7 */ + 10994 "00001000" // /* MW 6 */ + 10995 "00000000" // /* MW 5 */ + 10996 "00000000" // /* MW 4 */ + 10997 "00100000" // /* MW 3 */ + 10998 "00000111" // /* MW 2 */ + 10999 "11111110" // /* MW 1 */ + 11000 "11111000" // MOV m0, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11001 "00100000" // /* MW 3 */ + 11002 "00001010" // /* MW 2 */ + 11003 "00011000" // /* MW 1 */ + 11004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11005 "00000000" // /* MW 1 */ + 11006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11007 "00000000" // /* MW 1 */ + 11008 "11100001" // NOPA; NOPB; NOPS; LSHL r17, r17, r19; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11009 "00000000" // /* MW 15 */ + 11010 "00000000" // /* MW 14 */ + 11011 "01111000" // /* MW 13 */ + 11012 "10100101" // /* MW 12 */ + 11013 "00000001" // /* MW 11 */ + 11014 "11101100" // /* MW 10 */ + 11015 "00011001" // /* MW 9 */ + 11016 "00100011" // /* MW 8 */ + 11017 "01011011" // /* MW 7 */ + 11018 "00000001" // /* MW 6 */ + 11019 "00100000" // /* MW 5 */ + 11020 "00000000" // /* MW 4 */ + 11021 "11110000" // /* MW 3 */ + 11022 "00101100" // /* MW 2 */ + 11023 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_240 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 41 8 +.loop_nesting 1 + 11024 "10000100" // JZ r14, #11168 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11168 delay_slots=5 */ + 11025 "00000001" // /* MW 5 */ + 11026 "00000000" // /* MW 4 */ + 11027 "11010000" // /* MW 3 */ + 11028 "00010101" // /* MW 2 */ + 11029 "01110000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11031 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11032 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11033 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11034 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11035 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11036 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11037 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11038 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11039 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 41 8 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 42 36 + 11040 "00000010" // MOVS p2, p7; MOV lc, r14 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11041 "01110000" // /* MW 7 */ + 11042 "10010000" // /* MW 6 */ + 11043 "10111011" // /* MW 5 */ + 11044 "00000010" // /* MW 4 */ + 11045 "01100000" // /* MW 3 */ + 11046 "10010001" // /* MW 2 */ + 11047 "01010011" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 42 36 + 11048 "00000010" // NOPS; MOV p1, r18 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11049 "01110000" // /* MW 7 */ + 11050 "10010000" // /* MW 6 */ + 11051 "10110100" // /* MW 5 */ + 11052 "00000000" // /* MW 4 */ + 11053 "01100000" // /* MW 3 */ + 11054 "00101011" // /* MW 2 */ + 11055 "00000000" // /* MW 1 */ +.label ZLS_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_272 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 42 36 first +.begin_of_loop +.loop_nesting 2 + 11056 "11100001" // LDA.s16 r19, [p2], #2; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11057 "00000000" // /* MW 15 */ + 11058 "00000000" // /* MW 14 */ + 11059 "01111000" // /* MW 13 */ + 11060 "10100101" // /* MW 12 */ + 11061 "00000001" // /* MW 11 */ + 11062 "00000000" // /* MW 10 */ + 11063 "00000000" // /* MW 9 */ + 11064 "00000000" // /* MW 8 */ + 11065 "01011011" // /* MW 7 */ + 11066 "00000001" // /* MW 6 */ + 11067 "00100000" // /* MW 5 */ + 11068 "00000000" // /* MW 4 */ + 11069 "01010000" // /* MW 3 */ + 11070 "11001110" // /* MW 2 */ + 11071 "01000011" // /* MW 1 */ + 11072 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11073 "00000000" // /* MW 15 */ + 11074 "00000000" // /* MW 14 */ + 11075 "01111000" // /* MW 13 */ + 11076 "10100101" // /* MW 12 */ + 11077 "00000001" // /* MW 11 */ + 11078 "00000000" // /* MW 10 */ + 11079 "00000000" // /* MW 9 */ + 11080 "00000000" // /* MW 8 */ + 11081 "01011011" // /* MW 7 */ + 11082 "00000001" // /* MW 6 */ + 11083 "00100000" // /* MW 5 */ + 11084 "00000000" // /* MW 4 */ + 11085 "11110000" // /* MW 3 */ + 11086 "00101100" // /* MW 2 */ + 11087 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 42 36 + 11088 "11100001" // ST.s16 r19, [p1], #2; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11089 "00000000" // /* MW 15 */ + 11090 "00000000" // /* MW 14 */ + 11091 "01111000" // /* MW 13 */ + 11092 "10100101" // /* MW 12 */ + 11093 "00000001" // /* MW 11 */ + 11094 "00000000" // /* MW 10 */ + 11095 "00000000" // /* MW 9 */ + 11096 "00000000" // /* MW 8 */ + 11097 "01011011" // /* MW 7 */ + 11098 "00000001" // /* MW 6 */ + 11099 "00100000" // /* MW 5 */ + 11100 "00000000" // /* MW 4 */ + 11101 "11100000" // /* MW 3 */ + 11102 "11001110" // /* MW 2 */ + 11103 "00100011" // /* MW 1 */ + 11104 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11105 "00000000" // /* MW 15 */ + 11106 "00000000" // /* MW 14 */ + 11107 "01111000" // /* MW 13 */ + 11108 "10100101" // /* MW 12 */ + 11109 "00000001" // /* MW 11 */ + 11110 "00000000" // /* MW 10 */ + 11111 "00000000" // /* MW 9 */ + 11112 "00000000" // /* MW 8 */ + 11113 "01011011" // /* MW 7 */ + 11114 "00000001" // /* MW 6 */ + 11115 "00100000" // /* MW 5 */ + 11116 "00000000" // /* MW 4 */ + 11117 "11110000" // /* MW 3 */ + 11118 "00101100" // /* MW 2 */ + 11119 "00000000" // /* MW 1 */ + 11120 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11121 "00000000" // /* MW 15 */ + 11122 "00000000" // /* MW 14 */ + 11123 "01111000" // /* MW 13 */ + 11124 "10100101" // /* MW 12 */ + 11125 "00000001" // /* MW 11 */ + 11126 "00000000" // /* MW 10 */ + 11127 "00000000" // /* MW 9 */ + 11128 "00000000" // /* MW 8 */ + 11129 "01011011" // /* MW 7 */ + 11130 "00000001" // /* MW 6 */ + 11131 "00100000" // /* MW 5 */ + 11132 "00000000" // /* MW 4 */ + 11133 "11110000" // /* MW 3 */ + 11134 "00101100" // /* MW 2 */ + 11135 "00000000" // /* MW 1 */ + 11136 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11137 "00000000" // /* MW 15 */ + 11138 "00000000" // /* MW 14 */ + 11139 "01111000" // /* MW 13 */ + 11140 "10100101" // /* MW 12 */ + 11141 "00000001" // /* MW 11 */ + 11142 "00000000" // /* MW 10 */ + 11143 "00000000" // /* MW 9 */ + 11144 "00000000" // /* MW 8 */ + 11145 "01011011" // /* MW 7 */ + 11146 "00000001" // /* MW 6 */ + 11147 "00100000" // /* MW 5 */ + 11148 "00000000" // /* MW 4 */ + 11149 "11110000" // /* MW 3 */ + 11150 "00101100" // /* MW 2 */ + 11151 "00000000" // /* MW 1 */ +.label ZLE_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_368 +.end_of_loop + 11152 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11153 "00000000" // /* MW 15 */ + 11154 "00000000" // /* MW 14 */ + 11155 "01111000" // /* MW 13 */ + 11156 "10100101" // /* MW 12 */ + 11157 "00000001" // /* MW 11 */ + 11158 "00000000" // /* MW 10 */ + 11159 "00000000" // /* MW 9 */ + 11160 "00000000" // /* MW 8 */ + 11161 "01011011" // /* MW 7 */ + 11162 "00000001" // /* MW 6 */ + 11163 "00100000" // /* MW 5 */ + 11164 "00000000" // /* MW 4 */ + 11165 "11110000" // /* MW 3 */ + 11166 "00101100" // /* MW 2 */ + 11167 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_384 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 40 4 first +.loop_nesting 1 + 11168 "00011100" // PADDB [p7], m0; JNZD r16, r16, p0 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 11169 "01000000" // /* MW 5 */ + 11170 "01000000" // /* MW 4 */ + 11171 "00001000" // /* MW 3 */ + 11172 "01110010" // /* MW 2 */ + 11173 "11100001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11175 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11177 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11179 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11181 "00000000" // /* MW 1 */ +.delay_slot + 11182 "01011000" // ADD.NC r18, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11183 "11001001" // /* MW 3 */ + 11184 "10011000" // /* MW 2 */ + 11185 "00011100" // /* MW 1 */ +.loop_nesting 0 + 11186 "10000100" // J #11232 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11232 delay_slots=5 */ + 11187 "00000000" // /* MW 5 */ + 11188 "00000000" // /* MW 4 */ + 11189 "11110000" // /* MW 3 */ + 11190 "00010101" // /* MW 2 */ + 11191 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11193 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11195 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11197 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11199 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11200 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11201 "00000000" // /* MW 15 */ + 11202 "00000000" // /* MW 14 */ + 11203 "01111000" // /* MW 13 */ + 11204 "10100101" // /* MW 12 */ + 11205 "00000001" // /* MW 11 */ + 11206 "00000000" // /* MW 10 */ + 11207 "00000000" // /* MW 9 */ + 11208 "00000000" // /* MW 8 */ + 11209 "01011011" // /* MW 7 */ + 11210 "00000001" // /* MW 6 */ + 11211 "00100000" // /* MW 5 */ + 11212 "00000000" // /* MW 4 */ + 11213 "11110000" // /* MW 3 */ + 11214 "00101100" // /* MW 2 */ + 11215 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_432 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 45 + 11216 "11100001" // LDA lr, [sp, #-16]; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11217 "00000000" // /* MW 15 */ + 11218 "00000000" // /* MW 14 */ + 11219 "01111000" // /* MW 13 */ + 11220 "10100101" // /* MW 12 */ + 11221 "00000001" // /* MW 11 */ + 11222 "00000000" // /* MW 10 */ + 11223 "00000000" // /* MW 9 */ + 11224 "00000000" // /* MW 8 */ + 11225 "01011011" // /* MW 7 */ + 11226 "00000001" // /* MW 6 */ + 11227 "00100000" // /* MW 5 */ + 11228 "00000000" // /* MW 4 */ + 11229 "00100000" // /* MW 3 */ + 11230 "00000111" // /* MW 2 */ + 11231 "11111110" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_448 + 11232 "00011000" // LDA r14, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11233 "11010001" // /* MW 3 */ + 11234 "11110101" // /* MW 2 */ + 11235 "00000111" // /* MW 1 */ + 11236 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11237 "10011001" // /* MW 3 */ + 11238 "11111011" // /* MW 2 */ + 11239 "00000111" // /* MW 1 */ + 11240 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11241 "11110001" // /* MW 3 */ + 11242 "11111101" // /* MW 2 */ + 11243 "00000111" // /* MW 1 */ + 11244 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11245 "00000000" // /* MW 1 */ + 11246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11247 "00000000" // /* MW 1 */ + 11248 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11249 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 45 first + 11250 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11251 "00000000" // /* MW 3 */ + 11252 "00101000" // /* MW 2 */ + 11253 "00010000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 45 +.delay_slot + 11254 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11255 "00000001" // /* MW 5 */ + 11256 "00000000" // /* MW 4 */ + 11257 "00000000" // /* MW 3 */ + 11258 "11111000" // /* MW 2 */ + 11259 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11261 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11263 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11265 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj__end +.label __ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj___func_end0 + 11267 "00000000" // /* MW 1 */ +.label __Z14_b8148_wrapperPPv___func_begin0 +.label _Z14_b8148_wrapperPPv +.function _b8148_wrapper _Z14_b8148_wrapperPPv +.src_ref 0 "0_0_reloadable4.cc" 38 first +.src_ref 0 "0_0_reloadable4.cc" 40 79 +.function_start + 11280 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11281 "11000000" // /* MW 3 */ + 11282 "01100000" // /* MW 2 */ + 11283 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 40 79 first + 11284 "10011000" // LDA p0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11285 "00011110" // /* MW 3 */ + 11286 "00011100" // /* MW 2 */ + 11287 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 42 46 first + 11288 "10011000" // LDA p2, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11289 "00011110" // /* MW 3 */ + 11290 "00010101" // /* MW 2 */ + 11291 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 41 80 first + 11292 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11293 "10011110" // /* MW 3 */ + 11294 "00000100" // /* MW 2 */ + 11295 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 39 4 first +.tail_call + 11296 "10000100" // J #10784 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10784 delay_slots=5 */ + 11297 "00000000" // /* MW 5 */ + 11298 "00000000" // /* MW 4 */ + 11299 "00010000" // /* MW 3 */ + 11300 "00010101" // /* MW 2 */ + 11301 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11303 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11305 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11306 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11307 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11309 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z14_b8148_wrapperPPv__end +.label __Z14_b8148_wrapperPPv___func_end0 + 11311 "00000000" // /* MW 1 */ +.label __Z15_b13739_wrapperPPv___func_begin0 +.label _Z15_b13739_wrapperPPv +.function _b13739_wrapper _Z15_b13739_wrapperPPv +.src_ref 0 "0_0_reloadable4.cc" 46 first +.src_ref 0 "0_0_reloadable4.cc" 48 79 +.function_start + 11312 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11313 "11000000" // /* MW 3 */ + 11314 "01100000" // /* MW 2 */ + 11315 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 48 79 first + 11316 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11317 "00011110" // /* MW 3 */ + 11318 "00101100" // /* MW 2 */ + 11319 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 50 81 first + 11320 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11321 "00011110" // /* MW 3 */ + 11322 "11110101" // /* MW 2 */ + 11323 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 49 47 first + 11324 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11325 "10011110" // /* MW 3 */ + 11326 "00000100" // /* MW 2 */ + 11327 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 47 4 first +.tail_call + 11328 "10000100" // J #3904 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=3904 delay_slots=5 */ + 11329 "00000000" // /* MW 5 */ + 11330 "00000000" // /* MW 4 */ + 11331 "10100000" // /* MW 3 */ + 11332 "00000111" // /* MW 2 */ + 11333 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11335 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11337 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11339 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11341 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z15_b13739_wrapperPPv__end +.label __Z15_b13739_wrapperPPv___func_end0 + 11343 "00000000" // /* MW 1 */ +.label __Z15_b13744_wrapperPPv___func_begin0 +.label _Z15_b13744_wrapperPPv +.function _b13744_wrapper _Z15_b13744_wrapperPPv +.src_ref 0 "0_0_reloadable4.cc" 54 first +.src_ref 0 "0_0_reloadable4.cc" 56 79 +.function_start + 11344 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11345 "11000000" // /* MW 3 */ + 11346 "01100000" // /* MW 2 */ + 11347 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 56 79 first + 11348 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11349 "00011110" // /* MW 3 */ + 11350 "00101100" // /* MW 2 */ + 11351 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 58 81 first + 11352 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11353 "00011110" // /* MW 3 */ + 11354 "11110101" // /* MW 2 */ + 11355 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 57 47 first + 11356 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11357 "10011110" // /* MW 3 */ + 11358 "00000100" // /* MW 2 */ + 11359 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 55 4 first +.tail_call + 11360 "10000100" // J #4864 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=4864 delay_slots=5 */ + 11361 "00000000" // /* MW 5 */ + 11362 "00000000" // /* MW 4 */ + 11363 "10000000" // /* MW 3 */ + 11364 "00001001" // /* MW 2 */ + 11365 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11367 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11369 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11371 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11373 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z15_b13744_wrapperPPv__end +.label __Z15_b13744_wrapperPPv___func_end0 + 11375 "00000000" // /* MW 1 */ +.label __Z15_b13749_wrapperPPv___func_begin0 +.label _Z15_b13749_wrapperPPv +.function _b13749_wrapper _Z15_b13749_wrapperPPv +.src_ref 0 "0_0_reloadable4.cc" 62 first +.src_ref 0 "0_0_reloadable4.cc" 64 79 +.function_start + 11376 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11377 "11000000" // /* MW 3 */ + 11378 "01100000" // /* MW 2 */ + 11379 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 64 79 first + 11380 "10011000" // LDA p0, [p2], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11381 "00011110" // /* MW 3 */ + 11382 "00111100" // /* MW 2 */ + 11383 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 65 47 first + 11384 "10011000" // LDA p1, [p2], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11385 "10011110" // /* MW 3 */ + 11386 "11101100" // /* MW 2 */ + 11387 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 67 81 first + 11388 "10011000" // LDA p3, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11389 "10011110" // /* MW 3 */ + 11390 "00010101" // /* MW 2 */ + 11391 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 66 80 first + 11392 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11393 "00011110" // /* MW 3 */ + 11394 "00000101" // /* MW 2 */ + 11395 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 63 4 first +.tail_call + 11396 "10000100" // J #5872 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=5872 delay_slots=5 */ + 11397 "00000000" // /* MW 5 */ + 11398 "00000000" // /* MW 4 */ + 11399 "01111000" // /* MW 3 */ + 11400 "00001011" // /* MW 2 */ + 11401 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11403 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11405 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11407 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11408 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11409 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z15_b13749_wrapperPPv__end +.label __Z15_b13749_wrapperPPv___func_end0 + 11411 "00000000" // /* MW 1 */ +.label _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj +.label __ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj___func_begin0 +.function load_slice_generic_innermost_rtp _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj +.src_ref 3 "slice_generic_innermost_params.h" 40 first +.src_ref 3 "slice_generic_innermost_params.h" 41 19 first +.function_start + 11424 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11425 "00101110" // /* MW 3 */ + 11426 "00011100" // /* MW 2 */ + 11427 "00000001" // /* MW 1 */ + 11428 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11429 "00000000" // /* MW 1 */ + 11430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11431 "00000000" // /* MW 1 */ + 11432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11433 "00000000" // /* MW 1 */ + 11434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11435 "00000000" // /* MW 1 */ + 11436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11437 "00000000" // /* MW 1 */ + 11438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11439 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 41 17 first + 11440 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11441 "00101001" // /* MW 3 */ + 11442 "00011100" // /* MW 2 */ + 11443 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 42 19 first + 11444 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11445 "00101110" // /* MW 3 */ + 11446 "00011100" // /* MW 2 */ + 11447 "00000001" // /* MW 1 */ + 11448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11449 "00000000" // /* MW 1 */ + 11450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11451 "00000000" // /* MW 1 */ + 11452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11453 "00000000" // /* MW 1 */ + 11454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11455 "00000000" // /* MW 1 */ + 11456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11457 "00000000" // /* MW 1 */ + 11458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11459 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 42 17 + 11460 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11461 "00101001" // /* MW 3 */ + 11462 "00011100" // /* MW 2 */ + 11463 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 43 19 first + 11464 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11465 "00101110" // /* MW 3 */ + 11466 "00011100" // /* MW 2 */ + 11467 "00000001" // /* MW 1 */ + 11468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11469 "00000000" // /* MW 1 */ + 11470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11471 "00000000" // /* MW 1 */ + 11472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11473 "00000000" // /* MW 1 */ + 11474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11475 "00000000" // /* MW 1 */ + 11476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11477 "00000000" // /* MW 1 */ + 11478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11479 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 43 17 + 11480 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11481 "00101001" // /* MW 3 */ + 11482 "00011100" // /* MW 2 */ + 11483 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 44 19 first + 11484 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11485 "00101110" // /* MW 3 */ + 11486 "00011100" // /* MW 2 */ + 11487 "00000001" // /* MW 1 */ + 11488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11489 "00000000" // /* MW 1 */ + 11490 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11491 "00000000" // /* MW 1 */ + 11492 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11493 "00000000" // /* MW 1 */ + 11494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11495 "00000000" // /* MW 1 */ + 11496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11497 "00000000" // /* MW 1 */ + 11498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11499 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 44 17 + 11500 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11501 "00101001" // /* MW 3 */ + 11502 "00011100" // /* MW 2 */ + 11503 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 45 19 first + 11504 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11505 "00101110" // /* MW 3 */ + 11506 "00011100" // /* MW 2 */ + 11507 "00000001" // /* MW 1 */ + 11508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11509 "00000000" // /* MW 1 */ + 11510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11511 "00000000" // /* MW 1 */ + 11512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11513 "00000000" // /* MW 1 */ + 11514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11515 "00000000" // /* MW 1 */ + 11516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11517 "00000000" // /* MW 1 */ + 11518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11519 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 45 17 + 11520 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11521 "00101001" // /* MW 3 */ + 11522 "00011100" // /* MW 2 */ + 11523 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 46 17 first + 11524 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11525 "00101110" // /* MW 3 */ + 11526 "00011100" // /* MW 2 */ + 11527 "00000001" // /* MW 1 */ + 11528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11529 "00000000" // /* MW 1 */ + 11530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11531 "00000000" // /* MW 1 */ + 11532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11533 "00000000" // /* MW 1 */ + 11534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11535 "00000000" // /* MW 1 */ + 11536 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11537 "00000000" // /* MW 1 */ + 11538 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11539 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 46 15 + 11540 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11541 "00101001" // /* MW 3 */ + 11542 "00011100" // /* MW 2 */ + 11543 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 47 18 first + 11544 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11545 "00101110" // /* MW 3 */ + 11546 "00000100" // /* MW 2 */ + 11547 "00000001" // /* MW 1 */ + 11548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11549 "00000000" // /* MW 1 */ + 11550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11551 "00000000" // /* MW 1 */ + 11552 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11553 "00000000" // /* MW 1 */ + 11554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11555 "00000000" // /* MW 1 */ + 11556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11557 "00000000" // /* MW 1 */ + 11558 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11559 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 47 16 + 11560 "10011000" // ST el0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11561 "00101001" // /* MW 3 */ + 11562 "00000100" // /* MW 2 */ + 11563 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 48 18 first + 11564 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11565 "00101110" // /* MW 3 */ + 11566 "00010100" // /* MW 2 */ + 11567 "00000001" // /* MW 1 */ + 11568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11569 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 49 first + 11570 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11571 "00000000" // /* MW 3 */ + 11572 "00101000" // /* MW 2 */ + 11573 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11574 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11575 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11577 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11579 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11581 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 48 16 first +.delay_slot + 11582 "10011000" // ST el0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11583 "00101001" // /* MW 3 */ + 11584 "00010100" // /* MW 2 */ +.label _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj__end +.label __ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj___func_end0 + 11585 "00001000" // /* MW 1 */ +.label _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params +.label __ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params___func_begin0 +.function setup_slice_generic_innermost _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params +.src_ref 3 "slice_generic_innermost_params.h" 52 first +.src_ref 3 "slice_generic_innermost_params.h" 53 25 first +.src_ref 3 "slice_generic_innermost_params.h" 55 42 +.src_ref 3 "slice_generic_innermost_params.h" 58 40 +.function_start + 11600 "10111010" // LDA r1, [p0], #4; MOVX r0, #1; MOV m1, #32 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11601 "01011000" // /* MW 9 */ + 11602 "00100000" // /* MW 8 */ + 11603 "10000000" // /* MW 7 */ + 11604 "00101000" // /* MW 6 */ + 11605 "00000000" // /* MW 5 */ + 11606 "00000000" // /* MW 4 */ + 11607 "11010000" // /* MW 3 */ + 11608 "10000110" // /* MW 2 */ + 11609 "00000011" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 53 38 first +.src_ref 3 "slice_generic_innermost_params.h" 58 30 +.src_ref 3 "slice_generic_innermost_params.h" 59 31 + 11610 "10111010" // LDA r5, [p0], #4; MOVX r3, #2; MOV r2, #-6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11611 "01011000" // /* MW 9 */ + 11612 "11111010" // /* MW 8 */ + 11613 "01001111" // /* MW 7 */ + 11614 "01001000" // /* MW 6 */ + 11615 "00110000" // /* MW 5 */ + 11616 "00000000" // /* MW 4 */ + 11617 "11010000" // /* MW 3 */ + 11618 "10010110" // /* MW 2 */ + 11619 "00000011" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 53 51 +.src_ref 3 "slice_generic_innermost_params.h" 60 27 +.src_ref 3 "slice_generic_innermost_params.h" 62 27 + 11620 "01010100" // LDA r4, [p0], #8; MOV m0, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11621 "00000001" // /* MW 5 */ + 11622 "00000001" // /* MW 4 */ + 11623 "11010000" // /* MW 3 */ + 11624 "10010010" // /* MW 2 */ + 11625 "00000101" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 55 42 first +.src_ref 3 "slice_generic_innermost_params.h" 60 27 + 11626 "01010100" // LDA r6, [p0], m1; MOV dj0, #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11627 "00000001" // /* MW 5 */ + 11628 "00000010" // /* MW 4 */ + 11629 "11010001" // /* MW 3 */ + 11630 "00011010" // /* MW 2 */ + 11631 "00000101" // /* MW 1 */ + 11632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11633 "00000000" // /* MW 1 */ + 11634 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11635 "00000000" // /* MW 1 */ + 11636 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11637 "00000000" // /* MW 1 */ + 11638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11639 "00000000" // /* MW 1 */ + 11640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11641 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 53 30 first + 11642 "10011000" // MUL r1, r5, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11643 "00011111" // /* MW 3 */ + 11644 "01000010" // /* MW 2 */ + 11645 "00010001" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 58 40 first + 11646 "10011000" // AND r0, r6, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11647 "00000100" // /* MW 3 */ + 11648 "10000000" // /* MW 2 */ + 11649 "00010001" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 58 30 + 11650 "10011000" // OR r0, r3, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11651 "00000101" // /* MW 3 */ + 11652 "11000000" // /* MW 2 */ + 11653 "00010000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 53 43 first +.src_ref 3 "slice_generic_innermost_params.h" 58 28 + 11654 "01011100" // ST r0, [p0], #-16; MUL r1, r1, r4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11655 "10011111" // /* MW 5 */ + 11656 "10000100" // /* MW 4 */ + 11657 "00110000" // /* MW 3 */ + 11658 "10000010" // /* MW 2 */ + 11659 "00011001" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 75 first + 11660 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11661 "00000000" // /* MW 3 */ + 11662 "00101000" // /* MW 2 */ + 11663 "00010000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 59 31 first +.delay_slot + 11664 "10011000" // LSHL r0, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11665 "00101101" // /* MW 3 */ + 11666 "01000000" // /* MW 2 */ + 11667 "00010000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 59 25 +.delay_slot + 11668 "10011000" // ST r0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11669 "00010001" // /* MW 3 */ + 11670 "00011100" // /* MW 2 */ + 11671 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 60 27 first +.delay_slot + 11672 "10011000" // ST m0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11673 "00000001" // /* MW 3 */ + 11674 "00011100" // /* MW 2 */ + 11675 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 60 27 +.delay_slot + 11676 "10011000" // ST dj0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11677 "01000001" // /* MW 3 */ + 11678 "00000100" // /* MW 2 */ + 11679 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 62 27 first +.delay_slot + 11680 "10011000" // ST m0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11681 "00000001" // /* MW 3 */ + 11682 "00010100" // /* MW 2 */ +.label _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params__end +.label __ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params___func_end0 + 11683 "00001000" // /* MW 1 */ +.label _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj +.label __ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj___func_begin0 +.function setup_slice_generic_innermost_params _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj +.src_ref 3 "slice_generic_innermost_params.h" 79 first +.src_ref 3 "slice_generic_innermost_params.h" 80 4 first +.function_start +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.noswbrkpt + 11696 "00000100" // JL #11424 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11424 delay_slots=5 */ + 11697 "00000001" // /* MW 5 */ + 11698 "00000000" // /* MW 4 */ + 11699 "01010000" // /* MW 3 */ + 11700 "00010110" // /* MW 2 */ + 11701 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 81 4 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11702 "11111000" // MOV dc0, lr /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11703 "11100000" // /* MW 3 */ + 11704 "11000001" // /* MW 2 */ + 11705 "00011000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 81 4 +.delay_slot + 11706 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11707 "11000000" // /* MW 3 */ + 11708 "01100000" // /* MW 2 */ + 11709 "00011010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11711 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11713 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11714 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 11715 "00011100" // /* MW 13 */ + 11716 "00000000" // /* MW 12 */ + 11717 "00000000" // /* MW 11 */ + 11718 "01010111" // /* MW 10 */ + 11719 "00011010" // /* MW 9 */ + 11720 "01000000" // /* MW 8 */ + 11721 "00000000" // /* MW 7 */ + 11722 "00000000" // /* MW 6 */ + 11723 "10110110" // /* MW 5 */ + 11724 "00000010" // /* MW 4 */ + 11725 "11110000" // /* MW 3 */ + 11726 "00101100" // /* MW 2 */ + 11727 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 81 4 first +.tail_call +.return_address + 11728 "10000100" // J #11600 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=11600 delay_slots=5 */ + 11729 "00000000" // /* MW 5 */ + 11730 "00000000" // /* MW 4 */ + 11731 "10101000" // /* MW 3 */ + 11732 "00010110" // /* MW 2 */ + 11733 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 81 4 +.delay_slot + 11734 "11111000" // MOV lr, dc0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11735 "10000000" // /* MW 3 */ + 11736 "01110001" // /* MW 2 */ + 11737 "00011111" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 81 4 +.delay_slot + 11738 "11111000" // MOV p0, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11739 "11000000" // /* MW 3 */ + 11740 "01100100" // /* MW 2 */ + 11741 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11743 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11745 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj__end +.label __ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj___func_end0 + 11747 "00000000" // /* MW 1 */ +.label __Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params___func_begin0 +.label _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params +.function slice_generic_innermost _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "slice_generic_innermost.h" 25 first +.src_ref 3 "slice_generic_innermost.h" 35 60 +.src_ref 3 "slice_generic_innermost.h" 54 19 +.function_start + 11760 "00000010" // MOVS p5, p1; MOV r0, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11761 "01110000" // /* MW 7 */ + 11762 "01100000" // /* MW 6 */ + 11763 "00001010" // /* MW 5 */ + 11764 "00000000" // /* MW 4 */ + 11765 "01100000" // /* MW 3 */ + 11766 "10010001" // /* MW 2 */ + 11767 "10110000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 35 60 first + 11768 "00011000" // ADD.NC p3, r0, #36 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11769 "00010010" // /* MW 3 */ + 11770 "01100000" // /* MW 2 */ + 11771 "00011011" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 35 60 + 11772 "11010100" // LDA m2, [p3], #4; MOV r0, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11773 "10000001" // /* MW 5 */ + 11774 "00111101" // /* MW 4 */ + 11775 "11010000" // /* MW 3 */ + 11776 "10100000" // /* MW 2 */ + 11777 "01100011" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 36 61 first + 11778 "10011000" // LDA m0, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11779 "00000110" // /* MW 3 */ + 11780 "00011100" // /* MW 2 */ + 11781 "00000011" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 40 35 first + 11782 "10011000" // LDA r2, [p3, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11783 "01010110" // /* MW 3 */ + 11784 "11010100" // /* MW 2 */ + 11785 "00000011" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 38 59 first + 11786 "10011000" // LDA m1, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11787 "10000110" // /* MW 3 */ + 11788 "00000100" // /* MW 2 */ + 11789 "00000011" // /* MW 1 */ + 11790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11791 "00000000" // /* MW 1 */ + 11792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11793 "00000000" // /* MW 1 */ + 11794 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11795 "00000000" // /* MW 1 */ + 11796 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11797 "00000000" // /* MW 1 */ + 11798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11799 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 40 8 first +.src_ref 3 "slice_generic_innermost.h" 40 26 first + 11800 "10000100" // JZ r2, #12224 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12224 delay_slots=5 */ + 11801 "00000001" // /* MW 5 */ + 11802 "00000000" // /* MW 4 */ + 11803 "11100000" // /* MW 3 */ + 11804 "00010111" // /* MW 2 */ + 11805 "00010000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "slice_generic_innermost.h" 36 39 +.src_ref 3 "slice_generic_innermost.h" 50 19 +.delay_slot + 11806 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11807 "11000000" // /* MW 3 */ + 11808 "01100000" // /* MW 2 */ + 11809 "00011111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "slice_generic_innermost.h" 36 39 first +.src_ref 3 "slice_generic_innermost.h" 50 19 +.delay_slot + 11810 "11110100" // PADDB [p7], m0; MOV p3, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11811 "10000001" // /* MW 5 */ + 11812 "11011101" // /* MW 4 */ + 11813 "00000110" // /* MW 3 */ + 11814 "01110010" // /* MW 2 */ + 11815 "11100001" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 35 38 first +.delay_slot + 11816 "00011000" // PADDB [p0], m2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11817 "10010000" // /* MW 3 */ + 11818 "01001011" // /* MW 2 */ + 11819 "00111000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "slice_generic_innermost.h" 37 39 first +.src_ref 3 "slice_generic_innermost.h" 52 20 +.delay_slot + 11820 "11110100" // PADDB [p0], m0; MOV p4, p0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11821 "10000001" // /* MW 5 */ + 11822 "11000001" // /* MW 4 */ + 11823 "00001000" // /* MW 3 */ + 11824 "01110010" // /* MW 2 */ + 11825 "00000001" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 38 37 first +.delay_slot + 11826 "00011000" // PADDB [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11827 "10010000" // /* MW 3 */ + 11828 "00101011" // /* MW 2 */ + 11829 "00111001" // /* MW 1 */ + 11830 "00011000" // MOVX r1, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11831 "00010001" // /* MW 3 */ + 11832 "00000010" // /* MW 2 */ + 11833 "00010000" // /* MW 1 */ + 11834 "10011000" // LTU r3, r2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11835 "00011100" // /* MW 3 */ + 11836 "10000110" // /* MW 2 */ + 11837 "00010000" // /* MW 1 */ + 11838 "10000100" // JNZ r3, #12080 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12080 delay_slots=5 */ + 11839 "00000001" // /* MW 5 */ + 11840 "01000000" // /* MW 4 */ + 11841 "10011000" // /* MW 3 */ + 11842 "00010111" // /* MW 2 */ + 11843 "00011000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 40 8 +.delay_slot + 11844 "10111000" // MOV dj0, #48 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11845 "01100000" // /* MW 3 */ + 11846 "10000000" // /* MW 2 */ + 11847 "00011000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 40 8 first +.delay_slot + 11848 "10011000" // LDA r1, [p2, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11849 "00110110" // /* MW 3 */ + 11850 "00000000" // /* MW 2 */ + 11851 "00000010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11853 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11854 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11855 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11856 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11857 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 40 8 +.src_ref 3 "slice_generic_innermost.h" 50 19 first +.src_ref 3 "slice_generic_innermost.h" 52 20 first + 11858 "10110110" // VLDA x2, [p4], m0; VLDB x1, [p3], m0; MOVXM ls, #11952 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 11859 "00010000" // /* MW 11 */ + 11860 "01011000" // /* MW 10 */ + 11861 "01111111" // /* MW 9 */ + 11862 "00001000" // /* MW 8 */ + 11863 "00000000" // /* MW 7 */ + 11864 "00000000" // /* MW 6 */ + 11865 "11101000" // /* MW 5 */ + 11866 "00010000" // /* MW 4 */ + 11867 "01110110" // /* MW 3 */ + 11868 "00010011" // /* MW 2 */ + 11869 "10000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "slice_generic_innermost.h" 40 8 first +.src_ref 3 "slice_generic_innermost.h" 51 19 first +.src_ref 3 "slice_generic_innermost.h" 53 20 first +.src_ref 3 "slice_generic_innermost.h" 56 21 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11870 "01111110" // PADDA [p4], m0; VLDB x0, [p7], m0; PADDS [p3], m0; MOVXM le, #12000 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 11871 "01100000" // /* MW 13 */ + 11872 "00001011" // /* MW 12 */ + 11873 "01100001" // /* MW 11 */ + 11874 "00000010" // /* MW 10 */ + 11875 "11101110" // /* MW 9 */ + 11876 "00110111" // /* MW 8 */ + 11877 "00000001" // /* MW 7 */ + 11878 "00000000" // /* MW 6 */ + 11879 "01101000" // /* MW 5 */ + 11880 "00010000" // /* MW 4 */ + 11881 "11111110" // /* MW 3 */ + 11882 "00001100" // /* MW 2 */ + 11883 "10000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 40 8 +.src_ref 3 "slice_generic_innermost.h" 50 19 first +.src_ref 3 "slice_generic_innermost.h" 57 21 first +.src_ref 3 "slice_generic_innermost.h" 58 21 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11884 "11110110" // VLDA x1, [p3], m0; VLDB x3, [p0], m0; PADDS [p7], m0; ADD.NC lc, r2, #-3 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 11885 "01000000" // /* MW 11 */ + 11886 "10111111" // /* MW 10 */ + 11887 "10111000" // /* MW 9 */ + 11888 "00000010" // /* MW 8 */ + 11889 "01011011" // /* MW 7 */ + 11890 "00001000" // /* MW 6 */ + 11891 "11101111" // /* MW 5 */ + 11892 "00010001" // /* MW 4 */ + 11893 "01110000" // /* MW 3 */ + 11894 "00001011" // /* MW 2 */ + 11895 "01100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "slice_generic_innermost.h" 51 19 first +.src_ref 3 "slice_generic_innermost.h" 52 20 first +.src_ref 3 "slice_generic_innermost.h" 59 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11896 "00110010" // PADDA [p0], m0; VLDB x2, [p4], m0; PADDS [p3], m0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11897 "01011011" // /* MW 7 */ + 11898 "00001000" // /* MW 6 */ + 11899 "01101011" // /* MW 5 */ + 11900 "00010001" // /* MW 4 */ + 11901 "11111000" // /* MW 3 */ + 11902 "00001100" // /* MW 2 */ + 11903 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 53 20 first +.src_ref 3 "slice_generic_innermost.h" 56 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11904 "00111100" // PADDA [p4], m0; VLDB x0, [p7], m0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11905 "01101000" // /* MW 5 */ + 11906 "00010000" // /* MW 4 */ + 11907 "11111110" // /* MW 3 */ + 11908 "00001100" // /* MW 2 */ + 11909 "10000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "slice_generic_innermost.h" 57 21 first +.src_ref 3 "slice_generic_innermost.h" 58 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11910 "01001100" // VLDB x3, [p0], m0; PADDS [p7], m0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11911 "10110110" // /* MW 5 */ + 11912 "00010000" // /* MW 4 */ + 11913 "10001110" // /* MW 3 */ + 11914 "00011110" // /* MW 2 */ + 11915 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 50 19 first +.src_ref 3 "slice_generic_innermost.h" 59 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11916 "00111100" // PADDA [p0], m0; VLDB x1, [p3], m0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11917 "11101000" // /* MW 5 */ + 11918 "00010000" // /* MW 4 */ + 11919 "11110110" // /* MW 3 */ + 11920 "00001100" // /* MW 2 */ + 11921 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "slice_generic_innermost.h" 46 17 first +.src_ref 3 "slice_generic_innermost.h" 52 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11922 "10110100" // VLDB x2, [p4], m0; VSHUFFLE bmll0, x1, x2, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11923 "00001011" // /* MW 5 */ + 11924 "00010010" // /* MW 4 */ + 11925 "10000000" // /* MW 3 */ + 11926 "00010110" // /* MW 2 */ + 11927 "10000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 51 19 first +.src_ref 3 "slice_generic_innermost.h" 56 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11928 "00110010" // NOPA; VLDB x0, [p7], m0; PADDS [p3], m0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11929 "01011011" // /* MW 7 */ + 11930 "00001000" // /* MW 6 */ + 11931 "01101011" // /* MW 5 */ + 11932 "00010000" // /* MW 4 */ + 11933 "11111110" // /* MW 3 */ + 11934 "00101100" // /* MW 2 */ + 11935 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "slice_generic_innermost.h" 47 18 first +.src_ref 3 "slice_generic_innermost.h" 54 19 first +.src_ref 3 "slice_generic_innermost.h" 58 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11936 "11100001" // NOPA; VLDB x3, [p0], m0; VST bmll0, [p5], m1; NOPX; VSHUFFLE bmlh0, x0, x3, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11937 "00000000" // /* MW 15 */ + 11938 "00000000" // /* MW 14 */ + 11939 "11101000" // /* MW 13 */ + 11940 "11000010" // /* MW 12 */ + 11941 "01000000" // /* MW 11 */ + 11942 "00000000" // /* MW 10 */ + 11943 "00000000" // /* MW 9 */ + 11944 "10000000" // /* MW 8 */ + 11945 "00000110" // /* MW 7 */ + 11946 "00101000" // /* MW 6 */ + 11947 "11101101" // /* MW 5 */ + 11948 "00010001" // /* MW 4 */ + 11949 "11110000" // /* MW 3 */ + 11950 "00101100" // /* MW 2 */ + 11951 "00000000" // /* MW 1 */ +.label ZLS_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_192 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 46 17 first +.src_ref 3 "slice_generic_innermost.h" 50 19 first +.src_ref 3 "slice_generic_innermost.h" 53 20 first +.src_ref 3 "slice_generic_innermost.h" 57 21 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 11952 "11100001" // PADDA [p4], m0; VLDB x1, [p3], m0; PADDS [p7], m0; NOPX; VSHUFFLE bmll0, x1, x2, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11953 "00000000" // /* MW 15 */ + 11954 "00000000" // /* MW 14 */ + 11955 "11101000" // /* MW 13 */ + 11956 "10000010" // /* MW 12 */ + 11957 "00000100" // /* MW 11 */ + 11958 "00000000" // /* MW 10 */ + 11959 "00000000" // /* MW 9 */ + 11960 "00000000" // /* MW 8 */ + 11961 "01011011" // /* MW 7 */ + 11962 "00001000" // /* MW 6 */ + 11963 "11101111" // /* MW 5 */ + 11964 "00010000" // /* MW 4 */ + 11965 "11110110" // /* MW 3 */ + 11966 "00001100" // /* MW 2 */ + 11967 "10000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "slice_generic_innermost.h" 52 20 first +.src_ref 3 "slice_generic_innermost.h" 59 21 first +.src_ref 3 "slice_generic_innermost.h" 60 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11968 "11100001" // PADDA [p0], m0; VLDB x2, [p4], m0; VST bmlh0, [p1], m1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11969 "00000000" // /* MW 15 */ + 11970 "00000000" // /* MW 14 */ + 11971 "01111000" // /* MW 13 */ + 11972 "10100101" // /* MW 12 */ + 11973 "00000001" // /* MW 11 */ + 11974 "00000000" // /* MW 10 */ + 11975 "00000000" // /* MW 9 */ + 11976 "10000000" // /* MW 8 */ + 11977 "00100110" // /* MW 7 */ + 11978 "00101000" // /* MW 6 */ + 11979 "01101001" // /* MW 5 */ + 11980 "00010001" // /* MW 4 */ + 11981 "11111000" // /* MW 3 */ + 11982 "00001100" // /* MW 2 */ + 11983 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 47 18 first +.src_ref 3 "slice_generic_innermost.h" 51 19 first +.src_ref 3 "slice_generic_innermost.h" 55 19 first +.src_ref 3 "slice_generic_innermost.h" 56 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11984 "11100001" // PADDA [p5], m1; VLDB x0, [p7], m0; PADDS [p3], m0; NOPX; VSHUFFLE bmlh0, x0, x3, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11985 "00000000" // /* MW 15 */ + 11986 "00000000" // /* MW 14 */ + 11987 "11101000" // /* MW 13 */ + 11988 "11000010" // /* MW 12 */ + 11989 "01000000" // /* MW 11 */ + 11990 "00000000" // /* MW 10 */ + 11991 "00000000" // /* MW 9 */ + 11992 "00000000" // /* MW 8 */ + 11993 "01011011" // /* MW 7 */ + 11994 "00001000" // /* MW 6 */ + 11995 "01101011" // /* MW 5 */ + 11996 "00010000" // /* MW 4 */ + 11997 "11111110" // /* MW 3 */ + 11998 "00001100" // /* MW 2 */ + 11999 "10100101" // /* MW 1 */ +.label ZLE_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_240 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "slice_generic_innermost.h" 54 19 first +.src_ref 3 "slice_generic_innermost.h" 58 21 first +.src_ref 3 "slice_generic_innermost.h" 61 19 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12000 "11100001" // PADDA [p1], m1; VLDB x3, [p0], m0; VST bmll0, [p5], m1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12001 "00000000" // /* MW 15 */ + 12002 "00000000" // /* MW 14 */ + 12003 "01111000" // /* MW 13 */ + 12004 "10100101" // /* MW 12 */ + 12005 "00000001" // /* MW 11 */ + 12006 "00000000" // /* MW 10 */ + 12007 "00000000" // /* MW 9 */ + 12008 "10000000" // /* MW 8 */ + 12009 "00000110" // /* MW 7 */ + 12010 "00101000" // /* MW 6 */ + 12011 "11101101" // /* MW 5 */ + 12012 "00010001" // /* MW 4 */ + 12013 "11110000" // /* MW 3 */ + 12014 "00001100" // /* MW 2 */ + 12015 "00100101" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 46 17 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 12016 "11011000" // VSHUFFLE bmll0, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12017 "00000101" // /* MW 3 */ + 12018 "00001001" // /* MW 2 */ + 12019 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "slice_generic_innermost.h" 60 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12020 "10011000" // VST bmlh0, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12021 "00100110" // /* MW 3 */ + 12022 "00101000" // /* MW 2 */ + 12023 "00001001" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 47 18 first +.src_ref 3 "slice_generic_innermost.h" 61 19 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12024 "10010100" // PADDA [p1], m1; VSHUFFLE bmlh0, x0, x3, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12025 "00001011" // /* MW 5 */ + 12026 "00000011" // /* MW 4 */ + 12027 "11110001" // /* MW 3 */ + 12028 "00001100" // /* MW 2 */ + 12029 "00100101" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 12030 "10000100" // J #12224 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=12224 delay_slots=5 */ + 12031 "00000000" // /* MW 5 */ + 12032 "00000000" // /* MW 4 */ + 12033 "11100000" // /* MW 3 */ + 12034 "00010111" // /* MW 2 */ + 12035 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "slice_generic_innermost.h" 46 17 first +.src_ref 3 "slice_generic_innermost.h" 55 19 first +.src_ref 3 "slice_generic_innermost.h" 60 19 first +.delay_slot +.aggressive_scheduled_block_id 2 +.noswbrkpt + 12036 "10111010" // PADDA [p5], m1; VST bmlh0, [p1], m1; VSHUFFLE bmll0, x1, x2, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12037 "11100010" // /* MW 9 */ + 12038 "10000010" // /* MW 8 */ + 12039 "00000100" // /* MW 7 */ + 12040 "10000000" // /* MW 6 */ + 12041 "00100110" // /* MW 5 */ + 12042 "00101000" // /* MW 4 */ + 12043 "11110001" // /* MW 3 */ + 12044 "00001100" // /* MW 2 */ + 12045 "10100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "slice_generic_innermost.h" 54 19 first +.src_ref 3 "slice_generic_innermost.h" 61 19 first +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12046 "00001100" // PADDA [p1], m1; VST bmll0, [p5], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12047 "00001101" // /* MW 5 */ + 12048 "01010000" // /* MW 4 */ + 12049 "11111010" // /* MW 3 */ + 12050 "00001100" // /* MW 2 */ + 12051 "00100101" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 47 18 first +.src_ref 3 "slice_generic_innermost.h" 55 19 first +.delay_slot + 12052 "10010100" // PADDA [p5], m1; VSHUFFLE bmlh0, x0, x3, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12053 "00001011" // /* MW 5 */ + 12054 "00000011" // /* MW 4 */ + 12055 "11110001" // /* MW 3 */ + 12056 "00001100" // /* MW 2 */ + 12057 "10100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "slice_generic_innermost.h" 54 19 first +.delay_slot + 12058 "00001100" // NOPA; VST bmll0, [p5], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12059 "00001101" // /* MW 5 */ + 12060 "01010000" // /* MW 4 */ + 12061 "11111010" // /* MW 3 */ + 12062 "00101100" // /* MW 2 */ + 12063 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "slice_generic_innermost.h" 60 19 first +.delay_slot + 12064 "11100001" // NOPA; NOPB; VST bmlh0, [p1], m1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12065 "00000000" // /* MW 15 */ + 12066 "00000000" // /* MW 14 */ + 12067 "01111000" // /* MW 13 */ + 12068 "10100101" // /* MW 12 */ + 12069 "00000001" // /* MW 11 */ + 12070 "00000000" // /* MW 10 */ + 12071 "00000000" // /* MW 9 */ + 12072 "10000000" // /* MW 8 */ + 12073 "00100110" // /* MW 7 */ + 12074 "00101000" // /* MW 6 */ + 12075 "00100001" // /* MW 5 */ + 12076 "00000000" // /* MW 4 */ + 12077 "11110000" // /* MW 3 */ + 12078 "00101100" // /* MW 2 */ + 12079 "00000000" // /* MW 1 */ +.label TGT_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_320 +.src_ref 3 "slice_generic_innermost.h" 40 8 first + 12080 "11111000" // MOV lc, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12081 "00100000" // /* MW 3 */ + 12082 "01110001" // /* MW 2 */ + 12083 "00011101" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 40 8 + 12084 "01000100" // MOVXM ls, #12096 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12085 "10000000" // /* MW 5 */ + 12086 "11111110" // /* MW 4 */ + 12087 "00100001" // /* MW 3 */ + 12088 "00000000" // /* MW 2 */ + 12089 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 40 8 + 12090 "01000100" // MOVXM le, #12208 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12091 "01100000" // /* MW 5 */ + 12092 "11111111" // /* MW 4 */ + 12093 "00100110" // /* MW 3 */ + 12094 "00000000" // /* MW 2 */ + 12095 "00000000" // /* MW 1 */ +.label ZLS_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_336 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 50 19 first +.src_ref 3 "slice_generic_innermost.h" 52 20 first +.begin_of_loop +.loop_nesting 1 + 12096 "00111100" // VLDA x1, [p4], m0; VLDB x2, [p3], m0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12097 "01101000" // /* MW 5 */ + 12098 "00010001" // /* MW 4 */ + 12099 "01110110" // /* MW 3 */ + 12100 "00001011" // /* MW 2 */ + 12101 "10000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "slice_generic_innermost.h" 51 19 first +.src_ref 3 "slice_generic_innermost.h" 53 20 first +.src_ref 3 "slice_generic_innermost.h" 56 21 first + 12102 "00110010" // PADDA [p3], m0; VLDB x0, [p7], m0; PADDS [p4], m0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12103 "01011011" // /* MW 7 */ + 12104 "00001000" // /* MW 6 */ + 12105 "01101100" // /* MW 5 */ + 12106 "00010000" // /* MW 4 */ + 12107 "11111110" // /* MW 3 */ + 12108 "00001100" // /* MW 2 */ + 12109 "01100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 57 21 first +.src_ref 3 "slice_generic_innermost.h" 58 21 first + 12110 "00111100" // PADDA [p7], m0; VLDB x3, [p0], m0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12111 "11101000" // /* MW 5 */ + 12112 "00010001" // /* MW 4 */ + 12113 "11110000" // /* MW 3 */ + 12114 "00001100" // /* MW 2 */ + 12115 "11100001" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 59 21 first + 12116 "00011000" // PADDB [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12117 "10010000" // /* MW 3 */ + 12118 "00001011" // /* MW 2 */ + 12119 "00111000" // /* MW 1 */ + 12120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12121 "00000000" // /* MW 1 */ + 12122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12123 "00000000" // /* MW 1 */ + 12124 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12125 "01100111" // /* MW 3 */ + 12126 "00000001" // /* MW 2 */ + 12127 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 46 17 first + 12128 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x2, x1, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12129 "00000000" // /* MW 15 */ + 12130 "00000000" // /* MW 14 */ + 12131 "11101000" // /* MW 13 */ + 12132 "01000010" // /* MW 12 */ + 12133 "00001000" // /* MW 11 */ + 12134 "00000000" // /* MW 10 */ + 12135 "00000000" // /* MW 9 */ + 12136 "00000000" // /* MW 8 */ + 12137 "01011011" // /* MW 7 */ + 12138 "00000001" // /* MW 6 */ + 12139 "00100000" // /* MW 5 */ + 12140 "00000000" // /* MW 4 */ + 12141 "11110000" // /* MW 3 */ + 12142 "00101100" // /* MW 2 */ + 12143 "00000000" // /* MW 1 */ + 12144 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12145 "00000000" // /* MW 15 */ + 12146 "00000000" // /* MW 14 */ + 12147 "01111000" // /* MW 13 */ + 12148 "10100101" // /* MW 12 */ + 12149 "00000001" // /* MW 11 */ + 12150 "00000000" // /* MW 10 */ + 12151 "00000000" // /* MW 9 */ + 12152 "00000000" // /* MW 8 */ + 12153 "01011011" // /* MW 7 */ + 12154 "00000001" // /* MW 6 */ + 12155 "00100000" // /* MW 5 */ + 12156 "00000000" // /* MW 4 */ + 12157 "11110000" // /* MW 3 */ + 12158 "00101100" // /* MW 2 */ + 12159 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "slice_generic_innermost.h" 47 18 first +.src_ref 3 "slice_generic_innermost.h" 54 19 first + 12160 "11100001" // NOPA; NOPB; VST bmll0, [p5], m1; NOPX; VSHUFFLE bmlh0, x0, x3, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12161 "00000000" // /* MW 15 */ + 12162 "00000000" // /* MW 14 */ + 12163 "11101000" // /* MW 13 */ + 12164 "11000010" // /* MW 12 */ + 12165 "01000000" // /* MW 11 */ + 12166 "00000000" // /* MW 10 */ + 12167 "00000000" // /* MW 9 */ + 12168 "10000000" // /* MW 8 */ + 12169 "00000110" // /* MW 7 */ + 12170 "00101000" // /* MW 6 */ + 12171 "00100101" // /* MW 5 */ + 12172 "00000000" // /* MW 4 */ + 12173 "11110000" // /* MW 3 */ + 12174 "00101100" // /* MW 2 */ + 12175 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 55 19 first + 12176 "11100001" // NOPA; PADDB [p5], m1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12177 "00000000" // /* MW 15 */ + 12178 "00000000" // /* MW 14 */ + 12179 "01111000" // /* MW 13 */ + 12180 "10100101" // /* MW 12 */ + 12181 "00000001" // /* MW 11 */ + 12182 "00000000" // /* MW 10 */ + 12183 "00000000" // /* MW 9 */ + 12184 "00000000" // /* MW 8 */ + 12185 "01011011" // /* MW 7 */ + 12186 "00000001" // /* MW 6 */ + 12187 "00100000" // /* MW 5 */ + 12188 "01010111" // /* MW 4 */ + 12189 "11111010" // /* MW 3 */ + 12190 "00101100" // /* MW 2 */ + 12191 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "slice_generic_innermost.h" 60 19 first + 12192 "11100001" // NOPA; NOPB; VST bmlh0, [p1], m1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12193 "00000000" // /* MW 15 */ + 12194 "00000000" // /* MW 14 */ + 12195 "01111000" // /* MW 13 */ + 12196 "10100101" // /* MW 12 */ + 12197 "00000001" // /* MW 11 */ + 12198 "00000000" // /* MW 10 */ + 12199 "00000000" // /* MW 9 */ + 12200 "10000000" // /* MW 8 */ + 12201 "00100110" // /* MW 7 */ + 12202 "00101000" // /* MW 6 */ + 12203 "00100001" // /* MW 5 */ + 12204 "00000000" // /* MW 4 */ + 12205 "11110000" // /* MW 3 */ + 12206 "00101100" // /* MW 2 */ + 12207 "00000000" // /* MW 1 */ +.label ZLE_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_448 +.src_ref 3 "slice_generic_innermost.h" 61 19 first +.end_of_loop + 12208 "11100001" // NOPA; PADDB [p1], m1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12209 "00000000" // /* MW 15 */ + 12210 "00000000" // /* MW 14 */ + 12211 "01111000" // /* MW 13 */ + 12212 "10100101" // /* MW 12 */ + 12213 "00000001" // /* MW 11 */ + 12214 "00000000" // /* MW 10 */ + 12215 "00000000" // /* MW 9 */ + 12216 "00000000" // /* MW 8 */ + 12217 "01011011" // /* MW 7 */ + 12218 "00000001" // /* MW 6 */ + 12219 "00100000" // /* MW 5 */ + 12220 "01010111" // /* MW 4 */ + 12221 "11110010" // /* MW 3 */ + 12222 "00101100" // /* MW 2 */ + 12223 "00000000" // /* MW 1 */ +.label TGT_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_464 +.src_ref 3 "slice_generic_innermost.h" 76 first +.loop_nesting 0 + 12224 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12225 "00000000" // /* MW 3 */ + 12226 "00101000" // /* MW 2 */ + 12227 "00010000" // /* MW 1 */ +.delay_slot + 12228 "11111000" // MOV p7, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12229 "00100000" // /* MW 3 */ + 12230 "01100000" // /* MW 2 */ + 12231 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12233 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12234 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12235 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12236 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12237 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12238 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params__end +.label __Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params___func_end0 + 12239 "00000000" // /* MW 1 */ +.label __ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj___func_begin0 +.label _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj +.function slice_generic_innermost_adf_wrapper, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> > > _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 29 +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 29 first +.function_start + 12240 "00111010" // MOVS p5, p0; PADDXM [sp], #128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12241 "01110001" // /* MW 9 */ + 12242 "00000000" // /* MW 8 */ + 12243 "00000000" // /* MW 7 */ + 12244 "00000000" // /* MW 6 */ + 12245 "00000100" // /* MW 5 */ + 12246 "00000000" // /* MW 4 */ + 12247 "01100000" // /* MW 3 */ + 12248 "00010001" // /* MW 2 */ + 12249 "10110000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 + 12250 "00000010" // ST lr, [sp, #-4]; MOV p3, p1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12251 "01110000" // /* MW 7 */ + 12252 "01100000" // /* MW 6 */ + 12253 "10110001" // /* MW 5 */ + 12254 "00000001" // /* MW 4 */ + 12255 "10110000" // /* MW 3 */ + 12256 "10000111" // /* MW 2 */ + 12257 "11111111" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 33 4 +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 33 4 first +.no_stack_arguments + 12258 "00111010" // MOVS p1, p2; JL #11696 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=11696 delay_slots=5 */ + 12259 "01000001" // /* MW 9 */ + 12260 "00000000" // /* MW 8 */ + 12261 "00000000" // /* MW 7 */ + 12262 "10110110" // /* MW 6 */ + 12263 "00000101" // /* MW 5 */ + 12264 "00000000" // /* MW 4 */ + 12265 "01100000" // /* MW 3 */ + 12266 "00010001" // /* MW 2 */ + 12267 "00110001" // /* MW 1 */ +.delay_slot + 12268 "11111000" // MOV p0, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12269 "11100000" // /* MW 3 */ + 12270 "01100101" // /* MW 2 */ + 12271 "00011000" // /* MW 1 */ +.delay_slot + 12272 "00011000" // PADDB [p0], #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12273 "10010000" // /* MW 3 */ + 12274 "11101111" // /* MW 2 */ + 12275 "00111000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 4 +.delay_slot + 12276 "11111000" // MOV p4, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12277 "11000000" // /* MW 3 */ + 12278 "01100000" // /* MW 2 */ + 12279 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12281 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12282 "00111100" // NOPA; NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12283 "00100000" // /* MW 5 */ + 12284 "00000000" // /* MW 4 */ + 12285 "11110000" // /* MW 3 */ + 12286 "00101100" // /* MW 2 */ + 12287 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 31 first +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 4 +.return_address + 12288 "10111010" // LDA r18, [sp, #-128]; MOVS p2, p4; MOV r17, CORE_ID /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12289 "01110010" // /* MW 9 */ + 12290 "01110000" // /* MW 8 */ + 12291 "00101101" // /* MW 7 */ + 12292 "00000010" // /* MW 6 */ + 12293 "10001011" // /* MW 5 */ + 12294 "10010000" // /* MW 4 */ + 12295 "00100010" // /* MW 3 */ + 12296 "01001010" // /* MW 2 */ + 12297 "11110000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 34 35 first +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 44 + 12298 "00101100" // LDA r22, [sp, #-124]; EXTEND.u8 r17, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12299 "00100000" // /* MW 5 */ + 12300 "11000101" // /* MW 4 */ + 12301 "00101000" // /* MW 3 */ + 12302 "11011010" // /* MW 2 */ + 12303 "11110000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 34 35 +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 57 first + 12304 "10111010" // LDA r20, [sp, #-120]; MOVXM r19, #65534 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12305 "00010000" // /* MW 9 */ + 12306 "11111111" // /* MW 8 */ + 12307 "01101111" // /* MW 7 */ + 12308 "00111110" // /* MW 6 */ + 12309 "00000000" // /* MW 5 */ + 12310 "00000000" // /* MW 4 */ + 12311 "00100000" // /* MW 3 */ + 12312 "01010010" // /* MW 2 */ + 12313 "11110001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 34 35 first + 12314 "00101100" // LDA p1, [p3]; ADD r17, r19, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12315 "00100001" // /* MW 5 */ + 12316 "11000110" // /* MW 4 */ + 12317 "11011001" // /* MW 3 */ + 12318 "10010011" // /* MW 2 */ + 12319 "01100000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 70 first +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 59 first + 12320 "00101100" // LDA r19, [sp, #-116]; EXTEND.u16 r21, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12321 "01100000" // /* MW 5 */ + 12322 "11010101" // /* MW 4 */ + 12323 "00101000" // /* MW 3 */ + 12324 "11001110" // /* MW 2 */ + 12325 "11110001" // /* MW 1 */ + 12326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12327 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 12328 "10011000" // LDA r17, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12329 "00110110" // /* MW 3 */ + 12330 "00000110" // /* MW 2 */ + 12331 "00000101" // /* MW 1 */ + 12332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12333 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 36 first + 12334 "10011000" // MUL r18, r22, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12335 "00101111" // /* MW 3 */ + 12336 "10100101" // /* MW 2 */ + 12337 "00010101" // /* MW 1 */ + 12338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12339 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 49 + 12340 "10011000" // MUL r18, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12341 "01001111" // /* MW 3 */ + 12342 "10100101" // /* MW 2 */ + 12343 "00010100" // /* MW 1 */ + 12344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12345 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 62 + 12346 "10011000" // MUL r18, r21, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12347 "00101111" // /* MW 3 */ + 12348 "01100101" // /* MW 2 */ + 12349 "00010101" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 4 first +.no_stack_arguments + 12350 "00000100" // JL #11760 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11760 delay_slots=5 */ + 12351 "00000001" // /* MW 5 */ + 12352 "00000000" // /* MW 4 */ + 12353 "11111000" // /* MW 3 */ + 12354 "00010110" // /* MW 2 */ + 12355 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 57 +.delay_slot + 12356 "10011000" // MUL r18, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12357 "00101111" // /* MW 3 */ + 12358 "11100101" // /* MW 2 */ + 12359 "00010100" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 45 +.delay_slot + 12360 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12361 "00000101" // /* MW 3 */ + 12362 "00100000" // /* MW 2 */ + 12363 "00010000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 45 +.delay_slot + 12364 "10011000" // LSHL r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12365 "00001101" // /* MW 3 */ + 12366 "10100001" // /* MW 2 */ + 12367 "00010100" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 45 +.delay_slot + 12368 "01011000" // ADD.NC p0, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12369 "11000001" // /* MW 3 */ + 12370 "01101000" // /* MW 2 */ + 12371 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12372 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12373 "10000001" // /* MW 11 */ + 12374 "10101101" // /* MW 10 */ + 12375 "00000000" // /* MW 9 */ + 12376 "00000000" // /* MW 8 */ + 12377 "00000000" // /* MW 7 */ + 12378 "00000000" // /* MW 6 */ + 12379 "00100000" // /* MW 5 */ + 12380 "00000000" // /* MW 4 */ + 12381 "11110000" // /* MW 3 */ + 12382 "00101100" // /* MW 2 */ + 12383 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 39 +.return_address + 12384 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12385 "00111001" // /* MW 3 */ + 12386 "11111100" // /* MW 2 */ + 12387 "00000111" // /* MW 1 */ + 12388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12389 "00000000" // /* MW 1 */ + 12390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12391 "00000000" // /* MW 1 */ + 12392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12393 "00000000" // /* MW 1 */ + 12394 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12395 "00000000" // /* MW 1 */ + 12396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12397 "00000000" // /* MW 1 */ + 12398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12399 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 39 first + 12400 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12401 "00000000" // /* MW 3 */ + 12402 "00101000" // /* MW 2 */ + 12403 "00010000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 39 +.delay_slot + 12404 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12405 "00000001" // /* MW 5 */ + 12406 "00000000" // /* MW 4 */ + 12407 "00000000" // /* MW 3 */ + 12408 "11110000" // /* MW 2 */ + 12409 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12411 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12413 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12415 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj__end +.label __ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj___func_end0 + 12417 "00000000" // /* MW 1 */ +.label __Z14_b8170_wrapperPPv___func_begin0 +.label _Z14_b8170_wrapperPPv +.function _b8170_wrapper _Z14_b8170_wrapperPPv +.src_ref 0 "0_0_reloadable4.cc" 71 first +.src_ref 0 "0_0_reloadable4.cc" 73 79 +.function_start + 12432 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12433 "11000000" // /* MW 3 */ + 12434 "01100000" // /* MW 2 */ + 12435 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 73 79 first + 12436 "10011000" // LDA p0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12437 "00011110" // /* MW 3 */ + 12438 "00011100" // /* MW 2 */ + 12439 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 75 47 first + 12440 "10011000" // LDA p2, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12441 "00011110" // /* MW 3 */ + 12442 "00010101" // /* MW 2 */ + 12443 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 74 80 first + 12444 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12445 "10011110" // /* MW 3 */ + 12446 "00000100" // /* MW 2 */ + 12447 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 72 4 first +.tail_call + 12448 "10000100" // J #12240 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=12240 delay_slots=5 */ + 12449 "00000000" // /* MW 5 */ + 12450 "00000000" // /* MW 4 */ + 12451 "11101000" // /* MW 3 */ + 12452 "00010111" // /* MW 2 */ + 12453 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12455 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12457 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12459 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12461 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z14_b8170_wrapperPPv__end +.label __Z14_b8170_wrapperPPv___func_end0 + 12463 "00000000" // /* MW 1 */ +.label _ZN12me_primitive10udiv_dstepEjjRjS0_ +.function udiv_dstep _ZN12me_primitive10udiv_dstepEjjRjS0_ +.src_ref 11 "me_div.c" 108 19 +.src_ref 11 "me_div.c" 108 19 +.src_ref 11 "me_div.c" 115 4 first +.function_start + 12464 "11100100" // MOVX r3, #0; MOV r31, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12465 "01000001" // /* MW 5 */ + 12466 "10100000" // /* MW 4 */ + 12467 "00101111" // /* MW 3 */ + 12468 "11000000" // /* MW 2 */ + 12469 "00000000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12470 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12471 "00011100" // /* MW 3 */ + 12472 "11000110" // /* MW 2 */ + 12473 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12474 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12475 "00011100" // /* MW 3 */ + 12476 "11000110" // /* MW 2 */ + 12477 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12478 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12479 "00011100" // /* MW 3 */ + 12480 "11000110" // /* MW 2 */ + 12481 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12482 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12483 "00011100" // /* MW 3 */ + 12484 "11000110" // /* MW 2 */ + 12485 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12486 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12487 "00011100" // /* MW 3 */ + 12488 "11000110" // /* MW 2 */ + 12489 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12490 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12491 "00011100" // /* MW 3 */ + 12492 "11000110" // /* MW 2 */ + 12493 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12494 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12495 "00011100" // /* MW 3 */ + 12496 "11000110" // /* MW 2 */ + 12497 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12498 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12499 "00011100" // /* MW 3 */ + 12500 "11000110" // /* MW 2 */ + 12501 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12502 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12503 "00011100" // /* MW 3 */ + 12504 "11000110" // /* MW 2 */ + 12505 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12506 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12507 "00011100" // /* MW 3 */ + 12508 "11000110" // /* MW 2 */ + 12509 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12510 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12511 "00011100" // /* MW 3 */ + 12512 "11000110" // /* MW 2 */ + 12513 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12514 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12515 "00011100" // /* MW 3 */ + 12516 "11000110" // /* MW 2 */ + 12517 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12518 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12519 "00011100" // /* MW 3 */ + 12520 "11000110" // /* MW 2 */ + 12521 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12522 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12523 "00011100" // /* MW 3 */ + 12524 "11000110" // /* MW 2 */ + 12525 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12526 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12527 "00011100" // /* MW 3 */ + 12528 "11000110" // /* MW 2 */ + 12529 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12530 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12531 "00011100" // /* MW 3 */ + 12532 "11000110" // /* MW 2 */ + 12533 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12534 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12535 "00011100" // /* MW 3 */ + 12536 "11000110" // /* MW 2 */ + 12537 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12538 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12539 "00011100" // /* MW 3 */ + 12540 "11000110" // /* MW 2 */ + 12541 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12542 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12543 "00011100" // /* MW 3 */ + 12544 "11000110" // /* MW 2 */ + 12545 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12546 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12547 "00011100" // /* MW 3 */ + 12548 "11000110" // /* MW 2 */ + 12549 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12550 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12551 "00011100" // /* MW 3 */ + 12552 "11000110" // /* MW 2 */ + 12553 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12554 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12555 "00011100" // /* MW 3 */ + 12556 "11000110" // /* MW 2 */ + 12557 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12558 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12559 "00011100" // /* MW 3 */ + 12560 "11000110" // /* MW 2 */ + 12561 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12562 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12563 "00011100" // /* MW 3 */ + 12564 "11000110" // /* MW 2 */ + 12565 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12566 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12567 "00011100" // /* MW 3 */ + 12568 "11000110" // /* MW 2 */ + 12569 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12570 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12571 "00011100" // /* MW 3 */ + 12572 "11000110" // /* MW 2 */ + 12573 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12574 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12575 "00011100" // /* MW 3 */ + 12576 "11000110" // /* MW 2 */ + 12577 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12578 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12579 "00011100" // /* MW 3 */ + 12580 "11000110" // /* MW 2 */ + 12581 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 119 first + 12582 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12583 "00000000" // /* MW 3 */ + 12584 "00101000" // /* MW 2 */ + 12585 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 first +.delay_slot + 12586 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12587 "00011100" // /* MW 3 */ + 12588 "11000110" // /* MW 2 */ + 12589 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 +.delay_slot + 12590 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12591 "00011100" // /* MW 3 */ + 12592 "11000110" // /* MW 2 */ + 12593 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 +.delay_slot + 12594 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12595 "00011100" // /* MW 3 */ + 12596 "11000110" // /* MW 2 */ + 12597 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 +.delay_slot + 12598 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12599 "00011100" // /* MW 3 */ + 12600 "11000110" // /* MW 2 */ + 12601 "00010000" // /* MW 1 */ +.delay_slot + 12602 "11111000" // MOV r2, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12603 "10100000" // /* MW 3 */ + 12604 "10011111" // /* MW 2 */ +.label _ZN12me_primitive10udiv_dstepEjjRjS0___end + 12605 "00011000" // /* MW 1 */ +.label memset +.function memset memset +.src_ref 12 "string.c" 325 first +.src_ref 12 "string.c" 328 4 first +.function_start + 12608 "10000100" // JZ r1, #12768 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12768 delay_slots=5 */ + 12609 "00000001" // /* MW 5 */ + 12610 "00000000" // /* MW 4 */ + 12611 "11110000" // /* MW 3 */ + 12612 "00011000" // /* MW 2 */ + 12613 "00001000" // /* MW 1 */ +.src_ref 12 "string.c" 329 3 +.delay_slot + 12614 "11111000" // MOV p0, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12615 "11000000" // /* MW 3 */ + 12616 "01100010" // /* MW 2 */ + 12617 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12619 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12621 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12622 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12623 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12625 "00000000" // /* MW 1 */ +.src_ref 12 "string.c" 328 4 first +.src_ref 12 "string.c" 329 3 + 12626 "00000010" // MOVS p1, p0; MOV lc, r1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12627 "01110000" // /* MW 7 */ + 12628 "01010000" // /* MW 6 */ + 12629 "10111000" // /* MW 5 */ + 12630 "00000010" // /* MW 4 */ + 12631 "01100000" // /* MW 3 */ + 12632 "00010001" // /* MW 2 */ + 12633 "00110000" // /* MW 1 */ +.src_ref 12 "string.c" 328 4 + 12634 "01000100" // MOVXM ls, #12656 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12635 "11100000" // /* MW 5 */ + 12636 "11100010" // /* MW 4 */ + 12637 "00110001" // /* MW 3 */ + 12638 "00000000" // /* MW 2 */ + 12639 "00000000" // /* MW 1 */ +.src_ref 12 "string.c" 328 4 + 12640 "11100001" // NOPA; NOPB; NOPS; MOVXM le, #12752; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12641 "00000000" // /* MW 15 */ + 12642 "00000000" // /* MW 14 */ + 12643 "00010000" // /* MW 13 */ + 12644 "11101000" // /* MW 12 */ + 12645 "10111000" // /* MW 11 */ + 12646 "00001101" // /* MW 10 */ + 12647 "00000000" // /* MW 9 */ + 12648 "00000000" // /* MW 8 */ + 12649 "01011011" // /* MW 7 */ + 12650 "00000001" // /* MW 6 */ + 12651 "00100000" // /* MW 5 */ + 12652 "00000000" // /* MW 4 */ + 12653 "11110000" // /* MW 3 */ + 12654 "00101100" // /* MW 2 */ + 12655 "00000000" // /* MW 1 */ +.label ZLS_Fmemset_48 +.src_ref 12 "string.c" 329 3 first +.begin_of_loop +.loop_nesting 1 + 12656 "11100001" // ST.s8 r0, [p1], #1; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12657 "00000000" // /* MW 15 */ + 12658 "00000000" // /* MW 14 */ + 12659 "01111000" // /* MW 13 */ + 12660 "10100101" // /* MW 12 */ + 12661 "00000001" // /* MW 11 */ + 12662 "00000000" // /* MW 10 */ + 12663 "00000000" // /* MW 9 */ + 12664 "00000000" // /* MW 8 */ + 12665 "01011011" // /* MW 7 */ + 12666 "00000001" // /* MW 6 */ + 12667 "00100000" // /* MW 5 */ + 12668 "00000000" // /* MW 4 */ + 12669 "11100000" // /* MW 3 */ + 12670 "10000000" // /* MW 2 */ + 12671 "00100011" // /* MW 1 */ + 12672 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12673 "00000000" // /* MW 15 */ + 12674 "00000000" // /* MW 14 */ + 12675 "01111000" // /* MW 13 */ + 12676 "10100101" // /* MW 12 */ + 12677 "00000001" // /* MW 11 */ + 12678 "00000000" // /* MW 10 */ + 12679 "00000000" // /* MW 9 */ + 12680 "00000000" // /* MW 8 */ + 12681 "01011011" // /* MW 7 */ + 12682 "00000001" // /* MW 6 */ + 12683 "00100000" // /* MW 5 */ + 12684 "00000000" // /* MW 4 */ + 12685 "11110000" // /* MW 3 */ + 12686 "00101100" // /* MW 2 */ + 12687 "00000000" // /* MW 1 */ + 12688 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12689 "00000000" // /* MW 15 */ + 12690 "00000000" // /* MW 14 */ + 12691 "01111000" // /* MW 13 */ + 12692 "10100101" // /* MW 12 */ + 12693 "00000001" // /* MW 11 */ + 12694 "00000000" // /* MW 10 */ + 12695 "00000000" // /* MW 9 */ + 12696 "00000000" // /* MW 8 */ + 12697 "01011011" // /* MW 7 */ + 12698 "00000001" // /* MW 6 */ + 12699 "00100000" // /* MW 5 */ + 12700 "00000000" // /* MW 4 */ + 12701 "11110000" // /* MW 3 */ + 12702 "00101100" // /* MW 2 */ + 12703 "00000000" // /* MW 1 */ + 12704 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12705 "00000000" // /* MW 15 */ + 12706 "00000000" // /* MW 14 */ + 12707 "01111000" // /* MW 13 */ + 12708 "10100101" // /* MW 12 */ + 12709 "00000001" // /* MW 11 */ + 12710 "00000000" // /* MW 10 */ + 12711 "00000000" // /* MW 9 */ + 12712 "00000000" // /* MW 8 */ + 12713 "01011011" // /* MW 7 */ + 12714 "00000001" // /* MW 6 */ + 12715 "00100000" // /* MW 5 */ + 12716 "00000000" // /* MW 4 */ + 12717 "11110000" // /* MW 3 */ + 12718 "00101100" // /* MW 2 */ + 12719 "00000000" // /* MW 1 */ + 12720 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12721 "00000000" // /* MW 15 */ + 12722 "00000000" // /* MW 14 */ + 12723 "01111000" // /* MW 13 */ + 12724 "10100101" // /* MW 12 */ + 12725 "00000001" // /* MW 11 */ + 12726 "00000000" // /* MW 10 */ + 12727 "00000000" // /* MW 9 */ + 12728 "00000000" // /* MW 8 */ + 12729 "01011011" // /* MW 7 */ + 12730 "00000001" // /* MW 6 */ + 12731 "00100000" // /* MW 5 */ + 12732 "00000000" // /* MW 4 */ + 12733 "11110000" // /* MW 3 */ + 12734 "00101100" // /* MW 2 */ + 12735 "00000000" // /* MW 1 */ + 12736 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12737 "00000000" // /* MW 15 */ + 12738 "00000000" // /* MW 14 */ + 12739 "01111000" // /* MW 13 */ + 12740 "10100101" // /* MW 12 */ + 12741 "00000001" // /* MW 11 */ + 12742 "00000000" // /* MW 10 */ + 12743 "00000000" // /* MW 9 */ + 12744 "00000000" // /* MW 8 */ + 12745 "01011011" // /* MW 7 */ + 12746 "00000001" // /* MW 6 */ + 12747 "00100000" // /* MW 5 */ + 12748 "00000000" // /* MW 4 */ + 12749 "11110000" // /* MW 3 */ + 12750 "00101100" // /* MW 2 */ + 12751 "00000000" // /* MW 1 */ +.label ZLE_Fmemset_144 +.end_of_loop + 12752 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12753 "00000000" // /* MW 15 */ + 12754 "00000000" // /* MW 14 */ + 12755 "01111000" // /* MW 13 */ + 12756 "10100101" // /* MW 12 */ + 12757 "00000001" // /* MW 11 */ + 12758 "00000000" // /* MW 10 */ + 12759 "00000000" // /* MW 9 */ + 12760 "00000000" // /* MW 8 */ + 12761 "01011011" // /* MW 7 */ + 12762 "00000001" // /* MW 6 */ + 12763 "00100000" // /* MW 5 */ + 12764 "00000000" // /* MW 4 */ + 12765 "11110000" // /* MW 3 */ + 12766 "00101100" // /* MW 2 */ + 12767 "00000000" // /* MW 1 */ +.label TGT_Fmemset_160 +.src_ref 12 "string.c" 330 4 first +.loop_nesting 0 + 12768 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12769 "00000000" // /* MW 3 */ + 12770 "00101000" // /* MW 2 */ + 12771 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12773 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12775 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12776 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12777 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12778 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12779 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12780 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label memset__end + 12781 "00000000" // /* MW 1 */ +.dir 0 "/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable4/src" +.dir 1 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer" +.dir 2 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common" +.dir 3 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc" +.dir 4 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2" +.dir 5 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p" +.dir 6 "/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend" +.dir 7 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/conv" +.dir 8 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib" +.dir 9 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/detail" +.dir 10 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf" +.dir 11 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21990/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib" +.dir 12 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21990/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib/runtime/src" diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable79/Release/0_0_reloadable79.cmico b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable79/Release/0_0_reloadable79.cmico new file mode 100644 index 0000000000000000000000000000000000000000..f377058758269f564988080a1597f499edc1b997 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable79/Release/0_0_reloadable79.cmico @@ -0,0 +1 @@ ++Mdec diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable79/Release/0_0_reloadable79.lst b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable79/Release/0_0_reloadable79.lst new file mode 100644 index 0000000000000000000000000000000000000000..0b60ff24e25725d17ee1d91c96109567c043ad75 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable79/Release/0_0_reloadable79.lst @@ -0,0 +1,4100 @@ + +// File generated by darts version V-2024.06#84922c0d9f#241219, Fri May 30 11:30:06 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// darts -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -d -h -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable4 me + +// Release: ipp V-2024.06-TGT-241219 + +.text_segment PM 2528 +.entry_point +.label __Z13kernelWrapperPPvjjjj___func_begin0 +.label _Z13kernelWrapperPPvjjjj +.function_start + 2528 0x00 0xc2 0xd0 0xe9 0xe0 0x2c LDA r16, [p0]; NEZ r26, r1 + 2534 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 2540 0x0f 0xef 0x1d 0x98 ST p6, [sp, #-20] + 2544 0xfe 0x3a 0xb0 0x01 0xc8 0xd0 0x70 0x02 ST r14, [sp, #-16]; MOV r14, r3 + 2552 0xff 0x3e 0xb0 0x01 0xe8 0x50 0x70 0x02 ST r15, [sp, #-8]; MOV r15, r1 + 2560 0x0f 0xf7 0x9d 0x98 ST p7, [sp, #-12] + 2564 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4] + 2568 0x1e 0x68 0x02 0x18 ADD.NC p6, r16, #4 + 2572 0x06 0x1e 0x16 0x98 LDA r16, [p6], #4 + 2576 0x06 0x3e 0x56 0x98 LDA r18, [p6], #12 + 2580 0x06 0xee 0x36 0x98 LDA r17, [p6], #-8 + 2584 0x06 0x07 0x76 0x98 LDA r27, [p6] + 2588 0x00 0x00 NOPX + 2590 0x00 0x00 NOPX + 2592 0x00 0x00 NOPX + 2594 0x00 0x00 NOPX + 2596 0x00 0x00 NOPX + 2598 0x00 0x00 NOPX + 2600 0x14 0x21 0x22 0x18 SEL.EQZ r16, r16, r18, r27 + 2604 0x0e 0xd6 0x11 0x98 ST r16, [p6, #-12] + 2608 0xfc 0x1f 0xa0 0x35 0x39 0xe4 MOVX r16, #-1; MOV el0, r26 + 2614 0x00 0x00 NOPX + 2616 0x00 0x00 NOPX + 2618 0x00 0x00 NOPX + 2620 0x14 0x57 0x08 0x18 ACQ.COND r17, r16, r26 + 2624 0x04 0x41 0x29 0xa0 0x05 0x64 MOVX r17, #2; MOV r19, #1 + 2630 0xd5 0x23 0xb9 0x21 0x81 0xe4 LSHL r20, r26, r17; MOV r18, p0 + 2636 0x9c 0x9f 0x9c 0xd2 0xa2 0xa4 LTU r18, r19, r15; ADD.NC p6, r18, r20 + 2642 0xc0 0xd2 0xd7 0xe6 0x95 0x82 0x6e 0x60 0x72 0xba LDA r20, [p6]; ST r20, [sp, #-28]; MOV r19, p6 + 2652 0xfd 0x4a 0xb0 0x03 0x4c 0x90 0x70 0x02 ST r18, [sp, #-24]; MOV r26, r18 + 2660 0x00 0x00 NOPX + 2662 0x00 0x00 NOPX + 2664 0x00 0x00 NOPX + 2666 0x00 0x00 NOPX + 2668 0x00 0x00 NOPX + 2670 0x1e 0x6a 0x02 0x18 ADD.NC p6, r20, #4 + 2674 0x06 0x1e 0x96 0x98 LDA r20, [p6], #4 + 2678 0x06 0x3e 0xd6 0x98 LDA r22, [p6], #12 + 2682 0x06 0xee 0xb6 0x98 LDA r21, [p6], #-8 + 2686 0x06 0x07 0x76 0x98 LDA r27, [p6] + 2690 0x00 0x00 NOPX + 2692 0x00 0x00 NOPX + 2694 0x00 0x00 NOPX + 2696 0x00 0x00 NOPX + 2698 0x00 0x00 NOPX + 2700 0x00 0x00 NOPX + 2702 0x15 0x29 0x62 0x18 SEL.EQZ r20, r20, r22, r27 + 2706 0x0e 0xd6 0x91 0x98 ST r20, [p6, #-12] + 2710 0x00 0x00 NOPX + 2712 0x00 0x00 NOPX + 2714 0x00 0x00 NOPX + 2716 0x00 0x00 NOPX + 2718 0x15 0x57 0x08 0x18 ACQ.COND r21, r16, r26 + 2722 0x14 0xa5 0x1d 0x98 LSHL r18, r18, r17 + 2726 0x14 0xa3 0xb9 0xb3 0x92 0xa4 LSHL r18, r2, r17; ADD.NC r19, r19, r18 + 2732 0x76 0x9e 0x0c 0xd3 0x92 0xa4 NEZ r26, r14; ADD.NC p6, r19, r18 + 2738 0xc0 0xca 0xdf 0xc6 0xab 0x0c LDA r18, [p6]; ST r26, [sp, #-32] + 2744 0x00 0x00 NOPX + 2746 0x00 0x00 NOPX + 2748 0x00 0x00 NOPX + 2750 0x00 0x00 NOPX + 2752 0x00 0x00 NOPX + 2754 0x00 0x00 NOPX + 2756 0x1f 0x69 0x02 0x18 ADD.NC p7, r18, #4 + 2760 0x07 0x3e 0x76 0x98 LDA r19, [p7], #12 + 2764 0x07 0xee 0x56 0x98 LDA r18, [p7], #-8 + 2768 0x07 0x1e 0x96 0x98 LDA r20, [p7], #4 + 2772 0x07 0x07 0x76 0x98 LDA r27, [p7] + 2776 0x00 0x00 NOPX + 2778 0x00 0x00 NOPX + 2780 0x00 0x00 NOPX + 2782 0x00 0x00 NOPX + 2784 0x00 0x00 NOPX + 2786 0x00 0x00 NOPX + 2788 0x14 0xe7 0x42 0x18 SEL.EQZ r19, r19, r20, r27 + 2792 0x0f 0xd6 0x71 0x98 ST r19, [p7, #-12] + 2796 0x00 0x00 NOPX + 2798 0x00 0x00 NOPX + 2800 0x00 0x00 NOPX + 2802 0x00 0x00 NOPX + 2804 0x14 0x97 0x08 0x18 ACQ.COND r18, r16, r26 + 2808 0x10 0x21 0x1d 0x98 LSHL r16, r0, r17 + 2812 0x18 0x88 0x20 0xf8 MOV dj0, r16 + 2816 0x00 0x07 0xce 0xc4 0x80 0x44 MOVXM p7, #508480 + 2822 0xe0 0x13 0xdf 0xb8 0x5b 0x0c LDA p1, [p7, dj0]; ST el0, [sp, #-36] + 2828 0x00 0x00 NOPX + 2830 0x00 0x00 NOPX + 2832 0x00 0x00 NOPX + 2834 0x00 0x00 NOPX + 2836 0x00 0x00 NOPX + 2838 0x00 0x00 NOPX +.no_stack_arguments + 2840 0x10 0x30 0x40 0x18 JL p1 +.delay_slot + 2844 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.delay_slot +.swstall delay_slot + 2848 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2850 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2852 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2854 0x00 0x2c 0xf0 0x00 0x10 0x00 0x01 0xa5 0x7e 0xba NOPA; NOPB; NOPM +.return_address + 2864 0xe0 0xc6 0xd0 0x40 0x0a 0x2c LDA r17, [p7]; MOVX r16, #1 + 2870 0x07 0xdf 0x51 0x18 LDA r26, [sp, #-36] + 2874 0x07 0xe4 0x41 0x18 LDA dj0, [sp, #-28] + 2878 0x07 0xe8 0x29 0x18 LDA el0, [sp, #-24] + 2882 0x07 0xe0 0x09 0x18 LDA eh0, [sp, #-32] + 2886 0x00 0x00 NOPX + 2888 0x00 0x00 NOPX + 2890 0x18 0x68 0x88 0x18 ADD.NC p0, r17, #16 + 2894 0x00 0x06 0x36 0x98 LDA r17, [p0] + 2898 0x00 0x00 NOPX + 2900 0x00 0x00 NOPX + 2902 0x00 0x00 NOPX + 2904 0x00 0x00 NOPX + 2906 0x00 0x00 NOPX + 2908 0x00 0x00 NOPX + 2910 0x14 0x55 0x08 0x18 REL.COND r17, r16, r26 + 2914 0x1e 0xc6 0xdd 0xaf 0x41 0xd4 LDA r17, [p0, #-4]; MOV r27, r15 + 2920 0xe0 0x4a 0xdd 0x40 0x39 0xd4 LDA r18, [p7, dj0]; MOV r26, el0 + 2926 0x00 0x00 NOPX + 2928 0x00 0x00 NOPX + 2930 0x00 0x00 NOPX + 2932 0x00 0x00 NOPX + 2934 0x00 0x00 NOPX + 2936 0x14 0x27 0x11 0x98 SUB r19, r16, r17 + 2940 0x8c 0x66 0x4e 0xd2 0x10 0x24 SEL.EQZ r17, r17, r19, r27; ADD.NC p7, r18, #16 + 2946 0xe0 0xc6 0xd1 0xec 0x63 0x0c LDA r17, [p7]; ST r17, [p0, #-4] + 2952 0x00 0x00 NOPX + 2954 0x00 0x00 NOPX + 2956 0x00 0x00 NOPX + 2958 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 2960 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 2962 0x1e 0xa1 0x1c 0xf8 MOV r26, eh0 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2966 0x14 0x55 0x08 0x18 REL.COND r17, r16, r26 + 2970 0xfe 0xc6 0xdd 0xc0 0x39 0xd4 LDA r17, [p7, #-4]; MOV r27, el0 + 2976 0x06 0x06 0x56 0x98 LDA r18, [p6] + 2980 0x00 0x00 NOPX + 2982 0x00 0x00 NOPX + 2984 0x00 0x00 NOPX + 2986 0x00 0x00 NOPX + 2988 0x00 0x00 NOPX + 2990 0x14 0x27 0x11 0x98 SUB r19, r16, r17 + 2994 0x8c 0x66 0x40 0xd2 0x14 0x24 SEL.EQZ r17, r17, r19, r27; ADD.NC p0, r18, #20 + 3000 0x00 0xc6 0xdf 0xec 0x63 0x0c LDA r17, [p0]; ST r17, [p7, #-4] + 3006 0x00 0x00 NOPX + 3008 0x00 0x00 NOPX + 3010 0x00 0x00 NOPX + 3012 0x00 0x00 NOPX + 3014 0x00 0x00 NOPX + 3016 0x00 0x00 NOPX + 3018 0x14 0x55 0x08 0x18 REL.COND r17, r16, r26 + 3022 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] + 3026 0x00 0xe6 0x36 0x98 LDA r17, [p0, #-8] + 3030 0x07 0xef 0x19 0x18 LDA p6, [sp, #-20] +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 3034 0x07 0xf7 0x99 0x18 LDA p7, [sp, #-12] +.aggressive_scheduled_block_id 2 +.noswbrkpt + 3038 0x07 0xf1 0xd1 0x18 LDA r14, [sp, #-16] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3042 0x07 0xf9 0xf1 0x18 LDA r15, [sp, #-8] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3046 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3052 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3056 0x14 0x21 0x11 0x98 SUB r16, r16, r17 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3060 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3062 0x1e 0xd7 0x20 0xf8 MOV r27, r14 +.delay_slot + 3066 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 +.delay_slot + 3070 0x08 0xe6 0x11 0x98 ST r16, [p0, #-8] +.label _Z13kernelWrapperPPvjjjj__end +.label __Z13kernelWrapperPPvjjjj___func_end0 + +.text_segment PM 3088 +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E +.function_start + 3088 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 3092 0x00 0x07 0xc0 0xc6 0x40 0x44 MOVXM p0, #508704 +.delay_slot + 3098 0x18 0x00 0x80 0xb8 MOV m0, #64 +.delay_slot + 3102 0x08 0x04 0x01 0x98 ST m0, [p0] +.delay_slot + 3106 0x08 0x14 0x01 0x98 ST m0, [p0, #4] +.delay_slot +.swstall delay_slot + 3110 0x00 0x00 NOPX +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_end0 + +.text_segment PM 3120 +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv +.function_start + 3120 0x23 0x85 0xd0 0x00 0x01 0xf0 0x31 0x80 0x10 0xba LDA el0, [p1], #4; MOVXM p0, #508672 + 3130 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 3136 0x0f 0xf8 0x3d 0x98 ST lr, [sp, #-8] + 3140 0x0f 0xfd 0xf5 0x98 ST r15, [sp, #-4] + 3144 0x00 0x00 NOPX + 3146 0x00 0x00 NOPX + 3148 0x00 0x00 NOPX + 3150 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 3154 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 3158 0x00 0x00 NOPX + 3160 0x00 0x00 NOPX + 3162 0x00 0x00 NOPX + 3164 0x00 0x00 NOPX + 3166 0x00 0x00 NOPX + 3168 0x00 0x00 NOPX + 3170 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 3174 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 3178 0x00 0x00 NOPX + 3180 0x00 0x00 NOPX + 3182 0x00 0x00 NOPX + 3184 0x00 0x00 NOPX + 3186 0x00 0x00 NOPX + 3188 0x00 0x00 NOPX + 3190 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 3194 0x01 0x14 0x2e 0x98 LDA el0, [p1, #4] + 3198 0x00 0x00 NOPX + 3200 0x00 0x00 NOPX +.no_stack_arguments + 3202 0x00 0x06 0x08 0x00 0x01 0x04 JL #3088 +.delay_slot +.swstall delay_slot + 3208 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3210 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3212 0x00 0x00 NOPX +.delay_slot + 3214 0x08 0xdc 0x29 0x98 ST el0, [p0], #-12 +.delay_slot + 3218 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x5e 0x86 0x07 0x00 0x00 0x1c 0x2e NOPA; NOPS; MOV r15, p0; NOPV +.return_address + 3232 0xff 0x07 0x20 0x01 0x00 0x68 0x33 0xc4 0x08 0xba LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16 + 3242 0x01 0xe2 0x80 0x01 0x80 0x08 0x07 0xfd 0x58 0xba MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 + 3252 0xff 0xbe 0x20 0x0a 0x11 0x80 0x07 0xa0 0x01 0x7a LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128 + 3262 0x00 0x06 0x4a 0x98 LDA.u8 r18, [p0] + 3266 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3268 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3270 0x00 0x02 0x17 0x18 ST.s16 r16, [p0, dj0] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3274 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3278 0x10 0x22 0x05 0x18 MOVX r17, #1 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3282 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3288 0x14 0x77 0x27 0x98 EQ r27, r17, r18 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3292 0x14 0x21 0x82 0x18 SEL.EQZ r16, r16, r24, r27 +.delay_slot +.swstall delay_slot + 3296 0x00 0x00 NOPX +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + +.text_segment PM 3312 +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E +.function_start + 3312 0x02 0x80 0x80 0x00 0x01 0xf0 0x31 0x86 0x10 0xba MOVA m0, #20; MOVXM p0, #508684 + 3322 0x01 0x01 0x50 0x00 0x20 0x28 0x28 0x06 0x58 0xba LDA.u8 r0, [p0], m0; MOVX r2, #1; MOV r1, #6 + 3332 0x00 0x00 NOPX + 3334 0x00 0x00 NOPX + 3336 0x00 0x00 NOPX + 3338 0x00 0x00 NOPX + 3340 0x00 0x00 NOPX + 3342 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 3346 0x10 0x06 0xf0 0x18 NEZ r3, r0 +.delay_slot + 3350 0x10 0x80 0x08 0x98 NE r0, r2, r0 +.delay_slot + 3354 0x10 0x00 0x1d 0x98 LSHL r0, r0, r1 +.delay_slot + 3358 0x02 0x82 0x31 0x88 0x3b 0x5c ST r0, [p0, #4]; LSHL r2, r3, r1 +.delay_slot + 3364 0x08 0x04 0x51 0x98 ST r2, [p0] +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_end0 + +.text_segment PM 3376 +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv +.function_start + 3376 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 3382 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4] +.no_stack_arguments + 3386 0x00 0x06 0x18 0x00 0x01 0x04 JL #3120 +.delay_slot + 3392 0x00 0x07 0xc0 0xc6 0x00 0x44 MOVXM p0, #508672 +.delay_slot +.swstall delay_slot + 3398 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3400 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3402 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3404 0x00 0x01 0x67 0x98 NOPA +.return_address + 3408 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] +.tail_call + 3412 0x00 0x06 0x78 0x00 0x00 0x84 J #3312 +.delay_slot + 3418 0x00 0x07 0xc0 0xc6 0x00 0x44 MOVXM p0, #508672 +.delay_slot + 3424 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 3430 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3432 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3434 0x00 0x00 NOPX +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + +.text_segment PM 3440 +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.function_start + 3440 0x02 0x80 0x80 0x00 0x01 0xf1 0xb1 0x80 0x10 0xba MOVA m0, #20; MOVXM p3, #508672 + 3450 0x03 0x3c 0x16 0x98 LDA r0, [p3], #12 + 3454 0x61 0x05 0x58 0xcd 0x81 0xd4 LDA.u8 r1, [p3], m0; MOV p4, p3 + 3460 0x00 0x00 NOPX + 3462 0x00 0x00 NOPX + 3464 0x00 0x00 NOPX + 3466 0x00 0x00 NOPX + 3468 0x00 0x00 NOPX + 3470 0x00 0x00 NOPX + 3472 0x08 0x06 0xe8 0x40 0x01 0x84 JNZ r1, #3536 +.delay_slot + 3478 0x17 0xc4 0xe9 0x18 MOVX r2, #-6 +.delay_slot + 3482 0x10 0x00 0x2d 0x98 LSHL r0, r0, r2 +.delay_slot +.swstall delay_slot + 3486 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3488 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3490 0x00 0x00 NOPX + 3492 0x00 0x04 0x32 0x98 LDA.s16 r1, [p0] + 3496 0x00 0x00 NOPX + 3498 0x00 0x00 NOPX + 3500 0x00 0x00 NOPX + 3502 0x00 0x06 0xf8 0x00 0x00 0x84 J #3568 +.delay_slot +.swstall delay_slot + 3508 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3510 0x00 0x00 NOPX +.delay_slot + 3512 0x18 0x05 0x72 0xf8 VBCST.16 x0, r1 +.delay_slot +.swstall delay_slot + 3516 0x00 0x01 0x67 0x98 NOPA +.delay_slot + 3520 0x00 0x2c 0xf0 0x00 0x20 0x04 0x13 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST x0, [p0]; NOPX; NOPM; NOPV +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_96 + 3536 0x01 0x04 0x32 0x98 LDA.s16 r1, [p1] + 3540 0x00 0x00 NOPX + 3542 0x00 0x00 NOPX + 3544 0x00 0x00 NOPX + 3546 0x00 0x00 NOPX + 3548 0x00 0x00 NOPX + 3550 0x00 0x00 NOPX + 3552 0x18 0x05 0x72 0xf8 VBCST.16 x0, r1 + 3556 0x00 0x00 NOPX + 3558 0x00 0x2c 0xf1 0x04 0x13 0x00 0x00 0x00 0x00 0x7a NOPA; VST x0, [p1]; NOPX +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_128 + 3568 0x8a 0x80 0xd0 0x00 0x07 0x8a 0xb8 0x3f 0x48 0xba LDA m0, [p4, #20]; MOVX r0, #60; ADD.NC lc, r0, #-3 + 3578 0x62 0x90 0xd0 0x00 0x00 0x00 0x7f 0x30 0x10 0xba LDA m1, [p3, #4]; MOVXM ls, #3680 + 3588 0x00 0x00 0x06 0xfd 0x00 0x44 MOVXM le, #3712 + 3594 0x00 0x07 0xc8 0xc4 0x40 0x44 MOVXM p4, #508448 + 3600 0x04 0x04 0x22 0x98 LDA.s8 r1, [p4] + 3604 0x00 0x00 NOPX + 3606 0x00 0x00 NOPX + 3608 0x00 0x08 0xab 0x98 VLDA.CONV.fp32.bf16 cml1, [p0], m0 + 3612 0x01 0x29 0x2b 0x98 VLDA.CONV.fp32.bf16 cml2, [p1], m1 + 3616 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3620 0x01 0x2a 0x2b 0x98 VLDA.CONV.fp32.bf16 cml4, [p1], m1 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3624 0x01 0x15 0x70 0xf5 0x00 0x2c VLDA.CONV.fp32.bf16 cml1, [p0], m0; MOVX crRnd, r1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3630 0x25 0x25 0x70 0x04 0x03 0x28 0x3d 0x62 VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3638 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3642 0x25 0x45 0x70 0x04 0x04 0x10 0x3d 0x62 VLDA.CONV.fp32.bf16 cml4, [p1], m1; VADD.f dm4, dm0, dm4, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3650 0x00 0x08 0xab 0x98 VLDA.CONV.fp32.bf16 cml1, [p0], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3654 0x25 0x25 0x70 0x04 0x03 0x28 0x3d 0x62 VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3662 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3666 0x25 0x45 0x74 0x3b 0x46 0x00 0x00 0x40 0x1a 0x57 0x04 0x10 0x3d 0x6e VLDA.CONV.fp32.bf16 cml4, [p1], m1; VST.CONV.bf16.fp32 cml3, [p2], #64; NOPM; VADD.f dm4, dm0, dm4, r0 +.label ZLS_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_240 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3680 0x00 0x08 0xab 0x98 VLDA.CONV.fp32.bf16 cml1, [p0], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3684 0x25 0x25 0x70 0x00 0x21 0x0f 0x11 0x8e 0x03 0x28 0x3d 0x66 VLDA.CONV.fp32.bf16 cml2, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3696 0x01 0x05 0x70 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLE_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_272 +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3712 0x25 0x45 0x70 0x00 0x22 0x1d 0xa3 0x00 0x00 0x00 0x01 0xa5 0x78 0x20 0x81 0xeb VLDA.CONV.fp32.bf16 cml4, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml3, [p2], #64;NOPX; NOPM; VADD.f dm4, dm0, dm4, r0 +.loop_nesting 0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3728 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3730 0x43 0xc4 0x60 0x02 0x03 0x28 0x3d 0x62 VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3738 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3740 0x43 0xb4 0x60 0x02 0x04 0x10 0x3d 0x62 VST.CONV.bf16.fp32 cml3, [p2], #64; VADD.f dm4, dm0, dm4, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3748 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3750 0x43 0xc4 0x60 0x50 0x00 0x5c VST.CONV.bf16.fp32 cml4, [p2], #64;RET lr +.delay_slot +.swstall delay_slot + 3756 0x00 0x00 NOPX +.delay_slot + 3758 0x0a 0x1d 0xa3 0x18 VST.CONV.bf16.fp32 cml3, [p2], #64 +.delay_slot +.swstall delay_slot + 3762 0x00 0x00 NOPX +.delay_slot + 3764 0x0a 0x1e 0x23 0x18 VST.CONV.bf16.fp32 cml4, [p2], #64 +.delay_slot +.swstall delay_slot + 3768 0x00 0x00 NOPX +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0 + +.text_segment PM 3776 +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E +.function_start + 3776 0x50 0x91 0x60 0x00 0x04 0x00 0x00 0x00 0x71 0x3a MOVS p2, p1; PADDXM [sp], #128 + 3786 0xff 0x87 0xb0 0x02 0x08 0x60 0x70 0x02 ST lr, [sp, #-4]; MOV r16, p0 + 3794 0x1c 0x55 0xe0 0xf8 MOV r17, sp + 3798 0x00 0x07 0xc6 0xc6 0x18 0x44 MOVXM p3, #508684 + 3804 0x65 0xed 0x50 0xd1 0x80 0x14 LDA.u8 r27, [p3], #2; ADD.NC p0, r17, #-128 + 3810 0x73 0xca 0x50 0x0e 0x56 0x0c LDA.s16 r18, [p3], #-14; VST sfh, [p0] + 3816 0x00 0x06 0x57 0x18 ST.s16 r18, [p0] + 3820 0x00 0x00 NOPX + 3822 0x00 0x00 NOPX +.no_stack_arguments + 3824 0x00 0x06 0xb8 0x00 0x01 0x04 JL #3440 +.delay_slot + 3830 0x1c 0x50 0xc0 0xf8 MOV r17, p0 +.delay_slot +.swstall delay_slot + 3834 0x00 0x00 NOPX +.delay_slot + 3836 0x14 0x25 0x12 0x18 SEL.EQZ r18, r16, r17, r27 +.delay_slot + 3840 0x8c 0x20 0x42 0xd2 0x41 0xe4 SEL.EQZ r16, r17, r16, r27; MOV p1, r18 +.delay_slot + 3846 0x00 0x2c 0xf0 0x00 0x10 0x00 0x34 0x10 0x7e 0xba NOPA; NOPB; MOV p0, r16 +.return_address + 3856 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] + 3860 0x00 0x00 NOPX + 3862 0x00 0x00 NOPX + 3864 0x00 0x00 NOPX + 3866 0x00 0x00 NOPX + 3868 0x00 0x00 NOPX + 3870 0x00 0x00 NOPX + 3872 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 3876 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128 +.delay_slot +.swstall delay_slot + 3882 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3884 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3886 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3888 0x00 0x00 NOPX +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_end0 + +.text_segment PM 3904 +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function_start + 3904 0x00 0x07 0xc6 0xc4 0x00 0x44 MOVXM p3, #508416 + 3910 0x60 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p3]; MOV r17, CORE_ID + 3916 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 3922 0xff 0x63 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p6, [sp, #-8]; MOV r0, r15 + 3930 0xff 0x82 0xb0 0x00 0x01 0xf3 0x31 0x02 0x11 0x3a ST r0, [sp, #-4]; MOVXM p6, #508420 + 3940 0x1b 0xd4 0xc0 0xf8 MOV r15, p2 + 3944 0x00 0x00 NOPX + 3946 0x00 0x00 NOPX + 3948 0x80 0x08 0x08 0x40 0x01 0x84 JNZ r16, #4112 +.delay_slot + 3954 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17 +.delay_slot + 3958 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 3962 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12] +.delay_slot + 3966 0xc0 0xc6 0x30 0x03 0x30 0x60 0x70 0x02 ST r17, [p6]; MOV p6, p0 +.delay_slot + 3974 0x00 0x07 0xc0 0xc6 0x00 0x44 MOVXM p0, #508672 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3980 0x00 0x07 0xc4 0xc4 0x40 0x44 MOVXM p2, #508448 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3986 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x31 0x0e 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #508444 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3996 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 3998 0x00 0x06 0x98 0x00 0x01 0x04 JL #3376 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4004 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4006 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4008 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 4012 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 4016 0x00 0x2c 0xf0 0x00 0x22 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV +.return_address + 4032 0x00 0x07 0xc4 0xc4 0x08 0x44 MOVXM p2, #508420 + 4038 0x40 0xc2 0xd0 0x00 0x01 0xf1 0x31 0x80 0x10 0xba LDA r16, [p2]; MOVXM p2, #508672 + 4048 0x40 0xc6 0xd0 0x00 0x01 0xf1 0x31 0x80 0x10 0xba LDA r17, [p2]; MOVXM p2, #508672 + 4058 0x4a 0xcb 0x50 0x00 0x01 0xf0 0xb1 0x04 0x10 0xba LDA.u16 r18, [p2, #10]; MOVXM p1, #508424 + 4068 0x00 0x00 NOPX + 4070 0x00 0x00 NOPX + 4072 0x00 0x08 0x10 0x00 0x00 0x84 J #4128 +.delay_slot + 4078 0x00 0x07 0xc0 0xc4 0x30 0x44 MOVXM p0, #508440 +.delay_slot +.swstall delay_slot + 4084 0x00 0x00 NOPX +.delay_slot + 4086 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 +.delay_slot + 4090 0x00 0x2c 0xf0 0x0c 0xa3 0x0c NOPA; ST r18, [p0] +.delay_slot + 4096 0x00 0x2c 0xf0 0x00 0x21 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 + 4112 0x00 0x2c 0xf0 0x00 0x22 0x80 0x8b 0x00 0x01 0xf0 0xb1 0x04 0x10 0x00 0x00 0xe1 NOPA; NOPB; MOVS p2, p0; MOVXM p1, #508424; NOPV +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 + 4128 0x73 0x91 0x60 0x03 0xb3 0xc3 0x00 0x02 MOVS p3, p7; ADD.NC p7, r15, #12 + 4136 0xff 0xee 0xd0 0x00 0x01 0xf0 0x31 0x00 0x10 0xba LDA r27, [p7], #-4; MOVXM p0, #508416 + 4146 0x07 0xfe 0x16 0x98 LDA r16, [p7], #-4 + 4150 0x07 0xfe 0x36 0x98 LDA r17, [p7], #-4 + 4154 0x07 0x46 0x56 0x98 LDA r18, [p7, #16] + 4158 0x00 0x00 NOPX + 4160 0x00 0x00 NOPX + 4162 0x00 0x00 NOPX + 4164 0x00 0x00 NOPX + 4166 0x00 0x00 NOPX + 4168 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 4172 0x0f 0x06 0x11 0x98 ST r16, [p7] + 4176 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 4180 0x00 0x00 NOPX + 4182 0x00 0x00 NOPX + 4184 0x00 0x00 NOPX + 4186 0x14 0x93 0x08 0x18 ACQ r18, r16 + 4190 0x04 0x00 0xa7 0xad 0x81 0xe4 MOVX r16, #1; MOV r15, p3 + 4196 0x00 0x00 NOPX + 4198 0x00 0x00 NOPX + 4200 0x00 0x06 0x36 0x98 LDA r17, [p0] + 4204 0xc0 0xca 0xdc 0xdd 0x81 0xd4 LDA r18, [p6]; MOV p6, p7 + 4210 0x01 0x06 0x76 0x98 LDA r19, [p1] + 4214 0x07 0x5c 0x9e 0x98 LDA p1, [p7], #20 + 4218 0x00 0x00 NOPX +.no_stack_arguments + 4220 0x00 0x07 0x60 0x00 0x01 0x04 JL #3776 +.delay_slot +.swstall delay_slot + 4226 0x00 0x00 NOPX +.delay_slot + 4228 0x14 0x62 0x07 0x18 ADD r17, r17, #1 +.delay_slot + 4232 0x08 0x06 0x31 0x98 ST r17, [p0] +.delay_slot + 4236 0x14 0xe1 0x0d 0x98 LSHL r16, r19, r16 +.delay_slot + 4240 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xa0 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV +.return_address + 4256 0xca 0xc6 0xd0 0x00 0x01 0xf3 0x31 0x00 0x10 0xba LDA r17, [p6, #20]; MOVXM p6, #508416 + 4266 0x10 0x20 0x05 0x18 MOVX r16, #1 + 4270 0x00 0x00 NOPX + 4272 0x00 0x00 NOPX + 4274 0x00 0x00 NOPX + 4276 0x00 0x00 NOPX + 4278 0x00 0x00 NOPX + 4280 0x14 0x51 0x08 0x18 REL r17, r16 + 4284 0xfc 0xce 0xd0 0x00 0x01 0xf1 0x31 0x0c 0x10 0xba LDA r19, [p7, #-8]; MOVXM p2, #508440 + 4294 0x06 0x06 0x36 0x98 LDA r17, [p6] + 4298 0x02 0x06 0x56 0x98 LDA r18, [p2] + 4302 0x00 0x00 NOPX + 4304 0x00 0x00 NOPX + 4306 0x00 0x00 NOPX + 4308 0x00 0x00 NOPX + 4310 0x14 0x21 0x31 0x98 SUB r16, r16, r19 + 4314 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8] + 4318 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 4322 0x80 0x08 0x80 0x40 0x01 0x84 JNZ r16, #4352 +.delay_slot +.swstall delay_slot + 4328 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4330 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4332 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4334 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4336 0x00 0x00 NOPX + 4338 0x10 0x20 0x01 0x18 MOVX r16, #0 + 4342 0x00 0x2c 0xf6 0x06 0x11 0x80 0x00 0x00 0x00 0x7a NOPA; ST r16, [p6]; NOPX +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 + 4352 0x07 0xf4 0x39 0x18 LDA lr, [sp, #-12] + 4356 0x07 0xfb 0x19 0x18 LDA p6, [sp, #-8] + 4360 0x00 0x00 NOPX + 4362 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 4364 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4366 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4370 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4372 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4376 0x1f 0x67 0xa0 0xf8 MOV p7, r15 +.delay_slot + 4380 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 4386 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4388 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4390 0x00 0x00 NOPX +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + +.text_segment PM 4400 +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv +.function_start + 4400 0x23 0x85 0xd0 0x00 0x01 0xf0 0x31 0xc0 0x10 0xba LDA el0, [p1], #4; MOVXM p0, #508800 + 4410 0x00 0x00 NOPX + 4412 0x00 0x00 NOPX + 4414 0x00 0x00 NOPX + 4416 0x00 0x00 NOPX + 4418 0x00 0x00 NOPX + 4420 0x00 0x00 NOPX + 4422 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 4426 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 4430 0x00 0x00 NOPX + 4432 0x00 0x00 NOPX + 4434 0x00 0x00 NOPX + 4436 0x00 0x00 NOPX + 4438 0x00 0x00 NOPX + 4440 0x00 0x00 NOPX + 4442 0x08 0x04 0x29 0x98 ST el0, [p0] + 4446 0x01 0x14 0x2e 0x98 LDA el0, [p1, #4] + 4450 0x00 0x00 NOPX + 4452 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot + 4456 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4458 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4460 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4462 0x00 0x00 NOPX +.delay_slot + 4464 0x08 0x14 0x29 0x98 ST el0, [p0, #4] +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv__end +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_end0 + +.text_segment PM 4480 +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E +.function_start + 4480 0xff 0x40 0x00 0x3d 0x68 0x00 0x01 0xf1 0x31 0xc0 0x10 0xb6 MOVA r0, #-6; VLDB x10, [p0], #64; MOVXM p2, #508800 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4492 0x40 0x8a 0xd0 0x3b 0xe8 0x00 0x01 0xf1 0x31 0x10 0x10 0xb6 LDA r2, [p2]; VLDB x7, [p0], #64; MOVXM p2, #508448 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4504 0x40 0x84 0x50 0x3d 0x68 0x00 0x00 0x10 0xc8 0x40 0x10 0xb6 LDA.s8 r1, [p2]; VLDB x10, [p0], #64; MOVXM r6, #16512 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4516 0x03 0xbe 0x80 0x32 0xe5 0xf4 VLDB x7, [p0], #64; VBCST.16 x0, r6 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4522 0x00 0x00 0xc2 0x21 0x00 0x44 MOVXM r4, #49280 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4528 0x18 0x91 0x72 0xf8 VBCST.16 x1, r4 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4532 0x00 0x00 0x71 0xbf 0xfe 0x44 MOVXM r3, #32767 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4538 0x1c 0x50 0x2c 0xf8 VMIN_GE.bf16 x8, r16, x10, x0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4542 0x10 0x01 0xb6 0x81 0xd9 0xe4 LSHL r0, r2, r0; VMAX_LT.bf16 x6, r16, x8, x1 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4548 0x0f 0x50 0x08 0x70 0x59 0xe4 MOVX crRnd, r1; VMIN_GE.bf16 x8, r16, x7, x0 + 4554 0x19 0x0d 0x72 0xf8 VBCST.16 x2, r3 + 4558 0x00 0x00 0x32 0xba 0x00 0x44 MOVXM r5, #15616 + 4564 0x19 0x95 0x72 0xf8 VBCST.16 x3, r5 + 4568 0x00 0x00 0x38 0xbe 0x00 0x44 MOVXM r17, #16128 + 4574 0x1d 0xb1 0x2b 0x78 VBAND x11, x6, x2 + 4578 0x64 0x5e 0x25 0x8a 0xe5 0xe4 MOVX r17, #828; VBCST.16 x5, r17 + 4584 0x04 0xc0 0xec 0xe6 0x8c 0xe7 0x61 0x62 VMAX_LT.bf16 x9, r16, x8, x1; VMUL.f dm4, x3, x11, r17 + 4592 0x1c 0x49 0x2b 0x78 VBAND x8, x9, x2 + 4596 0x00 0x00 0x31 0x3d 0x00 0x44 MOVXM r2, #16000 + 4602 0x02 0x09 0x72 0xe6 0x8a 0xe7 0x01 0x62 VBCST.16 x4, r2; VMUL.f dm2, x3, x8, r17 + 4610 0x18 0x0b 0x8a 0xf8 VCONV.fp32.bf16 cml0, x5 + 4614 0x04 0x50 0x2c 0xe6 0x8b 0x0c 0x81 0x62 VMIN_GE.bf16 x8, r16, x10, x0; VMAC.f dm3, dm0, x6, x4, r17 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 4622 0xb2 0x42 0xc0 0x00 0x00 0x8f 0x24 0x02 0x89 0x12 0x81 0x56 VCONV.bf16.fp32 x11, cml4; MOVXM ls, #4672; VMAC.f dm1, dm0, x9, x4, r17 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4634 0x1b 0x40 0xec 0xf8 VMAX_LT.bf16 x6, r16, x8, x1 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4638 0x00 0x00 0x00 0xb7 0x2a 0x02 0x8a 0x76 0xc3 0x5a MOVXM le, #4768; VMSC.f dm2, dm3, x11, x6, r17 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4648 0x52 0x22 0xc0 0x02 0xb8 0x3f 0x80 0x02 VCONV.bf16.fp32 x5, cml2; ADD.NC lc, r0, #-2 + 4656 0x1c 0x38 0x2c 0xf8 VMIN_GE.bf16 x8, r16, x7, x0 + 4660 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x02 0xd8 0x95 0xb0 0xf6 NOPA; NOPB; NOPS; VBAND x11, x6, x2 +.label ZLS_F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_192 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 4672 0x00 0x3d 0x6c 0xc0 0xec 0xe6 0x8c 0x2b 0x23 0x4a VLDB x10, [p0], #64; VMAX_LT.bf16 x9, r16, x8, x1; VMSC.f dm4, dm1, x5, x9, r17 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 4682 0x00 0x3b 0xec 0x49 0x2b 0x66 0x8c 0xe7 0x61 0x4a VLDB x7, [p0], #64; VBAND x8, x9, x2; VMUL.f dm4, x3, x11, r17 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4692 0x8b 0x0c 0x81 0x48 VMAC.f dm3, dm0, x6, x4, r17 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4696 0x8a 0xe7 0x01 0x48 VMUL.f dm2, x3, x8, r17 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4700 0x23 0xa4 0x60 0x02 0x89 0x12 0x81 0x62 VST.CONV.bf16.fp32 cml2, [p1], #64; VMAC.f dm1, dm0, x9, x4, r17 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4708 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4710 0x00 0x2c 0xf1 0x1e 0x23 0x00 0x00 0x00 0x00 0x7a NOPA; VST.CONV.bf16.fp32 cml4, [p1], #64;NOPX +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 4720 0x00 0x2c 0xf0 0x00 0x25 0x92 0x16 0x00 0x00 0x02 0x28 0x16 0x78 0x00 0x00 0xe1 NOPA; NOPB; VCONV.bf16.fp32 x11, cml4; NOPX; VMIN_GE.bf16 x8, r16, x10, x0; NOPV +.aggressive_scheduled_block_id 4 +.noswbrkpt + 4736 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x01 0xa0 0x76 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x8, x1; NOPV +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4752 0x00 0x2c 0xf0 0x00 0x22 0x91 0x16 0x00 0x00 0x02 0x1c 0x16 0x7c 0x53 0xb6 0x1b NOPA; NOPB; VCONV.bf16.fp32 x5, cml2; NOPX; VMIN_GE.bf16 x8, r16, x7, x0; VMSC.f dm2, dm3, x11, x6, r17 +.label ZLE_F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_288 +.end_of_loop + 4768 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x02 0xd8 0x95 0xb8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VBAND x11, x6, x2; NOPV +.loop_nesting 0 + 4784 0x04 0xc0 0xec 0xe6 0x8c 0x2b 0x23 0x62 VMAX_LT.bf16 x9, r16, x8, x1; VMSC.f dm4, dm1, x5, x9, r17 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 4792 0x1c 0x49 0x2b 0x78 VBAND x8, x9, x2 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 4796 0x8c 0xe7 0x61 0x48 VMUL.f dm4, x3, x11, r17 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 4800 0x8a 0xe7 0x01 0x48 VMUL.f dm2, x3, x8, r17 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 4804 0x09 0x1d 0x23 0x18 VST.CONV.bf16.fp32 cml2, [p1], #64 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 4808 0x00 0x00 NOPX +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4810 0x09 0x1e 0x23 0x18 VST.CONV.bf16.fp32 cml4, [p1], #64 + 4814 0x8b 0x0c 0x81 0x48 VMAC.f dm3, dm0, x6, x4, r17 + 4818 0xb2 0x42 0xc0 0x02 0x89 0x12 0x81 0x62 VCONV.bf16.fp32 x11, cml4; VMAC.f dm1, dm0, x9, x4, r17 + 4826 0x0a 0x91 0x16 0x18 VCONV.bf16.fp32 x5, cml2 + 4830 0x8a 0x76 0xc3 0x48 VMSC.f dm2, dm3, x11, x6, r17 + 4834 0x8c 0x2b 0x23 0x48 VMSC.f dm4, dm1, x5, x9, r17 + 4838 0x00 0x00 NOPX + 4840 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot + 4844 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4846 0x00 0x00 NOPX +.delay_slot + 4848 0x09 0x1d 0x23 0x18 VST.CONV.bf16.fp32 cml2, [p1], #64 +.delay_slot + 4852 0x09 0x1e 0x23 0x18 VST.CONV.bf16.fp32 cml4, [p1], #64 +.delay_slot +.swstall delay_slot + 4856 0x00 0x00 NOPX +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E__end +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_end0 + +.text_segment PM 4864 +.label __Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function_start + 4864 0x00 0x07 0xc6 0xc4 0x00 0x44 MOVXM p3, #508416 + 4870 0x60 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p3]; MOV r17, CORE_ID + 4876 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 4882 0xff 0x63 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p6, [sp, #-8]; MOV r0, r15 + 4890 0xff 0x82 0xb0 0x00 0x01 0xf3 0x31 0x02 0x11 0x3a ST r0, [sp, #-4]; MOVXM p6, #508420 + 4900 0x1b 0xd4 0xc0 0xf8 MOV r15, p2 + 4904 0x00 0x00 NOPX + 4906 0x00 0x00 NOPX + 4908 0x80 0x09 0xe8 0x40 0x01 0x84 JNZ r16, #5072 +.delay_slot + 4914 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17 +.delay_slot + 4918 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 4922 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12] +.delay_slot + 4926 0xc0 0xc6 0x30 0x03 0x30 0x60 0x70 0x02 ST r17, [p6]; MOV p6, p0 +.delay_slot + 4934 0x00 0x07 0xc0 0xc7 0x00 0x44 MOVXM p0, #508800 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4940 0x00 0x07 0xc4 0xc4 0x40 0x44 MOVXM p2, #508448 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4946 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x31 0x0e 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #508444 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4956 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 4958 0x00 0x08 0x98 0x00 0x01 0x04 JL #4400 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4964 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4966 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4968 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 4972 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 4976 0x00 0x2c 0xf0 0x00 0x22 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV +.return_address + 4992 0x00 0x07 0xc4 0xc4 0x08 0x44 MOVXM p2, #508420 + 4998 0x40 0xc2 0xd0 0x00 0x01 0xf1 0x31 0xc0 0x10 0xba LDA r16, [p2]; MOVXM p2, #508800 + 5008 0x40 0xc6 0xd0 0x00 0x01 0xf1 0x31 0xc0 0x10 0xba LDA r17, [p2]; MOVXM p2, #508800 + 5018 0x48 0xcb 0x50 0x00 0x01 0xf0 0xb1 0x04 0x10 0xba LDA.u16 r18, [p2, #8]; MOVXM p1, #508424 + 5028 0x00 0x00 NOPX + 5030 0x00 0x00 NOPX + 5032 0x00 0x09 0xf0 0x00 0x00 0x84 J #5088 +.delay_slot + 5038 0x00 0x07 0xc0 0xc4 0x30 0x44 MOVXM p0, #508440 +.delay_slot +.swstall delay_slot + 5044 0x00 0x00 NOPX +.delay_slot + 5046 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 +.delay_slot + 5050 0x00 0x2c 0xf0 0x0c 0xa3 0x0c NOPA; ST r18, [p0] +.delay_slot + 5056 0x00 0x2c 0xf0 0x00 0x21 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV +.label TGT_F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 + 5072 0x00 0x2c 0xf0 0x00 0x22 0x80 0x8b 0x00 0x01 0xf0 0xb1 0x04 0x10 0x00 0x00 0xe1 NOPA; NOPB; MOVS p2, p0; MOVXM p1, #508424; NOPV +.label TGT_F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 + 5088 0x73 0x91 0x60 0x03 0xb3 0xc3 0x00 0x02 MOVS p3, p7; ADD.NC p7, r15, #12 + 5096 0xff 0xee 0xd0 0x00 0x01 0xf0 0x31 0x00 0x10 0xba LDA r27, [p7], #-4; MOVXM p0, #508416 + 5106 0x07 0xfe 0x16 0x98 LDA r16, [p7], #-4 + 5110 0x07 0xfe 0x36 0x98 LDA r17, [p7], #-4 + 5114 0x07 0x46 0x56 0x98 LDA r18, [p7, #16] + 5118 0x00 0x00 NOPX + 5120 0x00 0x00 NOPX + 5122 0x00 0x00 NOPX + 5124 0x00 0x00 NOPX + 5126 0x00 0x00 NOPX + 5128 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 5132 0x0f 0x06 0x11 0x98 ST r16, [p7] + 5136 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 5140 0x00 0x00 NOPX + 5142 0x00 0x00 NOPX + 5144 0x00 0x00 NOPX + 5146 0x14 0x93 0x08 0x18 ACQ r18, r16 + 5150 0x04 0x00 0xa7 0xad 0x81 0xe4 MOVX r16, #1; MOV r15, p3 + 5156 0x00 0x00 NOPX + 5158 0x00 0x00 NOPX + 5160 0x00 0x06 0x36 0x98 LDA r17, [p0] + 5164 0xc0 0xca 0xdc 0xdd 0x81 0xd4 LDA r18, [p6]; MOV p6, p7 + 5170 0x01 0x06 0x76 0x98 LDA r19, [p1] + 5174 0x07 0x5c 0x9e 0x98 LDA p1, [p7], #20 + 5178 0x00 0x00 NOPX +.no_stack_arguments + 5180 0x00 0x08 0xc0 0x00 0x01 0x04 JL #4480 +.delay_slot +.swstall delay_slot + 5186 0x00 0x00 NOPX +.delay_slot + 5188 0x14 0x62 0x07 0x18 ADD r17, r17, #1 +.delay_slot + 5192 0x08 0x06 0x31 0x98 ST r17, [p0] +.delay_slot + 5196 0x14 0xe1 0x0d 0x98 LSHL r16, r19, r16 +.delay_slot + 5200 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xa0 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV +.return_address + 5216 0xca 0xc6 0xd0 0x00 0x01 0xf3 0x31 0x00 0x10 0xba LDA r17, [p6, #20]; MOVXM p6, #508416 + 5226 0x10 0x20 0x05 0x18 MOVX r16, #1 + 5230 0x00 0x00 NOPX + 5232 0x00 0x00 NOPX + 5234 0x00 0x00 NOPX + 5236 0x00 0x00 NOPX + 5238 0x00 0x00 NOPX + 5240 0x14 0x51 0x08 0x18 REL r17, r16 + 5244 0xfc 0xce 0xd0 0x00 0x01 0xf1 0x31 0x0c 0x10 0xba LDA r19, [p7, #-8]; MOVXM p2, #508440 + 5254 0x06 0x06 0x36 0x98 LDA r17, [p6] + 5258 0x02 0x06 0x56 0x98 LDA r18, [p2] + 5262 0x00 0x00 NOPX + 5264 0x00 0x00 NOPX + 5266 0x00 0x00 NOPX + 5268 0x00 0x00 NOPX + 5270 0x14 0x21 0x31 0x98 SUB r16, r16, r19 + 5274 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8] + 5278 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 5282 0x80 0x0a 0x60 0x40 0x01 0x84 JNZ r16, #5312 +.delay_slot +.swstall delay_slot + 5288 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5290 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5292 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5294 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5296 0x00 0x00 NOPX + 5298 0x10 0x20 0x01 0x18 MOVX r16, #0 + 5302 0x00 0x2c 0xf6 0x06 0x11 0x80 0x00 0x00 0x00 0x7a NOPA; ST r16, [p6]; NOPX +.label TGT_F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 + 5312 0x07 0xf4 0x39 0x18 LDA lr, [sp, #-12] + 5316 0x07 0xfb 0x19 0x18 LDA p6, [sp, #-8] + 5320 0x00 0x00 NOPX + 5322 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 5324 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.noswbrkpt + 5326 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5330 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5332 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5336 0x1f 0x67 0xa0 0xf8 MOV p7, r15 +.delay_slot + 5340 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 5346 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5348 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5350 0x00 0x00 NOPX +.label _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + +.text_segment PM 5360 +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.function_start + 5360 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 5364 0x00 0x07 0xc0 0xc6 0xc0 0x44 MOVXM p0, #508768 +.delay_slot + 5370 0x18 0x00 0x80 0xb8 MOV m0, #64 +.delay_slot + 5374 0x08 0x04 0x01 0x98 ST m0, [p0] +.delay_slot + 5378 0x08 0x14 0x01 0x98 ST m0, [p0, #4] +.delay_slot +.swstall delay_slot + 5382 0x00 0x00 NOPX +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_end0 + +.text_segment PM 5392 +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.function_start + 5392 0x23 0x85 0xd0 0x00 0x01 0xf0 0x31 0xa0 0x10 0xba LDA el0, [p1], #4; MOVXM p0, #508736 + 5402 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 5408 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4] + 5412 0x00 0x00 NOPX + 5414 0x00 0x00 NOPX + 5416 0x00 0x00 NOPX + 5418 0x00 0x00 NOPX + 5420 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 5424 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 5428 0x00 0x00 NOPX + 5430 0x00 0x00 NOPX + 5432 0x00 0x00 NOPX + 5434 0x00 0x00 NOPX + 5436 0x00 0x00 NOPX + 5438 0x00 0x00 NOPX + 5440 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 5444 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 5448 0x00 0x00 NOPX + 5450 0x00 0x00 NOPX + 5452 0x00 0x00 NOPX + 5454 0x00 0x00 NOPX + 5456 0x00 0x00 NOPX + 5458 0x00 0x00 NOPX + 5460 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 5464 0x01 0x14 0x2e 0x98 LDA el0, [p1, #4] + 5468 0x00 0x00 NOPX + 5470 0x00 0x00 NOPX +.no_stack_arguments + 5472 0x00 0x0a 0x78 0x00 0x01 0x04 JL #5360 +.delay_slot + 5478 0x0f 0xfb 0x9d 0x98 ST p7, [sp, #-8] +.delay_slot +.swstall delay_slot + 5482 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5484 0x00 0x00 NOPX +.delay_slot + 5486 0x08 0xdc 0x29 0x98 ST el0, [p0], #-12 +.delay_slot + 5490 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x7b 0x06 0x07 0x00 0x00 0x1c 0x2e NOPA; NOPS; MOV p7, p0; NOPV +.return_address + 5504 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] + 5508 0x00 0x00 NOPX + 5510 0x00 0x00 NOPX + 5512 0x00 0x00 NOPX + 5514 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5516 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5518 0x07 0xfb 0x99 0x18 LDA p7, [sp, #-8] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5522 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5526 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5528 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5530 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5532 0x10 0x20 0x01 0x18 MOVX r16, #0 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5536 0xe8 0xc2 0x30 0x3f 0xfe 0x00 0x00 0x00 0x71 0x3a ST r16, [p7, #16]; PADDXM [sp], #-64 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + +.text_segment PM 5552 +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.function_start + 5552 0x04 0x00 0x80 0x00 0x01 0xf1 0xb1 0xa0 0x10 0xba MOVA m0, #32; MOVXM p3, #508736 + 5562 0x61 0x06 0xd0 0x00 0x01 0xf2 0x31 0x10 0x10 0xba LDA r1, [p3], m0; MOVXM p4, #508448 + 5572 0x60 0x90 0xd0 0x18 0x07 0x88 0x6f 0xfa 0x58 0xba LDA m1, [p3]; MOVX r0, #828; MOV r3, #-6 + 5582 0x62 0x80 0xd0 0x00 0x00 0x04 0x7b 0x38 0x10 0xba LDA m0, [p3, #4]; MOVXM ls, #5744 + 5592 0x80 0x88 0x50 0x00 0x00 0x05 0xbb 0x40 0x10 0xba LDA.s8 r2, [p4]; MOVXM le, #5760 + 5602 0x00 0x00 NOPX + 5604 0x00 0x00 NOPX + 5606 0x00 0x00 NOPX + 5608 0x10 0x42 0x3d 0x98 LSHL r1, r1, r3 + 5612 0x1d 0x70 0xfc 0x98 ADD.NC lc, r1, #-7 + 5616 0x21 0x13 0x70 0x50 0xe8 0x3c VLDA x2, [p1], m0; VLDB x1, [p0], m1 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5622 0x21 0x1b 0x70 0x50 0x68 0xba 0x80 0x12 VLDA x3, [p1], m0; VLDB x0, [p0], m1; MOVX crRnd, r2 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5630 0x21 0x13 0x70 0x50 0xe8 0x3c VLDA x2, [p1], m0; VLDB x1, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5636 0x21 0x1b 0x70 0x50 0x68 0x3c VLDA x3, [p1], m0; VLDB x0, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5642 0x21 0x13 0x70 0x50 0xe8 0x3c VLDA x2, [p1], m0; VLDB x1, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5648 0x21 0x1b 0x70 0x50 0x68 0x3c VLDA x3, [p1], m0; VLDB x0, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5654 0x21 0x13 0x70 0x50 0xe8 0x3c VLDA x2, [p1], m0; VLDB x1, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5660 0x21 0x1b 0x70 0x28 0x34 0x1d 0x00 0xe2 0x41 0x4a VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5670 0x21 0x13 0x70 0x28 0x74 0x1d 0x01 0xe0 0x61 0x4a VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5680 0x21 0x1b 0x70 0x28 0x34 0x1d 0x00 0xe2 0x41 0x4a VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5690 0x21 0x13 0x70 0x28 0x74 0x1d 0x01 0xe0 0x61 0x4a VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5700 0x21 0x1b 0x70 0x50 0x68 0x00 0xad 0x8e 0x00 0xe2 0x41 0x66 VLDA x3, [p1], m0; VLDB x0, [p0], m1; NOPS; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5712 0x21 0x13 0x70 0x50 0xe8 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x0f 0x03 0x0b VLDA x2, [p1], m0; VLDB x1, [p0], m1; NOPS; NOPX; NOPM; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5728 0x21 0x1b 0x70 0x50 0x6a 0x1c 0x23 0x00 0x00 0x00 0x01 0xa5 0x78 0x07 0x12 0x0b VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 +.label ZLS_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_192 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5744 0x21 0x13 0x70 0x50 0xea 0x1c 0xa3 0x00 0x00 0x00 0x01 0xa5 0x78 0x0f 0x03 0x0b VLDA x2, [p1], m0; VLDB x1, [p0], m1; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; VMUL.f dm1, x0, x3, r0 +.label ZLE_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_208 +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5760 0x21 0x1b 0x70 0x50 0x6a 0x1c 0x23 0x00 0x00 0x00 0x01 0xa5 0x78 0x07 0x12 0x0b VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 +.loop_nesting 0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5776 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5784 0x43 0x84 0x60 0x02 0x00 0xe2 0x41 0x62 VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5792 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5800 0x43 0x84 0x60 0x02 0x00 0xe2 0x41 0x62 VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5808 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5816 0x43 0x84 0x60 0x02 0x00 0xe2 0x41 0x62 VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5824 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5832 0x0a 0x1c 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p2], #64 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5836 0x43 0x94 0x60 0x50 0x00 0x5c VST.CONV.bf16.fp32 cml1, [p2], #64;RET lr +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5842 0x0a 0x1c 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p2], #64 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5846 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64 +.delay_slot + 5850 0x0a 0x1c 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p2], #64 +.delay_slot + 5854 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64 +.delay_slot +.swstall delay_slot + 5858 0x00 0x00 NOPX +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_end0 + +.text_segment PM 5872 +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0 +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.function_start + 5872 0x00 0x07 0xc8 0xc4 0x00 0x44 MOVXM p4, #508416 + 5878 0x80 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p4]; MOV r17, CORE_ID + 5884 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 5890 0xff 0x3a 0xb0 0x23 0x14 0x81 0xca 0x60 0x79 0x3a ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 + 5900 0xfd 0x83 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p0, [sp, #-20]; MOV r0, r15 + 5908 0x0f 0xfc 0x15 0x98 ST r0, [sp, #-4] + 5912 0x0f 0xf0 0x3d 0x98 ST lr, [sp, #-16] + 5916 0x00 0x00 NOPX + 5918 0x80 0x0b 0xd8 0x40 0x01 0x84 JNZ r16, #6064 +.delay_slot + 5924 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 5928 0x00 0x07 0xc4 0xc4 0x08 0x44 MOVXM p2, #508420 +.delay_slot + 5934 0x40 0xc6 0x30 0x01 0x37 0x60 0x70 0x02 ST r17, [p2]; MOV p2, p7 +.delay_slot + 5942 0x1b 0xd6 0xc0 0xf8 MOV r15, p3 +.delay_slot + 5946 0xfe 0xa3 0xb0 0x00 0x01 0xf3 0xb1 0xa0 0x11 0x3a ST p2, [sp, #-12]; MOVXM p7, #508736 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5956 0x13 0x91 0x60 0x00 0x01 0xf1 0x31 0x10 0x11 0x3a MOVS p0, p7; MOVXM p2, #508448 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5966 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x31 0x0e 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #508444 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5976 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 5978 0x00 0x0a 0x88 0x00 0x01 0x04 JL #5392 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5984 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5986 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5988 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 5992 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 5996 0x0a 0x06 0x11 0x98 ST r16, [p2] +.return_address + 6000 0xe0 0xc2 0xd0 0x00 0x01 0xf0 0xb1 0x02 0x10 0xba LDA r16, [p7]; MOVXM p1, #508420 + 6010 0x20 0xc6 0xd0 0x00 0x01 0xf1 0xb1 0x04 0x10 0xba LDA r17, [p1]; MOVXM p3, #508424 + 6020 0xea 0xcb 0x50 0x00 0x01 0xf0 0xb1 0x06 0x10 0xba LDA.u16 r18, [p7, #10]; MOVXM p1, #508428 + 6030 0x00 0x00 NOPX + 6032 0x00 0x00 NOPX + 6034 0x00 0x00 NOPX + 6036 0x00 0x0b 0xe0 0x00 0x00 0x84 J #6080 +.delay_slot + 6042 0x00 0x07 0xc4 0xc4 0x30 0x44 MOVXM p2, #508440 +.delay_slot + 6048 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 +.delay_slot + 6052 0x0a 0x06 0x51 0x98 ST r18, [p2] +.delay_slot + 6056 0x0b 0x06 0x11 0x98 ST r16, [p3] +.delay_slot + 6060 0x09 0x06 0x11 0x98 ST r16, [p1] +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 + 6064 0x00 0x07 0xc6 0xc4 0x10 0x44 MOVXM p3, #508424 + 6070 0x00 0x2c 0xf0 0x00 0x01 0xf0 0xb1 0x06 0x10 0xba NOPA; MOVXM p1, #508428 +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 + 6080 0x18 0x67 0x86 0x18 ADD.NC p0, r15, #12 + 6084 0x1f 0xee 0xd0 0x00 0x01 0xf1 0x31 0x00 0x10 0xba LDA r27, [p0], #-4; MOVXM p2, #508416 + 6094 0x00 0xfe 0x16 0x98 LDA r16, [p0], #-4 + 6098 0x00 0xfe 0x36 0x98 LDA r17, [p0], #-4 + 6102 0x02 0x06 0x56 0x98 LDA r18, [p2] + 6106 0x00 0x46 0x76 0x98 LDA r19, [p0, #16] + 6110 0x00 0x00 NOPX + 6112 0x00 0x00 NOPX + 6114 0x00 0x00 NOPX + 6116 0x00 0x00 NOPX + 6118 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 6122 0x00 0xc2 0x39 0x40 0x0e 0x5c ST r16, [p0]; ADD r16, r18, #1 + 6128 0x0a 0x06 0x11 0x98 ST r16, [p2] + 6132 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 6136 0x00 0x00 NOPX + 6138 0x00 0x00 NOPX + 6140 0x00 0x00 NOPX + 6142 0x14 0xd3 0x08 0x18 ACQ r19, r16 + 6146 0x1a 0x67 0x06 0x18 ADD.NC p2, r14, #12 + 6150 0x00 0x00 NOPX + 6152 0x00 0x00 NOPX + 6154 0x02 0xff 0x76 0x98 LDA r27, [p2], #-4 + 6158 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4 + 6162 0x02 0xfe 0x56 0x98 LDA r18, [p2], #-4 + 6166 0x02 0x56 0x76 0x98 LDA r19, [p2, #20] + 6170 0x00 0x00 NOPX + 6172 0x00 0x00 NOPX + 6174 0x00 0x00 NOPX + 6176 0x00 0x00 NOPX + 6178 0x00 0x00 NOPX + 6180 0x14 0xa3 0x12 0x18 SEL.EQZ r17, r18, r17, r27 + 6184 0x0a 0x06 0x31 0x98 ST r17, [p2] + 6188 0x00 0x00 NOPX + 6190 0x00 0x00 NOPX + 6192 0x00 0x00 NOPX + 6194 0x00 0x00 NOPX + 6196 0x14 0xd3 0x08 0x18 ACQ r19, r16 + 6200 0xd1 0x11 0x60 0x01 0x00 0x29 0xce 0x60 0x79 0x3a MOVS p6, p2; MOVX r16, #1; MOV r14, p6 + 6210 0x00 0x00 NOPX + 6212 0x00 0x00 NOPX + 6214 0x07 0xee 0x19 0x18 LDA p4, [sp, #-20] + 6218 0x60 0xc6 0xdf 0xd8 0x3b 0x0c LDA r17, [p3]; ST p0, [sp, #-20] + 6224 0x20 0xd2 0xd6 0xdd 0x81 0xd4 LDA r20, [p1]; MOV p3, p7 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 6230 0x02 0x4e 0x56 0x98 LDA r18, [p2], #16 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 6234 0x00 0x5d 0x1e 0x98 LDA p2, [p0], #20 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6238 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6242 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6244 0x04 0x06 0x76 0x98 LDA r19, [p4] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6248 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 6250 0x00 0x0a 0xd8 0x00 0x01 0x04 JL #5552 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6256 0x1b 0xd4 0xc0 0xf8 MOV r15, p2 +.delay_slot + 6260 0x14 0x63 0x0d 0x98 LSHL r17, r17, r16 +.delay_slot + 6264 0x15 0x21 0x0d 0x98 LSHL r16, r20, r16 +.delay_slot + 6268 0x19 0x69 0x41 0x58 ADD.NC p1, r18, r16 +.delay_slot + 6272 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xe2 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV +.return_address + 6288 0xc8 0xc6 0xd0 0x01 0x00 0x28 0xb3 0xd0 0x78 0xba LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 + 6298 0x00 0x07 0xcc 0xc4 0x30 0x44 MOVXM p6, #508440 + 6304 0x00 0x00 NOPX + 6306 0x00 0x00 NOPX + 6308 0x00 0x00 NOPX + 6310 0x00 0x00 NOPX + 6312 0x00 0x00 NOPX + 6314 0x14 0x51 0x08 0x18 REL r17, r16 + 6318 0x01 0xf6 0x36 0x98 LDA r17, [p1, #-4] + 6322 0x07 0xed 0x19 0x18 LDA p2, [sp, #-20] + 6326 0x00 0x00 NOPX + 6328 0x00 0x00 NOPX + 6330 0x00 0x00 NOPX + 6332 0x00 0x00 NOPX + 6334 0x00 0x00 NOPX + 6336 0x14 0x23 0x11 0x98 SUB r17, r16, r17 + 6340 0x4a 0xc6 0xd3 0xec 0x63 0x0c LDA r17, [p2, #20]; ST r17, [p1, #-4] + 6346 0x00 0x00 NOPX + 6348 0x00 0x00 NOPX + 6350 0x00 0x00 NOPX + 6352 0x00 0x00 NOPX + 6354 0x00 0x00 NOPX + 6356 0x00 0x00 NOPX + 6358 0x14 0x51 0x08 0x18 REL r17, r16 + 6362 0xfc 0xce 0xd0 0x00 0x01 0xf0 0xb1 0x00 0x10 0xba LDA r19, [p7, #-8]; MOVXM p1, #508416 + 6372 0x06 0x06 0x56 0x98 LDA r18, [p6] + 6376 0x01 0x06 0x36 0x98 LDA r17, [p1] + 6380 0x00 0x00 NOPX + 6382 0x00 0x00 NOPX + 6384 0x00 0x00 NOPX + 6386 0x00 0x00 NOPX + 6388 0x14 0x21 0x31 0x98 SUB r16, r16, r19 + 6392 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8] + 6396 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 6400 0x80 0x0c 0x90 0x40 0x01 0x84 JNZ r16, #6432 +.delay_slot +.swstall delay_slot + 6406 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6408 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6410 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6412 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6414 0x00 0x00 NOPX + 6416 0x10 0x20 0x01 0x18 MOVX r16, #0 + 6420 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x83 0x08 0xc1 0x36 NOPA; NOPB; ST r16, [p1]; NOPX +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 + 6432 0x07 0xf0 0x39 0x18 LDA lr, [sp, #-16] + 6436 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] + 6440 0x07 0xf7 0x99 0x18 LDA p7, [sp, #-12] +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 6444 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.noswbrkpt + 6446 0x07 0xf9 0xd1 0x18 LDA r14, [sp, #-8] +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 6450 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 6452 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 6454 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6458 0x0e 0x8e 0x0b 0x18 MOVS p6, r14 +.delay_slot + 6462 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 6468 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6470 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6472 0x00 0x00 NOPX +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0 + +.text_segment PM 6480 +.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh +.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_begin0 +.function_start + 6480 0x03 0x85 0xd0 0x00 0x01 0xf0 0xb1 0xe0 0x10 0xba LDA el0, [p0], #4; MOVXM p1, #508864 + 6490 0x03 0x81 0xd0 0x01 0x00 0x4b 0x08 0x00 0x58 0xba LDA eh0, [p0], #4; MOVX r16, #2; MOV r24, #0 + 6500 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 6506 0xfe 0x73 0xb0 0x00 0x01 0xf3 0xb1 0xe0 0x11 0x3a ST p7, [sp, #-16]; MOVXM p7, #508864 + 6516 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4] + 6520 0x0f 0xf5 0xd5 0x98 ST r14, [sp, #-12] + 6524 0x0f 0xf9 0xf5 0x98 ST r15, [sp, #-8] + 6528 0x09 0x1c 0x29 0x98 ST el0, [p1], #4 + 6532 0x09 0x1c 0x09 0x98 ST eh0, [p1], #4 + 6536 0x00 0x04 0x2e 0x98 LDA el0, [p0] + 6540 0x00 0x14 0x0e 0x98 LDA eh0, [p0, #4] + 6544 0x00 0x00 NOPX + 6546 0x00 0x00 NOPX + 6548 0x00 0x00 NOPX + 6550 0x00 0x00 NOPX + 6552 0x00 0x00 NOPX + 6554 0x09 0x04 0x29 0x98 ST el0, [p1] + 6558 0x09 0x14 0x09 0x98 ST eh0, [p1, #4] + 6562 0x07 0x5e 0x2a 0x98 LDA.u8 r17, [p7], #5 + 6566 0x07 0xee 0x4a 0x98 LDA.u8 r18, [p7], #-2 + 6570 0x07 0xec 0x2a 0x98 LDA.u8 r1, [p7], #-2 + 6574 0x00 0x00 NOPX + 6576 0x00 0x00 NOPX + 6578 0x00 0x00 NOPX + 6580 0x00 0x00 NOPX +.no_stack_arguments + 6582 0x00 0x18 0x58 0x00 0x01 0x04 JL #12464 +.delay_slot + 6588 0xfd 0xca 0xb8 0xba 0x43 0x5c ST r18, [sp, #-20]; SUB r14, r17, r18 +.delay_slot + 6594 0xfc 0x86 0xb0 0x03 0x08 0x45 0xe8 0x50 0x79 0x3a ST r1, [sp, #-28]; NE r16, r1, r16; MOV r15, r1 +.delay_slot + 6604 0xfd 0x42 0xb7 0x6f 0x15 0x5c ST r16, [sp, #-24]; LT r27, r14, r24 +.delay_slot + 6610 0x16 0x22 0xe1 0x98 SUB r17, r24, r14 +.delay_slot + 6614 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x03 0x81 0x12 0x7a NOPA; NOPS; SEL.EQZ r0, r14, r17, r27 +.return_address + 6624 0xe7 0xc5 0x50 0x1f 0x47 0x36 0x08 0x00 0x58 0xba LDA.u8 r17, [p7], #3; XOR r20, r15, r14; MOV r16, #0 + 6634 0xfd 0xc9 0x58 0x4c 0x43 0x2c LDA.u8 r18, [p7], #-2; SUB r19, r16, r2 + 6640 0xfc 0x86 0x2a 0x6e 0x15 0x2c LDA r1, [sp, #-28]; LT r27, r20, r16 + 6646 0x10 0xa7 0x32 0x18 SEL.EQZ r19, r2, r19, r27 + 6650 0x00 0x00 NOPX + 6652 0x00 0x00 NOPX +.no_stack_arguments + 6654 0x00 0x18 0x58 0x00 0x01 0x04 JL #12464 +.delay_slot + 6660 0x14 0xe6 0x70 0x18 EXTEND.s16 r19, r19 +.delay_slot + 6664 0xfc 0x4a 0xb0 0x22 0xe9 0x0d 0xec 0xc0 0x49 0x3a ST r18, [sp, #-32]; SUB r14, r17, r18; ADD.NC r15, r19, #1 +.delay_slot + 6674 0x13 0xb7 0x0a 0x98 LT r27, r14, r16 +.delay_slot + 6678 0x14 0x22 0xe1 0x98 SUB r17, r16, r14 +.delay_slot + 6682 0x00 0x2c 0xf7 0x02 0x24 0x2c NOPA; SEL.EQZ r0, r14, r17, r27 +.return_address + 6688 0xfc 0x86 0x20 0x01 0x30 0x48 0x00 0x42 0x58 0xba LDA r1, [sp, #-28]; MOVX r19, #2; MOV m0, #66 + 6698 0xe1 0x51 0x50 0x01 0x80 0x0a 0x48 0x08 0x58 0xba LDA.u8 r20, [p7], m0; MOVX r24, #0; MOV r18, #8 + 6708 0xfc 0x72 0x20 0x3f 0x07 0x4b 0xe8 0x17 0x58 0xba LDA r28, [sp, #-32]; MOVX r16, #-6; MOV r31, #23 + 6718 0xfd 0xda 0x20 0x3f 0xa7 0xca 0xa8 0x06 0x58 0xba LDA r22, [sp, #-20]; MOVX r26, #-2; MOV r21, #6 + 6728 0xfd 0x0e 0x20 0x0f 0xd7 0x89 0x00 0x20 0x58 0xba LDA r3, [sp, #-24]; MOVX r29, #508; MOV m2, #32 + 6738 0xe9 0xc0 0x80 0x01 0x70 0x28 0x08 0x80 0x58 0xba MOVA m0, #-178; MOVX r23, #1; MOV r0, #128 + 6748 0x17 0x44 0x80 0x31 0x11 0x0c 0x9d 0xb0 0x78 0xba MOVA m1, #186; SUB r17, r24, r2; MOV vaddSign0, crMCDEn + 6758 0x10 0x7c 0xe6 0x98 XOR r30, r1, r14 + 6762 0x17 0xb7 0x8a 0x98 LT r27, r30, r24 + 6766 0x14 0x62 0x43 0xbc 0xff 0x24 SEL.EQZ r17, r2, r17, r27; ADD.NC r7, r28, #-1 + 6772 0x8f 0x8e 0x0b 0x36 0x02 0x24 EXTEND.s16 r30, r17; ADD.NC r22, r22, #2 + 6778 0x7f 0xa9 0xf7 0x3e 0x01 0x24 MUL r30, r15, r20; ADD.NC r14, r30, #1 + 6784 0x08 0x9d 0xf8 0xb6 0x01 0x24 MUL r2, r1, r14; ADD.NC r17, r22, #1 + 6790 0x14 0xf6 0x17 0x98 EQ r27, r19, r1 + 6794 0x17 0x84 0x2f 0x98 MUL r2, r30, r2 + 6798 0xff 0xe4 0x49 0x3f 0xf5 0x64 SEL.EQZ r31, r31, r18, r27; MOV r18, #-3 + 6804 0x10 0xa1 0x0d 0x98 LSHL r16, r2, r16 + 6808 0x14 0x45 0xad 0x98 LSHL r2, r17, r26 + 6812 0x10 0xb9 0xf2 0x22 0xff 0x24 MUL r2, r2, r28; ADD.NC r4, r2, #-1 + 6818 0x10 0xc7 0x5d 0x98 LSHL r3, r3, r21 + 6822 0xff 0x8a 0x37 0x94 0x3f 0x5c ST r2, [p7], #-4; MUL r5, r15, r1 + 6828 0xe9 0x42 0x30 0x3b 0x6b 0x26 0x08 0x04 0x59 0x3a ST r16, [p7], m2; AND r22, r29, r22; MOV r16, #4 + 6838 0xf9 0xfe 0x32 0xda 0xc1 0x5c ST r31, [p7], #-16; ADD r22, r5, r22 + 6844 0xed 0x8e 0x3b 0x7e 0x9f 0x5c ST r3, [p7], #24; MUL r31, r22, r20 + 6850 0xe3 0x92 0x3b 0x5a 0x1b 0x5c ST r4, [p7], #4; LSHL r22, r22, r16 + 6856 0x17 0xc7 0x7d 0x98 LSHL r3, r31, r23 + 6860 0x11 0x09 0x5d 0x98 LSHL r4, r4, r21 + 6864 0xb6 0x46 0x32 0x24 0x02 0xa4 SUB r25, r22, r3; ADD.NC r4, r4, r0 + 6870 0xf8 0x00 0x00 0x06 0x62 0x0f 0x2e 0x40 0xa8 0xba MOVA r0, #-64; SUB r6, r3, r4; ADD.NC r25, r25, r0 + 6880 0xe3 0x82 0x3f 0x84 0x3f 0x5c ST r0, [p7], #4; MUL r1, r31, r1 + 6886 0xe3 0x9e 0x3f 0xfc 0xff 0x5c ST r7, [p7], #4; MUL r31, r31, r7 + 6892 0xe3 0x9a 0x32 0x96 0x5b 0x5c ST r6, [p7], #4; LSHL r5, r5, r18 + 6898 0xf9 0xaf 0xbf 0xa5 0xff 0x24 LSHL r6, r31, r23; ADD.NC r31, r5, #-1 + 6904 0x00 0xe4 0x00 0x28 0x59 0x6e 0x49 0x88 0xa8 0xba MOVA r4, #7; LSHL r5, r20, r18; ADD.NC r18, r6, r4 + 6914 0x16 0x25 0x21 0x98 SUB r18, r24, r18 + 6918 0xe3 0xca 0x30 0x02 0x1b 0xee 0x49 0x7f 0xc9 0x3a ST r18, [p7], #4; LSHL r1, r1, r23; ADD.NC r18, r5, #-1 + 6928 0xe3 0xca 0x30 0x9a 0xc1 0x5c ST r18, [p7], #4; ADD r6, r1, r22 + 6934 0xe3 0xda 0x3f 0x84 0x9b 0x5c ST r22, [p7], #4; LSHL r1, r31, r4 + 6940 0xe3 0xfe 0x30 0x07 0x60 0x84 0x2f 0xff 0x59 0x3a ST r31, [p7], #4; ADD r22, r3, r1; MOV r1, #-1 + 6950 0xe3 0xe6 0x30 0x0c 0x3b 0x0e 0xc8 0x40 0x59 0x3a ST r25, [p7], #4; SUB r3, r6, r22; MOV r22, #64 + 6960 0xe3 0x8e 0x37 0xc2 0x1b 0x5c ST r3, [p7], #4; LSHL r16, r15, r16 + 6966 0xe3 0xca 0x30 0x03 0x04 0x5c ST r18, [p7], #4; SEL.EQZ r0, r0, r24, r27 + 6972 0xf0 0xef 0xb0 0x30 0x02 0xa4 LSHL r3, r30, r23; ADD.NC r0, r16, r0 + 6978 0xe3 0x82 0x38 0x40 0x63 0x5c ST r0, [p7], #4; SUB r16, r16, r3 + 6984 0xe3 0xfe 0x30 0x0a 0x11 0x33 0xec 0x10 0x09 0x3a ST r31, [p7], #4; MAC r1, r1, r5, r2; ADD.NC r31, r16, #64 + 6994 0xe3 0xfe 0x39 0x7e 0xbb 0x5c ST r31, [p7], #4; LSHL r31, r18, r21 + 7000 0xe3 0xda 0x30 0x8a 0xbb 0x5c ST r22, [p7], #4; LSHL r2, r1, r21 + 7006 0xe3 0x86 0x3c 0x04 0x43 0x5c ST r1, [p7], #4; SUB r1, r24, r2 + 7012 0xe3 0xda 0x3c 0x0b 0xe3 0x5c ST r22, [p7], #4; SUB r2, r24, r31 + 7018 0x0f 0x1c 0x31 0x98 ST r1, [p7], #4 + 7022 0x0f 0x1e 0x51 0x98 ST r18, [p7], #4 + 7026 0x0f 0x1e 0xd1 0x98 ST r22, [p7], #4 + 7030 0x0f 0x08 0x51 0x98 ST r2, [p7], m0 + 7034 0x07 0x28 0x2a 0x98 LDA.u8 r1, [p7], m1 + 7038 0x00 0x00 NOPX + 7040 0x00 0x00 NOPX + 7042 0x00 0x00 NOPX + 7044 0x00 0x00 NOPX + 7046 0x00 0x00 NOPX + 7048 0x00 0x00 NOPX + 7050 0x08 0x0d 0xd8 0x00 0x01 0x84 JZ r1, #7088 +.delay_slot + 7056 0x10 0x20 0x0d 0x18 MOVX r16, #3 +.delay_slot + 7060 0x13 0xe1 0x0d 0x98 LSHL r16, r15, r16 +.delay_slot + 7064 0xff 0x7f 0x0f 0xa0 0x00 0x44 MOVXM r31, #-8454144 +.delay_slot +.swstall delay_slot + 7070 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7072 0x00 0x00 NOPX + 7074 0x00 0x2c 0xf0 0x00 0x20 0x3e 0x01 0x00 0x34 0xaf 0x00 0x2b 0x60 0x7e NOPA; NOPB; NOPS; MOVX r31, #0; NOPM +.label TGT_F_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh_608 + 7088 0xe7 0x60 0x80 0x00 0x01 0xf0 0x31 0x10 0x10 0xba MOVA m0, #-197; MOVXM p0, #508448 + 7098 0x00 0xc4 0x50 0x3b 0xd8 0xa4 0x01 0xf8 0xb8 0xba LDA.s8 r17, [p0]; AND r29, r29, r17; VINSERT.32 x0, x0, #0, r31 + 7108 0xff 0x06 0x20 0x01 0xf0 0xa8 0x00 0x49 0x78 0xba LDA r1, [sp, #-8]; MOVX r31, #5; VMOV bmll0, x0 + 7118 0xff 0x87 0x20 0x1f 0xff 0xec 0x80 0xc9 0x58 0xba LDA lr, [sp, #-4]; LSHL r31, r15, r31; MOV m1, #201 + 7128 0xfe 0x03 0x20 0x64 0x02 0x2c LDA p0, [sp, #-16]; MOVX r25, #0 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7134 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7136 0x07 0xf5 0xd1 0x18 LDA r14, [sp, #-12] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7140 0x07 0x2c 0x37 0x18 ST.s16 r1, [p7], #4 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7144 0xef 0x39 0xff 0x71 0x41 0xe4 MUL r28, r29, r28; MOV crRnd, r17 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7150 0x08 0x02 0xc0 0x1f 0x1d 0x6d 0xe8 0x50 0x79 0x3a VCONV.bf16.fp32 wl0, bmll0; LSHL r17, r15, r26; MOV r15, r1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7160 0xe5 0x29 0xf8 0xb1 0xff 0x24 MUL r20, r28, r20; ADD.NC r17, r17, #-1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7166 0xa7 0x67 0xb0 0x82 0x03 0x64 LSHL r29, r20, r19; VEXTRACT.16 r1, x0, #0, vaddSign0 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7172 0x17 0xb8 0xef 0x98 MUL r28, r30, r14 + 7176 0x14 0x6b 0x5d 0x98 LSHL r21, r17, r21 + 7180 0xe3 0xd2 0x3e 0x5e 0xfb 0x5c ST r20, [p7], #4; LSHL r23, r28, r23 + 7186 0xe3 0xf6 0x3f 0xea 0xa3 0x5c ST r29, [p7], #4; SUB r26, r31, r21 + 7192 0xe1 0x72 0x3f 0xd6 0x4c 0x5c ST r28, [p7], m0; MAC r21, r21, r31, r18 + 7198 0x07 0x2a 0x8a 0x98 LDA.u8 r20, [p7], m1 + 7202 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 + 7208 0x00 0x00 NOPX + 7210 0x00 0x00 NOPX + 7212 0x00 0x00 NOPX + 7214 0x17 0xbd 0x3d 0x98 LSHL r30, r30, r19 + 7218 0x17 0xab 0x51 0x98 SUB r21, r30, r21 + 7222 0x14 0xf7 0x47 0x98 EQ r27, r19, r20 + 7226 0x16 0x27 0x72 0x18 SEL.EQZ r19, r24, r23, r27 + 7230 0x0f 0x1e 0x71 0x98 ST r19, [p7], #4 + 7234 0x0f 0x1e 0x11 0x98 ST r16, [p7], #4 + 7238 0x0f 0x1e 0x31 0x98 ST r17, [p7], #4 + 7242 0xe3 0xda 0x30 0x50 0x00 0x5c ST r22, [p7], #4; RET lr +.delay_slot + 7248 0x0f 0x1e 0x51 0x98 ST r18, [p7], #4 +.delay_slot + 7252 0x0f 0x1f 0x51 0x98 ST r26, [p7], #4 +.delay_slot + 7256 0x0f 0x1e 0xb1 0x98 ST r21, [p7], #4 +.delay_slot + 7260 0x0f 0x07 0x31 0x98 ST r25, [p7] +.delay_slot + 7264 0xe2 0xe6 0x30 0x03 0xb0 0x60 0x70 0x02 ST r25, [p7, #4]; MOV p7, p0 +.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh__end +.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_end0 + +.text_segment PM 7280 +.label __Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params___func_begin0 +.label _Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params +.function_start + 7280 0xf1 0x18 0x80 0x3b 0x68 0x00 0x01 0xf2 0x32 0x16 0x10 0xb6 MOVA m6, #-120; VLDB x6, [p0], #64; MOVXM p4, #508972 + 7292 0x9f 0xa8 0xd0 0x38 0xe8 0x00 0x12 0x0a 0x80 0x80 0x58 0xb6 LDA dj2, [p4], #-4; VLDB x1, [p0], #64; MOVX r1, #16; MOV m5, #128 + 7304 0x87 0xa4 0xd0 0x00 0x07 0x8a 0x07 0x90 0x58 0xba LDA dn2, [p4], #12; MOVX r0, #60; MOV m4, #-112 + 7314 0x9f 0xe8 0xd0 0x00 0x24 0x0a 0x60 0x00 0x58 0xba LDA dj6, [p4], #-4; MOVX r2, #32; MOV dc4, #0 + 7324 0x85 0xe4 0xd7 0x10 0x4b 0x00 0x00 0x04 0x7e 0xb0 0x10 0x76 LDA dn6, [p4], #8; MOVS dc7, dc4; MOVXM ls, #7520 + 7336 0x85 0xa0 0xd2 0x10 0x4b 0x00 0x00 0x05 0xbe 0xd8 0x10 0x76 LDA m2, [p4], #8; MOVS dc2, dc4; MOVXM le, #7600 + 7348 0x9f 0x88 0xd6 0x10 0x4b 0x00 0x01 0xf2 0xb1 0x10 0x10 0x76 LDA dj0, [p4], #-4; MOVS dc6, dc4; MOVXM p5, #508448 + 7360 0x87 0x84 0xd1 0x10 0x4b 0x00 0x36 0x09 0xe4 0xc0 0x78 0x76 LDA dn0, [p4], #12; MOVS dc1, dc4; MOVX r3, #48; MOV dc3, dc4 + 7372 0x9f 0xc8 0xd0 0x10 0x4b 0x01 0x18 0x43 0x62 0xba LDA dj4, [p4], #-4; MOVS dc0, dc4; VSHIFT x4, x6, x1, r1 + 7382 0x85 0xc4 0xdb 0x93 0x01 0xd4 LDA dn4, [p4], #8; MOV dc5, dc4 + 7388 0x04 0x2c 0x06 0x98 LDA m0, [p4], #8 + 7392 0x04 0xfc 0xc6 0x98 LDA dj1, [p4], #-4 + 7396 0x87 0x94 0xd0 0xb1 0x68 0x3c LDA dn1, [p4], #12; VLDB.3D x2, [p0], d2 + 7402 0x04 0xfe 0xc6 0x98 LDA dj5, [p4], #-4 + 7406 0x04 0x2e 0xa6 0x98 LDA dn5, [p4], #8 + 7410 0x04 0x2c 0x86 0x98 LDA m1, [p4], #8 + 7414 0x04 0xff 0xc6 0x98 LDA dj7, [p4], #-4 + 7418 0x04 0x2f 0xa6 0x98 LDA dn7, [p4], #8 + 7422 0x04 0x2f 0x86 0x98 LDA m7, [p4], #8 + 7426 0x04 0xfd 0xc6 0x98 LDA dj3, [p4], #-4 + 7430 0x04 0x2d 0xa6 0x98 LDA dn3, [p4], #8 + 7434 0x04 0xc9 0x86 0x98 LDA m3, [p4], m6 + 7438 0x04 0xa8 0x96 0x98 LDA r4, [p4], m5 + 7442 0x04 0x88 0xf2 0x98 LDA.s16 r7, [p4], m4 + 7446 0x04 0x4e 0x06 0x98 LDA m4, [p4], #16 + 7450 0x92 0x96 0xd3 0xe1 0xe8 0x3c LDA r5, [p4, #-28]; VLDB.2D x3, [p1], d7 + 7456 0x02 0x04 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p2] + 7460 0x00 0x00 NOPX + 7462 0x05 0x04 0xc2 0x98 LDA.s8 r6, [p5] + 7466 0x11 0x09 0xfb 0x18 ADD r4, r4, #-2 + 7470 0x80 0xc6 0xd0 0x00 0x00 0x06 0x36 0xf8 0x10 0xba LDA r17, [p4]; MOVXM p4, #7664 + 7480 0x18 0x1d 0x72 0xf8 VBCST.16 x0, r7 + 7484 0x1d 0x1c 0x03 0x58 VEXTBCST.128 x10, x3, #0 + 7488 0x1d 0x72 0xff 0x98 ADD.NC lc, r5, #-1 + 7492 0x00 0x2c 0xf0 0x00 0x23 0x00 0x8a 0xe2 0x04 0x6d 0x41 0x66 NOPA; NOPB; VMOV cml3, cml0; VMAC.f dm4, dm3, x6, x10, r0 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7504 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x0d 0xd4 0x02 0x0e 0x03 0xa8 0x08 0x1a 0x0b NOPA; NOPB; NOPS; MOVX crRnd, r6; VEXTBCST.128 x8, x3, #1; VMAC.f dm1, dm0, x1, x10, r0 +.label ZLS_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_240 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7520 0x03 0xb3 0x71 0xf0 0xf4 0x02 0x84 0x83 0x6e 0xba VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7530 0x00 0x38 0xea 0x9c 0x0b 0x46 0x02 0x89 0x01 0x4a VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7540 0x00 0xb1 0x6a 0x30 0x8a 0xc6 0x01 0x35 0x01 0x4a VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r2; VMAC.f dm1, dm1, x10, x8, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7550 0x1d 0x89 0x0a 0xd8 VSHIFT x11, x1, x2, r2 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7554 0x04 0xb0 0x8e 0xc6 0x04 0x48 0xa1 0x62 VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7562 0x03 0x9c 0x0f 0x46 0x02 0x36 0xa1 0x62 VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r0 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7570 0x1d 0x89 0x0e 0xd8 VSHIFT x11, x1, x2, r3 + 7574 0x05 0x1c 0x03 0x46 0x03 0x92 0xe1 0x62 VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r0 + 7582 0x04 0x1c 0x07 0x46 0x00 0x56 0xe1 0x62 VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r0 + 7590 0x00 0x2c 0xf4 0x61 0x0d 0x94 NOPA; VSHIFT x4, x6, x1, r1 + 7596 0x04 0x6d 0x41 0x48 VMAC.f dm4, dm3, x6, x10, r0 +.label ZLE_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_320 +.end_of_loop +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 7600 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x08 0x1a 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r0 +.loop_nesting 0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7616 0x03 0x0c 0xf2 0x73 0x90 0x02 0x84 0x83 0x6e 0xba PADDA.3D [p0], d0; PADDB.2D [p2], d3; VSHIFT x10, x1, x2, r1 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7626 0x02 0x9c 0x0b 0x46 0x02 0x89 0x01 0x62 VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7634 0x02 0x30 0x8a 0xc6 0x01 0x35 0x01 0x62 VSHIFT x4, x6, x1, r2; VMAC.f dm1, dm1, x10, x8, r0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7642 0x1d 0x89 0x0a 0xd8 VSHIFT x11, x1, x2, r2 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7646 0x03 0x9c 0x0f 0x46 0x04 0x48 0xa1 0x62 VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7654 0x00 0x2c 0xf4 0xb0 0x8e 0xc2 0x02 0x36 0xa1 0x4a NOPA; VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r0 +.label TGT_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_384 +.loop_nesting 1 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7664 0x3e 0x1e 0x8b 0x12 0x1d 0xb4 VLDB.2D x3, [p1], d7; VSHIFT x11, x1, x2, r3 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7670 0x00 0x00 0x00 0xb7 0xea 0x02 0x03 0x92 0xe1 0x5a MOVXM le, #7840; VMAC.f dm3, dm4, x9, x7, r0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7680 0x40 0x85 0x70 0x00 0x00 0x8f 0xe5 0x02 0x00 0x56 0xe1 0x46 VLDA.CONV.fp32.bf16 cml0, [p2]; MOVXM ls, #7760; VMAC.f dm0, dm2, x11, x7, r0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7692 0x1d 0x72 0xff 0x98 ADD.NC lc, r5, #-1 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7696 0x00 0x1d 0x9b 0x98 VLDA x6, [p0], #64 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7700 0x38 0x1c 0x74 0x18 VLDB x1, [p0], #64 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7704 0x38 0x58 0xb4 0x18 VLDB.3D x2, [p0], d2 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7708 0x0d 0x11 0x96 0x18 VCONV.bf16.fp32 x10, cml3 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7712 0x0b 0x10 0x16 0x18 VCONV.bf16.fp32 x6, cml0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7716 0x1c 0x50 0x6c 0xf8 VMAX_LT.bf16 x8, r16, x10, x0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7720 0x1d 0x53 0x44 0x78 VSHUFFLE x10, x10, x6, r17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7724 0x71 0x42 0x60 0x02 0xa8 0x36 0x70 0x02 VST x8, [p3], m4; VMAX_LT.bf16 x10, r16, x10, x0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7732 0x1d 0x1c 0x03 0x58 VEXTBCST.128 x10, x3, #0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7736 0x67 0x52 0x60 0x01 0x80 0x45 0x70 0x02 VST.3D x10, [p3], d1; VMOV cml3, cml0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7744 0x04 0x1c 0x07 0x46 0x04 0x6d 0x41 0x62 VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7752 0x02 0x30 0x86 0xc6 0x01 0x03 0x41 0x62 VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm0, x1, x10, r0 +.label ZLS_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_480 +.loop_nesting 2 +.begin_of_loop +.aggressive_scheduled_block_id 2 +.noswbrkpt + 7760 0x03 0xb3 0x71 0xf0 0xf4 0x02 0x84 0x83 0x6e 0xba VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r1 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7770 0x00 0x38 0xea 0x9c 0x0b 0x46 0x02 0x89 0x01 0x4a VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7780 0x00 0xb1 0x6a 0x30 0x8a 0xc6 0x01 0x35 0x01 0x4a VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r2; VMAC.f dm1, dm1, x10, x8, r0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7790 0x1d 0x89 0x0a 0xd8 VSHIFT x11, x1, x2, r2 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7794 0x04 0xb0 0x8e 0xc6 0x04 0x48 0xa1 0x62 VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7802 0x03 0x9c 0x0f 0x46 0x02 0x36 0xa1 0x62 VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r0 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7810 0x1d 0x89 0x0e 0xd8 VSHIFT x11, x1, x2, r3 + 7814 0x05 0x1c 0x03 0x46 0x03 0x92 0xe1 0x62 VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r0 + 7822 0x04 0x1c 0x07 0x46 0x00 0x56 0xe1 0x62 VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r0 + 7830 0x00 0x2c 0xf4 0x61 0x0d 0x94 NOPA; VSHIFT x4, x6, x1, r1 + 7836 0x04 0x6d 0x41 0x48 VMAC.f dm4, dm3, x6, x10, r0 +.label ZLE_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_560 +.end_of_loop +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 7840 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x08 0x1a 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r0 +.loop_nesting 1 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7856 0x03 0x0c 0xf4 0xe7 0x20 0x08 0x49 0x02 0x84 0x83 0x68 0xb6 PADDA.3D [p0], d0; PADDB.2D [p2], d3; JNZD r4, r4, p4; VSHIFT x10, x1, x2, r1 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7868 0x02 0x9c 0x0b 0x46 0x02 0x89 0x01 0x62 VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r0 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7876 0x02 0x30 0x8a 0xc6 0x01 0x35 0x01 0x62 VSHIFT x4, x6, x1, r2; VMAC.f dm1, dm1, x10, x8, r0 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7884 0x1d 0x89 0x0a 0xd8 VSHIFT x11, x1, x2, r2 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7888 0x03 0x9c 0x0f 0x46 0x04 0x48 0xa1 0x62 VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r0 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7896 0x04 0xb0 0x8e 0xc6 0x02 0x36 0xa1 0x62 VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r0 +.loop_nesting 0 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7904 0x1d 0x89 0x0e 0xd8 VSHIFT x11, x1, x2, r3 + 7908 0x03 0x92 0xe1 0x48 VMAC.f dm3, dm4, x9, x7, r0 + 7912 0x00 0x56 0xe1 0x48 VMAC.f dm0, dm2, x11, x7, r0 + 7916 0x00 0x00 NOPX + 7918 0x00 0x00 NOPX + 7920 0x00 0x00 NOPX + 7922 0x00 0x00 NOPX + 7924 0x0d 0x11 0x96 0x18 VCONV.bf16.fp32 x10, cml3 + 7928 0x62 0x02 0xc0 0x50 0x00 0x5c VCONV.bf16.fp32 x6, cml0; RET lr +.delay_slot + 7934 0x1c 0x50 0x6c 0xf8 VMAX_LT.bf16 x8, r16, x10, x0 +.delay_slot + 7938 0x1d 0x53 0x44 0x78 VSHUFFLE x10, x10, x6, r17 +.delay_slot + 7942 0x1d 0x50 0x6c 0xf8 VMAX_LT.bf16 x10, r16, x10, x0 +.delay_slot + 7946 0x0b 0x8a 0x13 0x18 VST x8, [p3], m4 +.delay_slot + 7950 0x0b 0x3a 0x93 0x18 VST.3D x10, [p3], d1 +.label _Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params__end +.label __Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params___func_end0 + +.text_segment PM 7968 +.label __Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params___func_begin0 +.label _Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params +.function_start + 7968 0xfb 0x90 0x82 0x39 0x68 0x00 0x01 0xf1 0xb2 0x4c 0x10 0xb6 MOVA m4, #-36; VLDB x2, [p1], #64; MOVXM p3, #509080 + 7980 0x63 0x84 0xd4 0x38 0x68 0x3e 0x47 0x68 0x68 0x01 0x58 0xb6 LDA dn0, [p3], #4; VLDB x0, [p2], #64; MOVX r4, #-5; MOV r3, #1 + 7992 0x63 0x88 0xd0 0x00 0x00 0x04 0x7f 0xc8 0x10 0xba LDA dj0, [p3], #4; MOVXM ls, #8080 + 8002 0x63 0xc4 0xd0 0x00 0x00 0x05 0xbf 0xf8 0x10 0xba LDA dn4, [p3], #4; MOVXM le, #8176 + 8012 0x63 0xc8 0xd0 0x00 0x16 0x48 0x08 0x12 0x58 0xba LDA dj4, [p3], #4; MOVX r1, #50; MOV r0, #18 + 8022 0x63 0x80 0xd0 0x08 0x9a 0x2c LDA m0, [p3], #4; MOVX r2, #19 + 8028 0x03 0x1c 0x66 0x98 LDA dc0, [p3], #4 + 8032 0x03 0x8a 0x66 0x98 LDA dc4, [p3], m4 + 8036 0x03 0x04 0xb6 0x98 LDA r5, [p3] + 8040 0x03 0x24 0xd6 0x98 LDA r6, [p3, #8] + 8044 0x00 0x00 NOPX + 8046 0x00 0x00 NOPX + 8048 0x00 0x00 NOPX + 8050 0x00 0x00 NOPX + 8052 0x00 0x00 NOPX + 8054 0x11 0x48 0x4d 0x98 LSHL r4, r5, r4 + 8058 0x30 0xc7 0xba 0xe4 0xff 0x24 LSHL r3, r6, r3; ADD.NC lc, r4, #-1 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8064 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0xc0 0xd0 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; MOV dj1, r3; NOPV +.label ZLS_F_Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params_112 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8080 0x43 0x83 0x72 0x39 0x6c 0x80 0x8b 0x00 0x00 0x00 0x48 0x02 0x38 0x00 0x00 0xe1 VLDA x0, [p2], #64; VLDB x2, [p1], #64; MOVS p4, p0; NOPX; VSHUFFLE x1, x2, x0, r1; NOPV +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8096 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0xc0 0x02 0x38 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VSHUFFLE x3, x0, x0, r1; NOPV + 8112 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x44 0xc0 0xe8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmlh0, x1, x3, r0; NOPV + 8128 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x04 0xc4 0xe8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x1, x3, r2; NOPV + 8144 0x00 0x2c 0xf0 0x00 0x20 0x18 0x26 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST.3D bmlh0, [p0], d0; NOPX; NOPM; NOPV + 8160 0x00 0x2c 0xf0 0x00 0x24 0x20 0x06 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmll0, [p4, dj1]; NOPX; NOPM; NOPV +.label ZLE_F_Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params_208 +.end_of_loop + 8176 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.loop_nesting 0 + 8192 0x90 0x11 0x60 0x01 0x40 0x00 0x48 0x02 0x39 0x3a MOVS p4, p0; RET lr; VSHUFFLE x1, x2, x0, r1 +.delay_slot + 8202 0x19 0x80 0x04 0x78 VSHUFFLE x3, x0, x0, r1 +.delay_slot + 8206 0x18 0x89 0x81 0xd8 VSHUFFLE bmlh0, x1, x3, r0 +.delay_slot + 8210 0x18 0x09 0x89 0xd8 VSHUFFLE bmll0, x1, x3, r2 +.delay_slot + 8214 0x08 0x18 0x26 0x98 VST.3D bmlh0, [p0], d0 +.delay_slot + 8218 0x0c 0x20 0x06 0x98 VST bmll0, [p4, dj1] +.label _Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params__end +.label __Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params___func_end0 + +.text_segment PM 8224 +.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_begin0 +.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params +.function_start + 8224 0x20 0x93 0xd0 0x01 0x10 0x28 0x07 0x3f 0x58 0xba LDA p1, [p1]; MOVX r17, #1; MOV m0, #-193 + 8234 0xe6 0x04 0x80 0x00 0x01 0xf2 0x32 0x44 0x10 0xba MOVA m1, #-208; MOVXM p4, #509064 + 8244 0x81 0x42 0xd0 0x03 0x25 0x54 LDA r16, [p4], m0; MOV m0, #201 + 8250 0x04 0x0a 0x6a 0x98 LDA.u8 r19, [p4], m0 + 8254 0x04 0x2a 0x56 0x98 LDA r18, [p4], m1 + 8258 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8264 0x00 0x83 0xdf 0xf0 0x7b 0x0c LDA p0, [p0]; ST lr, [sp, #-8] +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8270 0x40 0xbe 0xdf 0xe2 0x3b 0x0c LDA r15, [p2]; ST p2, [sp, #-16] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8276 0x0f 0xff 0x9d 0x98 ST p7, [sp, #-4] +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 8280 0xfd 0xe3 0xb0 0x00 0x03 0x8e 0x00 0x00 0x41 0x3a ST p6, [sp, #-20]; JL #7280 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8290 0xfe 0xbe 0xb0 0x27 0x08 0x7d 0x31 0x60 0x79 0x3a ST r15, [sp, #-12]; MUL r16, r19, r16; MOV p2, p1 +.delay_slot + 8300 0x1e 0x68 0xc0 0xf8 MOV p6, p4 +.delay_slot + 8304 0xfd 0x13 0xb8 0x42 0x3b 0x5c ST p1, [sp, #-24]; LSHL r16, r16, r17 +.delay_slot + 8310 0xf0 0x11 0x60 0x25 0x08 0xec 0x04 0x10 0x79 0x3a MOVS p7, p0; LSHL r16, r18, r17; MOV m0, r16 +.delay_slot + 8320 0x00 0x2c 0xf2 0x17 0x20 0x01 0x5b 0x00 0x00 0x01 0xb3 0xe0 0xa8 0x00 0x00 0xe1 NOPA; PADDB [p1], m0; NOPS; NOPX; ADD.NC p3, r15, r16; NOPV +.return_address + 8336 0xce 0xc1 0x50 0x44 0x12 0x2c LDA.u8 r16, [p6, #7]; MOVX r17, #2 + 8342 0x00 0x00 NOPX + 8344 0x00 0x00 NOPX + 8346 0x00 0x00 NOPX + 8348 0x00 0x00 NOPX + 8350 0x00 0x00 NOPX + 8352 0x00 0x00 NOPX + 8354 0x14 0x61 0x08 0x98 NE r16, r17, r16 + 8358 0x80 0x10 0xb0 0x40 0x01 0x84 JNZ r16, #8544 +.delay_slot + 8364 0x00 0x07 0xc8 0xc9 0x10 0x44 MOVXM p4, #509064 +.delay_slot +.swstall delay_slot + 8370 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8372 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8374 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8376 0x00 0x00 NOPX + 8378 0x9f 0xc2 0xd0 0x00 0x00 0x28 0x07 0x30 0x58 0xba LDA r16, [p4], #-4; MOVX r0, #1; MOV m0, #-208 + 8388 0x04 0x2e 0xb6 0x98 LDA r21, [p4], #8 + 8392 0x04 0x1e 0x56 0x98 LDA r18, [p4], #4 + 8396 0xfd 0x4e 0x20 0xd1 0x81 0xd4 LDA r19, [sp, #-24]; MOV p0, p4 + 8402 0x81 0x52 0xd0 0x9c 0x8b 0x03 0xb0 0x60 0x72 0xba LDA r20, [p4], m0; MOVS p0, p7; MOV p7, p0 + 8412 0x07 0xef 0x19 0x18 LDA p6, [sp, #-20] + 8416 0x00 0x00 NOPX + 8418 0x14 0x23 0x1d 0x98 LSHL r17, r16, r17 + 8422 0x14 0x20 0x0d 0x98 LSHL r16, r16, r0 +.no_stack_arguments + 8426 0x00 0x0e 0x38 0x00 0x01 0x04 JL #7280 +.delay_slot + 8432 0x94 0x81 0xbb 0x33 0x8a 0xa4 LSHL r18, r18, r0; ADD.NC r22, r19, r17 +.delay_slot + 8438 0xac 0x41 0xba 0xaf 0x92 0xa4 LSHL r17, r21, r0; ADD.NC r21, r15, r18 +.delay_slot + 8444 0xa4 0x81 0xb2 0xd1 0xb2 0xa4 LSHL r18, r20, r0; ADD.NC p1, r17, r22 +.delay_slot + 8450 0x1a 0x69 0xc1 0x58 ADD.NC p2, r19, r16 +.delay_slot + 8454 0x00 0x2c 0xf0 0x00 0x10 0x01 0xb5 0x64 0xae 0xba NOPA; NOPB; ADD.NC p3, r21, r18 +.return_address + 8464 0xfe 0x43 0x20 0x00 0x00 0x28 0x07 0x34 0x58 0xba LDA p4, [sp, #-16]; MOVX r0, #1; MOV m0, #-204 + 8474 0x07 0xfe 0x16 0x98 LDA r16, [p7], #-4 + 8478 0xff 0xf3 0x26 0xdd 0x81 0xd4 LDA p7, [sp, #-4]; MOV p3, p7 + 8484 0x03 0x0a 0x36 0x98 LDA r17, [p3], m0 + 8488 0x07 0xf8 0x39 0x18 LDA lr, [sp, #-8] + 8492 0x07 0xf5 0xf1 0x18 LDA r15, [sp, #-12] + 8496 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 + 8502 0x04 0x06 0x56 0x98 LDA r18, [p4] + 8506 0x14 0x20 0x0d 0x98 LSHL r16, r16, r0 + 8510 0x00 0x00 NOPX + 8512 0x00 0x00 NOPX + 8514 0x00 0x00 NOPX +.tail_call + 8516 0x00 0x0f 0x90 0x00 0x00 0x84 J #7968 +.delay_slot + 8522 0x14 0x62 0x0d 0x98 LSHL r17, r17, r0 +.delay_slot + 8526 0x1c 0x58 0xc9 0x58 ADD.NC r17, r17, r18 +.delay_slot + 8530 0x19 0x69 0x41 0x58 ADD.NC p1, r18, r16 +.delay_slot + 8534 0x18 0x69 0x20 0xf8 MOV p0, r18 +.delay_slot + 8538 0x00 0x2c 0xf4 0xd1 0x82 0x94 NOPA; ADD.NC p2, r17, r16 +.label TGT_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_320 +.return_address + 8544 0x07 0xf8 0x39 0x18 LDA lr, [sp, #-8] + 8548 0x07 0xf5 0xf1 0x18 LDA r15, [sp, #-12] + 8552 0x07 0xff 0x99 0x18 LDA p7, [sp, #-4] + 8556 0x07 0xef 0x19 0x18 LDA p6, [sp, #-20] + 8560 0x00 0x00 NOPX + 8562 0x00 0x00 NOPX + 8564 0x00 0x00 NOPX + 8566 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 8570 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 8576 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8578 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8580 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8582 0x00 0x00 NOPX +.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params__end +.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_end0 + +.text_segment PM 8592 +.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function_start + 8592 0x00 0x07 0xc8 0xc4 0x00 0x44 MOVXM p4, #508416 + 8598 0x80 0xc2 0xd0 0x2f 0x41 0xd4 LDA r16, [p4]; MOV r0, r15 + 8604 0x00 0x10 0x00 0x00 0x01 0xc4 PADDXM [sp], #128 + 8610 0xff 0x3a 0xb0 0x02 0x2d 0x70 0x70 0x02 ST r14, [sp, #-8]; MOV r17, CORE_ID + 8618 0xff 0xb6 0xb0 0x01 0xa8 0xf0 0x70 0x02 ST r13, [sp, #-4]; MOV r13, lr + 8626 0x0f 0xec 0x1d 0x98 ST p0, [sp, #-20] + 8630 0x0f 0xf7 0x9d 0x98 ST p7, [sp, #-12] + 8634 0xfe 0x02 0xb0 0x01 0xca 0x60 0x70 0x02 ST r0, [sp, #-16]; MOV r14, p2 + 8642 0x80 0x11 0x20 0x40 0x01 0x84 JNZ r16, #8768 +.delay_slot + 8648 0x1b 0xd6 0xc0 0xf8 MOV r15, p3 +.delay_slot + 8652 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17 +.delay_slot + 8656 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 8660 0x00 0x07 0xc6 0xc4 0x08 0x44 MOVXM p3, #508420 +.delay_slot + 8666 0x0b 0x06 0x31 0x98 ST r17, [p3] +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8670 0xf0 0x91 0x60 0x00 0x01 0xf0 0xb1 0x10 0x11 0x3a MOVS p7, p1; MOVXM p1, #508448 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8680 0x20 0xc0 0xe0 0x88 0x8b 0x00 0x01 0xf0 0xb1 0x0e 0x10 0x76 ST.s8 r16, [p1]; MOVS p0, p2; MOVXM p1, #508444 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8692 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 8694 0x00 0x0c 0xa8 0x00 0x01 0x04 JL #6480 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8700 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8702 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8704 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 8708 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 8712 0x20 0xc2 0x30 0x00 0x01 0xa5 0x70 0x02 ST r16, [p1]; NOPM +.return_address + 8720 0x33 0x91 0x60 0x01 0x33 0x82 0x00 0x02 MOVS p1, p7; ADD.NC p2, r14, #8 + 8728 0x02 0x06 0x3a 0x98 LDA.u16 r17, [p2] + 8732 0x44 0xc3 0x50 0x00 0x01 0xf1 0x31 0x0c 0x10 0xba LDA.u16 r16, [p2, #4]; MOVXM p2, #508440 + 8742 0x00 0x00 NOPX + 8744 0x00 0x11 0x28 0x00 0x00 0x84 J #8784 +.delay_slot + 8750 0x00 0x07 0xc6 0xc4 0x20 0x44 MOVXM p3, #508432 +.delay_slot +.swstall delay_slot + 8756 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8758 0x00 0x00 NOPX +.delay_slot + 8760 0x0b 0x06 0x31 0x98 ST r17, [p3] +.delay_slot + 8764 0x0a 0x06 0x11 0x98 ST r16, [p2] +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_176 + 8768 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x01 0xf1 0xb1 0x08 0x10 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVXM p3, #508432; NOPV +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_192 + 8784 0x1a 0x67 0x86 0x18 ADD.NC p2, r15, #12 + 8788 0x5f 0xee 0xd0 0x00 0x01 0xf2 0x31 0x02 0x10 0xba LDA r27, [p2], #-4; MOVXM p4, #508420 + 8798 0x02 0xfe 0x16 0x98 LDA r16, [p2], #-4 + 8802 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4 + 8806 0x02 0x46 0x56 0x98 LDA r18, [p2, #16] + 8810 0x00 0x00 NOPX + 8812 0x00 0x00 NOPX + 8814 0x00 0x00 NOPX + 8816 0x00 0x00 NOPX + 8818 0x00 0x00 NOPX + 8820 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 8824 0x0a 0x06 0x11 0x98 ST r16, [p2] + 8828 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 8832 0x00 0x00 NOPX + 8834 0x00 0x00 NOPX + 8836 0x00 0x00 NOPX + 8838 0x14 0x93 0x08 0x18 ACQ r18, r16 + 8842 0x00 0x2f 0x00 0x00 0x01 0xf3 0xb1 0x00 0x10 0xba MOVA r15, #1; MOVXM p7, #508416 + 8852 0x06 0x00 0x28 0x2b 0xc1 0xe4 MOVX r24, #0; MOV r16, sp + 8858 0x18 0x68 0x5a 0x18 ADD.NC p0, r16, #-76 + 8862 0xfd 0xd3 0x27 0x29 0x81 0xd4 LDA p5, [sp, #-20]; MOV r14, p2 + 8868 0x04 0x06 0x36 0x98 LDA r17, [p4] + 8872 0x60 0xc2 0xd0 0x00 0x01 0xf1 0xb1 0xe0 0x10 0xba LDA r16, [p3]; MOVXM p3, #508864 + 8882 0x07 0x06 0x56 0x98 LDA r18, [p7] + 8886 0x00 0x00 NOPX + 8888 0x00 0x00 NOPX + 8890 0x00 0x00 NOPX + 8892 0x05 0x06 0x76 0x98 LDA r19, [p5] + 8896 0x00 0x00 NOPX + 8898 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 + 8902 0x14 0xa2 0x07 0x18 ADD r17, r18, #1 + 8906 0x14 0x20 0xfd 0x98 LSHL r16, r16, r15 +.no_stack_arguments + 8910 0x00 0x10 0x10 0x00 0x01 0x04 JL #8224 +.delay_slot + 8916 0x0f 0x06 0x31 0x98 ST r17, [p7] +.delay_slot + 8920 0x18 0x49 0xc1 0x58 ADD.NC dn0, r19, r16 +.delay_slot + 8924 0x0f 0xb4 0x25 0x98 ST dn0, [sp, #-76] +.delay_slot + 8928 0x0f 0xbb 0x15 0x98 ST r24, [sp, #-72] +.delay_slot + 8932 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x03 0xdf 0x8a 0xc1 0x36 NOPA; NOPB; ST r24, [sp, #-68]; NOPX +.return_address + 8944 0x1a 0x67 0x0a 0x18 ADD.NC p2, r14, #20 + 8948 0x02 0x06 0x16 0x98 LDA r16, [p2] + 8952 0x00 0x00 NOPX + 8954 0x00 0x00 NOPX + 8956 0x00 0x00 NOPX + 8958 0x00 0x00 NOPX + 8960 0x00 0x00 NOPX + 8962 0x00 0x00 NOPX + 8964 0x14 0x10 0xf8 0x18 REL r16, r15 + 8968 0x5c 0xc2 0xd0 0x00 0x01 0xf0 0xb1 0x0c 0x10 0xba LDA r16, [p2, #-8]; MOVXM p1, #508440 + 8978 0x01 0x06 0x56 0x98 LDA r18, [p1] + 8982 0x07 0x06 0x36 0x98 LDA r17, [p7] + 8986 0x07 0xf4 0x99 0x18 LDA p1, [sp, #-12] + 8990 0x07 0xf9 0xd1 0x18 LDA r14, [sp, #-8] + 8994 0x00 0x00 NOPX + 8996 0x00 0x00 NOPX + 8998 0x13 0xe1 0x01 0x98 SUB r16, r15, r16 + 9002 0x0a 0xe6 0x11 0x98 ST r16, [p2, #-8] + 9006 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 9010 0x80 0x11 0xa8 0x40 0x01 0x84 JNZ r16, #9040 +.delay_slot + 9016 0x10 0x30 0x01 0x18 MOVX r24, #0 +.delay_slot +.swstall delay_slot + 9020 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9022 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9024 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9026 0x00 0x00 NOPX + 9028 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x03 0x83 0x88 0xc1 0x36 NOPA; NOPB; ST r24, [p7]; NOPX +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 + 9040 0xff 0xb6 0x2e 0xed 0x41 0xd4 LDA r13, [sp, #-4]; MOV lr, r13 + 9046 0x07 0xf1 0xf1 0x18 LDA r15, [sp, #-16] + 9050 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 9054 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128 +.delay_slot +.swstall delay_slot + 9060 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9062 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9064 0x00 0x00 NOPX +.delay_slot + 9066 0x1f 0x62 0xc0 0xf8 MOV p7, p1 +.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + +.text_segment PM 9072 +.label __Z15_b14160_wrapperPPv___func_begin0 +.label _Z15_b14160_wrapperPPv +.function_start + 9072 0x1a 0x60 0xc0 0xf8 MOV p2, p0 + 9076 0x02 0x1c 0x1e 0x98 LDA p0, [p2], #4 + 9080 0x02 0x2c 0x9e 0x98 LDA p1, [p2], #8 + 9084 0x02 0xf5 0x9e 0x98 LDA p3, [p2, #-4] + 9088 0x02 0x05 0x1e 0x98 LDA p2, [p2] +.tail_call + 9092 0x00 0x10 0xc8 0x00 0x00 0x84 J #8592 +.delay_slot +.swstall delay_slot + 9098 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9100 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9102 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9104 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9106 0x00 0x00 NOPX +.label _Z15_b14160_wrapperPPv__end +.label __Z15_b14160_wrapperPPv___func_end0 + +.text_segment PM 9120 +.label __Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj___func_begin0 +.label _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj +.function_start + 9120 0x23 0x85 0xd0 0x00 0x01 0xf0 0x09 0x40 0x10 0xba LDA el0, [p1], #4; MOVXM r0, #508544 + 9130 0x08 0x00 0x80 0x80 0x0b 0x3e 0x27 0xa9 0x30 0x01 0x08 0x76 MOVA m0, #64; MOVS p0, r0; MOVX r2, #-3; ADD.NC p2, r0, #4 + 9142 0x00 0x42 0x20 0x22 0x01 0x64 MOVX r1, #4; MOV r0, #128 + 9148 0x00 0x00 NOPX + 9150 0x00 0x00 NOPX + 9152 0x00 0x00 NOPX + 9154 0x00 0x00 NOPX + 9156 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 9160 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 9164 0x01 0x1c 0x0e 0x98 LDA eh0, [p1], #4 + 9168 0x00 0x00 NOPX + 9170 0x00 0x00 NOPX + 9172 0x00 0x00 NOPX + 9174 0x00 0x00 NOPX + 9176 0x00 0x00 NOPX + 9178 0x0a 0x1c 0x29 0x98 ST el0, [p2], #4 + 9182 0x0a 0x1c 0x09 0x98 ST eh0, [p2], #4 + 9186 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 9190 0x01 0x1c 0x0e 0x98 LDA eh0, [p1], #4 + 9194 0x00 0x00 NOPX + 9196 0x00 0x00 NOPX + 9198 0x00 0x00 NOPX + 9200 0x00 0x00 NOPX + 9202 0x00 0x00 NOPX + 9204 0x0a 0x1c 0x29 0x98 ST el0, [p2], #4 + 9208 0x0a 0x1c 0x09 0x98 ST eh0, [p2], #4 + 9212 0x01 0x04 0x0e 0x98 LDA eh0, [p1] + 9216 0x01 0x14 0x2e 0x98 LDA el0, [p1, #4] + 9220 0x00 0x00 NOPX + 9222 0x00 0x00 NOPX + 9224 0x00 0x00 NOPX + 9226 0x00 0x00 NOPX + 9228 0x00 0x00 NOPX + 9230 0x0a 0x04 0x09 0x98 ST eh0, [p2] + 9234 0x0a 0x14 0x29 0x98 ST el0, [p2, #4] + 9238 0x00 0x08 0x76 0x98 LDA r3, [p0], m0 + 9242 0x00 0x00 NOPX + 9244 0x00 0x00 NOPX + 9246 0x00 0x00 NOPX + 9248 0x00 0x00 NOPX + 9250 0x00 0x00 NOPX + 9252 0x00 0x00 NOPX + 9254 0x10 0xc8 0x2d 0x98 LSHL r4, r3, r2 + 9258 0x18 0xc3 0xb0 0xa4 0xff 0x24 LSHL r3, r3, r1; ADD.NC r1, r4, #-1 + 9264 0x00 0x86 0x30 0x00 0x88 0x60 0x70 0x02 ST r1, [p0]; MOV r4, p0 + 9272 0x19 0x62 0x62 0x18 ADD.NC p1, r4, #-60 + 9276 0x01 0x08 0x96 0x98 LDA r4, [p1], m0 + 9280 0x00 0x00 NOPX + 9282 0x00 0x00 NOPX + 9284 0x00 0x00 NOPX + 9286 0x00 0x00 NOPX + 9288 0x00 0x00 NOPX + 9290 0x00 0x00 NOPX + 9292 0x20 0x85 0xb2 0x22 0x01 0x64 LSHL r2, r4, r2; MOV r4, #128 + 9298 0x10 0x85 0xff 0x18 ADD r2, r2, #-1 + 9302 0x23 0x8a 0x31 0x90 0x5c 0x5c ST r2, [p1], #4; MSC r4, r4, r3, r2 + 9308 0x09 0x1c 0x11 0x98 ST r0, [p1], #4 + 9312 0x09 0x1c 0x11 0x98 ST r0, [p1], #4 + 9316 0x09 0x2c 0x11 0x98 ST r0, [p1], #8 + 9320 0x09 0xfc 0x71 0x98 ST r3, [p1], #-4 + 9324 0x09 0x2c 0x91 0x98 ST r4, [p1], #8 + 9328 0x20 0x82 0x30 0x00 0xa9 0x60 0x70 0x02 ST r0, [p1]; MOV r5, p1 + 9336 0x19 0x62 0xde 0x18 ADD.NC p1, r5, #-68 + 9340 0x09 0x1c 0x11 0x98 ST r0, [p1], #4 + 9344 0x09 0x1c 0x11 0x98 ST r0, [p1], #4 + 9348 0x09 0x1c 0x51 0x98 ST r2, [p1], #4 + 9352 0x09 0x1c 0x31 0x98 ST r1, [p1], #4 + 9356 0x23 0x82 0x30 0x50 0x00 0x5c ST r0, [p1], #4; RET lr +.delay_slot + 9362 0x09 0x2c 0x71 0x98 ST r3, [p1], #8 +.delay_slot + 9366 0x09 0xfc 0x51 0x98 ST r2, [p1], #-4 +.delay_slot + 9370 0x09 0x2c 0x91 0x98 ST r4, [p1], #8 +.delay_slot + 9374 0x09 0x04 0x31 0x98 ST r1, [p1] +.delay_slot + 9378 0x09 0x14 0x11 0x98 ST r0, [p1, #4] +.label _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj__end +.label __Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj___func_end0 + +.text_segment PM 9392 +.label __Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params___func_begin0 +.label _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params +.function_start + 9392 0x00 0x41 0x00 0x00 0x01 0xf1 0x31 0x46 0x10 0xba MOVA r1, #2; MOVXM p2, #508556 + 9402 0x40 0xee 0xd0 0x00 0xb2 0x2c LDA r27, [p2]; MOVX r0, #22 + 9408 0x00 0x00 NOPX + 9410 0x00 0x00 NOPX + 9412 0x00 0x00 NOPX + 9414 0x00 0x00 NOPX + 9416 0x00 0x00 NOPX + 9418 0x00 0x00 NOPX + 9420 0x16 0xc2 0x17 0x98 EQ r1, r27, r1 + 9424 0x08 0x13 0x50 0x40 0x01 0x84 JNZ r1, #9888 +.delay_slot + 9430 0x10 0x04 0x75 0x18 MOVX r2, #29 +.delay_slot + 9434 0x10 0x00 0x22 0x18 SEL.EQZ r0, r0, r2, r27 +.delay_slot +.swstall delay_slot + 9438 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9440 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9442 0x00 0x00 NOPX + 9444 0x00 0x07 0xc4 0xc5 0x20 0x44 MOVXM p2, #508560 + 9450 0x02 0x04 0x36 0x98 LDA r1, [p2] + 9454 0x00 0x00 NOPX + 9456 0x00 0x00 NOPX + 9458 0x00 0x00 NOPX + 9460 0x00 0x00 NOPX + 9462 0x00 0x00 NOPX + 9464 0x00 0x00 NOPX + 9466 0x08 0x14 0x90 0x00 0x01 0x84 JZ r1, #10528 +.delay_slot +.swstall delay_slot + 9472 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9474 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9476 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9478 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9480 0x00 0x00 NOPX + 9482 0x10 0x04 0x29 0x18 MOVX r2, #10 + 9486 0x10 0x44 0x2c 0x98 LTU r2, r1, r2 + 9490 0x10 0x13 0x00 0x40 0x01 0x84 JNZ r2, #9728 +.delay_slot +.swstall delay_slot + 9496 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9498 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9500 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9502 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9504 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9506 0x00 0x38 0x68 0x00 0x00 0x08 0x7a 0xd0 0x10 0x3a VLDB x0, [p0], #64; MOVXM ls, #9632 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9516 0x00 0x38 0x68 0x00 0x00 0x09 0xba 0xd0 0x10 0x3a VLDB x0, [p0], #64; MOVXM le, #9632 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9526 0x00 0x2c 0xf0 0x1c 0x34 0x02 0xb8 0x7d 0xce 0xba NOPA; VLDB x0, [p0], #64; ADD.NC lc, r1, #-9 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9536 0x00 0x2c 0xf0 0x38 0x68 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9552 0x00 0x2c 0xf0 0x38 0x68 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9568 0x00 0x2c 0xf0 0x38 0x68 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9584 0x00 0x2c 0xf0 0x38 0x68 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9600 0x00 0x2c 0xf0 0x38 0x68 0x01 0x5b 0x00 0x00 0x00 0x00 0x00 0xe8 0x00 0x00 0xe1 NOPA; VLDB x0, [p0], #64; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9616 0x00 0x2c 0xf0 0x38 0x68 0x01 0x5b 0x00 0x00 0x00 0x00 0x00 0xe8 0x00 0x00 0xe1 NOPA; VLDB x0, [p0], #64; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_240 +.loop_nesting 1 +.begin_of_loop +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9632 0x00 0x2c 0xf0 0x38 0x69 0x1c 0x06 0x80 0x00 0x00 0x00 0x00 0xe8 0x00 0x00 0xe1 NOPA; VLDB x0, [p0], #64; VST bmll0, [p1], #64; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV +.loop_nesting 0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9648 0x23 0x80 0xd0 0x00 0x00 0x00 0xe0 0x02 VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9656 0x23 0x80 0xd0 0x00 0x00 0x00 0xe0 0x02 VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9664 0x23 0x80 0xd0 0x00 0x00 0x00 0xe0 0x02 VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9672 0x23 0x80 0xd0 0x01 0x40 0x00 0x00 0x00 0xe9 0x3a VST bmll0, [p1], #64; RET lr; VSHUFFLE bmll0, x0, x0, r0 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9682 0x23 0x80 0xd0 0x00 0x00 0x00 0xe0 0x02 VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9690 0x23 0x80 0xd0 0x00 0x00 0x00 0xe0 0x02 VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9698 0x23 0x80 0xd0 0x00 0x00 0x00 0xe0 0x02 VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9706 0x00 0x2c 0xf2 0x38 0x0d 0x0c NOPA; VST bmll0, [p1], #64 +.delay_slot + 9712 0x00 0x2c 0xf0 0x00 0x21 0x1c 0x06 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmll0, [p1], #64; NOPX; NOPM; NOPV +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_336 + 9728 0x1d 0x70 0xa0 0xf8 MOV lc, r1 + 9732 0x00 0x00 0x21 0xec 0x20 0x44 MOVXM ls, #9744 + 9738 0x00 0x00 0x26 0xed 0x00 0x44 MOVXM le, #9856 +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_352 +.loop_nesting 1 +.begin_of_loop + 9744 0x38 0x1c 0x34 0x18 VLDB x0, [p0], #64 + 9748 0x00 0x00 NOPX + 9750 0x00 0x2c 0xf0 0x00 0x10 0x00 0x01 0xa5 0x7e 0xba NOPA; NOPB; NOPM + 9760 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 9776 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 9792 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 9808 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 9824 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x00 0x00 0xe8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV + 9840 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLE_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_464 +.end_of_loop + 9856 0x00 0x2c 0xf0 0x00 0x21 0x1c 0x06 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmll0, [p1], #64; NOPX; NOPM; NOPV +.loop_nesting 0 + 9872 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot + 9876 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9878 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9880 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9882 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9884 0x00 0x01 0x67 0x98 NOPA +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_496 + 9888 0x00 0x07 0xc4 0xc5 0x00 0x44 MOVXM p2, #508544 + 9894 0x02 0x04 0x16 0x98 LDA r0, [p2] + 9898 0x00 0x00 NOPX + 9900 0x00 0x00 NOPX + 9902 0x00 0x00 NOPX + 9904 0x00 0x00 NOPX + 9906 0x00 0x00 NOPX + 9908 0x00 0x00 NOPX + 9910 0x00 0x14 0x90 0x00 0x01 0x84 JZ r0, #10528 +.delay_slot +.swstall delay_slot + 9916 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9918 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9920 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9922 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9924 0x00 0x00 NOPX + 9926 0x04 0x94 0x80 0x00 0x01 0xf2 0x31 0x42 0x10 0xba MOVA m5, #36; MOVXM p4, #508548 + 9936 0x83 0x86 0xd0 0x00 0x51 0x08 0x4f 0xfd 0x58 0xba LDA r1, [p4], #4; MOVX r5, #8; MOV r2, #-3 + 9946 0x95 0x12 0xd0 0x00 0x30 0x2a 0x60 0x00 0x58 0xba LDA r4, [p4], m5; MOVX r3, #1; MOV dc4, #0 + 9956 0x9d 0x90 0xd0 0x10 0x4b 0x00 0x60 0x8a 0x00 0x20 0x58 0x76 LDA m1, [p4], #-8; MOVS dc0, dc4; MOVX r6, #4; MOV m4, #32 + 9968 0x9d 0x94 0xd1 0x10 0x4b 0x00 0x0f 0xf8 0xe8 0x34 0x58 0x76 LDA dn1, [p4], #-8; MOVS dc1, dc4; ADD r0, r0, #-1; MOV r7, #52 + 9980 0x87 0x98 0xd5 0x10 0x4b 0x00 0x00 0x09 0x33 0xa8 0x10 0x76 LDA dj1, [p4], #12; MOVS dc5, dc4; MOVXM p2, #10064 + 9992 0x9d 0xd4 0xd0 0x00 0x00 0x09 0xb3 0xb8 0x10 0xba LDA dn5, [p4], #-8; MOVXM p3, #10096 + 10002 0x91 0x58 0xd0 0x41 0xaa 0x2c LDA dj5, [p4], m4; MOVX r16, #53 + 10008 0x9d 0x80 0xd0 0x0b 0xb0 0xe4 0xa8 0x7f 0xc8 0xba LDA m0, [p4], #-8; LTU r27, r5, r1; ADD.NC r5, r1, #-1 + 10018 0x9d 0x84 0xd0 0x0b 0x11 0x6c 0xa9 0x3f 0xc8 0xba LDA dn0, [p4], #-8; LSHL r17, r5, r2; ADD.NC r5, r4, #-1 + 10028 0x87 0x88 0xd0 0x0a 0x21 0x6c 0xac 0x40 0x48 0xba LDA dj0, [p4], #12; LSHL r2, r5, r2; ADD.NC r5, r17, #1 + 10038 0x80 0xc4 0xd0 0x06 0x52 0x90 0x68 0x80 0x48 0xba LDA dn4, [p4]; SEL.EQZ r5, r3, r5, r27; ADD.NC r3, r2, #1 + 10048 0x9c 0xc8 0xd0 0x00 0x20 0x01 0x5b 0x0a 0x5f 0xf8 0x01 0xa5 0x78 0x00 0x00 0xe1 LDA dj4, [p4, #-8]; NOPB; NOPS; ADD r5, r5, #-1; NOPM; NOPV +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_672 +.loop_nesting 1 + 10064 0x08 0x14 0x88 0x00 0x01 0x84 JZ r1, #10512 +.delay_slot +.swstall delay_slot + 10070 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10072 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10074 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10076 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10078 0x00 0x00 NOPX + 10080 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x02 0x29 0x50 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; MOV r17, r5; NOPV +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_704 +.loop_nesting 2 + 10096 0x20 0x14 0x80 0x00 0x01 0x84 JZ r4, #10496 +.delay_slot +.swstall delay_slot + 10102 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10104 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10106 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10108 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10110 0x00 0x00 NOPX + 10112 0x10 0xe4 0x6c 0x98 LTU r18, r3, r6 + 10116 0x90 0x14 0x38 0x40 0x01 0x84 JNZ r18, #10352 +.delay_slot +.swstall delay_slot + 10122 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10124 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10126 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10128 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10130 0x00 0x00 NOPX + 10132 0x00 0x28 0x68 0x00 0x00 0x08 0x7c 0x00 0x10 0x3a VLDB x0, [p0, #64]; MOVXM ls, #10240 + 10142 0x00 0x70 0xe8 0x00 0x00 0x09 0xbc 0x10 0x10 0x3a VLDB.3D x1, [p0], d1; MOVXM le, #10272 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 10152 0x1d 0x71 0xfe 0x98 ADD.NC lc, r3, #-3 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10156 0x38 0x14 0x34 0x18 VLDB x0, [p0, #64] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10160 0x00 0x2c 0xf0 0x70 0xe8 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB.3D x1, [p0], d1; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10176 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10192 0x00 0x2c 0xf0 0x28 0x68 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB x0, [p0, #64]; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10208 0x00 0x2c 0xf0 0x70 0xe8 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB.3D x1, [p0], d1; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10224 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x44 0x0e 0xe8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmlh0, x1, x0, r7; NOPV +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_848 +.loop_nesting 3 +.begin_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10240 0x00 0x2c 0xf0 0x28 0x6c 0x84 0x8b 0x00 0x00 0x00 0x04 0x20 0xe8 0x00 0x00 0xe1 NOPA; VLDB x0, [p0, #64]; MOVS p4, p1; NOPX; VSHUFFLE bmll0, x1, x0, r16; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10256 0x00 0x2c 0xf0 0x70 0xe9 0x18 0x26 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB.3D x1, [p0], d1; VST.3D bmlh0, [p1], d0; NOPX; NOPM; NOPV +.label ZLE_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_880 +.end_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10272 0x00 0x2c 0xf0 0x00 0x24 0x14 0x06 0x80 0x00 0x00 0x44 0x0e 0xe8 0x00 0x00 0xe1 NOPA; NOPB; VST bmll0, [p4, #64]; NOPX; VSHUFFLE bmlh0, x1, x0, r7; NOPV +.loop_nesting 2 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10288 0x90 0x91 0x60 0x00 0x04 0x20 0xe0 0x02 MOVS p4, p1; VSHUFFLE bmll0, x1, x0, r16 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10296 0x09 0x18 0x26 0x98 VST.3D bmlh0, [p1], d0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10300 0x82 0x80 0xd0 0x00 0x44 0x0e 0xe0 0x02 VST bmll0, [p4, #64]; VSHUFFLE bmlh0, x1, x0, r7 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10308 0x18 0x08 0x41 0xd8 VSHUFFLE bmll0, x1, x0, r16 + 10312 0x00 0x14 0x80 0x00 0x00 0x84 J #10496 +.delay_slot + 10318 0x23 0x04 0xd0 0x02 0x31 0x60 0x70 0x02 VST.3D bmlh0, [p1], d0; MOV p4, p1 +.delay_slot + 10326 0x82 0x80 0xd0 0x00 0x44 0x0e 0xe0 0x02 VST bmll0, [p4, #64]; VSHUFFLE bmlh0, x1, x0, r7 +.delay_slot + 10334 0x90 0x91 0x60 0x00 0x04 0x20 0xe0 0x02 MOVS p4, p1; VSHUFFLE bmll0, x1, x0, r16 +.delay_slot + 10342 0x09 0x18 0x26 0x98 VST.3D bmlh0, [p1], d0 +.delay_slot + 10346 0x00 0x2c 0xf8 0x28 0x0d 0x0c NOPA; VST bmll0, [p4, #64] +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_960 + 10352 0x00 0x00 0x21 0xf1 0x00 0x44 MOVXM ls, #10368 + 10358 0x00 0x00 0x26 0xf1 0xe0 0x44 MOVXM le, #10480 + 10364 0x1d 0x71 0x00 0x98 ADD.NC lc, r2, #1 +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_976 +.loop_nesting 3 +.begin_of_loop + 10368 0x02 0x86 0x88 0xc5 0x81 0xf4 VLDB x0, [p0, #64]; MOV p4, p1 + 10374 0x38 0x38 0x74 0x18 VLDB.3D x1, [p0], d1 + 10378 0x00 0x00 NOPX + 10380 0x00 0x00 NOPX + 10382 0x00 0x00 NOPX + 10384 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 10400 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 10416 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 10432 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x44 0x0e 0xe8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmlh0, x1, x0, r7; NOPV + 10448 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x04 0x20 0xe8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x1, x0, r16; NOPV + 10464 0x00 0x2c 0xf0 0x00 0x21 0x18 0x26 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST.3D bmlh0, [p1], d0; NOPX; NOPM; NOPV +.label ZLE_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1088 +.end_of_loop + 10480 0x00 0x2c 0xf0 0x00 0x24 0x14 0x06 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmll0, [p4, #64]; NOPX; NOPM; NOPV +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1104 +.loop_nesting 2 + 10496 0x14 0x62 0xe0 0x18 JNZD r17, r17, p3 +.delay_slot +.swstall delay_slot + 10500 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10502 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10504 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10506 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10508 0x00 0x01 0x67 0x98 NOPA +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1120 +.loop_nesting 1 + 10512 0x10 0x00 0xa0 0x18 JNZD r0, r0, p2 +.delay_slot +.swstall delay_slot + 10516 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10518 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10520 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10522 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10524 0x00 0x01 0x67 0x98 NOPA +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1136 +.loop_nesting 0 + 10528 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot + 10532 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10534 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10536 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10538 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10540 0x00 0x00 NOPX +.label _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params__end +.label __Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params___func_end0 + +.text_segment PM 10544 +.label __ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj___func_begin0 +.label _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj +.function_start + 10544 0x1b 0x6c 0xc0 0xf8 MOV p3, p6 + 10548 0xd0 0x91 0x60 0x00 0x01 0xf0 0xb1 0x0a 0x11 0x3a MOVS p6, p1; MOVXM p1, #508436 + 10558 0x01 0x06 0x16 0x98 LDA r16, [p1] + 10562 0x00 0x00 NOPX + 10564 0x00 0x00 NOPX + 10566 0x00 0x00 NOPX + 10568 0x00 0x00 NOPX + 10570 0x00 0x00 NOPX + 10572 0x00 0x00 NOPX + 10574 0x80 0x14 0xc8 0x40 0x01 0x84 JNZ r16, #10640 +.delay_slot + 10580 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 +.delay_slot + 10586 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12] +.delay_slot + 10590 0xf0 0x11 0x60 0x00 0xb7 0x60 0x70 0x02 MOVS p7, p0; MOV p1, p7 +.delay_slot + 10598 0x0f 0xf9 0x9d 0x98 ST p3, [sp, #-8] +.delay_slot + 10602 0xff 0x93 0xb0 0x00 0x01 0xf0 0x31 0x40 0x11 0x3a ST p1, [sp, #-4]; MOVXM p0, #508544 +.no_stack_arguments + 10612 0x00 0x11 0xd0 0x00 0x01 0x04 JL #9120 +.delay_slot + 10618 0x19 0x64 0xc0 0xf8 MOV p1, p2 +.delay_slot +.swstall delay_slot + 10622 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10624 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10626 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10628 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.label TGT_F_ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj_96 +.return_address + 10640 0xe0 0xc2 0xd0 0x00 0x01 0xf3 0xb1 0x4a 0x10 0xba LDA r16, [p7]; MOVXM p7, #508564 + 10650 0x07 0x06 0x36 0x98 LDA r17, [p7] + 10654 0x06 0x04 0x9e 0x98 LDA p1, [p6] + 10658 0x00 0x00 NOPX +.no_stack_arguments + 10660 0x00 0x12 0x58 0x00 0x01 0x04 JL #9392 +.delay_slot + 10666 0x10 0x24 0x05 0x18 MOVX r18, #1 +.delay_slot + 10670 0x00 0x07 0xc4 0xc5 0x00 0x44 MOVXM p2, #508544 +.delay_slot + 10676 0x1e 0x64 0xc0 0xf8 MOV p6, p2 +.delay_slot + 10680 0x14 0x63 0x2d 0x98 LSHL r17, r17, r18 +.delay_slot + 10684 0x18 0x68 0xc1 0x58 ADD.NC p0, r17, r16 +.return_address + 10688 0xfe 0x87 0x20 0x00 0x01 0xf1 0x31 0x0a 0x10 0xba LDA lr, [sp, #-12]; MOVXM p2, #508436 + 10698 0x40 0xc2 0xd0 0x60 0x02 0x2c LDA r16, [p2]; MOVX r24, #0 + 10704 0x06 0x66 0x36 0x98 LDA r17, [p6, #24] + 10708 0x07 0xfb 0x19 0x18 LDA p6, [sp, #-8] + 10712 0x07 0xff 0x99 0x18 LDA p7, [sp, #-4] + 10716 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 + 10722 0x00 0x00 NOPX + 10724 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 10728 0x14 0x20 0x07 0x18 ADD r16, r16, #1 +.delay_slot + 10732 0x14 0x77 0x07 0x98 EQ r27, r17, r16 +.delay_slot + 10736 0x14 0x21 0x82 0x18 SEL.EQZ r16, r16, r24, r27 +.delay_slot + 10740 0x0a 0x06 0x11 0x98 ST r16, [p2] +.delay_slot +.swstall delay_slot + 10744 0x00 0x00 NOPX +.label _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj__end +.label __ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj___func_end0 + +.text_segment PM 10752 +.label __Z14_b7835_wrapperPPv___func_begin0 +.label _Z14_b7835_wrapperPPv +.function_start + 10752 0x19 0x60 0xc0 0xf8 MOV p1, p0 + 10756 0x01 0x1c 0x1e 0x98 LDA p0, [p1], #4 + 10760 0x01 0x15 0x1e 0x98 LDA p2, [p1, #4] + 10764 0x01 0x04 0x9e 0x98 LDA p1, [p1] +.tail_call + 10768 0x00 0x14 0x98 0x00 0x00 0x84 J #10544 +.delay_slot +.swstall delay_slot + 10774 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10776 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10778 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10780 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10782 0x00 0x00 NOPX +.label _Z14_b7835_wrapperPPv__end +.label __Z14_b7835_wrapperPPv___func_end0 +.label __ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj___func_begin0 +.label _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj +.function_start + 10784 0x20 0x85 0xd8 0xa9 0x81 0xd4 LDA el0, [p1]; MOV r17, p2 + 10790 0x19 0x68 0x82 0x18 ADD.NC p1, r17, #4 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 10794 0x01 0x1e 0x56 0x98 LDA r18, [p1], #4 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10798 0x01 0x05 0xf6 0x98 LDA r15, [p1] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10802 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10804 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10806 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10808 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10810 0x18 0x17 0xa0 0xf8 MOV r0, r15 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 10814 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10820 0x7c 0xa5 0xf8 0x3f 0xfd 0x64 MUL r18, r15, r18; MOV r16, #-1 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10826 0xfd 0xca 0xb0 0x0f 0xff 0xfe 0x2f 0xff 0x91 0x3a ST r18, [sp, #-20]; MOVXM r17, #1073741823 + 10836 0x14 0xa1 0x0d 0x98 LSHL r16, r18, r16 + 10840 0x14 0x61 0x04 0x98 AND r16, r17, r16 + 10844 0x80 0x15 0x58 0x00 0x01 0x84 JZ r16, #10928 +.delay_slot + 10850 0x00 0xf3 0xd0 0xdd 0x81 0xd4 LDA p7, [p0]; MOV p0, p7 +.delay_slot + 10856 0x0f 0xf8 0x1d 0x98 ST p0, [sp, #-8] +.delay_slot + 10860 0x0f 0xf5 0xd5 0x98 ST r14, [sp, #-12] +.delay_slot + 10864 0x0f 0xf0 0x3d 0x98 ST lr, [sp, #-16] +.delay_slot + 10868 0x3c 0xba 0xdf 0xf8 0x2b 0x0c LDA r14, [p1, #-8]; ST r0, [sp, #-4] + 10874 0xfd 0x05 0xb0 0x00 0x02 0x5c ST el0, [sp, #-24]; MOVX r0, #0 + 10880 0x07 0xe8 0x99 0x18 LDA p1, [sp, #-24] +.no_stack_arguments + 10884 0x00 0x18 0xa0 0x00 0x01 0x04 JL #12608 +.delay_slot + 10890 0x10 0x22 0x09 0x18 MOVX r17, #2 +.delay_slot + 10894 0x14 0x03 0x1d 0x98 LSHL r1, r16, r17 +.delay_slot +.swstall delay_slot + 10898 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10900 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10902 0x00 0x2c 0xf0 0x00 0x10 0x00 0x01 0xa5 0x7e 0xba NOPA; NOPB; NOPM +.return_address + 10912 0x00 0x15 0x60 0x00 0x00 0x84 J #10944 +.delay_slot +.swstall delay_slot + 10918 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10920 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10922 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10924 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10926 0x00 0x00 NOPX +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_144 + 10928 0x00 0x2c 0xf0 0x00 0x27 0xe8 0x2d 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST el0, [sp, #-24]; NOPX; NOPM; NOPV +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_160 + 10944 0x78 0x15 0xe8 0x00 0x01 0x84 JZ r15, #11216 +.delay_slot +.swstall delay_slot + 10950 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10952 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10954 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10956 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10958 0x00 0x00 NOPX + 10960 0xfd 0xc6 0x20 0x00 0x00 0x08 0x7d 0x98 0x10 0xba LDA r17, [sp, #-20]; MOVXM ls, #11056 + 10970 0x00 0x33 0x00 0x00 0x00 0x09 0xbd 0xc8 0x10 0xba MOVA r19, #1; MOVXM le, #11152 + 10980 0xfd 0x4a 0x20 0x1d 0x49 0xee 0x0b 0xff 0xc8 0xba LDA r18, [sp, #-24]; LSHL r20, r14, r19; ADD.NC r16, r15, #-1 + 10990 0xfe 0x07 0x20 0x00 0x00 0x08 0x35 0x88 0x10 0xba LDA lr, [sp, #-16]; MOVXM p0, #11024 + 11000 0x18 0x0a 0x20 0xf8 MOV m0, r20 + 11004 0x00 0x00 NOPX + 11006 0x00 0x00 NOPX + 11008 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x23 0x19 0xec 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; LSHL r17, r17, r19; NOPM; NOPV +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_240 +.loop_nesting 1 + 11024 0x70 0x15 0xd0 0x00 0x01 0x84 JZ r14, #11168 +.delay_slot +.swstall delay_slot + 11030 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11032 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11034 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11036 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11038 0x00 0x00 NOPX + 11040 0x53 0x91 0x60 0x02 0xbb 0x90 0x70 0x02 MOVS p2, p7; MOV lc, r14 + 11048 0x00 0x2b 0x60 0x00 0xb4 0x90 0x70 0x02 NOPS; MOV p1, r18 +.label ZLS_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_272 +.loop_nesting 2 +.begin_of_loop + 11056 0x43 0xce 0x50 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 LDA.s16 r19, [p2], #2; NOPB; NOPS; NOPX; NOPM; NOPV + 11072 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 11088 0x23 0xce 0xe0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 ST.s16 r19, [p1], #2; NOPB; NOPS; NOPX; NOPM; NOPV + 11104 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 11120 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 11136 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLE_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_368 +.end_of_loop + 11152 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_384 +.loop_nesting 1 + 11168 0xe1 0x72 0x08 0x40 0x40 0x1c PADDB [p7], m0; JNZD r16, r16, p0 +.delay_slot +.swstall delay_slot + 11174 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11176 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11178 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11180 0x00 0x00 NOPX +.delay_slot + 11182 0x1c 0x98 0xc9 0x58 ADD.NC r18, r17, r18 +.loop_nesting 0 + 11186 0x00 0x15 0xf0 0x00 0x00 0x84 J #11232 +.delay_slot +.swstall delay_slot + 11192 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11194 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11196 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11198 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11200 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_432 + 11216 0xfe 0x07 0x20 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 LDA lr, [sp, #-16]; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_448 + 11232 0x07 0xf5 0xd1 0x18 LDA r14, [sp, #-12] + 11236 0x07 0xfb 0x99 0x18 LDA p7, [sp, #-8] + 11240 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] + 11244 0x00 0x00 NOPX + 11246 0x00 0x00 NOPX + 11248 0x00 0x00 NOPX + 11250 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 11254 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 11260 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11262 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11264 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11266 0x00 0x00 NOPX +.label _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj__end +.label __ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj___func_end0 + +.text_segment PM 11280 +.label __Z14_b8148_wrapperPPv___func_begin0 +.label _Z14_b8148_wrapperPPv +.function_start + 11280 0x19 0x60 0xc0 0xf8 MOV p1, p0 + 11284 0x01 0x1c 0x1e 0x98 LDA p0, [p1], #4 + 11288 0x01 0x15 0x1e 0x98 LDA p2, [p1, #4] + 11292 0x01 0x04 0x9e 0x98 LDA p1, [p1] +.tail_call + 11296 0x00 0x15 0x10 0x00 0x00 0x84 J #10784 +.delay_slot +.swstall delay_slot + 11302 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11304 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11306 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11308 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11310 0x00 0x00 NOPX +.label _Z14_b8148_wrapperPPv__end +.label __Z14_b8148_wrapperPPv___func_end0 +.label __Z15_b13739_wrapperPPv___func_begin0 +.label _Z15_b13739_wrapperPPv +.function_start + 11312 0x19 0x60 0xc0 0xf8 MOV p1, p0 + 11316 0x01 0x2c 0x1e 0x98 LDA p0, [p1], #8 + 11320 0x01 0xf5 0x1e 0x98 LDA p2, [p1, #-4] + 11324 0x01 0x04 0x9e 0x98 LDA p1, [p1] +.tail_call + 11328 0x00 0x07 0xa0 0x00 0x00 0x84 J #3904 +.delay_slot +.swstall delay_slot + 11334 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11336 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11338 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11340 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11342 0x00 0x00 NOPX +.label _Z15_b13739_wrapperPPv__end +.label __Z15_b13739_wrapperPPv___func_end0 +.label __Z15_b13744_wrapperPPv___func_begin0 +.label _Z15_b13744_wrapperPPv +.function_start + 11344 0x19 0x60 0xc0 0xf8 MOV p1, p0 + 11348 0x01 0x2c 0x1e 0x98 LDA p0, [p1], #8 + 11352 0x01 0xf5 0x1e 0x98 LDA p2, [p1, #-4] + 11356 0x01 0x04 0x9e 0x98 LDA p1, [p1] +.tail_call + 11360 0x00 0x09 0x80 0x00 0x00 0x84 J #4864 +.delay_slot +.swstall delay_slot + 11366 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11368 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11370 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11372 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11374 0x00 0x00 NOPX +.label _Z15_b13744_wrapperPPv__end +.label __Z15_b13744_wrapperPPv___func_end0 +.label __Z15_b13749_wrapperPPv___func_begin0 +.label _Z15_b13749_wrapperPPv +.function_start + 11376 0x1a 0x60 0xc0 0xf8 MOV p2, p0 + 11380 0x02 0x3c 0x1e 0x98 LDA p0, [p2], #12 + 11384 0x02 0xec 0x9e 0x98 LDA p1, [p2], #-8 + 11388 0x02 0x15 0x9e 0x98 LDA p3, [p2, #4] + 11392 0x02 0x05 0x1e 0x98 LDA p2, [p2] +.tail_call + 11396 0x00 0x0b 0x78 0x00 0x00 0x84 J #5872 +.delay_slot +.swstall delay_slot + 11402 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11404 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11406 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11408 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11410 0x00 0x00 NOPX +.label _Z15_b13749_wrapperPPv__end +.label __Z15_b13749_wrapperPPv___func_end0 + +.text_segment PM 11424 +.label _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj +.label __ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj___func_begin0 +.function_start + 11424 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 11428 0x00 0x00 NOPX + 11430 0x00 0x00 NOPX + 11432 0x00 0x00 NOPX + 11434 0x00 0x00 NOPX + 11436 0x00 0x00 NOPX + 11438 0x00 0x00 NOPX + 11440 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 11444 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 11448 0x00 0x00 NOPX + 11450 0x00 0x00 NOPX + 11452 0x00 0x00 NOPX + 11454 0x00 0x00 NOPX + 11456 0x00 0x00 NOPX + 11458 0x00 0x00 NOPX + 11460 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 11464 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 11468 0x00 0x00 NOPX + 11470 0x00 0x00 NOPX + 11472 0x00 0x00 NOPX + 11474 0x00 0x00 NOPX + 11476 0x00 0x00 NOPX + 11478 0x00 0x00 NOPX + 11480 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 11484 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 11488 0x00 0x00 NOPX + 11490 0x00 0x00 NOPX + 11492 0x00 0x00 NOPX + 11494 0x00 0x00 NOPX + 11496 0x00 0x00 NOPX + 11498 0x00 0x00 NOPX + 11500 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 11504 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 11508 0x00 0x00 NOPX + 11510 0x00 0x00 NOPX + 11512 0x00 0x00 NOPX + 11514 0x00 0x00 NOPX + 11516 0x00 0x00 NOPX + 11518 0x00 0x00 NOPX + 11520 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 11524 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 11528 0x00 0x00 NOPX + 11530 0x00 0x00 NOPX + 11532 0x00 0x00 NOPX + 11534 0x00 0x00 NOPX + 11536 0x00 0x00 NOPX + 11538 0x00 0x00 NOPX + 11540 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 11544 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 11548 0x00 0x00 NOPX + 11550 0x00 0x00 NOPX + 11552 0x00 0x00 NOPX + 11554 0x00 0x00 NOPX + 11556 0x00 0x00 NOPX + 11558 0x00 0x00 NOPX + 11560 0x08 0x04 0x29 0x98 ST el0, [p0] + 11564 0x01 0x14 0x2e 0x98 LDA el0, [p1, #4] + 11568 0x00 0x00 NOPX + 11570 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot + 11574 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11576 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11578 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11580 0x00 0x00 NOPX +.delay_slot + 11582 0x08 0x14 0x29 0x98 ST el0, [p0, #4] +.label _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj__end +.label __ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj___func_end0 + +.text_segment PM 11600 +.label _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params +.label __ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params___func_begin0 +.function_start + 11600 0x03 0x86 0xd0 0x00 0x00 0x28 0x80 0x20 0x58 0xba LDA r1, [p0], #4; MOVX r0, #1; MOV m1, #32 + 11610 0x03 0x96 0xd0 0x00 0x30 0x48 0x4f 0xfa 0x58 0xba LDA r5, [p0], #4; MOVX r3, #2; MOV r2, #-6 + 11620 0x05 0x92 0xd0 0x01 0x01 0x54 LDA r4, [p0], #8; MOV m0, #64 + 11626 0x05 0x1a 0xd1 0x02 0x01 0x54 LDA r6, [p0], m1; MOV dj0, #128 + 11632 0x00 0x00 NOPX + 11634 0x00 0x00 NOPX + 11636 0x00 0x00 NOPX + 11638 0x00 0x00 NOPX + 11640 0x00 0x00 NOPX + 11642 0x11 0x42 0x1f 0x98 MUL r1, r5, r1 + 11646 0x11 0x80 0x04 0x98 AND r0, r6, r0 + 11650 0x10 0xc0 0x05 0x98 OR r0, r3, r0 + 11654 0x19 0x82 0x30 0x84 0x9f 0x5c ST r0, [p0], #-16; MUL r1, r1, r4 + 11660 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 11664 0x10 0x40 0x2d 0x98 LSHL r0, r1, r2 +.delay_slot + 11668 0x08 0x1c 0x11 0x98 ST r0, [p0], #4 +.delay_slot + 11672 0x08 0x1c 0x01 0x98 ST m0, [p0], #4 +.delay_slot + 11676 0x08 0x04 0x41 0x98 ST dj0, [p0] +.delay_slot + 11680 0x08 0x14 0x01 0x98 ST m0, [p0, #4] +.label _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params__end +.label __ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params___func_end0 + +.text_segment PM 11696 +.label _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj +.label __ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj___func_begin0 +.function_start +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.noswbrkpt + 11696 0x00 0x16 0x50 0x00 0x01 0x04 JL #11424 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11702 0x18 0xc1 0xe0 0xf8 MOV dc0, lr +.delay_slot + 11706 0x1a 0x60 0xc0 0xf8 MOV p2, p0 +.delay_slot +.swstall delay_slot + 11710 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11712 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11714 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV +.tail_call +.return_address + 11728 0x00 0x16 0xa8 0x00 0x00 0x84 J #11600 +.delay_slot + 11734 0x1f 0x71 0x80 0xf8 MOV lr, dc0 +.delay_slot + 11738 0x18 0x64 0xc0 0xf8 MOV p0, p2 +.delay_slot +.swstall delay_slot + 11742 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11744 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11746 0x00 0x00 NOPX +.label _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj__end +.label __ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj___func_end0 + +.text_segment PM 11760 +.label __Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params___func_begin0 +.label _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params +.function_start + 11760 0xb0 0x91 0x60 0x00 0x0a 0x60 0x70 0x02 MOVS p5, p1; MOV r0, p2 + 11768 0x1b 0x60 0x12 0x18 ADD.NC p3, r0, #36 + 11772 0x63 0xa0 0xd0 0x3d 0x81 0xd4 LDA m2, [p3], #4; MOV r0, p7 + 11778 0x03 0x1c 0x06 0x98 LDA m0, [p3], #4 + 11782 0x03 0xd4 0x56 0x98 LDA r2, [p3, #-12] + 11786 0x03 0x04 0x86 0x98 LDA m1, [p3] + 11790 0x00 0x00 NOPX + 11792 0x00 0x00 NOPX + 11794 0x00 0x00 NOPX + 11796 0x00 0x00 NOPX + 11798 0x00 0x00 NOPX + 11800 0x10 0x17 0xe0 0x00 0x01 0x84 JZ r2, #12224 +.delay_slot + 11806 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.delay_slot + 11810 0xe1 0x72 0x06 0xdd 0x81 0xf4 PADDB [p7], m0; MOV p3, p7 +.delay_slot + 11816 0x38 0x4b 0x90 0x18 PADDB [p0], m2 +.delay_slot + 11820 0x01 0x72 0x08 0xc1 0x81 0xf4 PADDB [p0], m0; MOV p4, p0 +.delay_slot + 11826 0x39 0x2b 0x90 0x18 PADDB [p1], m1 + 11830 0x10 0x02 0x11 0x18 MOVX r1, #4 + 11834 0x10 0x86 0x1c 0x98 LTU r3, r2, r1 + 11838 0x18 0x17 0x98 0x40 0x01 0x84 JNZ r3, #12080 +.delay_slot + 11844 0x18 0x80 0x60 0xb8 MOV dj0, #48 +.delay_slot + 11848 0x02 0x00 0x36 0x98 LDA r1, [p2, dj0] +.delay_slot +.swstall delay_slot + 11852 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11854 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11856 0x00 0x00 NOPX + 11858 0x81 0x13 0x76 0x10 0xe8 0x00 0x00 0x08 0x7f 0x58 0x10 0xb6 VLDA x2, [p4], m0; VLDB x1, [p3], m0; MOVXM ls, #11952 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11870 0x81 0x0c 0xfe 0x10 0x68 0x00 0x01 0x37 0xee 0x02 0x61 0x0b 0x60 0x7e PADDA [p4], m0; VLDB x0, [p7], m0; PADDS [p3], m0; MOVXM le, #12000 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11884 0x61 0x0b 0x70 0x11 0xef 0x08 0x5b 0x02 0xb8 0xbf 0x40 0xf6 VLDA x1, [p3], m0; VLDB x3, [p0], m0; PADDS [p7], m0; ADD.NC lc, r2, #-3 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11896 0x01 0x0c 0xf8 0x11 0x6b 0x08 0x5b 0x32 PADDA [p0], m0; VLDB x2, [p4], m0; PADDS [p3], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11904 0x81 0x0c 0xfe 0x10 0x68 0x3c PADDA [p4], m0; VLDB x0, [p7], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11910 0x01 0x1e 0x8e 0x10 0xb6 0x4c VLDB x3, [p0], m0; PADDS [p7], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11916 0x01 0x0c 0xf6 0x10 0xe8 0x3c PADDA [p0], m0; VLDB x1, [p3], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11922 0x81 0x16 0x80 0x12 0x0b 0xb4 VLDB x2, [p4], m0; VSHUFFLE bmll0, x1, x2, r1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11928 0x00 0x2c 0xfe 0x10 0x6b 0x08 0x5b 0x32 NOPA; VLDB x0, [p7], m0; PADDS [p3], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11936 0x00 0x2c 0xf0 0x11 0xed 0x28 0x06 0x80 0x00 0x00 0x40 0xc2 0xe8 0x00 0x00 0xe1 NOPA; VLDB x3, [p0], m0; VST bmll0, [p5], m1; NOPX; VSHUFFLE bmlh0, x0, x3, r1; NOPV +.label ZLS_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_192 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11952 0x81 0x0c 0xf6 0x10 0xef 0x08 0x5b 0x00 0x00 0x00 0x04 0x82 0xe8 0x00 0x00 0xe1 PADDA [p4], m0; VLDB x1, [p3], m0; PADDS [p7], m0; NOPX; VSHUFFLE bmll0, x1, x2, r1; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11968 0x01 0x0c 0xf8 0x11 0x69 0x28 0x26 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 PADDA [p0], m0; VLDB x2, [p4], m0; VST bmlh0, [p1], m1; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11984 0xa5 0x0c 0xfe 0x10 0x6b 0x08 0x5b 0x00 0x00 0x00 0x40 0xc2 0xe8 0x00 0x00 0xe1 PADDA [p5], m1; VLDB x0, [p7], m0; PADDS [p3], m0; NOPX; VSHUFFLE bmlh0, x0, x3, r1; NOPV +.label ZLE_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_240 +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12000 0x25 0x0c 0xf0 0x11 0xed 0x28 0x06 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 PADDA [p1], m1; VLDB x3, [p0], m0; VST bmll0, [p5], m1; NOPX; NOPM; NOPV +.loop_nesting 0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12016 0x18 0x09 0x05 0xd8 VSHUFFLE bmll0, x1, x2, r1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12020 0x09 0x28 0x26 0x98 VST bmlh0, [p1], m1 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12024 0x25 0x0c 0xf1 0x03 0x0b 0x94 PADDA [p1], m1; VSHUFFLE bmlh0, x0, x3, r1 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 12030 0x00 0x17 0xe0 0x00 0x00 0x84 J #12224 +.delay_slot +.aggressive_scheduled_block_id 2 +.noswbrkpt + 12036 0xa5 0x0c 0xf1 0x28 0x26 0x80 0x04 0x82 0xe2 0xba PADDA [p5], m1; VST bmlh0, [p1], m1; VSHUFFLE bmll0, x1, x2, r1 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12046 0x25 0x0c 0xfa 0x50 0x0d 0x0c PADDA [p1], m1; VST bmll0, [p5], m1 +.delay_slot + 12052 0xa5 0x0c 0xf1 0x03 0x0b 0x94 PADDA [p5], m1; VSHUFFLE bmlh0, x0, x3, r1 +.delay_slot + 12058 0x00 0x2c 0xfa 0x50 0x0d 0x0c NOPA; VST bmll0, [p5], m1 +.delay_slot + 12064 0x00 0x2c 0xf0 0x00 0x21 0x28 0x26 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmlh0, [p1], m1; NOPX; NOPM; NOPV +.label TGT_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_320 + 12080 0x1d 0x71 0x20 0xf8 MOV lc, r2 + 12084 0x00 0x00 0x21 0xfe 0x80 0x44 MOVXM ls, #12096 + 12090 0x00 0x00 0x26 0xff 0x60 0x44 MOVXM le, #12208 +.label ZLS_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_336 +.loop_nesting 1 +.begin_of_loop + 12096 0x81 0x0b 0x76 0x11 0x68 0x3c VLDA x1, [p4], m0; VLDB x2, [p3], m0 + 12102 0x61 0x0c 0xfe 0x10 0x6c 0x08 0x5b 0x32 PADDA [p3], m0; VLDB x0, [p7], m0; PADDS [p4], m0 + 12110 0xe1 0x0c 0xf0 0x11 0xe8 0x3c PADDA [p7], m0; VLDB x3, [p0], m0 + 12116 0x38 0x0b 0x90 0x18 PADDB [p0], m0 + 12120 0x00 0x00 NOPX + 12122 0x00 0x00 NOPX + 12124 0x00 0x01 0x67 0x98 NOPA + 12128 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x08 0x42 0xe8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x2, x1, r1; NOPV + 12144 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 12160 0x00 0x2c 0xf0 0x00 0x25 0x28 0x06 0x80 0x00 0x00 0x40 0xc2 0xe8 0x00 0x00 0xe1 NOPA; NOPB; VST bmll0, [p5], m1; NOPX; VSHUFFLE bmlh0, x0, x3, r1; NOPV + 12176 0x00 0x2c 0xfa 0x57 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; PADDB [p5], m1; NOPS; NOPX; NOPM; NOPV + 12192 0x00 0x2c 0xf0 0x00 0x21 0x28 0x26 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmlh0, [p1], m1; NOPX; NOPM; NOPV +.label ZLE_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_448 +.end_of_loop + 12208 0x00 0x2c 0xf2 0x57 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; PADDB [p1], m1; NOPS; NOPX; NOPM; NOPV +.label TGT_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_464 +.loop_nesting 0 + 12224 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 12228 0x1f 0x60 0x20 0xf8 MOV p7, r0 +.delay_slot +.swstall delay_slot + 12232 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12234 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12236 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12238 0x00 0x00 NOPX +.label _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params__end +.label __Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params___func_end0 +.label __ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj___func_begin0 +.label _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj +.function_start + 12240 0xb0 0x11 0x60 0x00 0x04 0x00 0x00 0x00 0x71 0x3a MOVS p5, p0; PADDXM [sp], #128 + 12250 0xff 0x87 0xb0 0x01 0xb1 0x60 0x70 0x02 ST lr, [sp, #-4]; MOV p3, p1 +.no_stack_arguments + 12258 0x31 0x11 0x60 0x00 0x05 0xb6 0x00 0x00 0x41 0x3a MOVS p1, p2; JL #11696 +.delay_slot + 12268 0x18 0x65 0xe0 0xf8 MOV p0, sp +.delay_slot + 12272 0x38 0xef 0x90 0x18 PADDB [p0], #-128 +.delay_slot + 12276 0x1c 0x60 0xc0 0xf8 MOV p4, p0 +.delay_slot +.swstall delay_slot + 12280 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12282 0x00 0x2c 0xf0 0x00 0x20 0x3c NOPA; NOPB +.return_address + 12288 0xf0 0x4a 0x22 0x90 0x8b 0x02 0x2d 0x70 0x72 0xba LDA r18, [sp, #-128]; MOVS p2, p4; MOV r17, CORE_ID + 12298 0xf0 0xda 0x28 0xc5 0x20 0x2c LDA r22, [sp, #-124]; EXTEND.u8 r17, r17 + 12304 0xf1 0x52 0x20 0x00 0x00 0x3e 0x6f 0xff 0x10 0xba LDA r20, [sp, #-120]; MOVXM r19, #65534 + 12314 0x60 0x93 0xd9 0xc6 0x21 0x2c LDA p1, [p3]; ADD r17, r19, r17 + 12320 0xf1 0xce 0x28 0xd5 0x60 0x2c LDA r19, [sp, #-116]; EXTEND.u16 r21, r17 + 12326 0x00 0x00 NOPX + 12328 0x05 0x06 0x36 0x98 LDA r17, [p5] + 12332 0x00 0x00 NOPX + 12334 0x15 0xa5 0x2f 0x98 MUL r18, r22, r18 + 12338 0x00 0x00 NOPX + 12340 0x14 0xa5 0x4f 0x98 MUL r18, r18, r20 + 12344 0x00 0x00 NOPX + 12346 0x15 0x65 0x2f 0x98 MUL r18, r21, r18 +.no_stack_arguments + 12350 0x00 0x16 0xf8 0x00 0x01 0x04 JL #11760 +.delay_slot + 12356 0x14 0xe5 0x2f 0x98 MUL r18, r19, r18 +.delay_slot + 12360 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 12364 0x14 0xa1 0x0d 0x98 LSHL r16, r18, r16 +.delay_slot + 12368 0x18 0x68 0xc1 0x58 ADD.NC p0, r17, r16 +.delay_slot +.swstall delay_slot + 12372 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.return_address + 12384 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] + 12388 0x00 0x00 NOPX + 12390 0x00 0x00 NOPX + 12392 0x00 0x00 NOPX + 12394 0x00 0x00 NOPX + 12396 0x00 0x00 NOPX + 12398 0x00 0x00 NOPX + 12400 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 12404 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128 +.delay_slot +.swstall delay_slot + 12410 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12412 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12414 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12416 0x00 0x00 NOPX +.label _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj__end +.label __ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj___func_end0 + +.text_segment PM 12432 +.label __Z14_b8170_wrapperPPv___func_begin0 +.label _Z14_b8170_wrapperPPv +.function_start + 12432 0x19 0x60 0xc0 0xf8 MOV p1, p0 + 12436 0x01 0x1c 0x1e 0x98 LDA p0, [p1], #4 + 12440 0x01 0x15 0x1e 0x98 LDA p2, [p1, #4] + 12444 0x01 0x04 0x9e 0x98 LDA p1, [p1] +.tail_call + 12448 0x00 0x17 0xe8 0x00 0x00 0x84 J #12240 +.delay_slot +.swstall delay_slot + 12454 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12456 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12458 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12460 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12462 0x00 0x00 NOPX +.label _Z14_b8170_wrapperPPv__end +.label __Z14_b8170_wrapperPPv___func_end0 +.label _ZN12me_primitive10udiv_dstepEjjRjS0_ +.function_start + 12464 0x00 0xc0 0x2f 0xa0 0x41 0xe4 MOVX r3, #0; MOV r31, r0 + 12470 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12474 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12478 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12482 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12486 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12490 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12494 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12498 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12502 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12506 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12510 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12514 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12518 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12522 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12526 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12530 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12534 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12538 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12542 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12546 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12550 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12554 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12558 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12562 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12566 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12570 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12574 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12578 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12582 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 12586 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 +.delay_slot + 12590 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 +.delay_slot + 12594 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 +.delay_slot + 12598 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 +.delay_slot + 12602 0x18 0x9f 0xa0 0xf8 MOV r2, r31 +.label _ZN12me_primitive10udiv_dstepEjjRjS0___end + +.text_segment PM 12608 +.label memset +.function_start + 12608 0x08 0x18 0xf0 0x00 0x01 0x84 JZ r1, #12768 +.delay_slot + 12614 0x18 0x62 0xc0 0xf8 MOV p0, p1 +.delay_slot +.swstall delay_slot + 12618 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12620 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12622 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12624 0x00 0x00 NOPX + 12626 0x30 0x11 0x60 0x02 0xb8 0x50 0x70 0x02 MOVS p1, p0; MOV lc, r1 + 12634 0x00 0x00 0x31 0xe2 0xe0 0x44 MOVXM ls, #12656 + 12640 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x0d 0xb8 0xe8 0x10 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVXM le, #12752; NOPV +.label ZLS_Fmemset_48 +.loop_nesting 1 +.begin_of_loop + 12656 0x23 0x80 0xe0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 ST.s8 r0, [p1], #1; NOPB; NOPS; NOPX; NOPM; NOPV + 12672 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 12688 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 12704 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 12720 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 12736 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLE_Fmemset_144 +.end_of_loop + 12752 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_Fmemset_160 +.loop_nesting 0 + 12768 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot + 12772 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12774 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12776 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12778 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12780 0x00 0x00 NOPX +.label memset__end + +.bss_segment DMb 508416 24 + +.data_segment DMb 508440 +.label _ZL8num_iter + 0x1 + 0x0 + 0x0 + 0x0 + +.bss_segment DMb 508444 4 + +.bss_segment DMb 508448 1 + +.rodata_segment DMb 508480 +.label _ZL20g_uniformKernelFuncs + 0x70 + 0x23 + 0x0 + 0x0 + 0x0 + 0x2a + 0x0 + 0x0 + 0x10 + 0x2c + 0x0 + 0x0 + 0x30 + 0x2c + 0x0 + 0x0 + 0x50 + 0x2c + 0x0 + 0x0 + 0x70 + 0x2c + 0x0 + 0x0 + 0x90 + 0x30 + 0x0 + 0x0 + +.bss_segment DMb 508544 576 + +.stack DM_stack 507264 508352 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable79/Release/0_0_reloadable79.map b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable79/Release/0_0_reloadable79.map new file mode 100644 index 0000000000000000000000000000000000000000..983d2fac110a6f03228d0251bef89839729e58af --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable79/Release/0_0_reloadable79.map @@ -0,0 +1,314 @@ + +// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri May 30 11:30:05 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// bridge -o../Release/0_0_reloadable4 ../Release/0_0_reloadable4.o -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/isg -g -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable4.bcf -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/softfloat/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork3577691 -pme + +// Release: ipp V-2024.06-TGT-241219 + +Memory map for memory 'DM_stack': + + Size = 1048576 + Width = 8 bits + Offset = 0 + Used = 1088 + + 0x0007bd80..0x0007c1bf ( 1088 items) : Stack + +Memory map for memory 'DMb': + + Size = 1048576 + Width = 8 bits + Offset = 0 + Used = 1725 + + 0x00000000..0x0007bd7f ( 507264 items) : Reserved + 0x0007bd80..0x0007c1bf ( 1088 items) : Stack + 0x0007c1c0..0x0007c1ff ( 64 items) : Reserved + 0x0007c200..0x0007c203 ( 4 items) : ../Release/0_0_reloadable4.o::_ZL9curr_iter (Data, Local, .bss.DMb.4) + 0x0007c204..0x0007c207 ( 4 items) : ../Release/0_0_reloadable4.o::_ZL8core_row (Data, Local, .bss.DMb.4) + 0x0007c208..0x0007c20b ( 4 items) : ../Release/0_0_reloadable4.o::_ZL11ifm1_offset (Data, Local, .bss.DMb.4) + 0x0007c20c..0x0007c20f ( 4 items) : ../Release/0_0_reloadable4.o::_ZL11ifm2_offset (Data, Local, .bss.DMb.4) + 0x0007c210..0x0007c213 ( 4 items) : ../Release/0_0_reloadable4.o::_ZL10ifmsv_size (Data, Local, .bss.DMb.4) + 0x0007c214..0x0007c217 ( 4 items) : ../Release/0_0_reloadable4.o::_ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6kn_rep (Data, Weak, .bss.DMb.4) + 0x0007c218..0x0007c21b ( 4 items) : ../Release/0_0_reloadable4.o::_ZL8num_iter (Data, Local, .data.DMb.4) + 0x0007c21c..0x0007c21f ( 4 items) : me_defs.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive11control_satE (Data, Global, .bss.DMb.4) + 0x0007c220..0x0007c220 ( 1 items) : me_defs.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive11control_rndE (Data, Global, .bss.DMb.1) + 0x0007c240..0x0007c25b ( 28 items) : ../Release/0_0_reloadable4.o::_ZL20g_uniformKernelFuncs (Data, Local, .rodata.DMb.64) + + Called functions : _Z15_b14160_wrapperPPv + _Z14_b7835_wrapperPPv + _Z14_b8148_wrapperPPv + _Z15_b13739_wrapperPPv + _Z15_b13744_wrapperPPv + _Z15_b13749_wrapperPPv + _Z14_b8170_wrapperPPv + + 0x0007c280..0x0007c2ff ( 128 items) : ../Release/0_0_reloadable4.o::_ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6params (Data, Weak, .bss.DMb.64) + 0x0007c300..0x0007c33f ( 64 items) : ../Release/0_0_reloadable4.o::add1d_attribute_broadcasting_params (Data, Global, .bss.DMb.64) + 0x0007c340..0x0007c37f ( 64 items) : ../Release/0_0_reloadable4.o::mul1d_params (Data, Global, .bss.DMb.64) + 0x0007c380..0x0007c3bf ( 64 items) : ../Release/0_0_reloadable4.o::sigmoid1d_params (Data, Global, .bss.DMb.64) + 0x0007c3c0..0x0007c4bf ( 256 items) : ../Release/0_0_reloadable4.o::conv2d_dw_params (Data, Global, .bss.DMb.64) + 0x0007ca00..0x000fffff ( 538112 items) : Reserved + +Memory map for memory 'PM': + + Size = 1048576 + Width = 8 bits + Offset = 0 + Used = 9978 + + 0x00000000..0x000009df ( 2528 items) : Reserved + 0x000009e0..0x00000c01 ( 546 items) : ../Release/0_0_reloadable4.o::_Z13kernelWrapperPPvjjjj (Function, Global, .text) (stack frame size = 64) + + Referenced symbols: _ZL20g_uniformKernelFuncs + + 0x00000c10..0x00000c27 ( 24 items) : ../Release/0_0_reloadable4.o::_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: add1d_attribute_broadcasting_params + + 0x00000c30..0x00000ce1 ( 178 items) : ../Release/0_0_reloadable4.o::_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv (Function, Weak, .text) (stack frame size = 64) + + Called functions : _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E + + Referenced symbols: add1d_attribute_broadcasting_params + + 0x00000cf0..0x00000d27 ( 56 items) : ../Release/0_0_reloadable4.o::_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: add1d_attribute_broadcasting_params + + 0x00000d30..0x00000d6b ( 60 items) : ../Release/0_0_reloadable4.o::_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv (Function, Weak, .text) (stack frame size = 64) + + Called functions : _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv + _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E + + Referenced symbols: add1d_attribute_broadcasting_params + + 0x00000d70..0x00000eb9 ( 330 items) : ../Release/0_0_reloadable4.o::_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: add1d_attribute_broadcasting_params + _ZN12me_primitive11control_rndE + + 0x00000ec0..0x00000f31 ( 114 items) : ../Release/0_0_reloadable4.o::_ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 128) + + Called functions : _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E + + Referenced symbols: add1d_attribute_broadcasting_params + + 0x00000f40..0x00001127 ( 488 items) : ../Release/0_0_reloadable4.o::_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 64) + + Called functions : _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv + _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + add1d_attribute_broadcasting_params + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL11ifm1_offset + _ZL8num_iter + + 0x00001130..0x00001173 ( 68 items) : ../Release/0_0_reloadable4.o::_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: sigmoid1d_params + + 0x00001180..0x000012f9 ( 378 items) : ../Release/0_0_reloadable4.o::_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: sigmoid1d_params + _ZN12me_primitive11control_rndE + + 0x00001300..0x000014e7 ( 488 items) : ../Release/0_0_reloadable4.o::_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 64) + + Called functions : _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv + _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + sigmoid1d_params + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL11ifm1_offset + _ZL8num_iter + + 0x000014f0..0x00001507 ( 24 items) : ../Release/0_0_reloadable4.o::_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: mul1d_params + + 0x00001510..0x000015a9 ( 154 items) : ../Release/0_0_reloadable4.o::_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 64) + + Called functions : _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E + + Referenced symbols: mul1d_params + + 0x000015b0..0x000016e3 ( 308 items) : ../Release/0_0_reloadable4.o::_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: mul1d_params + _ZN12me_primitive11control_rndE + + 0x000016f0..0x00001949 ( 602 items) : ../Release/0_0_reloadable4.o::_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE (Function, Global, .text) (stack frame size = 64) + + Called functions : _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + mul1d_params + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL11ifm1_offset + _ZL11ifm2_offset + _ZL8num_iter + + 0x00001950..0x00001c67 ( 792 items) : ../Release/0_0_reloadable4.o::_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh (Function, Local, .text) (stack frame size = 64) + + Called functions : _ZN12me_primitive10udiv_dstepEjjRjS0_ + + Referenced symbols: conv2d_dw_params + _ZN12me_primitive11control_rndE + + 0x00001c70..0x00001f11 ( 674 items) : ../Release/0_0_reloadable4.o::_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: conv2d_dw_params + _ZN12me_primitive11control_rndE + + 0x00001f20..0x0000201d ( 254 items) : ../Release/0_0_reloadable4.o::_Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: conv2d_dw_params + + 0x00002020..0x00002187 ( 360 items) : ../Release/0_0_reloadable4.o::_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params (Function, Weak, .text) (stack frame size = 64) + + Called functions : _Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params + _Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params + + Referenced symbols: conv2d_dw_params + + 0x00002190..0x0000236d ( 478 items) : ../Release/0_0_reloadable4.o::_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 128) + + Called functions : _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh + _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL8num_iter + _ZL10ifmsv_size + conv2d_dw_params + + 0x00002370..0x00002393 ( 36 items) : ../Release/0_0_reloadable4.o::_Z15_b14160_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + + 0x000023a0..0x000024a5 ( 262 items) : ../Release/0_0_reloadable4.o::_Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: _ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6params + + 0x000024b0..0x0000292d ( 1150 items) : ../Release/0_0_reloadable4.o::_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: _ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6params + + 0x00002930..0x000029f9 ( 202 items) : ../Release/0_0_reloadable4.o::_ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj (Function, Weak, .text) (stack frame size = 64) + + Called functions : _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj + _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params + + Referenced symbols: _ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6kn_rep + _ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6params + + 0x00002a00..0x00002a1f ( 32 items) : ../Release/0_0_reloadable4.o::_Z14_b7835_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj + + 0x00002a20..0x00002c03 ( 484 items) : ../Release/0_0_reloadable4.o::_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj (Function, Weak, .text) (stack frame size = 64) + + Called functions : memset + + 0x00002c10..0x00002c2f ( 32 items) : ../Release/0_0_reloadable4.o::_Z14_b8148_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj + + 0x00002c30..0x00002c4f ( 32 items) : ../Release/0_0_reloadable4.o::_Z15_b13739_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + + 0x00002c50..0x00002c6f ( 32 items) : ../Release/0_0_reloadable4.o::_Z15_b13744_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + + 0x00002c70..0x00002c93 ( 36 items) : ../Release/0_0_reloadable4.o::_Z15_b13749_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE + + 0x00002ca0..0x00002d41 ( 162 items) : ../Release/0_0_reloadable4.o::_ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj (Function, Local, .text) (stack frame size = 0) + 0x00002d50..0x00002da3 ( 84 items) : ../Release/0_0_reloadable4.o::_ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params (Function, Local, .text) (stack frame size = 0) + 0x00002db0..0x00002de3 ( 52 items) : ../Release/0_0_reloadable4.o::_ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj (Function, Local, .text) (stack frame size = 0) + + Called functions : _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj + _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params + + 0x00002df0..0x00002fcf ( 480 items) : ../Release/0_0_reloadable4.o::_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params (Function, Weak, .text) (stack frame size = 0) + 0x00002fd0..0x00003081 ( 178 items) : ../Release/0_0_reloadable4.o::_ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj (Function, Weak, .text) (stack frame size = 128) + + Called functions : _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj + _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params + + 0x00003090..0x000030af ( 32 items) : ../Release/0_0_reloadable4.o::_Z14_b8170_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj + + 0x000030b0..0x0000313d ( 142 items) : me_div.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive10udiv_dstepEjjRjS0_ (Function, Global, .text) (stack frame size = 0) + 0x00003140..0x000031ed ( 174 items) : string.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/lib/Release/libc.a)::memset (Function, Global, .text) (stack frame size = 0) + +External symbols: + + __dso_handle = 0x0 + _ctors_end = 0x0 + _ctors_start = 0x0 + _dtors_end = 0x0 + _dtors_start = 0x0 + _pc_end = 0x31ee + _pc_start = 0x9e0 + _sp_end_DM_stack = 0x7c1c0 + _sp_start_DM_stack = 0x7bd80 + +Section summary for memory 'DM_stack': + + .stack File + ---------- ---------- + 1088 + ---------- ---------- + 1088 Total + +Section summary for memory 'DMb': + + .bss .data .rodata File + ---------- ---------- ---------- ---------- + 600 4 28 ../Release/0_0_reloadable4.o + 5 0 0 me_defs.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release/libme.a) + ---------- ---------- ---------- ---------- + 605 4 28 Total + +Section summary for memory 'PM': + + .text File + ---------- ---------- + 9662 ../Release/0_0_reloadable4.o + 142 me_div.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release/libme.a) + 174 string.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/lib/Release/libc.a) + ---------- ---------- + 9978 Total + +File summary: + +../Release/0_0_reloadable4.o + DMb 632 + PM 9662 + +me_defs.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release/libme.a) + DMb 5 + +me_div.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release/libme.a) + PM 142 + +string.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/lib/Release/libc.a) + PM 174 + diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable79/Release/0_0_reloadable79.sdr b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable79/Release/0_0_reloadable79.sdr new file mode 100644 index 0000000000000000000000000000000000000000..d4b928c4974d1777a4363132c93bc355c04f2901 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable79/Release/0_0_reloadable79.sdr @@ -0,0 +1,125 @@ + +// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri May 30 11:30:05 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// bridge -o../Release/0_0_reloadable4 ../Release/0_0_reloadable4.o -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/isg -g -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable4.bcf -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/softfloat/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork3577691 -pme + +// Release: ipp V-2024.06-TGT-241219 + +// Symbols in memory 'DM_bankA': +// Symbols in memory 'DM_bankAB': +// Symbols in memory 'DM_bankAC': +// Symbols in memory 'DM_bankAD': +// Symbols in memory 'DM_bankB': +// Symbols in memory 'DM_bankBC': +// Symbols in memory 'DM_bankBD': +// Symbols in memory 'DM_bankC': +// Symbols in memory 'DM_bankCD': +// Symbols in memory 'DM_bankD': +// Symbols in memory 'DM_stack': +// Symbols in memory 'DM_test': +// Symbols in memory 'DMb': +_symbol _ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6kn_rep 0x0007c214 +_symbol _ZN12me_primitive11control_satE 0x0007c21c +_symbol _ZN12me_primitive11control_rndE 0x0007c220 +_symbol _ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6params 0x0007c280 +_symbol add1d_attribute_broadcasting_params 0x0007c300 +_symbol mul1d_params 0x0007c340 +_symbol sigmoid1d_params 0x0007c380 +_symbol conv2d_dw_params 0x0007c3c0 +// Symbols in memory 'DMh': +// Symbols in memory 'DMh_bankA': +// Symbols in memory 'DMh_bankAB': +// Symbols in memory 'DMh_bankAC': +// Symbols in memory 'DMh_bankAD': +// Symbols in memory 'DMh_bankB': +// Symbols in memory 'DMh_bankBC': +// Symbols in memory 'DMh_bankBD': +// Symbols in memory 'DMh_bankC': +// Symbols in memory 'DMh_bankCD': +// Symbols in memory 'DMh_bankD': +// Symbols in memory 'DMh_stack': +// Symbols in memory 'DMs': +// Symbols in memory 'DMs_bankA': +// Symbols in memory 'DMs_bankAB': +// Symbols in memory 'DMs_bankAC': +// Symbols in memory 'DMs_bankAD': +// Symbols in memory 'DMs_bankB': +// Symbols in memory 'DMs_bankBC': +// Symbols in memory 'DMs_bankBD': +// Symbols in memory 'DMs_bankC': +// Symbols in memory 'DMs_bankCD': +// Symbols in memory 'DMs_bankD': +// Symbols in memory 'DMs_stack': +// Symbols in memory 'DMv': +// Symbols in memory 'DMv_bankA': +// Symbols in memory 'DMv_bankAB': +// Symbols in memory 'DMv_bankAC': +// Symbols in memory 'DMv_bankAD': +// Symbols in memory 'DMv_bankB': +// Symbols in memory 'DMv_bankBC': +// Symbols in memory 'DMv_bankBD': +// Symbols in memory 'DMv_bankC': +// Symbols in memory 'DMv_bankCD': +// Symbols in memory 'DMv_bankD': +// Symbols in memory 'DMv_stack': +// Symbols in memory 'DMw': +// Symbols in memory 'DMw_bankA': +// Symbols in memory 'DMw_bankAB': +// Symbols in memory 'DMw_bankAC': +// Symbols in memory 'DMw_bankAD': +// Symbols in memory 'DMw_bankB': +// Symbols in memory 'DMw_bankBC': +// Symbols in memory 'DMw_bankBD': +// Symbols in memory 'DMw_bankC': +// Symbols in memory 'DMw_bankCD': +// Symbols in memory 'DMw_bankD': +// Symbols in memory 'DMw_stack': +// Symbols in memory 'DMx': +// Symbols in memory 'DMx_bankA': +// Symbols in memory 'DMx_bankAB': +// Symbols in memory 'DMx_bankAC': +// Symbols in memory 'DMx_bankAD': +// Symbols in memory 'DMx_bankB': +// Symbols in memory 'DMx_bankBC': +// Symbols in memory 'DMx_bankBD': +// Symbols in memory 'DMx_bankC': +// Symbols in memory 'DMx_bankCD': +// Symbols in memory 'DMx_bankD': +// Symbols in memory 'DMx_stack': +// Symbols in memory 'PM': +_symbol _Z13kernelWrapperPPvjjjj 0x000009e0 +_symbol _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E 0x00000c10 +_symbol _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv 0x00000c30 +_symbol _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E 0x00000cf0 +_symbol _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv 0x00000d30 +_symbol _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E 0x00000d70 +_symbol _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E 0x00000ec0 +_symbol _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x00000f40 +_symbol _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv 0x00001130 +_symbol _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E 0x00001180 +_symbol _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x00001300 +_symbol _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E 0x000014f0 +_symbol _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv 0x00001510 +_symbol _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E 0x000015b0 +_symbol _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE 0x000016f0 +_symbol _Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params 0x00001c70 +_symbol _Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params 0x00001f20 +_symbol _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params 0x00002020 +_symbol _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x00002190 +_symbol _Z15_b14160_wrapperPPv 0x00002370 +_symbol _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj 0x000023a0 +_symbol _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params 0x000024b0 +_symbol _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj 0x00002930 +_symbol _Z14_b7835_wrapperPPv 0x00002a00 +_symbol _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj 0x00002a20 +_symbol _Z14_b8148_wrapperPPv 0x00002c10 +_symbol _Z15_b13739_wrapperPPv 0x00002c30 +_symbol _Z15_b13744_wrapperPPv 0x00002c50 +_symbol _Z15_b13749_wrapperPPv 0x00002c70 +_symbol _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params 0x00002df0 +_symbol _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj 0x00002fd0 +_symbol _Z14_b8170_wrapperPPv 0x00003090 +_symbol _ZN12me_primitive10udiv_dstepEjjRjS0_ 0x000030b0 +_symbol memset 0x00003140 +// Symbols in memory 'PMw': +// Symbols in memory 'TM4': diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable79/Release/0_0_reloadable79.srv b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable79/Release/0_0_reloadable79.srv new file mode 100644 index 0000000000000000000000000000000000000000..f5e6b3e5828701d92c85b709bbc3b7c45a16ad8f --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable79/Release/0_0_reloadable79.srv @@ -0,0 +1,14042 @@ + +// File generated by darts version V-2024.06#84922c0d9f#241219, Fri May 30 11:30:06 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// darts -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -d -h -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable4 me + +// Release: ipp V-2024.06-TGT-241219 +.label __Z13kernelWrapperPPvjjjj___func_begin0 +.label _Z13kernelWrapperPPvjjjj +.function kernelWrapper _Z13kernelWrapperPPvjjjj +.src_ref 0 "0_0_reloadable4.cc" 91 first +.src_ref 0 "0_0_reloadable4.cc" 93 60 +.src_ref 0 "0_0_reloadable4.cc" 93 110 first +.function_start + 2528 "00101100" // LDA r16, [p0]; NEZ r26, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2529 "11100000" // /* MW 5 */ + 2530 "11101001" // /* MW 4 */ + 2531 "11010000" // /* MW 3 */ + 2532 "11000010" // /* MW 2 */ + 2533 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 91 + 2534 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2535 "00000001" // /* MW 5 */ + 2536 "00000000" // /* MW 4 */ + 2537 "00000000" // /* MW 3 */ + 2538 "00001000" // /* MW 2 */ + 2539 "00000000" // /* MW 1 */ + 2540 "10011000" // ST p6, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2541 "00011101" // /* MW 3 */ + 2542 "11101111" // /* MW 2 */ + 2543 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 0 "0_0_reloadable4.cc" 98 112 + 2544 "00000010" // ST r14, [sp, #-16]; MOV r14, r3 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2545 "01110000" // /* MW 7 */ + 2546 "11010000" // /* MW 6 */ + 2547 "11001000" // /* MW 5 */ + 2548 "00000001" // /* MW 4 */ + 2549 "10110000" // /* MW 3 */ + 2550 "00111010" // /* MW 2 */ + 2551 "11111110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 0 "0_0_reloadable4.cc" 95 110 + 2552 "00000010" // ST r15, [sp, #-8]; MOV r15, r1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2553 "01110000" // /* MW 7 */ + 2554 "01010000" // /* MW 6 */ + 2555 "11101000" // /* MW 5 */ + 2556 "00000001" // /* MW 4 */ + 2557 "10110000" // /* MW 3 */ + 2558 "00111110" // /* MW 2 */ + 2559 "11111111" // /* MW 1 */ + 2560 "10011000" // ST p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2561 "10011101" // /* MW 3 */ + 2562 "11110111" // /* MW 2 */ + 2563 "00001111" // /* MW 1 */ + 2564 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2565 "00111101" // /* MW 3 */ + 2566 "11111100" // /* MW 2 */ + 2567 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 first + 2568 "00011000" // ADD.NC p6, r16, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2569 "00000010" // /* MW 3 */ + 2570 "01101000" // /* MW 2 */ + 2571 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 + 2572 "10011000" // LDA r16, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2573 "00010110" // /* MW 3 */ + 2574 "00011110" // /* MW 2 */ + 2575 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 36 + 2576 "10011000" // LDA r18, [p6], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2577 "01010110" // /* MW 3 */ + 2578 "00111110" // /* MW 2 */ + 2579 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 46 + 2580 "10011000" // LDA r17, [p6], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2581 "00110110" // /* MW 3 */ + 2582 "11101110" // /* MW 2 */ + 2583 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 60 + 2584 "10011000" // LDA r27, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2585 "01110110" // /* MW 3 */ + 2586 "00000111" // /* MW 2 */ + 2587 "00000110" // /* MW 1 */ + 2588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2589 "00000000" // /* MW 1 */ + 2590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2591 "00000000" // /* MW 1 */ + 2592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2593 "00000000" // /* MW 1 */ + 2594 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2595 "00000000" // /* MW 1 */ + 2596 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2597 "00000000" // /* MW 1 */ + 2598 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2599 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 219 30 first +.src_ref 1 "io_buffer_compiler.h" 219 37 first + 2600 "00011000" // SEL.EQZ r16, r16, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2601 "00100010" // /* MW 3 */ + 2602 "00100001" // /* MW 2 */ + 2603 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 218 23 first + 2604 "10011000" // ST r16, [p6, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2605 "00010001" // /* MW 3 */ + 2606 "11010110" // /* MW 2 */ + 2607 "00001110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 +.src_ref 1 "io_buffer_main.h" 434 8 +.src_ref 1 "io_buffer_main.h" 434 8 + 2608 "11100100" // MOVX r16, #-1; MOV el0, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2609 "00111001" // /* MW 5 */ + 2610 "00110101" // /* MW 4 */ + 2611 "10100000" // /* MW 3 */ + 2612 "00011111" // /* MW 2 */ + 2613 "11111100" // /* MW 1 */ + 2614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2615 "00000000" // /* MW 1 */ + 2616 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2617 "00000000" // /* MW 1 */ + 2618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2619 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 first + 2620 "00011000" // ACQ.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2621 "00001000" // /* MW 3 */ + 2622 "01010111" // /* MW 2 */ + 2623 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 95 60 +.src_ref 0 "0_0_reloadable4.cc" 95 110 +.src_ref 0 "0_0_reloadable4.cc" 98 60 +.src_ref 0 "0_0_reloadable4.cc" 101 7 + 2624 "01100100" // MOVX r17, #2; MOV r19, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2625 "00000101" // /* MW 5 */ + 2626 "10100000" // /* MW 4 */ + 2627 "00101001" // /* MW 3 */ + 2628 "01000001" // /* MW 2 */ + 2629 "00000100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 95 60 +.src_ref 0 "0_0_reloadable4.cc" 95 60 first + 2630 "11100100" // LSHL r20, r26, r17; MOV r18, p0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2631 "10000001" // /* MW 5 */ + 2632 "00100001" // /* MW 4 */ + 2633 "10111001" // /* MW 3 */ + 2634 "00100011" // /* MW 2 */ + 2635 "11010101" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 95 60 +.src_ref 0 "0_0_reloadable4.cc" 95 110 + 2636 "10100100" // LTU r18, r19, r15; ADD.NC p6, r18, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2637 "10100010" // /* MW 5 */ + 2638 "11010010" // /* MW 4 */ + 2639 "10011100" // /* MW 3 */ + 2640 "10011111" // /* MW 2 */ + 2641 "10011100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 95 60 +.src_ref 0 "0_0_reloadable4.cc" 98 60 + 2642 "10111010" // LDA r20, [p6]; ST r20, [sp, #-28]; MOV r19, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2643 "01110010" // /* MW 9 */ + 2644 "01100000" // /* MW 8 */ + 2645 "01101110" // /* MW 7 */ + 2646 "10000010" // /* MW 6 */ + 2647 "10010101" // /* MW 5 */ + 2648 "11100110" // /* MW 4 */ + 2649 "11010111" // /* MW 3 */ + 2650 "11010010" // /* MW 2 */ + 2651 "11000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 + 2652 "00000010" // ST r18, [sp, #-24]; MOV r26, r18 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2653 "01110000" // /* MW 7 */ + 2654 "10010000" // /* MW 6 */ + 2655 "01001100" // /* MW 5 */ + 2656 "00000011" // /* MW 4 */ + 2657 "10110000" // /* MW 3 */ + 2658 "01001010" // /* MW 2 */ + 2659 "11111101" // /* MW 1 */ + 2660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2661 "00000000" // /* MW 1 */ + 2662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2663 "00000000" // /* MW 1 */ + 2664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2665 "00000000" // /* MW 1 */ + 2666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2667 "00000000" // /* MW 1 */ + 2668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2669 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 first + 2670 "00011000" // ADD.NC p6, r20, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2671 "00000010" // /* MW 3 */ + 2672 "01101010" // /* MW 2 */ + 2673 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 + 2674 "10011000" // LDA r20, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2675 "10010110" // /* MW 3 */ + 2676 "00011110" // /* MW 2 */ + 2677 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 36 + 2678 "10011000" // LDA r22, [p6], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2679 "11010110" // /* MW 3 */ + 2680 "00111110" // /* MW 2 */ + 2681 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 46 + 2682 "10011000" // LDA r21, [p6], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2683 "10110110" // /* MW 3 */ + 2684 "11101110" // /* MW 2 */ + 2685 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 60 + 2686 "10011000" // LDA r27, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2687 "01110110" // /* MW 3 */ + 2688 "00000111" // /* MW 2 */ + 2689 "00000110" // /* MW 1 */ + 2690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2691 "00000000" // /* MW 1 */ + 2692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2693 "00000000" // /* MW 1 */ + 2694 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2695 "00000000" // /* MW 1 */ + 2696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2697 "00000000" // /* MW 1 */ + 2698 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2699 "00000000" // /* MW 1 */ + 2700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2701 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 219 30 first +.src_ref 1 "io_buffer_compiler.h" 219 37 first + 2702 "00011000" // SEL.EQZ r20, r20, r22, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2703 "01100010" // /* MW 3 */ + 2704 "00101001" // /* MW 2 */ + 2705 "00010101" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 218 23 first + 2706 "10011000" // ST r20, [p6, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2707 "10010001" // /* MW 3 */ + 2708 "11010110" // /* MW 2 */ + 2709 "00001110" // /* MW 1 */ + 2710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2711 "00000000" // /* MW 1 */ + 2712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2713 "00000000" // /* MW 1 */ + 2714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2715 "00000000" // /* MW 1 */ + 2716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2717 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 first + 2718 "00011000" // ACQ.COND r21, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2719 "00001000" // /* MW 3 */ + 2720 "01010111" // /* MW 2 */ + 2721 "00010101" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 98 60 first + 2722 "10011000" // LSHL r18, r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2723 "00011101" // /* MW 3 */ + 2724 "10100101" // /* MW 2 */ + 2725 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 98 60 +.src_ref 0 "0_0_reloadable4.cc" 98 60 + 2726 "10100100" // LSHL r18, r2, r17; ADD.NC r19, r19, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2727 "10010010" // /* MW 5 */ + 2728 "10110011" // /* MW 4 */ + 2729 "10111001" // /* MW 3 */ + 2730 "10100011" // /* MW 2 */ + 2731 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 98 60 +.src_ref 0 "0_0_reloadable4.cc" 98 112 + 2732 "10100100" // NEZ r26, r14; ADD.NC p6, r19, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2733 "10010010" // /* MW 5 */ + 2734 "11010011" // /* MW 4 */ + 2735 "00001100" // /* MW 3 */ + 2736 "10011110" // /* MW 2 */ + 2737 "01110110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 98 60 + 2738 "00001100" // LDA r18, [p6]; ST r26, [sp, #-32] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2739 "10101011" // /* MW 5 */ + 2740 "11000110" // /* MW 4 */ + 2741 "11011111" // /* MW 3 */ + 2742 "11001010" // /* MW 2 */ + 2743 "11000000" // /* MW 1 */ + 2744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2745 "00000000" // /* MW 1 */ + 2746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2747 "00000000" // /* MW 1 */ + 2748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2749 "00000000" // /* MW 1 */ + 2750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2751 "00000000" // /* MW 1 */ + 2752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2753 "00000000" // /* MW 1 */ + 2754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2755 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 first + 2756 "00011000" // ADD.NC p7, r18, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2757 "00000010" // /* MW 3 */ + 2758 "01101001" // /* MW 2 */ + 2759 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 + 2760 "10011000" // LDA r19, [p7], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2761 "01110110" // /* MW 3 */ + 2762 "00111110" // /* MW 2 */ + 2763 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 23 + 2764 "10011000" // LDA r18, [p7], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2765 "01010110" // /* MW 3 */ + 2766 "11101110" // /* MW 2 */ + 2767 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 36 + 2768 "10011000" // LDA r20, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2769 "10010110" // /* MW 3 */ + 2770 "00011110" // /* MW 2 */ + 2771 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 60 + 2772 "10011000" // LDA r27, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2773 "01110110" // /* MW 3 */ + 2774 "00000111" // /* MW 2 */ + 2775 "00000111" // /* MW 1 */ + 2776 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2777 "00000000" // /* MW 1 */ + 2778 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2779 "00000000" // /* MW 1 */ + 2780 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2781 "00000000" // /* MW 1 */ + 2782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2783 "00000000" // /* MW 1 */ + 2784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2785 "00000000" // /* MW 1 */ + 2786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2787 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 219 30 first +.src_ref 1 "io_buffer_compiler.h" 219 37 first + 2788 "00011000" // SEL.EQZ r19, r19, r20, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2789 "01000010" // /* MW 3 */ + 2790 "11100111" // /* MW 2 */ + 2791 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 218 23 first + 2792 "10011000" // ST r19, [p7, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2793 "01110001" // /* MW 3 */ + 2794 "11010110" // /* MW 2 */ + 2795 "00001111" // /* MW 1 */ + 2796 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2797 "00000000" // /* MW 1 */ + 2798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2799 "00000000" // /* MW 1 */ + 2800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2801 "00000000" // /* MW 1 */ + 2802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2803 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 first + 2804 "00011000" // ACQ.COND r18, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2805 "00001000" // /* MW 3 */ + 2806 "10010111" // /* MW 2 */ + 2807 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 101 7 first + 2808 "10011000" // LSHL r16, r0, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2809 "00011101" // /* MW 3 */ + 2810 "00100001" // /* MW 2 */ + 2811 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 101 7 + 2812 "11111000" // MOV dj0, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2813 "00100000" // /* MW 3 */ + 2814 "10001000" // /* MW 2 */ + 2815 "00011000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 101 7 + 2816 "01000100" // MOVXM p7, #508480 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2817 "10000000" // /* MW 5 */ + 2818 "11000100" // /* MW 4 */ + 2819 "11001110" // /* MW 3 */ + 2820 "00000111" // /* MW 2 */ + 2821 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 101 7 + 2822 "00001100" // LDA p1, [p7, dj0]; ST el0, [sp, #-36] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2823 "01011011" // /* MW 5 */ + 2824 "10111000" // /* MW 4 */ + 2825 "11011111" // /* MW 3 */ + 2826 "00010011" // /* MW 2 */ + 2827 "11100000" // /* MW 1 */ + 2828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2829 "00000000" // /* MW 1 */ + 2830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2831 "00000000" // /* MW 1 */ + 2832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2833 "00000000" // /* MW 1 */ + 2834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2835 "00000000" // /* MW 1 */ + 2836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2837 "00000000" // /* MW 1 */ + 2838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2839 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 101 4 +.no_stack_arguments + 2840 "00011000" // JL p1 /* MW 4 */ /* control_operation: words=4 call unconditional cycles_taken=1 indirect absolute delay_slots=5 */ + 2841 "01000000" // /* MW 3 */ + 2842 "00110000" // /* MW 2 */ + 2843 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 104 60 +.src_ref 0 "0_0_reloadable4.cc" 106 60 +.delay_slot + 2844 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2845 "11000000" // /* MW 3 */ + 2846 "01100000" // /* MW 2 */ + 2847 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2849 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2851 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2853 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2854 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2855 "01111110" // /* MW 9 */ + 2856 "10100101" // /* MW 8 */ + 2857 "00000001" // /* MW 7 */ + 2858 "00000000" // /* MW 6 */ + 2859 "00010000" // /* MW 5 */ + 2860 "00000000" // /* MW 4 */ + 2861 "11110000" // /* MW 3 */ + 2862 "00101100" // /* MW 2 */ + 2863 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_main.h" 464 8 +.src_ref 1 "io_buffer_main.h" 464 8 +.src_ref 1 "io_buffer_main.h" 464 8 +.src_ref 0 "0_0_reloadable4.cc" 104 60 first +.return_address + 2864 "00101100" // LDA r17, [p7]; MOVX r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2865 "00001010" // /* MW 5 */ + 2866 "01000000" // /* MW 4 */ + 2867 "11010000" // /* MW 3 */ + 2868 "11000110" // /* MW 2 */ + 2869 "11100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 + 2870 "00011000" // LDA r26, [sp, #-36] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2871 "01010001" // /* MW 3 */ + 2872 "11011111" // /* MW 2 */ + 2873 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 106 60 + 2874 "00011000" // LDA dj0, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2875 "01000001" // /* MW 3 */ + 2876 "11100100" // /* MW 2 */ + 2877 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_main.h" 464 8 + 2878 "00011000" // LDA el0, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2879 "00101001" // /* MW 3 */ + 2880 "11101000" // /* MW 2 */ + 2881 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 + 2882 "00011000" // LDA eh0, [sp, #-32] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2883 "00001001" // /* MW 3 */ + 2884 "11100000" // /* MW 2 */ + 2885 "00000111" // /* MW 1 */ + 2886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2887 "00000000" // /* MW 1 */ + 2888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2889 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 12 first + 2890 "00011000" // ADD.NC p0, r17, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2891 "10001000" // /* MW 3 */ + 2892 "01101000" // /* MW 2 */ + 2893 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 12 + 2894 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2895 "00110110" // /* MW 3 */ + 2896 "00000110" // /* MW 2 */ + 2897 "00000000" // /* MW 1 */ + 2898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2899 "00000000" // /* MW 1 */ + 2900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2901 "00000000" // /* MW 1 */ + 2902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2903 "00000000" // /* MW 1 */ + 2904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2905 "00000000" // /* MW 1 */ + 2906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2907 "00000000" // /* MW 1 */ + 2908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2909 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 first + 2910 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2911 "00001000" // /* MW 3 */ + 2912 "01010101" // /* MW 2 */ + 2913 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 2914 "11010100" // LDA r17, [p0, #-4]; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2915 "01000001" // /* MW 5 */ + 2916 "10101111" // /* MW 4 */ + 2917 "11011101" // /* MW 3 */ + 2918 "11000110" // /* MW 2 */ + 2919 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 +.src_ref 0 "0_0_reloadable4.cc" 106 60 first + 2920 "11010100" // LDA r18, [p7, dj0]; MOV r26, el0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2921 "00111001" // /* MW 5 */ + 2922 "01000000" // /* MW 4 */ + 2923 "11011101" // /* MW 3 */ + 2924 "01001010" // /* MW 2 */ + 2925 "11100000" // /* MW 1 */ + 2926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2927 "00000000" // /* MW 1 */ + 2928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2929 "00000000" // /* MW 1 */ + 2930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2931 "00000000" // /* MW 1 */ + 2932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2933 "00000000" // /* MW 1 */ + 2934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2935 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 2936 "10011000" // SUB r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2937 "00010001" // /* MW 3 */ + 2938 "00100111" // /* MW 2 */ + 2939 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 12 first +.src_ref 1 "io_buffer_compiler.h" 630 24 + 2940 "00100100" // SEL.EQZ r17, r17, r19, r27; ADD.NC p7, r18, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2941 "00010000" // /* MW 5 */ + 2942 "11010010" // /* MW 4 */ + 2943 "01001110" // /* MW 3 */ + 2944 "01100110" // /* MW 2 */ + 2945 "10001100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 12 +.src_ref 1 "io_buffer_compiler.h" 630 22 first + 2946 "00001100" // LDA r17, [p7]; ST r17, [p0, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2947 "01100011" // /* MW 5 */ + 2948 "11101100" // /* MW 4 */ + 2949 "11010001" // /* MW 3 */ + 2950 "11000110" // /* MW 2 */ + 2951 "11100000" // /* MW 1 */ + 2952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2953 "00000000" // /* MW 1 */ + 2954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2955 "00000000" // /* MW 1 */ + 2956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2957 "00000000" // /* MW 1 */ + 2958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2959 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 2960 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2961 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 2962 "11111000" // MOV r26, eh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2963 "00011100" // /* MW 3 */ + 2964 "10100001" // /* MW 2 */ + 2965 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2966 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2967 "00001000" // /* MW 3 */ + 2968 "01010101" // /* MW 2 */ + 2969 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 2970 "11010100" // LDA r17, [p7, #-4]; MOV r27, el0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2971 "00111001" // /* MW 5 */ + 2972 "11000000" // /* MW 4 */ + 2973 "11011101" // /* MW 3 */ + 2974 "11000110" // /* MW 2 */ + 2975 "11111110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 109 60 first + 2976 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2977 "01010110" // /* MW 3 */ + 2978 "00000110" // /* MW 2 */ + 2979 "00000110" // /* MW 1 */ + 2980 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2981 "00000000" // /* MW 1 */ + 2982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2983 "00000000" // /* MW 1 */ + 2984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2985 "00000000" // /* MW 1 */ + 2986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2987 "00000000" // /* MW 1 */ + 2988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2989 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 2990 "10011000" // SUB r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2991 "00010001" // /* MW 3 */ + 2992 "00100111" // /* MW 2 */ + 2993 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 25 first +.src_ref 1 "io_buffer_compiler.h" 630 24 + 2994 "00100100" // SEL.EQZ r17, r17, r19, r27; ADD.NC p0, r18, #20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2995 "00010100" // /* MW 5 */ + 2996 "11010010" // /* MW 4 */ + 2997 "01000000" // /* MW 3 */ + 2998 "01100110" // /* MW 2 */ + 2999 "10001100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 25 +.src_ref 1 "io_buffer_compiler.h" 630 22 first + 3000 "00001100" // LDA r17, [p0]; ST r17, [p7, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3001 "01100011" // /* MW 5 */ + 3002 "11101100" // /* MW 4 */ + 3003 "11011111" // /* MW 3 */ + 3004 "11000110" // /* MW 2 */ + 3005 "00000000" // /* MW 1 */ + 3006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3007 "00000000" // /* MW 1 */ + 3008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3009 "00000000" // /* MW 1 */ + 3010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3011 "00000000" // /* MW 1 */ + 3012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3013 "00000000" // /* MW 1 */ + 3014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3015 "00000000" // /* MW 1 */ + 3016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3017 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 first + 3018 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3019 "00001000" // /* MW 3 */ + 3020 "01010101" // /* MW 2 */ + 3021 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 111 + 3022 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3023 "00111001" // /* MW 3 */ + 3024 "11111100" // /* MW 2 */ + 3025 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 3026 "10011000" // LDA r17, [p0, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3027 "00110110" // /* MW 3 */ + 3028 "11100110" // /* MW 2 */ + 3029 "00000000" // /* MW 1 */ + 3030 "00011000" // LDA p6, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3031 "00011001" // /* MW 3 */ + 3032 "11101111" // /* MW 2 */ + 3033 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 3034 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3035 "10011001" // /* MW 3 */ + 3036 "11110111" // /* MW 2 */ + 3037 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 3038 "00011000" // LDA r14, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3039 "11010001" // /* MW 3 */ + 3040 "11110001" // /* MW 2 */ + 3041 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3042 "00011000" // LDA r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3043 "11110001" // /* MW 3 */ + 3044 "11111001" // /* MW 2 */ + 3045 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 111 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3046 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3047 "00000001" // /* MW 5 */ + 3048 "00000000" // /* MW 4 */ + 3049 "00000000" // /* MW 3 */ + 3050 "11111000" // /* MW 2 */ + 3051 "11111111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 111 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3052 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3053 "00000000" // /* MW 3 */ + 3054 "00101000" // /* MW 2 */ + 3055 "00010000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 first +.delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3056 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3057 "00010001" // /* MW 3 */ + 3058 "00100001" // /* MW 2 */ + 3059 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3061 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3062 "11111000" // MOV r27, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3063 "00100000" // /* MW 3 */ + 3064 "11010111" // /* MW 2 */ + 3065 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.delay_slot + 3066 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3067 "00000010" // /* MW 3 */ + 3068 "01100001" // /* MW 2 */ + 3069 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 22 +.delay_slot + 3070 "10011000" // ST r16, [p0, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3071 "00010001" // /* MW 3 */ + 3072 "11100110" // /* MW 2 */ +.label _Z13kernelWrapperPPvjjjj__end +.label __Z13kernelWrapperPPvjjjj___func_end0 + 3073 "00001000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E +.function setup _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E +.src_ref 2 "elementwise_binary.h" 100 first +.src_ref 2 "elementwise_binary.h" 103 4 first +.function_start + 3088 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3089 "00000000" // /* MW 3 */ + 3090 "00101000" // /* MW 2 */ + 3091 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 3092 "01000100" // MOVXM p0, #508704 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3093 "01000000" // /* MW 5 */ + 3094 "11000110" // /* MW 4 */ + 3095 "11000000" // /* MW 3 */ + 3096 "00000111" // /* MW 2 */ + 3097 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 3098 "10111000" // MOV m0, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3099 "10000000" // /* MW 3 */ + 3100 "00000000" // /* MW 2 */ + 3101 "00011000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 first +.delay_slot + 3102 "10011000" // ST m0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3103 "00000001" // /* MW 3 */ + 3104 "00000100" // /* MW 2 */ + 3105 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 3106 "10011000" // ST m0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3107 "00000001" // /* MW 3 */ + 3108 "00010100" // /* MW 2 */ + 3109 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_end0 + 3111 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv +.function setup _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv +.src_ref 2 "elementwise_binary.h" 89 first +.src_ref 2 "elementwise_binary.h" 92 22 +.src_ref 2 "elementwise_binary.h" 92 24 first +.function_start + 3120 "10111010" // LDA el0, [p1], #4; MOVXM p0, #508672 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3121 "00010000" // /* MW 9 */ + 3122 "10000000" // /* MW 8 */ + 3123 "00110001" // /* MW 7 */ + 3124 "11110000" // /* MW 6 */ + 3125 "00000001" // /* MW 5 */ + 3126 "00000000" // /* MW 4 */ + 3127 "11010000" // /* MW 3 */ + 3128 "10000101" // /* MW 2 */ + 3129 "00100011" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 89 + 3130 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3131 "00000001" // /* MW 5 */ + 3132 "00000000" // /* MW 4 */ + 3133 "00000000" // /* MW 3 */ + 3134 "00001000" // /* MW 2 */ + 3135 "00000000" // /* MW 1 */ + 3136 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3137 "00111101" // /* MW 3 */ + 3138 "11111000" // /* MW 2 */ + 3139 "00001111" // /* MW 1 */ + 3140 "10011000" // ST r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3141 "11110101" // /* MW 3 */ + 3142 "11111101" // /* MW 2 */ + 3143 "00001111" // /* MW 1 */ + 3144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3145 "00000000" // /* MW 1 */ + 3146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3147 "00000000" // /* MW 1 */ + 3148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3149 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 92 22 first + 3150 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3151 "00101001" // /* MW 3 */ + 3152 "00011100" // /* MW 2 */ + 3153 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 93 24 first + 3154 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3155 "00101110" // /* MW 3 */ + 3156 "00011100" // /* MW 2 */ + 3157 "00000001" // /* MW 1 */ + 3158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3159 "00000000" // /* MW 1 */ + 3160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3161 "00000000" // /* MW 1 */ + 3162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3163 "00000000" // /* MW 1 */ + 3164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3165 "00000000" // /* MW 1 */ + 3166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3167 "00000000" // /* MW 1 */ + 3168 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3169 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 93 22 + 3170 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3171 "00101001" // /* MW 3 */ + 3172 "00011100" // /* MW 2 */ + 3173 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 94 24 first + 3174 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3175 "00101110" // /* MW 3 */ + 3176 "00000100" // /* MW 2 */ + 3177 "00000001" // /* MW 1 */ + 3178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3179 "00000000" // /* MW 1 */ + 3180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3181 "00000000" // /* MW 1 */ + 3182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3183 "00000000" // /* MW 1 */ + 3184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3185 "00000000" // /* MW 1 */ + 3186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3187 "00000000" // /* MW 1 */ + 3188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3189 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 94 22 + 3190 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3191 "00101001" // /* MW 3 */ + 3192 "00011100" // /* MW 2 */ + 3193 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 95 24 first + 3194 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3195 "00101110" // /* MW 3 */ + 3196 "00010100" // /* MW 2 */ + 3197 "00000001" // /* MW 1 */ + 3198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3199 "00000000" // /* MW 1 */ + 3200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3201 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 96 8 first +.no_stack_arguments + 3202 "00000100" // JL #3088 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3088 delay_slots=5 */ + 3203 "00000001" // /* MW 5 */ + 3204 "00000000" // /* MW 4 */ + 3205 "00001000" // /* MW 3 */ + 3206 "00000110" // /* MW 2 */ + 3207 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3209 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3211 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3212 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3213 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 95 22 first +.delay_slot + 3214 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3215 "00101001" // /* MW 3 */ + 3216 "11011100" // /* MW 2 */ + 3217 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 96 8 +.delay_slot + 3218 "00101110" // NOPA; NOPS; MOV r15, p0; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 3219 "00011100" // /* MW 13 */ + 3220 "00000000" // /* MW 12 */ + 3221 "00000000" // /* MW 11 */ + 3222 "00000111" // /* MW 10 */ + 3223 "10000110" // /* MW 9 */ + 3224 "01011110" // /* MW 8 */ + 3225 "00000000" // /* MW 7 */ + 3226 "00000000" // /* MW 6 */ + 3227 "10110110" // /* MW 5 */ + 3228 "00000010" // /* MW 4 */ + 3229 "11110000" // /* MW 3 */ + 3230 "00101100" // /* MW 2 */ + 3231 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 96 8 +.src_ref 2 "elementwise_binary.h" 98 4 +.src_ref 3 "add_impl.h" 105 29 +.return_address + 3232 "10111010" // LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3233 "00001000" // /* MW 9 */ + 3234 "11000100" // /* MW 8 */ + 3235 "00110011" // /* MW 7 */ + 3236 "01101000" // /* MW 6 */ + 3237 "00000000" // /* MW 5 */ + 3238 "00000001" // /* MW 4 */ + 3239 "00100000" // /* MW 3 */ + 3240 "00000111" // /* MW 2 */ + 3241 "11111111" // /* MW 1 */ +.src_ref 3 "add_impl.h" 105 29 +.src_ref 3 "add_impl.h" 106 37 +.src_ref 3 "add_impl.h" 106 39 + 3242 "10111010" // MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3243 "01011000" // /* MW 9 */ + 3244 "11111101" // /* MW 8 */ + 3245 "00000111" // /* MW 7 */ + 3246 "00001000" // /* MW 6 */ + 3247 "10000000" // /* MW 5 */ + 3248 "00000001" // /* MW 4 */ + 3249 "10000000" // /* MW 3 */ + 3250 "11100010" // /* MW 2 */ + 3251 "00000001" // /* MW 1 */ +.src_ref 3 "add_impl.h" 105 29 first +.src_ref 3 "add_impl.h" 106 39 + 3252 "01111010" // LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3253 "00000001" // /* MW 9 */ + 3254 "10100000" // /* MW 8 */ + 3255 "00000111" // /* MW 7 */ + 3256 "10000000" // /* MW 6 */ + 3257 "00010001" // /* MW 5 */ + 3258 "00001010" // /* MW 4 */ + 3259 "00100000" // /* MW 3 */ + 3260 "10111110" // /* MW 2 */ + 3261 "11111111" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 50 first + 3262 "10011000" // LDA.u8 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3263 "01001010" // /* MW 3 */ + 3264 "00000110" // /* MW 2 */ + 3265 "00000000" // /* MW 1 */ + 3266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3267 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3269 "00000000" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 37 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3270 "00011000" // ST.s16 r16, [p0, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3271 "00010111" // /* MW 3 */ + 3272 "00000010" // /* MW 2 */ + 3273 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 98 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3274 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3275 "00000000" // /* MW 3 */ + 3276 "00101000" // /* MW 2 */ + 3277 "00010000" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 54 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3278 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3279 "00000101" // /* MW 3 */ + 3280 "00100010" // /* MW 2 */ + 3281 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 98 4 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3282 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3283 "00000001" // /* MW 5 */ + 3284 "00000000" // /* MW 4 */ + 3285 "00000000" // /* MW 3 */ + 3286 "11111000" // /* MW 2 */ + 3287 "11111111" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 54 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3288 "10011000" // EQ r27, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3289 "00100111" // /* MW 3 */ + 3290 "01110111" // /* MW 2 */ + 3291 "00010100" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 39 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3292 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3293 "10000010" // /* MW 3 */ + 3294 "00100001" // /* MW 2 */ + 3295 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + 3297 "00000000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E +.function setup _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E +.src_ref 2 "elementwise_binary_broadcasting.h" 40 first +.src_ref 2 "elementwise_binary_broadcasting.h" 41 33 +.src_ref 2 "elementwise_binary_broadcasting.h" 41 33 +.function_start + 3312 "10111010" // MOVA m0, #20; MOVXM p0, #508684 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3313 "00010000" // /* MW 9 */ + 3314 "10000110" // /* MW 8 */ + 3315 "00110001" // /* MW 7 */ + 3316 "11110000" // /* MW 6 */ + 3317 "00000001" // /* MW 5 */ + 3318 "00000000" // /* MW 4 */ + 3319 "10000000" // /* MW 3 */ + 3320 "10000000" // /* MW 2 */ + 3321 "00000010" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 41 25 +.src_ref 2 "elementwise_binary_broadcasting.h" 41 33 +.src_ref 2 "elementwise_binary_broadcasting.h" 41 33 first +.src_ref 2 "elementwise_binary_broadcasting.h" 42 25 + 3322 "10111010" // LDA.u8 r0, [p0], m0; MOVX r2, #1; MOV r1, #6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3323 "01011000" // /* MW 9 */ + 3324 "00000110" // /* MW 8 */ + 3325 "00101000" // /* MW 7 */ + 3326 "00101000" // /* MW 6 */ + 3327 "00100000" // /* MW 5 */ + 3328 "00000000" // /* MW 4 */ + 3329 "01010000" // /* MW 3 */ + 3330 "00000001" // /* MW 2 */ + 3331 "00000001" // /* MW 1 */ + 3332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3333 "00000000" // /* MW 1 */ + 3334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3335 "00000000" // /* MW 1 */ + 3336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3337 "00000000" // /* MW 1 */ + 3338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3339 "00000000" // /* MW 1 */ + 3340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3341 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 43 4 first + 3342 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3343 "00000000" // /* MW 3 */ + 3344 "00101000" // /* MW 2 */ + 3345 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 41 33 first +.delay_slot + 3346 "00011000" // NEZ r3, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3347 "11110000" // /* MW 3 */ + 3348 "00000110" // /* MW 2 */ + 3349 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 41 33 +.delay_slot + 3350 "10011000" // NE r0, r2, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3351 "00001000" // /* MW 3 */ + 3352 "10000000" // /* MW 2 */ + 3353 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 42 25 first +.delay_slot + 3354 "10011000" // LSHL r0, r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3355 "00011101" // /* MW 3 */ + 3356 "00000000" // /* MW 2 */ + 3357 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 41 25 first +.src_ref 2 "elementwise_binary_broadcasting.h" 42 23 +.delay_slot + 3358 "01011100" // ST r0, [p0, #4]; LSHL r2, r3, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3359 "00111011" // /* MW 5 */ + 3360 "10001000" // /* MW 4 */ + 3361 "00110001" // /* MW 3 */ + 3362 "10000010" // /* MW 2 */ + 3363 "00000010" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 41 23 +.delay_slot + 3364 "10011000" // ST r2, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3365 "01010001" // /* MW 3 */ + 3366 "00000100" // /* MW 2 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_end0 + 3367 "00001000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv +.function setup _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv +.src_ref 2 "elementwise_binary_broadcasting.h" 35 +.src_ref 2 "elementwise_binary_broadcasting.h" 35 first +.function_start + 3376 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3377 "00000001" // /* MW 5 */ + 3378 "00000000" // /* MW 4 */ + 3379 "00000000" // /* MW 3 */ + 3380 "00001000" // /* MW 2 */ + 3381 "00000000" // /* MW 1 */ + 3382 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3383 "00111101" // /* MW 3 */ + 3384 "11111100" // /* MW 2 */ + 3385 "00001111" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 36 8 first +.no_stack_arguments + 3386 "00000100" // JL #3120 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3120 delay_slots=5 */ + 3387 "00000001" // /* MW 5 */ + 3388 "00000000" // /* MW 4 */ + 3389 "00011000" // /* MW 3 */ + 3390 "00000110" // /* MW 2 */ + 3391 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 36 8 +.delay_slot + 3392 "01000100" // MOVXM p0, #508672 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3393 "00000000" // /* MW 5 */ + 3394 "11000110" // /* MW 4 */ + 3395 "11000000" // /* MW 3 */ + 3396 "00000111" // /* MW 2 */ + 3397 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3399 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3401 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3403 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3404 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3405 "01100111" // /* MW 3 */ + 3406 "00000001" // /* MW 2 */ + 3407 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 37 8 +.return_address + 3408 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3409 "00111001" // /* MW 3 */ + 3410 "11111100" // /* MW 2 */ + 3411 "00000111" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 37 8 first +.tail_call + 3412 "10000100" // J #3312 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=3312 delay_slots=5 */ + 3413 "00000000" // /* MW 5 */ + 3414 "00000000" // /* MW 4 */ + 3415 "01111000" // /* MW 3 */ + 3416 "00000110" // /* MW 2 */ + 3417 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 37 8 +.delay_slot + 3418 "01000100" // MOVXM p0, #508672 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3419 "00000000" // /* MW 5 */ + 3420 "11000110" // /* MW 4 */ + 3421 "11000000" // /* MW 3 */ + 3422 "00000111" // /* MW 2 */ + 3423 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 38 4 first +.delay_slot + 3424 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3425 "00000001" // /* MW 5 */ + 3426 "00000000" // /* MW 4 */ + 3427 "00000000" // /* MW 3 */ + 3428 "11111000" // /* MW 2 */ + 3429 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3431 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3433 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + 3435 "00000000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.function run _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.src_ref 2 "elementwise_binary_broadcasting.h" 48 first +.src_ref 2 "elementwise_binary_broadcasting.h" 55 37 +.src_ref 2 "elementwise_binary_broadcasting.h" 61 19 +.function_start + 3440 "10111010" // MOVA m0, #20; MOVXM p3, #508672 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3441 "00010000" // /* MW 9 */ + 3442 "10000000" // /* MW 8 */ + 3443 "10110001" // /* MW 7 */ + 3444 "11110001" // /* MW 6 */ + 3445 "00000001" // /* MW 5 */ + 3446 "00000000" // /* MW 4 */ + 3447 "10000000" // /* MW 3 */ + 3448 "10000000" // /* MW 2 */ + 3449 "00000010" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 55 37 first + 3450 "10011000" // LDA r0, [p3], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3451 "00010110" // /* MW 3 */ + 3452 "00111100" // /* MW 2 */ + 3453 "00000011" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 61 19 first +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 + 3454 "11010100" // LDA.u8 r1, [p3], m0; MOV p4, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3455 "10000001" // /* MW 5 */ + 3456 "11001101" // /* MW 4 */ + 3457 "01011000" // /* MW 3 */ + 3458 "00000101" // /* MW 2 */ + 3459 "01100001" // /* MW 1 */ + 3460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3461 "00000000" // /* MW 1 */ + 3462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3463 "00000000" // /* MW 1 */ + 3464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3465 "00000000" // /* MW 1 */ + 3466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3467 "00000000" // /* MW 1 */ + 3468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3469 "00000000" // /* MW 1 */ + 3470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3471 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 61 12 +.src_ref 2 "elementwise_binary_broadcasting.h" 61 35 + 3472 "10000100" // JNZ r1, #3536 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3536 delay_slots=5 */ + 3473 "00000001" // /* MW 5 */ + 3474 "01000000" // /* MW 4 */ + 3475 "11101000" // /* MW 3 */ + 3476 "00000110" // /* MW 2 */ + 3477 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 55 78 +.delay_slot + 3478 "00011000" // MOVX r2, #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3479 "11101001" // /* MW 3 */ + 3480 "11000100" // /* MW 2 */ + 3481 "00010111" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 55 78 first +.delay_slot + 3482 "10011000" // LSHL r0, r0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3483 "00101101" // /* MW 3 */ + 3484 "00000000" // /* MW 2 */ + 3485 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3487 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3489 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3490 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3491 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 62 28 first + 3492 "10011000" // LDA.s16 r1, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3493 "00110010" // /* MW 3 */ + 3494 "00000100" // /* MW 2 */ + 3495 "00000000" // /* MW 1 */ + 3496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3497 "00000000" // /* MW 1 */ + 3498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3499 "00000000" // /* MW 1 */ + 3500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3501 "00000000" // /* MW 1 */ + 3502 "10000100" // J #3568 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3568 delay_slots=5 */ + 3503 "00000000" // /* MW 5 */ + 3504 "00000000" // /* MW 4 */ + 3505 "11111000" // /* MW 3 */ + 3506 "00000110" // /* MW 2 */ + 3507 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3509 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3511 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 first +.delay_slot + 3512 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3513 "01110010" // /* MW 3 */ + 3514 "00000101" // /* MW 2 */ + 3515 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3516 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3517 "01100111" // /* MW 3 */ + 3518 "00000001" // /* MW 2 */ + 3519 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.delay_slot + 3520 "11100001" // NOPA; NOPB; VST x0, [p0]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3521 "00000000" // /* MW 15 */ + 3522 "00000000" // /* MW 14 */ + 3523 "01111000" // /* MW 13 */ + 3524 "10100101" // /* MW 12 */ + 3525 "00000001" // /* MW 11 */ + 3526 "00000000" // /* MW 10 */ + 3527 "00000000" // /* MW 9 */ + 3528 "00000000" // /* MW 8 */ + 3529 "00010011" // /* MW 7 */ + 3530 "00000100" // /* MW 6 */ + 3531 "00100000" // /* MW 5 */ + 3532 "00000000" // /* MW 4 */ + 3533 "11110000" // /* MW 3 */ + 3534 "00101100" // /* MW 2 */ + 3535 "00000000" // /* MW 1 */ +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_96 +.src_ref 2 "elementwise_binary_broadcasting.h" 65 28 first + 3536 "10011000" // LDA.s16 r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3537 "00110010" // /* MW 3 */ + 3538 "00000100" // /* MW 2 */ + 3539 "00000001" // /* MW 1 */ + 3540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3541 "00000000" // /* MW 1 */ + 3542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3543 "00000000" // /* MW 1 */ + 3544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3545 "00000000" // /* MW 1 */ + 3546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3547 "00000000" // /* MW 1 */ + 3548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3549 "00000000" // /* MW 1 */ + 3550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3551 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 first + 3552 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3553 "01110010" // /* MW 3 */ + 3554 "00000101" // /* MW 2 */ + 3555 "00011000" // /* MW 1 */ + 3556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3557 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first + 3558 "01111010" // NOPA; VST x0, [p1]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3559 "00000000" // /* MW 9 */ + 3560 "00000000" // /* MW 8 */ + 3561 "00000000" // /* MW 7 */ + 3562 "00000000" // /* MW 6 */ + 3563 "00010011" // /* MW 5 */ + 3564 "00000100" // /* MW 4 */ + 3565 "11110001" // /* MW 3 */ + 3566 "00101100" // /* MW 2 */ + 3567 "00000000" // /* MW 1 */ +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_128 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 first +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 first + 3568 "10111010" // LDA m0, [p4, #20]; MOVX r0, #60; ADD.NC lc, r0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3569 "01001000" // /* MW 9 */ + 3570 "00111111" // /* MW 8 */ + 3571 "10111000" // /* MW 7 */ + 3572 "10001010" // /* MW 6 */ + 3573 "00000111" // /* MW 5 */ + 3574 "00000000" // /* MW 4 */ + 3575 "11010000" // /* MW 3 */ + 3576 "10000000" // /* MW 2 */ + 3577 "10001010" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 + 3578 "10111010" // LDA m1, [p3, #4]; MOVXM ls, #3680 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3579 "00010000" // /* MW 9 */ + 3580 "00110000" // /* MW 8 */ + 3581 "01111111" // /* MW 7 */ + 3582 "00000000" // /* MW 6 */ + 3583 "00000000" // /* MW 5 */ + 3584 "00000000" // /* MW 4 */ + 3585 "11010000" // /* MW 3 */ + 3586 "10010000" // /* MW 2 */ + 3587 "01100010" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 + 3588 "01000100" // MOVXM le, #3712 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3589 "00000000" // /* MW 5 */ + 3590 "11111101" // /* MW 4 */ + 3591 "00000110" // /* MW 3 */ + 3592 "00000000" // /* MW 2 */ + 3593 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 + 3594 "01000100" // MOVXM p4, #508448 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3595 "01000000" // /* MW 5 */ + 3596 "11000100" // /* MW 4 */ + 3597 "11001000" // /* MW 3 */ + 3598 "00000111" // /* MW 2 */ + 3599 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 + 3600 "10011000" // LDA.s8 r1, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3601 "00100010" // /* MW 3 */ + 3602 "00000100" // /* MW 2 */ + 3603 "00000100" // /* MW 1 */ + 3604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3605 "00000000" // /* MW 1 */ + 3606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3607 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 146 20 first + 3608 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3609 "10101011" // /* MW 3 */ + 3610 "00001000" // /* MW 2 */ + 3611 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 2 "elementwise_binary.h" 148 20 first + 3612 "10011000" // VLDA.CONV.fp32.bf16 cml2, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3613 "00101011" // /* MW 3 */ + 3614 "00101001" // /* MW 2 */ + 3615 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 170 20 first + 3616 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3617 "00101011" // /* MW 3 */ + 3618 "00001000" // /* MW 2 */ + 3619 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3620 "10011000" // VLDA.CONV.fp32.bf16 cml4, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3621 "00101011" // /* MW 3 */ + 3622 "00101010" // /* MW 2 */ + 3623 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 154 20 +.src_ref 2 "elementwise_binary.h" 177 20 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3624 "00101100" // VLDA.CONV.fp32.bf16 cml1, [p0], m0; MOVX crRnd, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3625 "00000000" // /* MW 5 */ + 3626 "11110101" // /* MW 4 */ + 3627 "01110000" // /* MW 3 */ + 3628 "00010101" // /* MW 2 */ + 3629 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3630 "01100010" // VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3631 "00111101" // /* MW 7 */ + 3632 "00101000" // /* MW 6 */ + 3633 "00000011" // /* MW 5 */ + 3634 "00000100" // /* MW 4 */ + 3635 "01110000" // /* MW 3 */ + 3636 "00100101" // /* MW 2 */ + 3637 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3638 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3639 "00101011" // /* MW 3 */ + 3640 "00001000" // /* MW 2 */ + 3641 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3642 "01100010" // VLDA.CONV.fp32.bf16 cml4, [p1], m1; VADD.f dm4, dm0, dm4, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3643 "00111101" // /* MW 7 */ + 3644 "00010000" // /* MW 6 */ + 3645 "00000100" // /* MW 5 */ + 3646 "00000100" // /* MW 4 */ + 3647 "01110000" // /* MW 3 */ + 3648 "01000101" // /* MW 2 */ + 3649 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3650 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3651 "10101011" // /* MW 3 */ + 3652 "00001000" // /* MW 2 */ + 3653 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3654 "01100010" // VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3655 "00111101" // /* MW 7 */ + 3656 "00101000" // /* MW 6 */ + 3657 "00000011" // /* MW 5 */ + 3658 "00000100" // /* MW 4 */ + 3659 "01110000" // /* MW 3 */ + 3660 "00100101" // /* MW 2 */ + 3661 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3662 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3663 "00101011" // /* MW 3 */ + 3664 "00001000" // /* MW 2 */ + 3665 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3666 "01101110" // VLDA.CONV.fp32.bf16 cml4, [p1], m1; VST.CONV.bf16.fp32 cml3, [p2], #64; NOPM; VADD.f dm4, dm0, dm4, r0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 3667 "00111101" // /* MW 13 */ + 3668 "00010000" // /* MW 12 */ + 3669 "00000100" // /* MW 11 */ + 3670 "01010111" // /* MW 10 */ + 3671 "00011010" // /* MW 9 */ + 3672 "01000000" // /* MW 8 */ + 3673 "00000000" // /* MW 7 */ + 3674 "00000000" // /* MW 6 */ + 3675 "01000110" // /* MW 5 */ + 3676 "00111011" // /* MW 4 */ + 3677 "01110100" // /* MW 3 */ + 3678 "01000101" // /* MW 2 */ + 3679 "00100101" // /* MW 1 */ +.label ZLS_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_240 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 3680 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3681 "10101011" // /* MW 3 */ + 3682 "00001000" // /* MW 2 */ + 3683 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3684 "01100110" // VLDA.CONV.fp32.bf16 cml2, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3685 "00111101" // /* MW 11 */ + 3686 "00101000" // /* MW 10 */ + 3687 "00000011" // /* MW 9 */ + 3688 "10001110" // /* MW 8 */ + 3689 "00010001" // /* MW 7 */ + 3690 "00001111" // /* MW 6 */ + 3691 "00100001" // /* MW 5 */ + 3692 "00000000" // /* MW 4 */ + 3693 "01110000" // /* MW 3 */ + 3694 "00100101" // /* MW 2 */ + 3695 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3696 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3697 "00000000" // /* MW 15 */ + 3698 "00000000" // /* MW 14 */ + 3699 "01111000" // /* MW 13 */ + 3700 "10100101" // /* MW 12 */ + 3701 "00000001" // /* MW 11 */ + 3702 "00000000" // /* MW 10 */ + 3703 "00000000" // /* MW 9 */ + 3704 "00000000" // /* MW 8 */ + 3705 "01011011" // /* MW 7 */ + 3706 "00000001" // /* MW 6 */ + 3707 "00100000" // /* MW 5 */ + 3708 "00000000" // /* MW 4 */ + 3709 "01110000" // /* MW 3 */ + 3710 "00000101" // /* MW 2 */ + 3711 "00000001" // /* MW 1 */ +.label ZLE_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_272 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3712 "11101011" // VLDA.CONV.fp32.bf16 cml4, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml3, [p2], #64;NOPX; NOPM; VADD.f dm4, dm0, dm4, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3713 "10000001" // /* MW 15 */ + 3714 "00100000" // /* MW 14 */ + 3715 "01111000" // /* MW 13 */ + 3716 "10100101" // /* MW 12 */ + 3717 "00000001" // /* MW 11 */ + 3718 "00000000" // /* MW 10 */ + 3719 "00000000" // /* MW 9 */ + 3720 "00000000" // /* MW 8 */ + 3721 "10100011" // /* MW 7 */ + 3722 "00011101" // /* MW 6 */ + 3723 "00100010" // /* MW 5 */ + 3724 "00000000" // /* MW 4 */ + 3725 "01110000" // /* MW 3 */ + 3726 "01000101" // /* MW 2 */ + 3727 "00100101" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 3728 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3729 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3730 "01100010" // VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3731 "00111101" // /* MW 7 */ + 3732 "00101000" // /* MW 6 */ + 3733 "00000011" // /* MW 5 */ + 3734 "00000010" // /* MW 4 */ + 3735 "01100000" // /* MW 3 */ + 3736 "11000100" // /* MW 2 */ + 3737 "01000011" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3738 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3739 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3740 "01100010" // VST.CONV.bf16.fp32 cml3, [p2], #64; VADD.f dm4, dm0, dm4, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3741 "00111101" // /* MW 7 */ + 3742 "00010000" // /* MW 6 */ + 3743 "00000100" // /* MW 5 */ + 3744 "00000010" // /* MW 4 */ + 3745 "01100000" // /* MW 3 */ + 3746 "10110100" // /* MW 2 */ + 3747 "01000011" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3749 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 154 20 first +.src_ref 2 "elementwise_binary_broadcasting.h" 80 4 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3750 "01011100" // VST.CONV.bf16.fp32 cml4, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 3751 "00000000" // /* MW 5 */ + 3752 "01010000" // /* MW 4 */ + 3753 "01100000" // /* MW 3 */ + 3754 "11000100" // /* MW 2 */ + 3755 "01000011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3756 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3757 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.delay_slot + 3758 "00011000" // VST.CONV.bf16.fp32 cml3, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3759 "10100011" // /* MW 3 */ + 3760 "00011101" // /* MW 2 */ + 3761 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3762 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3763 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 154 20 first +.delay_slot + 3764 "00011000" // VST.CONV.bf16.fp32 cml4, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3765 "00100011" // /* MW 3 */ + 3766 "00011110" // /* MW 2 */ + 3767 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0 + 3769 "00000000" // /* MW 1 */ +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E +.function run _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 41 +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 41 first +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 76 8 +.function_start + 3776 "00111010" // MOVS p2, p1; PADDXM [sp], #128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3777 "01110001" // /* MW 9 */ + 3778 "00000000" // /* MW 8 */ + 3779 "00000000" // /* MW 7 */ + 3780 "00000000" // /* MW 6 */ + 3781 "00000100" // /* MW 5 */ + 3782 "00000000" // /* MW 4 */ + 3783 "01100000" // /* MW 3 */ + 3784 "10010001" // /* MW 2 */ + 3785 "01010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 51 35 + 3786 "00000010" // ST lr, [sp, #-4]; MOV r16, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3787 "01110000" // /* MW 7 */ + 3788 "01100000" // /* MW 6 */ + 3789 "00001000" // /* MW 5 */ + 3790 "00000010" // /* MW 4 */ + 3791 "10110000" // /* MW 3 */ + 3792 "10000111" // /* MW 2 */ + 3793 "11111111" // /* MW 1 */ + 3794 "11111000" // MOV r17, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3795 "11100000" // /* MW 3 */ + 3796 "01010101" // /* MW 2 */ + 3797 "00011100" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 51 19 + 3798 "01000100" // MOVXM p3, #508684 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3799 "00011000" // /* MW 5 */ + 3800 "11000110" // /* MW 4 */ + 3801 "11000110" // /* MW 3 */ + 3802 "00000111" // /* MW 2 */ + 3803 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 51 19 first + 3804 "00010100" // LDA.u8 r27, [p3], #2; ADD.NC p0, r17, #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3805 "10000000" // /* MW 5 */ + 3806 "11010001" // /* MW 4 */ + 3807 "01010000" // /* MW 3 */ + 3808 "11101101" // /* MW 2 */ + 3809 "01100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 538 13 first +.src_ref 4 "vector_native_types.hpp" 374 137 first + 3810 "00001100" // LDA.s16 r18, [p3], #-14; VST sfh, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3811 "01010110" // /* MW 5 */ + 3812 "00001110" // /* MW 4 */ + 3813 "01010000" // /* MW 3 */ + 3814 "11001010" // /* MW 2 */ + 3815 "01110011" // /* MW 1 */ + 3816 "00011000" // ST.s16 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3817 "01010111" // /* MW 3 */ + 3818 "00000110" // /* MW 2 */ + 3819 "00000000" // /* MW 1 */ + 3820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3821 "00000000" // /* MW 1 */ + 3822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3823 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 76 8 first +.no_stack_arguments + 3824 "00000100" // JL #3440 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3440 delay_slots=5 */ + 3825 "00000001" // /* MW 5 */ + 3826 "00000000" // /* MW 4 */ + 3827 "10111000" // /* MW 3 */ + 3828 "00000110" // /* MW 2 */ + 3829 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 51 35 +.delay_slot + 3830 "11111000" // MOV r17, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3831 "11000000" // /* MW 3 */ + 3832 "01010000" // /* MW 2 */ + 3833 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3835 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 51 35 first +.delay_slot + 3836 "00011000" // SEL.EQZ r18, r16, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3837 "00010010" // /* MW 3 */ + 3838 "00100101" // /* MW 2 */ + 3839 "00010100" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 51 35 +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 76 8 +.delay_slot + 3840 "11100100" // SEL.EQZ r16, r17, r16, r27; MOV p1, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3841 "01000001" // /* MW 5 */ + 3842 "11010010" // /* MW 4 */ + 3843 "01000010" // /* MW 3 */ + 3844 "00100000" // /* MW 2 */ + 3845 "10001100" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 76 8 +.delay_slot + 3846 "10111010" // NOPA; NOPB; MOV p0, r16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3847 "01111110" // /* MW 9 */ + 3848 "00010000" // /* MW 8 */ + 3849 "00110100" // /* MW 7 */ + 3850 "00000000" // /* MW 6 */ + 3851 "00010000" // /* MW 5 */ + 3852 "00000000" // /* MW 4 */ + 3853 "11110000" // /* MW 3 */ + 3854 "00101100" // /* MW 2 */ + 3855 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 77 4 +.return_address + 3856 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3857 "00111001" // /* MW 3 */ + 3858 "11111100" // /* MW 2 */ + 3859 "00000111" // /* MW 1 */ + 3860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3861 "00000000" // /* MW 1 */ + 3862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3863 "00000000" // /* MW 1 */ + 3864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3865 "00000000" // /* MW 1 */ + 3866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3867 "00000000" // /* MW 1 */ + 3868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3869 "00000000" // /* MW 1 */ + 3870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3871 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 77 4 first + 3872 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3873 "00000000" // /* MW 3 */ + 3874 "00101000" // /* MW 2 */ + 3875 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 77 4 +.delay_slot + 3876 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3877 "00000001" // /* MW 5 */ + 3878 "00000000" // /* MW 4 */ + 3879 "00000000" // /* MW 3 */ + 3880 "11110000" // /* MW 2 */ + 3881 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3883 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3885 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3887 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_end0 + 3889 "00000000" // /* MW 1 */ +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_add1d_attribute_broadcasting _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 6 "superkernels.cpp" 152 first +.src_ref 6 "superkernels.cpp" 157 6 +.function_start + 3904 "01000100" // MOVXM p3, #508416 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3905 "00000000" // /* MW 5 */ + 3906 "11000100" // /* MW 4 */ + 3907 "11000110" // /* MW 3 */ + 3908 "00000111" // /* MW 2 */ + 3909 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 157 6 first + 3910 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3911 "11000001" // /* MW 5 */ + 3912 "10110101" // /* MW 4 */ + 3913 "11011000" // /* MW 3 */ + 3914 "11000010" // /* MW 2 */ + 3915 "01100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 152 + 3916 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3917 "00000001" // /* MW 5 */ + 3918 "00000000" // /* MW 4 */ + 3919 "00000000" // /* MW 3 */ + 3920 "00001000" // /* MW 2 */ + 3921 "00000000" // /* MW 1 */ + 3922 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3923 "01110000" // /* MW 7 */ + 3924 "11010000" // /* MW 6 */ + 3925 "00001011" // /* MW 5 */ + 3926 "00000000" // /* MW 4 */ + 3927 "10110000" // /* MW 3 */ + 3928 "01100011" // /* MW 2 */ + 3929 "11111111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 154 11 + 3930 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #508420 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3931 "00010001" // /* MW 9 */ + 3932 "00000010" // /* MW 8 */ + 3933 "00110001" // /* MW 7 */ + 3934 "11110011" // /* MW 6 */ + 3935 "00000001" // /* MW 5 */ + 3936 "00000000" // /* MW 4 */ + 3937 "10110000" // /* MW 3 */ + 3938 "10000010" // /* MW 2 */ + 3939 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 + 3940 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3941 "11000000" // /* MW 3 */ + 3942 "11010100" // /* MW 2 */ + 3943 "00011011" // /* MW 1 */ + 3944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3945 "00000000" // /* MW 1 */ + 3946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3947 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 157 6 +.src_ref 6 "superkernels.cpp" 157 16 + 3948 "10000100" // JNZ r16, #4112 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4112 delay_slots=5 */ + 3949 "00000001" // /* MW 5 */ + 3950 "01000000" // /* MW 4 */ + 3951 "00001000" // /* MW 3 */ + 3952 "00001000" // /* MW 2 */ + 3953 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 154 22 first +.delay_slot + 3954 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3955 "10010000" // /* MW 3 */ + 3956 "01100010" // /* MW 2 */ + 3957 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 154 30 +.delay_slot + 3958 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3959 "11111011" // /* MW 3 */ + 3960 "01100011" // /* MW 2 */ + 3961 "00010100" // /* MW 1 */ +.delay_slot + 3962 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3963 "00111101" // /* MW 3 */ + 3964 "11110100" // /* MW 2 */ + 3965 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 6 "superkernels.cpp" 154 11 +.delay_slot + 3966 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3967 "01110000" // /* MW 7 */ + 3968 "01100000" // /* MW 6 */ + 3969 "00110000" // /* MW 5 */ + 3970 "00000011" // /* MW 4 */ + 3971 "00110000" // /* MW 3 */ + 3972 "11000110" // /* MW 2 */ + 3973 "11000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 160 4 +.src_ref 6 "superkernels.cpp" 171 2 +.delay_slot + 3974 "01000100" // MOVXM p0, #508672 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3975 "00000000" // /* MW 5 */ + 3976 "11000110" // /* MW 4 */ + 3977 "11000000" // /* MW 3 */ + 3978 "00000111" // /* MW 2 */ + 3979 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3980 "01000100" // MOVXM p2, #508448 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3981 "01000000" // /* MW 5 */ + 3982 "11000100" // /* MW 4 */ + 3983 "11000100" // /* MW 3 */ + 3984 "00000111" // /* MW 2 */ + 3985 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3986 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #508444 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3987 "00010000" // /* MW 9 */ + 3988 "00001110" // /* MW 8 */ + 3989 "00110001" // /* MW 7 */ + 3990 "11110001" // /* MW 6 */ + 3991 "00000001" // /* MW 5 */ + 3992 "00000000" // /* MW 4 */ + 3993 "11100000" // /* MW 3 */ + 3994 "11000000" // /* MW 2 */ + 3995 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3997 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 160 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 3998 "00000100" // JL #3376 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3376 delay_slots=5 */ + 3999 "00000001" // /* MW 5 */ + 4000 "00000000" // /* MW 4 */ + 4001 "10011000" // /* MW 3 */ + 4002 "00000110" // /* MW 2 */ + 4003 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4005 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4007 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4008 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4009 "00110001" // /* MW 3 */ + 4010 "00100000" // /* MW 2 */ + 4011 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 4012 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4013 "00000101" // /* MW 3 */ + 4014 "00100000" // /* MW 2 */ + 4015 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 4016 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4017 "00000000" // /* MW 15 */ + 4018 "00000000" // /* MW 14 */ + 4019 "01111000" // /* MW 13 */ + 4020 "10100101" // /* MW 12 */ + 4021 "00000001" // /* MW 11 */ + 4022 "00000000" // /* MW 10 */ + 4023 "00000000" // /* MW 9 */ + 4024 "10000000" // /* MW 8 */ + 4025 "00010001" // /* MW 7 */ + 4026 "00000110" // /* MW 6 */ + 4027 "00100010" // /* MW 5 */ + 4028 "00000000" // /* MW 4 */ + 4029 "11110000" // /* MW 3 */ + 4030 "00101100" // /* MW 2 */ + 4031 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 164 18 +.return_address + 4032 "01000100" // MOVXM p2, #508420 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4033 "00001000" // /* MW 5 */ + 4034 "11000100" // /* MW 4 */ + 4035 "11000100" // /* MW 3 */ + 4036 "00000111" // /* MW 2 */ + 4037 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 164 18 first +.src_ref 6 "superkernels.cpp" 164 65 + 4038 "10111010" // LDA r16, [p2]; MOVXM p2, #508672 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4039 "00010000" // /* MW 9 */ + 4040 "10000000" // /* MW 8 */ + 4041 "00110001" // /* MW 7 */ + 4042 "11110001" // /* MW 6 */ + 4043 "00000001" // /* MW 5 */ + 4044 "00000000" // /* MW 4 */ + 4045 "11010000" // /* MW 3 */ + 4046 "11000010" // /* MW 2 */ + 4047 "01000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 162 51 +.src_ref 6 "superkernels.cpp" 164 65 +.src_ref 6 "superkernels.cpp" 171 2 + 4048 "10111010" // LDA r17, [p2]; MOVXM p2, #508672 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4049 "00010000" // /* MW 9 */ + 4050 "10000000" // /* MW 8 */ + 4051 "00110001" // /* MW 7 */ + 4052 "11110001" // /* MW 6 */ + 4053 "00000001" // /* MW 5 */ + 4054 "00000000" // /* MW 4 */ + 4055 "11010000" // /* MW 3 */ + 4056 "11000110" // /* MW 2 */ + 4057 "01000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 162 51 first +.src_ref 6 "superkernels.cpp" 164 16 +.src_ref 6 "superkernels.cpp" 169 47 + 4058 "10111010" // LDA.u16 r18, [p2, #10]; MOVXM p1, #508424 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4059 "00010000" // /* MW 9 */ + 4060 "00000100" // /* MW 8 */ + 4061 "10110001" // /* MW 7 */ + 4062 "11110000" // /* MW 6 */ + 4063 "00000001" // /* MW 5 */ + 4064 "00000000" // /* MW 4 */ + 4065 "01010000" // /* MW 3 */ + 4066 "11001011" // /* MW 2 */ + 4067 "01001010" // /* MW 1 */ + 4068 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4069 "00000000" // /* MW 1 */ + 4070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4071 "00000000" // /* MW 1 */ + 4072 "10000100" // J #4128 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=4128 delay_slots=5 */ + 4073 "00000000" // /* MW 5 */ + 4074 "00000000" // /* MW 4 */ + 4075 "00010000" // /* MW 3 */ + 4076 "00001000" // /* MW 2 */ + 4077 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 162 13 +.delay_slot + 4078 "01000100" // MOVXM p0, #508440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4079 "00110000" // /* MW 5 */ + 4080 "11000100" // /* MW 4 */ + 4081 "11000000" // /* MW 3 */ + 4082 "00000111" // /* MW 2 */ + 4083 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4085 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 164 27 first +.delay_slot + 4086 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4087 "00001111" // /* MW 3 */ + 4088 "01100001" // /* MW 2 */ + 4089 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 162 13 first +.delay_slot + 4090 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4091 "10100011" // /* MW 5 */ + 4092 "00001100" // /* MW 4 */ + 4093 "11110000" // /* MW 3 */ + 4094 "00101100" // /* MW 2 */ + 4095 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 164 16 first +.delay_slot + 4096 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4097 "00000000" // /* MW 15 */ + 4098 "00000000" // /* MW 14 */ + 4099 "01111000" // /* MW 13 */ + 4100 "10100101" // /* MW 12 */ + 4101 "00000001" // /* MW 11 */ + 4102 "00000000" // /* MW 10 */ + 4103 "00000000" // /* MW 9 */ + 4104 "10000000" // /* MW 8 */ + 4105 "00010001" // /* MW 7 */ + 4106 "00000110" // /* MW 6 */ + 4107 "00100001" // /* MW 5 */ + 4108 "00000000" // /* MW 4 */ + 4109 "11110000" // /* MW 3 */ + 4110 "00101100" // /* MW 2 */ + 4111 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 6 "superkernels.cpp" 169 47 +.src_ref 6 "superkernels.cpp" 171 2 + 4112 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #508424; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4113 "00000000" // /* MW 15 */ + 4114 "00000000" // /* MW 14 */ + 4115 "00010000" // /* MW 13 */ + 4116 "00000100" // /* MW 12 */ + 4117 "10110001" // /* MW 11 */ + 4118 "11110000" // /* MW 10 */ + 4119 "00000001" // /* MW 9 */ + 4120 "00000000" // /* MW 8 */ + 4121 "10001011" // /* MW 7 */ + 4122 "10000000" // /* MW 6 */ + 4123 "00100010" // /* MW 5 */ + 4124 "00000000" // /* MW 4 */ + 4125 "11110000" // /* MW 3 */ + 4126 "00101100" // /* MW 2 */ + 4127 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 +.src_ref 1 "io_buffer_main.h" 242 49 first + 4128 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4129 "00000000" // /* MW 7 */ + 4130 "11000011" // /* MW 6 */ + 4131 "10110011" // /* MW 5 */ + 4132 "00000011" // /* MW 4 */ + 4133 "01100000" // /* MW 3 */ + 4134 "10010001" // /* MW 2 */ + 4135 "01110011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 168 2 + 4136 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4137 "00010000" // /* MW 9 */ + 4138 "00000000" // /* MW 8 */ + 4139 "00110001" // /* MW 7 */ + 4140 "11110000" // /* MW 6 */ + 4141 "00000001" // /* MW 5 */ + 4142 "00000000" // /* MW 4 */ + 4143 "11010000" // /* MW 3 */ + 4144 "11101110" // /* MW 2 */ + 4145 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 4146 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4147 "00010110" // /* MW 3 */ + 4148 "11111110" // /* MW 2 */ + 4149 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 4150 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4151 "00110110" // /* MW 3 */ + 4152 "11111110" // /* MW 2 */ + 4153 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first + 4154 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4155 "01010110" // /* MW 3 */ + 4156 "01000110" // /* MW 2 */ + 4157 "00000111" // /* MW 1 */ + 4158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4159 "00000000" // /* MW 1 */ + 4160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4161 "00000000" // /* MW 1 */ + 4162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4163 "00000000" // /* MW 1 */ + 4164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4165 "00000000" // /* MW 1 */ + 4166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4167 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 4168 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4169 "00000010" // /* MW 3 */ + 4170 "01100001" // /* MW 2 */ + 4171 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 + 4172 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4173 "00010001" // /* MW 3 */ + 4174 "00000110" // /* MW 2 */ + 4175 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 + 4176 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4177 "11111101" // /* MW 3 */ + 4178 "11100000" // /* MW 2 */ + 4179 "00010111" // /* MW 1 */ + 4180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4181 "00000000" // /* MW 1 */ + 4182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4183 "00000000" // /* MW 1 */ + 4184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4185 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 4186 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4187 "00001000" // /* MW 3 */ + 4188 "10010011" // /* MW 2 */ + 4189 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 169 45 + 4190 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4191 "10000001" // /* MW 5 */ + 4192 "10101101" // /* MW 4 */ + 4193 "10100111" // /* MW 3 */ + 4194 "00000000" // /* MW 2 */ + 4195 "00000100" // /* MW 1 */ + 4196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4197 "00000000" // /* MW 1 */ + 4198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4199 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 168 2 first + 4200 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4201 "00110110" // /* MW 3 */ + 4202 "00000110" // /* MW 2 */ + 4203 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 1 "io_buffer_main.h" 348 51 + 4204 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4205 "10000001" // /* MW 5 */ + 4206 "11011101" // /* MW 4 */ + 4207 "11011100" // /* MW 3 */ + 4208 "11001010" // /* MW 2 */ + 4209 "11000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 169 47 first + 4210 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4211 "01110110" // /* MW 3 */ + 4212 "00000110" // /* MW 2 */ + 4213 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 4214 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4215 "10011110" // /* MW 3 */ + 4216 "01011100" // /* MW 2 */ + 4217 "00000111" // /* MW 1 */ + 4218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4219 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 171 2 first +.no_stack_arguments + 4220 "00000100" // JL #3776 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3776 delay_slots=5 */ + 4221 "00000001" // /* MW 5 */ + 4222 "00000000" // /* MW 4 */ + 4223 "01100000" // /* MW 3 */ + 4224 "00000111" // /* MW 2 */ + 4225 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4226 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4227 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 168 2 first +.delay_slot + 4228 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4229 "00000111" // /* MW 3 */ + 4230 "01100010" // /* MW 2 */ + 4231 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 168 2 +.delay_slot + 4232 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4233 "00110001" // /* MW 3 */ + 4234 "00000110" // /* MW 2 */ + 4235 "00001000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 169 45 first +.delay_slot + 4236 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4237 "00001101" // /* MW 3 */ + 4238 "11100001" // /* MW 2 */ + 4239 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 169 45 +.delay_slot + 4240 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4241 "00000000" // /* MW 15 */ + 4242 "00000000" // /* MW 14 */ + 4243 "10101000" // /* MW 13 */ + 4244 "10100000" // /* MW 12 */ + 4245 "00110100" // /* MW 11 */ + 4246 "00000000" // /* MW 10 */ + 4247 "00000000" // /* MW 9 */ + 4248 "00000000" // /* MW 8 */ + 4249 "01011011" // /* MW 7 */ + 4250 "00000001" // /* MW 6 */ + 4251 "00100000" // /* MW 5 */ + 4252 "00000000" // /* MW 4 */ + 4253 "11110000" // /* MW 3 */ + 4254 "00101100" // /* MW 2 */ + 4255 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first +.src_ref 6 "superkernels.cpp" 173 6 +.src_ref 6 "superkernels.cpp" 174 14 +.return_address + 4256 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4257 "00010000" // /* MW 9 */ + 4258 "00000000" // /* MW 8 */ + 4259 "00110001" // /* MW 7 */ + 4260 "11110011" // /* MW 6 */ + 4261 "00000001" // /* MW 5 */ + 4262 "00000000" // /* MW 4 */ + 4263 "11010000" // /* MW 3 */ + 4264 "11000110" // /* MW 2 */ + 4265 "11001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 449 8 + 4266 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4267 "00000101" // /* MW 3 */ + 4268 "00100000" // /* MW 2 */ + 4269 "00010000" // /* MW 1 */ + 4270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4271 "00000000" // /* MW 1 */ + 4272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4273 "00000000" // /* MW 1 */ + 4274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4275 "00000000" // /* MW 1 */ + 4276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4277 "00000000" // /* MW 1 */ + 4278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4279 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 4280 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4281 "00001000" // /* MW 3 */ + 4282 "01010001" // /* MW 2 */ + 4283 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first +.src_ref 6 "superkernels.cpp" 173 19 + 4284 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #508440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4285 "00010000" // /* MW 9 */ + 4286 "00001100" // /* MW 8 */ + 4287 "00110001" // /* MW 7 */ + 4288 "11110001" // /* MW 6 */ + 4289 "00000001" // /* MW 5 */ + 4290 "00000000" // /* MW 4 */ + 4291 "11010000" // /* MW 3 */ + 4292 "11001110" // /* MW 2 */ + 4293 "11111100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 173 6 first + 4294 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4295 "00110110" // /* MW 3 */ + 4296 "00000110" // /* MW 2 */ + 4297 "00000110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 173 19 + 4298 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4299 "01010110" // /* MW 3 */ + 4300 "00000110" // /* MW 2 */ + 4301 "00000010" // /* MW 1 */ + 4302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4303 "00000000" // /* MW 1 */ + 4304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4305 "00000000" // /* MW 1 */ + 4306 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4307 "00000000" // /* MW 1 */ + 4308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4309 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 first + 4310 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4311 "00110001" // /* MW 3 */ + 4312 "00100001" // /* MW 2 */ + 4313 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 4314 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4315 "00010001" // /* MW 3 */ + 4316 "11100110" // /* MW 2 */ + 4317 "00001111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 173 16 first + 4318 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4319 "00101000" // /* MW 3 */ + 4320 "01100001" // /* MW 2 */ + 4321 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 173 6 + 4322 "10000100" // JNZ r16, #4352 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4352 delay_slots=5 */ + 4323 "00000001" // /* MW 5 */ + 4324 "01000000" // /* MW 4 */ + 4325 "10000000" // /* MW 3 */ + 4326 "00001000" // /* MW 2 */ + 4327 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4329 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4331 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4333 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4335 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4337 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 174 14 + 4338 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4339 "00000001" // /* MW 3 */ + 4340 "00100000" // /* MW 2 */ + 4341 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 174 14 first + 4342 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4343 "00000000" // /* MW 9 */ + 4344 "00000000" // /* MW 8 */ + 4345 "00000000" // /* MW 7 */ + 4346 "10000000" // /* MW 6 */ + 4347 "00010001" // /* MW 5 */ + 4348 "00000110" // /* MW 4 */ + 4349 "11110110" // /* MW 3 */ + 4350 "00101100" // /* MW 2 */ + 4351 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 6 "superkernels.cpp" 176 + 4352 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4353 "00111001" // /* MW 3 */ + 4354 "11110100" // /* MW 2 */ + 4355 "00000111" // /* MW 1 */ + 4356 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4357 "00011001" // /* MW 3 */ + 4358 "11111011" // /* MW 2 */ + 4359 "00000111" // /* MW 1 */ + 4360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4361 "00000000" // /* MW 1 */ + 4362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4363 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 4364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4365 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4366 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4367 "11110001" // /* MW 3 */ + 4368 "11111101" // /* MW 2 */ + 4369 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4371 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 176 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4372 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 4373 "00000000" // /* MW 3 */ + 4374 "00101000" // /* MW 2 */ + 4375 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4376 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4377 "10100000" // /* MW 3 */ + 4378 "01100111" // /* MW 2 */ + 4379 "00011111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 176 +.delay_slot + 4380 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4381 "00000001" // /* MW 5 */ + 4382 "00000000" // /* MW 4 */ + 4383 "00000000" // /* MW 3 */ + 4384 "11111000" // /* MW 2 */ + 4385 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4387 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4389 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 4391 "00000000" // /* MW 1 */ +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv +.function setup _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv +.src_ref 2 "elementwise_unary.h" 95 first +.src_ref 2 "elementwise_unary.h" 97 22 +.src_ref 2 "elementwise_unary.h" 97 24 first +.function_start + 4400 "10111010" // LDA el0, [p1], #4; MOVXM p0, #508800 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4401 "00010000" // /* MW 9 */ + 4402 "11000000" // /* MW 8 */ + 4403 "00110001" // /* MW 7 */ + 4404 "11110000" // /* MW 6 */ + 4405 "00000001" // /* MW 5 */ + 4406 "00000000" // /* MW 4 */ + 4407 "11010000" // /* MW 3 */ + 4408 "10000101" // /* MW 2 */ + 4409 "00100011" // /* MW 1 */ + 4410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4411 "00000000" // /* MW 1 */ + 4412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4413 "00000000" // /* MW 1 */ + 4414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4415 "00000000" // /* MW 1 */ + 4416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4417 "00000000" // /* MW 1 */ + 4418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4419 "00000000" // /* MW 1 */ + 4420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4421 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 97 22 first + 4422 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4423 "00101001" // /* MW 3 */ + 4424 "00011100" // /* MW 2 */ + 4425 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 98 24 first + 4426 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4427 "00101110" // /* MW 3 */ + 4428 "00000100" // /* MW 2 */ + 4429 "00000001" // /* MW 1 */ + 4430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4431 "00000000" // /* MW 1 */ + 4432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4433 "00000000" // /* MW 1 */ + 4434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4435 "00000000" // /* MW 1 */ + 4436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4437 "00000000" // /* MW 1 */ + 4438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4439 "00000000" // /* MW 1 */ + 4440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4441 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 98 22 + 4442 "10011000" // ST el0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4443 "00101001" // /* MW 3 */ + 4444 "00000100" // /* MW 2 */ + 4445 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 99 24 first + 4446 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4447 "00101110" // /* MW 3 */ + 4448 "00010100" // /* MW 2 */ + 4449 "00000001" // /* MW 1 */ + 4450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4451 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 101 4 first + 4452 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 4453 "00000000" // /* MW 3 */ + 4454 "00101000" // /* MW 2 */ + 4455 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4457 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4459 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4461 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4463 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 99 22 first +.delay_slot + 4464 "10011000" // ST el0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4465 "00101001" // /* MW 3 */ + 4466 "00010100" // /* MW 2 */ +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv__end +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_end0 + 4467 "00001000" // /* MW 1 */ +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E +.function run _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_unary.h" 107 first +.src_ref 2 "elementwise_unary.h" 113 37 +.src_ref 2 "elementwise_unary.h" 113 78 +.src_ref 2 "elementwise_unary.h" 142 19 +.function_start + 4480 "10110110" // MOVA r0, #-6; VLDB x10, [p0], #64; MOVXM p2, #508800 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4481 "00010000" // /* MW 11 */ + 4482 "11000000" // /* MW 10 */ + 4483 "00110001" // /* MW 9 */ + 4484 "11110001" // /* MW 8 */ + 4485 "00000001" // /* MW 7 */ + 4486 "00000000" // /* MW 6 */ + 4487 "01101000" // /* MW 5 */ + 4488 "00111101" // /* MW 4 */ + 4489 "00000000" // /* MW 3 */ + 4490 "01000000" // /* MW 2 */ + 4491 "11111111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_unary.h" 113 37 first +.src_ref 2 "elementwise_unary.h" 161 19 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4492 "10110110" // LDA r2, [p2]; VLDB x7, [p0], #64; MOVXM p2, #508448 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4493 "00010000" // /* MW 11 */ + 4494 "00010000" // /* MW 10 */ + 4495 "00110001" // /* MW 9 */ + 4496 "11110001" // /* MW 8 */ + 4497 "00000001" // /* MW 7 */ + 4498 "00000000" // /* MW 6 */ + 4499 "11101000" // /* MW 5 */ + 4500 "00111011" // /* MW 4 */ + 4501 "11010000" // /* MW 3 */ + 4502 "10001010" // /* MW 2 */ + 4503 "01000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_unary.h" 142 19 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4504 "10110110" // LDA.s8 r1, [p2]; VLDB x10, [p0], #64; MOVXM r6, #16512 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4505 "00010000" // /* MW 11 */ + 4506 "01000000" // /* MW 10 */ + 4507 "11001000" // /* MW 9 */ + 4508 "00010000" // /* MW 8 */ + 4509 "00000000" // /* MW 7 */ + 4510 "00000000" // /* MW 6 */ + 4511 "01101000" // /* MW 5 */ + 4512 "00111101" // /* MW 4 */ + 4513 "01010000" // /* MW 3 */ + 4514 "10000100" // /* MW 2 */ + 4515 "01000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_unary.h" 161 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4516 "11110100" // VLDB x7, [p0], #64; VBCST.16 x0, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4517 "11100101" // /* MW 5 */ + 4518 "00110010" // /* MW 4 */ + 4519 "10000000" // /* MW 3 */ + 4520 "10111110" // /* MW 2 */ + 4521 "00000011" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4522 "01000100" // MOVXM r4, #49280 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4523 "00000000" // /* MW 5 */ + 4524 "00100001" // /* MW 4 */ + 4525 "11000010" // /* MW 3 */ + 4526 "00000000" // /* MW 2 */ + 4527 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4528 "11111000" // VBCST.16 x1, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4529 "01110010" // /* MW 3 */ + 4530 "10010001" // /* MW 2 */ + 4531 "00011000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4532 "01000100" // MOVXM r3, #32767 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4533 "11111110" // /* MW 5 */ + 4534 "10111111" // /* MW 4 */ + 4535 "01110001" // /* MW 3 */ + 4536 "00000000" // /* MW 2 */ + 4537 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4538 "11111000" // VMIN_GE.bf16 x8, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4539 "00101100" // /* MW 3 */ + 4540 "01010000" // /* MW 2 */ + 4541 "00011100" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 2 "elementwise_unary.h" 113 78 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4542 "11100100" // LSHL r0, r2, r0; VMAX_LT.bf16 x6, r16, x8, x1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4543 "11011001" // /* MW 5 */ + 4544 "10000001" // /* MW 4 */ + 4545 "10110110" // /* MW 3 */ + 4546 "00000001" // /* MW 2 */ + 4547 "00010000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_unary.h" 147 20 +.src_ref 2 "elementwise_unary.h" 166 20 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4548 "11100100" // MOVX crRnd, r1; VMIN_GE.bf16 x8, r16, x7, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4549 "01011001" // /* MW 5 */ + 4550 "01110000" // /* MW 4 */ + 4551 "00001000" // /* MW 3 */ + 4552 "01010000" // /* MW 2 */ + 4553 "00001111" // /* MW 1 */ + 4554 "11111000" // VBCST.16 x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4555 "01110010" // /* MW 3 */ + 4556 "00001101" // /* MW 2 */ + 4557 "00011001" // /* MW 1 */ + 4558 "01000100" // MOVXM r5, #15616 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4559 "00000000" // /* MW 5 */ + 4560 "10111010" // /* MW 4 */ + 4561 "00110010" // /* MW 3 */ + 4562 "00000000" // /* MW 2 */ + 4563 "00000000" // /* MW 1 */ + 4564 "11111000" // VBCST.16 x3, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4565 "01110010" // /* MW 3 */ + 4566 "10010101" // /* MW 2 */ + 4567 "00011001" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 + 4568 "01000100" // MOVXM r17, #16128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4569 "00000000" // /* MW 5 */ + 4570 "10111110" // /* MW 4 */ + 4571 "00111000" // /* MW 3 */ + 4572 "00000000" // /* MW 2 */ + 4573 "00000000" // /* MW 1 */ +.src_ref 4 "abs.hpp" 32 22 first + 4574 "01111000" // VBAND x11, x6, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4575 "00101011" // /* MW 3 */ + 4576 "10110001" // /* MW 2 */ + 4577 "00011101" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 5 "mul_acc32_fp.hpp" 38 105 +.src_ref 5 "mul_acc32_fp.hpp" 38 105 +.src_ref 5 "mul_acc32_fp.hpp" 39 105 +.src_ref 5 "mul_acc32_fp.hpp" 39 105 + 4578 "11100100" // MOVX r17, #828; VBCST.16 x5, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4579 "11100101" // /* MW 5 */ + 4580 "10001010" // /* MW 4 */ + 4581 "00100101" // /* MW 3 */ + 4582 "01011110" // /* MW 2 */ + 4583 "01100100" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first + 4584 "01100010" // VMAX_LT.bf16 x9, r16, x8, x1; VMUL.f dm4, x3, x11, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4585 "01100001" // /* MW 7 */ + 4586 "11100111" // /* MW 6 */ + 4587 "10001100" // /* MW 5 */ + 4588 "11100110" // /* MW 4 */ + 4589 "11101100" // /* MW 3 */ + 4590 "11000000" // /* MW 2 */ + 4591 "00000100" // /* MW 1 */ +.src_ref 4 "abs.hpp" 32 22 first + 4592 "01111000" // VBAND x8, x9, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4593 "00101011" // /* MW 3 */ + 4594 "01001001" // /* MW 2 */ + 4595 "00011100" // /* MW 1 */ + 4596 "01000100" // MOVXM r2, #16000 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4597 "00000000" // /* MW 5 */ + 4598 "00111101" // /* MW 4 */ + 4599 "00110001" // /* MW 3 */ + 4600 "00000000" // /* MW 2 */ + 4601 "00000000" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first + 4602 "01100010" // VBCST.16 x4, r2; VMUL.f dm2, x3, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4603 "00000001" // /* MW 7 */ + 4604 "11100111" // /* MW 6 */ + 4605 "10001010" // /* MW 5 */ + 4606 "11100110" // /* MW 4 */ + 4607 "01110010" // /* MW 3 */ + 4608 "00001001" // /* MW 2 */ + 4609 "00000010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 940 83 first + 4610 "11111000" // VCONV.fp32.bf16 cml0, x5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4611 "10001010" // /* MW 3 */ + 4612 "00001011" // /* MW 2 */ + 4613 "00011000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 5 "mul_acc32_fp.hpp" 38 105 first + 4614 "01100010" // VMIN_GE.bf16 x8, r16, x10, x0; VMAC.f dm3, dm0, x6, x4, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4615 "10000001" // /* MW 7 */ + 4616 "00001100" // /* MW 6 */ + 4617 "10001011" // /* MW 5 */ + 4618 "11100110" // /* MW 4 */ + 4619 "00101100" // /* MW 3 */ + 4620 "01010000" // /* MW 2 */ + 4621 "00000100" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 38 105 +.src_ref 2 "elementwise_unary.h" 125 8 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 4622 "01010110" // VCONV.bf16.fp32 x11, cml4; MOVXM ls, #4672; VMAC.f dm1, dm0, x9, x4, r17 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4623 "10000001" // /* MW 11 */ + 4624 "00010010" // /* MW 10 */ + 4625 "10001001" // /* MW 9 */ + 4626 "00000010" // /* MW 8 */ + 4627 "00100100" // /* MW 7 */ + 4628 "10001111" // /* MW 6 */ + 4629 "00000000" // /* MW 5 */ + 4630 "00000000" // /* MW 4 */ + 4631 "11000000" // /* MW 3 */ + 4632 "01000010" // /* MW 2 */ + 4633 "10110010" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4634 "11111000" // VMAX_LT.bf16 x6, r16, x8, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4635 "11101100" // /* MW 3 */ + 4636 "01000000" // /* MW 2 */ + 4637 "00011011" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 39 105 first +.src_ref 2 "elementwise_unary.h" 125 8 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4638 "01011010" // MOVXM le, #4768; VMSC.f dm2, dm3, x11, x6, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4639 "11000011" // /* MW 9 */ + 4640 "01110110" // /* MW 8 */ + 4641 "10001010" // /* MW 7 */ + 4642 "00000010" // /* MW 6 */ + 4643 "00101010" // /* MW 5 */ + 4644 "10110111" // /* MW 4 */ + 4645 "00000000" // /* MW 3 */ + 4646 "00000000" // /* MW 2 */ + 4647 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_unary.h" 125 8 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4648 "00000010" // VCONV.bf16.fp32 x5, cml2; ADD.NC lc, r0, #-2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4649 "10000000" // /* MW 7 */ + 4650 "00111111" // /* MW 6 */ + 4651 "10111000" // /* MW 5 */ + 4652 "00000010" // /* MW 4 */ + 4653 "11000000" // /* MW 3 */ + 4654 "00100010" // /* MW 2 */ + 4655 "01010010" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first + 4656 "11111000" // VMIN_GE.bf16 x8, r16, x7, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4657 "00101100" // /* MW 3 */ + 4658 "00111000" // /* MW 2 */ + 4659 "00011100" // /* MW 1 */ +.src_ref 4 "abs.hpp" 32 22 first + 4660 "11110110" // NOPA; NOPB; NOPS; VBAND x11, x6, x2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4661 "10110000" // /* MW 11 */ + 4662 "10010101" // /* MW 10 */ + 4663 "11011000" // /* MW 9 */ + 4664 "00000010" // /* MW 8 */ + 4665 "01011011" // /* MW 7 */ + 4666 "00000001" // /* MW 6 */ + 4667 "00100000" // /* MW 5 */ + 4668 "00000000" // /* MW 4 */ + 4669 "11110000" // /* MW 3 */ + 4670 "00101100" // /* MW 2 */ + 4671 "00000000" // /* MW 1 */ +.label ZLS_F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_192 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 5 "mul_acc32_fp.hpp" 39 105 first +.src_ref 2 "elementwise_unary.h" 142 19 first +.begin_of_loop +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first +.loop_nesting 1 + 4672 "01001010" // VLDB x10, [p0], #64; VMAX_LT.bf16 x9, r16, x8, x1; VMSC.f dm4, dm1, x5, x9, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4673 "00100011" // /* MW 9 */ + 4674 "00101011" // /* MW 8 */ + 4675 "10001100" // /* MW 7 */ + 4676 "11100110" // /* MW 6 */ + 4677 "11101100" // /* MW 5 */ + 4678 "11000000" // /* MW 4 */ + 4679 "01101100" // /* MW 3 */ + 4680 "00111101" // /* MW 2 */ + 4681 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "abs.hpp" 32 22 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_unary.h" 161 19 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 4682 "01001010" // VLDB x7, [p0], #64; VBAND x8, x9, x2; VMUL.f dm4, x3, x11, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4683 "01100001" // /* MW 9 */ + 4684 "11100111" // /* MW 8 */ + 4685 "10001100" // /* MW 7 */ + 4686 "01100110" // /* MW 6 */ + 4687 "00101011" // /* MW 5 */ + 4688 "01001001" // /* MW 4 */ + 4689 "11101100" // /* MW 3 */ + 4690 "00111011" // /* MW 2 */ + 4691 "00000000" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 38 105 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4692 "01001000" // VMAC.f dm3, dm0, x6, x4, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4693 "10000001" // /* MW 3 */ + 4694 "00001100" // /* MW 2 */ + 4695 "10001011" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4696 "01001000" // VMUL.f dm2, x3, x8, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4697 "00000001" // /* MW 3 */ + 4698 "11100111" // /* MW 2 */ + 4699 "10001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 38 105 first +.src_ref 2 "elementwise_unary.h" 166 20 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4700 "01100010" // VST.CONV.bf16.fp32 cml2, [p1], #64; VMAC.f dm1, dm0, x9, x4, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4701 "10000001" // /* MW 7 */ + 4702 "00010010" // /* MW 6 */ + 4703 "10001001" // /* MW 5 */ + 4704 "00000010" // /* MW 4 */ + 4705 "01100000" // /* MW 3 */ + 4706 "10100100" // /* MW 2 */ + 4707 "00100011" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4709 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_unary.h" 147 20 first +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4710 "01111010" // NOPA; VST.CONV.bf16.fp32 cml4, [p1], #64;NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4711 "00000000" // /* MW 9 */ + 4712 "00000000" // /* MW 8 */ + 4713 "00000000" // /* MW 7 */ + 4714 "00000000" // /* MW 6 */ + 4715 "00100011" // /* MW 5 */ + 4716 "00011110" // /* MW 4 */ + 4717 "11110001" // /* MW 3 */ + 4718 "00101100" // /* MW 2 */ + 4719 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 5 "accum.hpp" 1119 102 first +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 4720 "11100001" // NOPA; NOPB; VCONV.bf16.fp32 x11, cml4; NOPX; VMIN_GE.bf16 x8, r16, x10, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4721 "00000000" // /* MW 15 */ + 4722 "00000000" // /* MW 14 */ + 4723 "01111000" // /* MW 13 */ + 4724 "00010110" // /* MW 12 */ + 4725 "00101000" // /* MW 11 */ + 4726 "00000010" // /* MW 10 */ + 4727 "00000000" // /* MW 9 */ + 4728 "00000000" // /* MW 8 */ + 4729 "00010110" // /* MW 7 */ + 4730 "10010010" // /* MW 6 */ + 4731 "00100101" // /* MW 5 */ + 4732 "00000000" // /* MW 4 */ + 4733 "11110000" // /* MW 3 */ + 4734 "00101100" // /* MW 2 */ + 4735 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.aggressive_scheduled_block_id 4 +.noswbrkpt + 4736 "11100001" // NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x8, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4737 "00000000" // /* MW 15 */ + 4738 "00000000" // /* MW 14 */ + 4739 "01111000" // /* MW 13 */ + 4740 "01110110" // /* MW 12 */ + 4741 "10100000" // /* MW 11 */ + 4742 "00000001" // /* MW 10 */ + 4743 "00000000" // /* MW 9 */ + 4744 "00000000" // /* MW 8 */ + 4745 "01011011" // /* MW 7 */ + 4746 "00000001" // /* MW 6 */ + 4747 "00100000" // /* MW 5 */ + 4748 "00000000" // /* MW 4 */ + 4749 "11110000" // /* MW 3 */ + 4750 "00101100" // /* MW 2 */ + 4751 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 39 105 first +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4752 "00011011" // NOPA; NOPB; VCONV.bf16.fp32 x5, cml2; NOPX; VMIN_GE.bf16 x8, r16, x7, x0; VMSC.f dm2, dm3, x11, x6, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4753 "10110110" // /* MW 15 */ + 4754 "01010011" // /* MW 14 */ + 4755 "01111100" // /* MW 13 */ + 4756 "00010110" // /* MW 12 */ + 4757 "00011100" // /* MW 11 */ + 4758 "00000010" // /* MW 10 */ + 4759 "00000000" // /* MW 9 */ + 4760 "00000000" // /* MW 8 */ + 4761 "00010110" // /* MW 7 */ + 4762 "10010001" // /* MW 6 */ + 4763 "00100010" // /* MW 5 */ + 4764 "00000000" // /* MW 4 */ + 4765 "11110000" // /* MW 3 */ + 4766 "00101100" // /* MW 2 */ + 4767 "00000000" // /* MW 1 */ +.label ZLE_F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_288 +.src_ref 4 "abs.hpp" 32 22 first +.end_of_loop + 4768 "11100001" // NOPA; NOPB; NOPS; NOPX; VBAND x11, x6, x2; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4769 "00000000" // /* MW 15 */ + 4770 "00000000" // /* MW 14 */ + 4771 "10111000" // /* MW 13 */ + 4772 "10010101" // /* MW 12 */ + 4773 "11011000" // /* MW 11 */ + 4774 "00000010" // /* MW 10 */ + 4775 "00000000" // /* MW 9 */ + 4776 "00000000" // /* MW 8 */ + 4777 "01011011" // /* MW 7 */ + 4778 "00000001" // /* MW 6 */ + 4779 "00100000" // /* MW 5 */ + 4780 "00000000" // /* MW 4 */ + 4781 "11110000" // /* MW 3 */ + 4782 "00101100" // /* MW 2 */ + 4783 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 5 "mul_acc32_fp.hpp" 39 105 first +.loop_nesting 0 + 4784 "01100010" // VMAX_LT.bf16 x9, r16, x8, x1; VMSC.f dm4, dm1, x5, x9, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4785 "00100011" // /* MW 7 */ + 4786 "00101011" // /* MW 6 */ + 4787 "10001100" // /* MW 5 */ + 4788 "11100110" // /* MW 4 */ + 4789 "11101100" // /* MW 3 */ + 4790 "11000000" // /* MW 2 */ + 4791 "00000100" // /* MW 1 */ +.src_ref 4 "abs.hpp" 32 22 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 4792 "01111000" // VBAND x8, x9, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4793 "00101011" // /* MW 3 */ + 4794 "01001001" // /* MW 2 */ + 4795 "00011100" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.aggressive_scheduled_block_id 5 +.noswbrkpt + 4796 "01001000" // VMUL.f dm4, x3, x11, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4797 "01100001" // /* MW 3 */ + 4798 "11100111" // /* MW 2 */ + 4799 "10001100" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 4800 "01001000" // VMUL.f dm2, x3, x8, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4801 "00000001" // /* MW 3 */ + 4802 "11100111" // /* MW 2 */ + 4803 "10001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_unary.h" 166 20 first +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 4804 "00011000" // VST.CONV.bf16.fp32 cml2, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4805 "00100011" // /* MW 3 */ + 4806 "00011101" // /* MW 2 */ + 4807 "00001001" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 4808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4809 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_unary.h" 147 20 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4810 "00011000" // VST.CONV.bf16.fp32 cml4, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4811 "00100011" // /* MW 3 */ + 4812 "00011110" // /* MW 2 */ + 4813 "00001001" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 38 105 first + 4814 "01001000" // VMAC.f dm3, dm0, x6, x4, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4815 "10000001" // /* MW 3 */ + 4816 "00001100" // /* MW 2 */ + 4817 "10001011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 38 105 + 4818 "01100010" // VCONV.bf16.fp32 x11, cml4; VMAC.f dm1, dm0, x9, x4, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4819 "10000001" // /* MW 7 */ + 4820 "00010010" // /* MW 6 */ + 4821 "10001001" // /* MW 5 */ + 4822 "00000010" // /* MW 4 */ + 4823 "11000000" // /* MW 3 */ + 4824 "01000010" // /* MW 2 */ + 4825 "10110010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 + 4826 "00011000" // VCONV.bf16.fp32 x5, cml2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4827 "00010110" // /* MW 3 */ + 4828 "10010001" // /* MW 2 */ + 4829 "00001010" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 39 105 first + 4830 "01001000" // VMSC.f dm2, dm3, x11, x6, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4831 "11000011" // /* MW 3 */ + 4832 "01110110" // /* MW 2 */ + 4833 "10001010" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 39 105 + 4834 "01001000" // VMSC.f dm4, dm1, x5, x9, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4835 "00100011" // /* MW 3 */ + 4836 "00101011" // /* MW 2 */ + 4837 "10001100" // /* MW 1 */ + 4838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4839 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 129 4 first + 4840 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 4841 "00000000" // /* MW 3 */ + 4842 "00101000" // /* MW 2 */ + 4843 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4845 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4846 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4847 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_unary.h" 166 20 first +.delay_slot + 4848 "00011000" // VST.CONV.bf16.fp32 cml2, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4849 "00100011" // /* MW 3 */ + 4850 "00011101" // /* MW 2 */ + 4851 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_unary.h" 147 20 first +.delay_slot + 4852 "00011000" // VST.CONV.bf16.fp32 cml4, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4853 "00100011" // /* MW 3 */ + 4854 "00011110" // /* MW 2 */ + 4855 "00001001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4856 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E__end +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_end0 + 4857 "00000000" // /* MW 1 */ +.label __Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_sigmoid1d _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 6 "superkernels.cpp" 210 first +.src_ref 6 "superkernels.cpp" 215 6 +.function_start + 4864 "01000100" // MOVXM p3, #508416 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4865 "00000000" // /* MW 5 */ + 4866 "11000100" // /* MW 4 */ + 4867 "11000110" // /* MW 3 */ + 4868 "00000111" // /* MW 2 */ + 4869 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 215 6 first + 4870 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4871 "11000001" // /* MW 5 */ + 4872 "10110101" // /* MW 4 */ + 4873 "11011000" // /* MW 3 */ + 4874 "11000010" // /* MW 2 */ + 4875 "01100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 210 + 4876 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4877 "00000001" // /* MW 5 */ + 4878 "00000000" // /* MW 4 */ + 4879 "00000000" // /* MW 3 */ + 4880 "00001000" // /* MW 2 */ + 4881 "00000000" // /* MW 1 */ + 4882 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4883 "01110000" // /* MW 7 */ + 4884 "11010000" // /* MW 6 */ + 4885 "00001011" // /* MW 5 */ + 4886 "00000000" // /* MW 4 */ + 4887 "10110000" // /* MW 3 */ + 4888 "01100011" // /* MW 2 */ + 4889 "11111111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 212 11 + 4890 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #508420 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4891 "00010001" // /* MW 9 */ + 4892 "00000010" // /* MW 8 */ + 4893 "00110001" // /* MW 7 */ + 4894 "11110011" // /* MW 6 */ + 4895 "00000001" // /* MW 5 */ + 4896 "00000000" // /* MW 4 */ + 4897 "10110000" // /* MW 3 */ + 4898 "10000010" // /* MW 2 */ + 4899 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 + 4900 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4901 "11000000" // /* MW 3 */ + 4902 "11010100" // /* MW 2 */ + 4903 "00011011" // /* MW 1 */ + 4904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4905 "00000000" // /* MW 1 */ + 4906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4907 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 215 6 +.src_ref 6 "superkernels.cpp" 215 16 + 4908 "10000100" // JNZ r16, #5072 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5072 delay_slots=5 */ + 4909 "00000001" // /* MW 5 */ + 4910 "01000000" // /* MW 4 */ + 4911 "11101000" // /* MW 3 */ + 4912 "00001001" // /* MW 2 */ + 4913 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 212 22 first +.delay_slot + 4914 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4915 "10010000" // /* MW 3 */ + 4916 "01100010" // /* MW 2 */ + 4917 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 212 30 +.delay_slot + 4918 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4919 "11111011" // /* MW 3 */ + 4920 "01100011" // /* MW 2 */ + 4921 "00010100" // /* MW 1 */ +.delay_slot + 4922 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4923 "00111101" // /* MW 3 */ + 4924 "11110100" // /* MW 2 */ + 4925 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 6 "superkernels.cpp" 212 11 +.delay_slot + 4926 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4927 "01110000" // /* MW 7 */ + 4928 "01100000" // /* MW 6 */ + 4929 "00110000" // /* MW 5 */ + 4930 "00000011" // /* MW 4 */ + 4931 "00110000" // /* MW 3 */ + 4932 "11000110" // /* MW 2 */ + 4933 "11000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 218 4 +.src_ref 6 "superkernels.cpp" 229 2 +.delay_slot + 4934 "01000100" // MOVXM p0, #508800 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4935 "00000000" // /* MW 5 */ + 4936 "11000111" // /* MW 4 */ + 4937 "11000000" // /* MW 3 */ + 4938 "00000111" // /* MW 2 */ + 4939 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4940 "01000100" // MOVXM p2, #508448 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4941 "01000000" // /* MW 5 */ + 4942 "11000100" // /* MW 4 */ + 4943 "11000100" // /* MW 3 */ + 4944 "00000111" // /* MW 2 */ + 4945 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4946 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #508444 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4947 "00010000" // /* MW 9 */ + 4948 "00001110" // /* MW 8 */ + 4949 "00110001" // /* MW 7 */ + 4950 "11110001" // /* MW 6 */ + 4951 "00000001" // /* MW 5 */ + 4952 "00000000" // /* MW 4 */ + 4953 "11100000" // /* MW 3 */ + 4954 "11000000" // /* MW 2 */ + 4955 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4957 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 218 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 4958 "00000100" // JL #4400 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4400 delay_slots=5 */ + 4959 "00000001" // /* MW 5 */ + 4960 "00000000" // /* MW 4 */ + 4961 "10011000" // /* MW 3 */ + 4962 "00001000" // /* MW 2 */ + 4963 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4964 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4965 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4967 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4968 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4969 "00110001" // /* MW 3 */ + 4970 "00100000" // /* MW 2 */ + 4971 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 4972 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4973 "00000101" // /* MW 3 */ + 4974 "00100000" // /* MW 2 */ + 4975 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 4976 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4977 "00000000" // /* MW 15 */ + 4978 "00000000" // /* MW 14 */ + 4979 "01111000" // /* MW 13 */ + 4980 "10100101" // /* MW 12 */ + 4981 "00000001" // /* MW 11 */ + 4982 "00000000" // /* MW 10 */ + 4983 "00000000" // /* MW 9 */ + 4984 "10000000" // /* MW 8 */ + 4985 "00010001" // /* MW 7 */ + 4986 "00000110" // /* MW 6 */ + 4987 "00100010" // /* MW 5 */ + 4988 "00000000" // /* MW 4 */ + 4989 "11110000" // /* MW 3 */ + 4990 "00101100" // /* MW 2 */ + 4991 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 222 18 +.return_address + 4992 "01000100" // MOVXM p2, #508420 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4993 "00001000" // /* MW 5 */ + 4994 "11000100" // /* MW 4 */ + 4995 "11000100" // /* MW 3 */ + 4996 "00000111" // /* MW 2 */ + 4997 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 222 18 first +.src_ref 6 "superkernels.cpp" 222 46 + 4998 "10111010" // LDA r16, [p2]; MOVXM p2, #508800 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4999 "00010000" // /* MW 9 */ + 5000 "11000000" // /* MW 8 */ + 5001 "00110001" // /* MW 7 */ + 5002 "11110001" // /* MW 6 */ + 5003 "00000001" // /* MW 5 */ + 5004 "00000000" // /* MW 4 */ + 5005 "11010000" // /* MW 3 */ + 5006 "11000010" // /* MW 2 */ + 5007 "01000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 220 32 +.src_ref 6 "superkernels.cpp" 222 46 +.src_ref 6 "superkernels.cpp" 229 2 + 5008 "10111010" // LDA r17, [p2]; MOVXM p2, #508800 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5009 "00010000" // /* MW 9 */ + 5010 "11000000" // /* MW 8 */ + 5011 "00110001" // /* MW 7 */ + 5012 "11110001" // /* MW 6 */ + 5013 "00000001" // /* MW 5 */ + 5014 "00000000" // /* MW 4 */ + 5015 "11010000" // /* MW 3 */ + 5016 "11000110" // /* MW 2 */ + 5017 "01000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 220 32 first +.src_ref 6 "superkernels.cpp" 222 16 +.src_ref 6 "superkernels.cpp" 227 47 + 5018 "10111010" // LDA.u16 r18, [p2, #8]; MOVXM p1, #508424 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5019 "00010000" // /* MW 9 */ + 5020 "00000100" // /* MW 8 */ + 5021 "10110001" // /* MW 7 */ + 5022 "11110000" // /* MW 6 */ + 5023 "00000001" // /* MW 5 */ + 5024 "00000000" // /* MW 4 */ + 5025 "01010000" // /* MW 3 */ + 5026 "11001011" // /* MW 2 */ + 5027 "01001000" // /* MW 1 */ + 5028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5029 "00000000" // /* MW 1 */ + 5030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5031 "00000000" // /* MW 1 */ + 5032 "10000100" // J #5088 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=5088 delay_slots=5 */ + 5033 "00000000" // /* MW 5 */ + 5034 "00000000" // /* MW 4 */ + 5035 "11110000" // /* MW 3 */ + 5036 "00001001" // /* MW 2 */ + 5037 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 220 13 +.delay_slot + 5038 "01000100" // MOVXM p0, #508440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5039 "00110000" // /* MW 5 */ + 5040 "11000100" // /* MW 4 */ + 5041 "11000000" // /* MW 3 */ + 5042 "00000111" // /* MW 2 */ + 5043 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5045 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 222 27 first +.delay_slot + 5046 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5047 "00001111" // /* MW 3 */ + 5048 "01100001" // /* MW 2 */ + 5049 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 220 13 first +.delay_slot + 5050 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5051 "10100011" // /* MW 5 */ + 5052 "00001100" // /* MW 4 */ + 5053 "11110000" // /* MW 3 */ + 5054 "00101100" // /* MW 2 */ + 5055 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 222 16 first +.delay_slot + 5056 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5057 "00000000" // /* MW 15 */ + 5058 "00000000" // /* MW 14 */ + 5059 "01111000" // /* MW 13 */ + 5060 "10100101" // /* MW 12 */ + 5061 "00000001" // /* MW 11 */ + 5062 "00000000" // /* MW 10 */ + 5063 "00000000" // /* MW 9 */ + 5064 "10000000" // /* MW 8 */ + 5065 "00010001" // /* MW 7 */ + 5066 "00000110" // /* MW 6 */ + 5067 "00100001" // /* MW 5 */ + 5068 "00000000" // /* MW 4 */ + 5069 "11110000" // /* MW 3 */ + 5070 "00101100" // /* MW 2 */ + 5071 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 6 "superkernels.cpp" 227 47 +.src_ref 6 "superkernels.cpp" 229 2 + 5072 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #508424; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5073 "00000000" // /* MW 15 */ + 5074 "00000000" // /* MW 14 */ + 5075 "00010000" // /* MW 13 */ + 5076 "00000100" // /* MW 12 */ + 5077 "10110001" // /* MW 11 */ + 5078 "11110000" // /* MW 10 */ + 5079 "00000001" // /* MW 9 */ + 5080 "00000000" // /* MW 8 */ + 5081 "10001011" // /* MW 7 */ + 5082 "10000000" // /* MW 6 */ + 5083 "00100010" // /* MW 5 */ + 5084 "00000000" // /* MW 4 */ + 5085 "11110000" // /* MW 3 */ + 5086 "00101100" // /* MW 2 */ + 5087 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 +.src_ref 1 "io_buffer_main.h" 242 49 first + 5088 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5089 "00000000" // /* MW 7 */ + 5090 "11000011" // /* MW 6 */ + 5091 "10110011" // /* MW 5 */ + 5092 "00000011" // /* MW 4 */ + 5093 "01100000" // /* MW 3 */ + 5094 "10010001" // /* MW 2 */ + 5095 "01110011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 226 2 + 5096 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5097 "00010000" // /* MW 9 */ + 5098 "00000000" // /* MW 8 */ + 5099 "00110001" // /* MW 7 */ + 5100 "11110000" // /* MW 6 */ + 5101 "00000001" // /* MW 5 */ + 5102 "00000000" // /* MW 4 */ + 5103 "11010000" // /* MW 3 */ + 5104 "11101110" // /* MW 2 */ + 5105 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 5106 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5107 "00010110" // /* MW 3 */ + 5108 "11111110" // /* MW 2 */ + 5109 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 5110 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5111 "00110110" // /* MW 3 */ + 5112 "11111110" // /* MW 2 */ + 5113 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first + 5114 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5115 "01010110" // /* MW 3 */ + 5116 "01000110" // /* MW 2 */ + 5117 "00000111" // /* MW 1 */ + 5118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5119 "00000000" // /* MW 1 */ + 5120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5121 "00000000" // /* MW 1 */ + 5122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5123 "00000000" // /* MW 1 */ + 5124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5125 "00000000" // /* MW 1 */ + 5126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5127 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 5128 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5129 "00000010" // /* MW 3 */ + 5130 "01100001" // /* MW 2 */ + 5131 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 + 5132 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5133 "00010001" // /* MW 3 */ + 5134 "00000110" // /* MW 2 */ + 5135 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 + 5136 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5137 "11111101" // /* MW 3 */ + 5138 "11100000" // /* MW 2 */ + 5139 "00010111" // /* MW 1 */ + 5140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5141 "00000000" // /* MW 1 */ + 5142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5143 "00000000" // /* MW 1 */ + 5144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5145 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 5146 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5147 "00001000" // /* MW 3 */ + 5148 "10010011" // /* MW 2 */ + 5149 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 227 45 + 5150 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5151 "10000001" // /* MW 5 */ + 5152 "10101101" // /* MW 4 */ + 5153 "10100111" // /* MW 3 */ + 5154 "00000000" // /* MW 2 */ + 5155 "00000100" // /* MW 1 */ + 5156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5157 "00000000" // /* MW 1 */ + 5158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5159 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 226 2 first + 5160 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5161 "00110110" // /* MW 3 */ + 5162 "00000110" // /* MW 2 */ + 5163 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 1 "io_buffer_main.h" 348 51 + 5164 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5165 "10000001" // /* MW 5 */ + 5166 "11011101" // /* MW 4 */ + 5167 "11011100" // /* MW 3 */ + 5168 "11001010" // /* MW 2 */ + 5169 "11000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 227 47 first + 5170 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5171 "01110110" // /* MW 3 */ + 5172 "00000110" // /* MW 2 */ + 5173 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 5174 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5175 "10011110" // /* MW 3 */ + 5176 "01011100" // /* MW 2 */ + 5177 "00000111" // /* MW 1 */ + 5178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5179 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 229 2 first +.no_stack_arguments + 5180 "00000100" // JL #4480 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4480 delay_slots=5 */ + 5181 "00000001" // /* MW 5 */ + 5182 "00000000" // /* MW 4 */ + 5183 "11000000" // /* MW 3 */ + 5184 "00001000" // /* MW 2 */ + 5185 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5187 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 226 2 first +.delay_slot + 5188 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5189 "00000111" // /* MW 3 */ + 5190 "01100010" // /* MW 2 */ + 5191 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 226 2 +.delay_slot + 5192 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5193 "00110001" // /* MW 3 */ + 5194 "00000110" // /* MW 2 */ + 5195 "00001000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 227 45 first +.delay_slot + 5196 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5197 "00001101" // /* MW 3 */ + 5198 "11100001" // /* MW 2 */ + 5199 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 227 45 +.delay_slot + 5200 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5201 "00000000" // /* MW 15 */ + 5202 "00000000" // /* MW 14 */ + 5203 "10101000" // /* MW 13 */ + 5204 "10100000" // /* MW 12 */ + 5205 "00110100" // /* MW 11 */ + 5206 "00000000" // /* MW 10 */ + 5207 "00000000" // /* MW 9 */ + 5208 "00000000" // /* MW 8 */ + 5209 "01011011" // /* MW 7 */ + 5210 "00000001" // /* MW 6 */ + 5211 "00100000" // /* MW 5 */ + 5212 "00000000" // /* MW 4 */ + 5213 "11110000" // /* MW 3 */ + 5214 "00101100" // /* MW 2 */ + 5215 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first +.src_ref 6 "superkernels.cpp" 231 6 +.src_ref 6 "superkernels.cpp" 232 14 +.return_address + 5216 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5217 "00010000" // /* MW 9 */ + 5218 "00000000" // /* MW 8 */ + 5219 "00110001" // /* MW 7 */ + 5220 "11110011" // /* MW 6 */ + 5221 "00000001" // /* MW 5 */ + 5222 "00000000" // /* MW 4 */ + 5223 "11010000" // /* MW 3 */ + 5224 "11000110" // /* MW 2 */ + 5225 "11001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 449 8 + 5226 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5227 "00000101" // /* MW 3 */ + 5228 "00100000" // /* MW 2 */ + 5229 "00010000" // /* MW 1 */ + 5230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5231 "00000000" // /* MW 1 */ + 5232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5233 "00000000" // /* MW 1 */ + 5234 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5235 "00000000" // /* MW 1 */ + 5236 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5237 "00000000" // /* MW 1 */ + 5238 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5239 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 5240 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5241 "00001000" // /* MW 3 */ + 5242 "01010001" // /* MW 2 */ + 5243 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first +.src_ref 6 "superkernels.cpp" 231 19 + 5244 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #508440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5245 "00010000" // /* MW 9 */ + 5246 "00001100" // /* MW 8 */ + 5247 "00110001" // /* MW 7 */ + 5248 "11110001" // /* MW 6 */ + 5249 "00000001" // /* MW 5 */ + 5250 "00000000" // /* MW 4 */ + 5251 "11010000" // /* MW 3 */ + 5252 "11001110" // /* MW 2 */ + 5253 "11111100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 231 6 first + 5254 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5255 "00110110" // /* MW 3 */ + 5256 "00000110" // /* MW 2 */ + 5257 "00000110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 231 19 + 5258 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5259 "01010110" // /* MW 3 */ + 5260 "00000110" // /* MW 2 */ + 5261 "00000010" // /* MW 1 */ + 5262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5263 "00000000" // /* MW 1 */ + 5264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5265 "00000000" // /* MW 1 */ + 5266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5267 "00000000" // /* MW 1 */ + 5268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5269 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 first + 5270 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5271 "00110001" // /* MW 3 */ + 5272 "00100001" // /* MW 2 */ + 5273 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 5274 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5275 "00010001" // /* MW 3 */ + 5276 "11100110" // /* MW 2 */ + 5277 "00001111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 231 16 first + 5278 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5279 "00101000" // /* MW 3 */ + 5280 "01100001" // /* MW 2 */ + 5281 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 231 6 + 5282 "10000100" // JNZ r16, #5312 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5312 delay_slots=5 */ + 5283 "00000001" // /* MW 5 */ + 5284 "01000000" // /* MW 4 */ + 5285 "01100000" // /* MW 3 */ + 5286 "00001010" // /* MW 2 */ + 5287 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5289 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5291 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5293 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5294 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5295 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5297 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 232 14 + 5298 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5299 "00000001" // /* MW 3 */ + 5300 "00100000" // /* MW 2 */ + 5301 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 232 14 first + 5302 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5303 "00000000" // /* MW 9 */ + 5304 "00000000" // /* MW 8 */ + 5305 "00000000" // /* MW 7 */ + 5306 "10000000" // /* MW 6 */ + 5307 "00010001" // /* MW 5 */ + 5308 "00000110" // /* MW 4 */ + 5309 "11110110" // /* MW 3 */ + 5310 "00101100" // /* MW 2 */ + 5311 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 6 "superkernels.cpp" 234 + 5312 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5313 "00111001" // /* MW 3 */ + 5314 "11110100" // /* MW 2 */ + 5315 "00000111" // /* MW 1 */ + 5316 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5317 "00011001" // /* MW 3 */ + 5318 "11111011" // /* MW 2 */ + 5319 "00000111" // /* MW 1 */ + 5320 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5321 "00000000" // /* MW 1 */ + 5322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5323 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 5324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5325 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 5326 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5327 "11110001" // /* MW 3 */ + 5328 "11111101" // /* MW 2 */ + 5329 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5331 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 234 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5332 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 5333 "00000000" // /* MW 3 */ + 5334 "00101000" // /* MW 2 */ + 5335 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5336 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5337 "10100000" // /* MW 3 */ + 5338 "01100111" // /* MW 2 */ + 5339 "00011111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 234 +.delay_slot + 5340 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5341 "00000001" // /* MW 5 */ + 5342 "00000000" // /* MW 4 */ + 5343 "00000000" // /* MW 3 */ + 5344 "11111000" // /* MW 2 */ + 5345 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5347 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5349 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 5351 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.src_ref 2 "elementwise_binary.h" 100 first +.src_ref 2 "elementwise_binary.h" 103 4 first +.function_start + 5360 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 5361 "00000000" // /* MW 3 */ + 5362 "00101000" // /* MW 2 */ + 5363 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 5364 "01000100" // MOVXM p0, #508768 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5365 "11000000" // /* MW 5 */ + 5366 "11000110" // /* MW 4 */ + 5367 "11000000" // /* MW 3 */ + 5368 "00000111" // /* MW 2 */ + 5369 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 5370 "10111000" // MOV m0, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5371 "10000000" // /* MW 3 */ + 5372 "00000000" // /* MW 2 */ + 5373 "00011000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 first +.delay_slot + 5374 "10011000" // ST m0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5375 "00000001" // /* MW 3 */ + 5376 "00000100" // /* MW 2 */ + 5377 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 5378 "10011000" // ST m0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5379 "00000001" // /* MW 3 */ + 5380 "00010100" // /* MW 2 */ + 5381 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_end0 + 5383 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.src_ref 2 "elementwise_binary.h" 89 first +.src_ref 2 "elementwise_binary.h" 92 22 +.src_ref 2 "elementwise_binary.h" 92 24 first +.function_start + 5392 "10111010" // LDA el0, [p1], #4; MOVXM p0, #508736 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5393 "00010000" // /* MW 9 */ + 5394 "10100000" // /* MW 8 */ + 5395 "00110001" // /* MW 7 */ + 5396 "11110000" // /* MW 6 */ + 5397 "00000001" // /* MW 5 */ + 5398 "00000000" // /* MW 4 */ + 5399 "11010000" // /* MW 3 */ + 5400 "10000101" // /* MW 2 */ + 5401 "00100011" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 89 + 5402 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5403 "00000001" // /* MW 5 */ + 5404 "00000000" // /* MW 4 */ + 5405 "00000000" // /* MW 3 */ + 5406 "00001000" // /* MW 2 */ + 5407 "00000000" // /* MW 1 */ + 5408 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5409 "00111101" // /* MW 3 */ + 5410 "11111100" // /* MW 2 */ + 5411 "00001111" // /* MW 1 */ + 5412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5413 "00000000" // /* MW 1 */ + 5414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5415 "00000000" // /* MW 1 */ + 5416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5417 "00000000" // /* MW 1 */ + 5418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5419 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 92 22 first + 5420 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5421 "00101001" // /* MW 3 */ + 5422 "00011100" // /* MW 2 */ + 5423 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 93 24 first + 5424 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5425 "00101110" // /* MW 3 */ + 5426 "00011100" // /* MW 2 */ + 5427 "00000001" // /* MW 1 */ + 5428 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5429 "00000000" // /* MW 1 */ + 5430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5431 "00000000" // /* MW 1 */ + 5432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5433 "00000000" // /* MW 1 */ + 5434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5435 "00000000" // /* MW 1 */ + 5436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5437 "00000000" // /* MW 1 */ + 5438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5439 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 93 22 + 5440 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5441 "00101001" // /* MW 3 */ + 5442 "00011100" // /* MW 2 */ + 5443 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 94 24 first + 5444 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5445 "00101110" // /* MW 3 */ + 5446 "00000100" // /* MW 2 */ + 5447 "00000001" // /* MW 1 */ + 5448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5449 "00000000" // /* MW 1 */ + 5450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5451 "00000000" // /* MW 1 */ + 5452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5453 "00000000" // /* MW 1 */ + 5454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5455 "00000000" // /* MW 1 */ + 5456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5457 "00000000" // /* MW 1 */ + 5458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5459 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 94 22 + 5460 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5461 "00101001" // /* MW 3 */ + 5462 "00011100" // /* MW 2 */ + 5463 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 95 24 first + 5464 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5465 "00101110" // /* MW 3 */ + 5466 "00010100" // /* MW 2 */ + 5467 "00000001" // /* MW 1 */ + 5468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5469 "00000000" // /* MW 1 */ + 5470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5471 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 96 8 first +.no_stack_arguments + 5472 "00000100" // JL #5360 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=5360 delay_slots=5 */ + 5473 "00000001" // /* MW 5 */ + 5474 "00000000" // /* MW 4 */ + 5475 "01111000" // /* MW 3 */ + 5476 "00001010" // /* MW 2 */ + 5477 "00000000" // /* MW 1 */ +.delay_slot + 5478 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5479 "10011101" // /* MW 3 */ + 5480 "11111011" // /* MW 2 */ + 5481 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5483 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5485 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 95 22 first +.delay_slot + 5486 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5487 "00101001" // /* MW 3 */ + 5488 "11011100" // /* MW 2 */ + 5489 "00001000" // /* MW 1 */ +.src_ref 3 "mul_impl.h" 93 25 +.delay_slot + 5490 "00101110" // NOPA; NOPS; MOV p7, p0; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5491 "00011100" // /* MW 13 */ + 5492 "00000000" // /* MW 12 */ + 5493 "00000000" // /* MW 11 */ + 5494 "00000111" // /* MW 10 */ + 5495 "00000110" // /* MW 9 */ + 5496 "01111011" // /* MW 8 */ + 5497 "00000000" // /* MW 7 */ + 5498 "00000000" // /* MW 6 */ + 5499 "10110110" // /* MW 5 */ + 5500 "00000010" // /* MW 4 */ + 5501 "11110000" // /* MW 3 */ + 5502 "00101100" // /* MW 2 */ + 5503 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 98 4 +.return_address + 5504 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5505 "00111001" // /* MW 3 */ + 5506 "11111100" // /* MW 2 */ + 5507 "00000111" // /* MW 1 */ + 5508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5509 "00000000" // /* MW 1 */ + 5510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5511 "00000000" // /* MW 1 */ + 5512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5513 "00000000" // /* MW 1 */ + 5514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5515 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5517 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5518 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5519 "10011001" // /* MW 3 */ + 5520 "11111011" // /* MW 2 */ + 5521 "00000111" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 98 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5522 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 5523 "00000000" // /* MW 3 */ + 5524 "00101000" // /* MW 2 */ + 5525 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5527 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5529 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5531 "00000000" // /* MW 1 */ +.src_ref 3 "mul_impl.h" 93 25 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5532 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5533 "00000001" // /* MW 3 */ + 5534 "00100000" // /* MW 2 */ + 5535 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 98 4 +.src_ref 3 "mul_impl.h" 93 25 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5536 "00111010" // ST r16, [p7, #16]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5537 "01110001" // /* MW 9 */ + 5538 "00000000" // /* MW 8 */ + 5539 "00000000" // /* MW 7 */ + 5540 "00000000" // /* MW 6 */ + 5541 "11111110" // /* MW 5 */ + 5542 "00111111" // /* MW 4 */ + 5543 "00110000" // /* MW 3 */ + 5544 "11000010" // /* MW 2 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + 5545 "11101000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.function run _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.src_ref 2 "elementwise_binary.h" 108 first +.src_ref 2 "elementwise_binary.h" 115 37 +.src_ref 2 "elementwise_binary.h" 115 37 +.function_start + 5552 "10111010" // MOVA m0, #32; MOVXM p3, #508736 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5553 "00010000" // /* MW 9 */ + 5554 "10100000" // /* MW 8 */ + 5555 "10110001" // /* MW 7 */ + 5556 "11110001" // /* MW 6 */ + 5557 "00000001" // /* MW 5 */ + 5558 "00000000" // /* MW 4 */ + 5559 "10000000" // /* MW 3 */ + 5560 "00000000" // /* MW 2 */ + 5561 "00000100" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 115 37 first +.src_ref 2 "elementwise_binary.h" 115 78 + 5562 "10111010" // LDA r1, [p3], m0; MOVXM p4, #508448 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5563 "00010000" // /* MW 9 */ + 5564 "00010000" // /* MW 8 */ + 5565 "00110001" // /* MW 7 */ + 5566 "11110010" // /* MW 6 */ + 5567 "00000001" // /* MW 5 */ + 5568 "00000000" // /* MW 4 */ + 5569 "11010000" // /* MW 3 */ + 5570 "00000110" // /* MW 2 */ + 5571 "01100001" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 115 78 +.src_ref 2 "elementwise_binary.h" 115 78 + 5572 "10111010" // LDA m1, [p3]; MOVX r0, #828; MOV r3, #-6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5573 "01011000" // /* MW 9 */ + 5574 "11111010" // /* MW 8 */ + 5575 "01101111" // /* MW 7 */ + 5576 "10001000" // /* MW 6 */ + 5577 "00000111" // /* MW 5 */ + 5578 "00011000" // /* MW 4 */ + 5579 "11010000" // /* MW 3 */ + 5580 "10010000" // /* MW 2 */ + 5581 "01100000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 115 78 +.src_ref 2 "elementwise_binary.h" 127 8 first + 5582 "10111010" // LDA m0, [p3, #4]; MOVXM ls, #5744 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5583 "00010000" // /* MW 9 */ + 5584 "00111000" // /* MW 8 */ + 5585 "01111011" // /* MW 7 */ + 5586 "00000100" // /* MW 6 */ + 5587 "00000000" // /* MW 5 */ + 5588 "00000000" // /* MW 4 */ + 5589 "11010000" // /* MW 3 */ + 5590 "10000000" // /* MW 2 */ + 5591 "01100010" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 115 78 first +.src_ref 2 "elementwise_binary.h" 127 8 + 5592 "10111010" // LDA.s8 r2, [p4]; MOVXM le, #5760 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5593 "00010000" // /* MW 9 */ + 5594 "01000000" // /* MW 8 */ + 5595 "10111011" // /* MW 7 */ + 5596 "00000101" // /* MW 6 */ + 5597 "00000000" // /* MW 5 */ + 5598 "00000000" // /* MW 4 */ + 5599 "01010000" // /* MW 3 */ + 5600 "10001000" // /* MW 2 */ + 5601 "10000000" // /* MW 1 */ + 5602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5603 "00000000" // /* MW 1 */ + 5604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5605 "00000000" // /* MW 1 */ + 5606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5607 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 115 78 + 5608 "10011000" // LSHL r1, r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5609 "00111101" // /* MW 3 */ + 5610 "01000010" // /* MW 2 */ + 5611 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 127 8 first + 5612 "10011000" // ADD.NC lc, r1, #-7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5613 "11111100" // /* MW 3 */ + 5614 "01110000" // /* MW 2 */ + 5615 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first + 5616 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5617 "11101000" // /* MW 5 */ + 5618 "01010000" // /* MW 4 */ + 5619 "01110000" // /* MW 3 */ + 5620 "00010011" // /* MW 2 */ + 5621 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 154 20 +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.src_ref 2 "elementwise_binary.h" 177 20 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5622 "00010010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; MOVX crRnd, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5623 "10000000" // /* MW 7 */ + 5624 "10111010" // /* MW 6 */ + 5625 "01101000" // /* MW 5 */ + 5626 "01010000" // /* MW 4 */ + 5627 "01110000" // /* MW 3 */ + 5628 "00011011" // /* MW 2 */ + 5629 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5630 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5631 "11101000" // /* MW 5 */ + 5632 "01010000" // /* MW 4 */ + 5633 "01110000" // /* MW 3 */ + 5634 "00010011" // /* MW 2 */ + 5635 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5636 "00111100" // VLDA x3, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5637 "01101000" // /* MW 5 */ + 5638 "01010000" // /* MW 4 */ + 5639 "01110000" // /* MW 3 */ + 5640 "00011011" // /* MW 2 */ + 5641 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5642 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5643 "11101000" // /* MW 5 */ + 5644 "01010000" // /* MW 4 */ + 5645 "01110000" // /* MW 3 */ + 5646 "00010011" // /* MW 2 */ + 5647 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5648 "00111100" // VLDA x3, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5649 "01101000" // /* MW 5 */ + 5650 "01010000" // /* MW 4 */ + 5651 "01110000" // /* MW 3 */ + 5652 "00011011" // /* MW 2 */ + 5653 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5654 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5655 "11101000" // /* MW 5 */ + 5656 "01010000" // /* MW 4 */ + 5657 "01110000" // /* MW 3 */ + 5658 "00010011" // /* MW 2 */ + 5659 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5660 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5661 "01000001" // /* MW 9 */ + 5662 "11100010" // /* MW 8 */ + 5663 "00000000" // /* MW 7 */ + 5664 "00011101" // /* MW 6 */ + 5665 "00110100" // /* MW 5 */ + 5666 "00101000" // /* MW 4 */ + 5667 "01110000" // /* MW 3 */ + 5668 "00011011" // /* MW 2 */ + 5669 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5670 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5671 "01100001" // /* MW 9 */ + 5672 "11100000" // /* MW 8 */ + 5673 "00000001" // /* MW 7 */ + 5674 "00011101" // /* MW 6 */ + 5675 "01110100" // /* MW 5 */ + 5676 "00101000" // /* MW 4 */ + 5677 "01110000" // /* MW 3 */ + 5678 "00010011" // /* MW 2 */ + 5679 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5680 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5681 "01000001" // /* MW 9 */ + 5682 "11100010" // /* MW 8 */ + 5683 "00000000" // /* MW 7 */ + 5684 "00011101" // /* MW 6 */ + 5685 "00110100" // /* MW 5 */ + 5686 "00101000" // /* MW 4 */ + 5687 "01110000" // /* MW 3 */ + 5688 "00011011" // /* MW 2 */ + 5689 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5690 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5691 "01100001" // /* MW 9 */ + 5692 "11100000" // /* MW 8 */ + 5693 "00000001" // /* MW 7 */ + 5694 "00011101" // /* MW 6 */ + 5695 "01110100" // /* MW 5 */ + 5696 "00101000" // /* MW 4 */ + 5697 "01110000" // /* MW 3 */ + 5698 "00010011" // /* MW 2 */ + 5699 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5700 "01100110" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; NOPS; VMUL.f dm0, x1, x2, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5701 "01000001" // /* MW 11 */ + 5702 "11100010" // /* MW 10 */ + 5703 "00000000" // /* MW 9 */ + 5704 "10001110" // /* MW 8 */ + 5705 "10101101" // /* MW 7 */ + 5706 "00000000" // /* MW 6 */ + 5707 "01101000" // /* MW 5 */ + 5708 "01010000" // /* MW 4 */ + 5709 "01110000" // /* MW 3 */ + 5710 "00011011" // /* MW 2 */ + 5711 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5712 "00001011" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; NOPS; NOPX; NOPM; VMUL.f dm1, x0, x3, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5713 "00000011" // /* MW 15 */ + 5714 "00001111" // /* MW 14 */ + 5715 "01111000" // /* MW 13 */ + 5716 "10100101" // /* MW 12 */ + 5717 "00000001" // /* MW 11 */ + 5718 "00000000" // /* MW 10 */ + 5719 "00000000" // /* MW 9 */ + 5720 "00000000" // /* MW 8 */ + 5721 "01011011" // /* MW 7 */ + 5722 "00000001" // /* MW 6 */ + 5723 "11101000" // /* MW 5 */ + 5724 "01010000" // /* MW 4 */ + 5725 "01110000" // /* MW 3 */ + 5726 "00010011" // /* MW 2 */ + 5727 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5728 "00001011" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5729 "00010010" // /* MW 15 */ + 5730 "00000111" // /* MW 14 */ + 5731 "01111000" // /* MW 13 */ + 5732 "10100101" // /* MW 12 */ + 5733 "00000001" // /* MW 11 */ + 5734 "00000000" // /* MW 10 */ + 5735 "00000000" // /* MW 9 */ + 5736 "00000000" // /* MW 8 */ + 5737 "00100011" // /* MW 7 */ + 5738 "00011100" // /* MW 6 */ + 5739 "01101010" // /* MW 5 */ + 5740 "01010000" // /* MW 4 */ + 5741 "01110000" // /* MW 3 */ + 5742 "00011011" // /* MW 2 */ + 5743 "00100001" // /* MW 1 */ +.label ZLS_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_192 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.src_ref 2 "elementwise_binary.h" 154 20 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 5744 "00001011" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; VMUL.f dm1, x0, x3, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5745 "00000011" // /* MW 15 */ + 5746 "00001111" // /* MW 14 */ + 5747 "01111000" // /* MW 13 */ + 5748 "10100101" // /* MW 12 */ + 5749 "00000001" // /* MW 11 */ + 5750 "00000000" // /* MW 10 */ + 5751 "00000000" // /* MW 9 */ + 5752 "00000000" // /* MW 8 */ + 5753 "10100011" // /* MW 7 */ + 5754 "00011100" // /* MW 6 */ + 5755 "11101010" // /* MW 5 */ + 5756 "01010000" // /* MW 4 */ + 5757 "01110000" // /* MW 3 */ + 5758 "00010011" // /* MW 2 */ + 5759 "00100001" // /* MW 1 */ +.label ZLE_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_208 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5760 "00001011" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5761 "00010010" // /* MW 15 */ + 5762 "00000111" // /* MW 14 */ + 5763 "01111000" // /* MW 13 */ + 5764 "10100101" // /* MW 12 */ + 5765 "00000001" // /* MW 11 */ + 5766 "00000000" // /* MW 10 */ + 5767 "00000000" // /* MW 9 */ + 5768 "00000000" // /* MW 8 */ + 5769 "00100011" // /* MW 7 */ + 5770 "00011100" // /* MW 6 */ + 5771 "01101010" // /* MW 5 */ + 5772 "01010000" // /* MW 4 */ + 5773 "01110000" // /* MW 3 */ + 5774 "00011011" // /* MW 2 */ + 5775 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 5776 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5777 "01100001" // /* MW 7 */ + 5778 "11100000" // /* MW 6 */ + 5779 "00000001" // /* MW 5 */ + 5780 "00000010" // /* MW 4 */ + 5781 "01100000" // /* MW 3 */ + 5782 "10010100" // /* MW 2 */ + 5783 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5784 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5785 "01000001" // /* MW 7 */ + 5786 "11100010" // /* MW 6 */ + 5787 "00000000" // /* MW 5 */ + 5788 "00000010" // /* MW 4 */ + 5789 "01100000" // /* MW 3 */ + 5790 "10000100" // /* MW 2 */ + 5791 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5792 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5793 "01100001" // /* MW 7 */ + 5794 "11100000" // /* MW 6 */ + 5795 "00000001" // /* MW 5 */ + 5796 "00000010" // /* MW 4 */ + 5797 "01100000" // /* MW 3 */ + 5798 "10010100" // /* MW 2 */ + 5799 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5800 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5801 "01000001" // /* MW 7 */ + 5802 "11100010" // /* MW 6 */ + 5803 "00000000" // /* MW 5 */ + 5804 "00000010" // /* MW 4 */ + 5805 "01100000" // /* MW 3 */ + 5806 "10000100" // /* MW 2 */ + 5807 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5808 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5809 "01100001" // /* MW 7 */ + 5810 "11100000" // /* MW 6 */ + 5811 "00000001" // /* MW 5 */ + 5812 "00000010" // /* MW 4 */ + 5813 "01100000" // /* MW 3 */ + 5814 "10010100" // /* MW 2 */ + 5815 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5816 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5817 "01000001" // /* MW 7 */ + 5818 "11100010" // /* MW 6 */ + 5819 "00000000" // /* MW 5 */ + 5820 "00000010" // /* MW 4 */ + 5821 "01100000" // /* MW 3 */ + 5822 "10000100" // /* MW 2 */ + 5823 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5824 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5825 "01100001" // /* MW 7 */ + 5826 "11100000" // /* MW 6 */ + 5827 "00000001" // /* MW 5 */ + 5828 "00000010" // /* MW 4 */ + 5829 "01100000" // /* MW 3 */ + 5830 "10010100" // /* MW 2 */ + 5831 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5832 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5833 "00100011" // /* MW 3 */ + 5834 "00011100" // /* MW 2 */ + 5835 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 131 4 first +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5836 "01011100" // VST.CONV.bf16.fp32 cml1, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 5837 "00000000" // /* MW 5 */ + 5838 "01010000" // /* MW 4 */ + 5839 "01100000" // /* MW 3 */ + 5840 "10010100" // /* MW 2 */ + 5841 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5842 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5843 "00100011" // /* MW 3 */ + 5844 "00011100" // /* MW 2 */ + 5845 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 154 20 first +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5846 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5847 "10100011" // /* MW 3 */ + 5848 "00011100" // /* MW 2 */ + 5849 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.delay_slot + 5850 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5851 "00100011" // /* MW 3 */ + 5852 "00011100" // /* MW 2 */ + 5853 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 154 20 first +.delay_slot + 5854 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5855 "10100011" // /* MW 3 */ + 5856 "00011100" // /* MW 2 */ + 5857 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_end0 + 5859 "00000000" // /* MW 1 */ +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0 +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.function superkernel_mul1d _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.src_ref 6 "superkernels.cpp" 277 first +.src_ref 6 "superkernels.cpp" 282 6 +.function_start + 5872 "01000100" // MOVXM p4, #508416 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5873 "00000000" // /* MW 5 */ + 5874 "11000100" // /* MW 4 */ + 5875 "11001000" // /* MW 3 */ + 5876 "00000111" // /* MW 2 */ + 5877 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 282 6 first + 5878 "11010100" // LDA r16, [p4]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5879 "11000001" // /* MW 5 */ + 5880 "10110101" // /* MW 4 */ + 5881 "11011000" // /* MW 3 */ + 5882 "11000010" // /* MW 2 */ + 5883 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 277 + 5884 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5885 "00000001" // /* MW 5 */ + 5886 "00000000" // /* MW 4 */ + 5887 "00000000" // /* MW 3 */ + 5888 "00001000" // /* MW 2 */ + 5889 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 279 22 first + 5890 "00111010" // ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5891 "01111001" // /* MW 9 */ + 5892 "01100000" // /* MW 8 */ + 5893 "11001010" // /* MW 7 */ + 5894 "10000001" // /* MW 6 */ + 5895 "00010100" // /* MW 5 */ + 5896 "00100011" // /* MW 4 */ + 5897 "10110000" // /* MW 3 */ + 5898 "00111010" // /* MW 2 */ + 5899 "11111111" // /* MW 1 */ + 5900 "00000010" // ST p0, [sp, #-20]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5901 "01110000" // /* MW 7 */ + 5902 "11010000" // /* MW 6 */ + 5903 "00001011" // /* MW 5 */ + 5904 "00000000" // /* MW 4 */ + 5905 "10110000" // /* MW 3 */ + 5906 "10000011" // /* MW 2 */ + 5907 "11111101" // /* MW 1 */ + 5908 "10011000" // ST r0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5909 "00010101" // /* MW 3 */ + 5910 "11111100" // /* MW 2 */ + 5911 "00001111" // /* MW 1 */ + 5912 "10011000" // ST lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5913 "00111101" // /* MW 3 */ + 5914 "11110000" // /* MW 2 */ + 5915 "00001111" // /* MW 1 */ + 5916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5917 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 282 6 first +.src_ref 6 "superkernels.cpp" 282 16 first + 5918 "10000100" // JNZ r16, #6064 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6064 delay_slots=5 */ + 5919 "00000001" // /* MW 5 */ + 5920 "01000000" // /* MW 4 */ + 5921 "11011000" // /* MW 3 */ + 5922 "00001011" // /* MW 2 */ + 5923 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 279 30 first +.delay_slot + 5924 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5925 "11111011" // /* MW 3 */ + 5926 "01100011" // /* MW 2 */ + 5927 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 279 11 +.delay_slot + 5928 "01000100" // MOVXM p2, #508420 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5929 "00001000" // /* MW 5 */ + 5930 "11000100" // /* MW 4 */ + 5931 "11000100" // /* MW 3 */ + 5932 "00000111" // /* MW 2 */ + 5933 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 279 11 +.delay_slot + 5934 "00000010" // ST r17, [p2]; MOV p2, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5935 "01110000" // /* MW 7 */ + 5936 "01100000" // /* MW 6 */ + 5937 "00110111" // /* MW 5 */ + 5938 "00000001" // /* MW 4 */ + 5939 "00110000" // /* MW 3 */ + 5940 "11000110" // /* MW 2 */ + 5941 "01000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.delay_slot + 5942 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5943 "11000000" // /* MW 3 */ + 5944 "11010110" // /* MW 2 */ + 5945 "00011011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 285 4 +.src_ref 6 "superkernels.cpp" 287 28 +.src_ref 6 "superkernels.cpp" 289 42 +.src_ref 6 "superkernels.cpp" 301 2 +.delay_slot + 5946 "00111010" // ST p2, [sp, #-12]; MOVXM p7, #508736 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5947 "00010001" // /* MW 9 */ + 5948 "10100000" // /* MW 8 */ + 5949 "10110001" // /* MW 7 */ + 5950 "11110011" // /* MW 6 */ + 5951 "00000001" // /* MW 5 */ + 5952 "00000000" // /* MW 4 */ + 5953 "10110000" // /* MW 3 */ + 5954 "10100011" // /* MW 2 */ + 5955 "11111110" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.src_ref 6 "superkernels.cpp" 285 4 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5956 "00111010" // MOVS p0, p7; MOVXM p2, #508448 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5957 "00010001" // /* MW 9 */ + 5958 "00010000" // /* MW 8 */ + 5959 "00110001" // /* MW 7 */ + 5960 "11110001" // /* MW 6 */ + 5961 "00000001" // /* MW 5 */ + 5962 "00000000" // /* MW 4 */ + 5963 "01100000" // /* MW 3 */ + 5964 "10010001" // /* MW 2 */ + 5965 "00010011" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5966 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #508444 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5967 "00010000" // /* MW 9 */ + 5968 "00001110" // /* MW 8 */ + 5969 "00110001" // /* MW 7 */ + 5970 "11110001" // /* MW 6 */ + 5971 "00000001" // /* MW 5 */ + 5972 "00000000" // /* MW 4 */ + 5973 "11100000" // /* MW 3 */ + 5974 "11000000" // /* MW 2 */ + 5975 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5977 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 285 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 5978 "00000100" // JL #5392 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=5392 delay_slots=5 */ + 5979 "00000001" // /* MW 5 */ + 5980 "00000000" // /* MW 4 */ + 5981 "10001000" // /* MW 3 */ + 5982 "00001010" // /* MW 2 */ + 5983 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5985 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5987 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5988 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5989 "00110001" // /* MW 3 */ + 5990 "00100000" // /* MW 2 */ + 5991 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 5992 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5993 "00000101" // /* MW 3 */ + 5994 "00100000" // /* MW 2 */ + 5995 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 5996 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5997 "00010001" // /* MW 3 */ + 5998 "00000110" // /* MW 2 */ + 5999 "00001010" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 289 18 +.src_ref 6 "superkernels.cpp" 289 42 first +.return_address + 6000 "10111010" // LDA r16, [p7]; MOVXM p1, #508420 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6001 "00010000" // /* MW 9 */ + 6002 "00000010" // /* MW 8 */ + 6003 "10110001" // /* MW 7 */ + 6004 "11110000" // /* MW 6 */ + 6005 "00000001" // /* MW 5 */ + 6006 "00000000" // /* MW 4 */ + 6007 "11010000" // /* MW 3 */ + 6008 "11000010" // /* MW 2 */ + 6009 "11100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 289 16 +.src_ref 6 "superkernels.cpp" 289 18 +.src_ref 6 "superkernels.cpp" 298 48 + 6010 "10111010" // LDA r17, [p1]; MOVXM p3, #508424 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6011 "00010000" // /* MW 9 */ + 6012 "00000100" // /* MW 8 */ + 6013 "10110001" // /* MW 7 */ + 6014 "11110001" // /* MW 6 */ + 6015 "00000001" // /* MW 5 */ + 6016 "00000000" // /* MW 4 */ + 6017 "11010000" // /* MW 3 */ + 6018 "11000110" // /* MW 2 */ + 6019 "00100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 287 28 first +.src_ref 6 "superkernels.cpp" 290 16 +.src_ref 6 "superkernels.cpp" 299 48 + 6020 "10111010" // LDA.u16 r18, [p7, #10]; MOVXM p1, #508428 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6021 "00010000" // /* MW 9 */ + 6022 "00000110" // /* MW 8 */ + 6023 "10110001" // /* MW 7 */ + 6024 "11110000" // /* MW 6 */ + 6025 "00000001" // /* MW 5 */ + 6026 "00000000" // /* MW 4 */ + 6027 "01010000" // /* MW 3 */ + 6028 "11001011" // /* MW 2 */ + 6029 "11101010" // /* MW 1 */ + 6030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6031 "00000000" // /* MW 1 */ + 6032 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6033 "00000000" // /* MW 1 */ + 6034 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6035 "00000000" // /* MW 1 */ + 6036 "10000100" // J #6080 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6080 delay_slots=5 */ + 6037 "00000000" // /* MW 5 */ + 6038 "00000000" // /* MW 4 */ + 6039 "11100000" // /* MW 3 */ + 6040 "00001011" // /* MW 2 */ + 6041 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 287 13 +.delay_slot + 6042 "01000100" // MOVXM p2, #508440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6043 "00110000" // /* MW 5 */ + 6044 "11000100" // /* MW 4 */ + 6045 "11000100" // /* MW 3 */ + 6046 "00000111" // /* MW 2 */ + 6047 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 289 27 first +.delay_slot + 6048 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6049 "00001111" // /* MW 3 */ + 6050 "01100001" // /* MW 2 */ + 6051 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 287 13 first +.delay_slot + 6052 "10011000" // ST r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6053 "01010001" // /* MW 3 */ + 6054 "00000110" // /* MW 2 */ + 6055 "00001010" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 289 16 first +.delay_slot + 6056 "10011000" // ST r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6057 "00010001" // /* MW 3 */ + 6058 "00000110" // /* MW 2 */ + 6059 "00001011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 290 16 first +.delay_slot + 6060 "10011000" // ST r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6061 "00010001" // /* MW 3 */ + 6062 "00000110" // /* MW 2 */ + 6063 "00001001" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 +.src_ref 6 "superkernels.cpp" 298 48 + 6064 "01000100" // MOVXM p3, #508424 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6065 "00010000" // /* MW 5 */ + 6066 "11000100" // /* MW 4 */ + 6067 "11000110" // /* MW 3 */ + 6068 "00000111" // /* MW 2 */ + 6069 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 299 48 + 6070 "10111010" // NOPA; MOVXM p1, #508428 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6071 "00010000" // /* MW 9 */ + 6072 "00000110" // /* MW 8 */ + 6073 "10110001" // /* MW 7 */ + 6074 "11110000" // /* MW 6 */ + 6075 "00000001" // /* MW 5 */ + 6076 "00000000" // /* MW 4 */ + 6077 "11110000" // /* MW 3 */ + 6078 "00101100" // /* MW 2 */ + 6079 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 +.src_ref 1 "io_buffer_main.h" 242 49 first + 6080 "00011000" // ADD.NC p0, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6081 "10000110" // /* MW 3 */ + 6082 "01100111" // /* MW 2 */ + 6083 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 293 2 + 6084 "10111010" // LDA r27, [p0], #-4; MOVXM p2, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6085 "00010000" // /* MW 9 */ + 6086 "00000000" // /* MW 8 */ + 6087 "00110001" // /* MW 7 */ + 6088 "11110001" // /* MW 6 */ + 6089 "00000001" // /* MW 5 */ + 6090 "00000000" // /* MW 4 */ + 6091 "11010000" // /* MW 3 */ + 6092 "11101110" // /* MW 2 */ + 6093 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 6094 "10011000" // LDA r16, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6095 "00010110" // /* MW 3 */ + 6096 "11111110" // /* MW 2 */ + 6097 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 6098 "10011000" // LDA r17, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6099 "00110110" // /* MW 3 */ + 6100 "11111110" // /* MW 2 */ + 6101 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 293 2 first + 6102 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6103 "01010110" // /* MW 3 */ + 6104 "00000110" // /* MW 2 */ + 6105 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first + 6106 "10011000" // LDA r19, [p0, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6107 "01110110" // /* MW 3 */ + 6108 "01000110" // /* MW 2 */ + 6109 "00000000" // /* MW 1 */ + 6110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6111 "00000000" // /* MW 1 */ + 6112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6113 "00000000" // /* MW 1 */ + 6114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6115 "00000000" // /* MW 1 */ + 6116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6117 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 6118 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6119 "00000010" // /* MW 3 */ + 6120 "01100001" // /* MW 2 */ + 6121 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 +.src_ref 6 "superkernels.cpp" 293 2 first + 6122 "01011100" // ST r16, [p0]; ADD r16, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6123 "00001110" // /* MW 5 */ + 6124 "01000000" // /* MW 4 */ + 6125 "00111001" // /* MW 3 */ + 6126 "11000010" // /* MW 2 */ + 6127 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 293 2 + 6128 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6129 "00010001" // /* MW 3 */ + 6130 "00000110" // /* MW 2 */ + 6131 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 +.src_ref 1 "io_buffer_main.h" 419 8 + 6132 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6133 "11111101" // /* MW 3 */ + 6134 "11100000" // /* MW 2 */ + 6135 "00010111" // /* MW 1 */ + 6136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6137 "00000000" // /* MW 1 */ + 6138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6139 "00000000" // /* MW 1 */ + 6140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6141 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 6142 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6143 "00001000" // /* MW 3 */ + 6144 "11010011" // /* MW 2 */ + 6145 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 first + 6146 "00011000" // ADD.NC p2, r14, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6147 "00000110" // /* MW 3 */ + 6148 "01100111" // /* MW 2 */ + 6149 "00011010" // /* MW 1 */ + 6150 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6151 "00000000" // /* MW 1 */ + 6152 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6153 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 + 6154 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6155 "01110110" // /* MW 3 */ + 6156 "11111111" // /* MW 2 */ + 6157 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 6158 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6159 "00110110" // /* MW 3 */ + 6160 "11111110" // /* MW 2 */ + 6161 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 6162 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6163 "01010110" // /* MW 3 */ + 6164 "11111110" // /* MW 2 */ + 6165 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 47 first + 6166 "10011000" // LDA r19, [p2, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6167 "01110110" // /* MW 3 */ + 6168 "01010110" // /* MW 2 */ + 6169 "00000010" // /* MW 1 */ + 6170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6171 "00000000" // /* MW 1 */ + 6172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6173 "00000000" // /* MW 1 */ + 6174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6175 "00000000" // /* MW 1 */ + 6176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6177 "00000000" // /* MW 1 */ + 6178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6179 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 6180 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6181 "00010010" // /* MW 3 */ + 6182 "10100011" // /* MW 2 */ + 6183 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 + 6184 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6185 "00110001" // /* MW 3 */ + 6186 "00000110" // /* MW 2 */ + 6187 "00001010" // /* MW 1 */ + 6188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6189 "00000000" // /* MW 1 */ + 6190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6191 "00000000" // /* MW 1 */ + 6192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6193 "00000000" // /* MW 1 */ + 6194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6195 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 6196 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6197 "00001000" // /* MW 3 */ + 6198 "11010011" // /* MW 2 */ + 6199 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 32 +.src_ref 6 "superkernels.cpp" 298 46 +.src_ref 6 "superkernels.cpp" 299 46 + 6200 "00111010" // MOVS p6, p2; MOVX r16, #1; MOV r14, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6201 "01111001" // /* MW 9 */ + 6202 "01100000" // /* MW 8 */ + 6203 "11001110" // /* MW 7 */ + 6204 "00101001" // /* MW 6 */ + 6205 "00000000" // /* MW 5 */ + 6206 "00000001" // /* MW 4 */ + 6207 "01100000" // /* MW 3 */ + 6208 "00010001" // /* MW 2 */ + 6209 "11010001" // /* MW 1 */ + 6210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6211 "00000000" // /* MW 1 */ + 6212 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6213 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 + 6214 "00011000" // LDA p4, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6215 "00011001" // /* MW 3 */ + 6216 "11101110" // /* MW 2 */ + 6217 "00000111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 298 48 first + 6218 "00001100" // LDA r17, [p3]; ST p0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6219 "00111011" // /* MW 5 */ + 6220 "11011000" // /* MW 4 */ + 6221 "11011111" // /* MW 3 */ + 6222 "11000110" // /* MW 2 */ + 6223 "01100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 299 48 first +.src_ref 6 "superkernels.cpp" 301 2 + 6224 "11010100" // LDA r20, [p1]; MOV p3, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6225 "10000001" // /* MW 5 */ + 6226 "11011101" // /* MW 4 */ + 6227 "11010110" // /* MW 3 */ + 6228 "11010010" // /* MW 2 */ + 6229 "00100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 6230 "10011000" // LDA r18, [p2], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6231 "01010110" // /* MW 3 */ + 6232 "01001110" // /* MW 2 */ + 6233 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 6234 "10011000" // LDA p2, [p0], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6235 "00011110" // /* MW 3 */ + 6236 "01011101" // /* MW 2 */ + 6237 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 40 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6238 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6239 "11000000" // /* MW 3 */ + 6240 "01100000" // /* MW 2 */ + 6241 "00011111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6243 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6244 "10011000" // LDA r19, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6245 "01110110" // /* MW 3 */ + 6246 "00000110" // /* MW 2 */ + 6247 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6248 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6249 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 301 2 first +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 6250 "00000100" // JL #5552 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=5552 delay_slots=5 */ + 6251 "00000001" // /* MW 5 */ + 6252 "00000000" // /* MW 4 */ + 6253 "11011000" // /* MW 3 */ + 6254 "00001010" // /* MW 2 */ + 6255 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 40 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6256 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6257 "11000000" // /* MW 3 */ + 6258 "11010100" // /* MW 2 */ + 6259 "00011011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 298 46 first +.delay_slot + 6260 "10011000" // LSHL r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6261 "00001101" // /* MW 3 */ + 6262 "01100011" // /* MW 2 */ + 6263 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 299 46 first +.delay_slot + 6264 "10011000" // LSHL r16, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6265 "00001101" // /* MW 3 */ + 6266 "00100001" // /* MW 2 */ + 6267 "00010101" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 299 46 +.delay_slot + 6268 "01011000" // ADD.NC p1, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6269 "01000001" // /* MW 3 */ + 6270 "01101001" // /* MW 2 */ + 6271 "00011001" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 298 46 first +.delay_slot + 6272 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6273 "00000000" // /* MW 15 */ + 6274 "00000000" // /* MW 14 */ + 6275 "10101000" // /* MW 13 */ + 6276 "11100010" // /* MW 12 */ + 6277 "00110100" // /* MW 11 */ + 6278 "00000000" // /* MW 10 */ + 6279 "00000000" // /* MW 9 */ + 6280 "00000000" // /* MW 8 */ + 6281 "01011011" // /* MW 7 */ + 6282 "00000001" // /* MW 6 */ + 6283 "00100000" // /* MW 5 */ + 6284 "00000000" // /* MW 4 */ + 6285 "11110000" // /* MW 3 */ + 6286 "00101100" // /* MW 2 */ + 6287 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 32 first +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 351 40 +.src_ref 1 "io_buffer_main.h" 449 8 +.src_ref 1 "io_buffer_main.h" 449 8 +.return_address + 6288 "10111010" // LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6289 "01111000" // /* MW 9 */ + 6290 "11010000" // /* MW 8 */ + 6291 "10110011" // /* MW 7 */ + 6292 "00101000" // /* MW 6 */ + 6293 "00000000" // /* MW 5 */ + 6294 "00000001" // /* MW 4 */ + 6295 "11010000" // /* MW 3 */ + 6296 "11000110" // /* MW 2 */ + 6297 "11001000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 19 + 6298 "01000100" // MOVXM p6, #508440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6299 "00110000" // /* MW 5 */ + 6300 "11000100" // /* MW 4 */ + 6301 "11001100" // /* MW 3 */ + 6302 "00000111" // /* MW 2 */ + 6303 "00000000" // /* MW 1 */ + 6304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6305 "00000000" // /* MW 1 */ + 6306 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6307 "00000000" // /* MW 1 */ + 6308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6309 "00000000" // /* MW 1 */ + 6310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6311 "00000000" // /* MW 1 */ + 6312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6313 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 6314 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6315 "00001000" // /* MW 3 */ + 6316 "01010001" // /* MW 2 */ + 6317 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first + 6318 "10011000" // LDA r17, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6319 "00110110" // /* MW 3 */ + 6320 "11110110" // /* MW 2 */ + 6321 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 + 6322 "00011000" // LDA p2, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6323 "00011001" // /* MW 3 */ + 6324 "11101101" // /* MW 2 */ + 6325 "00000111" // /* MW 1 */ + 6326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6327 "00000000" // /* MW 1 */ + 6328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6329 "00000000" // /* MW 1 */ + 6330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6331 "00000000" // /* MW 1 */ + 6332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6333 "00000000" // /* MW 1 */ + 6334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6335 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 + 6336 "10011000" // SUB r17, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6337 "00010001" // /* MW 3 */ + 6338 "00100011" // /* MW 2 */ + 6339 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first +.src_ref 1 "io_buffer_main.h" 351 28 + 6340 "00001100" // LDA r17, [p2, #20]; ST r17, [p1, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6341 "01100011" // /* MW 5 */ + 6342 "11101100" // /* MW 4 */ + 6343 "11010011" // /* MW 3 */ + 6344 "11000110" // /* MW 2 */ + 6345 "01001010" // /* MW 1 */ + 6346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6347 "00000000" // /* MW 1 */ + 6348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6349 "00000000" // /* MW 1 */ + 6350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6351 "00000000" // /* MW 1 */ + 6352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6353 "00000000" // /* MW 1 */ + 6354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6355 "00000000" // /* MW 1 */ + 6356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6357 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 6358 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6359 "00001000" // /* MW 3 */ + 6360 "01010001" // /* MW 2 */ + 6361 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first +.src_ref 6 "superkernels.cpp" 305 6 +.src_ref 6 "superkernels.cpp" 306 14 + 6362 "10111010" // LDA r19, [p7, #-8]; MOVXM p1, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6363 "00010000" // /* MW 9 */ + 6364 "00000000" // /* MW 8 */ + 6365 "10110001" // /* MW 7 */ + 6366 "11110000" // /* MW 6 */ + 6367 "00000001" // /* MW 5 */ + 6368 "00000000" // /* MW 4 */ + 6369 "11010000" // /* MW 3 */ + 6370 "11001110" // /* MW 2 */ + 6371 "11111100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 19 first + 6372 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6373 "01010110" // /* MW 3 */ + 6374 "00000110" // /* MW 2 */ + 6375 "00000110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 6 + 6376 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6377 "00110110" // /* MW 3 */ + 6378 "00000110" // /* MW 2 */ + 6379 "00000001" // /* MW 1 */ + 6380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6381 "00000000" // /* MW 1 */ + 6382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6383 "00000000" // /* MW 1 */ + 6384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6385 "00000000" // /* MW 1 */ + 6386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6387 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 first + 6388 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6389 "00110001" // /* MW 3 */ + 6390 "00100001" // /* MW 2 */ + 6391 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 6392 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6393 "00010001" // /* MW 3 */ + 6394 "11100110" // /* MW 2 */ + 6395 "00001111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 16 first + 6396 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6397 "00101000" // /* MW 3 */ + 6398 "01100001" // /* MW 2 */ + 6399 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 6 + 6400 "10000100" // JNZ r16, #6432 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6432 delay_slots=5 */ + 6401 "00000001" // /* MW 5 */ + 6402 "01000000" // /* MW 4 */ + 6403 "10010000" // /* MW 3 */ + 6404 "00001100" // /* MW 2 */ + 6405 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6407 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6408 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6409 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6411 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6413 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6415 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 306 14 + 6416 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6417 "00000001" // /* MW 3 */ + 6418 "00100000" // /* MW 2 */ + 6419 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 306 14 first + 6420 "00110110" // NOPA; NOPB; ST r16, [p1]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6421 "11000001" // /* MW 11 */ + 6422 "00001000" // /* MW 10 */ + 6423 "10000011" // /* MW 9 */ + 6424 "00000000" // /* MW 8 */ + 6425 "00000000" // /* MW 7 */ + 6426 "00000000" // /* MW 6 */ + 6427 "00100000" // /* MW 5 */ + 6428 "00000000" // /* MW 4 */ + 6429 "11110000" // /* MW 3 */ + 6430 "00101100" // /* MW 2 */ + 6431 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 +.src_ref 6 "superkernels.cpp" 308 + 6432 "00011000" // LDA lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6433 "00111001" // /* MW 3 */ + 6434 "11110000" // /* MW 2 */ + 6435 "00000111" // /* MW 1 */ + 6436 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6437 "11110001" // /* MW 3 */ + 6438 "11111101" // /* MW 2 */ + 6439 "00000111" // /* MW 1 */ + 6440 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6441 "10011001" // /* MW 3 */ + 6442 "11110111" // /* MW 2 */ + 6443 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 6444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6445 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.noswbrkpt + 6446 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6447 "11010001" // /* MW 3 */ + 6448 "11111001" // /* MW 2 */ + 6449 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 6450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6451 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 6452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6453 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 308 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 6454 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 6455 "00000000" // /* MW 3 */ + 6456 "00101000" // /* MW 2 */ + 6457 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6458 "00011000" // MOVS p6, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6459 "00001011" // /* MW 3 */ + 6460 "10001110" // /* MW 2 */ + 6461 "00001110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 308 +.delay_slot + 6462 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6463 "00000001" // /* MW 5 */ + 6464 "00000000" // /* MW 4 */ + 6465 "00000000" // /* MW 3 */ + 6466 "11111000" // /* MW 2 */ + 6467 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6469 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6471 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0 + 6473 "00000000" // /* MW 1 */ +.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh +.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_begin0 +.function setup_conv2d_dw_params_bf16 _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh +.src_ref 7 "conv2d_dw_bf16_params.h" 177 first +.src_ref 7 "conv2d_dw_bf16_params.h" 181 15 +.src_ref 7 "conv2d_dw_bf16_params.h" 181 17 first +.function_start + 6480 "10111010" // LDA el0, [p0], #4; MOVXM p1, #508864 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6481 "00010000" // /* MW 9 */ + 6482 "11100000" // /* MW 8 */ + 6483 "10110001" // /* MW 7 */ + 6484 "11110000" // /* MW 6 */ + 6485 "00000001" // /* MW 5 */ + 6486 "00000000" // /* MW 4 */ + 6487 "11010000" // /* MW 3 */ + 6488 "10000101" // /* MW 2 */ + 6489 "00000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 181 17 first +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.src_ref 7 "conv2d_dw_bf16_params.h" 184 80 + 6490 "10111010" // LDA eh0, [p0], #4; MOVX r16, #2; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6491 "01011000" // /* MW 9 */ + 6492 "00000000" // /* MW 8 */ + 6493 "00001000" // /* MW 7 */ + 6494 "01001011" // /* MW 6 */ + 6495 "00000000" // /* MW 5 */ + 6496 "00000001" // /* MW 4 */ + 6497 "11010000" // /* MW 3 */ + 6498 "10000001" // /* MW 2 */ + 6499 "00000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 177 + 6500 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6501 "00000001" // /* MW 5 */ + 6502 "00000000" // /* MW 4 */ + 6503 "00000000" // /* MW 3 */ + 6504 "00001000" // /* MW 2 */ + 6505 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 32 + 6506 "00111010" // ST p7, [sp, #-16]; MOVXM p7, #508864 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6507 "00010001" // /* MW 9 */ + 6508 "11100000" // /* MW 8 */ + 6509 "10110001" // /* MW 7 */ + 6510 "11110011" // /* MW 6 */ + 6511 "00000001" // /* MW 5 */ + 6512 "00000000" // /* MW 4 */ + 6513 "10110000" // /* MW 3 */ + 6514 "01110011" // /* MW 2 */ + 6515 "11111110" // /* MW 1 */ + 6516 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6517 "00111101" // /* MW 3 */ + 6518 "11111100" // /* MW 2 */ + 6519 "00001111" // /* MW 1 */ + 6520 "10011000" // ST r14, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6521 "11010101" // /* MW 3 */ + 6522 "11110101" // /* MW 2 */ + 6523 "00001111" // /* MW 1 */ + 6524 "10011000" // ST r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6525 "11110101" // /* MW 3 */ + 6526 "11111001" // /* MW 2 */ + 6527 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 181 15 + 6528 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6529 "00101001" // /* MW 3 */ + 6530 "00011100" // /* MW 2 */ + 6531 "00001001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 181 15 + 6532 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6533 "00001001" // /* MW 3 */ + 6534 "00011100" // /* MW 2 */ + 6535 "00001001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 181 17 + 6536 "10011000" // LDA el0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6537 "00101110" // /* MW 3 */ + 6538 "00000100" // /* MW 2 */ + 6539 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 181 17 + 6540 "10011000" // LDA eh0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6541 "00001110" // /* MW 3 */ + 6542 "00010100" // /* MW 2 */ + 6543 "00000000" // /* MW 1 */ + 6544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6545 "00000000" // /* MW 1 */ + 6546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6547 "00000000" // /* MW 1 */ + 6548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6549 "00000000" // /* MW 1 */ + 6550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6551 "00000000" // /* MW 1 */ + 6552 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6553 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 181 15 + 6554 "10011000" // ST el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6555 "00101001" // /* MW 3 */ + 6556 "00000100" // /* MW 2 */ + 6557 "00001001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 181 15 + 6558 "10011000" // ST eh0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6559 "00001001" // /* MW 3 */ + 6560 "00010100" // /* MW 2 */ + 6561 "00001001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 32 first + 6562 "10011000" // LDA.u8 r17, [p7], #5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6563 "00101010" // /* MW 3 */ + 6564 "01011110" // /* MW 2 */ + 6565 "00000111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 52 + 6566 "10011000" // LDA.u8 r18, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6567 "01001010" // /* MW 3 */ + 6568 "11101110" // /* MW 2 */ + 6569 "00000111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 80 + 6570 "10011000" // LDA.u8 r1, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6571 "00101010" // /* MW 3 */ + 6572 "11101100" // /* MW 2 */ + 6573 "00000111" // /* MW 1 */ + 6574 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6575 "00000000" // /* MW 1 */ + 6576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6577 "00000000" // /* MW 1 */ + 6578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6579 "00000000" // /* MW 1 */ + 6580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6581 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.no_stack_arguments + 6582 "00000100" // JL #12464 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12464 delay_slots=5 */ + 6583 "00000001" // /* MW 5 */ + 6584 "00000000" // /* MW 4 */ + 6585 "01011000" // /* MW 3 */ + 6586 "00011000" // /* MW 2 */ + 6587 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 38 +.delay_slot + 6588 "01011100" // ST r18, [sp, #-20]; SUB r14, r17, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6589 "01000011" // /* MW 5 */ + 6590 "10111010" // /* MW 4 */ + 6591 "10111000" // /* MW 3 */ + 6592 "11001010" // /* MW 2 */ + 6593 "11111101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.src_ref 7 "conv2d_dw_bf16_params.h" 184 80 +.delay_slot + 6594 "00111010" // ST r1, [sp, #-28]; NE r16, r1, r16; MOV r15, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6595 "01111001" // /* MW 9 */ + 6596 "01010000" // /* MW 8 */ + 6597 "11101000" // /* MW 7 */ + 6598 "01000101" // /* MW 6 */ + 6599 "00001000" // /* MW 5 */ + 6600 "00000011" // /* MW 4 */ + 6601 "10110000" // /* MW 3 */ + 6602 "10000110" // /* MW 2 */ + 6603 "11111100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.delay_slot + 6604 "01011100" // ST r16, [sp, #-24]; LT r27, r14, r24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6605 "00010101" // /* MW 5 */ + 6606 "01101111" // /* MW 4 */ + 6607 "10110111" // /* MW 3 */ + 6608 "01000010" // /* MW 2 */ + 6609 "11111101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.delay_slot + 6610 "10011000" // SUB r17, r24, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6611 "11100001" // /* MW 3 */ + 6612 "00100010" // /* MW 2 */ + 6613 "00010110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.delay_slot + 6614 "01111010" // NOPA; NOPS; SEL.EQZ r0, r14, r17, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6615 "00010010" // /* MW 9 */ + 6616 "10000001" // /* MW 8 */ + 6617 "00000011" // /* MW 7 */ + 6618 "00000000" // /* MW 6 */ + 6619 "01011011" // /* MW 5 */ + 6620 "00000001" // /* MW 4 */ + 6621 "11110000" // /* MW 3 */ + 6622 "00101100" // /* MW 2 */ + 6623 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.src_ref 7 "conv2d_dw_bf16_params.h" 185 32 first +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 +.return_address + 6624 "10111010" // LDA.u8 r17, [p7], #3; XOR r20, r15, r14; MOV r16, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6625 "01011000" // /* MW 9 */ + 6626 "00000000" // /* MW 8 */ + 6627 "00001000" // /* MW 7 */ + 6628 "00110110" // /* MW 6 */ + 6629 "01000111" // /* MW 5 */ + 6630 "00011111" // /* MW 4 */ + 6631 "01010000" // /* MW 3 */ + 6632 "11000101" // /* MW 2 */ + 6633 "11100111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 first +.src_ref 7 "conv2d_dw_bf16_params.h" 185 52 + 6634 "00101100" // LDA.u8 r18, [p7], #-2; SUB r19, r16, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6635 "01000011" // /* MW 5 */ + 6636 "01001100" // /* MW 4 */ + 6637 "01011000" // /* MW 3 */ + 6638 "11001001" // /* MW 2 */ + 6639 "11111101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 + 6640 "00101100" // LDA r1, [sp, #-28]; LT r27, r20, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6641 "00010101" // /* MW 5 */ + 6642 "01101110" // /* MW 4 */ + 6643 "00101010" // /* MW 3 */ + 6644 "10000110" // /* MW 2 */ + 6645 "11111100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 + 6646 "00011000" // SEL.EQZ r19, r2, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6647 "00110010" // /* MW 3 */ + 6648 "10100111" // /* MW 2 */ + 6649 "00010000" // /* MW 1 */ + 6650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6651 "00000000" // /* MW 1 */ + 6652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6653 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 first +.no_stack_arguments + 6654 "00000100" // JL #12464 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12464 delay_slots=5 */ + 6655 "00000001" // /* MW 5 */ + 6656 "00000000" // /* MW 4 */ + 6657 "01011000" // /* MW 3 */ + 6658 "00011000" // /* MW 2 */ + 6659 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 first +.delay_slot + 6660 "00011000" // EXTEND.s16 r19, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6661 "01110000" // /* MW 3 */ + 6662 "11100110" // /* MW 2 */ + 6663 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 87 +.src_ref 7 "conv2d_dw_bf16_params.h" 185 38 first +.delay_slot + 6664 "00111010" // ST r18, [sp, #-32]; SUB r14, r17, r18; ADD.NC r15, r19, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6665 "01001001" // /* MW 9 */ + 6666 "11000000" // /* MW 8 */ + 6667 "11101100" // /* MW 7 */ + 6668 "00001101" // /* MW 6 */ + 6669 "11101001" // /* MW 5 */ + 6670 "00100010" // /* MW 4 */ + 6671 "10110000" // /* MW 3 */ + 6672 "01001010" // /* MW 2 */ + 6673 "11111100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 +.delay_slot + 6674 "10011000" // LT r27, r14, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6675 "00001010" // /* MW 3 */ + 6676 "10110111" // /* MW 2 */ + 6677 "00010011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 +.delay_slot + 6678 "10011000" // SUB r17, r16, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6679 "11100001" // /* MW 3 */ + 6680 "00100010" // /* MW 2 */ + 6681 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 +.delay_slot + 6682 "00101100" // NOPA; SEL.EQZ r0, r14, r17, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6683 "00100100" // /* MW 5 */ + 6684 "00000010" // /* MW 4 */ + 6685 "11110111" // /* MW 3 */ + 6686 "00101100" // /* MW 2 */ + 6687 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 +.src_ref 7 "conv2d_dw_bf16_params.h" 186 23 +.src_ref 7 "conv2d_dw_bf16_params.h" 192 50 +.src_ref 7 "conv2d_dw_bf16_params.h" 198 45 +.src_ref 7 "conv2d_dw_bf16_params.h" 200 64 +.src_ref 7 "conv2d_dw_bf16_params.h" 200 64 +.src_ref 7 "conv2d_dw_bf16_params.h" 216 53 +.src_ref 7 "conv2d_dw_bf16_params.h" 235 59 +.src_ref 7 "conv2d_dw_bf16_params.h" 237 57 +.src_ref 7 "conv2d_dw_bf16_params.h" 239 139 +.return_address + 6688 "10111010" // LDA r1, [sp, #-28]; MOVX r19, #2; MOV m0, #66 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6689 "01011000" // /* MW 9 */ + 6690 "01000010" // /* MW 8 */ + 6691 "00000000" // /* MW 7 */ + 6692 "01001000" // /* MW 6 */ + 6693 "00110000" // /* MW 5 */ + 6694 "00000001" // /* MW 4 */ + 6695 "00100000" // /* MW 3 */ + 6696 "10000110" // /* MW 2 */ + 6697 "11111100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 +.src_ref 7 "conv2d_dw_bf16_params.h" 192 50 first +.src_ref 7 "conv2d_dw_bf16_params.h" 200 45 +.src_ref 7 "conv2d_dw_bf16_params.h" 200 45 +.src_ref 7 "conv2d_dw_bf16_params.h" 209 100 +.src_ref 7 "conv2d_dw_bf16_params.h" 219 63 +.src_ref 7 "conv2d_dw_bf16_params.h" 226 49 +.src_ref 7 "conv2d_dw_bf16_params.h" 230 47 +.src_ref 7 "conv2d_dw_bf16_params.h" 237 41 + 6698 "10111010" // LDA.u8 r20, [p7], m0; MOVX r24, #0; MOV r18, #8 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6699 "01011000" // /* MW 9 */ + 6700 "00001000" // /* MW 8 */ + 6701 "01001000" // /* MW 7 */ + 6702 "00001010" // /* MW 6 */ + 6703 "10000000" // /* MW 5 */ + 6704 "00000001" // /* MW 4 */ + 6705 "01010000" // /* MW 3 */ + 6706 "01010001" // /* MW 2 */ + 6707 "11100001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 197 78 +.src_ref 7 "conv2d_dw_bf16_params.h" 198 76 +.src_ref 7 "conv2d_dw_bf16_params.h" 200 45 +.src_ref 7 "conv2d_dw_bf16_params.h" 234 81 + 6708 "10111010" // LDA r28, [sp, #-32]; MOVX r16, #-6; MOV r31, #23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6709 "01011000" // /* MW 9 */ + 6710 "00010111" // /* MW 8 */ + 6711 "11101000" // /* MW 7 */ + 6712 "01001011" // /* MW 6 */ + 6713 "00000111" // /* MW 5 */ + 6714 "00111111" // /* MW 4 */ + 6715 "00100000" // /* MW 3 */ + 6716 "01110010" // /* MW 2 */ + 6717 "11111100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 186 74 +.src_ref 7 "conv2d_dw_bf16_params.h" 197 68 +.src_ref 7 "conv2d_dw_bf16_params.h" 201 50 +.src_ref 7 "conv2d_dw_bf16_params.h" 208 63 +.src_ref 7 "conv2d_dw_bf16_params.h" 226 49 +.src_ref 7 "conv2d_dw_bf16_params.h" 230 47 +.src_ref 7 "conv2d_dw_bf16_params.h" 239 51 +.src_ref 7 "conv2d_dw_bf16_params.h" 239 74 + 6718 "10111010" // LDA r22, [sp, #-20]; MOVX r26, #-2; MOV r21, #6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6719 "01011000" // /* MW 9 */ + 6720 "00000110" // /* MW 8 */ + 6721 "10101000" // /* MW 7 */ + 6722 "11001010" // /* MW 6 */ + 6723 "10100111" // /* MW 5 */ + 6724 "00111111" // /* MW 4 */ + 6725 "00100000" // /* MW 3 */ + 6726 "11011010" // /* MW 2 */ + 6727 "11111101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 186 84 +.src_ref 7 "conv2d_dw_bf16_params.h" 198 39 +.src_ref 7 "conv2d_dw_bf16_params.h" 201 50 +.src_ref 7 "conv2d_dw_bf16_params.h" 234 77 + 6728 "10111010" // LDA r3, [sp, #-24]; MOVX r29, #508; MOV m2, #32 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6729 "01011000" // /* MW 9 */ + 6730 "00100000" // /* MW 8 */ + 6731 "00000000" // /* MW 7 */ + 6732 "10001001" // /* MW 6 */ + 6733 "11010111" // /* MW 5 */ + 6734 "00001111" // /* MW 4 */ + 6735 "00100000" // /* MW 3 */ + 6736 "00001110" // /* MW 2 */ + 6737 "11111101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 208 16 +.src_ref 7 "conv2d_dw_bf16_params.h" 208 71 +.src_ref 7 "conv2d_dw_bf16_params.h" 209 93 +.src_ref 7 "conv2d_dw_bf16_params.h" 214 72 +.src_ref 7 "conv2d_dw_bf16_params.h" 216 53 +.src_ref 7 "conv2d_dw_bf16_params.h" 221 73 +.src_ref 7 "conv2d_dw_bf16_params.h" 230 41 +.src_ref 7 "conv2d_dw_bf16_params.h" 237 41 + 6738 "10111010" // MOVA m0, #-178; MOVX r23, #1; MOV r0, #128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6739 "01011000" // /* MW 9 */ + 6740 "10000000" // /* MW 8 */ + 6741 "00001000" // /* MW 7 */ + 6742 "00101000" // /* MW 6 */ + 6743 "01110000" // /* MW 5 */ + 6744 "00000001" // /* MW 4 */ + 6745 "10000000" // /* MW 3 */ + 6746 "11000000" // /* MW 2 */ + 6747 "11101001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 first +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 +.src_ref 7 "conv2d_dw_bf16_params.h" 232 54 + 6748 "10111010" // MOVA m1, #186; SUB r17, r24, r2; MOV vaddSign0, crMCDEn /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6749 "01111000" // /* MW 9 */ + 6750 "10110000" // /* MW 8 */ + 6751 "10011101" // /* MW 7 */ + 6752 "00001100" // /* MW 6 */ + 6753 "00010001" // /* MW 5 */ + 6754 "00110001" // /* MW 4 */ + 6755 "10000000" // /* MW 3 */ + 6756 "01000100" // /* MW 2 */ + 6757 "00010111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 + 6758 "10011000" // XOR r30, r1, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6759 "11100110" // /* MW 3 */ + 6760 "01111100" // /* MW 2 */ + 6761 "00010000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 + 6762 "10011000" // LT r27, r30, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6763 "10001010" // /* MW 3 */ + 6764 "10110111" // /* MW 2 */ + 6765 "00010111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 +.src_ref 7 "conv2d_dw_bf16_params.h" 206 70 + 6766 "00100100" // SEL.EQZ r17, r2, r17, r27; ADD.NC r7, r28, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6767 "11111111" // /* MW 5 */ + 6768 "10111100" // /* MW 4 */ + 6769 "01000011" // /* MW 3 */ + 6770 "01100010" // /* MW 2 */ + 6771 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 +.src_ref 7 "conv2d_dw_bf16_params.h" 186 74 + 6772 "00100100" // EXTEND.s16 r30, r17; ADD.NC r22, r22, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6773 "00000010" // /* MW 5 */ + 6774 "00110110" // /* MW 4 */ + 6775 "00001011" // /* MW 3 */ + 6776 "10001110" // /* MW 2 */ + 6777 "10001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 88 +.src_ref 7 "conv2d_dw_bf16_params.h" 192 22 first + 6778 "00100100" // MUL r30, r15, r20; ADD.NC r14, r30, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6779 "00000001" // /* MW 5 */ + 6780 "00111110" // /* MW 4 */ + 6781 "11110111" // /* MW 3 */ + 6782 "10101001" // /* MW 2 */ + 6783 "01111111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 197 68 +.src_ref 7 "conv2d_dw_bf16_params.h" 198 45 first + 6784 "00100100" // MUL r2, r1, r14; ADD.NC r17, r22, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6785 "00000001" // /* MW 5 */ + 6786 "10110110" // /* MW 4 */ + 6787 "11111000" // /* MW 3 */ + 6788 "10011101" // /* MW 2 */ + 6789 "00001000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 200 64 first + 6790 "10011000" // EQ r27, r19, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6791 "00010111" // /* MW 3 */ + 6792 "11110110" // /* MW 2 */ + 6793 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 198 55 first + 6794 "10011000" // MUL r2, r30, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6795 "00101111" // /* MW 3 */ + 6796 "10000100" // /* MW 2 */ + 6797 "00010111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 200 45 first +.src_ref 7 "conv2d_dw_bf16_params.h" 209 42 +.src_ref 7 "conv2d_dw_bf16_params.h" 213 69 + 6798 "01100100" // SEL.EQZ r31, r31, r18, r27; MOV r18, #-3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6799 "11110101" // /* MW 5 */ + 6800 "00111111" // /* MW 4 */ + 6801 "01001001" // /* MW 3 */ + 6802 "11100100" // /* MW 2 */ + 6803 "11111111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 198 76 first + 6804 "10011000" // LSHL r16, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6805 "00001101" // /* MW 3 */ + 6806 "10100001" // /* MW 2 */ + 6807 "00010000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 197 68 first + 6808 "10011000" // LSHL r2, r17, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6809 "10101101" // /* MW 3 */ + 6810 "01000101" // /* MW 2 */ + 6811 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 197 78 +.src_ref 7 "conv2d_dw_bf16_params.h" 204 79 + 6812 "00100100" // MUL r2, r2, r28; ADD.NC r4, r2, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6813 "11111111" // /* MW 5 */ + 6814 "00100010" // /* MW 4 */ + 6815 "11110010" // /* MW 3 */ + 6816 "10111001" // /* MW 2 */ + 6817 "00010000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 201 50 first + 6818 "10011000" // LSHL r3, r3, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6819 "01011101" // /* MW 3 */ + 6820 "11000111" // /* MW 2 */ + 6821 "00010000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 186 23 first +.src_ref 7 "conv2d_dw_bf16_params.h" 197 39 first + 6822 "01011100" // ST r2, [p7], #-4; MUL r5, r15, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6823 "00111111" // /* MW 5 */ + 6824 "10010100" // /* MW 4 */ + 6825 "00110111" // /* MW 3 */ + 6826 "10001010" // /* MW 2 */ + 6827 "11111111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 186 84 +.src_ref 7 "conv2d_dw_bf16_params.h" 198 39 first +.src_ref 7 "conv2d_dw_bf16_params.h" 212 52 +.src_ref 7 "conv2d_dw_bf16_params.h" 219 53 + 6828 "00111010" // ST r16, [p7], m2; AND r22, r29, r22; MOV r16, #4 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6829 "01011001" // /* MW 9 */ + 6830 "00000100" // /* MW 8 */ + 6831 "00001000" // /* MW 7 */ + 6832 "00100110" // /* MW 6 */ + 6833 "01101011" // /* MW 5 */ + 6834 "00111011" // /* MW 4 */ + 6835 "00110000" // /* MW 3 */ + 6836 "01000010" // /* MW 2 */ + 6837 "11101001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 186 44 first +.src_ref 7 "conv2d_dw_bf16_params.h" 200 42 first + 6838 "01011100" // ST r31, [p7], #-16; ADD r22, r5, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6839 "11000001" // /* MW 5 */ + 6840 "11011010" // /* MW 4 */ + 6841 "00110010" // /* MW 3 */ + 6842 "11111110" // /* MW 2 */ + 6843 "11111001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 193 22 first +.src_ref 7 "conv2d_dw_bf16_params.h" 201 47 first + 6844 "01011100" // ST r3, [p7], #24; MUL r31, r22, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6845 "10011111" // /* MW 5 */ + 6846 "01111110" // /* MW 4 */ + 6847 "00111011" // /* MW 3 */ + 6848 "10001110" // /* MW 2 */ + 6849 "11101101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 204 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 212 52 first + 6850 "01011100" // ST r4, [p7], #4; LSHL r22, r22, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6851 "00011011" // /* MW 5 */ + 6852 "01011010" // /* MW 4 */ + 6853 "00111011" // /* MW 3 */ + 6854 "10010010" // /* MW 2 */ + 6855 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 208 16 first + 6856 "10011000" // LSHL r3, r31, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6857 "01111101" // /* MW 3 */ + 6858 "11000111" // /* MW 2 */ + 6859 "00010111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 208 63 + 6860 "10011000" // LSHL r4, r4, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6861 "01011101" // /* MW 3 */ + 6862 "00001001" // /* MW 2 */ + 6863 "00010001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 208 71 +.src_ref 7 "conv2d_dw_bf16_params.h" 214 72 first + 6864 "10100100" // SUB r25, r22, r3; ADD.NC r4, r4, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6865 "00000010" // /* MW 5 */ + 6866 "00100100" // /* MW 4 */ + 6867 "00110010" // /* MW 3 */ + 6868 "01000110" // /* MW 2 */ + 6869 "10110110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 200 45 +.src_ref 7 "conv2d_dw_bf16_params.h" 205 42 +.src_ref 7 "conv2d_dw_bf16_params.h" 208 23 first +.src_ref 7 "conv2d_dw_bf16_params.h" 214 72 +.src_ref 7 "conv2d_dw_bf16_params.h" 219 63 + 6870 "10111010" // MOVA r0, #-64; SUB r6, r3, r4; ADD.NC r25, r25, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6871 "10101000" // /* MW 9 */ + 6872 "01000000" // /* MW 8 */ + 6873 "00101110" // /* MW 7 */ + 6874 "00001111" // /* MW 6 */ + 6875 "01100010" // /* MW 5 */ + 6876 "00000110" // /* MW 4 */ + 6877 "00000000" // /* MW 3 */ + 6878 "00000000" // /* MW 2 */ + 6879 "11111000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 205 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 216 53 first + 6880 "01011100" // ST r0, [p7], #4; MUL r1, r31, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6881 "00111111" // /* MW 5 */ + 6882 "10000100" // /* MW 4 */ + 6883 "00111111" // /* MW 3 */ + 6884 "10000010" // /* MW 2 */ + 6885 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 206 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 209 53 first + 6886 "01011100" // ST r7, [p7], #4; MUL r31, r31, r7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6887 "11111111" // /* MW 5 */ + 6888 "11111100" // /* MW 4 */ + 6889 "00111111" // /* MW 3 */ + 6890 "10011110" // /* MW 2 */ + 6891 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 207 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 213 69 first + 6892 "01011100" // ST r6, [p7], #4; LSHL r5, r5, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6893 "01011011" // /* MW 5 */ + 6894 "10010110" // /* MW 4 */ + 6895 "00110010" // /* MW 3 */ + 6896 "10011010" // /* MW 2 */ + 6897 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 209 93 first +.src_ref 7 "conv2d_dw_bf16_params.h" 213 73 + 6898 "00100100" // LSHL r6, r31, r23; ADD.NC r31, r5, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6899 "11111111" // /* MW 5 */ + 6900 "10100101" // /* MW 4 */ + 6901 "10111111" // /* MW 3 */ + 6902 "10101111" // /* MW 2 */ + 6903 "11111001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 209 42 +.src_ref 7 "conv2d_dw_bf16_params.h" 209 100 +.src_ref 7 "conv2d_dw_bf16_params.h" 216 61 + 6904 "10111010" // MOVA r4, #7; LSHL r5, r20, r18; ADD.NC r18, r6, r4 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6905 "10101000" // /* MW 9 */ + 6906 "10001000" // /* MW 8 */ + 6907 "01001001" // /* MW 7 */ + 6908 "01101110" // /* MW 6 */ + 6909 "01011001" // /* MW 5 */ + 6910 "00101000" // /* MW 4 */ + 6911 "00000000" // /* MW 3 */ + 6912 "11100100" // /* MW 2 */ + 6913 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 209 100 + 6914 "10011000" // SUB r18, r24, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6915 "00100001" // /* MW 3 */ + 6916 "00100101" // /* MW 2 */ + 6917 "00010110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 209 42 +.src_ref 7 "conv2d_dw_bf16_params.h" 211 77 first +.src_ref 7 "conv2d_dw_bf16_params.h" 216 53 first + 6918 "00111010" // ST r18, [p7], #4; LSHL r1, r1, r23; ADD.NC r18, r5, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6919 "11001001" // /* MW 9 */ + 6920 "01111111" // /* MW 8 */ + 6921 "01001001" // /* MW 7 */ + 6922 "11101110" // /* MW 6 */ + 6923 "00011011" // /* MW 5 */ + 6924 "00000010" // /* MW 4 */ + 6925 "00110000" // /* MW 3 */ + 6926 "11001010" // /* MW 2 */ + 6927 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 211 42 +.src_ref 7 "conv2d_dw_bf16_params.h" 216 61 + 6928 "01011100" // ST r18, [p7], #4; ADD r6, r1, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6929 "11000001" // /* MW 5 */ + 6930 "10011010" // /* MW 4 */ + 6931 "00110000" // /* MW 3 */ + 6932 "11001010" // /* MW 2 */ + 6933 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 212 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 216 61 + 6934 "01011100" // ST r22, [p7], #4; LSHL r1, r31, r4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6935 "10011011" // /* MW 5 */ + 6936 "10000100" // /* MW 4 */ + 6937 "00111111" // /* MW 3 */ + 6938 "11011010" // /* MW 2 */ + 6939 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 213 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 216 61 first +.src_ref 7 "conv2d_dw_bf16_params.h" 224 116 +.src_ref 7 "conv2d_dw_bf16_params.h" 224 140 + 6940 "00111010" // ST r31, [p7], #4; ADD r22, r3, r1; MOV r1, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6941 "01011001" // /* MW 9 */ + 6942 "11111111" // /* MW 8 */ + 6943 "00101111" // /* MW 7 */ + 6944 "10000100" // /* MW 6 */ + 6945 "01100000" // /* MW 5 */ + 6946 "00000111" // /* MW 4 */ + 6947 "00110000" // /* MW 3 */ + 6948 "11111110" // /* MW 2 */ + 6949 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 214 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 216 61 +.src_ref 7 "conv2d_dw_bf16_params.h" 222 43 +.src_ref 7 "conv2d_dw_bf16_params.h" 225 43 +.src_ref 7 "conv2d_dw_bf16_params.h" 229 41 +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 + 6950 "00111010" // ST r25, [p7], #4; SUB r3, r6, r22; MOV r22, #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6951 "01011001" // /* MW 9 */ + 6952 "01000000" // /* MW 8 */ + 6953 "11001000" // /* MW 7 */ + 6954 "00001110" // /* MW 6 */ + 6955 "00111011" // /* MW 5 */ + 6956 "00001100" // /* MW 4 */ + 6957 "00110000" // /* MW 3 */ + 6958 "11100110" // /* MW 2 */ + 6959 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 215 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 219 53 first + 6960 "01011100" // ST r3, [p7], #4; LSHL r16, r15, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6961 "00011011" // /* MW 5 */ + 6962 "11000010" // /* MW 4 */ + 6963 "00110111" // /* MW 3 */ + 6964 "10001110" // /* MW 2 */ + 6965 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 200 45 first +.src_ref 7 "conv2d_dw_bf16_params.h" 218 43 first +.src_ref 7 "conv2d_dw_bf16_params.h" 219 63 + 6966 "01011100" // ST r18, [p7], #4; SEL.EQZ r0, r0, r24, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6967 "00000100" // /* MW 5 */ + 6968 "00000011" // /* MW 4 */ + 6969 "00110000" // /* MW 3 */ + 6970 "11001010" // /* MW 2 */ + 6971 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 219 60 first +.src_ref 7 "conv2d_dw_bf16_params.h" 221 73 first + 6972 "10100100" // LSHL r3, r30, r23; ADD.NC r0, r16, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6973 "00000010" // /* MW 5 */ + 6974 "00110000" // /* MW 4 */ + 6975 "10110000" // /* MW 3 */ + 6976 "11101111" // /* MW 2 */ + 6977 "11110000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 219 43 +.src_ref 7 "conv2d_dw_bf16_params.h" 221 73 + 6978 "01011100" // ST r0, [p7], #4; SUB r16, r16, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6979 "01100011" // /* MW 5 */ + 6980 "01000000" // /* MW 4 */ + 6981 "00111000" // /* MW 3 */ + 6982 "10000010" // /* MW 2 */ + 6983 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 220 43 first +.src_ref 7 "conv2d_dw_bf16_params.h" 221 73 +.src_ref 7 "conv2d_dw_bf16_params.h" 224 116 first +.src_ref 7 "conv2d_dw_bf16_params.h" 224 140 first + 6984 "00111010" // ST r31, [p7], #4; MAC r1, r1, r5, r2; ADD.NC r31, r16, #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6985 "00001001" // /* MW 9 */ + 6986 "00010000" // /* MW 8 */ + 6987 "11101100" // /* MW 7 */ + 6988 "00110011" // /* MW 6 */ + 6989 "00010001" // /* MW 5 */ + 6990 "00001010" // /* MW 4 */ + 6991 "00110000" // /* MW 3 */ + 6992 "11111110" // /* MW 2 */ + 6993 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 221 43 first +.src_ref 7 "conv2d_dw_bf16_params.h" 230 47 first + 6994 "01011100" // ST r31, [p7], #4; LSHL r31, r18, r21 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6995 "10111011" // /* MW 5 */ + 6996 "01111110" // /* MW 4 */ + 6997 "00111001" // /* MW 3 */ + 6998 "11111110" // /* MW 2 */ + 6999 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 222 43 first +.src_ref 7 "conv2d_dw_bf16_params.h" 226 49 first + 7000 "01011100" // ST r22, [p7], #4; LSHL r2, r1, r21 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7001 "10111011" // /* MW 5 */ + 7002 "10001010" // /* MW 4 */ + 7003 "00110000" // /* MW 3 */ + 7004 "11011010" // /* MW 2 */ + 7005 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 224 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 226 49 + 7006 "01011100" // ST r1, [p7], #4; SUB r1, r24, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7007 "01000011" // /* MW 5 */ + 7008 "00000100" // /* MW 4 */ + 7009 "00111100" // /* MW 3 */ + 7010 "10000110" // /* MW 2 */ + 7011 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 225 43 first +.src_ref 7 "conv2d_dw_bf16_params.h" 230 47 first + 7012 "01011100" // ST r22, [p7], #4; SUB r2, r24, r31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7013 "11100011" // /* MW 5 */ + 7014 "00001011" // /* MW 4 */ + 7015 "00111100" // /* MW 3 */ + 7016 "11011010" // /* MW 2 */ + 7017 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 226 43 first + 7018 "10011000" // ST r1, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7019 "00110001" // /* MW 3 */ + 7020 "00011100" // /* MW 2 */ + 7021 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 228 40 first + 7022 "10011000" // ST r18, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7023 "01010001" // /* MW 3 */ + 7024 "00011110" // /* MW 2 */ + 7025 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 229 41 first + 7026 "10011000" // ST r22, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7027 "11010001" // /* MW 3 */ + 7028 "00011110" // /* MW 2 */ + 7029 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 230 41 first + 7030 "10011000" // ST r2, [p7], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7031 "01010001" // /* MW 3 */ + 7032 "00001000" // /* MW 2 */ + 7033 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 54 first + 7034 "10011000" // LDA.u8 r1, [p7], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7035 "00101010" // /* MW 3 */ + 7036 "00101000" // /* MW 2 */ + 7037 "00000111" // /* MW 1 */ + 7038 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7039 "00000000" // /* MW 1 */ + 7040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7041 "00000000" // /* MW 1 */ + 7042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7043 "00000000" // /* MW 1 */ + 7044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7045 "00000000" // /* MW 1 */ + 7046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7047 "00000000" // /* MW 1 */ + 7048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7049 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 +.src_ref 7 "conv2d_dw_bf16_params.h" 232 58 + 7050 "10000100" // JZ r1, #7088 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7088 delay_slots=5 */ + 7051 "00000001" // /* MW 5 */ + 7052 "00000000" // /* MW 4 */ + 7053 "11011000" // /* MW 3 */ + 7054 "00001101" // /* MW 2 */ + 7055 "00001000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 190 20 +.delay_slot + 7056 "00011000" // MOVX r16, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7057 "00001101" // /* MW 3 */ + 7058 "00100000" // /* MW 2 */ + 7059 "00010000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 190 20 first +.delay_slot + 7060 "10011000" // LSHL r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7061 "00001101" // /* MW 3 */ + 7062 "11100001" // /* MW 2 */ + 7063 "00010011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 +.delay_slot + 7064 "01000100" // MOVXM r31, #-8454144 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7065 "00000000" // /* MW 5 */ + 7066 "10100000" // /* MW 4 */ + 7067 "00001111" // /* MW 3 */ + 7068 "01111111" // /* MW 2 */ + 7069 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7071 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7073 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 + 7074 "01111110" // NOPA; NOPB; NOPS; MOVX r31, #0; NOPM /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 7075 "01100000" // /* MW 13 */ + 7076 "00101011" // /* MW 12 */ + 7077 "00000000" // /* MW 11 */ + 7078 "10101111" // /* MW 10 */ + 7079 "00110100" // /* MW 9 */ + 7080 "00000000" // /* MW 8 */ + 7081 "00000001" // /* MW 7 */ + 7082 "00111110" // /* MW 6 */ + 7083 "00100000" // /* MW 5 */ + 7084 "00000000" // /* MW 4 */ + 7085 "11110000" // /* MW 3 */ + 7086 "00101100" // /* MW 2 */ + 7087 "00000000" // /* MW 1 */ +.label TGT_F_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh_608 +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 +.src_ref 7 "conv2d_dw_bf16_params.h" 236 40 + 7088 "10111010" // MOVA m0, #-197; MOVXM p0, #508448 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7089 "00010000" // /* MW 9 */ + 7090 "00010000" // /* MW 8 */ + 7091 "00110001" // /* MW 7 */ + 7092 "11110000" // /* MW 6 */ + 7093 "00000001" // /* MW 5 */ + 7094 "00000000" // /* MW 4 */ + 7095 "10000000" // /* MW 3 */ + 7096 "01100000" // /* MW 2 */ + 7097 "11100111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 first +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 first +.src_ref 7 "conv2d_dw_bf16_params.h" 234 77 first + 7098 "10111010" // LDA.s8 r17, [p0]; AND r29, r29, r17; VINSERT.32 x0, x0, #0, r31 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7099 "10111000" // /* MW 9 */ + 7100 "11111000" // /* MW 8 */ + 7101 "00000001" // /* MW 7 */ + 7102 "10100100" // /* MW 6 */ + 7103 "11011000" // /* MW 5 */ + 7104 "00111011" // /* MW 4 */ + 7105 "01010000" // /* MW 3 */ + 7106 "11000100" // /* MW 2 */ + 7107 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 +.src_ref 7 "conv2d_dw_bf16_params.h" 239 122 + 7108 "10111010" // LDA r1, [sp, #-8]; MOVX r31, #5; VMOV bmll0, x0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7109 "01111000" // /* MW 9 */ + 7110 "01001001" // /* MW 8 */ + 7111 "00000000" // /* MW 7 */ + 7112 "10101000" // /* MW 6 */ + 7113 "11110000" // /* MW 5 */ + 7114 "00000001" // /* MW 4 */ + 7115 "00100000" // /* MW 3 */ + 7116 "00000110" // /* MW 2 */ + 7117 "11111111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 237 53 +.src_ref 7 "conv2d_dw_bf16_params.h" 239 122 first +.src_ref 7 "conv2d_dw_bf16_params.h" 240 + 7118 "10111010" // LDA lr, [sp, #-4]; LSHL r31, r15, r31; MOV m1, #201 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7119 "01011000" // /* MW 9 */ + 7120 "11001001" // /* MW 8 */ + 7121 "10000000" // /* MW 7 */ + 7122 "11101100" // /* MW 6 */ + 7123 "11111111" // /* MW 5 */ + 7124 "00011111" // /* MW 4 */ + 7125 "00100000" // /* MW 3 */ + 7126 "10000111" // /* MW 2 */ + 7127 "11111111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 + 7128 "00101100" // LDA p0, [sp, #-16]; MOVX r25, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7129 "00000010" // /* MW 5 */ + 7130 "01100100" // /* MW 4 */ + 7131 "00100000" // /* MW 3 */ + 7132 "00000011" // /* MW 2 */ + 7133 "11111110" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7134 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7135 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7136 "00011000" // LDA r14, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7137 "11010001" // /* MW 3 */ + 7138 "11110101" // /* MW 2 */ + 7139 "00000111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 39 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7140 "00011000" // ST.s16 r1, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7141 "00110111" // /* MW 3 */ + 7142 "00101100" // /* MW 2 */ + 7143 "00000111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 +.src_ref 7 "conv2d_dw_bf16_params.h" 234 81 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7144 "11100100" // MUL r28, r29, r28; MOV crRnd, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7145 "01000001" // /* MW 5 */ + 7146 "01110001" // /* MW 4 */ + 7147 "11111111" // /* MW 3 */ + 7148 "00111001" // /* MW 2 */ + 7149 "11101111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 first +.src_ref 7 "conv2d_dw_bf16_params.h" 239 74 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7150 "00111010" // VCONV.bf16.fp32 wl0, bmll0; LSHL r17, r15, r26; MOV r15, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7151 "01111001" // /* MW 9 */ + 7152 "01010000" // /* MW 8 */ + 7153 "11101000" // /* MW 7 */ + 7154 "01101101" // /* MW 6 */ + 7155 "00011101" // /* MW 5 */ + 7156 "00011111" // /* MW 4 */ + 7157 "11000000" // /* MW 3 */ + 7158 "00000010" // /* MW 2 */ + 7159 "00001000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 234 109 first +.src_ref 7 "conv2d_dw_bf16_params.h" 239 51 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7160 "00100100" // MUL r20, r28, r20; ADD.NC r17, r17, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7161 "11111111" // /* MW 5 */ + 7162 "10110001" // /* MW 4 */ + 7163 "11111000" // /* MW 3 */ + 7164 "00101001" // /* MW 2 */ + 7165 "11100101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 first +.src_ref 7 "conv2d_dw_bf16_params.h" 235 59 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7166 "01100100" // LSHL r29, r20, r19; VEXTRACT.16 r1, x0, #0, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7167 "00000011" // /* MW 5 */ + 7168 "10000010" // /* MW 4 */ + 7169 "10110000" // /* MW 3 */ + 7170 "01100111" // /* MW 2 */ + 7171 "10100111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 236 52 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7172 "10011000" // MUL r28, r30, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7173 "11101111" // /* MW 3 */ + 7174 "10111000" // /* MW 2 */ + 7175 "00010111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 51 first + 7176 "10011000" // LSHL r21, r17, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7177 "01011101" // /* MW 3 */ + 7178 "01101011" // /* MW 2 */ + 7179 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 234 40 first +.src_ref 7 "conv2d_dw_bf16_params.h" 237 41 first + 7180 "01011100" // ST r20, [p7], #4; LSHL r23, r28, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7181 "11111011" // /* MW 5 */ + 7182 "01011110" // /* MW 4 */ + 7183 "00111110" // /* MW 3 */ + 7184 "11010010" // /* MW 2 */ + 7185 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 235 41 first +.src_ref 7 "conv2d_dw_bf16_params.h" 239 51 first + 7186 "01011100" // ST r29, [p7], #4; SUB r26, r31, r21 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7187 "10100011" // /* MW 5 */ + 7188 "11101010" // /* MW 4 */ + 7189 "00111111" // /* MW 3 */ + 7190 "11110110" // /* MW 2 */ + 7191 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 236 40 first +.src_ref 7 "conv2d_dw_bf16_params.h" 239 51 + 7192 "01011100" // ST r28, [p7], m0; MAC r21, r21, r31, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7193 "01001100" // /* MW 5 */ + 7194 "11010110" // /* MW 4 */ + 7195 "00111111" // /* MW 3 */ + 7196 "01110010" // /* MW 2 */ + 7197 "11100001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 237 53 first + 7198 "10011000" // LDA.u8 r20, [p7], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7199 "10001010" // /* MW 3 */ + 7200 "00101010" // /* MW 2 */ + 7201 "00000111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 240 first + 7202 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7203 "00000001" // /* MW 5 */ + 7204 "00000000" // /* MW 4 */ + 7205 "00000000" // /* MW 3 */ + 7206 "11111000" // /* MW 2 */ + 7207 "11111111" // /* MW 1 */ + 7208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7209 "00000000" // /* MW 1 */ + 7210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7211 "00000000" // /* MW 1 */ + 7212 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7213 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 139 first + 7214 "10011000" // LSHL r30, r30, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7215 "00111101" // /* MW 3 */ + 7216 "10111101" // /* MW 2 */ + 7217 "00010111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 51 + 7218 "10011000" // SUB r21, r30, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7219 "01010001" // /* MW 3 */ + 7220 "10101011" // /* MW 2 */ + 7221 "00010111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 237 57 first + 7222 "10011000" // EQ r27, r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7223 "01000111" // /* MW 3 */ + 7224 "11110111" // /* MW 2 */ + 7225 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 237 41 + 7226 "00011000" // SEL.EQZ r19, r24, r23, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7227 "01110010" // /* MW 3 */ + 7228 "00100111" // /* MW 2 */ + 7229 "00010110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 237 39 + 7230 "10011000" // ST r19, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7231 "01110001" // /* MW 3 */ + 7232 "00011110" // /* MW 2 */ + 7233 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 238 39 first + 7234 "10011000" // ST r16, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7235 "00010001" // /* MW 3 */ + 7236 "00011110" // /* MW 2 */ + 7237 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 first + 7238 "10011000" // ST r17, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7239 "00110001" // /* MW 3 */ + 7240 "00011110" // /* MW 2 */ + 7241 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 +.src_ref 7 "conv2d_dw_bf16_params.h" 240 first + 7242 "01011100" // ST r22, [p7], #4; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 7243 "00000000" // /* MW 5 */ + 7244 "01010000" // /* MW 4 */ + 7245 "00110000" // /* MW 3 */ + 7246 "11011010" // /* MW 2 */ + 7247 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 first +.delay_slot + 7248 "10011000" // ST r18, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7249 "01010001" // /* MW 3 */ + 7250 "00011110" // /* MW 2 */ + 7251 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 +.delay_slot + 7252 "10011000" // ST r26, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7253 "01010001" // /* MW 3 */ + 7254 "00011111" // /* MW 2 */ + 7255 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 +.delay_slot + 7256 "10011000" // ST r21, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7257 "10110001" // /* MW 3 */ + 7258 "00011110" // /* MW 2 */ + 7259 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 +.delay_slot + 7260 "10011000" // ST r25, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7261 "00110001" // /* MW 3 */ + 7262 "00000111" // /* MW 2 */ + 7263 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 +.delay_slot + 7264 "00000010" // ST r25, [p7, #4]; MOV p7, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7265 "01110000" // /* MW 7 */ + 7266 "01100000" // /* MW 6 */ + 7267 "10110000" // /* MW 5 */ + 7268 "00000011" // /* MW 4 */ + 7269 "00110000" // /* MW 3 */ + 7270 "11100110" // /* MW 2 */ +.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh__end +.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_end0 + 7271 "11100010" // /* MW 1 */ +.label __Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params___func_begin0 +.label _Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params +.function conv2d_dw_core _Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 7 "conv2d_dw_bf16.h" 158 first +.src_ref 7 "conv2d_dw_bf16.h" 179 4 +.src_ref 7 "conv2d_dw_bf16.h" 183 4 +.function_start + 7280 "10110110" // MOVA m6, #-120; VLDB x6, [p0], #64; MOVXM p4, #508972 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7281 "00010000" // /* MW 11 */ + 7282 "00010110" // /* MW 10 */ + 7283 "00110010" // /* MW 9 */ + 7284 "11110010" // /* MW 8 */ + 7285 "00000001" // /* MW 7 */ + 7286 "00000000" // /* MW 6 */ + 7287 "01101000" // /* MW 5 */ + 7288 "00111011" // /* MW 4 */ + 7289 "10000000" // /* MW 3 */ + 7290 "00011000" // /* MW 2 */ + 7291 "11110001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 153 18 +.src_ref 4 "shuffle.hpp" 153 18 +.src_ref 7 "conv2d_dw_bf16.h" 179 4 first +.src_ref 7 "conv2d_dw_bf16.h" 202 56 + 7292 "10110110" // LDA dj2, [p4], #-4; VLDB x1, [p0], #64; MOVX r1, #16; MOV m5, #128 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7293 "01011000" // /* MW 11 */ + 7294 "10000000" // /* MW 10 */ + 7295 "10000000" // /* MW 9 */ + 7296 "00001010" // /* MW 8 */ + 7297 "00010010" // /* MW 7 */ + 7298 "00000000" // /* MW 6 */ + 7299 "11101000" // /* MW 5 */ + 7300 "00111000" // /* MW 4 */ + 7301 "11010000" // /* MW 3 */ + 7302 "10101000" // /* MW 2 */ + 7303 "10011111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 179 4 +.src_ref 7 "conv2d_dw_bf16.h" 202 56 +.src_ref 7 "conv2d_dw_bf16.h" 228 12 +.src_ref 7 "conv2d_dw_bf16.h" 229 12 +.src_ref 7 "conv2d_dw_bf16.h" 230 12 +.src_ref 7 "conv2d_dw_bf16.h" 231 12 +.src_ref 7 "conv2d_dw_bf16.h" 232 12 +.src_ref 7 "conv2d_dw_bf16.h" 233 12 +.src_ref 7 "conv2d_dw_bf16.h" 234 12 +.src_ref 7 "conv2d_dw_bf16.h" 235 12 + 7304 "10111010" // LDA dn2, [p4], #12; MOVX r0, #60; MOV m4, #-112 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7305 "01011000" // /* MW 9 */ + 7306 "10010000" // /* MW 8 */ + 7307 "00000111" // /* MW 7 */ + 7308 "10001010" // /* MW 6 */ + 7309 "00000111" // /* MW 5 */ + 7310 "00000000" // /* MW 4 */ + 7311 "11010000" // /* MW 3 */ + 7312 "10100100" // /* MW 2 */ + 7313 "10000111" // /* MW 1 */ +.src_ref 8 "aie_core.h" 81 15 +.src_ref 8 "aie_core.h" 81 15 +.src_ref 8 "aie_core.h" 100 15 +.src_ref 8 "aie_core.h" 100 15 +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "shuffle.hpp" 153 18 +.src_ref 4 "shuffle.hpp" 153 18 +.src_ref 7 "conv2d_dw_bf16.h" 179 4 + 7314 "10111010" // LDA dj6, [p4], #-4; MOVX r2, #32; MOV dc4, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7315 "01011000" // /* MW 9 */ + 7316 "00000000" // /* MW 8 */ + 7317 "01100000" // /* MW 7 */ + 7318 "00001010" // /* MW 6 */ + 7319 "00100100" // /* MW 5 */ + 7320 "00000000" // /* MW 4 */ + 7321 "11010000" // /* MW 3 */ + 7322 "11101000" // /* MW 2 */ + 7323 "10011111" // /* MW 1 */ +.src_ref 8 "aie_core.h" 81 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 7 "conv2d_dw_bf16.h" 179 4 +.src_ref 7 "conv2d_dw_bf16.h" 208 8 first + 7324 "01110110" // LDA dn6, [p4], #8; MOVS dc7, dc4; MOVXM ls, #7520 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7325 "00010000" // /* MW 11 */ + 7326 "10110000" // /* MW 10 */ + 7327 "01111110" // /* MW 9 */ + 7328 "00000100" // /* MW 8 */ + 7329 "00000000" // /* MW 7 */ + 7330 "00000000" // /* MW 6 */ + 7331 "01001011" // /* MW 5 */ + 7332 "00010000" // /* MW 4 */ + 7333 "11010111" // /* MW 3 */ + 7334 "11100100" // /* MW 2 */ + 7335 "10000101" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 7 "conv2d_dw_bf16.h" 179 4 first +.src_ref 7 "conv2d_dw_bf16.h" 208 8 + 7336 "01110110" // LDA m2, [p4], #8; MOVS dc2, dc4; MOVXM le, #7600 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7337 "00010000" // /* MW 11 */ + 7338 "11011000" // /* MW 10 */ + 7339 "10111110" // /* MW 9 */ + 7340 "00000101" // /* MW 8 */ + 7341 "00000000" // /* MW 7 */ + 7342 "00000000" // /* MW 6 */ + 7343 "01001011" // /* MW 5 */ + 7344 "00010000" // /* MW 4 */ + 7345 "11010010" // /* MW 3 */ + 7346 "10100000" // /* MW 2 */ + 7347 "10000101" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 7 "conv2d_dw_bf16.h" 180 4 first +.src_ref 7 "conv2d_dw_bf16.h" 202 56 + 7348 "01110110" // LDA dj0, [p4], #-4; MOVS dc6, dc4; MOVXM p5, #508448 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7349 "00010000" // /* MW 11 */ + 7350 "00010000" // /* MW 10 */ + 7351 "10110001" // /* MW 9 */ + 7352 "11110010" // /* MW 8 */ + 7353 "00000001" // /* MW 7 */ + 7354 "00000000" // /* MW 6 */ + 7355 "01001011" // /* MW 5 */ + 7356 "00010000" // /* MW 4 */ + 7357 "11010110" // /* MW 3 */ + 7358 "10001000" // /* MW 2 */ + 7359 "10011111" // /* MW 1 */ +.src_ref 8 "aie_core.h" 81 15 +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "shuffle.hpp" 153 18 +.src_ref 4 "shuffle.hpp" 153 18 +.src_ref 7 "conv2d_dw_bf16.h" 180 4 + 7360 "01110110" // LDA dn0, [p4], #12; MOVS dc1, dc4; MOVX r3, #48; MOV dc3, dc4 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7361 "01111000" // /* MW 11 */ + 7362 "11000000" // /* MW 10 */ + 7363 "11100100" // /* MW 9 */ + 7364 "00001001" // /* MW 8 */ + 7365 "00110110" // /* MW 7 */ + 7366 "00000000" // /* MW 6 */ + 7367 "01001011" // /* MW 5 */ + 7368 "00010000" // /* MW 4 */ + 7369 "11010001" // /* MW 3 */ + 7370 "10000100" // /* MW 2 */ + 7371 "10000111" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 180 4 + 7372 "10111010" // LDA dj4, [p4], #-4; MOVS dc0, dc4; VSHIFT x4, x6, x1, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7373 "01100010" // /* MW 9 */ + 7374 "01000011" // /* MW 8 */ + 7375 "00011000" // /* MW 7 */ + 7376 "00000001" // /* MW 6 */ + 7377 "01001011" // /* MW 5 */ + 7378 "00010000" // /* MW 4 */ + 7379 "11010000" // /* MW 3 */ + 7380 "11001000" // /* MW 2 */ + 7381 "10011111" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 7 "conv2d_dw_bf16.h" 180 4 first + 7382 "11010100" // LDA dn4, [p4], #8; MOV dc5, dc4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7383 "00000001" // /* MW 5 */ + 7384 "10010011" // /* MW 4 */ + 7385 "11011011" // /* MW 3 */ + 7386 "11000100" // /* MW 2 */ + 7387 "10000101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 180 4 + 7388 "10011000" // LDA m0, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7389 "00000110" // /* MW 3 */ + 7390 "00101100" // /* MW 2 */ + 7391 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 181 4 first + 7392 "10011000" // LDA dj1, [p4], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7393 "11000110" // /* MW 3 */ + 7394 "11111100" // /* MW 2 */ + 7395 "00000100" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 7 "conv2d_dw_bf16.h" 181 4 + 7396 "00111100" // LDA dn1, [p4], #12; VLDB.3D x2, [p0], d2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7397 "01101000" // /* MW 5 */ + 7398 "10110001" // /* MW 4 */ + 7399 "11010000" // /* MW 3 */ + 7400 "10010100" // /* MW 2 */ + 7401 "10000111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 181 4 first + 7402 "10011000" // LDA dj5, [p4], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7403 "11000110" // /* MW 3 */ + 7404 "11111110" // /* MW 2 */ + 7405 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 181 4 + 7406 "10011000" // LDA dn5, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7407 "10100110" // /* MW 3 */ + 7408 "00101110" // /* MW 2 */ + 7409 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 181 4 + 7410 "10011000" // LDA m1, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7411 "10000110" // /* MW 3 */ + 7412 "00101100" // /* MW 2 */ + 7413 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 182 4 first + 7414 "10011000" // LDA dj7, [p4], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7415 "11000110" // /* MW 3 */ + 7416 "11111111" // /* MW 2 */ + 7417 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 182 4 + 7418 "10011000" // LDA dn7, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7419 "10100110" // /* MW 3 */ + 7420 "00101111" // /* MW 2 */ + 7421 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 182 4 + 7422 "10011000" // LDA m7, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7423 "10000110" // /* MW 3 */ + 7424 "00101111" // /* MW 2 */ + 7425 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 183 4 first + 7426 "10011000" // LDA dj3, [p4], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7427 "11000110" // /* MW 3 */ + 7428 "11111101" // /* MW 2 */ + 7429 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 183 4 + 7430 "10011000" // LDA dn3, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7431 "10100110" // /* MW 3 */ + 7432 "00101101" // /* MW 2 */ + 7433 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 183 4 + 7434 "10011000" // LDA m3, [p4], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7435 "10000110" // /* MW 3 */ + 7436 "11001001" // /* MW 2 */ + 7437 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 202 56 first + 7438 "10011000" // LDA r4, [p4], m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7439 "10010110" // /* MW 3 */ + 7440 "10101000" // /* MW 2 */ + 7441 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 202 56 + 7442 "10011000" // LDA.s16 r7, [p4], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7443 "11110010" // /* MW 3 */ + 7444 "10001000" // /* MW 2 */ + 7445 "00000100" // /* MW 1 */ + 7446 "10011000" // LDA m4, [p4], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7447 "00000110" // /* MW 3 */ + 7448 "01001110" // /* MW 2 */ + 7449 "00000100" // /* MW 1 */ +.src_ref 8 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 7 "conv2d_dw_bf16.h" 202 56 first + 7450 "00111100" // LDA r5, [p4, #-28]; VLDB.2D x3, [p1], d7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7451 "11101000" // /* MW 5 */ + 7452 "11100001" // /* MW 4 */ + 7453 "11010011" // /* MW 3 */ + 7454 "10010110" // /* MW 2 */ + 7455 "10010010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 first + 7456 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7457 "00101011" // /* MW 3 */ + 7458 "00000100" // /* MW 2 */ + 7459 "00000010" // /* MW 1 */ + 7460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7461 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 202 56 first + 7462 "10011000" // LDA.s8 r6, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7463 "11000010" // /* MW 3 */ + 7464 "00000100" // /* MW 2 */ + 7465 "00000101" // /* MW 1 */ + 7466 "00011000" // ADD r4, r4, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7467 "11111011" // /* MW 3 */ + 7468 "00001001" // /* MW 2 */ + 7469 "00010001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 202 4 + 7470 "10111010" // LDA r17, [p4]; MOVXM p4, #7664 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7471 "00010000" // /* MW 9 */ + 7472 "11111000" // /* MW 8 */ + 7473 "00110110" // /* MW 7 */ + 7474 "00000110" // /* MW 6 */ + 7475 "00000000" // /* MW 5 */ + 7476 "00000000" // /* MW 4 */ + 7477 "11010000" // /* MW 3 */ + 7478 "11000110" // /* MW 2 */ + 7479 "10000000" // /* MW 1 */ + 7480 "11111000" // VBCST.16 x0, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7481 "01110010" // /* MW 3 */ + 7482 "00011101" // /* MW 2 */ + 7483 "00011000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 226 12 first + 7484 "01011000" // VEXTBCST.128 x10, x3, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7485 "00000011" // /* MW 3 */ + 7486 "00011100" // /* MW 2 */ + 7487 "00011101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 208 8 first + 7488 "10011000" // ADD.NC lc, r5, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7489 "11111111" // /* MW 3 */ + 7490 "01110010" // /* MW 2 */ + 7491 "00011101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 228 12 +.src_ref 7 "conv2d_dw_bf16.h" 228 12 first + 7492 "01100110" // NOPA; NOPB; VMOV cml3, cml0; VMAC.f dm4, dm3, x6, x10, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7493 "01000001" // /* MW 11 */ + 7494 "01101101" // /* MW 10 */ + 7495 "00000100" // /* MW 9 */ + 7496 "11100010" // /* MW 8 */ + 7497 "10001010" // /* MW 7 */ + 7498 "00000000" // /* MW 6 */ + 7499 "00100011" // /* MW 5 */ + 7500 "00000000" // /* MW 4 */ + 7501 "11110000" // /* MW 3 */ + 7502 "00101100" // /* MW 2 */ + 7503 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 7 "conv2d_dw_bf16.h" 223 12 first +.src_ref 7 "conv2d_dw_bf16.h" 232 12 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7504 "00001011" // NOPA; NOPB; NOPS; MOVX crRnd, r6; VEXTBCST.128 x8, x3, #1; VMAC.f dm1, dm0, x1, x10, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7505 "00011010" // /* MW 15 */ + 7506 "00001000" // /* MW 14 */ + 7507 "10101000" // /* MW 13 */ + 7508 "00000011" // /* MW 12 */ + 7509 "00001110" // /* MW 11 */ + 7510 "00000010" // /* MW 10 */ + 7511 "11010100" // /* MW 9 */ + 7512 "00001101" // /* MW 8 */ + 7513 "01011011" // /* MW 7 */ + 7514 "00000001" // /* MW 6 */ + 7515 "00100000" // /* MW 5 */ + 7516 "00000000" // /* MW 4 */ + 7517 "11110000" // /* MW 3 */ + 7518 "00101100" // /* MW 2 */ + 7519 "00000000" // /* MW 1 */ +.label ZLS_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_240 +.src_ref 8 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 153 18 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.noswbrkpt +.loop_nesting 1 + 7520 "10111010" // VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7521 "01101110" // /* MW 9 */ + 7522 "10000011" // /* MW 8 */ + 7523 "10000100" // /* MW 7 */ + 7524 "00000010" // /* MW 6 */ + 7525 "11110100" // /* MW 5 */ + 7526 "11110000" // /* MW 4 */ + 7527 "01110001" // /* MW 3 */ + 7528 "10110011" // /* MW 2 */ + 7529 "00000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 7 "conv2d_dw_bf16.h" 224 12 first +.src_ref 7 "conv2d_dw_bf16.h" 229 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7530 "01001010" // VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7531 "00000001" // /* MW 9 */ + 7532 "10001001" // /* MW 8 */ + 7533 "00000010" // /* MW 7 */ + 7534 "01000110" // /* MW 6 */ + 7535 "00001011" // /* MW 5 */ + 7536 "10011100" // /* MW 4 */ + 7537 "11101010" // /* MW 3 */ + 7538 "00111000" // /* MW 2 */ + 7539 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 233 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7540 "01001010" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r2; VMAC.f dm1, dm1, x10, x8, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7541 "00000001" // /* MW 9 */ + 7542 "00110101" // /* MW 8 */ + 7543 "00000001" // /* MW 7 */ + 7544 "11000110" // /* MW 6 */ + 7545 "10001010" // /* MW 5 */ + 7546 "00110000" // /* MW 4 */ + 7547 "01101010" // /* MW 3 */ + 7548 "10110001" // /* MW 2 */ + 7549 "00000000" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7550 "11011000" // VSHIFT x11, x1, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7551 "00001010" // /* MW 3 */ + 7552 "10001001" // /* MW 2 */ + 7553 "00011101" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 +.src_ref 7 "conv2d_dw_bf16.h" 230 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7554 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7555 "10100001" // /* MW 7 */ + 7556 "01001000" // /* MW 6 */ + 7557 "00000100" // /* MW 5 */ + 7558 "11000110" // /* MW 4 */ + 7559 "10001110" // /* MW 3 */ + 7560 "10110000" // /* MW 2 */ + 7561 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 225 12 first +.src_ref 7 "conv2d_dw_bf16.h" 234 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7562 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7563 "10100001" // /* MW 7 */ + 7564 "00110110" // /* MW 6 */ + 7565 "00000010" // /* MW 5 */ + 7566 "01000110" // /* MW 4 */ + 7567 "00001111" // /* MW 3 */ + 7568 "10011100" // /* MW 2 */ + 7569 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7570 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7571 "00001110" // /* MW 3 */ + 7572 "10001001" // /* MW 2 */ + 7573 "00011101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 226 12 first +.src_ref 7 "conv2d_dw_bf16.h" 231 12 first + 7574 "01100010" // VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7575 "11100001" // /* MW 7 */ + 7576 "10010010" // /* MW 6 */ + 7577 "00000011" // /* MW 5 */ + 7578 "01000110" // /* MW 4 */ + 7579 "00000011" // /* MW 3 */ + 7580 "00011100" // /* MW 2 */ + 7581 "00000101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 223 12 first +.src_ref 7 "conv2d_dw_bf16.h" 235 12 first + 7582 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7583 "11100001" // /* MW 7 */ + 7584 "01010110" // /* MW 6 */ + 7585 "00000000" // /* MW 5 */ + 7586 "01000110" // /* MW 4 */ + 7587 "00000111" // /* MW 3 */ + 7588 "00011100" // /* MW 2 */ + 7589 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first + 7590 "10010100" // NOPA; VSHIFT x4, x6, x1, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7591 "00001101" // /* MW 5 */ + 7592 "01100001" // /* MW 4 */ + 7593 "11110100" // /* MW 3 */ + 7594 "00101100" // /* MW 2 */ + 7595 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 228 12 first + 7596 "01001000" // VMAC.f dm4, dm3, x6, x10, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7597 "01000001" // /* MW 3 */ + 7598 "01101101" // /* MW 2 */ + 7599 "00000100" // /* MW 1 */ +.label ZLE_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_320 +.src_ref 7 "conv2d_dw_bf16.h" 232 12 first +.end_of_loop +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 7600 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7601 "00011010" // /* MW 15 */ + 7602 "00001000" // /* MW 14 */ + 7603 "01111000" // /* MW 13 */ + 7604 "10100101" // /* MW 12 */ + 7605 "00000001" // /* MW 11 */ + 7606 "00000000" // /* MW 10 */ + 7607 "00000000" // /* MW 9 */ + 7608 "00000000" // /* MW 8 */ + 7609 "01011011" // /* MW 7 */ + 7610 "00000001" // /* MW 6 */ + 7611 "00100000" // /* MW 5 */ + 7612 "00000000" // /* MW 4 */ + 7613 "11110000" // /* MW 3 */ + 7614 "00101100" // /* MW 2 */ + 7615 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 81 15 first +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "shuffle.hpp" 153 18 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 7616 "10111010" // PADDA.3D [p0], d0; PADDB.2D [p2], d3; VSHIFT x10, x1, x2, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7617 "01101110" // /* MW 9 */ + 7618 "10000011" // /* MW 8 */ + 7619 "10000100" // /* MW 7 */ + 7620 "00000010" // /* MW 6 */ + 7621 "10010000" // /* MW 5 */ + 7622 "01110011" // /* MW 4 */ + 7623 "11110010" // /* MW 3 */ + 7624 "00001100" // /* MW 2 */ + 7625 "00000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 224 12 first +.src_ref 7 "conv2d_dw_bf16.h" 229 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7626 "01100010" // VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7627 "00000001" // /* MW 7 */ + 7628 "10001001" // /* MW 6 */ + 7629 "00000010" // /* MW 5 */ + 7630 "01000110" // /* MW 4 */ + 7631 "00001011" // /* MW 3 */ + 7632 "10011100" // /* MW 2 */ + 7633 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 233 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7634 "01100010" // VSHIFT x4, x6, x1, r2; VMAC.f dm1, dm1, x10, x8, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7635 "00000001" // /* MW 7 */ + 7636 "00110101" // /* MW 6 */ + 7637 "00000001" // /* MW 5 */ + 7638 "11000110" // /* MW 4 */ + 7639 "10001010" // /* MW 3 */ + 7640 "00110000" // /* MW 2 */ + 7641 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7642 "11011000" // VSHIFT x11, x1, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7643 "00001010" // /* MW 3 */ + 7644 "10001001" // /* MW 2 */ + 7645 "00011101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 225 12 first +.src_ref 7 "conv2d_dw_bf16.h" 230 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7646 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7647 "10100001" // /* MW 7 */ + 7648 "01001000" // /* MW 6 */ + 7649 "00000100" // /* MW 5 */ + 7650 "01000110" // /* MW 4 */ + 7651 "00001111" // /* MW 3 */ + 7652 "10011100" // /* MW 2 */ + 7653 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 234 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7654 "01001010" // NOPA; VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7655 "10100001" // /* MW 9 */ + 7656 "00110110" // /* MW 8 */ + 7657 "00000010" // /* MW 7 */ + 7658 "11000010" // /* MW 6 */ + 7659 "10001110" // /* MW 5 */ + 7660 "10110000" // /* MW 4 */ + 7661 "11110100" // /* MW 3 */ + 7662 "00101100" // /* MW 2 */ + 7663 "00000000" // /* MW 1 */ +.label TGT_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_384 +.src_ref 8 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 153 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 7664 "10110100" // VLDB.2D x3, [p1], d7; VSHIFT x11, x1, x2, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7665 "00011101" // /* MW 5 */ + 7666 "00010010" // /* MW 4 */ + 7667 "10001011" // /* MW 3 */ + 7668 "00011110" // /* MW 2 */ + 7669 "00111110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 208 8 first +.src_ref 7 "conv2d_dw_bf16.h" 231 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7670 "01011010" // MOVXM le, #7840; VMAC.f dm3, dm4, x9, x7, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7671 "11100001" // /* MW 9 */ + 7672 "10010010" // /* MW 8 */ + 7673 "00000011" // /* MW 7 */ + 7674 "00000010" // /* MW 6 */ + 7675 "11101010" // /* MW 5 */ + 7676 "10110111" // /* MW 4 */ + 7677 "00000000" // /* MW 3 */ + 7678 "00000000" // /* MW 2 */ + 7679 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 7 "conv2d_dw_bf16.h" 208 8 +.src_ref 7 "conv2d_dw_bf16.h" 235 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7680 "01000110" // VLDA.CONV.fp32.bf16 cml0, [p2]; MOVXM ls, #7760; VMAC.f dm0, dm2, x11, x7, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7681 "11100001" // /* MW 11 */ + 7682 "01010110" // /* MW 10 */ + 7683 "00000000" // /* MW 9 */ + 7684 "00000010" // /* MW 8 */ + 7685 "11100101" // /* MW 7 */ + 7686 "10001111" // /* MW 6 */ + 7687 "00000000" // /* MW 5 */ + 7688 "00000000" // /* MW 4 */ + 7689 "01110000" // /* MW 3 */ + 7690 "10000101" // /* MW 2 */ + 7691 "01000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 208 8 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7692 "10011000" // ADD.NC lc, r5, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7693 "11111111" // /* MW 3 */ + 7694 "01110010" // /* MW 2 */ + 7695 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7696 "10011000" // VLDA x6, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7697 "10011011" // /* MW 3 */ + 7698 "00011101" // /* MW 2 */ + 7699 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7700 "00011000" // VLDB x1, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7701 "01110100" // /* MW 3 */ + 7702 "00011100" // /* MW 2 */ + 7703 "00111000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7704 "00011000" // VLDB.3D x2, [p0], d2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7705 "10110100" // /* MW 3 */ + 7706 "01011000" // /* MW 2 */ + 7707 "00111000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7708 "00011000" // VCONV.bf16.fp32 x10, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7709 "10010110" // /* MW 3 */ + 7710 "00010001" // /* MW 2 */ + 7711 "00001101" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7712 "00011000" // VCONV.bf16.fp32 x6, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7713 "00010110" // /* MW 3 */ + 7714 "00010000" // /* MW 2 */ + 7715 "00001011" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7716 "11111000" // VMAX_LT.bf16 x8, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7717 "01101100" // /* MW 3 */ + 7718 "01010000" // /* MW 2 */ + 7719 "00011100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 244 17 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7720 "01111000" // VSHUFFLE x10, x10, x6, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7721 "01000100" // /* MW 3 */ + 7722 "01010011" // /* MW 2 */ + 7723 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 7 "conv2d_dw_bf16.h" 243 16 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7724 "00000010" // VST x8, [p3], m4; VMAX_LT.bf16 x10, r16, x10, x0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7725 "01110000" // /* MW 7 */ + 7726 "00110110" // /* MW 6 */ + 7727 "10101000" // /* MW 5 */ + 7728 "00000010" // /* MW 4 */ + 7729 "01100000" // /* MW 3 */ + 7730 "01000010" // /* MW 2 */ + 7731 "01110001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 226 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7732 "01011000" // VEXTBCST.128 x10, x3, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7733 "00000011" // /* MW 3 */ + 7734 "00011100" // /* MW 2 */ + 7735 "00011101" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 7 "conv2d_dw_bf16.h" 228 12 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7736 "00000010" // VST.3D x10, [p3], d1; VMOV cml3, cml0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7737 "01110000" // /* MW 7 */ + 7738 "01000101" // /* MW 6 */ + 7739 "10000000" // /* MW 5 */ + 7740 "00000001" // /* MW 4 */ + 7741 "01100000" // /* MW 3 */ + 7742 "01010010" // /* MW 2 */ + 7743 "01100111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 223 12 first +.src_ref 7 "conv2d_dw_bf16.h" 228 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7744 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7745 "01000001" // /* MW 7 */ + 7746 "01101101" // /* MW 6 */ + 7747 "00000100" // /* MW 5 */ + 7748 "01000110" // /* MW 4 */ + 7749 "00000111" // /* MW 3 */ + 7750 "00011100" // /* MW 2 */ + 7751 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 232 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7752 "01100010" // VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm0, x1, x10, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7753 "01000001" // /* MW 7 */ + 7754 "00000011" // /* MW 6 */ + 7755 "00000001" // /* MW 5 */ + 7756 "11000110" // /* MW 4 */ + 7757 "10000110" // /* MW 3 */ + 7758 "00110000" // /* MW 2 */ + 7759 "00000010" // /* MW 1 */ +.label ZLS_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_480 +.src_ref 8 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 153 18 +.begin_of_loop +.aggressive_scheduled_block_id 2 +.noswbrkpt +.loop_nesting 2 + 7760 "10111010" // VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7761 "01101110" // /* MW 9 */ + 7762 "10000011" // /* MW 8 */ + 7763 "10000100" // /* MW 7 */ + 7764 "00000010" // /* MW 6 */ + 7765 "11110100" // /* MW 5 */ + 7766 "11110000" // /* MW 4 */ + 7767 "01110001" // /* MW 3 */ + 7768 "10110011" // /* MW 2 */ + 7769 "00000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 7 "conv2d_dw_bf16.h" 224 12 first +.src_ref 7 "conv2d_dw_bf16.h" 229 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7770 "01001010" // VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7771 "00000001" // /* MW 9 */ + 7772 "10001001" // /* MW 8 */ + 7773 "00000010" // /* MW 7 */ + 7774 "01000110" // /* MW 6 */ + 7775 "00001011" // /* MW 5 */ + 7776 "10011100" // /* MW 4 */ + 7777 "11101010" // /* MW 3 */ + 7778 "00111000" // /* MW 2 */ + 7779 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 233 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7780 "01001010" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r2; VMAC.f dm1, dm1, x10, x8, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7781 "00000001" // /* MW 9 */ + 7782 "00110101" // /* MW 8 */ + 7783 "00000001" // /* MW 7 */ + 7784 "11000110" // /* MW 6 */ + 7785 "10001010" // /* MW 5 */ + 7786 "00110000" // /* MW 4 */ + 7787 "01101010" // /* MW 3 */ + 7788 "10110001" // /* MW 2 */ + 7789 "00000000" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7790 "11011000" // VSHIFT x11, x1, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7791 "00001010" // /* MW 3 */ + 7792 "10001001" // /* MW 2 */ + 7793 "00011101" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 +.src_ref 7 "conv2d_dw_bf16.h" 230 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7794 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7795 "10100001" // /* MW 7 */ + 7796 "01001000" // /* MW 6 */ + 7797 "00000100" // /* MW 5 */ + 7798 "11000110" // /* MW 4 */ + 7799 "10001110" // /* MW 3 */ + 7800 "10110000" // /* MW 2 */ + 7801 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 225 12 first +.src_ref 7 "conv2d_dw_bf16.h" 234 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7802 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7803 "10100001" // /* MW 7 */ + 7804 "00110110" // /* MW 6 */ + 7805 "00000010" // /* MW 5 */ + 7806 "01000110" // /* MW 4 */ + 7807 "00001111" // /* MW 3 */ + 7808 "10011100" // /* MW 2 */ + 7809 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7810 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7811 "00001110" // /* MW 3 */ + 7812 "10001001" // /* MW 2 */ + 7813 "00011101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 226 12 first +.src_ref 7 "conv2d_dw_bf16.h" 231 12 first + 7814 "01100010" // VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7815 "11100001" // /* MW 7 */ + 7816 "10010010" // /* MW 6 */ + 7817 "00000011" // /* MW 5 */ + 7818 "01000110" // /* MW 4 */ + 7819 "00000011" // /* MW 3 */ + 7820 "00011100" // /* MW 2 */ + 7821 "00000101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 223 12 first +.src_ref 7 "conv2d_dw_bf16.h" 235 12 first + 7822 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7823 "11100001" // /* MW 7 */ + 7824 "01010110" // /* MW 6 */ + 7825 "00000000" // /* MW 5 */ + 7826 "01000110" // /* MW 4 */ + 7827 "00000111" // /* MW 3 */ + 7828 "00011100" // /* MW 2 */ + 7829 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first + 7830 "10010100" // NOPA; VSHIFT x4, x6, x1, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7831 "00001101" // /* MW 5 */ + 7832 "01100001" // /* MW 4 */ + 7833 "11110100" // /* MW 3 */ + 7834 "00101100" // /* MW 2 */ + 7835 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 228 12 first + 7836 "01001000" // VMAC.f dm4, dm3, x6, x10, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7837 "01000001" // /* MW 3 */ + 7838 "01101101" // /* MW 2 */ + 7839 "00000100" // /* MW 1 */ +.label ZLE_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_560 +.src_ref 7 "conv2d_dw_bf16.h" 232 12 first +.end_of_loop +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 7840 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7841 "00011010" // /* MW 15 */ + 7842 "00001000" // /* MW 14 */ + 7843 "01111000" // /* MW 13 */ + 7844 "10100101" // /* MW 12 */ + 7845 "00000001" // /* MW 11 */ + 7846 "00000000" // /* MW 10 */ + 7847 "00000000" // /* MW 9 */ + 7848 "00000000" // /* MW 8 */ + 7849 "01011011" // /* MW 7 */ + 7850 "00000001" // /* MW 6 */ + 7851 "00100000" // /* MW 5 */ + 7852 "00000000" // /* MW 4 */ + 7853 "11110000" // /* MW 3 */ + 7854 "00101100" // /* MW 2 */ + 7855 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 81 15 first +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 202 4 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 7856 "10110110" // PADDA.3D [p0], d0; PADDB.2D [p2], d3; JNZD r4, r4, p4; VSHIFT x10, x1, x2, r1 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 7857 "01101000" // /* MW 11 */ + 7858 "10000011" // /* MW 10 */ + 7859 "10000100" // /* MW 9 */ + 7860 "00000010" // /* MW 8 */ + 7861 "01001001" // /* MW 7 */ + 7862 "00001000" // /* MW 6 */ + 7863 "00100000" // /* MW 5 */ + 7864 "11100111" // /* MW 4 */ + 7865 "11110100" // /* MW 3 */ + 7866 "00001100" // /* MW 2 */ + 7867 "00000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 224 12 first +.src_ref 7 "conv2d_dw_bf16.h" 229 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7868 "01100010" // VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7869 "00000001" // /* MW 7 */ + 7870 "10001001" // /* MW 6 */ + 7871 "00000010" // /* MW 5 */ + 7872 "01000110" // /* MW 4 */ + 7873 "00001011" // /* MW 3 */ + 7874 "10011100" // /* MW 2 */ + 7875 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 233 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7876 "01100010" // VSHIFT x4, x6, x1, r2; VMAC.f dm1, dm1, x10, x8, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7877 "00000001" // /* MW 7 */ + 7878 "00110101" // /* MW 6 */ + 7879 "00000001" // /* MW 5 */ + 7880 "11000110" // /* MW 4 */ + 7881 "10001010" // /* MW 3 */ + 7882 "00110000" // /* MW 2 */ + 7883 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7884 "11011000" // VSHIFT x11, x1, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7885 "00001010" // /* MW 3 */ + 7886 "10001001" // /* MW 2 */ + 7887 "00011101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 225 12 first +.src_ref 7 "conv2d_dw_bf16.h" 230 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7888 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7889 "10100001" // /* MW 7 */ + 7890 "01001000" // /* MW 6 */ + 7891 "00000100" // /* MW 5 */ + 7892 "01000110" // /* MW 4 */ + 7893 "00001111" // /* MW 3 */ + 7894 "10011100" // /* MW 2 */ + 7895 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 234 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7896 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7897 "10100001" // /* MW 7 */ + 7898 "00110110" // /* MW 6 */ + 7899 "00000010" // /* MW 5 */ + 7900 "11000110" // /* MW 4 */ + 7901 "10001110" // /* MW 3 */ + 7902 "10110000" // /* MW 2 */ + 7903 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 7904 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7905 "00001110" // /* MW 3 */ + 7906 "10001001" // /* MW 2 */ + 7907 "00011101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 231 12 first + 7908 "01001000" // VMAC.f dm3, dm4, x9, x7, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7909 "11100001" // /* MW 3 */ + 7910 "10010010" // /* MW 2 */ + 7911 "00000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 235 12 first + 7912 "01001000" // VMAC.f dm0, dm2, x11, x7, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7913 "11100001" // /* MW 3 */ + 7914 "01010110" // /* MW 2 */ + 7915 "00000000" // /* MW 1 */ + 7916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7917 "00000000" // /* MW 1 */ + 7918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7919 "00000000" // /* MW 1 */ + 7920 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7921 "00000000" // /* MW 1 */ + 7922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7923 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 first + 7924 "00011000" // VCONV.bf16.fp32 x10, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7925 "10010110" // /* MW 3 */ + 7926 "00010001" // /* MW 2 */ + 7927 "00001101" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 7 "conv2d_dw_bf16.h" 248 first + 7928 "01011100" // VCONV.bf16.fp32 x6, cml0; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 7929 "00000000" // /* MW 5 */ + 7930 "01010000" // /* MW 4 */ + 7931 "11000000" // /* MW 3 */ + 7932 "00000010" // /* MW 2 */ + 7933 "01100010" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 7934 "11111000" // VMAX_LT.bf16 x8, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7935 "01101100" // /* MW 3 */ + 7936 "01010000" // /* MW 2 */ + 7937 "00011100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 244 17 first +.delay_slot + 7938 "01111000" // VSHUFFLE x10, x10, x6, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7939 "01000100" // /* MW 3 */ + 7940 "01010011" // /* MW 2 */ + 7941 "00011101" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 7942 "11111000" // VMAX_LT.bf16 x10, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7943 "01101100" // /* MW 3 */ + 7944 "01010000" // /* MW 2 */ + 7945 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 7 "conv2d_dw_bf16.h" 243 16 first +.delay_slot + 7946 "00011000" // VST x8, [p3], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7947 "00010011" // /* MW 3 */ + 7948 "10001010" // /* MW 2 */ + 7949 "00001011" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1159 33 +.delay_slot + 7950 "00011000" // VST.3D x10, [p3], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7951 "10010011" // /* MW 3 */ + 7952 "00111010" // /* MW 2 */ +.label _Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params__end +.label __Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params___func_end0 + 7953 "00001011" // /* MW 1 */ +.label __Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params___func_begin0 +.label _Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params +.function conv2d_dw_shuffle _Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 7 "conv2d_dw_bf16.h" 254 first +.src_ref 7 "conv2d_dw_bf16.h" 255 22 +.src_ref 7 "conv2d_dw_bf16.h" 255 22 +.src_ref 7 "conv2d_dw_bf16.h" 261 15 +.function_start + 7968 "10110110" // MOVA m4, #-36; VLDB x2, [p1], #64; MOVXM p3, #509080 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7969 "00010000" // /* MW 11 */ + 7970 "01001100" // /* MW 10 */ + 7971 "10110010" // /* MW 9 */ + 7972 "11110001" // /* MW 8 */ + 7973 "00000001" // /* MW 7 */ + 7974 "00000000" // /* MW 6 */ + 7975 "01101000" // /* MW 5 */ + 7976 "00111001" // /* MW 4 */ + 7977 "10000010" // /* MW 3 */ + 7978 "10010000" // /* MW 2 */ + 7979 "11111011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 7 "conv2d_dw_bf16.h" 255 22 first +.src_ref 7 "conv2d_dw_bf16.h" 262 15 first +.src_ref 7 "conv2d_dw_bf16.h" 270 25 + 7980 "10110110" // LDA dn0, [p3], #4; VLDB x0, [p2], #64; MOVX r4, #-5; MOV r3, #1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7981 "01011000" // /* MW 11 */ + 7982 "00000001" // /* MW 10 */ + 7983 "01101000" // /* MW 9 */ + 7984 "01101000" // /* MW 8 */ + 7985 "01000111" // /* MW 7 */ + 7986 "00111110" // /* MW 6 */ + 7987 "01101000" // /* MW 5 */ + 7988 "00111000" // /* MW 4 */ + 7989 "11010100" // /* MW 3 */ + 7990 "10000100" // /* MW 2 */ + 7991 "01100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 255 22 +.src_ref 7 "conv2d_dw_bf16.h" 258 4 first + 7992 "10111010" // LDA dj0, [p3], #4; MOVXM ls, #8080 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7993 "00010000" // /* MW 9 */ + 7994 "11001000" // /* MW 8 */ + 7995 "01111111" // /* MW 7 */ + 7996 "00000100" // /* MW 6 */ + 7997 "00000000" // /* MW 5 */ + 7998 "00000000" // /* MW 4 */ + 7999 "11010000" // /* MW 3 */ + 8000 "10001000" // /* MW 2 */ + 8001 "01100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 255 22 first +.src_ref 7 "conv2d_dw_bf16.h" 258 4 + 8002 "10111010" // LDA dn4, [p3], #4; MOVXM le, #8176 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8003 "00010000" // /* MW 9 */ + 8004 "11111000" // /* MW 8 */ + 8005 "10111111" // /* MW 7 */ + 8006 "00000101" // /* MW 6 */ + 8007 "00000000" // /* MW 5 */ + 8008 "00000000" // /* MW 4 */ + 8009 "11010000" // /* MW 3 */ + 8010 "11000100" // /* MW 2 */ + 8011 "01100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 255 22 +.src_ref 7 "conv2d_dw_bf16.h" 263 16 +.src_ref 7 "conv2d_dw_bf16.h" 264 16 +.src_ref 7 "conv2d_dw_bf16.h" 266 47 + 8012 "10111010" // LDA dj4, [p3], #4; MOVX r1, #50; MOV r0, #18 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8013 "01011000" // /* MW 9 */ + 8014 "00010010" // /* MW 8 */ + 8015 "00001000" // /* MW 7 */ + 8016 "01001000" // /* MW 6 */ + 8017 "00010110" // /* MW 5 */ + 8018 "00000000" // /* MW 4 */ + 8019 "11010000" // /* MW 3 */ + 8020 "11001000" // /* MW 2 */ + 8021 "01100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 255 22 +.src_ref 7 "conv2d_dw_bf16.h" 267 47 + 8022 "00101100" // LDA m0, [p3], #4; MOVX r2, #19 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8023 "10011010" // /* MW 5 */ + 8024 "00001000" // /* MW 4 */ + 8025 "11010000" // /* MW 3 */ + 8026 "10000000" // /* MW 2 */ + 8027 "01100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 255 22 + 8028 "10011000" // LDA dc0, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8029 "01100110" // /* MW 3 */ + 8030 "00011100" // /* MW 2 */ + 8031 "00000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 255 22 + 8032 "10011000" // LDA dc4, [p3], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8033 "01100110" // /* MW 3 */ + 8034 "10001010" // /* MW 2 */ + 8035 "00000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 258 51 first + 8036 "10011000" // LDA r5, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8037 "10110110" // /* MW 3 */ + 8038 "00000100" // /* MW 2 */ + 8039 "00000011" // /* MW 1 */ + 8040 "10011000" // LDA r6, [p3, #8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8041 "11010110" // /* MW 3 */ + 8042 "00100100" // /* MW 2 */ + 8043 "00000011" // /* MW 1 */ + 8044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8045 "00000000" // /* MW 1 */ + 8046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8047 "00000000" // /* MW 1 */ + 8048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8049 "00000000" // /* MW 1 */ + 8050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8051 "00000000" // /* MW 1 */ + 8052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8053 "00000000" // /* MW 1 */ + 8054 "10011000" // LSHL r4, r5, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8055 "01001101" // /* MW 3 */ + 8056 "01001000" // /* MW 2 */ + 8057 "00010001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 258 4 first +.src_ref 7 "conv2d_dw_bf16.h" 270 25 first + 8058 "00100100" // LSHL r3, r6, r3; ADD.NC lc, r4, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8059 "11111111" // /* MW 5 */ + 8060 "11100100" // /* MW 4 */ + 8061 "10111010" // /* MW 3 */ + 8062 "11000111" // /* MW 2 */ + 8063 "00110000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 7 "conv2d_dw_bf16.h" 270 25 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8064 "11100001" // NOPA; NOPB; NOPS; NOPX; MOV dj1, r3; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8065 "00000000" // /* MW 15 */ + 8066 "00000000" // /* MW 14 */ + 8067 "01111000" // /* MW 13 */ + 8068 "11010000" // /* MW 12 */ + 8069 "11000000" // /* MW 11 */ + 8070 "00000000" // /* MW 10 */ + 8071 "00000000" // /* MW 9 */ + 8072 "00000000" // /* MW 8 */ + 8073 "01011011" // /* MW 7 */ + 8074 "00000001" // /* MW 6 */ + 8075 "00100000" // /* MW 5 */ + 8076 "00000000" // /* MW 4 */ + 8077 "11110000" // /* MW 3 */ + 8078 "00101100" // /* MW 2 */ + 8079 "00000000" // /* MW 1 */ +.label ZLS_F_Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params_112 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 7 "conv2d_dw_bf16.h" 261 15 first +.src_ref 7 "conv2d_dw_bf16.h" 262 15 first +.src_ref 7 "conv2d_dw_bf16.h" 263 16 first +.src_ref 7 "conv2d_dw_bf16.h" 270 25 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.noswbrkpt +.loop_nesting 1 + 8080 "11100001" // VLDA x0, [p2], #64; VLDB x2, [p1], #64; MOVS p4, p0; NOPX; VSHUFFLE x1, x2, x0, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8081 "00000000" // /* MW 15 */ + 8082 "00000000" // /* MW 14 */ + 8083 "00111000" // /* MW 13 */ + 8084 "00000010" // /* MW 12 */ + 8085 "01001000" // /* MW 11 */ + 8086 "00000000" // /* MW 10 */ + 8087 "00000000" // /* MW 9 */ + 8088 "00000000" // /* MW 8 */ + 8089 "10001011" // /* MW 7 */ + 8090 "10000000" // /* MW 6 */ + 8091 "01101100" // /* MW 5 */ + 8092 "00111001" // /* MW 4 */ + 8093 "01110010" // /* MW 3 */ + 8094 "10000011" // /* MW 2 */ + 8095 "01000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 264 16 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8096 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE x3, x0, x0, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8097 "00000000" // /* MW 15 */ + 8098 "00000000" // /* MW 14 */ + 8099 "00111000" // /* MW 13 */ + 8100 "00000010" // /* MW 12 */ + 8101 "11000000" // /* MW 11 */ + 8102 "00000000" // /* MW 10 */ + 8103 "00000000" // /* MW 9 */ + 8104 "00000000" // /* MW 8 */ + 8105 "01011011" // /* MW 7 */ + 8106 "00000001" // /* MW 6 */ + 8107 "00100000" // /* MW 5 */ + 8108 "00000000" // /* MW 4 */ + 8109 "11110000" // /* MW 3 */ + 8110 "00101100" // /* MW 2 */ + 8111 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 266 47 first + 8112 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmlh0, x1, x3, r0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8113 "00000000" // /* MW 15 */ + 8114 "00000000" // /* MW 14 */ + 8115 "11101000" // /* MW 13 */ + 8116 "11000000" // /* MW 12 */ + 8117 "01000100" // /* MW 11 */ + 8118 "00000000" // /* MW 10 */ + 8119 "00000000" // /* MW 9 */ + 8120 "00000000" // /* MW 8 */ + 8121 "01011011" // /* MW 7 */ + 8122 "00000001" // /* MW 6 */ + 8123 "00100000" // /* MW 5 */ + 8124 "00000000" // /* MW 4 */ + 8125 "11110000" // /* MW 3 */ + 8126 "00101100" // /* MW 2 */ + 8127 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 267 47 first + 8128 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x1, x3, r2; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8129 "00000000" // /* MW 15 */ + 8130 "00000000" // /* MW 14 */ + 8131 "11101000" // /* MW 13 */ + 8132 "11000100" // /* MW 12 */ + 8133 "00000100" // /* MW 11 */ + 8134 "00000000" // /* MW 10 */ + 8135 "00000000" // /* MW 9 */ + 8136 "00000000" // /* MW 8 */ + 8137 "01011011" // /* MW 7 */ + 8138 "00000001" // /* MW 6 */ + 8139 "00100000" // /* MW 5 */ + 8140 "00000000" // /* MW 4 */ + 8141 "11110000" // /* MW 3 */ + 8142 "00101100" // /* MW 2 */ + 8143 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 143 15 first +.src_ref 4 "vector.hpp" 1159 33 first + 8144 "11100001" // NOPA; NOPB; VST.3D bmlh0, [p0], d0; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8145 "00000000" // /* MW 15 */ + 8146 "00000000" // /* MW 14 */ + 8147 "01111000" // /* MW 13 */ + 8148 "10100101" // /* MW 12 */ + 8149 "00000001" // /* MW 11 */ + 8150 "00000000" // /* MW 10 */ + 8151 "00000000" // /* MW 9 */ + 8152 "10000000" // /* MW 8 */ + 8153 "00100110" // /* MW 7 */ + 8154 "00011000" // /* MW 6 */ + 8155 "00100000" // /* MW 5 */ + 8156 "00000000" // /* MW 4 */ + 8157 "11110000" // /* MW 3 */ + 8158 "00101100" // /* MW 2 */ + 8159 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 7 "conv2d_dw_bf16.h" 270 25 first + 8160 "11100001" // NOPA; NOPB; VST bmll0, [p4, dj1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8161 "00000000" // /* MW 15 */ + 8162 "00000000" // /* MW 14 */ + 8163 "01111000" // /* MW 13 */ + 8164 "10100101" // /* MW 12 */ + 8165 "00000001" // /* MW 11 */ + 8166 "00000000" // /* MW 10 */ + 8167 "00000000" // /* MW 9 */ + 8168 "10000000" // /* MW 8 */ + 8169 "00000110" // /* MW 7 */ + 8170 "00100000" // /* MW 6 */ + 8171 "00100100" // /* MW 5 */ + 8172 "00000000" // /* MW 4 */ + 8173 "11110000" // /* MW 3 */ + 8174 "00101100" // /* MW 2 */ + 8175 "00000000" // /* MW 1 */ +.label ZLE_F_Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params_208 +.end_of_loop + 8176 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8177 "00000000" // /* MW 15 */ + 8178 "00000000" // /* MW 14 */ + 8179 "01111000" // /* MW 13 */ + 8180 "10100101" // /* MW 12 */ + 8181 "00000001" // /* MW 11 */ + 8182 "00000000" // /* MW 10 */ + 8183 "00000000" // /* MW 9 */ + 8184 "00000000" // /* MW 8 */ + 8185 "01011011" // /* MW 7 */ + 8186 "00000001" // /* MW 6 */ + 8187 "00100000" // /* MW 5 */ + 8188 "00000000" // /* MW 4 */ + 8189 "11110000" // /* MW 3 */ + 8190 "00101100" // /* MW 2 */ + 8191 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 7 "conv2d_dw_bf16.h" 263 16 first +.src_ref 7 "conv2d_dw_bf16.h" 270 25 +.src_ref 7 "conv2d_dw_bf16.h" 274 first +.loop_nesting 0 + 8192 "00111010" // MOVS p4, p0; RET lr; VSHUFFLE x1, x2, x0, r1 /* MW 10 */ /* control_operation: words=10 rts unconditional cycles_taken=1 delay_slots=5 */ + 8193 "00111001" // /* MW 9 */ + 8194 "00000010" // /* MW 8 */ + 8195 "01001000" // /* MW 7 */ + 8196 "00000000" // /* MW 6 */ + 8197 "01000000" // /* MW 5 */ + 8198 "00000001" // /* MW 4 */ + 8199 "01100000" // /* MW 3 */ + 8200 "00010001" // /* MW 2 */ + 8201 "10010000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 264 16 first +.delay_slot + 8202 "01111000" // VSHUFFLE x3, x0, x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8203 "00000100" // /* MW 3 */ + 8204 "10000000" // /* MW 2 */ + 8205 "00011001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 266 47 first +.delay_slot + 8206 "11011000" // VSHUFFLE bmlh0, x1, x3, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8207 "10000001" // /* MW 3 */ + 8208 "10001001" // /* MW 2 */ + 8209 "00011000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 267 47 first +.delay_slot + 8210 "11011000" // VSHUFFLE bmll0, x1, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8211 "10001001" // /* MW 3 */ + 8212 "00001001" // /* MW 2 */ + 8213 "00011000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 143 15 first +.src_ref 4 "vector.hpp" 1159 33 first +.delay_slot + 8214 "10011000" // VST.3D bmlh0, [p0], d0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8215 "00100110" // /* MW 3 */ + 8216 "00011000" // /* MW 2 */ + 8217 "00001000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 7 "conv2d_dw_bf16.h" 270 25 first +.delay_slot + 8218 "10011000" // VST bmll0, [p4, dj1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8219 "00000110" // /* MW 3 */ + 8220 "00100000" // /* MW 2 */ +.label _Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params__end +.label __Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params___func_end0 + 8221 "00001100" // /* MW 1 */ +.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_begin0 +.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params +.function conv2d_dw<(unsigned char)'\x01', bfloat16, bfloat16, bfloat16, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::async, adf::addressing::linear, adf::margin<0U> > > _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 7 "conv2d_dw_bf16.h" 282 first +.src_ref 7 "conv2d_dw_bf16.h" 285 49 +.src_ref 7 "conv2d_dw_bf16.h" 285 79 +.src_ref 7 "conv2d_dw_bf16.h" 287 48 +.function_start + 8224 "10111010" // LDA p1, [p1]; MOVX r17, #1; MOV m0, #-193 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8225 "01011000" // /* MW 9 */ + 8226 "00111111" // /* MW 8 */ + 8227 "00000111" // /* MW 7 */ + 8228 "00101000" // /* MW 6 */ + 8229 "00010000" // /* MW 5 */ + 8230 "00000001" // /* MW 4 */ + 8231 "11010000" // /* MW 3 */ + 8232 "10010011" // /* MW 2 */ + 8233 "00100000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 285 79 +.src_ref 7 "conv2d_dw_bf16.h" 287 77 + 8234 "10111010" // MOVA m1, #-208; MOVXM p4, #509064 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8235 "00010000" // /* MW 9 */ + 8236 "01000100" // /* MW 8 */ + 8237 "00110010" // /* MW 7 */ + 8238 "11110010" // /* MW 6 */ + 8239 "00000001" // /* MW 5 */ + 8240 "00000000" // /* MW 4 */ + 8241 "10000000" // /* MW 3 */ + 8242 "00000100" // /* MW 2 */ + 8243 "11100110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 285 79 first +.src_ref 7 "conv2d_dw_bf16.h" 285 103 + 8244 "01010100" // LDA r16, [p4], m0; MOV m0, #201 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8245 "00100101" // /* MW 5 */ + 8246 "00000011" // /* MW 4 */ + 8247 "11010000" // /* MW 3 */ + 8248 "01000010" // /* MW 2 */ + 8249 "10000001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 285 103 + 8250 "10011000" // LDA.u8 r19, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8251 "01101010" // /* MW 3 */ + 8252 "00001010" // /* MW 2 */ + 8253 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 287 77 first + 8254 "10011000" // LDA r18, [p4], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8255 "01010110" // /* MW 3 */ + 8256 "00101010" // /* MW 2 */ + 8257 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 282 + 8258 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8259 "00000001" // /* MW 5 */ + 8260 "00000000" // /* MW 4 */ + 8261 "00000000" // /* MW 3 */ + 8262 "00001000" // /* MW 2 */ + 8263 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8264 "00001100" // LDA p0, [p0]; ST lr, [sp, #-8] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8265 "01111011" // /* MW 5 */ + 8266 "11110000" // /* MW 4 */ + 8267 "11011111" // /* MW 3 */ + 8268 "10000011" // /* MW 2 */ + 8269 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8270 "00001100" // LDA r15, [p2]; ST p2, [sp, #-16] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8271 "00111011" // /* MW 5 */ + 8272 "11100010" // /* MW 4 */ + 8273 "11011111" // /* MW 3 */ + 8274 "10111110" // /* MW 2 */ + 8275 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8276 "10011000" // ST p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8277 "10011101" // /* MW 3 */ + 8278 "11111111" // /* MW 2 */ + 8279 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 284 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 8280 "00111010" // ST p6, [sp, #-20]; JL #7280 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=7280 delay_slots=5 */ + 8281 "01000001" // /* MW 9 */ + 8282 "00000000" // /* MW 8 */ + 8283 "00000000" // /* MW 7 */ + 8284 "10001110" // /* MW 6 */ + 8285 "00000011" // /* MW 5 */ + 8286 "00000000" // /* MW 4 */ + 8287 "10110000" // /* MW 3 */ + 8288 "11100011" // /* MW 2 */ + 8289 "11111101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 284 4 +.src_ref 7 "conv2d_dw_bf16.h" 285 89 first +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8290 "00111010" // ST r15, [sp, #-12]; MUL r16, r19, r16; MOV p2, p1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8291 "01111001" // /* MW 9 */ + 8292 "01100000" // /* MW 8 */ + 8293 "00110001" // /* MW 7 */ + 8294 "01111101" // /* MW 6 */ + 8295 "00001000" // /* MW 5 */ + 8296 "00100111" // /* MW 4 */ + 8297 "10110000" // /* MW 3 */ + 8298 "10111110" // /* MW 2 */ + 8299 "11111110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 290 20 +.delay_slot + 8300 "11111000" // MOV p6, p4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8301 "11000000" // /* MW 3 */ + 8302 "01101000" // /* MW 2 */ + 8303 "00011110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 285 49 +.delay_slot + 8304 "01011100" // ST p1, [sp, #-24]; LSHL r16, r16, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8305 "00111011" // /* MW 5 */ + 8306 "01000010" // /* MW 4 */ + 8307 "10111000" // /* MW 3 */ + 8308 "00010011" // /* MW 2 */ + 8309 "11111101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 285 49 +.src_ref 7 "conv2d_dw_bf16.h" 287 48 first +.src_ref 7 "conv2d_dw_bf16.h" 291 8 +.delay_slot + 8310 "00111010" // MOVS p7, p0; LSHL r16, r18, r17; MOV m0, r16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8311 "01111001" // /* MW 9 */ + 8312 "00010000" // /* MW 8 */ + 8313 "00000100" // /* MW 7 */ + 8314 "11101100" // /* MW 6 */ + 8315 "00001000" // /* MW 5 */ + 8316 "00100101" // /* MW 4 */ + 8317 "01100000" // /* MW 3 */ + 8318 "00010001" // /* MW 2 */ + 8319 "11110000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 285 49 first +.src_ref 7 "conv2d_dw_bf16.h" 287 48 +.delay_slot + 8320 "11100001" // NOPA; PADDB [p1], m0; NOPS; NOPX; ADD.NC p3, r15, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8321 "00000000" // /* MW 15 */ + 8322 "00000000" // /* MW 14 */ + 8323 "10101000" // /* MW 13 */ + 8324 "11100000" // /* MW 12 */ + 8325 "10110011" // /* MW 11 */ + 8326 "00000001" // /* MW 10 */ + 8327 "00000000" // /* MW 9 */ + 8328 "00000000" // /* MW 8 */ + 8329 "01011011" // /* MW 7 */ + 8330 "00000001" // /* MW 6 */ + 8331 "00100000" // /* MW 5 */ + 8332 "00010111" // /* MW 4 */ + 8333 "11110010" // /* MW 3 */ + 8334 "00101100" // /* MW 2 */ + 8335 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 290 20 first +.src_ref 7 "conv2d_dw_bf16.h" 290 24 +.src_ref 7 "conv2d_dw_bf16.h" 292 31 +.return_address + 8336 "00101100" // LDA.u8 r16, [p6, #7]; MOVX r17, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8337 "00010010" // /* MW 5 */ + 8338 "01000100" // /* MW 4 */ + 8339 "01010000" // /* MW 3 */ + 8340 "11000001" // /* MW 2 */ + 8341 "11001110" // /* MW 1 */ + 8342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8343 "00000000" // /* MW 1 */ + 8344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8345 "00000000" // /* MW 1 */ + 8346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8347 "00000000" // /* MW 1 */ + 8348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8349 "00000000" // /* MW 1 */ + 8350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8351 "00000000" // /* MW 1 */ + 8352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8353 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 290 24 + 8354 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8355 "00001000" // /* MW 3 */ + 8356 "01100001" // /* MW 2 */ + 8357 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 290 8 + 8358 "10000100" // JNZ r16, #8544 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8544 delay_slots=5 */ + 8359 "00000001" // /* MW 5 */ + 8360 "01000000" // /* MW 4 */ + 8361 "10110000" // /* MW 3 */ + 8362 "00010000" // /* MW 2 */ + 8363 "10000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 61 +.delay_slot + 8364 "01000100" // MOVXM p4, #509064 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8365 "00010000" // /* MW 5 */ + 8366 "11001001" // /* MW 4 */ + 8367 "11001000" // /* MW 3 */ + 8368 "00000111" // /* MW 2 */ + 8369 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8371 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8373 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8375 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8377 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 31 +.src_ref 7 "conv2d_dw_bf16.h" 292 61 first +.src_ref 7 "conv2d_dw_bf16.h" 292 71 +.src_ref 7 "conv2d_dw_bf16.h" 294 30 +.src_ref 7 "conv2d_dw_bf16.h" 294 68 +.src_ref 7 "conv2d_dw_bf16.h" 294 97 + 8378 "10111010" // LDA r16, [p4], #-4; MOVX r0, #1; MOV m0, #-208 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8379 "01011000" // /* MW 9 */ + 8380 "00110000" // /* MW 8 */ + 8381 "00000111" // /* MW 7 */ + 8382 "00101000" // /* MW 6 */ + 8383 "00000000" // /* MW 5 */ + 8384 "00000000" // /* MW 4 */ + 8385 "11010000" // /* MW 3 */ + 8386 "11000010" // /* MW 2 */ + 8387 "10011111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 118 + 8388 "10011000" // LDA r21, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8389 "10110110" // /* MW 3 */ + 8390 "00101110" // /* MW 2 */ + 8391 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 294 59 first + 8392 "10011000" // LDA r18, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8393 "01010110" // /* MW 3 */ + 8394 "00011110" // /* MW 2 */ + 8395 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 31 +.src_ref 7 "conv2d_dw_bf16.h" 293 31 +.src_ref 7 "conv2d_dw_bf16.h" 297 84 + 8396 "11010100" // LDA r19, [sp, #-24]; MOV p0, p4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8397 "10000001" // /* MW 5 */ + 8398 "11010001" // /* MW 4 */ + 8399 "00100000" // /* MW 3 */ + 8400 "01001110" // /* MW 2 */ + 8401 "11111101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 291 8 +.src_ref 7 "conv2d_dw_bf16.h" 294 97 +.src_ref 7 "conv2d_dw_bf16.h" 297 84 + 8402 "10111010" // LDA r20, [p4], m0; MOVS p0, p7; MOV p7, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8403 "01110010" // /* MW 9 */ + 8404 "01100000" // /* MW 8 */ + 8405 "10110000" // /* MW 7 */ + 8406 "00000011" // /* MW 6 */ + 8407 "10001011" // /* MW 5 */ + 8408 "10011100" // /* MW 4 */ + 8409 "11010000" // /* MW 3 */ + 8410 "01010010" // /* MW 2 */ + 8411 "10000001" // /* MW 1 */ + 8412 "00011000" // LDA p6, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8413 "00011001" // /* MW 3 */ + 8414 "11101111" // /* MW 2 */ + 8415 "00000111" // /* MW 1 */ + 8416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8417 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 31 first + 8418 "10011000" // LSHL r17, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8419 "00011101" // /* MW 3 */ + 8420 "00100011" // /* MW 2 */ + 8421 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 71 + 8422 "10011000" // LSHL r16, r16, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8423 "00001101" // /* MW 3 */ + 8424 "00100000" // /* MW 2 */ + 8425 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 291 8 first +.no_stack_arguments + 8426 "00000100" // JL #7280 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7280 delay_slots=5 */ + 8427 "00000001" // /* MW 5 */ + 8428 "00000000" // /* MW 4 */ + 8429 "00111000" // /* MW 3 */ + 8430 "00001110" // /* MW 2 */ + 8431 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 31 first +.src_ref 7 "conv2d_dw_bf16.h" 294 30 first +.delay_slot + 8432 "10100100" // LSHL r18, r18, r0; ADD.NC r22, r19, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8433 "10001010" // /* MW 5 */ + 8434 "00110011" // /* MW 4 */ + 8435 "10111011" // /* MW 3 */ + 8436 "10000001" // /* MW 2 */ + 8437 "10010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 31 +.src_ref 7 "conv2d_dw_bf16.h" 294 30 +.delay_slot + 8438 "10100100" // LSHL r17, r21, r0; ADD.NC r21, r15, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8439 "10010010" // /* MW 5 */ + 8440 "10101111" // /* MW 4 */ + 8441 "10111010" // /* MW 3 */ + 8442 "01000001" // /* MW 2 */ + 8443 "10101100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 31 +.src_ref 7 "conv2d_dw_bf16.h" 294 68 +.delay_slot + 8444 "10100100" // LSHL r18, r20, r0; ADD.NC p1, r17, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8445 "10110010" // /* MW 5 */ + 8446 "11010001" // /* MW 4 */ + 8447 "10110010" // /* MW 3 */ + 8448 "10000001" // /* MW 2 */ + 8449 "10100100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 293 31 first +.delay_slot + 8450 "01011000" // ADD.NC p2, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8451 "11000001" // /* MW 3 */ + 8452 "01101001" // /* MW 2 */ + 8453 "00011010" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 294 68 first +.delay_slot + 8454 "10111010" // NOPA; NOPB; ADD.NC p3, r21, r18 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8455 "10101110" // /* MW 9 */ + 8456 "01100100" // /* MW 8 */ + 8457 "10110101" // /* MW 7 */ + 8458 "00000001" // /* MW 6 */ + 8459 "00010000" // /* MW 5 */ + 8460 "00000000" // /* MW 4 */ + 8461 "11110000" // /* MW 3 */ + 8462 "00101100" // /* MW 2 */ + 8463 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 7 "conv2d_dw_bf16.h" 297 55 +.src_ref 7 "conv2d_dw_bf16.h" 298 55 +.src_ref 7 "conv2d_dw_bf16.h" 298 84 +.return_address + 8464 "10111010" // LDA p4, [sp, #-16]; MOVX r0, #1; MOV m0, #-204 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8465 "01011000" // /* MW 9 */ + 8466 "00110100" // /* MW 8 */ + 8467 "00000111" // /* MW 7 */ + 8468 "00101000" // /* MW 6 */ + 8469 "00000000" // /* MW 5 */ + 8470 "00000000" // /* MW 4 */ + 8471 "00100000" // /* MW 3 */ + 8472 "01000011" // /* MW 2 */ + 8473 "11111110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 297 84 first + 8474 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8475 "00010110" // /* MW 3 */ + 8476 "11111110" // /* MW 2 */ + 8477 "00000111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 298 84 + 8478 "11010100" // LDA p7, [sp, #-4]; MOV p3, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8479 "10000001" // /* MW 5 */ + 8480 "11011101" // /* MW 4 */ + 8481 "00100110" // /* MW 3 */ + 8482 "11110011" // /* MW 2 */ + 8483 "11111111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 298 84 first + 8484 "10011000" // LDA r17, [p3], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8485 "00110110" // /* MW 3 */ + 8486 "00001010" // /* MW 2 */ + 8487 "00000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 296 8 + 8488 "00011000" // LDA lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8489 "00111001" // /* MW 3 */ + 8490 "11111000" // /* MW 2 */ + 8491 "00000111" // /* MW 1 */ + 8492 "00011000" // LDA r15, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8493 "11110001" // /* MW 3 */ + 8494 "11110101" // /* MW 2 */ + 8495 "00000111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 301 first + 8496 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8497 "00000001" // /* MW 5 */ + 8498 "00000000" // /* MW 4 */ + 8499 "00000000" // /* MW 3 */ + 8500 "11111000" // /* MW 2 */ + 8501 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 8502 "10011000" // LDA r18, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8503 "01010110" // /* MW 3 */ + 8504 "00000110" // /* MW 2 */ + 8505 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 297 55 first + 8506 "10011000" // LSHL r16, r16, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8507 "00001101" // /* MW 3 */ + 8508 "00100000" // /* MW 2 */ + 8509 "00010100" // /* MW 1 */ + 8510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8511 "00000000" // /* MW 1 */ + 8512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8513 "00000000" // /* MW 1 */ + 8514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8515 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 296 8 first +.tail_call + 8516 "10000100" // J #7968 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=7968 delay_slots=5 */ + 8517 "00000000" // /* MW 5 */ + 8518 "00000000" // /* MW 4 */ + 8519 "10010000" // /* MW 3 */ + 8520 "00001111" // /* MW 2 */ + 8521 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 298 55 first +.delay_slot + 8522 "10011000" // LSHL r17, r17, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8523 "00001101" // /* MW 3 */ + 8524 "01100010" // /* MW 2 */ + 8525 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 298 55 +.delay_slot + 8526 "01011000" // ADD.NC r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8527 "11001001" // /* MW 3 */ + 8528 "01011000" // /* MW 2 */ + 8529 "00011100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 297 55 first +.delay_slot + 8530 "01011000" // ADD.NC p1, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8531 "01000001" // /* MW 3 */ + 8532 "01101001" // /* MW 2 */ + 8533 "00011001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 296 8 +.delay_slot + 8534 "11111000" // MOV p0, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8535 "00100000" // /* MW 3 */ + 8536 "01101001" // /* MW 2 */ + 8537 "00011000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 298 93 first +.delay_slot + 8538 "10010100" // NOPA; ADD.NC p2, r17, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8539 "10000010" // /* MW 5 */ + 8540 "11010001" // /* MW 4 */ + 8541 "11110100" // /* MW 3 */ + 8542 "00101100" // /* MW 2 */ + 8543 "00000000" // /* MW 1 */ +.label TGT_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_320 +.src_ref 7 "conv2d_dw_bf16.h" 301 +.return_address + 8544 "00011000" // LDA lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8545 "00111001" // /* MW 3 */ + 8546 "11111000" // /* MW 2 */ + 8547 "00000111" // /* MW 1 */ + 8548 "00011000" // LDA r15, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8549 "11110001" // /* MW 3 */ + 8550 "11110101" // /* MW 2 */ + 8551 "00000111" // /* MW 1 */ + 8552 "00011000" // LDA p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8553 "10011001" // /* MW 3 */ + 8554 "11111111" // /* MW 2 */ + 8555 "00000111" // /* MW 1 */ + 8556 "00011000" // LDA p6, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8557 "00011001" // /* MW 3 */ + 8558 "11101111" // /* MW 2 */ + 8559 "00000111" // /* MW 1 */ + 8560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8561 "00000000" // /* MW 1 */ + 8562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8563 "00000000" // /* MW 1 */ + 8564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8565 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 301 first + 8566 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 8567 "00000000" // /* MW 3 */ + 8568 "00101000" // /* MW 2 */ + 8569 "00010000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 301 +.delay_slot + 8570 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8571 "00000001" // /* MW 5 */ + 8572 "00000000" // /* MW 4 */ + 8573 "00000000" // /* MW 3 */ + 8574 "11111000" // /* MW 2 */ + 8575 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8577 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8579 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8581 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8582 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params__end +.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_end0 + 8583 "00000000" // /* MW 1 */ +.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_conv2d_dwc _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 6 "superkernels.cpp" 352 first +.src_ref 6 "superkernels.cpp" 357 6 +.function_start + 8592 "01000100" // MOVXM p4, #508416 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8593 "00000000" // /* MW 5 */ + 8594 "11000100" // /* MW 4 */ + 8595 "11001000" // /* MW 3 */ + 8596 "00000111" // /* MW 2 */ + 8597 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 357 6 first + 8598 "11010100" // LDA r16, [p4]; MOV r0, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8599 "01000001" // /* MW 5 */ + 8600 "00101111" // /* MW 4 */ + 8601 "11010000" // /* MW 3 */ + 8602 "11000010" // /* MW 2 */ + 8603 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 352 + 8604 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8605 "00000001" // /* MW 5 */ + 8606 "00000000" // /* MW 4 */ + 8607 "00000000" // /* MW 3 */ + 8608 "00010000" // /* MW 2 */ + 8609 "00000000" // /* MW 1 */ + 8610 "00000010" // ST r14, [sp, #-8]; MOV r17, CORE_ID /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8611 "01110000" // /* MW 7 */ + 8612 "01110000" // /* MW 6 */ + 8613 "00101101" // /* MW 5 */ + 8614 "00000010" // /* MW 4 */ + 8615 "10110000" // /* MW 3 */ + 8616 "00111010" // /* MW 2 */ + 8617 "11111111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 375 + 8618 "00000010" // ST r13, [sp, #-4]; MOV r13, lr /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8619 "01110000" // /* MW 7 */ + 8620 "11110000" // /* MW 6 */ + 8621 "10101000" // /* MW 5 */ + 8622 "00000001" // /* MW 4 */ + 8623 "10110000" // /* MW 3 */ + 8624 "10110110" // /* MW 2 */ + 8625 "11111111" // /* MW 1 */ + 8626 "10011000" // ST p0, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8627 "00011101" // /* MW 3 */ + 8628 "11101100" // /* MW 2 */ + 8629 "00001111" // /* MW 1 */ + 8630 "10011000" // ST p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8631 "10011101" // /* MW 3 */ + 8632 "11110111" // /* MW 2 */ + 8633 "00001111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 360 44 + 8634 "00000010" // ST r0, [sp, #-16]; MOV r14, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8635 "01110000" // /* MW 7 */ + 8636 "01100000" // /* MW 6 */ + 8637 "11001010" // /* MW 5 */ + 8638 "00000001" // /* MW 4 */ + 8639 "10110000" // /* MW 3 */ + 8640 "00000010" // /* MW 2 */ + 8641 "11111110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 357 6 +.src_ref 6 "superkernels.cpp" 357 16 + 8642 "10000100" // JNZ r16, #8768 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8768 delay_slots=5 */ + 8643 "00000001" // /* MW 5 */ + 8644 "01000000" // /* MW 4 */ + 8645 "00100000" // /* MW 3 */ + 8646 "00010001" // /* MW 2 */ + 8647 "10000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.delay_slot + 8648 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8649 "11000000" // /* MW 3 */ + 8650 "11010110" // /* MW 2 */ + 8651 "00011011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 354 22 first +.delay_slot + 8652 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8653 "10010000" // /* MW 3 */ + 8654 "01100010" // /* MW 2 */ + 8655 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 354 30 +.delay_slot + 8656 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8657 "11111011" // /* MW 3 */ + 8658 "01100011" // /* MW 2 */ + 8659 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 354 11 +.delay_slot + 8660 "01000100" // MOVXM p3, #508420 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8661 "00001000" // /* MW 5 */ + 8662 "11000100" // /* MW 4 */ + 8663 "11000110" // /* MW 3 */ + 8664 "00000111" // /* MW 2 */ + 8665 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 354 11 +.delay_slot + 8666 "10011000" // ST r17, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8667 "00110001" // /* MW 3 */ + 8668 "00000110" // /* MW 2 */ + 8669 "00001011" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.src_ref 6 "superkernels.cpp" 369 2 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8670 "00111010" // MOVS p7, p1; MOVXM p1, #508448 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8671 "00010001" // /* MW 9 */ + 8672 "00010000" // /* MW 8 */ + 8673 "10110001" // /* MW 7 */ + 8674 "11110000" // /* MW 6 */ + 8675 "00000001" // /* MW 5 */ + 8676 "00000000" // /* MW 4 */ + 8677 "01100000" // /* MW 3 */ + 8678 "10010001" // /* MW 2 */ + 8679 "11110000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.src_ref 6 "superkernels.cpp" 359 4 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8680 "01110110" // ST.s8 r16, [p1]; MOVS p0, p2; MOVXM p1, #508444 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8681 "00010000" // /* MW 11 */ + 8682 "00001110" // /* MW 10 */ + 8683 "10110001" // /* MW 9 */ + 8684 "11110000" // /* MW 8 */ + 8685 "00000001" // /* MW 7 */ + 8686 "00000000" // /* MW 6 */ + 8687 "10001011" // /* MW 5 */ + 8688 "10001000" // /* MW 4 */ + 8689 "11100000" // /* MW 3 */ + 8690 "11000000" // /* MW 2 */ + 8691 "00100000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8693 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 359 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 8694 "00000100" // JL #6480 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=6480 delay_slots=5 */ + 8695 "00000001" // /* MW 5 */ + 8696 "00000000" // /* MW 4 */ + 8697 "10101000" // /* MW 3 */ + 8698 "00001100" // /* MW 2 */ + 8699 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8701 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8702 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8703 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8704 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8705 "00110001" // /* MW 3 */ + 8706 "00100000" // /* MW 2 */ + 8707 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 8708 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8709 "00000101" // /* MW 3 */ + 8710 "00100000" // /* MW 2 */ + 8711 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 8712 "00000010" // ST r16, [p1]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8713 "01110000" // /* MW 7 */ + 8714 "10100101" // /* MW 6 */ + 8715 "00000001" // /* MW 5 */ + 8716 "00000000" // /* MW 4 */ + 8717 "00110000" // /* MW 3 */ + 8718 "11000010" // /* MW 2 */ + 8719 "00100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 360 44 +.src_ref 6 "superkernels.cpp" 369 2 +.return_address + 8720 "00000010" // MOVS p1, p7; ADD.NC p2, r14, #8 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8721 "00000000" // /* MW 7 */ + 8722 "10000010" // /* MW 6 */ + 8723 "00110011" // /* MW 5 */ + 8724 "00000001" // /* MW 4 */ + 8725 "01100000" // /* MW 3 */ + 8726 "10010001" // /* MW 2 */ + 8727 "00110011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 360 17 first + 8728 "10011000" // LDA.u16 r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8729 "00111010" // /* MW 3 */ + 8730 "00000110" // /* MW 2 */ + 8731 "00000010" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 361 13 +.src_ref 6 "superkernels.cpp" 361 15 first + 8732 "10111010" // LDA.u16 r16, [p2, #4]; MOVXM p2, #508440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8733 "00010000" // /* MW 9 */ + 8734 "00001100" // /* MW 8 */ + 8735 "00110001" // /* MW 7 */ + 8736 "11110001" // /* MW 6 */ + 8737 "00000001" // /* MW 5 */ + 8738 "00000000" // /* MW 4 */ + 8739 "01010000" // /* MW 3 */ + 8740 "11000011" // /* MW 2 */ + 8741 "01000100" // /* MW 1 */ + 8742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8743 "00000000" // /* MW 1 */ + 8744 "10000100" // J #8784 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8784 delay_slots=5 */ + 8745 "00000000" // /* MW 5 */ + 8746 "00000000" // /* MW 4 */ + 8747 "00101000" // /* MW 3 */ + 8748 "00010001" // /* MW 2 */ + 8749 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 360 15 +.src_ref 6 "superkernels.cpp" 365 26 +.delay_slot + 8750 "01000100" // MOVXM p3, #508432 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8751 "00100000" // /* MW 5 */ + 8752 "11000100" // /* MW 4 */ + 8753 "11000110" // /* MW 3 */ + 8754 "00000111" // /* MW 2 */ + 8755 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8756 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8757 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8758 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8759 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 360 15 first +.delay_slot + 8760 "10011000" // ST r17, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8761 "00110001" // /* MW 3 */ + 8762 "00000110" // /* MW 2 */ + 8763 "00001011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 361 13 first +.delay_slot + 8764 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8765 "00010001" // /* MW 3 */ + 8766 "00000110" // /* MW 2 */ + 8767 "00001010" // /* MW 1 */ +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_176 +.src_ref 6 "superkernels.cpp" 365 26 + 8768 "11100001" // NOPA; NOPB; NOPS; MOVXM p3, #508432; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8769 "00000000" // /* MW 15 */ + 8770 "00000000" // /* MW 14 */ + 8771 "00010000" // /* MW 13 */ + 8772 "00001000" // /* MW 12 */ + 8773 "10110001" // /* MW 11 */ + 8774 "11110001" // /* MW 10 */ + 8775 "00000001" // /* MW 9 */ + 8776 "00000000" // /* MW 8 */ + 8777 "01011011" // /* MW 7 */ + 8778 "00000001" // /* MW 6 */ + 8779 "00100000" // /* MW 5 */ + 8780 "00000000" // /* MW 4 */ + 8781 "11110000" // /* MW 3 */ + 8782 "00101100" // /* MW 2 */ + 8783 "00000000" // /* MW 1 */ +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_192 +.src_ref 1 "io_buffer_main.h" 242 49 first + 8784 "00011000" // ADD.NC p2, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8785 "10000110" // /* MW 3 */ + 8786 "01100111" // /* MW 2 */ + 8787 "00011010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 365 15 + 8788 "10111010" // LDA r27, [p2], #-4; MOVXM p4, #508420 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8789 "00010000" // /* MW 9 */ + 8790 "00000010" // /* MW 8 */ + 8791 "00110001" // /* MW 7 */ + 8792 "11110010" // /* MW 6 */ + 8793 "00000001" // /* MW 5 */ + 8794 "00000000" // /* MW 4 */ + 8795 "11010000" // /* MW 3 */ + 8796 "11101110" // /* MW 2 */ + 8797 "01011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 8798 "10011000" // LDA r16, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8799 "00010110" // /* MW 3 */ + 8800 "11111110" // /* MW 2 */ + 8801 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 8802 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8803 "00110110" // /* MW 3 */ + 8804 "11111110" // /* MW 2 */ + 8805 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first + 8806 "10011000" // LDA r18, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8807 "01010110" // /* MW 3 */ + 8808 "01000110" // /* MW 2 */ + 8809 "00000010" // /* MW 1 */ + 8810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8811 "00000000" // /* MW 1 */ + 8812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8813 "00000000" // /* MW 1 */ + 8814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8815 "00000000" // /* MW 1 */ + 8816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8817 "00000000" // /* MW 1 */ + 8818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8819 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 8820 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8821 "00000010" // /* MW 3 */ + 8822 "01100001" // /* MW 2 */ + 8823 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 + 8824 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8825 "00010001" // /* MW 3 */ + 8826 "00000110" // /* MW 2 */ + 8827 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 + 8828 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8829 "11111101" // /* MW 3 */ + 8830 "11100000" // /* MW 2 */ + 8831 "00010111" // /* MW 1 */ + 8832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8833 "00000000" // /* MW 1 */ + 8834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8835 "00000000" // /* MW 1 */ + 8836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8837 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 8838 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8839 "00001000" // /* MW 3 */ + 8840 "10010011" // /* MW 2 */ + 8841 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 449 8 +.src_ref 6 "superkernels.cpp" 364 11 +.src_ref 6 "superkernels.cpp" 367 47 +.src_ref 6 "superkernels.cpp" 372 6 +.src_ref 6 "superkernels.cpp" 373 16 + 8842 "10111010" // MOVA r15, #1; MOVXM p7, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8843 "00010000" // /* MW 9 */ + 8844 "00000000" // /* MW 8 */ + 8845 "10110001" // /* MW 7 */ + 8846 "11110011" // /* MW 6 */ + 8847 "00000001" // /* MW 5 */ + 8848 "00000000" // /* MW 4 */ + 8849 "00000000" // /* MW 3 */ + 8850 "00101111" // /* MW 2 */ + 8851 "00000000" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 76 16 + 8852 "11100100" // MOVX r24, #0; MOV r16, sp /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8853 "11000001" // /* MW 5 */ + 8854 "00101011" // /* MW 4 */ + 8855 "00101000" // /* MW 3 */ + 8856 "00000000" // /* MW 2 */ + 8857 "00000110" // /* MW 1 */ + 8858 "00011000" // ADD.NC p0, r16, #-76 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8859 "01011010" // /* MW 3 */ + 8860 "01101000" // /* MW 2 */ + 8861 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 1 "io_buffer_main.h" 348 51 + 8862 "11010100" // LDA p5, [sp, #-20]; MOV r14, p2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8863 "10000001" // /* MW 5 */ + 8864 "00101001" // /* MW 4 */ + 8865 "00100111" // /* MW 3 */ + 8866 "11010011" // /* MW 2 */ + 8867 "11111101" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 365 15 first + 8868 "10011000" // LDA r17, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8869 "00110110" // /* MW 3 */ + 8870 "00000110" // /* MW 2 */ + 8871 "00000100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 365 26 +.src_ref 6 "superkernels.cpp" 369 2 + 8872 "10111010" // LDA r16, [p3]; MOVXM p3, #508864 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8873 "00010000" // /* MW 9 */ + 8874 "11100000" // /* MW 8 */ + 8875 "10110001" // /* MW 7 */ + 8876 "11110001" // /* MW 6 */ + 8877 "00000001" // /* MW 5 */ + 8878 "00000000" // /* MW 4 */ + 8879 "11010000" // /* MW 3 */ + 8880 "11000010" // /* MW 2 */ + 8881 "01100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 364 11 first + 8882 "10011000" // LDA r18, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8883 "01010110" // /* MW 3 */ + 8884 "00000110" // /* MW 2 */ + 8885 "00000111" // /* MW 1 */ + 8886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8887 "00000000" // /* MW 1 */ + 8888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8889 "00000000" // /* MW 1 */ + 8890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8891 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 8892 "10011000" // LDA r19, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8893 "01110110" // /* MW 3 */ + 8894 "00000110" // /* MW 2 */ + 8895 "00000101" // /* MW 1 */ + 8896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8897 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 365 24 first + 8898 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8899 "00001111" // /* MW 3 */ + 8900 "01100001" // /* MW 2 */ + 8901 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 364 11 first + 8902 "00011000" // ADD r17, r18, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8903 "00000111" // /* MW 3 */ + 8904 "10100010" // /* MW 2 */ + 8905 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 367 47 first + 8906 "10011000" // LSHL r16, r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8907 "11111101" // /* MW 3 */ + 8908 "00100000" // /* MW 2 */ + 8909 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 369 2 first +.no_stack_arguments + 8910 "00000100" // JL #8224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8224 delay_slots=5 */ + 8911 "00000001" // /* MW 5 */ + 8912 "00000000" // /* MW 4 */ + 8913 "00010000" // /* MW 3 */ + 8914 "00010000" // /* MW 2 */ + 8915 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 364 11 first +.delay_slot + 8916 "10011000" // ST r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8917 "00110001" // /* MW 3 */ + 8918 "00000110" // /* MW 2 */ + 8919 "00001111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 367 47 first +.delay_slot + 8920 "01011000" // ADD.NC dn0, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8921 "11000001" // /* MW 3 */ + 8922 "01001001" // /* MW 2 */ + 8923 "00011000" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 225 10 first +.delay_slot + 8924 "10011000" // ST dn0, [sp, #-76] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8925 "00100101" // /* MW 3 */ + 8926 "10110100" // /* MW 2 */ + 8927 "00001111" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 76 16 first +.delay_slot + 8928 "10011000" // ST r24, [sp, #-72] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8929 "00010101" // /* MW 3 */ + 8930 "10111011" // /* MW 2 */ + 8931 "00001111" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 76 16 +.delay_slot + 8932 "00110110" // NOPA; NOPB; ST r24, [sp, #-68]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8933 "11000001" // /* MW 11 */ + 8934 "10001010" // /* MW 10 */ + 8935 "11011111" // /* MW 9 */ + 8936 "00000011" // /* MW 8 */ + 8937 "00000000" // /* MW 7 */ + 8938 "00000000" // /* MW 6 */ + 8939 "00100000" // /* MW 5 */ + 8940 "00000000" // /* MW 4 */ + 8941 "11110000" // /* MW 3 */ + 8942 "00101100" // /* MW 2 */ + 8943 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 +.return_address + 8944 "00011000" // ADD.NC p2, r14, #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8945 "00001010" // /* MW 3 */ + 8946 "01100111" // /* MW 2 */ + 8947 "00011010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first + 8948 "10011000" // LDA r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8949 "00010110" // /* MW 3 */ + 8950 "00000110" // /* MW 2 */ + 8951 "00000010" // /* MW 1 */ + 8952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8953 "00000000" // /* MW 1 */ + 8954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8955 "00000000" // /* MW 1 */ + 8956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8957 "00000000" // /* MW 1 */ + 8958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8959 "00000000" // /* MW 1 */ + 8960 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8961 "00000000" // /* MW 1 */ + 8962 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8963 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 8964 "00011000" // REL r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8965 "11111000" // /* MW 3 */ + 8966 "00010000" // /* MW 2 */ + 8967 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first +.src_ref 6 "superkernels.cpp" 372 19 + 8968 "10111010" // LDA r16, [p2, #-8]; MOVXM p1, #508440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8969 "00010000" // /* MW 9 */ + 8970 "00001100" // /* MW 8 */ + 8971 "10110001" // /* MW 7 */ + 8972 "11110000" // /* MW 6 */ + 8973 "00000001" // /* MW 5 */ + 8974 "00000000" // /* MW 4 */ + 8975 "11010000" // /* MW 3 */ + 8976 "11000010" // /* MW 2 */ + 8977 "01011100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 372 19 first + 8978 "10011000" // LDA r18, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8979 "01010110" // /* MW 3 */ + 8980 "00000110" // /* MW 2 */ + 8981 "00000001" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 372 6 + 8982 "10011000" // LDA r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8983 "00110110" // /* MW 3 */ + 8984 "00000110" // /* MW 2 */ + 8985 "00000111" // /* MW 1 */ + 8986 "00011000" // LDA p1, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8987 "10011001" // /* MW 3 */ + 8988 "11110100" // /* MW 2 */ + 8989 "00000111" // /* MW 1 */ + 8990 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8991 "11010001" // /* MW 3 */ + 8992 "11111001" // /* MW 2 */ + 8993 "00000111" // /* MW 1 */ + 8994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8995 "00000000" // /* MW 1 */ + 8996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8997 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 first + 8998 "10011000" // SUB r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8999 "00000001" // /* MW 3 */ + 9000 "11100001" // /* MW 2 */ + 9001 "00010011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 9002 "10011000" // ST r16, [p2, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9003 "00010001" // /* MW 3 */ + 9004 "11100110" // /* MW 2 */ + 9005 "00001010" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 372 16 first + 9006 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9007 "00101000" // /* MW 3 */ + 9008 "01100001" // /* MW 2 */ + 9009 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 372 6 + 9010 "10000100" // JNZ r16, #9040 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9040 delay_slots=5 */ + 9011 "00000001" // /* MW 5 */ + 9012 "01000000" // /* MW 4 */ + 9013 "10101000" // /* MW 3 */ + 9014 "00010001" // /* MW 2 */ + 9015 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 373 16 +.delay_slot + 9016 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9017 "00000001" // /* MW 3 */ + 9018 "00110000" // /* MW 2 */ + 9019 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9021 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9023 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9025 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9027 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 373 16 first + 9028 "00110110" // NOPA; NOPB; ST r24, [p7]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9029 "11000001" // /* MW 11 */ + 9030 "10001000" // /* MW 10 */ + 9031 "10000011" // /* MW 9 */ + 9032 "00000011" // /* MW 8 */ + 9033 "00000000" // /* MW 7 */ + 9034 "00000000" // /* MW 6 */ + 9035 "00100000" // /* MW 5 */ + 9036 "00000000" // /* MW 4 */ + 9037 "11110000" // /* MW 3 */ + 9038 "00101100" // /* MW 2 */ + 9039 "00000000" // /* MW 1 */ +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 6 "superkernels.cpp" 375 + 9040 "11010100" // LDA r13, [sp, #-4]; MOV lr, r13 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9041 "01000001" // /* MW 5 */ + 9042 "11101101" // /* MW 4 */ + 9043 "00101110" // /* MW 3 */ + 9044 "10110110" // /* MW 2 */ + 9045 "11111111" // /* MW 1 */ + 9046 "00011000" // LDA r15, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9047 "11110001" // /* MW 3 */ + 9048 "11110001" // /* MW 2 */ + 9049 "00000111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 375 first + 9050 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9051 "00000000" // /* MW 3 */ + 9052 "00101000" // /* MW 2 */ + 9053 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 375 +.delay_slot + 9054 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9055 "00000001" // /* MW 5 */ + 9056 "00000000" // /* MW 4 */ + 9057 "00000000" // /* MW 3 */ + 9058 "11110000" // /* MW 2 */ + 9059 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9061 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9063 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9065 "00000000" // /* MW 1 */ +.delay_slot + 9066 "11111000" // MOV p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9067 "11000000" // /* MW 3 */ + 9068 "01100010" // /* MW 2 */ +.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 9069 "00011111" // /* MW 1 */ +.label __Z15_b14160_wrapperPPv___func_begin0 +.label _Z15_b14160_wrapperPPv +.function _b14160_wrapper _Z15_b14160_wrapperPPv +.src_ref 0 "0_0_reloadable4.cc" 21 first +.src_ref 0 "0_0_reloadable4.cc" 23 79 +.function_start + 9072 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9073 "11000000" // /* MW 3 */ + 9074 "01100000" // /* MW 2 */ + 9075 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 23 79 first + 9076 "10011000" // LDA p0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9077 "00011110" // /* MW 3 */ + 9078 "00011100" // /* MW 2 */ + 9079 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 24 79 first + 9080 "10011000" // LDA p1, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9081 "10011110" // /* MW 3 */ + 9082 "00101100" // /* MW 2 */ + 9083 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 26 81 first + 9084 "10011000" // LDA p3, [p2, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9085 "10011110" // /* MW 3 */ + 9086 "11110101" // /* MW 2 */ + 9087 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 25 47 first + 9088 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9089 "00011110" // /* MW 3 */ + 9090 "00000101" // /* MW 2 */ + 9091 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 22 4 first +.tail_call + 9092 "10000100" // J #8592 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=8592 delay_slots=5 */ + 9093 "00000000" // /* MW 5 */ + 9094 "00000000" // /* MW 4 */ + 9095 "11001000" // /* MW 3 */ + 9096 "00010000" // /* MW 2 */ + 9097 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9099 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9100 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9101 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9103 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9105 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z15_b14160_wrapperPPv__end +.label __Z15_b14160_wrapperPPv___func_end0 + 9107 "00000000" // /* MW 1 */ +.label __Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj___func_begin0 +.label _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj +.function setup_transposeshuffle_params _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj +.src_ref 3 "transposeshuffle_params.h" 71 first +.src_ref 3 "transposeshuffle_params.h" 76 16 +.src_ref 3 "transposeshuffle_params.h" 76 18 first +.function_start + 9120 "10111010" // LDA el0, [p1], #4; MOVXM r0, #508544 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9121 "00010000" // /* MW 9 */ + 9122 "01000000" // /* MW 8 */ + 9123 "00001001" // /* MW 7 */ + 9124 "11110000" // /* MW 6 */ + 9125 "00000001" // /* MW 5 */ + 9126 "00000000" // /* MW 4 */ + 9127 "11010000" // /* MW 3 */ + 9128 "10000101" // /* MW 2 */ + 9129 "00100011" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 9 +.src_ref 3 "transposeshuffle_params.h" 76 16 +.src_ref 3 "transposeshuffle_params.h" 80 28 +.src_ref 3 "transposeshuffle_params.h" 80 36 +.src_ref 3 "transposeshuffle_params.h" 81 28 +.src_ref 3 "transposeshuffle_params.h" 81 36 + 9130 "01110110" // MOVA m0, #64; MOVS p0, r0; MOVX r2, #-3; ADD.NC p2, r0, #4 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9131 "00001000" // /* MW 11 */ + 9132 "00000001" // /* MW 10 */ + 9133 "00110000" // /* MW 9 */ + 9134 "10101001" // /* MW 8 */ + 9135 "00100111" // /* MW 7 */ + 9136 "00111110" // /* MW 6 */ + 9137 "00001011" // /* MW 5 */ + 9138 "10000000" // /* MW 4 */ + 9139 "10000000" // /* MW 3 */ + 9140 "00000000" // /* MW 2 */ + 9141 "00001000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 85 22 +.src_ref 3 "transposeshuffle_params.h" 86 17 +.src_ref 3 "transposeshuffle_params.h" 89 43 +.src_ref 3 "transposeshuffle_params.h" 91 18 +.src_ref 3 "transposeshuffle_params.h" 93 4 +.src_ref 3 "transposeshuffle_params.h" 94 4 + 9142 "01100100" // MOVX r1, #4; MOV r0, #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9143 "00000001" // /* MW 5 */ + 9144 "00100010" // /* MW 4 */ + 9145 "00100000" // /* MW 3 */ + 9146 "01000010" // /* MW 2 */ + 9147 "00000000" // /* MW 1 */ + 9148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9149 "00000000" // /* MW 1 */ + 9150 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9151 "00000000" // /* MW 1 */ + 9152 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9153 "00000000" // /* MW 1 */ + 9154 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9155 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 16 first + 9156 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9157 "00101001" // /* MW 3 */ + 9158 "00011100" // /* MW 2 */ + 9159 "00001000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 18 + 9160 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9161 "00101110" // /* MW 3 */ + 9162 "00011100" // /* MW 2 */ + 9163 "00000001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 18 + 9164 "10011000" // LDA eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9165 "00001110" // /* MW 3 */ + 9166 "00011100" // /* MW 2 */ + 9167 "00000001" // /* MW 1 */ + 9168 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9169 "00000000" // /* MW 1 */ + 9170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9171 "00000000" // /* MW 1 */ + 9172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9173 "00000000" // /* MW 1 */ + 9174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9175 "00000000" // /* MW 1 */ + 9176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9177 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 16 + 9178 "10011000" // ST el0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9179 "00101001" // /* MW 3 */ + 9180 "00011100" // /* MW 2 */ + 9181 "00001010" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 16 + 9182 "10011000" // ST eh0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9183 "00001001" // /* MW 3 */ + 9184 "00011100" // /* MW 2 */ + 9185 "00001010" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 18 + 9186 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9187 "00101110" // /* MW 3 */ + 9188 "00011100" // /* MW 2 */ + 9189 "00000001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 18 + 9190 "10011000" // LDA eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9191 "00001110" // /* MW 3 */ + 9192 "00011100" // /* MW 2 */ + 9193 "00000001" // /* MW 1 */ + 9194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9195 "00000000" // /* MW 1 */ + 9196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9197 "00000000" // /* MW 1 */ + 9198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9199 "00000000" // /* MW 1 */ + 9200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9201 "00000000" // /* MW 1 */ + 9202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9203 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 16 + 9204 "10011000" // ST el0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9205 "00101001" // /* MW 3 */ + 9206 "00011100" // /* MW 2 */ + 9207 "00001010" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 16 + 9208 "10011000" // ST eh0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9209 "00001001" // /* MW 3 */ + 9210 "00011100" // /* MW 2 */ + 9211 "00001010" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 18 + 9212 "10011000" // LDA eh0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9213 "00001110" // /* MW 3 */ + 9214 "00000100" // /* MW 2 */ + 9215 "00000001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 18 + 9216 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9217 "00101110" // /* MW 3 */ + 9218 "00010100" // /* MW 2 */ + 9219 "00000001" // /* MW 1 */ + 9220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9221 "00000000" // /* MW 1 */ + 9222 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9223 "00000000" // /* MW 1 */ + 9224 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9225 "00000000" // /* MW 1 */ + 9226 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9227 "00000000" // /* MW 1 */ + 9228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9229 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 16 + 9230 "10011000" // ST eh0, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9231 "00001001" // /* MW 3 */ + 9232 "00000100" // /* MW 2 */ + 9233 "00001010" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 16 + 9234 "10011000" // ST el0, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9235 "00101001" // /* MW 3 */ + 9236 "00010100" // /* MW 2 */ + 9237 "00001010" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 80 28 first + 9238 "10011000" // LDA r3, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9239 "01110110" // /* MW 3 */ + 9240 "00001000" // /* MW 2 */ + 9241 "00000000" // /* MW 1 */ + 9242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9243 "00000000" // /* MW 1 */ + 9244 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9245 "00000000" // /* MW 1 */ + 9246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9247 "00000000" // /* MW 1 */ + 9248 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9249 "00000000" // /* MW 1 */ + 9250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9251 "00000000" // /* MW 1 */ + 9252 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9253 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 80 36 + 9254 "10011000" // LSHL r4, r3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9255 "00101101" // /* MW 3 */ + 9256 "11001000" // /* MW 2 */ + 9257 "00010000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 80 42 +.src_ref 3 "transposeshuffle_params.h" 89 43 first + 9258 "00100100" // LSHL r3, r3, r1; ADD.NC r1, r4, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9259 "11111111" // /* MW 5 */ + 9260 "10100100" // /* MW 4 */ + 9261 "10110000" // /* MW 3 */ + 9262 "11000011" // /* MW 2 */ + 9263 "00011000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 80 19 +.src_ref 3 "transposeshuffle_params.h" 80 19 first + 9264 "00000010" // ST r1, [p0]; MOV r4, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9265 "01110000" // /* MW 7 */ + 9266 "01100000" // /* MW 6 */ + 9267 "10001000" // /* MW 5 */ + 9268 "00000000" // /* MW 4 */ + 9269 "00110000" // /* MW 3 */ + 9270 "10000110" // /* MW 2 */ + 9271 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 80 19 + 9272 "00011000" // ADD.NC p1, r4, #-60 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9273 "01100010" // /* MW 3 */ + 9274 "01100010" // /* MW 2 */ + 9275 "00011001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 81 28 first + 9276 "10011000" // LDA r4, [p1], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9277 "10010110" // /* MW 3 */ + 9278 "00001000" // /* MW 2 */ + 9279 "00000001" // /* MW 1 */ + 9280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9281 "00000000" // /* MW 1 */ + 9282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9283 "00000000" // /* MW 1 */ + 9284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9285 "00000000" // /* MW 1 */ + 9286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9287 "00000000" // /* MW 1 */ + 9288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9289 "00000000" // /* MW 1 */ + 9290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9291 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 81 36 +.src_ref 3 "transposeshuffle_params.h" 90 77 + 9292 "01100100" // LSHL r2, r4, r2; MOV r4, #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9293 "00000001" // /* MW 5 */ + 9294 "00100010" // /* MW 4 */ + 9295 "10110010" // /* MW 3 */ + 9296 "10000101" // /* MW 2 */ + 9297 "00100000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 81 42 + 9298 "00011000" // ADD r2, r2, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9299 "11111111" // /* MW 3 */ + 9300 "10000101" // /* MW 2 */ + 9301 "00010000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 81 19 +.src_ref 3 "transposeshuffle_params.h" 90 77 first + 9302 "01011100" // ST r2, [p1], #4; MSC r4, r4, r3, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9303 "01011100" // /* MW 5 */ + 9304 "10010000" // /* MW 4 */ + 9305 "00110001" // /* MW 3 */ + 9306 "10001010" // /* MW 2 */ + 9307 "00100011" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 85 22 first + 9308 "10011000" // ST r0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9309 "00010001" // /* MW 3 */ + 9310 "00011100" // /* MW 2 */ + 9311 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 85 22 + 9312 "10011000" // ST r0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9313 "00010001" // /* MW 3 */ + 9314 "00011100" // /* MW 2 */ + 9315 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 86 17 first + 9316 "10011000" // ST r0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9317 "00010001" // /* MW 3 */ + 9318 "00101100" // /* MW 2 */ + 9319 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 89 23 first + 9320 "10011000" // ST r3, [p1], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9321 "01110001" // /* MW 3 */ + 9322 "11111100" // /* MW 2 */ + 9323 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 90 23 first + 9324 "10011000" // ST r4, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9325 "10010001" // /* MW 3 */ + 9326 "00101100" // /* MW 2 */ + 9327 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 91 18 +.src_ref 3 "transposeshuffle_params.h" 91 18 first + 9328 "00000010" // ST r0, [p1]; MOV r5, p1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9329 "01110000" // /* MW 7 */ + 9330 "01100000" // /* MW 6 */ + 9331 "10101001" // /* MW 5 */ + 9332 "00000000" // /* MW 4 */ + 9333 "00110000" // /* MW 3 */ + 9334 "10000010" // /* MW 2 */ + 9335 "00100000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 91 18 + 9336 "00011000" // ADD.NC p1, r5, #-68 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9337 "11011110" // /* MW 3 */ + 9338 "01100010" // /* MW 2 */ + 9339 "00011001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 93 4 first + 9340 "10011000" // ST r0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9341 "00010001" // /* MW 3 */ + 9342 "00011100" // /* MW 2 */ + 9343 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 93 4 + 9344 "10011000" // ST r0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9345 "00010001" // /* MW 3 */ + 9346 "00011100" // /* MW 2 */ + 9347 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 93 4 + 9348 "10011000" // ST r2, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9349 "01010001" // /* MW 3 */ + 9350 "00011100" // /* MW 2 */ + 9351 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 93 4 + 9352 "10011000" // ST r1, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9353 "00110001" // /* MW 3 */ + 9354 "00011100" // /* MW 2 */ + 9355 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 93 4 +.src_ref 3 "transposeshuffle_params.h" 95 first + 9356 "01011100" // ST r0, [p1], #4; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 9357 "00000000" // /* MW 5 */ + 9358 "01010000" // /* MW 4 */ + 9359 "00110000" // /* MW 3 */ + 9360 "10000010" // /* MW 2 */ + 9361 "00100011" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 94 4 first +.delay_slot + 9362 "10011000" // ST r3, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9363 "01110001" // /* MW 3 */ + 9364 "00101100" // /* MW 2 */ + 9365 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 94 4 +.delay_slot + 9366 "10011000" // ST r2, [p1], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9367 "01010001" // /* MW 3 */ + 9368 "11111100" // /* MW 2 */ + 9369 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 94 4 +.delay_slot + 9370 "10011000" // ST r4, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9371 "10010001" // /* MW 3 */ + 9372 "00101100" // /* MW 2 */ + 9373 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 94 4 +.delay_slot + 9374 "10011000" // ST r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9375 "00110001" // /* MW 3 */ + 9376 "00000100" // /* MW 2 */ + 9377 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 94 4 +.delay_slot + 9378 "10011000" // ST r0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9379 "00010001" // /* MW 3 */ + 9380 "00010100" // /* MW 2 */ +.label _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj__end +.label __Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj___func_end0 + 9381 "00001001" // /* MW 1 */ +.label __Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params___func_begin0 +.label _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params +.function transposeshuffle _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params +.src_ref 3 "transposeshuffle.h" 38 first +.src_ref 3 "transposeshuffle.h" 72 14 +.src_ref 3 "transposeshuffle.h" 79 23 +.function_start + 9392 "10111010" // MOVA r1, #2; MOVXM p2, #508556 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9393 "00010000" // /* MW 9 */ + 9394 "01000110" // /* MW 8 */ + 9395 "00110001" // /* MW 7 */ + 9396 "11110001" // /* MW 6 */ + 9397 "00000001" // /* MW 5 */ + 9398 "00000000" // /* MW 4 */ + 9399 "00000000" // /* MW 3 */ + 9400 "01000001" // /* MW 2 */ + 9401 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 72 8 +.src_ref 3 "transposeshuffle.h" 72 14 first +.src_ref 3 "transposeshuffle.h" 72 23 + 9402 "00101100" // LDA r27, [p2]; MOVX r0, #22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9403 "10110010" // /* MW 5 */ + 9404 "00000000" // /* MW 4 */ + 9405 "11010000" // /* MW 3 */ + 9406 "11101110" // /* MW 2 */ + 9407 "01000000" // /* MW 1 */ + 9408 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9409 "00000000" // /* MW 1 */ + 9410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9411 "00000000" // /* MW 1 */ + 9412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9413 "00000000" // /* MW 1 */ + 9414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9415 "00000000" // /* MW 1 */ + 9416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9417 "00000000" // /* MW 1 */ + 9418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9419 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 79 23 first + 9420 "10011000" // EQ r1, r27, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9421 "00010111" // /* MW 3 */ + 9422 "11000010" // /* MW 2 */ + 9423 "00010110" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 79 8 + 9424 "10000100" // JNZ r1, #9888 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9888 delay_slots=5 */ + 9425 "00000001" // /* MW 5 */ + 9426 "01000000" // /* MW 4 */ + 9427 "01010000" // /* MW 3 */ + 9428 "00010011" // /* MW 2 */ + 9429 "00001000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 72 8 +.src_ref 3 "transposeshuffle.h" 72 23 +.delay_slot + 9430 "00011000" // MOVX r2, #29 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9431 "01110101" // /* MW 3 */ + 9432 "00000100" // /* MW 2 */ + 9433 "00010000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 72 8 first +.src_ref 3 "transposeshuffle.h" 72 23 first +.delay_slot + 9434 "00011000" // SEL.EQZ r0, r0, r2, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9435 "00100010" // /* MW 3 */ + 9436 "00000000" // /* MW 2 */ + 9437 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9439 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9441 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9443 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 116 34 + 9444 "01000100" // MOVXM p2, #508560 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9445 "00100000" // /* MW 5 */ + 9446 "11000101" // /* MW 4 */ + 9447 "11000100" // /* MW 3 */ + 9448 "00000111" // /* MW 2 */ + 9449 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 116 34 first + 9450 "10011000" // LDA r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9451 "00110110" // /* MW 3 */ + 9452 "00000100" // /* MW 2 */ + 9453 "00000010" // /* MW 1 */ + 9454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9455 "00000000" // /* MW 1 */ + 9456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9457 "00000000" // /* MW 1 */ + 9458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9459 "00000000" // /* MW 1 */ + 9460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9461 "00000000" // /* MW 1 */ + 9462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9463 "00000000" // /* MW 1 */ + 9464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9465 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 116 8 +.src_ref 3 "transposeshuffle.h" 116 26 + 9466 "10000100" // JZ r1, #10528 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10528 delay_slots=5 */ + 9467 "00000001" // /* MW 5 */ + 9468 "00000000" // /* MW 4 */ + 9469 "10010000" // /* MW 3 */ + 9470 "00010100" // /* MW 2 */ + 9471 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9473 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9475 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9477 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9479 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9481 "00000000" // /* MW 1 */ + 9482 "00011000" // MOVX r2, #10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9483 "00101001" // /* MW 3 */ + 9484 "00000100" // /* MW 2 */ + 9485 "00010000" // /* MW 1 */ + 9486 "10011000" // LTU r2, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9487 "00101100" // /* MW 3 */ + 9488 "01000100" // /* MW 2 */ + 9489 "00010000" // /* MW 1 */ + 9490 "10000100" // JNZ r2, #9728 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9728 delay_slots=5 */ + 9491 "00000001" // /* MW 5 */ + 9492 "01000000" // /* MW 4 */ + 9493 "00000000" // /* MW 3 */ + 9494 "00010011" // /* MW 2 */ + 9495 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9497 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9499 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9501 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9503 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9505 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "transposeshuffle.h" 116 8 +.src_ref 3 "transposeshuffle.h" 119 21 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9506 "00111010" // VLDB x0, [p0], #64; MOVXM ls, #9632 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9507 "00010000" // /* MW 9 */ + 9508 "11010000" // /* MW 8 */ + 9509 "01111010" // /* MW 7 */ + 9510 "00001000" // /* MW 6 */ + 9511 "00000000" // /* MW 5 */ + 9512 "00000000" // /* MW 4 */ + 9513 "01101000" // /* MW 3 */ + 9514 "00111000" // /* MW 2 */ + 9515 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "transposeshuffle.h" 116 8 first +.src_ref 3 "transposeshuffle.h" 119 21 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9516 "00111010" // VLDB x0, [p0], #64; MOVXM le, #9632 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9517 "00010000" // /* MW 9 */ + 9518 "11010000" // /* MW 8 */ + 9519 "10111010" // /* MW 7 */ + 9520 "00001001" // /* MW 6 */ + 9521 "00000000" // /* MW 5 */ + 9522 "00000000" // /* MW 4 */ + 9523 "01101000" // /* MW 3 */ + 9524 "00111000" // /* MW 2 */ + 9525 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "transposeshuffle.h" 116 8 +.src_ref 3 "transposeshuffle.h" 119 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9526 "10111010" // NOPA; VLDB x0, [p0], #64; ADD.NC lc, r1, #-9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9527 "11001110" // /* MW 9 */ + 9528 "01111101" // /* MW 8 */ + 9529 "10111000" // /* MW 7 */ + 9530 "00000010" // /* MW 6 */ + 9531 "00110100" // /* MW 5 */ + 9532 "00011100" // /* MW 4 */ + 9533 "11110000" // /* MW 3 */ + 9534 "00101100" // /* MW 2 */ + 9535 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "transposeshuffle.h" 119 21 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9536 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9537 "00000000" // /* MW 15 */ + 9538 "00000000" // /* MW 14 */ + 9539 "01111000" // /* MW 13 */ + 9540 "10100101" // /* MW 12 */ + 9541 "00000001" // /* MW 11 */ + 9542 "00000000" // /* MW 10 */ + 9543 "00000000" // /* MW 9 */ + 9544 "00000000" // /* MW 8 */ + 9545 "01011011" // /* MW 7 */ + 9546 "00000001" // /* MW 6 */ + 9547 "01101000" // /* MW 5 */ + 9548 "00111000" // /* MW 4 */ + 9549 "11110000" // /* MW 3 */ + 9550 "00101100" // /* MW 2 */ + 9551 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "transposeshuffle.h" 119 21 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9552 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9553 "00000000" // /* MW 15 */ + 9554 "00000000" // /* MW 14 */ + 9555 "01111000" // /* MW 13 */ + 9556 "10100101" // /* MW 12 */ + 9557 "00000001" // /* MW 11 */ + 9558 "00000000" // /* MW 10 */ + 9559 "00000000" // /* MW 9 */ + 9560 "00000000" // /* MW 8 */ + 9561 "01011011" // /* MW 7 */ + 9562 "00000001" // /* MW 6 */ + 9563 "01101000" // /* MW 5 */ + 9564 "00111000" // /* MW 4 */ + 9565 "11110000" // /* MW 3 */ + 9566 "00101100" // /* MW 2 */ + 9567 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "transposeshuffle.h" 119 21 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9568 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9569 "00000000" // /* MW 15 */ + 9570 "00000000" // /* MW 14 */ + 9571 "01111000" // /* MW 13 */ + 9572 "10100101" // /* MW 12 */ + 9573 "00000001" // /* MW 11 */ + 9574 "00000000" // /* MW 10 */ + 9575 "00000000" // /* MW 9 */ + 9576 "00000000" // /* MW 8 */ + 9577 "01011011" // /* MW 7 */ + 9578 "00000001" // /* MW 6 */ + 9579 "01101000" // /* MW 5 */ + 9580 "00111000" // /* MW 4 */ + 9581 "11110000" // /* MW 3 */ + 9582 "00101100" // /* MW 2 */ + 9583 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "transposeshuffle.h" 119 21 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9584 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9585 "00000000" // /* MW 15 */ + 9586 "00000000" // /* MW 14 */ + 9587 "01111000" // /* MW 13 */ + 9588 "10100101" // /* MW 12 */ + 9589 "00000001" // /* MW 11 */ + 9590 "00000000" // /* MW 10 */ + 9591 "00000000" // /* MW 9 */ + 9592 "00000000" // /* MW 8 */ + 9593 "01011011" // /* MW 7 */ + 9594 "00000001" // /* MW 6 */ + 9595 "01101000" // /* MW 5 */ + 9596 "00111000" // /* MW 4 */ + 9597 "11110000" // /* MW 3 */ + 9598 "00101100" // /* MW 2 */ + 9599 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "transposeshuffle.h" 119 21 +.src_ref 3 "transposeshuffle.h" 120 17 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9600 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9601 "00000000" // /* MW 15 */ + 9602 "00000000" // /* MW 14 */ + 9603 "11101000" // /* MW 13 */ + 9604 "00000000" // /* MW 12 */ + 9605 "00000000" // /* MW 11 */ + 9606 "00000000" // /* MW 10 */ + 9607 "00000000" // /* MW 9 */ + 9608 "00000000" // /* MW 8 */ + 9609 "01011011" // /* MW 7 */ + 9610 "00000001" // /* MW 6 */ + 9611 "01101000" // /* MW 5 */ + 9612 "00111000" // /* MW 4 */ + 9613 "11110000" // /* MW 3 */ + 9614 "00101100" // /* MW 2 */ + 9615 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "transposeshuffle.h" 119 21 first +.src_ref 3 "transposeshuffle.h" 120 17 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9616 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9617 "00000000" // /* MW 15 */ + 9618 "00000000" // /* MW 14 */ + 9619 "11101000" // /* MW 13 */ + 9620 "00000000" // /* MW 12 */ + 9621 "00000000" // /* MW 11 */ + 9622 "00000000" // /* MW 10 */ + 9623 "00000000" // /* MW 9 */ + 9624 "00000000" // /* MW 8 */ + 9625 "01011011" // /* MW 7 */ + 9626 "00000001" // /* MW 6 */ + 9627 "01101000" // /* MW 5 */ + 9628 "00111000" // /* MW 4 */ + 9629 "11110000" // /* MW 3 */ + 9630 "00101100" // /* MW 2 */ + 9631 "00000000" // /* MW 1 */ +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_240 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "transposeshuffle.h" 119 21 +.src_ref 3 "transposeshuffle.h" 120 17 first +.src_ref 3 "transposeshuffle.h" 122 22 first +.begin_of_loop +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 9632 "11100001" // NOPA; VLDB x0, [p0], #64; VST bmll0, [p1], #64; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9633 "00000000" // /* MW 15 */ + 9634 "00000000" // /* MW 14 */ + 9635 "11101000" // /* MW 13 */ + 9636 "00000000" // /* MW 12 */ + 9637 "00000000" // /* MW 11 */ + 9638 "00000000" // /* MW 10 */ + 9639 "00000000" // /* MW 9 */ + 9640 "10000000" // /* MW 8 */ + 9641 "00000110" // /* MW 7 */ + 9642 "00011100" // /* MW 6 */ + 9643 "01101001" // /* MW 5 */ + 9644 "00111000" // /* MW 4 */ + 9645 "11110000" // /* MW 3 */ + 9646 "00101100" // /* MW 2 */ + 9647 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "transposeshuffle.h" 120 17 +.src_ref 3 "transposeshuffle.h" 122 22 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 9648 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9649 "11100000" // /* MW 7 */ + 9650 "00000000" // /* MW 6 */ + 9651 "00000000" // /* MW 5 */ + 9652 "00000000" // /* MW 4 */ + 9653 "11010000" // /* MW 3 */ + 9654 "10000000" // /* MW 2 */ + 9655 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "transposeshuffle.h" 120 17 +.src_ref 3 "transposeshuffle.h" 122 22 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9656 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9657 "11100000" // /* MW 7 */ + 9658 "00000000" // /* MW 6 */ + 9659 "00000000" // /* MW 5 */ + 9660 "00000000" // /* MW 4 */ + 9661 "11010000" // /* MW 3 */ + 9662 "10000000" // /* MW 2 */ + 9663 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "transposeshuffle.h" 120 17 +.src_ref 3 "transposeshuffle.h" 122 22 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9664 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9665 "11100000" // /* MW 7 */ + 9666 "00000000" // /* MW 6 */ + 9667 "00000000" // /* MW 5 */ + 9668 "00000000" // /* MW 4 */ + 9669 "11010000" // /* MW 3 */ + 9670 "10000000" // /* MW 2 */ + 9671 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "transposeshuffle.h" 120 17 +.src_ref 3 "transposeshuffle.h" 122 22 +.src_ref 3 "transposeshuffle.h" 126 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9672 "00111010" // VST bmll0, [p1], #64; RET lr; VSHUFFLE bmll0, x0, x0, r0 /* MW 10 */ /* control_operation: words=10 rts unconditional cycles_taken=1 delay_slots=5 */ + 9673 "11101001" // /* MW 9 */ + 9674 "00000000" // /* MW 8 */ + 9675 "00000000" // /* MW 7 */ + 9676 "00000000" // /* MW 6 */ + 9677 "01000000" // /* MW 5 */ + 9678 "00000001" // /* MW 4 */ + 9679 "11010000" // /* MW 3 */ + 9680 "10000000" // /* MW 2 */ + 9681 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "transposeshuffle.h" 120 17 first +.src_ref 3 "transposeshuffle.h" 122 22 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9682 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9683 "11100000" // /* MW 7 */ + 9684 "00000000" // /* MW 6 */ + 9685 "00000000" // /* MW 5 */ + 9686 "00000000" // /* MW 4 */ + 9687 "11010000" // /* MW 3 */ + 9688 "10000000" // /* MW 2 */ + 9689 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "transposeshuffle.h" 120 17 +.src_ref 3 "transposeshuffle.h" 122 22 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9690 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9691 "11100000" // /* MW 7 */ + 9692 "00000000" // /* MW 6 */ + 9693 "00000000" // /* MW 5 */ + 9694 "00000000" // /* MW 4 */ + 9695 "11010000" // /* MW 3 */ + 9696 "10000000" // /* MW 2 */ + 9697 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "transposeshuffle.h" 120 17 +.src_ref 3 "transposeshuffle.h" 122 22 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9698 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9699 "11100000" // /* MW 7 */ + 9700 "00000000" // /* MW 6 */ + 9701 "00000000" // /* MW 5 */ + 9702 "00000000" // /* MW 4 */ + 9703 "11010000" // /* MW 3 */ + 9704 "10000000" // /* MW 2 */ + 9705 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "transposeshuffle.h" 122 22 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9706 "00001100" // NOPA; VST bmll0, [p1], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9707 "00001101" // /* MW 5 */ + 9708 "00111000" // /* MW 4 */ + 9709 "11110010" // /* MW 3 */ + 9710 "00101100" // /* MW 2 */ + 9711 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "transposeshuffle.h" 122 22 +.delay_slot + 9712 "11100001" // NOPA; NOPB; VST bmll0, [p1], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9713 "00000000" // /* MW 15 */ + 9714 "00000000" // /* MW 14 */ + 9715 "01111000" // /* MW 13 */ + 9716 "10100101" // /* MW 12 */ + 9717 "00000001" // /* MW 11 */ + 9718 "00000000" // /* MW 10 */ + 9719 "00000000" // /* MW 9 */ + 9720 "10000000" // /* MW 8 */ + 9721 "00000110" // /* MW 7 */ + 9722 "00011100" // /* MW 6 */ + 9723 "00100001" // /* MW 5 */ + 9724 "00000000" // /* MW 4 */ + 9725 "11110000" // /* MW 3 */ + 9726 "00101100" // /* MW 2 */ + 9727 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_336 +.src_ref 3 "transposeshuffle.h" 116 8 first + 9728 "11111000" // MOV lc, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9729 "10100000" // /* MW 3 */ + 9730 "01110000" // /* MW 2 */ + 9731 "00011101" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 116 8 + 9732 "01000100" // MOVXM ls, #9744 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9733 "00100000" // /* MW 5 */ + 9734 "11101100" // /* MW 4 */ + 9735 "00100001" // /* MW 3 */ + 9736 "00000000" // /* MW 2 */ + 9737 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 116 8 + 9738 "01000100" // MOVXM le, #9856 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9739 "00000000" // /* MW 5 */ + 9740 "11101101" // /* MW 4 */ + 9741 "00100110" // /* MW 3 */ + 9742 "00000000" // /* MW 2 */ + 9743 "00000000" // /* MW 1 */ +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_352 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "transposeshuffle.h" 119 21 first +.begin_of_loop +.loop_nesting 1 + 9744 "00011000" // VLDB x0, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9745 "00110100" // /* MW 3 */ + 9746 "00011100" // /* MW 2 */ + 9747 "00111000" // /* MW 1 */ + 9748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9749 "00000000" // /* MW 1 */ + 9750 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9751 "01111110" // /* MW 9 */ + 9752 "10100101" // /* MW 8 */ + 9753 "00000001" // /* MW 7 */ + 9754 "00000000" // /* MW 6 */ + 9755 "00010000" // /* MW 5 */ + 9756 "00000000" // /* MW 4 */ + 9757 "11110000" // /* MW 3 */ + 9758 "00101100" // /* MW 2 */ + 9759 "00000000" // /* MW 1 */ + 9760 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9761 "00000000" // /* MW 15 */ + 9762 "00000000" // /* MW 14 */ + 9763 "01111000" // /* MW 13 */ + 9764 "10100101" // /* MW 12 */ + 9765 "00000001" // /* MW 11 */ + 9766 "00000000" // /* MW 10 */ + 9767 "00000000" // /* MW 9 */ + 9768 "00000000" // /* MW 8 */ + 9769 "01011011" // /* MW 7 */ + 9770 "00000001" // /* MW 6 */ + 9771 "00100000" // /* MW 5 */ + 9772 "00000000" // /* MW 4 */ + 9773 "11110000" // /* MW 3 */ + 9774 "00101100" // /* MW 2 */ + 9775 "00000000" // /* MW 1 */ + 9776 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9777 "00000000" // /* MW 15 */ + 9778 "00000000" // /* MW 14 */ + 9779 "01111000" // /* MW 13 */ + 9780 "10100101" // /* MW 12 */ + 9781 "00000001" // /* MW 11 */ + 9782 "00000000" // /* MW 10 */ + 9783 "00000000" // /* MW 9 */ + 9784 "00000000" // /* MW 8 */ + 9785 "01011011" // /* MW 7 */ + 9786 "00000001" // /* MW 6 */ + 9787 "00100000" // /* MW 5 */ + 9788 "00000000" // /* MW 4 */ + 9789 "11110000" // /* MW 3 */ + 9790 "00101100" // /* MW 2 */ + 9791 "00000000" // /* MW 1 */ + 9792 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9793 "00000000" // /* MW 15 */ + 9794 "00000000" // /* MW 14 */ + 9795 "01111000" // /* MW 13 */ + 9796 "10100101" // /* MW 12 */ + 9797 "00000001" // /* MW 11 */ + 9798 "00000000" // /* MW 10 */ + 9799 "00000000" // /* MW 9 */ + 9800 "00000000" // /* MW 8 */ + 9801 "01011011" // /* MW 7 */ + 9802 "00000001" // /* MW 6 */ + 9803 "00100000" // /* MW 5 */ + 9804 "00000000" // /* MW 4 */ + 9805 "11110000" // /* MW 3 */ + 9806 "00101100" // /* MW 2 */ + 9807 "00000000" // /* MW 1 */ + 9808 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9809 "00000000" // /* MW 15 */ + 9810 "00000000" // /* MW 14 */ + 9811 "01111000" // /* MW 13 */ + 9812 "10100101" // /* MW 12 */ + 9813 "00000001" // /* MW 11 */ + 9814 "00000000" // /* MW 10 */ + 9815 "00000000" // /* MW 9 */ + 9816 "00000000" // /* MW 8 */ + 9817 "01011011" // /* MW 7 */ + 9818 "00000001" // /* MW 6 */ + 9819 "00100000" // /* MW 5 */ + 9820 "00000000" // /* MW 4 */ + 9821 "11110000" // /* MW 3 */ + 9822 "00101100" // /* MW 2 */ + 9823 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 120 17 first + 9824 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9825 "00000000" // /* MW 15 */ + 9826 "00000000" // /* MW 14 */ + 9827 "11101000" // /* MW 13 */ + 9828 "00000000" // /* MW 12 */ + 9829 "00000000" // /* MW 11 */ + 9830 "00000000" // /* MW 10 */ + 9831 "00000000" // /* MW 9 */ + 9832 "00000000" // /* MW 8 */ + 9833 "01011011" // /* MW 7 */ + 9834 "00000001" // /* MW 6 */ + 9835 "00100000" // /* MW 5 */ + 9836 "00000000" // /* MW 4 */ + 9837 "11110000" // /* MW 3 */ + 9838 "00101100" // /* MW 2 */ + 9839 "00000000" // /* MW 1 */ + 9840 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9841 "00000000" // /* MW 15 */ + 9842 "00000000" // /* MW 14 */ + 9843 "01111000" // /* MW 13 */ + 9844 "10100101" // /* MW 12 */ + 9845 "00000001" // /* MW 11 */ + 9846 "00000000" // /* MW 10 */ + 9847 "00000000" // /* MW 9 */ + 9848 "00000000" // /* MW 8 */ + 9849 "01011011" // /* MW 7 */ + 9850 "00000001" // /* MW 6 */ + 9851 "00100000" // /* MW 5 */ + 9852 "00000000" // /* MW 4 */ + 9853 "11110000" // /* MW 3 */ + 9854 "00101100" // /* MW 2 */ + 9855 "00000000" // /* MW 1 */ +.label ZLE_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_464 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "transposeshuffle.h" 122 22 first +.end_of_loop + 9856 "11100001" // NOPA; NOPB; VST bmll0, [p1], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9857 "00000000" // /* MW 15 */ + 9858 "00000000" // /* MW 14 */ + 9859 "01111000" // /* MW 13 */ + 9860 "10100101" // /* MW 12 */ + 9861 "00000001" // /* MW 11 */ + 9862 "00000000" // /* MW 10 */ + 9863 "00000000" // /* MW 9 */ + 9864 "10000000" // /* MW 8 */ + 9865 "00000110" // /* MW 7 */ + 9866 "00011100" // /* MW 6 */ + 9867 "00100001" // /* MW 5 */ + 9868 "00000000" // /* MW 4 */ + 9869 "11110000" // /* MW 3 */ + 9870 "00101100" // /* MW 2 */ + 9871 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 126 first +.loop_nesting 0 + 9872 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9873 "00000000" // /* MW 3 */ + 9874 "00101000" // /* MW 2 */ + 9875 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9877 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9879 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9881 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9883 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9884 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9885 "01100111" // /* MW 3 */ + 9886 "00000001" // /* MW 2 */ + 9887 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_496 +.src_ref 3 "transposeshuffle.h" 86 34 + 9888 "01000100" // MOVXM p2, #508544 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9889 "00000000" // /* MW 5 */ + 9890 "11000101" // /* MW 4 */ + 9891 "11000100" // /* MW 3 */ + 9892 "00000111" // /* MW 2 */ + 9893 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 34 first + 9894 "10011000" // LDA r0, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9895 "00010110" // /* MW 3 */ + 9896 "00000100" // /* MW 2 */ + 9897 "00000010" // /* MW 1 */ + 9898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9899 "00000000" // /* MW 1 */ + 9900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9901 "00000000" // /* MW 1 */ + 9902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9903 "00000000" // /* MW 1 */ + 9904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9905 "00000000" // /* MW 1 */ + 9906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9907 "00000000" // /* MW 1 */ + 9908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9909 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 86 26 + 9910 "10000100" // JZ r0, #10528 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10528 delay_slots=5 */ + 9911 "00000001" // /* MW 5 */ + 9912 "00000000" // /* MW 4 */ + 9913 "10010000" // /* MW 3 */ + 9914 "00010100" // /* MW 2 */ + 9915 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9917 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9919 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9920 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9921 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9923 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9925 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 86 8 + 9926 "10111010" // MOVA m5, #36; MOVXM p4, #508548 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9927 "00010000" // /* MW 9 */ + 9928 "01000010" // /* MW 8 */ + 9929 "00110001" // /* MW 7 */ + 9930 "11110010" // /* MW 6 */ + 9931 "00000001" // /* MW 5 */ + 9932 "00000000" // /* MW 4 */ + 9933 "10000000" // /* MW 3 */ + 9934 "10010100" // /* MW 2 */ + 9935 "00000100" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 87 35 + 9936 "10111010" // LDA r1, [p4], #4; MOVX r5, #8; MOV r2, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9937 "01011000" // /* MW 9 */ + 9938 "11111101" // /* MW 8 */ + 9939 "01001111" // /* MW 7 */ + 9940 "00001000" // /* MW 6 */ + 9941 "01010001" // /* MW 5 */ + 9942 "00000000" // /* MW 4 */ + 9943 "11010000" // /* MW 3 */ + 9944 "10000110" // /* MW 2 */ + 9945 "10000011" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1132 26 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 3 "transposeshuffle.h" 86 8 + 9946 "10111010" // LDA r4, [p4], m5; MOVX r3, #1; MOV dc4, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9947 "01011000" // /* MW 9 */ + 9948 "00000000" // /* MW 8 */ + 9949 "01100000" // /* MW 7 */ + 9950 "00101010" // /* MW 6 */ + 9951 "00110000" // /* MW 5 */ + 9952 "00000000" // /* MW 4 */ + 9953 "11010000" // /* MW 3 */ + 9954 "00010010" // /* MW 2 */ + 9955 "10010101" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 86 8 + 9956 "01110110" // LDA m1, [p4], #-8; MOVS dc0, dc4; MOVX r6, #4; MOV m4, #32 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9957 "01011000" // /* MW 11 */ + 9958 "00100000" // /* MW 10 */ + 9959 "00000000" // /* MW 9 */ + 9960 "10001010" // /* MW 8 */ + 9961 "01100000" // /* MW 7 */ + 9962 "00000000" // /* MW 6 */ + 9963 "01001011" // /* MW 5 */ + 9964 "00010000" // /* MW 4 */ + 9965 "11010000" // /* MW 3 */ + 9966 "10010000" // /* MW 2 */ + 9967 "10011101" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1132 26 +.src_ref 4 "transpose.hpp" 224 15 +.src_ref 3 "transposeshuffle.h" 86 8 + 9968 "01110110" // LDA dn1, [p4], #-8; MOVS dc1, dc4; ADD r0, r0, #-1; MOV r7, #52 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9969 "01011000" // /* MW 11 */ + 9970 "00110100" // /* MW 10 */ + 9971 "11101000" // /* MW 9 */ + 9972 "11111000" // /* MW 8 */ + 9973 "00001111" // /* MW 7 */ + 9974 "00000000" // /* MW 6 */ + 9975 "01001011" // /* MW 5 */ + 9976 "00010000" // /* MW 4 */ + 9977 "11010001" // /* MW 3 */ + 9978 "10010100" // /* MW 2 */ + 9979 "10011101" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1132 26 +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 86 8 + 9980 "01110110" // LDA dj1, [p4], #12; MOVS dc5, dc4; MOVXM p2, #10064 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9981 "00010000" // /* MW 11 */ + 9982 "10101000" // /* MW 10 */ + 9983 "00110011" // /* MW 9 */ + 9984 "00001001" // /* MW 8 */ + 9985 "00000000" // /* MW 7 */ + 9986 "00000000" // /* MW 6 */ + 9987 "01001011" // /* MW 5 */ + 9988 "00010000" // /* MW 4 */ + 9989 "11010101" // /* MW 3 */ + 9990 "10011000" // /* MW 2 */ + 9991 "10000111" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 87 12 + 9992 "10111010" // LDA dn5, [p4], #-8; MOVXM p3, #10096 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9993 "00010000" // /* MW 9 */ + 9994 "10111000" // /* MW 8 */ + 9995 "10110011" // /* MW 7 */ + 9996 "00001001" // /* MW 6 */ + 9997 "00000000" // /* MW 5 */ + 9998 "00000000" // /* MW 4 */ + 9999 "11010000" // /* MW 3 */ + 10000 "11010100" // /* MW 2 */ + 10001 "10011101" // /* MW 1 */ +.src_ref 4 "transpose.hpp" 225 15 +.src_ref 3 "transposeshuffle.h" 86 8 + 10002 "00101100" // LDA dj5, [p4], m4; MOVX r16, #53 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10003 "10101010" // /* MW 5 */ + 10004 "01000001" // /* MW 4 */ + 10005 "11010000" // /* MW 3 */ + 10006 "01011000" // /* MW 2 */ + 10007 "10010001" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 87 35 first + 10008 "10111010" // LDA m0, [p4], #-8; LTU r27, r5, r1; ADD.NC r5, r1, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10009 "11001000" // /* MW 9 */ + 10010 "01111111" // /* MW 8 */ + 10011 "10101000" // /* MW 7 */ + 10012 "11100100" // /* MW 6 */ + 10013 "10110000" // /* MW 5 */ + 10014 "00001011" // /* MW 4 */ + 10015 "11010000" // /* MW 3 */ + 10016 "10000000" // /* MW 2 */ + 10017 "10011101" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 first +.src_ref 3 "transposeshuffle.h" 86 8 first + 10018 "10111010" // LDA dn0, [p4], #-8; LSHL r17, r5, r2; ADD.NC r5, r4, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10019 "11001000" // /* MW 9 */ + 10020 "00111111" // /* MW 8 */ + 10021 "10101001" // /* MW 7 */ + 10022 "01101100" // /* MW 6 */ + 10023 "00010001" // /* MW 5 */ + 10024 "00001011" // /* MW 4 */ + 10025 "11010000" // /* MW 3 */ + 10026 "10000100" // /* MW 2 */ + 10027 "10011101" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 86 8 + 10028 "10111010" // LDA dj0, [p4], #12; LSHL r2, r5, r2; ADD.NC r5, r17, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10029 "01001000" // /* MW 9 */ + 10030 "01000000" // /* MW 8 */ + 10031 "10101100" // /* MW 7 */ + 10032 "01101100" // /* MW 6 */ + 10033 "00100001" // /* MW 5 */ + 10034 "00001010" // /* MW 4 */ + 10035 "11010000" // /* MW 3 */ + 10036 "10001000" // /* MW 2 */ + 10037 "10000111" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 + 10038 "10111010" // LDA dn4, [p4]; SEL.EQZ r5, r3, r5, r27; ADD.NC r3, r2, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10039 "01001000" // /* MW 9 */ + 10040 "10000000" // /* MW 8 */ + 10041 "01101000" // /* MW 7 */ + 10042 "10010000" // /* MW 6 */ + 10043 "01010010" // /* MW 5 */ + 10044 "00000110" // /* MW 4 */ + 10045 "11010000" // /* MW 3 */ + 10046 "11000100" // /* MW 2 */ + 10047 "10000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 + 10048 "11100001" // LDA dj4, [p4, #-8]; NOPB; NOPS; ADD r5, r5, #-1; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10049 "00000000" // /* MW 15 */ + 10050 "00000000" // /* MW 14 */ + 10051 "01111000" // /* MW 13 */ + 10052 "10100101" // /* MW 12 */ + 10053 "00000001" // /* MW 11 */ + 10054 "11111000" // /* MW 10 */ + 10055 "01011111" // /* MW 9 */ + 10056 "00001010" // /* MW 8 */ + 10057 "01011011" // /* MW 7 */ + 10058 "00000001" // /* MW 6 */ + 10059 "00100000" // /* MW 5 */ + 10060 "00000000" // /* MW 4 */ + 10061 "11010000" // /* MW 3 */ + 10062 "11001000" // /* MW 2 */ + 10063 "10011100" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_672 +.src_ref 3 "transposeshuffle.h" 87 12 first +.loop_nesting 1 + 10064 "10000100" // JZ r1, #10512 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10512 delay_slots=5 */ + 10065 "00000001" // /* MW 5 */ + 10066 "00000000" // /* MW 4 */ + 10067 "10001000" // /* MW 3 */ + 10068 "00010100" // /* MW 2 */ + 10069 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10071 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10073 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10075 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10077 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10078 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10079 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 87 12 + 10080 "11100001" // NOPA; NOPB; NOPS; NOPX; MOV r17, r5; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10081 "00000000" // /* MW 15 */ + 10082 "00000000" // /* MW 14 */ + 10083 "01111000" // /* MW 13 */ + 10084 "01010000" // /* MW 12 */ + 10085 "00101001" // /* MW 11 */ + 10086 "00000010" // /* MW 10 */ + 10087 "00000000" // /* MW 9 */ + 10088 "00000000" // /* MW 8 */ + 10089 "01011011" // /* MW 7 */ + 10090 "00000001" // /* MW 6 */ + 10091 "00100000" // /* MW 5 */ + 10092 "00000000" // /* MW 4 */ + 10093 "11110000" // /* MW 3 */ + 10094 "00101100" // /* MW 2 */ + 10095 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_704 +.src_ref 3 "transposeshuffle.h" 88 16 first +.loop_nesting 2 + 10096 "10000100" // JZ r4, #10496 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10496 delay_slots=5 */ + 10097 "00000001" // /* MW 5 */ + 10098 "00000000" // /* MW 4 */ + 10099 "10000000" // /* MW 3 */ + 10100 "00010100" // /* MW 2 */ + 10101 "00100000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10103 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10105 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10107 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10108 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10109 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10111 "00000000" // /* MW 1 */ + 10112 "10011000" // LTU r18, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10113 "01101100" // /* MW 3 */ + 10114 "11100100" // /* MW 2 */ + 10115 "00010000" // /* MW 1 */ + 10116 "10000100" // JNZ r18, #10352 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10352 delay_slots=5 */ + 10117 "00000001" // /* MW 5 */ + 10118 "01000000" // /* MW 4 */ + 10119 "00111000" // /* MW 3 */ + 10120 "00010100" // /* MW 2 */ + 10121 "10010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10123 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10125 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10127 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10129 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10131 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1132 26 first +.src_ref 3 "transposeshuffle.h" 88 16 + 10132 "00111010" // VLDB x0, [p0, #64]; MOVXM ls, #10240 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10133 "00010000" // /* MW 9 */ + 10134 "00000000" // /* MW 8 */ + 10135 "01111100" // /* MW 7 */ + 10136 "00001000" // /* MW 6 */ + 10137 "00000000" // /* MW 5 */ + 10138 "00000000" // /* MW 4 */ + 10139 "01101000" // /* MW 3 */ + 10140 "00101000" // /* MW 2 */ + 10141 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1132 26 +.src_ref 3 "transposeshuffle.h" 88 16 first + 10142 "00111010" // VLDB.3D x1, [p0], d1; MOVXM le, #10272 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10143 "00010000" // /* MW 9 */ + 10144 "00010000" // /* MW 8 */ + 10145 "10111100" // /* MW 7 */ + 10146 "00001001" // /* MW 6 */ + 10147 "00000000" // /* MW 5 */ + 10148 "00000000" // /* MW 4 */ + 10149 "11101000" // /* MW 3 */ + 10150 "01110000" // /* MW 2 */ + 10151 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 88 16 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 10152 "10011000" // ADD.NC lc, r3, #-3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10153 "11111110" // /* MW 3 */ + 10154 "01110001" // /* MW 2 */ + 10155 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1132 26 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10156 "00011000" // VLDB x0, [p0, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10157 "00110100" // /* MW 3 */ + 10158 "00010100" // /* MW 2 */ + 10159 "00111000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1132 26 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10160 "11100001" // NOPA; VLDB.3D x1, [p0], d1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10161 "00000000" // /* MW 15 */ + 10162 "00000000" // /* MW 14 */ + 10163 "01111000" // /* MW 13 */ + 10164 "10100101" // /* MW 12 */ + 10165 "00000001" // /* MW 11 */ + 10166 "00000000" // /* MW 10 */ + 10167 "00000000" // /* MW 9 */ + 10168 "00000000" // /* MW 8 */ + 10169 "01011011" // /* MW 7 */ + 10170 "00000001" // /* MW 6 */ + 10171 "11101000" // /* MW 5 */ + 10172 "01110000" // /* MW 4 */ + 10173 "11110000" // /* MW 3 */ + 10174 "00101100" // /* MW 2 */ + 10175 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10176 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10177 "00000000" // /* MW 15 */ + 10178 "00000000" // /* MW 14 */ + 10179 "01111000" // /* MW 13 */ + 10180 "10100101" // /* MW 12 */ + 10181 "00000001" // /* MW 11 */ + 10182 "00000000" // /* MW 10 */ + 10183 "00000000" // /* MW 9 */ + 10184 "00000000" // /* MW 8 */ + 10185 "01011011" // /* MW 7 */ + 10186 "00000001" // /* MW 6 */ + 10187 "00100000" // /* MW 5 */ + 10188 "00000000" // /* MW 4 */ + 10189 "11110000" // /* MW 3 */ + 10190 "00101100" // /* MW 2 */ + 10191 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1132 26 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10192 "11100001" // NOPA; VLDB x0, [p0, #64]; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10193 "00000000" // /* MW 15 */ + 10194 "00000000" // /* MW 14 */ + 10195 "01111000" // /* MW 13 */ + 10196 "10100101" // /* MW 12 */ + 10197 "00000001" // /* MW 11 */ + 10198 "00000000" // /* MW 10 */ + 10199 "00000000" // /* MW 9 */ + 10200 "00000000" // /* MW 8 */ + 10201 "01011011" // /* MW 7 */ + 10202 "00000001" // /* MW 6 */ + 10203 "01101000" // /* MW 5 */ + 10204 "00101000" // /* MW 4 */ + 10205 "11110000" // /* MW 3 */ + 10206 "00101100" // /* MW 2 */ + 10207 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1132 26 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10208 "11100001" // NOPA; VLDB.3D x1, [p0], d1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10209 "00000000" // /* MW 15 */ + 10210 "00000000" // /* MW 14 */ + 10211 "01111000" // /* MW 13 */ + 10212 "10100101" // /* MW 12 */ + 10213 "00000001" // /* MW 11 */ + 10214 "00000000" // /* MW 10 */ + 10215 "00000000" // /* MW 9 */ + 10216 "00000000" // /* MW 8 */ + 10217 "01011011" // /* MW 7 */ + 10218 "00000001" // /* MW 6 */ + 10219 "11101000" // /* MW 5 */ + 10220 "01110000" // /* MW 4 */ + 10221 "11110000" // /* MW 3 */ + 10222 "00101100" // /* MW 2 */ + 10223 "00000000" // /* MW 1 */ +.src_ref 4 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10224 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmlh0, x1, x0, r7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10225 "00000000" // /* MW 15 */ + 10226 "00000000" // /* MW 14 */ + 10227 "11101000" // /* MW 13 */ + 10228 "00001110" // /* MW 12 */ + 10229 "01000100" // /* MW 11 */ + 10230 "00000000" // /* MW 10 */ + 10231 "00000000" // /* MW 9 */ + 10232 "00000000" // /* MW 8 */ + 10233 "01011011" // /* MW 7 */ + 10234 "00000001" // /* MW 6 */ + 10235 "00100000" // /* MW 5 */ + 10236 "00000000" // /* MW 4 */ + 10237 "11110000" // /* MW 3 */ + 10238 "00101100" // /* MW 2 */ + 10239 "00000000" // /* MW 1 */ +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_848 +.src_ref 4 "vector.hpp" 1132 26 first +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "transpose.hpp" 225 15 first +.begin_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 3 + 10240 "11100001" // NOPA; VLDB x0, [p0, #64]; MOVS p4, p1; NOPX; VSHUFFLE bmll0, x1, x0, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10241 "00000000" // /* MW 15 */ + 10242 "00000000" // /* MW 14 */ + 10243 "11101000" // /* MW 13 */ + 10244 "00100000" // /* MW 12 */ + 10245 "00000100" // /* MW 11 */ + 10246 "00000000" // /* MW 10 */ + 10247 "00000000" // /* MW 9 */ + 10248 "00000000" // /* MW 8 */ + 10249 "10001011" // /* MW 7 */ + 10250 "10000100" // /* MW 6 */ + 10251 "01101100" // /* MW 5 */ + 10252 "00101000" // /* MW 4 */ + 10253 "11110000" // /* MW 3 */ + 10254 "00101100" // /* MW 2 */ + 10255 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1132 26 +.src_ref 4 "vector.hpp" 1152 43 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10256 "11100001" // NOPA; VLDB.3D x1, [p0], d1; VST.3D bmlh0, [p1], d0; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10257 "00000000" // /* MW 15 */ + 10258 "00000000" // /* MW 14 */ + 10259 "01111000" // /* MW 13 */ + 10260 "10100101" // /* MW 12 */ + 10261 "00000001" // /* MW 11 */ + 10262 "00000000" // /* MW 10 */ + 10263 "00000000" // /* MW 9 */ + 10264 "10000000" // /* MW 8 */ + 10265 "00100110" // /* MW 7 */ + 10266 "00011000" // /* MW 6 */ + 10267 "11101001" // /* MW 5 */ + 10268 "01110000" // /* MW 4 */ + 10269 "11110000" // /* MW 3 */ + 10270 "00101100" // /* MW 2 */ + 10271 "00000000" // /* MW 1 */ +.label ZLE_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_880 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "transpose.hpp" 224 15 first +.end_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10272 "11100001" // NOPA; NOPB; VST bmll0, [p4, #64]; NOPX; VSHUFFLE bmlh0, x1, x0, r7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10273 "00000000" // /* MW 15 */ + 10274 "00000000" // /* MW 14 */ + 10275 "11101000" // /* MW 13 */ + 10276 "00001110" // /* MW 12 */ + 10277 "01000100" // /* MW 11 */ + 10278 "00000000" // /* MW 10 */ + 10279 "00000000" // /* MW 9 */ + 10280 "10000000" // /* MW 8 */ + 10281 "00000110" // /* MW 7 */ + 10282 "00010100" // /* MW 6 */ + 10283 "00100100" // /* MW 5 */ + 10284 "00000000" // /* MW 4 */ + 10285 "11110000" // /* MW 3 */ + 10286 "00101100" // /* MW 2 */ + 10287 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 2 + 10288 "00000010" // MOVS p4, p1; VSHUFFLE bmll0, x1, x0, r16 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10289 "11100000" // /* MW 7 */ + 10290 "00100000" // /* MW 6 */ + 10291 "00000100" // /* MW 5 */ + 10292 "00000000" // /* MW 4 */ + 10293 "01100000" // /* MW 3 */ + 10294 "10010001" // /* MW 2 */ + 10295 "10010000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1152 43 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10296 "10011000" // VST.3D bmlh0, [p1], d0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10297 "00100110" // /* MW 3 */ + 10298 "00011000" // /* MW 2 */ + 10299 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10300 "00000010" // VST bmll0, [p4, #64]; VSHUFFLE bmlh0, x1, x0, r7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10301 "11100000" // /* MW 7 */ + 10302 "00001110" // /* MW 6 */ + 10303 "01000100" // /* MW 5 */ + 10304 "00000000" // /* MW 4 */ + 10305 "11010000" // /* MW 3 */ + 10306 "10000000" // /* MW 2 */ + 10307 "10000010" // /* MW 1 */ +.src_ref 4 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10308 "11011000" // VSHUFFLE bmll0, x1, x0, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10309 "01000001" // /* MW 3 */ + 10310 "00001000" // /* MW 2 */ + 10311 "00011000" // /* MW 1 */ + 10312 "10000100" // J #10496 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10496 delay_slots=5 */ + 10313 "00000000" // /* MW 5 */ + 10314 "00000000" // /* MW 4 */ + 10315 "10000000" // /* MW 3 */ + 10316 "00010100" // /* MW 2 */ + 10317 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 first +.delay_slot + 10318 "00000010" // VST.3D bmlh0, [p1], d0; MOV p4, p1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10319 "01110000" // /* MW 7 */ + 10320 "01100000" // /* MW 6 */ + 10321 "00110001" // /* MW 5 */ + 10322 "00000010" // /* MW 4 */ + 10323 "11010000" // /* MW 3 */ + 10324 "00000100" // /* MW 2 */ + 10325 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "transpose.hpp" 224 15 first +.delay_slot + 10326 "00000010" // VST bmll0, [p4, #64]; VSHUFFLE bmlh0, x1, x0, r7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10327 "11100000" // /* MW 7 */ + 10328 "00001110" // /* MW 6 */ + 10329 "01000100" // /* MW 5 */ + 10330 "00000000" // /* MW 4 */ + 10331 "11010000" // /* MW 3 */ + 10332 "10000000" // /* MW 2 */ + 10333 "10000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "transpose.hpp" 225 15 first +.delay_slot + 10334 "00000010" // MOVS p4, p1; VSHUFFLE bmll0, x1, x0, r16 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10335 "11100000" // /* MW 7 */ + 10336 "00100000" // /* MW 6 */ + 10337 "00000100" // /* MW 5 */ + 10338 "00000000" // /* MW 4 */ + 10339 "01100000" // /* MW 3 */ + 10340 "10010001" // /* MW 2 */ + 10341 "10010000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1152 43 first +.delay_slot + 10342 "10011000" // VST.3D bmlh0, [p1], d0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10343 "00100110" // /* MW 3 */ + 10344 "00011000" // /* MW 2 */ + 10345 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.delay_slot + 10346 "00001100" // NOPA; VST bmll0, [p4, #64] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10347 "00001101" // /* MW 5 */ + 10348 "00101000" // /* MW 4 */ + 10349 "11111000" // /* MW 3 */ + 10350 "00101100" // /* MW 2 */ + 10351 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_960 +.src_ref 3 "transposeshuffle.h" 88 16 first + 10352 "01000100" // MOVXM ls, #10368 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10353 "00000000" // /* MW 5 */ + 10354 "11110001" // /* MW 4 */ + 10355 "00100001" // /* MW 3 */ + 10356 "00000000" // /* MW 2 */ + 10357 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 88 16 + 10358 "01000100" // MOVXM le, #10480 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10359 "11100000" // /* MW 5 */ + 10360 "11110001" // /* MW 4 */ + 10361 "00100110" // /* MW 3 */ + 10362 "00000000" // /* MW 2 */ + 10363 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 88 16 + 10364 "10011000" // ADD.NC lc, r2, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10365 "00000000" // /* MW 3 */ + 10366 "01110001" // /* MW 2 */ + 10367 "00011101" // /* MW 1 */ +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_976 +.src_ref 4 "vector.hpp" 1132 26 first +.src_ref 4 "vector.hpp" 1152 43 +.begin_of_loop +.loop_nesting 3 + 10368 "11110100" // VLDB x0, [p0, #64]; MOV p4, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10369 "10000001" // /* MW 5 */ + 10370 "11000101" // /* MW 4 */ + 10371 "10001000" // /* MW 3 */ + 10372 "10000110" // /* MW 2 */ + 10373 "00000010" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1132 26 + 10374 "00011000" // VLDB.3D x1, [p0], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10375 "01110100" // /* MW 3 */ + 10376 "00111000" // /* MW 2 */ + 10377 "00111000" // /* MW 1 */ + 10378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10379 "00000000" // /* MW 1 */ + 10380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10381 "00000000" // /* MW 1 */ + 10382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10383 "00000000" // /* MW 1 */ + 10384 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10385 "00000000" // /* MW 15 */ + 10386 "00000000" // /* MW 14 */ + 10387 "01111000" // /* MW 13 */ + 10388 "10100101" // /* MW 12 */ + 10389 "00000001" // /* MW 11 */ + 10390 "00000000" // /* MW 10 */ + 10391 "00000000" // /* MW 9 */ + 10392 "00000000" // /* MW 8 */ + 10393 "01011011" // /* MW 7 */ + 10394 "00000001" // /* MW 6 */ + 10395 "00100000" // /* MW 5 */ + 10396 "00000000" // /* MW 4 */ + 10397 "11110000" // /* MW 3 */ + 10398 "00101100" // /* MW 2 */ + 10399 "00000000" // /* MW 1 */ + 10400 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10401 "00000000" // /* MW 15 */ + 10402 "00000000" // /* MW 14 */ + 10403 "01111000" // /* MW 13 */ + 10404 "10100101" // /* MW 12 */ + 10405 "00000001" // /* MW 11 */ + 10406 "00000000" // /* MW 10 */ + 10407 "00000000" // /* MW 9 */ + 10408 "00000000" // /* MW 8 */ + 10409 "01011011" // /* MW 7 */ + 10410 "00000001" // /* MW 6 */ + 10411 "00100000" // /* MW 5 */ + 10412 "00000000" // /* MW 4 */ + 10413 "11110000" // /* MW 3 */ + 10414 "00101100" // /* MW 2 */ + 10415 "00000000" // /* MW 1 */ + 10416 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10417 "00000000" // /* MW 15 */ + 10418 "00000000" // /* MW 14 */ + 10419 "01111000" // /* MW 13 */ + 10420 "10100101" // /* MW 12 */ + 10421 "00000001" // /* MW 11 */ + 10422 "00000000" // /* MW 10 */ + 10423 "00000000" // /* MW 9 */ + 10424 "00000000" // /* MW 8 */ + 10425 "01011011" // /* MW 7 */ + 10426 "00000001" // /* MW 6 */ + 10427 "00100000" // /* MW 5 */ + 10428 "00000000" // /* MW 4 */ + 10429 "11110000" // /* MW 3 */ + 10430 "00101100" // /* MW 2 */ + 10431 "00000000" // /* MW 1 */ +.src_ref 4 "transpose.hpp" 224 15 first + 10432 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmlh0, x1, x0, r7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10433 "00000000" // /* MW 15 */ + 10434 "00000000" // /* MW 14 */ + 10435 "11101000" // /* MW 13 */ + 10436 "00001110" // /* MW 12 */ + 10437 "01000100" // /* MW 11 */ + 10438 "00000000" // /* MW 10 */ + 10439 "00000000" // /* MW 9 */ + 10440 "00000000" // /* MW 8 */ + 10441 "01011011" // /* MW 7 */ + 10442 "00000001" // /* MW 6 */ + 10443 "00100000" // /* MW 5 */ + 10444 "00000000" // /* MW 4 */ + 10445 "11110000" // /* MW 3 */ + 10446 "00101100" // /* MW 2 */ + 10447 "00000000" // /* MW 1 */ +.src_ref 4 "transpose.hpp" 225 15 first + 10448 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x1, x0, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10449 "00000000" // /* MW 15 */ + 10450 "00000000" // /* MW 14 */ + 10451 "11101000" // /* MW 13 */ + 10452 "00100000" // /* MW 12 */ + 10453 "00000100" // /* MW 11 */ + 10454 "00000000" // /* MW 10 */ + 10455 "00000000" // /* MW 9 */ + 10456 "00000000" // /* MW 8 */ + 10457 "01011011" // /* MW 7 */ + 10458 "00000001" // /* MW 6 */ + 10459 "00100000" // /* MW 5 */ + 10460 "00000000" // /* MW 4 */ + 10461 "11110000" // /* MW 3 */ + 10462 "00101100" // /* MW 2 */ + 10463 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1152 43 first + 10464 "11100001" // NOPA; NOPB; VST.3D bmlh0, [p1], d0; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10465 "00000000" // /* MW 15 */ + 10466 "00000000" // /* MW 14 */ + 10467 "01111000" // /* MW 13 */ + 10468 "10100101" // /* MW 12 */ + 10469 "00000001" // /* MW 11 */ + 10470 "00000000" // /* MW 10 */ + 10471 "00000000" // /* MW 9 */ + 10472 "10000000" // /* MW 8 */ + 10473 "00100110" // /* MW 7 */ + 10474 "00011000" // /* MW 6 */ + 10475 "00100001" // /* MW 5 */ + 10476 "00000000" // /* MW 4 */ + 10477 "11110000" // /* MW 3 */ + 10478 "00101100" // /* MW 2 */ + 10479 "00000000" // /* MW 1 */ +.label ZLE_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1088 +.src_ref 4 "vector.hpp" 1152 43 +.end_of_loop + 10480 "11100001" // NOPA; NOPB; VST bmll0, [p4, #64]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10481 "00000000" // /* MW 15 */ + 10482 "00000000" // /* MW 14 */ + 10483 "01111000" // /* MW 13 */ + 10484 "10100101" // /* MW 12 */ + 10485 "00000001" // /* MW 11 */ + 10486 "00000000" // /* MW 10 */ + 10487 "00000000" // /* MW 9 */ + 10488 "10000000" // /* MW 8 */ + 10489 "00000110" // /* MW 7 */ + 10490 "00010100" // /* MW 6 */ + 10491 "00100100" // /* MW 5 */ + 10492 "00000000" // /* MW 4 */ + 10493 "11110000" // /* MW 3 */ + 10494 "00101100" // /* MW 2 */ + 10495 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1104 +.src_ref 3 "transposeshuffle.h" 87 12 first +.loop_nesting 2 + 10496 "00011000" // JNZD r17, r17, p3 /* MW 4 */ /* control_operation: words=4 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 10497 "11100000" // /* MW 3 */ + 10498 "01100010" // /* MW 2 */ + 10499 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10501 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10503 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10505 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10507 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10508 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10509 "01100111" // /* MW 3 */ + 10510 "00000001" // /* MW 2 */ + 10511 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1120 +.src_ref 3 "transposeshuffle.h" 86 8 first +.loop_nesting 1 + 10512 "00011000" // JNZD r0, r0, p2 /* MW 4 */ /* control_operation: words=4 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 10513 "10100000" // /* MW 3 */ + 10514 "00000000" // /* MW 2 */ + 10515 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10517 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10519 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10521 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10523 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10524 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10525 "01100111" // /* MW 3 */ + 10526 "00000001" // /* MW 2 */ + 10527 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1136 +.src_ref 3 "transposeshuffle.h" 126 first +.loop_nesting 0 + 10528 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10529 "00000000" // /* MW 3 */ + 10530 "00101000" // /* MW 2 */ + 10531 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10533 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10535 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10536 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10537 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10538 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10539 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params__end +.label __Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params___func_end0 + 10541 "00000000" // /* MW 1 */ +.label __ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj___func_begin0 +.label _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj +.function transpose4d_adf_wrapper, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> > > _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj +.src_ref 10 "transpose4d_adf_wrapper.cpp" 33 first +.function_start + 10544 "11111000" // MOV p3, p6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10545 "11000000" // /* MW 3 */ + 10546 "01101100" // /* MW 2 */ + 10547 "00011011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 10 "transpose4d_adf_wrapper.cpp" 37 8 + 10548 "00111010" // MOVS p6, p1; MOVXM p1, #508436 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10549 "00010001" // /* MW 9 */ + 10550 "00001010" // /* MW 8 */ + 10551 "10110001" // /* MW 7 */ + 10552 "11110000" // /* MW 6 */ + 10553 "00000001" // /* MW 5 */ + 10554 "00000000" // /* MW 4 */ + 10555 "01100000" // /* MW 3 */ + 10556 "10010001" // /* MW 2 */ + 10557 "11010000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 37 8 first + 10558 "10011000" // LDA r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10559 "00010110" // /* MW 3 */ + 10560 "00000110" // /* MW 2 */ + 10561 "00000001" // /* MW 1 */ + 10562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10563 "00000000" // /* MW 1 */ + 10564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10565 "00000000" // /* MW 1 */ + 10566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10567 "00000000" // /* MW 1 */ + 10568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10569 "00000000" // /* MW 1 */ + 10570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10571 "00000000" // /* MW 1 */ + 10572 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10573 "00000000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 37 8 +.src_ref 10 "transpose4d_adf_wrapper.cpp" 37 15 + 10574 "10000100" // JNZ r16, #10640 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10640 delay_slots=5 */ + 10575 "00000001" // /* MW 5 */ + 10576 "01000000" // /* MW 4 */ + 10577 "11001000" // /* MW 3 */ + 10578 "00010100" // /* MW 2 */ + 10579 "10000000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 33 +.delay_slot + 10580 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10581 "00000001" // /* MW 5 */ + 10582 "00000000" // /* MW 4 */ + 10583 "00000000" // /* MW 3 */ + 10584 "00001000" // /* MW 2 */ + 10585 "00000000" // /* MW 1 */ +.delay_slot + 10586 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10587 "00111101" // /* MW 3 */ + 10588 "11110100" // /* MW 2 */ + 10589 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.delay_slot + 10590 "00000010" // MOVS p7, p0; MOV p1, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10591 "01110000" // /* MW 7 */ + 10592 "01100000" // /* MW 6 */ + 10593 "10110111" // /* MW 5 */ + 10594 "00000000" // /* MW 4 */ + 10595 "01100000" // /* MW 3 */ + 10596 "00010001" // /* MW 2 */ + 10597 "11110000" // /* MW 1 */ +.delay_slot + 10598 "10011000" // ST p3, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10599 "10011101" // /* MW 3 */ + 10600 "11111001" // /* MW 2 */ + 10601 "00001111" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 38 8 +.delay_slot + 10602 "00111010" // ST p1, [sp, #-4]; MOVXM p0, #508544 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10603 "00010001" // /* MW 9 */ + 10604 "01000000" // /* MW 8 */ + 10605 "00110001" // /* MW 7 */ + 10606 "11110000" // /* MW 6 */ + 10607 "00000001" // /* MW 5 */ + 10608 "00000000" // /* MW 4 */ + 10609 "10110000" // /* MW 3 */ + 10610 "10010011" // /* MW 2 */ + 10611 "11111111" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 38 8 first +.no_stack_arguments + 10612 "00000100" // JL #9120 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9120 delay_slots=5 */ + 10613 "00000001" // /* MW 5 */ + 10614 "00000000" // /* MW 4 */ + 10615 "11010000" // /* MW 3 */ + 10616 "00010001" // /* MW 2 */ + 10617 "00000000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 38 8 +.delay_slot + 10618 "11111000" // MOV p1, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10619 "11000000" // /* MW 3 */ + 10620 "01100100" // /* MW 2 */ + 10621 "00011001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10622 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10623 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10625 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10627 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10628 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10629 "10000001" // /* MW 11 */ + 10630 "10101101" // /* MW 10 */ + 10631 "00000000" // /* MW 9 */ + 10632 "00000000" // /* MW 8 */ + 10633 "00000000" // /* MW 7 */ + 10634 "00000000" // /* MW 6 */ + 10635 "00100000" // /* MW 5 */ + 10636 "00000000" // /* MW 4 */ + 10637 "11110000" // /* MW 3 */ + 10638 "00101100" // /* MW 2 */ + 10639 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj_96 +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 3 "transposeshuffle.h" 137 72 +.return_address + 10640 "10111010" // LDA r16, [p7]; MOVXM p7, #508564 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10641 "00010000" // /* MW 9 */ + 10642 "01001010" // /* MW 8 */ + 10643 "10110001" // /* MW 7 */ + 10644 "11110011" // /* MW 6 */ + 10645 "00000001" // /* MW 5 */ + 10646 "00000000" // /* MW 4 */ + 10647 "11010000" // /* MW 3 */ + 10648 "11000010" // /* MW 2 */ + 10649 "11100000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 137 72 first + 10650 "10011000" // LDA r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10651 "00110110" // /* MW 3 */ + 10652 "00000110" // /* MW 2 */ + 10653 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 10654 "10011000" // LDA p1, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10655 "10011110" // /* MW 3 */ + 10656 "00000100" // /* MW 2 */ + 10657 "00000110" // /* MW 1 */ + 10658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10659 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 137 11 first +.no_stack_arguments + 10660 "00000100" // JL #9392 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9392 delay_slots=5 */ + 10661 "00000001" // /* MW 5 */ + 10662 "00000000" // /* MW 4 */ + 10663 "01011000" // /* MW 3 */ + 10664 "00010010" // /* MW 2 */ + 10665 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 137 64 +.delay_slot + 10666 "00011000" // MOVX r18, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10667 "00000101" // /* MW 3 */ + 10668 "00100100" // /* MW 2 */ + 10669 "00010000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 137 11 +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 27 +.delay_slot + 10670 "01000100" // MOVXM p2, #508544 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10671 "00000000" // /* MW 5 */ + 10672 "11000101" // /* MW 4 */ + 10673 "11000100" // /* MW 3 */ + 10674 "00000111" // /* MW 2 */ + 10675 "00000000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 27 +.delay_slot + 10676 "11111000" // MOV p6, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10677 "11000000" // /* MW 3 */ + 10678 "01100100" // /* MW 2 */ + 10679 "00011110" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 137 64 +.delay_slot + 10680 "10011000" // LSHL r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10681 "00101101" // /* MW 3 */ + 10682 "01100011" // /* MW 2 */ + 10683 "00010100" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 137 64 +.delay_slot + 10684 "01011000" // ADD.NC p0, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10685 "11000001" // /* MW 3 */ + 10686 "01101000" // /* MW 2 */ + 10687 "00011000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 8 +.src_ref 10 "transpose4d_adf_wrapper.cpp" 46 +.return_address + 10688 "10111010" // LDA lr, [sp, #-12]; MOVXM p2, #508436 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10689 "00010000" // /* MW 9 */ + 10690 "00001010" // /* MW 8 */ + 10691 "00110001" // /* MW 7 */ + 10692 "11110001" // /* MW 6 */ + 10693 "00000001" // /* MW 5 */ + 10694 "00000000" // /* MW 4 */ + 10695 "00100000" // /* MW 3 */ + 10696 "10000111" // /* MW 2 */ + 10697 "11111110" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 8 +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 8 first + 10698 "00101100" // LDA r16, [p2]; MOVX r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10699 "00000010" // /* MW 5 */ + 10700 "01100000" // /* MW 4 */ + 10701 "11010000" // /* MW 3 */ + 10702 "11000010" // /* MW 2 */ + 10703 "01000000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 27 + 10704 "10011000" // LDA r17, [p6, #24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10705 "00110110" // /* MW 3 */ + 10706 "01100110" // /* MW 2 */ + 10707 "00000110" // /* MW 1 */ + 10708 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10709 "00011001" // /* MW 3 */ + 10710 "11111011" // /* MW 2 */ + 10711 "00000111" // /* MW 1 */ + 10712 "00011000" // LDA p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10713 "10011001" // /* MW 3 */ + 10714 "11111111" // /* MW 2 */ + 10715 "00000111" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 46 first + 10716 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10717 "00000001" // /* MW 5 */ + 10718 "00000000" // /* MW 4 */ + 10719 "00000000" // /* MW 3 */ + 10720 "11111000" // /* MW 2 */ + 10721 "11111111" // /* MW 1 */ + 10722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10723 "00000000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 46 + 10724 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10725 "00000000" // /* MW 3 */ + 10726 "00101000" // /* MW 2 */ + 10727 "00010000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 8 first +.delay_slot + 10728 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10729 "00000111" // /* MW 3 */ + 10730 "00100000" // /* MW 2 */ + 10731 "00010100" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 17 +.delay_slot + 10732 "10011000" // EQ r27, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10733 "00000111" // /* MW 3 */ + 10734 "01110111" // /* MW 2 */ + 10735 "00010100" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 8 +.delay_slot + 10736 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10737 "10000010" // /* MW 3 */ + 10738 "00100001" // /* MW 2 */ + 10739 "00010100" // /* MW 1 */ +.delay_slot + 10740 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10741 "00010001" // /* MW 3 */ + 10742 "00000110" // /* MW 2 */ + 10743 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj__end +.label __ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj___func_end0 + 10745 "00000000" // /* MW 1 */ +.label __Z14_b7835_wrapperPPv___func_begin0 +.label _Z14_b7835_wrapperPPv +.function _b7835_wrapper _Z14_b7835_wrapperPPv +.src_ref 0 "0_0_reloadable4.cc" 30 first +.src_ref 0 "0_0_reloadable4.cc" 32 79 +.function_start + 10752 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10753 "11000000" // /* MW 3 */ + 10754 "01100000" // /* MW 2 */ + 10755 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 32 79 first + 10756 "10011000" // LDA p0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10757 "00011110" // /* MW 3 */ + 10758 "00011100" // /* MW 2 */ + 10759 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 34 46 first + 10760 "10011000" // LDA p2, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10761 "00011110" // /* MW 3 */ + 10762 "00010101" // /* MW 2 */ + 10763 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 33 80 first + 10764 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10765 "10011110" // /* MW 3 */ + 10766 "00000100" // /* MW 2 */ + 10767 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 31 4 first +.tail_call + 10768 "10000100" // J #10544 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10544 delay_slots=5 */ + 10769 "00000000" // /* MW 5 */ + 10770 "00000000" // /* MW 4 */ + 10771 "10011000" // /* MW 3 */ + 10772 "00010100" // /* MW 2 */ + 10773 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10775 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10776 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10777 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10778 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10779 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10780 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10781 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z14_b7835_wrapperPPv__end +.label __Z14_b7835_wrapperPPv___func_end0 + 10783 "00000000" // /* MW 1 */ +.label __ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj___func_begin0 +.label _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj +.function buffer_pad_adf_wrapper, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> > > _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 24 first +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 26 25 +.function_start + 10784 "11010100" // LDA el0, [p1]; MOV r17, p2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10785 "10000001" // /* MW 5 */ + 10786 "10101001" // /* MW 4 */ + 10787 "11011000" // /* MW 3 */ + 10788 "10000101" // /* MW 2 */ + 10789 "00100000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 26 25 first + 10790 "00011000" // ADD.NC p1, r17, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10791 "10000010" // /* MW 3 */ + 10792 "01101000" // /* MW 2 */ + 10793 "00011001" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 26 25 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 10794 "10011000" // LDA r18, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10795 "01010110" // /* MW 3 */ + 10796 "00011110" // /* MW 2 */ + 10797 "00000001" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 27 33 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10798 "10011000" // LDA r15, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10799 "11110110" // /* MW 3 */ + 10800 "00000101" // /* MW 2 */ + 10801 "00000001" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10803 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10805 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10807 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10809 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10810 "11111000" // MOV r0, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10811 "10100000" // /* MW 3 */ + 10812 "00010111" // /* MW 2 */ + 10813 "00011000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 24 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 10814 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10815 "00000001" // /* MW 5 */ + 10816 "00000000" // /* MW 4 */ + 10817 "00000000" // /* MW 3 */ + 10818 "00001000" // /* MW 2 */ + 10819 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 43 first +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 72 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10820 "01100100" // MUL r18, r15, r18; MOV r16, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10821 "11111101" // /* MW 5 */ + 10822 "00111111" // /* MW 4 */ + 10823 "11111000" // /* MW 3 */ + 10824 "10100101" // /* MW 2 */ + 10825 "01111100" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 72 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10826 "00111010" // ST r18, [sp, #-20]; MOVXM r17, #1073741823 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10827 "10010001" // /* MW 9 */ + 10828 "11111111" // /* MW 8 */ + 10829 "00101111" // /* MW 7 */ + 10830 "11111110" // /* MW 6 */ + 10831 "11111111" // /* MW 5 */ + 10832 "00001111" // /* MW 4 */ + 10833 "10110000" // /* MW 3 */ + 10834 "11001010" // /* MW 2 */ + 10835 "11111101" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 72 + 10836 "10011000" // LSHL r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10837 "00001101" // /* MW 3 */ + 10838 "10100001" // /* MW 2 */ + 10839 "00010100" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 72 + 10840 "10011000" // AND r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10841 "00000100" // /* MW 3 */ + 10842 "01100001" // /* MW 2 */ + 10843 "00010100" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 4 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 22 + 10844 "10000100" // JZ r16, #10928 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10928 delay_slots=5 */ + 10845 "00000001" // /* MW 5 */ + 10846 "00000000" // /* MW 4 */ + 10847 "01011000" // /* MW 3 */ + 10848 "00010101" // /* MW 2 */ + 10849 "10000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.delay_slot + 10850 "11010100" // LDA p7, [p0]; MOV p0, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10851 "10000001" // /* MW 5 */ + 10852 "11011101" // /* MW 4 */ + 10853 "11010000" // /* MW 3 */ + 10854 "11110011" // /* MW 2 */ + 10855 "00000000" // /* MW 1 */ +.delay_slot + 10856 "10011000" // ST p0, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10857 "00011101" // /* MW 3 */ + 10858 "11111000" // /* MW 2 */ + 10859 "00001111" // /* MW 1 */ +.delay_slot + 10860 "10011000" // ST r14, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10861 "11010101" // /* MW 3 */ + 10862 "11110101" // /* MW 2 */ + 10863 "00001111" // /* MW 1 */ +.delay_slot + 10864 "10011000" // ST lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10865 "00111101" // /* MW 3 */ + 10866 "11110000" // /* MW 2 */ + 10867 "00001111" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 25 24 first +.delay_slot + 10868 "00001100" // LDA r14, [p1, #-8]; ST r0, [sp, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10869 "00101011" // /* MW 5 */ + 10870 "11111000" // /* MW 4 */ + 10871 "11011111" // /* MW 3 */ + 10872 "10111010" // /* MW 2 */ + 10873 "00111100" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 37 31 + 10874 "01011100" // ST el0, [sp, #-24]; MOVX r0, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10875 "00000010" // /* MW 5 */ + 10876 "00000000" // /* MW 4 */ + 10877 "10110000" // /* MW 3 */ + 10878 "00000101" // /* MW 2 */ + 10879 "11111101" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 37 31 + 10880 "00011000" // LDA p1, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10881 "10011001" // /* MW 3 */ + 10882 "11101000" // /* MW 2 */ + 10883 "00000111" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 37 31 first +.no_stack_arguments + 10884 "00000100" // JL #12608 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12608 delay_slots=5 */ + 10885 "00000001" // /* MW 5 */ + 10886 "00000000" // /* MW 4 */ + 10887 "10100000" // /* MW 3 */ + 10888 "00011000" // /* MW 2 */ + 10889 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 4 +.delay_slot + 10890 "00011000" // MOVX r17, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10891 "00001001" // /* MW 3 */ + 10892 "00100010" // /* MW 2 */ + 10893 "00010000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 4 first +.delay_slot + 10894 "10011000" // LSHL r1, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10895 "00011101" // /* MW 3 */ + 10896 "00000011" // /* MW 2 */ + 10897 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10899 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10901 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10902 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10903 "01111110" // /* MW 9 */ + 10904 "10100101" // /* MW 8 */ + 10905 "00000001" // /* MW 7 */ + 10906 "00000000" // /* MW 6 */ + 10907 "00010000" // /* MW 5 */ + 10908 "00000000" // /* MW 4 */ + 10909 "11110000" // /* MW 3 */ + 10910 "00101100" // /* MW 2 */ + 10911 "00000000" // /* MW 1 */ +.return_address + 10912 "10000100" // J #10944 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10944 delay_slots=5 */ + 10913 "00000000" // /* MW 5 */ + 10914 "00000000" // /* MW 4 */ + 10915 "01100000" // /* MW 3 */ + 10916 "00010101" // /* MW 2 */ + 10917 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10919 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10920 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10921 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10923 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10925 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10927 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_144 + 10928 "11100001" // NOPA; NOPB; ST el0, [sp, #-24]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10929 "00000000" // /* MW 15 */ + 10930 "00000000" // /* MW 14 */ + 10931 "01111000" // /* MW 13 */ + 10932 "10100101" // /* MW 12 */ + 10933 "00000001" // /* MW 11 */ + 10934 "00000000" // /* MW 10 */ + 10935 "00000000" // /* MW 9 */ + 10936 "10000000" // /* MW 8 */ + 10937 "00101101" // /* MW 7 */ + 10938 "11101000" // /* MW 6 */ + 10939 "00100111" // /* MW 5 */ + 10940 "00000000" // /* MW 4 */ + 10941 "11110000" // /* MW 3 */ + 10942 "00101100" // /* MW 2 */ + 10943 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_160 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 40 4 first +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 40 22 first + 10944 "10000100" // JZ r15, #11216 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11216 delay_slots=5 */ + 10945 "00000001" // /* MW 5 */ + 10946 "00000000" // /* MW 4 */ + 10947 "11101000" // /* MW 3 */ + 10948 "00010101" // /* MW 2 */ + 10949 "01111000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10951 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10953 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10955 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10957 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10959 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 41 8 first + 10960 "10111010" // LDA r17, [sp, #-20]; MOVXM ls, #11056 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10961 "00010000" // /* MW 9 */ + 10962 "10011000" // /* MW 8 */ + 10963 "01111101" // /* MW 7 */ + 10964 "00001000" // /* MW 6 */ + 10965 "00000000" // /* MW 5 */ + 10966 "00000000" // /* MW 4 */ + 10967 "00100000" // /* MW 3 */ + 10968 "11000110" // /* MW 2 */ + 10969 "11111101" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 41 8 + 10970 "10111010" // MOVA r19, #1; MOVXM le, #11152 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10971 "00010000" // /* MW 9 */ + 10972 "11001000" // /* MW 8 */ + 10973 "10111101" // /* MW 7 */ + 10974 "00001001" // /* MW 6 */ + 10975 "00000000" // /* MW 5 */ + 10976 "00000000" // /* MW 4 */ + 10977 "00000000" // /* MW 3 */ + 10978 "00110011" // /* MW 2 */ + 10979 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 42 36 + 10980 "10111010" // LDA r18, [sp, #-24]; LSHL r20, r14, r19; ADD.NC r16, r15, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10981 "11001000" // /* MW 9 */ + 10982 "11111111" // /* MW 8 */ + 10983 "00001011" // /* MW 7 */ + 10984 "11101110" // /* MW 6 */ + 10985 "01001001" // /* MW 5 */ + 10986 "00011101" // /* MW 4 */ + 10987 "00100000" // /* MW 3 */ + 10988 "01001010" // /* MW 2 */ + 10989 "11111101" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 40 4 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 45 + 10990 "10111010" // LDA lr, [sp, #-16]; MOVXM p0, #11024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10991 "00010000" // /* MW 9 */ + 10992 "10001000" // /* MW 8 */ + 10993 "00110101" // /* MW 7 */ + 10994 "00001000" // /* MW 6 */ + 10995 "00000000" // /* MW 5 */ + 10996 "00000000" // /* MW 4 */ + 10997 "00100000" // /* MW 3 */ + 10998 "00000111" // /* MW 2 */ + 10999 "11111110" // /* MW 1 */ + 11000 "11111000" // MOV m0, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11001 "00100000" // /* MW 3 */ + 11002 "00001010" // /* MW 2 */ + 11003 "00011000" // /* MW 1 */ + 11004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11005 "00000000" // /* MW 1 */ + 11006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11007 "00000000" // /* MW 1 */ + 11008 "11100001" // NOPA; NOPB; NOPS; LSHL r17, r17, r19; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11009 "00000000" // /* MW 15 */ + 11010 "00000000" // /* MW 14 */ + 11011 "01111000" // /* MW 13 */ + 11012 "10100101" // /* MW 12 */ + 11013 "00000001" // /* MW 11 */ + 11014 "11101100" // /* MW 10 */ + 11015 "00011001" // /* MW 9 */ + 11016 "00100011" // /* MW 8 */ + 11017 "01011011" // /* MW 7 */ + 11018 "00000001" // /* MW 6 */ + 11019 "00100000" // /* MW 5 */ + 11020 "00000000" // /* MW 4 */ + 11021 "11110000" // /* MW 3 */ + 11022 "00101100" // /* MW 2 */ + 11023 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_240 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 41 8 +.loop_nesting 1 + 11024 "10000100" // JZ r14, #11168 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11168 delay_slots=5 */ + 11025 "00000001" // /* MW 5 */ + 11026 "00000000" // /* MW 4 */ + 11027 "11010000" // /* MW 3 */ + 11028 "00010101" // /* MW 2 */ + 11029 "01110000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11031 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11032 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11033 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11034 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11035 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11036 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11037 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11038 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11039 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 41 8 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 42 36 + 11040 "00000010" // MOVS p2, p7; MOV lc, r14 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11041 "01110000" // /* MW 7 */ + 11042 "10010000" // /* MW 6 */ + 11043 "10111011" // /* MW 5 */ + 11044 "00000010" // /* MW 4 */ + 11045 "01100000" // /* MW 3 */ + 11046 "10010001" // /* MW 2 */ + 11047 "01010011" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 42 36 + 11048 "00000010" // NOPS; MOV p1, r18 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11049 "01110000" // /* MW 7 */ + 11050 "10010000" // /* MW 6 */ + 11051 "10110100" // /* MW 5 */ + 11052 "00000000" // /* MW 4 */ + 11053 "01100000" // /* MW 3 */ + 11054 "00101011" // /* MW 2 */ + 11055 "00000000" // /* MW 1 */ +.label ZLS_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_272 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 42 36 first +.begin_of_loop +.loop_nesting 2 + 11056 "11100001" // LDA.s16 r19, [p2], #2; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11057 "00000000" // /* MW 15 */ + 11058 "00000000" // /* MW 14 */ + 11059 "01111000" // /* MW 13 */ + 11060 "10100101" // /* MW 12 */ + 11061 "00000001" // /* MW 11 */ + 11062 "00000000" // /* MW 10 */ + 11063 "00000000" // /* MW 9 */ + 11064 "00000000" // /* MW 8 */ + 11065 "01011011" // /* MW 7 */ + 11066 "00000001" // /* MW 6 */ + 11067 "00100000" // /* MW 5 */ + 11068 "00000000" // /* MW 4 */ + 11069 "01010000" // /* MW 3 */ + 11070 "11001110" // /* MW 2 */ + 11071 "01000011" // /* MW 1 */ + 11072 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11073 "00000000" // /* MW 15 */ + 11074 "00000000" // /* MW 14 */ + 11075 "01111000" // /* MW 13 */ + 11076 "10100101" // /* MW 12 */ + 11077 "00000001" // /* MW 11 */ + 11078 "00000000" // /* MW 10 */ + 11079 "00000000" // /* MW 9 */ + 11080 "00000000" // /* MW 8 */ + 11081 "01011011" // /* MW 7 */ + 11082 "00000001" // /* MW 6 */ + 11083 "00100000" // /* MW 5 */ + 11084 "00000000" // /* MW 4 */ + 11085 "11110000" // /* MW 3 */ + 11086 "00101100" // /* MW 2 */ + 11087 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 42 36 + 11088 "11100001" // ST.s16 r19, [p1], #2; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11089 "00000000" // /* MW 15 */ + 11090 "00000000" // /* MW 14 */ + 11091 "01111000" // /* MW 13 */ + 11092 "10100101" // /* MW 12 */ + 11093 "00000001" // /* MW 11 */ + 11094 "00000000" // /* MW 10 */ + 11095 "00000000" // /* MW 9 */ + 11096 "00000000" // /* MW 8 */ + 11097 "01011011" // /* MW 7 */ + 11098 "00000001" // /* MW 6 */ + 11099 "00100000" // /* MW 5 */ + 11100 "00000000" // /* MW 4 */ + 11101 "11100000" // /* MW 3 */ + 11102 "11001110" // /* MW 2 */ + 11103 "00100011" // /* MW 1 */ + 11104 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11105 "00000000" // /* MW 15 */ + 11106 "00000000" // /* MW 14 */ + 11107 "01111000" // /* MW 13 */ + 11108 "10100101" // /* MW 12 */ + 11109 "00000001" // /* MW 11 */ + 11110 "00000000" // /* MW 10 */ + 11111 "00000000" // /* MW 9 */ + 11112 "00000000" // /* MW 8 */ + 11113 "01011011" // /* MW 7 */ + 11114 "00000001" // /* MW 6 */ + 11115 "00100000" // /* MW 5 */ + 11116 "00000000" // /* MW 4 */ + 11117 "11110000" // /* MW 3 */ + 11118 "00101100" // /* MW 2 */ + 11119 "00000000" // /* MW 1 */ + 11120 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11121 "00000000" // /* MW 15 */ + 11122 "00000000" // /* MW 14 */ + 11123 "01111000" // /* MW 13 */ + 11124 "10100101" // /* MW 12 */ + 11125 "00000001" // /* MW 11 */ + 11126 "00000000" // /* MW 10 */ + 11127 "00000000" // /* MW 9 */ + 11128 "00000000" // /* MW 8 */ + 11129 "01011011" // /* MW 7 */ + 11130 "00000001" // /* MW 6 */ + 11131 "00100000" // /* MW 5 */ + 11132 "00000000" // /* MW 4 */ + 11133 "11110000" // /* MW 3 */ + 11134 "00101100" // /* MW 2 */ + 11135 "00000000" // /* MW 1 */ + 11136 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11137 "00000000" // /* MW 15 */ + 11138 "00000000" // /* MW 14 */ + 11139 "01111000" // /* MW 13 */ + 11140 "10100101" // /* MW 12 */ + 11141 "00000001" // /* MW 11 */ + 11142 "00000000" // /* MW 10 */ + 11143 "00000000" // /* MW 9 */ + 11144 "00000000" // /* MW 8 */ + 11145 "01011011" // /* MW 7 */ + 11146 "00000001" // /* MW 6 */ + 11147 "00100000" // /* MW 5 */ + 11148 "00000000" // /* MW 4 */ + 11149 "11110000" // /* MW 3 */ + 11150 "00101100" // /* MW 2 */ + 11151 "00000000" // /* MW 1 */ +.label ZLE_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_368 +.end_of_loop + 11152 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11153 "00000000" // /* MW 15 */ + 11154 "00000000" // /* MW 14 */ + 11155 "01111000" // /* MW 13 */ + 11156 "10100101" // /* MW 12 */ + 11157 "00000001" // /* MW 11 */ + 11158 "00000000" // /* MW 10 */ + 11159 "00000000" // /* MW 9 */ + 11160 "00000000" // /* MW 8 */ + 11161 "01011011" // /* MW 7 */ + 11162 "00000001" // /* MW 6 */ + 11163 "00100000" // /* MW 5 */ + 11164 "00000000" // /* MW 4 */ + 11165 "11110000" // /* MW 3 */ + 11166 "00101100" // /* MW 2 */ + 11167 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_384 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 40 4 first +.loop_nesting 1 + 11168 "00011100" // PADDB [p7], m0; JNZD r16, r16, p0 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 11169 "01000000" // /* MW 5 */ + 11170 "01000000" // /* MW 4 */ + 11171 "00001000" // /* MW 3 */ + 11172 "01110010" // /* MW 2 */ + 11173 "11100001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11175 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11177 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11179 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11181 "00000000" // /* MW 1 */ +.delay_slot + 11182 "01011000" // ADD.NC r18, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11183 "11001001" // /* MW 3 */ + 11184 "10011000" // /* MW 2 */ + 11185 "00011100" // /* MW 1 */ +.loop_nesting 0 + 11186 "10000100" // J #11232 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11232 delay_slots=5 */ + 11187 "00000000" // /* MW 5 */ + 11188 "00000000" // /* MW 4 */ + 11189 "11110000" // /* MW 3 */ + 11190 "00010101" // /* MW 2 */ + 11191 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11193 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11195 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11197 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11199 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11200 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11201 "00000000" // /* MW 15 */ + 11202 "00000000" // /* MW 14 */ + 11203 "01111000" // /* MW 13 */ + 11204 "10100101" // /* MW 12 */ + 11205 "00000001" // /* MW 11 */ + 11206 "00000000" // /* MW 10 */ + 11207 "00000000" // /* MW 9 */ + 11208 "00000000" // /* MW 8 */ + 11209 "01011011" // /* MW 7 */ + 11210 "00000001" // /* MW 6 */ + 11211 "00100000" // /* MW 5 */ + 11212 "00000000" // /* MW 4 */ + 11213 "11110000" // /* MW 3 */ + 11214 "00101100" // /* MW 2 */ + 11215 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_432 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 45 + 11216 "11100001" // LDA lr, [sp, #-16]; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11217 "00000000" // /* MW 15 */ + 11218 "00000000" // /* MW 14 */ + 11219 "01111000" // /* MW 13 */ + 11220 "10100101" // /* MW 12 */ + 11221 "00000001" // /* MW 11 */ + 11222 "00000000" // /* MW 10 */ + 11223 "00000000" // /* MW 9 */ + 11224 "00000000" // /* MW 8 */ + 11225 "01011011" // /* MW 7 */ + 11226 "00000001" // /* MW 6 */ + 11227 "00100000" // /* MW 5 */ + 11228 "00000000" // /* MW 4 */ + 11229 "00100000" // /* MW 3 */ + 11230 "00000111" // /* MW 2 */ + 11231 "11111110" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_448 + 11232 "00011000" // LDA r14, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11233 "11010001" // /* MW 3 */ + 11234 "11110101" // /* MW 2 */ + 11235 "00000111" // /* MW 1 */ + 11236 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11237 "10011001" // /* MW 3 */ + 11238 "11111011" // /* MW 2 */ + 11239 "00000111" // /* MW 1 */ + 11240 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11241 "11110001" // /* MW 3 */ + 11242 "11111101" // /* MW 2 */ + 11243 "00000111" // /* MW 1 */ + 11244 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11245 "00000000" // /* MW 1 */ + 11246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11247 "00000000" // /* MW 1 */ + 11248 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11249 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 45 first + 11250 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11251 "00000000" // /* MW 3 */ + 11252 "00101000" // /* MW 2 */ + 11253 "00010000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 45 +.delay_slot + 11254 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11255 "00000001" // /* MW 5 */ + 11256 "00000000" // /* MW 4 */ + 11257 "00000000" // /* MW 3 */ + 11258 "11111000" // /* MW 2 */ + 11259 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11261 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11263 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11265 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj__end +.label __ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj___func_end0 + 11267 "00000000" // /* MW 1 */ +.label __Z14_b8148_wrapperPPv___func_begin0 +.label _Z14_b8148_wrapperPPv +.function _b8148_wrapper _Z14_b8148_wrapperPPv +.src_ref 0 "0_0_reloadable4.cc" 38 first +.src_ref 0 "0_0_reloadable4.cc" 40 79 +.function_start + 11280 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11281 "11000000" // /* MW 3 */ + 11282 "01100000" // /* MW 2 */ + 11283 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 40 79 first + 11284 "10011000" // LDA p0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11285 "00011110" // /* MW 3 */ + 11286 "00011100" // /* MW 2 */ + 11287 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 42 46 first + 11288 "10011000" // LDA p2, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11289 "00011110" // /* MW 3 */ + 11290 "00010101" // /* MW 2 */ + 11291 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 41 80 first + 11292 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11293 "10011110" // /* MW 3 */ + 11294 "00000100" // /* MW 2 */ + 11295 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 39 4 first +.tail_call + 11296 "10000100" // J #10784 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10784 delay_slots=5 */ + 11297 "00000000" // /* MW 5 */ + 11298 "00000000" // /* MW 4 */ + 11299 "00010000" // /* MW 3 */ + 11300 "00010101" // /* MW 2 */ + 11301 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11303 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11305 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11306 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11307 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11309 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z14_b8148_wrapperPPv__end +.label __Z14_b8148_wrapperPPv___func_end0 + 11311 "00000000" // /* MW 1 */ +.label __Z15_b13739_wrapperPPv___func_begin0 +.label _Z15_b13739_wrapperPPv +.function _b13739_wrapper _Z15_b13739_wrapperPPv +.src_ref 0 "0_0_reloadable4.cc" 46 first +.src_ref 0 "0_0_reloadable4.cc" 48 79 +.function_start + 11312 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11313 "11000000" // /* MW 3 */ + 11314 "01100000" // /* MW 2 */ + 11315 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 48 79 first + 11316 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11317 "00011110" // /* MW 3 */ + 11318 "00101100" // /* MW 2 */ + 11319 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 50 81 first + 11320 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11321 "00011110" // /* MW 3 */ + 11322 "11110101" // /* MW 2 */ + 11323 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 49 47 first + 11324 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11325 "10011110" // /* MW 3 */ + 11326 "00000100" // /* MW 2 */ + 11327 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 47 4 first +.tail_call + 11328 "10000100" // J #3904 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=3904 delay_slots=5 */ + 11329 "00000000" // /* MW 5 */ + 11330 "00000000" // /* MW 4 */ + 11331 "10100000" // /* MW 3 */ + 11332 "00000111" // /* MW 2 */ + 11333 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11335 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11337 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11339 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11341 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z15_b13739_wrapperPPv__end +.label __Z15_b13739_wrapperPPv___func_end0 + 11343 "00000000" // /* MW 1 */ +.label __Z15_b13744_wrapperPPv___func_begin0 +.label _Z15_b13744_wrapperPPv +.function _b13744_wrapper _Z15_b13744_wrapperPPv +.src_ref 0 "0_0_reloadable4.cc" 54 first +.src_ref 0 "0_0_reloadable4.cc" 56 79 +.function_start + 11344 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11345 "11000000" // /* MW 3 */ + 11346 "01100000" // /* MW 2 */ + 11347 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 56 79 first + 11348 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11349 "00011110" // /* MW 3 */ + 11350 "00101100" // /* MW 2 */ + 11351 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 58 81 first + 11352 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11353 "00011110" // /* MW 3 */ + 11354 "11110101" // /* MW 2 */ + 11355 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 57 47 first + 11356 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11357 "10011110" // /* MW 3 */ + 11358 "00000100" // /* MW 2 */ + 11359 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 55 4 first +.tail_call + 11360 "10000100" // J #4864 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=4864 delay_slots=5 */ + 11361 "00000000" // /* MW 5 */ + 11362 "00000000" // /* MW 4 */ + 11363 "10000000" // /* MW 3 */ + 11364 "00001001" // /* MW 2 */ + 11365 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11367 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11369 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11371 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11373 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z15_b13744_wrapperPPv__end +.label __Z15_b13744_wrapperPPv___func_end0 + 11375 "00000000" // /* MW 1 */ +.label __Z15_b13749_wrapperPPv___func_begin0 +.label _Z15_b13749_wrapperPPv +.function _b13749_wrapper _Z15_b13749_wrapperPPv +.src_ref 0 "0_0_reloadable4.cc" 62 first +.src_ref 0 "0_0_reloadable4.cc" 64 79 +.function_start + 11376 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11377 "11000000" // /* MW 3 */ + 11378 "01100000" // /* MW 2 */ + 11379 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 64 79 first + 11380 "10011000" // LDA p0, [p2], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11381 "00011110" // /* MW 3 */ + 11382 "00111100" // /* MW 2 */ + 11383 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 65 47 first + 11384 "10011000" // LDA p1, [p2], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11385 "10011110" // /* MW 3 */ + 11386 "11101100" // /* MW 2 */ + 11387 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 67 81 first + 11388 "10011000" // LDA p3, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11389 "10011110" // /* MW 3 */ + 11390 "00010101" // /* MW 2 */ + 11391 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 66 80 first + 11392 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11393 "00011110" // /* MW 3 */ + 11394 "00000101" // /* MW 2 */ + 11395 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 63 4 first +.tail_call + 11396 "10000100" // J #5872 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=5872 delay_slots=5 */ + 11397 "00000000" // /* MW 5 */ + 11398 "00000000" // /* MW 4 */ + 11399 "01111000" // /* MW 3 */ + 11400 "00001011" // /* MW 2 */ + 11401 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11403 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11405 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11407 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11408 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11409 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z15_b13749_wrapperPPv__end +.label __Z15_b13749_wrapperPPv___func_end0 + 11411 "00000000" // /* MW 1 */ +.label _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj +.label __ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj___func_begin0 +.function load_slice_generic_innermost_rtp _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj +.src_ref 3 "slice_generic_innermost_params.h" 40 first +.src_ref 3 "slice_generic_innermost_params.h" 41 19 first +.function_start + 11424 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11425 "00101110" // /* MW 3 */ + 11426 "00011100" // /* MW 2 */ + 11427 "00000001" // /* MW 1 */ + 11428 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11429 "00000000" // /* MW 1 */ + 11430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11431 "00000000" // /* MW 1 */ + 11432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11433 "00000000" // /* MW 1 */ + 11434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11435 "00000000" // /* MW 1 */ + 11436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11437 "00000000" // /* MW 1 */ + 11438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11439 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 41 17 first + 11440 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11441 "00101001" // /* MW 3 */ + 11442 "00011100" // /* MW 2 */ + 11443 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 42 19 first + 11444 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11445 "00101110" // /* MW 3 */ + 11446 "00011100" // /* MW 2 */ + 11447 "00000001" // /* MW 1 */ + 11448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11449 "00000000" // /* MW 1 */ + 11450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11451 "00000000" // /* MW 1 */ + 11452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11453 "00000000" // /* MW 1 */ + 11454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11455 "00000000" // /* MW 1 */ + 11456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11457 "00000000" // /* MW 1 */ + 11458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11459 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 42 17 + 11460 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11461 "00101001" // /* MW 3 */ + 11462 "00011100" // /* MW 2 */ + 11463 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 43 19 first + 11464 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11465 "00101110" // /* MW 3 */ + 11466 "00011100" // /* MW 2 */ + 11467 "00000001" // /* MW 1 */ + 11468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11469 "00000000" // /* MW 1 */ + 11470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11471 "00000000" // /* MW 1 */ + 11472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11473 "00000000" // /* MW 1 */ + 11474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11475 "00000000" // /* MW 1 */ + 11476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11477 "00000000" // /* MW 1 */ + 11478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11479 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 43 17 + 11480 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11481 "00101001" // /* MW 3 */ + 11482 "00011100" // /* MW 2 */ + 11483 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 44 19 first + 11484 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11485 "00101110" // /* MW 3 */ + 11486 "00011100" // /* MW 2 */ + 11487 "00000001" // /* MW 1 */ + 11488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11489 "00000000" // /* MW 1 */ + 11490 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11491 "00000000" // /* MW 1 */ + 11492 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11493 "00000000" // /* MW 1 */ + 11494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11495 "00000000" // /* MW 1 */ + 11496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11497 "00000000" // /* MW 1 */ + 11498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11499 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 44 17 + 11500 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11501 "00101001" // /* MW 3 */ + 11502 "00011100" // /* MW 2 */ + 11503 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 45 19 first + 11504 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11505 "00101110" // /* MW 3 */ + 11506 "00011100" // /* MW 2 */ + 11507 "00000001" // /* MW 1 */ + 11508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11509 "00000000" // /* MW 1 */ + 11510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11511 "00000000" // /* MW 1 */ + 11512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11513 "00000000" // /* MW 1 */ + 11514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11515 "00000000" // /* MW 1 */ + 11516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11517 "00000000" // /* MW 1 */ + 11518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11519 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 45 17 + 11520 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11521 "00101001" // /* MW 3 */ + 11522 "00011100" // /* MW 2 */ + 11523 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 46 17 first + 11524 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11525 "00101110" // /* MW 3 */ + 11526 "00011100" // /* MW 2 */ + 11527 "00000001" // /* MW 1 */ + 11528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11529 "00000000" // /* MW 1 */ + 11530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11531 "00000000" // /* MW 1 */ + 11532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11533 "00000000" // /* MW 1 */ + 11534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11535 "00000000" // /* MW 1 */ + 11536 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11537 "00000000" // /* MW 1 */ + 11538 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11539 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 46 15 + 11540 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11541 "00101001" // /* MW 3 */ + 11542 "00011100" // /* MW 2 */ + 11543 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 47 18 first + 11544 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11545 "00101110" // /* MW 3 */ + 11546 "00000100" // /* MW 2 */ + 11547 "00000001" // /* MW 1 */ + 11548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11549 "00000000" // /* MW 1 */ + 11550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11551 "00000000" // /* MW 1 */ + 11552 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11553 "00000000" // /* MW 1 */ + 11554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11555 "00000000" // /* MW 1 */ + 11556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11557 "00000000" // /* MW 1 */ + 11558 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11559 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 47 16 + 11560 "10011000" // ST el0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11561 "00101001" // /* MW 3 */ + 11562 "00000100" // /* MW 2 */ + 11563 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 48 18 first + 11564 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11565 "00101110" // /* MW 3 */ + 11566 "00010100" // /* MW 2 */ + 11567 "00000001" // /* MW 1 */ + 11568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11569 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 49 first + 11570 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11571 "00000000" // /* MW 3 */ + 11572 "00101000" // /* MW 2 */ + 11573 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11574 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11575 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11577 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11579 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11581 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 48 16 first +.delay_slot + 11582 "10011000" // ST el0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11583 "00101001" // /* MW 3 */ + 11584 "00010100" // /* MW 2 */ +.label _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj__end +.label __ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj___func_end0 + 11585 "00001000" // /* MW 1 */ +.label _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params +.label __ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params___func_begin0 +.function setup_slice_generic_innermost _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params +.src_ref 3 "slice_generic_innermost_params.h" 52 first +.src_ref 3 "slice_generic_innermost_params.h" 53 25 first +.src_ref 3 "slice_generic_innermost_params.h" 55 42 +.src_ref 3 "slice_generic_innermost_params.h" 58 40 +.function_start + 11600 "10111010" // LDA r1, [p0], #4; MOVX r0, #1; MOV m1, #32 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11601 "01011000" // /* MW 9 */ + 11602 "00100000" // /* MW 8 */ + 11603 "10000000" // /* MW 7 */ + 11604 "00101000" // /* MW 6 */ + 11605 "00000000" // /* MW 5 */ + 11606 "00000000" // /* MW 4 */ + 11607 "11010000" // /* MW 3 */ + 11608 "10000110" // /* MW 2 */ + 11609 "00000011" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 53 38 first +.src_ref 3 "slice_generic_innermost_params.h" 58 30 +.src_ref 3 "slice_generic_innermost_params.h" 59 31 + 11610 "10111010" // LDA r5, [p0], #4; MOVX r3, #2; MOV r2, #-6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11611 "01011000" // /* MW 9 */ + 11612 "11111010" // /* MW 8 */ + 11613 "01001111" // /* MW 7 */ + 11614 "01001000" // /* MW 6 */ + 11615 "00110000" // /* MW 5 */ + 11616 "00000000" // /* MW 4 */ + 11617 "11010000" // /* MW 3 */ + 11618 "10010110" // /* MW 2 */ + 11619 "00000011" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 53 51 +.src_ref 3 "slice_generic_innermost_params.h" 60 27 +.src_ref 3 "slice_generic_innermost_params.h" 62 27 + 11620 "01010100" // LDA r4, [p0], #8; MOV m0, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11621 "00000001" // /* MW 5 */ + 11622 "00000001" // /* MW 4 */ + 11623 "11010000" // /* MW 3 */ + 11624 "10010010" // /* MW 2 */ + 11625 "00000101" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 55 42 first +.src_ref 3 "slice_generic_innermost_params.h" 60 27 + 11626 "01010100" // LDA r6, [p0], m1; MOV dj0, #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11627 "00000001" // /* MW 5 */ + 11628 "00000010" // /* MW 4 */ + 11629 "11010001" // /* MW 3 */ + 11630 "00011010" // /* MW 2 */ + 11631 "00000101" // /* MW 1 */ + 11632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11633 "00000000" // /* MW 1 */ + 11634 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11635 "00000000" // /* MW 1 */ + 11636 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11637 "00000000" // /* MW 1 */ + 11638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11639 "00000000" // /* MW 1 */ + 11640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11641 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 53 30 first + 11642 "10011000" // MUL r1, r5, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11643 "00011111" // /* MW 3 */ + 11644 "01000010" // /* MW 2 */ + 11645 "00010001" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 58 40 first + 11646 "10011000" // AND r0, r6, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11647 "00000100" // /* MW 3 */ + 11648 "10000000" // /* MW 2 */ + 11649 "00010001" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 58 30 + 11650 "10011000" // OR r0, r3, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11651 "00000101" // /* MW 3 */ + 11652 "11000000" // /* MW 2 */ + 11653 "00010000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 53 43 first +.src_ref 3 "slice_generic_innermost_params.h" 58 28 + 11654 "01011100" // ST r0, [p0], #-16; MUL r1, r1, r4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11655 "10011111" // /* MW 5 */ + 11656 "10000100" // /* MW 4 */ + 11657 "00110000" // /* MW 3 */ + 11658 "10000010" // /* MW 2 */ + 11659 "00011001" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 75 first + 11660 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11661 "00000000" // /* MW 3 */ + 11662 "00101000" // /* MW 2 */ + 11663 "00010000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 59 31 first +.delay_slot + 11664 "10011000" // LSHL r0, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11665 "00101101" // /* MW 3 */ + 11666 "01000000" // /* MW 2 */ + 11667 "00010000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 59 25 +.delay_slot + 11668 "10011000" // ST r0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11669 "00010001" // /* MW 3 */ + 11670 "00011100" // /* MW 2 */ + 11671 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 60 27 first +.delay_slot + 11672 "10011000" // ST m0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11673 "00000001" // /* MW 3 */ + 11674 "00011100" // /* MW 2 */ + 11675 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 60 27 +.delay_slot + 11676 "10011000" // ST dj0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11677 "01000001" // /* MW 3 */ + 11678 "00000100" // /* MW 2 */ + 11679 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 62 27 first +.delay_slot + 11680 "10011000" // ST m0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11681 "00000001" // /* MW 3 */ + 11682 "00010100" // /* MW 2 */ +.label _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params__end +.label __ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params___func_end0 + 11683 "00001000" // /* MW 1 */ +.label _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj +.label __ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj___func_begin0 +.function setup_slice_generic_innermost_params _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj +.src_ref 3 "slice_generic_innermost_params.h" 79 first +.src_ref 3 "slice_generic_innermost_params.h" 80 4 first +.function_start +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.noswbrkpt + 11696 "00000100" // JL #11424 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11424 delay_slots=5 */ + 11697 "00000001" // /* MW 5 */ + 11698 "00000000" // /* MW 4 */ + 11699 "01010000" // /* MW 3 */ + 11700 "00010110" // /* MW 2 */ + 11701 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 81 4 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11702 "11111000" // MOV dc0, lr /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11703 "11100000" // /* MW 3 */ + 11704 "11000001" // /* MW 2 */ + 11705 "00011000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 81 4 +.delay_slot + 11706 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11707 "11000000" // /* MW 3 */ + 11708 "01100000" // /* MW 2 */ + 11709 "00011010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11711 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11713 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11714 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 11715 "00011100" // /* MW 13 */ + 11716 "00000000" // /* MW 12 */ + 11717 "00000000" // /* MW 11 */ + 11718 "01010111" // /* MW 10 */ + 11719 "00011010" // /* MW 9 */ + 11720 "01000000" // /* MW 8 */ + 11721 "00000000" // /* MW 7 */ + 11722 "00000000" // /* MW 6 */ + 11723 "10110110" // /* MW 5 */ + 11724 "00000010" // /* MW 4 */ + 11725 "11110000" // /* MW 3 */ + 11726 "00101100" // /* MW 2 */ + 11727 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 81 4 first +.tail_call +.return_address + 11728 "10000100" // J #11600 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=11600 delay_slots=5 */ + 11729 "00000000" // /* MW 5 */ + 11730 "00000000" // /* MW 4 */ + 11731 "10101000" // /* MW 3 */ + 11732 "00010110" // /* MW 2 */ + 11733 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 81 4 +.delay_slot + 11734 "11111000" // MOV lr, dc0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11735 "10000000" // /* MW 3 */ + 11736 "01110001" // /* MW 2 */ + 11737 "00011111" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 81 4 +.delay_slot + 11738 "11111000" // MOV p0, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11739 "11000000" // /* MW 3 */ + 11740 "01100100" // /* MW 2 */ + 11741 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11743 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11745 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj__end +.label __ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj___func_end0 + 11747 "00000000" // /* MW 1 */ +.label __Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params___func_begin0 +.label _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params +.function slice_generic_innermost _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "slice_generic_innermost.h" 25 first +.src_ref 3 "slice_generic_innermost.h" 35 60 +.src_ref 3 "slice_generic_innermost.h" 54 19 +.function_start + 11760 "00000010" // MOVS p5, p1; MOV r0, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11761 "01110000" // /* MW 7 */ + 11762 "01100000" // /* MW 6 */ + 11763 "00001010" // /* MW 5 */ + 11764 "00000000" // /* MW 4 */ + 11765 "01100000" // /* MW 3 */ + 11766 "10010001" // /* MW 2 */ + 11767 "10110000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 35 60 first + 11768 "00011000" // ADD.NC p3, r0, #36 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11769 "00010010" // /* MW 3 */ + 11770 "01100000" // /* MW 2 */ + 11771 "00011011" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 35 60 + 11772 "11010100" // LDA m2, [p3], #4; MOV r0, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11773 "10000001" // /* MW 5 */ + 11774 "00111101" // /* MW 4 */ + 11775 "11010000" // /* MW 3 */ + 11776 "10100000" // /* MW 2 */ + 11777 "01100011" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 36 61 first + 11778 "10011000" // LDA m0, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11779 "00000110" // /* MW 3 */ + 11780 "00011100" // /* MW 2 */ + 11781 "00000011" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 40 35 first + 11782 "10011000" // LDA r2, [p3, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11783 "01010110" // /* MW 3 */ + 11784 "11010100" // /* MW 2 */ + 11785 "00000011" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 38 59 first + 11786 "10011000" // LDA m1, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11787 "10000110" // /* MW 3 */ + 11788 "00000100" // /* MW 2 */ + 11789 "00000011" // /* MW 1 */ + 11790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11791 "00000000" // /* MW 1 */ + 11792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11793 "00000000" // /* MW 1 */ + 11794 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11795 "00000000" // /* MW 1 */ + 11796 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11797 "00000000" // /* MW 1 */ + 11798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11799 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 40 8 first +.src_ref 3 "slice_generic_innermost.h" 40 26 first + 11800 "10000100" // JZ r2, #12224 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12224 delay_slots=5 */ + 11801 "00000001" // /* MW 5 */ + 11802 "00000000" // /* MW 4 */ + 11803 "11100000" // /* MW 3 */ + 11804 "00010111" // /* MW 2 */ + 11805 "00010000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "slice_generic_innermost.h" 36 39 +.src_ref 3 "slice_generic_innermost.h" 50 19 +.delay_slot + 11806 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11807 "11000000" // /* MW 3 */ + 11808 "01100000" // /* MW 2 */ + 11809 "00011111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "slice_generic_innermost.h" 36 39 first +.src_ref 3 "slice_generic_innermost.h" 50 19 +.delay_slot + 11810 "11110100" // PADDB [p7], m0; MOV p3, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11811 "10000001" // /* MW 5 */ + 11812 "11011101" // /* MW 4 */ + 11813 "00000110" // /* MW 3 */ + 11814 "01110010" // /* MW 2 */ + 11815 "11100001" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 35 38 first +.delay_slot + 11816 "00011000" // PADDB [p0], m2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11817 "10010000" // /* MW 3 */ + 11818 "01001011" // /* MW 2 */ + 11819 "00111000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "slice_generic_innermost.h" 37 39 first +.src_ref 3 "slice_generic_innermost.h" 52 20 +.delay_slot + 11820 "11110100" // PADDB [p0], m0; MOV p4, p0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11821 "10000001" // /* MW 5 */ + 11822 "11000001" // /* MW 4 */ + 11823 "00001000" // /* MW 3 */ + 11824 "01110010" // /* MW 2 */ + 11825 "00000001" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 38 37 first +.delay_slot + 11826 "00011000" // PADDB [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11827 "10010000" // /* MW 3 */ + 11828 "00101011" // /* MW 2 */ + 11829 "00111001" // /* MW 1 */ + 11830 "00011000" // MOVX r1, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11831 "00010001" // /* MW 3 */ + 11832 "00000010" // /* MW 2 */ + 11833 "00010000" // /* MW 1 */ + 11834 "10011000" // LTU r3, r2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11835 "00011100" // /* MW 3 */ + 11836 "10000110" // /* MW 2 */ + 11837 "00010000" // /* MW 1 */ + 11838 "10000100" // JNZ r3, #12080 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12080 delay_slots=5 */ + 11839 "00000001" // /* MW 5 */ + 11840 "01000000" // /* MW 4 */ + 11841 "10011000" // /* MW 3 */ + 11842 "00010111" // /* MW 2 */ + 11843 "00011000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 40 8 +.delay_slot + 11844 "10111000" // MOV dj0, #48 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11845 "01100000" // /* MW 3 */ + 11846 "10000000" // /* MW 2 */ + 11847 "00011000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 40 8 first +.delay_slot + 11848 "10011000" // LDA r1, [p2, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11849 "00110110" // /* MW 3 */ + 11850 "00000000" // /* MW 2 */ + 11851 "00000010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11853 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11854 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11855 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11856 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11857 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 40 8 +.src_ref 3 "slice_generic_innermost.h" 50 19 first +.src_ref 3 "slice_generic_innermost.h" 52 20 first + 11858 "10110110" // VLDA x2, [p4], m0; VLDB x1, [p3], m0; MOVXM ls, #11952 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 11859 "00010000" // /* MW 11 */ + 11860 "01011000" // /* MW 10 */ + 11861 "01111111" // /* MW 9 */ + 11862 "00001000" // /* MW 8 */ + 11863 "00000000" // /* MW 7 */ + 11864 "00000000" // /* MW 6 */ + 11865 "11101000" // /* MW 5 */ + 11866 "00010000" // /* MW 4 */ + 11867 "01110110" // /* MW 3 */ + 11868 "00010011" // /* MW 2 */ + 11869 "10000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "slice_generic_innermost.h" 40 8 first +.src_ref 3 "slice_generic_innermost.h" 51 19 first +.src_ref 3 "slice_generic_innermost.h" 53 20 first +.src_ref 3 "slice_generic_innermost.h" 56 21 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11870 "01111110" // PADDA [p4], m0; VLDB x0, [p7], m0; PADDS [p3], m0; MOVXM le, #12000 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 11871 "01100000" // /* MW 13 */ + 11872 "00001011" // /* MW 12 */ + 11873 "01100001" // /* MW 11 */ + 11874 "00000010" // /* MW 10 */ + 11875 "11101110" // /* MW 9 */ + 11876 "00110111" // /* MW 8 */ + 11877 "00000001" // /* MW 7 */ + 11878 "00000000" // /* MW 6 */ + 11879 "01101000" // /* MW 5 */ + 11880 "00010000" // /* MW 4 */ + 11881 "11111110" // /* MW 3 */ + 11882 "00001100" // /* MW 2 */ + 11883 "10000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 40 8 +.src_ref 3 "slice_generic_innermost.h" 50 19 first +.src_ref 3 "slice_generic_innermost.h" 57 21 first +.src_ref 3 "slice_generic_innermost.h" 58 21 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11884 "11110110" // VLDA x1, [p3], m0; VLDB x3, [p0], m0; PADDS [p7], m0; ADD.NC lc, r2, #-3 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 11885 "01000000" // /* MW 11 */ + 11886 "10111111" // /* MW 10 */ + 11887 "10111000" // /* MW 9 */ + 11888 "00000010" // /* MW 8 */ + 11889 "01011011" // /* MW 7 */ + 11890 "00001000" // /* MW 6 */ + 11891 "11101111" // /* MW 5 */ + 11892 "00010001" // /* MW 4 */ + 11893 "01110000" // /* MW 3 */ + 11894 "00001011" // /* MW 2 */ + 11895 "01100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "slice_generic_innermost.h" 51 19 first +.src_ref 3 "slice_generic_innermost.h" 52 20 first +.src_ref 3 "slice_generic_innermost.h" 59 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11896 "00110010" // PADDA [p0], m0; VLDB x2, [p4], m0; PADDS [p3], m0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11897 "01011011" // /* MW 7 */ + 11898 "00001000" // /* MW 6 */ + 11899 "01101011" // /* MW 5 */ + 11900 "00010001" // /* MW 4 */ + 11901 "11111000" // /* MW 3 */ + 11902 "00001100" // /* MW 2 */ + 11903 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 53 20 first +.src_ref 3 "slice_generic_innermost.h" 56 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11904 "00111100" // PADDA [p4], m0; VLDB x0, [p7], m0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11905 "01101000" // /* MW 5 */ + 11906 "00010000" // /* MW 4 */ + 11907 "11111110" // /* MW 3 */ + 11908 "00001100" // /* MW 2 */ + 11909 "10000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "slice_generic_innermost.h" 57 21 first +.src_ref 3 "slice_generic_innermost.h" 58 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11910 "01001100" // VLDB x3, [p0], m0; PADDS [p7], m0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11911 "10110110" // /* MW 5 */ + 11912 "00010000" // /* MW 4 */ + 11913 "10001110" // /* MW 3 */ + 11914 "00011110" // /* MW 2 */ + 11915 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 50 19 first +.src_ref 3 "slice_generic_innermost.h" 59 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11916 "00111100" // PADDA [p0], m0; VLDB x1, [p3], m0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11917 "11101000" // /* MW 5 */ + 11918 "00010000" // /* MW 4 */ + 11919 "11110110" // /* MW 3 */ + 11920 "00001100" // /* MW 2 */ + 11921 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "slice_generic_innermost.h" 46 17 first +.src_ref 3 "slice_generic_innermost.h" 52 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11922 "10110100" // VLDB x2, [p4], m0; VSHUFFLE bmll0, x1, x2, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11923 "00001011" // /* MW 5 */ + 11924 "00010010" // /* MW 4 */ + 11925 "10000000" // /* MW 3 */ + 11926 "00010110" // /* MW 2 */ + 11927 "10000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 51 19 first +.src_ref 3 "slice_generic_innermost.h" 56 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11928 "00110010" // NOPA; VLDB x0, [p7], m0; PADDS [p3], m0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11929 "01011011" // /* MW 7 */ + 11930 "00001000" // /* MW 6 */ + 11931 "01101011" // /* MW 5 */ + 11932 "00010000" // /* MW 4 */ + 11933 "11111110" // /* MW 3 */ + 11934 "00101100" // /* MW 2 */ + 11935 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "slice_generic_innermost.h" 47 18 first +.src_ref 3 "slice_generic_innermost.h" 54 19 first +.src_ref 3 "slice_generic_innermost.h" 58 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11936 "11100001" // NOPA; VLDB x3, [p0], m0; VST bmll0, [p5], m1; NOPX; VSHUFFLE bmlh0, x0, x3, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11937 "00000000" // /* MW 15 */ + 11938 "00000000" // /* MW 14 */ + 11939 "11101000" // /* MW 13 */ + 11940 "11000010" // /* MW 12 */ + 11941 "01000000" // /* MW 11 */ + 11942 "00000000" // /* MW 10 */ + 11943 "00000000" // /* MW 9 */ + 11944 "10000000" // /* MW 8 */ + 11945 "00000110" // /* MW 7 */ + 11946 "00101000" // /* MW 6 */ + 11947 "11101101" // /* MW 5 */ + 11948 "00010001" // /* MW 4 */ + 11949 "11110000" // /* MW 3 */ + 11950 "00101100" // /* MW 2 */ + 11951 "00000000" // /* MW 1 */ +.label ZLS_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_192 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 46 17 first +.src_ref 3 "slice_generic_innermost.h" 50 19 first +.src_ref 3 "slice_generic_innermost.h" 53 20 first +.src_ref 3 "slice_generic_innermost.h" 57 21 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 11952 "11100001" // PADDA [p4], m0; VLDB x1, [p3], m0; PADDS [p7], m0; NOPX; VSHUFFLE bmll0, x1, x2, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11953 "00000000" // /* MW 15 */ + 11954 "00000000" // /* MW 14 */ + 11955 "11101000" // /* MW 13 */ + 11956 "10000010" // /* MW 12 */ + 11957 "00000100" // /* MW 11 */ + 11958 "00000000" // /* MW 10 */ + 11959 "00000000" // /* MW 9 */ + 11960 "00000000" // /* MW 8 */ + 11961 "01011011" // /* MW 7 */ + 11962 "00001000" // /* MW 6 */ + 11963 "11101111" // /* MW 5 */ + 11964 "00010000" // /* MW 4 */ + 11965 "11110110" // /* MW 3 */ + 11966 "00001100" // /* MW 2 */ + 11967 "10000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "slice_generic_innermost.h" 52 20 first +.src_ref 3 "slice_generic_innermost.h" 59 21 first +.src_ref 3 "slice_generic_innermost.h" 60 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11968 "11100001" // PADDA [p0], m0; VLDB x2, [p4], m0; VST bmlh0, [p1], m1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11969 "00000000" // /* MW 15 */ + 11970 "00000000" // /* MW 14 */ + 11971 "01111000" // /* MW 13 */ + 11972 "10100101" // /* MW 12 */ + 11973 "00000001" // /* MW 11 */ + 11974 "00000000" // /* MW 10 */ + 11975 "00000000" // /* MW 9 */ + 11976 "10000000" // /* MW 8 */ + 11977 "00100110" // /* MW 7 */ + 11978 "00101000" // /* MW 6 */ + 11979 "01101001" // /* MW 5 */ + 11980 "00010001" // /* MW 4 */ + 11981 "11111000" // /* MW 3 */ + 11982 "00001100" // /* MW 2 */ + 11983 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 47 18 first +.src_ref 3 "slice_generic_innermost.h" 51 19 first +.src_ref 3 "slice_generic_innermost.h" 55 19 first +.src_ref 3 "slice_generic_innermost.h" 56 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11984 "11100001" // PADDA [p5], m1; VLDB x0, [p7], m0; PADDS [p3], m0; NOPX; VSHUFFLE bmlh0, x0, x3, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11985 "00000000" // /* MW 15 */ + 11986 "00000000" // /* MW 14 */ + 11987 "11101000" // /* MW 13 */ + 11988 "11000010" // /* MW 12 */ + 11989 "01000000" // /* MW 11 */ + 11990 "00000000" // /* MW 10 */ + 11991 "00000000" // /* MW 9 */ + 11992 "00000000" // /* MW 8 */ + 11993 "01011011" // /* MW 7 */ + 11994 "00001000" // /* MW 6 */ + 11995 "01101011" // /* MW 5 */ + 11996 "00010000" // /* MW 4 */ + 11997 "11111110" // /* MW 3 */ + 11998 "00001100" // /* MW 2 */ + 11999 "10100101" // /* MW 1 */ +.label ZLE_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_240 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "slice_generic_innermost.h" 54 19 first +.src_ref 3 "slice_generic_innermost.h" 58 21 first +.src_ref 3 "slice_generic_innermost.h" 61 19 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12000 "11100001" // PADDA [p1], m1; VLDB x3, [p0], m0; VST bmll0, [p5], m1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12001 "00000000" // /* MW 15 */ + 12002 "00000000" // /* MW 14 */ + 12003 "01111000" // /* MW 13 */ + 12004 "10100101" // /* MW 12 */ + 12005 "00000001" // /* MW 11 */ + 12006 "00000000" // /* MW 10 */ + 12007 "00000000" // /* MW 9 */ + 12008 "10000000" // /* MW 8 */ + 12009 "00000110" // /* MW 7 */ + 12010 "00101000" // /* MW 6 */ + 12011 "11101101" // /* MW 5 */ + 12012 "00010001" // /* MW 4 */ + 12013 "11110000" // /* MW 3 */ + 12014 "00001100" // /* MW 2 */ + 12015 "00100101" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 46 17 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 12016 "11011000" // VSHUFFLE bmll0, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12017 "00000101" // /* MW 3 */ + 12018 "00001001" // /* MW 2 */ + 12019 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "slice_generic_innermost.h" 60 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12020 "10011000" // VST bmlh0, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12021 "00100110" // /* MW 3 */ + 12022 "00101000" // /* MW 2 */ + 12023 "00001001" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 47 18 first +.src_ref 3 "slice_generic_innermost.h" 61 19 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12024 "10010100" // PADDA [p1], m1; VSHUFFLE bmlh0, x0, x3, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12025 "00001011" // /* MW 5 */ + 12026 "00000011" // /* MW 4 */ + 12027 "11110001" // /* MW 3 */ + 12028 "00001100" // /* MW 2 */ + 12029 "00100101" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 12030 "10000100" // J #12224 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=12224 delay_slots=5 */ + 12031 "00000000" // /* MW 5 */ + 12032 "00000000" // /* MW 4 */ + 12033 "11100000" // /* MW 3 */ + 12034 "00010111" // /* MW 2 */ + 12035 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "slice_generic_innermost.h" 46 17 first +.src_ref 3 "slice_generic_innermost.h" 55 19 first +.src_ref 3 "slice_generic_innermost.h" 60 19 first +.delay_slot +.aggressive_scheduled_block_id 2 +.noswbrkpt + 12036 "10111010" // PADDA [p5], m1; VST bmlh0, [p1], m1; VSHUFFLE bmll0, x1, x2, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12037 "11100010" // /* MW 9 */ + 12038 "10000010" // /* MW 8 */ + 12039 "00000100" // /* MW 7 */ + 12040 "10000000" // /* MW 6 */ + 12041 "00100110" // /* MW 5 */ + 12042 "00101000" // /* MW 4 */ + 12043 "11110001" // /* MW 3 */ + 12044 "00001100" // /* MW 2 */ + 12045 "10100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "slice_generic_innermost.h" 54 19 first +.src_ref 3 "slice_generic_innermost.h" 61 19 first +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12046 "00001100" // PADDA [p1], m1; VST bmll0, [p5], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12047 "00001101" // /* MW 5 */ + 12048 "01010000" // /* MW 4 */ + 12049 "11111010" // /* MW 3 */ + 12050 "00001100" // /* MW 2 */ + 12051 "00100101" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 47 18 first +.src_ref 3 "slice_generic_innermost.h" 55 19 first +.delay_slot + 12052 "10010100" // PADDA [p5], m1; VSHUFFLE bmlh0, x0, x3, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12053 "00001011" // /* MW 5 */ + 12054 "00000011" // /* MW 4 */ + 12055 "11110001" // /* MW 3 */ + 12056 "00001100" // /* MW 2 */ + 12057 "10100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "slice_generic_innermost.h" 54 19 first +.delay_slot + 12058 "00001100" // NOPA; VST bmll0, [p5], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12059 "00001101" // /* MW 5 */ + 12060 "01010000" // /* MW 4 */ + 12061 "11111010" // /* MW 3 */ + 12062 "00101100" // /* MW 2 */ + 12063 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "slice_generic_innermost.h" 60 19 first +.delay_slot + 12064 "11100001" // NOPA; NOPB; VST bmlh0, [p1], m1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12065 "00000000" // /* MW 15 */ + 12066 "00000000" // /* MW 14 */ + 12067 "01111000" // /* MW 13 */ + 12068 "10100101" // /* MW 12 */ + 12069 "00000001" // /* MW 11 */ + 12070 "00000000" // /* MW 10 */ + 12071 "00000000" // /* MW 9 */ + 12072 "10000000" // /* MW 8 */ + 12073 "00100110" // /* MW 7 */ + 12074 "00101000" // /* MW 6 */ + 12075 "00100001" // /* MW 5 */ + 12076 "00000000" // /* MW 4 */ + 12077 "11110000" // /* MW 3 */ + 12078 "00101100" // /* MW 2 */ + 12079 "00000000" // /* MW 1 */ +.label TGT_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_320 +.src_ref 3 "slice_generic_innermost.h" 40 8 first + 12080 "11111000" // MOV lc, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12081 "00100000" // /* MW 3 */ + 12082 "01110001" // /* MW 2 */ + 12083 "00011101" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 40 8 + 12084 "01000100" // MOVXM ls, #12096 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12085 "10000000" // /* MW 5 */ + 12086 "11111110" // /* MW 4 */ + 12087 "00100001" // /* MW 3 */ + 12088 "00000000" // /* MW 2 */ + 12089 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 40 8 + 12090 "01000100" // MOVXM le, #12208 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12091 "01100000" // /* MW 5 */ + 12092 "11111111" // /* MW 4 */ + 12093 "00100110" // /* MW 3 */ + 12094 "00000000" // /* MW 2 */ + 12095 "00000000" // /* MW 1 */ +.label ZLS_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_336 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 50 19 first +.src_ref 3 "slice_generic_innermost.h" 52 20 first +.begin_of_loop +.loop_nesting 1 + 12096 "00111100" // VLDA x1, [p4], m0; VLDB x2, [p3], m0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12097 "01101000" // /* MW 5 */ + 12098 "00010001" // /* MW 4 */ + 12099 "01110110" // /* MW 3 */ + 12100 "00001011" // /* MW 2 */ + 12101 "10000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "slice_generic_innermost.h" 51 19 first +.src_ref 3 "slice_generic_innermost.h" 53 20 first +.src_ref 3 "slice_generic_innermost.h" 56 21 first + 12102 "00110010" // PADDA [p3], m0; VLDB x0, [p7], m0; PADDS [p4], m0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12103 "01011011" // /* MW 7 */ + 12104 "00001000" // /* MW 6 */ + 12105 "01101100" // /* MW 5 */ + 12106 "00010000" // /* MW 4 */ + 12107 "11111110" // /* MW 3 */ + 12108 "00001100" // /* MW 2 */ + 12109 "01100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 57 21 first +.src_ref 3 "slice_generic_innermost.h" 58 21 first + 12110 "00111100" // PADDA [p7], m0; VLDB x3, [p0], m0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12111 "11101000" // /* MW 5 */ + 12112 "00010001" // /* MW 4 */ + 12113 "11110000" // /* MW 3 */ + 12114 "00001100" // /* MW 2 */ + 12115 "11100001" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 59 21 first + 12116 "00011000" // PADDB [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12117 "10010000" // /* MW 3 */ + 12118 "00001011" // /* MW 2 */ + 12119 "00111000" // /* MW 1 */ + 12120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12121 "00000000" // /* MW 1 */ + 12122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12123 "00000000" // /* MW 1 */ + 12124 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12125 "01100111" // /* MW 3 */ + 12126 "00000001" // /* MW 2 */ + 12127 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 46 17 first + 12128 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x2, x1, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12129 "00000000" // /* MW 15 */ + 12130 "00000000" // /* MW 14 */ + 12131 "11101000" // /* MW 13 */ + 12132 "01000010" // /* MW 12 */ + 12133 "00001000" // /* MW 11 */ + 12134 "00000000" // /* MW 10 */ + 12135 "00000000" // /* MW 9 */ + 12136 "00000000" // /* MW 8 */ + 12137 "01011011" // /* MW 7 */ + 12138 "00000001" // /* MW 6 */ + 12139 "00100000" // /* MW 5 */ + 12140 "00000000" // /* MW 4 */ + 12141 "11110000" // /* MW 3 */ + 12142 "00101100" // /* MW 2 */ + 12143 "00000000" // /* MW 1 */ + 12144 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12145 "00000000" // /* MW 15 */ + 12146 "00000000" // /* MW 14 */ + 12147 "01111000" // /* MW 13 */ + 12148 "10100101" // /* MW 12 */ + 12149 "00000001" // /* MW 11 */ + 12150 "00000000" // /* MW 10 */ + 12151 "00000000" // /* MW 9 */ + 12152 "00000000" // /* MW 8 */ + 12153 "01011011" // /* MW 7 */ + 12154 "00000001" // /* MW 6 */ + 12155 "00100000" // /* MW 5 */ + 12156 "00000000" // /* MW 4 */ + 12157 "11110000" // /* MW 3 */ + 12158 "00101100" // /* MW 2 */ + 12159 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "slice_generic_innermost.h" 47 18 first +.src_ref 3 "slice_generic_innermost.h" 54 19 first + 12160 "11100001" // NOPA; NOPB; VST bmll0, [p5], m1; NOPX; VSHUFFLE bmlh0, x0, x3, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12161 "00000000" // /* MW 15 */ + 12162 "00000000" // /* MW 14 */ + 12163 "11101000" // /* MW 13 */ + 12164 "11000010" // /* MW 12 */ + 12165 "01000000" // /* MW 11 */ + 12166 "00000000" // /* MW 10 */ + 12167 "00000000" // /* MW 9 */ + 12168 "10000000" // /* MW 8 */ + 12169 "00000110" // /* MW 7 */ + 12170 "00101000" // /* MW 6 */ + 12171 "00100101" // /* MW 5 */ + 12172 "00000000" // /* MW 4 */ + 12173 "11110000" // /* MW 3 */ + 12174 "00101100" // /* MW 2 */ + 12175 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 55 19 first + 12176 "11100001" // NOPA; PADDB [p5], m1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12177 "00000000" // /* MW 15 */ + 12178 "00000000" // /* MW 14 */ + 12179 "01111000" // /* MW 13 */ + 12180 "10100101" // /* MW 12 */ + 12181 "00000001" // /* MW 11 */ + 12182 "00000000" // /* MW 10 */ + 12183 "00000000" // /* MW 9 */ + 12184 "00000000" // /* MW 8 */ + 12185 "01011011" // /* MW 7 */ + 12186 "00000001" // /* MW 6 */ + 12187 "00100000" // /* MW 5 */ + 12188 "01010111" // /* MW 4 */ + 12189 "11111010" // /* MW 3 */ + 12190 "00101100" // /* MW 2 */ + 12191 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "slice_generic_innermost.h" 60 19 first + 12192 "11100001" // NOPA; NOPB; VST bmlh0, [p1], m1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12193 "00000000" // /* MW 15 */ + 12194 "00000000" // /* MW 14 */ + 12195 "01111000" // /* MW 13 */ + 12196 "10100101" // /* MW 12 */ + 12197 "00000001" // /* MW 11 */ + 12198 "00000000" // /* MW 10 */ + 12199 "00000000" // /* MW 9 */ + 12200 "10000000" // /* MW 8 */ + 12201 "00100110" // /* MW 7 */ + 12202 "00101000" // /* MW 6 */ + 12203 "00100001" // /* MW 5 */ + 12204 "00000000" // /* MW 4 */ + 12205 "11110000" // /* MW 3 */ + 12206 "00101100" // /* MW 2 */ + 12207 "00000000" // /* MW 1 */ +.label ZLE_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_448 +.src_ref 3 "slice_generic_innermost.h" 61 19 first +.end_of_loop + 12208 "11100001" // NOPA; PADDB [p1], m1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12209 "00000000" // /* MW 15 */ + 12210 "00000000" // /* MW 14 */ + 12211 "01111000" // /* MW 13 */ + 12212 "10100101" // /* MW 12 */ + 12213 "00000001" // /* MW 11 */ + 12214 "00000000" // /* MW 10 */ + 12215 "00000000" // /* MW 9 */ + 12216 "00000000" // /* MW 8 */ + 12217 "01011011" // /* MW 7 */ + 12218 "00000001" // /* MW 6 */ + 12219 "00100000" // /* MW 5 */ + 12220 "01010111" // /* MW 4 */ + 12221 "11110010" // /* MW 3 */ + 12222 "00101100" // /* MW 2 */ + 12223 "00000000" // /* MW 1 */ +.label TGT_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_464 +.src_ref 3 "slice_generic_innermost.h" 76 first +.loop_nesting 0 + 12224 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12225 "00000000" // /* MW 3 */ + 12226 "00101000" // /* MW 2 */ + 12227 "00010000" // /* MW 1 */ +.delay_slot + 12228 "11111000" // MOV p7, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12229 "00100000" // /* MW 3 */ + 12230 "01100000" // /* MW 2 */ + 12231 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12233 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12234 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12235 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12236 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12237 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12238 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params__end +.label __Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params___func_end0 + 12239 "00000000" // /* MW 1 */ +.label __ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj___func_begin0 +.label _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj +.function slice_generic_innermost_adf_wrapper, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> > > _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 29 +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 29 first +.function_start + 12240 "00111010" // MOVS p5, p0; PADDXM [sp], #128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12241 "01110001" // /* MW 9 */ + 12242 "00000000" // /* MW 8 */ + 12243 "00000000" // /* MW 7 */ + 12244 "00000000" // /* MW 6 */ + 12245 "00000100" // /* MW 5 */ + 12246 "00000000" // /* MW 4 */ + 12247 "01100000" // /* MW 3 */ + 12248 "00010001" // /* MW 2 */ + 12249 "10110000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 + 12250 "00000010" // ST lr, [sp, #-4]; MOV p3, p1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12251 "01110000" // /* MW 7 */ + 12252 "01100000" // /* MW 6 */ + 12253 "10110001" // /* MW 5 */ + 12254 "00000001" // /* MW 4 */ + 12255 "10110000" // /* MW 3 */ + 12256 "10000111" // /* MW 2 */ + 12257 "11111111" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 33 4 +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 33 4 first +.no_stack_arguments + 12258 "00111010" // MOVS p1, p2; JL #11696 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=11696 delay_slots=5 */ + 12259 "01000001" // /* MW 9 */ + 12260 "00000000" // /* MW 8 */ + 12261 "00000000" // /* MW 7 */ + 12262 "10110110" // /* MW 6 */ + 12263 "00000101" // /* MW 5 */ + 12264 "00000000" // /* MW 4 */ + 12265 "01100000" // /* MW 3 */ + 12266 "00010001" // /* MW 2 */ + 12267 "00110001" // /* MW 1 */ +.delay_slot + 12268 "11111000" // MOV p0, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12269 "11100000" // /* MW 3 */ + 12270 "01100101" // /* MW 2 */ + 12271 "00011000" // /* MW 1 */ +.delay_slot + 12272 "00011000" // PADDB [p0], #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12273 "10010000" // /* MW 3 */ + 12274 "11101111" // /* MW 2 */ + 12275 "00111000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 4 +.delay_slot + 12276 "11111000" // MOV p4, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12277 "11000000" // /* MW 3 */ + 12278 "01100000" // /* MW 2 */ + 12279 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12281 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12282 "00111100" // NOPA; NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12283 "00100000" // /* MW 5 */ + 12284 "00000000" // /* MW 4 */ + 12285 "11110000" // /* MW 3 */ + 12286 "00101100" // /* MW 2 */ + 12287 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 31 first +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 4 +.return_address + 12288 "10111010" // LDA r18, [sp, #-128]; MOVS p2, p4; MOV r17, CORE_ID /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12289 "01110010" // /* MW 9 */ + 12290 "01110000" // /* MW 8 */ + 12291 "00101101" // /* MW 7 */ + 12292 "00000010" // /* MW 6 */ + 12293 "10001011" // /* MW 5 */ + 12294 "10010000" // /* MW 4 */ + 12295 "00100010" // /* MW 3 */ + 12296 "01001010" // /* MW 2 */ + 12297 "11110000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 34 35 first +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 44 + 12298 "00101100" // LDA r22, [sp, #-124]; EXTEND.u8 r17, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12299 "00100000" // /* MW 5 */ + 12300 "11000101" // /* MW 4 */ + 12301 "00101000" // /* MW 3 */ + 12302 "11011010" // /* MW 2 */ + 12303 "11110000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 34 35 +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 57 first + 12304 "10111010" // LDA r20, [sp, #-120]; MOVXM r19, #65534 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12305 "00010000" // /* MW 9 */ + 12306 "11111111" // /* MW 8 */ + 12307 "01101111" // /* MW 7 */ + 12308 "00111110" // /* MW 6 */ + 12309 "00000000" // /* MW 5 */ + 12310 "00000000" // /* MW 4 */ + 12311 "00100000" // /* MW 3 */ + 12312 "01010010" // /* MW 2 */ + 12313 "11110001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 34 35 first + 12314 "00101100" // LDA p1, [p3]; ADD r17, r19, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12315 "00100001" // /* MW 5 */ + 12316 "11000110" // /* MW 4 */ + 12317 "11011001" // /* MW 3 */ + 12318 "10010011" // /* MW 2 */ + 12319 "01100000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 70 first +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 59 first + 12320 "00101100" // LDA r19, [sp, #-116]; EXTEND.u16 r21, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12321 "01100000" // /* MW 5 */ + 12322 "11010101" // /* MW 4 */ + 12323 "00101000" // /* MW 3 */ + 12324 "11001110" // /* MW 2 */ + 12325 "11110001" // /* MW 1 */ + 12326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12327 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 12328 "10011000" // LDA r17, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12329 "00110110" // /* MW 3 */ + 12330 "00000110" // /* MW 2 */ + 12331 "00000101" // /* MW 1 */ + 12332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12333 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 36 first + 12334 "10011000" // MUL r18, r22, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12335 "00101111" // /* MW 3 */ + 12336 "10100101" // /* MW 2 */ + 12337 "00010101" // /* MW 1 */ + 12338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12339 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 49 + 12340 "10011000" // MUL r18, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12341 "01001111" // /* MW 3 */ + 12342 "10100101" // /* MW 2 */ + 12343 "00010100" // /* MW 1 */ + 12344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12345 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 62 + 12346 "10011000" // MUL r18, r21, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12347 "00101111" // /* MW 3 */ + 12348 "01100101" // /* MW 2 */ + 12349 "00010101" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 4 first +.no_stack_arguments + 12350 "00000100" // JL #11760 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11760 delay_slots=5 */ + 12351 "00000001" // /* MW 5 */ + 12352 "00000000" // /* MW 4 */ + 12353 "11111000" // /* MW 3 */ + 12354 "00010110" // /* MW 2 */ + 12355 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 57 +.delay_slot + 12356 "10011000" // MUL r18, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12357 "00101111" // /* MW 3 */ + 12358 "11100101" // /* MW 2 */ + 12359 "00010100" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 45 +.delay_slot + 12360 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12361 "00000101" // /* MW 3 */ + 12362 "00100000" // /* MW 2 */ + 12363 "00010000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 45 +.delay_slot + 12364 "10011000" // LSHL r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12365 "00001101" // /* MW 3 */ + 12366 "10100001" // /* MW 2 */ + 12367 "00010100" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 45 +.delay_slot + 12368 "01011000" // ADD.NC p0, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12369 "11000001" // /* MW 3 */ + 12370 "01101000" // /* MW 2 */ + 12371 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12372 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12373 "10000001" // /* MW 11 */ + 12374 "10101101" // /* MW 10 */ + 12375 "00000000" // /* MW 9 */ + 12376 "00000000" // /* MW 8 */ + 12377 "00000000" // /* MW 7 */ + 12378 "00000000" // /* MW 6 */ + 12379 "00100000" // /* MW 5 */ + 12380 "00000000" // /* MW 4 */ + 12381 "11110000" // /* MW 3 */ + 12382 "00101100" // /* MW 2 */ + 12383 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 39 +.return_address + 12384 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12385 "00111001" // /* MW 3 */ + 12386 "11111100" // /* MW 2 */ + 12387 "00000111" // /* MW 1 */ + 12388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12389 "00000000" // /* MW 1 */ + 12390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12391 "00000000" // /* MW 1 */ + 12392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12393 "00000000" // /* MW 1 */ + 12394 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12395 "00000000" // /* MW 1 */ + 12396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12397 "00000000" // /* MW 1 */ + 12398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12399 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 39 first + 12400 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12401 "00000000" // /* MW 3 */ + 12402 "00101000" // /* MW 2 */ + 12403 "00010000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 39 +.delay_slot + 12404 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12405 "00000001" // /* MW 5 */ + 12406 "00000000" // /* MW 4 */ + 12407 "00000000" // /* MW 3 */ + 12408 "11110000" // /* MW 2 */ + 12409 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12411 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12413 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12415 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj__end +.label __ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj___func_end0 + 12417 "00000000" // /* MW 1 */ +.label __Z14_b8170_wrapperPPv___func_begin0 +.label _Z14_b8170_wrapperPPv +.function _b8170_wrapper _Z14_b8170_wrapperPPv +.src_ref 0 "0_0_reloadable4.cc" 71 first +.src_ref 0 "0_0_reloadable4.cc" 73 79 +.function_start + 12432 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12433 "11000000" // /* MW 3 */ + 12434 "01100000" // /* MW 2 */ + 12435 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 73 79 first + 12436 "10011000" // LDA p0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12437 "00011110" // /* MW 3 */ + 12438 "00011100" // /* MW 2 */ + 12439 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 75 47 first + 12440 "10011000" // LDA p2, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12441 "00011110" // /* MW 3 */ + 12442 "00010101" // /* MW 2 */ + 12443 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 74 80 first + 12444 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12445 "10011110" // /* MW 3 */ + 12446 "00000100" // /* MW 2 */ + 12447 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 72 4 first +.tail_call + 12448 "10000100" // J #12240 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=12240 delay_slots=5 */ + 12449 "00000000" // /* MW 5 */ + 12450 "00000000" // /* MW 4 */ + 12451 "11101000" // /* MW 3 */ + 12452 "00010111" // /* MW 2 */ + 12453 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12455 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12457 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12459 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12461 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z14_b8170_wrapperPPv__end +.label __Z14_b8170_wrapperPPv___func_end0 + 12463 "00000000" // /* MW 1 */ +.label _ZN12me_primitive10udiv_dstepEjjRjS0_ +.function udiv_dstep _ZN12me_primitive10udiv_dstepEjjRjS0_ +.src_ref 11 "me_div.c" 108 19 +.src_ref 11 "me_div.c" 108 19 +.src_ref 11 "me_div.c" 115 4 first +.function_start + 12464 "11100100" // MOVX r3, #0; MOV r31, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12465 "01000001" // /* MW 5 */ + 12466 "10100000" // /* MW 4 */ + 12467 "00101111" // /* MW 3 */ + 12468 "11000000" // /* MW 2 */ + 12469 "00000000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12470 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12471 "00011100" // /* MW 3 */ + 12472 "11000110" // /* MW 2 */ + 12473 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12474 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12475 "00011100" // /* MW 3 */ + 12476 "11000110" // /* MW 2 */ + 12477 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12478 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12479 "00011100" // /* MW 3 */ + 12480 "11000110" // /* MW 2 */ + 12481 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12482 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12483 "00011100" // /* MW 3 */ + 12484 "11000110" // /* MW 2 */ + 12485 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12486 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12487 "00011100" // /* MW 3 */ + 12488 "11000110" // /* MW 2 */ + 12489 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12490 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12491 "00011100" // /* MW 3 */ + 12492 "11000110" // /* MW 2 */ + 12493 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12494 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12495 "00011100" // /* MW 3 */ + 12496 "11000110" // /* MW 2 */ + 12497 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12498 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12499 "00011100" // /* MW 3 */ + 12500 "11000110" // /* MW 2 */ + 12501 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12502 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12503 "00011100" // /* MW 3 */ + 12504 "11000110" // /* MW 2 */ + 12505 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12506 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12507 "00011100" // /* MW 3 */ + 12508 "11000110" // /* MW 2 */ + 12509 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12510 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12511 "00011100" // /* MW 3 */ + 12512 "11000110" // /* MW 2 */ + 12513 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12514 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12515 "00011100" // /* MW 3 */ + 12516 "11000110" // /* MW 2 */ + 12517 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12518 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12519 "00011100" // /* MW 3 */ + 12520 "11000110" // /* MW 2 */ + 12521 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12522 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12523 "00011100" // /* MW 3 */ + 12524 "11000110" // /* MW 2 */ + 12525 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12526 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12527 "00011100" // /* MW 3 */ + 12528 "11000110" // /* MW 2 */ + 12529 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12530 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12531 "00011100" // /* MW 3 */ + 12532 "11000110" // /* MW 2 */ + 12533 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12534 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12535 "00011100" // /* MW 3 */ + 12536 "11000110" // /* MW 2 */ + 12537 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12538 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12539 "00011100" // /* MW 3 */ + 12540 "11000110" // /* MW 2 */ + 12541 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12542 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12543 "00011100" // /* MW 3 */ + 12544 "11000110" // /* MW 2 */ + 12545 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12546 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12547 "00011100" // /* MW 3 */ + 12548 "11000110" // /* MW 2 */ + 12549 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12550 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12551 "00011100" // /* MW 3 */ + 12552 "11000110" // /* MW 2 */ + 12553 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12554 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12555 "00011100" // /* MW 3 */ + 12556 "11000110" // /* MW 2 */ + 12557 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12558 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12559 "00011100" // /* MW 3 */ + 12560 "11000110" // /* MW 2 */ + 12561 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12562 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12563 "00011100" // /* MW 3 */ + 12564 "11000110" // /* MW 2 */ + 12565 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12566 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12567 "00011100" // /* MW 3 */ + 12568 "11000110" // /* MW 2 */ + 12569 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12570 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12571 "00011100" // /* MW 3 */ + 12572 "11000110" // /* MW 2 */ + 12573 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12574 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12575 "00011100" // /* MW 3 */ + 12576 "11000110" // /* MW 2 */ + 12577 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12578 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12579 "00011100" // /* MW 3 */ + 12580 "11000110" // /* MW 2 */ + 12581 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 119 first + 12582 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12583 "00000000" // /* MW 3 */ + 12584 "00101000" // /* MW 2 */ + 12585 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 first +.delay_slot + 12586 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12587 "00011100" // /* MW 3 */ + 12588 "11000110" // /* MW 2 */ + 12589 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 +.delay_slot + 12590 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12591 "00011100" // /* MW 3 */ + 12592 "11000110" // /* MW 2 */ + 12593 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 +.delay_slot + 12594 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12595 "00011100" // /* MW 3 */ + 12596 "11000110" // /* MW 2 */ + 12597 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 +.delay_slot + 12598 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12599 "00011100" // /* MW 3 */ + 12600 "11000110" // /* MW 2 */ + 12601 "00010000" // /* MW 1 */ +.delay_slot + 12602 "11111000" // MOV r2, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12603 "10100000" // /* MW 3 */ + 12604 "10011111" // /* MW 2 */ +.label _ZN12me_primitive10udiv_dstepEjjRjS0___end + 12605 "00011000" // /* MW 1 */ +.label memset +.function memset memset +.src_ref 12 "string.c" 325 first +.src_ref 12 "string.c" 328 4 first +.function_start + 12608 "10000100" // JZ r1, #12768 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12768 delay_slots=5 */ + 12609 "00000001" // /* MW 5 */ + 12610 "00000000" // /* MW 4 */ + 12611 "11110000" // /* MW 3 */ + 12612 "00011000" // /* MW 2 */ + 12613 "00001000" // /* MW 1 */ +.src_ref 12 "string.c" 329 3 +.delay_slot + 12614 "11111000" // MOV p0, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12615 "11000000" // /* MW 3 */ + 12616 "01100010" // /* MW 2 */ + 12617 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12619 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12621 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12622 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12623 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12625 "00000000" // /* MW 1 */ +.src_ref 12 "string.c" 328 4 first +.src_ref 12 "string.c" 329 3 + 12626 "00000010" // MOVS p1, p0; MOV lc, r1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12627 "01110000" // /* MW 7 */ + 12628 "01010000" // /* MW 6 */ + 12629 "10111000" // /* MW 5 */ + 12630 "00000010" // /* MW 4 */ + 12631 "01100000" // /* MW 3 */ + 12632 "00010001" // /* MW 2 */ + 12633 "00110000" // /* MW 1 */ +.src_ref 12 "string.c" 328 4 + 12634 "01000100" // MOVXM ls, #12656 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12635 "11100000" // /* MW 5 */ + 12636 "11100010" // /* MW 4 */ + 12637 "00110001" // /* MW 3 */ + 12638 "00000000" // /* MW 2 */ + 12639 "00000000" // /* MW 1 */ +.src_ref 12 "string.c" 328 4 + 12640 "11100001" // NOPA; NOPB; NOPS; MOVXM le, #12752; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12641 "00000000" // /* MW 15 */ + 12642 "00000000" // /* MW 14 */ + 12643 "00010000" // /* MW 13 */ + 12644 "11101000" // /* MW 12 */ + 12645 "10111000" // /* MW 11 */ + 12646 "00001101" // /* MW 10 */ + 12647 "00000000" // /* MW 9 */ + 12648 "00000000" // /* MW 8 */ + 12649 "01011011" // /* MW 7 */ + 12650 "00000001" // /* MW 6 */ + 12651 "00100000" // /* MW 5 */ + 12652 "00000000" // /* MW 4 */ + 12653 "11110000" // /* MW 3 */ + 12654 "00101100" // /* MW 2 */ + 12655 "00000000" // /* MW 1 */ +.label ZLS_Fmemset_48 +.src_ref 12 "string.c" 329 3 first +.begin_of_loop +.loop_nesting 1 + 12656 "11100001" // ST.s8 r0, [p1], #1; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12657 "00000000" // /* MW 15 */ + 12658 "00000000" // /* MW 14 */ + 12659 "01111000" // /* MW 13 */ + 12660 "10100101" // /* MW 12 */ + 12661 "00000001" // /* MW 11 */ + 12662 "00000000" // /* MW 10 */ + 12663 "00000000" // /* MW 9 */ + 12664 "00000000" // /* MW 8 */ + 12665 "01011011" // /* MW 7 */ + 12666 "00000001" // /* MW 6 */ + 12667 "00100000" // /* MW 5 */ + 12668 "00000000" // /* MW 4 */ + 12669 "11100000" // /* MW 3 */ + 12670 "10000000" // /* MW 2 */ + 12671 "00100011" // /* MW 1 */ + 12672 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12673 "00000000" // /* MW 15 */ + 12674 "00000000" // /* MW 14 */ + 12675 "01111000" // /* MW 13 */ + 12676 "10100101" // /* MW 12 */ + 12677 "00000001" // /* MW 11 */ + 12678 "00000000" // /* MW 10 */ + 12679 "00000000" // /* MW 9 */ + 12680 "00000000" // /* MW 8 */ + 12681 "01011011" // /* MW 7 */ + 12682 "00000001" // /* MW 6 */ + 12683 "00100000" // /* MW 5 */ + 12684 "00000000" // /* MW 4 */ + 12685 "11110000" // /* MW 3 */ + 12686 "00101100" // /* MW 2 */ + 12687 "00000000" // /* MW 1 */ + 12688 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12689 "00000000" // /* MW 15 */ + 12690 "00000000" // /* MW 14 */ + 12691 "01111000" // /* MW 13 */ + 12692 "10100101" // /* MW 12 */ + 12693 "00000001" // /* MW 11 */ + 12694 "00000000" // /* MW 10 */ + 12695 "00000000" // /* MW 9 */ + 12696 "00000000" // /* MW 8 */ + 12697 "01011011" // /* MW 7 */ + 12698 "00000001" // /* MW 6 */ + 12699 "00100000" // /* MW 5 */ + 12700 "00000000" // /* MW 4 */ + 12701 "11110000" // /* MW 3 */ + 12702 "00101100" // /* MW 2 */ + 12703 "00000000" // /* MW 1 */ + 12704 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12705 "00000000" // /* MW 15 */ + 12706 "00000000" // /* MW 14 */ + 12707 "01111000" // /* MW 13 */ + 12708 "10100101" // /* MW 12 */ + 12709 "00000001" // /* MW 11 */ + 12710 "00000000" // /* MW 10 */ + 12711 "00000000" // /* MW 9 */ + 12712 "00000000" // /* MW 8 */ + 12713 "01011011" // /* MW 7 */ + 12714 "00000001" // /* MW 6 */ + 12715 "00100000" // /* MW 5 */ + 12716 "00000000" // /* MW 4 */ + 12717 "11110000" // /* MW 3 */ + 12718 "00101100" // /* MW 2 */ + 12719 "00000000" // /* MW 1 */ + 12720 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12721 "00000000" // /* MW 15 */ + 12722 "00000000" // /* MW 14 */ + 12723 "01111000" // /* MW 13 */ + 12724 "10100101" // /* MW 12 */ + 12725 "00000001" // /* MW 11 */ + 12726 "00000000" // /* MW 10 */ + 12727 "00000000" // /* MW 9 */ + 12728 "00000000" // /* MW 8 */ + 12729 "01011011" // /* MW 7 */ + 12730 "00000001" // /* MW 6 */ + 12731 "00100000" // /* MW 5 */ + 12732 "00000000" // /* MW 4 */ + 12733 "11110000" // /* MW 3 */ + 12734 "00101100" // /* MW 2 */ + 12735 "00000000" // /* MW 1 */ + 12736 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12737 "00000000" // /* MW 15 */ + 12738 "00000000" // /* MW 14 */ + 12739 "01111000" // /* MW 13 */ + 12740 "10100101" // /* MW 12 */ + 12741 "00000001" // /* MW 11 */ + 12742 "00000000" // /* MW 10 */ + 12743 "00000000" // /* MW 9 */ + 12744 "00000000" // /* MW 8 */ + 12745 "01011011" // /* MW 7 */ + 12746 "00000001" // /* MW 6 */ + 12747 "00100000" // /* MW 5 */ + 12748 "00000000" // /* MW 4 */ + 12749 "11110000" // /* MW 3 */ + 12750 "00101100" // /* MW 2 */ + 12751 "00000000" // /* MW 1 */ +.label ZLE_Fmemset_144 +.end_of_loop + 12752 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12753 "00000000" // /* MW 15 */ + 12754 "00000000" // /* MW 14 */ + 12755 "01111000" // /* MW 13 */ + 12756 "10100101" // /* MW 12 */ + 12757 "00000001" // /* MW 11 */ + 12758 "00000000" // /* MW 10 */ + 12759 "00000000" // /* MW 9 */ + 12760 "00000000" // /* MW 8 */ + 12761 "01011011" // /* MW 7 */ + 12762 "00000001" // /* MW 6 */ + 12763 "00100000" // /* MW 5 */ + 12764 "00000000" // /* MW 4 */ + 12765 "11110000" // /* MW 3 */ + 12766 "00101100" // /* MW 2 */ + 12767 "00000000" // /* MW 1 */ +.label TGT_Fmemset_160 +.src_ref 12 "string.c" 330 4 first +.loop_nesting 0 + 12768 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12769 "00000000" // /* MW 3 */ + 12770 "00101000" // /* MW 2 */ + 12771 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12773 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12775 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12776 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12777 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12778 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12779 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12780 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label memset__end + 12781 "00000000" // /* MW 1 */ +.dir 0 "/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable4/src" +.dir 1 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer" +.dir 2 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common" +.dir 3 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc" +.dir 4 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2" +.dir 5 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p" +.dir 6 "/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend" +.dir 7 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/conv" +.dir 8 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib" +.dir 9 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/detail" +.dir 10 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf" +.dir 11 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21990/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib" +.dir 12 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21990/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib/runtime/src" diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable79/Release/0_0_reloadable79.txt b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable79/Release/0_0_reloadable79.txt new file mode 100644 index 0000000000000000000000000000000000000000..313c672c901f9483e93de4398d1e94f821c22ed0 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable79/Release/0_0_reloadable79.txt @@ -0,0 +1,3559 @@ +Contents of the .debug_line section: + +sigmoid_carf_templated_lut.h: +File name Line number Starting address View Stmt + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x2020 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 282 0x2020 1 x +conv2d_dw_bf16.h 285 0x2020 2 +conv2d_dw_bf16.h 285 0x2020 3 +conv2d_dw_bf16.h 287 0x2020 4 +conv2d_dw_bf16.h 285 0x202a +conv2d_dw_bf16.h 287 0x202a 1 +conv2d_dw_bf16.h 285 0x2034 x +conv2d_dw_bf16.h 285 0x2034 1 +conv2d_dw_bf16.h 285 0x203a +conv2d_dw_bf16.h 287 0x203e x +conv2d_dw_bf16.h 282 0x2042 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x2048 x +io_buffer_main.h 149 0x204e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 284 0x2058 x +conv2d_dw_bf16.h 284 0x2062 +conv2d_dw_bf16.h 285 0x2062 1 x +conv2d_dw_bf16.h 290 0x206c +conv2d_dw_bf16.h 285 0x2070 +conv2d_dw_bf16.h 285 0x2076 +conv2d_dw_bf16.h 287 0x2076 1 x +conv2d_dw_bf16.h 291 0x2076 2 +conv2d_dw_bf16.h 285 0x2080 x +conv2d_dw_bf16.h 287 0x2080 1 +conv2d_dw_bf16.h 290 0x2090 x +conv2d_dw_bf16.h 290 0x2090 1 +conv2d_dw_bf16.h 292 0x2090 2 +conv2d_dw_bf16.h 290 0x20a2 +conv2d_dw_bf16.h 290 0x20a6 +conv2d_dw_bf16.h 292 0x20ac +conv2d_dw_bf16.h 292 0x20ba +conv2d_dw_bf16.h 292 0x20ba 1 x +conv2d_dw_bf16.h 292 0x20ba 2 +conv2d_dw_bf16.h 294 0x20ba 3 +conv2d_dw_bf16.h 294 0x20ba 4 +conv2d_dw_bf16.h 294 0x20ba 5 +conv2d_dw_bf16.h 292 0x20c4 +conv2d_dw_bf16.h 294 0x20c8 x +conv2d_dw_bf16.h 292 0x20cc +conv2d_dw_bf16.h 293 0x20cc 1 +conv2d_dw_bf16.h 297 0x20cc 2 +conv2d_dw_bf16.h 291 0x20d2 +conv2d_dw_bf16.h 294 0x20d2 1 +conv2d_dw_bf16.h 297 0x20d2 2 +conv2d_dw_bf16.h 292 0x20e2 x +conv2d_dw_bf16.h 292 0x20e6 +conv2d_dw_bf16.h 291 0x20ea x +conv2d_dw_bf16.h 292 0x20f0 x +conv2d_dw_bf16.h 294 0x20f0 1 x +conv2d_dw_bf16.h 292 0x20f6 +conv2d_dw_bf16.h 294 0x20f6 1 +conv2d_dw_bf16.h 292 0x20fc +conv2d_dw_bf16.h 294 0x20fc 1 +conv2d_dw_bf16.h 293 0x2102 x +conv2d_dw_bf16.h 294 0x2106 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x2110 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 297 0x2110 1 +conv2d_dw_bf16.h 298 0x2110 2 +conv2d_dw_bf16.h 298 0x2110 3 +conv2d_dw_bf16.h 297 0x211a x +conv2d_dw_bf16.h 298 0x211e +conv2d_dw_bf16.h 298 0x2124 x +conv2d_dw_bf16.h 296 0x2128 +conv2d_dw_bf16.h 301 0x2130 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x2136 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 297 0x213a x +conv2d_dw_bf16.h 296 0x2144 x +conv2d_dw_bf16.h 298 0x214a x +conv2d_dw_bf16.h 298 0x214e +conv2d_dw_bf16.h 297 0x2152 x +conv2d_dw_bf16.h 296 0x2156 +conv2d_dw_bf16.h 298 0x215a x +conv2d_dw_bf16.h 301 0x2160 +conv2d_dw_bf16.h 301 0x2176 x +conv2d_dw_bf16.h 301 0x217a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 100 0xc10 x +elementwise_binary.h 103 0xc10 1 x +elementwise_binary.h 101 0xc14 +elementwise_binary.h 101 0xc1a +elementwise_binary.h 101 0xc1e x +elementwise_binary.h 101 0xc22 +elementwise_binary.h 89 0xc30 x +elementwise_binary.h 92 0xc30 1 +elementwise_binary.h 92 0xc30 2 x +elementwise_binary.h 89 0xc3a +elementwise_binary.h 92 0xc4e x +elementwise_binary.h 93 0xc52 x +elementwise_binary.h 93 0xc62 +elementwise_binary.h 94 0xc66 x +elementwise_binary.h 94 0xc76 +elementwise_binary.h 95 0xc7a x +elementwise_binary.h 96 0xc82 x +elementwise_binary.h 95 0xc8e x +elementwise_binary.h 96 0xc92 +elementwise_binary.h 96 0xca0 +elementwise_binary.h 98 0xca0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h: +add_impl.h 105 0xca0 2 +add_impl.h 105 0xcaa +add_impl.h 106 0xcaa 1 +add_impl.h 106 0xcaa 2 +add_impl.h 105 0xcb4 x +add_impl.h 106 0xcb4 1 +add_impl.h 106 0xcbe x +add_impl.h 106 0xcc6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 98 0xcca x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h: +add_impl.h 106 0xcce + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 98 0xcd2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h: +add_impl.h 106 0xcd8 x +add_impl.h 106 0xcdc + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_broadcasting.h: +elementwise_binary_broadcasting.h 40 0xcf0 x +elementwise_binary_broadcasting.h 41 0xcf0 1 +elementwise_binary_broadcasting.h 41 0xcf0 2 +elementwise_binary_broadcasting.h 41 0xcfa +elementwise_binary_broadcasting.h 41 0xcfa 1 +elementwise_binary_broadcasting.h 41 0xcfa 2 x +elementwise_binary_broadcasting.h 42 0xcfa 3 +elementwise_binary_broadcasting.h 43 0xd0e x +elementwise_binary_broadcasting.h 41 0xd12 x +elementwise_binary_broadcasting.h 41 0xd16 +elementwise_binary_broadcasting.h 42 0xd1a x +elementwise_binary_broadcasting.h 41 0xd1e x +elementwise_binary_broadcasting.h 42 0xd1e 1 +elementwise_binary_broadcasting.h 41 0xd24 +elementwise_binary_broadcasting.h 35 0xd30 +elementwise_binary_broadcasting.h 35 0xd30 1 x +elementwise_binary_broadcasting.h 36 0xd3a x +elementwise_binary_broadcasting.h 36 0xd40 +elementwise_binary_broadcasting.h 37 0xd50 +elementwise_binary_broadcasting.h 37 0xd54 x +elementwise_binary_broadcasting.h 37 0xd5a +elementwise_binary_broadcasting.h 38 0xd60 x +elementwise_binary_broadcasting.h 48 0xd70 x +elementwise_binary_broadcasting.h 55 0xd70 1 +elementwise_binary_broadcasting.h 61 0xd70 2 +elementwise_binary_broadcasting.h 55 0xd7a x +elementwise_binary_broadcasting.h 61 0xd7e x +elementwise_binary_broadcasting.h 76 0xd7e 1 +elementwise_binary_broadcasting.h 61 0xd90 +elementwise_binary_broadcasting.h 61 0xd90 1 +elementwise_binary_broadcasting.h 55 0xd96 +elementwise_binary_broadcasting.h 55 0xd9a x +elementwise_binary_broadcasting.h 62 0xda4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 102 0xdb8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0xdc0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_broadcasting.h: +elementwise_binary_broadcasting.h 65 0xdd0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 102 0xde0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0xde6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0xdf0 +add_accum.hpp 19 0xdf0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_broadcasting.h: +elementwise_binary_broadcasting.h 76 0xdf0 2 x +elementwise_binary_broadcasting.h 76 0xdf0 3 x +elementwise_binary_broadcasting.h 76 0xdfa +elementwise_binary_broadcasting.h 76 0xdfa 1 +elementwise_binary_broadcasting.h 76 0xe04 +elementwise_binary_broadcasting.h 76 0xe0a +elementwise_binary_broadcasting.h 76 0xe10 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe18 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe18 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0xe18 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe1c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe1c 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 148 0xe1c 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe20 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe20 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 170 0xe20 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe24 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe24 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 172 0xe24 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe28 x +vector.hpp 1159 0xe28 1 +vector.hpp 1159 0xe28 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe28 3 x +accum.hpp 1119 0xe28 4 +accum.hpp 1119 0xe28 5 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0xe28 6 x +elementwise_binary.h 154 0xe28 7 +elementwise_binary.h 177 0xe28 8 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe2e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe2e 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0xe2e 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 148 0xe2e 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe36 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe36 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 170 0xe36 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe3a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe3a 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0xe3a 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 172 0xe3a 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe42 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe42 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0xe42 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe46 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe46 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0xe46 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 148 0xe46 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe4e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe4e 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 170 0xe4e 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe52 +vector.hpp 1159 0xe52 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe52 2 +accum.hpp 1119 0xe52 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0xe52 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 172 0xe52 5 x +elementwise_binary.h 177 0xe52 6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe60 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe60 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0xe60 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe64 +vector.hpp 1159 0xe64 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe64 2 +accum.hpp 1119 0xe64 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0xe64 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 148 0xe64 5 x +elementwise_binary.h 154 0xe64 6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe70 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe70 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 170 0xe70 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe80 +vector.hpp 1159 0xe80 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe80 2 +accum.hpp 1119 0xe80 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0xe80 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 172 0xe80 5 x +elementwise_binary.h 177 0xe80 6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0xe92 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0xe92 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0xe92 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 154 0xe92 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0xe9c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0xe9c 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0xe9c 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 177 0xe9c 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0xea6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0xea6 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 154 0xea6 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_broadcasting.h: +elementwise_binary_broadcasting.h 80 0xea6 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0xeae x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0xeae 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 177 0xeae 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0xeb4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0xeb4 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 154 0xeb4 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_attribute_broadcasting.h: +ise_binary_attribute_broadcasting.h 41 0xec0 +ise_binary_attribute_broadcasting.h 41 0xec0 1 x +ise_binary_attribute_broadcasting.h 76 0xec0 2 +ise_binary_attribute_broadcasting.h 51 0xeca +ise_binary_attribute_broadcasting.h 51 0xed6 +ise_binary_attribute_broadcasting.h 51 0xedc x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 538 0xee2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector_native_types.hpp: +vector_native_types.hpp 374 0xee2 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_attribute_broadcasting.h: +ise_binary_attribute_broadcasting.h 76 0xef0 x +ise_binary_attribute_broadcasting.h 51 0xef6 +ise_binary_attribute_broadcasting.h 51 0xefc x +ise_binary_attribute_broadcasting.h 51 0xf00 +ise_binary_attribute_broadcasting.h 76 0xf00 1 +ise_binary_attribute_broadcasting.h 76 0xf06 +ise_binary_attribute_broadcasting.h 77 0xf10 +ise_binary_attribute_broadcasting.h 77 0xf20 x +ise_binary_attribute_broadcasting.h 77 0xf24 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 152 0xf40 x +superkernels.cpp 157 0xf40 1 +superkernels.cpp 157 0xf46 x +superkernels.cpp 152 0xf4c +superkernels.cpp 154 0xf5a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0xf64 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 157 0xf6c +superkernels.cpp 157 0xf6c 1 +superkernels.cpp 154 0xf72 x +superkernels.cpp 154 0xf76 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0xf7e + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 154 0xf7e 1 +superkernels.cpp 160 0xf86 +superkernels.cpp 171 0xf86 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0xf8c +tile.hpp 74 0xf92 +tile.hpp 86 0xf92 1 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 160 0xf9e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0xfa8 +tile.hpp 74 0xfac +tile.hpp 74 0xfb0 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 164 0xfc0 +superkernels.cpp 164 0xfc6 x +superkernels.cpp 164 0xfc6 1 +superkernels.cpp 162 0xfd0 +superkernels.cpp 164 0xfd0 1 +superkernels.cpp 171 0xfd0 2 +superkernels.cpp 162 0xfda x +superkernels.cpp 164 0xfda 1 +superkernels.cpp 169 0xfda 2 +superkernels.cpp 162 0xfee +superkernels.cpp 164 0xff6 x +superkernels.cpp 162 0xffa x +superkernels.cpp 164 0x1000 x +superkernels.cpp 169 0x1010 +superkernels.cpp 171 0x1010 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x1020 x +io_buffer_main.h 242 0x1028 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 168 0x1028 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x1032 +io_buffer_main.h 242 0x1036 +io_buffer_main.h 259 0x103a x +io_buffer_main.h 242 0x1048 x +io_buffer_main.h 242 0x1048 1 x +io_buffer_main.h 242 0x104c +io_buffer_main.h 419 0x1050 +io_buffer_main.h 419 0x105a x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 169 0x105e +superkernels.cpp 168 0x1068 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x106c x +io_buffer_main.h 348 0x106c 1 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 169 0x1072 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x1076 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 171 0x107c x +superkernels.cpp 168 0x1084 x +superkernels.cpp 168 0x1088 +superkernels.cpp 169 0x108c x +superkernels.cpp 169 0x1090 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 348 0x10a0 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 173 0x10a0 1 +superkernels.cpp 174 0x10a0 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 351 0x10aa +io_buffer_main.h 449 0x10aa 1 +io_buffer_main.h 449 0x10b8 x +io_buffer_main.h 351 0x10bc x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 173 0x10bc 1 +superkernels.cpp 173 0x10c6 x +superkernels.cpp 173 0x10ca + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 351 0x10d6 x +io_buffer_main.h 351 0x10da + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 173 0x10de x +superkernels.cpp 173 0x10e2 +superkernels.cpp 174 0x10f2 +superkernels.cpp 174 0x10f6 x +superkernels.cpp 176 0x1100 +superkernels.cpp 176 0x1114 x +superkernels.cpp 176 0x111c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 95 0x1130 x +elementwise_unary.h 97 0x1130 1 +elementwise_unary.h 97 0x1130 2 x +elementwise_unary.h 97 0x1146 x +elementwise_unary.h 98 0x114a x +elementwise_unary.h 98 0x115a +elementwise_unary.h 99 0x115e x +elementwise_unary.h 101 0x1164 x +elementwise_unary.h 99 0x1170 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1180 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 107 0x1180 1 x +elementwise_unary.h 113 0x1180 2 +elementwise_unary.h 113 0x1180 3 +elementwise_unary.h 142 0x1180 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x118c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 113 0x118c 1 x +elementwise_unary.h 161 0x118c 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1198 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 142 0x1198 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x11a4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 161 0x11a4 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 21 0x11ba x +max_min.hpp 20 0x11be x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 113 0x11be 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x11c4 +vector.hpp 1159 0x11c4 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 21 0x11c4 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x11c4 3 +accum.hpp 1119 0x11c4 4 +accum.hpp 1119 0x11c4 5 +accum.hpp 1119 0x11c4 6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 147 0x11c4 7 +elementwise_unary.h 166 0x11c4 8 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 102 0x11d8 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/abs.hpp: +abs.hpp 32 0x11de x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 102 0x11e2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x11e2 1 +mul_acc32_fp.hpp 36 0x11e2 2 +mul_acc32_fp.hpp 38 0x11e2 3 +mul_acc32_fp.hpp 38 0x11e2 4 +mul_acc32_fp.hpp 39 0x11e2 5 +mul_acc32_fp.hpp 39 0x11e2 6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x11e8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x11e8 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/abs.hpp: +abs.hpp 32 0x11f0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x11fa x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x1202 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 21 0x1206 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 38 0x1206 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x120e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 38 0x120e 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 125 0x120e 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x121a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 39 0x121e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 125 0x121e 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1228 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 125 0x1228 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 21 0x1230 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/abs.hpp: +abs.hpp 32 0x1234 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1240 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1240 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 39 0x1240 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 142 0x1240 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x124a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/abs.hpp: +abs.hpp 32 0x124a 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x124a 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 161 0x124a 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 38 0x1254 x +mul_acc32_fp.hpp 36 0x1258 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x125c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x125c 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 38 0x125c 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 166 0x125c 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1266 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1266 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 147 0x1266 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 21 0x1270 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1270 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1280 x +max_min.hpp 21 0x1290 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1290 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 39 0x1290 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/abs.hpp: +abs.hpp 32 0x12a0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x12b0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 39 0x12b0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/abs.hpp: +abs.hpp 32 0x12b8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x12bc x +mul_acc32_fp.hpp 36 0x12c0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x12c4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x12c4 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 166 0x12c4 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x12ca + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x12ca 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 147 0x12ca 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 38 0x12ce x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x12d2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 38 0x12d2 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x12da + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 39 0x12de x +mul_acc32_fp.hpp 39 0x12e2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 129 0x12e8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x12f0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x12f0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 166 0x12f0 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x12f4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x12f4 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 147 0x12f4 2 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 210 0x1300 x +superkernels.cpp 215 0x1300 1 +superkernels.cpp 215 0x1306 x +superkernels.cpp 210 0x130c +superkernels.cpp 212 0x131a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x1324 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 215 0x132c +superkernels.cpp 215 0x132c 1 +superkernels.cpp 212 0x1332 x +superkernels.cpp 212 0x1336 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x133e + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 212 0x133e 1 +superkernels.cpp 218 0x1346 +superkernels.cpp 229 0x1346 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x134c +tile.hpp 74 0x1352 +tile.hpp 86 0x1352 1 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 218 0x135e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x1368 +tile.hpp 74 0x136c +tile.hpp 74 0x1370 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 222 0x1380 +superkernels.cpp 222 0x1386 x +superkernels.cpp 222 0x1386 1 +superkernels.cpp 220 0x1390 +superkernels.cpp 222 0x1390 1 +superkernels.cpp 229 0x1390 2 +superkernels.cpp 220 0x139a x +superkernels.cpp 222 0x139a 1 +superkernels.cpp 227 0x139a 2 +superkernels.cpp 220 0x13ae +superkernels.cpp 222 0x13b6 x +superkernels.cpp 220 0x13ba x +superkernels.cpp 222 0x13c0 x +superkernels.cpp 227 0x13d0 +superkernels.cpp 229 0x13d0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x13e0 x +io_buffer_main.h 242 0x13e8 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 226 0x13e8 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x13f2 +io_buffer_main.h 242 0x13f6 +io_buffer_main.h 259 0x13fa x +io_buffer_main.h 242 0x1408 x +io_buffer_main.h 242 0x1408 1 x +io_buffer_main.h 242 0x140c +io_buffer_main.h 419 0x1410 +io_buffer_main.h 419 0x141a x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 227 0x141e +superkernels.cpp 226 0x1428 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x142c x +io_buffer_main.h 348 0x142c 1 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 227 0x1432 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x1436 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 229 0x143c x +superkernels.cpp 226 0x1444 x +superkernels.cpp 226 0x1448 +superkernels.cpp 227 0x144c x +superkernels.cpp 227 0x1450 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 348 0x1460 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 231 0x1460 1 +superkernels.cpp 232 0x1460 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 351 0x146a +io_buffer_main.h 449 0x146a 1 +io_buffer_main.h 449 0x1478 x +io_buffer_main.h 351 0x147c x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 231 0x147c 1 +superkernels.cpp 231 0x1486 x +superkernels.cpp 231 0x148a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 351 0x1496 x +io_buffer_main.h 351 0x149a + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 231 0x149e x +superkernels.cpp 231 0x14a2 +superkernels.cpp 232 0x14b2 +superkernels.cpp 232 0x14b6 x +superkernels.cpp 234 0x14c0 +superkernels.cpp 234 0x14d4 x +superkernels.cpp 234 0x14dc + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 100 0x14f0 x +elementwise_binary.h 103 0x14f0 1 x +elementwise_binary.h 101 0x14f4 +elementwise_binary.h 101 0x14fa +elementwise_binary.h 101 0x14fe x +elementwise_binary.h 101 0x1502 +elementwise_binary.h 89 0x1510 x +elementwise_binary.h 92 0x1510 1 +elementwise_binary.h 92 0x1510 2 x +elementwise_binary.h 89 0x151a +elementwise_binary.h 92 0x152c x +elementwise_binary.h 93 0x1530 x +elementwise_binary.h 93 0x1540 +elementwise_binary.h 94 0x1544 x +elementwise_binary.h 94 0x1554 +elementwise_binary.h 95 0x1558 x +elementwise_binary.h 96 0x1560 x +elementwise_binary.h 95 0x156e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h: +mul_impl.h 93 0x1572 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 98 0x1580 +elementwise_binary.h 98 0x1592 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h: +mul_impl.h 93 0x159c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 98 0x15a0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h: +mul_impl.h 93 0x15a0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 108 0x15b0 x +elementwise_binary.h 115 0x15b0 1 +elementwise_binary.h 115 0x15b0 2 +elementwise_binary.h 115 0x15ba x +elementwise_binary.h 115 0x15ba 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x15c4 +mul_acc32_fp.hpp 36 0x15c4 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 115 0x15c4 2 +elementwise_binary.h 115 0x15c4 3 +elementwise_binary.h 115 0x15ce +elementwise_binary.h 127 0x15ce 1 x +elementwise_binary.h 115 0x15d8 x +elementwise_binary.h 127 0x15d8 1 +elementwise_binary.h 115 0x15e8 +elementwise_binary.h 127 0x15ec x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x15f0 x +vector.hpp 1139 0x15f0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0x15f0 2 x +elementwise_binary.h 148 0x15f0 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x15f6 +vector.hpp 1139 0x15f6 1 +vector.hpp 1159 0x15f6 2 +vector.hpp 1159 0x15f6 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x15f6 4 +accum.hpp 1119 0x15f6 5 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 154 0x15f6 6 +elementwise_binary.h 170 0x15f6 7 x +elementwise_binary.h 172 0x15f6 8 x +elementwise_binary.h 177 0x15f6 9 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x15fe x +vector.hpp 1139 0x15fe 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0x15fe 2 x +elementwise_binary.h 148 0x15fe 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1604 +vector.hpp 1139 0x1604 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 170 0x1604 2 x +elementwise_binary.h 172 0x1604 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x160a x +vector.hpp 1139 0x160a 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0x160a 2 x +elementwise_binary.h 148 0x160a 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1610 +vector.hpp 1139 0x1610 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 170 0x1610 2 x +elementwise_binary.h 172 0x1610 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1616 x +vector.hpp 1139 0x1616 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0x1616 2 x +elementwise_binary.h 148 0x1616 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x161c +vector.hpp 1139 0x161c 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x161c 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 170 0x161c 3 x +elementwise_binary.h 172 0x161c 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1626 x +vector.hpp 1139 0x1626 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x1626 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0x1626 3 x +elementwise_binary.h 148 0x1626 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1630 +vector.hpp 1139 0x1630 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x1630 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 170 0x1630 3 x +elementwise_binary.h 172 0x1630 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x163a x +vector.hpp 1139 0x163a 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x163a 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0x163a 3 x +elementwise_binary.h 148 0x163a 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1644 +vector.hpp 1139 0x1644 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x1644 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 170 0x1644 3 x +elementwise_binary.h 172 0x1644 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1650 x +vector.hpp 1139 0x1650 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x1650 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0x1650 3 x +elementwise_binary.h 148 0x1650 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1660 +vector.hpp 1139 0x1660 1 +vector.hpp 1159 0x1660 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1660 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x1660 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 170 0x1660 5 x +elementwise_binary.h 172 0x1660 6 x +elementwise_binary.h 177 0x1660 7 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1670 x +vector.hpp 1139 0x1670 1 x +vector.hpp 1159 0x1670 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1670 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x1670 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0x1670 5 x +elementwise_binary.h 148 0x1670 6 x +elementwise_binary.h 154 0x1670 7 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1680 +vector.hpp 1139 0x1680 1 +vector.hpp 1159 0x1680 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1680 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x1680 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 170 0x1680 5 x +elementwise_binary.h 172 0x1680 6 x +elementwise_binary.h 177 0x1680 7 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1690 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1690 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x1690 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 154 0x1690 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1698 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1698 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x1698 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 177 0x1698 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x16a0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x16a0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x16a0 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 154 0x16a0 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x16a8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x16a8 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x16a8 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 177 0x16a8 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x16b0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x16b0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x16b0 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 154 0x16b0 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x16b8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x16b8 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x16b8 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 177 0x16b8 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x16c0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x16c0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x16c0 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 154 0x16c0 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x16c8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x16c8 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 177 0x16c8 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x16cc + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x16cc 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 131 0x16cc 2 x +elementwise_binary.h 154 0x16cc 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x16d2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x16d2 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 177 0x16d2 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x16d6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x16d6 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 154 0x16d6 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x16da x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x16da 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 177 0x16da 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x16de + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x16de 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 154 0x16de 2 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 277 0x16f0 x +superkernels.cpp 282 0x16f0 1 +superkernels.cpp 282 0x16f6 x +superkernels.cpp 277 0x16fc + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x1702 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 279 0x1702 1 x +superkernels.cpp 282 0x171e x +superkernels.cpp 282 0x171e 1 x +superkernels.cpp 279 0x1724 x +superkernels.cpp 279 0x1728 +superkernels.cpp 279 0x172e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x1736 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 285 0x173a +superkernels.cpp 287 0x173a 1 +superkernels.cpp 289 0x173a 2 +superkernels.cpp 301 0x173a 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x1744 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 285 0x1744 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 74 0x174e +tile.hpp 86 0x174e 1 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 285 0x175a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x1764 +tile.hpp 74 0x1768 +tile.hpp 74 0x176c x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 289 0x1770 +superkernels.cpp 289 0x1770 1 x +superkernels.cpp 289 0x177a +superkernels.cpp 289 0x177a 1 +superkernels.cpp 298 0x177a 2 +superkernels.cpp 287 0x1784 x +superkernels.cpp 290 0x1784 1 +superkernels.cpp 299 0x1784 2 +superkernels.cpp 287 0x179a +superkernels.cpp 289 0x17a0 x +superkernels.cpp 287 0x17a4 x +superkernels.cpp 289 0x17a8 x +superkernels.cpp 290 0x17ac x +superkernels.cpp 298 0x17b0 +superkernels.cpp 299 0x17b6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x17c0 x +io_buffer_main.h 242 0x17c4 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 293 0x17c4 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x17ce +io_buffer_main.h 242 0x17d2 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 293 0x17d6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 259 0x17da x +io_buffer_main.h 242 0x17e6 x +io_buffer_main.h 242 0x17e6 1 x +io_buffer_main.h 242 0x17ea + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 293 0x17ea 1 x +superkernels.cpp 293 0x17f0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 419 0x17f4 +io_buffer_main.h 419 0x17f4 1 +io_buffer_main.h 419 0x17fe x +io_buffer_main.h 242 0x1802 x +io_buffer_main.h 242 0x180a +io_buffer_main.h 242 0x180e +io_buffer_main.h 242 0x1812 +io_buffer_main.h 259 0x1816 x +io_buffer_main.h 242 0x1824 x +io_buffer_main.h 242 0x1824 1 x +io_buffer_main.h 242 0x1828 +io_buffer_main.h 419 0x1834 x +io_buffer_main.h 348 0x1838 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 298 0x1838 1 +superkernels.cpp 299 0x1838 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x1846 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 298 0x184a x +superkernels.cpp 299 0x1850 x +superkernels.cpp 301 0x1850 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x1856 x +io_buffer_main.h 149 0x185a +io_buffer_main.h 351 0x185e +io_buffer_main.h 351 0x185e 1 +io_buffer_main.h 149 0x1864 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 301 0x186a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 351 0x1870 +io_buffer_main.h 351 0x1870 1 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 298 0x1874 x +superkernels.cpp 299 0x1878 x +superkernels.cpp 299 0x187c +superkernels.cpp 298 0x1880 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 348 0x1890 x +io_buffer_main.h 351 0x1890 1 +io_buffer_main.h 351 0x1890 2 +io_buffer_main.h 351 0x1890 3 +io_buffer_main.h 351 0x1890 4 +io_buffer_main.h 449 0x1890 5 +io_buffer_main.h 449 0x1890 6 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 305 0x189a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 449 0x18aa x +io_buffer_main.h 351 0x18ae x +io_buffer_main.h 348 0x18b2 +io_buffer_main.h 351 0x18c0 +io_buffer_main.h 348 0x18c4 x +io_buffer_main.h 351 0x18c4 1 +io_buffer_main.h 449 0x18d6 x +io_buffer_main.h 351 0x18da x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 305 0x18da 1 +superkernels.cpp 306 0x18da 2 +superkernels.cpp 305 0x18e4 x +superkernels.cpp 305 0x18e8 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 351 0x18f4 x +io_buffer_main.h 351 0x18f8 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 305 0x18fc x +superkernels.cpp 305 0x1900 +superkernels.cpp 306 0x1910 +superkernels.cpp 306 0x1914 x +superkernels.cpp 308 0x1920 +superkernels.cpp 308 0x1936 x +superkernels.cpp 308 0x193e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16_params.h: +conv2d_dw_bf16_params.h 177 0x1950 x +conv2d_dw_bf16_params.h 181 0x1950 1 +conv2d_dw_bf16_params.h 181 0x1950 2 x +conv2d_dw_bf16_params.h 181 0x195a x +conv2d_dw_bf16_params.h 184 0x195a 1 +conv2d_dw_bf16_params.h 184 0x195a 2 +conv2d_dw_bf16_params.h 177 0x1964 +conv2d_dw_bf16_params.h 184 0x196a +conv2d_dw_bf16_params.h 181 0x1980 +conv2d_dw_bf16_params.h 181 0x1984 +conv2d_dw_bf16_params.h 181 0x1988 +conv2d_dw_bf16_params.h 181 0x198c +conv2d_dw_bf16_params.h 181 0x199a +conv2d_dw_bf16_params.h 181 0x199e +conv2d_dw_bf16_params.h 184 0x19a2 x +conv2d_dw_bf16_params.h 184 0x19a6 +conv2d_dw_bf16_params.h 184 0x19aa +conv2d_dw_bf16_params.h 184 0x19b6 +conv2d_dw_bf16_params.h 184 0x19bc +conv2d_dw_bf16_params.h 184 0x19c2 +conv2d_dw_bf16_params.h 184 0x19c2 1 +conv2d_dw_bf16_params.h 184 0x19cc +conv2d_dw_bf16_params.h 184 0x19d2 +conv2d_dw_bf16_params.h 184 0x19d6 +conv2d_dw_bf16_params.h 184 0x19e0 +conv2d_dw_bf16_params.h 184 0x19e0 1 +conv2d_dw_bf16_params.h 185 0x19e0 2 x +conv2d_dw_bf16_params.h 185 0x19e0 3 +conv2d_dw_bf16_params.h 184 0x19ea x +conv2d_dw_bf16_params.h 185 0x19ea 1 +conv2d_dw_bf16_params.h 184 0x19f0 +conv2d_dw_bf16_params.h 185 0x19f0 1 +conv2d_dw_bf16_params.h 184 0x19f6 +conv2d_dw_bf16_params.h 185 0x19fe x +conv2d_dw_bf16_params.h 184 0x1a04 x +conv2d_dw_bf16_params.h 184 0x1a08 +conv2d_dw_bf16_params.h 185 0x1a08 1 x +conv2d_dw_bf16_params.h 185 0x1a12 +conv2d_dw_bf16_params.h 185 0x1a16 +conv2d_dw_bf16_params.h 185 0x1a1a +conv2d_dw_bf16_params.h 185 0x1a20 +conv2d_dw_bf16_params.h 186 0x1a20 1 +conv2d_dw_bf16_params.h 192 0x1a20 2 +conv2d_dw_bf16_params.h 198 0x1a20 3 +conv2d_dw_bf16_params.h 200 0x1a20 4 +conv2d_dw_bf16_params.h 200 0x1a20 5 +conv2d_dw_bf16_params.h 216 0x1a20 6 +conv2d_dw_bf16_params.h 235 0x1a20 7 +conv2d_dw_bf16_params.h 237 0x1a20 8 +conv2d_dw_bf16_params.h 239 0x1a20 9 +conv2d_dw_bf16_params.h 185 0x1a2a +conv2d_dw_bf16_params.h 192 0x1a2a 1 x +conv2d_dw_bf16_params.h 200 0x1a2a 2 +conv2d_dw_bf16_params.h 200 0x1a2a 3 +conv2d_dw_bf16_params.h 209 0x1a2a 4 +conv2d_dw_bf16_params.h 219 0x1a2a 5 +conv2d_dw_bf16_params.h 226 0x1a2a 6 +conv2d_dw_bf16_params.h 230 0x1a2a 7 +conv2d_dw_bf16_params.h 237 0x1a2a 8 +conv2d_dw_bf16_params.h 197 0x1a34 +conv2d_dw_bf16_params.h 198 0x1a34 1 +conv2d_dw_bf16_params.h 200 0x1a34 2 +conv2d_dw_bf16_params.h 234 0x1a34 3 +conv2d_dw_bf16_params.h 186 0x1a3e +conv2d_dw_bf16_params.h 197 0x1a3e 1 +conv2d_dw_bf16_params.h 201 0x1a3e 2 +conv2d_dw_bf16_params.h 208 0x1a3e 3 +conv2d_dw_bf16_params.h 226 0x1a3e 4 +conv2d_dw_bf16_params.h 230 0x1a3e 5 +conv2d_dw_bf16_params.h 239 0x1a3e 6 +conv2d_dw_bf16_params.h 239 0x1a3e 7 +conv2d_dw_bf16_params.h 186 0x1a48 +conv2d_dw_bf16_params.h 198 0x1a48 1 +conv2d_dw_bf16_params.h 201 0x1a48 2 +conv2d_dw_bf16_params.h 234 0x1a48 3 +conv2d_dw_bf16_params.h 208 0x1a52 +conv2d_dw_bf16_params.h 208 0x1a52 1 +conv2d_dw_bf16_params.h 209 0x1a52 2 +conv2d_dw_bf16_params.h 214 0x1a52 3 +conv2d_dw_bf16_params.h 216 0x1a52 4 +conv2d_dw_bf16_params.h 221 0x1a52 5 +conv2d_dw_bf16_params.h 230 0x1a52 6 +conv2d_dw_bf16_params.h 237 0x1a52 7 +conv2d_dw_bf16_params.h 185 0x1a5c x +conv2d_dw_bf16_params.h 232 0x1a5c 1 +conv2d_dw_bf16_params.h 232 0x1a5c 2 +conv2d_dw_bf16_params.h 185 0x1a66 +conv2d_dw_bf16_params.h 185 0x1a6a +conv2d_dw_bf16_params.h 185 0x1a6e +conv2d_dw_bf16_params.h 206 0x1a6e 1 +conv2d_dw_bf16_params.h 185 0x1a74 +conv2d_dw_bf16_params.h 186 0x1a74 1 +conv2d_dw_bf16_params.h 185 0x1a7a +conv2d_dw_bf16_params.h 192 0x1a7a 1 x +conv2d_dw_bf16_params.h 197 0x1a80 +conv2d_dw_bf16_params.h 198 0x1a80 1 x +conv2d_dw_bf16_params.h 200 0x1a86 x +conv2d_dw_bf16_params.h 198 0x1a8a x +conv2d_dw_bf16_params.h 200 0x1a8e x +conv2d_dw_bf16_params.h 209 0x1a8e 1 +conv2d_dw_bf16_params.h 213 0x1a8e 2 +conv2d_dw_bf16_params.h 198 0x1a94 x +conv2d_dw_bf16_params.h 197 0x1a98 x +conv2d_dw_bf16_params.h 197 0x1a9c +conv2d_dw_bf16_params.h 204 0x1a9c 1 +conv2d_dw_bf16_params.h 201 0x1aa2 x +conv2d_dw_bf16_params.h 186 0x1aa6 x +conv2d_dw_bf16_params.h 197 0x1aa6 1 x +conv2d_dw_bf16_params.h 186 0x1aac +conv2d_dw_bf16_params.h 198 0x1aac 1 x +conv2d_dw_bf16_params.h 212 0x1aac 2 +conv2d_dw_bf16_params.h 219 0x1aac 3 +conv2d_dw_bf16_params.h 186 0x1ab6 x +conv2d_dw_bf16_params.h 200 0x1ab6 1 x +conv2d_dw_bf16_params.h 193 0x1abc x +conv2d_dw_bf16_params.h 201 0x1abc 1 x +conv2d_dw_bf16_params.h 204 0x1ac2 x +conv2d_dw_bf16_params.h 212 0x1ac2 1 x +conv2d_dw_bf16_params.h 208 0x1ac8 x +conv2d_dw_bf16_params.h 208 0x1acc +conv2d_dw_bf16_params.h 208 0x1ad0 +conv2d_dw_bf16_params.h 214 0x1ad0 1 x +conv2d_dw_bf16_params.h 200 0x1ad6 +conv2d_dw_bf16_params.h 205 0x1ad6 1 +conv2d_dw_bf16_params.h 208 0x1ad6 2 x +conv2d_dw_bf16_params.h 214 0x1ad6 3 +conv2d_dw_bf16_params.h 219 0x1ad6 4 +conv2d_dw_bf16_params.h 205 0x1ae0 x +conv2d_dw_bf16_params.h 216 0x1ae0 1 x +conv2d_dw_bf16_params.h 206 0x1ae6 x +conv2d_dw_bf16_params.h 209 0x1ae6 1 x +conv2d_dw_bf16_params.h 207 0x1aec x +conv2d_dw_bf16_params.h 213 0x1aec 1 x +conv2d_dw_bf16_params.h 209 0x1af2 x +conv2d_dw_bf16_params.h 213 0x1af2 1 +conv2d_dw_bf16_params.h 209 0x1af8 +conv2d_dw_bf16_params.h 209 0x1af8 1 +conv2d_dw_bf16_params.h 216 0x1af8 2 +conv2d_dw_bf16_params.h 209 0x1b02 +conv2d_dw_bf16_params.h 209 0x1b06 +conv2d_dw_bf16_params.h 211 0x1b06 1 x +conv2d_dw_bf16_params.h 216 0x1b06 2 x +conv2d_dw_bf16_params.h 211 0x1b10 +conv2d_dw_bf16_params.h 216 0x1b10 1 +conv2d_dw_bf16_params.h 212 0x1b16 x +conv2d_dw_bf16_params.h 216 0x1b16 1 +conv2d_dw_bf16_params.h 213 0x1b1c x +conv2d_dw_bf16_params.h 216 0x1b1c 1 x +conv2d_dw_bf16_params.h 224 0x1b1c 2 +conv2d_dw_bf16_params.h 224 0x1b1c 3 +conv2d_dw_bf16_params.h 214 0x1b26 x +conv2d_dw_bf16_params.h 216 0x1b26 1 +conv2d_dw_bf16_params.h 222 0x1b26 2 +conv2d_dw_bf16_params.h 225 0x1b26 3 +conv2d_dw_bf16_params.h 229 0x1b26 4 +conv2d_dw_bf16_params.h 239 0x1b26 5 +conv2d_dw_bf16_params.h 215 0x1b30 x +conv2d_dw_bf16_params.h 219 0x1b30 1 x +conv2d_dw_bf16_params.h 200 0x1b36 x +conv2d_dw_bf16_params.h 218 0x1b36 1 x +conv2d_dw_bf16_params.h 219 0x1b36 2 +conv2d_dw_bf16_params.h 219 0x1b3c x +conv2d_dw_bf16_params.h 221 0x1b3c 1 x +conv2d_dw_bf16_params.h 219 0x1b42 +conv2d_dw_bf16_params.h 221 0x1b42 1 +conv2d_dw_bf16_params.h 220 0x1b48 x +conv2d_dw_bf16_params.h 221 0x1b48 1 +conv2d_dw_bf16_params.h 224 0x1b48 2 x +conv2d_dw_bf16_params.h 224 0x1b48 3 x +conv2d_dw_bf16_params.h 221 0x1b52 x +conv2d_dw_bf16_params.h 230 0x1b52 1 x +conv2d_dw_bf16_params.h 222 0x1b58 x +conv2d_dw_bf16_params.h 226 0x1b58 1 x +conv2d_dw_bf16_params.h 224 0x1b5e x +conv2d_dw_bf16_params.h 226 0x1b5e 1 +conv2d_dw_bf16_params.h 225 0x1b64 x +conv2d_dw_bf16_params.h 230 0x1b64 1 x +conv2d_dw_bf16_params.h 226 0x1b6a x +conv2d_dw_bf16_params.h 228 0x1b6e x +conv2d_dw_bf16_params.h 229 0x1b72 x +conv2d_dw_bf16_params.h 230 0x1b76 x +conv2d_dw_bf16_params.h 232 0x1b7a x +conv2d_dw_bf16_params.h 232 0x1b8a +conv2d_dw_bf16_params.h 232 0x1b8a 1 +conv2d_dw_bf16_params.h 190 0x1b90 +conv2d_dw_bf16_params.h 190 0x1b94 x +conv2d_dw_bf16_params.h 232 0x1b98 +conv2d_dw_bf16_params.h 232 0x1ba2 +conv2d_dw_bf16_params.h 232 0x1bb0 +conv2d_dw_bf16_params.h 236 0x1bb0 1 +conv2d_dw_bf16_params.h 232 0x1bba x +conv2d_dw_bf16_params.h 232 0x1bba 1 x +conv2d_dw_bf16_params.h 234 0x1bba 2 x +conv2d_dw_bf16_params.h 232 0x1bc4 +conv2d_dw_bf16_params.h 239 0x1bc4 1 +conv2d_dw_bf16_params.h 237 0x1bce +conv2d_dw_bf16_params.h 239 0x1bce 1 x +conv2d_dw_bf16_params.h 240 0x1bce 2 +conv2d_dw_bf16_params.h 239 0x1bd8 +conv2d_dw_bf16_params.h 232 0x1be4 x +conv2d_dw_bf16_params.h 232 0x1be8 +conv2d_dw_bf16_params.h 234 0x1be8 1 x +conv2d_dw_bf16_params.h 232 0x1bee x +conv2d_dw_bf16_params.h 239 0x1bee 1 x +conv2d_dw_bf16_params.h 234 0x1bf8 x +conv2d_dw_bf16_params.h 239 0x1bf8 1 +conv2d_dw_bf16_params.h 232 0x1bfe x +conv2d_dw_bf16_params.h 235 0x1bfe 1 x +conv2d_dw_bf16_params.h 236 0x1c04 x +conv2d_dw_bf16_params.h 239 0x1c08 x +conv2d_dw_bf16_params.h 234 0x1c0c x +conv2d_dw_bf16_params.h 237 0x1c0c 1 x +conv2d_dw_bf16_params.h 235 0x1c12 x +conv2d_dw_bf16_params.h 239 0x1c12 1 x +conv2d_dw_bf16_params.h 236 0x1c18 x +conv2d_dw_bf16_params.h 239 0x1c18 1 +conv2d_dw_bf16_params.h 237 0x1c1e x +conv2d_dw_bf16_params.h 240 0x1c22 x +conv2d_dw_bf16_params.h 239 0x1c2e x +conv2d_dw_bf16_params.h 239 0x1c32 +conv2d_dw_bf16_params.h 237 0x1c36 x +conv2d_dw_bf16_params.h 237 0x1c3a +conv2d_dw_bf16_params.h 237 0x1c3e +conv2d_dw_bf16_params.h 238 0x1c42 x +conv2d_dw_bf16_params.h 239 0x1c46 x +conv2d_dw_bf16_params.h 239 0x1c4a +conv2d_dw_bf16_params.h 240 0x1c4a 1 x +conv2d_dw_bf16_params.h 239 0x1c50 x +conv2d_dw_bf16_params.h 239 0x1c54 +conv2d_dw_bf16_params.h 239 0x1c58 +conv2d_dw_bf16_params.h 239 0x1c5c +conv2d_dw_bf16_params.h 239 0x1c60 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1c70 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 158 0x1c70 1 x +conv2d_dw_bf16.h 179 0x1c70 2 +conv2d_dw_bf16.h 183 0x1c70 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1c7c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1c7c 1 +shuffle.hpp 153 0x1c7c 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 179 0x1c7c 3 x +conv2d_dw_bf16.h 202 0x1c7c 4 +conv2d_dw_bf16.h 179 0x1c88 +conv2d_dw_bf16.h 202 0x1c88 1 +conv2d_dw_bf16.h 228 0x1c88 2 +conv2d_dw_bf16.h 229 0x1c88 3 +conv2d_dw_bf16.h 230 0x1c88 4 +conv2d_dw_bf16.h 231 0x1c88 5 +conv2d_dw_bf16.h 232 0x1c88 6 +conv2d_dw_bf16.h 233 0x1c88 7 +conv2d_dw_bf16.h 234 0x1c88 8 +conv2d_dw_bf16.h 235 0x1c88 9 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x1c92 +aie_core.h 81 0x1c92 1 +aie_core.h 100 0x1c92 2 +aie_core.h 100 0x1c92 3 +aie_core.h 100 0x1c92 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1c92 5 +vector.hpp 1139 0x1c92 6 +vector.hpp 1159 0x1c92 7 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1c92 8 +shuffle.hpp 153 0x1c92 9 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 179 0x1c92 10 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x1c9c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1c9c 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 179 0x1c9c 2 +conv2d_dw_bf16.h 208 0x1c9c 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1ca8 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1ca8 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 179 0x1ca8 2 x +conv2d_dw_bf16.h 208 0x1ca8 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1cb4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1cb4 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 180 0x1cb4 2 x +conv2d_dw_bf16.h 202 0x1cb4 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x1cc0 +aie_core.h 100 0x1cc0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1cc0 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1cc0 3 +shuffle.hpp 153 0x1cc0 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 180 0x1cc0 5 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1ccc + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1ccc 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 180 0x1ccc 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1cd6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1cd6 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 180 0x1cd6 2 x +conv2d_dw_bf16.h 180 0x1cdc +conv2d_dw_bf16.h 181 0x1ce0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1ce4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1ce4 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 181 0x1ce4 2 +conv2d_dw_bf16.h 181 0x1cea x +conv2d_dw_bf16.h 181 0x1cee +conv2d_dw_bf16.h 181 0x1cf2 +conv2d_dw_bf16.h 182 0x1cf6 x +conv2d_dw_bf16.h 182 0x1cfa +conv2d_dw_bf16.h 182 0x1cfe +conv2d_dw_bf16.h 183 0x1d02 x +conv2d_dw_bf16.h 183 0x1d06 +conv2d_dw_bf16.h 183 0x1d0a +conv2d_dw_bf16.h 202 0x1d0e x +conv2d_dw_bf16.h 202 0x1d12 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x1d1a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1d1a 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 202 0x1d1a 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1d20 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x1d20 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 202 0x1d26 x +conv2d_dw_bf16.h 202 0x1d2e +conv2d_dw_bf16.h 226 0x1d3c x +conv2d_dw_bf16.h 208 0x1d40 x +conv2d_dw_bf16.h 228 0x1d44 +conv2d_dw_bf16.h 228 0x1d44 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1d50 +accum.hpp 1119 0x1d50 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 223 0x1d50 2 x +conv2d_dw_bf16.h 232 0x1d50 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x1d60 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1d60 1 x +vector.hpp 1139 0x1d60 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1d60 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1d6a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 224 0x1d6a 1 x +conv2d_dw_bf16.h 229 0x1d6a 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1d74 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1d74 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1d74 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 233 0x1d74 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1d7e +shuffle.hpp 153 0x1d82 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 230 0x1d82 1 x +conv2d_dw_bf16.h 225 0x1d8a x +conv2d_dw_bf16.h 234 0x1d8a 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1d92 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 226 0x1d96 x +conv2d_dw_bf16.h 231 0x1d96 1 x +conv2d_dw_bf16.h 223 0x1d9e x +conv2d_dw_bf16.h 235 0x1d9e 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1da6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 228 0x1dac x +conv2d_dw_bf16.h 232 0x1db0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x1dc0 x +aie_core.h 100 0x1dc0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1dc0 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 224 0x1dca x +conv2d_dw_bf16.h 229 0x1dca 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1dd2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 233 0x1dd2 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1dda + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 225 0x1dde x +conv2d_dw_bf16.h 230 0x1dde 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1de6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 234 0x1de6 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x1df0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1df0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1df0 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 208 0x1df6 x +conv2d_dw_bf16.h 231 0x1df6 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1e00 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x1e00 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 208 0x1e00 2 +conv2d_dw_bf16.h 235 0x1e00 3 x +conv2d_dw_bf16.h 208 0x1e0c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1e10 x +vector.hpp 1139 0x1e14 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1e18 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1e18 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1e1c x +accum.hpp 1119 0x1e20 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1e24 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 244 0x1e28 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1e2c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1e2c 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 243 0x1e2c 2 x +conv2d_dw_bf16.h 226 0x1e34 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1e38 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1e38 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 228 0x1e38 2 +conv2d_dw_bf16.h 223 0x1e40 x +conv2d_dw_bf16.h 228 0x1e40 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1e48 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 232 0x1e48 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x1e50 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1e50 1 x +vector.hpp 1139 0x1e50 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1e50 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1e5a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 224 0x1e5a 1 x +conv2d_dw_bf16.h 229 0x1e5a 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1e64 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1e64 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1e64 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 233 0x1e64 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1e6e +shuffle.hpp 153 0x1e72 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 230 0x1e72 1 x +conv2d_dw_bf16.h 225 0x1e7a x +conv2d_dw_bf16.h 234 0x1e7a 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1e82 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 226 0x1e86 x +conv2d_dw_bf16.h 231 0x1e86 1 x +conv2d_dw_bf16.h 223 0x1e8e x +conv2d_dw_bf16.h 235 0x1e8e 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1e96 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 228 0x1e9c x +conv2d_dw_bf16.h 232 0x1ea0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x1eb0 x +aie_core.h 100 0x1eb0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1eb0 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 202 0x1eb0 3 x +conv2d_dw_bf16.h 224 0x1ebc x +conv2d_dw_bf16.h 229 0x1ebc 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1ec4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 233 0x1ec4 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1ecc + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 225 0x1ed0 x +conv2d_dw_bf16.h 230 0x1ed0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1ed8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 234 0x1ed8 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1ee0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 231 0x1ee4 x +conv2d_dw_bf16.h 235 0x1ee8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1ef4 x +accum.hpp 1119 0x1ef8 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 248 0x1ef8 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1efe x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 244 0x1f02 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1f06 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1f0a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 243 0x1f0a 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1f0e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1f0e 1 +vector.hpp 1139 0x1f20 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 254 0x1f20 1 x +conv2d_dw_bf16.h 255 0x1f20 2 +conv2d_dw_bf16.h 255 0x1f20 3 +conv2d_dw_bf16.h 261 0x1f20 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f2c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 255 0x1f2c 1 x +conv2d_dw_bf16.h 262 0x1f2c 2 x +conv2d_dw_bf16.h 270 0x1f2c 3 +conv2d_dw_bf16.h 255 0x1f38 +conv2d_dw_bf16.h 258 0x1f38 1 x +conv2d_dw_bf16.h 255 0x1f42 x +conv2d_dw_bf16.h 258 0x1f42 1 +conv2d_dw_bf16.h 255 0x1f4c +conv2d_dw_bf16.h 263 0x1f4c 1 +conv2d_dw_bf16.h 264 0x1f4c 2 +conv2d_dw_bf16.h 266 0x1f4c 3 +conv2d_dw_bf16.h 255 0x1f56 +conv2d_dw_bf16.h 267 0x1f56 1 +conv2d_dw_bf16.h 255 0x1f5c +conv2d_dw_bf16.h 255 0x1f60 +conv2d_dw_bf16.h 258 0x1f64 x +conv2d_dw_bf16.h 258 0x1f7a x +conv2d_dw_bf16.h 270 0x1f7a 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1f80 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 270 0x1f80 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f90 x +vector.hpp 1139 0x1f90 1 x +vector.hpp 1159 0x1f90 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 261 0x1f90 3 x +conv2d_dw_bf16.h 262 0x1f90 4 x +conv2d_dw_bf16.h 263 0x1f90 5 x +conv2d_dw_bf16.h 270 0x1f90 6 +conv2d_dw_bf16.h 264 0x1fa0 x +conv2d_dw_bf16.h 266 0x1fb0 x +conv2d_dw_bf16.h 267 0x1fc0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x1fd0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1fd0 1 x +vector.hpp 1159 0x1fe0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 270 0x1fe0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2000 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 263 0x2000 1 x +conv2d_dw_bf16.h 270 0x2000 2 +conv2d_dw_bf16.h 274 0x2000 3 x +conv2d_dw_bf16.h 264 0x200a x +conv2d_dw_bf16.h 266 0x200e x +conv2d_dw_bf16.h 267 0x2012 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x2016 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2016 1 x +vector.hpp 1159 0x201a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 270 0x201a 1 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 352 0x2190 x +superkernels.cpp 357 0x2190 1 +superkernels.cpp 357 0x2196 x +superkernels.cpp 352 0x219c +superkernels.cpp 375 0x21aa +superkernels.cpp 360 0x21ba +superkernels.cpp 357 0x21c2 +superkernels.cpp 357 0x21c2 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x21c8 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 354 0x21cc x +superkernels.cpp 354 0x21d0 +superkernels.cpp 354 0x21d4 +superkernels.cpp 354 0x21da + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x21de + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 369 0x21de 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 74 0x21e8 +tile.hpp 86 0x21e8 1 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 359 0x21e8 2 +superkernels.cpp 359 0x21f6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x2200 +tile.hpp 74 0x2204 +tile.hpp 74 0x2208 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 360 0x2210 +superkernels.cpp 369 0x2210 1 +superkernels.cpp 360 0x2218 x +superkernels.cpp 361 0x221c +superkernels.cpp 361 0x221c 1 x +superkernels.cpp 360 0x222e +superkernels.cpp 365 0x222e 1 +superkernels.cpp 360 0x2238 x +superkernels.cpp 361 0x223c x +superkernels.cpp 365 0x2240 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x2250 x +io_buffer_main.h 242 0x2254 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 365 0x2254 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x225e +io_buffer_main.h 242 0x2262 +io_buffer_main.h 259 0x2266 x +io_buffer_main.h 242 0x2274 x +io_buffer_main.h 242 0x2274 1 x +io_buffer_main.h 242 0x2278 +io_buffer_main.h 419 0x227c +io_buffer_main.h 419 0x2286 x +io_buffer_main.h 351 0x228a +io_buffer_main.h 449 0x228a 1 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 364 0x228a 2 +superkernels.cpp 367 0x228a 3 +superkernels.cpp 372 0x228a 4 +superkernels.cpp 373 0x228a 5 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/detail/io_buffer_impl.h: +io_buffer_impl.h 76 0x2294 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x229e +io_buffer_main.h 348 0x229e 1 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 365 0x22a4 x +superkernels.cpp 365 0x22a8 +superkernels.cpp 369 0x22a8 1 +superkernels.cpp 364 0x22b2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x22bc x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 365 0x22c2 x +superkernels.cpp 364 0x22c6 x +superkernels.cpp 367 0x22ca x +superkernels.cpp 369 0x22ce x +superkernels.cpp 364 0x22d4 x +superkernels.cpp 367 0x22d8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/detail/io_buffer_impl.h: +io_buffer_impl.h 225 0x22dc x +io_buffer_impl.h 76 0x22e0 x +io_buffer_impl.h 76 0x22e4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 348 0x22f0 +io_buffer_main.h 348 0x22f4 x +io_buffer_main.h 449 0x2304 x +io_buffer_main.h 351 0x2308 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 372 0x2308 1 +superkernels.cpp 372 0x2312 x +superkernels.cpp 372 0x2316 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 351 0x2326 x +io_buffer_main.h 351 0x232a + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 372 0x232e x +superkernels.cpp 372 0x2332 +superkernels.cpp 373 0x2338 +superkernels.cpp 373 0x2344 x +superkernels.cpp 375 0x2350 +superkernels.cpp 375 0x235a x +superkernels.cpp 375 0x235e +superkernels.cpp - 0x235f + + +transposeshuffle_params.h: +File name Line number Starting address View Stmt + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/transpose4d_adf_wrapper.cpp: +transpose4d_adf_wrapper.cpp 33 0x2930 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x2934 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/transpose4d_adf_wrapper.cpp: +transpose4d_adf_wrapper.cpp 37 0x2934 1 +transpose4d_adf_wrapper.cpp 37 0x293e x +transpose4d_adf_wrapper.cpp 37 0x294e +transpose4d_adf_wrapper.cpp 37 0x294e 1 +transpose4d_adf_wrapper.cpp 33 0x2954 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x295e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/transpose4d_adf_wrapper.cpp: +transpose4d_adf_wrapper.cpp 38 0x296a +transpose4d_adf_wrapper.cpp 38 0x2974 x +transpose4d_adf_wrapper.cpp 38 0x297a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x2990 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 137 0x2990 1 +transposeshuffle.h 137 0x299a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x299e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 137 0x29a4 x +transposeshuffle.h 137 0x29aa +transposeshuffle.h 137 0x29ae + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/transpose4d_adf_wrapper.cpp: +transpose4d_adf_wrapper.cpp 43 0x29ae 1 +transpose4d_adf_wrapper.cpp 43 0x29b4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 137 0x29b8 +transposeshuffle.h 137 0x29bc + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/transpose4d_adf_wrapper.cpp: +transpose4d_adf_wrapper.cpp 43 0x29c0 +transpose4d_adf_wrapper.cpp 46 0x29c0 1 +transpose4d_adf_wrapper.cpp 43 0x29ca +transpose4d_adf_wrapper.cpp 43 0x29ca 1 x +transpose4d_adf_wrapper.cpp 43 0x29d0 +transpose4d_adf_wrapper.cpp 46 0x29dc x +transpose4d_adf_wrapper.cpp 46 0x29e4 +transpose4d_adf_wrapper.cpp 43 0x29e8 x +transpose4d_adf_wrapper.cpp 43 0x29ec +transpose4d_adf_wrapper.cpp 43 0x29f0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x2a20 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/buffer_pad_adf_wrapper.cpp: +buffer_pad_adf_wrapper.cpp 24 0x2a20 1 x +buffer_pad_adf_wrapper.cpp 26 0x2a20 2 +buffer_pad_adf_wrapper.cpp 26 0x2a26 x +buffer_pad_adf_wrapper.cpp 26 0x2a2a +buffer_pad_adf_wrapper.cpp 27 0x2a2e x +buffer_pad_adf_wrapper.cpp 24 0x2a3e +buffer_pad_adf_wrapper.cpp 36 0x2a44 x +buffer_pad_adf_wrapper.cpp 36 0x2a44 1 +buffer_pad_adf_wrapper.cpp 36 0x2a4a +buffer_pad_adf_wrapper.cpp 36 0x2a54 +buffer_pad_adf_wrapper.cpp 36 0x2a58 +buffer_pad_adf_wrapper.cpp 36 0x2a5c +buffer_pad_adf_wrapper.cpp 36 0x2a5c 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x2a62 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/buffer_pad_adf_wrapper.cpp: +buffer_pad_adf_wrapper.cpp 25 0x2a74 x +buffer_pad_adf_wrapper.cpp 37 0x2a7a +buffer_pad_adf_wrapper.cpp 37 0x2a80 +buffer_pad_adf_wrapper.cpp 37 0x2a84 x +buffer_pad_adf_wrapper.cpp 36 0x2a8a +buffer_pad_adf_wrapper.cpp 36 0x2a8e x +buffer_pad_adf_wrapper.cpp 40 0x2ac0 x +buffer_pad_adf_wrapper.cpp 40 0x2ac0 1 x +buffer_pad_adf_wrapper.cpp 41 0x2ad0 x +buffer_pad_adf_wrapper.cpp 41 0x2ada +buffer_pad_adf_wrapper.cpp 42 0x2ae4 +buffer_pad_adf_wrapper.cpp 40 0x2aee +buffer_pad_adf_wrapper.cpp 45 0x2aee 1 +buffer_pad_adf_wrapper.cpp 41 0x2b10 +buffer_pad_adf_wrapper.cpp 41 0x2b20 +buffer_pad_adf_wrapper.cpp 42 0x2b20 1 +buffer_pad_adf_wrapper.cpp 42 0x2b28 +buffer_pad_adf_wrapper.cpp 42 0x2b30 x +buffer_pad_adf_wrapper.cpp 42 0x2b50 +buffer_pad_adf_wrapper.cpp 40 0x2ba0 x +buffer_pad_adf_wrapper.cpp 45 0x2bd0 +buffer_pad_adf_wrapper.cpp 45 0x2bf2 x +buffer_pad_adf_wrapper.cpp 45 0x2bf6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x2fd0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/slice_generic_innermost_adf_wrapper.cpp: +e_generic_innermost_adf_wrapper.cpp 29 0x2fd0 1 +e_generic_innermost_adf_wrapper.cpp 29 0x2fd0 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x2fda + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/slice_generic_innermost_adf_wrapper.cpp: +e_generic_innermost_adf_wrapper.cpp 33 0x2fe2 +e_generic_innermost_adf_wrapper.cpp 33 0x2fe2 1 x +e_generic_innermost_adf_wrapper.cpp 37 0x2ff4 +e_generic_innermost_adf_wrapper.cpp 36 0x3000 x +e_generic_innermost_adf_wrapper.cpp 37 0x3000 1 +e_generic_innermost_adf_wrapper.cpp 34 0x300a x +e_generic_innermost_adf_wrapper.cpp 36 0x300a 1 +e_generic_innermost_adf_wrapper.cpp 34 0x3010 +e_generic_innermost_adf_wrapper.cpp 36 0x3010 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x301a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/slice_generic_innermost_adf_wrapper.cpp: +e_generic_innermost_adf_wrapper.cpp 34 0x301a 1 x +e_generic_innermost_adf_wrapper.cpp 36 0x3020 x +e_generic_innermost_adf_wrapper.cpp 37 0x3020 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x3028 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/slice_generic_innermost_adf_wrapper.cpp: +e_generic_innermost_adf_wrapper.cpp 36 0x302e x +e_generic_innermost_adf_wrapper.cpp 36 0x3034 +e_generic_innermost_adf_wrapper.cpp 36 0x303a +e_generic_innermost_adf_wrapper.cpp 37 0x303e x +e_generic_innermost_adf_wrapper.cpp 37 0x3044 +e_generic_innermost_adf_wrapper.cpp 37 0x3048 +e_generic_innermost_adf_wrapper.cpp 37 0x304c +e_generic_innermost_adf_wrapper.cpp 37 0x3050 +e_generic_innermost_adf_wrapper.cpp 39 0x3060 +e_generic_innermost_adf_wrapper.cpp 39 0x3070 x +e_generic_innermost_adf_wrapper.cpp 39 0x3074 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2df0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 25 0x2df0 1 x +slice_generic_innermost.h 35 0x2df0 2 +slice_generic_innermost.h 54 0x2df0 3 +slice_generic_innermost.h 35 0x2df8 x +slice_generic_innermost.h 35 0x2dfc +slice_generic_innermost.h 36 0x2e02 x +slice_generic_innermost.h 40 0x2e06 x +slice_generic_innermost.h 38 0x2e0a x +slice_generic_innermost.h 40 0x2e18 x +slice_generic_innermost.h 40 0x2e18 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2e1e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 36 0x2e1e 1 +slice_generic_innermost.h 50 0x2e1e 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2e22 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 36 0x2e22 1 x +slice_generic_innermost.h 50 0x2e22 2 +slice_generic_innermost.h 35 0x2e28 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2e2c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 37 0x2e2c 1 x +slice_generic_innermost.h 52 0x2e2c 2 +slice_generic_innermost.h 38 0x2e32 x +slice_generic_innermost.h 40 0x2e44 +slice_generic_innermost.h 40 0x2e48 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2e52 x +vector.hpp 1139 0x2e52 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 40 0x2e52 2 +slice_generic_innermost.h 50 0x2e52 3 x +slice_generic_innermost.h 52 0x2e52 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2e5e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 40 0x2e5e 1 x +slice_generic_innermost.h 51 0x2e5e 2 x +slice_generic_innermost.h 53 0x2e5e 3 x +slice_generic_innermost.h 56 0x2e5e 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2e6c x +vector.hpp 1139 0x2e6c 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 40 0x2e6c 2 +slice_generic_innermost.h 50 0x2e6c 3 x +slice_generic_innermost.h 57 0x2e6c 4 x +slice_generic_innermost.h 58 0x2e6c 5 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2e78 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 51 0x2e78 1 x +slice_generic_innermost.h 52 0x2e78 2 x +slice_generic_innermost.h 59 0x2e78 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2e80 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 53 0x2e80 1 x +slice_generic_innermost.h 56 0x2e80 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2e86 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 57 0x2e86 1 x +slice_generic_innermost.h 58 0x2e86 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2e8c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 50 0x2e8c 1 x +slice_generic_innermost.h 59 0x2e8c 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2e92 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 46 0x2e92 1 x +slice_generic_innermost.h 52 0x2e92 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2e98 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 51 0x2e98 1 x +slice_generic_innermost.h 56 0x2e98 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2ea0 +vector.hpp 1159 0x2ea0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 47 0x2ea0 2 x +slice_generic_innermost.h 54 0x2ea0 3 x +slice_generic_innermost.h 58 0x2ea0 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2eb0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 46 0x2eb0 1 x +slice_generic_innermost.h 50 0x2eb0 2 x +slice_generic_innermost.h 53 0x2eb0 3 x +slice_generic_innermost.h 57 0x2eb0 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2ec0 +vector.hpp 1159 0x2ec0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 52 0x2ec0 2 x +slice_generic_innermost.h 59 0x2ec0 3 x +slice_generic_innermost.h 60 0x2ec0 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2ed0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 47 0x2ed0 1 x +slice_generic_innermost.h 51 0x2ed0 2 x +slice_generic_innermost.h 55 0x2ed0 3 x +slice_generic_innermost.h 56 0x2ed0 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2ee0 +vector.hpp 1159 0x2ee0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 54 0x2ee0 2 x +slice_generic_innermost.h 58 0x2ee0 3 x +slice_generic_innermost.h 61 0x2ee0 4 x +slice_generic_innermost.h 46 0x2ef0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2ef4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 60 0x2ef4 1 x +slice_generic_innermost.h 47 0x2ef8 x +slice_generic_innermost.h 61 0x2ef8 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2f04 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 46 0x2f04 1 x +slice_generic_innermost.h 55 0x2f04 2 x +slice_generic_innermost.h 60 0x2f04 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2f0e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 54 0x2f0e 1 x +slice_generic_innermost.h 61 0x2f0e 2 x +slice_generic_innermost.h 47 0x2f14 x +slice_generic_innermost.h 55 0x2f14 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2f1a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 54 0x2f1a 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2f20 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 60 0x2f20 1 x +slice_generic_innermost.h 40 0x2f30 x +slice_generic_innermost.h 40 0x2f34 +slice_generic_innermost.h 40 0x2f3a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2f40 x +vector.hpp 1139 0x2f40 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 50 0x2f40 2 x +slice_generic_innermost.h 52 0x2f40 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2f46 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 51 0x2f46 1 x +slice_generic_innermost.h 53 0x2f46 2 x +slice_generic_innermost.h 56 0x2f46 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2f4e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 57 0x2f4e 1 x +slice_generic_innermost.h 58 0x2f4e 2 x +slice_generic_innermost.h 59 0x2f54 x +slice_generic_innermost.h 46 0x2f60 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2f80 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 47 0x2f80 1 x +slice_generic_innermost.h 54 0x2f80 2 x +slice_generic_innermost.h 55 0x2f90 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2fa0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 60 0x2fa0 1 x +slice_generic_innermost.h 61 0x2fb0 x +slice_generic_innermost.h 76 0x2fc0 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable4/src/0_0_reloadable4.cc: +0_0_reloadable4.cc 21 0x2370 x +0_0_reloadable4.cc 23 0x2370 1 +0_0_reloadable4.cc 23 0x2374 x +0_0_reloadable4.cc 24 0x2378 x +0_0_reloadable4.cc 26 0x237c x +0_0_reloadable4.cc 25 0x2380 x +0_0_reloadable4.cc 22 0x2384 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle_params.h: +transposeshuffle_params.h 71 0x23a0 x +transposeshuffle_params.h 76 0x23a0 1 +transposeshuffle_params.h 76 0x23a0 2 x +transposeshuffle_params.h 76 0x23aa +transposeshuffle_params.h 76 0x23aa 1 +transposeshuffle_params.h 80 0x23aa 2 +transposeshuffle_params.h 80 0x23aa 3 +transposeshuffle_params.h 81 0x23aa 4 +transposeshuffle_params.h 81 0x23aa 5 +transposeshuffle_params.h 85 0x23b6 +transposeshuffle_params.h 86 0x23b6 1 +transposeshuffle_params.h 89 0x23b6 2 +transposeshuffle_params.h 91 0x23b6 3 +transposeshuffle_params.h 93 0x23b6 4 +transposeshuffle_params.h 94 0x23b6 5 +transposeshuffle_params.h 76 0x23c4 x +transposeshuffle_params.h 76 0x23c8 +transposeshuffle_params.h 76 0x23cc +transposeshuffle_params.h 76 0x23da +transposeshuffle_params.h 76 0x23de +transposeshuffle_params.h 76 0x23e2 +transposeshuffle_params.h 76 0x23e6 +transposeshuffle_params.h 76 0x23f4 +transposeshuffle_params.h 76 0x23f8 +transposeshuffle_params.h 76 0x23fc +transposeshuffle_params.h 76 0x2400 +transposeshuffle_params.h 76 0x240e +transposeshuffle_params.h 76 0x2412 +transposeshuffle_params.h 80 0x2416 x +transposeshuffle_params.h 80 0x2426 +transposeshuffle_params.h 80 0x242a +transposeshuffle_params.h 89 0x242a 1 x +transposeshuffle_params.h 80 0x2430 +transposeshuffle_params.h 80 0x2430 1 x +transposeshuffle_params.h 80 0x2438 +transposeshuffle_params.h 81 0x243c x +transposeshuffle_params.h 81 0x244c +transposeshuffle_params.h 90 0x244c 1 +transposeshuffle_params.h 81 0x2452 +transposeshuffle_params.h 81 0x2456 +transposeshuffle_params.h 90 0x2456 1 x +transposeshuffle_params.h 85 0x245c x +transposeshuffle_params.h 85 0x2460 +transposeshuffle_params.h 86 0x2464 x +transposeshuffle_params.h 89 0x2468 x +transposeshuffle_params.h 90 0x246c x +transposeshuffle_params.h 91 0x2470 +transposeshuffle_params.h 91 0x2470 1 x +transposeshuffle_params.h 91 0x2478 +transposeshuffle_params.h 93 0x247c x +transposeshuffle_params.h 93 0x2480 +transposeshuffle_params.h 93 0x2484 +transposeshuffle_params.h 93 0x2488 +transposeshuffle_params.h 93 0x248c +transposeshuffle_params.h 95 0x248c 1 x +transposeshuffle_params.h 94 0x2492 x +transposeshuffle_params.h 94 0x2496 +transposeshuffle_params.h 94 0x249a +transposeshuffle_params.h 94 0x249e +transposeshuffle_params.h 94 0x24a2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 38 0x24b0 x +transposeshuffle.h 72 0x24b0 1 +transposeshuffle.h 79 0x24b0 2 +transposeshuffle.h 72 0x24ba +transposeshuffle.h 72 0x24ba 1 x +transposeshuffle.h 72 0x24ba 2 +transposeshuffle.h 79 0x24cc x +transposeshuffle.h 79 0x24d0 +transposeshuffle.h 72 0x24d6 +transposeshuffle.h 72 0x24d6 1 +transposeshuffle.h 72 0x24da x +transposeshuffle.h 72 0x24da 1 x +transposeshuffle.h 116 0x24e4 +transposeshuffle.h 116 0x24ea x +transposeshuffle.h 116 0x24fa +transposeshuffle.h 116 0x24fa 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2522 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 116 0x2522 1 +transposeshuffle.h 119 0x2522 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x252c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 116 0x252c 1 x +transposeshuffle.h 119 0x252c 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2536 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 116 0x2536 1 +transposeshuffle.h 119 0x2536 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2540 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 119 0x2540 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2550 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 119 0x2550 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2560 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 119 0x2560 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2570 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 119 0x2570 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2580 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 119 0x2580 1 +transposeshuffle.h 120 0x2580 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2590 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 119 0x2590 1 x +transposeshuffle.h 120 0x2590 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x25a0 +vector.hpp 1159 0x25a0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 119 0x25a0 2 +transposeshuffle.h 120 0x25a0 3 x +transposeshuffle.h 122 0x25a0 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x25b0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 120 0x25b0 1 +transposeshuffle.h 122 0x25b0 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x25b8 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 120 0x25b8 1 +transposeshuffle.h 122 0x25b8 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x25c0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 120 0x25c0 1 +transposeshuffle.h 122 0x25c0 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x25c8 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 120 0x25c8 1 +transposeshuffle.h 122 0x25c8 2 +transposeshuffle.h 126 0x25c8 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x25d2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 120 0x25d2 1 x +transposeshuffle.h 122 0x25d2 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x25da + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 120 0x25da 1 +transposeshuffle.h 122 0x25da 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x25e2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 120 0x25e2 1 +transposeshuffle.h 122 0x25e2 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x25ea + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 122 0x25ea 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x25f0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 122 0x25f0 1 +transposeshuffle.h 116 0x2600 x +transposeshuffle.h 116 0x2604 +transposeshuffle.h 116 0x260a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2610 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 119 0x2610 1 x +transposeshuffle.h 120 0x2660 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2680 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 122 0x2680 1 x +transposeshuffle.h 126 0x2690 x +transposeshuffle.h 86 0x26a0 +transposeshuffle.h 86 0x26a6 x +transposeshuffle.h 86 0x26b6 +transposeshuffle.h 86 0x26b6 1 +transposeshuffle.h 86 0x26c6 +transposeshuffle.h 86 0x26c6 1 +transposeshuffle.h 86 0x26d0 +transposeshuffle.h 86 0x26d0 1 +transposeshuffle.h 87 0x26d0 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x26da +aie_core.h 100 0x26da 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1132 0x26da 2 +vector.hpp 1152 0x26da 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 86 0x26da 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x26e4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x26e4 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 86 0x26e4 2 +transposeshuffle.h 86 0x26e4 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x26f0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1132 0x26f0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x26f0 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 86 0x26f0 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x26fc + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1132 0x26fc 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 86 0x26fc 2 +transposeshuffle.h 86 0x26fc 3 +transposeshuffle.h 86 0x2708 +transposeshuffle.h 87 0x2708 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x2712 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 86 0x2712 1 +transposeshuffle.h 86 0x2718 +transposeshuffle.h 87 0x2718 1 x +transposeshuffle.h 86 0x2722 x +transposeshuffle.h 86 0x2722 1 x +transposeshuffle.h 86 0x272c +transposeshuffle.h 86 0x272c 1 +transposeshuffle.h 86 0x2736 +transposeshuffle.h 86 0x2740 +transposeshuffle.h 87 0x2750 x +transposeshuffle.h 87 0x2760 +transposeshuffle.h 88 0x2770 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1132 0x2794 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 88 0x2794 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x279e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1132 0x279e 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 88 0x279e 2 x +transposeshuffle.h 88 0x27a8 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1132 0x27ac x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x27b0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1132 0x27b0 1 +vector.hpp 1132 0x27d0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x27e0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1132 0x27e0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x27f0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1132 0x2800 x +vector.hpp 1152 0x2800 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x2800 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x2810 x +aie_core.h 100 0x2810 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1132 0x2810 2 +vector.hpp 1152 0x2810 3 x +vector.hpp 1152 0x2820 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x2820 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x2830 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x2830 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x2838 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x2838 1 x +vector.hpp 1152 0x283c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x283c 1 x +transpose.hpp 225 0x2844 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x284e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x284e 1 +vector.hpp 1152 0x284e 2 x +vector.hpp 1152 0x2856 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x2856 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x285e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x285e 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x2866 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x2866 1 x +vector.hpp 1152 0x286a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 88 0x2870 x +transposeshuffle.h 88 0x2876 +transposeshuffle.h 88 0x287c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1132 0x2880 x +vector.hpp 1152 0x2880 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x2886 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1132 0x2886 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x28c0 x +transpose.hpp 225 0x28d0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x28e0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x28e0 1 x +vector.hpp 1152 0x28f0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 87 0x2900 x +transposeshuffle.h 86 0x2910 x +transposeshuffle.h 126 0x2920 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable4/src/0_0_reloadable4.cc: +0_0_reloadable4.cc 30 0x2a00 x +0_0_reloadable4.cc 32 0x2a00 1 +0_0_reloadable4.cc 32 0x2a04 x +0_0_reloadable4.cc 34 0x2a08 x +0_0_reloadable4.cc 33 0x2a0c x +0_0_reloadable4.cc 31 0x2a10 x +0_0_reloadable4.cc 38 0x2c10 x +0_0_reloadable4.cc 40 0x2c10 1 +0_0_reloadable4.cc 40 0x2c14 x +0_0_reloadable4.cc 42 0x2c18 x +0_0_reloadable4.cc 41 0x2c1c x +0_0_reloadable4.cc 39 0x2c20 x +0_0_reloadable4.cc 46 0x2c30 x +0_0_reloadable4.cc 48 0x2c30 1 +0_0_reloadable4.cc 48 0x2c34 x +0_0_reloadable4.cc 50 0x2c38 x +0_0_reloadable4.cc 49 0x2c3c x +0_0_reloadable4.cc 47 0x2c40 x +0_0_reloadable4.cc 54 0x2c50 x +0_0_reloadable4.cc 56 0x2c50 1 +0_0_reloadable4.cc 56 0x2c54 x +0_0_reloadable4.cc 58 0x2c58 x +0_0_reloadable4.cc 57 0x2c5c x +0_0_reloadable4.cc 55 0x2c60 x +0_0_reloadable4.cc 62 0x2c70 x +0_0_reloadable4.cc 64 0x2c70 1 +0_0_reloadable4.cc 64 0x2c74 x +0_0_reloadable4.cc 65 0x2c78 x +0_0_reloadable4.cc 67 0x2c7c x +0_0_reloadable4.cc 66 0x2c80 x +0_0_reloadable4.cc 63 0x2c84 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost_params.h: +slice_generic_innermost_params.h 40 0x2ca0 x +slice_generic_innermost_params.h 41 0x2ca0 1 x +slice_generic_innermost_params.h 41 0x2cb0 x +slice_generic_innermost_params.h 42 0x2cb4 x +slice_generic_innermost_params.h 42 0x2cc4 +slice_generic_innermost_params.h 43 0x2cc8 x +slice_generic_innermost_params.h 43 0x2cd8 +slice_generic_innermost_params.h 44 0x2cdc x +slice_generic_innermost_params.h 44 0x2cec +slice_generic_innermost_params.h 45 0x2cf0 x +slice_generic_innermost_params.h 45 0x2d00 +slice_generic_innermost_params.h 46 0x2d04 x +slice_generic_innermost_params.h 46 0x2d14 +slice_generic_innermost_params.h 47 0x2d18 x +slice_generic_innermost_params.h 47 0x2d28 +slice_generic_innermost_params.h 48 0x2d2c x +slice_generic_innermost_params.h 49 0x2d32 x +slice_generic_innermost_params.h 48 0x2d3e x +slice_generic_innermost_params.h 52 0x2d50 x +slice_generic_innermost_params.h 53 0x2d50 1 x +slice_generic_innermost_params.h 55 0x2d50 2 +slice_generic_innermost_params.h 58 0x2d50 3 +slice_generic_innermost_params.h 53 0x2d5a x +slice_generic_innermost_params.h 58 0x2d5a 1 +slice_generic_innermost_params.h 59 0x2d5a 2 +slice_generic_innermost_params.h 53 0x2d64 +slice_generic_innermost_params.h 60 0x2d64 1 +slice_generic_innermost_params.h 62 0x2d64 2 +slice_generic_innermost_params.h 55 0x2d6a x +slice_generic_innermost_params.h 60 0x2d6a 1 +slice_generic_innermost_params.h 53 0x2d7a x +slice_generic_innermost_params.h 58 0x2d7e x +slice_generic_innermost_params.h 58 0x2d82 +slice_generic_innermost_params.h 53 0x2d86 x +slice_generic_innermost_params.h 58 0x2d86 1 +slice_generic_innermost_params.h 75 0x2d8c x +slice_generic_innermost_params.h 59 0x2d90 x +slice_generic_innermost_params.h 59 0x2d94 +slice_generic_innermost_params.h 60 0x2d98 x +slice_generic_innermost_params.h 60 0x2d9c +slice_generic_innermost_params.h 62 0x2da0 x +slice_generic_innermost_params.h 79 0x2db0 x +slice_generic_innermost_params.h 80 0x2db0 1 x +slice_generic_innermost_params.h 81 0x2db6 +slice_generic_innermost_params.h 81 0x2dba +slice_generic_innermost_params.h 81 0x2dd0 x +slice_generic_innermost_params.h 81 0x2dd6 +slice_generic_innermost_params.h 81 0x2dda + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable4/src/0_0_reloadable4.cc: +0_0_reloadable4.cc 71 0x3090 x +0_0_reloadable4.cc 73 0x3090 1 +0_0_reloadable4.cc 73 0x3094 x +0_0_reloadable4.cc 75 0x3098 x +0_0_reloadable4.cc 74 0x309c x +0_0_reloadable4.cc 72 0x30a0 x +0_0_reloadable4.cc 91 0x9e0 x +0_0_reloadable4.cc 93 0x9e0 1 +0_0_reloadable4.cc 93 0x9e0 2 x +0_0_reloadable4.cc 91 0x9e6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0x9f0 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable4/src/0_0_reloadable4.cc: +0_0_reloadable4.cc 98 0x9f0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0x9f8 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable4/src/0_0_reloadable4.cc: +0_0_reloadable4.cc 95 0x9f8 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 614 0xa08 x +io_buffer_compiler.h 614 0xa0c +io_buffer_compiler.h 614 0xa10 +io_buffer_compiler.h 614 0xa14 +io_buffer_compiler.h 614 0xa18 +io_buffer_compiler.h 219 0xa28 x +io_buffer_compiler.h 219 0xa28 1 x +io_buffer_compiler.h 218 0xa2c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 434 0xa30 +io_buffer_main.h 434 0xa30 1 +io_buffer_main.h 434 0xa30 2 +io_buffer_main.h 434 0xa3c x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable4/src/0_0_reloadable4.cc: +0_0_reloadable4.cc 95 0xa40 +0_0_reloadable4.cc 95 0xa40 1 +0_0_reloadable4.cc 98 0xa40 2 +0_0_reloadable4.cc 101 0xa40 3 +0_0_reloadable4.cc 95 0xa46 +0_0_reloadable4.cc 95 0xa46 1 x +0_0_reloadable4.cc 95 0xa4c +0_0_reloadable4.cc 95 0xa4c 1 +0_0_reloadable4.cc 95 0xa52 +0_0_reloadable4.cc 98 0xa52 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 434 0xa5c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 614 0xa6e x +io_buffer_compiler.h 614 0xa72 +io_buffer_compiler.h 614 0xa76 +io_buffer_compiler.h 614 0xa7a +io_buffer_compiler.h 614 0xa7e +io_buffer_compiler.h 219 0xa8e x +io_buffer_compiler.h 219 0xa8e 1 x +io_buffer_compiler.h 218 0xa92 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 434 0xa9e x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable4/src/0_0_reloadable4.cc: +0_0_reloadable4.cc 98 0xaa2 x +0_0_reloadable4.cc 98 0xaa6 +0_0_reloadable4.cc 98 0xaa6 1 +0_0_reloadable4.cc 98 0xaac +0_0_reloadable4.cc 98 0xaac 1 +0_0_reloadable4.cc 98 0xab2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 614 0xac4 x +io_buffer_compiler.h 614 0xac8 +io_buffer_compiler.h 614 0xacc +io_buffer_compiler.h 614 0xad0 +io_buffer_compiler.h 614 0xad4 +io_buffer_compiler.h 219 0xae4 x +io_buffer_compiler.h 219 0xae4 1 x +io_buffer_compiler.h 218 0xae8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 434 0xaf4 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable4/src/0_0_reloadable4.cc: +0_0_reloadable4.cc 101 0xaf8 x +0_0_reloadable4.cc 101 0xafc +0_0_reloadable4.cc 101 0xb00 +0_0_reloadable4.cc 101 0xb06 +0_0_reloadable4.cc 101 0xb18 +0_0_reloadable4.cc 104 0xb1c +0_0_reloadable4.cc 106 0xb1c 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0xb30 +io_buffer_compiler.h 630 0xb30 1 +io_buffer_compiler.h 630 0xb30 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 464 0xb30 3 +io_buffer_main.h 464 0xb30 4 +io_buffer_main.h 464 0xb30 5 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable4/src/0_0_reloadable4.cc: +0_0_reloadable4.cc 104 0xb30 6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 464 0xb36 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable4/src/0_0_reloadable4.cc: +0_0_reloadable4.cc 106 0xb3a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0xb3e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 464 0xb3e 1 +io_buffer_main.h 464 0xb42 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 629 0xb4a x +io_buffer_compiler.h 629 0xb4e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 464 0xb5e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0xb62 +io_buffer_compiler.h 630 0xb62 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 464 0xb68 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable4/src/0_0_reloadable4.cc: +0_0_reloadable4.cc 106 0xb68 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0xb78 x +io_buffer_compiler.h 629 0xb7c x +io_buffer_compiler.h 630 0xb7c 1 +io_buffer_compiler.h 629 0xb82 +io_buffer_compiler.h 630 0xb82 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 464 0xb92 +io_buffer_main.h 464 0xb96 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0xb9a +io_buffer_compiler.h 630 0xb9a 1 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable4/src/0_0_reloadable4.cc: +0_0_reloadable4.cc 109 0xba0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0xbae x +io_buffer_compiler.h 629 0xbb2 x +io_buffer_compiler.h 630 0xbb2 1 +io_buffer_compiler.h 629 0xbb8 +io_buffer_compiler.h 630 0xbb8 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 464 0xbca x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable4/src/0_0_reloadable4.cc: +0_0_reloadable4.cc 111 0xbce + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0xbd2 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable4/src/0_0_reloadable4.cc: +0_0_reloadable4.cc 111 0xbe6 x +0_0_reloadable4.cc 111 0xbec + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0xbf0 x +io_buffer_compiler.h 630 0xbf6 +io_buffer_compiler.h 630 0xbfa +io_buffer_compiler.h 630 0xbfe +io_buffer_compiler.h - 0xbff + + +CU: me_div.c: +File name Line number Starting address View Stmt + +./me_div.c:[++] +me_div.c 108 0x30b0 +me_div.c 108 0x30b0 1 +me_div.c 115 0x30b0 2 x +me_div.c 108 0x30b6 +me_div.c 108 0x30ba +me_div.c 108 0x30be +me_div.c 108 0x30c2 +me_div.c 108 0x30c6 +me_div.c 108 0x30ca +me_div.c 108 0x30ce +me_div.c 108 0x30d2 +me_div.c 108 0x30d6 +me_div.c 108 0x30da +me_div.c 108 0x30de +me_div.c 108 0x30e2 +me_div.c 108 0x30e6 +me_div.c 108 0x30ea +me_div.c 108 0x30ee +me_div.c 108 0x30f2 +me_div.c 108 0x30f6 +me_div.c 108 0x30fa +me_div.c 108 0x30fe +me_div.c 108 0x3102 +me_div.c 108 0x3106 +me_div.c 108 0x310a +me_div.c 108 0x310e +me_div.c 108 0x3112 +me_div.c 108 0x3116 +me_div.c 108 0x311a +me_div.c 108 0x311e +me_div.c 108 0x3122 +me_div.c 119 0x3126 x +me_div.c 108 0x312a x +me_div.c 108 0x312e +me_div.c 108 0x3132 +me_div.c 108 0x3136 +me_div.c - 0x3137 + + +CU: No directory table +CU: Empty file name table + - 0x1 + + +CU: src/string.c: +File name Line number Starting address View Stmt + +src/string.c: +string.c 325 0x3140 x +string.c 328 0x3140 1 x +string.c 329 0x3146 +string.c 328 0x3152 x +string.c 329 0x3152 1 +string.c 328 0x315a +string.c 328 0x3160 +string.c 329 0x3170 x +string.c 330 0x31e0 x +string.c - 0x31e1 + + diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable79/scripts/0_0_reloadable79.bcf b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable79/scripts/0_0_reloadable79.bcf new file mode 100644 index 0000000000000000000000000000000000000000..b5025c34b99f02de39e461699cdc760aa2cbe456 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable79/scripts/0_0_reloadable79.bcf @@ -0,0 +1,16 @@ +_reserved DMb 0x0 0x40000 + +_reserved PM 0x0 0x9e0 //reserved for main elf + +_entry_point _Z13kernelWrapperPPvjjjj +_symbol _Z13kernelWrapperPPvjjjj 0x9e0 + +_reserved DMb 0x7b540 0x800 //reserved for lcp ping-pong buffers +_reserved DMb 0x7bd40 0x40 //reserved for sync buffer +_stack DM_stack 0x7bd80 0x440 //stack for core +_reserved DMb 0x7c1c0 0x40 //reserved for main elf heap +//space for synopsys compiler at 0x7c200 0x800//heap +_reserved DMb 0x40000 0x3b540 + +_reserved DMb 0x7ca00 0x3600 +_reserved DMb 0x80000 0x80000 // And everything else the core can't see diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable79/scripts/0_0_reloadable79.prx b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable79/scripts/0_0_reloadable79.prx new file mode 100644 index 0000000000000000000000000000000000000000..4778fb04c45647c5a210138d961fb4745ab466ee --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable79/scripts/0_0_reloadable79.prx @@ -0,0 +1,13 @@ + + + \ No newline at end of file diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable80/Release/0_0_reloadable80.cmic2 b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable80/Release/0_0_reloadable80.cmic2 new file mode 100644 index 0000000000000000000000000000000000000000..9c124c0a9693c0611e9ab76c180f2bbbe7f0b969 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable80/Release/0_0_reloadable80.cmic2 @@ -0,0 +1,18572 @@ + +// File generated by darts version V-2024.06#84922c0d9f#241219, Fri May 30 11:30:06 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// darts -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -d -h -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable5 me + +// Release: ipp V-2024.06-TGT-241219 +.label __Z13kernelWrapperPPvjjjj___func_begin0 +.label _Z13kernelWrapperPPvjjjj +.function kernelWrapper _Z13kernelWrapperPPvjjjj +.src_ref 0 "0_0_reloadable5.cc" 103 first +.src_ref 0 "0_0_reloadable5.cc" 105 60 +.src_ref 0 "0_0_reloadable5.cc" 105 110 first +.function_start + 2528 "00101100" // LDA r16, [p0]; NEZ r26, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2529 "11100000" // /* MW 5 */ + 2530 "11101001" // /* MW 4 */ + 2531 "11010000" // /* MW 3 */ + 2532 "11000010" // /* MW 2 */ + 2533 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 103 + 2534 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2535 "00000001" // /* MW 5 */ + 2536 "00000000" // /* MW 4 */ + 2537 "00000000" // /* MW 3 */ + 2538 "00001000" // /* MW 2 */ + 2539 "00000000" // /* MW 1 */ + 2540 "10011000" // ST p6, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2541 "00011101" // /* MW 3 */ + 2542 "11101111" // /* MW 2 */ + 2543 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 0 "0_0_reloadable5.cc" 110 112 + 2544 "00000010" // ST r14, [sp, #-16]; MOV r14, r3 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2545 "01110000" // /* MW 7 */ + 2546 "11010000" // /* MW 6 */ + 2547 "11001000" // /* MW 5 */ + 2548 "00000001" // /* MW 4 */ + 2549 "10110000" // /* MW 3 */ + 2550 "00111010" // /* MW 2 */ + 2551 "11111110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 0 "0_0_reloadable5.cc" 107 110 + 2552 "00000010" // ST r15, [sp, #-8]; MOV r15, r1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2553 "01110000" // /* MW 7 */ + 2554 "01010000" // /* MW 6 */ + 2555 "11101000" // /* MW 5 */ + 2556 "00000001" // /* MW 4 */ + 2557 "10110000" // /* MW 3 */ + 2558 "00111110" // /* MW 2 */ + 2559 "11111111" // /* MW 1 */ + 2560 "10011000" // ST p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2561 "10011101" // /* MW 3 */ + 2562 "11110111" // /* MW 2 */ + 2563 "00001111" // /* MW 1 */ + 2564 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2565 "00111101" // /* MW 3 */ + 2566 "11111100" // /* MW 2 */ + 2567 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 first + 2568 "00011000" // ADD.NC p6, r16, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2569 "00000010" // /* MW 3 */ + 2570 "01101000" // /* MW 2 */ + 2571 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 + 2572 "10011000" // LDA r16, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2573 "00010110" // /* MW 3 */ + 2574 "00011110" // /* MW 2 */ + 2575 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 36 + 2576 "10011000" // LDA r18, [p6], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2577 "01010110" // /* MW 3 */ + 2578 "00111110" // /* MW 2 */ + 2579 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 46 + 2580 "10011000" // LDA r17, [p6], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2581 "00110110" // /* MW 3 */ + 2582 "11101110" // /* MW 2 */ + 2583 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 60 + 2584 "10011000" // LDA r27, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2585 "01110110" // /* MW 3 */ + 2586 "00000111" // /* MW 2 */ + 2587 "00000110" // /* MW 1 */ + 2588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2589 "00000000" // /* MW 1 */ + 2590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2591 "00000000" // /* MW 1 */ + 2592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2593 "00000000" // /* MW 1 */ + 2594 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2595 "00000000" // /* MW 1 */ + 2596 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2597 "00000000" // /* MW 1 */ + 2598 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2599 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 219 30 first +.src_ref 1 "io_buffer_compiler.h" 219 37 first + 2600 "00011000" // SEL.EQZ r16, r16, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2601 "00100010" // /* MW 3 */ + 2602 "00100001" // /* MW 2 */ + 2603 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 218 23 first + 2604 "10011000" // ST r16, [p6, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2605 "00010001" // /* MW 3 */ + 2606 "11010110" // /* MW 2 */ + 2607 "00001110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 +.src_ref 1 "io_buffer_main.h" 434 8 +.src_ref 1 "io_buffer_main.h" 434 8 + 2608 "11100100" // MOVX r16, #-1; MOV el0, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2609 "00111001" // /* MW 5 */ + 2610 "00110101" // /* MW 4 */ + 2611 "10100000" // /* MW 3 */ + 2612 "00011111" // /* MW 2 */ + 2613 "11111100" // /* MW 1 */ + 2614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2615 "00000000" // /* MW 1 */ + 2616 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2617 "00000000" // /* MW 1 */ + 2618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2619 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 first + 2620 "00011000" // ACQ.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2621 "00001000" // /* MW 3 */ + 2622 "01010111" // /* MW 2 */ + 2623 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 107 60 +.src_ref 0 "0_0_reloadable5.cc" 107 110 +.src_ref 0 "0_0_reloadable5.cc" 110 60 +.src_ref 0 "0_0_reloadable5.cc" 113 7 + 2624 "01100100" // MOVX r17, #2; MOV r19, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2625 "00000101" // /* MW 5 */ + 2626 "10100000" // /* MW 4 */ + 2627 "00101001" // /* MW 3 */ + 2628 "01000001" // /* MW 2 */ + 2629 "00000100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 107 60 +.src_ref 0 "0_0_reloadable5.cc" 107 60 first + 2630 "11100100" // LSHL r20, r26, r17; MOV r18, p0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2631 "10000001" // /* MW 5 */ + 2632 "00100001" // /* MW 4 */ + 2633 "10111001" // /* MW 3 */ + 2634 "00100011" // /* MW 2 */ + 2635 "11010101" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 107 60 +.src_ref 0 "0_0_reloadable5.cc" 107 110 + 2636 "10100100" // LTU r18, r19, r15; ADD.NC p6, r18, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2637 "10100010" // /* MW 5 */ + 2638 "11010010" // /* MW 4 */ + 2639 "10011100" // /* MW 3 */ + 2640 "10011111" // /* MW 2 */ + 2641 "10011100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 107 60 +.src_ref 0 "0_0_reloadable5.cc" 110 60 + 2642 "10111010" // LDA r20, [p6]; ST r20, [sp, #-28]; MOV r19, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2643 "01110010" // /* MW 9 */ + 2644 "01100000" // /* MW 8 */ + 2645 "01101110" // /* MW 7 */ + 2646 "10000010" // /* MW 6 */ + 2647 "10010101" // /* MW 5 */ + 2648 "11100110" // /* MW 4 */ + 2649 "11010111" // /* MW 3 */ + 2650 "11010010" // /* MW 2 */ + 2651 "11000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 + 2652 "00000010" // ST r18, [sp, #-24]; MOV r26, r18 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2653 "01110000" // /* MW 7 */ + 2654 "10010000" // /* MW 6 */ + 2655 "01001100" // /* MW 5 */ + 2656 "00000011" // /* MW 4 */ + 2657 "10110000" // /* MW 3 */ + 2658 "01001010" // /* MW 2 */ + 2659 "11111101" // /* MW 1 */ + 2660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2661 "00000000" // /* MW 1 */ + 2662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2663 "00000000" // /* MW 1 */ + 2664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2665 "00000000" // /* MW 1 */ + 2666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2667 "00000000" // /* MW 1 */ + 2668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2669 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 first + 2670 "00011000" // ADD.NC p6, r20, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2671 "00000010" // /* MW 3 */ + 2672 "01101010" // /* MW 2 */ + 2673 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 + 2674 "10011000" // LDA r20, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2675 "10010110" // /* MW 3 */ + 2676 "00011110" // /* MW 2 */ + 2677 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 36 + 2678 "10011000" // LDA r22, [p6], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2679 "11010110" // /* MW 3 */ + 2680 "00111110" // /* MW 2 */ + 2681 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 46 + 2682 "10011000" // LDA r21, [p6], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2683 "10110110" // /* MW 3 */ + 2684 "11101110" // /* MW 2 */ + 2685 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 60 + 2686 "10011000" // LDA r27, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2687 "01110110" // /* MW 3 */ + 2688 "00000111" // /* MW 2 */ + 2689 "00000110" // /* MW 1 */ + 2690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2691 "00000000" // /* MW 1 */ + 2692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2693 "00000000" // /* MW 1 */ + 2694 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2695 "00000000" // /* MW 1 */ + 2696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2697 "00000000" // /* MW 1 */ + 2698 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2699 "00000000" // /* MW 1 */ + 2700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2701 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 219 30 first +.src_ref 1 "io_buffer_compiler.h" 219 37 first + 2702 "00011000" // SEL.EQZ r20, r20, r22, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2703 "01100010" // /* MW 3 */ + 2704 "00101001" // /* MW 2 */ + 2705 "00010101" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 218 23 first + 2706 "10011000" // ST r20, [p6, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2707 "10010001" // /* MW 3 */ + 2708 "11010110" // /* MW 2 */ + 2709 "00001110" // /* MW 1 */ + 2710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2711 "00000000" // /* MW 1 */ + 2712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2713 "00000000" // /* MW 1 */ + 2714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2715 "00000000" // /* MW 1 */ + 2716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2717 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 first + 2718 "00011000" // ACQ.COND r21, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2719 "00001000" // /* MW 3 */ + 2720 "01010111" // /* MW 2 */ + 2721 "00010101" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 110 60 first + 2722 "10011000" // LSHL r18, r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2723 "00011101" // /* MW 3 */ + 2724 "10100101" // /* MW 2 */ + 2725 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 110 60 +.src_ref 0 "0_0_reloadable5.cc" 110 60 + 2726 "10100100" // LSHL r18, r2, r17; ADD.NC r19, r19, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2727 "10010010" // /* MW 5 */ + 2728 "10110011" // /* MW 4 */ + 2729 "10111001" // /* MW 3 */ + 2730 "10100011" // /* MW 2 */ + 2731 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 110 60 +.src_ref 0 "0_0_reloadable5.cc" 110 112 + 2732 "10100100" // NEZ r26, r14; ADD.NC p6, r19, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2733 "10010010" // /* MW 5 */ + 2734 "11010011" // /* MW 4 */ + 2735 "00001100" // /* MW 3 */ + 2736 "10011110" // /* MW 2 */ + 2737 "01110110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 110 60 + 2738 "00001100" // LDA r18, [p6]; ST r26, [sp, #-32] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2739 "10101011" // /* MW 5 */ + 2740 "11000110" // /* MW 4 */ + 2741 "11011111" // /* MW 3 */ + 2742 "11001010" // /* MW 2 */ + 2743 "11000000" // /* MW 1 */ + 2744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2745 "00000000" // /* MW 1 */ + 2746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2747 "00000000" // /* MW 1 */ + 2748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2749 "00000000" // /* MW 1 */ + 2750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2751 "00000000" // /* MW 1 */ + 2752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2753 "00000000" // /* MW 1 */ + 2754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2755 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 first + 2756 "00011000" // ADD.NC p7, r18, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2757 "00000010" // /* MW 3 */ + 2758 "01101001" // /* MW 2 */ + 2759 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 + 2760 "10011000" // LDA r19, [p7], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2761 "01110110" // /* MW 3 */ + 2762 "00111110" // /* MW 2 */ + 2763 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 23 + 2764 "10011000" // LDA r18, [p7], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2765 "01010110" // /* MW 3 */ + 2766 "11101110" // /* MW 2 */ + 2767 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 36 + 2768 "10011000" // LDA r20, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2769 "10010110" // /* MW 3 */ + 2770 "00011110" // /* MW 2 */ + 2771 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 60 + 2772 "10011000" // LDA r27, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2773 "01110110" // /* MW 3 */ + 2774 "00000111" // /* MW 2 */ + 2775 "00000111" // /* MW 1 */ + 2776 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2777 "00000000" // /* MW 1 */ + 2778 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2779 "00000000" // /* MW 1 */ + 2780 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2781 "00000000" // /* MW 1 */ + 2782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2783 "00000000" // /* MW 1 */ + 2784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2785 "00000000" // /* MW 1 */ + 2786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2787 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 219 30 first +.src_ref 1 "io_buffer_compiler.h" 219 37 first + 2788 "00011000" // SEL.EQZ r19, r19, r20, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2789 "01000010" // /* MW 3 */ + 2790 "11100111" // /* MW 2 */ + 2791 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 218 23 first + 2792 "10011000" // ST r19, [p7, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2793 "01110001" // /* MW 3 */ + 2794 "11010110" // /* MW 2 */ + 2795 "00001111" // /* MW 1 */ + 2796 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2797 "00000000" // /* MW 1 */ + 2798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2799 "00000000" // /* MW 1 */ + 2800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2801 "00000000" // /* MW 1 */ + 2802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2803 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 first + 2804 "00011000" // ACQ.COND r18, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2805 "00001000" // /* MW 3 */ + 2806 "10010111" // /* MW 2 */ + 2807 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 113 7 first + 2808 "10011000" // LSHL r16, r0, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2809 "00011101" // /* MW 3 */ + 2810 "00100001" // /* MW 2 */ + 2811 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 113 7 + 2812 "11111000" // MOV dj0, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2813 "00100000" // /* MW 3 */ + 2814 "10001000" // /* MW 2 */ + 2815 "00011000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 113 7 + 2816 "01000100" // MOVXM p7, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2817 "10000000" // /* MW 5 */ + 2818 "11001000" // /* MW 4 */ + 2819 "11001110" // /* MW 3 */ + 2820 "00000111" // /* MW 2 */ + 2821 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 113 7 + 2822 "00001100" // LDA p1, [p7, dj0]; ST el0, [sp, #-36] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2823 "01011011" // /* MW 5 */ + 2824 "10111000" // /* MW 4 */ + 2825 "11011111" // /* MW 3 */ + 2826 "00010011" // /* MW 2 */ + 2827 "11100000" // /* MW 1 */ + 2828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2829 "00000000" // /* MW 1 */ + 2830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2831 "00000000" // /* MW 1 */ + 2832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2833 "00000000" // /* MW 1 */ + 2834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2835 "00000000" // /* MW 1 */ + 2836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2837 "00000000" // /* MW 1 */ + 2838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2839 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 113 4 +.no_stack_arguments + 2840 "00011000" // JL p1 /* MW 4 */ /* control_operation: words=4 call unconditional cycles_taken=1 indirect absolute delay_slots=5 */ + 2841 "01000000" // /* MW 3 */ + 2842 "00110000" // /* MW 2 */ + 2843 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 116 60 +.src_ref 0 "0_0_reloadable5.cc" 118 60 +.delay_slot + 2844 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2845 "11000000" // /* MW 3 */ + 2846 "01100000" // /* MW 2 */ + 2847 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2849 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2851 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2853 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2854 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2855 "01111110" // /* MW 9 */ + 2856 "10100101" // /* MW 8 */ + 2857 "00000001" // /* MW 7 */ + 2858 "00000000" // /* MW 6 */ + 2859 "00010000" // /* MW 5 */ + 2860 "00000000" // /* MW 4 */ + 2861 "11110000" // /* MW 3 */ + 2862 "00101100" // /* MW 2 */ + 2863 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_main.h" 464 8 +.src_ref 1 "io_buffer_main.h" 464 8 +.src_ref 1 "io_buffer_main.h" 464 8 +.src_ref 0 "0_0_reloadable5.cc" 116 60 first +.return_address + 2864 "00101100" // LDA r17, [p7]; MOVX r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2865 "00001010" // /* MW 5 */ + 2866 "01000000" // /* MW 4 */ + 2867 "11010000" // /* MW 3 */ + 2868 "11000110" // /* MW 2 */ + 2869 "11100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 + 2870 "00011000" // LDA r26, [sp, #-36] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2871 "01010001" // /* MW 3 */ + 2872 "11011111" // /* MW 2 */ + 2873 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 118 60 + 2874 "00011000" // LDA dj0, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2875 "01000001" // /* MW 3 */ + 2876 "11100100" // /* MW 2 */ + 2877 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_main.h" 464 8 + 2878 "00011000" // LDA el0, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2879 "00101001" // /* MW 3 */ + 2880 "11101000" // /* MW 2 */ + 2881 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 + 2882 "00011000" // LDA eh0, [sp, #-32] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2883 "00001001" // /* MW 3 */ + 2884 "11100000" // /* MW 2 */ + 2885 "00000111" // /* MW 1 */ + 2886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2887 "00000000" // /* MW 1 */ + 2888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2889 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 12 first + 2890 "00011000" // ADD.NC p0, r17, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2891 "10001000" // /* MW 3 */ + 2892 "01101000" // /* MW 2 */ + 2893 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 12 + 2894 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2895 "00110110" // /* MW 3 */ + 2896 "00000110" // /* MW 2 */ + 2897 "00000000" // /* MW 1 */ + 2898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2899 "00000000" // /* MW 1 */ + 2900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2901 "00000000" // /* MW 1 */ + 2902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2903 "00000000" // /* MW 1 */ + 2904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2905 "00000000" // /* MW 1 */ + 2906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2907 "00000000" // /* MW 1 */ + 2908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2909 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 first + 2910 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2911 "00001000" // /* MW 3 */ + 2912 "01010101" // /* MW 2 */ + 2913 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 2914 "11010100" // LDA r17, [p0, #-4]; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2915 "01000001" // /* MW 5 */ + 2916 "10101111" // /* MW 4 */ + 2917 "11011101" // /* MW 3 */ + 2918 "11000110" // /* MW 2 */ + 2919 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 +.src_ref 0 "0_0_reloadable5.cc" 118 60 first + 2920 "11010100" // LDA r18, [p7, dj0]; MOV r26, el0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2921 "00111001" // /* MW 5 */ + 2922 "01000000" // /* MW 4 */ + 2923 "11011101" // /* MW 3 */ + 2924 "01001010" // /* MW 2 */ + 2925 "11100000" // /* MW 1 */ + 2926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2927 "00000000" // /* MW 1 */ + 2928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2929 "00000000" // /* MW 1 */ + 2930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2931 "00000000" // /* MW 1 */ + 2932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2933 "00000000" // /* MW 1 */ + 2934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2935 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 2936 "10011000" // SUB r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2937 "00010001" // /* MW 3 */ + 2938 "00100111" // /* MW 2 */ + 2939 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 12 first +.src_ref 1 "io_buffer_compiler.h" 630 24 + 2940 "00100100" // SEL.EQZ r17, r17, r19, r27; ADD.NC p7, r18, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2941 "00010000" // /* MW 5 */ + 2942 "11010010" // /* MW 4 */ + 2943 "01001110" // /* MW 3 */ + 2944 "01100110" // /* MW 2 */ + 2945 "10001100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 12 +.src_ref 1 "io_buffer_compiler.h" 630 22 first + 2946 "00001100" // LDA r17, [p7]; ST r17, [p0, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2947 "01100011" // /* MW 5 */ + 2948 "11101100" // /* MW 4 */ + 2949 "11010001" // /* MW 3 */ + 2950 "11000110" // /* MW 2 */ + 2951 "11100000" // /* MW 1 */ + 2952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2953 "00000000" // /* MW 1 */ + 2954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2955 "00000000" // /* MW 1 */ + 2956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2957 "00000000" // /* MW 1 */ + 2958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2959 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 2960 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2961 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 2962 "11111000" // MOV r26, eh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2963 "00011100" // /* MW 3 */ + 2964 "10100001" // /* MW 2 */ + 2965 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2966 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2967 "00001000" // /* MW 3 */ + 2968 "01010101" // /* MW 2 */ + 2969 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 2970 "11010100" // LDA r17, [p7, #-4]; MOV r27, el0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2971 "00111001" // /* MW 5 */ + 2972 "11000000" // /* MW 4 */ + 2973 "11011101" // /* MW 3 */ + 2974 "11000110" // /* MW 2 */ + 2975 "11111110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 121 60 first + 2976 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2977 "01010110" // /* MW 3 */ + 2978 "00000110" // /* MW 2 */ + 2979 "00000110" // /* MW 1 */ + 2980 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2981 "00000000" // /* MW 1 */ + 2982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2983 "00000000" // /* MW 1 */ + 2984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2985 "00000000" // /* MW 1 */ + 2986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2987 "00000000" // /* MW 1 */ + 2988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2989 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 2990 "10011000" // SUB r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2991 "00010001" // /* MW 3 */ + 2992 "00100111" // /* MW 2 */ + 2993 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 25 first +.src_ref 1 "io_buffer_compiler.h" 630 24 + 2994 "00100100" // SEL.EQZ r17, r17, r19, r27; ADD.NC p0, r18, #20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2995 "00010100" // /* MW 5 */ + 2996 "11010010" // /* MW 4 */ + 2997 "01000000" // /* MW 3 */ + 2998 "01100110" // /* MW 2 */ + 2999 "10001100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 25 +.src_ref 1 "io_buffer_compiler.h" 630 22 first + 3000 "00001100" // LDA r17, [p0]; ST r17, [p7, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3001 "01100011" // /* MW 5 */ + 3002 "11101100" // /* MW 4 */ + 3003 "11011111" // /* MW 3 */ + 3004 "11000110" // /* MW 2 */ + 3005 "00000000" // /* MW 1 */ + 3006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3007 "00000000" // /* MW 1 */ + 3008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3009 "00000000" // /* MW 1 */ + 3010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3011 "00000000" // /* MW 1 */ + 3012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3013 "00000000" // /* MW 1 */ + 3014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3015 "00000000" // /* MW 1 */ + 3016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3017 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 first + 3018 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3019 "00001000" // /* MW 3 */ + 3020 "01010101" // /* MW 2 */ + 3021 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 123 + 3022 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3023 "00111001" // /* MW 3 */ + 3024 "11111100" // /* MW 2 */ + 3025 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 3026 "10011000" // LDA r17, [p0, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3027 "00110110" // /* MW 3 */ + 3028 "11100110" // /* MW 2 */ + 3029 "00000000" // /* MW 1 */ + 3030 "00011000" // LDA p6, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3031 "00011001" // /* MW 3 */ + 3032 "11101111" // /* MW 2 */ + 3033 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 3034 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3035 "10011001" // /* MW 3 */ + 3036 "11110111" // /* MW 2 */ + 3037 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 3038 "00011000" // LDA r14, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3039 "11010001" // /* MW 3 */ + 3040 "11110001" // /* MW 2 */ + 3041 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3042 "00011000" // LDA r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3043 "11110001" // /* MW 3 */ + 3044 "11111001" // /* MW 2 */ + 3045 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 123 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3046 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3047 "00000001" // /* MW 5 */ + 3048 "00000000" // /* MW 4 */ + 3049 "00000000" // /* MW 3 */ + 3050 "11111000" // /* MW 2 */ + 3051 "11111111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 123 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3052 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3053 "00000000" // /* MW 3 */ + 3054 "00101000" // /* MW 2 */ + 3055 "00010000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 first +.delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3056 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3057 "00010001" // /* MW 3 */ + 3058 "00100001" // /* MW 2 */ + 3059 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3061 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3062 "11111000" // MOV r27, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3063 "00100000" // /* MW 3 */ + 3064 "11010111" // /* MW 2 */ + 3065 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.delay_slot + 3066 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3067 "00000010" // /* MW 3 */ + 3068 "01100001" // /* MW 2 */ + 3069 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 22 +.delay_slot + 3070 "10011000" // ST r16, [p0, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3071 "00010001" // /* MW 3 */ + 3072 "11100110" // /* MW 2 */ +.label _Z13kernelWrapperPPvjjjj__end +.label __Z13kernelWrapperPPvjjjj___func_end0 + 3073 "00001000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E +.function setup _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E +.src_ref 2 "elementwise_binary.h" 100 first +.src_ref 2 "elementwise_binary.h" 103 4 first +.function_start + 3088 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3089 "00000000" // /* MW 3 */ + 3090 "00101000" // /* MW 2 */ + 3091 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 3092 "01000100" // MOVXM p0, #509088 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3093 "01000000" // /* MW 5 */ + 3094 "11001001" // /* MW 4 */ + 3095 "11000000" // /* MW 3 */ + 3096 "00000111" // /* MW 2 */ + 3097 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 3098 "10111000" // MOV m0, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3099 "10000000" // /* MW 3 */ + 3100 "00000000" // /* MW 2 */ + 3101 "00011000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 first +.delay_slot + 3102 "10011000" // ST m0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3103 "00000001" // /* MW 3 */ + 3104 "00000100" // /* MW 2 */ + 3105 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 3106 "10011000" // ST m0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3107 "00000001" // /* MW 3 */ + 3108 "00010100" // /* MW 2 */ + 3109 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_end0 + 3111 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv +.function setup _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv +.src_ref 2 "elementwise_binary.h" 89 first +.src_ref 2 "elementwise_binary.h" 92 22 +.src_ref 2 "elementwise_binary.h" 92 24 first +.function_start + 3120 "10111010" // LDA el0, [p1], #4; MOVXM p0, #509056 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3121 "00010000" // /* MW 9 */ + 3122 "01000000" // /* MW 8 */ + 3123 "00110010" // /* MW 7 */ + 3124 "11110000" // /* MW 6 */ + 3125 "00000001" // /* MW 5 */ + 3126 "00000000" // /* MW 4 */ + 3127 "11010000" // /* MW 3 */ + 3128 "10000101" // /* MW 2 */ + 3129 "00100011" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 89 + 3130 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3131 "00000001" // /* MW 5 */ + 3132 "00000000" // /* MW 4 */ + 3133 "00000000" // /* MW 3 */ + 3134 "00001000" // /* MW 2 */ + 3135 "00000000" // /* MW 1 */ + 3136 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3137 "00111101" // /* MW 3 */ + 3138 "11111000" // /* MW 2 */ + 3139 "00001111" // /* MW 1 */ + 3140 "10011000" // ST r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3141 "11110101" // /* MW 3 */ + 3142 "11111101" // /* MW 2 */ + 3143 "00001111" // /* MW 1 */ + 3144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3145 "00000000" // /* MW 1 */ + 3146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3147 "00000000" // /* MW 1 */ + 3148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3149 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 92 22 first + 3150 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3151 "00101001" // /* MW 3 */ + 3152 "00011100" // /* MW 2 */ + 3153 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 93 24 first + 3154 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3155 "00101110" // /* MW 3 */ + 3156 "00011100" // /* MW 2 */ + 3157 "00000001" // /* MW 1 */ + 3158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3159 "00000000" // /* MW 1 */ + 3160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3161 "00000000" // /* MW 1 */ + 3162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3163 "00000000" // /* MW 1 */ + 3164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3165 "00000000" // /* MW 1 */ + 3166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3167 "00000000" // /* MW 1 */ + 3168 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3169 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 93 22 + 3170 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3171 "00101001" // /* MW 3 */ + 3172 "00011100" // /* MW 2 */ + 3173 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 94 24 first + 3174 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3175 "00101110" // /* MW 3 */ + 3176 "00000100" // /* MW 2 */ + 3177 "00000001" // /* MW 1 */ + 3178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3179 "00000000" // /* MW 1 */ + 3180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3181 "00000000" // /* MW 1 */ + 3182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3183 "00000000" // /* MW 1 */ + 3184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3185 "00000000" // /* MW 1 */ + 3186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3187 "00000000" // /* MW 1 */ + 3188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3189 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 94 22 + 3190 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3191 "00101001" // /* MW 3 */ + 3192 "00011100" // /* MW 2 */ + 3193 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 95 24 first + 3194 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3195 "00101110" // /* MW 3 */ + 3196 "00010100" // /* MW 2 */ + 3197 "00000001" // /* MW 1 */ + 3198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3199 "00000000" // /* MW 1 */ + 3200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3201 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 96 8 first +.no_stack_arguments + 3202 "00000100" // JL #3088 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3088 delay_slots=5 */ + 3203 "00000001" // /* MW 5 */ + 3204 "00000000" // /* MW 4 */ + 3205 "00001000" // /* MW 3 */ + 3206 "00000110" // /* MW 2 */ + 3207 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3209 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3211 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3212 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3213 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 95 22 first +.delay_slot + 3214 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3215 "00101001" // /* MW 3 */ + 3216 "11011100" // /* MW 2 */ + 3217 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 96 8 +.delay_slot + 3218 "00101110" // NOPA; NOPS; MOV r15, p0; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 3219 "00011100" // /* MW 13 */ + 3220 "00000000" // /* MW 12 */ + 3221 "00000000" // /* MW 11 */ + 3222 "00000111" // /* MW 10 */ + 3223 "10000110" // /* MW 9 */ + 3224 "01011110" // /* MW 8 */ + 3225 "00000000" // /* MW 7 */ + 3226 "00000000" // /* MW 6 */ + 3227 "10110110" // /* MW 5 */ + 3228 "00000010" // /* MW 4 */ + 3229 "11110000" // /* MW 3 */ + 3230 "00101100" // /* MW 2 */ + 3231 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 96 8 +.src_ref 2 "elementwise_binary.h" 98 4 +.src_ref 3 "add_impl.h" 105 29 +.return_address + 3232 "10111010" // LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3233 "00001000" // /* MW 9 */ + 3234 "11000100" // /* MW 8 */ + 3235 "00110011" // /* MW 7 */ + 3236 "01101000" // /* MW 6 */ + 3237 "00000000" // /* MW 5 */ + 3238 "00000001" // /* MW 4 */ + 3239 "00100000" // /* MW 3 */ + 3240 "00000111" // /* MW 2 */ + 3241 "11111111" // /* MW 1 */ +.src_ref 3 "add_impl.h" 105 29 +.src_ref 3 "add_impl.h" 106 37 +.src_ref 3 "add_impl.h" 106 39 + 3242 "10111010" // MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3243 "01011000" // /* MW 9 */ + 3244 "11111101" // /* MW 8 */ + 3245 "00000111" // /* MW 7 */ + 3246 "00001000" // /* MW 6 */ + 3247 "10000000" // /* MW 5 */ + 3248 "00000001" // /* MW 4 */ + 3249 "10000000" // /* MW 3 */ + 3250 "11100010" // /* MW 2 */ + 3251 "00000001" // /* MW 1 */ +.src_ref 3 "add_impl.h" 105 29 first +.src_ref 3 "add_impl.h" 106 39 + 3252 "01111010" // LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3253 "00000001" // /* MW 9 */ + 3254 "10100000" // /* MW 8 */ + 3255 "00000111" // /* MW 7 */ + 3256 "10000000" // /* MW 6 */ + 3257 "00010001" // /* MW 5 */ + 3258 "00001010" // /* MW 4 */ + 3259 "00100000" // /* MW 3 */ + 3260 "10111110" // /* MW 2 */ + 3261 "11111111" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 50 first + 3262 "10011000" // LDA.u8 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3263 "01001010" // /* MW 3 */ + 3264 "00000110" // /* MW 2 */ + 3265 "00000000" // /* MW 1 */ + 3266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3267 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3269 "00000000" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 37 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3270 "00011000" // ST.s16 r16, [p0, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3271 "00010111" // /* MW 3 */ + 3272 "00000010" // /* MW 2 */ + 3273 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 98 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3274 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3275 "00000000" // /* MW 3 */ + 3276 "00101000" // /* MW 2 */ + 3277 "00010000" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 54 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3278 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3279 "00000101" // /* MW 3 */ + 3280 "00100010" // /* MW 2 */ + 3281 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 98 4 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3282 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3283 "00000001" // /* MW 5 */ + 3284 "00000000" // /* MW 4 */ + 3285 "00000000" // /* MW 3 */ + 3286 "11111000" // /* MW 2 */ + 3287 "11111111" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 54 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3288 "10011000" // EQ r27, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3289 "00100111" // /* MW 3 */ + 3290 "01110111" // /* MW 2 */ + 3291 "00010100" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 39 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3292 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3293 "10000010" // /* MW 3 */ + 3294 "00100001" // /* MW 2 */ + 3295 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + 3297 "00000000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E +.function setup _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E +.src_ref 2 "elementwise_binary_broadcasting.h" 40 first +.src_ref 2 "elementwise_binary_broadcasting.h" 41 33 +.src_ref 2 "elementwise_binary_broadcasting.h" 41 33 +.function_start + 3312 "10111010" // MOVA m0, #20; MOVXM p0, #509068 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3313 "00010000" // /* MW 9 */ + 3314 "01000110" // /* MW 8 */ + 3315 "00110010" // /* MW 7 */ + 3316 "11110000" // /* MW 6 */ + 3317 "00000001" // /* MW 5 */ + 3318 "00000000" // /* MW 4 */ + 3319 "10000000" // /* MW 3 */ + 3320 "10000000" // /* MW 2 */ + 3321 "00000010" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 41 25 +.src_ref 2 "elementwise_binary_broadcasting.h" 41 33 +.src_ref 2 "elementwise_binary_broadcasting.h" 41 33 first +.src_ref 2 "elementwise_binary_broadcasting.h" 42 25 + 3322 "10111010" // LDA.u8 r0, [p0], m0; MOVX r2, #1; MOV r1, #6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3323 "01011000" // /* MW 9 */ + 3324 "00000110" // /* MW 8 */ + 3325 "00101000" // /* MW 7 */ + 3326 "00101000" // /* MW 6 */ + 3327 "00100000" // /* MW 5 */ + 3328 "00000000" // /* MW 4 */ + 3329 "01010000" // /* MW 3 */ + 3330 "00000001" // /* MW 2 */ + 3331 "00000001" // /* MW 1 */ + 3332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3333 "00000000" // /* MW 1 */ + 3334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3335 "00000000" // /* MW 1 */ + 3336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3337 "00000000" // /* MW 1 */ + 3338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3339 "00000000" // /* MW 1 */ + 3340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3341 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 43 4 first + 3342 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3343 "00000000" // /* MW 3 */ + 3344 "00101000" // /* MW 2 */ + 3345 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 41 33 first +.delay_slot + 3346 "00011000" // NEZ r3, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3347 "11110000" // /* MW 3 */ + 3348 "00000110" // /* MW 2 */ + 3349 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 41 33 +.delay_slot + 3350 "10011000" // NE r0, r2, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3351 "00001000" // /* MW 3 */ + 3352 "10000000" // /* MW 2 */ + 3353 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 42 25 first +.delay_slot + 3354 "10011000" // LSHL r0, r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3355 "00011101" // /* MW 3 */ + 3356 "00000000" // /* MW 2 */ + 3357 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 41 25 first +.src_ref 2 "elementwise_binary_broadcasting.h" 42 23 +.delay_slot + 3358 "01011100" // ST r0, [p0, #4]; LSHL r2, r3, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3359 "00111011" // /* MW 5 */ + 3360 "10001000" // /* MW 4 */ + 3361 "00110001" // /* MW 3 */ + 3362 "10000010" // /* MW 2 */ + 3363 "00000010" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 41 23 +.delay_slot + 3364 "10011000" // ST r2, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3365 "01010001" // /* MW 3 */ + 3366 "00000100" // /* MW 2 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_end0 + 3367 "00001000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv +.function setup _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv +.src_ref 2 "elementwise_binary_broadcasting.h" 35 +.src_ref 2 "elementwise_binary_broadcasting.h" 35 first +.function_start + 3376 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3377 "00000001" // /* MW 5 */ + 3378 "00000000" // /* MW 4 */ + 3379 "00000000" // /* MW 3 */ + 3380 "00001000" // /* MW 2 */ + 3381 "00000000" // /* MW 1 */ + 3382 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3383 "00111101" // /* MW 3 */ + 3384 "11111100" // /* MW 2 */ + 3385 "00001111" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 36 8 first +.no_stack_arguments + 3386 "00000100" // JL #3120 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3120 delay_slots=5 */ + 3387 "00000001" // /* MW 5 */ + 3388 "00000000" // /* MW 4 */ + 3389 "00011000" // /* MW 3 */ + 3390 "00000110" // /* MW 2 */ + 3391 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 36 8 +.delay_slot + 3392 "01000100" // MOVXM p0, #509056 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3393 "00000000" // /* MW 5 */ + 3394 "11001001" // /* MW 4 */ + 3395 "11000000" // /* MW 3 */ + 3396 "00000111" // /* MW 2 */ + 3397 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3399 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3401 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3403 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3404 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3405 "01100111" // /* MW 3 */ + 3406 "00000001" // /* MW 2 */ + 3407 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 37 8 +.return_address + 3408 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3409 "00111001" // /* MW 3 */ + 3410 "11111100" // /* MW 2 */ + 3411 "00000111" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 37 8 first +.tail_call + 3412 "10000100" // J #3312 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=3312 delay_slots=5 */ + 3413 "00000000" // /* MW 5 */ + 3414 "00000000" // /* MW 4 */ + 3415 "01111000" // /* MW 3 */ + 3416 "00000110" // /* MW 2 */ + 3417 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 37 8 +.delay_slot + 3418 "01000100" // MOVXM p0, #509056 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3419 "00000000" // /* MW 5 */ + 3420 "11001001" // /* MW 4 */ + 3421 "11000000" // /* MW 3 */ + 3422 "00000111" // /* MW 2 */ + 3423 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 38 4 first +.delay_slot + 3424 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3425 "00000001" // /* MW 5 */ + 3426 "00000000" // /* MW 4 */ + 3427 "00000000" // /* MW 3 */ + 3428 "11111000" // /* MW 2 */ + 3429 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3431 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3433 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + 3435 "00000000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.function run _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.src_ref 2 "elementwise_binary_broadcasting.h" 48 first +.src_ref 2 "elementwise_binary_broadcasting.h" 55 37 +.src_ref 2 "elementwise_binary_broadcasting.h" 61 19 +.function_start + 3440 "10111010" // MOVA m0, #20; MOVXM p3, #509056 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3441 "00010000" // /* MW 9 */ + 3442 "01000000" // /* MW 8 */ + 3443 "10110010" // /* MW 7 */ + 3444 "11110001" // /* MW 6 */ + 3445 "00000001" // /* MW 5 */ + 3446 "00000000" // /* MW 4 */ + 3447 "10000000" // /* MW 3 */ + 3448 "10000000" // /* MW 2 */ + 3449 "00000010" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 55 37 first + 3450 "10011000" // LDA r0, [p3], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3451 "00010110" // /* MW 3 */ + 3452 "00111100" // /* MW 2 */ + 3453 "00000011" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 61 19 first +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 + 3454 "11010100" // LDA.u8 r1, [p3], m0; MOV p4, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3455 "10000001" // /* MW 5 */ + 3456 "11001101" // /* MW 4 */ + 3457 "01011000" // /* MW 3 */ + 3458 "00000101" // /* MW 2 */ + 3459 "01100001" // /* MW 1 */ + 3460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3461 "00000000" // /* MW 1 */ + 3462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3463 "00000000" // /* MW 1 */ + 3464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3465 "00000000" // /* MW 1 */ + 3466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3467 "00000000" // /* MW 1 */ + 3468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3469 "00000000" // /* MW 1 */ + 3470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3471 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 61 12 +.src_ref 2 "elementwise_binary_broadcasting.h" 61 35 + 3472 "10000100" // JNZ r1, #3536 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3536 delay_slots=5 */ + 3473 "00000001" // /* MW 5 */ + 3474 "01000000" // /* MW 4 */ + 3475 "11101000" // /* MW 3 */ + 3476 "00000110" // /* MW 2 */ + 3477 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 55 78 +.delay_slot + 3478 "00011000" // MOVX r2, #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3479 "11101001" // /* MW 3 */ + 3480 "11000100" // /* MW 2 */ + 3481 "00010111" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 55 78 first +.delay_slot + 3482 "10011000" // LSHL r0, r0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3483 "00101101" // /* MW 3 */ + 3484 "00000000" // /* MW 2 */ + 3485 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3487 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3489 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3490 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3491 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 62 28 first + 3492 "10011000" // LDA.s16 r1, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3493 "00110010" // /* MW 3 */ + 3494 "00000100" // /* MW 2 */ + 3495 "00000000" // /* MW 1 */ + 3496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3497 "00000000" // /* MW 1 */ + 3498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3499 "00000000" // /* MW 1 */ + 3500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3501 "00000000" // /* MW 1 */ + 3502 "10000100" // J #3568 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3568 delay_slots=5 */ + 3503 "00000000" // /* MW 5 */ + 3504 "00000000" // /* MW 4 */ + 3505 "11111000" // /* MW 3 */ + 3506 "00000110" // /* MW 2 */ + 3507 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3509 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3511 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 first +.delay_slot + 3512 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3513 "01110010" // /* MW 3 */ + 3514 "00000101" // /* MW 2 */ + 3515 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3516 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3517 "01100111" // /* MW 3 */ + 3518 "00000001" // /* MW 2 */ + 3519 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.delay_slot + 3520 "11100001" // NOPA; NOPB; VST x0, [p0]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3521 "00000000" // /* MW 15 */ + 3522 "00000000" // /* MW 14 */ + 3523 "01111000" // /* MW 13 */ + 3524 "10100101" // /* MW 12 */ + 3525 "00000001" // /* MW 11 */ + 3526 "00000000" // /* MW 10 */ + 3527 "00000000" // /* MW 9 */ + 3528 "00000000" // /* MW 8 */ + 3529 "00010011" // /* MW 7 */ + 3530 "00000100" // /* MW 6 */ + 3531 "00100000" // /* MW 5 */ + 3532 "00000000" // /* MW 4 */ + 3533 "11110000" // /* MW 3 */ + 3534 "00101100" // /* MW 2 */ + 3535 "00000000" // /* MW 1 */ +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_96 +.src_ref 2 "elementwise_binary_broadcasting.h" 65 28 first + 3536 "10011000" // LDA.s16 r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3537 "00110010" // /* MW 3 */ + 3538 "00000100" // /* MW 2 */ + 3539 "00000001" // /* MW 1 */ + 3540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3541 "00000000" // /* MW 1 */ + 3542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3543 "00000000" // /* MW 1 */ + 3544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3545 "00000000" // /* MW 1 */ + 3546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3547 "00000000" // /* MW 1 */ + 3548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3549 "00000000" // /* MW 1 */ + 3550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3551 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 first + 3552 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3553 "01110010" // /* MW 3 */ + 3554 "00000101" // /* MW 2 */ + 3555 "00011000" // /* MW 1 */ + 3556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3557 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first + 3558 "01111010" // NOPA; VST x0, [p1]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3559 "00000000" // /* MW 9 */ + 3560 "00000000" // /* MW 8 */ + 3561 "00000000" // /* MW 7 */ + 3562 "00000000" // /* MW 6 */ + 3563 "00010011" // /* MW 5 */ + 3564 "00000100" // /* MW 4 */ + 3565 "11110001" // /* MW 3 */ + 3566 "00101100" // /* MW 2 */ + 3567 "00000000" // /* MW 1 */ +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_128 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 first +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 first + 3568 "10111010" // LDA m0, [p4, #20]; MOVX r0, #60; ADD.NC lc, r0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3569 "01001000" // /* MW 9 */ + 3570 "00111111" // /* MW 8 */ + 3571 "10111000" // /* MW 7 */ + 3572 "10001010" // /* MW 6 */ + 3573 "00000111" // /* MW 5 */ + 3574 "00000000" // /* MW 4 */ + 3575 "11010000" // /* MW 3 */ + 3576 "10000000" // /* MW 2 */ + 3577 "10001010" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 + 3578 "10111010" // LDA m1, [p3, #4]; MOVXM ls, #3680 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3579 "00010000" // /* MW 9 */ + 3580 "00110000" // /* MW 8 */ + 3581 "01111111" // /* MW 7 */ + 3582 "00000000" // /* MW 6 */ + 3583 "00000000" // /* MW 5 */ + 3584 "00000000" // /* MW 4 */ + 3585 "11010000" // /* MW 3 */ + 3586 "10010000" // /* MW 2 */ + 3587 "01100010" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 + 3588 "01000100" // MOVXM le, #3712 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3589 "00000000" // /* MW 5 */ + 3590 "11111101" // /* MW 4 */ + 3591 "00000110" // /* MW 3 */ + 3592 "00000000" // /* MW 2 */ + 3593 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 + 3594 "01000100" // MOVXM p4, #508944 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3595 "00100000" // /* MW 5 */ + 3596 "11001000" // /* MW 4 */ + 3597 "11001000" // /* MW 3 */ + 3598 "00000111" // /* MW 2 */ + 3599 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 + 3600 "10011000" // LDA.s8 r1, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3601 "00100010" // /* MW 3 */ + 3602 "00000100" // /* MW 2 */ + 3603 "00000100" // /* MW 1 */ + 3604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3605 "00000000" // /* MW 1 */ + 3606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3607 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 146 20 first + 3608 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3609 "10101011" // /* MW 3 */ + 3610 "00001000" // /* MW 2 */ + 3611 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 2 "elementwise_binary.h" 148 20 first + 3612 "10011000" // VLDA.CONV.fp32.bf16 cml2, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3613 "00101011" // /* MW 3 */ + 3614 "00101001" // /* MW 2 */ + 3615 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 170 20 first + 3616 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3617 "00101011" // /* MW 3 */ + 3618 "00001000" // /* MW 2 */ + 3619 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3620 "10011000" // VLDA.CONV.fp32.bf16 cml4, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3621 "00101011" // /* MW 3 */ + 3622 "00101010" // /* MW 2 */ + 3623 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 154 20 +.src_ref 2 "elementwise_binary.h" 177 20 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3624 "00101100" // VLDA.CONV.fp32.bf16 cml1, [p0], m0; MOVX crRnd, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3625 "00000000" // /* MW 5 */ + 3626 "11110101" // /* MW 4 */ + 3627 "01110000" // /* MW 3 */ + 3628 "00010101" // /* MW 2 */ + 3629 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3630 "01100010" // VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3631 "00111101" // /* MW 7 */ + 3632 "00101000" // /* MW 6 */ + 3633 "00000011" // /* MW 5 */ + 3634 "00000100" // /* MW 4 */ + 3635 "01110000" // /* MW 3 */ + 3636 "00100101" // /* MW 2 */ + 3637 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3638 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3639 "00101011" // /* MW 3 */ + 3640 "00001000" // /* MW 2 */ + 3641 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3642 "01100010" // VLDA.CONV.fp32.bf16 cml4, [p1], m1; VADD.f dm4, dm0, dm4, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3643 "00111101" // /* MW 7 */ + 3644 "00010000" // /* MW 6 */ + 3645 "00000100" // /* MW 5 */ + 3646 "00000100" // /* MW 4 */ + 3647 "01110000" // /* MW 3 */ + 3648 "01000101" // /* MW 2 */ + 3649 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3650 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3651 "10101011" // /* MW 3 */ + 3652 "00001000" // /* MW 2 */ + 3653 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3654 "01100010" // VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3655 "00111101" // /* MW 7 */ + 3656 "00101000" // /* MW 6 */ + 3657 "00000011" // /* MW 5 */ + 3658 "00000100" // /* MW 4 */ + 3659 "01110000" // /* MW 3 */ + 3660 "00100101" // /* MW 2 */ + 3661 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3662 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3663 "00101011" // /* MW 3 */ + 3664 "00001000" // /* MW 2 */ + 3665 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3666 "01101110" // VLDA.CONV.fp32.bf16 cml4, [p1], m1; VST.CONV.bf16.fp32 cml3, [p2], #64; NOPM; VADD.f dm4, dm0, dm4, r0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 3667 "00111101" // /* MW 13 */ + 3668 "00010000" // /* MW 12 */ + 3669 "00000100" // /* MW 11 */ + 3670 "01010111" // /* MW 10 */ + 3671 "00011010" // /* MW 9 */ + 3672 "01000000" // /* MW 8 */ + 3673 "00000000" // /* MW 7 */ + 3674 "00000000" // /* MW 6 */ + 3675 "01000110" // /* MW 5 */ + 3676 "00111011" // /* MW 4 */ + 3677 "01110100" // /* MW 3 */ + 3678 "01000101" // /* MW 2 */ + 3679 "00100101" // /* MW 1 */ +.label ZLS_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_240 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 3680 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3681 "10101011" // /* MW 3 */ + 3682 "00001000" // /* MW 2 */ + 3683 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3684 "01100110" // VLDA.CONV.fp32.bf16 cml2, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3685 "00111101" // /* MW 11 */ + 3686 "00101000" // /* MW 10 */ + 3687 "00000011" // /* MW 9 */ + 3688 "10001110" // /* MW 8 */ + 3689 "00010001" // /* MW 7 */ + 3690 "00001111" // /* MW 6 */ + 3691 "00100001" // /* MW 5 */ + 3692 "00000000" // /* MW 4 */ + 3693 "01110000" // /* MW 3 */ + 3694 "00100101" // /* MW 2 */ + 3695 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3696 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3697 "00000000" // /* MW 15 */ + 3698 "00000000" // /* MW 14 */ + 3699 "01111000" // /* MW 13 */ + 3700 "10100101" // /* MW 12 */ + 3701 "00000001" // /* MW 11 */ + 3702 "00000000" // /* MW 10 */ + 3703 "00000000" // /* MW 9 */ + 3704 "00000000" // /* MW 8 */ + 3705 "01011011" // /* MW 7 */ + 3706 "00000001" // /* MW 6 */ + 3707 "00100000" // /* MW 5 */ + 3708 "00000000" // /* MW 4 */ + 3709 "01110000" // /* MW 3 */ + 3710 "00000101" // /* MW 2 */ + 3711 "00000001" // /* MW 1 */ +.label ZLE_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_272 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3712 "11101011" // VLDA.CONV.fp32.bf16 cml4, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml3, [p2], #64;NOPX; NOPM; VADD.f dm4, dm0, dm4, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3713 "10000001" // /* MW 15 */ + 3714 "00100000" // /* MW 14 */ + 3715 "01111000" // /* MW 13 */ + 3716 "10100101" // /* MW 12 */ + 3717 "00000001" // /* MW 11 */ + 3718 "00000000" // /* MW 10 */ + 3719 "00000000" // /* MW 9 */ + 3720 "00000000" // /* MW 8 */ + 3721 "10100011" // /* MW 7 */ + 3722 "00011101" // /* MW 6 */ + 3723 "00100010" // /* MW 5 */ + 3724 "00000000" // /* MW 4 */ + 3725 "01110000" // /* MW 3 */ + 3726 "01000101" // /* MW 2 */ + 3727 "00100101" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 3728 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3729 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3730 "01100010" // VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3731 "00111101" // /* MW 7 */ + 3732 "00101000" // /* MW 6 */ + 3733 "00000011" // /* MW 5 */ + 3734 "00000010" // /* MW 4 */ + 3735 "01100000" // /* MW 3 */ + 3736 "11000100" // /* MW 2 */ + 3737 "01000011" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3738 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3739 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3740 "01100010" // VST.CONV.bf16.fp32 cml3, [p2], #64; VADD.f dm4, dm0, dm4, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3741 "00111101" // /* MW 7 */ + 3742 "00010000" // /* MW 6 */ + 3743 "00000100" // /* MW 5 */ + 3744 "00000010" // /* MW 4 */ + 3745 "01100000" // /* MW 3 */ + 3746 "10110100" // /* MW 2 */ + 3747 "01000011" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3749 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 154 20 first +.src_ref 2 "elementwise_binary_broadcasting.h" 80 4 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3750 "01011100" // VST.CONV.bf16.fp32 cml4, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 3751 "00000000" // /* MW 5 */ + 3752 "01010000" // /* MW 4 */ + 3753 "01100000" // /* MW 3 */ + 3754 "11000100" // /* MW 2 */ + 3755 "01000011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3756 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3757 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.delay_slot + 3758 "00011000" // VST.CONV.bf16.fp32 cml3, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3759 "10100011" // /* MW 3 */ + 3760 "00011101" // /* MW 2 */ + 3761 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3762 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3763 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 154 20 first +.delay_slot + 3764 "00011000" // VST.CONV.bf16.fp32 cml4, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3765 "00100011" // /* MW 3 */ + 3766 "00011110" // /* MW 2 */ + 3767 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0 + 3769 "00000000" // /* MW 1 */ +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E +.function run _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 41 +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 41 first +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 76 8 +.function_start + 3776 "00111010" // MOVS p2, p1; PADDXM [sp], #128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3777 "01110001" // /* MW 9 */ + 3778 "00000000" // /* MW 8 */ + 3779 "00000000" // /* MW 7 */ + 3780 "00000000" // /* MW 6 */ + 3781 "00000100" // /* MW 5 */ + 3782 "00000000" // /* MW 4 */ + 3783 "01100000" // /* MW 3 */ + 3784 "10010001" // /* MW 2 */ + 3785 "01010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 51 35 + 3786 "00000010" // ST lr, [sp, #-4]; MOV r16, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3787 "01110000" // /* MW 7 */ + 3788 "01100000" // /* MW 6 */ + 3789 "00001000" // /* MW 5 */ + 3790 "00000010" // /* MW 4 */ + 3791 "10110000" // /* MW 3 */ + 3792 "10000111" // /* MW 2 */ + 3793 "11111111" // /* MW 1 */ + 3794 "11111000" // MOV r17, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3795 "11100000" // /* MW 3 */ + 3796 "01010101" // /* MW 2 */ + 3797 "00011100" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 51 19 + 3798 "01000100" // MOVXM p3, #509068 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3799 "00011000" // /* MW 5 */ + 3800 "11001001" // /* MW 4 */ + 3801 "11000110" // /* MW 3 */ + 3802 "00000111" // /* MW 2 */ + 3803 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 51 19 first + 3804 "00010100" // LDA.u8 r27, [p3], #2; ADD.NC p0, r17, #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3805 "10000000" // /* MW 5 */ + 3806 "11010001" // /* MW 4 */ + 3807 "01010000" // /* MW 3 */ + 3808 "11101101" // /* MW 2 */ + 3809 "01100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 538 13 first +.src_ref 4 "vector_native_types.hpp" 374 137 first + 3810 "00001100" // LDA.s16 r18, [p3], #-14; VST sfh, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3811 "01010110" // /* MW 5 */ + 3812 "00001110" // /* MW 4 */ + 3813 "01010000" // /* MW 3 */ + 3814 "11001010" // /* MW 2 */ + 3815 "01110011" // /* MW 1 */ + 3816 "00011000" // ST.s16 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3817 "01010111" // /* MW 3 */ + 3818 "00000110" // /* MW 2 */ + 3819 "00000000" // /* MW 1 */ + 3820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3821 "00000000" // /* MW 1 */ + 3822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3823 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 76 8 first +.no_stack_arguments + 3824 "00000100" // JL #3440 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3440 delay_slots=5 */ + 3825 "00000001" // /* MW 5 */ + 3826 "00000000" // /* MW 4 */ + 3827 "10111000" // /* MW 3 */ + 3828 "00000110" // /* MW 2 */ + 3829 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 51 35 +.delay_slot + 3830 "11111000" // MOV r17, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3831 "11000000" // /* MW 3 */ + 3832 "01010000" // /* MW 2 */ + 3833 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3835 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 51 35 first +.delay_slot + 3836 "00011000" // SEL.EQZ r18, r16, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3837 "00010010" // /* MW 3 */ + 3838 "00100101" // /* MW 2 */ + 3839 "00010100" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 51 35 +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 76 8 +.delay_slot + 3840 "11100100" // SEL.EQZ r16, r17, r16, r27; MOV p1, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3841 "01000001" // /* MW 5 */ + 3842 "11010010" // /* MW 4 */ + 3843 "01000010" // /* MW 3 */ + 3844 "00100000" // /* MW 2 */ + 3845 "10001100" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 76 8 +.delay_slot + 3846 "10111010" // NOPA; NOPB; MOV p0, r16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3847 "01111110" // /* MW 9 */ + 3848 "00010000" // /* MW 8 */ + 3849 "00110100" // /* MW 7 */ + 3850 "00000000" // /* MW 6 */ + 3851 "00010000" // /* MW 5 */ + 3852 "00000000" // /* MW 4 */ + 3853 "11110000" // /* MW 3 */ + 3854 "00101100" // /* MW 2 */ + 3855 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 77 4 +.return_address + 3856 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3857 "00111001" // /* MW 3 */ + 3858 "11111100" // /* MW 2 */ + 3859 "00000111" // /* MW 1 */ + 3860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3861 "00000000" // /* MW 1 */ + 3862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3863 "00000000" // /* MW 1 */ + 3864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3865 "00000000" // /* MW 1 */ + 3866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3867 "00000000" // /* MW 1 */ + 3868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3869 "00000000" // /* MW 1 */ + 3870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3871 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 77 4 first + 3872 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3873 "00000000" // /* MW 3 */ + 3874 "00101000" // /* MW 2 */ + 3875 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 77 4 +.delay_slot + 3876 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3877 "00000001" // /* MW 5 */ + 3878 "00000000" // /* MW 4 */ + 3879 "00000000" // /* MW 3 */ + 3880 "11110000" // /* MW 2 */ + 3881 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3883 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3885 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3887 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_end0 + 3889 "00000000" // /* MW 1 */ +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_add1d_attribute_broadcasting _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 6 "superkernels.cpp" 152 first +.src_ref 6 "superkernels.cpp" 157 6 +.function_start + 3904 "01000100" // MOVXM p3, #508864 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3905 "10000000" // /* MW 5 */ + 3906 "11000111" // /* MW 4 */ + 3907 "11000110" // /* MW 3 */ + 3908 "00000111" // /* MW 2 */ + 3909 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 157 6 first + 3910 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3911 "11000001" // /* MW 5 */ + 3912 "10110101" // /* MW 4 */ + 3913 "11011000" // /* MW 3 */ + 3914 "11000010" // /* MW 2 */ + 3915 "01100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 152 + 3916 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3917 "00000001" // /* MW 5 */ + 3918 "00000000" // /* MW 4 */ + 3919 "00000000" // /* MW 3 */ + 3920 "00001000" // /* MW 2 */ + 3921 "00000000" // /* MW 1 */ + 3922 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3923 "01110000" // /* MW 7 */ + 3924 "11010000" // /* MW 6 */ + 3925 "00001011" // /* MW 5 */ + 3926 "00000000" // /* MW 4 */ + 3927 "10110000" // /* MW 3 */ + 3928 "01100011" // /* MW 2 */ + 3929 "11111111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 154 11 + 3930 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #508876 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3931 "00010001" // /* MW 9 */ + 3932 "11100110" // /* MW 8 */ + 3933 "00110001" // /* MW 7 */ + 3934 "11110011" // /* MW 6 */ + 3935 "00000001" // /* MW 5 */ + 3936 "00000000" // /* MW 4 */ + 3937 "10110000" // /* MW 3 */ + 3938 "10000010" // /* MW 2 */ + 3939 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 + 3940 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3941 "11000000" // /* MW 3 */ + 3942 "11010100" // /* MW 2 */ + 3943 "00011011" // /* MW 1 */ + 3944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3945 "00000000" // /* MW 1 */ + 3946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3947 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 157 6 +.src_ref 6 "superkernels.cpp" 157 16 + 3948 "10000100" // JNZ r16, #4112 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4112 delay_slots=5 */ + 3949 "00000001" // /* MW 5 */ + 3950 "01000000" // /* MW 4 */ + 3951 "00001000" // /* MW 3 */ + 3952 "00001000" // /* MW 2 */ + 3953 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 154 22 first +.delay_slot + 3954 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3955 "10010000" // /* MW 3 */ + 3956 "01100010" // /* MW 2 */ + 3957 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 154 30 +.delay_slot + 3958 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3959 "11111011" // /* MW 3 */ + 3960 "01100011" // /* MW 2 */ + 3961 "00010100" // /* MW 1 */ +.delay_slot + 3962 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3963 "00111101" // /* MW 3 */ + 3964 "11110100" // /* MW 2 */ + 3965 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 6 "superkernels.cpp" 154 11 +.delay_slot + 3966 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3967 "01110000" // /* MW 7 */ + 3968 "01100000" // /* MW 6 */ + 3969 "00110000" // /* MW 5 */ + 3970 "00000011" // /* MW 4 */ + 3971 "00110000" // /* MW 3 */ + 3972 "11000110" // /* MW 2 */ + 3973 "11000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 160 4 +.src_ref 6 "superkernels.cpp" 171 2 +.delay_slot + 3974 "01000100" // MOVXM p0, #509056 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3975 "00000000" // /* MW 5 */ + 3976 "11001001" // /* MW 4 */ + 3977 "11000000" // /* MW 3 */ + 3978 "00000111" // /* MW 2 */ + 3979 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3980 "01000100" // MOVXM p2, #508944 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3981 "00100000" // /* MW 5 */ + 3982 "11001000" // /* MW 4 */ + 3983 "11000100" // /* MW 3 */ + 3984 "00000111" // /* MW 2 */ + 3985 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3986 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #508940 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3987 "00010000" // /* MW 9 */ + 3988 "00000110" // /* MW 8 */ + 3989 "00110010" // /* MW 7 */ + 3990 "11110001" // /* MW 6 */ + 3991 "00000001" // /* MW 5 */ + 3992 "00000000" // /* MW 4 */ + 3993 "11100000" // /* MW 3 */ + 3994 "11000000" // /* MW 2 */ + 3995 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3997 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 160 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 3998 "00000100" // JL #3376 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3376 delay_slots=5 */ + 3999 "00000001" // /* MW 5 */ + 4000 "00000000" // /* MW 4 */ + 4001 "10011000" // /* MW 3 */ + 4002 "00000110" // /* MW 2 */ + 4003 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4005 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4007 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4008 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4009 "00110001" // /* MW 3 */ + 4010 "00100000" // /* MW 2 */ + 4011 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 4012 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4013 "00000101" // /* MW 3 */ + 4014 "00100000" // /* MW 2 */ + 4015 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 4016 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4017 "00000000" // /* MW 15 */ + 4018 "00000000" // /* MW 14 */ + 4019 "01111000" // /* MW 13 */ + 4020 "10100101" // /* MW 12 */ + 4021 "00000001" // /* MW 11 */ + 4022 "00000000" // /* MW 10 */ + 4023 "00000000" // /* MW 9 */ + 4024 "10000000" // /* MW 8 */ + 4025 "00010001" // /* MW 7 */ + 4026 "00000110" // /* MW 6 */ + 4027 "00100010" // /* MW 5 */ + 4028 "00000000" // /* MW 4 */ + 4029 "11110000" // /* MW 3 */ + 4030 "00101100" // /* MW 2 */ + 4031 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 164 18 +.return_address + 4032 "01000100" // MOVXM p2, #508876 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4033 "10011000" // /* MW 5 */ + 4034 "11000111" // /* MW 4 */ + 4035 "11000100" // /* MW 3 */ + 4036 "00000111" // /* MW 2 */ + 4037 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 164 18 first +.src_ref 6 "superkernels.cpp" 164 65 + 4038 "10111010" // LDA r16, [p2]; MOVXM p2, #509056 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4039 "00010000" // /* MW 9 */ + 4040 "01000000" // /* MW 8 */ + 4041 "00110010" // /* MW 7 */ + 4042 "11110001" // /* MW 6 */ + 4043 "00000001" // /* MW 5 */ + 4044 "00000000" // /* MW 4 */ + 4045 "11010000" // /* MW 3 */ + 4046 "11000010" // /* MW 2 */ + 4047 "01000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 162 51 +.src_ref 6 "superkernels.cpp" 164 65 +.src_ref 6 "superkernels.cpp" 171 2 + 4048 "10111010" // LDA r17, [p2]; MOVXM p2, #509056 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4049 "00010000" // /* MW 9 */ + 4050 "01000000" // /* MW 8 */ + 4051 "00110010" // /* MW 7 */ + 4052 "11110001" // /* MW 6 */ + 4053 "00000001" // /* MW 5 */ + 4054 "00000000" // /* MW 4 */ + 4055 "11010000" // /* MW 3 */ + 4056 "11000110" // /* MW 2 */ + 4057 "01000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 162 51 first +.src_ref 6 "superkernels.cpp" 164 16 +.src_ref 6 "superkernels.cpp" 169 47 + 4058 "10111010" // LDA.u16 r18, [p2, #10]; MOVXM p1, #508880 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4059 "00010000" // /* MW 9 */ + 4060 "11101000" // /* MW 8 */ + 4061 "10110001" // /* MW 7 */ + 4062 "11110000" // /* MW 6 */ + 4063 "00000001" // /* MW 5 */ + 4064 "00000000" // /* MW 4 */ + 4065 "01010000" // /* MW 3 */ + 4066 "11001011" // /* MW 2 */ + 4067 "01001010" // /* MW 1 */ + 4068 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4069 "00000000" // /* MW 1 */ + 4070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4071 "00000000" // /* MW 1 */ + 4072 "10000100" // J #4128 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=4128 delay_slots=5 */ + 4073 "00000000" // /* MW 5 */ + 4074 "00000000" // /* MW 4 */ + 4075 "00010000" // /* MW 3 */ + 4076 "00001000" // /* MW 2 */ + 4077 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 162 13 +.delay_slot + 4078 "01000100" // MOVXM p0, #508936 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4079 "00010000" // /* MW 5 */ + 4080 "11001000" // /* MW 4 */ + 4081 "11000000" // /* MW 3 */ + 4082 "00000111" // /* MW 2 */ + 4083 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4085 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 164 27 first +.delay_slot + 4086 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4087 "00001111" // /* MW 3 */ + 4088 "01100001" // /* MW 2 */ + 4089 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 162 13 first +.delay_slot + 4090 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4091 "10100011" // /* MW 5 */ + 4092 "00001100" // /* MW 4 */ + 4093 "11110000" // /* MW 3 */ + 4094 "00101100" // /* MW 2 */ + 4095 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 164 16 first +.delay_slot + 4096 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4097 "00000000" // /* MW 15 */ + 4098 "00000000" // /* MW 14 */ + 4099 "01111000" // /* MW 13 */ + 4100 "10100101" // /* MW 12 */ + 4101 "00000001" // /* MW 11 */ + 4102 "00000000" // /* MW 10 */ + 4103 "00000000" // /* MW 9 */ + 4104 "10000000" // /* MW 8 */ + 4105 "00010001" // /* MW 7 */ + 4106 "00000110" // /* MW 6 */ + 4107 "00100001" // /* MW 5 */ + 4108 "00000000" // /* MW 4 */ + 4109 "11110000" // /* MW 3 */ + 4110 "00101100" // /* MW 2 */ + 4111 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 6 "superkernels.cpp" 169 47 +.src_ref 6 "superkernels.cpp" 171 2 + 4112 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #508880; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4113 "00000000" // /* MW 15 */ + 4114 "00000000" // /* MW 14 */ + 4115 "00010000" // /* MW 13 */ + 4116 "11101000" // /* MW 12 */ + 4117 "10110001" // /* MW 11 */ + 4118 "11110000" // /* MW 10 */ + 4119 "00000001" // /* MW 9 */ + 4120 "00000000" // /* MW 8 */ + 4121 "10001011" // /* MW 7 */ + 4122 "10000000" // /* MW 6 */ + 4123 "00100010" // /* MW 5 */ + 4124 "00000000" // /* MW 4 */ + 4125 "11110000" // /* MW 3 */ + 4126 "00101100" // /* MW 2 */ + 4127 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 +.src_ref 1 "io_buffer_main.h" 242 49 first + 4128 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4129 "00000000" // /* MW 7 */ + 4130 "11000011" // /* MW 6 */ + 4131 "10110011" // /* MW 5 */ + 4132 "00000011" // /* MW 4 */ + 4133 "01100000" // /* MW 3 */ + 4134 "10010001" // /* MW 2 */ + 4135 "01110011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 168 2 + 4136 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508864 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4137 "00010000" // /* MW 9 */ + 4138 "11100000" // /* MW 8 */ + 4139 "00110001" // /* MW 7 */ + 4140 "11110000" // /* MW 6 */ + 4141 "00000001" // /* MW 5 */ + 4142 "00000000" // /* MW 4 */ + 4143 "11010000" // /* MW 3 */ + 4144 "11101110" // /* MW 2 */ + 4145 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 4146 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4147 "00010110" // /* MW 3 */ + 4148 "11111110" // /* MW 2 */ + 4149 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 4150 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4151 "00110110" // /* MW 3 */ + 4152 "11111110" // /* MW 2 */ + 4153 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first + 4154 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4155 "01010110" // /* MW 3 */ + 4156 "01000110" // /* MW 2 */ + 4157 "00000111" // /* MW 1 */ + 4158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4159 "00000000" // /* MW 1 */ + 4160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4161 "00000000" // /* MW 1 */ + 4162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4163 "00000000" // /* MW 1 */ + 4164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4165 "00000000" // /* MW 1 */ + 4166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4167 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 4168 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4169 "00000010" // /* MW 3 */ + 4170 "01100001" // /* MW 2 */ + 4171 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 + 4172 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4173 "00010001" // /* MW 3 */ + 4174 "00000110" // /* MW 2 */ + 4175 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 + 4176 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4177 "11111101" // /* MW 3 */ + 4178 "11100000" // /* MW 2 */ + 4179 "00010111" // /* MW 1 */ + 4180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4181 "00000000" // /* MW 1 */ + 4182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4183 "00000000" // /* MW 1 */ + 4184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4185 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 4186 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4187 "00001000" // /* MW 3 */ + 4188 "10010011" // /* MW 2 */ + 4189 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 169 45 + 4190 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4191 "10000001" // /* MW 5 */ + 4192 "10101101" // /* MW 4 */ + 4193 "10100111" // /* MW 3 */ + 4194 "00000000" // /* MW 2 */ + 4195 "00000100" // /* MW 1 */ + 4196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4197 "00000000" // /* MW 1 */ + 4198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4199 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 168 2 first + 4200 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4201 "00110110" // /* MW 3 */ + 4202 "00000110" // /* MW 2 */ + 4203 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 1 "io_buffer_main.h" 348 51 + 4204 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4205 "10000001" // /* MW 5 */ + 4206 "11011101" // /* MW 4 */ + 4207 "11011100" // /* MW 3 */ + 4208 "11001010" // /* MW 2 */ + 4209 "11000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 169 47 first + 4210 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4211 "01110110" // /* MW 3 */ + 4212 "00000110" // /* MW 2 */ + 4213 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 4214 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4215 "10011110" // /* MW 3 */ + 4216 "01011100" // /* MW 2 */ + 4217 "00000111" // /* MW 1 */ + 4218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4219 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 171 2 first +.no_stack_arguments + 4220 "00000100" // JL #3776 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3776 delay_slots=5 */ + 4221 "00000001" // /* MW 5 */ + 4222 "00000000" // /* MW 4 */ + 4223 "01100000" // /* MW 3 */ + 4224 "00000111" // /* MW 2 */ + 4225 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4226 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4227 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 168 2 first +.delay_slot + 4228 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4229 "00000111" // /* MW 3 */ + 4230 "01100010" // /* MW 2 */ + 4231 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 168 2 +.delay_slot + 4232 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4233 "00110001" // /* MW 3 */ + 4234 "00000110" // /* MW 2 */ + 4235 "00001000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 169 45 first +.delay_slot + 4236 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4237 "00001101" // /* MW 3 */ + 4238 "11100001" // /* MW 2 */ + 4239 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 169 45 +.delay_slot + 4240 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4241 "00000000" // /* MW 15 */ + 4242 "00000000" // /* MW 14 */ + 4243 "10101000" // /* MW 13 */ + 4244 "10100000" // /* MW 12 */ + 4245 "00110100" // /* MW 11 */ + 4246 "00000000" // /* MW 10 */ + 4247 "00000000" // /* MW 9 */ + 4248 "00000000" // /* MW 8 */ + 4249 "01011011" // /* MW 7 */ + 4250 "00000001" // /* MW 6 */ + 4251 "00100000" // /* MW 5 */ + 4252 "00000000" // /* MW 4 */ + 4253 "11110000" // /* MW 3 */ + 4254 "00101100" // /* MW 2 */ + 4255 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first +.src_ref 6 "superkernels.cpp" 173 6 +.src_ref 6 "superkernels.cpp" 174 14 +.return_address + 4256 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508864 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4257 "00010000" // /* MW 9 */ + 4258 "11100000" // /* MW 8 */ + 4259 "00110001" // /* MW 7 */ + 4260 "11110011" // /* MW 6 */ + 4261 "00000001" // /* MW 5 */ + 4262 "00000000" // /* MW 4 */ + 4263 "11010000" // /* MW 3 */ + 4264 "11000110" // /* MW 2 */ + 4265 "11001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 449 8 + 4266 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4267 "00000101" // /* MW 3 */ + 4268 "00100000" // /* MW 2 */ + 4269 "00010000" // /* MW 1 */ + 4270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4271 "00000000" // /* MW 1 */ + 4272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4273 "00000000" // /* MW 1 */ + 4274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4275 "00000000" // /* MW 1 */ + 4276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4277 "00000000" // /* MW 1 */ + 4278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4279 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 4280 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4281 "00001000" // /* MW 3 */ + 4282 "01010001" // /* MW 2 */ + 4283 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first +.src_ref 6 "superkernels.cpp" 173 19 + 4284 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #508936 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4285 "00010000" // /* MW 9 */ + 4286 "00000100" // /* MW 8 */ + 4287 "00110010" // /* MW 7 */ + 4288 "11110001" // /* MW 6 */ + 4289 "00000001" // /* MW 5 */ + 4290 "00000000" // /* MW 4 */ + 4291 "11010000" // /* MW 3 */ + 4292 "11001110" // /* MW 2 */ + 4293 "11111100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 173 6 first + 4294 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4295 "00110110" // /* MW 3 */ + 4296 "00000110" // /* MW 2 */ + 4297 "00000110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 173 19 + 4298 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4299 "01010110" // /* MW 3 */ + 4300 "00000110" // /* MW 2 */ + 4301 "00000010" // /* MW 1 */ + 4302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4303 "00000000" // /* MW 1 */ + 4304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4305 "00000000" // /* MW 1 */ + 4306 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4307 "00000000" // /* MW 1 */ + 4308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4309 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 first + 4310 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4311 "00110001" // /* MW 3 */ + 4312 "00100001" // /* MW 2 */ + 4313 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 4314 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4315 "00010001" // /* MW 3 */ + 4316 "11100110" // /* MW 2 */ + 4317 "00001111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 173 16 first + 4318 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4319 "00101000" // /* MW 3 */ + 4320 "01100001" // /* MW 2 */ + 4321 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 173 6 + 4322 "10000100" // JNZ r16, #4352 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4352 delay_slots=5 */ + 4323 "00000001" // /* MW 5 */ + 4324 "01000000" // /* MW 4 */ + 4325 "10000000" // /* MW 3 */ + 4326 "00001000" // /* MW 2 */ + 4327 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4329 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4331 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4333 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4335 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4337 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 174 14 + 4338 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4339 "00000001" // /* MW 3 */ + 4340 "00100000" // /* MW 2 */ + 4341 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 174 14 first + 4342 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4343 "00000000" // /* MW 9 */ + 4344 "00000000" // /* MW 8 */ + 4345 "00000000" // /* MW 7 */ + 4346 "10000000" // /* MW 6 */ + 4347 "00010001" // /* MW 5 */ + 4348 "00000110" // /* MW 4 */ + 4349 "11110110" // /* MW 3 */ + 4350 "00101100" // /* MW 2 */ + 4351 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 6 "superkernels.cpp" 176 + 4352 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4353 "00111001" // /* MW 3 */ + 4354 "11110100" // /* MW 2 */ + 4355 "00000111" // /* MW 1 */ + 4356 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4357 "00011001" // /* MW 3 */ + 4358 "11111011" // /* MW 2 */ + 4359 "00000111" // /* MW 1 */ + 4360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4361 "00000000" // /* MW 1 */ + 4362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4363 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 4364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4365 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4366 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4367 "11110001" // /* MW 3 */ + 4368 "11111101" // /* MW 2 */ + 4369 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4371 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 176 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4372 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 4373 "00000000" // /* MW 3 */ + 4374 "00101000" // /* MW 2 */ + 4375 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4376 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4377 "10100000" // /* MW 3 */ + 4378 "01100111" // /* MW 2 */ + 4379 "00011111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 176 +.delay_slot + 4380 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4381 "00000001" // /* MW 5 */ + 4382 "00000000" // /* MW 4 */ + 4383 "00000000" // /* MW 3 */ + 4384 "11111000" // /* MW 2 */ + 4385 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4387 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4389 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 4391 "00000000" // /* MW 1 */ +.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE +.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_begin0 +.function shared_run_backbone _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE +.src_ref 2 "elementwise_binary_shared.h" 66 first +.src_ref 2 "elementwise_binary_shared.h" 78 37 +.function_start + 4400 "11111000" // MOV r2, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4401 "11000000" // /* MW 3 */ + 4402 "10010110" // /* MW 2 */ + 4403 "00011000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 134 58 +.src_ref 2 "elementwise_binary_shared.h" 78 37 first + 4404 "00100100" // MOVX r0, #0; ADD.NC p5, r2, #14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4405 "00001110" // /* MW 5 */ + 4406 "11000010" // /* MW 4 */ + 4407 "00101010" // /* MW 3 */ + 4408 "00000000" // /* MW 2 */ + 4409 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 134 58 first +.src_ref 2 "elementwise_binary_shared.h" 81 22 first + 4410 "11010100" // LDA.s16 r0, [p5], #2; VBCST.16 x0, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4411 "11100101" // /* MW 5 */ + 4412 "00000010" // /* MW 4 */ + 4413 "01010000" // /* MW 3 */ + 4414 "10000010" // /* MW 2 */ + 4415 "10100011" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 83 15 first + 4416 "10011000" // LDA r2, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4417 "01010110" // /* MW 3 */ + 4418 "00000100" // /* MW 2 */ + 4419 "00000101" // /* MW 1 */ + 4420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4421 "00000000" // /* MW 1 */ + 4422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4423 "00000000" // /* MW 1 */ + 4424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4425 "00000000" // /* MW 1 */ + 4426 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4427 "00000000" // /* MW 1 */ + 4428 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4429 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 83 26 + 4430 "00011000" // MOVX r1, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4431 "00001001" // /* MW 3 */ + 4432 "00000010" // /* MW 2 */ + 4433 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 83 26 + 4434 "10011000" // LTU r1, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4435 "00101100" // /* MW 3 */ + 4436 "01000010" // /* MW 2 */ + 4437 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 83 8 + 4438 "10000100" // JNZ r1, #4576 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4576 delay_slots=5 */ + 4439 "00000001" // /* MW 5 */ + 4440 "01000000" // /* MW 4 */ + 4441 "11110000" // /* MW 3 */ + 4442 "00001000" // /* MW 2 */ + 4443 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 66 +.delay_slot + 4444 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4445 "00000001" // /* MW 5 */ + 4446 "00000000" // /* MW 4 */ + 4447 "00000000" // /* MW 3 */ + 4448 "00001000" // /* MW 2 */ + 4449 "00000000" // /* MW 1 */ +.delay_slot + 4450 "11111000" // MOV p4, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4451 "11100000" // /* MW 3 */ + 4452 "01100101" // /* MW 2 */ + 4453 "00011100" // /* MW 1 */ +.delay_slot + 4454 "00011000" // PADDB [p4], #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4455 "10010000" // /* MW 3 */ + 4456 "11111111" // /* MW 2 */ + 4457 "00111100" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 78 37 first +.delay_slot + 4458 "00011000" // VST x0, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4459 "00010011" // /* MW 3 */ + 4460 "00000100" // /* MW 2 */ + 4461 "00001100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4463 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 first +.src_ref 2 "elementwise_binary_shared.h" 85 34 +.src_ref 2 "elementwise_binary_shared.h" 90 19 + 4464 "11010100" // MOVA dj0, #12; VBCST.16 x0, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4465 "11100101" // /* MW 5 */ + 4466 "00000010" // /* MW 4 */ + 4467 "10000000" // /* MW 3 */ + 4468 "10000010" // /* MW 2 */ + 4469 "00000001" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 85 34 first +.src_ref 2 "elementwise_binary_shared.h" 90 19 first + 4470 "10011000" // LDA.u8 r0, [p3, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4471 "00001010" // /* MW 3 */ + 4472 "00000000" // /* MW 2 */ + 4473 "00000011" // /* MW 1 */ + 4474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4475 "00000000" // /* MW 1 */ + 4476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4477 "00000000" // /* MW 1 */ + 4478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4479 "00000000" // /* MW 1 */ + 4480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4481 "00000000" // /* MW 1 */ + 4482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4483 "00000000" // /* MW 1 */ + 4484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4485 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 90 12 +.src_ref 2 "elementwise_binary_shared.h" 90 35 + 4486 "10000100" // JNZ r0, #4528 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4528 delay_slots=5 */ + 4487 "00000001" // /* MW 5 */ + 4488 "01000000" // /* MW 4 */ + 4489 "11011000" // /* MW 3 */ + 4490 "00001000" // /* MW 2 */ + 4491 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 2 "elementwise_binary_shared.h" 132 18 +.delay_slot + 4492 "10111000" // MOV m0, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4493 "00000000" // /* MW 3 */ + 4494 "00000000" // /* MW 2 */ + 4495 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary_shared.h" 130 16 +.delay_slot + 4496 "10111000" // MOV m2, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4497 "10000000" // /* MW 3 */ + 4498 "00000000" // /* MW 2 */ + 4499 "00011010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4501 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4503 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4505 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary_shared.h" 128 16 + 4506 "10111010" // MOVA m1, #0; J #4544 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=4544 delay_slots=5 */ + 4507 "00100000" // /* MW 9 */ + 4508 "00000000" // /* MW 8 */ + 4509 "00000000" // /* MW 7 */ + 4510 "00111000" // /* MW 6 */ + 4511 "00000010" // /* MW 5 */ + 4512 "00000000" // /* MW 4 */ + 4513 "10000000" // /* MW 3 */ + 4514 "00000100" // /* MW 2 */ + 4515 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4517 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4519 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4521 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4523 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.delay_slot + 4524 "00011000" // VST x0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4525 "00010011" // /* MW 3 */ + 4526 "00000100" // /* MW 2 */ + 4527 "00001000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_128 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary_shared.h" 128 16 + 4528 "10111000" // MOV m1, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4529 "10000000" // /* MW 3 */ + 4530 "00000000" // /* MW 2 */ + 4531 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 2 "elementwise_binary_shared.h" 130 16 + 4532 "11110110" // NOPA; NOPB; VST x0, [p1]; MOV m2, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4533 "01010000" // /* MW 11 */ + 4534 "00000000" // /* MW 10 */ + 4535 "00000000" // /* MW 9 */ + 4536 "00000001" // /* MW 8 */ + 4537 "00010011" // /* MW 7 */ + 4538 "00000100" // /* MW 6 */ + 4539 "00100001" // /* MW 5 */ + 4540 "00000000" // /* MW 4 */ + 4541 "11110000" // /* MW 3 */ + 4542 "00101100" // /* MW 2 */ + 4543 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_144 + 4544 "10000100" // J #4672 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=4672 delay_slots=5 */ + 4545 "00000000" // /* MW 5 */ + 4546 "00000000" // /* MW 4 */ + 4547 "00100000" // /* MW 3 */ + 4548 "00001001" // /* MW 2 */ + 4549 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 2 "elementwise_binary_shared.h" 128 16 +.src_ref 2 "elementwise_binary_shared.h" 132 18 +.delay_slot + 4550 "00000010" // MOVS p0, p4; MOV p4, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4551 "01110000" // /* MW 7 */ + 4552 "01100000" // /* MW 6 */ + 4553 "00110000" // /* MW 5 */ + 4554 "00000010" // /* MW 4 */ + 4555 "01100000" // /* MW 3 */ + 4556 "00010001" // /* MW 2 */ + 4557 "00010010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4558 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4559 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4561 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4563 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4564 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4565 "10000001" // /* MW 11 */ + 4566 "10101101" // /* MW 10 */ + 4567 "00000000" // /* MW 9 */ + 4568 "00000000" // /* MW 8 */ + 4569 "00000000" // /* MW 7 */ + 4570 "00000000" // /* MW 6 */ + 4571 "00100000" // /* MW 5 */ + 4572 "00000000" // /* MW 4 */ + 4573 "11110000" // /* MW 3 */ + 4574 "00101100" // /* MW 2 */ + 4575 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_176 +.src_ref 2 "elementwise_binary_shared.h" 109 97 + 4576 "00011000" // MOVX r1, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4577 "00001101" // /* MW 3 */ + 4578 "00000010" // /* MW 2 */ + 4579 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 109 97 first + 4580 "10011000" // EQ r1, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4581 "00100111" // /* MW 3 */ + 4582 "01000010" // /* MW 2 */ + 4583 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 109 78 + 4584 "10000100" // JNZ r1, #4624 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4624 delay_slots=5 */ + 4585 "00000001" // /* MW 5 */ + 4586 "01000000" // /* MW 4 */ + 4587 "00001000" // /* MW 3 */ + 4588 "00001001" // /* MW 2 */ + 4589 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 109 78 +.delay_slot + 4590 "01000100" // MOVXM p3, #508944 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4591 "00100000" // /* MW 5 */ + 4592 "11001000" // /* MW 4 */ + 4593 "11000110" // /* MW 3 */ + 4594 "00000111" // /* MW 2 */ + 4595 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4596 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4597 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4598 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4599 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4601 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 109 78 +.delay_slot + 4602 "01000100" // MOVXM r0, #1065353216 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4603 "00000000" // /* MW 5 */ + 4604 "00100000" // /* MW 4 */ + 4605 "00000000" // /* MW 3 */ + 4606 "10000000" // /* MW 2 */ + 4607 "00111111" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 109 78 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4608 "11100001" // NOPA; NOPB; NOPS; MOVXM r0, #-1082130432; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4609 "00000000" // /* MW 15 */ + 4610 "00000000" // /* MW 14 */ + 4611 "00010000" // /* MW 13 */ + 4612 "00000000" // /* MW 12 */ + 4613 "00001000" // /* MW 11 */ + 4614 "00000000" // /* MW 10 */ + 4615 "11100000" // /* MW 9 */ + 4616 "00101111" // /* MW 8 */ + 4617 "01011011" // /* MW 7 */ + 4618 "00000001" // /* MW 6 */ + 4619 "00100000" // /* MW 5 */ + 4620 "00000000" // /* MW 4 */ + 4621 "11110000" // /* MW 3 */ + 4622 "00101100" // /* MW 2 */ + 4623 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_224 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 2 "elementwise_binary_shared.h" 109 78 +.src_ref 2 "elementwise_binary_shared.h" 132 18 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4624 "01010100" // LDA.s8 r0, [p3]; MOV m0, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4625 "00000001" // /* MW 5 */ + 4626 "00000001" // /* MW 4 */ + 4627 "01010000" // /* MW 3 */ + 4628 "10000000" // /* MW 2 */ + 4629 "01100000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary_shared.h" 128 16 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4630 "10111000" // MOV m1, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4631 "00000000" // /* MW 3 */ + 4632 "00000000" // /* MW 2 */ + 4633 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary_shared.h" 130 16 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4634 "10111000" // MOV m2, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4635 "10000000" // /* MW 3 */ + 4636 "00000000" // /* MW 2 */ + 4637 "00011010" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4639 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4641 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 109 78 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4642 "01111000" // VINSERT.32 x0, x0, #0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4643 "00010001" // /* MW 3 */ + 4644 "00000000" // /* MW 2 */ + 4645 "00011000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 109 78 + 4646 "11111000" // VMOV bmll1, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4647 "10010010" // /* MW 3 */ + 4648 "00000000" // /* MW 2 */ + 4649 "00011001" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 109 78 + 4650 "00011000" // MOVX crRnd, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4651 "10000000" // /* MW 3 */ + 4652 "00111010" // /* MW 2 */ + 4653 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 109 78 + 4654 "00011000" // VCONV.bf16.fp32 wl0, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4655 "10010110" // /* MW 3 */ + 4656 "01000000" // /* MW 2 */ + 4657 "00001000" // /* MW 1 */ + 4658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4659 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 first +.src_ref 2 "elementwise_binary_shared.h" 109 78 + 4660 "01011000" // VEXTBCST.16 x0, x0, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4661 "00000011" // /* MW 3 */ + 4662 "00000001" // /* MW 2 */ + 4663 "00011000" // /* MW 1 */ + 4664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4665 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first + 4666 "00001100" // NOPA; VST x0, [sp, #-64] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4667 "01100110" // /* MW 5 */ + 4668 "11111000" // /* MW 4 */ + 4669 "11111111" // /* MW 3 */ + 4670 "00101100" // /* MW 2 */ + 4671 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_272 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary_shared.h" 125 4 first +.src_ref 2 "elementwise_binary_shared.h" 125 31 first +.src_ref 2 "elementwise_binary_shared.h" 128 16 first + 4672 "10110110" // LDA r1, [p5, #-16]; VLDB x1, [p4], m1; MOVXM ls, #4784 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4673 "00010000" // /* MW 11 */ + 4674 "01011000" // /* MW 10 */ + 4675 "01111001" // /* MW 9 */ + 4676 "00000100" // /* MW 8 */ + 4677 "00000000" // /* MW 7 */ + 4678 "00000000" // /* MW 6 */ + 4679 "11101000" // /* MW 5 */ + 4680 "01010000" // /* MW 4 */ + 4681 "11011000" // /* MW 3 */ + 4682 "10000110" // /* MW 2 */ + 4683 "10111000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary_shared.h" 125 4 +.src_ref 2 "elementwise_binary_shared.h" 125 31 +.src_ref 2 "elementwise_binary_shared.h" 130 16 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 4684 "10110110" // MOVA r3, #-5; VLDB x0, [p1], m2; MOVXM le, #4832 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4685 "00010000" // /* MW 11 */ + 4686 "01110000" // /* MW 10 */ + 4687 "10111001" // /* MW 9 */ + 4688 "00000101" // /* MW 8 */ + 4689 "00000000" // /* MW 7 */ + 4690 "00000000" // /* MW 6 */ + 4691 "01101000" // /* MW 5 */ + 4692 "10010000" // /* MW 4 */ + 4693 "00000010" // /* MW 3 */ + 4694 "01100011" // /* MW 2 */ + 4695 "11111111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary_shared.h" 128 16 first +.src_ref 2 "elementwise_binary_shared.h" 132 18 first +.src_ref 2 "elementwise_binary_shared.h" 136 44 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4696 "00010010" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;VLDB x1, [p4], m1; MOVX r0, #60 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4697 "11110001" // /* MW 7 */ + 4698 "00000000" // /* MW 6 */ + 4699 "11101000" // /* MW 5 */ + 4700 "01010000" // /* MW 4 */ + 4701 "01111000" // /* MW 3 */ + 4702 "00000101" // /* MW 2 */ + 4703 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary_shared.h" 125 31 +.src_ref 2 "elementwise_binary_shared.h" 130 16 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4704 "10111010" // VLDA x0, [p1], m2; MOVXM p3, #508944 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4705 "00010000" // /* MW 9 */ + 4706 "00001000" // /* MW 8 */ + 4707 "10110010" // /* MW 7 */ + 4708 "11110001" // /* MW 6 */ + 4709 "00000001" // /* MW 5 */ + 4710 "00000000" // /* MW 4 */ + 4711 "01110000" // /* MW 3 */ + 4712 "00000011" // /* MW 2 */ + 4713 "00101001" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 125 31 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4714 "10011000" // LDA.s8 r2, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4715 "01000010" // /* MW 3 */ + 4716 "00000100" // /* MW 2 */ + 4717 "00000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary_shared.h" 132 18 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4718 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4719 "00101011" // /* MW 3 */ + 4720 "00001000" // /* MW 2 */ + 4721 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4723 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 125 31 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4724 "10011000" // LSHL r1, r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4725 "00111101" // /* MW 3 */ + 4726 "01000010" // /* MW 2 */ + 4727 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 125 4 +.src_ref 2 "elementwise_binary_shared.h" 136 44 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4728 "01100010" // ADD.NC lc, r1, #-3; VMAC.f dm1, dm0, x1, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4729 "00000001" // /* MW 7 */ + 4730 "00000010" // /* MW 6 */ + 4731 "00000001" // /* MW 5 */ + 4732 "10000110" // /* MW 4 */ + 4733 "11111110" // /* MW 3 */ + 4734 "01110000" // /* MW 2 */ + 4735 "00000101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary_shared.h" 128 16 first +.src_ref 2 "elementwise_binary_shared.h" 130 16 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4736 "00111100" // VLDA x0, [p1], m2; VLDB x1, [p4], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4737 "11101000" // /* MW 5 */ + 4738 "01010000" // /* MW 4 */ + 4739 "01111000" // /* MW 3 */ + 4740 "00000011" // /* MW 2 */ + 4741 "00101001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary_shared.h" 132 18 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4742 "10111010" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4743 "01111110" // /* MW 9 */ + 4744 "10100101" // /* MW 8 */ + 4745 "00000001" // /* MW 7 */ + 4746 "00000000" // /* MW 6 */ + 4747 "00010000" // /* MW 5 */ + 4748 "00000000" // /* MW 4 */ + 4749 "01110000" // /* MW 3 */ + 4750 "00000101" // /* MW 2 */ + 4751 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary_shared.h" 144 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4752 "11100001" // NOPA; NOPB; NOPS; MOVX crRnd, r2; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4753 "00000000" // /* MW 15 */ + 4754 "00000000" // /* MW 14 */ + 4755 "01111000" // /* MW 13 */ + 4756 "10100101" // /* MW 12 */ + 4757 "00000001" // /* MW 11 */ + 4758 "00000000" // /* MW 10 */ + 4759 "11010100" // /* MW 9 */ + 4760 "00000101" // /* MW 8 */ + 4761 "01011011" // /* MW 7 */ + 4762 "00000001" // /* MW 6 */ + 4763 "00100000" // /* MW 5 */ + 4764 "00000000" // /* MW 4 */ + 4765 "11110000" // /* MW 3 */ + 4766 "00101100" // /* MW 2 */ + 4767 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 136 44 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4768 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4769 "00010000" // /* MW 15 */ + 4770 "00001000" // /* MW 14 */ + 4771 "01111000" // /* MW 13 */ + 4772 "10100101" // /* MW 12 */ + 4773 "00000001" // /* MW 11 */ + 4774 "00000000" // /* MW 10 */ + 4775 "00000000" // /* MW 9 */ + 4776 "00000000" // /* MW 8 */ + 4777 "01011011" // /* MW 7 */ + 4778 "00000001" // /* MW 6 */ + 4779 "00100000" // /* MW 5 */ + 4780 "00000000" // /* MW 4 */ + 4781 "11110000" // /* MW 3 */ + 4782 "00101100" // /* MW 2 */ + 4783 "00000000" // /* MW 1 */ +.label ZLS_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_384 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary_shared.h" 128 16 first +.src_ref 2 "elementwise_binary_shared.h" 130 16 first +.begin_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 4784 "11100001" // VLDA x0, [p1], m2; VLDB x1, [p4], m1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4785 "00000000" // /* MW 15 */ + 4786 "00000000" // /* MW 14 */ + 4787 "01111000" // /* MW 13 */ + 4788 "10100101" // /* MW 12 */ + 4789 "00000001" // /* MW 11 */ + 4790 "00000000" // /* MW 10 */ + 4791 "00000000" // /* MW 9 */ + 4792 "00000000" // /* MW 8 */ + 4793 "01011011" // /* MW 7 */ + 4794 "00000001" // /* MW 6 */ + 4795 "11101000" // /* MW 5 */ + 4796 "01010000" // /* MW 4 */ + 4797 "01111000" // /* MW 3 */ + 4798 "00000011" // /* MW 2 */ + 4799 "00101001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary_shared.h" 132 18 first +.src_ref 2 "elementwise_binary_shared.h" 144 18 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4800 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4801 "00000000" // /* MW 15 */ + 4802 "00000000" // /* MW 14 */ + 4803 "01111000" // /* MW 13 */ + 4804 "10100101" // /* MW 12 */ + 4805 "00000001" // /* MW 11 */ + 4806 "00000000" // /* MW 10 */ + 4807 "00000000" // /* MW 9 */ + 4808 "00000000" // /* MW 8 */ + 4809 "10100011" // /* MW 7 */ + 4810 "00011100" // /* MW 6 */ + 4811 "00100010" // /* MW 5 */ + 4812 "00000000" // /* MW 4 */ + 4813 "01110000" // /* MW 3 */ + 4814 "00000101" // /* MW 2 */ + 4815 "00000001" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4816 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4817 "00000000" // /* MW 15 */ + 4818 "00000000" // /* MW 14 */ + 4819 "01111000" // /* MW 13 */ + 4820 "10100101" // /* MW 12 */ + 4821 "00000001" // /* MW 11 */ + 4822 "00000000" // /* MW 10 */ + 4823 "00000000" // /* MW 9 */ + 4824 "00000000" // /* MW 8 */ + 4825 "01011011" // /* MW 7 */ + 4826 "00000001" // /* MW 6 */ + 4827 "00100000" // /* MW 5 */ + 4828 "00000000" // /* MW 4 */ + 4829 "11110000" // /* MW 3 */ + 4830 "00101100" // /* MW 2 */ + 4831 "00000000" // /* MW 1 */ +.label ZLE_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_432 +.src_ref 2 "elementwise_binary_shared.h" 136 44 first +.end_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4832 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4833 "00010000" // /* MW 15 */ + 4834 "00001000" // /* MW 14 */ + 4835 "01111000" // /* MW 13 */ + 4836 "10100101" // /* MW 12 */ + 4837 "00000001" // /* MW 11 */ + 4838 "00000000" // /* MW 10 */ + 4839 "00000000" // /* MW 9 */ + 4840 "00000000" // /* MW 8 */ + 4841 "01011011" // /* MW 7 */ + 4842 "00000001" // /* MW 6 */ + 4843 "00100000" // /* MW 5 */ + 4844 "00000000" // /* MW 4 */ + 4845 "11110000" // /* MW 3 */ + 4846 "00101100" // /* MW 2 */ + 4847 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 4848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4849 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary_shared.h" 144 18 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4850 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4851 "10100011" // /* MW 3 */ + 4852 "00011100" // /* MW 2 */ + 4853 "00001010" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 4854 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4855 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 136 44 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 4856 "01001000" // VMAC.f dm1, dm0, x1, x0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4857 "00000001" // /* MW 3 */ + 4858 "00000010" // /* MW 2 */ + 4859 "00000001" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4861 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 146 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4862 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 4863 "00000000" // /* MW 3 */ + 4864 "00101000" // /* MW 2 */ + 4865 "00010000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary_shared.h" 144 18 first +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4866 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4867 "10100011" // /* MW 3 */ + 4868 "00011100" // /* MW 2 */ + 4869 "00001010" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 146 first +.delay_slot + 4870 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4871 "00000001" // /* MW 5 */ + 4872 "00000000" // /* MW 4 */ + 4873 "00000000" // /* MW 3 */ + 4874 "11111000" // /* MW 2 */ + 4875 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4877 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary_shared.h" 144 18 first +.delay_slot + 4878 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4879 "10100011" // /* MW 3 */ + 4880 "00011100" // /* MW 2 */ + 4881 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE__end +.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_end0 + 4883 "00000000" // /* MW 1 */ +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv +.function setup _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv +.src_ref 2 "elementwise_unary.h" 95 first +.src_ref 2 "elementwise_unary.h" 97 22 +.src_ref 2 "elementwise_unary.h" 97 24 first +.function_start + 4896 "10111010" // LDA el0, [p1], #4; MOVXM p0, #509248 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4897 "00010000" // /* MW 9 */ + 4898 "10100000" // /* MW 8 */ + 4899 "00110010" // /* MW 7 */ + 4900 "11110000" // /* MW 6 */ + 4901 "00000001" // /* MW 5 */ + 4902 "00000000" // /* MW 4 */ + 4903 "11010000" // /* MW 3 */ + 4904 "10000101" // /* MW 2 */ + 4905 "00100011" // /* MW 1 */ + 4906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4907 "00000000" // /* MW 1 */ + 4908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4909 "00000000" // /* MW 1 */ + 4910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4911 "00000000" // /* MW 1 */ + 4912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4913 "00000000" // /* MW 1 */ + 4914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4915 "00000000" // /* MW 1 */ + 4916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4917 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 97 22 first + 4918 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4919 "00101001" // /* MW 3 */ + 4920 "00011100" // /* MW 2 */ + 4921 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 98 24 first + 4922 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4923 "00101110" // /* MW 3 */ + 4924 "00000100" // /* MW 2 */ + 4925 "00000001" // /* MW 1 */ + 4926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4927 "00000000" // /* MW 1 */ + 4928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4929 "00000000" // /* MW 1 */ + 4930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4931 "00000000" // /* MW 1 */ + 4932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4933 "00000000" // /* MW 1 */ + 4934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4935 "00000000" // /* MW 1 */ + 4936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4937 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 98 22 + 4938 "10011000" // ST el0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4939 "00101001" // /* MW 3 */ + 4940 "00000100" // /* MW 2 */ + 4941 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 99 24 first + 4942 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4943 "00101110" // /* MW 3 */ + 4944 "00010100" // /* MW 2 */ + 4945 "00000001" // /* MW 1 */ + 4946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4947 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 101 4 first + 4948 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 4949 "00000000" // /* MW 3 */ + 4950 "00101000" // /* MW 2 */ + 4951 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4953 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4955 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4957 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4959 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 99 22 first +.delay_slot + 4960 "10011000" // ST el0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4961 "00101001" // /* MW 3 */ + 4962 "00010100" // /* MW 2 */ +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv__end +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_end0 + 4963 "00001000" // /* MW 1 */ +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E +.function run _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_unary.h" 107 first +.src_ref 2 "elementwise_unary.h" 113 37 +.src_ref 2 "elementwise_unary.h" 113 78 +.src_ref 2 "elementwise_unary.h" 142 19 +.function_start + 4976 "10110110" // MOVA r0, #-6; VLDB x10, [p0], #64; MOVXM p2, #509248 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4977 "00010000" // /* MW 11 */ + 4978 "10100000" // /* MW 10 */ + 4979 "00110010" // /* MW 9 */ + 4980 "11110001" // /* MW 8 */ + 4981 "00000001" // /* MW 7 */ + 4982 "00000000" // /* MW 6 */ + 4983 "01101000" // /* MW 5 */ + 4984 "00111101" // /* MW 4 */ + 4985 "00000000" // /* MW 3 */ + 4986 "01000000" // /* MW 2 */ + 4987 "11111111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_unary.h" 113 37 first +.src_ref 2 "elementwise_unary.h" 161 19 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4988 "10110110" // LDA r2, [p2]; VLDB x7, [p0], #64; MOVXM p2, #508944 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4989 "00010000" // /* MW 11 */ + 4990 "00001000" // /* MW 10 */ + 4991 "00110010" // /* MW 9 */ + 4992 "11110001" // /* MW 8 */ + 4993 "00000001" // /* MW 7 */ + 4994 "00000000" // /* MW 6 */ + 4995 "11101000" // /* MW 5 */ + 4996 "00111011" // /* MW 4 */ + 4997 "11010000" // /* MW 3 */ + 4998 "10001010" // /* MW 2 */ + 4999 "01000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_unary.h" 142 19 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5000 "10110110" // LDA.s8 r1, [p2]; VLDB x10, [p0], #64; MOVXM r6, #16512 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5001 "00010000" // /* MW 11 */ + 5002 "01000000" // /* MW 10 */ + 5003 "11001000" // /* MW 9 */ + 5004 "00010000" // /* MW 8 */ + 5005 "00000000" // /* MW 7 */ + 5006 "00000000" // /* MW 6 */ + 5007 "01101000" // /* MW 5 */ + 5008 "00111101" // /* MW 4 */ + 5009 "01010000" // /* MW 3 */ + 5010 "10000100" // /* MW 2 */ + 5011 "01000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_unary.h" 161 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5012 "11110100" // VLDB x7, [p0], #64; VBCST.16 x0, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5013 "11100101" // /* MW 5 */ + 5014 "00110010" // /* MW 4 */ + 5015 "10000000" // /* MW 3 */ + 5016 "10111110" // /* MW 2 */ + 5017 "00000011" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5018 "01000100" // MOVXM r4, #49280 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5019 "00000000" // /* MW 5 */ + 5020 "00100001" // /* MW 4 */ + 5021 "11000010" // /* MW 3 */ + 5022 "00000000" // /* MW 2 */ + 5023 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5024 "11111000" // VBCST.16 x1, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5025 "01110010" // /* MW 3 */ + 5026 "10010001" // /* MW 2 */ + 5027 "00011000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5028 "01000100" // MOVXM r3, #32767 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5029 "11111110" // /* MW 5 */ + 5030 "10111111" // /* MW 4 */ + 5031 "01110001" // /* MW 3 */ + 5032 "00000000" // /* MW 2 */ + 5033 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5034 "11111000" // VMIN_GE.bf16 x8, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5035 "00101100" // /* MW 3 */ + 5036 "01010000" // /* MW 2 */ + 5037 "00011100" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 2 "elementwise_unary.h" 113 78 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5038 "11100100" // LSHL r0, r2, r0; VMAX_LT.bf16 x6, r16, x8, x1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5039 "11011001" // /* MW 5 */ + 5040 "10000001" // /* MW 4 */ + 5041 "10110110" // /* MW 3 */ + 5042 "00000001" // /* MW 2 */ + 5043 "00010000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_unary.h" 147 20 +.src_ref 2 "elementwise_unary.h" 166 20 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5044 "11100100" // MOVX crRnd, r1; VMIN_GE.bf16 x8, r16, x7, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5045 "01011001" // /* MW 5 */ + 5046 "01110000" // /* MW 4 */ + 5047 "00001000" // /* MW 3 */ + 5048 "01010000" // /* MW 2 */ + 5049 "00001111" // /* MW 1 */ + 5050 "11111000" // VBCST.16 x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5051 "01110010" // /* MW 3 */ + 5052 "00001101" // /* MW 2 */ + 5053 "00011001" // /* MW 1 */ + 5054 "01000100" // MOVXM r5, #15616 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5055 "00000000" // /* MW 5 */ + 5056 "10111010" // /* MW 4 */ + 5057 "00110010" // /* MW 3 */ + 5058 "00000000" // /* MW 2 */ + 5059 "00000000" // /* MW 1 */ + 5060 "11111000" // VBCST.16 x3, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5061 "01110010" // /* MW 3 */ + 5062 "10010101" // /* MW 2 */ + 5063 "00011001" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 + 5064 "01000100" // MOVXM r17, #16128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5065 "00000000" // /* MW 5 */ + 5066 "10111110" // /* MW 4 */ + 5067 "00111000" // /* MW 3 */ + 5068 "00000000" // /* MW 2 */ + 5069 "00000000" // /* MW 1 */ +.src_ref 4 "abs.hpp" 32 22 first + 5070 "01111000" // VBAND x11, x6, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5071 "00101011" // /* MW 3 */ + 5072 "10110001" // /* MW 2 */ + 5073 "00011101" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 5 "mul_acc32_fp.hpp" 38 105 +.src_ref 5 "mul_acc32_fp.hpp" 38 105 +.src_ref 5 "mul_acc32_fp.hpp" 39 105 +.src_ref 5 "mul_acc32_fp.hpp" 39 105 + 5074 "11100100" // MOVX r17, #828; VBCST.16 x5, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5075 "11100101" // /* MW 5 */ + 5076 "10001010" // /* MW 4 */ + 5077 "00100101" // /* MW 3 */ + 5078 "01011110" // /* MW 2 */ + 5079 "01100100" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first + 5080 "01100010" // VMAX_LT.bf16 x9, r16, x8, x1; VMUL.f dm4, x3, x11, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5081 "01100001" // /* MW 7 */ + 5082 "11100111" // /* MW 6 */ + 5083 "10001100" // /* MW 5 */ + 5084 "11100110" // /* MW 4 */ + 5085 "11101100" // /* MW 3 */ + 5086 "11000000" // /* MW 2 */ + 5087 "00000100" // /* MW 1 */ +.src_ref 4 "abs.hpp" 32 22 first + 5088 "01111000" // VBAND x8, x9, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5089 "00101011" // /* MW 3 */ + 5090 "01001001" // /* MW 2 */ + 5091 "00011100" // /* MW 1 */ + 5092 "01000100" // MOVXM r2, #16000 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5093 "00000000" // /* MW 5 */ + 5094 "00111101" // /* MW 4 */ + 5095 "00110001" // /* MW 3 */ + 5096 "00000000" // /* MW 2 */ + 5097 "00000000" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first + 5098 "01100010" // VBCST.16 x4, r2; VMUL.f dm2, x3, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5099 "00000001" // /* MW 7 */ + 5100 "11100111" // /* MW 6 */ + 5101 "10001010" // /* MW 5 */ + 5102 "11100110" // /* MW 4 */ + 5103 "01110010" // /* MW 3 */ + 5104 "00001001" // /* MW 2 */ + 5105 "00000010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 940 83 first + 5106 "11111000" // VCONV.fp32.bf16 cml0, x5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5107 "10001010" // /* MW 3 */ + 5108 "00001011" // /* MW 2 */ + 5109 "00011000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 5 "mul_acc32_fp.hpp" 38 105 first + 5110 "01100010" // VMIN_GE.bf16 x8, r16, x10, x0; VMAC.f dm3, dm0, x6, x4, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5111 "10000001" // /* MW 7 */ + 5112 "00001100" // /* MW 6 */ + 5113 "10001011" // /* MW 5 */ + 5114 "11100110" // /* MW 4 */ + 5115 "00101100" // /* MW 3 */ + 5116 "01010000" // /* MW 2 */ + 5117 "00000100" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 38 105 +.src_ref 2 "elementwise_unary.h" 125 8 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 5118 "01010110" // VCONV.bf16.fp32 x11, cml4; MOVXM ls, #5168; VMAC.f dm1, dm0, x9, x4, r17 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5119 "10000001" // /* MW 11 */ + 5120 "00010010" // /* MW 10 */ + 5121 "10001001" // /* MW 9 */ + 5122 "00000010" // /* MW 8 */ + 5123 "01000011" // /* MW 7 */ + 5124 "10001111" // /* MW 6 */ + 5125 "00000000" // /* MW 5 */ + 5126 "00000000" // /* MW 4 */ + 5127 "11000000" // /* MW 3 */ + 5128 "01000010" // /* MW 2 */ + 5129 "10110010" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 5130 "11111000" // VMAX_LT.bf16 x6, r16, x8, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5131 "11101100" // /* MW 3 */ + 5132 "01000000" // /* MW 2 */ + 5133 "00011011" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 39 105 first +.src_ref 2 "elementwise_unary.h" 125 8 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5134 "01011010" // MOVXM le, #5264; VMSC.f dm2, dm3, x11, x6, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5135 "11000011" // /* MW 9 */ + 5136 "01110110" // /* MW 8 */ + 5137 "10001010" // /* MW 7 */ + 5138 "00000010" // /* MW 6 */ + 5139 "01001001" // /* MW 5 */ + 5140 "10110111" // /* MW 4 */ + 5141 "00000000" // /* MW 3 */ + 5142 "00000000" // /* MW 2 */ + 5143 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_unary.h" 125 8 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5144 "00000010" // VCONV.bf16.fp32 x5, cml2; ADD.NC lc, r0, #-2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5145 "10000000" // /* MW 7 */ + 5146 "00111111" // /* MW 6 */ + 5147 "10111000" // /* MW 5 */ + 5148 "00000010" // /* MW 4 */ + 5149 "11000000" // /* MW 3 */ + 5150 "00100010" // /* MW 2 */ + 5151 "01010010" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first + 5152 "11111000" // VMIN_GE.bf16 x8, r16, x7, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5153 "00101100" // /* MW 3 */ + 5154 "00111000" // /* MW 2 */ + 5155 "00011100" // /* MW 1 */ +.src_ref 4 "abs.hpp" 32 22 first + 5156 "11110110" // NOPA; NOPB; NOPS; VBAND x11, x6, x2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5157 "10110000" // /* MW 11 */ + 5158 "10010101" // /* MW 10 */ + 5159 "11011000" // /* MW 9 */ + 5160 "00000010" // /* MW 8 */ + 5161 "01011011" // /* MW 7 */ + 5162 "00000001" // /* MW 6 */ + 5163 "00100000" // /* MW 5 */ + 5164 "00000000" // /* MW 4 */ + 5165 "11110000" // /* MW 3 */ + 5166 "00101100" // /* MW 2 */ + 5167 "00000000" // /* MW 1 */ +.label ZLS_F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_192 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 5 "mul_acc32_fp.hpp" 39 105 first +.src_ref 2 "elementwise_unary.h" 142 19 first +.begin_of_loop +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first +.loop_nesting 1 + 5168 "01001010" // VLDB x10, [p0], #64; VMAX_LT.bf16 x9, r16, x8, x1; VMSC.f dm4, dm1, x5, x9, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5169 "00100011" // /* MW 9 */ + 5170 "00101011" // /* MW 8 */ + 5171 "10001100" // /* MW 7 */ + 5172 "11100110" // /* MW 6 */ + 5173 "11101100" // /* MW 5 */ + 5174 "11000000" // /* MW 4 */ + 5175 "01101100" // /* MW 3 */ + 5176 "00111101" // /* MW 2 */ + 5177 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "abs.hpp" 32 22 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_unary.h" 161 19 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 5178 "01001010" // VLDB x7, [p0], #64; VBAND x8, x9, x2; VMUL.f dm4, x3, x11, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5179 "01100001" // /* MW 9 */ + 5180 "11100111" // /* MW 8 */ + 5181 "10001100" // /* MW 7 */ + 5182 "01100110" // /* MW 6 */ + 5183 "00101011" // /* MW 5 */ + 5184 "01001001" // /* MW 4 */ + 5185 "11101100" // /* MW 3 */ + 5186 "00111011" // /* MW 2 */ + 5187 "00000000" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 38 105 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 5188 "01001000" // VMAC.f dm3, dm0, x6, x4, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5189 "10000001" // /* MW 3 */ + 5190 "00001100" // /* MW 2 */ + 5191 "10001011" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 5192 "01001000" // VMUL.f dm2, x3, x8, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5193 "00000001" // /* MW 3 */ + 5194 "11100111" // /* MW 2 */ + 5195 "10001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 38 105 first +.src_ref 2 "elementwise_unary.h" 166 20 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 5196 "01100010" // VST.CONV.bf16.fp32 cml2, [p1], #64; VMAC.f dm1, dm0, x9, x4, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5197 "10000001" // /* MW 7 */ + 5198 "00010010" // /* MW 6 */ + 5199 "10001001" // /* MW 5 */ + 5200 "00000010" // /* MW 4 */ + 5201 "01100000" // /* MW 3 */ + 5202 "10100100" // /* MW 2 */ + 5203 "00100011" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 5204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5205 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_unary.h" 147 20 first +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5206 "01111010" // NOPA; VST.CONV.bf16.fp32 cml4, [p1], #64;NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5207 "00000000" // /* MW 9 */ + 5208 "00000000" // /* MW 8 */ + 5209 "00000000" // /* MW 7 */ + 5210 "00000000" // /* MW 6 */ + 5211 "00100011" // /* MW 5 */ + 5212 "00011110" // /* MW 4 */ + 5213 "11110001" // /* MW 3 */ + 5214 "00101100" // /* MW 2 */ + 5215 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 5 "accum.hpp" 1119 102 first +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 5216 "11100001" // NOPA; NOPB; VCONV.bf16.fp32 x11, cml4; NOPX; VMIN_GE.bf16 x8, r16, x10, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5217 "00000000" // /* MW 15 */ + 5218 "00000000" // /* MW 14 */ + 5219 "01111000" // /* MW 13 */ + 5220 "00010110" // /* MW 12 */ + 5221 "00101000" // /* MW 11 */ + 5222 "00000010" // /* MW 10 */ + 5223 "00000000" // /* MW 9 */ + 5224 "00000000" // /* MW 8 */ + 5225 "00010110" // /* MW 7 */ + 5226 "10010010" // /* MW 6 */ + 5227 "00100101" // /* MW 5 */ + 5228 "00000000" // /* MW 4 */ + 5229 "11110000" // /* MW 3 */ + 5230 "00101100" // /* MW 2 */ + 5231 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.aggressive_scheduled_block_id 4 +.noswbrkpt + 5232 "11100001" // NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x8, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5233 "00000000" // /* MW 15 */ + 5234 "00000000" // /* MW 14 */ + 5235 "01111000" // /* MW 13 */ + 5236 "01110110" // /* MW 12 */ + 5237 "10100000" // /* MW 11 */ + 5238 "00000001" // /* MW 10 */ + 5239 "00000000" // /* MW 9 */ + 5240 "00000000" // /* MW 8 */ + 5241 "01011011" // /* MW 7 */ + 5242 "00000001" // /* MW 6 */ + 5243 "00100000" // /* MW 5 */ + 5244 "00000000" // /* MW 4 */ + 5245 "11110000" // /* MW 3 */ + 5246 "00101100" // /* MW 2 */ + 5247 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 39 105 first +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5248 "00011011" // NOPA; NOPB; VCONV.bf16.fp32 x5, cml2; NOPX; VMIN_GE.bf16 x8, r16, x7, x0; VMSC.f dm2, dm3, x11, x6, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5249 "10110110" // /* MW 15 */ + 5250 "01010011" // /* MW 14 */ + 5251 "01111100" // /* MW 13 */ + 5252 "00010110" // /* MW 12 */ + 5253 "00011100" // /* MW 11 */ + 5254 "00000010" // /* MW 10 */ + 5255 "00000000" // /* MW 9 */ + 5256 "00000000" // /* MW 8 */ + 5257 "00010110" // /* MW 7 */ + 5258 "10010001" // /* MW 6 */ + 5259 "00100010" // /* MW 5 */ + 5260 "00000000" // /* MW 4 */ + 5261 "11110000" // /* MW 3 */ + 5262 "00101100" // /* MW 2 */ + 5263 "00000000" // /* MW 1 */ +.label ZLE_F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_288 +.src_ref 4 "abs.hpp" 32 22 first +.end_of_loop + 5264 "11100001" // NOPA; NOPB; NOPS; NOPX; VBAND x11, x6, x2; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5265 "00000000" // /* MW 15 */ + 5266 "00000000" // /* MW 14 */ + 5267 "10111000" // /* MW 13 */ + 5268 "10010101" // /* MW 12 */ + 5269 "11011000" // /* MW 11 */ + 5270 "00000010" // /* MW 10 */ + 5271 "00000000" // /* MW 9 */ + 5272 "00000000" // /* MW 8 */ + 5273 "01011011" // /* MW 7 */ + 5274 "00000001" // /* MW 6 */ + 5275 "00100000" // /* MW 5 */ + 5276 "00000000" // /* MW 4 */ + 5277 "11110000" // /* MW 3 */ + 5278 "00101100" // /* MW 2 */ + 5279 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 5 "mul_acc32_fp.hpp" 39 105 first +.loop_nesting 0 + 5280 "01100010" // VMAX_LT.bf16 x9, r16, x8, x1; VMSC.f dm4, dm1, x5, x9, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5281 "00100011" // /* MW 7 */ + 5282 "00101011" // /* MW 6 */ + 5283 "10001100" // /* MW 5 */ + 5284 "11100110" // /* MW 4 */ + 5285 "11101100" // /* MW 3 */ + 5286 "11000000" // /* MW 2 */ + 5287 "00000100" // /* MW 1 */ +.src_ref 4 "abs.hpp" 32 22 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 5288 "01111000" // VBAND x8, x9, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5289 "00101011" // /* MW 3 */ + 5290 "01001001" // /* MW 2 */ + 5291 "00011100" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.aggressive_scheduled_block_id 5 +.noswbrkpt + 5292 "01001000" // VMUL.f dm4, x3, x11, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5293 "01100001" // /* MW 3 */ + 5294 "11100111" // /* MW 2 */ + 5295 "10001100" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 5296 "01001000" // VMUL.f dm2, x3, x8, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5297 "00000001" // /* MW 3 */ + 5298 "11100111" // /* MW 2 */ + 5299 "10001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_unary.h" 166 20 first +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 5300 "00011000" // VST.CONV.bf16.fp32 cml2, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5301 "00100011" // /* MW 3 */ + 5302 "00011101" // /* MW 2 */ + 5303 "00001001" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 5304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5305 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_unary.h" 147 20 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5306 "00011000" // VST.CONV.bf16.fp32 cml4, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5307 "00100011" // /* MW 3 */ + 5308 "00011110" // /* MW 2 */ + 5309 "00001001" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 38 105 first + 5310 "01001000" // VMAC.f dm3, dm0, x6, x4, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5311 "10000001" // /* MW 3 */ + 5312 "00001100" // /* MW 2 */ + 5313 "10001011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 38 105 + 5314 "01100010" // VCONV.bf16.fp32 x11, cml4; VMAC.f dm1, dm0, x9, x4, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5315 "10000001" // /* MW 7 */ + 5316 "00010010" // /* MW 6 */ + 5317 "10001001" // /* MW 5 */ + 5318 "00000010" // /* MW 4 */ + 5319 "11000000" // /* MW 3 */ + 5320 "01000010" // /* MW 2 */ + 5321 "10110010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 + 5322 "00011000" // VCONV.bf16.fp32 x5, cml2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5323 "00010110" // /* MW 3 */ + 5324 "10010001" // /* MW 2 */ + 5325 "00001010" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 39 105 first + 5326 "01001000" // VMSC.f dm2, dm3, x11, x6, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5327 "11000011" // /* MW 3 */ + 5328 "01110110" // /* MW 2 */ + 5329 "10001010" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 39 105 + 5330 "01001000" // VMSC.f dm4, dm1, x5, x9, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5331 "00100011" // /* MW 3 */ + 5332 "00101011" // /* MW 2 */ + 5333 "10001100" // /* MW 1 */ + 5334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5335 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 129 4 first + 5336 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 5337 "00000000" // /* MW 3 */ + 5338 "00101000" // /* MW 2 */ + 5339 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5341 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5343 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_unary.h" 166 20 first +.delay_slot + 5344 "00011000" // VST.CONV.bf16.fp32 cml2, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5345 "00100011" // /* MW 3 */ + 5346 "00011101" // /* MW 2 */ + 5347 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_unary.h" 147 20 first +.delay_slot + 5348 "00011000" // VST.CONV.bf16.fp32 cml4, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5349 "00100011" // /* MW 3 */ + 5350 "00011110" // /* MW 2 */ + 5351 "00001001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E__end +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_end0 + 5353 "00000000" // /* MW 1 */ +.label __Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_sigmoid1d _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 6 "superkernels.cpp" 210 first +.src_ref 6 "superkernels.cpp" 215 6 +.function_start + 5360 "01000100" // MOVXM p3, #508864 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5361 "10000000" // /* MW 5 */ + 5362 "11000111" // /* MW 4 */ + 5363 "11000110" // /* MW 3 */ + 5364 "00000111" // /* MW 2 */ + 5365 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 215 6 first + 5366 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5367 "11000001" // /* MW 5 */ + 5368 "10110101" // /* MW 4 */ + 5369 "11011000" // /* MW 3 */ + 5370 "11000010" // /* MW 2 */ + 5371 "01100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 210 + 5372 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5373 "00000001" // /* MW 5 */ + 5374 "00000000" // /* MW 4 */ + 5375 "00000000" // /* MW 3 */ + 5376 "00001000" // /* MW 2 */ + 5377 "00000000" // /* MW 1 */ + 5378 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5379 "01110000" // /* MW 7 */ + 5380 "11010000" // /* MW 6 */ + 5381 "00001011" // /* MW 5 */ + 5382 "00000000" // /* MW 4 */ + 5383 "10110000" // /* MW 3 */ + 5384 "01100011" // /* MW 2 */ + 5385 "11111111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 212 11 + 5386 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #508876 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5387 "00010001" // /* MW 9 */ + 5388 "11100110" // /* MW 8 */ + 5389 "00110001" // /* MW 7 */ + 5390 "11110011" // /* MW 6 */ + 5391 "00000001" // /* MW 5 */ + 5392 "00000000" // /* MW 4 */ + 5393 "10110000" // /* MW 3 */ + 5394 "10000010" // /* MW 2 */ + 5395 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 + 5396 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5397 "11000000" // /* MW 3 */ + 5398 "11010100" // /* MW 2 */ + 5399 "00011011" // /* MW 1 */ + 5400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5401 "00000000" // /* MW 1 */ + 5402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5403 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 215 6 +.src_ref 6 "superkernels.cpp" 215 16 + 5404 "10000100" // JNZ r16, #5568 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5568 delay_slots=5 */ + 5405 "00000001" // /* MW 5 */ + 5406 "01000000" // /* MW 4 */ + 5407 "11100000" // /* MW 3 */ + 5408 "00001010" // /* MW 2 */ + 5409 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 212 22 first +.delay_slot + 5410 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5411 "10010000" // /* MW 3 */ + 5412 "01100010" // /* MW 2 */ + 5413 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 212 30 +.delay_slot + 5414 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5415 "11111011" // /* MW 3 */ + 5416 "01100011" // /* MW 2 */ + 5417 "00010100" // /* MW 1 */ +.delay_slot + 5418 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5419 "00111101" // /* MW 3 */ + 5420 "11110100" // /* MW 2 */ + 5421 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 6 "superkernels.cpp" 212 11 +.delay_slot + 5422 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5423 "01110000" // /* MW 7 */ + 5424 "01100000" // /* MW 6 */ + 5425 "00110000" // /* MW 5 */ + 5426 "00000011" // /* MW 4 */ + 5427 "00110000" // /* MW 3 */ + 5428 "11000110" // /* MW 2 */ + 5429 "11000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 218 4 +.src_ref 6 "superkernels.cpp" 229 2 +.delay_slot + 5430 "01000100" // MOVXM p0, #509248 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5431 "10000000" // /* MW 5 */ + 5432 "11001010" // /* MW 4 */ + 5433 "11000000" // /* MW 3 */ + 5434 "00000111" // /* MW 2 */ + 5435 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5436 "01000100" // MOVXM p2, #508944 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5437 "00100000" // /* MW 5 */ + 5438 "11001000" // /* MW 4 */ + 5439 "11000100" // /* MW 3 */ + 5440 "00000111" // /* MW 2 */ + 5441 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5442 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #508940 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5443 "00010000" // /* MW 9 */ + 5444 "00000110" // /* MW 8 */ + 5445 "00110010" // /* MW 7 */ + 5446 "11110001" // /* MW 6 */ + 5447 "00000001" // /* MW 5 */ + 5448 "00000000" // /* MW 4 */ + 5449 "11100000" // /* MW 3 */ + 5450 "11000000" // /* MW 2 */ + 5451 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5453 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 218 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 5454 "00000100" // JL #4896 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4896 delay_slots=5 */ + 5455 "00000001" // /* MW 5 */ + 5456 "00000000" // /* MW 4 */ + 5457 "10010000" // /* MW 3 */ + 5458 "00001001" // /* MW 2 */ + 5459 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5461 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5463 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5464 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5465 "00110001" // /* MW 3 */ + 5466 "00100000" // /* MW 2 */ + 5467 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 5468 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5469 "00000101" // /* MW 3 */ + 5470 "00100000" // /* MW 2 */ + 5471 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 5472 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5473 "00000000" // /* MW 15 */ + 5474 "00000000" // /* MW 14 */ + 5475 "01111000" // /* MW 13 */ + 5476 "10100101" // /* MW 12 */ + 5477 "00000001" // /* MW 11 */ + 5478 "00000000" // /* MW 10 */ + 5479 "00000000" // /* MW 9 */ + 5480 "10000000" // /* MW 8 */ + 5481 "00010001" // /* MW 7 */ + 5482 "00000110" // /* MW 6 */ + 5483 "00100010" // /* MW 5 */ + 5484 "00000000" // /* MW 4 */ + 5485 "11110000" // /* MW 3 */ + 5486 "00101100" // /* MW 2 */ + 5487 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 222 18 +.return_address + 5488 "01000100" // MOVXM p2, #508876 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5489 "10011000" // /* MW 5 */ + 5490 "11000111" // /* MW 4 */ + 5491 "11000100" // /* MW 3 */ + 5492 "00000111" // /* MW 2 */ + 5493 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 222 18 first +.src_ref 6 "superkernels.cpp" 222 46 + 5494 "10111010" // LDA r16, [p2]; MOVXM p2, #509248 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5495 "00010000" // /* MW 9 */ + 5496 "10100000" // /* MW 8 */ + 5497 "00110010" // /* MW 7 */ + 5498 "11110001" // /* MW 6 */ + 5499 "00000001" // /* MW 5 */ + 5500 "00000000" // /* MW 4 */ + 5501 "11010000" // /* MW 3 */ + 5502 "11000010" // /* MW 2 */ + 5503 "01000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 220 32 +.src_ref 6 "superkernels.cpp" 222 46 +.src_ref 6 "superkernels.cpp" 229 2 + 5504 "10111010" // LDA r17, [p2]; MOVXM p2, #509248 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5505 "00010000" // /* MW 9 */ + 5506 "10100000" // /* MW 8 */ + 5507 "00110010" // /* MW 7 */ + 5508 "11110001" // /* MW 6 */ + 5509 "00000001" // /* MW 5 */ + 5510 "00000000" // /* MW 4 */ + 5511 "11010000" // /* MW 3 */ + 5512 "11000110" // /* MW 2 */ + 5513 "01000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 220 32 first +.src_ref 6 "superkernels.cpp" 222 16 +.src_ref 6 "superkernels.cpp" 227 47 + 5514 "10111010" // LDA.u16 r18, [p2, #8]; MOVXM p1, #508880 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5515 "00010000" // /* MW 9 */ + 5516 "11101000" // /* MW 8 */ + 5517 "10110001" // /* MW 7 */ + 5518 "11110000" // /* MW 6 */ + 5519 "00000001" // /* MW 5 */ + 5520 "00000000" // /* MW 4 */ + 5521 "01010000" // /* MW 3 */ + 5522 "11001011" // /* MW 2 */ + 5523 "01001000" // /* MW 1 */ + 5524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5525 "00000000" // /* MW 1 */ + 5526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5527 "00000000" // /* MW 1 */ + 5528 "10000100" // J #5584 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=5584 delay_slots=5 */ + 5529 "00000000" // /* MW 5 */ + 5530 "00000000" // /* MW 4 */ + 5531 "11101000" // /* MW 3 */ + 5532 "00001010" // /* MW 2 */ + 5533 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 220 13 +.delay_slot + 5534 "01000100" // MOVXM p0, #508936 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5535 "00010000" // /* MW 5 */ + 5536 "11001000" // /* MW 4 */ + 5537 "11000000" // /* MW 3 */ + 5538 "00000111" // /* MW 2 */ + 5539 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5541 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 222 27 first +.delay_slot + 5542 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5543 "00001111" // /* MW 3 */ + 5544 "01100001" // /* MW 2 */ + 5545 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 220 13 first +.delay_slot + 5546 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5547 "10100011" // /* MW 5 */ + 5548 "00001100" // /* MW 4 */ + 5549 "11110000" // /* MW 3 */ + 5550 "00101100" // /* MW 2 */ + 5551 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 222 16 first +.delay_slot + 5552 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5553 "00000000" // /* MW 15 */ + 5554 "00000000" // /* MW 14 */ + 5555 "01111000" // /* MW 13 */ + 5556 "10100101" // /* MW 12 */ + 5557 "00000001" // /* MW 11 */ + 5558 "00000000" // /* MW 10 */ + 5559 "00000000" // /* MW 9 */ + 5560 "10000000" // /* MW 8 */ + 5561 "00010001" // /* MW 7 */ + 5562 "00000110" // /* MW 6 */ + 5563 "00100001" // /* MW 5 */ + 5564 "00000000" // /* MW 4 */ + 5565 "11110000" // /* MW 3 */ + 5566 "00101100" // /* MW 2 */ + 5567 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 6 "superkernels.cpp" 227 47 +.src_ref 6 "superkernels.cpp" 229 2 + 5568 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #508880; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5569 "00000000" // /* MW 15 */ + 5570 "00000000" // /* MW 14 */ + 5571 "00010000" // /* MW 13 */ + 5572 "11101000" // /* MW 12 */ + 5573 "10110001" // /* MW 11 */ + 5574 "11110000" // /* MW 10 */ + 5575 "00000001" // /* MW 9 */ + 5576 "00000000" // /* MW 8 */ + 5577 "10001011" // /* MW 7 */ + 5578 "10000000" // /* MW 6 */ + 5579 "00100010" // /* MW 5 */ + 5580 "00000000" // /* MW 4 */ + 5581 "11110000" // /* MW 3 */ + 5582 "00101100" // /* MW 2 */ + 5583 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 +.src_ref 1 "io_buffer_main.h" 242 49 first + 5584 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5585 "00000000" // /* MW 7 */ + 5586 "11000011" // /* MW 6 */ + 5587 "10110011" // /* MW 5 */ + 5588 "00000011" // /* MW 4 */ + 5589 "01100000" // /* MW 3 */ + 5590 "10010001" // /* MW 2 */ + 5591 "01110011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 226 2 + 5592 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508864 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5593 "00010000" // /* MW 9 */ + 5594 "11100000" // /* MW 8 */ + 5595 "00110001" // /* MW 7 */ + 5596 "11110000" // /* MW 6 */ + 5597 "00000001" // /* MW 5 */ + 5598 "00000000" // /* MW 4 */ + 5599 "11010000" // /* MW 3 */ + 5600 "11101110" // /* MW 2 */ + 5601 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 5602 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5603 "00010110" // /* MW 3 */ + 5604 "11111110" // /* MW 2 */ + 5605 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 5606 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5607 "00110110" // /* MW 3 */ + 5608 "11111110" // /* MW 2 */ + 5609 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first + 5610 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5611 "01010110" // /* MW 3 */ + 5612 "01000110" // /* MW 2 */ + 5613 "00000111" // /* MW 1 */ + 5614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5615 "00000000" // /* MW 1 */ + 5616 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5617 "00000000" // /* MW 1 */ + 5618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5619 "00000000" // /* MW 1 */ + 5620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5621 "00000000" // /* MW 1 */ + 5622 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5623 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 5624 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5625 "00000010" // /* MW 3 */ + 5626 "01100001" // /* MW 2 */ + 5627 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 + 5628 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5629 "00010001" // /* MW 3 */ + 5630 "00000110" // /* MW 2 */ + 5631 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 + 5632 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5633 "11111101" // /* MW 3 */ + 5634 "11100000" // /* MW 2 */ + 5635 "00010111" // /* MW 1 */ + 5636 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5637 "00000000" // /* MW 1 */ + 5638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5639 "00000000" // /* MW 1 */ + 5640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5641 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 5642 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5643 "00001000" // /* MW 3 */ + 5644 "10010011" // /* MW 2 */ + 5645 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 227 45 + 5646 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5647 "10000001" // /* MW 5 */ + 5648 "10101101" // /* MW 4 */ + 5649 "10100111" // /* MW 3 */ + 5650 "00000000" // /* MW 2 */ + 5651 "00000100" // /* MW 1 */ + 5652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5653 "00000000" // /* MW 1 */ + 5654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5655 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 226 2 first + 5656 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5657 "00110110" // /* MW 3 */ + 5658 "00000110" // /* MW 2 */ + 5659 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 1 "io_buffer_main.h" 348 51 + 5660 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5661 "10000001" // /* MW 5 */ + 5662 "11011101" // /* MW 4 */ + 5663 "11011100" // /* MW 3 */ + 5664 "11001010" // /* MW 2 */ + 5665 "11000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 227 47 first + 5666 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5667 "01110110" // /* MW 3 */ + 5668 "00000110" // /* MW 2 */ + 5669 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 5670 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5671 "10011110" // /* MW 3 */ + 5672 "01011100" // /* MW 2 */ + 5673 "00000111" // /* MW 1 */ + 5674 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5675 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 229 2 first +.no_stack_arguments + 5676 "00000100" // JL #4976 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4976 delay_slots=5 */ + 5677 "00000001" // /* MW 5 */ + 5678 "00000000" // /* MW 4 */ + 5679 "10111000" // /* MW 3 */ + 5680 "00001001" // /* MW 2 */ + 5681 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5683 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 226 2 first +.delay_slot + 5684 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5685 "00000111" // /* MW 3 */ + 5686 "01100010" // /* MW 2 */ + 5687 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 226 2 +.delay_slot + 5688 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5689 "00110001" // /* MW 3 */ + 5690 "00000110" // /* MW 2 */ + 5691 "00001000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 227 45 first +.delay_slot + 5692 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5693 "00001101" // /* MW 3 */ + 5694 "11100001" // /* MW 2 */ + 5695 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 227 45 +.delay_slot + 5696 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5697 "00000000" // /* MW 15 */ + 5698 "00000000" // /* MW 14 */ + 5699 "10101000" // /* MW 13 */ + 5700 "10100000" // /* MW 12 */ + 5701 "00110100" // /* MW 11 */ + 5702 "00000000" // /* MW 10 */ + 5703 "00000000" // /* MW 9 */ + 5704 "00000000" // /* MW 8 */ + 5705 "01011011" // /* MW 7 */ + 5706 "00000001" // /* MW 6 */ + 5707 "00100000" // /* MW 5 */ + 5708 "00000000" // /* MW 4 */ + 5709 "11110000" // /* MW 3 */ + 5710 "00101100" // /* MW 2 */ + 5711 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first +.src_ref 6 "superkernels.cpp" 231 6 +.src_ref 6 "superkernels.cpp" 232 14 +.return_address + 5712 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508864 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5713 "00010000" // /* MW 9 */ + 5714 "11100000" // /* MW 8 */ + 5715 "00110001" // /* MW 7 */ + 5716 "11110011" // /* MW 6 */ + 5717 "00000001" // /* MW 5 */ + 5718 "00000000" // /* MW 4 */ + 5719 "11010000" // /* MW 3 */ + 5720 "11000110" // /* MW 2 */ + 5721 "11001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 449 8 + 5722 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5723 "00000101" // /* MW 3 */ + 5724 "00100000" // /* MW 2 */ + 5725 "00010000" // /* MW 1 */ + 5726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5727 "00000000" // /* MW 1 */ + 5728 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5729 "00000000" // /* MW 1 */ + 5730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5731 "00000000" // /* MW 1 */ + 5732 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5733 "00000000" // /* MW 1 */ + 5734 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5735 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 5736 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5737 "00001000" // /* MW 3 */ + 5738 "01010001" // /* MW 2 */ + 5739 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first +.src_ref 6 "superkernels.cpp" 231 19 + 5740 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #508936 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5741 "00010000" // /* MW 9 */ + 5742 "00000100" // /* MW 8 */ + 5743 "00110010" // /* MW 7 */ + 5744 "11110001" // /* MW 6 */ + 5745 "00000001" // /* MW 5 */ + 5746 "00000000" // /* MW 4 */ + 5747 "11010000" // /* MW 3 */ + 5748 "11001110" // /* MW 2 */ + 5749 "11111100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 231 6 first + 5750 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5751 "00110110" // /* MW 3 */ + 5752 "00000110" // /* MW 2 */ + 5753 "00000110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 231 19 + 5754 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5755 "01010110" // /* MW 3 */ + 5756 "00000110" // /* MW 2 */ + 5757 "00000010" // /* MW 1 */ + 5758 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5759 "00000000" // /* MW 1 */ + 5760 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5761 "00000000" // /* MW 1 */ + 5762 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5763 "00000000" // /* MW 1 */ + 5764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5765 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 first + 5766 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5767 "00110001" // /* MW 3 */ + 5768 "00100001" // /* MW 2 */ + 5769 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 5770 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5771 "00010001" // /* MW 3 */ + 5772 "11100110" // /* MW 2 */ + 5773 "00001111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 231 16 first + 5774 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5775 "00101000" // /* MW 3 */ + 5776 "01100001" // /* MW 2 */ + 5777 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 231 6 + 5778 "10000100" // JNZ r16, #5808 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5808 delay_slots=5 */ + 5779 "00000001" // /* MW 5 */ + 5780 "01000000" // /* MW 4 */ + 5781 "01011000" // /* MW 3 */ + 5782 "00001011" // /* MW 2 */ + 5783 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5785 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5787 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5789 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5791 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5793 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 232 14 + 5794 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5795 "00000001" // /* MW 3 */ + 5796 "00100000" // /* MW 2 */ + 5797 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 232 14 first + 5798 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5799 "00000000" // /* MW 9 */ + 5800 "00000000" // /* MW 8 */ + 5801 "00000000" // /* MW 7 */ + 5802 "10000000" // /* MW 6 */ + 5803 "00010001" // /* MW 5 */ + 5804 "00000110" // /* MW 4 */ + 5805 "11110110" // /* MW 3 */ + 5806 "00101100" // /* MW 2 */ + 5807 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 6 "superkernels.cpp" 234 + 5808 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5809 "00111001" // /* MW 3 */ + 5810 "11110100" // /* MW 2 */ + 5811 "00000111" // /* MW 1 */ + 5812 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5813 "00011001" // /* MW 3 */ + 5814 "11111011" // /* MW 2 */ + 5815 "00000111" // /* MW 1 */ + 5816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5817 "00000000" // /* MW 1 */ + 5818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5819 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 5820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5821 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 5822 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5823 "11110001" // /* MW 3 */ + 5824 "11111101" // /* MW 2 */ + 5825 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5827 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 234 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5828 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 5829 "00000000" // /* MW 3 */ + 5830 "00101000" // /* MW 2 */ + 5831 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5832 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5833 "10100000" // /* MW 3 */ + 5834 "01100111" // /* MW 2 */ + 5835 "00011111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 234 +.delay_slot + 5836 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5837 "00000001" // /* MW 5 */ + 5838 "00000000" // /* MW 4 */ + 5839 "00000000" // /* MW 3 */ + 5840 "11111000" // /* MW 2 */ + 5841 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5843 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5845 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5846 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 5847 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv +.function shared_setup_backbone _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv +.src_ref 2 "elementwise_binary_shared.h" 164 first +.src_ref 2 "elementwise_binary_shared.h" 170 22 +.src_ref 2 "elementwise_binary_shared.h" 170 24 first +.function_start + 5856 "10111010" // LDA el0, [p1], #4; MOVXM p0, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5857 "00010000" // /* MW 9 */ + 5858 "01100000" // /* MW 8 */ + 5859 "00110010" // /* MW 7 */ + 5860 "11110000" // /* MW 6 */ + 5861 "00000001" // /* MW 5 */ + 5862 "00000000" // /* MW 4 */ + 5863 "11010000" // /* MW 3 */ + 5864 "10000101" // /* MW 2 */ + 5865 "00100011" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 36 +.src_ref 2 "elementwise_binary_shared.h" 175 36 +.src_ref 2 "elementwise_binary_shared.h" 175 48 + 5866 "10111010" // MOVA r0, #-128; MOVX r1, #256; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5867 "01011000" // /* MW 9 */ + 5868 "00000000" // /* MW 8 */ + 5869 "00001000" // /* MW 7 */ + 5870 "00001011" // /* MW 6 */ + 5871 "00010000" // /* MW 5 */ + 5872 "00001000" // /* MW 4 */ + 5873 "00000000" // /* MW 3 */ + 5874 "00000000" // /* MW 2 */ + 5875 "11110000" // /* MW 1 */ + 5876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5877 "00000000" // /* MW 1 */ + 5878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5879 "00000000" // /* MW 1 */ + 5880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5881 "00000000" // /* MW 1 */ + 5882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5883 "00000000" // /* MW 1 */ + 5884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5885 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 170 22 first + 5886 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5887 "00101001" // /* MW 3 */ + 5888 "00011100" // /* MW 2 */ + 5889 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 171 24 first + 5890 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5891 "00101110" // /* MW 3 */ + 5892 "00011100" // /* MW 2 */ + 5893 "00000001" // /* MW 1 */ + 5894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5895 "00000000" // /* MW 1 */ + 5896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5897 "00000000" // /* MW 1 */ + 5898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5899 "00000000" // /* MW 1 */ + 5900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5901 "00000000" // /* MW 1 */ + 5902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5903 "00000000" // /* MW 1 */ + 5904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5905 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 171 22 + 5906 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5907 "00101001" // /* MW 3 */ + 5908 "00011100" // /* MW 2 */ + 5909 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 172 24 first + 5910 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5911 "00101110" // /* MW 3 */ + 5912 "00000100" // /* MW 2 */ + 5913 "00000001" // /* MW 1 */ + 5914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5915 "00000000" // /* MW 1 */ + 5916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5917 "00000000" // /* MW 1 */ + 5918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5919 "00000000" // /* MW 1 */ + 5920 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5921 "00000000" // /* MW 1 */ + 5922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5923 "00000000" // /* MW 1 */ + 5924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5925 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 172 22 + 5926 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5927 "00101001" // /* MW 3 */ + 5928 "00011100" // /* MW 2 */ + 5929 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 173 24 first + 5930 "10011000" // LDA r3, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5931 "01110110" // /* MW 3 */ + 5932 "00010100" // /* MW 2 */ + 5933 "00000001" // /* MW 1 */ + 5934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5935 "00000000" // /* MW 1 */ + 5936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5937 "00000000" // /* MW 1 */ + 5938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5939 "00000000" // /* MW 1 */ + 5940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5941 "00000000" // /* MW 1 */ + 5942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5943 "00000000" // /* MW 1 */ + 5944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5945 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 173 22 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5946 "10011000" // ST r3, [p0], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5947 "01110001" // /* MW 3 */ + 5948 "01001100" // /* MW 2 */ + 5949 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 34 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5950 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5951 "00010111" // /* MW 3 */ + 5952 "00000100" // /* MW 2 */ + 5953 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 176 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5954 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 5955 "00000000" // /* MW 3 */ + 5956 "00101000" // /* MW 2 */ + 5957 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5958 "01000100" // MOVXM r2, #65280 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5959 "00000000" // /* MW 5 */ + 5960 "00111110" // /* MW 4 */ + 5961 "11110001" // /* MW 3 */ + 5962 "00000000" // /* MW 2 */ + 5963 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 48 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5964 "10011000" // AND r2, r3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5965 "00100100" // /* MW 3 */ + 5966 "11000100" // /* MW 2 */ + 5967 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5968 "10011000" // EQ r27, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5969 "00100111" // /* MW 3 */ + 5970 "01110110" // /* MW 2 */ + 5971 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 36 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5972 "00011000" // SEL.EQZ r0, r0, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5973 "10000010" // /* MW 3 */ + 5974 "00000001" // /* MW 2 */ + 5975 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_end0 + 5977 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv +.function setup _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv +.src_ref 2 "elementwise_binary_shared.h" 178 +.src_ref 2 "elementwise_binary_shared.h" 178 first +.function_start + 5984 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5985 "00000001" // /* MW 5 */ + 5986 "00000000" // /* MW 4 */ + 5987 "00000000" // /* MW 3 */ + 5988 "00001000" // /* MW 2 */ + 5989 "00000000" // /* MW 1 */ + 5990 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5991 "00111101" // /* MW 3 */ + 5992 "11111000" // /* MW 2 */ + 5993 "00001111" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 179 8 first +.no_stack_arguments + 5994 "00000100" // JL #5856 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=5856 delay_slots=5 */ + 5995 "00000001" // /* MW 5 */ + 5996 "00000000" // /* MW 4 */ + 5997 "01110000" // /* MW 3 */ + 5998 "00001011" // /* MW 2 */ + 5999 "00000000" // /* MW 1 */ +.delay_slot + 6000 "11111000" // MOV r0, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6001 "10100000" // /* MW 3 */ + 6002 "00010111" // /* MW 2 */ + 6003 "00011000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 179 8 +.delay_slot + 6004 "00111010" // ST r0, [sp, #-4]; MOVXM r15, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6005 "00010001" // /* MW 9 */ + 6006 "01100000" // /* MW 8 */ + 6007 "11101010" // /* MW 7 */ + 6008 "11110001" // /* MW 6 */ + 6009 "00000001" // /* MW 5 */ + 6010 "00000000" // /* MW 4 */ + 6011 "10110000" // /* MW 3 */ + 6012 "10000010" // /* MW 2 */ + 6013 "11111111" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 179 8 +.delay_slot + 6014 "11111000" // MOV p0, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6015 "10100000" // /* MW 3 */ + 6016 "01100111" // /* MW 2 */ + 6017 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6018 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6019 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6020 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6021 "10000001" // /* MW 11 */ + 6022 "10101101" // /* MW 10 */ + 6023 "00000000" // /* MW 9 */ + 6024 "00000000" // /* MW 8 */ + 6025 "00000000" // /* MW 7 */ + 6026 "00000000" // /* MW 6 */ + 6027 "00100000" // /* MW 5 */ + 6028 "00000000" // /* MW 4 */ + 6029 "11110000" // /* MW 3 */ + 6030 "00101100" // /* MW 2 */ + 6031 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 179 8 +.src_ref 2 "elementwise_binary_shared.h" 181 4 +.src_ref 3 "add_impl.h" 105 29 +.return_address + 6032 "10111010" // LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p1, r15, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6033 "00001000" // /* MW 9 */ + 6034 "11000100" // /* MW 8 */ + 6035 "10110011" // /* MW 7 */ + 6036 "01101000" // /* MW 6 */ + 6037 "00000000" // /* MW 5 */ + 6038 "00000001" // /* MW 4 */ + 6039 "00100000" // /* MW 3 */ + 6040 "00000111" // /* MW 2 */ + 6041 "11111111" // /* MW 1 */ +.src_ref 3 "add_impl.h" 105 29 +.src_ref 3 "add_impl.h" 106 37 +.src_ref 3 "add_impl.h" 106 39 + 6042 "10111010" // MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6043 "01011000" // /* MW 9 */ + 6044 "11111101" // /* MW 8 */ + 6045 "00000111" // /* MW 7 */ + 6046 "00001000" // /* MW 6 */ + 6047 "10000000" // /* MW 5 */ + 6048 "00000001" // /* MW 4 */ + 6049 "10000000" // /* MW 3 */ + 6050 "11100010" // /* MW 2 */ + 6051 "00000001" // /* MW 1 */ +.src_ref 3 "add_impl.h" 105 29 first +.src_ref 3 "add_impl.h" 106 39 + 6052 "01111010" // LDA r15, [sp, #-4]; ST r16, [p1], m0; MOVX r16, #-128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6053 "00000001" // /* MW 9 */ + 6054 "10100000" // /* MW 8 */ + 6055 "00000111" // /* MW 7 */ + 6056 "10000000" // /* MW 6 */ + 6057 "00010001" // /* MW 5 */ + 6058 "00001010" // /* MW 4 */ + 6059 "00100001" // /* MW 3 */ + 6060 "10111110" // /* MW 2 */ + 6061 "11111111" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 50 first + 6062 "10011000" // LDA.u8 r18, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6063 "01001010" // /* MW 3 */ + 6064 "00000110" // /* MW 2 */ + 6065 "00000001" // /* MW 1 */ + 6066 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6067 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 6068 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6069 "00000000" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 37 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 6070 "00011000" // ST.s16 r16, [p1, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6071 "00010111" // /* MW 3 */ + 6072 "00000010" // /* MW 2 */ + 6073 "00000001" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 181 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6074 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 6075 "00000000" // /* MW 3 */ + 6076 "00101000" // /* MW 2 */ + 6077 "00010000" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 54 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6078 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6079 "00000101" // /* MW 3 */ + 6080 "00100010" // /* MW 2 */ + 6081 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 181 4 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6082 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6083 "00000001" // /* MW 5 */ + 6084 "00000000" // /* MW 4 */ + 6085 "00000000" // /* MW 3 */ + 6086 "11111000" // /* MW 2 */ + 6087 "11111111" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 54 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6088 "10011000" // EQ r27, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6089 "00100111" // /* MW 3 */ + 6090 "01110111" // /* MW 2 */ + 6091 "00010100" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 39 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6092 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6093 "10000010" // /* MW 3 */ + 6094 "00100001" // /* MW 2 */ + 6095 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + 6097 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.function run _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.src_ref 2 "elementwise_binary_shared.h" 186 first +.src_ref 2 "elementwise_binary_shared.h" 191 8 first +.tail_call +.function_start + 6112 "10000100" // J #4400 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=4400 delay_slots=5 */ + 6113 "00000000" // /* MW 5 */ + 6114 "00000000" // /* MW 4 */ + 6115 "10011000" // /* MW 3 */ + 6116 "00001000" // /* MW 2 */ + 6117 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 191 8 +.delay_slot + 6118 "01000100" // MOVXM p3, #509120 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6119 "10000000" // /* MW 5 */ + 6120 "11001001" // /* MW 4 */ + 6121 "11000110" // /* MW 3 */ + 6122 "00000111" // /* MW 2 */ + 6123 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6125 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6127 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6129 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0 + 6131 "00000000" // /* MW 1 */ +.label __Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0 +.label _Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.function superkernel_add1d _Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.src_ref 6 "superkernels.cpp" 240 first +.src_ref 6 "superkernels.cpp" 245 6 +.function_start + 6144 "01000100" // MOVXM p4, #508864 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6145 "10000000" // /* MW 5 */ + 6146 "11000111" // /* MW 4 */ + 6147 "11001000" // /* MW 3 */ + 6148 "00000111" // /* MW 2 */ + 6149 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 245 6 first + 6150 "11010100" // LDA r16, [p4]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6151 "11000001" // /* MW 5 */ + 6152 "10110101" // /* MW 4 */ + 6153 "11011000" // /* MW 3 */ + 6154 "11000010" // /* MW 2 */ + 6155 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 240 + 6156 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6157 "00000001" // /* MW 5 */ + 6158 "00000000" // /* MW 4 */ + 6159 "00000000" // /* MW 3 */ + 6160 "00001000" // /* MW 2 */ + 6161 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 242 22 first + 6162 "00111010" // ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6163 "01111001" // /* MW 9 */ + 6164 "01100000" // /* MW 8 */ + 6165 "11001010" // /* MW 7 */ + 6166 "10000001" // /* MW 6 */ + 6167 "00010100" // /* MW 5 */ + 6168 "00100011" // /* MW 4 */ + 6169 "10110000" // /* MW 3 */ + 6170 "00111010" // /* MW 2 */ + 6171 "11111111" // /* MW 1 */ + 6172 "00000010" // ST p0, [sp, #-20]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6173 "01110000" // /* MW 7 */ + 6174 "11010000" // /* MW 6 */ + 6175 "00001011" // /* MW 5 */ + 6176 "00000000" // /* MW 4 */ + 6177 "10110000" // /* MW 3 */ + 6178 "10000011" // /* MW 2 */ + 6179 "11111101" // /* MW 1 */ + 6180 "10011000" // ST r0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6181 "00010101" // /* MW 3 */ + 6182 "11111100" // /* MW 2 */ + 6183 "00001111" // /* MW 1 */ + 6184 "10011000" // ST lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6185 "00111101" // /* MW 3 */ + 6186 "11110000" // /* MW 2 */ + 6187 "00001111" // /* MW 1 */ + 6188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6189 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 245 6 first +.src_ref 6 "superkernels.cpp" 245 16 first + 6190 "10000100" // JNZ r16, #6336 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6336 delay_slots=5 */ + 6191 "00000001" // /* MW 5 */ + 6192 "01000000" // /* MW 4 */ + 6193 "01100000" // /* MW 3 */ + 6194 "00001100" // /* MW 2 */ + 6195 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 242 30 first +.delay_slot + 6196 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6197 "11111011" // /* MW 3 */ + 6198 "01100011" // /* MW 2 */ + 6199 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 242 11 +.delay_slot + 6200 "01000100" // MOVXM p2, #508876 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6201 "10011000" // /* MW 5 */ + 6202 "11000111" // /* MW 4 */ + 6203 "11000100" // /* MW 3 */ + 6204 "00000111" // /* MW 2 */ + 6205 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 242 11 +.delay_slot + 6206 "00000010" // ST r17, [p2]; MOV p2, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6207 "01110000" // /* MW 7 */ + 6208 "01100000" // /* MW 6 */ + 6209 "00110111" // /* MW 5 */ + 6210 "00000001" // /* MW 4 */ + 6211 "00110000" // /* MW 3 */ + 6212 "11000110" // /* MW 2 */ + 6213 "01000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.delay_slot + 6214 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6215 "11000000" // /* MW 3 */ + 6216 "11010110" // /* MW 2 */ + 6217 "00011011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 248 4 +.src_ref 6 "superkernels.cpp" 250 28 +.src_ref 6 "superkernels.cpp" 252 42 +.src_ref 6 "superkernels.cpp" 264 2 +.delay_slot + 6218 "00111010" // ST p2, [sp, #-12]; MOVXM p7, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6219 "00010001" // /* MW 9 */ + 6220 "01100000" // /* MW 8 */ + 6221 "10110010" // /* MW 7 */ + 6222 "11110011" // /* MW 6 */ + 6223 "00000001" // /* MW 5 */ + 6224 "00000000" // /* MW 4 */ + 6225 "10110000" // /* MW 3 */ + 6226 "10100011" // /* MW 2 */ + 6227 "11111110" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.src_ref 6 "superkernels.cpp" 248 4 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 6228 "00111010" // MOVS p0, p7; MOVXM p2, #508944 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6229 "00010001" // /* MW 9 */ + 6230 "00001000" // /* MW 8 */ + 6231 "00110010" // /* MW 7 */ + 6232 "11110001" // /* MW 6 */ + 6233 "00000001" // /* MW 5 */ + 6234 "00000000" // /* MW 4 */ + 6235 "01100000" // /* MW 3 */ + 6236 "10010001" // /* MW 2 */ + 6237 "00010011" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 6238 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #508940 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6239 "00010000" // /* MW 9 */ + 6240 "00000110" // /* MW 8 */ + 6241 "00110010" // /* MW 7 */ + 6242 "11110001" // /* MW 6 */ + 6243 "00000001" // /* MW 5 */ + 6244 "00000000" // /* MW 4 */ + 6245 "11100000" // /* MW 3 */ + 6246 "11000000" // /* MW 2 */ + 6247 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6248 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6249 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 248 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 6250 "00000100" // JL #5984 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=5984 delay_slots=5 */ + 6251 "00000001" // /* MW 5 */ + 6252 "00000000" // /* MW 4 */ + 6253 "10110000" // /* MW 3 */ + 6254 "00001011" // /* MW 2 */ + 6255 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6256 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6257 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6259 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6260 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6261 "00110001" // /* MW 3 */ + 6262 "00100000" // /* MW 2 */ + 6263 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 6264 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6265 "00000101" // /* MW 3 */ + 6266 "00100000" // /* MW 2 */ + 6267 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 6268 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6269 "00010001" // /* MW 3 */ + 6270 "00000110" // /* MW 2 */ + 6271 "00001010" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 252 18 +.src_ref 6 "superkernels.cpp" 252 42 first +.return_address + 6272 "10111010" // LDA r16, [p7]; MOVXM p1, #508876 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6273 "00010000" // /* MW 9 */ + 6274 "11100110" // /* MW 8 */ + 6275 "10110001" // /* MW 7 */ + 6276 "11110000" // /* MW 6 */ + 6277 "00000001" // /* MW 5 */ + 6278 "00000000" // /* MW 4 */ + 6279 "11010000" // /* MW 3 */ + 6280 "11000010" // /* MW 2 */ + 6281 "11100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 252 16 +.src_ref 6 "superkernels.cpp" 252 18 +.src_ref 6 "superkernels.cpp" 261 48 + 6282 "10111010" // LDA r17, [p1]; MOVXM p3, #508880 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6283 "00010000" // /* MW 9 */ + 6284 "11101000" // /* MW 8 */ + 6285 "10110001" // /* MW 7 */ + 6286 "11110001" // /* MW 6 */ + 6287 "00000001" // /* MW 5 */ + 6288 "00000000" // /* MW 4 */ + 6289 "11010000" // /* MW 3 */ + 6290 "11000110" // /* MW 2 */ + 6291 "00100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 250 28 first +.src_ref 6 "superkernels.cpp" 253 16 +.src_ref 6 "superkernels.cpp" 262 48 + 6292 "10111010" // LDA.u16 r18, [p7, #10]; MOVXM p1, #508884 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6293 "00010000" // /* MW 9 */ + 6294 "11101010" // /* MW 8 */ + 6295 "10110001" // /* MW 7 */ + 6296 "11110000" // /* MW 6 */ + 6297 "00000001" // /* MW 5 */ + 6298 "00000000" // /* MW 4 */ + 6299 "01010000" // /* MW 3 */ + 6300 "11001011" // /* MW 2 */ + 6301 "11101010" // /* MW 1 */ + 6302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6303 "00000000" // /* MW 1 */ + 6304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6305 "00000000" // /* MW 1 */ + 6306 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6307 "00000000" // /* MW 1 */ + 6308 "10000100" // J #6352 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6352 delay_slots=5 */ + 6309 "00000000" // /* MW 5 */ + 6310 "00000000" // /* MW 4 */ + 6311 "01101000" // /* MW 3 */ + 6312 "00001100" // /* MW 2 */ + 6313 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 250 13 +.delay_slot + 6314 "01000100" // MOVXM p2, #508936 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6315 "00010000" // /* MW 5 */ + 6316 "11001000" // /* MW 4 */ + 6317 "11000100" // /* MW 3 */ + 6318 "00000111" // /* MW 2 */ + 6319 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 252 27 first +.delay_slot + 6320 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6321 "00001111" // /* MW 3 */ + 6322 "01100001" // /* MW 2 */ + 6323 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 250 13 first +.delay_slot + 6324 "10011000" // ST r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6325 "01010001" // /* MW 3 */ + 6326 "00000110" // /* MW 2 */ + 6327 "00001010" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 252 16 first +.delay_slot + 6328 "10011000" // ST r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6329 "00010001" // /* MW 3 */ + 6330 "00000110" // /* MW 2 */ + 6331 "00001011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 253 16 first +.delay_slot + 6332 "10011000" // ST r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6333 "00010001" // /* MW 3 */ + 6334 "00000110" // /* MW 2 */ + 6335 "00001001" // /* MW 1 */ +.label TGT_F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 +.src_ref 6 "superkernels.cpp" 261 48 + 6336 "01000100" // MOVXM p3, #508880 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6337 "10100000" // /* MW 5 */ + 6338 "11000111" // /* MW 4 */ + 6339 "11000110" // /* MW 3 */ + 6340 "00000111" // /* MW 2 */ + 6341 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 262 48 + 6342 "10111010" // NOPA; MOVXM p1, #508884 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6343 "00010000" // /* MW 9 */ + 6344 "11101010" // /* MW 8 */ + 6345 "10110001" // /* MW 7 */ + 6346 "11110000" // /* MW 6 */ + 6347 "00000001" // /* MW 5 */ + 6348 "00000000" // /* MW 4 */ + 6349 "11110000" // /* MW 3 */ + 6350 "00101100" // /* MW 2 */ + 6351 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 +.src_ref 1 "io_buffer_main.h" 242 49 first + 6352 "00011000" // ADD.NC p0, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6353 "10000110" // /* MW 3 */ + 6354 "01100111" // /* MW 2 */ + 6355 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 256 2 + 6356 "10111010" // LDA r27, [p0], #-4; MOVXM p2, #508864 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6357 "00010000" // /* MW 9 */ + 6358 "11100000" // /* MW 8 */ + 6359 "00110001" // /* MW 7 */ + 6360 "11110001" // /* MW 6 */ + 6361 "00000001" // /* MW 5 */ + 6362 "00000000" // /* MW 4 */ + 6363 "11010000" // /* MW 3 */ + 6364 "11101110" // /* MW 2 */ + 6365 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 6366 "10011000" // LDA r16, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6367 "00010110" // /* MW 3 */ + 6368 "11111110" // /* MW 2 */ + 6369 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 6370 "10011000" // LDA r17, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6371 "00110110" // /* MW 3 */ + 6372 "11111110" // /* MW 2 */ + 6373 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 256 2 first + 6374 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6375 "01010110" // /* MW 3 */ + 6376 "00000110" // /* MW 2 */ + 6377 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first + 6378 "10011000" // LDA r19, [p0, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6379 "01110110" // /* MW 3 */ + 6380 "01000110" // /* MW 2 */ + 6381 "00000000" // /* MW 1 */ + 6382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6383 "00000000" // /* MW 1 */ + 6384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6385 "00000000" // /* MW 1 */ + 6386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6387 "00000000" // /* MW 1 */ + 6388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6389 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 6390 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6391 "00000010" // /* MW 3 */ + 6392 "01100001" // /* MW 2 */ + 6393 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 +.src_ref 6 "superkernels.cpp" 256 2 first + 6394 "01011100" // ST r16, [p0]; ADD r16, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6395 "00001110" // /* MW 5 */ + 6396 "01000000" // /* MW 4 */ + 6397 "00111001" // /* MW 3 */ + 6398 "11000010" // /* MW 2 */ + 6399 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 256 2 + 6400 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6401 "00010001" // /* MW 3 */ + 6402 "00000110" // /* MW 2 */ + 6403 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 +.src_ref 1 "io_buffer_main.h" 419 8 + 6404 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6405 "11111101" // /* MW 3 */ + 6406 "11100000" // /* MW 2 */ + 6407 "00010111" // /* MW 1 */ + 6408 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6409 "00000000" // /* MW 1 */ + 6410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6411 "00000000" // /* MW 1 */ + 6412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6413 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 6414 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6415 "00001000" // /* MW 3 */ + 6416 "11010011" // /* MW 2 */ + 6417 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 first + 6418 "00011000" // ADD.NC p2, r14, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6419 "00000110" // /* MW 3 */ + 6420 "01100111" // /* MW 2 */ + 6421 "00011010" // /* MW 1 */ + 6422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6423 "00000000" // /* MW 1 */ + 6424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6425 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 + 6426 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6427 "01110110" // /* MW 3 */ + 6428 "11111111" // /* MW 2 */ + 6429 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 6430 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6431 "00110110" // /* MW 3 */ + 6432 "11111110" // /* MW 2 */ + 6433 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 6434 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6435 "01010110" // /* MW 3 */ + 6436 "11111110" // /* MW 2 */ + 6437 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 47 first + 6438 "10011000" // LDA r19, [p2, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6439 "01110110" // /* MW 3 */ + 6440 "01010110" // /* MW 2 */ + 6441 "00000010" // /* MW 1 */ + 6442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6443 "00000000" // /* MW 1 */ + 6444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6445 "00000000" // /* MW 1 */ + 6446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6447 "00000000" // /* MW 1 */ + 6448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6449 "00000000" // /* MW 1 */ + 6450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6451 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 6452 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6453 "00010010" // /* MW 3 */ + 6454 "10100011" // /* MW 2 */ + 6455 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 + 6456 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6457 "00110001" // /* MW 3 */ + 6458 "00000110" // /* MW 2 */ + 6459 "00001010" // /* MW 1 */ + 6460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6461 "00000000" // /* MW 1 */ + 6462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6463 "00000000" // /* MW 1 */ + 6464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6465 "00000000" // /* MW 1 */ + 6466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6467 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 6468 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6469 "00001000" // /* MW 3 */ + 6470 "11010011" // /* MW 2 */ + 6471 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 32 +.src_ref 6 "superkernels.cpp" 261 46 +.src_ref 6 "superkernels.cpp" 262 46 + 6472 "00111010" // MOVS p6, p2; MOVX r16, #1; MOV r14, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6473 "01111001" // /* MW 9 */ + 6474 "01100000" // /* MW 8 */ + 6475 "11001110" // /* MW 7 */ + 6476 "00101001" // /* MW 6 */ + 6477 "00000000" // /* MW 5 */ + 6478 "00000001" // /* MW 4 */ + 6479 "01100000" // /* MW 3 */ + 6480 "00010001" // /* MW 2 */ + 6481 "11010001" // /* MW 1 */ + 6482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6483 "00000000" // /* MW 1 */ + 6484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6485 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 + 6486 "00011000" // LDA p4, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6487 "00011001" // /* MW 3 */ + 6488 "11101110" // /* MW 2 */ + 6489 "00000111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 261 48 first + 6490 "00001100" // LDA r17, [p3]; ST p0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6491 "00111011" // /* MW 5 */ + 6492 "11011000" // /* MW 4 */ + 6493 "11011111" // /* MW 3 */ + 6494 "11000110" // /* MW 2 */ + 6495 "01100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 262 48 first +.src_ref 6 "superkernels.cpp" 264 2 + 6496 "11010100" // LDA r20, [p1]; MOV p3, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6497 "10000001" // /* MW 5 */ + 6498 "11011101" // /* MW 4 */ + 6499 "11010110" // /* MW 3 */ + 6500 "11010010" // /* MW 2 */ + 6501 "00100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 6502 "10011000" // LDA r18, [p2], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6503 "01010110" // /* MW 3 */ + 6504 "01001110" // /* MW 2 */ + 6505 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 6506 "10011000" // LDA p2, [p0], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6507 "00011110" // /* MW 3 */ + 6508 "01011101" // /* MW 2 */ + 6509 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 40 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6510 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6511 "11000000" // /* MW 3 */ + 6512 "01100000" // /* MW 2 */ + 6513 "00011111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6515 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6516 "10011000" // LDA r19, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6517 "01110110" // /* MW 3 */ + 6518 "00000110" // /* MW 2 */ + 6519 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6521 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 264 2 first +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 6522 "00000100" // JL #6112 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=6112 delay_slots=5 */ + 6523 "00000001" // /* MW 5 */ + 6524 "00000000" // /* MW 4 */ + 6525 "11110000" // /* MW 3 */ + 6526 "00001011" // /* MW 2 */ + 6527 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 40 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6528 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6529 "11000000" // /* MW 3 */ + 6530 "11010100" // /* MW 2 */ + 6531 "00011011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 261 46 first +.delay_slot + 6532 "10011000" // LSHL r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6533 "00001101" // /* MW 3 */ + 6534 "01100011" // /* MW 2 */ + 6535 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 262 46 first +.delay_slot + 6536 "10011000" // LSHL r16, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6537 "00001101" // /* MW 3 */ + 6538 "00100001" // /* MW 2 */ + 6539 "00010101" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 262 46 +.delay_slot + 6540 "01011000" // ADD.NC p1, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6541 "01000001" // /* MW 3 */ + 6542 "01101001" // /* MW 2 */ + 6543 "00011001" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 261 46 first +.delay_slot + 6544 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6545 "00000000" // /* MW 15 */ + 6546 "00000000" // /* MW 14 */ + 6547 "10101000" // /* MW 13 */ + 6548 "11100010" // /* MW 12 */ + 6549 "00110100" // /* MW 11 */ + 6550 "00000000" // /* MW 10 */ + 6551 "00000000" // /* MW 9 */ + 6552 "00000000" // /* MW 8 */ + 6553 "01011011" // /* MW 7 */ + 6554 "00000001" // /* MW 6 */ + 6555 "00100000" // /* MW 5 */ + 6556 "00000000" // /* MW 4 */ + 6557 "11110000" // /* MW 3 */ + 6558 "00101100" // /* MW 2 */ + 6559 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 32 first +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 351 40 +.src_ref 1 "io_buffer_main.h" 449 8 +.src_ref 1 "io_buffer_main.h" 449 8 +.return_address + 6560 "10111010" // LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6561 "01111000" // /* MW 9 */ + 6562 "11010000" // /* MW 8 */ + 6563 "10110011" // /* MW 7 */ + 6564 "00101000" // /* MW 6 */ + 6565 "00000000" // /* MW 5 */ + 6566 "00000001" // /* MW 4 */ + 6567 "11010000" // /* MW 3 */ + 6568 "11000110" // /* MW 2 */ + 6569 "11001000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 268 19 + 6570 "01000100" // MOVXM p6, #508936 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6571 "00010000" // /* MW 5 */ + 6572 "11001000" // /* MW 4 */ + 6573 "11001100" // /* MW 3 */ + 6574 "00000111" // /* MW 2 */ + 6575 "00000000" // /* MW 1 */ + 6576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6577 "00000000" // /* MW 1 */ + 6578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6579 "00000000" // /* MW 1 */ + 6580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6581 "00000000" // /* MW 1 */ + 6582 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6583 "00000000" // /* MW 1 */ + 6584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6585 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 6586 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6587 "00001000" // /* MW 3 */ + 6588 "01010001" // /* MW 2 */ + 6589 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first + 6590 "10011000" // LDA r17, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6591 "00110110" // /* MW 3 */ + 6592 "11110110" // /* MW 2 */ + 6593 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 + 6594 "00011000" // LDA p2, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6595 "00011001" // /* MW 3 */ + 6596 "11101101" // /* MW 2 */ + 6597 "00000111" // /* MW 1 */ + 6598 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6599 "00000000" // /* MW 1 */ + 6600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6601 "00000000" // /* MW 1 */ + 6602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6603 "00000000" // /* MW 1 */ + 6604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6605 "00000000" // /* MW 1 */ + 6606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6607 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 + 6608 "10011000" // SUB r17, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6609 "00010001" // /* MW 3 */ + 6610 "00100011" // /* MW 2 */ + 6611 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first +.src_ref 1 "io_buffer_main.h" 351 28 + 6612 "00001100" // LDA r17, [p2, #20]; ST r17, [p1, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6613 "01100011" // /* MW 5 */ + 6614 "11101100" // /* MW 4 */ + 6615 "11010011" // /* MW 3 */ + 6616 "11000110" // /* MW 2 */ + 6617 "01001010" // /* MW 1 */ + 6618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6619 "00000000" // /* MW 1 */ + 6620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6621 "00000000" // /* MW 1 */ + 6622 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6623 "00000000" // /* MW 1 */ + 6624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6625 "00000000" // /* MW 1 */ + 6626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6627 "00000000" // /* MW 1 */ + 6628 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6629 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 6630 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6631 "00001000" // /* MW 3 */ + 6632 "01010001" // /* MW 2 */ + 6633 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first +.src_ref 6 "superkernels.cpp" 268 6 +.src_ref 6 "superkernels.cpp" 269 14 + 6634 "10111010" // LDA r19, [p7, #-8]; MOVXM p1, #508864 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6635 "00010000" // /* MW 9 */ + 6636 "11100000" // /* MW 8 */ + 6637 "10110001" // /* MW 7 */ + 6638 "11110000" // /* MW 6 */ + 6639 "00000001" // /* MW 5 */ + 6640 "00000000" // /* MW 4 */ + 6641 "11010000" // /* MW 3 */ + 6642 "11001110" // /* MW 2 */ + 6643 "11111100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 268 19 first + 6644 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6645 "01010110" // /* MW 3 */ + 6646 "00000110" // /* MW 2 */ + 6647 "00000110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 268 6 + 6648 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6649 "00110110" // /* MW 3 */ + 6650 "00000110" // /* MW 2 */ + 6651 "00000001" // /* MW 1 */ + 6652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6653 "00000000" // /* MW 1 */ + 6654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6655 "00000000" // /* MW 1 */ + 6656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6657 "00000000" // /* MW 1 */ + 6658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6659 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 first + 6660 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6661 "00110001" // /* MW 3 */ + 6662 "00100001" // /* MW 2 */ + 6663 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 6664 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6665 "00010001" // /* MW 3 */ + 6666 "11100110" // /* MW 2 */ + 6667 "00001111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 268 16 first + 6668 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6669 "00101000" // /* MW 3 */ + 6670 "01100001" // /* MW 2 */ + 6671 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 268 6 + 6672 "10000100" // JNZ r16, #6704 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6704 delay_slots=5 */ + 6673 "00000001" // /* MW 5 */ + 6674 "01000000" // /* MW 4 */ + 6675 "00011000" // /* MW 3 */ + 6676 "00001101" // /* MW 2 */ + 6677 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6679 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6681 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6683 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6685 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6687 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 269 14 + 6688 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6689 "00000001" // /* MW 3 */ + 6690 "00100000" // /* MW 2 */ + 6691 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 269 14 first + 6692 "00110110" // NOPA; NOPB; ST r16, [p1]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6693 "11000001" // /* MW 11 */ + 6694 "00001000" // /* MW 10 */ + 6695 "10000011" // /* MW 9 */ + 6696 "00000000" // /* MW 8 */ + 6697 "00000000" // /* MW 7 */ + 6698 "00000000" // /* MW 6 */ + 6699 "00100000" // /* MW 5 */ + 6700 "00000000" // /* MW 4 */ + 6701 "11110000" // /* MW 3 */ + 6702 "00101100" // /* MW 2 */ + 6703 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 +.src_ref 6 "superkernels.cpp" 271 + 6704 "00011000" // LDA lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6705 "00111001" // /* MW 3 */ + 6706 "11110000" // /* MW 2 */ + 6707 "00000111" // /* MW 1 */ + 6708 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6709 "11110001" // /* MW 3 */ + 6710 "11111101" // /* MW 2 */ + 6711 "00000111" // /* MW 1 */ + 6712 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6713 "10011001" // /* MW 3 */ + 6714 "11110111" // /* MW 2 */ + 6715 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 6716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6717 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.noswbrkpt + 6718 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6719 "11010001" // /* MW 3 */ + 6720 "11111001" // /* MW 2 */ + 6721 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 6722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6723 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 6724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6725 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 271 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 6726 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 6727 "00000000" // /* MW 3 */ + 6728 "00101000" // /* MW 2 */ + 6729 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6730 "00011000" // MOVS p6, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6731 "00001011" // /* MW 3 */ + 6732 "10001110" // /* MW 2 */ + 6733 "00001110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 271 +.delay_slot + 6734 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6735 "00000001" // /* MW 5 */ + 6736 "00000000" // /* MW 4 */ + 6737 "00000000" // /* MW 3 */ + 6738 "11111000" // /* MW 2 */ + 6739 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6741 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6743 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end +.label __Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0 + 6745 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.src_ref 2 "elementwise_binary.h" 100 first +.src_ref 2 "elementwise_binary.h" 103 4 first +.function_start + 6752 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 6753 "00000000" // /* MW 3 */ + 6754 "00101000" // /* MW 2 */ + 6755 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 6756 "01000100" // MOVXM p0, #509216 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6757 "01000000" // /* MW 5 */ + 6758 "11001010" // /* MW 4 */ + 6759 "11000000" // /* MW 3 */ + 6760 "00000111" // /* MW 2 */ + 6761 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 6762 "10111000" // MOV m0, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6763 "10000000" // /* MW 3 */ + 6764 "00000000" // /* MW 2 */ + 6765 "00011000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 first +.delay_slot + 6766 "10011000" // ST m0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6767 "00000001" // /* MW 3 */ + 6768 "00000100" // /* MW 2 */ + 6769 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 6770 "10011000" // ST m0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6771 "00000001" // /* MW 3 */ + 6772 "00010100" // /* MW 2 */ + 6773 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_end0 + 6775 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.src_ref 2 "elementwise_binary.h" 89 first +.src_ref 2 "elementwise_binary.h" 92 22 +.src_ref 2 "elementwise_binary.h" 92 24 first +.function_start + 6784 "10111010" // LDA el0, [p1], #4; MOVXM p0, #509184 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6785 "00010000" // /* MW 9 */ + 6786 "10000000" // /* MW 8 */ + 6787 "00110010" // /* MW 7 */ + 6788 "11110000" // /* MW 6 */ + 6789 "00000001" // /* MW 5 */ + 6790 "00000000" // /* MW 4 */ + 6791 "11010000" // /* MW 3 */ + 6792 "10000101" // /* MW 2 */ + 6793 "00100011" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 89 + 6794 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6795 "00000001" // /* MW 5 */ + 6796 "00000000" // /* MW 4 */ + 6797 "00000000" // /* MW 3 */ + 6798 "00001000" // /* MW 2 */ + 6799 "00000000" // /* MW 1 */ + 6800 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6801 "00111101" // /* MW 3 */ + 6802 "11111100" // /* MW 2 */ + 6803 "00001111" // /* MW 1 */ + 6804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6805 "00000000" // /* MW 1 */ + 6806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6807 "00000000" // /* MW 1 */ + 6808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6809 "00000000" // /* MW 1 */ + 6810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6811 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 92 22 first + 6812 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6813 "00101001" // /* MW 3 */ + 6814 "00011100" // /* MW 2 */ + 6815 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 93 24 first + 6816 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6817 "00101110" // /* MW 3 */ + 6818 "00011100" // /* MW 2 */ + 6819 "00000001" // /* MW 1 */ + 6820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6821 "00000000" // /* MW 1 */ + 6822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6823 "00000000" // /* MW 1 */ + 6824 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6825 "00000000" // /* MW 1 */ + 6826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6827 "00000000" // /* MW 1 */ + 6828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6829 "00000000" // /* MW 1 */ + 6830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6831 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 93 22 + 6832 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6833 "00101001" // /* MW 3 */ + 6834 "00011100" // /* MW 2 */ + 6835 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 94 24 first + 6836 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6837 "00101110" // /* MW 3 */ + 6838 "00000100" // /* MW 2 */ + 6839 "00000001" // /* MW 1 */ + 6840 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6841 "00000000" // /* MW 1 */ + 6842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6843 "00000000" // /* MW 1 */ + 6844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6845 "00000000" // /* MW 1 */ + 6846 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6847 "00000000" // /* MW 1 */ + 6848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6849 "00000000" // /* MW 1 */ + 6850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6851 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 94 22 + 6852 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6853 "00101001" // /* MW 3 */ + 6854 "00011100" // /* MW 2 */ + 6855 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 95 24 first + 6856 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6857 "00101110" // /* MW 3 */ + 6858 "00010100" // /* MW 2 */ + 6859 "00000001" // /* MW 1 */ + 6860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6861 "00000000" // /* MW 1 */ + 6862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6863 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 96 8 first +.no_stack_arguments + 6864 "00000100" // JL #6752 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=6752 delay_slots=5 */ + 6865 "00000001" // /* MW 5 */ + 6866 "00000000" // /* MW 4 */ + 6867 "00110000" // /* MW 3 */ + 6868 "00001101" // /* MW 2 */ + 6869 "00000000" // /* MW 1 */ +.delay_slot + 6870 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6871 "10011101" // /* MW 3 */ + 6872 "11111011" // /* MW 2 */ + 6873 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6874 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6875 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6877 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 95 22 first +.delay_slot + 6878 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6879 "00101001" // /* MW 3 */ + 6880 "11011100" // /* MW 2 */ + 6881 "00001000" // /* MW 1 */ +.src_ref 3 "mul_impl.h" 93 25 +.delay_slot + 6882 "00101110" // NOPA; NOPS; MOV p7, p0; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 6883 "00011100" // /* MW 13 */ + 6884 "00000000" // /* MW 12 */ + 6885 "00000000" // /* MW 11 */ + 6886 "00000111" // /* MW 10 */ + 6887 "00000110" // /* MW 9 */ + 6888 "01111011" // /* MW 8 */ + 6889 "00000000" // /* MW 7 */ + 6890 "00000000" // /* MW 6 */ + 6891 "10110110" // /* MW 5 */ + 6892 "00000010" // /* MW 4 */ + 6893 "11110000" // /* MW 3 */ + 6894 "00101100" // /* MW 2 */ + 6895 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 98 4 +.return_address + 6896 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6897 "00111001" // /* MW 3 */ + 6898 "11111100" // /* MW 2 */ + 6899 "00000111" // /* MW 1 */ + 6900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6901 "00000000" // /* MW 1 */ + 6902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6903 "00000000" // /* MW 1 */ + 6904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6905 "00000000" // /* MW 1 */ + 6906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6907 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 6908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6909 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.noswbrkpt + 6910 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6911 "10011001" // /* MW 3 */ + 6912 "11111011" // /* MW 2 */ + 6913 "00000111" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 98 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6914 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 6915 "00000000" // /* MW 3 */ + 6916 "00101000" // /* MW 2 */ + 6917 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6919 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6920 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6921 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6923 "00000000" // /* MW 1 */ +.src_ref 3 "mul_impl.h" 93 25 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6924 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6925 "00000001" // /* MW 3 */ + 6926 "00100000" // /* MW 2 */ + 6927 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 98 4 +.src_ref 3 "mul_impl.h" 93 25 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6928 "00111010" // ST r16, [p7, #16]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6929 "01110001" // /* MW 9 */ + 6930 "00000000" // /* MW 8 */ + 6931 "00000000" // /* MW 7 */ + 6932 "00000000" // /* MW 6 */ + 6933 "11111110" // /* MW 5 */ + 6934 "00111111" // /* MW 4 */ + 6935 "00110000" // /* MW 3 */ + 6936 "11000010" // /* MW 2 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + 6937 "11101000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.function run _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.src_ref 2 "elementwise_binary.h" 108 first +.src_ref 2 "elementwise_binary.h" 115 37 +.src_ref 2 "elementwise_binary.h" 115 37 +.function_start + 6944 "10111010" // MOVA m0, #32; MOVXM p3, #509184 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6945 "00010000" // /* MW 9 */ + 6946 "10000000" // /* MW 8 */ + 6947 "10110010" // /* MW 7 */ + 6948 "11110001" // /* MW 6 */ + 6949 "00000001" // /* MW 5 */ + 6950 "00000000" // /* MW 4 */ + 6951 "10000000" // /* MW 3 */ + 6952 "00000000" // /* MW 2 */ + 6953 "00000100" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 115 37 first +.src_ref 2 "elementwise_binary.h" 115 78 + 6954 "10111010" // LDA r1, [p3], m0; MOVXM p4, #508944 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6955 "00010000" // /* MW 9 */ + 6956 "00001000" // /* MW 8 */ + 6957 "00110010" // /* MW 7 */ + 6958 "11110010" // /* MW 6 */ + 6959 "00000001" // /* MW 5 */ + 6960 "00000000" // /* MW 4 */ + 6961 "11010000" // /* MW 3 */ + 6962 "00000110" // /* MW 2 */ + 6963 "01100001" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 115 78 +.src_ref 2 "elementwise_binary.h" 115 78 + 6964 "10111010" // LDA m1, [p3]; MOVX r0, #828; MOV r3, #-6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6965 "01011000" // /* MW 9 */ + 6966 "11111010" // /* MW 8 */ + 6967 "01101111" // /* MW 7 */ + 6968 "10001000" // /* MW 6 */ + 6969 "00000111" // /* MW 5 */ + 6970 "00011000" // /* MW 4 */ + 6971 "11010000" // /* MW 3 */ + 6972 "10010000" // /* MW 2 */ + 6973 "01100000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 115 78 +.src_ref 2 "elementwise_binary.h" 127 8 first + 6974 "10111010" // LDA m0, [p3, #4]; MOVXM ls, #7136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6975 "00010000" // /* MW 9 */ + 6976 "11110000" // /* MW 8 */ + 6977 "01111101" // /* MW 7 */ + 6978 "00000100" // /* MW 6 */ + 6979 "00000000" // /* MW 5 */ + 6980 "00000000" // /* MW 4 */ + 6981 "11010000" // /* MW 3 */ + 6982 "10000000" // /* MW 2 */ + 6983 "01100010" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 115 78 first +.src_ref 2 "elementwise_binary.h" 127 8 + 6984 "10111010" // LDA.s8 r2, [p4]; MOVXM le, #7152 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6985 "00010000" // /* MW 9 */ + 6986 "11111000" // /* MW 8 */ + 6987 "10111101" // /* MW 7 */ + 6988 "00000101" // /* MW 6 */ + 6989 "00000000" // /* MW 5 */ + 6990 "00000000" // /* MW 4 */ + 6991 "01010000" // /* MW 3 */ + 6992 "10001000" // /* MW 2 */ + 6993 "10000000" // /* MW 1 */ + 6994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6995 "00000000" // /* MW 1 */ + 6996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6997 "00000000" // /* MW 1 */ + 6998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6999 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 115 78 + 7000 "10011000" // LSHL r1, r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7001 "00111101" // /* MW 3 */ + 7002 "01000010" // /* MW 2 */ + 7003 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 127 8 first + 7004 "10011000" // ADD.NC lc, r1, #-7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7005 "11111100" // /* MW 3 */ + 7006 "01110000" // /* MW 2 */ + 7007 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first + 7008 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7009 "11101000" // /* MW 5 */ + 7010 "01010000" // /* MW 4 */ + 7011 "01110000" // /* MW 3 */ + 7012 "00010011" // /* MW 2 */ + 7013 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 154 20 +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.src_ref 2 "elementwise_binary.h" 177 20 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7014 "00010010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; MOVX crRnd, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7015 "10000000" // /* MW 7 */ + 7016 "10111010" // /* MW 6 */ + 7017 "01101000" // /* MW 5 */ + 7018 "01010000" // /* MW 4 */ + 7019 "01110000" // /* MW 3 */ + 7020 "00011011" // /* MW 2 */ + 7021 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7022 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7023 "11101000" // /* MW 5 */ + 7024 "01010000" // /* MW 4 */ + 7025 "01110000" // /* MW 3 */ + 7026 "00010011" // /* MW 2 */ + 7027 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7028 "00111100" // VLDA x3, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7029 "01101000" // /* MW 5 */ + 7030 "01010000" // /* MW 4 */ + 7031 "01110000" // /* MW 3 */ + 7032 "00011011" // /* MW 2 */ + 7033 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7034 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7035 "11101000" // /* MW 5 */ + 7036 "01010000" // /* MW 4 */ + 7037 "01110000" // /* MW 3 */ + 7038 "00010011" // /* MW 2 */ + 7039 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7040 "00111100" // VLDA x3, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7041 "01101000" // /* MW 5 */ + 7042 "01010000" // /* MW 4 */ + 7043 "01110000" // /* MW 3 */ + 7044 "00011011" // /* MW 2 */ + 7045 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7046 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7047 "11101000" // /* MW 5 */ + 7048 "01010000" // /* MW 4 */ + 7049 "01110000" // /* MW 3 */ + 7050 "00010011" // /* MW 2 */ + 7051 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7052 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7053 "01000001" // /* MW 9 */ + 7054 "11100010" // /* MW 8 */ + 7055 "00000000" // /* MW 7 */ + 7056 "00011101" // /* MW 6 */ + 7057 "00110100" // /* MW 5 */ + 7058 "00101000" // /* MW 4 */ + 7059 "01110000" // /* MW 3 */ + 7060 "00011011" // /* MW 2 */ + 7061 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7062 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7063 "01100001" // /* MW 9 */ + 7064 "11100000" // /* MW 8 */ + 7065 "00000001" // /* MW 7 */ + 7066 "00011101" // /* MW 6 */ + 7067 "01110100" // /* MW 5 */ + 7068 "00101000" // /* MW 4 */ + 7069 "01110000" // /* MW 3 */ + 7070 "00010011" // /* MW 2 */ + 7071 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7072 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7073 "01000001" // /* MW 9 */ + 7074 "11100010" // /* MW 8 */ + 7075 "00000000" // /* MW 7 */ + 7076 "00011101" // /* MW 6 */ + 7077 "00110100" // /* MW 5 */ + 7078 "00101000" // /* MW 4 */ + 7079 "01110000" // /* MW 3 */ + 7080 "00011011" // /* MW 2 */ + 7081 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7082 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7083 "01100001" // /* MW 9 */ + 7084 "11100000" // /* MW 8 */ + 7085 "00000001" // /* MW 7 */ + 7086 "00011101" // /* MW 6 */ + 7087 "01110100" // /* MW 5 */ + 7088 "00101000" // /* MW 4 */ + 7089 "01110000" // /* MW 3 */ + 7090 "00010011" // /* MW 2 */ + 7091 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7092 "01100110" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; NOPS; VMUL.f dm0, x1, x2, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7093 "01000001" // /* MW 11 */ + 7094 "11100010" // /* MW 10 */ + 7095 "00000000" // /* MW 9 */ + 7096 "10001110" // /* MW 8 */ + 7097 "10101101" // /* MW 7 */ + 7098 "00000000" // /* MW 6 */ + 7099 "01101000" // /* MW 5 */ + 7100 "01010000" // /* MW 4 */ + 7101 "01110000" // /* MW 3 */ + 7102 "00011011" // /* MW 2 */ + 7103 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7104 "00001011" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; NOPS; NOPX; NOPM; VMUL.f dm1, x0, x3, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7105 "00000011" // /* MW 15 */ + 7106 "00001111" // /* MW 14 */ + 7107 "01111000" // /* MW 13 */ + 7108 "10100101" // /* MW 12 */ + 7109 "00000001" // /* MW 11 */ + 7110 "00000000" // /* MW 10 */ + 7111 "00000000" // /* MW 9 */ + 7112 "00000000" // /* MW 8 */ + 7113 "01011011" // /* MW 7 */ + 7114 "00000001" // /* MW 6 */ + 7115 "11101000" // /* MW 5 */ + 7116 "01010000" // /* MW 4 */ + 7117 "01110000" // /* MW 3 */ + 7118 "00010011" // /* MW 2 */ + 7119 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7120 "00001011" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7121 "00010010" // /* MW 15 */ + 7122 "00000111" // /* MW 14 */ + 7123 "01111000" // /* MW 13 */ + 7124 "10100101" // /* MW 12 */ + 7125 "00000001" // /* MW 11 */ + 7126 "00000000" // /* MW 10 */ + 7127 "00000000" // /* MW 9 */ + 7128 "00000000" // /* MW 8 */ + 7129 "00100011" // /* MW 7 */ + 7130 "00011100" // /* MW 6 */ + 7131 "01101010" // /* MW 5 */ + 7132 "01010000" // /* MW 4 */ + 7133 "01110000" // /* MW 3 */ + 7134 "00011011" // /* MW 2 */ + 7135 "00100001" // /* MW 1 */ +.label ZLS_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_192 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.src_ref 2 "elementwise_binary.h" 154 20 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 7136 "00001011" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; VMUL.f dm1, x0, x3, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7137 "00000011" // /* MW 15 */ + 7138 "00001111" // /* MW 14 */ + 7139 "01111000" // /* MW 13 */ + 7140 "10100101" // /* MW 12 */ + 7141 "00000001" // /* MW 11 */ + 7142 "00000000" // /* MW 10 */ + 7143 "00000000" // /* MW 9 */ + 7144 "00000000" // /* MW 8 */ + 7145 "10100011" // /* MW 7 */ + 7146 "00011100" // /* MW 6 */ + 7147 "11101010" // /* MW 5 */ + 7148 "01010000" // /* MW 4 */ + 7149 "01110000" // /* MW 3 */ + 7150 "00010011" // /* MW 2 */ + 7151 "00100001" // /* MW 1 */ +.label ZLE_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_208 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7152 "00001011" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7153 "00010010" // /* MW 15 */ + 7154 "00000111" // /* MW 14 */ + 7155 "01111000" // /* MW 13 */ + 7156 "10100101" // /* MW 12 */ + 7157 "00000001" // /* MW 11 */ + 7158 "00000000" // /* MW 10 */ + 7159 "00000000" // /* MW 9 */ + 7160 "00000000" // /* MW 8 */ + 7161 "00100011" // /* MW 7 */ + 7162 "00011100" // /* MW 6 */ + 7163 "01101010" // /* MW 5 */ + 7164 "01010000" // /* MW 4 */ + 7165 "01110000" // /* MW 3 */ + 7166 "00011011" // /* MW 2 */ + 7167 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 7168 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7169 "01100001" // /* MW 7 */ + 7170 "11100000" // /* MW 6 */ + 7171 "00000001" // /* MW 5 */ + 7172 "00000010" // /* MW 4 */ + 7173 "01100000" // /* MW 3 */ + 7174 "10010100" // /* MW 2 */ + 7175 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7176 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7177 "01000001" // /* MW 7 */ + 7178 "11100010" // /* MW 6 */ + 7179 "00000000" // /* MW 5 */ + 7180 "00000010" // /* MW 4 */ + 7181 "01100000" // /* MW 3 */ + 7182 "10000100" // /* MW 2 */ + 7183 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7184 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7185 "01100001" // /* MW 7 */ + 7186 "11100000" // /* MW 6 */ + 7187 "00000001" // /* MW 5 */ + 7188 "00000010" // /* MW 4 */ + 7189 "01100000" // /* MW 3 */ + 7190 "10010100" // /* MW 2 */ + 7191 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7192 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7193 "01000001" // /* MW 7 */ + 7194 "11100010" // /* MW 6 */ + 7195 "00000000" // /* MW 5 */ + 7196 "00000010" // /* MW 4 */ + 7197 "01100000" // /* MW 3 */ + 7198 "10000100" // /* MW 2 */ + 7199 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7200 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7201 "01100001" // /* MW 7 */ + 7202 "11100000" // /* MW 6 */ + 7203 "00000001" // /* MW 5 */ + 7204 "00000010" // /* MW 4 */ + 7205 "01100000" // /* MW 3 */ + 7206 "10010100" // /* MW 2 */ + 7207 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7208 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7209 "01000001" // /* MW 7 */ + 7210 "11100010" // /* MW 6 */ + 7211 "00000000" // /* MW 5 */ + 7212 "00000010" // /* MW 4 */ + 7213 "01100000" // /* MW 3 */ + 7214 "10000100" // /* MW 2 */ + 7215 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7216 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7217 "01100001" // /* MW 7 */ + 7218 "11100000" // /* MW 6 */ + 7219 "00000001" // /* MW 5 */ + 7220 "00000010" // /* MW 4 */ + 7221 "01100000" // /* MW 3 */ + 7222 "10010100" // /* MW 2 */ + 7223 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7224 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7225 "00100011" // /* MW 3 */ + 7226 "00011100" // /* MW 2 */ + 7227 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 131 4 first +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7228 "01011100" // VST.CONV.bf16.fp32 cml1, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 7229 "00000000" // /* MW 5 */ + 7230 "01010000" // /* MW 4 */ + 7231 "01100000" // /* MW 3 */ + 7232 "10010100" // /* MW 2 */ + 7233 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7234 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7235 "00100011" // /* MW 3 */ + 7236 "00011100" // /* MW 2 */ + 7237 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 154 20 first +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7238 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7239 "10100011" // /* MW 3 */ + 7240 "00011100" // /* MW 2 */ + 7241 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.delay_slot + 7242 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7243 "00100011" // /* MW 3 */ + 7244 "00011100" // /* MW 2 */ + 7245 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 154 20 first +.delay_slot + 7246 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7247 "10100011" // /* MW 3 */ + 7248 "00011100" // /* MW 2 */ + 7249 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_end0 + 7251 "00000000" // /* MW 1 */ +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0 +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.function superkernel_mul1d _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.src_ref 6 "superkernels.cpp" 277 first +.src_ref 6 "superkernels.cpp" 282 6 +.function_start + 7264 "01000100" // MOVXM p4, #508864 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7265 "10000000" // /* MW 5 */ + 7266 "11000111" // /* MW 4 */ + 7267 "11001000" // /* MW 3 */ + 7268 "00000111" // /* MW 2 */ + 7269 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 282 6 first + 7270 "11010100" // LDA r16, [p4]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7271 "11000001" // /* MW 5 */ + 7272 "10110101" // /* MW 4 */ + 7273 "11011000" // /* MW 3 */ + 7274 "11000010" // /* MW 2 */ + 7275 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 277 + 7276 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7277 "00000001" // /* MW 5 */ + 7278 "00000000" // /* MW 4 */ + 7279 "00000000" // /* MW 3 */ + 7280 "00001000" // /* MW 2 */ + 7281 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 279 22 first + 7282 "00111010" // ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7283 "01111001" // /* MW 9 */ + 7284 "01100000" // /* MW 8 */ + 7285 "11001010" // /* MW 7 */ + 7286 "10000001" // /* MW 6 */ + 7287 "00010100" // /* MW 5 */ + 7288 "00100011" // /* MW 4 */ + 7289 "10110000" // /* MW 3 */ + 7290 "00111010" // /* MW 2 */ + 7291 "11111111" // /* MW 1 */ + 7292 "00000010" // ST p0, [sp, #-20]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7293 "01110000" // /* MW 7 */ + 7294 "11010000" // /* MW 6 */ + 7295 "00001011" // /* MW 5 */ + 7296 "00000000" // /* MW 4 */ + 7297 "10110000" // /* MW 3 */ + 7298 "10000011" // /* MW 2 */ + 7299 "11111101" // /* MW 1 */ + 7300 "10011000" // ST r0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7301 "00010101" // /* MW 3 */ + 7302 "11111100" // /* MW 2 */ + 7303 "00001111" // /* MW 1 */ + 7304 "10011000" // ST lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7305 "00111101" // /* MW 3 */ + 7306 "11110000" // /* MW 2 */ + 7307 "00001111" // /* MW 1 */ + 7308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7309 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 282 6 first +.src_ref 6 "superkernels.cpp" 282 16 first + 7310 "10000100" // JNZ r16, #7456 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7456 delay_slots=5 */ + 7311 "00000001" // /* MW 5 */ + 7312 "01000000" // /* MW 4 */ + 7313 "10010000" // /* MW 3 */ + 7314 "00001110" // /* MW 2 */ + 7315 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 279 30 first +.delay_slot + 7316 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7317 "11111011" // /* MW 3 */ + 7318 "01100011" // /* MW 2 */ + 7319 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 279 11 +.delay_slot + 7320 "01000100" // MOVXM p2, #508876 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7321 "10011000" // /* MW 5 */ + 7322 "11000111" // /* MW 4 */ + 7323 "11000100" // /* MW 3 */ + 7324 "00000111" // /* MW 2 */ + 7325 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 279 11 +.delay_slot + 7326 "00000010" // ST r17, [p2]; MOV p2, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7327 "01110000" // /* MW 7 */ + 7328 "01100000" // /* MW 6 */ + 7329 "00110111" // /* MW 5 */ + 7330 "00000001" // /* MW 4 */ + 7331 "00110000" // /* MW 3 */ + 7332 "11000110" // /* MW 2 */ + 7333 "01000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.delay_slot + 7334 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7335 "11000000" // /* MW 3 */ + 7336 "11010110" // /* MW 2 */ + 7337 "00011011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 285 4 +.src_ref 6 "superkernels.cpp" 287 28 +.src_ref 6 "superkernels.cpp" 289 42 +.src_ref 6 "superkernels.cpp" 301 2 +.delay_slot + 7338 "00111010" // ST p2, [sp, #-12]; MOVXM p7, #509184 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7339 "00010001" // /* MW 9 */ + 7340 "10000000" // /* MW 8 */ + 7341 "10110010" // /* MW 7 */ + 7342 "11110011" // /* MW 6 */ + 7343 "00000001" // /* MW 5 */ + 7344 "00000000" // /* MW 4 */ + 7345 "10110000" // /* MW 3 */ + 7346 "10100011" // /* MW 2 */ + 7347 "11111110" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.src_ref 6 "superkernels.cpp" 285 4 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7348 "00111010" // MOVS p0, p7; MOVXM p2, #508944 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7349 "00010001" // /* MW 9 */ + 7350 "00001000" // /* MW 8 */ + 7351 "00110010" // /* MW 7 */ + 7352 "11110001" // /* MW 6 */ + 7353 "00000001" // /* MW 5 */ + 7354 "00000000" // /* MW 4 */ + 7355 "01100000" // /* MW 3 */ + 7356 "10010001" // /* MW 2 */ + 7357 "00010011" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7358 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #508940 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7359 "00010000" // /* MW 9 */ + 7360 "00000110" // /* MW 8 */ + 7361 "00110010" // /* MW 7 */ + 7362 "11110001" // /* MW 6 */ + 7363 "00000001" // /* MW 5 */ + 7364 "00000000" // /* MW 4 */ + 7365 "11100000" // /* MW 3 */ + 7366 "11000000" // /* MW 2 */ + 7367 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7369 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 285 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 7370 "00000100" // JL #6784 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=6784 delay_slots=5 */ + 7371 "00000001" // /* MW 5 */ + 7372 "00000000" // /* MW 4 */ + 7373 "01000000" // /* MW 3 */ + 7374 "00001101" // /* MW 2 */ + 7375 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7377 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7379 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7380 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7381 "00110001" // /* MW 3 */ + 7382 "00100000" // /* MW 2 */ + 7383 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 7384 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7385 "00000101" // /* MW 3 */ + 7386 "00100000" // /* MW 2 */ + 7387 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 7388 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7389 "00010001" // /* MW 3 */ + 7390 "00000110" // /* MW 2 */ + 7391 "00001010" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 289 18 +.src_ref 6 "superkernels.cpp" 289 42 first +.return_address + 7392 "10111010" // LDA r16, [p7]; MOVXM p1, #508876 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7393 "00010000" // /* MW 9 */ + 7394 "11100110" // /* MW 8 */ + 7395 "10110001" // /* MW 7 */ + 7396 "11110000" // /* MW 6 */ + 7397 "00000001" // /* MW 5 */ + 7398 "00000000" // /* MW 4 */ + 7399 "11010000" // /* MW 3 */ + 7400 "11000010" // /* MW 2 */ + 7401 "11100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 289 16 +.src_ref 6 "superkernels.cpp" 289 18 +.src_ref 6 "superkernels.cpp" 298 48 + 7402 "10111010" // LDA r17, [p1]; MOVXM p3, #508880 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7403 "00010000" // /* MW 9 */ + 7404 "11101000" // /* MW 8 */ + 7405 "10110001" // /* MW 7 */ + 7406 "11110001" // /* MW 6 */ + 7407 "00000001" // /* MW 5 */ + 7408 "00000000" // /* MW 4 */ + 7409 "11010000" // /* MW 3 */ + 7410 "11000110" // /* MW 2 */ + 7411 "00100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 287 28 first +.src_ref 6 "superkernels.cpp" 290 16 +.src_ref 6 "superkernels.cpp" 299 48 + 7412 "10111010" // LDA.u16 r18, [p7, #10]; MOVXM p1, #508884 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7413 "00010000" // /* MW 9 */ + 7414 "11101010" // /* MW 8 */ + 7415 "10110001" // /* MW 7 */ + 7416 "11110000" // /* MW 6 */ + 7417 "00000001" // /* MW 5 */ + 7418 "00000000" // /* MW 4 */ + 7419 "01010000" // /* MW 3 */ + 7420 "11001011" // /* MW 2 */ + 7421 "11101010" // /* MW 1 */ + 7422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7423 "00000000" // /* MW 1 */ + 7424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7425 "00000000" // /* MW 1 */ + 7426 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7427 "00000000" // /* MW 1 */ + 7428 "10000100" // J #7472 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=7472 delay_slots=5 */ + 7429 "00000000" // /* MW 5 */ + 7430 "00000000" // /* MW 4 */ + 7431 "10011000" // /* MW 3 */ + 7432 "00001110" // /* MW 2 */ + 7433 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 287 13 +.delay_slot + 7434 "01000100" // MOVXM p2, #508936 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7435 "00010000" // /* MW 5 */ + 7436 "11001000" // /* MW 4 */ + 7437 "11000100" // /* MW 3 */ + 7438 "00000111" // /* MW 2 */ + 7439 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 289 27 first +.delay_slot + 7440 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7441 "00001111" // /* MW 3 */ + 7442 "01100001" // /* MW 2 */ + 7443 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 287 13 first +.delay_slot + 7444 "10011000" // ST r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7445 "01010001" // /* MW 3 */ + 7446 "00000110" // /* MW 2 */ + 7447 "00001010" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 289 16 first +.delay_slot + 7448 "10011000" // ST r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7449 "00010001" // /* MW 3 */ + 7450 "00000110" // /* MW 2 */ + 7451 "00001011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 290 16 first +.delay_slot + 7452 "10011000" // ST r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7453 "00010001" // /* MW 3 */ + 7454 "00000110" // /* MW 2 */ + 7455 "00001001" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 +.src_ref 6 "superkernels.cpp" 298 48 + 7456 "01000100" // MOVXM p3, #508880 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7457 "10100000" // /* MW 5 */ + 7458 "11000111" // /* MW 4 */ + 7459 "11000110" // /* MW 3 */ + 7460 "00000111" // /* MW 2 */ + 7461 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 299 48 + 7462 "10111010" // NOPA; MOVXM p1, #508884 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7463 "00010000" // /* MW 9 */ + 7464 "11101010" // /* MW 8 */ + 7465 "10110001" // /* MW 7 */ + 7466 "11110000" // /* MW 6 */ + 7467 "00000001" // /* MW 5 */ + 7468 "00000000" // /* MW 4 */ + 7469 "11110000" // /* MW 3 */ + 7470 "00101100" // /* MW 2 */ + 7471 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 +.src_ref 1 "io_buffer_main.h" 242 49 first + 7472 "00011000" // ADD.NC p0, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7473 "10000110" // /* MW 3 */ + 7474 "01100111" // /* MW 2 */ + 7475 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 293 2 + 7476 "10111010" // LDA r27, [p0], #-4; MOVXM p2, #508864 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7477 "00010000" // /* MW 9 */ + 7478 "11100000" // /* MW 8 */ + 7479 "00110001" // /* MW 7 */ + 7480 "11110001" // /* MW 6 */ + 7481 "00000001" // /* MW 5 */ + 7482 "00000000" // /* MW 4 */ + 7483 "11010000" // /* MW 3 */ + 7484 "11101110" // /* MW 2 */ + 7485 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 7486 "10011000" // LDA r16, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7487 "00010110" // /* MW 3 */ + 7488 "11111110" // /* MW 2 */ + 7489 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 7490 "10011000" // LDA r17, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7491 "00110110" // /* MW 3 */ + 7492 "11111110" // /* MW 2 */ + 7493 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 293 2 first + 7494 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7495 "01010110" // /* MW 3 */ + 7496 "00000110" // /* MW 2 */ + 7497 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first + 7498 "10011000" // LDA r19, [p0, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7499 "01110110" // /* MW 3 */ + 7500 "01000110" // /* MW 2 */ + 7501 "00000000" // /* MW 1 */ + 7502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7503 "00000000" // /* MW 1 */ + 7504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7505 "00000000" // /* MW 1 */ + 7506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7507 "00000000" // /* MW 1 */ + 7508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7509 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 7510 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7511 "00000010" // /* MW 3 */ + 7512 "01100001" // /* MW 2 */ + 7513 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 +.src_ref 6 "superkernels.cpp" 293 2 first + 7514 "01011100" // ST r16, [p0]; ADD r16, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7515 "00001110" // /* MW 5 */ + 7516 "01000000" // /* MW 4 */ + 7517 "00111001" // /* MW 3 */ + 7518 "11000010" // /* MW 2 */ + 7519 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 293 2 + 7520 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7521 "00010001" // /* MW 3 */ + 7522 "00000110" // /* MW 2 */ + 7523 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 +.src_ref 1 "io_buffer_main.h" 419 8 + 7524 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7525 "11111101" // /* MW 3 */ + 7526 "11100000" // /* MW 2 */ + 7527 "00010111" // /* MW 1 */ + 7528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7529 "00000000" // /* MW 1 */ + 7530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7531 "00000000" // /* MW 1 */ + 7532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7533 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 7534 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7535 "00001000" // /* MW 3 */ + 7536 "11010011" // /* MW 2 */ + 7537 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 first + 7538 "00011000" // ADD.NC p2, r14, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7539 "00000110" // /* MW 3 */ + 7540 "01100111" // /* MW 2 */ + 7541 "00011010" // /* MW 1 */ + 7542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7543 "00000000" // /* MW 1 */ + 7544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7545 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 + 7546 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7547 "01110110" // /* MW 3 */ + 7548 "11111111" // /* MW 2 */ + 7549 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 7550 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7551 "00110110" // /* MW 3 */ + 7552 "11111110" // /* MW 2 */ + 7553 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 7554 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7555 "01010110" // /* MW 3 */ + 7556 "11111110" // /* MW 2 */ + 7557 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 47 first + 7558 "10011000" // LDA r19, [p2, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7559 "01110110" // /* MW 3 */ + 7560 "01010110" // /* MW 2 */ + 7561 "00000010" // /* MW 1 */ + 7562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7563 "00000000" // /* MW 1 */ + 7564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7565 "00000000" // /* MW 1 */ + 7566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7567 "00000000" // /* MW 1 */ + 7568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7569 "00000000" // /* MW 1 */ + 7570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7571 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 7572 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7573 "00010010" // /* MW 3 */ + 7574 "10100011" // /* MW 2 */ + 7575 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 + 7576 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7577 "00110001" // /* MW 3 */ + 7578 "00000110" // /* MW 2 */ + 7579 "00001010" // /* MW 1 */ + 7580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7581 "00000000" // /* MW 1 */ + 7582 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7583 "00000000" // /* MW 1 */ + 7584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7585 "00000000" // /* MW 1 */ + 7586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7587 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 7588 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7589 "00001000" // /* MW 3 */ + 7590 "11010011" // /* MW 2 */ + 7591 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 32 +.src_ref 6 "superkernels.cpp" 298 46 +.src_ref 6 "superkernels.cpp" 299 46 + 7592 "00111010" // MOVS p6, p2; MOVX r16, #1; MOV r14, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7593 "01111001" // /* MW 9 */ + 7594 "01100000" // /* MW 8 */ + 7595 "11001110" // /* MW 7 */ + 7596 "00101001" // /* MW 6 */ + 7597 "00000000" // /* MW 5 */ + 7598 "00000001" // /* MW 4 */ + 7599 "01100000" // /* MW 3 */ + 7600 "00010001" // /* MW 2 */ + 7601 "11010001" // /* MW 1 */ + 7602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7603 "00000000" // /* MW 1 */ + 7604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7605 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 + 7606 "00011000" // LDA p4, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7607 "00011001" // /* MW 3 */ + 7608 "11101110" // /* MW 2 */ + 7609 "00000111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 298 48 first + 7610 "00001100" // LDA r17, [p3]; ST p0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7611 "00111011" // /* MW 5 */ + 7612 "11011000" // /* MW 4 */ + 7613 "11011111" // /* MW 3 */ + 7614 "11000110" // /* MW 2 */ + 7615 "01100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 299 48 first +.src_ref 6 "superkernels.cpp" 301 2 + 7616 "11010100" // LDA r20, [p1]; MOV p3, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7617 "10000001" // /* MW 5 */ + 7618 "11011101" // /* MW 4 */ + 7619 "11010110" // /* MW 3 */ + 7620 "11010010" // /* MW 2 */ + 7621 "00100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 7622 "10011000" // LDA r18, [p2], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7623 "01010110" // /* MW 3 */ + 7624 "01001110" // /* MW 2 */ + 7625 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 7626 "10011000" // LDA p2, [p0], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7627 "00011110" // /* MW 3 */ + 7628 "01011101" // /* MW 2 */ + 7629 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 40 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7630 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7631 "11000000" // /* MW 3 */ + 7632 "01100000" // /* MW 2 */ + 7633 "00011111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7634 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7635 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7636 "10011000" // LDA r19, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7637 "01110110" // /* MW 3 */ + 7638 "00000110" // /* MW 2 */ + 7639 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7641 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 301 2 first +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 7642 "00000100" // JL #6944 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=6944 delay_slots=5 */ + 7643 "00000001" // /* MW 5 */ + 7644 "00000000" // /* MW 4 */ + 7645 "10010000" // /* MW 3 */ + 7646 "00001101" // /* MW 2 */ + 7647 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 40 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7648 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7649 "11000000" // /* MW 3 */ + 7650 "11010100" // /* MW 2 */ + 7651 "00011011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 298 46 first +.delay_slot + 7652 "10011000" // LSHL r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7653 "00001101" // /* MW 3 */ + 7654 "01100011" // /* MW 2 */ + 7655 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 299 46 first +.delay_slot + 7656 "10011000" // LSHL r16, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7657 "00001101" // /* MW 3 */ + 7658 "00100001" // /* MW 2 */ + 7659 "00010101" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 299 46 +.delay_slot + 7660 "01011000" // ADD.NC p1, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7661 "01000001" // /* MW 3 */ + 7662 "01101001" // /* MW 2 */ + 7663 "00011001" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 298 46 first +.delay_slot + 7664 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7665 "00000000" // /* MW 15 */ + 7666 "00000000" // /* MW 14 */ + 7667 "10101000" // /* MW 13 */ + 7668 "11100010" // /* MW 12 */ + 7669 "00110100" // /* MW 11 */ + 7670 "00000000" // /* MW 10 */ + 7671 "00000000" // /* MW 9 */ + 7672 "00000000" // /* MW 8 */ + 7673 "01011011" // /* MW 7 */ + 7674 "00000001" // /* MW 6 */ + 7675 "00100000" // /* MW 5 */ + 7676 "00000000" // /* MW 4 */ + 7677 "11110000" // /* MW 3 */ + 7678 "00101100" // /* MW 2 */ + 7679 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 32 first +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 351 40 +.src_ref 1 "io_buffer_main.h" 449 8 +.src_ref 1 "io_buffer_main.h" 449 8 +.return_address + 7680 "10111010" // LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7681 "01111000" // /* MW 9 */ + 7682 "11010000" // /* MW 8 */ + 7683 "10110011" // /* MW 7 */ + 7684 "00101000" // /* MW 6 */ + 7685 "00000000" // /* MW 5 */ + 7686 "00000001" // /* MW 4 */ + 7687 "11010000" // /* MW 3 */ + 7688 "11000110" // /* MW 2 */ + 7689 "11001000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 19 + 7690 "01000100" // MOVXM p6, #508936 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7691 "00010000" // /* MW 5 */ + 7692 "11001000" // /* MW 4 */ + 7693 "11001100" // /* MW 3 */ + 7694 "00000111" // /* MW 2 */ + 7695 "00000000" // /* MW 1 */ + 7696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7697 "00000000" // /* MW 1 */ + 7698 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7699 "00000000" // /* MW 1 */ + 7700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7701 "00000000" // /* MW 1 */ + 7702 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7703 "00000000" // /* MW 1 */ + 7704 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7705 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 7706 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7707 "00001000" // /* MW 3 */ + 7708 "01010001" // /* MW 2 */ + 7709 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first + 7710 "10011000" // LDA r17, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7711 "00110110" // /* MW 3 */ + 7712 "11110110" // /* MW 2 */ + 7713 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 + 7714 "00011000" // LDA p2, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7715 "00011001" // /* MW 3 */ + 7716 "11101101" // /* MW 2 */ + 7717 "00000111" // /* MW 1 */ + 7718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7719 "00000000" // /* MW 1 */ + 7720 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7721 "00000000" // /* MW 1 */ + 7722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7723 "00000000" // /* MW 1 */ + 7724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7725 "00000000" // /* MW 1 */ + 7726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7727 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 + 7728 "10011000" // SUB r17, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7729 "00010001" // /* MW 3 */ + 7730 "00100011" // /* MW 2 */ + 7731 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first +.src_ref 1 "io_buffer_main.h" 351 28 + 7732 "00001100" // LDA r17, [p2, #20]; ST r17, [p1, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7733 "01100011" // /* MW 5 */ + 7734 "11101100" // /* MW 4 */ + 7735 "11010011" // /* MW 3 */ + 7736 "11000110" // /* MW 2 */ + 7737 "01001010" // /* MW 1 */ + 7738 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7739 "00000000" // /* MW 1 */ + 7740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7741 "00000000" // /* MW 1 */ + 7742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7743 "00000000" // /* MW 1 */ + 7744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7745 "00000000" // /* MW 1 */ + 7746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7747 "00000000" // /* MW 1 */ + 7748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7749 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 7750 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7751 "00001000" // /* MW 3 */ + 7752 "01010001" // /* MW 2 */ + 7753 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first +.src_ref 6 "superkernels.cpp" 305 6 +.src_ref 6 "superkernels.cpp" 306 14 + 7754 "10111010" // LDA r19, [p7, #-8]; MOVXM p1, #508864 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7755 "00010000" // /* MW 9 */ + 7756 "11100000" // /* MW 8 */ + 7757 "10110001" // /* MW 7 */ + 7758 "11110000" // /* MW 6 */ + 7759 "00000001" // /* MW 5 */ + 7760 "00000000" // /* MW 4 */ + 7761 "11010000" // /* MW 3 */ + 7762 "11001110" // /* MW 2 */ + 7763 "11111100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 19 first + 7764 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7765 "01010110" // /* MW 3 */ + 7766 "00000110" // /* MW 2 */ + 7767 "00000110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 6 + 7768 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7769 "00110110" // /* MW 3 */ + 7770 "00000110" // /* MW 2 */ + 7771 "00000001" // /* MW 1 */ + 7772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7773 "00000000" // /* MW 1 */ + 7774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7775 "00000000" // /* MW 1 */ + 7776 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7777 "00000000" // /* MW 1 */ + 7778 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7779 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 first + 7780 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7781 "00110001" // /* MW 3 */ + 7782 "00100001" // /* MW 2 */ + 7783 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 7784 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7785 "00010001" // /* MW 3 */ + 7786 "11100110" // /* MW 2 */ + 7787 "00001111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 16 first + 7788 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7789 "00101000" // /* MW 3 */ + 7790 "01100001" // /* MW 2 */ + 7791 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 6 + 7792 "10000100" // JNZ r16, #7824 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7824 delay_slots=5 */ + 7793 "00000001" // /* MW 5 */ + 7794 "01000000" // /* MW 4 */ + 7795 "01001000" // /* MW 3 */ + 7796 "00001111" // /* MW 2 */ + 7797 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7799 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7801 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7803 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7805 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7807 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 306 14 + 7808 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7809 "00000001" // /* MW 3 */ + 7810 "00100000" // /* MW 2 */ + 7811 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 306 14 first + 7812 "00110110" // NOPA; NOPB; ST r16, [p1]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7813 "11000001" // /* MW 11 */ + 7814 "00001000" // /* MW 10 */ + 7815 "10000011" // /* MW 9 */ + 7816 "00000000" // /* MW 8 */ + 7817 "00000000" // /* MW 7 */ + 7818 "00000000" // /* MW 6 */ + 7819 "00100000" // /* MW 5 */ + 7820 "00000000" // /* MW 4 */ + 7821 "11110000" // /* MW 3 */ + 7822 "00101100" // /* MW 2 */ + 7823 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 +.src_ref 6 "superkernels.cpp" 308 + 7824 "00011000" // LDA lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7825 "00111001" // /* MW 3 */ + 7826 "11110000" // /* MW 2 */ + 7827 "00000111" // /* MW 1 */ + 7828 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7829 "11110001" // /* MW 3 */ + 7830 "11111101" // /* MW 2 */ + 7831 "00000111" // /* MW 1 */ + 7832 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7833 "10011001" // /* MW 3 */ + 7834 "11110111" // /* MW 2 */ + 7835 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 7836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7837 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.noswbrkpt + 7838 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7839 "11010001" // /* MW 3 */ + 7840 "11111001" // /* MW 2 */ + 7841 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7843 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7845 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 308 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7846 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 7847 "00000000" // /* MW 3 */ + 7848 "00101000" // /* MW 2 */ + 7849 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7850 "00011000" // MOVS p6, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7851 "00001011" // /* MW 3 */ + 7852 "10001110" // /* MW 2 */ + 7853 "00001110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 308 +.delay_slot + 7854 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7855 "00000001" // /* MW 5 */ + 7856 "00000000" // /* MW 4 */ + 7857 "00000000" // /* MW 3 */ + 7858 "11111000" // /* MW 2 */ + 7859 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7861 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7863 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0 + 7865 "00000000" // /* MW 1 */ +.label _ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv +.label __ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv___func_begin0 +.function setup_gemm_bfp16_params _ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv +.src_ref 7 "gemm_bfp16_params.h" 128 first +.src_ref 7 "gemm_bfp16_params.h" 130 24 +.src_ref 7 "gemm_bfp16_params.h" 130 26 first +.function_start + 7872 "10111010" // LDA r3, [p0], #4; MOVXM p1, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7873 "00010000" // /* MW 9 */ + 7874 "00000000" // /* MW 8 */ + 7875 "10110001" // /* MW 7 */ + 7876 "11110000" // /* MW 6 */ + 7877 "00000001" // /* MW 5 */ + 7878 "00000000" // /* MW 4 */ + 7879 "11010000" // /* MW 3 */ + 7880 "10001110" // /* MW 2 */ + 7881 "00000011" // /* MW 1 */ +.src_ref 7 "gemm_bfp16_params.h" 58 39 +.src_ref 7 "gemm_bfp16_params.h" 59 38 +.src_ref 7 "gemm_bfp16_params.h" 61 39 +.src_ref 7 "gemm_bfp16_params.h" 71 52 +.src_ref 7 "gemm_bfp16_params.h" 86 29 +.src_ref 7 "gemm_bfp16_params.h" 93 56 + 7882 "10111010" // MOVA r29, #-2; MOVX r6, #-3; MOV r5, #-4 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7883 "01011000" // /* MW 9 */ + 7884 "11111100" // /* MW 8 */ + 7885 "10101111" // /* MW 7 */ + 7886 "10101000" // /* MW 6 */ + 7887 "01100111" // /* MW 5 */ + 7888 "00111110" // /* MW 4 */ + 7889 "00000000" // /* MW 3 */ + 7890 "11011101" // /* MW 2 */ + 7891 "11111111" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 +.src_ref 8 "aie.hpp" 6982 6 +.src_ref 8 "aie.hpp" 6982 6 +.src_ref 8 "aie.hpp" 6982 6 +.src_ref 8 "aie.hpp" 6982 6 +.src_ref 8 "aie.hpp" 7054 44 +.src_ref 8 "aie.hpp" 7056 79 +.src_ref 8 "aie.hpp" 7057 21 +.src_ref 8 "aie.hpp" 7072 95 +.src_ref 7 "gemm_bfp16_params.h" 44 26 +.src_ref 7 "gemm_bfp16_params.h" 44 26 +.src_ref 7 "gemm_bfp16_params.h" 80 39 +.src_ref 7 "gemm_bfp16_params.h" 99 73 +.src_ref 7 "gemm_bfp16_params.h" 138 24 + 7892 "10111010" // MOVA r24, #0; MOVX r1, #1; MOV r0, #8 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7893 "01011000" // /* MW 9 */ + 7894 "00001000" // /* MW 8 */ + 7895 "00001000" // /* MW 7 */ + 7896 "00101000" // /* MW 6 */ + 7897 "00010000" // /* MW 5 */ + 7898 "00000000" // /* MW 4 */ + 7899 "00000000" // /* MW 3 */ + 7900 "00011000" // /* MW 2 */ + 7901 "00000000" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7054 44 +.src_ref 8 "aie.hpp" 7072 95 +.src_ref 8 "aie.hpp" 7073 95 +.src_ref 7 "gemm_bfp16_params.h" 44 26 +.src_ref 7 "gemm_bfp16_params.h" 88 55 + 7902 "10111010" // MOVA r4, #256; MOVXM r28, #16777214 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7903 "00010000" // /* MW 9 */ + 7904 "11111111" // /* MW 8 */ + 7905 "10001111" // /* MW 7 */ + 7906 "11111111" // /* MW 6 */ + 7907 "00111111" // /* MW 5 */ + 7908 "00000000" // /* MW 4 */ + 7909 "00000000" // /* MW 3 */ + 7910 "00000100" // /* MW 2 */ + 7911 "00100000" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7053 42 +.src_ref 8 "aie.hpp" 7053 42 +.src_ref 8 "aie.hpp" 7053 42 +.src_ref 8 "aie.hpp" 7057 21 +.src_ref 7 "gemm_bfp16_params.h" 85 38 +.src_ref 7 "gemm_bfp16_params.h" 88 66 + 7912 "10111010" // MOVA r16, #7; MOVX r19, #9; MOV r2, #512 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7913 "01011000" // /* MW 9 */ + 7914 "00000000" // /* MW 8 */ + 7915 "01001010" // /* MW 7 */ + 7916 "00101000" // /* MW 6 */ + 7917 "00110001" // /* MW 5 */ + 7918 "00000001" // /* MW 4 */ + 7919 "00000000" // /* MW 3 */ + 7920 "11110000" // /* MW 2 */ + 7921 "00000000" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 +.src_ref 9 "tuple" 562 47 +.src_ref 9 "tuple" 562 47 +.src_ref 9 "tuple" 562 47 +.src_ref 8 "aie.hpp" 6982 6 +.src_ref 8 "aie.hpp" 6982 6 +.src_ref 8 "aie.hpp" 7056 79 +.src_ref 8 "aie.hpp" 7056 79 +.src_ref 7 "gemm_bfp16_params.h" 138 24 + 7922 "01100100" // MOVX r7, #128; MOV m0, #52 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7923 "11010001" // /* MW 5 */ + 7924 "00000000" // /* MW 4 */ + 7925 "00100000" // /* MW 3 */ + 7926 "11000000" // /* MW 2 */ + 7927 "00010001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16_params.h" 138 24 + 7928 "11111000" // MOV dj0, m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7929 "00000000" // /* MW 3 */ + 7930 "10000000" // /* MW 2 */ + 7931 "00011000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16_params.h" 58 39 first +.src_ref 7 "gemm_bfp16_params.h" 130 24 first + 7932 "01011100" // ST r3, [p1], #4; LSHL r27, r3, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7933 "11011011" // /* MW 5 */ + 7934 "11101100" // /* MW 4 */ + 7935 "00110001" // /* MW 3 */ + 7936 "10001110" // /* MW 2 */ + 7937 "00100011" // /* MW 1 */ +.src_ref 7 "gemm_bfp16_params.h" 93 56 first +.src_ref 7 "gemm_bfp16_params.h" 131 26 first + 7938 "00101100" // LDA r3, [p0], #4; LSHL r17, r3, r5 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7939 "10111011" // /* MW 5 */ + 7940 "11000100" // /* MW 4 */ + 7941 "11010001" // /* MW 3 */ + 7942 "10001110" // /* MW 2 */ + 7943 "00000011" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7054 59 first +.src_ref 7 "gemm_bfp16_params.h" 80 39 first + 7944 "00100100" // LSHL r31, r27, r0; ADD.NC r17, r17, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7945 "11111111" // /* MW 5 */ + 7946 "10110001" // /* MW 4 */ + 7947 "10111000" // /* MW 3 */ + 7948 "11000001" // /* MW 2 */ + 7949 "11011111" // /* MW 1 */ +.src_ref 7 "gemm_bfp16_params.h" 82 45 +.src_ref 7 "gemm_bfp16_params.h" 85 38 first + 7950 "10100100" // LSHL r19, r27, r19; ADD.NC r18, r31, r4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7951 "00100010" // /* MW 5 */ + 7952 "00111111" // /* MW 4 */ + 7953 "10111001" // /* MW 3 */ + 7954 "11100111" // /* MW 2 */ + 7955 "11011100" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7053 42 first + 7956 "10011000" // LSHL r22, r27, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7957 "00001101" // /* MW 3 */ + 7958 "11101101" // /* MW 2 */ + 7959 "00010110" // /* MW 1 */ + 7960 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7961 "00000000" // /* MW 1 */ + 7962 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7963 "00000000" // /* MW 1 */ + 7964 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7965 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16_params.h" 59 38 first +.src_ref 7 "gemm_bfp16_params.h" 131 24 first + 7966 "01011100" // ST r3, [p1], #4; LSHL r26, r3, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7967 "11011011" // /* MW 5 */ + 7968 "11101000" // /* MW 4 */ + 7969 "00110001" // /* MW 3 */ + 7970 "10001110" // /* MW 2 */ + 7971 "00100011" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7054 59 first +.src_ref 7 "gemm_bfp16_params.h" 132 26 first + 7972 "00101100" // LDA r21, [p0], #4; ADD r20, r26, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7973 "11111110" // /* MW 5 */ + 7974 "01010011" // /* MW 4 */ + 7975 "11011101" // /* MW 3 */ + 7976 "11010110" // /* MW 2 */ + 7977 "00000011" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7054 44 + 7978 "10011000" // MUL r23, r22, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7979 "01001111" // /* MW 3 */ + 7980 "10101111" // /* MW 2 */ + 7981 "00010101" // /* MW 1 */ + 7982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7983 "00000000" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7056 79 first + 7984 "10011000" // SUB r30, r7, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7985 "01110001" // /* MW 3 */ + 7986 "11111101" // /* MW 2 */ + 7987 "00010001" // /* MW 1 */ + 7988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7989 "00000000" // /* MW 1 */ + 7990 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7991 "00000000" // /* MW 1 */ + 7992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7993 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16_params.h" 65 24 first +.src_ref 7 "gemm_bfp16_params.h" 132 24 first + 7994 "01011100" // ST r21, [p1], #4; MUL r3, r3, r21 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7995 "10111111" // /* MW 5 */ + 7996 "10001110" // /* MW 4 */ + 7997 "00110001" // /* MW 3 */ + 7998 "11010110" // /* MW 2 */ + 7999 "00100011" // /* MW 1 */ +.src_ref 7 "gemm_bfp16_params.h" 61 39 first +.src_ref 7 "gemm_bfp16_params.h" 133 26 first + 8000 "00101100" // LDA el0, [p0], #4; LSHL r6, r21, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8001 "11011011" // /* MW 5 */ + 8002 "10011000" // /* MW 4 */ + 8003 "11011010" // /* MW 3 */ + 8004 "10000101" // /* MW 2 */ + 8005 "00000011" // /* MW 1 */ +.src_ref 7 "gemm_bfp16_params.h" 71 36 first +.src_ref 7 "gemm_bfp16_params.h" 88 55 + 8006 "10100100" // MUL r25, r27, r6; ADD.NC r28, r6, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8007 "11100010" // /* MW 5 */ + 8008 "00100110" // /* MW 4 */ + 8009 "11111110" // /* MW 3 */ + 8010 "01001101" // /* MW 2 */ + 8011 "11011110" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7057 21 first +.src_ref 7 "gemm_bfp16_params.h" 86 29 first + 8012 "10100100" // LSHL r5, r21, r5; ADD.NC r21, r26, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8013 "10100010" // /* MW 5 */ + 8014 "10111010" // /* MW 4 */ + 8015 "10111010" // /* MW 3 */ + 8016 "01001011" // /* MW 2 */ + 8017 "10101001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7054 44 +.src_ref 8 "aie.hpp" 7056 79 +.src_ref 7 "gemm_bfp16_params.h" 71 52 first +.src_ref 7 "gemm_bfp16_params.h" 86 38 + 8018 "10111010" // MOVA r25, #128; LSHL r29, r25, r29; ADD.NC r5, r5, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8019 "11001000" // /* MW 9 */ + 8020 "01111111" // /* MW 8 */ + 8021 "10101001" // /* MW 7 */ + 8022 "11101100" // /* MW 6 */ + 8023 "11011110" // /* MW 5 */ + 8024 "00110011" // /* MW 4 */ + 8025 "00000000" // /* MW 3 */ + 8026 "00011001" // /* MW 2 */ + 8027 "00010000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16_params.h" 88 66 first + 8028 "00011000" // MSC r2, r2, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8029 "11001110" // /* MW 3 */ + 8030 "11000101" // /* MW 2 */ + 8031 "00010111" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7053 42 first + 8032 "10011000" // LSHL r6, r6, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8033 "00001101" // /* MW 3 */ + 8034 "10001101" // /* MW 2 */ + 8035 "00010001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7057 21 first + 8036 "10011000" // LSHL r21, r21, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8037 "00001101" // /* MW 3 */ + 8038 "01101011" // /* MW 2 */ + 8039 "00010101" // /* MW 1 */ +.src_ref 7 "gemm_bfp16_params.h" 99 73 first +.src_ref 7 "gemm_bfp16_params.h" 133 24 first + 8040 "01011100" // ST el0, [p1], #4; LSHL r28, r26, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8041 "00011011" // /* MW 5 */ + 8042 "01110000" // /* MW 4 */ + 8043 "00111101" // /* MW 3 */ + 8044 "10000101" // /* MW 2 */ + 8045 "00100011" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7053 42 first +.src_ref 7 "gemm_bfp16_params.h" 134 26 first + 8046 "00101100" // LDA el0, [p0]; LSHL r16, r26, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8047 "00011011" // /* MW 5 */ + 8048 "01000010" // /* MW 4 */ + 8049 "11011101" // /* MW 3 */ + 8050 "10000101" // /* MW 2 */ + 8051 "00000000" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7056 79 first + 8052 "10011000" // SUB r27, r28, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8053 "01010001" // /* MW 3 */ + 8054 "00110111" // /* MW 2 */ + 8055 "00010111" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7054 44 first + 8056 "10011000" // LSHL r0, r5, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8057 "00001101" // /* MW 3 */ + 8058 "01000000" // /* MW 2 */ + 8059 "00010001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7054 44 +.src_ref 8 "aie.hpp" 7057 21 first + 8060 "00011000" // MAC r0, r0, r6, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8061 "01000110" // /* MW 3 */ + 8062 "10000001" // /* MW 2 */ + 8063 "00010001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7054 44 first +.src_ref 8 "aie.hpp" 7056 79 first + 8064 "00011000" // MSC r25, r25, r6, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8065 "01001110" // /* MW 3 */ + 8066 "10110011" // /* MW 2 */ + 8067 "00010001" // /* MW 1 */ + 8068 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8069 "00000000" // /* MW 1 */ + 8070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8071 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16_params.h" 134 24 first + 8072 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8073 "00101001" // /* MW 3 */ + 8074 "00011100" // /* MW 2 */ + 8075 "00001001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16_params.h" 135 26 first + 8076 "10011000" // LDA el0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8077 "00101110" // /* MW 3 */ + 8078 "00010100" // /* MW 2 */ + 8079 "00000000" // /* MW 1 */ + 8080 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8081 "00000000" // /* MW 1 */ + 8082 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8083 "00000000" // /* MW 1 */ + 8084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8085 "00000000" // /* MW 1 */ + 8086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8087 "00000000" // /* MW 1 */ + 8088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8089 "00000000" // /* MW 1 */ + 8090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8091 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16_params.h" 135 24 + 8092 "10011000" // ST el0, [p1], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8093 "00101001" // /* MW 3 */ + 8094 "00111100" // /* MW 2 */ + 8095 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7056 79 +.src_ref 7 "gemm_bfp16_params.h" 44 26 first + 8096 "00000010" // ST r3, [p1], #4; ADD.NC r3, r6, #-128 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8097 "00000000" // /* MW 7 */ + 8098 "10100000" // /* MW 6 */ + 8099 "01101001" // /* MW 5 */ + 8100 "00000000" // /* MW 4 */ + 8101 "00110000" // /* MW 3 */ + 8102 "10001110" // /* MW 2 */ + 8103 "00100011" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7056 79 first +.src_ref 7 "gemm_bfp16_params.h" 44 26 + 8104 "01011100" // ST r29, [p1], #4; SUB r29, r7, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8105 "00000011" // /* MW 5 */ + 8106 "11110110" // /* MW 4 */ + 8107 "00110011" // /* MW 3 */ + 8108 "11110110" // /* MW 2 */ + 8109 "00100011" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7056 79 +.src_ref 7 "gemm_bfp16_params.h" 44 26 first + 8110 "00000010" // ST r26, [p1], #4; ADD.NC r26, r22, #-128 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8111 "00000000" // /* MW 7 */ + 8112 "10100000" // /* MW 6 */ + 8113 "01001101" // /* MW 5 */ + 8114 "00000011" // /* MW 4 */ + 8115 "00110000" // /* MW 3 */ + 8116 "11101010" // /* MW 2 */ + 8117 "00100011" // /* MW 1 */ +.src_ref 7 "gemm_bfp16_params.h" 44 26 + 8118 "10011000" // ST r1, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8119 "00110001" // /* MW 3 */ + 8120 "00011100" // /* MW 2 */ + 8121 "00001001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16_params.h" 44 26 + 8122 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8123 "00010001" // /* MW 3 */ + 8124 "00011111" // /* MW 2 */ + 8125 "00001001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16_params.h" 44 26 + 8126 "10011000" // ST r31, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8127 "11110001" // /* MW 3 */ + 8128 "00011111" // /* MW 2 */ + 8129 "00001001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16_params.h" 44 26 + 8130 "10011000" // ST r4, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8131 "10010001" // /* MW 3 */ + 8132 "00011100" // /* MW 2 */ + 8133 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7072 95 first +.src_ref 7 "gemm_bfp16_params.h" 44 26 + 8134 "01011100" // ST r18, [p1], #4; ADD r18, r27, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8135 "00000001" // /* MW 5 */ + 8136 "11001010" // /* MW 4 */ + 8137 "00111101" // /* MW 3 */ + 8138 "11001010" // /* MW 2 */ + 8139 "00100011" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7073 95 first +.src_ref 7 "gemm_bfp16_params.h" 44 26 first + 8140 "01011100" // ST r19, [p1], #4; SUB r19, r4, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8141 "11100011" // /* MW 5 */ + 8142 "01001110" // /* MW 4 */ + 8143 "00110010" // /* MW 3 */ + 8144 "11001110" // /* MW 2 */ + 8145 "00100011" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7054 44 first +.src_ref 8 "aie.hpp" 7072 95 first +.src_ref 7 "gemm_bfp16_params.h" 44 26 + 8146 "01011100" // ST r5, [p1], #4; MSC r4, r4, r6, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8147 "10011100" // /* MW 5 */ + 8148 "00010010" // /* MW 4 */ + 8149 "00110011" // /* MW 3 */ + 8150 "10010110" // /* MW 2 */ + 8151 "00100011" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7056 79 +.src_ref 8 "aie.hpp" 7057 21 +.src_ref 7 "gemm_bfp16_params.h" 44 26 first + 8152 "01011100" // ST r2, [p1], #16; MOVX r2, #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8153 "00000010" // /* MW 5 */ + 8154 "00001000" // /* MW 4 */ + 8155 "00111111" // /* MW 3 */ + 8156 "10001010" // /* MW 2 */ + 8157 "00101001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7056 79 first +.src_ref 7 "gemm_bfp16_params.h" 44 26 + 8158 "01011100" // ST r24, [p1], #4; XOR r31, r23, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8159 "01001101" // /* MW 5 */ + 8160 "11111100" // /* MW 4 */ + 8161 "00111011" // /* MW 3 */ + 8162 "11100010" // /* MW 2 */ + 8163 "00100011" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7072 95 first +.src_ref 7 "gemm_bfp16_params.h" 44 26 first + 8164 "01011100" // ST r24, [p1], #-12; SUB r23, r24, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8165 "11100011" // /* MW 5 */ + 8166 "01011110" // /* MW 4 */ + 8167 "00111100" // /* MW 3 */ + 8168 "11100010" // /* MW 2 */ + 8169 "00111011" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7057 21 first +.src_ref 7 "gemm_bfp16_params.h" 44 26 + 8170 "01011100" // ST r24, [p1], #4; XOR r2, r2, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8171 "00001101" // /* MW 5 */ + 8172 "00001000" // /* MW 4 */ + 8173 "00110001" // /* MW 3 */ + 8174 "11100010" // /* MW 2 */ + 8175 "00100011" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7057 21 +.src_ref 7 "gemm_bfp16_params.h" 44 26 first + 8176 "01011100" // ST r24, [p1], #-8; SUB r0, r24, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8177 "00000011" // /* MW 5 */ + 8178 "00000000" // /* MW 4 */ + 8179 "00111100" // /* MW 3 */ + 8180 "11100010" // /* MW 2 */ + 8181 "00111101" // /* MW 1 */ +.src_ref 7 "gemm_bfp16_params.h" 44 26 + 8182 "10011000" // ST r24, [p1], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8183 "00010001" // /* MW 3 */ + 8184 "01011111" // /* MW 2 */ + 8185 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 first + 8186 "10011000" // ST r17, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8187 "00110001" // /* MW 3 */ + 8188 "00011110" // /* MW 2 */ + 8189 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8190 "10011000" // ST r30, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8191 "11010001" // /* MW 3 */ + 8192 "00011111" // /* MW 2 */ + 8193 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8194 "10011000" // ST r5, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8195 "10110001" // /* MW 3 */ + 8196 "00011100" // /* MW 2 */ + 8197 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8198 "10011000" // ST r31, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8199 "11110001" // /* MW 3 */ + 8200 "00011111" // /* MW 2 */ + 8201 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8202 "10011000" // ST r20, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8203 "10010001" // /* MW 3 */ + 8204 "00011110" // /* MW 2 */ + 8205 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8206 "10011000" // ST r26, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8207 "01010001" // /* MW 3 */ + 8208 "00011111" // /* MW 2 */ + 8209 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8210 "10011000" // ST r1, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8211 "00110001" // /* MW 3 */ + 8212 "00011100" // /* MW 2 */ + 8213 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8214 "10011000" // ST r7, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8215 "11110001" // /* MW 3 */ + 8216 "00011100" // /* MW 2 */ + 8217 "00001001" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 first + 8218 "10011000" // ST r20, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8219 "10010001" // /* MW 3 */ + 8220 "00011110" // /* MW 2 */ + 8221 "00001001" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 + 8222 "10011000" // ST r22, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8223 "11010001" // /* MW 3 */ + 8224 "00011110" // /* MW 2 */ + 8225 "00001001" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 + 8226 "10011000" // ST r5, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8227 "10110001" // /* MW 3 */ + 8228 "00011100" // /* MW 2 */ + 8229 "00001001" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 + 8230 "10011000" // ST r23, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8231 "11110001" // /* MW 3 */ + 8232 "00011110" // /* MW 2 */ + 8233 "00001001" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 +.src_ref 8 "aie.hpp" 7054 44 first +.src_ref 8 "aie.hpp" 7057 21 first + 8234 "01011100" // ST r19, [p1], #4; MAC r21, r21, r5, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8235 "10001100" // /* MW 5 */ + 8236 "11010111" // /* MW 4 */ + 8237 "00110010" // /* MW 3 */ + 8238 "11001110" // /* MW 2 */ + 8239 "00100011" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 first + 8240 "10011000" // ST r7, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8241 "11110001" // /* MW 3 */ + 8242 "00011100" // /* MW 2 */ + 8243 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 first +.src_ref 8 "aie.hpp" 7056 79 first + 8244 "01011100" // ST r17, [p1], #4; SUB r28, r24, r21 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8245 "10100011" // /* MW 5 */ + 8246 "01110010" // /* MW 4 */ + 8247 "00111100" // /* MW 3 */ + 8248 "11000110" // /* MW 2 */ + 8249 "00100011" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 +.src_ref 8 "aie.hpp" 7073 95 first + 8250 "01011100" // ST r28, [p1], #4; SUB r21, r16, r21 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8251 "10100011" // /* MW 5 */ + 8252 "01010110" // /* MW 4 */ + 8253 "00111000" // /* MW 3 */ + 8254 "11110010" // /* MW 2 */ + 8255 "00100011" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 first + 8256 "10011000" // ST r5, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8257 "10110001" // /* MW 3 */ + 8258 "00011100" // /* MW 2 */ + 8259 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8260 "10011000" // ST r27, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8261 "01110001" // /* MW 3 */ + 8262 "00011111" // /* MW 2 */ + 8263 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8264 "10011000" // ST r20, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8265 "10010001" // /* MW 3 */ + 8266 "00011110" // /* MW 2 */ + 8267 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8268 "10011000" // ST r29, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8269 "10110001" // /* MW 3 */ + 8270 "00011111" // /* MW 2 */ + 8271 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8272 "10011000" // ST r1, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8273 "00110001" // /* MW 3 */ + 8274 "00011100" // /* MW 2 */ + 8275 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8276 "10011000" // ST r16, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8277 "00010001" // /* MW 3 */ + 8278 "00011110" // /* MW 2 */ + 8279 "00001001" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 first + 8280 "10011000" // ST r20, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8281 "10010001" // /* MW 3 */ + 8282 "00011110" // /* MW 2 */ + 8283 "00001001" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 + 8284 "10011000" // ST r7, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8285 "11110001" // /* MW 3 */ + 8286 "00011100" // /* MW 2 */ + 8287 "00001001" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 + 8288 "10011000" // ST r5, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8289 "10110001" // /* MW 3 */ + 8290 "00011100" // /* MW 2 */ + 8291 "00001001" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 + 8292 "10011000" // ST r18, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8293 "01010001" // /* MW 3 */ + 8294 "00011110" // /* MW 2 */ + 8295 "00001001" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 + 8296 "10011000" // ST r21, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8297 "10110001" // /* MW 3 */ + 8298 "00011110" // /* MW 2 */ + 8299 "00001001" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 + 8300 "10011000" // ST r16, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8301 "00010001" // /* MW 3 */ + 8302 "00011110" // /* MW 2 */ + 8303 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 first + 8304 "10011000" // ST r17, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8305 "00110001" // /* MW 3 */ + 8306 "00011110" // /* MW 2 */ + 8307 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8308 "10011000" // ST r2, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8309 "01010001" // /* MW 3 */ + 8310 "00011100" // /* MW 2 */ + 8311 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8312 "10011000" // ST r5, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8313 "10110001" // /* MW 3 */ + 8314 "00011100" // /* MW 2 */ + 8315 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8316 "10011000" // ST r25, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8317 "00110001" // /* MW 3 */ + 8318 "00011111" // /* MW 2 */ + 8319 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8320 "10011000" // ST r20, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8321 "10010001" // /* MW 3 */ + 8322 "00011110" // /* MW 2 */ + 8323 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8324 "10011000" // ST r3, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8325 "01110001" // /* MW 3 */ + 8326 "00011100" // /* MW 2 */ + 8327 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8328 "10011000" // ST r1, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8329 "00110001" // /* MW 3 */ + 8330 "00011100" // /* MW 2 */ + 8331 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8332 "10011000" // ST r7, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8333 "11110001" // /* MW 3 */ + 8334 "00011100" // /* MW 2 */ + 8335 "00001001" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 first + 8336 "10011000" // ST r20, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8337 "10010001" // /* MW 3 */ + 8338 "00011110" // /* MW 2 */ + 8339 "00001001" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 + 8340 "10011000" // ST r6, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8341 "11010001" // /* MW 3 */ + 8342 "00011100" // /* MW 2 */ + 8343 "00001001" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 + 8344 "10011000" // ST r5, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8345 "10110001" // /* MW 3 */ + 8346 "00011100" // /* MW 2 */ + 8347 "00001001" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 + 8348 "10011000" // ST r4, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8349 "10010001" // /* MW 3 */ + 8350 "00011100" // /* MW 2 */ + 8351 "00001001" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 + 8352 "10011000" // ST r0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8353 "00010001" // /* MW 3 */ + 8354 "00011100" // /* MW 2 */ + 8355 "00001001" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 + 8356 "10011000" // ST r7, [p1], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8357 "11110001" // /* MW 3 */ + 8358 "00001000" // /* MW 2 */ + 8359 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 first + 8360 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8361 "00010001" // /* MW 3 */ + 8362 "00011111" // /* MW 2 */ + 8363 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8364 "10011000" // ST r24, [p1], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8365 "00010001" // /* MW 3 */ + 8366 "11011111" // /* MW 2 */ + 8367 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8368 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8369 "00010001" // /* MW 3 */ + 8370 "00011111" // /* MW 2 */ + 8371 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8372 "10011000" // ST r24, [p1], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8373 "00010001" // /* MW 3 */ + 8374 "11011111" // /* MW 2 */ + 8375 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8376 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8377 "00010001" // /* MW 3 */ + 8378 "00011111" // /* MW 2 */ + 8379 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8380 "10011000" // ST r24, [p1], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8381 "00010001" // /* MW 3 */ + 8382 "11011111" // /* MW 2 */ + 8383 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8384 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8385 "00010001" // /* MW 3 */ + 8386 "00011111" // /* MW 2 */ + 8387 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8388 "10011000" // ST r24, [p1], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8389 "00010001" // /* MW 3 */ + 8390 "11011111" // /* MW 2 */ + 8391 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8392 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8393 "00010001" // /* MW 3 */ + 8394 "00011111" // /* MW 2 */ + 8395 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 +.src_ref 7 "gemm_bfp16_params.h" 139 first + 8396 "01011100" // ST r24, [p1], #-12; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 8397 "00000000" // /* MW 5 */ + 8398 "01010000" // /* MW 4 */ + 8399 "00110000" // /* MW 3 */ + 8400 "11100010" // /* MW 2 */ + 8401 "00111011" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 first +.delay_slot + 8402 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8403 "00010001" // /* MW 3 */ + 8404 "00011111" // /* MW 2 */ + 8405 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 +.delay_slot + 8406 "10011000" // ST r24, [p1], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8407 "00010001" // /* MW 3 */ + 8408 "11011111" // /* MW 2 */ + 8409 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 +.delay_slot + 8410 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8411 "00010001" // /* MW 3 */ + 8412 "00011111" // /* MW 2 */ + 8413 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 +.delay_slot + 8414 "10011000" // ST r24, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8415 "00010001" // /* MW 3 */ + 8416 "00000111" // /* MW 2 */ + 8417 "00001001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16_params.h" 138 24 first +.delay_slot + 8418 "10011000" // ST r24, [p1, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8419 "00010001" // /* MW 3 */ + 8420 "00000011" // /* MW 2 */ +.label _ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv__end +.label __ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv___func_end0 + 8421 "00001001" // /* MW 1 */ +.label __Z8init_accILt1EEvPaS0_iii___func_begin0 +.label _Z8init_accILt1EEvPaS0_iii +.function init_acc<(unsigned short)1> _Z8init_accILt1EEvPaS0_iii +.src_ref 7 "gemm_bfp16.h" 38 first +.src_ref 7 "gemm_bfp16.h" 41 47 +.function_start + 8432 "01000100" // MOVXM p2, #508944 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8433 "00100000" // /* MW 5 */ + 8434 "11001000" // /* MW 4 */ + 8435 "11000100" // /* MW 3 */ + 8436 "00000111" // /* MW 2 */ + 8437 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 38 + 8438 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8439 "00000001" // /* MW 5 */ + 8440 "00000000" // /* MW 4 */ + 8441 "00000000" // /* MW 3 */ + 8442 "00001000" // /* MW 2 */ + 8443 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 41 47 first + 8444 "10011000" // LDA.s8 r4, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8445 "10000010" // /* MW 3 */ + 8446 "00000100" // /* MW 2 */ + 8447 "00000010" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 68 6 first + 8448 "01000100" // MOVXM ls, #8608 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8449 "01000000" // /* MW 5 */ + 8450 "11100011" // /* MW 4 */ + 8451 "00100001" // /* MW 3 */ + 8452 "00000000" // /* MW 2 */ + 8453 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 41 47 +.src_ref 7 "gemm_bfp16.h" 68 6 + 8454 "10111010" // MOVA r26, #0; MOVXM le, #8672 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8455 "00010000" // /* MW 9 */ + 8456 "11110000" // /* MW 8 */ + 8457 "10111000" // /* MW 7 */ + 8458 "00001001" // /* MW 6 */ + 8459 "00000000" // /* MW 5 */ + 8460 "00000000" // /* MW 4 */ + 8461 "00000000" // /* MW 3 */ + 8462 "00011010" // /* MW 2 */ + 8463 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 53 4 +.src_ref 7 "gemm_bfp16.h" 53 29 + 8464 "10111010" // MOVA r5, #-4; MOVXM p3, #8560 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8465 "00010000" // /* MW 9 */ + 8466 "10111000" // /* MW 8 */ + 8467 "10110000" // /* MW 7 */ + 8468 "00001001" // /* MW 6 */ + 8469 "00000000" // /* MW 5 */ + 8470 "00000000" // /* MW 4 */ + 8471 "00000000" // /* MW 3 */ + 8472 "10000101" // /* MW 2 */ + 8473 "11111111" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 39 33 +.src_ref 7 "gemm_bfp16.h" 41 47 first +.src_ref 7 "gemm_bfp16.h" 53 29 first +.src_ref 7 "gemm_bfp16.h" 75 43 + 8474 "10111010" // MOVA r3, #5; LSHL r5, r1, r5; VINSERT.32 x1, x0, #0, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8475 "10111000" // /* MW 9 */ + 8476 "10101000" // /* MW 8 */ + 8477 "01000001" // /* MW 7 */ + 8478 "11101100" // /* MW 6 */ + 8479 "01010010" // /* MW 5 */ + 8480 "00000010" // /* MW 4 */ + 8481 "00000000" // /* MW 3 */ + 8482 "10100011" // /* MW 2 */ + 8483 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 39 33 first + 8484 "11100100" // LSHL r7, r0, r3; MOV p2, sp /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8485 "11000001" // /* MW 5 */ + 8486 "11001011" // /* MW 4 */ + 8487 "10110100" // /* MW 3 */ + 8488 "11000111" // /* MW 2 */ + 8489 "00000001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 41 47 +.src_ref 7 "gemm_bfp16.h" 75 43 first + 8490 "11100100" // LSHL r3, r2, r3; VMOV bmll0, x1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8491 "00100101" // /* MW 5 */ + 8492 "00000101" // /* MW 4 */ + 8493 "10110000" // /* MW 3 */ + 8494 "11000111" // /* MW 2 */ + 8495 "00010000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 41 47 +.src_ref 7 "gemm_bfp16.h" 42 54 + 8496 "11100100" // MOVX crRnd, r4; MOV r1, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8497 "10000001" // /* MW 5 */ + 8498 "10100101" // /* MW 4 */ + 8499 "00000000" // /* MW 3 */ + 8500 "01010000" // /* MW 2 */ + 8501 "00100111" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 41 47 first +.src_ref 7 "gemm_bfp16.h" 42 69 +.src_ref 7 "gemm_bfp16.h" 75 14 + 8502 "00110110" // PADDB [p2], #-64; VCONV.bf16.fp32 wl0, bmll0; MOVX r16, #1; MOV m1, r3 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8503 "01111000" // /* MW 11 */ + 8504 "11010000" // /* MW 10 */ + 8505 "10000000" // /* MW 9 */ + 8506 "00101000" // /* MW 8 */ + 8507 "00000000" // /* MW 7 */ + 8508 "00000001" // /* MW 6 */ + 8509 "00100000" // /* MW 5 */ + 8510 "11111111" // /* MW 4 */ + 8511 "11000101" // /* MW 3 */ + 8512 "00000010" // /* MW 2 */ + 8513 "00001000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 42 54 +.src_ref 7 "gemm_bfp16.h" 42 69 first +.src_ref 7 "gemm_bfp16.h" 75 43 + 8514 "10111010" // MOVA r6, #-3; EQ r27, r2, r16; MOV r3, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8515 "01111000" // /* MW 9 */ + 8516 "01100000" // /* MW 8 */ + 8517 "01101010" // /* MW 7 */ + 8518 "00111100" // /* MW 6 */ + 8519 "10111000" // /* MW 5 */ + 8520 "00000101" // /* MW 4 */ + 8521 "00000000" // /* MW 3 */ + 8522 "10100110" // /* MW 2 */ + 8523 "11111111" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 41 22 first +.src_ref 7 "gemm_bfp16.h" 41 47 first +.src_ref 7 "gemm_bfp16.h" 75 43 first + 8524 "10100100" // LSHL r0, r0, r6; VEXTBCST.16 x1, x0, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8525 "00000110" // /* MW 5 */ + 8526 "00000010" // /* MW 4 */ + 8527 "10110001" // /* MW 3 */ + 8528 "00001101" // /* MW 2 */ + 8529 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 42 54 first +.src_ref 7 "gemm_bfp16.h" 44 44 +.src_ref 7 "gemm_bfp16.h" 69 17 +.src_ref 7 "gemm_bfp16.h" 76 14 +.src_ref 7 "gemm_bfp16.h" 77 16 + 8530 "01111110" // NOPA; NOPB; MOVS p1, p0; SEL.EQZ r1, r3, r1, r27; MOV m0, r7 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 8531 "01100000" // /* MW 13 */ + 8532 "00010001" // /* MW 12 */ + 8533 "00110000" // /* MW 11 */ + 8534 "00001111" // /* MW 10 */ + 8535 "00111010" // /* MW 9 */ + 8536 "00000000" // /* MW 8 */ + 8537 "00010010" // /* MW 7 */ + 8538 "11000010" // /* MW 6 */ + 8539 "00100000" // /* MW 5 */ + 8540 "00000000" // /* MW 4 */ + 8541 "11110000" // /* MW 3 */ + 8542 "00101100" // /* MW 2 */ + 8543 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 41 22 first +.src_ref 7 "gemm_bfp16.h" 44 44 first +.src_ref 7 "gemm_bfp16.h" 54 24 +.src_ref 7 "gemm_bfp16.h" 75 14 + 8544 "11100001" // NOPA; PADDB [p0], m0; VST x1, [p2]; ADD r2, r5, #-1; MOV p2, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8545 "00000000" // /* MW 15 */ + 8546 "00000000" // /* MW 14 */ + 8547 "01111000" // /* MW 13 */ + 8548 "01010000" // /* MW 12 */ + 8549 "00110000" // /* MW 11 */ + 8550 "11111001" // /* MW 10 */ + 8551 "00101111" // /* MW 9 */ + 8552 "00001010" // /* MW 8 */ + 8553 "01010011" // /* MW 7 */ + 8554 "00000100" // /* MW 6 */ + 8555 "00100010" // /* MW 5 */ + 8556 "00010111" // /* MW 4 */ + 8557 "11110000" // /* MW 3 */ + 8558 "00101100" // /* MW 2 */ + 8559 "00000000" // /* MW 1 */ +.label TGT_F_Z8init_accILt1EEvPaS0_iii_128 +.src_ref 7 "gemm_bfp16.h" 54 24 first +.src_ref 7 "gemm_bfp16.h" 68 6 first +.loop_nesting 1 + 8560 "11110100" // VLDB wl0, [p2]; MOV lc, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8561 "01000001" // /* MW 5 */ + 8562 "11100000" // /* MW 4 */ + 8563 "10001010" // /* MW 3 */ + 8564 "10000100" // /* MW 2 */ + 8565 "01000000" // /* MW 1 */ + 8566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8567 "00000000" // /* MW 1 */ + 8568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8569 "00000000" // /* MW 1 */ + 8570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8571 "00000000" // /* MW 1 */ + 8572 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8573 "00000000" // /* MW 1 */ + 8574 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8575 "00000000" // /* MW 1 */ + 8576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8577 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 54 24 +.src_ref 7 "gemm_bfp16.h" 63 39 +.src_ref 7 "gemm_bfp16.h" 64 39 + 8578 "11111000" // VMOV wh0, wl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8579 "00100010" // /* MW 3 */ + 8580 "00000001" // /* MW 2 */ + 8581 "00011000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 54 24 +.src_ref 7 "gemm_bfp16.h" 63 39 first + 8582 "01011000" // VEXTBCST.128 x3, x0, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8583 "00000011" // /* MW 3 */ + 8584 "10000100" // /* MW 2 */ + 8585 "00011001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 54 24 first +.src_ref 7 "gemm_bfp16.h" 64 39 first + 8586 "01011000" // VEXTBCST.128 x1, x0, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8587 "00000111" // /* MW 3 */ + 8588 "10000100" // /* MW 2 */ + 8589 "00011000" // /* MW 1 */ + 8590 "11111000" // VCONV.fp32.bf16 cml0, x3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8591 "10001010" // /* MW 3 */ + 8592 "00000111" // /* MW 2 */ + 8593 "00011000" // /* MW 1 */ + 8594 "11111000" // VCONV.fp32.bf16 cmh0, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8595 "10001010" // /* MW 3 */ + 8596 "10000011" // /* MW 2 */ + 8597 "00011000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 69 17 first + 8598 "11111000" // VMOV bmll1, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8599 "00010010" // /* MW 3 */ + 8600 "00000000" // /* MW 2 */ + 8601 "00011001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 71 19 first + 8602 "11010100" // NOPA; VMOV bmlh1, bmhl0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8603 "00100101" // /* MW 5 */ + 8604 "10000100" // /* MW 4 */ + 8605 "11110010" // /* MW 3 */ + 8606 "00101100" // /* MW 2 */ + 8607 "00000000" // /* MW 1 */ +.label ZLS_F_Z8init_accILt1EEvPaS0_iii_176 +.src_ref 7 "gemm_bfp16.h" 69 17 first +.begin_of_loop +.loop_nesting 2 + 8608 "10011000" // VST bmlh0, [p1, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8609 "00100110" // /* MW 3 */ + 8610 "00010100" // /* MW 2 */ + 8611 "00001001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 69 17 + 8612 "10011000" // VST bmll1, [p1], #128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8613 "10000110" // /* MW 3 */ + 8614 "00101100" // /* MW 2 */ + 8615 "00001001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 70 17 first + 8616 "10011000" // VST bmlh0, [p1, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8617 "00100110" // /* MW 3 */ + 8618 "00010100" // /* MW 2 */ + 8619 "00001001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 70 17 + 8620 "10011000" // VST bmll1, [p1], #128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8621 "10000110" // /* MW 3 */ + 8622 "00101100" // /* MW 2 */ + 8623 "00001001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 71 19 first + 8624 "11100001" // NOPA; NOPB; VST bmhh0, [p0, #64]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8625 "00000000" // /* MW 15 */ + 8626 "00000000" // /* MW 14 */ + 8627 "01111000" // /* MW 13 */ + 8628 "10100101" // /* MW 12 */ + 8629 "00000001" // /* MW 11 */ + 8630 "00000000" // /* MW 10 */ + 8631 "00000000" // /* MW 9 */ + 8632 "10000000" // /* MW 8 */ + 8633 "01100110" // /* MW 7 */ + 8634 "00010100" // /* MW 6 */ + 8635 "00100000" // /* MW 5 */ + 8636 "00000000" // /* MW 4 */ + 8637 "11110000" // /* MW 3 */ + 8638 "00101100" // /* MW 2 */ + 8639 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 71 19 + 8640 "11100001" // NOPA; NOPB; VST bmlh1, [p0], #128; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8641 "00000000" // /* MW 15 */ + 8642 "00000000" // /* MW 14 */ + 8643 "01111000" // /* MW 13 */ + 8644 "10100101" // /* MW 12 */ + 8645 "00000001" // /* MW 11 */ + 8646 "00000000" // /* MW 10 */ + 8647 "00000000" // /* MW 9 */ + 8648 "10000000" // /* MW 8 */ + 8649 "10100110" // /* MW 7 */ + 8650 "00101100" // /* MW 6 */ + 8651 "00100000" // /* MW 5 */ + 8652 "00000000" // /* MW 4 */ + 8653 "11110000" // /* MW 3 */ + 8654 "00101100" // /* MW 2 */ + 8655 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 72 19 first + 8656 "11100001" // NOPA; NOPB; VST bmhh0, [p0, #64]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8657 "00000000" // /* MW 15 */ + 8658 "00000000" // /* MW 14 */ + 8659 "01111000" // /* MW 13 */ + 8660 "10100101" // /* MW 12 */ + 8661 "00000001" // /* MW 11 */ + 8662 "00000000" // /* MW 10 */ + 8663 "00000000" // /* MW 9 */ + 8664 "10000000" // /* MW 8 */ + 8665 "01100110" // /* MW 7 */ + 8666 "00010100" // /* MW 6 */ + 8667 "00100000" // /* MW 5 */ + 8668 "00000000" // /* MW 4 */ + 8669 "11110000" // /* MW 3 */ + 8670 "00101100" // /* MW 2 */ + 8671 "00000000" // /* MW 1 */ +.label ZLE_F_Z8init_accILt1EEvPaS0_iii_240 +.src_ref 7 "gemm_bfp16.h" 72 19 +.end_of_loop + 8672 "11100001" // NOPA; NOPB; VST bmlh1, [p0], #128; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8673 "00000000" // /* MW 15 */ + 8674 "00000000" // /* MW 14 */ + 8675 "01111000" // /* MW 13 */ + 8676 "10100101" // /* MW 12 */ + 8677 "00000001" // /* MW 11 */ + 8678 "00000000" // /* MW 10 */ + 8679 "00000000" // /* MW 9 */ + 8680 "10000000" // /* MW 8 */ + 8681 "10100110" // /* MW 7 */ + 8682 "00101100" // /* MW 6 */ + 8683 "00100000" // /* MW 5 */ + 8684 "00000000" // /* MW 4 */ + 8685 "11110000" // /* MW 3 */ + 8686 "00101100" // /* MW 2 */ + 8687 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 53 4 first +.src_ref 7 "gemm_bfp16.h" 75 14 first +.src_ref 7 "gemm_bfp16.h" 76 14 first +.loop_nesting 1 + 8688 "00010010" // PADDA [p1], m0; PADDB [p2], m1; JNZD r2, r2, p3 /* MW 8 */ /* control_operation: words=8 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 8689 "11100000" // /* MW 7 */ + 8690 "10000100" // /* MW 6 */ + 8691 "00100000" // /* MW 5 */ + 8692 "01010111" // /* MW 4 */ + 8693 "11110100" // /* MW 3 */ + 8694 "00001100" // /* MW 2 */ + 8695 "00100001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 77 16 first +.delay_slot + 8696 "00011000" // PADDB [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8697 "10010000" // /* MW 3 */ + 8698 "00001011" // /* MW 2 */ + 8699 "00111000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8701 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8702 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8703 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8704 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8705 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8707 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 80 first +.loop_nesting 0 + 8708 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 8709 "00000000" // /* MW 3 */ + 8710 "00101000" // /* MW 2 */ + 8711 "00010000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 80 +.delay_slot + 8712 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8713 "00000001" // /* MW 5 */ + 8714 "00000000" // /* MW 4 */ + 8715 "00000000" // /* MW 3 */ + 8716 "11111000" // /* MW 2 */ + 8717 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8719 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8720 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8721 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8723 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z8init_accILt1EEvPaS0_iii__end +.label __Z8init_accILt1EEvPaS0_iii___func_end0 + 8725 "00000000" // /* MW 1 */ +.label __Z12post_processPai___func_begin0 +.label _Z12post_processPai +.function post_process _Z12post_processPai +.src_ref 7 "gemm_bfp16.h" 83 first +.src_ref 7 "gemm_bfp16.h" 92 26 +.src_ref 7 "gemm_bfp16.h" 93 12 +.src_ref 7 "gemm_bfp16.h" 97 21 +.src_ref 7 "gemm_bfp16.h" 97 23 +.function_start + 8736 "01110110" // MOVA m0, #512; MOVS p2, p0; MOVXM p1, #508944 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8737 "00010000" // /* MW 11 */ + 8738 "00001000" // /* MW 10 */ + 8739 "10110010" // /* MW 9 */ + 8740 "11110000" // /* MW 8 */ + 8741 "00000001" // /* MW 7 */ + 8742 "00000000" // /* MW 6 */ + 8743 "10001011" // /* MW 5 */ + 8744 "10000000" // /* MW 4 */ + 8745 "10000010" // /* MW 3 */ + 8746 "00000000" // /* MW 2 */ + 8747 "01000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 92 26 +.src_ref 7 "gemm_bfp16.h" 94 26 +.src_ref 7 "gemm_bfp16.h" 94 26 +.src_ref 7 "gemm_bfp16.h" 95 26 +.src_ref 7 "gemm_bfp16.h" 96 26 + 8748 "10111010" // MOVA r1, #-7; MOVX r2, #0; MOV r4, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8749 "01011000" // /* MW 9 */ + 8750 "00000001" // /* MW 8 */ + 8751 "10001000" // /* MW 7 */ + 8752 "00001000" // /* MW 6 */ + 8753 "00100000" // /* MW 5 */ + 8754 "00000000" // /* MW 4 */ + 8755 "00000000" // /* MW 3 */ + 8756 "00100001" // /* MW 2 */ + 8757 "11111111" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 92 26 first +.src_ref 7 "gemm_bfp16.h" 93 12 +.src_ref 7 "gemm_bfp16.h" 94 14 +.src_ref 7 "gemm_bfp16.h" 94 26 first +.src_ref 7 "gemm_bfp16.h" 95 14 +.src_ref 7 "gemm_bfp16.h" 96 14 + 8758 "01110110" // LDA.s8 r24, [p1]; MOVS p1, p0; OR r16, r2, r4; MOV r3, #7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8759 "01011000" // /* MW 11 */ + 8760 "00000111" // /* MW 10 */ + 8761 "01101000" // /* MW 9 */ + 8762 "00101100" // /* MW 8 */ + 8763 "00000010" // /* MW 7 */ + 8764 "00000101" // /* MW 6 */ + 8765 "10001011" // /* MW 5 */ + 8766 "10000000" // /* MW 4 */ + 8767 "01010001" // /* MW 3 */ + 8768 "11100000" // /* MW 2 */ + 8769 "00100000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 92 26 +.src_ref 7 "gemm_bfp16.h" 93 12 first +.src_ref 7 "gemm_bfp16.h" 95 26 + 8770 "10111010" // VLDA bmlh1, [p1, #64]; LSHL r1, r0, r1; MOV r5, #2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8771 "01011000" // /* MW 9 */ + 8772 "00000010" // /* MW 8 */ + 8773 "10101000" // /* MW 7 */ + 8774 "11101100" // /* MW 6 */ + 8775 "00010000" // /* MW 5 */ + 8776 "00000000" // /* MW 4 */ + 8777 "10110000" // /* MW 3 */ + 8778 "10010110" // /* MW 2 */ + 8779 "00100010" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 93 12 +.src_ref 7 "gemm_bfp16.h" 94 14 +.src_ref 7 "gemm_bfp16.h" 94 14 first +.src_ref 7 "gemm_bfp16.h" 95 14 +.src_ref 7 "gemm_bfp16.h" 96 14 + 8780 "10111010" // VLDA bmll1, [p1], m0; LSHL r18, r16, r3; MOV r0, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8781 "01111000" // /* MW 9 */ + 8782 "01100000" // /* MW 8 */ + 8783 "00001000" // /* MW 7 */ + 8784 "11101100" // /* MW 6 */ + 8785 "00100001" // /* MW 5 */ + 8786 "00100001" // /* MW 4 */ + 8787 "10110000" // /* MW 3 */ + 8788 "00010010" // /* MW 2 */ + 8789 "00100001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 94 14 +.src_ref 7 "gemm_bfp16.h" 95 26 first +.src_ref 7 "gemm_bfp16.h" 96 26 + 8790 "10111010" // MOVA r6, #3; OR r7, r5, r2; ADD.NC p3, r18, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8791 "10101000" // /* MW 9 */ + 8792 "10000000" // /* MW 8 */ + 8793 "10110100" // /* MW 7 */ + 8794 "00101101" // /* MW 6 */ + 8795 "01110001" // /* MW 5 */ + 8796 "00001010" // /* MW 4 */ + 8797 "00000000" // /* MW 3 */ + 8798 "01100110" // /* MW 2 */ + 8799 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 92 2 first +.src_ref 7 "gemm_bfp16.h" 94 12 first +.src_ref 7 "gemm_bfp16.h" 95 14 + 8800 "10111010" // VLDA bmhh0, [p3, #64]; LSHL r19, r7, r3; ADD.NC lc, r1, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8801 "11001000" // /* MW 9 */ + 8802 "01111111" // /* MW 8 */ + 8803 "10111000" // /* MW 7 */ + 8804 "11101110" // /* MW 6 */ + 8805 "00110001" // /* MW 5 */ + 8806 "00001111" // /* MW 4 */ + 8807 "10110000" // /* MW 3 */ + 8808 "10001110" // /* MW 2 */ + 8809 "01100010" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 94 12 +.src_ref 7 "gemm_bfp16.h" 95 14 first +.src_ref 7 "gemm_bfp16.h" 96 26 first + 8810 "10111010" // VLDA bmhl0, [p3]; OR r17, r6, r2; ADD.NC p4, r19, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8811 "10101000" // /* MW 9 */ + 8812 "11000000" // /* MW 8 */ + 8813 "00110100" // /* MW 7 */ + 8814 "00101110" // /* MW 6 */ + 8815 "00010001" // /* MW 5 */ + 8816 "00001101" // /* MW 4 */ + 8817 "10110000" // /* MW 3 */ + 8818 "10001010" // /* MW 2 */ + 8819 "01100000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 95 12 +.src_ref 7 "gemm_bfp16.h" 96 14 + 8820 "10111010" // VLDA bmlh0, [p4, #64]; LSHL r20, r17, r3; ADD.NC r2, r2, #4 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8821 "00001000" // /* MW 9 */ + 8822 "10000001" // /* MW 8 */ + 8823 "01001000" // /* MW 7 */ + 8824 "11101100" // /* MW 6 */ + 8825 "01000001" // /* MW 5 */ + 8826 "00100011" // /* MW 4 */ + 8827 "10110000" // /* MW 3 */ + 8828 "10000110" // /* MW 2 */ + 8829 "10000010" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 95 12 +.src_ref 7 "gemm_bfp16.h" 96 14 +.src_ref 7 "gemm_bfp16.h" 97 21 +.src_ref 7 "gemm_bfp16.h" 97 23 +.src_ref 7 "gemm_bfp16.h" 98 4 +.src_ref 7 "gemm_bfp16.h" 98 21 +.src_ref 7 "gemm_bfp16.h" 98 23 +.src_ref 7 "gemm_bfp16.h" 99 4 +.src_ref 7 "gemm_bfp16.h" 99 21 +.src_ref 7 "gemm_bfp16.h" 99 23 +.src_ref 7 "gemm_bfp16.h" 100 4 +.src_ref 7 "gemm_bfp16.h" 100 21 +.src_ref 7 "gemm_bfp16.h" 100 23 + 8830 "10111010" // VLDA bmll0, [p4]; MOVX crRnd, r24; ADD.NC p5, r20, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8831 "10101000" // /* MW 9 */ + 8832 "00000000" // /* MW 8 */ + 8833 "10110101" // /* MW 7 */ + 8834 "00000010" // /* MW 6 */ + 8835 "11010100" // /* MW 5 */ + 8836 "00110001" // /* MW 4 */ + 8837 "10110000" // /* MW 3 */ + 8838 "10000010" // /* MW 2 */ + 8839 "10000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 92 2 first +.src_ref 7 "gemm_bfp16.h" 96 12 + 8840 "10111010" // VLDA bmhh1, [p5, #64]; MOVXM ls, #8880 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8841 "00010000" // /* MW 9 */ + 8842 "01011000" // /* MW 8 */ + 8843 "01111001" // /* MW 7 */ + 8844 "00001000" // /* MW 6 */ + 8845 "00000000" // /* MW 5 */ + 8846 "00000000" // /* MW 4 */ + 8847 "10110000" // /* MW 3 */ + 8848 "10011110" // /* MW 2 */ + 8849 "10100010" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 92 2 +.src_ref 7 "gemm_bfp16.h" 96 12 first + 8850 "10111010" // VLDA bmhl1, [p5]; MOVXM le, #8976 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8851 "00010000" // /* MW 9 */ + 8852 "10001000" // /* MW 8 */ + 8853 "10111001" // /* MW 7 */ + 8854 "00001001" // /* MW 6 */ + 8855 "00000000" // /* MW 5 */ + 8856 "00000000" // /* MW 4 */ + 8857 "10110000" // /* MW 3 */ + 8858 "10011010" // /* MW 2 */ + 8859 "10100000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 98 4 +.src_ref 7 "gemm_bfp16.h" 99 4 +.src_ref 7 "gemm_bfp16.h" 100 4 + 8860 "00011000" // MOVX r1, #6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8861 "00011001" // /* MW 3 */ + 8862 "00000010" // /* MW 2 */ + 8863 "00010000" // /* MW 1 */ + 8864 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8865 "00000000" // /* MW 15 */ + 8866 "00000000" // /* MW 14 */ + 8867 "01111000" // /* MW 13 */ + 8868 "10100101" // /* MW 12 */ + 8869 "00000001" // /* MW 11 */ + 8870 "00000000" // /* MW 10 */ + 8871 "00000000" // /* MW 9 */ + 8872 "00000000" // /* MW 8 */ + 8873 "01011011" // /* MW 7 */ + 8874 "00000001" // /* MW 6 */ + 8875 "00100000" // /* MW 5 */ + 8876 "00000000" // /* MW 4 */ + 8877 "11110000" // /* MW 3 */ + 8878 "00101100" // /* MW 2 */ + 8879 "00000000" // /* MW 1 */ +.label ZLS_F_Z12post_processPai_144 +.src_ref 7 "gemm_bfp16.h" 97 21 first +.src_ref 7 "gemm_bfp16.h" 97 23 first +.src_ref 7 "gemm_bfp16.h" 98 4 first +.begin_of_loop +.loop_nesting 1 + 8880 "01011100" // VST.CONV.bf16.fp32 cml1, [p2], #256;LSHL r21, r16, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8881 "00111011" // /* MW 5 */ + 8882 "01010100" // /* MW 4 */ + 8883 "01101000" // /* MW 3 */ + 8884 "10010100" // /* MW 2 */ + 8885 "01001001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 98 4 +.src_ref 7 "gemm_bfp16.h" 98 21 +.src_ref 7 "gemm_bfp16.h" 98 23 +.src_ref 7 "gemm_bfp16.h" 99 4 first + 8886 "11100100" // LSHL r22, r7, r1; MOV dj2, r21 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8887 "01000001" // /* MW 5 */ + 8888 "00010101" // /* MW 4 */ + 8889 "10110101" // /* MW 3 */ + 8890 "10000011" // /* MW 2 */ + 8891 "00111101" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 98 4 first +.src_ref 7 "gemm_bfp16.h" 98 21 first +.src_ref 7 "gemm_bfp16.h" 98 23 first +.src_ref 7 "gemm_bfp16.h" 99 4 +.src_ref 7 "gemm_bfp16.h" 99 21 +.src_ref 7 "gemm_bfp16.h" 99 23 +.src_ref 7 "gemm_bfp16.h" 100 4 first + 8892 "00111010" // VST.CONV.bf16.fp32 cmh0, [p0, dj2];LSHL r23, r17, r1; MOV dj0, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8893 "01111001" // /* MW 9 */ + 8894 "10010000" // /* MW 8 */ + 8895 "01000101" // /* MW 7 */ + 8896 "11101100" // /* MW 6 */ + 8897 "01110000" // /* MW 5 */ + 8898 "00100011" // /* MW 4 */ + 8899 "01100000" // /* MW 3 */ + 8900 "00001100" // /* MW 2 */ + 8901 "00001000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 94 26 first +.src_ref 7 "gemm_bfp16.h" 99 4 first +.src_ref 7 "gemm_bfp16.h" 99 21 first +.src_ref 7 "gemm_bfp16.h" 99 23 first +.src_ref 7 "gemm_bfp16.h" 100 4 +.src_ref 7 "gemm_bfp16.h" 100 21 +.src_ref 7 "gemm_bfp16.h" 100 23 + 8902 "00111010" // VST.CONV.bf16.fp32 cml0, [p0, dj0];OR r16, r2, r4; MOV dj1, r23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8903 "01111001" // /* MW 9 */ + 8904 "11010000" // /* MW 8 */ + 8905 "11000101" // /* MW 7 */ + 8906 "00101100" // /* MW 6 */ + 8907 "00000010" // /* MW 5 */ + 8908 "00000101" // /* MW 4 */ + 8909 "01100000" // /* MW 3 */ + 8910 "00000100" // /* MW 2 */ + 8911 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 95 26 first + 8912 "10011000" // OR r7, r5, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8913 "00100101" // /* MW 3 */ + 8914 "01001110" // /* MW 2 */ + 8915 "00010001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 96 26 first +.src_ref 7 "gemm_bfp16.h" 100 4 first +.src_ref 7 "gemm_bfp16.h" 100 21 first +.src_ref 7 "gemm_bfp16.h" 100 23 first + 8916 "00111010" // VST.CONV.bf16.fp32 cmh1, [p0, dj1];OR r17, r6, r2; ADD.NC r2, r2, #4 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8917 "00001001" // /* MW 9 */ + 8918 "10000001" // /* MW 8 */ + 8919 "01001000" // /* MW 7 */ + 8920 "00101100" // /* MW 6 */ + 8921 "00010001" // /* MW 5 */ + 8922 "00001101" // /* MW 4 */ + 8923 "01100000" // /* MW 3 */ + 8924 "00011100" // /* MW 2 */ + 8925 "00000100" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 93 12 first +.src_ref 7 "gemm_bfp16.h" 94 14 first + 8926 "00101100" // VLDA bmlh1, [p1, #64]; LSHL r18, r16, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8927 "01111011" // /* MW 5 */ + 8928 "01001000" // /* MW 4 */ + 8929 "10111000" // /* MW 3 */ + 8930 "10010110" // /* MW 2 */ + 8931 "00100010" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 93 12 +.src_ref 7 "gemm_bfp16.h" 94 14 +.src_ref 7 "gemm_bfp16.h" 95 14 first + 8932 "10111010" // VLDA bmll1, [p1], m0; LSHL r19, r7, r3; ADD.NC p3, r18, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8933 "10101000" // /* MW 9 */ + 8934 "10000000" // /* MW 8 */ + 8935 "10110100" // /* MW 7 */ + 8936 "11101101" // /* MW 6 */ + 8937 "00110001" // /* MW 5 */ + 8938 "00001111" // /* MW 4 */ + 8939 "10110000" // /* MW 3 */ + 8940 "00010010" // /* MW 2 */ + 8941 "00100001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 94 12 first +.src_ref 7 "gemm_bfp16.h" 95 14 +.src_ref 7 "gemm_bfp16.h" 96 14 first + 8942 "10111010" // VLDA bmhh0, [p3, #64]; LSHL r20, r17, r3; ADD.NC p4, r19, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8943 "10101000" // /* MW 9 */ + 8944 "11000000" // /* MW 8 */ + 8945 "00110100" // /* MW 7 */ + 8946 "11101110" // /* MW 6 */ + 8947 "01000001" // /* MW 5 */ + 8948 "00100011" // /* MW 4 */ + 8949 "10110000" // /* MW 3 */ + 8950 "10001110" // /* MW 2 */ + 8951 "01100010" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 94 12 +.src_ref 7 "gemm_bfp16.h" 96 14 + 8952 "10010100" // VLDA bmhl0, [p3]; ADD.NC p5, r20, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8953 "00000010" // /* MW 5 */ + 8954 "11010100" // /* MW 4 */ + 8955 "10111010" // /* MW 3 */ + 8956 "10001010" // /* MW 2 */ + 8957 "01100000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 95 12 first + 8958 "10011000" // VLDA bmlh0, [p4, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8959 "00110101" // /* MW 3 */ + 8960 "00010100" // /* MW 2 */ + 8961 "00000100" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 95 12 + 8962 "10011000" // VLDA bmll0, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8963 "00010101" // /* MW 3 */ + 8964 "00000100" // /* MW 2 */ + 8965 "00000100" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 96 12 first + 8966 "10011000" // VLDA bmhh1, [p5, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8967 "11110101" // /* MW 3 */ + 8968 "00010100" // /* MW 2 */ + 8969 "00000101" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 96 12 + 8970 "00111100" // VLDA bmhl1, [p5]; NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8971 "00100000" // /* MW 5 */ + 8972 "00000000" // /* MW 4 */ + 8973 "10110000" // /* MW 3 */ + 8974 "10011010" // /* MW 2 */ + 8975 "10100000" // /* MW 1 */ +.label ZLE_F_Z12post_processPai_240 +.end_of_loop + 8976 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8977 "00000000" // /* MW 15 */ + 8978 "00000000" // /* MW 14 */ + 8979 "01111000" // /* MW 13 */ + 8980 "10100101" // /* MW 12 */ + 8981 "00000001" // /* MW 11 */ + 8982 "00000000" // /* MW 10 */ + 8983 "00000000" // /* MW 9 */ + 8984 "00000000" // /* MW 8 */ + 8985 "01011011" // /* MW 7 */ + 8986 "00000001" // /* MW 6 */ + 8987 "00100000" // /* MW 5 */ + 8988 "00000000" // /* MW 4 */ + 8989 "11110000" // /* MW 3 */ + 8990 "00101100" // /* MW 2 */ + 8991 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 97 21 first +.src_ref 7 "gemm_bfp16.h" 97 23 first +.src_ref 7 "gemm_bfp16.h" 98 4 first +.loop_nesting 0 + 8992 "01011100" // VST.CONV.bf16.fp32 cml1, [p2], #256;LSHL r21, r16, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8993 "00111011" // /* MW 5 */ + 8994 "01010100" // /* MW 4 */ + 8995 "01101000" // /* MW 3 */ + 8996 "10010100" // /* MW 2 */ + 8997 "01001001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 98 4 +.src_ref 7 "gemm_bfp16.h" 98 21 +.src_ref 7 "gemm_bfp16.h" 98 23 +.src_ref 7 "gemm_bfp16.h" 102 first + 8998 "11100100" // RET lr; MOV dj2, r21 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 8999 "01000001" // /* MW 5 */ + 9000 "00010101" // /* MW 4 */ + 9001 "00000101" // /* MW 3 */ + 9002 "00000000" // /* MW 2 */ + 9003 "00000101" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 98 4 first +.src_ref 7 "gemm_bfp16.h" 98 21 first +.src_ref 7 "gemm_bfp16.h" 98 23 first +.src_ref 7 "gemm_bfp16.h" 99 4 first +.delay_slot + 9004 "01011100" // VST.CONV.bf16.fp32 cmh0, [p0, dj2];LSHL r22, r7, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9005 "00111011" // /* MW 5 */ + 9006 "11011000" // /* MW 4 */ + 9007 "01100011" // /* MW 3 */ + 9008 "00001100" // /* MW 2 */ + 9009 "00001000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 99 4 +.src_ref 7 "gemm_bfp16.h" 99 21 +.src_ref 7 "gemm_bfp16.h" 99 23 +.src_ref 7 "gemm_bfp16.h" 100 4 first +.delay_slot + 9010 "11100100" // LSHL r23, r17, r1; MOV dj0, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9011 "01000001" // /* MW 5 */ + 9012 "00010110" // /* MW 4 */ + 9013 "10110001" // /* MW 3 */ + 9014 "11000011" // /* MW 2 */ + 9015 "10001101" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 99 4 first +.src_ref 7 "gemm_bfp16.h" 99 21 first +.src_ref 7 "gemm_bfp16.h" 99 23 first +.src_ref 7 "gemm_bfp16.h" 100 4 +.src_ref 7 "gemm_bfp16.h" 100 21 +.src_ref 7 "gemm_bfp16.h" 100 23 +.delay_slot + 9016 "00000010" // VST.CONV.bf16.fp32 cml0, [p0, dj0]; MOV dj1, r23 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9017 "01110000" // /* MW 7 */ + 9018 "11010000" // /* MW 6 */ + 9019 "11000101" // /* MW 5 */ + 9020 "00000000" // /* MW 4 */ + 9021 "01100000" // /* MW 3 */ + 9022 "00000100" // /* MW 2 */ + 9023 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 100 4 first +.src_ref 7 "gemm_bfp16.h" 100 21 first +.src_ref 7 "gemm_bfp16.h" 100 23 first +.delay_slot + 9024 "00011000" // VST.CONV.bf16.fp32 cmh1, [p0, dj1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9025 "11100011" // /* MW 3 */ + 9026 "00100000" // /* MW 2 */ + 9027 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z12post_processPai__end +.label __Z12post_processPai___func_end0 + 9029 "00000000" // /* MW 1 */ +.label __Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params___func_begin0 +.label _Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params +.function gemm_bfp16 _Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params +.src_ref 7 "gemm_bfp16.h" 225 first +.src_ref 7 "gemm_bfp16.h" 231 12 +.src_ref 7 "gemm_bfp16.h" 231 12 +.function_start + 9040 "01110110" // MOVA m4, #-300; MOVS p4, p7; MOVXM p7, #508736 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9041 "00010000" // /* MW 11 */ + 9042 "10100000" // /* MW 10 */ + 9043 "10110001" // /* MW 9 */ + 9044 "11110011" // /* MW 8 */ + 9045 "00000001" // /* MW 7 */ + 9046 "00000000" // /* MW 6 */ + 9047 "10001011" // /* MW 5 */ + 9048 "10011100" // /* MW 4 */ + 9049 "10000100" // /* MW 3 */ + 9050 "10010000" // /* MW 2 */ + 9051 "11011010" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 231 12 first + 9052 "10011000" // LDA r16, [p7], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9053 "00010110" // /* MW 3 */ + 9054 "10001010" // /* MW 2 */ + 9055 "00000111" // /* MW 1 */ + 9056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9057 "00000000" // /* MW 1 */ + 9058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9059 "00000000" // /* MW 1 */ + 9060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9061 "00000000" // /* MW 1 */ +.src_ref 10 "aie_core.h" 100 15 +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 11 "array_helpers.hpp" 313 19 +.src_ref 5 "accum.hpp" 903 19 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 940 83 + 9062 "00000010" // MOVS p0, p6; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9063 "01110000" // /* MW 7 */ + 9064 "01100000" // /* MW 6 */ + 9065 "00110000" // /* MW 5 */ + 9066 "00000011" // /* MW 4 */ + 9067 "01100000" // /* MW 3 */ + 9068 "00010001" // /* MW 2 */ + 9069 "00010011" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 225 + 9070 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9071 "00000001" // /* MW 5 */ + 9072 "00000000" // /* MW 4 */ + 9073 "00000000" // /* MW 3 */ + 9074 "00001000" // /* MW 2 */ + 9075 "00000000" // /* MW 1 */ + 9076 "10011000" // ST p0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9077 "00011101" // /* MW 3 */ + 9078 "11111100" // /* MW 2 */ + 9079 "00001111" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 231 6 +.src_ref 7 "gemm_bfp16.h" 231 28 + 9080 "00111010" // ST p4, [sp, #-16]; JNZ r16, #9168 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9168 delay_slots=5 */ + 9081 "01100001" // /* MW 9 */ + 9082 "00000000" // /* MW 8 */ + 9083 "00010000" // /* MW 7 */ + 9084 "01111010" // /* MW 6 */ + 9085 "00000100" // /* MW 5 */ + 9086 "00100000" // /* MW 4 */ + 9087 "10110000" // /* MW 3 */ + 9088 "01000011" // /* MW 2 */ + 9089 "11111110" // /* MW 1 */ +.delay_slot + 9090 "10011000" // ST p2, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9091 "00011101" // /* MW 3 */ + 9092 "11110101" // /* MW 2 */ + 9093 "00001111" // /* MW 1 */ +.delay_slot + 9094 "10011000" // ST p1, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9095 "10011101" // /* MW 3 */ + 9096 "11101100" // /* MW 2 */ + 9097 "00001111" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 9098 "01110110" // MOVA r18, #1; ST lr, [sp, #-8]; MOVXM p0, #508940 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9099 "00010000" // /* MW 11 */ + 9100 "00000110" // /* MW 10 */ + 9101 "00110010" // /* MW 9 */ + 9102 "11110000" // /* MW 8 */ + 9103 "00000001" // /* MW 7 */ + 9104 "10000000" // /* MW 6 */ + 9105 "00111101" // /* MW 5 */ + 9106 "11111000" // /* MW 4 */ + 9107 "00000111" // /* MW 3 */ + 9108 "00110010" // /* MW 2 */ + 9109 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.src_ref 4 "tile.hpp" 86 8 +.src_ref 4 "tile.hpp" 86 8 +.delay_slot + 9110 "01110110" // MOVA r17, #11; ST r18, [p0]; MOVXM p0, #508944 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9111 "00010000" // /* MW 11 */ + 9112 "00001000" // /* MW 10 */ + 9113 "00110010" // /* MW 9 */ + 9114 "11110000" // /* MW 8 */ + 9115 "00000001" // /* MW 7 */ + 9116 "10000000" // /* MW 6 */ + 9117 "01010001" // /* MW 5 */ + 9118 "00000110" // /* MW 4 */ + 9119 "00000000" // /* MW 3 */ + 9120 "01110001" // /* MW 2 */ + 9121 "00000001" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 first +.src_ref 7 "gemm_bfp16.h" 235 66 +.delay_slot + 9122 "10111010" // ST.s8 r17, [p0]; MOVXM p5, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9123 "00010000" // /* MW 9 */ + 9124 "00000000" // /* MW 8 */ + 9125 "10110001" // /* MW 7 */ + 9126 "11110010" // /* MW 6 */ + 9127 "00000001" // /* MW 5 */ + 9128 "00000000" // /* MW 4 */ + 9129 "11100000" // /* MW 3 */ + 9130 "11000100" // /* MW 2 */ + 9131 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 235 4 +.src_ref 7 "gemm_bfp16.h" 235 66 first + 9132 "11010100" // LDA r0, [p5], #8; MOV p0, p2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9133 "10000001" // /* MW 5 */ + 9134 "11001001" // /* MW 4 */ + 9135 "11010000" // /* MW 3 */ + 9136 "10000010" // /* MW 2 */ + 9137 "10100101" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 235 79 + 9138 "10011000" // LDA r1, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9139 "00110110" // /* MW 3 */ + 9140 "00000100" // /* MW 2 */ + 9141 "00000101" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 235 92 + 9142 "10011000" // LDA r2, [p5, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9143 "01010110" // /* MW 3 */ + 9144 "00010100" // /* MW 2 */ + 9145 "00000101" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 235 4 +.no_stack_arguments + 9146 "00000100" // JL #8432 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8432 delay_slots=5 */ + 9147 "00000001" // /* MW 5 */ + 9148 "00000000" // /* MW 4 */ + 9149 "01111000" // /* MW 3 */ + 9150 "00010000" // /* MW 2 */ + 9151 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 235 4 +.delay_slot + 9152 "11111000" // MOV p1, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9153 "11000000" // /* MW 3 */ + 9154 "01100110" // /* MW 2 */ + 9155 "00011001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9157 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9159 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9161 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9162 "00111100" // NOPA; NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9163 "00100000" // /* MW 5 */ + 9164 "00000000" // /* MW 4 */ + 9165 "11110000" // /* MW 3 */ + 9166 "00101100" // /* MW 2 */ + 9167 "00000000" // /* MW 1 */ +.label TGT_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_128 +.src_ref 9 "tuple" 562 47 +.src_ref 4 "tile.hpp" 86 8 +.src_ref 7 "gemm_bfp16.h" 252 79 +.src_ref 7 "gemm_bfp16.h" 252 85 +.return_address + 9168 "10111010" // MOVA r16, #184; MOVX r18, #-184; MOV m4, #220 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9169 "01011000" // /* MW 9 */ + 9170 "11011100" // /* MW 8 */ + 9171 "00000000" // /* MW 7 */ + 9172 "00001010" // /* MW 6 */ + 9173 "00100001" // /* MW 5 */ + 9174 "00111011" // /* MW 4 */ + 9175 "00000000" // /* MW 3 */ + 9176 "00010000" // /* MW 2 */ + 9177 "00010111" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 252 85 first + 9178 "10011000" // LDA r27, [p7], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9179 "01110110" // /* MW 3 */ + 9180 "10001011" // /* MW 2 */ + 9181 "00000111" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 +.src_ref 7 "gemm_bfp16.h" 252 79 + 9182 "11111000" // MOV r19, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9183 "11000000" // /* MW 3 */ + 9184 "11011110" // /* MW 2 */ + 9185 "00011100" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 126 19 + 9186 "00011000" // ADD.NC r20, r19, #-56 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9187 "11100100" // /* MW 3 */ + 9188 "00011001" // /* MW 2 */ + 9189 "00011101" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 + 9190 "01011000" // ADD.NC p7, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9191 "01010001" // /* MW 3 */ + 9192 "01101001" // /* MW 2 */ + 9193 "00011111" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 +.src_ref 7 "gemm_bfp16.h" 252 79 + 9194 "00011000" // MOVX r17, #240 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9195 "11000001" // /* MW 3 */ + 9196 "11100010" // /* MW 2 */ + 9197 "00010000" // /* MW 1 */ + 9198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9199 "00000000" // /* MW 1 */ + 9200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9201 "00000000" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 first +.src_ref 7 "gemm_bfp16.h" 252 79 + 9202 "00011000" // SEL.EQZ r18, r20, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9203 "00110010" // /* MW 3 */ + 9204 "00100101" // /* MW 2 */ + 9205 "00010101" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 first +.src_ref 9 "tuple" 562 47 first +.src_ref 7 "gemm_bfp16.h" 252 79 first + 9206 "00100100" // SEL.EQZ r16, r16, r17, r27; ADD.NC p3, r18, #4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9207 "00000100" // /* MW 5 */ + 9208 "11010010" // /* MW 4 */ + 9209 "01000110" // /* MW 3 */ + 9210 "00100010" // /* MW 2 */ + 9211 "10000100" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 +.src_ref 7 "gemm_bfp16.h" 134 10 first +.src_ref 7 "gemm_bfp16.h" 252 79 + 9212 "10111010" // LDA dj1, [p3], #4; JZ r27, #9472 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9472 delay_slots=5 */ + 9213 "01100000" // /* MW 9 */ + 9214 "00000000" // /* MW 8 */ + 9215 "00000000" // /* MW 7 */ + 9216 "10100000" // /* MW 6 */ + 9217 "00000100" // /* MW 5 */ + 9218 "00110110" // /* MW 4 */ + 9219 "11010000" // /* MW 3 */ + 9220 "10011000" // /* MW 2 */ + 9221 "01100011" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 +.src_ref 9 "tuple" 562 47 +.src_ref 9 "tuple" 562 47 first +.delay_slot + 9222 "11010100" // LDA dn5, [p3], #4; MOV dj3, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9223 "01000001" // /* MW 5 */ + 9224 "00010000" // /* MW 4 */ + 9225 "11010111" // /* MW 3 */ + 9226 "11010100" // /* MW 2 */ + 9227 "01100011" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 +.delay_slot + 9228 "10011000" // LDA dj5, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9229 "11000110" // /* MW 3 */ + 9230 "00011110" // /* MW 2 */ + 9231 "00000011" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 +.src_ref 9 "tuple" 562 47 +.delay_slot + 9232 "10011000" // LDA dn1, [p7, dj3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9233 "10100110" // /* MW 3 */ + 9234 "01100000" // /* MW 2 */ + 9235 "00000111" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 +.delay_slot + 9236 "10011000" // LDA r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9237 "00010110" // /* MW 3 */ + 9238 "00000110" // /* MW 2 */ + 9239 "00000011" // /* MW 1 */ +.src_ref 9 "tuple" 562 49 +.delay_slot + 9240 "10011000" // LDA m4, [p3, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9241 "00000110" // /* MW 3 */ + 9242 "00010110" // /* MW 2 */ + 9243 "00000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 109 101 +.src_ref 7 "gemm_bfp16.h" 113 16 +.src_ref 7 "gemm_bfp16.h" 135 60 + 9244 "10111010" // LDA p3, [sp, #-20]; MOVXM p2, #508448 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9245 "00010000" // /* MW 9 */ + 9246 "00010000" // /* MW 8 */ + 9247 "00110001" // /* MW 7 */ + 9248 "11110001" // /* MW 6 */ + 9249 "00000001" // /* MW 5 */ + 9250 "00000000" // /* MW 4 */ + 9251 "00100000" // /* MW 3 */ + 9252 "10110011" // /* MW 2 */ + 9253 "11111101" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 110 4 first +.src_ref 7 "gemm_bfp16.h" 135 60 first + 9254 "10111010" // LDA r19, [p2]; MOVXM ls, #9344 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9255 "00010000" // /* MW 9 */ + 9256 "01000000" // /* MW 8 */ + 9257 "01111010" // /* MW 7 */ + 9258 "00001000" // /* MW 6 */ + 9259 "00000000" // /* MW 5 */ + 9260 "00000000" // /* MW 4 */ + 9261 "11010000" // /* MW 3 */ + 9262 "11001110" // /* MW 2 */ + 9263 "01000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 110 4 +.src_ref 7 "gemm_bfp16.h" 135 68 + 9264 "10111010" // MOVA r20, #-6; MOVXM le, #9408 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9265 "00010000" // /* MW 9 */ + 9266 "01100000" // /* MW 8 */ + 9267 "10111010" // /* MW 7 */ + 9268 "00001001" // /* MW 6 */ + 9269 "00000000" // /* MW 5 */ + 9270 "00000000" // /* MW 4 */ + 9271 "00000000" // /* MW 3 */ + 9272 "01010100" // /* MW 2 */ + 9273 "11111111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 109 101 +.src_ref 4 "vector.hpp" 109 101 +.src_ref 4 "vector.hpp" 1365 19 +.src_ref 4 "transpose.hpp" 224 15 +.src_ref 4 "transpose.hpp" 225 15 + 9274 "01100100" // MOVX r17, #52; MOV r18, #53 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9275 "11010101" // /* MW 5 */ + 9276 "00100000" // /* MW 4 */ + 9277 "00101001" // /* MW 3 */ + 9278 "01011010" // /* MW 2 */ + 9279 "00000100" // /* MW 1 */ + 9280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9281 "00000000" // /* MW 1 */ + 9282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9283 "00000000" // /* MW 1 */ + 9284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9285 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 109 101 + 9286 "11111000" // MOV p2, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9287 "11000000" // /* MW 3 */ + 9288 "01100110" // /* MW 2 */ + 9289 "00011010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 109 101 first +.src_ref 7 "gemm_bfp16.h" 135 68 + 9290 "00101100" // VLDA lfh0, [p2, #64]; LSHL r19, r19, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9291 "10011011" // /* MW 5 */ + 9292 "11001110" // /* MW 4 */ + 9293 "11111001" // /* MW 3 */ + 9294 "10000000" // /* MW 2 */ + 9295 "01000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 109 101 +.src_ref 7 "gemm_bfp16.h" 110 4 first + 9296 "00010100" // VLDA lfl0, [p2], #128; ADD.NC lc, r19, #-2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9297 "11111110" // /* MW 5 */ + 9298 "11110011" // /* MW 4 */ + 9299 "11111010" // /* MW 3 */ + 9300 "10010000" // /* MW 2 */ + 9301 "01000101" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9303 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 109 101 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9304 "10011000" // VLDA lfh0, [p2, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9305 "00000111" // /* MW 3 */ + 9306 "00010100" // /* MW 2 */ + 9307 "00000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 109 101 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9308 "10011000" // VLDA lfl0, [p2], #128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9309 "10000111" // /* MW 3 */ + 9310 "00101100" // /* MW 2 */ + 9311 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9313 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9315 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 109 101 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9316 "11111000" // VMOV x8, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9317 "10010010" // /* MW 3 */ + 9318 "00100001" // /* MW 2 */ + 9319 "00011100" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9320 "00000010" // NOPS; VMOV x1, lfh0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9321 "01110000" // /* MW 7 */ + 9322 "11001001" // /* MW 6 */ + 9323 "01010000" // /* MW 5 */ + 9324 "00000000" // /* MW 4 */ + 9325 "01100000" // /* MW 3 */ + 9326 "00101011" // /* MW 2 */ + 9327 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9328 "11100001" // NOPA; NOPB; NOPS; NOPX; VMOV x0, lfl0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9329 "00000000" // /* MW 15 */ + 9330 "00000000" // /* MW 14 */ + 9331 "01111000" // /* MW 13 */ + 9332 "11001001" // /* MW 12 */ + 9333 "00010010" // /* MW 11 */ + 9334 "00000000" // /* MW 10 */ + 9335 "00000000" // /* MW 9 */ + 9336 "00000000" // /* MW 8 */ + 9337 "01011011" // /* MW 7 */ + 9338 "00000001" // /* MW 6 */ + 9339 "00100000" // /* MW 5 */ + 9340 "00000000" // /* MW 4 */ + 9341 "11110000" // /* MW 3 */ + 9342 "00101100" // /* MW 2 */ + 9343 "00000000" // /* MW 1 */ +.label ZLS_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_304 +.src_ref 4 "vector.hpp" 109 101 +.src_ref 4 "vector.hpp" 109 101 +.src_ref 4 "vector.hpp" 1365 19 first +.src_ref 4 "transpose.hpp" 225 15 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.noswbrkpt +.loop_nesting 1 + 9344 "11100001" // VLDA lfh0, [p2, #64]; NOPB; NOPS; NOPX; VSHUFFLE x3, x0, x8, r18; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9345 "00000000" // /* MW 15 */ + 9346 "00000000" // /* MW 14 */ + 9347 "00111000" // /* MW 13 */ + 9348 "00100100" // /* MW 12 */ + 9349 "11000010" // /* MW 11 */ + 9350 "00000000" // /* MW 10 */ + 9351 "00000000" // /* MW 9 */ + 9352 "00000000" // /* MW 8 */ + 9353 "01011011" // /* MW 7 */ + 9354 "00000001" // /* MW 6 */ + 9355 "00100000" // /* MW 5 */ + 9356 "00000000" // /* MW 4 */ + 9357 "11110000" // /* MW 3 */ + 9358 "10000000" // /* MW 2 */ + 9359 "01000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 109 101 first +.src_ref 4 "vector.hpp" 109 101 first +.src_ref 4 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9360 "11100001" // VLDA lfl0, [p2], #128; NOPB; NOPS; NOPX; VSHUFFLE x2, x0, x8, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9361 "00000000" // /* MW 15 */ + 9362 "00000000" // /* MW 14 */ + 9363 "00111000" // /* MW 13 */ + 9364 "00100010" // /* MW 12 */ + 9365 "10000010" // /* MW 11 */ + 9366 "00000000" // /* MW 10 */ + 9367 "00000000" // /* MW 9 */ + 9368 "00000000" // /* MW 8 */ + 9369 "01011011" // /* MW 7 */ + 9370 "00000001" // /* MW 6 */ + 9371 "00100000" // /* MW 5 */ + 9372 "00000000" // /* MW 4 */ + 9373 "11110000" // /* MW 3 */ + 9374 "10010000" // /* MW 2 */ + 9375 "01000101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 109 101 +.src_ref 7 "gemm_bfp16.h" 113 16 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9376 "11100001" // NOPA; NOPB; VST x3, [p3, #64]; NOPX; VMOV x8, lfh0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9377 "00000000" // /* MW 15 */ + 9378 "00000000" // /* MW 14 */ + 9379 "01111000" // /* MW 13 */ + 9380 "11001001" // /* MW 12 */ + 9381 "00010000" // /* MW 11 */ + 9382 "00000010" // /* MW 10 */ + 9383 "00000000" // /* MW 9 */ + 9384 "00000000" // /* MW 8 */ + 9385 "11010011" // /* MW 7 */ + 9386 "00010100" // /* MW 6 */ + 9387 "00100011" // /* MW 5 */ + 9388 "00000000" // /* MW 4 */ + 9389 "11110000" // /* MW 3 */ + 9390 "00101100" // /* MW 2 */ + 9391 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 113 16 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9392 "11100001" // NOPA; NOPB; VST x2, [p3], #128; NOPX; VMOV x1, lfh0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9393 "00000000" // /* MW 15 */ + 9394 "00000000" // /* MW 14 */ + 9395 "01111000" // /* MW 13 */ + 9396 "11001001" // /* MW 12 */ + 9397 "01010000" // /* MW 11 */ + 9398 "00000000" // /* MW 10 */ + 9399 "00000000" // /* MW 9 */ + 9400 "00000000" // /* MW 8 */ + 9401 "10010011" // /* MW 7 */ + 9402 "00101100" // /* MW 6 */ + 9403 "00100011" // /* MW 5 */ + 9404 "00000000" // /* MW 4 */ + 9405 "11110000" // /* MW 3 */ + 9406 "00101100" // /* MW 2 */ + 9407 "00000000" // /* MW 1 */ +.label ZLE_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_368 +.end_of_loop +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9408 "11100001" // NOPA; NOPB; NOPS; NOPX; VMOV x0, lfl0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9409 "00000000" // /* MW 15 */ + 9410 "00000000" // /* MW 14 */ + 9411 "01111000" // /* MW 13 */ + 9412 "11001001" // /* MW 12 */ + 9413 "00010010" // /* MW 11 */ + 9414 "00000000" // /* MW 10 */ + 9415 "00000000" // /* MW 9 */ + 9416 "00000000" // /* MW 8 */ + 9417 "01011011" // /* MW 7 */ + 9418 "00000001" // /* MW 6 */ + 9419 "00100000" // /* MW 5 */ + 9420 "00000000" // /* MW 4 */ + 9421 "11110000" // /* MW 3 */ + 9422 "00101100" // /* MW 2 */ + 9423 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 109 101 first +.src_ref 4 "vector.hpp" 1365 19 first +.src_ref 4 "transpose.hpp" 225 15 first +.loop_nesting 0 + 9424 "01111000" // VSHUFFLE x3, x0, x8, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9425 "01001000" // /* MW 3 */ + 9426 "10000100" // /* MW 2 */ + 9427 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 109 101 +.src_ref 4 "transpose.hpp" 224 15 first + 9428 "01111000" // VSHUFFLE x2, x0, x8, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9429 "01000100" // /* MW 3 */ + 9430 "00000100" // /* MW 2 */ + 9431 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 109 101 first +.src_ref 7 "gemm_bfp16.h" 113 16 first + 9432 "00000010" // VST x3, [p3, #64]; VMOV x8, lfh0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9433 "01110000" // /* MW 7 */ + 9434 "11001001" // /* MW 6 */ + 9435 "00010000" // /* MW 5 */ + 9436 "00000010" // /* MW 4 */ + 9437 "01100000" // /* MW 3 */ + 9438 "10011010" // /* MW 2 */ + 9439 "01100010" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 113 16 + 9440 "00000010" // VST x2, [p3], #128; VMOV x0, lfl0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9441 "01110000" // /* MW 7 */ + 9442 "11001001" // /* MW 6 */ + 9443 "00010010" // /* MW 5 */ + 9444 "00000000" // /* MW 4 */ + 9445 "01100000" // /* MW 3 */ + 9446 "10010010" // /* MW 2 */ + 9447 "01100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 109 101 +.src_ref 4 "vector.hpp" 1365 19 first +.src_ref 4 "transpose.hpp" 225 15 first + 9448 "01111000" // VSHUFFLE x3, x0, x8, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9449 "01001000" // /* MW 3 */ + 9450 "10000100" // /* MW 2 */ + 9451 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 109 101 first +.src_ref 4 "transpose.hpp" 224 15 first + 9452 "01111000" // VSHUFFLE x2, x0, x8, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9453 "01000100" // /* MW 3 */ + 9454 "00000100" // /* MW 2 */ + 9455 "00011001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 113 16 first + 9456 "00000010" // VST x3, [p3, #64]; VMOV x1, lfh0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9457 "01110000" // /* MW 7 */ + 9458 "11001001" // /* MW 6 */ + 9459 "01010000" // /* MW 5 */ + 9460 "00000000" // /* MW 4 */ + 9461 "01100000" // /* MW 3 */ + 9462 "10011010" // /* MW 2 */ + 9463 "01100010" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 113 16 + 9464 "00000010" // VST x2, [p3], #128; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9465 "01110000" // /* MW 7 */ + 9466 "10100101" // /* MW 6 */ + 9467 "00000001" // /* MW 5 */ + 9468 "00000000" // /* MW 4 */ + 9469 "01100000" // /* MW 3 */ + 9470 "10010010" // /* MW 2 */ + 9471 "01100101" // /* MW 1 */ +.label TGT_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_432 +.src_ref 7 "gemm_bfp16.h" 141 44 first + 9472 "00011000" // PADDB [p7], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9473 "10010000" // /* MW 3 */ + 9474 "00011111" // /* MW 2 */ + 9475 "00111111" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 148 2 first + 9476 "10011000" // LDA dj3, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9477 "11000110" // /* MW 3 */ + 9478 "00011101" // /* MW 2 */ + 9479 "00000111" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 148 2 +.src_ref 7 "gemm_bfp16.h" 148 2 + 9480 "01010100" // LDA dn3, [p7], #4; MOV m5, #-36 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9481 "01110001" // /* MW 5 */ + 9482 "00011111" // /* MW 4 */ + 9483 "11011010" // /* MW 3 */ + 9484 "10110100" // /* MW 2 */ + 9485 "11100011" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 148 2 + 9486 "10011000" // LDA r18, [p7], m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9487 "01010110" // /* MW 3 */ + 9488 "10101010" // /* MW 2 */ + 9489 "00000111" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 172 37 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 9490 "10111010" // LDA r20, [p7], #12; MOVXM p3, #508944 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9491 "00010000" // /* MW 9 */ + 9492 "00001000" // /* MW 8 */ + 9493 "10110010" // /* MW 7 */ + 9494 "11110001" // /* MW 6 */ + 9495 "00000001" // /* MW 5 */ + 9496 "00000000" // /* MW 4 */ + 9497 "11010000" // /* MW 3 */ + 9498 "11010010" // /* MW 2 */ + 9499 "11100111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 9500 "10111010" // LDA.s8 r20, [p3]; MOVXM r23, #16256 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9501 "00010000" // /* MW 9 */ + 9502 "11000000" // /* MW 8 */ + 9503 "11101111" // /* MW 7 */ + 9504 "00001110" // /* MW 6 */ + 9505 "00000000" // /* MW 5 */ + 9506 "00000000" // /* MW 4 */ + 9507 "01010000" // /* MW 3 */ + 9508 "11010000" // /* MW 2 */ + 9509 "01100000" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 10 "aie_core.h" 81 15 +.src_ref 7 "gemm_bfp16.h" 174 6 +.src_ref 7 "gemm_bfp16.h" 175 6 +.src_ref 7 "gemm_bfp16.h" 176 6 +.src_ref 7 "gemm_bfp16.h" 177 6 +.src_ref 7 "gemm_bfp16.h" 202 6 +.src_ref 7 "gemm_bfp16.h" 203 6 +.src_ref 7 "gemm_bfp16.h" 204 6 +.src_ref 7 "gemm_bfp16.h" 205 6 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9510 "11010100" // LDA p3, [sp, #-12]; VBCST.16 x5, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9511 "11100101" // /* MW 5 */ + 9512 "10111010" // /* MW 4 */ + 9513 "00100101" // /* MW 3 */ + 9514 "10110011" // /* MW 2 */ + 9515 "11111110" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 7 "gemm_bfp16.h" 172 37 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9516 "01010100" // VLDA.CONV.fp32.bf16 cmh4, [p6, #64]; MOV m6, #84 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9517 "01010001" // /* MW 5 */ + 9518 "00000001" // /* MW 4 */ + 9519 "01111100" // /* MW 3 */ + 9520 "11001101" // /* MW 2 */ + 9521 "11000010" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 172 37 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9522 "11010100" // LDA m7, [p7], #4; VBCST.16 x4, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9523 "11100101" // /* MW 5 */ + 9524 "10111010" // /* MW 4 */ + 9525 "11010100" // /* MW 3 */ + 9526 "11110000" // /* MW 2 */ + 9527 "11100011" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 172 37 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9528 "11010100" // LDA m3, [p7], #4; VMOV x10, x4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9529 "00100101" // /* MW 5 */ + 9530 "01010001" // /* MW 4 */ + 9531 "11011010" // /* MW 3 */ + 9532 "10110000" // /* MW 2 */ + 9533 "11100011" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 172 37 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9534 "11010100" // LDA m1, [p7], #4; VMOV x11, x5 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9535 "00100101" // /* MW 5 */ + 9536 "01010101" // /* MW 4 */ + 9537 "11011011" // /* MW 3 */ + 9538 "10010000" // /* MW 2 */ + 9539 "11100011" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 172 37 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9540 "00101100" // LDA m6, [p7], m6; ADD r23, r20, #-2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9541 "11110110" // /* MW 5 */ + 9542 "01011111" // /* MW 4 */ + 9543 "11011010" // /* MW 3 */ + 9544 "01100000" // /* MW 2 */ + 9545 "11111001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 172 37 +.src_ref 7 "gemm_bfp16.h" 172 37 + 9546 "01010100" // LDA m0, [p7], #-16; MOV m2, #280 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9547 "01100001" // /* MW 5 */ + 9548 "00000100" // /* MW 4 */ + 9549 "11010100" // /* MW 3 */ + 9550 "10000000" // /* MW 2 */ + 9551 "11111001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 172 37 +.src_ref 7 "gemm_bfp16.h" 172 37 + 9552 "01010100" // LDA dn0, [p7], #4; MOV m5, #-108 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9553 "01010001" // /* MW 5 */ + 9554 "00011110" // /* MW 4 */ + 9555 "11011010" // /* MW 3 */ + 9556 "10000100" // /* MW 2 */ + 9557 "11100011" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 172 37 + 9558 "10011000" // LDA dj0, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9559 "01000110" // /* MW 3 */ + 9560 "00011100" // /* MW 2 */ + 9561 "00000111" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 172 37 + 9562 "10011000" // LDA dn4, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9563 "00100110" // /* MW 3 */ + 9564 "00011110" // /* MW 2 */ + 9565 "00000111" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 172 37 + 9566 "10011000" // LDA dj4, [p7], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9567 "01000110" // /* MW 3 */ + 9568 "00101110" // /* MW 2 */ + 9569 "00000111" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 10 "aie_core.h" 81 15 +.src_ref 10 "aie_core.h" 100 15 +.src_ref 10 "aie_core.h" 100 15 +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 5 "accum.hpp" 903 19 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 7 "gemm_bfp16.h" 172 37 + 9570 "01010100" // LDA m5, [p7], m5; MOV dc4, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9571 "00000001" // /* MW 5 */ + 9572 "10000000" // /* MW 4 */ + 9573 "11011001" // /* MW 3 */ + 9574 "01010000" // /* MW 2 */ + 9575 "11110101" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 172 37 +.src_ref 7 "gemm_bfp16.h" 175 6 +.src_ref 7 "gemm_bfp16.h" 268 12 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 9576 "10111010" // LDA r26, [p7], m2; MOVS p0, p3; MOV r25, p7 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9577 "01110010" // /* MW 9 */ + 9578 "01100000" // /* MW 8 */ + 9579 "00101111" // /* MW 7 */ + 9580 "00000011" // /* MW 6 */ + 9581 "10001011" // /* MW 5 */ + 9582 "10001100" // /* MW 4 */ + 9583 "11010000" // /* MW 3 */ + 9584 "01101010" // /* MW 2 */ + 9585 "11101001" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 10 "aie_core.h" 100 15 +.src_ref 10 "aie_core.h" 100 15 +.src_ref 10 "aie_core.h" 100 15 +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 11 "array_helpers.hpp" 313 19 +.src_ref 5 "accum.hpp" 903 19 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 7 "gemm_bfp16.h" 172 2 +.src_ref 7 "gemm_bfp16.h" 175 6 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 9586 "01111110" // LDA p7, [sp, #-20]; PADDB [p0], m3; MOVS dc0, dc4; MOVXM p2, #9696 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 9587 "01100000" // /* MW 13 */ + 9588 "00001001" // /* MW 12 */ + 9589 "00000010" // /* MW 11 */ + 9590 "00000010" // /* MW 10 */ + 9591 "01011110" // /* MW 9 */ + 9592 "00100110" // /* MW 8 */ + 9593 "00000001" // /* MW 7 */ + 9594 "00000000" // /* MW 6 */ + 9595 "00100000" // /* MW 5 */ + 9596 "11010111" // /* MW 4 */ + 9597 "00100000" // /* MW 3 */ + 9598 "11110011" // /* MW 2 */ + 9599 "11111101" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 10 "aie_core.h" 100 15 +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 7 "gemm_bfp16.h" 175 6 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 9600 "10111010" // VLDA bmlh2, [p0, #64]; MOVS dc2, dc4; MOV dc5, dc4 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9601 "01110010" // /* MW 9 */ + 9602 "11000000" // /* MW 8 */ + 9603 "11100100" // /* MW 7 */ + 9604 "00000010" // /* MW 6 */ + 9605 "01001011" // /* MW 5 */ + 9606 "00010000" // /* MW 4 */ + 9607 "10110010" // /* MW 3 */ + 9608 "10100110" // /* MW 2 */ + 9609 "00000010" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 10 "aie_core.h" 81 15 +.src_ref 7 "gemm_bfp16.h" 174 6 +.src_ref 7 "gemm_bfp16.h" 175 6 +.src_ref 7 "gemm_bfp16.h" 176 6 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 9610 "10111010" // VLDA bmhl2, [p0, #128]; MOVS p4, p3; MOV dj2, dj3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9611 "01110010" // /* MW 9 */ + 9612 "10000000" // /* MW 8 */ + 9613 "01000011" // /* MW 7 */ + 9614 "00000001" // /* MW 6 */ + 9615 "10001011" // /* MW 5 */ + 9616 "10001100" // /* MW 4 */ + 9617 "10110100" // /* MW 3 */ + 9618 "10101010" // /* MW 2 */ + 9619 "00000100" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 +.src_ref 7 "gemm_bfp16.h" 175 6 +.src_ref 7 "gemm_bfp16.h" 176 6 +.src_ref 7 "gemm_bfp16.h" 177 6 first +.src_ref 7 "gemm_bfp16.h" 202 6 +.src_ref 7 "gemm_bfp16.h" 203 6 +.src_ref 7 "gemm_bfp16.h" 204 6 +.src_ref 7 "gemm_bfp16.h" 204 6 +.src_ref 7 "gemm_bfp16.h" 205 6 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 9620 "01111110" // VLDA bmhh2, [p0, #192]; PADDB [p3], m6; MOVS p1, p3; MOVX r17, #780; MOV r24, m1 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 9621 "01100000" // /* MW 13 */ + 9622 "10010001" // /* MW 12 */ + 9623 "00110001" // /* MW 11 */ + 9624 "00001111" // /* MW 10 */ + 9625 "00100000" // /* MW 9 */ + 9626 "01100001" // /* MW 8 */ + 9627 "00110001" // /* MW 7 */ + 9628 "00100010" // /* MW 6 */ + 9629 "00100011" // /* MW 5 */ + 9630 "10010111" // /* MW 4 */ + 9631 "10110111" // /* MW 3 */ + 9632 "10101110" // /* MW 2 */ + 9633 "00000110" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 10 "aie_core.h" 100 15 +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 5 "mmul_bf16_bf16.hpp" 113 29 +.src_ref 5 "mmul_bf16_bf16.hpp" 113 29 +.src_ref 5 "mmul_bf16_bf16.hpp" 114 29 +.src_ref 5 "mmul_bf16_bf16.hpp" 114 29 +.src_ref 7 "gemm_bfp16.h" 177 6 +.src_ref 7 "gemm_bfp16.h" 268 37 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9634 "01111110" // VLDA bmlh0, [p3, #64]; NOPB; MOVS dc3, dc0; MOVX crRnd, r20; MOV r20, p7 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 9635 "01100000" // /* MW 13 */ + 9636 "00001001" // /* MW 12 */ + 9637 "01100000" // /* MW 11 */ + 9638 "00001111" // /* MW 10 */ + 9639 "11101100" // /* MW 9 */ + 9640 "01010001" // /* MW 8 */ + 9641 "10000000" // /* MW 7 */ + 9642 "00111010" // /* MW 6 */ + 9643 "00100101" // /* MW 5 */ + 9644 "00000000" // /* MW 4 */ + 9645 "10110000" // /* MW 3 */ + 9646 "10000110" // /* MW 2 */ + 9647 "01100010" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 4 "transpose.hpp" 224 15 +.src_ref 4 "transpose.hpp" 224 15 +.src_ref 7 "gemm_bfp16.h" 175 6 +.src_ref 7 "gemm_bfp16.h" 177 6 +.src_ref 7 "gemm_bfp16.h" 203 6 + 9648 "11100001" // VLDA bmhl0, [p3, #128]; NOPB; MOVS dn2, dn3; MOVX r19, #52; MOV m2, m3; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9649 "00000000" // /* MW 15 */ + 9650 "00000000" // /* MW 14 */ + 9651 "01111000" // /* MW 13 */ + 9652 "00000000" // /* MW 12 */ + 9653 "00000011" // /* MW 11 */ + 9654 "10001001" // /* MW 10 */ + 9655 "00110110" // /* MW 9 */ + 9656 "00000001" // /* MW 8 */ + 9657 "01001011" // /* MW 7 */ + 9658 "01001110" // /* MW 6 */ + 9659 "00100010" // /* MW 5 */ + 9660 "00000000" // /* MW 4 */ + 9661 "10110000" // /* MW 3 */ + 9662 "10001010" // /* MW 2 */ + 9663 "01100100" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 10 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1365 19 +.src_ref 4 "vector.hpp" 1365 19 +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 4 "transpose.hpp" 225 15 +.src_ref 4 "transpose.hpp" 225 15 +.src_ref 7 "gemm_bfp16.h" 177 6 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 9664 "11100001" // VLDA bmhh0, [p3, #192]; NOPB; MOVS dc1, dc3; MOVX r21, #53; MOV m3, r18; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9665 "00000000" // /* MW 15 */ + 9666 "00000000" // /* MW 14 */ + 9667 "01111000" // /* MW 13 */ + 9668 "10010000" // /* MW 12 */ + 9669 "10000100" // /* MW 11 */ + 9670 "10101001" // /* MW 10 */ + 9671 "01010110" // /* MW 9 */ + 9672 "00000001" // /* MW 8 */ + 9673 "01001011" // /* MW 7 */ + 9674 "00001100" // /* MW 6 */ + 9675 "00100001" // /* MW 5 */ + 9676 "00000000" // /* MW 4 */ + 9677 "10110000" // /* MW 3 */ + 9678 "10001110" // /* MW 2 */ + 9679 "01100110" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 5 "mmul_bf16_bf16.hpp" 111 36 +.src_ref 5 "mmul_bf16_bf16.hpp" 111 36 +.src_ref 7 "gemm_bfp16.h" 174 6 +.src_ref 7 "gemm_bfp16.h" 176 6 first +.src_ref 7 "gemm_bfp16.h" 177 6 +.aggressive_scheduled_block_id 4 +.noswbrkpt + 9680 "11100001" // VLDA bmll0, [p3]; VLDB x4, [p7, #64]; PADDS [p4], m1; MOVX r22, #60; MOV p5, p4; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9681 "00000000" // /* MW 15 */ + 9682 "00000000" // /* MW 14 */ + 9683 "01111000" // /* MW 13 */ + 9684 "01100000" // /* MW 12 */ + 9685 "10110100" // /* MW 11 */ + 9686 "10001010" // /* MW 10 */ + 9687 "01100111" // /* MW 9 */ + 9688 "00000001" // /* MW 8 */ + 9689 "01011011" // /* MW 7 */ + 9690 "00101000" // /* MW 6 */ + 9691 "01101100" // /* MW 5 */ + 9692 "00101010" // /* MW 4 */ + 9693 "10111110" // /* MW 3 */ + 9694 "10000010" // /* MW 2 */ + 9695 "01100000" // /* MW 1 */ +.label TGT_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_656 +.src_ref 10 "aie_core.h" 81 15 +.src_ref 10 "aie_core.h" 100 15 +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 7 "gemm_bfp16.h" 174 6 first +.src_ref 7 "gemm_bfp16.h" 176 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 9696 "11110110" // VLDA bmlh1, [p4, #64]; PADDB [p5], m7; MOVS p3, p5; MOV m1, r16 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9697 "01110000" // /* MW 11 */ + 9698 "00010000" // /* MW 10 */ + 9699 "10000100" // /* MW 9 */ + 9700 "00000000" // /* MW 8 */ + 9701 "10001011" // /* MW 7 */ + 9702 "10010100" // /* MW 6 */ + 9703 "00100011" // /* MW 5 */ + 9704 "11010111" // /* MW 4 */ + 9705 "10111011" // /* MW 3 */ + 9706 "10010110" // /* MW 2 */ + 9707 "10000010" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 first +.src_ref 7 "gemm_bfp16.h" 176 6 first +.src_ref 7 "gemm_bfp16.h" 182 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9708 "10111010" // VLDA bmhl1, [p4, #128]; PADDB.2D [p3], d3; ADD.NC lc, r26, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9709 "01001110" // /* MW 9 */ + 9710 "10111111" // /* MW 8 */ + 9711 "10111110" // /* MW 7 */ + 9712 "00000010" // /* MW 6 */ + 9713 "10010000" // /* MW 5 */ + 9714 "01110011" // /* MW 4 */ + 9715 "10110011" // /* MW 3 */ + 9716 "10011010" // /* MW 2 */ + 9717 "10000100" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 176 6 +.src_ref 7 "gemm_bfp16.h" 182 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9718 "10111010" // VLDA bmhh1, [p4, #192]; MOVXM ls, #9904 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9719 "00010000" // /* MW 9 */ + 9720 "01011000" // /* MW 8 */ + 9721 "01111011" // /* MW 7 */ + 9722 "00001000" // /* MW 6 */ + 9723 "00000000" // /* MW 5 */ + 9724 "00000000" // /* MW 4 */ + 9725 "10110000" // /* MW 3 */ + 9726 "10011110" // /* MW 2 */ + 9727 "10000110" // /* MW 1 */ +.src_ref 10 "aie_core.h" 100 15 first +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 313 19 +.src_ref 7 "gemm_bfp16.h" 176 6 +.src_ref 7 "gemm_bfp16.h" 182 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9728 "01111110" // VLDA bmll1, [p4]; VLDB.3D x7, [p7], d1; MOVS p4, p7; MOVXM le, #9984 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 9729 "01100000" // /* MW 13 */ + 9730 "10010001" // /* MW 12 */ + 9731 "10010011" // /* MW 11 */ + 9732 "00000010" // /* MW 10 */ + 9733 "01110000" // /* MW 9 */ + 9734 "00110111" // /* MW 8 */ + 9735 "00000001" // /* MW 7 */ + 9736 "00000000" // /* MW 6 */ + 9737 "11101000" // /* MW 5 */ + 9738 "01110011" // /* MW 4 */ + 9739 "10111110" // /* MW 3 */ + 9740 "10010010" // /* MW 2 */ + 9741 "10000000" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 313 19 first +.src_ref 7 "gemm_bfp16.h" 174 6 first +.src_ref 7 "gemm_bfp16.h" 175 6 +.src_ref 7 "gemm_bfp16.h" 203 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9742 "10111010" // VLDA bmlh3, [p5, #64]; PADDB [p4], m4; MOV m3, m2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9743 "01111110" // /* MW 9 */ + 9744 "00000000" // /* MW 8 */ + 9745 "10000010" // /* MW 7 */ + 9746 "00000001" // /* MW 6 */ + 9747 "10010000" // /* MW 5 */ + 9748 "10001011" // /* MW 4 */ + 9749 "10110100" // /* MW 3 */ + 9750 "10110110" // /* MW 2 */ + 9751 "10100010" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 313 19 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9752 "00110010" // VLDA x9, [p4]; VLDB x5, [p4, #64]; MOVS p4, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9753 "10001011" // /* MW 7 */ + 9754 "10011100" // /* MW 6 */ + 9755 "11101100" // /* MW 5 */ + 9756 "00101010" // /* MW 4 */ + 9757 "01111000" // /* MW 3 */ + 9758 "11001011" // /* MW 2 */ + 9759 "10000000" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 11 "array_helpers.hpp" 313 19 first +.src_ref 7 "gemm_bfp16.h" 174 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9760 "00110010" // VLDA bmhl3, [p5, #128]; VLDB x4, [p7, #64]; PADDS [p4], m4 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9761 "01011011" // /* MW 7 */ + 9762 "10001000" // /* MW 6 */ + 9763 "01101100" // /* MW 5 */ + 9764 "00101010" // /* MW 4 */ + 9765 "10111110" // /* MW 3 */ + 9766 "10111010" // /* MW 2 */ + 9767 "10100100" // /* MW 1 */ +.src_ref 10 "aie_core.h" 100 15 first +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 7 "gemm_bfp16.h" 174 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9768 "00111100" // VLDA bmhh3, [p5, #192]; VLDB.3D x7, [p7], d1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9769 "11101000" // /* MW 5 */ + 9770 "01110011" // /* MW 4 */ + 9771 "10111110" // /* MW 3 */ + 9772 "10111110" // /* MW 2 */ + 9773 "10100110" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 11 "array_helpers.hpp" 313 19 +.src_ref 7 "gemm_bfp16.h" 174 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9774 "10111010" // VLDA bmll3, [p5]; VLDB x5, [p4, #64]; MOV p5, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9775 "01111110" // /* MW 9 */ + 9776 "01100000" // /* MW 8 */ + 9777 "10110110" // /* MW 7 */ + 9778 "00000010" // /* MW 6 */ + 9779 "01110100" // /* MW 5 */ + 9780 "00010101" // /* MW 4 */ + 9781 "10110100" // /* MW 3 */ + 9782 "10110010" // /* MW 2 */ + 9783 "10100000" // /* MW 1 */ +.src_ref 10 "aie_core.h" 100 15 first +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 313 19 first +.src_ref 5 "accum.hpp" 903 19 first +.src_ref 5 "accum.hpp" 940 83 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9784 "00111100" // VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;PADDB [p5], m5 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9785 "00100000" // /* MW 5 */ + 9786 "01010111" // /* MW 4 */ + 9787 "01111011" // /* MW 3 */ + 9788 "01000101" // /* MW 2 */ + 9789 "11000011" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 4 "transpose.hpp" 224 15 first +.src_ref 7 "gemm_bfp16.h" 175 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9790 "10111010" // VLDA bmll2, [p0]; VLDB x9, [p4]; VSHUFFLE x6, x7, x4, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9791 "00111110" // /* MW 9 */ + 9792 "00100110" // /* MW 8 */ + 9793 "10011101" // /* MW 7 */ + 9794 "00000001" // /* MW 6 */ + 9795 "01110100" // /* MW 5 */ + 9796 "00000110" // /* MW 4 */ + 9797 "10110100" // /* MW 3 */ + 9798 "10100010" // /* MW 2 */ + 9799 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1365 19 first +.src_ref 4 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9800 "01111000" // VSHUFFLE x7, x7, x4, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9801 "01010100" // /* MW 3 */ + 9802 "10111010" // /* MW 2 */ + 9803 "00011011" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 313 19 +.src_ref 4 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9804 "10111010" // VLDB x4, [p7, #64]; MOVS p4, p7; VSHUFFLE x8, x9, x5, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9805 "00110110" // /* MW 9 */ + 9806 "01100110" // /* MW 8 */ + 9807 "00100101" // /* MW 7 */ + 9808 "00000010" // /* MW 6 */ + 9809 "00110100" // /* MW 5 */ + 9810 "00010101" // /* MW 4 */ + 9811 "01100111" // /* MW 3 */ + 9812 "10010001" // /* MW 2 */ + 9813 "10010011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1365 19 first +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 11 "array_helpers.hpp" 313 19 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 5 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 4 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9814 "01100110" // VLDA.CONV.fp32.bf16 cml4, [p5];PADDB [p4], m4; VSHUFFLE x9, x9, x5, r21; VMUL.f dm4, y3, y5, r22 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9815 "01010001" // /* MW 11 */ + 9816 "11101101" // /* MW 10 */ + 9817 "10110100" // /* MW 9 */ + 9818 "01100010" // /* MW 8 */ + 9819 "11010100" // /* MW 7 */ + 9820 "11001010" // /* MW 6 */ + 9821 "00100100" // /* MW 5 */ + 9822 "00010111" // /* MW 4 */ + 9823 "01111001" // /* MW 3 */ + 9824 "11000101" // /* MW 2 */ + 9825 "10100000" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 313 19 +.src_ref 5 "accum.hpp" 903 19 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 4 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9826 "11110110" // VLDA.CONV.fp32.bf16 cmh4, [p5, #64];VLDB x5, [p4, #64]; MOVS p5, p6; VSHUFFLE x6, x7, x4, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9827 "00110000" // /* MW 11 */ + 9828 "00100110" // /* MW 10 */ + 9829 "10011101" // /* MW 9 */ + 9830 "00000001" // /* MW 8 */ + 9831 "10001011" // /* MW 7 */ + 9832 "10011000" // /* MW 6 */ + 9833 "11101101" // /* MW 5 */ + 9834 "00101010" // /* MW 4 */ + 9835 "01111000" // /* MW 3 */ + 9836 "11001101" // /* MW 2 */ + 9837 "10100010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1365 19 first +.src_ref 11 "array_helpers.hpp" 313 19 first +.src_ref 4 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9838 "11010100" // PADDA [p5], m5; VSHUFFLE x7, x7, x4, r21 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9839 "10101000" // /* MW 5 */ + 9840 "01110100" // /* MW 4 */ + 9841 "11110111" // /* MW 3 */ + 9842 "00001100" // /* MW 2 */ + 9843 "10110101" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 5 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 5 "mmul_bf16_bf16.hpp" 113 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9844 "01001010" // VLDA.CONV.fp32.bf16 cmh4, [p6, #64]; VCONV.bfp16ebs8.fp32 ex0, dm4; VMUL.f dm4, y4, y5, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9845 "01010001" // /* MW 9 */ + 9846 "11110001" // /* MW 8 */ + 9847 "10110100" // /* MW 7 */ + 9848 "00001001" // /* MW 6 */ + 9849 "00110110" // /* MW 5 */ + 9850 "00001010" // /* MW 4 */ + 9851 "01110000" // /* MW 3 */ + 9852 "11001101" // /* MW 2 */ + 9853 "11000010" // /* MW 1 */ +.src_ref 10 "aie_core.h" 100 15 first +.src_ref 10 "aie_core.h" 100 15 first +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 5 "accum.hpp" 903 19 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 4 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9854 "10111010" // VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;VLDB.3D x7, [p7], d1; VSHUFFLE x8, x9, x5, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9855 "00111110" // /* MW 9 */ + 9856 "01100110" // /* MW 8 */ + 9857 "00100101" // /* MW 7 */ + 9858 "00000010" // /* MW 6 */ + 9859 "11110100" // /* MW 5 */ + 9860 "00111001" // /* MW 4 */ + 9861 "01110111" // /* MW 3 */ + 9862 "01000101" // /* MW 2 */ + 9863 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1365 19 first +.src_ref 4 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9864 "01111000" // VSHUFFLE x9, x9, x5, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9865 "11010100" // /* MW 3 */ + 9866 "11001010" // /* MW 2 */ + 9867 "00011100" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 114 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9868 "00011000" // VCONV.bfp16ebs8.fp32 ex1, dm4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9869 "00110110" // /* MW 3 */ + 9870 "10001010" // /* MW 2 */ + 9871 "00001000" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 5 "mmul_bf16_bf16.hpp" 111 36 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9872 "01001010" // VLDA.CONV.fp32.bf16 cml4, [p5];VLDB x9, [p4]; VMUL.f dm4, y3, y5, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9873 "01010001" // /* MW 9 */ + 9874 "11101101" // /* MW 8 */ + 9875 "10110100" // /* MW 7 */ + 9876 "00011101" // /* MW 6 */ + 9877 "01110100" // /* MW 5 */ + 9878 "00000110" // /* MW 4 */ + 9879 "01110100" // /* MW 3 */ + 9880 "11000101" // /* MW 2 */ + 9881 "10100000" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 5 "accum.hpp" 903 19 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "mmul_bf16_bf16.hpp" 113 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9882 "00001100" // VLDA.CONV.fp32.bf16 cmh4, [p5, #64]; VCONV.bfp16ebs8.fp32 ex2, dm4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9883 "01101100" // /* MW 5 */ + 9884 "00010100" // /* MW 4 */ + 9885 "01110010" // /* MW 3 */ + 9886 "11001101" // /* MW 2 */ + 9887 "10100010" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 114 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9888 "11100001" // NOPA; NOPB; VCONV.bfp16ebs8.fp32 ex3, dm4;NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9889 "00000000" // /* MW 15 */ + 9890 "00000000" // /* MW 14 */ + 9891 "01111000" // /* MW 13 */ + 9892 "10100101" // /* MW 12 */ + 9893 "00000001" // /* MW 11 */ + 9894 "00000000" // /* MW 10 */ + 9895 "00000000" // /* MW 9 */ + 9896 "00000000" // /* MW 8 */ + 9897 "00110110" // /* MW 7 */ + 9898 "10001010" // /* MW 6 */ + 9899 "00100001" // /* MW 5 */ + 9900 "00000000" // /* MW 4 */ + 9901 "11110000" // /* MW 3 */ + 9902 "00101100" // /* MW 2 */ + 9903 "00000000" // /* MW 1 */ +.label ZLS_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_864 +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 313 19 +.src_ref 5 "mmul_bf16_bf16.hpp" 111 36 first +.begin_of_loop +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt +.loop_nesting 2 + 9904 "01001010" // VLDB x4, [p7, #64]; MOV p5, p6; VMUL.f dm4, y4, y5, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9905 "01010001" // /* MW 9 */ + 9906 "11110001" // /* MW 8 */ + 9907 "10110100" // /* MW 7 */ + 9908 "11100110" // /* MW 6 */ + 9909 "11000000" // /* MW 5 */ + 9910 "01101100" // /* MW 4 */ + 9911 "01101101" // /* MW 3 */ + 9912 "00101010" // /* MW 2 */ + 9913 "00001110" // /* MW 1 */ +.src_ref 10 "aie_core.h" 100 15 first +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 11 "array_helpers.hpp" 313 19 +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 4 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9914 "01001011" // VLDA.CONV.fp32.bf16 cmh4, [p6, #64];VLDB.3D x7, [p7], d1; MOVS p4, p7; NOPX; VSHUFFLE x6, x7, x4, r19; VMAC.f dm3, dm3, ex0, ex1, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9915 "00000001" // /* MW 15 */ + 9916 "01011011" // /* MW 14 */ + 9917 "00111100" // /* MW 13 */ + 9918 "00100110" // /* MW 12 */ + 9919 "10011101" // /* MW 11 */ + 9920 "00000001" // /* MW 10 */ + 9921 "00000000" // /* MW 9 */ + 9922 "00000000" // /* MW 8 */ + 9923 "10001011" // /* MW 7 */ + 9924 "10011100" // /* MW 6 */ + 9925 "11101100" // /* MW 5 */ + 9926 "01110011" // /* MW 4 */ + 9927 "01111110" // /* MW 3 */ + 9928 "11001101" // /* MW 2 */ + 9929 "11000010" // /* MW 1 */ +.src_ref 10 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1365 19 first +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 313 19 first +.src_ref 5 "accum.hpp" 903 19 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "mmul_bf16_bf16.hpp" 113 29 first +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 +.src_ref 4 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9930 "01001011" // VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;PADDB [p4], m4; VCONV.bfp16ebs8.fp32 ex0, dm4;NOPX; VSHUFFLE x7, x7, x4, r21; VMAC.f dm1, dm1, ex2, ex1, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9931 "00100001" // /* MW 15 */ + 9932 "01001001" // /* MW 14 */ + 9933 "00111100" // /* MW 13 */ + 9934 "00101010" // /* MW 12 */ + 9935 "11011101" // /* MW 11 */ + 9936 "00000001" // /* MW 10 */ + 9937 "00000000" // /* MW 9 */ + 9938 "00000000" // /* MW 8 */ + 9939 "00110110" // /* MW 7 */ + 9940 "00001010" // /* MW 6 */ + 9941 "00100000" // /* MW 5 */ + 9942 "00010111" // /* MW 4 */ + 9943 "01111001" // /* MW 3 */ + 9944 "01000101" // /* MW 2 */ + 9945 "11000011" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 11 "array_helpers.hpp" 313 19 +.src_ref 5 "mmul_bf16_bf16.hpp" 114 29 first +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9946 "01100110" // PADDA [p5], m5; VLDB x5, [p4, #64]; VCONV.bfp16ebs8.fp32 ex1, dm4; VMAC.f dm2, dm2, ex0, ex3, r17 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9947 "01101001" // /* MW 11 */ + 9948 "01000000" // /* MW 10 */ + 9949 "10001010" // /* MW 9 */ + 9950 "00001110" // /* MW 8 */ + 9951 "00011011" // /* MW 7 */ + 9952 "01000101" // /* MW 6 */ + 9953 "11101000" // /* MW 5 */ + 9954 "00101010" // /* MW 4 */ + 9955 "11111000" // /* MW 3 */ + 9956 "00001100" // /* MW 2 */ + 9957 "10110101" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 5 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 4 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9958 "01100110" // VLDA.CONV.fp32.bf16 cml4, [p5];VLDB x9, [p4]; VSHUFFLE x8, x9, x5, r19; VMUL.f dm4, y3, y5, r22 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9959 "01010001" // /* MW 11 */ + 9960 "11101101" // /* MW 10 */ + 9961 "10110100" // /* MW 9 */ + 9962 "01100010" // /* MW 8 */ + 9963 "11001100" // /* MW 7 */ + 9964 "01001010" // /* MW 6 */ + 9965 "11101100" // /* MW 5 */ + 9966 "00001100" // /* MW 4 */ + 9967 "01111000" // /* MW 3 */ + 9968 "11000101" // /* MW 2 */ + 9969 "10100000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1365 19 first +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 5 "accum.hpp" 903 19 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "mmul_bf16_bf16.hpp" 113 29 first +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 4 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9970 "01101110" // VLDA.CONV.fp32.bf16 cmh4, [p5, #64]; VCONV.bfp16ebs8.fp32 ex2, dm4; VSHUFFLE x9, x9, x5, r21; VMAC.f dm0, dm0, ex2, ex3, r17 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 9971 "01101001" // /* MW 13 */ + 9972 "00000100" // /* MW 12 */ + 9973 "10001000" // /* MW 11 */ + 9974 "10100011" // /* MW 10 */ + 9975 "01010110" // /* MW 9 */ + 9976 "01100110" // /* MW 8 */ + 9977 "00000000" // /* MW 7 */ + 9978 "00000000" // /* MW 6 */ + 9979 "01101100" // /* MW 5 */ + 9980 "00010100" // /* MW 4 */ + 9981 "01110010" // /* MW 3 */ + 9982 "11001101" // /* MW 2 */ + 9983 "10100010" // /* MW 1 */ +.label ZLE_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_944 +.src_ref 5 "mmul_bf16_bf16.hpp" 114 29 first +.end_of_loop +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9984 "11100001" // NOPA; NOPB; VCONV.bfp16ebs8.fp32 ex3, dm4;NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9985 "00000000" // /* MW 15 */ + 9986 "00000000" // /* MW 14 */ + 9987 "01111000" // /* MW 13 */ + 9988 "10100101" // /* MW 12 */ + 9989 "00000001" // /* MW 11 */ + 9990 "00000000" // /* MW 10 */ + 9991 "00000000" // /* MW 9 */ + 9992 "00000000" // /* MW 8 */ + 9993 "00110110" // /* MW 7 */ + 9994 "10001010" // /* MW 6 */ + 9995 "00100001" // /* MW 5 */ + 9996 "00000000" // /* MW 4 */ + 9997 "11110000" // /* MW 3 */ + 9998 "00101100" // /* MW 2 */ + 9999 "00000000" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 313 19 +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 5 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 7 "gemm_bfp16.h" 204 6 +.src_ref 7 "gemm_bfp16.h" 205 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 10000 "10001011" // VLDA.CONV.fp32.bf16 cmh4, [p6, #64];VLDB x4, [p7, #64]; MOVS p4, p1; NOPX; MOV p5, p6; VMUL.f dm4, y4, y5, r22 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10001 "10001010" // /* MW 15 */ + 10002 "10100111" // /* MW 14 */ + 10003 "01111101" // /* MW 13 */ + 10004 "01100000" // /* MW 12 */ + 10005 "10110110" // /* MW 11 */ + 10006 "00000010" // /* MW 10 */ + 10007 "00000000" // /* MW 9 */ + 10008 "00000000" // /* MW 8 */ + 10009 "10001011" // /* MW 7 */ + 10010 "10000100" // /* MW 6 */ + 10011 "01101100" // /* MW 5 */ + 10012 "00101010" // /* MW 4 */ + 10013 "01111110" // /* MW 3 */ + 10014 "11001101" // /* MW 2 */ + 10015 "11000010" // /* MW 1 */ +.src_ref 10 "aie_core.h" 100 15 first +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 11 "array_helpers.hpp" 313 19 first +.src_ref 5 "accum.hpp" 903 19 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 4 "transpose.hpp" 224 15 first +.src_ref 7 "gemm_bfp16.h" 203 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10016 "01001011" // VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;PADDB [p5], m5; MOVS p0, p1; NOPX; VSHUFFLE x6, x7, x4, r19; VMAC.f dm3, dm3, ex0, ex1, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10017 "00000001" // /* MW 15 */ + 10018 "01011011" // /* MW 14 */ + 10019 "00111100" // /* MW 13 */ + 10020 "00100110" // /* MW 12 */ + 10021 "10011101" // /* MW 11 */ + 10022 "00000001" // /* MW 10 */ + 10023 "00000000" // /* MW 9 */ + 10024 "00000000" // /* MW 8 */ + 10025 "10001011" // /* MW 7 */ + 10026 "10000100" // /* MW 6 */ + 10027 "00100000" // /* MW 5 */ + 10028 "01010111" // /* MW 4 */ + 10029 "01111011" // /* MW 3 */ + 10030 "01000101" // /* MW 2 */ + 10031 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1365 19 first +.src_ref 5 "mmul_bf16_bf16.hpp" 113 29 first +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 +.src_ref 4 "transpose.hpp" 225 15 first +.src_ref 7 "gemm_bfp16.h" 202 6 first +.src_ref 7 "gemm_bfp16.h" 203 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10032 "01001011" // PADDA [p0], m3; PADDB [p1], m7; VCONV.bfp16ebs8.fp32 ex0, dm4;NOPX; VSHUFFLE x7, x7, x4, r21; VMAC.f dm1, dm1, ex2, ex1, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10033 "00100001" // /* MW 15 */ + 10034 "01001001" // /* MW 14 */ + 10035 "00111100" // /* MW 13 */ + 10036 "00101010" // /* MW 12 */ + 10037 "11011101" // /* MW 11 */ + 10038 "00000001" // /* MW 10 */ + 10039 "00000000" // /* MW 9 */ + 10040 "00000000" // /* MW 8 */ + 10041 "00110110" // /* MW 7 */ + 10042 "00001010" // /* MW 6 */ + 10043 "00100000" // /* MW 5 */ + 10044 "11010111" // /* MW 4 */ + 10045 "11110011" // /* MW 3 */ + 10046 "00001100" // /* MW 2 */ + 10047 "00001101" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 114 29 first +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 7 "gemm_bfp16.h" 176 6 +.src_ref 7 "gemm_bfp16.h" 204 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10048 "01001010" // VCONV.bfp16ebs8.fp32 ex1, dm4; MOV m1, r24; VMAC.f dm2, dm2, ex0, ex3, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10049 "01101001" // /* MW 9 */ + 10050 "01000000" // /* MW 8 */ + 10051 "10001010" // /* MW 7 */ + 10052 "11100100" // /* MW 6 */ + 10053 "00100000" // /* MW 5 */ + 10054 "00001100" // /* MW 4 */ + 10055 "11000001" // /* MW 3 */ + 10056 "01000110" // /* MW 2 */ + 10057 "00010001" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 5 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 4 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10058 "01001010" // VLDA.CONV.fp32.bf16 cml4, [p5]; VSHUFFLE x8, x9, x5, r19; VMUL.f dm4, y3, y5, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10059 "01010001" // /* MW 9 */ + 10060 "11101101" // /* MW 8 */ + 10061 "10110100" // /* MW 7 */ + 10062 "01100010" // /* MW 6 */ + 10063 "11001100" // /* MW 5 */ + 10064 "01001010" // /* MW 4 */ + 10065 "01110100" // /* MW 3 */ + 10066 "11000101" // /* MW 2 */ + 10067 "10100000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1365 19 first +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 5 "accum.hpp" 903 19 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "mmul_bf16_bf16.hpp" 113 29 first +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 4 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10068 "01101110" // VLDA.CONV.fp32.bf16 cmh4, [p5, #64]; VCONV.bfp16ebs8.fp32 ex2, dm4; VSHUFFLE x9, x9, x5, r21; VMAC.f dm0, dm0, ex2, ex3, r17 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 10069 "01101001" // /* MW 13 */ + 10070 "00000100" // /* MW 12 */ + 10071 "10001000" // /* MW 11 */ + 10072 "10100011" // /* MW 10 */ + 10073 "01010110" // /* MW 9 */ + 10074 "01100110" // /* MW 8 */ + 10075 "00000000" // /* MW 7 */ + 10076 "00000000" // /* MW 6 */ + 10077 "01101100" // /* MW 5 */ + 10078 "00010100" // /* MW 4 */ + 10079 "01110010" // /* MW 3 */ + 10080 "11001101" // /* MW 2 */ + 10081 "10100010" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 5 "mmul_bf16_bf16.hpp" 114 29 first +.src_ref 7 "gemm_bfp16.h" 204 6 first +.src_ref 7 "gemm_bfp16.h" 205 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10082 "10111010" // PADDB [p4], m1; VCONV.bfp16ebs8.fp32 ex3, dm4; MOV p5, p4 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10083 "01110110" // /* MW 9 */ + 10084 "01100000" // /* MW 8 */ + 10085 "10110100" // /* MW 7 */ + 10086 "00000010" // /* MW 6 */ + 10087 "10010000" // /* MW 5 */ + 10088 "00101011" // /* MW 4 */ + 10089 "11000100" // /* MW 3 */ + 10090 "01000110" // /* MW 2 */ + 10091 "00110001" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10092 "01100010" // MOV m2, r18; VMAC.f dm3, dm3, ex0, ex1, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10093 "00101001" // /* MW 7 */ + 10094 "01100000" // /* MW 6 */ + 10095 "10001011" // /* MW 5 */ + 10096 "11100110" // /* MW 4 */ + 10097 "00100000" // /* MW 3 */ + 10098 "00001001" // /* MW 2 */ + 10099 "00000010" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 5 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 5 "mmul_bf16_bf16.hpp" 113 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10100 "01001010" // VLDA.CONV.fp32.bf16 cmh4, [p6, #64]; VCONV.bfp16ebs8.fp32 ex0, dm4; VMUL.f dm4, y4, y5, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10101 "01010001" // /* MW 9 */ + 10102 "11110001" // /* MW 8 */ + 10103 "10110100" // /* MW 7 */ + 10104 "00001001" // /* MW 6 */ + 10105 "00110110" // /* MW 5 */ + 10106 "00001010" // /* MW 4 */ + 10107 "01110000" // /* MW 3 */ + 10108 "11001101" // /* MW 2 */ + 10109 "11000010" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10110 "01001000" // VMAC.f dm1, dm1, ex2, ex1, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10111 "00101001" // /* MW 3 */ + 10112 "00100100" // /* MW 2 */ + 10113 "10001001" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 114 29 first +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10114 "01100010" // VCONV.bfp16ebs8.fp32 ex1, dm4; VMAC.f dm2, dm2, ex0, ex3, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10115 "01101001" // /* MW 7 */ + 10116 "01000000" // /* MW 6 */ + 10117 "10001010" // /* MW 5 */ + 10118 "00000010" // /* MW 4 */ + 10119 "11000000" // /* MW 3 */ + 10120 "01000110" // /* MW 2 */ + 10121 "00010001" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10122 "01001000" // VMAC.f dm0, dm0, ex2, ex3, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10123 "01101001" // /* MW 3 */ + 10124 "00000100" // /* MW 2 */ + 10125 "10001000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10127 "00000000" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 113 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10128 "00011000" // VCONV.bfp16ebs8.fp32 ex2, dm4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10129 "00110110" // /* MW 3 */ + 10130 "00001010" // /* MW 2 */ + 10131 "00001001" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 114 29 first +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10132 "01100010" // VCONV.bfp16ebs8.fp32 ex3, dm4; VMAC.f dm3, dm3, ex0, ex1, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10133 "00101001" // /* MW 7 */ + 10134 "01100000" // /* MW 6 */ + 10135 "10001011" // /* MW 5 */ + 10136 "00000010" // /* MW 4 */ + 10137 "11000000" // /* MW 3 */ + 10138 "01000110" // /* MW 2 */ + 10139 "00110001" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10141 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10143 "00000000" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10144 "01001000" // VMAC.f dm1, dm1, ex2, ex1, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10145 "00101001" // /* MW 3 */ + 10146 "00100100" // /* MW 2 */ + 10147 "10001001" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10148 "01001000" // VMAC.f dm2, dm2, ex0, ex3, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10149 "01101001" // /* MW 3 */ + 10150 "01000000" // /* MW 2 */ + 10151 "10001010" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10152 "01001000" // VMAC.f dm0, dm0, ex2, ex3, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10153 "01101001" // /* MW 3 */ + 10154 "00000100" // /* MW 2 */ + 10155 "10001000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 202 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10156 "10011000" // VST bmlh3, [p1, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10157 "10100110" // /* MW 3 */ + 10158 "00010101" // /* MW 2 */ + 10159 "00001001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 202 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10160 "10011000" // VST bmhl3, [p1, #128] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10161 "11000110" // /* MW 3 */ + 10162 "00100101" // /* MW 2 */ + 10163 "00001001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 202 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10164 "10011000" // VST bmhh3, [p1, #192] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10165 "11100110" // /* MW 3 */ + 10166 "00110101" // /* MW 2 */ + 10167 "00001001" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 7 "gemm_bfp16.h" 202 6 +.src_ref 7 "gemm_bfp16.h" 205 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10168 "10111010" // PADDB [p5], m6; VST bmll3, [p1]; MOV p1, p5 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10169 "01110110" // /* MW 9 */ + 10170 "01100000" // /* MW 8 */ + 10171 "10110101" // /* MW 7 */ + 10172 "00000000" // /* MW 6 */ + 10173 "10010000" // /* MW 5 */ + 10174 "11001011" // /* MW 4 */ + 10175 "11010101" // /* MW 3 */ + 10176 "10110000" // /* MW 2 */ + 10177 "00100000" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 first +.src_ref 7 "gemm_bfp16.h" 175 6 +.src_ref 7 "gemm_bfp16.h" 203 6 +.src_ref 7 "gemm_bfp16.h" 203 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10178 "10111010" // PADDB.2D [p1], d2; VST bmlh2, [p0, #64]; MOV m2, m3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10179 "01110110" // /* MW 9 */ + 10180 "00000000" // /* MW 8 */ + 10181 "00000011" // /* MW 7 */ + 10182 "00000001" // /* MW 6 */ + 10183 "10010000" // /* MW 5 */ + 10184 "01010011" // /* MW 4 */ + 10185 "11010001" // /* MW 3 */ + 10186 "10100100" // /* MW 2 */ + 10187 "00000010" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 203 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10188 "10011000" // VST bmhl2, [p0, #128] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10189 "01000110" // /* MW 3 */ + 10190 "00100101" // /* MW 2 */ + 10191 "00001000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 203 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10192 "10011000" // VST bmhh2, [p0, #192] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10193 "01100110" // /* MW 3 */ + 10194 "00110101" // /* MW 2 */ + 10195 "00001000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 175 6 +.src_ref 7 "gemm_bfp16.h" 203 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10196 "00000010" // VST bmll2, [p0]; MOV p0, p3 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10197 "01110000" // /* MW 7 */ + 10198 "01100000" // /* MW 6 */ + 10199 "00110011" // /* MW 5 */ + 10200 "00000000" // /* MW 4 */ + 10201 "11010000" // /* MW 3 */ + 10202 "10100000" // /* MW 2 */ + 10203 "00000000" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 7 "gemm_bfp16.h" 175 6 first +.src_ref 7 "gemm_bfp16.h" 204 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10204 "10111010" // PADDB [p0], m3; VST bmlh1, [p4, #64]; MOV m3, r18 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10205 "01110110" // /* MW 9 */ + 10206 "10010000" // /* MW 8 */ + 10207 "10000100" // /* MW 7 */ + 10208 "00000001" // /* MW 6 */ + 10209 "10010000" // /* MW 5 */ + 10210 "01101011" // /* MW 4 */ + 10211 "11010000" // /* MW 3 */ + 10212 "10010100" // /* MW 2 */ + 10213 "10000010" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 175 6 +.src_ref 7 "gemm_bfp16.h" 204 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10214 "00001100" // VLDA bmlh2, [p0, #64]; VST bmhl1, [p4, #128] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10215 "10001101" // /* MW 5 */ + 10216 "01001001" // /* MW 4 */ + 10217 "10111000" // /* MW 3 */ + 10218 "10100110" // /* MW 2 */ + 10219 "00000010" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 172 2 first +.src_ref 7 "gemm_bfp16.h" 175 6 +.src_ref 7 "gemm_bfp16.h" 204 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10220 "01111010" // VLDA bmhl2, [p0, #128]; VST bmhh1, [p4, #192]; JNZD r23, r23, p2 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 10221 "10100000" // /* MW 9 */ + 10222 "11101110" // /* MW 8 */ + 10223 "00000101" // /* MW 7 */ + 10224 "10000000" // /* MW 6 */ + 10225 "11100110" // /* MW 5 */ + 10226 "00110100" // /* MW 4 */ + 10227 "10110100" // /* MW 3 */ + 10228 "10101010" // /* MW 2 */ + 10229 "00000100" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 7 "gemm_bfp16.h" 174 6 +.src_ref 7 "gemm_bfp16.h" 175 6 first +.src_ref 7 "gemm_bfp16.h" 176 6 +.src_ref 7 "gemm_bfp16.h" 177 6 first +.src_ref 7 "gemm_bfp16.h" 204 6 first +.delay_slot +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10230 "11110110" // VLDA bmhh2, [p0, #192]; PADDB [p3], m6; VST bmll1, [p4]; MOV p4, p3 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10231 "01110000" // /* MW 11 */ + 10232 "01100000" // /* MW 10 */ + 10233 "00110011" // /* MW 9 */ + 10234 "10000010" // /* MW 8 */ + 10235 "10000110" // /* MW 7 */ + 10236 "00000100" // /* MW 6 */ + 10237 "00100100" // /* MW 5 */ + 10238 "10010111" // /* MW 4 */ + 10239 "10110111" // /* MW 3 */ + 10240 "10101110" // /* MW 2 */ + 10241 "00000110" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 177 6 +.src_ref 7 "gemm_bfp16.h" 205 6 first +.delay_slot +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10242 "00001100" // VLDA bmlh0, [p3, #64]; VST bmlh0, [p5, #64] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10243 "01001101" // /* MW 5 */ + 10244 "00101000" // /* MW 4 */ + 10245 "10111010" // /* MW 3 */ + 10246 "10000110" // /* MW 2 */ + 10247 "01100010" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 177 6 first +.src_ref 7 "gemm_bfp16.h" 205 6 +.delay_slot +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10248 "00001100" // VLDA bmhl0, [p3, #128]; VST bmhl0, [p5, #128] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10249 "10001101" // /* MW 5 */ + 10250 "01001000" // /* MW 4 */ + 10251 "10111010" // /* MW 3 */ + 10252 "10001010" // /* MW 2 */ + 10253 "01100100" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 177 6 +.src_ref 7 "gemm_bfp16.h" 205 6 first +.delay_slot +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10254 "00001100" // VLDA bmhh0, [p3, #192]; VST bmhh0, [p5, #192] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10255 "11001101" // /* MW 5 */ + 10256 "01101000" // /* MW 4 */ + 10257 "10111010" // /* MW 3 */ + 10258 "10001110" // /* MW 2 */ + 10259 "01100110" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 7 "gemm_bfp16.h" 174 6 +.src_ref 7 "gemm_bfp16.h" 176 6 first +.src_ref 7 "gemm_bfp16.h" 177 6 first +.src_ref 7 "gemm_bfp16.h" 205 6 +.delay_slot +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10260 "11110110" // VLDA bmll0, [p3]; PADDB [p4], m1; VST bmll0, [p5]; MOV p5, p4 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10261 "01110000" // /* MW 11 */ + 10262 "01100000" // /* MW 10 */ + 10263 "10110100" // /* MW 9 */ + 10264 "10000010" // /* MW 8 */ + 10265 "00000110" // /* MW 7 */ + 10266 "00000100" // /* MW 6 */ + 10267 "00100101" // /* MW 5 */ + 10268 "01010111" // /* MW 4 */ + 10269 "10111000" // /* MW 3 */ + 10270 "10000010" // /* MW 2 */ + 10271 "01100000" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 10 "aie_core.h" 100 15 +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 7 "gemm_bfp16.h" 174 6 first +.src_ref 7 "gemm_bfp16.h" 176 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 10272 "11110110" // VLDA bmlh1, [p4, #64]; PADDB [p5], m7; MOVS p3, p5; MOV m1, r16 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10273 "01110000" // /* MW 11 */ + 10274 "00010000" // /* MW 10 */ + 10275 "10000100" // /* MW 9 */ + 10276 "00000000" // /* MW 8 */ + 10277 "10001011" // /* MW 7 */ + 10278 "10010100" // /* MW 6 */ + 10279 "00100011" // /* MW 5 */ + 10280 "11010111" // /* MW 4 */ + 10281 "10111011" // /* MW 3 */ + 10282 "10010110" // /* MW 2 */ + 10283 "10000010" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 first +.src_ref 7 "gemm_bfp16.h" 176 6 first +.src_ref 7 "gemm_bfp16.h" 182 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10284 "10111010" // VLDA bmhl1, [p4, #128]; PADDB.2D [p3], d3; ADD.NC lc, r26, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10285 "01001110" // /* MW 9 */ + 10286 "10111111" // /* MW 8 */ + 10287 "10111110" // /* MW 7 */ + 10288 "00000010" // /* MW 6 */ + 10289 "10010000" // /* MW 5 */ + 10290 "01110011" // /* MW 4 */ + 10291 "10110011" // /* MW 3 */ + 10292 "10011010" // /* MW 2 */ + 10293 "10000100" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 176 6 +.src_ref 7 "gemm_bfp16.h" 182 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10294 "10111010" // VLDA bmhh1, [p4, #192]; MOVXM ls, #10480 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10295 "00010000" // /* MW 9 */ + 10296 "01111000" // /* MW 8 */ + 10297 "01111100" // /* MW 7 */ + 10298 "00001000" // /* MW 6 */ + 10299 "00000000" // /* MW 5 */ + 10300 "00000000" // /* MW 4 */ + 10301 "10110000" // /* MW 3 */ + 10302 "10011110" // /* MW 2 */ + 10303 "10000110" // /* MW 1 */ +.src_ref 10 "aie_core.h" 100 15 first +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 313 19 +.src_ref 7 "gemm_bfp16.h" 176 6 +.src_ref 7 "gemm_bfp16.h" 182 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10304 "01111110" // VLDA bmll1, [p4]; VLDB.3D x7, [p7], d1; MOVS p4, p7; MOVXM le, #10560 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 10305 "01100000" // /* MW 13 */ + 10306 "10010001" // /* MW 12 */ + 10307 "10010011" // /* MW 11 */ + 10308 "00000010" // /* MW 10 */ + 10309 "10010100" // /* MW 9 */ + 10310 "00110111" // /* MW 8 */ + 10311 "00000001" // /* MW 7 */ + 10312 "00000000" // /* MW 6 */ + 10313 "11101000" // /* MW 5 */ + 10314 "01110011" // /* MW 4 */ + 10315 "10111110" // /* MW 3 */ + 10316 "10010010" // /* MW 2 */ + 10317 "10000000" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 313 19 first +.src_ref 7 "gemm_bfp16.h" 174 6 first +.src_ref 7 "gemm_bfp16.h" 175 6 +.src_ref 7 "gemm_bfp16.h" 203 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10318 "10111010" // VLDA bmlh3, [p5, #64]; PADDB [p4], m4; MOV m3, m2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10319 "01111110" // /* MW 9 */ + 10320 "00000000" // /* MW 8 */ + 10321 "10000010" // /* MW 7 */ + 10322 "00000001" // /* MW 6 */ + 10323 "10010000" // /* MW 5 */ + 10324 "10001011" // /* MW 4 */ + 10325 "10110100" // /* MW 3 */ + 10326 "10110110" // /* MW 2 */ + 10327 "10100010" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 313 19 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10328 "00110010" // VLDA x9, [p4]; VLDB x5, [p4, #64]; MOVS p4, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10329 "10001011" // /* MW 7 */ + 10330 "10011100" // /* MW 6 */ + 10331 "11101100" // /* MW 5 */ + 10332 "00101010" // /* MW 4 */ + 10333 "01111000" // /* MW 3 */ + 10334 "11001011" // /* MW 2 */ + 10335 "10000000" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 11 "array_helpers.hpp" 313 19 first +.src_ref 7 "gemm_bfp16.h" 174 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10336 "00110010" // VLDA bmhl3, [p5, #128]; VLDB x4, [p7, #64]; PADDS [p4], m4 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10337 "01011011" // /* MW 7 */ + 10338 "10001000" // /* MW 6 */ + 10339 "01101100" // /* MW 5 */ + 10340 "00101010" // /* MW 4 */ + 10341 "10111110" // /* MW 3 */ + 10342 "10111010" // /* MW 2 */ + 10343 "10100100" // /* MW 1 */ +.src_ref 10 "aie_core.h" 100 15 first +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 7 "gemm_bfp16.h" 174 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10344 "00111100" // VLDA bmhh3, [p5, #192]; VLDB.3D x7, [p7], d1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10345 "11101000" // /* MW 5 */ + 10346 "01110011" // /* MW 4 */ + 10347 "10111110" // /* MW 3 */ + 10348 "10111110" // /* MW 2 */ + 10349 "10100110" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 11 "array_helpers.hpp" 313 19 +.src_ref 7 "gemm_bfp16.h" 174 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10350 "10111010" // VLDA bmll3, [p5]; VLDB x5, [p4, #64]; MOV p5, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10351 "01111110" // /* MW 9 */ + 10352 "01100000" // /* MW 8 */ + 10353 "10110110" // /* MW 7 */ + 10354 "00000010" // /* MW 6 */ + 10355 "01110100" // /* MW 5 */ + 10356 "00010101" // /* MW 4 */ + 10357 "10110100" // /* MW 3 */ + 10358 "10110010" // /* MW 2 */ + 10359 "10100000" // /* MW 1 */ +.src_ref 10 "aie_core.h" 100 15 first +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 313 19 first +.src_ref 5 "accum.hpp" 903 19 first +.src_ref 5 "accum.hpp" 940 83 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10360 "00111100" // VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;PADDB [p5], m5 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10361 "00100000" // /* MW 5 */ + 10362 "01010111" // /* MW 4 */ + 10363 "01111011" // /* MW 3 */ + 10364 "01000101" // /* MW 2 */ + 10365 "11000011" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 4 "transpose.hpp" 224 15 first +.src_ref 7 "gemm_bfp16.h" 175 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10366 "10111010" // VLDA bmll2, [p0]; VLDB x9, [p4]; VSHUFFLE x6, x7, x4, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10367 "00111110" // /* MW 9 */ + 10368 "00100110" // /* MW 8 */ + 10369 "10011101" // /* MW 7 */ + 10370 "00000001" // /* MW 6 */ + 10371 "01110100" // /* MW 5 */ + 10372 "00000110" // /* MW 4 */ + 10373 "10110100" // /* MW 3 */ + 10374 "10100010" // /* MW 2 */ + 10375 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1365 19 first +.src_ref 4 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10376 "01111000" // VSHUFFLE x7, x7, x4, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10377 "01010100" // /* MW 3 */ + 10378 "10111010" // /* MW 2 */ + 10379 "00011011" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 313 19 +.src_ref 4 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10380 "10111010" // VLDB x4, [p7, #64]; MOVS p4, p7; VSHUFFLE x8, x9, x5, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10381 "00110110" // /* MW 9 */ + 10382 "01100110" // /* MW 8 */ + 10383 "00100101" // /* MW 7 */ + 10384 "00000010" // /* MW 6 */ + 10385 "00110100" // /* MW 5 */ + 10386 "00010101" // /* MW 4 */ + 10387 "01100111" // /* MW 3 */ + 10388 "10010001" // /* MW 2 */ + 10389 "10010011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1365 19 first +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 11 "array_helpers.hpp" 313 19 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 5 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 4 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10390 "01100110" // VLDA.CONV.fp32.bf16 cml4, [p5];PADDB [p4], m4; VSHUFFLE x9, x9, x5, r21; VMUL.f dm4, y3, y5, r22 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10391 "01010001" // /* MW 11 */ + 10392 "11101101" // /* MW 10 */ + 10393 "10110100" // /* MW 9 */ + 10394 "01100010" // /* MW 8 */ + 10395 "11010100" // /* MW 7 */ + 10396 "11001010" // /* MW 6 */ + 10397 "00100100" // /* MW 5 */ + 10398 "00010111" // /* MW 4 */ + 10399 "01111001" // /* MW 3 */ + 10400 "11000101" // /* MW 2 */ + 10401 "10100000" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 313 19 +.src_ref 5 "accum.hpp" 903 19 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 4 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10402 "11110110" // VLDA.CONV.fp32.bf16 cmh4, [p5, #64];VLDB x5, [p4, #64]; MOVS p5, p6; VSHUFFLE x6, x7, x4, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10403 "00110000" // /* MW 11 */ + 10404 "00100110" // /* MW 10 */ + 10405 "10011101" // /* MW 9 */ + 10406 "00000001" // /* MW 8 */ + 10407 "10001011" // /* MW 7 */ + 10408 "10011000" // /* MW 6 */ + 10409 "11101101" // /* MW 5 */ + 10410 "00101010" // /* MW 4 */ + 10411 "01111000" // /* MW 3 */ + 10412 "11001101" // /* MW 2 */ + 10413 "10100010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1365 19 first +.src_ref 11 "array_helpers.hpp" 313 19 first +.src_ref 4 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10414 "11010100" // PADDA [p5], m5; VSHUFFLE x7, x7, x4, r21 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10415 "10101000" // /* MW 5 */ + 10416 "01110100" // /* MW 4 */ + 10417 "11110111" // /* MW 3 */ + 10418 "00001100" // /* MW 2 */ + 10419 "10110101" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 5 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 5 "mmul_bf16_bf16.hpp" 113 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10420 "01001010" // VLDA.CONV.fp32.bf16 cmh4, [p6, #64]; VCONV.bfp16ebs8.fp32 ex0, dm4; VMUL.f dm4, y4, y5, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10421 "01010001" // /* MW 9 */ + 10422 "11110001" // /* MW 8 */ + 10423 "10110100" // /* MW 7 */ + 10424 "00001001" // /* MW 6 */ + 10425 "00110110" // /* MW 5 */ + 10426 "00001010" // /* MW 4 */ + 10427 "01110000" // /* MW 3 */ + 10428 "11001101" // /* MW 2 */ + 10429 "11000010" // /* MW 1 */ +.src_ref 10 "aie_core.h" 100 15 first +.src_ref 10 "aie_core.h" 100 15 first +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 5 "accum.hpp" 903 19 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 4 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10430 "10111010" // VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;VLDB.3D x7, [p7], d1; VSHUFFLE x8, x9, x5, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10431 "00111110" // /* MW 9 */ + 10432 "01100110" // /* MW 8 */ + 10433 "00100101" // /* MW 7 */ + 10434 "00000010" // /* MW 6 */ + 10435 "11110100" // /* MW 5 */ + 10436 "00111001" // /* MW 4 */ + 10437 "01110111" // /* MW 3 */ + 10438 "01000101" // /* MW 2 */ + 10439 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1365 19 first +.src_ref 4 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10440 "01111000" // VSHUFFLE x9, x9, x5, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10441 "11010100" // /* MW 3 */ + 10442 "11001010" // /* MW 2 */ + 10443 "00011100" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 114 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10444 "00011000" // VCONV.bfp16ebs8.fp32 ex1, dm4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10445 "00110110" // /* MW 3 */ + 10446 "10001010" // /* MW 2 */ + 10447 "00001000" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 5 "mmul_bf16_bf16.hpp" 111 36 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10448 "01001010" // VLDA.CONV.fp32.bf16 cml4, [p5];VLDB x9, [p4]; VMUL.f dm4, y3, y5, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10449 "01010001" // /* MW 9 */ + 10450 "11101101" // /* MW 8 */ + 10451 "10110100" // /* MW 7 */ + 10452 "00011101" // /* MW 6 */ + 10453 "01110100" // /* MW 5 */ + 10454 "00000110" // /* MW 4 */ + 10455 "01110100" // /* MW 3 */ + 10456 "11000101" // /* MW 2 */ + 10457 "10100000" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 5 "accum.hpp" 903 19 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "mmul_bf16_bf16.hpp" 113 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10458 "00001100" // VLDA.CONV.fp32.bf16 cmh4, [p5, #64]; VCONV.bfp16ebs8.fp32 ex2, dm4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10459 "01101100" // /* MW 5 */ + 10460 "00010100" // /* MW 4 */ + 10461 "01110010" // /* MW 3 */ + 10462 "11001101" // /* MW 2 */ + 10463 "10100010" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 114 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10464 "11100001" // NOPA; NOPB; VCONV.bfp16ebs8.fp32 ex3, dm4;NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10465 "00000000" // /* MW 15 */ + 10466 "00000000" // /* MW 14 */ + 10467 "01111000" // /* MW 13 */ + 10468 "10100101" // /* MW 12 */ + 10469 "00000001" // /* MW 11 */ + 10470 "00000000" // /* MW 10 */ + 10471 "00000000" // /* MW 9 */ + 10472 "00000000" // /* MW 8 */ + 10473 "00110110" // /* MW 7 */ + 10474 "10001010" // /* MW 6 */ + 10475 "00100001" // /* MW 5 */ + 10476 "00000000" // /* MW 4 */ + 10477 "11110000" // /* MW 3 */ + 10478 "00101100" // /* MW 2 */ + 10479 "00000000" // /* MW 1 */ +.label ZLS_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_1440 +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 313 19 +.src_ref 5 "mmul_bf16_bf16.hpp" 111 36 first +.begin_of_loop +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 10480 "01001010" // VLDB x4, [p7, #64]; MOV p5, p6; VMUL.f dm4, y4, y5, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10481 "01010001" // /* MW 9 */ + 10482 "11110001" // /* MW 8 */ + 10483 "10110100" // /* MW 7 */ + 10484 "11100110" // /* MW 6 */ + 10485 "11000000" // /* MW 5 */ + 10486 "01101100" // /* MW 4 */ + 10487 "01101101" // /* MW 3 */ + 10488 "00101010" // /* MW 2 */ + 10489 "00001110" // /* MW 1 */ +.src_ref 10 "aie_core.h" 100 15 first +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 11 "array_helpers.hpp" 313 19 +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 4 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10490 "01001011" // VLDA.CONV.fp32.bf16 cmh4, [p6, #64];VLDB.3D x7, [p7], d1; MOVS p4, p7; NOPX; VSHUFFLE x6, x7, x4, r19; VMAC.f dm3, dm3, ex0, ex1, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10491 "00000001" // /* MW 15 */ + 10492 "01011011" // /* MW 14 */ + 10493 "00111100" // /* MW 13 */ + 10494 "00100110" // /* MW 12 */ + 10495 "10011101" // /* MW 11 */ + 10496 "00000001" // /* MW 10 */ + 10497 "00000000" // /* MW 9 */ + 10498 "00000000" // /* MW 8 */ + 10499 "10001011" // /* MW 7 */ + 10500 "10011100" // /* MW 6 */ + 10501 "11101100" // /* MW 5 */ + 10502 "01110011" // /* MW 4 */ + 10503 "01111110" // /* MW 3 */ + 10504 "11001101" // /* MW 2 */ + 10505 "11000010" // /* MW 1 */ +.src_ref 10 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1365 19 first +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 313 19 first +.src_ref 5 "accum.hpp" 903 19 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "mmul_bf16_bf16.hpp" 113 29 first +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 +.src_ref 4 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10506 "01001011" // VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;PADDB [p4], m4; VCONV.bfp16ebs8.fp32 ex0, dm4;NOPX; VSHUFFLE x7, x7, x4, r21; VMAC.f dm1, dm1, ex2, ex1, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10507 "00100001" // /* MW 15 */ + 10508 "01001001" // /* MW 14 */ + 10509 "00111100" // /* MW 13 */ + 10510 "00101010" // /* MW 12 */ + 10511 "11011101" // /* MW 11 */ + 10512 "00000001" // /* MW 10 */ + 10513 "00000000" // /* MW 9 */ + 10514 "00000000" // /* MW 8 */ + 10515 "00110110" // /* MW 7 */ + 10516 "00001010" // /* MW 6 */ + 10517 "00100000" // /* MW 5 */ + 10518 "00010111" // /* MW 4 */ + 10519 "01111001" // /* MW 3 */ + 10520 "01000101" // /* MW 2 */ + 10521 "11000011" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 11 "array_helpers.hpp" 313 19 +.src_ref 5 "mmul_bf16_bf16.hpp" 114 29 first +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10522 "01100110" // PADDA [p5], m5; VLDB x5, [p4, #64]; VCONV.bfp16ebs8.fp32 ex1, dm4; VMAC.f dm2, dm2, ex0, ex3, r17 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10523 "01101001" // /* MW 11 */ + 10524 "01000000" // /* MW 10 */ + 10525 "10001010" // /* MW 9 */ + 10526 "00001110" // /* MW 8 */ + 10527 "00011011" // /* MW 7 */ + 10528 "01000101" // /* MW 6 */ + 10529 "11101000" // /* MW 5 */ + 10530 "00101010" // /* MW 4 */ + 10531 "11111000" // /* MW 3 */ + 10532 "00001100" // /* MW 2 */ + 10533 "10110101" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 5 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 4 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10534 "01100110" // VLDA.CONV.fp32.bf16 cml4, [p5];VLDB x9, [p4]; VSHUFFLE x8, x9, x5, r19; VMUL.f dm4, y3, y5, r22 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10535 "01010001" // /* MW 11 */ + 10536 "11101101" // /* MW 10 */ + 10537 "10110100" // /* MW 9 */ + 10538 "01100010" // /* MW 8 */ + 10539 "11001100" // /* MW 7 */ + 10540 "01001010" // /* MW 6 */ + 10541 "11101100" // /* MW 5 */ + 10542 "00001100" // /* MW 4 */ + 10543 "01111000" // /* MW 3 */ + 10544 "11000101" // /* MW 2 */ + 10545 "10100000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1365 19 first +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 5 "accum.hpp" 903 19 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "mmul_bf16_bf16.hpp" 113 29 first +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 4 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10546 "01101110" // VLDA.CONV.fp32.bf16 cmh4, [p5, #64]; VCONV.bfp16ebs8.fp32 ex2, dm4; VSHUFFLE x9, x9, x5, r21; VMAC.f dm0, dm0, ex2, ex3, r17 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 10547 "01101001" // /* MW 13 */ + 10548 "00000100" // /* MW 12 */ + 10549 "10001000" // /* MW 11 */ + 10550 "10100011" // /* MW 10 */ + 10551 "01010110" // /* MW 9 */ + 10552 "01100110" // /* MW 8 */ + 10553 "00000000" // /* MW 7 */ + 10554 "00000000" // /* MW 6 */ + 10555 "01101100" // /* MW 5 */ + 10556 "00010100" // /* MW 4 */ + 10557 "01110010" // /* MW 3 */ + 10558 "11001101" // /* MW 2 */ + 10559 "10100010" // /* MW 1 */ +.label ZLE_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_1520 +.src_ref 5 "mmul_bf16_bf16.hpp" 114 29 first +.end_of_loop +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10560 "11100001" // NOPA; NOPB; VCONV.bfp16ebs8.fp32 ex3, dm4;NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10561 "00000000" // /* MW 15 */ + 10562 "00000000" // /* MW 14 */ + 10563 "01111000" // /* MW 13 */ + 10564 "10100101" // /* MW 12 */ + 10565 "00000001" // /* MW 11 */ + 10566 "00000000" // /* MW 10 */ + 10567 "00000000" // /* MW 9 */ + 10568 "00000000" // /* MW 8 */ + 10569 "00110110" // /* MW 7 */ + 10570 "10001010" // /* MW 6 */ + 10571 "00100001" // /* MW 5 */ + 10572 "00000000" // /* MW 4 */ + 10573 "11110000" // /* MW 3 */ + 10574 "00101100" // /* MW 2 */ + 10575 "00000000" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 313 19 +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 5 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 7 "gemm_bfp16.h" 204 6 +.src_ref 7 "gemm_bfp16.h" 205 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 10576 "01101110" // VLDA.CONV.fp32.bf16 cmh4, [p6, #64]; MOVS p4, p1; MOV p5, p6; VMUL.f dm4, y4, y5, r22 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 10577 "01010001" // /* MW 13 */ + 10578 "11110001" // /* MW 12 */ + 10579 "10110100" // /* MW 11 */ + 10580 "00000111" // /* MW 10 */ + 10581 "01100110" // /* MW 9 */ + 10582 "01101011" // /* MW 8 */ + 10583 "00000000" // /* MW 7 */ + 10584 "00000000" // /* MW 6 */ + 10585 "00010110" // /* MW 5 */ + 10586 "00001001" // /* MW 4 */ + 10587 "01111001" // /* MW 3 */ + 10588 "11001101" // /* MW 2 */ + 10589 "11000010" // /* MW 1 */ +.src_ref 10 "aie_core.h" 100 15 first +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 11 "array_helpers.hpp" 313 19 first +.src_ref 5 "accum.hpp" 903 19 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 4 "transpose.hpp" 224 15 first +.src_ref 7 "gemm_bfp16.h" 203 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10590 "01001011" // VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;PADDB [p5], m5; MOVS p0, p1; NOPX; VSHUFFLE x6, x7, x4, r19; VMAC.f dm3, dm3, ex0, ex1, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10591 "00000001" // /* MW 15 */ + 10592 "01011011" // /* MW 14 */ + 10593 "00111100" // /* MW 13 */ + 10594 "00100110" // /* MW 12 */ + 10595 "10011101" // /* MW 11 */ + 10596 "00000001" // /* MW 10 */ + 10597 "00000000" // /* MW 9 */ + 10598 "00000000" // /* MW 8 */ + 10599 "10001011" // /* MW 7 */ + 10600 "10000100" // /* MW 6 */ + 10601 "00100000" // /* MW 5 */ + 10602 "01010111" // /* MW 4 */ + 10603 "01111011" // /* MW 3 */ + 10604 "01000101" // /* MW 2 */ + 10605 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1365 19 first +.src_ref 5 "mmul_bf16_bf16.hpp" 113 29 first +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 +.src_ref 4 "transpose.hpp" 225 15 first +.src_ref 7 "gemm_bfp16.h" 202 6 first +.src_ref 7 "gemm_bfp16.h" 268 37 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10606 "01001011" // MOVA dj1, #-304; PADDB [p1], m7; VCONV.bfp16ebs8.fp32 ex0, dm4;NOPX; VSHUFFLE x7, x7, x4, r21; VMAC.f dm1, dm1, ex2, ex1, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10607 "00100001" // /* MW 15 */ + 10608 "01001001" // /* MW 14 */ + 10609 "00111100" // /* MW 13 */ + 10610 "00101010" // /* MW 12 */ + 10611 "11011101" // /* MW 11 */ + 10612 "00000001" // /* MW 10 */ + 10613 "00000000" // /* MW 9 */ + 10614 "00000000" // /* MW 8 */ + 10615 "00110110" // /* MW 7 */ + 10616 "00001010" // /* MW 6 */ + 10617 "00100000" // /* MW 5 */ + 10618 "11010111" // /* MW 4 */ + 10619 "10000011" // /* MW 3 */ + 10620 "00000110" // /* MW 2 */ + 10621 "11011010" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 114 29 first +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 7 "gemm_bfp16.h" 203 6 first +.src_ref 7 "gemm_bfp16.h" 268 37 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10622 "01101110" // PADDA [p0], m3; VCONV.bfp16ebs8.fp32 ex1, dm4; MOV p7, r20; VMAC.f dm2, dm2, ex0, ex3, r17 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 10623 "01101001" // /* MW 13 */ + 10624 "01000000" // /* MW 12 */ + 10625 "10001010" // /* MW 11 */ + 10626 "00000111" // /* MW 10 */ + 10627 "01010001" // /* MW 9 */ + 10628 "01111011" // /* MW 8 */ + 10629 "00000000" // /* MW 7 */ + 10630 "00000000" // /* MW 6 */ + 10631 "01101100" // /* MW 5 */ + 10632 "00010100" // /* MW 4 */ + 10633 "11110001" // /* MW 3 */ + 10634 "00001100" // /* MW 2 */ + 10635 "00001101" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 5 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 4 "transpose.hpp" 224 15 first +.src_ref 7 "gemm_bfp16.h" 268 12 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10636 "01101110" // VLDA.CONV.fp32.bf16 cml4, [p5]; MOVS p6, r25; VSHUFFLE x8, x9, x5, r19; VMUL.f dm4, y3, y5, r22 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 10637 "01010001" // /* MW 13 */ + 10638 "11101101" // /* MW 12 */ + 10639 "10110100" // /* MW 11 */ + 10640 "01100011" // /* MW 10 */ + 10641 "01010110" // /* MW 9 */ + 10642 "01100010" // /* MW 8 */ + 10643 "00000000" // /* MW 7 */ + 10644 "00000000" // /* MW 6 */ + 10645 "00010110" // /* MW 5 */ + 10646 "00110010" // /* MW 4 */ + 10647 "01111101" // /* MW 3 */ + 10648 "11000101" // /* MW 2 */ + 10649 "10100000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1365 19 first +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 5 "accum.hpp" 903 19 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "mmul_bf16_bf16.hpp" 113 29 first +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 4 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10650 "01101110" // VLDA.CONV.fp32.bf16 cmh4, [p5, #64]; VCONV.bfp16ebs8.fp32 ex2, dm4; VSHUFFLE x9, x9, x5, r21; VMAC.f dm0, dm0, ex2, ex3, r17 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 10651 "01101001" // /* MW 13 */ + 10652 "00000100" // /* MW 12 */ + 10653 "10001000" // /* MW 11 */ + 10654 "10100011" // /* MW 10 */ + 10655 "01010110" // /* MW 9 */ + 10656 "01100110" // /* MW 8 */ + 10657 "00000000" // /* MW 7 */ + 10658 "00000000" // /* MW 6 */ + 10659 "01101100" // /* MW 5 */ + 10660 "00010100" // /* MW 4 */ + 10661 "01110010" // /* MW 3 */ + 10662 "11001101" // /* MW 2 */ + 10663 "10100010" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 114 29 first +.src_ref 7 "gemm_bfp16.h" 176 6 +.src_ref 7 "gemm_bfp16.h" 204 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10664 "00000010" // VCONV.bfp16ebs8.fp32 ex3, dm4; MOV m1, r24 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10665 "01110000" // /* MW 7 */ + 10666 "00010000" // /* MW 6 */ + 10667 "10000110" // /* MW 5 */ + 10668 "00000000" // /* MW 4 */ + 10669 "11000000" // /* MW 3 */ + 10670 "01000110" // /* MW 2 */ + 10671 "00110001" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 7 "gemm_bfp16.h" 204 6 first +.src_ref 7 "gemm_bfp16.h" 205 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10672 "01001010" // PADDB [p4], m1; MOV p5, p4; VMAC.f dm3, dm3, ex0, ex1, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10673 "00101001" // /* MW 9 */ + 10674 "01100000" // /* MW 8 */ + 10675 "10001011" // /* MW 7 */ + 10676 "11100110" // /* MW 6 */ + 10677 "11000000" // /* MW 5 */ + 10678 "01101000" // /* MW 4 */ + 10679 "00100101" // /* MW 3 */ + 10680 "01010111" // /* MW 2 */ + 10681 "00001000" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 5 "mmul_bf16_bf16.hpp" 113 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10682 "01100010" // VCONV.bfp16ebs8.fp32 ex0, dm4; VMUL.f dm4, y4, y5, r22 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10683 "01010001" // /* MW 7 */ + 10684 "11110001" // /* MW 6 */ + 10685 "10110100" // /* MW 5 */ + 10686 "00000010" // /* MW 4 */ + 10687 "11000000" // /* MW 3 */ + 10688 "01000110" // /* MW 2 */ + 10689 "00000001" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10690 "01001000" // VMAC.f dm1, dm1, ex2, ex1, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10691 "00101001" // /* MW 3 */ + 10692 "00100100" // /* MW 2 */ + 10693 "10001001" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 114 29 first +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10694 "01100010" // VCONV.bfp16ebs8.fp32 ex1, dm4; VMAC.f dm2, dm2, ex0, ex3, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10695 "01101001" // /* MW 7 */ + 10696 "01000000" // /* MW 6 */ + 10697 "10001010" // /* MW 5 */ + 10698 "00000010" // /* MW 4 */ + 10699 "11000000" // /* MW 3 */ + 10700 "01000110" // /* MW 2 */ + 10701 "00010001" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10702 "01001000" // VMAC.f dm0, dm0, ex2, ex3, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10703 "01101001" // /* MW 3 */ + 10704 "00000100" // /* MW 2 */ + 10705 "10001000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10707 "00000000" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 113 29 first +.src_ref 7 "gemm_bfp16.h" 268 12 +.src_ref 7 "gemm_bfp16.h" 268 37 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10708 "10111010" // LDA r17, [p7, dj1]; VCONV.bfp16ebs8.fp32 ex2, dm4; MOV dj1, #280 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10709 "01010010" // /* MW 9 */ + 10710 "00011000" // /* MW 8 */ + 10711 "11000001" // /* MW 7 */ + 10712 "00000000" // /* MW 6 */ + 10713 "00110110" // /* MW 5 */ + 10714 "00001010" // /* MW 4 */ + 10715 "11010001" // /* MW 3 */ + 10716 "01000110" // /* MW 2 */ + 10717 "11100100" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 114 29 first +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 7 "gemm_bfp16.h" 268 12 +.src_ref 7 "gemm_bfp16.h" 269 34 +.src_ref 7 "gemm_bfp16.h" 269 48 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10718 "01001011" // LDA r16, [p6, dj1]; NOPB; VCONV.bfp16ebs8.fp32 ex3, dm4;MOVXM p7, #508416; VMAC.f dm3, dm3, ex0, ex1, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10719 "00000001" // /* MW 15 */ + 10720 "01011011" // /* MW 14 */ + 10721 "00010100" // /* MW 13 */ + 10722 "00000000" // /* MW 12 */ + 10723 "10110001" // /* MW 11 */ + 10724 "11110011" // /* MW 10 */ + 10725 "00000001" // /* MW 9 */ + 10726 "00000000" // /* MW 8 */ + 10727 "00110110" // /* MW 7 */ + 10728 "10001010" // /* MW 6 */ + 10729 "00100001" // /* MW 5 */ + 10730 "00000000" // /* MW 4 */ + 10731 "11010000" // /* MW 3 */ + 10732 "01000010" // /* MW 2 */ + 10733 "11000100" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10734 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10735 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10736 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10737 "00000000" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10738 "01001000" // VMAC.f dm1, dm1, ex2, ex1, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10739 "00101001" // /* MW 3 */ + 10740 "00100100" // /* MW 2 */ + 10741 "10001001" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10742 "01001000" // VMAC.f dm2, dm2, ex0, ex3, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10743 "01101001" // /* MW 3 */ + 10744 "01000000" // /* MW 2 */ + 10745 "10001010" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10746 "01001000" // VMAC.f dm0, dm0, ex2, ex3, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10747 "01101001" // /* MW 3 */ + 10748 "00000100" // /* MW 2 */ + 10749 "10001000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 202 6 first +.src_ref 7 "gemm_bfp16.h" 268 45 first + 10750 "01011100" // VST bmlh3, [p1, #64]; ADD r17, r17, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10751 "11111110" // /* MW 5 */ + 10752 "11000111" // /* MW 4 */ + 10753 "11011000" // /* MW 3 */ + 10754 "10110100" // /* MW 2 */ + 10755 "00100010" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 202 6 +.src_ref 7 "gemm_bfp16.h" 268 28 + 10756 "01011100" // VST bmhl3, [p1, #128]; NE r17, r17, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10757 "00010001" // /* MW 5 */ + 10758 "11000110" // /* MW 4 */ + 10759 "11011000" // /* MW 3 */ + 10760 "10111000" // /* MW 2 */ + 10761 "00100100" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 202 6 + 10762 "10011000" // VST bmhh3, [p1, #192] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10763 "11100110" // /* MW 3 */ + 10764 "00110101" // /* MW 2 */ + 10765 "00001001" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 7 "gemm_bfp16.h" 202 6 +.src_ref 7 "gemm_bfp16.h" 205 6 first + 10766 "10111010" // PADDB [p5], m6; VST bmll3, [p1]; MOV p1, p5 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10767 "01110110" // /* MW 9 */ + 10768 "01100000" // /* MW 8 */ + 10769 "10110101" // /* MW 7 */ + 10770 "00000000" // /* MW 6 */ + 10771 "10010000" // /* MW 5 */ + 10772 "11001011" // /* MW 4 */ + 10773 "11010101" // /* MW 3 */ + 10774 "10110000" // /* MW 2 */ + 10775 "00100000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 203 6 first + 10776 "10011000" // VST bmlh2, [p0, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10777 "00100110" // /* MW 3 */ + 10778 "00010101" // /* MW 2 */ + 10779 "00001000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 203 6 + 10780 "10011000" // VST bmhl2, [p0, #128] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10781 "01000110" // /* MW 3 */ + 10782 "00100101" // /* MW 2 */ + 10783 "00001000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 203 6 + 10784 "10011000" // VST bmhh2, [p0, #192] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10785 "01100110" // /* MW 3 */ + 10786 "00110101" // /* MW 2 */ + 10787 "00001000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 203 6 + 10788 "10011000" // VST bmll2, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10789 "00000110" // /* MW 3 */ + 10790 "00000101" // /* MW 2 */ + 10791 "00001000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 204 6 first + 10792 "10011000" // VST bmlh1, [p4, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10793 "10100110" // /* MW 3 */ + 10794 "00010100" // /* MW 2 */ + 10795 "00001100" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 204 6 + 10796 "10011000" // VST bmhl1, [p4, #128] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10797 "11000110" // /* MW 3 */ + 10798 "00100100" // /* MW 2 */ + 10799 "00001100" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 204 6 +.src_ref 7 "gemm_bfp16.h" 268 6 first + 10800 "00111010" // VST bmhh1, [p4, #192]; JNZ r17, #10912 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10912 delay_slots=5 */ + 10801 "01100001" // /* MW 9 */ + 10802 "00000000" // /* MW 8 */ + 10803 "00010000" // /* MW 7 */ + 10804 "01010100" // /* MW 6 */ + 10805 "00000101" // /* MW 5 */ + 10806 "00100010" // /* MW 4 */ + 10807 "11010000" // /* MW 3 */ + 10808 "10011100" // /* MW 2 */ + 10809 "10000110" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 204 6 first +.delay_slot + 10810 "10011000" // VST bmll1, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10811 "10000110" // /* MW 3 */ + 10812 "00000100" // /* MW 2 */ + 10813 "00001100" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 205 6 first +.delay_slot + 10814 "10011000" // VST bmlh0, [p5, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10815 "00100110" // /* MW 3 */ + 10816 "00010100" // /* MW 2 */ + 10817 "00001101" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 205 6 +.delay_slot + 10818 "10011000" // VST bmhl0, [p5, #128] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10819 "01000110" // /* MW 3 */ + 10820 "00100100" // /* MW 2 */ + 10821 "00001101" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 7 "gemm_bfp16.h" 205 6 +.delay_slot + 10822 "00000010" // VST bmhh0, [p5, #192]; MOV m2, r18 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10823 "01110000" // /* MW 7 */ + 10824 "10010000" // /* MW 6 */ + 10825 "00000100" // /* MW 5 */ + 10826 "00000001" // /* MW 4 */ + 10827 "11010000" // /* MW 3 */ + 10828 "10001100" // /* MW 2 */ + 10829 "10100110" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 first +.src_ref 7 "gemm_bfp16.h" 205 6 +.delay_slot + 10830 "01001100" // PADDB.2D [p1], d2; VST bmll0, [p5] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10831 "00001101" // /* MW 5 */ + 10832 "00001000" // /* MW 4 */ + 10833 "00001010" // /* MW 3 */ + 10834 "01110010" // /* MW 2 */ + 10835 "00101010" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 269 34 first + 10836 "10011000" // LDA r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10837 "00010110" // /* MW 3 */ + 10838 "00000110" // /* MW 2 */ + 10839 "00000111" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 269 48 + 10840 "10011000" // LDA r17, [p7, #8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10841 "00110110" // /* MW 3 */ + 10842 "00100110" // /* MW 2 */ + 10843 "00000111" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 269 4 + 10844 "00011000" // LDA p0, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10845 "00011001" // /* MW 3 */ + 10846 "11110100" // /* MW 2 */ + 10847 "00000111" // /* MW 1 */ + 10848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10849 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 269 4 +.no_stack_arguments + 10850 "00000100" // JL #8736 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8736 delay_slots=5 */ + 10851 "00000001" // /* MW 5 */ + 10852 "00000000" // /* MW 4 */ + 10853 "00010000" // /* MW 3 */ + 10854 "00010001" // /* MW 2 */ + 10855 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10856 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10857 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10859 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10861 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 269 40 +.delay_slot + 10862 "10011000" // MUL r0, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10863 "00001111" // /* MW 3 */ + 10864 "01000001" // /* MW 2 */ + 10865 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10866 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 10867 "00011100" // /* MW 13 */ + 10868 "00000000" // /* MW 12 */ + 10869 "00000000" // /* MW 11 */ + 10870 "01010111" // /* MW 10 */ + 10871 "00011010" // /* MW 9 */ + 10872 "01000000" // /* MW 8 */ + 10873 "00000000" // /* MW 7 */ + 10874 "00000000" // /* MW 6 */ + 10875 "10110110" // /* MW 5 */ + 10876 "00000010" // /* MW 4 */ + 10877 "11110000" // /* MW 3 */ + 10878 "00101100" // /* MW 2 */ + 10879 "00000000" // /* MW 1 */ +.return_address + 10880 "10000100" // J #10928 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10928 delay_slots=5 */ + 10881 "00000000" // /* MW 5 */ + 10882 "00000000" // /* MW 4 */ + 10883 "01011000" // /* MW 3 */ + 10884 "00010101" // /* MW 2 */ + 10885 "00000000" // /* MW 1 */ +.delay_slot + 10886 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10887 "00000001" // /* MW 3 */ + 10888 "00100000" // /* MW 2 */ + 10889 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10891 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10892 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10893 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10895 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10896 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10897 "00000000" // /* MW 15 */ + 10898 "00000000" // /* MW 14 */ + 10899 "01111000" // /* MW 13 */ + 10900 "10100101" // /* MW 12 */ + 10901 "00000001" // /* MW 11 */ + 10902 "00000000" // /* MW 10 */ + 10903 "00000000" // /* MW 9 */ + 10904 "00000000" // /* MW 8 */ + 10905 "01011011" // /* MW 7 */ + 10906 "00000001" // /* MW 6 */ + 10907 "00100000" // /* MW 5 */ + 10908 "00000000" // /* MW 4 */ + 10909 "11110000" // /* MW 3 */ + 10910 "00101100" // /* MW 2 */ + 10911 "00000000" // /* MW 1 */ +.label TGT_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_1872 +.src_ref 7 "gemm_bfp16.h" 272 25 first + 10912 "11100001" // NOPA; NOPB; NOPS; ADD r16, r16, #1; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10913 "00000000" // /* MW 15 */ + 10914 "00000000" // /* MW 14 */ + 10915 "01111000" // /* MW 13 */ + 10916 "10100101" // /* MW 12 */ + 10917 "00000001" // /* MW 11 */ + 10918 "00111000" // /* MW 10 */ + 10919 "00000000" // /* MW 9 */ + 10920 "00100001" // /* MW 8 */ + 10921 "01011011" // /* MW 7 */ + 10922 "00000001" // /* MW 6 */ + 10923 "00100000" // /* MW 5 */ + 10924 "00000000" // /* MW 4 */ + 10925 "11110000" // /* MW 3 */ + 10926 "00101100" // /* MW 2 */ + 10927 "00000000" // /* MW 1 */ +.label TGT_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_1888 +.src_ref 7 "gemm_bfp16.h" 274 + 10928 "00011000" // LDA lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10929 "00111001" // /* MW 3 */ + 10930 "11111000" // /* MW 2 */ + 10931 "00000111" // /* MW 1 */ + 10932 "00011000" // LDA p7, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10933 "10011001" // /* MW 3 */ + 10934 "11110011" // /* MW 2 */ + 10935 "00000111" // /* MW 1 */ + 10936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10937 "00000000" // /* MW 1 */ + 10938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10939 "00000000" // /* MW 1 */ + 10940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10941 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 10942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10943 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.noswbrkpt + 10944 "00011000" // LDA p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10945 "00011001" // /* MW 3 */ + 10946 "11111111" // /* MW 2 */ + 10947 "00000111" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 274 first +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 10948 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10949 "00000000" // /* MW 3 */ + 10950 "00101000" // /* MW 2 */ + 10951 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 10952 "10111000" // MOV dj1, #280 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10953 "00110000" // /* MW 3 */ + 10954 "10000010" // /* MW 2 */ + 10955 "00011001" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10956 "10011000" // ST r16, [p6, dj1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10957 "00010001" // /* MW 3 */ + 10958 "00100010" // /* MW 2 */ + 10959 "00001110" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 274 first +.delay_slot + 10960 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10961 "00000001" // /* MW 5 */ + 10962 "00000000" // /* MW 4 */ + 10963 "00000000" // /* MW 3 */ + 10964 "11111000" // /* MW 2 */ + 10965 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10967 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params__end +.label __Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params___func_end0 + 10969 "00000000" // /* MW 1 */ +.label __Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_GemmBfp16 _Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 6 "superkernels.cpp" 381 first +.src_ref 6 "superkernels.cpp" 382 6 +.src_ref 6 "superkernels.cpp" 388 11 +.function_start + 10976 "00111010" // MOVS p4, p1; MOVXM p5, #508864 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10977 "00010001" // /* MW 9 */ + 10978 "11100000" // /* MW 8 */ + 10979 "10110001" // /* MW 7 */ + 10980 "11110010" // /* MW 6 */ + 10981 "00000001" // /* MW 5 */ + 10982 "00000000" // /* MW 4 */ + 10983 "01100000" // /* MW 3 */ + 10984 "10010001" // /* MW 2 */ + 10985 "10010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 382 6 first + 10986 "10011000" // LDA r16, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10987 "00010110" // /* MW 3 */ + 10988 "00000110" // /* MW 2 */ + 10989 "00000101" // /* MW 1 */ + 10990 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10991 "00000000" // /* MW 1 */ + 10992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10993 "00000000" // /* MW 1 */ + 10994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10995 "00000000" // /* MW 1 */ + 10996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10997 "00000000" // /* MW 1 */ + 10998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10999 "00000000" // /* MW 1 */ + 11000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11001 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 382 6 +.src_ref 6 "superkernels.cpp" 382 16 + 11002 "10000100" // JNZ r16, #11120 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11120 delay_slots=5 */ + 11003 "00000001" // /* MW 5 */ + 11004 "01000000" // /* MW 4 */ + 11005 "10111000" // /* MW 3 */ + 11006 "00010101" // /* MW 2 */ + 11007 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 381 +.delay_slot + 11008 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11009 "00000001" // /* MW 5 */ + 11010 "00000000" // /* MW 4 */ + 11011 "00000000" // /* MW 3 */ + 11012 "00001000" // /* MW 2 */ + 11013 "00000000" // /* MW 1 */ +.delay_slot + 11014 "10011000" // ST p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11015 "00011101" // /* MW 3 */ + 11016 "11111111" // /* MW 2 */ + 11017 "00001111" // /* MW 1 */ +.delay_slot + 11018 "10011000" // ST p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11019 "10011101" // /* MW 3 */ + 11020 "11110111" // /* MW 2 */ + 11021 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.delay_slot + 11022 "00000010" // ST lr, [sp, #-8]; MOV p7, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11023 "01110000" // /* MW 7 */ + 11024 "01100000" // /* MW 6 */ + 11025 "10110000" // /* MW 5 */ + 11026 "00000011" // /* MW 4 */ + 11027 "10110000" // /* MW 3 */ + 11028 "00000111" // /* MW 2 */ + 11029 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 1 "io_buffer_main.h" 348 51 +.delay_slot + 11030 "11111000" // MOV p6, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11031 "11000000" // /* MW 3 */ + 11032 "01100110" // /* MW 2 */ + 11033 "00011110" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.src_ref 6 "superkernels.cpp" 384 6 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11034 "00111010" // MOVS p0, p2; MOVXM p3, #508944 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11035 "00010001" // /* MW 9 */ + 11036 "00001000" // /* MW 8 */ + 11037 "10110010" // /* MW 7 */ + 11038 "11110001" // /* MW 6 */ + 11039 "00000001" // /* MW 5 */ + 11040 "00000000" // /* MW 4 */ + 11041 "01100000" // /* MW 3 */ + 11042 "00010001" // /* MW 2 */ + 11043 "00010001" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11044 "10111010" // ST.s8 r16, [p3]; MOVXM p3, #508940 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11045 "00010000" // /* MW 9 */ + 11046 "00000110" // /* MW 8 */ + 11047 "10110010" // /* MW 7 */ + 11048 "11110001" // /* MW 6 */ + 11049 "00000001" // /* MW 5 */ + 11050 "00000000" // /* MW 4 */ + 11051 "11100000" // /* MW 3 */ + 11052 "11000000" // /* MW 2 */ + 11053 "01100000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11055 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 384 6 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 11056 "00000100" // JL #7872 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7872 delay_slots=5 */ + 11057 "00000001" // /* MW 5 */ + 11058 "00000000" // /* MW 4 */ + 11059 "01100000" // /* MW 3 */ + 11060 "00001111" // /* MW 2 */ + 11061 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11063 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11065 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11066 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11067 "00110001" // /* MW 3 */ + 11068 "00100000" // /* MW 2 */ + 11069 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 11070 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11071 "00000101" // /* MW 3 */ + 11072 "00100000" // /* MW 2 */ + 11073 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 11074 "00101110" // NOPA; ST r16, [p3]; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 11075 "00011100" // /* MW 13 */ + 11076 "00000000" // /* MW 12 */ + 11077 "00000000" // /* MW 11 */ + 11078 "01010111" // /* MW 10 */ + 11079 "00011010" // /* MW 9 */ + 11080 "01000000" // /* MW 8 */ + 11081 "00000000" // /* MW 7 */ + 11082 "00000000" // /* MW 6 */ + 11083 "00100011" // /* MW 5 */ + 11084 "00001100" // /* MW 4 */ + 11085 "11110110" // /* MW 3 */ + 11086 "00101100" // /* MW 2 */ + 11087 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 385 18 +.src_ref 6 "superkernels.cpp" 385 20 first +.return_address + 11088 "10111010" // LDA el0, [p2, #24]; MOVXM p2, #508872 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11089 "00010000" // /* MW 9 */ + 11090 "11100100" // /* MW 8 */ + 11091 "00110001" // /* MW 7 */ + 11092 "11110001" // /* MW 6 */ + 11093 "00000001" // /* MW 5 */ + 11094 "00000000" // /* MW 4 */ + 11095 "11010000" // /* MW 3 */ + 11096 "10000101" // /* MW 2 */ + 11097 "01001100" // /* MW 1 */ + 11098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11099 "00000000" // /* MW 1 */ + 11100 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11101 "00000000" // /* MW 1 */ + 11102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11103 "00000000" // /* MW 1 */ + 11104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11105 "00000000" // /* MW 1 */ + 11106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11107 "00000000" // /* MW 1 */ + 11108 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11109 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 385 18 + 11110 "01111010" // NOPA; ST el0, [p2]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11111 "00000000" // /* MW 9 */ + 11112 "00000000" // /* MW 8 */ + 11113 "00000000" // /* MW 7 */ + 11114 "10000000" // /* MW 6 */ + 11115 "00101001" // /* MW 5 */ + 11116 "00000100" // /* MW 4 */ + 11117 "11110010" // /* MW 3 */ + 11118 "00101100" // /* MW 2 */ + 11119 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_144 +.src_ref 6 "superkernels.cpp" 387 12 +.src_ref 6 "superkernels.cpp" 388 11 first + 11120 "10111010" // LDA r16, [p5]; MOVXM p2, #508868 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11121 "00010000" // /* MW 9 */ + 11122 "11100010" // /* MW 8 */ + 11123 "00110001" // /* MW 7 */ + 11124 "11110001" // /* MW 6 */ + 11125 "00000001" // /* MW 5 */ + 11126 "00000000" // /* MW 4 */ + 11127 "11010000" // /* MW 3 */ + 11128 "11000010" // /* MW 2 */ + 11129 "10100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 387 12 first + 11130 "10011000" // LDA r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11131 "00110110" // /* MW 3 */ + 11132 "00000110" // /* MW 2 */ + 11133 "00000010" // /* MW 1 */ + 11134 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11135 "00000000" // /* MW 1 */ + 11136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11137 "00000000" // /* MW 1 */ + 11138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11139 "00000000" // /* MW 1 */ + 11140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11141 "00000000" // /* MW 1 */ + 11142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11143 "00000000" // /* MW 1 */ + 11144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11145 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 389 6 first +.src_ref 6 "superkernels.cpp" 389 17 first + 11146 "10000100" // JNZ r17, #11232 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11232 delay_slots=5 */ + 11147 "00000001" // /* MW 5 */ + 11148 "01000000" // /* MW 4 */ + 11149 "11110000" // /* MW 3 */ + 11150 "00010101" // /* MW 2 */ + 11151 "10001000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 388 11 first +.delay_slot + 11152 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11153 "00000111" // /* MW 3 */ + 11154 "00100000" // /* MW 2 */ + 11155 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 388 11 +.delay_slot + 11156 "10011000" // ST r16, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11157 "00010001" // /* MW 3 */ + 11158 "00000110" // /* MW 2 */ + 11159 "00001101" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 387 12 first +.delay_slot + 11160 "00011000" // ADD r16, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11161 "00000111" // /* MW 3 */ + 11162 "01100000" // /* MW 2 */ + 11163 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 387 12 +.delay_slot + 11164 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11165 "00010001" // /* MW 3 */ + 11166 "00000110" // /* MW 2 */ + 11167 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11168 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11169 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 + 11170 "11111000" // MOV r16, p6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11171 "11000000" // /* MW 3 */ + 11172 "00011100" // /* MW 2 */ + 11173 "00011100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 first + 11174 "00011000" // ADD.NC p2, r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11175 "00000110" // /* MW 3 */ + 11176 "01101000" // /* MW 2 */ + 11177 "00011010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 + 11178 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11179 "01110110" // /* MW 3 */ + 11180 "11111111" // /* MW 2 */ + 11181 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 11182 "10011000" // LDA r16, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11183 "00010110" // /* MW 3 */ + 11184 "11111110" // /* MW 2 */ + 11185 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 11186 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11187 "00110110" // /* MW 3 */ + 11188 "11111110" // /* MW 2 */ + 11189 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 11190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11191 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 11192 "10011000" // LDA r16, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11193 "00010110" // /* MW 3 */ + 11194 "01000110" // /* MW 2 */ + 11195 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11197 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11199 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11201 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11203 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11204 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11205 "00000010" // /* MW 3 */ + 11206 "01100001" // /* MW 2 */ + 11207 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11208 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11209 "00010001" // /* MW 3 */ + 11210 "00000110" // /* MW 2 */ + 11211 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 + 11212 "00011000" // MOVX r17, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11213 "11111101" // /* MW 3 */ + 11214 "11100010" // /* MW 2 */ + 11215 "00010111" // /* MW 1 */ + 11216 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11217 "00000000" // /* MW 1 */ + 11218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11219 "00000000" // /* MW 1 */ + 11220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11221 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 11222 "01111010" // NOPA; NOPS; ACQ r16, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11223 "00011000" // /* MW 9 */ + 11224 "00010011" // /* MW 8 */ + 11225 "00000100" // /* MW 7 */ + 11226 "00000000" // /* MW 6 */ + 11227 "01011011" // /* MW 5 */ + 11228 "00000001" // /* MW 4 */ + 11229 "11110000" // /* MW 3 */ + 11230 "00101100" // /* MW 2 */ + 11231 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_256 +.src_ref 7 "gemm_bfp16.h" 285 80 +.src_ref 7 "gemm_bfp16.h" 285 80 + 11232 "10111010" // MOVA r24, #0; MOVXM r16, #2147483616 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11233 "00010000" // /* MW 9 */ + 11234 "11110000" // /* MW 8 */ + 11235 "00001111" // /* MW 7 */ + 11236 "11111110" // /* MW 6 */ + 11237 "11111111" // /* MW 5 */ + 11238 "00011111" // /* MW 4 */ + 11239 "00000000" // /* MW 3 */ + 11240 "00011000" // /* MW 2 */ + 11241 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 285 74 + 11242 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11243 "00000101" // /* MW 3 */ + 11244 "00100010" // /* MW 2 */ + 11245 "00010000" // /* MW 1 */ + 11246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11247 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 7 "gemm_bfp16.h" 285 86 + 11248 "10111010" // LDA p3, [p4]; MOVXM p4, #508428 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11249 "00010000" // /* MW 9 */ + 11250 "00000110" // /* MW 8 */ + 11251 "00110001" // /* MW 7 */ + 11252 "11110010" // /* MW 6 */ + 11253 "00000001" // /* MW 5 */ + 11254 "00000000" // /* MW 4 */ + 11255 "11010000" // /* MW 3 */ + 11256 "10110011" // /* MW 2 */ + 11257 "10000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 285 86 first + 11258 "10011000" // LDA r27, [p4], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11259 "01110110" // /* MW 3 */ + 11260 "11111111" // /* MW 2 */ + 11261 "00000100" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 285 80 + 11262 "10011000" // LDA r18, [p4], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11263 "01010110" // /* MW 3 */ + 11264 "11101110" // /* MW 2 */ + 11265 "00000100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 6 "superkernels.cpp" 393 34 + 11266 "11010100" // LDA p0, [p7]; MOV p7, p4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11267 "10000001" // /* MW 5 */ + 11268 "11010001" // /* MW 4 */ + 11269 "11011110" // /* MW 3 */ + 11270 "10000011" // /* MW 2 */ + 11271 "11100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 + 11272 "10011000" // LDA p2, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11273 "00011110" // /* MW 3 */ + 11274 "00000101" // /* MW 2 */ + 11275 "00000110" // /* MW 1 */ + 11276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11277 "00000000" // /* MW 1 */ + 11278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11279 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 285 74 + 11280 "11111000" // MOV r19, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11281 "11000000" // /* MW 3 */ + 11282 "11010110" // /* MW 2 */ + 11283 "00011100" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 285 4 first +.no_stack_arguments + 11284 "00000100" // JL #9040 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9040 delay_slots=5 */ + 11285 "00000001" // /* MW 5 */ + 11286 "00000000" // /* MW 4 */ + 11287 "10101000" // /* MW 3 */ + 11288 "00010001" // /* MW 2 */ + 11289 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 285 80 +.delay_slot + 11290 "00011000" // ADD r18, r18, #31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11291 "01111111" // /* MW 3 */ + 11292 "10100100" // /* MW 2 */ + 11293 "00010100" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 285 80 +.delay_slot + 11294 "10011000" // AND r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11295 "00000100" // /* MW 3 */ + 11296 "10100001" // /* MW 2 */ + 11297 "00010100" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 285 80 +.delay_slot + 11298 "00011000" // SEL.EQZ r16, r24, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11299 "00000010" // /* MW 3 */ + 11300 "00100001" // /* MW 2 */ + 11301 "00010110" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 285 74 +.delay_slot + 11302 "10011000" // LSHL r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11303 "00011101" // /* MW 3 */ + 11304 "00100001" // /* MW 2 */ + 11305 "00010100" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 285 74 +.delay_slot + 11306 "10010100" // NOPA; ADD.NC p1, r19, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11307 "10000010" // /* MW 5 */ + 11308 "11010011" // /* MW 4 */ + 11309 "11110010" // /* MW 3 */ + 11310 "00101100" // /* MW 2 */ + 11311 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 393 6 +.src_ref 6 "superkernels.cpp" 393 34 first +.src_ref 6 "superkernels.cpp" 394 17 +.return_address + 11312 "10111010" // LDA r16, [p7, #16]; MOVXM p2, #508868 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11313 "00010000" // /* MW 9 */ + 11314 "11100010" // /* MW 8 */ + 11315 "00110001" // /* MW 7 */ + 11316 "11110001" // /* MW 6 */ + 11317 "00000001" // /* MW 5 */ + 11318 "00000000" // /* MW 4 */ + 11319 "11010000" // /* MW 3 */ + 11320 "11000010" // /* MW 2 */ + 11321 "11101000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 +.src_ref 6 "superkernels.cpp" 393 6 + 11322 "11010100" // LDA r18, [p2]; MOV r17, p6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11323 "10000001" // /* MW 5 */ + 11324 "10111001" // /* MW 4 */ + 11325 "11011000" // /* MW 3 */ + 11326 "11001010" // /* MW 2 */ + 11327 "01000000" // /* MW 1 */ + 11328 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11329 "10011001" // /* MW 3 */ + 11330 "11110111" // /* MW 2 */ + 11331 "00000111" // /* MW 1 */ + 11332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11333 "00000000" // /* MW 1 */ + 11334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11335 "00000000" // /* MW 1 */ + 11336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11337 "00000000" // /* MW 1 */ + 11338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11339 "00000000" // /* MW 1 */ + 11340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11341 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 393 17 + 11342 "10011000" // NE r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11343 "00001000" // /* MW 3 */ + 11344 "10100001" // /* MW 2 */ + 11345 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 393 6 + 11346 "10000100" // JNZ r16, #11424 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11424 delay_slots=5 */ + 11347 "00000001" // /* MW 5 */ + 11348 "01000000" // /* MW 4 */ + 11349 "01010000" // /* MW 3 */ + 11350 "00010110" // /* MW 2 */ + 11351 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 394 17 +.src_ref 6 "superkernels.cpp" 398 16 +.delay_slot + 11352 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11353 "00000001" // /* MW 3 */ + 11354 "00110000" // /* MW 2 */ + 11355 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11357 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11359 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11361 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11363 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 449 8 +.src_ref 6 "superkernels.cpp" 394 17 first + 11364 "00111010" // ST r24, [p2]; MOVX r16, #1; ADD.NC p6, r17, #20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11365 "00001001" // /* MW 9 */ + 11366 "01000101" // /* MW 8 */ + 11367 "00110100" // /* MW 7 */ + 11368 "00101011" // /* MW 6 */ + 11369 "00000000" // /* MW 5 */ + 11370 "00000001" // /* MW 4 */ + 11371 "00110000" // /* MW 3 */ + 11372 "11100010" // /* MW 2 */ + 11373 "01000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 + 11374 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11375 "00110110" // /* MW 3 */ + 11376 "00000110" // /* MW 2 */ + 11377 "00000110" // /* MW 1 */ + 11378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11379 "00000000" // /* MW 1 */ + 11380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11381 "00000000" // /* MW 1 */ + 11382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11383 "00000000" // /* MW 1 */ + 11384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11385 "00000000" // /* MW 1 */ + 11386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11387 "00000000" // /* MW 1 */ + 11388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11389 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 11390 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11391 "00001000" // /* MW 3 */ + 11392 "01010001" // /* MW 2 */ + 11393 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first + 11394 "10011000" // LDA r17, [p6, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11395 "00110110" // /* MW 3 */ + 11396 "11100110" // /* MW 2 */ + 11397 "00000110" // /* MW 1 */ + 11398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11399 "00000000" // /* MW 1 */ + 11400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11401 "00000000" // /* MW 1 */ + 11402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11403 "00000000" // /* MW 1 */ + 11404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11405 "00000000" // /* MW 1 */ + 11406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11407 "00000000" // /* MW 1 */ + 11408 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11409 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 + 11410 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11411 "00010001" // /* MW 3 */ + 11412 "00100001" // /* MW 2 */ + 11413 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 11414 "01111010" // NOPA; ST r16, [p6, #-8]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11415 "00000000" // /* MW 9 */ + 11416 "00000000" // /* MW 8 */ + 11417 "00000000" // /* MW 7 */ + 11418 "10000000" // /* MW 6 */ + 11419 "00010001" // /* MW 5 */ + 11420 "11100110" // /* MW 4 */ + 11421 "11110110" // /* MW 3 */ + 11422 "00101100" // /* MW 2 */ + 11423 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 6 "superkernels.cpp" 397 6 +.src_ref 6 "superkernels.cpp" 398 16 + 11424 "01000100" // MOVXM p2, #508864 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11425 "10000000" // /* MW 5 */ + 11426 "11000111" // /* MW 4 */ + 11427 "11000100" // /* MW 3 */ + 11428 "00000111" // /* MW 2 */ + 11429 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 397 6 first +.src_ref 6 "superkernels.cpp" 397 19 + 11430 "10111010" // LDA r16, [p2]; MOVXM p3, #508872 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11431 "00010000" // /* MW 9 */ + 11432 "11100100" // /* MW 8 */ + 11433 "10110001" // /* MW 7 */ + 11434 "11110001" // /* MW 6 */ + 11435 "00000001" // /* MW 5 */ + 11436 "00000000" // /* MW 4 */ + 11437 "11010000" // /* MW 3 */ + 11438 "11000010" // /* MW 2 */ + 11439 "01000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 397 19 + 11440 "10011000" // LDA r17, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11441 "00110110" // /* MW 3 */ + 11442 "00000110" // /* MW 2 */ + 11443 "00000011" // /* MW 1 */ + 11444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11445 "00000000" // /* MW 1 */ + 11446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11447 "00000000" // /* MW 1 */ + 11448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11449 "00000000" // /* MW 1 */ + 11450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11451 "00000000" // /* MW 1 */ + 11452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11453 "00000000" // /* MW 1 */ + 11454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11455 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 397 16 + 11456 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11457 "00001000" // /* MW 3 */ + 11458 "01100001" // /* MW 2 */ + 11459 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 397 6 + 11460 "10000100" // JNZ r16, #11488 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11488 delay_slots=5 */ + 11461 "00000001" // /* MW 5 */ + 11462 "01000000" // /* MW 4 */ + 11463 "01110000" // /* MW 3 */ + 11464 "00010110" // /* MW 2 */ + 11465 "10000000" // /* MW 1 */ +.delay_slot + 11466 "00011000" // LDA p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11467 "00011001" // /* MW 3 */ + 11468 "11111111" // /* MW 2 */ + 11469 "00000111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11471 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11473 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11475 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11477 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 398 16 first + 11478 "01111010" // NOPA; ST r24, [p2]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11479 "00000000" // /* MW 9 */ + 11480 "00000000" // /* MW 8 */ + 11481 "00000000" // /* MW 7 */ + 11482 "10000000" // /* MW 6 */ + 11483 "00010001" // /* MW 5 */ + 11484 "00000111" // /* MW 4 */ + 11485 "11110010" // /* MW 3 */ + 11486 "00101100" // /* MW 2 */ + 11487 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_512 +.src_ref 6 "superkernels.cpp" 400 + 11488 "00011000" // LDA lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11489 "00111001" // /* MW 3 */ + 11490 "11111000" // /* MW 2 */ + 11491 "00000111" // /* MW 1 */ + 11492 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11493 "00000000" // /* MW 1 */ + 11494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11495 "00000000" // /* MW 1 */ + 11496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11497 "00000000" // /* MW 1 */ + 11498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11499 "00000000" // /* MW 1 */ + 11500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11501 "00000000" // /* MW 1 */ + 11502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11503 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 400 first + 11504 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11505 "00000000" // /* MW 3 */ + 11506 "00101000" // /* MW 2 */ + 11507 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 400 +.delay_slot + 11508 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11509 "00000001" // /* MW 5 */ + 11510 "00000000" // /* MW 4 */ + 11511 "00000000" // /* MW 3 */ + 11512 "11111000" // /* MW 2 */ + 11513 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11515 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11517 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11519 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 11521 "00000000" // /* MW 1 */ +.label __Z15_b13786_wrapperPPv___func_begin0 +.label _Z15_b13786_wrapperPPv +.function _b13786_wrapper _Z15_b13786_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 20 first +.src_ref 0 "0_0_reloadable5.cc" 22 79 +.function_start + 11536 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11537 "11000000" // /* MW 3 */ + 11538 "01100000" // /* MW 2 */ + 11539 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 22 79 first + 11540 "10011000" // LDA p0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11541 "00011110" // /* MW 3 */ + 11542 "00011100" // /* MW 2 */ + 11543 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 23 79 first + 11544 "10011000" // LDA p1, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11545 "10011110" // /* MW 3 */ + 11546 "00101100" // /* MW 2 */ + 11547 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 25 81 first + 11548 "10011000" // LDA p3, [p2, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11549 "10011110" // /* MW 3 */ + 11550 "11110101" // /* MW 2 */ + 11551 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 24 46 first + 11552 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11553 "00011110" // /* MW 3 */ + 11554 "00000101" // /* MW 2 */ + 11555 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 21 4 first +.tail_call + 11556 "10000100" // J #10976 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10976 delay_slots=5 */ + 11557 "00000000" // /* MW 5 */ + 11558 "00000000" // /* MW 4 */ + 11559 "01110000" // /* MW 3 */ + 11560 "00010101" // /* MW 2 */ + 11561 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11563 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11565 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11567 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11569 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z15_b13786_wrapperPPv__end +.label __Z15_b13786_wrapperPPv___func_end0 + 11571 "00000000" // /* MW 1 */ +.label __Z15_b13811_wrapperPPv___func_begin0 +.label _Z15_b13811_wrapperPPv +.function _b13811_wrapper _Z15_b13811_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 29 first +.src_ref 0 "0_0_reloadable5.cc" 31 79 +.function_start + 11584 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11585 "11000000" // /* MW 3 */ + 11586 "01100000" // /* MW 2 */ + 11587 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 31 79 first + 11588 "10011000" // LDA p0, [p2], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11589 "00011110" // /* MW 3 */ + 11590 "00111100" // /* MW 2 */ + 11591 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 32 47 first + 11592 "10011000" // LDA p1, [p2], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11593 "10011110" // /* MW 3 */ + 11594 "11101100" // /* MW 2 */ + 11595 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 34 81 first + 11596 "10011000" // LDA p3, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11597 "10011110" // /* MW 3 */ + 11598 "00010101" // /* MW 2 */ + 11599 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 33 80 first + 11600 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11601 "00011110" // /* MW 3 */ + 11602 "00000101" // /* MW 2 */ + 11603 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 30 4 first +.tail_call + 11604 "10000100" // J #6144 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=6144 delay_slots=5 */ + 11605 "00000000" // /* MW 5 */ + 11606 "00000000" // /* MW 4 */ + 11607 "00000000" // /* MW 3 */ + 11608 "00001100" // /* MW 2 */ + 11609 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11611 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11613 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11615 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11616 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11617 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z15_b13811_wrapperPPv__end +.label __Z15_b13811_wrapperPPv___func_end0 + 11619 "00000000" // /* MW 1 */ +.label __Z15_b13739_wrapperPPv___func_begin0 +.label _Z15_b13739_wrapperPPv +.function _b13739_wrapper _Z15_b13739_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 38 first +.src_ref 0 "0_0_reloadable5.cc" 40 79 +.function_start + 11632 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11633 "11000000" // /* MW 3 */ + 11634 "01100000" // /* MW 2 */ + 11635 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 40 79 first + 11636 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11637 "00011110" // /* MW 3 */ + 11638 "00101100" // /* MW 2 */ + 11639 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 42 81 first + 11640 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11641 "00011110" // /* MW 3 */ + 11642 "11110101" // /* MW 2 */ + 11643 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 41 47 first + 11644 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11645 "10011110" // /* MW 3 */ + 11646 "00000100" // /* MW 2 */ + 11647 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 39 4 first +.tail_call + 11648 "10000100" // J #3904 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=3904 delay_slots=5 */ + 11649 "00000000" // /* MW 5 */ + 11650 "00000000" // /* MW 4 */ + 11651 "10100000" // /* MW 3 */ + 11652 "00000111" // /* MW 2 */ + 11653 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11655 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11657 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11659 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11661 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z15_b13739_wrapperPPv__end +.label __Z15_b13739_wrapperPPv___func_end0 + 11663 "00000000" // /* MW 1 */ +.label __Z15_b13744_wrapperPPv___func_begin0 +.label _Z15_b13744_wrapperPPv +.function _b13744_wrapper _Z15_b13744_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 46 first +.src_ref 0 "0_0_reloadable5.cc" 48 79 +.function_start + 11664 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11665 "11000000" // /* MW 3 */ + 11666 "01100000" // /* MW 2 */ + 11667 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 48 79 first + 11668 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11669 "00011110" // /* MW 3 */ + 11670 "00101100" // /* MW 2 */ + 11671 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 50 81 first + 11672 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11673 "00011110" // /* MW 3 */ + 11674 "11110101" // /* MW 2 */ + 11675 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 49 47 first + 11676 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11677 "10011110" // /* MW 3 */ + 11678 "00000100" // /* MW 2 */ + 11679 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 47 4 first +.tail_call + 11680 "10000100" // J #5360 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=5360 delay_slots=5 */ + 11681 "00000000" // /* MW 5 */ + 11682 "00000000" // /* MW 4 */ + 11683 "01111000" // /* MW 3 */ + 11684 "00001010" // /* MW 2 */ + 11685 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11687 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11689 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11691 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11693 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11694 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z15_b13744_wrapperPPv__end +.label __Z15_b13744_wrapperPPv___func_end0 + 11695 "00000000" // /* MW 1 */ +.label __Z15_b13749_wrapperPPv___func_begin0 +.label _Z15_b13749_wrapperPPv +.function _b13749_wrapper _Z15_b13749_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 54 first +.src_ref 0 "0_0_reloadable5.cc" 56 79 +.function_start + 11696 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11697 "11000000" // /* MW 3 */ + 11698 "01100000" // /* MW 2 */ + 11699 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 56 79 first + 11700 "10011000" // LDA p0, [p2], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11701 "00011110" // /* MW 3 */ + 11702 "00111100" // /* MW 2 */ + 11703 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 57 47 first + 11704 "10011000" // LDA p1, [p2], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11705 "10011110" // /* MW 3 */ + 11706 "11101100" // /* MW 2 */ + 11707 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 59 81 first + 11708 "10011000" // LDA p3, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11709 "10011110" // /* MW 3 */ + 11710 "00010101" // /* MW 2 */ + 11711 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 58 80 first + 11712 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11713 "00011110" // /* MW 3 */ + 11714 "00000101" // /* MW 2 */ + 11715 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 55 4 first +.tail_call + 11716 "10000100" // J #7264 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=7264 delay_slots=5 */ + 11717 "00000000" // /* MW 5 */ + 11718 "00000000" // /* MW 4 */ + 11719 "00110000" // /* MW 3 */ + 11720 "00001110" // /* MW 2 */ + 11721 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11723 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11725 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11727 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11728 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11729 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z15_b13749_wrapperPPv__end +.label __Z15_b13749_wrapperPPv___func_end0 + 11731 "00000000" // /* MW 1 */ +.label _ZL30setup_rmsnorm_row_major_paramsR33rmsnorm_row_major_internal_paramsRA10_Kj +.label __ZL30setup_rmsnorm_row_major_paramsR33rmsnorm_row_major_internal_paramsRA10_Kj___func_begin0 +.function setup_rmsnorm_row_major_params _ZL30setup_rmsnorm_row_major_paramsR33rmsnorm_row_major_internal_paramsRA10_Kj +.src_ref 3 "rmsnorm_row_major_params.h" 45 first +.src_ref 3 "rmsnorm_row_major_params.h" 48 34 +.src_ref 3 "rmsnorm_row_major_params.h" 49 21 +.src_ref 3 "rmsnorm_row_major_params.h" 62 38 +.function_start + 11744 "01110110" // MOVA m0, #-24; MOVS p1, p7; MOVX r20, #-1; MOV r16, p0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 11745 "01111000" // /* MW 11 */ + 11746 "01100000" // /* MW 10 */ + 11747 "00001000" // /* MW 9 */ + 11748 "11101010" // /* MW 8 */ + 11749 "01000111" // /* MW 7 */ + 11750 "00111111" // /* MW 6 */ + 11751 "10001011" // /* MW 5 */ + 11752 "10011100" // /* MW 4 */ + 11753 "10000001" // /* MW 3 */ + 11754 "00000000" // /* MW 2 */ + 11755 "11111101" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 48 34 first +.src_ref 3 "rmsnorm_row_major_params.h" 51 19 +.src_ref 3 "rmsnorm_row_major_params.h" 62 27 +.src_ref 3 "rmsnorm_row_major_params.h" 63 27 +.src_ref 3 "rmsnorm_row_major_params.h" 64 23 +.src_ref 3 "rmsnorm_row_major_params.h" 65 23 + 11756 "10111010" // MOVA m1, #54; MOVX r16, #1; ADD.NC p0, r16, #20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11757 "00001000" // /* MW 9 */ + 11758 "00000101" // /* MW 8 */ + 11759 "00110100" // /* MW 7 */ + 11760 "00101000" // /* MW 6 */ + 11761 "00000000" // /* MW 5 */ + 11762 "00000001" // /* MW 4 */ + 11763 "10000000" // /* MW 3 */ + 11764 "11000100" // /* MW 2 */ + 11765 "00000110" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 48 21 +.src_ref 3 "rmsnorm_row_major_params.h" 50 29 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11766 "10111010" // LDA.s16 r18, [p0], #4; MOVXM p7, #508788 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11767 "00010000" // /* MW 9 */ + 11768 "10111010" // /* MW 8 */ + 11769 "10110001" // /* MW 7 */ + 11770 "11110011" // /* MW 6 */ + 11771 "00000001" // /* MW 5 */ + 11772 "00000000" // /* MW 4 */ + 11773 "01010000" // /* MW 3 */ + 11774 "11001010" // /* MW 2 */ + 11775 "00000101" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 50 29 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11776 "11010100" // ST.s16 r18, [p7], #2; MOV r0, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11777 "01000001" // /* MW 5 */ + 11778 "00101111" // /* MW 4 */ + 11779 "11100000" // /* MW 3 */ + 11780 "11001010" // /* MW 2 */ + 11781 "11100011" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 49 21 first +.src_ref 3 "rmsnorm_row_major_params.h" 68 70 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11782 "01010100" // LDA.s16 r17, [p0], m0; MOV m0, #-76 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11783 "11010001" // /* MW 5 */ + 11784 "00011110" // /* MW 4 */ + 11785 "01010000" // /* MW 3 */ + 11786 "01000110" // /* MW 2 */ + 11787 "00000001" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 53 23 first + 11788 "10011000" // LDA el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11789 "00101110" // /* MW 3 */ + 11790 "00011100" // /* MW 2 */ + 11791 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 45 + 11792 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11793 "00000001" // /* MW 5 */ + 11794 "00000000" // /* MW 4 */ + 11795 "00000000" // /* MW 3 */ + 11796 "00001000" // /* MW 2 */ + 11797 "00000000" // /* MW 1 */ + 11798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11799 "00000000" // /* MW 1 */ + 11800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11801 "00000000" // /* MW 1 */ + 11802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11803 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 51 19 first + 11804 "00011000" // ST.s16 r17, [p7], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11805 "00110111" // /* MW 3 */ + 11806 "00101010" // /* MW 2 */ + 11807 "00000111" // /* MW 1 */ + 11808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11809 "00000000" // /* MW 1 */ + 11810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11811 "00000000" // /* MW 1 */ + 11812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11813 "00000000" // /* MW 1 */ + 11814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11815 "00000000" // /* MW 1 */ + 11816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11817 "00000000" // /* MW 1 */ + 11818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11819 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 68 70 first + 11820 "00001100" // LDA r15, [p7], m0; ST r13, [sp, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11821 "01101011" // /* MW 5 */ + 11822 "11111011" // /* MW 4 */ + 11823 "11011111" // /* MW 3 */ + 11824 "00111110" // /* MW 2 */ + 11825 "11100001" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 53 21 first + 11826 "10011000" // ST el0, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11827 "00101001" // /* MW 3 */ + 11828 "00011100" // /* MW 2 */ + 11829 "00001111" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 54 24 first + 11830 "00001100" // LDA r17, [p0], #4; ST lr, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11831 "01111011" // /* MW 5 */ + 11832 "11011000" // /* MW 4 */ + 11833 "11011111" // /* MW 3 */ + 11834 "11000110" // /* MW 2 */ + 11835 "00000011" // /* MW 1 */ + 11836 "10011000" // ST r0, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11837 "00010101" // /* MW 3 */ + 11838 "11111000" // /* MW 2 */ + 11839 "00001111" // /* MW 1 */ + 11840 "10011000" // ST r14, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11841 "11010101" // /* MW 3 */ + 11842 "11110001" // /* MW 2 */ + 11843 "00001111" // /* MW 1 */ + 11844 "10011000" // ST p1, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11845 "10011101" // /* MW 3 */ + 11846 "11110100" // /* MW 2 */ + 11847 "00001111" // /* MW 1 */ + 11848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11849 "00000000" // /* MW 1 */ + 11850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11851 "00000000" // /* MW 1 */ + 11852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11853 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 54 22 + 11854 "10011000" // ST r17, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11855 "00110001" // /* MW 3 */ + 11856 "00011110" // /* MW 2 */ + 11857 "00001111" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 55 23 first + 11858 "10011000" // LDA r18, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11859 "01010110" // /* MW 3 */ + 11860 "00011110" // /* MW 2 */ + 11861 "00000000" // /* MW 1 */ + 11862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11863 "00000000" // /* MW 1 */ + 11864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11865 "00000000" // /* MW 1 */ + 11866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11867 "00000000" // /* MW 1 */ + 11868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11869 "00000000" // /* MW 1 */ + 11870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11871 "00000000" // /* MW 1 */ + 11872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11873 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 55 21 + 11874 "10011000" // ST r18, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11875 "01010001" // /* MW 3 */ + 11876 "00011110" // /* MW 2 */ + 11877 "00001111" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 56 25 first + 11878 "10011000" // LDA r19, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11879 "01110110" // /* MW 3 */ + 11880 "00011110" // /* MW 2 */ + 11881 "00000000" // /* MW 1 */ + 11882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11883 "00000000" // /* MW 1 */ + 11884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11885 "00000000" // /* MW 1 */ + 11886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11887 "00000000" // /* MW 1 */ + 11888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11889 "00000000" // /* MW 1 */ + 11890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11891 "00000000" // /* MW 1 */ + 11892 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11893 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 56 23 + 11894 "10011000" // ST r19, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11895 "01110001" // /* MW 3 */ + 11896 "00011110" // /* MW 2 */ + 11897 "00001111" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 57 17 first + 11898 "10011000" // LDA r21, [p0], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11899 "10110110" // /* MW 3 */ + 11900 "00111110" // /* MW 2 */ + 11901 "00000000" // /* MW 1 */ + 11902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11903 "00000000" // /* MW 1 */ + 11904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11905 "00000000" // /* MW 1 */ + 11906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11907 "00000000" // /* MW 1 */ + 11908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11909 "00000000" // /* MW 1 */ + 11910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11911 "00000000" // /* MW 1 */ + 11912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11913 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 57 15 +.src_ref 3 "rmsnorm_row_major_params.h" 62 38 first + 11914 "01011100" // ST r21, [p7], #8; EQ r27, r21, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11915 "10001111" // /* MW 5 */ + 11916 "11101110" // /* MW 4 */ + 11917 "00111010" // /* MW 3 */ + 11918 "11010110" // /* MW 2 */ + 11919 "11100101" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 58 24 first +.src_ref 3 "rmsnorm_row_major_params.h" 62 27 + 11920 "00101100" // LDA r20, [p0], #4; SEL.EQZ r21, r18, r16, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11921 "00000100" // /* MW 5 */ + 11922 "01010110" // /* MW 4 */ + 11923 "11011001" // /* MW 3 */ + 11924 "11010010" // /* MW 2 */ + 11925 "00000011" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 62 27 first +.src_ref 3 "rmsnorm_row_major_params.h" 63 27 first + 11926 "00011000" // SEL.EQZ r18, r16, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11927 "00100010" // /* MW 3 */ + 11928 "00100101" // /* MW 2 */ + 11929 "00010100" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 63 27 + 11930 "10011000" // MUL r13, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11931 "00101111" // /* MW 3 */ + 11932 "01011011" // /* MW 2 */ + 11933 "00010100" // /* MW 1 */ + 11934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11935 "00000000" // /* MW 1 */ + 11936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11937 "00000000" // /* MW 1 */ + 11938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11939 "00000000" // /* MW 1 */ + 11940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11941 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 58 22 first + 11942 "10011000" // ST r20, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11943 "10010001" // /* MW 3 */ + 11944 "00011110" // /* MW 2 */ + 11945 "00001111" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 59 23 first + 11946 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11947 "00110110" // /* MW 3 */ + 11948 "00000110" // /* MW 2 */ + 11949 "00000000" // /* MW 1 */ + 11950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11951 "00000000" // /* MW 1 */ + 11952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11953 "00000000" // /* MW 1 */ + 11954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11955 "00000000" // /* MW 1 */ + 11956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11957 "00000000" // /* MW 1 */ + 11958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11959 "00000000" // /* MW 1 */ + 11960 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11961 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 59 21 +.src_ref 3 "rmsnorm_row_major_params.h" 62 27 first +.src_ref 3 "rmsnorm_row_major_params.h" 64 23 first + 11962 "01011100" // ST r17, [p7], #4; SEL.EQZ r18, r17, r16, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11963 "00000100" // /* MW 5 */ + 11964 "11001010" // /* MW 4 */ + 11965 "00111000" // /* MW 3 */ + 11966 "11000110" // /* MW 2 */ + 11967 "11100011" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 60 25 first +.src_ref 3 "rmsnorm_row_major_params.h" 62 27 +.src_ref 3 "rmsnorm_row_major_params.h" 65 23 first + 11968 "00101100" // LDA r16, [p0, #4]; SEL.EQZ r17, r16, r17, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11969 "00100100" // /* MW 5 */ + 11970 "01000110" // /* MW 4 */ + 11971 "11011000" // /* MW 3 */ + 11972 "11000010" // /* MW 2 */ + 11973 "00000010" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 65 23 + 11974 "10011000" // MUL r14, r17, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11975 "01001111" // /* MW 3 */ + 11976 "01011101" // /* MW 2 */ + 11977 "00010100" // /* MW 1 */ + 11978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11979 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 62 27 first + 11980 "10011000" // MUL r0, r19, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11981 "01011111" // /* MW 3 */ + 11982 "11000001" // /* MW 2 */ + 11983 "00010100" // /* MW 1 */ + 11984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11985 "00000000" // /* MW 1 */ + 11986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11987 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 66 49 first +.no_stack_arguments + 11988 "00000100" // JL #15728 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=15728 delay_slots=5 */ + 11989 "00000001" // /* MW 5 */ + 11990 "00000000" // /* MW 4 */ + 11991 "10111000" // /* MW 3 */ + 11992 "00011110" // /* MW 2 */ + 11993 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 60 23 first +.src_ref 3 "rmsnorm_row_major_params.h" 64 23 first +.delay_slot + 11994 "01011100" // ST r16, [p7], #4; MUL r1, r18, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11995 "00011111" // /* MW 5 */ + 11996 "00000110" // /* MW 4 */ + 11997 "00111001" // /* MW 3 */ + 11998 "11000010" // /* MW 2 */ + 11999 "11100011" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 62 25 first +.delay_slot + 12000 "10011000" // ST r0, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12001 "00010001" // /* MW 3 */ + 12002 "00011100" // /* MW 2 */ + 12003 "00001111" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 63 25 first +.delay_slot + 12004 "10011000" // ST r13, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12005 "10110001" // /* MW 3 */ + 12006 "00011101" // /* MW 2 */ + 12007 "00001111" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 64 21 first +.delay_slot + 12008 "10011000" // ST r1, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12009 "00110001" // /* MW 3 */ + 12010 "00011100" // /* MW 2 */ + 12011 "00001111" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 65 21 first +.delay_slot + 12012 "10011000" // ST r14, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12013 "11010001" // /* MW 3 */ + 12014 "00011101" // /* MW 2 */ + 12015 "00001111" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 66 26 first +.src_ref 3 "rmsnorm_row_major_params.h" 67 51 +.src_ref 3 "rmsnorm_row_major_params.h" 68 43 +.src_ref 3 "rmsnorm_row_major_params.h" 75 +.return_address + 12016 "01110110" // LDA r13, [sp, #-20]; ST r2, [p7], #4; ADD r16, r2, #63; MOV r0, r13 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12017 "01111000" // /* MW 11 */ + 12018 "01010000" // /* MW 10 */ + 12019 "00001011" // /* MW 9 */ + 12020 "11111000" // /* MW 8 */ + 12021 "00000111" // /* MW 7 */ + 12022 "10000101" // /* MW 6 */ + 12023 "01010001" // /* MW 5 */ + 12024 "00011100" // /* MW 4 */ + 12025 "00100111" // /* MW 3 */ + 12026 "10110110" // /* MW 2 */ + 12027 "11111101" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 68 43 first +.no_stack_arguments + 12028 "00000100" // JL #15728 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=15728 delay_slots=5 */ + 12029 "00000001" // /* MW 5 */ + 12030 "00000000" // /* MW 4 */ + 12031 "10111000" // /* MW 3 */ + 12032 "00011110" // /* MW 2 */ + 12033 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 67 25 +.src_ref 3 "rmsnorm_row_major_params.h" 68 43 +.delay_slot + 12034 "11100100" // MOVX r17, #-64; MOV r1, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12035 "01000001" // /* MW 5 */ + 12036 "10101110" // /* MW 4 */ + 12037 "00100000" // /* MW 3 */ + 12038 "01000000" // /* MW 2 */ + 12039 "11111100" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 67 25 first +.delay_slot + 12040 "10011000" // AND r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12041 "00000100" // /* MW 3 */ + 12042 "01100001" // /* MW 2 */ + 12043 "00010100" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 67 20 +.delay_slot + 12044 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12045 "00010001" // /* MW 3 */ + 12046 "00000110" // /* MW 2 */ + 12047 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12049 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12050 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 12051 "00011100" // /* MW 13 */ + 12052 "00000000" // /* MW 12 */ + 12053 "00000000" // /* MW 11 */ + 12054 "01010111" // /* MW 10 */ + 12055 "00011010" // /* MW 9 */ + 12056 "01000000" // /* MW 8 */ + 12057 "00000000" // /* MW 7 */ + 12058 "00000000" // /* MW 6 */ + 12059 "10110110" // /* MW 5 */ + 12060 "00000010" // /* MW 4 */ + 12061 "11110000" // /* MW 3 */ + 12062 "00101100" // /* MW 2 */ + 12063 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 68 62 first +.return_address +.no_stack_arguments + 12064 "00000100" // JL #15728 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=15728 delay_slots=5 */ + 12065 "00000001" // /* MW 5 */ + 12066 "00000000" // /* MW 4 */ + 12067 "10111000" // /* MW 3 */ + 12068 "00011110" // /* MW 2 */ + 12069 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 68 62 +.delay_slot + 12070 "11111000" // MOV r1, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12071 "10100000" // /* MW 3 */ + 12072 "01010111" // /* MW 2 */ + 12073 "00011000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 68 62 +.delay_slot + 12074 "11111000" // MOV r0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12075 "00100000" // /* MW 3 */ + 12076 "00010001" // /* MW 2 */ + 12077 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12078 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12079 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12080 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12081 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12082 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 12083 "00011100" // /* MW 13 */ + 12084 "00000000" // /* MW 12 */ + 12085 "00000000" // /* MW 11 */ + 12086 "01010111" // /* MW 10 */ + 12087 "00011010" // /* MW 9 */ + 12088 "01000000" // /* MW 8 */ + 12089 "00000000" // /* MW 7 */ + 12090 "00000000" // /* MW 6 */ + 12091 "10110110" // /* MW 5 */ + 12092 "00000010" // /* MW 4 */ + 12093 "11110000" // /* MW 3 */ + 12094 "00101100" // /* MW 2 */ + 12095 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 75 +.return_address + 12096 "11010100" // LDA r15, [sp, #-8]; MOV lr, r13 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12097 "01000001" // /* MW 5 */ + 12098 "11101101" // /* MW 4 */ + 12099 "00101110" // /* MW 3 */ + 12100 "00111110" // /* MW 2 */ + 12101 "11111111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 12102 "00011000" // LDA r13, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12103 "10110001" // /* MW 3 */ + 12104 "11111101" // /* MW 2 */ + 12105 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 12106 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12107 "10011001" // /* MW 3 */ + 12108 "11110111" // /* MW 2 */ + 12109 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12110 "00011000" // LDA r14, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12111 "11010001" // /* MW 3 */ + 12112 "11110001" // /* MW 2 */ + 12113 "00000111" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 75 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12114 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12115 "00000000" // /* MW 3 */ + 12116 "00101000" // /* MW 2 */ + 12117 "00010000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 68 20 first +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12118 "10011000" // ST r2, [p7, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12119 "01010001" // /* MW 3 */ + 12120 "00010100" // /* MW 2 */ + 12121 "00001111" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 75 first +.delay_slot + 12122 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12123 "00000001" // /* MW 5 */ + 12124 "00000000" // /* MW 4 */ + 12125 "00000000" // /* MW 3 */ + 12126 "11111000" // /* MW 2 */ + 12127 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12129 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12131 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZL30setup_rmsnorm_row_major_paramsR33rmsnorm_row_major_internal_paramsRA10_Kj__end +.label __ZL30setup_rmsnorm_row_major_paramsR33rmsnorm_row_major_internal_paramsRA10_Kj___func_end0 + 12133 "00000000" // /* MW 1 */ +.label __Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params___func_begin0 +.label _Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params +.function rmsnorm_row_major_part1 _Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params +.src_ref 3 "rmsnorm_row_major.h" 48 first +.src_ref 3 "rmsnorm_row_major.h" 60 15 +.src_ref 3 "rmsnorm_row_major.h" 65 51 +.src_ref 3 "rmsnorm_row_major.h" 65 51 +.function_start + 12144 "01110110" // MOVA r24, #0; MOVS p6, p1; MOVX vaddSign0, #1; MOV p2, p6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12145 "01111000" // /* MW 11 */ + 12146 "01100000" // /* MW 10 */ + 12147 "00110110" // /* MW 9 */ + 12148 "00000001" // /* MW 8 */ + 12149 "11010010" // /* MW 7 */ + 12150 "00000010" // /* MW 6 */ + 12151 "10001011" // /* MW 5 */ + 12152 "10000100" // /* MW 4 */ + 12153 "00000110" // /* MW 3 */ + 12154 "00011000" // /* MW 2 */ + 12155 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 60 49 +.src_ref 3 "rmsnorm_row_major.h" 65 51 + 12156 "10111010" // MOVA r16, #1; MOVXM p1, #508944 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12157 "00010000" // /* MW 9 */ + 12158 "00001000" // /* MW 8 */ + 12159 "10110010" // /* MW 7 */ + 12160 "11110000" // /* MW 6 */ + 12161 "00000001" // /* MW 5 */ + 12162 "00000000" // /* MW 4 */ + 12163 "00000000" // /* MW 3 */ + 12164 "00110000" // /* MW 2 */ + 12165 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 53 31 +.src_ref 3 "rmsnorm_row_major.h" 65 51 first + 12166 "01110110" // LDA.s8 r17, [p1]; MOVS p1, p7; MOVXM p7, #508824 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12167 "00010000" // /* MW 11 */ + 12168 "11001100" // /* MW 10 */ + 12169 "10110001" // /* MW 9 */ + 12170 "11110011" // /* MW 8 */ + 12171 "00000001" // /* MW 7 */ + 12172 "00000000" // /* MW 6 */ + 12173 "10001011" // /* MW 5 */ + 12174 "10011100" // /* MW 4 */ + 12175 "01010001" // /* MW 3 */ + 12176 "11000100" // /* MW 2 */ + 12177 "00100000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 53 31 first +.src_ref 3 "rmsnorm_row_major.h" 65 51 + 12178 "11010100" // LDA el0, [p7], #8; VINSERT.32 x0, x0, #0, r24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12179 "00100010" // /* MW 5 */ + 12180 "00000110" // /* MW 4 */ + 12181 "11010000" // /* MW 3 */ + 12182 "10000101" // /* MW 2 */ + 12183 "11100101" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 55 33 first +.src_ref 3 "rmsnorm_row_major.h" 65 51 + 12184 "11010100" // LDA r0, [p7], #8; VMOV bmll0, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12185 "00100101" // /* MW 5 */ + 12186 "00000001" // /* MW 4 */ + 12187 "11010000" // /* MW 3 */ + 12188 "10000010" // /* MW 2 */ + 12189 "11100101" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 58 35 first + 12190 "10011000" // LDA r18, [p7], #-28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12191 "01010110" // /* MW 3 */ + 12192 "10011110" // /* MW 2 */ + 12193 "00000111" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 48 + 12194 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12195 "00000001" // /* MW 5 */ + 12196 "00000000" // /* MW 4 */ + 12197 "00000000" // /* MW 3 */ + 12198 "00010000" // /* MW 2 */ + 12199 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 67 28 first + 12200 "00001100" // LDA r1, [p7]; ST lr, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12201 "01111011" // /* MW 5 */ + 12202 "11011000" // /* MW 4 */ + 12203 "11011111" // /* MW 3 */ + 12204 "10000110" // /* MW 2 */ + 12205 "11100000" // /* MW 1 */ + 12206 "10011000" // ST p2, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12207 "00011101" // /* MW 3 */ + 12208 "11111101" // /* MW 2 */ + 12209 "00001111" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 65 51 + 12210 "01011100" // ST r17, [sp, #-8]; MOVX crRnd, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12211 "00000000" // /* MW 5 */ + 12212 "11110101" // /* MW 4 */ + 12213 "10111000" // /* MW 3 */ + 12214 "01000110" // /* MW 2 */ + 12215 "11111111" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 65 51 first + 12216 "00011000" // VCONV.bf16.fp32 wl0, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12217 "00010110" // /* MW 3 */ + 12218 "01000000" // /* MW 2 */ + 12219 "00001000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 67 20 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first +.no_stack_arguments + 12220 "00111010" // ST p0, [sp, #-12]; JL #15728 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=15728 delay_slots=5 */ + 12221 "01000001" // /* MW 9 */ + 12222 "00000000" // /* MW 8 */ + 12223 "00000000" // /* MW 7 */ + 12224 "10101110" // /* MW 6 */ + 12225 "00000111" // /* MW 5 */ + 12226 "00000000" // /* MW 4 */ + 12227 "10110000" // /* MW 3 */ + 12228 "10000011" // /* MW 2 */ + 12229 "11111110" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 65 51 first +.delay_slot +.aggressive_scheduled_block_id 1 +.noswbrkpt + 12230 "00000010" // ST p1, [sp, #-24]; VEXTRACT.16 r16, x0, #0, vaddSign0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12231 "11010000" // /* MW 7 */ + 12232 "10000000" // /* MW 6 */ + 12233 "00000000" // /* MW 5 */ + 12234 "00000010" // /* MW 4 */ + 12235 "10110000" // /* MW 3 */ + 12236 "00010011" // /* MW 2 */ + 12237 "11111101" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 60 49 first +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12238 "01011100" // ST el0, [sp, #-16]; LSHL r17, r18, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12239 "00011011" // /* MW 5 */ + 12240 "01000110" // /* MW 4 */ + 12241 "10111001" // /* MW 3 */ + 12242 "00000101" // /* MW 2 */ + 12243 "11111110" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 first +.delay_slot + 12244 "00000010" // ST r16, [sp, #-28]; VBCST.16 x0, r16 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12245 "01110000" // /* MW 7 */ + 12246 "10111001" // /* MW 6 */ + 12247 "00100000" // /* MW 5 */ + 12248 "00000000" // /* MW 4 */ + 12249 "10110000" // /* MW 3 */ + 12250 "11000010" // /* MW 2 */ + 12251 "11111100" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 60 15 +.delay_slot + 12252 "11111000" // MOV m0, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12253 "10100000" // /* MW 3 */ + 12254 "00001000" // /* MW 2 */ + 12255 "00011000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 60 15 first +.delay_slot + 12256 "11100001" // NOPA; PADDB [p6], m0; VST x0, [sp, #-128]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12257 "00000000" // /* MW 15 */ + 12258 "00000000" // /* MW 14 */ + 12259 "01111000" // /* MW 13 */ + 12260 "10100101" // /* MW 12 */ + 12261 "00000001" // /* MW 11 */ + 12262 "00000000" // /* MW 10 */ + 12263 "00000000" // /* MW 9 */ + 12264 "00000000" // /* MW 8 */ + 12265 "00110011" // /* MW 7 */ + 12266 "11111000" // /* MW 6 */ + 12267 "00100111" // /* MW 5 */ + 12268 "00010111" // /* MW 4 */ + 12269 "11111100" // /* MW 3 */ + 12270 "00101100" // /* MW 2 */ + 12271 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 54 31 first +.return_address + 12272 "10011000" // LDA r16, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12273 "00010110" // /* MW 3 */ + 12274 "01000110" // /* MW 2 */ + 12275 "00000111" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 67 8 first +.src_ref 3 "rmsnorm_row_major.h" 67 39 first + 12276 "10000100" // JZ r3, #12768 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12768 delay_slots=5 */ + 12277 "00000001" // /* MW 5 */ + 12278 "00000000" // /* MW 4 */ + 12279 "11110000" // /* MW 3 */ + 12280 "00011000" // /* MW 2 */ + 12281 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12283 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12285 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12287 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12289 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12291 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 107 + 12292 "00011000" // LDA lr, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12293 "00111001" // /* MW 3 */ + 12294 "11101100" // /* MW 2 */ + 12295 "00000111" // /* MW 1 */ + 12296 "00100010" // LDA p7, [sp, #-24]; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12297 "00011100" // /* MW 7 */ + 12298 "00000000" // /* MW 6 */ + 12299 "00000000" // /* MW 5 */ + 12300 "00000100" // /* MW 4 */ + 12301 "00100000" // /* MW 3 */ + 12302 "01110011" // /* MW 2 */ + 12303 "11111101" // /* MW 1 */ +.label __ll6__Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params +.src_ref 3 "rmsnorm_row_major.h" 80 4 first + 12304 "10000100" // JZ r16, #12736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12736 delay_slots=5 */ + 12305 "00000001" // /* MW 5 */ + 12306 "00000000" // /* MW 4 */ + 12307 "11100000" // /* MW 3 */ + 12308 "00011000" // /* MW 2 */ + 12309 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12311 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12313 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12315 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12317 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12318 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12319 "00000000" // /* MW 1 */ +.label __ll14__Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params +.src_ref 3 "rmsnorm_row_major.h" 87 8 first + 12320 "10111010" // VLDA x0, [sp, #-128]; MOVXM ls, #12464 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12321 "00010000" // /* MW 9 */ + 12322 "01011000" // /* MW 8 */ + 12323 "01111000" // /* MW 7 */ + 12324 "00001100" // /* MW 6 */ + 12325 "00000000" // /* MW 5 */ + 12326 "00000000" // /* MW 4 */ + 12327 "01110000" // /* MW 3 */ + 12328 "00000111" // /* MW 2 */ + 12329 "11111111" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 87 8 + 12330 "10111010" // LDA r27, [sp, #-16]; MOVXM le, #12512 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12331 "00010000" // /* MW 9 */ + 12332 "01110000" // /* MW 8 */ + 12333 "10111000" // /* MW 7 */ + 12334 "00001101" // /* MW 6 */ + 12335 "00000000" // /* MW 5 */ + 12336 "00000000" // /* MW 4 */ + 12337 "00100000" // /* MW 3 */ + 12338 "01101110" // /* MW 2 */ + 12339 "11111110" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 4 "add_reduce.hpp" 332 18 +.src_ref 4 "add_reduce.hpp" 335 47 +.src_ref 4 "add_reduce.hpp" 337 22 +.src_ref 4 "add_reduce.hpp" 337 22 +.src_ref 4 "add_reduce.hpp" 337 22 +.src_ref 4 "add_reduce.hpp" 337 22 +.src_ref 3 "rmsnorm_row_major.h" 99 36 +.src_ref 3 "rmsnorm_row_major.h" 99 36 +.src_ref 3 "rmsnorm_row_major.h" 99 36 + 12340 "10111010" // LDA r26, [sp, #-8]; MOVX r18, #60; MOV r21, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12341 "01011000" // /* MW 9 */ + 12342 "00010000" // /* MW 8 */ + 12343 "10101000" // /* MW 7 */ + 12344 "10001010" // /* MW 6 */ + 12345 "00100111" // /* MW 5 */ + 12346 "00000001" // /* MW 4 */ + 12347 "00100000" // /* MW 3 */ + 12348 "01101010" // /* MW 2 */ + 12349 "11111111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 38 105 +.src_ref 4 "add_reduce.hpp" 335 47 +.src_ref 3 "rmsnorm_row_major.h" 89 21 + 12350 "10111010" // LDA p0, [sp, #-12]; MOVX r20, #828; MOV r19, #32 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12351 "01011000" // /* MW 9 */ + 12352 "00100000" // /* MW 8 */ + 12353 "01101000" // /* MW 7 */ + 12354 "10001010" // /* MW 6 */ + 12355 "01000111" // /* MW 5 */ + 12356 "00011001" // /* MW 4 */ + 12357 "00100000" // /* MW 3 */ + 12358 "10000011" // /* MW 2 */ + 12359 "11111110" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 + 12360 "10111010" // MOVA r23, #8; MOVX r16, #-5; ADD.NC r17, r16, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12361 "11001000" // /* MW 9 */ + 12362 "00111111" // /* MW 8 */ + 12363 "00101100" // /* MW 7 */ + 12364 "01101010" // /* MW 6 */ + 12365 "00000111" // /* MW 5 */ + 12366 "00111111" // /* MW 4 */ + 12367 "00000000" // /* MW 3 */ + 12368 "00010111" // /* MW 2 */ + 12369 "00000001" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 +.src_ref 3 "rmsnorm_row_major.h" 80 4 + 12370 "10111010" // MOVA r22, #4; MOVXM p1, #12400 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12371 "00010000" // /* MW 9 */ + 12372 "00111000" // /* MW 8 */ + 12373 "10110000" // /* MW 7 */ + 12374 "00001100" // /* MW 6 */ + 12375 "00000000" // /* MW 5 */ + 12376 "00000000" // /* MW 4 */ + 12377 "00000000" // /* MW 3 */ + 12378 "10010110" // /* MW 2 */ + 12379 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 +.src_ref 3 "rmsnorm_row_major.h" 99 36 + 12380 "00011000" // MOVX vaddSign0, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12381 "01000000" // /* MW 3 */ + 12382 "01011010" // /* MW 2 */ + 12383 "00010000" // /* MW 1 */ + 12384 "11111000" // VCONV.fp32.bf16 cml0, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12385 "10001010" // /* MW 3 */ + 12386 "00000001" // /* MW 2 */ + 12387 "00011000" // /* MW 1 */ + 12388 "00101100" // NOPA; LSHL r16, r27, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12389 "00011011" // /* MW 5 */ + 12390 "11000010" // /* MW 4 */ + 12391 "11111101" // /* MW 3 */ + 12392 "00101100" // /* MW 2 */ + 12393 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 38 105 +.src_ref 3 "rmsnorm_row_major.h" 99 36 + 12394 "11100100" // MOVX crRnd, r26; VMOV cml1, cml0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12395 "00010101" // /* MW 5 */ + 12396 "00000001" // /* MW 4 */ + 12397 "00000010" // /* MW 3 */ + 12398 "01010000" // /* MW 2 */ + 12399 "11010111" // /* MW 1 */ +.label TGT_F_Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params_256 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "rmsnorm_row_major.h" 87 8 first +.src_ref 3 "rmsnorm_row_major.h" 89 21 first +.loop_nesting 1 + 12400 "11100001" // NOPA; VLDB x2, [p0], #64; NOPS; NOPX; ADD.NC lc, r16, #-1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12401 "00000000" // /* MW 15 */ + 12402 "00000000" // /* MW 14 */ + 12403 "11001000" // /* MW 13 */ + 12404 "00111111" // /* MW 12 */ + 12405 "10111100" // /* MW 11 */ + 12406 "00000010" // /* MW 10 */ + 12407 "00000000" // /* MW 9 */ + 12408 "00000000" // /* MW 8 */ + 12409 "01011011" // /* MW 7 */ + 12410 "00000001" // /* MW 6 */ + 12411 "01101000" // /* MW 5 */ + 12412 "00111001" // /* MW 4 */ + 12413 "11110000" // /* MW 3 */ + 12414 "00101100" // /* MW 2 */ + 12415 "00000000" // /* MW 1 */ + 12416 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12417 "00000000" // /* MW 15 */ + 12418 "00000000" // /* MW 14 */ + 12419 "01111000" // /* MW 13 */ + 12420 "10100101" // /* MW 12 */ + 12421 "00000001" // /* MW 11 */ + 12422 "00000000" // /* MW 10 */ + 12423 "00000000" // /* MW 9 */ + 12424 "00000000" // /* MW 8 */ + 12425 "01011011" // /* MW 7 */ + 12426 "00000001" // /* MW 6 */ + 12427 "00100000" // /* MW 5 */ + 12428 "00000000" // /* MW 4 */ + 12429 "11110000" // /* MW 3 */ + 12430 "00101100" // /* MW 2 */ + 12431 "00000000" // /* MW 1 */ + 12432 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12433 "00000000" // /* MW 15 */ + 12434 "00000000" // /* MW 14 */ + 12435 "01111000" // /* MW 13 */ + 12436 "10100101" // /* MW 12 */ + 12437 "00000001" // /* MW 11 */ + 12438 "00000000" // /* MW 10 */ + 12439 "00000000" // /* MW 9 */ + 12440 "00000000" // /* MW 8 */ + 12441 "01011011" // /* MW 7 */ + 12442 "00000001" // /* MW 6 */ + 12443 "00100000" // /* MW 5 */ + 12444 "00000000" // /* MW 4 */ + 12445 "11110000" // /* MW 3 */ + 12446 "00101100" // /* MW 2 */ + 12447 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 12448 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12449 "00000000" // /* MW 15 */ + 12450 "00000000" // /* MW 14 */ + 12451 "01111000" // /* MW 13 */ + 12452 "10100101" // /* MW 12 */ + 12453 "00000001" // /* MW 11 */ + 12454 "00000000" // /* MW 10 */ + 12455 "00000000" // /* MW 9 */ + 12456 "00000000" // /* MW 8 */ + 12457 "01011011" // /* MW 7 */ + 12458 "00000001" // /* MW 6 */ + 12459 "00100000" // /* MW 5 */ + 12460 "00000000" // /* MW 4 */ + 12461 "11110000" // /* MW 3 */ + 12462 "00101100" // /* MW 2 */ + 12463 "00000000" // /* MW 1 */ +.label ZLS_F_Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params_320 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "rmsnorm_row_major.h" 89 21 +.begin_of_loop +.aggressive_scheduled_block_id 2 +.noswbrkpt +.loop_nesting 2 + 12464 "11100001" // NOPA; VLDB x2, [p0], #64; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12465 "00000000" // /* MW 15 */ + 12466 "00000000" // /* MW 14 */ + 12467 "01111000" // /* MW 13 */ + 12468 "10100101" // /* MW 12 */ + 12469 "00000001" // /* MW 11 */ + 12470 "00000000" // /* MW 10 */ + 12471 "00000000" // /* MW 9 */ + 12472 "00000000" // /* MW 8 */ + 12473 "01011011" // /* MW 7 */ + 12474 "00000001" // /* MW 6 */ + 12475 "01101000" // /* MW 5 */ + 12476 "00111001" // /* MW 4 */ + 12477 "11110000" // /* MW 3 */ + 12478 "00101100" // /* MW 2 */ + 12479 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12480 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12481 "00000000" // /* MW 15 */ + 12482 "00000000" // /* MW 14 */ + 12483 "01111000" // /* MW 13 */ + 12484 "10100101" // /* MW 12 */ + 12485 "00000001" // /* MW 11 */ + 12486 "00000000" // /* MW 10 */ + 12487 "00000000" // /* MW 9 */ + 12488 "00000000" // /* MW 8 */ + 12489 "01011011" // /* MW 7 */ + 12490 "00000001" // /* MW 6 */ + 12491 "00100000" // /* MW 5 */ + 12492 "00000000" // /* MW 4 */ + 12493 "11110000" // /* MW 3 */ + 12494 "00101100" // /* MW 2 */ + 12495 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12496 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12497 "00000000" // /* MW 15 */ + 12498 "00000000" // /* MW 14 */ + 12499 "01111000" // /* MW 13 */ + 12500 "10100101" // /* MW 12 */ + 12501 "00000001" // /* MW 11 */ + 12502 "00000000" // /* MW 10 */ + 12503 "00000000" // /* MW 9 */ + 12504 "00000000" // /* MW 8 */ + 12505 "01011011" // /* MW 7 */ + 12506 "00000001" // /* MW 6 */ + 12507 "00100000" // /* MW 5 */ + 12508 "00000000" // /* MW 4 */ + 12509 "11110000" // /* MW 3 */ + 12510 "00101100" // /* MW 2 */ + 12511 "00000000" // /* MW 1 */ +.label ZLE_F_Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params_368 +.src_ref 5 "mul_acc32_fp.hpp" 38 105 first +.end_of_loop +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12512 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm0, dm0, x2, x2, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12513 "00100010" // /* MW 15 */ + 12514 "00000000" // /* MW 14 */ + 12515 "01111101" // /* MW 13 */ + 12516 "10100101" // /* MW 12 */ + 12517 "00000001" // /* MW 11 */ + 12518 "00000000" // /* MW 10 */ + 12519 "00000000" // /* MW 9 */ + 12520 "00000000" // /* MW 8 */ + 12521 "01011011" // /* MW 7 */ + 12522 "00000001" // /* MW 6 */ + 12523 "00100000" // /* MW 5 */ + 12524 "00000000" // /* MW 4 */ + 12525 "11110000" // /* MW 3 */ + 12526 "00101100" // /* MW 2 */ + 12527 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 97 26 first +.loop_nesting 1 + 12528 "10011000" // LDA.s16 r26, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12529 "01010010" // /* MW 3 */ + 12530 "00000111" // /* MW 2 */ + 12531 "00000110" // /* MW 1 */ + 12532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12533 "00000000" // /* MW 1 */ + 12534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12535 "00000000" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 38 105 first + 12536 "01001000" // VMAC.f dm0, dm0, x2, x2, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12537 "01000001" // /* MW 3 */ + 12538 "00000100" // /* MW 2 */ + 12539 "10100000" // /* MW 1 */ + 12540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12541 "00000000" // /* MW 1 */ + 12542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12543 "00000000" // /* MW 1 */ + 12544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12545 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 99 36 first + 12546 "10011000" // ASHL r26, r26, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12547 "01011110" // /* MW 3 */ + 12548 "10110101" // /* MW 2 */ + 12549 "00010110" // /* MW 1 */ + 12550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12551 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 first + 12552 "00011000" // VCONV.bf16.fp32 x0, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12553 "00010110" // /* MW 3 */ + 12554 "00010000" // /* MW 2 */ + 12555 "00001000" // /* MW 1 */ + 12556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12557 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 940 83 first +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 12558 "11111000" // VCONV.fp32.bf16 cml2, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12559 "10001010" // /* MW 3 */ + 12560 "00000001" // /* MW 2 */ + 12561 "00011010" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 332 18 first +.src_ref 3 "rmsnorm_row_major.h" 99 36 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 12562 "01100010" // VINSERT.32 x0, x0, #0, r26; VADD.f dm2, dm2, dm3, r18 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12563 "00111101" // /* MW 7 */ + 12564 "01001100" // /* MW 6 */ + 12565 "10010010" // /* MW 5 */ + 12566 "01100110" // /* MW 4 */ + 12567 "01010001" // /* MW 3 */ + 12568 "00000011" // /* MW 2 */ + 12569 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.src_ref 5 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12570 "11111000" // VMOV bmll3, bmlh2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12571 "00010010" // /* MW 3 */ + 12572 "00001001" // /* MW 2 */ + 12573 "00011011" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 99 36 first + 12574 "11111000" // VMOV bmll3, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12575 "10010010" // /* MW 3 */ + 12576 "00000000" // /* MW 2 */ + 12577 "00011011" // /* MW 1 */ + 12578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12579 "00000000" // /* MW 1 */ + 12580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12581 "00000000" // /* MW 1 */ + 12582 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12583 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 12584 "11111000" // VMOV x0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12585 "00010010" // /* MW 3 */ + 12586 "00101000" // /* MW 2 */ + 12587 "00011000" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 first +.src_ref 4 "add_reduce.hpp" 337 22 first +.aggressive_scheduled_block_id 4 +.noswbrkpt + 12588 "01100010" // VSHIFT x0, x0, x0, r19; VADD.f dm2, dm2, dm4, r18 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12589 "00111101" // /* MW 7 */ + 12590 "01010000" // /* MW 6 */ + 12591 "10010010" // /* MW 5 */ + 12592 "11000110" // /* MW 4 */ + 12593 "01001110" // /* MW 3 */ + 12594 "00000000" // /* MW 2 */ + 12595 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12596 "11111000" // VMOV bmll4, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12597 "10010010" // /* MW 3 */ + 12598 "00000000" // /* MW 2 */ + 12599 "00011100" // /* MW 1 */ + 12600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12601 "00000000" // /* MW 1 */ + 12602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12603 "00000000" // /* MW 1 */ + 12604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12605 "00000000" // /* MW 1 */ + 12606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12607 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 12608 "11111000" // VMOV x0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12609 "00010010" // /* MW 3 */ + 12610 "00101000" // /* MW 2 */ + 12611 "00011000" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 first +.src_ref 4 "add_reduce.hpp" 337 22 first +.aggressive_scheduled_block_id 5 +.noswbrkpt + 12612 "01100010" // VSHIFT x0, x0, x0, r21; VADD.f dm2, dm2, dm4, r18 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12613 "00111101" // /* MW 7 */ + 12614 "01010000" // /* MW 6 */ + 12615 "10010010" // /* MW 5 */ + 12616 "11000110" // /* MW 4 */ + 12617 "01010110" // /* MW 3 */ + 12618 "00000000" // /* MW 2 */ + 12619 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12620 "11111000" // VMOV bmll4, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12621 "10010010" // /* MW 3 */ + 12622 "00000000" // /* MW 2 */ + 12623 "00011100" // /* MW 1 */ + 12624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12625 "00000000" // /* MW 1 */ + 12626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12627 "00000000" // /* MW 1 */ + 12628 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12629 "00000000" // /* MW 1 */ + 12630 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12631 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 12632 "11111000" // VMOV x0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12633 "00010010" // /* MW 3 */ + 12634 "00101000" // /* MW 2 */ + 12635 "00011000" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 first +.src_ref 4 "add_reduce.hpp" 337 22 first +.aggressive_scheduled_block_id 6 +.noswbrkpt + 12636 "01100010" // VSHIFT x0, x0, x0, r23; VADD.f dm2, dm2, dm4, r18 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12637 "00111101" // /* MW 7 */ + 12638 "01010000" // /* MW 6 */ + 12639 "10010010" // /* MW 5 */ + 12640 "11000110" // /* MW 4 */ + 12641 "01011110" // /* MW 3 */ + 12642 "00000000" // /* MW 2 */ + 12643 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12644 "11111000" // VMOV bmll4, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12645 "10010010" // /* MW 3 */ + 12646 "00000000" // /* MW 2 */ + 12647 "00011100" // /* MW 1 */ + 12648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12649 "00000000" // /* MW 1 */ + 12650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12651 "00000000" // /* MW 1 */ + 12652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12653 "00000000" // /* MW 1 */ + 12654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12655 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 12656 "11111000" // VMOV x0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12657 "00010010" // /* MW 3 */ + 12658 "00101000" // /* MW 2 */ + 12659 "00011000" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 first +.src_ref 4 "add_reduce.hpp" 337 22 first +.aggressive_scheduled_block_id 7 +.noswbrkpt + 12660 "01100010" // VSHIFT x0, x0, x0, r22; VADD.f dm2, dm2, dm4, r18 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12661 "00111101" // /* MW 7 */ + 12662 "01010000" // /* MW 6 */ + 12663 "10010010" // /* MW 5 */ + 12664 "11000110" // /* MW 4 */ + 12665 "01011010" // /* MW 3 */ + 12666 "00000000" // /* MW 2 */ + 12667 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12668 "11111000" // VMOV bmll4, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12669 "10010010" // /* MW 3 */ + 12670 "00000000" // /* MW 2 */ + 12671 "00011100" // /* MW 1 */ + 12672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12673 "00000000" // /* MW 1 */ + 12674 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12675 "00000000" // /* MW 1 */ + 12676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12677 "00000000" // /* MW 1 */ + 12678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12679 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 first + 12680 "00011000" // VCONV.bf16.fp32 x0, cml2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12681 "00010110" // /* MW 3 */ + 12682 "00010001" // /* MW 2 */ + 12683 "00001000" // /* MW 1 */ + 12684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12685 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first + 12686 "10111000" // VEXTRACT.16 r26, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12687 "00000001" // /* MW 3 */ + 12688 "10000001" // /* MW 2 */ + 12689 "00011110" // /* MW 1 */ + 12690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12691 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 99 36 first +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id first + 12692 "10011000" // ASHL r26, r26, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12693 "01011110" // /* MW 3 */ + 12694 "10110101" // /* MW 2 */ + 12695 "00010110" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 99 36 +.src_ref 3 "rmsnorm_row_major.h" 99 36 +.aggressive_scheduled_block_id 8 +.noswbrkpt + 12696 "01100010" // VINSERT.32 x0, x0, #0, r26; VADD.f dm2, dm2, dm3, r18 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12697 "00111101" // /* MW 7 */ + 12698 "01001100" // /* MW 6 */ + 12699 "10010010" // /* MW 5 */ + 12700 "01100110" // /* MW 4 */ + 12701 "01010001" // /* MW 3 */ + 12702 "00000011" // /* MW 2 */ + 12703 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 99 36 +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12704 "11111000" // VMOV bmll2, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12705 "10010010" // /* MW 3 */ + 12706 "00000000" // /* MW 2 */ + 12707 "00011010" // /* MW 1 */ + 12708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12709 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 80 4 first +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id first + 12710 "00011000" // JNZD r17, r17, p1 /* MW 4 */ /* control_operation: words=4 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 12711 "01100000" // /* MW 3 */ + 12712 "01100010" // /* MW 2 */ + 12713 "00010100" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 99 24 first +.src_ref 3 "rmsnorm_row_major.h" 104 23 first +.delay_slot +.aggressive_scheduled_block_id 9 +.noswbrkpt + 12714 "00011000" // ST.s16 r26, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12715 "01010111" // /* MW 3 */ + 12716 "00101111" // /* MW 2 */ + 12717 "00000110" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 12718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12719 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 99 36 +.delay_slot +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 12720 "00011000" // VCONV.bf16.fp32 wl0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12721 "00010110" // /* MW 3 */ + 12722 "01000001" // /* MW 2 */ + 12723 "00001000" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 38 105 +.delay_slot +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 12724 "11111000" // VMOV cml0, cml1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12725 "10001010" // /* MW 3 */ + 12726 "00000100" // /* MW 2 */ + 12727 "00011000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 99 36 +.delay_slot +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12728 "00000010" // NOPS; VEXTRACT.16 r26, x0, #0, vaddSign0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12729 "11010000" // /* MW 7 */ + 12730 "10000000" // /* MW 6 */ + 12731 "01000000" // /* MW 5 */ + 12732 "00000011" // /* MW 4 */ + 12733 "01100000" // /* MW 3 */ + 12734 "00101011" // /* MW 2 */ + 12735 "00000000" // /* MW 1 */ +.label __ll61__Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params +.loop_nesting 0 + 12736 "00011000" // LDA p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12737 "00011001" // /* MW 3 */ + 12738 "11111111" // /* MW 2 */ + 12739 "00000111" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 107 first + 12740 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12741 "00000000" // /* MW 3 */ + 12742 "00101000" // /* MW 2 */ + 12743 "00010000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 107 +.delay_slot + 12744 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12745 "00000001" // /* MW 5 */ + 12746 "00000000" // /* MW 4 */ + 12747 "00000000" // /* MW 3 */ + 12748 "11110000" // /* MW 2 */ + 12749 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12751 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12753 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12755 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12756 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12757 "10000001" // /* MW 11 */ + 12758 "10101101" // /* MW 10 */ + 12759 "00000000" // /* MW 9 */ + 12760 "00000000" // /* MW 8 */ + 12761 "00000000" // /* MW 7 */ + 12762 "00000000" // /* MW 6 */ + 12763 "00100000" // /* MW 5 */ + 12764 "00000000" // /* MW 4 */ + 12765 "11110000" // /* MW 3 */ + 12766 "00101100" // /* MW 2 */ + 12767 "00000000" // /* MW 1 */ +.label TGT_F_Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params_624 +.src_ref 3 "rmsnorm_row_major.h" 67 8 first + 12768 "10000100" // JZ r16, #12976 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12976 delay_slots=5 */ + 12769 "00000001" // /* MW 5 */ + 12770 "00000000" // /* MW 4 */ + 12771 "01011000" // /* MW 3 */ + 12772 "00011001" // /* MW 2 */ + 12773 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12775 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12776 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12777 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12778 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12779 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12780 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12781 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12783 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 70 8 first +.src_ref 3 "rmsnorm_row_major.h" 72 35 +.src_ref 3 "rmsnorm_row_major.h" 72 35 +.src_ref 3 "rmsnorm_row_major.h" 73 35 + 12784 "01110110" // LDA r17, [sp, #-28]; MOVS p0, p6; MOVXM ls, #12816 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12785 "00010000" // /* MW 11 */ + 12786 "00001000" // /* MW 10 */ + 12787 "01111001" // /* MW 9 */ + 12788 "00001100" // /* MW 8 */ + 12789 "00000000" // /* MW 7 */ + 12790 "00000000" // /* MW 6 */ + 12791 "10001011" // /* MW 5 */ + 12792 "10011000" // /* MW 4 */ + 12793 "00100000" // /* MW 3 */ + 12794 "11000110" // /* MW 2 */ + 12795 "11111100" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 70 8 + 12796 "10111010" // LDA p7, [sp, #-24]; MOVXM le, #12928 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12797 "00010000" // /* MW 9 */ + 12798 "01000000" // /* MW 8 */ + 12799 "10111001" // /* MW 7 */ + 12800 "00001101" // /* MW 6 */ + 12801 "00000000" // /* MW 5 */ + 12802 "00000000" // /* MW 4 */ + 12803 "00100000" // /* MW 3 */ + 12804 "01110011" // /* MW 2 */ + 12805 "11111101" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 70 8 + 12806 "10111010" // NOPA; NOPB; MOV lc, r16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12807 "01111110" // /* MW 9 */ + 12808 "00010000" // /* MW 8 */ + 12809 "10111100" // /* MW 7 */ + 12810 "00000010" // /* MW 6 */ + 12811 "00010000" // /* MW 5 */ + 12812 "00000000" // /* MW 4 */ + 12813 "11110000" // /* MW 3 */ + 12814 "00101100" // /* MW 2 */ + 12815 "00000000" // /* MW 1 */ +.label ZLS_F_Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params_672 +.src_ref 3 "rmsnorm_row_major.h" 72 35 first +.begin_of_loop +.loop_nesting 1 + 12816 "00011000" // ST.s16 r17, [p0], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12817 "00110111" // /* MW 3 */ + 12818 "00011110" // /* MW 2 */ + 12819 "00000000" // /* MW 1 */ + 12820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12821 "00000000" // /* MW 1 */ + 12822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12823 "00000000" // /* MW 1 */ + 12824 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12825 "00000000" // /* MW 1 */ + 12826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12827 "00000000" // /* MW 1 */ + 12828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12829 "00000000" // /* MW 1 */ + 12830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12831 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 73 35 first + 12832 "11100001" // ST.s16 r17, [p0], #2; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12833 "00000000" // /* MW 15 */ + 12834 "00000000" // /* MW 14 */ + 12835 "01111000" // /* MW 13 */ + 12836 "10100101" // /* MW 12 */ + 12837 "00000001" // /* MW 11 */ + 12838 "00000000" // /* MW 10 */ + 12839 "00000000" // /* MW 9 */ + 12840 "00000000" // /* MW 8 */ + 12841 "01011011" // /* MW 7 */ + 12842 "00000001" // /* MW 6 */ + 12843 "00100000" // /* MW 5 */ + 12844 "00000000" // /* MW 4 */ + 12845 "11100000" // /* MW 3 */ + 12846 "11000110" // /* MW 2 */ + 12847 "00000011" // /* MW 1 */ + 12848 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12849 "00000000" // /* MW 15 */ + 12850 "00000000" // /* MW 14 */ + 12851 "01111000" // /* MW 13 */ + 12852 "10100101" // /* MW 12 */ + 12853 "00000001" // /* MW 11 */ + 12854 "00000000" // /* MW 10 */ + 12855 "00000000" // /* MW 9 */ + 12856 "00000000" // /* MW 8 */ + 12857 "01011011" // /* MW 7 */ + 12858 "00000001" // /* MW 6 */ + 12859 "00100000" // /* MW 5 */ + 12860 "00000000" // /* MW 4 */ + 12861 "11110000" // /* MW 3 */ + 12862 "00101100" // /* MW 2 */ + 12863 "00000000" // /* MW 1 */ + 12864 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12865 "00000000" // /* MW 15 */ + 12866 "00000000" // /* MW 14 */ + 12867 "01111000" // /* MW 13 */ + 12868 "10100101" // /* MW 12 */ + 12869 "00000001" // /* MW 11 */ + 12870 "00000000" // /* MW 10 */ + 12871 "00000000" // /* MW 9 */ + 12872 "00000000" // /* MW 8 */ + 12873 "01011011" // /* MW 7 */ + 12874 "00000001" // /* MW 6 */ + 12875 "00100000" // /* MW 5 */ + 12876 "00000000" // /* MW 4 */ + 12877 "11110000" // /* MW 3 */ + 12878 "00101100" // /* MW 2 */ + 12879 "00000000" // /* MW 1 */ + 12880 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12881 "00000000" // /* MW 15 */ + 12882 "00000000" // /* MW 14 */ + 12883 "01111000" // /* MW 13 */ + 12884 "10100101" // /* MW 12 */ + 12885 "00000001" // /* MW 11 */ + 12886 "00000000" // /* MW 10 */ + 12887 "00000000" // /* MW 9 */ + 12888 "00000000" // /* MW 8 */ + 12889 "01011011" // /* MW 7 */ + 12890 "00000001" // /* MW 6 */ + 12891 "00100000" // /* MW 5 */ + 12892 "00000000" // /* MW 4 */ + 12893 "11110000" // /* MW 3 */ + 12894 "00101100" // /* MW 2 */ + 12895 "00000000" // /* MW 1 */ + 12896 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12897 "00000000" // /* MW 15 */ + 12898 "00000000" // /* MW 14 */ + 12899 "01111000" // /* MW 13 */ + 12900 "10100101" // /* MW 12 */ + 12901 "00000001" // /* MW 11 */ + 12902 "00000000" // /* MW 10 */ + 12903 "00000000" // /* MW 9 */ + 12904 "00000000" // /* MW 8 */ + 12905 "01011011" // /* MW 7 */ + 12906 "00000001" // /* MW 6 */ + 12907 "00100000" // /* MW 5 */ + 12908 "00000000" // /* MW 4 */ + 12909 "11110000" // /* MW 3 */ + 12910 "00101100" // /* MW 2 */ + 12911 "00000000" // /* MW 1 */ + 12912 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12913 "00000000" // /* MW 15 */ + 12914 "00000000" // /* MW 14 */ + 12915 "01111000" // /* MW 13 */ + 12916 "10100101" // /* MW 12 */ + 12917 "00000001" // /* MW 11 */ + 12918 "00000000" // /* MW 10 */ + 12919 "00000000" // /* MW 9 */ + 12920 "00000000" // /* MW 8 */ + 12921 "01011011" // /* MW 7 */ + 12922 "00000001" // /* MW 6 */ + 12923 "00100000" // /* MW 5 */ + 12924 "00000000" // /* MW 4 */ + 12925 "11110000" // /* MW 3 */ + 12926 "00101100" // /* MW 2 */ + 12927 "00000000" // /* MW 1 */ +.label ZLE_F_Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params_784 +.end_of_loop + 12928 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12929 "00000000" // /* MW 15 */ + 12930 "00000000" // /* MW 14 */ + 12931 "01111000" // /* MW 13 */ + 12932 "10100101" // /* MW 12 */ + 12933 "00000001" // /* MW 11 */ + 12934 "00000000" // /* MW 10 */ + 12935 "00000000" // /* MW 9 */ + 12936 "00000000" // /* MW 8 */ + 12937 "01011011" // /* MW 7 */ + 12938 "00000001" // /* MW 6 */ + 12939 "00100000" // /* MW 5 */ + 12940 "00000000" // /* MW 4 */ + 12941 "11110000" // /* MW 3 */ + 12942 "00101100" // /* MW 2 */ + 12943 "00000000" // /* MW 1 */ +.loop_nesting 0 + 12944 "10000100" // J #12320 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=12320 delay_slots=5 */ + 12945 "00000000" // /* MW 5 */ + 12946 "00000000" // /* MW 4 */ + 12947 "00010000" // /* MW 3 */ + 12948 "00011000" // /* MW 2 */ + 12949 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 107 +.delay_slot + 12950 "00011000" // LDA lr, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12951 "00111001" // /* MW 3 */ + 12952 "11101100" // /* MW 2 */ + 12953 "00000111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12955 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12957 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12959 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12960 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12961 "00000000" // /* MW 15 */ + 12962 "00000000" // /* MW 14 */ + 12963 "01111000" // /* MW 13 */ + 12964 "10100101" // /* MW 12 */ + 12965 "00000001" // /* MW 11 */ + 12966 "00000000" // /* MW 10 */ + 12967 "00000000" // /* MW 9 */ + 12968 "00000000" // /* MW 8 */ + 12969 "01011011" // /* MW 7 */ + 12970 "00000001" // /* MW 6 */ + 12971 "00100000" // /* MW 5 */ + 12972 "00000000" // /* MW 4 */ + 12973 "11110000" // /* MW 3 */ + 12974 "00101100" // /* MW 2 */ + 12975 "00000000" // /* MW 1 */ +.label TGT_F_Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params_832 + 12976 "10000100" // J #12304 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=12304 delay_slots=5 */ + 12977 "00000000" // /* MW 5 */ + 12978 "00000000" // /* MW 4 */ + 12979 "00001000" // /* MW 3 */ + 12980 "00011000" // /* MW 2 */ + 12981 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 107 +.delay_slot + 12982 "00011000" // LDA lr, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12983 "00111001" // /* MW 3 */ + 12984 "11101100" // /* MW 2 */ + 12985 "00000111" // /* MW 1 */ +.delay_slot + 12986 "00011000" // LDA p7, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12987 "10011001" // /* MW 3 */ + 12988 "11101011" // /* MW 2 */ + 12989 "00000111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12990 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12991 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12993 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params__end +.label __Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params___func_end0 + 12995 "00000000" // /* MW 1 */ +.label __ZN12mllib_graphs35rmsnorm_row_major_part1_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EEEEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSF_ISG_SI_T1_EERA11_KjRNSF_ISG_NSH_3outET2_EE___func_begin0 +.label _ZN12mllib_graphs35rmsnorm_row_major_part1_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EEEEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSF_ISG_SI_T1_EERA11_KjRNSF_ISG_NSH_3outET2_EE +.function rmsnorm_row_major_part1_4x4_wrapper, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::async, adf::addressing::linear, adf::margin<0U> > > _ZN12mllib_graphs35rmsnorm_row_major_part1_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EEEEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSF_ISG_SI_T1_EERA11_KjRNSF_ISG_NSH_3outET2_EE +.src_ref 12 "rms_norm_adf_wrapper.cpp" 76 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 83 5 +.function_start + 13008 "01000100" // MOVXM p1, #508900 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13009 "11001000" // /* MW 5 */ + 13010 "11000111" // /* MW 4 */ + 13011 "11000010" // /* MW 3 */ + 13012 "00000111" // /* MW 2 */ + 13013 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 83 5 first + 13014 "10111010" // LDA r16, [p1]; MOVS p0, p6; MOV p6, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13015 "01110010" // /* MW 9 */ + 13016 "01100000" // /* MW 8 */ + 13017 "00110000" // /* MW 7 */ + 13018 "00000011" // /* MW 6 */ + 13019 "10001011" // /* MW 5 */ + 13020 "10011000" // /* MW 4 */ + 13021 "11010000" // /* MW 3 */ + 13022 "11000010" // /* MW 2 */ + 13023 "00100000" // /* MW 1 */ + 13024 "11111000" // MOV r0, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13025 "10100000" // /* MW 3 */ + 13026 "00010111" // /* MW 2 */ + 13027 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 285 49 + 13028 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13029 "11000000" // /* MW 3 */ + 13030 "11010110" // /* MW 2 */ + 13031 "00011011" // /* MW 1 */ + 13032 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13033 "00000000" // /* MW 1 */ + 13034 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13035 "00000000" // /* MW 1 */ + 13036 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13037 "00000000" // /* MW 1 */ + 13038 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13039 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 83 5 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 83 17 + 13040 "10000100" // JNZ r16, #13152 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=13152 delay_slots=5 */ + 13041 "00000001" // /* MW 5 */ + 13042 "01000000" // /* MW 4 */ + 13043 "10110000" // /* MW 3 */ + 13044 "00011001" // /* MW 2 */ + 13045 "10000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 76 +.delay_slot + 13046 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13047 "00000001" // /* MW 5 */ + 13048 "00000000" // /* MW 4 */ + 13049 "00000000" // /* MW 3 */ + 13050 "00010000" // /* MW 2 */ + 13051 "00000000" // /* MW 1 */ +.delay_slot + 13052 "10011000" // ST p0, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13053 "00011101" // /* MW 3 */ + 13054 "11110100" // /* MW 2 */ + 13055 "00001111" // /* MW 1 */ +.delay_slot + 13056 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13057 "10011101" // /* MW 3 */ + 13058 "11111011" // /* MW 2 */ + 13059 "00001111" // /* MW 1 */ +.delay_slot + 13060 "10011000" // ST r0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13061 "00010101" // /* MW 3 */ + 13062 "11111100" // /* MW 2 */ + 13063 "00001111" // /* MW 1 */ +.delay_slot + 13064 "10011000" // ST lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13065 "00111101" // /* MW 3 */ + 13066 "11110000" // /* MW 2 */ + 13067 "00001111" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 85 36 + 13068 "10111010" // MOVA r18, #12; MOVX r20, #-16; MOV r17, CORE_ID /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13069 "01111000" // /* MW 9 */ + 13070 "01110000" // /* MW 8 */ + 13071 "00101101" // /* MW 7 */ + 13072 "00001010" // /* MW 6 */ + 13073 "01000110" // /* MW 5 */ + 13074 "00111111" // /* MW 4 */ + 13075 "00000000" // /* MW 3 */ + 13076 "10010010" // /* MW 2 */ + 13077 "00000001" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 84 14 + 13078 "10111010" // MOVA r19, #1; MOVXM p7, #508888 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13079 "00010000" // /* MW 9 */ + 13080 "11101100" // /* MW 8 */ + 13081 "10110001" // /* MW 7 */ + 13082 "11110011" // /* MW 6 */ + 13083 "00000001" // /* MW 5 */ + 13084 "00000000" // /* MW 4 */ + 13085 "00000000" // /* MW 3 */ + 13086 "00110011" // /* MW 2 */ + 13087 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 84 14 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 89 27 first + 13088 "01011100" // ST r17, [p7]; EXTEND.u8 r21, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13089 "00100000" // /* MW 5 */ + 13090 "11010101" // /* MW 4 */ + 13091 "00111000" // /* MW 3 */ + 13092 "11000110" // /* MW 2 */ + 13093 "11100000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 85 36 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 89 44 + 13094 "00100100" // LSHL r17, r17, r20; ADD.NC r20, r21, #-2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13095 "11111110" // /* MW 5 */ + 13096 "00110101" // /* MW 4 */ + 13097 "10111010" // /* MW 3 */ + 13098 "01101001" // /* MW 2 */ + 13099 "10001100" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 89 15 + 13100 "01000100" // MOVXM p7, #508892 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13101 "10111000" // /* MW 5 */ + 13102 "11000111" // /* MW 4 */ + 13103 "11001110" // /* MW 3 */ + 13104 "00000111" // /* MW 2 */ + 13105 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 89 15 first + 13106 "00111010" // ST r20, [p7]; MOVXM p7, #508944 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13107 "00010001" // /* MW 9 */ + 13108 "00001000" // /* MW 8 */ + 13109 "10110010" // /* MW 7 */ + 13110 "11110011" // /* MW 6 */ + 13111 "00000001" // /* MW 5 */ + 13112 "00000000" // /* MW 4 */ + 13113 "00110000" // /* MW 3 */ + 13114 "11010010" // /* MW 2 */ + 13115 "11100000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first + 13116 "10111010" // ST.s8 r18, [p7]; MOVXM p0, #508940 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13117 "00010000" // /* MW 9 */ + 13118 "00000110" // /* MW 8 */ + 13119 "00110010" // /* MW 7 */ + 13120 "11110000" // /* MW 6 */ + 13121 "00000001" // /* MW 5 */ + 13122 "00000000" // /* MW 4 */ + 13123 "11100000" // /* MW 3 */ + 13124 "11001000" // /* MW 2 */ + 13125 "11100000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 85 27 first + 13126 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13127 "10010000" // /* MW 3 */ + 13128 "01100010" // /* MW 2 */ + 13129 "00010100" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 85 15 + 13130 "01000100" // MOVXM p7, #508896 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13131 "11000000" // /* MW 5 */ + 13132 "11000111" // /* MW 4 */ + 13133 "11001110" // /* MW 3 */ + 13134 "00000111" // /* MW 2 */ + 13135 "00000000" // /* MW 1 */ + 13136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13137 "00000000" // /* MW 1 */ + 13138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13139 "00000000" // /* MW 1 */ + 13140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13141 "00000000" // /* MW 1 */ + 13142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13143 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first + 13144 "10011000" // ST r19, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13145 "01110001" // /* MW 3 */ + 13146 "00000110" // /* MW 2 */ + 13147 "00001000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 85 15 first + 13148 "10011000" // ST r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13149 "00110001" // /* MW 3 */ + 13150 "00000110" // /* MW 2 */ + 13151 "00001111" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs35rmsnorm_row_major_part1_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EEEEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSF_ISG_SI_T1_EERA11_KjRNSF_ISG_NSH_3outET2_EE_144 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 95 33 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 97 24 + 13152 "10111010" // MOVA dj0, #40; MOVXM p7, #508832 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13153 "00010000" // /* MW 9 */ + 13154 "11010000" // /* MW 8 */ + 13155 "10110001" // /* MW 7 */ + 13156 "11110011" // /* MW 6 */ + 13157 "00000001" // /* MW 5 */ + 13158 "00000000" // /* MW 4 */ + 13159 "10000000" // /* MW 3 */ + 13160 "00000010" // /* MW 2 */ + 13161 "00000101" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 95 33 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 105 35 + 13162 "00111010" // ST r16, [p7], #-20; MOVX r24, #0; MOV r17, sp /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13163 "01111001" // /* MW 9 */ + 13164 "11110000" // /* MW 8 */ + 13165 "00101010" // /* MW 7 */ + 13166 "00001010" // /* MW 6 */ + 13167 "10000000" // /* MW 5 */ + 13168 "00000001" // /* MW 4 */ + 13169 "00110000" // /* MW 3 */ + 13170 "11000010" // /* MW 2 */ + 13171 "11110111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 97 24 first + 13172 "00010100" // LDA el0, [p2, dj0]; ADD.NC p0, r17, #-104 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13173 "10011000" // /* MW 5 */ + 13174 "11010001" // /* MW 4 */ + 13175 "11010000" // /* MW 3 */ + 13176 "00000101" // /* MW 2 */ + 13177 "01000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 99 33 first + 13178 "10011000" // LDA eh0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13179 "00001110" // /* MW 3 */ + 13180 "00011100" // /* MW 2 */ + 13181 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 99 33 + 13182 "10011000" // LDA el3, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13183 "11101110" // /* MW 3 */ + 13184 "00011100" // /* MW 2 */ + 13185 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 99 67 + 13186 "10011000" // LDA el2, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13187 "10101110" // /* MW 3 */ + 13188 "00011100" // /* MW 2 */ + 13189 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 99 67 + 13190 "10011000" // LDA el1, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13191 "01101110" // /* MW 3 */ + 13192 "00011100" // /* MW 2 */ + 13193 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 99 101 + 13194 "10011000" // LDA eh1, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13195 "01001110" // /* MW 3 */ + 13196 "00011100" // /* MW 2 */ + 13197 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 99 101 + 13198 "10011000" // LDA eh2, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13199 "10001110" // /* MW 3 */ + 13200 "00011100" // /* MW 2 */ + 13201 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 100 50 first + 13202 "00001100" // LDA el0, [p2], #4; ST el0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13203 "01011011" // /* MW 5 */ + 13204 "11011000" // /* MW 4 */ + 13205 "11011111" // /* MW 3 */ + 13206 "10000101" // /* MW 2 */ + 13207 "01000011" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 99 32 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 100 50 + 13208 "00001100" // LDA eh0, [p2], #4; ST eh0, [sp, #-104] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13209 "00011011" // /* MW 5 */ + 13210 "00110000" // /* MW 4 */ + 13211 "11011111" // /* MW 3 */ + 13212 "10000001" // /* MW 2 */ + 13213 "01000011" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 99 32 + 13214 "10011000" // ST el3, [sp, #-100] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13215 "11101101" // /* MW 3 */ + 13216 "10011100" // /* MW 2 */ + 13217 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 99 32 + 13218 "10011000" // ST el2, [sp, #-96] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13219 "10101101" // /* MW 3 */ + 13220 "10100000" // /* MW 2 */ + 13221 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 99 32 + 13222 "10011000" // ST el1, [sp, #-92] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13223 "01101101" // /* MW 3 */ + 13224 "10100100" // /* MW 2 */ + 13225 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 99 32 + 13226 "10011000" // ST eh1, [sp, #-88] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13227 "01001101" // /* MW 3 */ + 13228 "10101000" // /* MW 2 */ + 13229 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 99 32 + 13230 "10011000" // ST eh2, [sp, #-84] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13231 "10001101" // /* MW 3 */ + 13232 "10101100" // /* MW 2 */ + 13233 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 99 32 + 13234 "10011000" // ST el0, [sp, #-80] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13235 "00101101" // /* MW 3 */ + 13236 "10110000" // /* MW 2 */ + 13237 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 99 32 + 13238 "10011000" // ST eh0, [sp, #-76] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13239 "00001101" // /* MW 3 */ + 13240 "10110100" // /* MW 2 */ + 13241 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 100 84 first + 13242 "10011000" // LDA eh0, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13243 "00001110" // /* MW 3 */ + 13244 "00000100" // /* MW 2 */ + 13245 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 100 84 + 13246 "10011000" // LDA el0, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13247 "00101110" // /* MW 3 */ + 13248 "00010100" // /* MW 2 */ + 13249 "00000010" // /* MW 1 */ + 13250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13251 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 102 6 first + 13252 "10000100" // JNZ r16, #13360 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=13360 delay_slots=5 */ + 13253 "00000001" // /* MW 5 */ + 13254 "01000000" // /* MW 4 */ + 13255 "00011000" // /* MW 3 */ + 13256 "00011010" // /* MW 2 */ + 13257 "10000000" // /* MW 1 */ +.delay_slot + 13258 "10011000" // ST p7, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13259 "10011101" // /* MW 3 */ + 13260 "11101011" // /* MW 2 */ + 13261 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13263 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13265 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 99 32 first +.delay_slot + 13266 "10011000" // ST eh0, [sp, #-72] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13267 "00001101" // /* MW 3 */ + 13268 "10111000" // /* MW 2 */ + 13269 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 99 32 +.delay_slot + 13270 "10011000" // ST el0, [sp, #-68] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13271 "00101101" // /* MW 3 */ + 13272 "10111100" // /* MW 2 */ + 13273 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 105 35 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 106 4 first +.no_stack_arguments + 13274 "10111010" // MOVA dj0, #16; JL #11744 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=11744 delay_slots=5 */ + 13275 "01000000" // /* MW 9 */ + 13276 "00000000" // /* MW 8 */ + 13277 "00000000" // /* MW 7 */ + 13278 "10111100" // /* MW 6 */ + 13279 "00000101" // /* MW 5 */ + 13280 "00000000" // /* MW 4 */ + 13281 "10000000" // /* MW 3 */ + 13282 "00000010" // /* MW 2 */ + 13283 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 105 35 +.delay_slot + 13284 "01000100" // MOVXM p7, #508840 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13285 "01010000" // /* MW 5 */ + 13286 "11000111" // /* MW 4 */ + 13287 "11001110" // /* MW 3 */ + 13288 "00000111" // /* MW 2 */ + 13289 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 105 35 first +.delay_slot + 13290 "10011000" // ST r24, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13291 "00010001" // /* MW 3 */ + 13292 "00011111" // /* MW 2 */ + 13293 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 105 35 +.delay_slot + 13294 "10011000" // ST dj0, [p7], #-20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13295 "01000001" // /* MW 3 */ + 13296 "10111100" // /* MW 2 */ + 13297 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13298 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13299 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13300 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 13301 "10000001" // /* MW 11 */ + 13302 "10101101" // /* MW 10 */ + 13303 "00000000" // /* MW 9 */ + 13304 "00000000" // /* MW 8 */ + 13305 "00000000" // /* MW 7 */ + 13306 "00000000" // /* MW 6 */ + 13307 "00100000" // /* MW 5 */ + 13308 "00000000" // /* MW 4 */ + 13309 "11110000" // /* MW 3 */ + 13310 "00101100" // /* MW 2 */ + 13311 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 107 55 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 107 98 +.return_address + 13312 "10111010" // LDA r16, [p7], #4; MOVXM p2, #508892 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13313 "00010000" // /* MW 9 */ + 13314 "11101110" // /* MW 8 */ + 13315 "00110001" // /* MW 7 */ + 13316 "11110001" // /* MW 6 */ + 13317 "00000001" // /* MW 5 */ + 13318 "00000000" // /* MW 4 */ + 13319 "11010000" // /* MW 3 */ + 13320 "11000010" // /* MW 2 */ + 13321 "11100011" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 107 86 + 13322 "10011000" // LDA r18, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13323 "01010110" // /* MW 3 */ + 13324 "00000110" // /* MW 2 */ + 13325 "00000111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 107 98 + 13326 "10011000" // LDA r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13327 "00110110" // /* MW 3 */ + 13328 "00000110" // /* MW 2 */ + 13329 "00000010" // /* MW 1 */ + 13330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13331 "00000000" // /* MW 1 */ + 13332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13333 "00000000" // /* MW 1 */ + 13334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13335 "00000000" // /* MW 1 */ + 13336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13337 "00000000" // /* MW 1 */ + 13338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13339 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 107 65 + 13340 "10011000" // MUL r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13341 "00001111" // /* MW 3 */ + 13342 "10100001" // /* MW 2 */ + 13343 "00010100" // /* MW 1 */ + 13344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13345 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 107 96 + 13346 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13347 "00001111" // /* MW 3 */ + 13348 "01100001" // /* MW 2 */ + 13349 "00010100" // /* MW 1 */ + 13350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13351 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 107 34 + 13352 "00000010" // ST r16, [p7, #20]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13353 "01110000" // /* MW 7 */ + 13354 "10100101" // /* MW 6 */ + 13355 "00000001" // /* MW 5 */ + 13356 "00000000" // /* MW 4 */ + 13357 "00110000" // /* MW 3 */ + 13358 "11000010" // /* MW 2 */ + 13359 "11101010" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs35rmsnorm_row_major_part1_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EEEEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSF_ISG_SI_T1_EERA11_KjRNSF_ISG_NSH_3outET2_EE_352 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 110 14 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 110 47 + 13360 "10111010" // LDA p7, [sp, #-24]; MOVXM p2, #508900 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13361 "00010000" // /* MW 9 */ + 13362 "11110010" // /* MW 8 */ + 13363 "00110001" // /* MW 7 */ + 13364 "11110001" // /* MW 6 */ + 13365 "00000001" // /* MW 5 */ + 13366 "00000000" // /* MW 4 */ + 13367 "00100000" // /* MW 3 */ + 13368 "01110011" // /* MW 2 */ + 13369 "11111101" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 110 14 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 110 47 + 13370 "01010100" // LDA r0, [p2]; MOV m0, #36 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13371 "10010001" // /* MW 5 */ + 13372 "00000000" // /* MW 4 */ + 13373 "11010000" // /* MW 3 */ + 13374 "10000010" // /* MW 2 */ + 13375 "01000000" // /* MW 1 */ + 13376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13377 "00000000" // /* MW 1 */ + 13378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13379 "00000000" // /* MW 1 */ + 13380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13381 "00000000" // /* MW 1 */ + 13382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13383 "00000000" // /* MW 1 */ + 13384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13385 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 110 47 + 13386 "10011000" // LDA r1, [p7], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13387 "00110110" // /* MW 3 */ + 13388 "00001000" // /* MW 2 */ + 13389 "00000111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 110 26 +.no_stack_arguments + 13390 "00000100" // JL #15728 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=15728 delay_slots=5 */ + 13391 "00000001" // /* MW 5 */ + 13392 "00000000" // /* MW 4 */ + 13393 "10111000" // /* MW 3 */ + 13394 "00011110" // /* MW 2 */ + 13395 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13397 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13399 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13401 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13403 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13404 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13405 "01100111" // /* MW 3 */ + 13406 "00000001" // /* MW 2 */ + 13407 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 285 49 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 110 58 +.return_address + 13408 "00100100" // EQZ r26, r3; ADD.NC p2, r15, #12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13409 "00001100" // /* MW 5 */ + 13410 "11001111" // /* MW 4 */ + 13411 "00000100" // /* MW 3 */ + 13412 "10011010" // /* MW 2 */ + 13413 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 285 49 + 13414 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13415 "01110110" // /* MW 3 */ + 13416 "11111111" // /* MW 2 */ + 13417 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 285 64 + 13418 "10011000" // LDA r16, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13419 "00010110" // /* MW 3 */ + 13420 "11111110" // /* MW 2 */ + 13421 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 285 80 + 13422 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13423 "00110110" // /* MW 3 */ + 13424 "11111110" // /* MW 2 */ + 13425 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 302 28 first + 13426 "10011000" // LDA r18, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13427 "01010110" // /* MW 3 */ + 13428 "01000110" // /* MW 2 */ + 13429 "00000010" // /* MW 1 */ + 13430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13431 "00000000" // /* MW 1 */ + 13432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13433 "00000000" // /* MW 1 */ + 13434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13435 "00000000" // /* MW 1 */ + 13436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13437 "00000000" // /* MW 1 */ + 13438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13439 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 285 36 first +.src_ref 1 "io_buffer_main.h" 285 43 first + 13440 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13441 "00000010" // /* MW 3 */ + 13442 "01100001" // /* MW 2 */ + 13443 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 285 20 + 13444 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13445 "00010001" // /* MW 3 */ + 13446 "00000110" // /* MW 2 */ + 13447 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 + 13448 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13449 "11111101" // /* MW 3 */ + 13450 "11100000" // /* MW 2 */ + 13451 "00010111" // /* MW 1 */ + 13452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13453 "00000000" // /* MW 1 */ + 13454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13455 "00000000" // /* MW 1 */ + 13456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13457 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 first + 13458 "00011000" // ACQ.COND r18, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13459 "00001000" // /* MW 3 */ + 13460 "10010111" // /* MW 2 */ + 13461 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 397 51 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 143 48 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 143 59 + 13462 "10111010" // MOVA m0, #-80; MOVX r16, #1; MOV r15, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13463 "01111000" // /* MW 9 */ + 13464 "01100000" // /* MW 8 */ + 13465 "11101010" // /* MW 7 */ + 13466 "00101001" // /* MW 6 */ + 13467 "00000000" // /* MW 5 */ + 13468 "00000001" // /* MW 4 */ + 13469 "10000000" // /* MW 3 */ + 13470 "00000000" // /* MW 2 */ + 13471 "11110110" // /* MW 1 */ + 13472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13473 "00000000" // /* MW 1 */ + 13474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13475 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 143 48 first + 13476 "10011000" // LDA r18, [p7], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13477 "01010110" // /* MW 3 */ + 13478 "00001010" // /* MW 2 */ + 13479 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 13480 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13481 "00110110" // /* MW 3 */ + 13482 "00000110" // /* MW 2 */ + 13483 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 + 13484 "10011000" // LDA p1, [p2], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13485 "10011110" // /* MW 3 */ + 13486 "01011100" // /* MW 2 */ + 13487 "00000010" // /* MW 1 */ + 13488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13489 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 145 2 first +.no_stack_arguments + 13490 "00000100" // JL #12144 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12144 delay_slots=5 */ + 13491 "00000001" // /* MW 5 */ + 13492 "00000000" // /* MW 4 */ + 13493 "10111000" // /* MW 3 */ + 13494 "00010111" // /* MW 2 */ + 13495 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 400 28 +.src_ref 1 "io_buffer_main.h" 400 30 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 145 2 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 159 47 +.delay_slot + 13496 "00000010" // MOVS p2, p7; MOV p7, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13497 "01110000" // /* MW 7 */ + 13498 "01100000" // /* MW 6 */ + 13499 "10110010" // /* MW 5 */ + 13500 "00000011" // /* MW 4 */ + 13501 "01100000" // /* MW 3 */ + 13502 "10010001" // /* MW 2 */ + 13503 "01010011" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 159 47 +.delay_slot + 13504 "11111000" // MOV p6, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13505 "11000000" // /* MW 3 */ + 13506 "01100100" // /* MW 2 */ + 13507 "00011110" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 143 59 first +.delay_slot + 13508 "10011000" // LSHL r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13509 "00001101" // /* MW 3 */ + 13510 "10100001" // /* MW 2 */ + 13511 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13513 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 143 11 +.delay_slot + 13514 "10010100" // NOPA; ADD.NC p0, r17, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13515 "10000010" // /* MW 5 */ + 13516 "11010001" // /* MW 4 */ + 13517 "11110000" // /* MW 3 */ + 13518 "00101100" // /* MW 2 */ + 13519 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 159 47 +.return_address + 13520 "10111000" // MOV dj0, #44 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13521 "01011000" // /* MW 3 */ + 13522 "10000000" // /* MW 2 */ + 13523 "00011000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 159 14 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 159 47 first + 13524 "10111010" // LDA r1, [p6, dj0]; MOVXM p0, #508900 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13525 "00010000" // /* MW 9 */ + 13526 "11110010" // /* MW 8 */ + 13527 "00110001" // /* MW 7 */ + 13528 "11110000" // /* MW 6 */ + 13529 "00000001" // /* MW 5 */ + 13530 "00000000" // /* MW 4 */ + 13531 "11010000" // /* MW 3 */ + 13532 "00000110" // /* MW 2 */ + 13533 "11000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 159 14 + 13534 "10011000" // LDA r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13535 "00010110" // /* MW 3 */ + 13536 "00000100" // /* MW 2 */ + 13537 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 159 26 +.no_stack_arguments + 13538 "00000100" // JL #15728 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=15728 delay_slots=5 */ + 13539 "00000001" // /* MW 5 */ + 13540 "00000000" // /* MW 4 */ + 13541 "10111000" // /* MW 3 */ + 13542 "00011110" // /* MW 2 */ + 13543 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 400 28 +.src_ref 1 "io_buffer_main.h" 400 30 +.delay_slot + 13544 "11111000" // MOV p6, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13545 "11000000" // /* MW 3 */ + 13546 "01101110" // /* MW 2 */ + 13547 "00011110" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13549 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13551 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 397 51 +.delay_slot + 13552 "11111000" // MOV p2, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13553 "10100000" // /* MW 3 */ + 13554 "01100111" // /* MW 2 */ + 13555 "00011010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 397 51 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 159 91 +.delay_slot + 13556 "11110110" // NOPA; NOPB; MOVS p7, p2; MOV r15, r1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 13557 "01110000" // /* MW 11 */ + 13558 "01010000" // /* MW 10 */ + 13559 "11101000" // /* MW 9 */ + 13560 "00000001" // /* MW 8 */ + 13561 "10001011" // /* MW 7 */ + 13562 "10001000" // /* MW 6 */ + 13563 "00100111" // /* MW 5 */ + 13564 "00000000" // /* MW 4 */ + 13565 "11110000" // /* MW 3 */ + 13566 "00101100" // /* MW 2 */ + 13567 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 397 51 first +.src_ref 1 "io_buffer_main.h" 400 30 +.src_ref 1 "io_buffer_main.h" 464 8 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 159 91 +.return_address + 13568 "10111010" // LDA r17, [p7, #20]; ADD r18, r15, #-1; MOV r16, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13569 "01011000" // /* MW 9 */ + 13570 "00000001" // /* MW 8 */ + 13571 "00001000" // /* MW 7 */ + 13572 "11111010" // /* MW 6 */ + 13573 "00101111" // /* MW 5 */ + 13574 "00011111" // /* MW 4 */ + 13575 "11010000" // /* MW 3 */ + 13576 "11000110" // /* MW 2 */ + 13577 "11101010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 159 58 first + 13578 "10011000" // EQ r26, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13579 "00100111" // /* MW 3 */ + 13580 "11110101" // /* MW 2 */ + 13581 "00010000" // /* MW 1 */ + 13582 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13583 "00000000" // /* MW 1 */ + 13584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13585 "00000000" // /* MW 1 */ + 13586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13587 "00000000" // /* MW 1 */ + 13588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13589 "00000000" // /* MW 1 */ + 13590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13591 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 first + 13592 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13593 "00001000" // /* MW 3 */ + 13594 "01010101" // /* MW 2 */ + 13595 "00010100" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 161 6 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 169 + 13596 "10111010" // LDA lr, [sp, #-16]; MOVXM p2, #508900 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13597 "00010000" // /* MW 9 */ + 13598 "11110010" // /* MW 8 */ + 13599 "00110001" // /* MW 7 */ + 13600 "11110001" // /* MW 6 */ + 13601 "00000001" // /* MW 5 */ + 13602 "00000000" // /* MW 4 */ + 13603 "00100000" // /* MW 3 */ + 13604 "00000111" // /* MW 2 */ + 13605 "11111110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 400 30 +.src_ref 1 "io_buffer_main.h" 400 30 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 161 6 + 13606 "10111010" // LDA r17, [p6, #-8]; MOVX r24, #0; MOV r27, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13607 "01111000" // /* MW 9 */ + 13608 "10010000" // /* MW 8 */ + 13609 "01101110" // /* MW 7 */ + 13610 "00001011" // /* MW 6 */ + 13611 "10000000" // /* MW 5 */ + 13612 "00000001" // /* MW 4 */ + 13613 "11010000" // /* MW 3 */ + 13614 "11000110" // /* MW 2 */ + 13615 "11011100" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 161 32 + 13616 "00011000" // LDA r18, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13617 "01010001" // /* MW 3 */ + 13618 "11101110" // /* MW 2 */ + 13619 "00000111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 161 6 first + 13620 "10011000" // LDA r19, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13621 "01110110" // /* MW 3 */ + 13622 "00000110" // /* MW 2 */ + 13623 "00000010" // /* MW 1 */ + 13624 "00011000" // LDA p0, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13625 "00011001" // /* MW 3 */ + 13626 "11110100" // /* MW 2 */ + 13627 "00000111" // /* MW 1 */ + 13628 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13629 "10011001" // /* MW 3 */ + 13630 "11111011" // /* MW 2 */ + 13631 "00000111" // /* MW 1 */ + 13632 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13633 "11110001" // /* MW 3 */ + 13634 "11111101" // /* MW 2 */ + 13635 "00000111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 169 first + 13636 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 13637 "00000000" // /* MW 3 */ + 13638 "00101000" // /* MW 2 */ + 13639 "00010000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 400 30 first +.delay_slot + 13640 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13641 "00010001" // /* MW 3 */ + 13642 "00100001" // /* MW 2 */ + 13643 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 400 30 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 161 32 first +.delay_slot + 13644 "00100100" // SEL.EQZ r17, r17, r16, r27; ADD.NC r16, r18, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13645 "11111111" // /* MW 5 */ + 13646 "00110010" // /* MW 4 */ + 13647 "01001000" // /* MW 3 */ + 13648 "01100000" // /* MW 2 */ + 13649 "10001100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 400 28 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 161 6 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 161 18 +.delay_slot + 13650 "00111010" // ST r17, [p6, #-8]; EQ r27, r19, r16; ADD.NC r16, r19, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13651 "01001001" // /* MW 9 */ + 13652 "11000000" // /* MW 8 */ + 13653 "00001100" // /* MW 7 */ + 13654 "00111110" // /* MW 6 */ + 13655 "10111000" // /* MW 5 */ + 13656 "00100111" // /* MW 4 */ + 13657 "00110000" // /* MW 3 */ + 13658 "11000110" // /* MW 2 */ + 13659 "11011100" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 161 6 first +.delay_slot + 13660 "01011100" // MOVS p6, p0; SEL.EQZ r16, r16, r24, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13661 "00000100" // /* MW 5 */ + 13662 "01000011" // /* MW 4 */ + 13663 "01101000" // /* MW 3 */ + 13664 "00010001" // /* MW 2 */ + 13665 "11010000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 169 first +.delay_slot + 13666 "00111010" // ST r16, [p2]; PADDXM [sp], #-128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13667 "01110001" // /* MW 9 */ + 13668 "00000000" // /* MW 8 */ + 13669 "00000000" // /* MW 7 */ + 13670 "00000000" // /* MW 6 */ + 13671 "11111100" // /* MW 5 */ + 13672 "00111111" // /* MW 4 */ + 13673 "00110000" // /* MW 3 */ + 13674 "11000010" // /* MW 2 */ +.label _ZN12mllib_graphs35rmsnorm_row_major_part1_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EEEEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSF_ISG_SI_T1_EERA11_KjRNSF_ISG_NSH_3outET2_EE__end +.label __ZN12mllib_graphs35rmsnorm_row_major_part1_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EEEEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSF_ISG_SI_T1_EERA11_KjRNSF_ISG_NSH_3outET2_EE___func_end0 + 13675 "01000000" // /* MW 1 */ +.label __Z14_b8292_wrapperPPv___func_begin0 +.label _Z14_b8292_wrapperPPv +.function _b8292_wrapper _Z14_b8292_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 63 first +.src_ref 0 "0_0_reloadable5.cc" 65 79 +.function_start + 13680 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13681 "11000000" // /* MW 3 */ + 13682 "01100000" // /* MW 2 */ + 13683 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 65 79 first + 13684 "10011000" // LDA p0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13685 "00011110" // /* MW 3 */ + 13686 "00011100" // /* MW 2 */ + 13687 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 66 79 first + 13688 "10011000" // LDA p1, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13689 "10011110" // /* MW 3 */ + 13690 "00101100" // /* MW 2 */ + 13691 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 68 81 first + 13692 "10011000" // LDA p3, [p2, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13693 "10011110" // /* MW 3 */ + 13694 "11110101" // /* MW 2 */ + 13695 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 67 47 first + 13696 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13697 "00011110" // /* MW 3 */ + 13698 "00000101" // /* MW 2 */ + 13699 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 64 4 first +.tail_call + 13700 "10000100" // J #13008 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=13008 delay_slots=5 */ + 13701 "00000000" // /* MW 5 */ + 13702 "00000000" // /* MW 4 */ + 13703 "01101000" // /* MW 3 */ + 13704 "00011001" // /* MW 2 */ + 13705 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13707 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13709 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13711 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13713 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z14_b8292_wrapperPPv__end +.label __Z14_b8292_wrapperPPv___func_end0 + 13715 "00000000" // /* MW 1 */ +.label __ZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE___func_begin0 +.label _ZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE +.function rmsnorm_row_major_part2_4x4_wrapper, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> > > _ZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE +.src_ref 12 "rms_norm_adf_wrapper.cpp" 178 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 184 5 +.function_start + 13728 "01000100" // MOVXM p4, #508916 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13729 "11101000" // /* MW 5 */ + 13730 "11000111" // /* MW 4 */ + 13731 "11001000" // /* MW 3 */ + 13732 "00000111" // /* MW 2 */ + 13733 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 184 5 first + 13734 "10011000" // LDA r16, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13735 "00010110" // /* MW 3 */ + 13736 "00000110" // /* MW 2 */ + 13737 "00000100" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 178 + 13738 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13739 "00000001" // /* MW 5 */ + 13740 "00000000" // /* MW 4 */ + 13741 "00000000" // /* MW 3 */ + 13742 "00010000" // /* MW 2 */ + 13743 "00000000" // /* MW 1 */ + 13744 "10011000" // ST p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13745 "10011101" // /* MW 3 */ + 13746 "11111111" // /* MW 2 */ + 13747 "00001111" // /* MW 1 */ + 13748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13749 "00000000" // /* MW 1 */ + 13750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13751 "00000000" // /* MW 1 */ + 13752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13753 "00000000" // /* MW 1 */ + 13754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13755 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 184 5 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 184 17 + 13756 "10000100" // JNZ r16, #13872 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=13872 delay_slots=5 */ + 13757 "00000001" // /* MW 5 */ + 13758 "01000000" // /* MW 4 */ + 13759 "00011000" // /* MW 3 */ + 13760 "00011011" // /* MW 2 */ + 13761 "10000000" // /* MW 1 */ +.delay_slot + 13762 "10011000" // ST p0, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13763 "00011101" // /* MW 3 */ + 13764 "11101000" // /* MW 2 */ + 13765 "00001111" // /* MW 1 */ +.delay_slot + 13766 "10011000" // ST p3, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13767 "10011101" // /* MW 3 */ + 13768 "11110001" // /* MW 2 */ + 13769 "00001111" // /* MW 1 */ +.delay_slot + 13770 "10011000" // ST p1, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13771 "10011101" // /* MW 3 */ + 13772 "11101100" // /* MW 2 */ + 13773 "00001111" // /* MW 1 */ +.delay_slot + 13774 "10011000" // ST r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13775 "11110101" // /* MW 3 */ + 13776 "11111001" // /* MW 2 */ + 13777 "00001111" // /* MW 1 */ +.delay_slot + 13778 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13779 "00111101" // /* MW 3 */ + 13780 "11110100" // /* MW 2 */ + 13781 "00001111" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 186 36 + 13782 "10111010" // MOVA r17, #12; MOVX r19, #-16; MOV r16, CORE_ID /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13783 "01111000" // /* MW 9 */ + 13784 "01110000" // /* MW 8 */ + 13785 "00001101" // /* MW 7 */ + 13786 "00001010" // /* MW 6 */ + 13787 "00110110" // /* MW 5 */ + 13788 "00111111" // /* MW 4 */ + 13789 "00000000" // /* MW 3 */ + 13790 "10010001" // /* MW 2 */ + 13791 "00000001" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 185 14 + 13792 "10111010" // MOVA r18, #1; MOVXM p7, #508904 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13793 "00010000" // /* MW 9 */ + 13794 "11110100" // /* MW 8 */ + 13795 "10110001" // /* MW 7 */ + 13796 "11110011" // /* MW 6 */ + 13797 "00000001" // /* MW 5 */ + 13798 "00000000" // /* MW 4 */ + 13799 "00000000" // /* MW 3 */ + 13800 "00110010" // /* MW 2 */ + 13801 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 185 14 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 190 27 first + 13802 "01011100" // ST r16, [p7]; EXTEND.u8 r20, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13803 "00100000" // /* MW 5 */ + 13804 "01010001" // /* MW 4 */ + 13805 "00111000" // /* MW 3 */ + 13806 "11000010" // /* MW 2 */ + 13807 "11100000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 186 36 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 190 44 + 13808 "00100100" // LSHL r16, r16, r19; ADD.NC r19, r20, #-2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13809 "11111110" // /* MW 5 */ + 13810 "10110100" // /* MW 4 */ + 13811 "10111001" // /* MW 3 */ + 13812 "00100111" // /* MW 2 */ + 13813 "10000100" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 190 15 + 13814 "01000100" // MOVXM p7, #508908 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13815 "11011000" // /* MW 5 */ + 13816 "11000111" // /* MW 4 */ + 13817 "11001110" // /* MW 3 */ + 13818 "00000111" // /* MW 2 */ + 13819 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 190 15 first + 13820 "00111010" // ST r19, [p7]; MOVXM p7, #508944 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13821 "00010001" // /* MW 9 */ + 13822 "00001000" // /* MW 8 */ + 13823 "10110010" // /* MW 7 */ + 13824 "11110011" // /* MW 6 */ + 13825 "00000001" // /* MW 5 */ + 13826 "00000000" // /* MW 4 */ + 13827 "00110000" // /* MW 3 */ + 13828 "11001110" // /* MW 2 */ + 13829 "11100000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first + 13830 "10111010" // ST.s8 r17, [p7]; MOVXM p0, #508940 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13831 "00010000" // /* MW 9 */ + 13832 "00000110" // /* MW 8 */ + 13833 "00110010" // /* MW 7 */ + 13834 "11110000" // /* MW 6 */ + 13835 "00000001" // /* MW 5 */ + 13836 "00000000" // /* MW 4 */ + 13837 "11100000" // /* MW 3 */ + 13838 "11000100" // /* MW 2 */ + 13839 "11100000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 186 27 first + 13840 "00011000" // EXTEND.u8 r16, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13841 "10010000" // /* MW 3 */ + 13842 "00100000" // /* MW 2 */ + 13843 "00010100" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 186 15 + 13844 "01000100" // MOVXM p7, #508912 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13845 "11100000" // /* MW 5 */ + 13846 "11000111" // /* MW 4 */ + 13847 "11001110" // /* MW 3 */ + 13848 "00000111" // /* MW 2 */ + 13849 "00000000" // /* MW 1 */ + 13850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13851 "00000000" // /* MW 1 */ + 13852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13853 "00000000" // /* MW 1 */ + 13854 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13855 "00000000" // /* MW 1 */ + 13856 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13857 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first + 13858 "10011000" // ST r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13859 "01010001" // /* MW 3 */ + 13860 "00000110" // /* MW 2 */ + 13861 "00001000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 186 15 first + 13862 "01111010" // NOPA; ST r16, [p7]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13863 "00000000" // /* MW 9 */ + 13864 "00000000" // /* MW 8 */ + 13865 "00000000" // /* MW 7 */ + 13866 "10000000" // /* MW 6 */ + 13867 "00010001" // /* MW 5 */ + 13868 "00000110" // /* MW 4 */ + 13869 "11110111" // /* MW 3 */ + 13870 "00101100" // /* MW 2 */ + 13871 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE_144 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 197 24 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 197 24 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 199 33 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 201 30 + 13872 "01110110" // LDA el0, [p2], #4; MOVS p1, p2; MOVX r17, #2; MOV dj0, #40 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 13873 "01011000" // /* MW 11 */ + 13874 "00101000" // /* MW 10 */ + 13875 "01000000" // /* MW 9 */ + 13876 "01001000" // /* MW 8 */ + 13877 "00010000" // /* MW 7 */ + 13878 "00000001" // /* MW 6 */ + 13879 "10001011" // /* MW 5 */ + 13880 "10001000" // /* MW 4 */ + 13881 "11010001" // /* MW 3 */ + 13882 "10000101" // /* MW 2 */ + 13883 "01000011" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 199 33 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 201 30 + 13884 "10111010" // LDA el3, [p2], #4; MOVXM p7, #508852 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13885 "00010000" // /* MW 9 */ + 13886 "11011010" // /* MW 8 */ + 13887 "10110001" // /* MW 7 */ + 13888 "11110011" // /* MW 6 */ + 13889 "00000001" // /* MW 5 */ + 13890 "00000000" // /* MW 4 */ + 13891 "11010000" // /* MW 3 */ + 13892 "10011101" // /* MW 2 */ + 13893 "01000011" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 199 67 + 13894 "11010100" // LDA el2, [p2], #4; MOV r18, sp /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13895 "11000001" // /* MW 5 */ + 13896 "00101011" // /* MW 4 */ + 13897 "11011001" // /* MW 3 */ + 13898 "10010101" // /* MW 2 */ + 13899 "01000011" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 199 67 + 13900 "00010100" // LDA eh0, [p2], #4; ADD.NC p0, r18, #-104 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13901 "10011000" // /* MW 5 */ + 13902 "11010010" // /* MW 4 */ + 13903 "11010000" // /* MW 3 */ + 13904 "10000001" // /* MW 2 */ + 13905 "01000011" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 199 101 + 13906 "10011000" // LDA el1, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13907 "01101110" // /* MW 3 */ + 13908 "00011100" // /* MW 2 */ + 13909 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 199 101 + 13910 "10011000" // LDA eh1, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13911 "01001110" // /* MW 3 */ + 13912 "00011100" // /* MW 2 */ + 13913 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 200 50 first + 13914 "10011000" // LDA eh2, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13915 "10001110" // /* MW 3 */ + 13916 "00011100" // /* MW 2 */ + 13917 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 199 32 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 200 50 + 13918 "00001100" // LDA el0, [p2], #4; ST el0, [sp, #-104] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13919 "01011011" // /* MW 5 */ + 13920 "00110000" // /* MW 4 */ + 13921 "11011111" // /* MW 3 */ + 13922 "10000101" // /* MW 2 */ + 13923 "01000011" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 197 24 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 199 32 + 13924 "00001100" // LDA r15, [p1, dj0]; ST el3, [sp, #-100] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13925 "11011011" // /* MW 5 */ + 13926 "00111001" // /* MW 4 */ + 13927 "11011111" // /* MW 3 */ + 13928 "00111110" // /* MW 2 */ + 13929 "00100000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 199 32 first + 13930 "10011000" // ST el2, [sp, #-96] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13931 "10101101" // /* MW 3 */ + 13932 "10100000" // /* MW 2 */ + 13933 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 199 32 + 13934 "10011000" // ST eh0, [sp, #-92] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13935 "00001101" // /* MW 3 */ + 13936 "10100100" // /* MW 2 */ + 13937 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 199 32 + 13938 "10011000" // ST el1, [sp, #-88] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13939 "01101101" // /* MW 3 */ + 13940 "10101000" // /* MW 2 */ + 13941 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 199 32 + 13942 "10011000" // ST eh1, [sp, #-84] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13943 "01001101" // /* MW 3 */ + 13944 "10101100" // /* MW 2 */ + 13945 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 199 32 + 13946 "10011000" // ST eh2, [sp, #-80] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13947 "10001101" // /* MW 3 */ + 13948 "10110000" // /* MW 2 */ + 13949 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 199 32 + 13950 "10011000" // ST el0, [sp, #-76] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13951 "00101101" // /* MW 3 */ + 13952 "10110100" // /* MW 2 */ + 13953 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 200 84 first + 13954 "10011000" // LDA eh0, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13955 "00001110" // /* MW 3 */ + 13956 "00000100" // /* MW 2 */ + 13957 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 200 84 + 13958 "10011000" // LDA el0, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13959 "00101110" // /* MW 3 */ + 13960 "00010100" // /* MW 2 */ + 13961 "00000010" // /* MW 1 */ + 13962 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13963 "00000000" // /* MW 1 */ + 13964 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13965 "00000000" // /* MW 1 */ + 13966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13967 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 203 2 first +.no_stack_arguments + 13968 "00000100" // JL #11744 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11744 delay_slots=5 */ + 13969 "00000001" // /* MW 5 */ + 13970 "00000000" // /* MW 4 */ + 13971 "11110000" // /* MW 3 */ + 13972 "00010110" // /* MW 2 */ + 13973 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13975 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 199 32 first +.delay_slot + 13976 "10011000" // ST eh0, [sp, #-72] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13977 "00001101" // /* MW 3 */ + 13978 "10111000" // /* MW 2 */ + 13979 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 199 32 +.delay_slot + 13980 "10011000" // ST el0, [sp, #-68] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13981 "00101101" // /* MW 3 */ + 13982 "10111100" // /* MW 2 */ + 13983 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 201 30 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 202 31 +.delay_slot + 13984 "01011100" // ST r17, [p7], #-8; MOVX r16, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13985 "10000010" // /* MW 5 */ + 13986 "01000000" // /* MW 4 */ + 13987 "00110000" // /* MW 3 */ + 13988 "11000110" // /* MW 2 */ + 13989 "11111101" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 202 31 first +.delay_slot + 13990 "01111010" // NOPA; ST r16, [p7], #-12; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13991 "00000000" // /* MW 9 */ + 13992 "00000000" // /* MW 8 */ + 13993 "00000000" // /* MW 7 */ + 13994 "10000000" // /* MW 6 */ + 13995 "00010001" // /* MW 5 */ + 13996 "11011110" // /* MW 4 */ + 13997 "11110111" // /* MW 3 */ + 13998 "00101100" // /* MW 2 */ + 13999 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 204 35 +.return_address + 14000 "10111010" // LDA p1, [sp, #-24]; MOVXM p2, #508916 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14001 "00010000" // /* MW 9 */ + 14002 "11111010" // /* MW 8 */ + 14003 "00110001" // /* MW 7 */ + 14004 "11110001" // /* MW 6 */ + 14005 "00000001" // /* MW 5 */ + 14006 "00000000" // /* MW 4 */ + 14007 "00100000" // /* MW 3 */ + 14008 "00010011" // /* MW 2 */ + 14009 "11111101" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 204 35 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 206 95 + 14010 "10111010" // LDA r16, [p2]; MOVXM p0, #508908 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14011 "00010000" // /* MW 9 */ + 14012 "11110110" // /* MW 8 */ + 14013 "00110001" // /* MW 7 */ + 14014 "11110000" // /* MW 6 */ + 14015 "00000001" // /* MW 5 */ + 14016 "00000000" // /* MW 4 */ + 14017 "11010000" // /* MW 3 */ + 14018 "11000010" // /* MW 2 */ + 14019 "01000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 51 2 first + 14020 "10111010" // LDA p3, [sp, #-20]; MOVXM ls, #14128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14021 "00010000" // /* MW 9 */ + 14022 "10011000" // /* MW 8 */ + 14023 "01111011" // /* MW 7 */ + 14024 "00001100" // /* MW 6 */ + 14025 "00000000" // /* MW 5 */ + 14026 "00000000" // /* MW 4 */ + 14027 "00100000" // /* MW 3 */ + 14028 "10110011" // /* MW 2 */ + 14029 "11111101" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 51 37 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 206 95 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 215 59 + 14030 "10111010" // LDA r20, [p0]; MOVX r19, #-5; MOV r6, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14031 "01011000" // /* MW 9 */ + 14032 "00000001" // /* MW 8 */ + 14033 "11001000" // /* MW 7 */ + 14034 "01101000" // /* MW 6 */ + 14035 "00110111" // /* MW 5 */ + 14036 "00111111" // /* MW 4 */ + 14037 "11010000" // /* MW 3 */ + 14038 "11010010" // /* MW 2 */ + 14039 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 51 2 first + 14040 "10111010" // LDA p0, [sp, #-16]; MOVXM le, #14224 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14041 "00010000" // /* MW 9 */ + 14042 "11001000" // /* MW 8 */ + 14043 "10111011" // /* MW 7 */ + 14044 "00001101" // /* MW 6 */ + 14045 "00000000" // /* MW 5 */ + 14046 "00000000" // /* MW 4 */ + 14047 "00100000" // /* MW 3 */ + 14048 "00000011" // /* MW 2 */ + 14049 "11111110" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 205 33 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 224 6 + 14050 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14051 "00000001" // /* MW 3 */ + 14052 "00110000" // /* MW 2 */ + 14053 "00010000" // /* MW 1 */ + 14054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14055 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 14056 "10011000" // LDA r21, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14057 "10110110" // /* MW 3 */ + 14058 "00000110" // /* MW 2 */ + 14059 "00000001" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 204 33 first + 14060 "10011000" // ST r16, [p7], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14061 "00010001" // /* MW 3 */ + 14062 "00101110" // /* MW 2 */ + 14063 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 205 33 first + 14064 "00001100" // LDA p1, [p3]; ST r24, [p7], #12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14065 "00100011" // /* MW 5 */ + 14066 "01111110" // /* MW 4 */ + 14067 "11011110" // /* MW 3 */ + 14068 "10010011" // /* MW 2 */ + 14069 "01100000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 206 84 first + 14070 "10011000" // LDA r22, [p7], #-28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14071 "11010110" // /* MW 3 */ + 14072 "10011110" // /* MW 2 */ + 14073 "00000111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 43 29 first + 14074 "10011000" // LDA r23, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14075 "11110110" // /* MW 3 */ + 14076 "00011110" // /* MW 2 */ + 14077 "00000111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 206 53 first + 14078 "10011000" // LDA r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14079 "00110110" // /* MW 3 */ + 14080 "00000110" // /* MW 2 */ + 14081 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 14082 "10011000" // LDA p0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14083 "00011110" // /* MW 3 */ + 14084 "00000100" // /* MW 2 */ + 14085 "00000000" // /* MW 1 */ + 14086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14087 "00000000" // /* MW 1 */ + 14088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14089 "00000000" // /* MW 1 */ + 14090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14091 "00000000" // /* MW 1 */ + 14092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14093 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 51 37 first + 14094 "10011000" // LSHL r19, r23, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14095 "00111101" // /* MW 3 */ + 14096 "11100111" // /* MW 2 */ + 14097 "00010101" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 51 2 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 206 63 first + 14098 "00100100" // MUL r19, r17, r22; ADD.NC lc, r19, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14099 "00000000" // /* MW 5 */ + 14100 "11110011" // /* MW 4 */ + 14101 "11111010" // /* MW 3 */ + 14102 "11101101" // /* MW 2 */ + 14103 "10001100" // /* MW 1 */ + 14104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14105 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 206 93 + 14106 "10011000" // MUL r19, r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14107 "01001111" // /* MW 3 */ + 14108 "11100111" // /* MW 2 */ + 14109 "00010100" // /* MW 1 */ + 14110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14111 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 206 32 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 215 59 first + 14112 "01011100" // ST r19, [p7, #20]; LSHL r6, r19, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14113 "11011011" // /* MW 5 */ + 14114 "10011000" // /* MW 4 */ + 14115 "00111001" // /* MW 3 */ + 14116 "11001110" // /* MW 2 */ + 14117 "11101010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 215 11 + 14118 "10111010" // NOPA; NOPB; ADD.NC p7, r21, r6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14119 "10101110" // /* MW 9 */ + 14120 "01001100" // /* MW 8 */ + 14121 "10110101" // /* MW 7 */ + 14122 "00000011" // /* MW 6 */ + 14123 "00010000" // /* MW 5 */ + 14124 "00000000" // /* MW 4 */ + 14125 "11110000" // /* MW 3 */ + 14126 "00101100" // /* MW 2 */ + 14127 "00000000" // /* MW 1 */ +.label ZLS_F_ZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE_400 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 54 13 first +.begin_of_loop +.loop_nesting 1 + 14128 "10011000" // VLDA bmll0, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14129 "00010101" // /* MW 3 */ + 14130 "00011100" // /* MW 2 */ + 14131 "00000001" // /* MW 1 */ + 14132 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 14133 "10000001" // /* MW 11 */ + 14134 "10101101" // /* MW 10 */ + 14135 "00000000" // /* MW 9 */ + 14136 "00000000" // /* MW 8 */ + 14137 "00000000" // /* MW 7 */ + 14138 "00000000" // /* MW 6 */ + 14139 "00100000" // /* MW 5 */ + 14140 "00000000" // /* MW 4 */ + 14141 "11110000" // /* MW 3 */ + 14142 "00101100" // /* MW 2 */ + 14143 "00000000" // /* MW 1 */ + 14144 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14145 "00000000" // /* MW 15 */ + 14146 "00000000" // /* MW 14 */ + 14147 "01111000" // /* MW 13 */ + 14148 "10100101" // /* MW 12 */ + 14149 "00000001" // /* MW 11 */ + 14150 "00000000" // /* MW 10 */ + 14151 "00000000" // /* MW 9 */ + 14152 "00000000" // /* MW 8 */ + 14153 "01011011" // /* MW 7 */ + 14154 "00000001" // /* MW 6 */ + 14155 "00100000" // /* MW 5 */ + 14156 "00000000" // /* MW 4 */ + 14157 "11110000" // /* MW 3 */ + 14158 "00101100" // /* MW 2 */ + 14159 "00000000" // /* MW 1 */ + 14160 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14161 "00000000" // /* MW 15 */ + 14162 "00000000" // /* MW 14 */ + 14163 "01111000" // /* MW 13 */ + 14164 "10100101" // /* MW 12 */ + 14165 "00000001" // /* MW 11 */ + 14166 "00000000" // /* MW 10 */ + 14167 "00000000" // /* MW 9 */ + 14168 "00000000" // /* MW 8 */ + 14169 "01011011" // /* MW 7 */ + 14170 "00000001" // /* MW 6 */ + 14171 "00100000" // /* MW 5 */ + 14172 "00000000" // /* MW 4 */ + 14173 "11110000" // /* MW 3 */ + 14174 "00101100" // /* MW 2 */ + 14175 "00000000" // /* MW 1 */ + 14176 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14177 "00000000" // /* MW 15 */ + 14178 "00000000" // /* MW 14 */ + 14179 "01111000" // /* MW 13 */ + 14180 "10100101" // /* MW 12 */ + 14181 "00000001" // /* MW 11 */ + 14182 "00000000" // /* MW 10 */ + 14183 "00000000" // /* MW 9 */ + 14184 "00000000" // /* MW 8 */ + 14185 "01011011" // /* MW 7 */ + 14186 "00000001" // /* MW 6 */ + 14187 "00100000" // /* MW 5 */ + 14188 "00000000" // /* MW 4 */ + 14189 "11110000" // /* MW 3 */ + 14190 "00101100" // /* MW 2 */ + 14191 "00000000" // /* MW 1 */ + 14192 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14193 "00000000" // /* MW 15 */ + 14194 "00000000" // /* MW 14 */ + 14195 "01111000" // /* MW 13 */ + 14196 "10100101" // /* MW 12 */ + 14197 "00000001" // /* MW 11 */ + 14198 "00000000" // /* MW 10 */ + 14199 "00000000" // /* MW 9 */ + 14200 "00000000" // /* MW 8 */ + 14201 "01011011" // /* MW 7 */ + 14202 "00000001" // /* MW 6 */ + 14203 "00100000" // /* MW 5 */ + 14204 "00000000" // /* MW 4 */ + 14205 "11110000" // /* MW 3 */ + 14206 "00101100" // /* MW 2 */ + 14207 "00000000" // /* MW 1 */ + 14208 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14209 "00000000" // /* MW 15 */ + 14210 "00000000" // /* MW 14 */ + 14211 "01111000" // /* MW 13 */ + 14212 "10100101" // /* MW 12 */ + 14213 "00000001" // /* MW 11 */ + 14214 "00000000" // /* MW 10 */ + 14215 "00000000" // /* MW 9 */ + 14216 "00000000" // /* MW 8 */ + 14217 "01011011" // /* MW 7 */ + 14218 "00000001" // /* MW 6 */ + 14219 "00100000" // /* MW 5 */ + 14220 "00000000" // /* MW 4 */ + 14221 "11110000" // /* MW 3 */ + 14222 "00101100" // /* MW 2 */ + 14223 "00000000" // /* MW 1 */ +.label ZLE_F_ZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE_496 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 56 13 first +.end_of_loop + 14224 "11100001" // NOPA; NOPB; VST bmll0, [p0], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14225 "00000000" // /* MW 15 */ + 14226 "00000000" // /* MW 14 */ + 14227 "01111000" // /* MW 13 */ + 14228 "10100101" // /* MW 12 */ + 14229 "00000001" // /* MW 11 */ + 14230 "00000000" // /* MW 10 */ + 14231 "00000000" // /* MW 9 */ + 14232 "10000000" // /* MW 8 */ + 14233 "00000110" // /* MW 7 */ + 14234 "00011100" // /* MW 6 */ + 14235 "00100000" // /* MW 5 */ + 14236 "00000000" // /* MW 4 */ + 14237 "11110000" // /* MW 3 */ + 14238 "00101100" // /* MW 2 */ + 14239 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 61 2 first +.loop_nesting 0 + 14240 "01000100" // MOVXM ls, #14256 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14241 "01100000" // /* MW 5 */ + 14242 "11101111" // /* MW 4 */ + 14243 "00110001" // /* MW 3 */ + 14244 "00000000" // /* MW 2 */ + 14245 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 61 2 + 14246 "01000100" // MOVXM le, #14352 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14247 "00100000" // /* MW 5 */ + 14248 "11110000" // /* MW 4 */ + 14249 "00110110" // /* MW 3 */ + 14250 "00000000" // /* MW 2 */ + 14251 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 61 2 + 14252 "00011000" // ADD.NC lc, r17, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14253 "10000000" // /* MW 3 */ + 14254 "01111000" // /* MW 2 */ + 14255 "00011101" // /* MW 1 */ +.label ZLS_F_ZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE_528 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 63 14 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 64 13 first +.begin_of_loop +.loop_nesting 1 + 14256 "11100001" // LDA.s16 r18, [p7], #4; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14257 "00000000" // /* MW 15 */ + 14258 "00000000" // /* MW 14 */ + 14259 "01111000" // /* MW 13 */ + 14260 "10100101" // /* MW 12 */ + 14261 "00000001" // /* MW 11 */ + 14262 "00000000" // /* MW 10 */ + 14263 "00000000" // /* MW 9 */ + 14264 "00000000" // /* MW 8 */ + 14265 "01011011" // /* MW 7 */ + 14266 "00000001" // /* MW 6 */ + 14267 "00100000" // /* MW 5 */ + 14268 "00000000" // /* MW 4 */ + 14269 "01010000" // /* MW 3 */ + 14270 "11001010" // /* MW 2 */ + 14271 "11100101" // /* MW 1 */ + 14272 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14273 "00000000" // /* MW 15 */ + 14274 "00000000" // /* MW 14 */ + 14275 "01111000" // /* MW 13 */ + 14276 "10100101" // /* MW 12 */ + 14277 "00000001" // /* MW 11 */ + 14278 "00000000" // /* MW 10 */ + 14279 "00000000" // /* MW 9 */ + 14280 "00000000" // /* MW 8 */ + 14281 "01011011" // /* MW 7 */ + 14282 "00000001" // /* MW 6 */ + 14283 "00100000" // /* MW 5 */ + 14284 "00000000" // /* MW 4 */ + 14285 "11110000" // /* MW 3 */ + 14286 "00101100" // /* MW 2 */ + 14287 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 63 14 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 65 13 first + 14288 "11100001" // ST.s16 r18, [p0], #4; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14289 "00000000" // /* MW 15 */ + 14290 "00000000" // /* MW 14 */ + 14291 "01111000" // /* MW 13 */ + 14292 "10100101" // /* MW 12 */ + 14293 "00000001" // /* MW 11 */ + 14294 "00000000" // /* MW 10 */ + 14295 "00000000" // /* MW 9 */ + 14296 "00000000" // /* MW 8 */ + 14297 "01011011" // /* MW 7 */ + 14298 "00000001" // /* MW 6 */ + 14299 "00100000" // /* MW 5 */ + 14300 "00000000" // /* MW 4 */ + 14301 "11100000" // /* MW 3 */ + 14302 "11001010" // /* MW 2 */ + 14303 "00000101" // /* MW 1 */ + 14304 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14305 "00000000" // /* MW 15 */ + 14306 "00000000" // /* MW 14 */ + 14307 "01111000" // /* MW 13 */ + 14308 "10100101" // /* MW 12 */ + 14309 "00000001" // /* MW 11 */ + 14310 "00000000" // /* MW 10 */ + 14311 "00000000" // /* MW 9 */ + 14312 "00000000" // /* MW 8 */ + 14313 "01011011" // /* MW 7 */ + 14314 "00000001" // /* MW 6 */ + 14315 "00100000" // /* MW 5 */ + 14316 "00000000" // /* MW 4 */ + 14317 "11110000" // /* MW 3 */ + 14318 "00101100" // /* MW 2 */ + 14319 "00000000" // /* MW 1 */ + 14320 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14321 "00000000" // /* MW 15 */ + 14322 "00000000" // /* MW 14 */ + 14323 "01111000" // /* MW 13 */ + 14324 "10100101" // /* MW 12 */ + 14325 "00000001" // /* MW 11 */ + 14326 "00000000" // /* MW 10 */ + 14327 "00000000" // /* MW 9 */ + 14328 "00000000" // /* MW 8 */ + 14329 "01011011" // /* MW 7 */ + 14330 "00000001" // /* MW 6 */ + 14331 "00100000" // /* MW 5 */ + 14332 "00000000" // /* MW 4 */ + 14333 "11110000" // /* MW 3 */ + 14334 "00101100" // /* MW 2 */ + 14335 "00000000" // /* MW 1 */ + 14336 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14337 "00000000" // /* MW 15 */ + 14338 "00000000" // /* MW 14 */ + 14339 "01111000" // /* MW 13 */ + 14340 "10100101" // /* MW 12 */ + 14341 "00000001" // /* MW 11 */ + 14342 "00000000" // /* MW 10 */ + 14343 "00000000" // /* MW 9 */ + 14344 "00000000" // /* MW 8 */ + 14345 "01011011" // /* MW 7 */ + 14346 "00000001" // /* MW 6 */ + 14347 "00100000" // /* MW 5 */ + 14348 "00000000" // /* MW 4 */ + 14349 "11110000" // /* MW 3 */ + 14350 "00101100" // /* MW 2 */ + 14351 "00000000" // /* MW 1 */ +.label ZLE_F_ZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE_624 +.end_of_loop + 14352 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14353 "00000000" // /* MW 15 */ + 14354 "00000000" // /* MW 14 */ + 14355 "01111000" // /* MW 13 */ + 14356 "10100101" // /* MW 12 */ + 14357 "00000001" // /* MW 11 */ + 14358 "00000000" // /* MW 10 */ + 14359 "00000000" // /* MW 9 */ + 14360 "00000000" // /* MW 8 */ + 14361 "01011011" // /* MW 7 */ + 14362 "00000001" // /* MW 6 */ + 14363 "00100000" // /* MW 5 */ + 14364 "00000000" // /* MW 4 */ + 14365 "11110000" // /* MW 3 */ + 14366 "00101100" // /* MW 2 */ + 14367 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 228 +.loop_nesting 0 + 14368 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14369 "00111001" // /* MW 3 */ + 14370 "11110100" // /* MW 2 */ + 14371 "00000111" // /* MW 1 */ + 14372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14373 "00000000" // /* MW 1 */ + 14374 "00011000" // LDA p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14375 "10011001" // /* MW 3 */ + 14376 "11111111" // /* MW 2 */ + 14377 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 14378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14379 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.noswbrkpt + 14380 "00011000" // LDA r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14381 "11110001" // /* MW 3 */ + 14382 "11111001" // /* MW 2 */ + 14383 "00000111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 228 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14384 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14385 "00000001" // /* MW 5 */ + 14386 "00000000" // /* MW 4 */ + 14387 "00000000" // /* MW 3 */ + 14388 "11110000" // /* MW 2 */ + 14389 "11111111" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14391 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 228 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14392 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 14393 "00000000" // /* MW 3 */ + 14394 "00101000" // /* MW 2 */ + 14395 "00010000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 222 14 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14396 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14397 "00000111" // /* MW 3 */ + 14398 "00100000" // /* MW 2 */ + 14399 "00010100" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 224 18 first +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 14400 "10011000" // EQ r27, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14401 "00000111" // /* MW 3 */ + 14402 "11110111" // /* MW 2 */ + 14403 "00010011" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 224 6 +.delay_slot + 14404 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14405 "10000010" // /* MW 3 */ + 14406 "00100001" // /* MW 2 */ + 14407 "00010100" // /* MW 1 */ +.delay_slot + 14408 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14409 "00010001" // /* MW 3 */ + 14410 "00000110" // /* MW 2 */ + 14411 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE__end +.label __ZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE___func_end0 + 14413 "00000000" // /* MW 1 */ +.label __Z14_b8300_wrapperPPv___func_begin0 +.label _Z14_b8300_wrapperPPv +.function _b8300_wrapper _Z14_b8300_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 72 first +.src_ref 0 "0_0_reloadable5.cc" 74 79 +.function_start + 14416 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14417 "11000000" // /* MW 3 */ + 14418 "01100000" // /* MW 2 */ + 14419 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 74 79 first + 14420 "10011000" // LDA p0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14421 "00011110" // /* MW 3 */ + 14422 "00011100" // /* MW 2 */ + 14423 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 75 79 first + 14424 "10011000" // LDA p1, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14425 "10011110" // /* MW 3 */ + 14426 "00101100" // /* MW 2 */ + 14427 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 77 80 first + 14428 "10011000" // LDA p3, [p2, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14429 "10011110" // /* MW 3 */ + 14430 "11110101" // /* MW 2 */ + 14431 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 76 47 first + 14432 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14433 "00011110" // /* MW 3 */ + 14434 "00000101" // /* MW 2 */ + 14435 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 73 4 first +.tail_call + 14436 "10000100" // J #13728 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=13728 delay_slots=5 */ + 14437 "00000000" // /* MW 5 */ + 14438 "00000000" // /* MW 4 */ + 14439 "11010000" // /* MW 3 */ + 14440 "00011010" // /* MW 2 */ + 14441 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14443 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14445 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14447 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14449 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z14_b8300_wrapperPPv__end +.label __Z14_b8300_wrapperPPv___func_end0 + 14451 "00000000" // /* MW 1 */ +.label __Z23rmsnorm_row_major_part2I8bfloat16EvPT_S2_S2_S2_R33rmsnorm_row_major_internal_params___func_begin0 +.label _Z23rmsnorm_row_major_part2I8bfloat16EvPT_S2_S2_S2_R33rmsnorm_row_major_internal_params +.function rmsnorm_row_major_part2 _Z23rmsnorm_row_major_part2I8bfloat16EvPT_S2_S2_S2_R33rmsnorm_row_major_internal_params +.src_ref 3 "rmsnorm_row_major.h" 118 first +.src_ref 3 "rmsnorm_row_major.h" 125 31 +.function_start + 14464 "01000100" // MOVXM p4, #508824 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14465 "00110000" // /* MW 5 */ + 14466 "11000111" // /* MW 4 */ + 14467 "11001000" // /* MW 3 */ + 14468 "00000111" // /* MW 2 */ + 14469 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 125 31 first +.src_ref 3 "rmsnorm_row_major.h" 126 31 + 14470 "01010100" // LDA r6, [p4], #4; MOV m0, #-40 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14471 "01100001" // /* MW 5 */ + 14472 "00011111" // /* MW 4 */ + 14473 "11010000" // /* MW 3 */ + 14474 "10011010" // /* MW 2 */ + 14475 "10000011" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 126 31 first + 14476 "10011000" // LDA r2, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14477 "01010110" // /* MW 3 */ + 14478 "00001000" // /* MW 2 */ + 14479 "00000100" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 127 44 first + 14480 "10011000" // LDA.s16 r0, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14481 "00010010" // /* MW 3 */ + 14482 "00011100" // /* MW 2 */ + 14483 "00000100" // /* MW 1 */ + 14484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14485 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 118 + 14486 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14487 "00000001" // /* MW 5 */ + 14488 "00000000" // /* MW 4 */ + 14489 "00000000" // /* MW 3 */ + 14490 "00001000" // /* MW 2 */ + 14491 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 128 34 first +.src_ref 3 "rmsnorm_row_major.h" 129 26 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 14492 "10111010" // LDA.s16 r5, [p4]; MOVXM p5, #508944 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14493 "00010000" // /* MW 9 */ + 14494 "00001000" // /* MW 8 */ + 14495 "10110010" // /* MW 7 */ + 14496 "11110010" // /* MW 6 */ + 14497 "00000001" // /* MW 5 */ + 14498 "00000000" // /* MW 4 */ + 14499 "01010000" // /* MW 3 */ + 14500 "10010110" // /* MW 2 */ + 14501 "10000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 129 26 first +.src_ref 3 "rmsnorm_row_major.h" 134 35 +.src_ref 3 "rmsnorm_row_major.h" 143 25 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 14502 "10111010" // LDA.s8 r14, [p5]; MOVX r4, #-5; MOV dj1, #50 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14503 "01011000" // /* MW 9 */ + 14504 "00110010" // /* MW 8 */ + 14505 "11000000" // /* MW 7 */ + 14506 "01101000" // /* MW 6 */ + 14507 "01000111" // /* MW 5 */ + 14508 "00111110" // /* MW 4 */ + 14509 "01010000" // /* MW 3 */ + 14510 "10111000" // /* MW 2 */ + 14511 "10100000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 134 35 first +.src_ref 3 "rmsnorm_row_major.h" 143 25 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14512 "11111010" // LDA r11, [p4, dj1]; ST r12, [sp, #-4]; LSHL r12, r6, r4 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14513 "01001101" // /* MW 9 */ + 14514 "10011000" // /* MW 8 */ + 14515 "00000001" // /* MW 7 */ + 14516 "10000000" // /* MW 6 */ + 14517 "10010101" // /* MW 5 */ + 14518 "11111101" // /* MW 4 */ + 14519 "11010111" // /* MW 3 */ + 14520 "00101110" // /* MW 2 */ + 14521 "10000100" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "rmsnorm_row_major.h" 127 38 +.src_ref 3 "rmsnorm_row_major.h" 128 28 +.src_ref 3 "rmsnorm_row_major.h" 143 34 +.src_ref 3 "rmsnorm_row_major.h" 144 36 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14522 "01110110" // MOVA dj0, #64; ST r9, [sp, #-12]; MOVX r3, #16; ADD.NC r9, r12, #-1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 14523 "11001000" // /* MW 11 */ + 14524 "00111111" // /* MW 10 */ + 14525 "00101011" // /* MW 9 */ + 14526 "00001001" // /* MW 8 */ + 14527 "00110010" // /* MW 7 */ + 14528 "10000000" // /* MW 6 */ + 14529 "00110101" // /* MW 5 */ + 14530 "11110101" // /* MW 4 */ + 14531 "10000111" // /* MW 3 */ + 14532 "00000010" // /* MW 2 */ + 14533 "00001000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 127 38 first +.src_ref 3 "rmsnorm_row_major.h" 139 49 +.src_ref 3 "rmsnorm_row_major.h" 144 36 +.src_ref 3 "rmsnorm_row_major.h" 144 36 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14534 "01110110" // MOVA r1, #1; ST r13, [sp, #-28]; ASHL r12, r0, r3; MOV r13, dj0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 14535 "01111000" // /* MW 11 */ + 14536 "10000000" // /* MW 10 */ + 14537 "10101000" // /* MW 9 */ + 14538 "11110101" // /* MW 8 */ + 14539 "11000001" // /* MW 7 */ + 14540 "10000000" // /* MW 6 */ + 14541 "10110101" // /* MW 5 */ + 14542 "11100101" // /* MW 4 */ + 14543 "00000111" // /* MW 3 */ + 14544 "00100001" // /* MW 2 */ + 14545 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 129 26 first +.src_ref 3 "rmsnorm_row_major.h" 144 36 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14546 "00111010" // ST r15, [sp, #-8]; LSHL r15, r6, r1; VINSERT.32 x0, x0, #0, r12 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14547 "10111001" // /* MW 9 */ + 14548 "11001000" // /* MW 8 */ + 14549 "00000000" // /* MW 7 */ + 14550 "11101100" // /* MW 6 */ + 14551 "11110000" // /* MW 5 */ + 14552 "00001100" // /* MW 4 */ + 14553 "10110000" // /* MW 3 */ + 14554 "00111110" // /* MW 2 */ + 14555 "11111111" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 147 4 first +.src_ref 3 "rmsnorm_row_major.h" 147 27 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14556 "00111010" // ST r14, [sp, #-16]; JZ r2, #15072 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=15072 delay_slots=5 */ + 14557 "01100001" // /* MW 9 */ + 14558 "00000000" // /* MW 8 */ + 14559 "00000000" // /* MW 7 */ + 14560 "01011100" // /* MW 6 */ + 14561 "00000111" // /* MW 5 */ + 14562 "00000100" // /* MW 4 */ + 14563 "10110000" // /* MW 3 */ + 14564 "00111010" // /* MW 2 */ + 14565 "11111110" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 128 28 first +.src_ref 3 "rmsnorm_row_major.h" 129 26 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14566 "00111010" // ST r8, [sp, #-32]; ASHL r8, r5, r3; VMOV bmlh1, x0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14567 "01111001" // /* MW 9 */ + 14568 "01001001" // /* MW 8 */ + 14569 "10100000" // /* MW 7 */ + 14570 "11110100" // /* MW 6 */ + 14571 "10000001" // /* MW 5 */ + 14572 "00001010" // /* MW 4 */ + 14573 "10110000" // /* MW 3 */ + 14574 "00100010" // /* MW 2 */ + 14575 "11111100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1454 19 +.src_ref 4 "vector.hpp" 1454 19 +.src_ref 5 "accum.hpp" 1117 103 +.src_ref 5 "accum.hpp" 1117 103 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 3 "rmsnorm_row_major.h" 129 26 +.src_ref 3 "rmsnorm_row_major.h" 129 26 first +.src_ref 3 "rmsnorm_row_major.h" 130 16 +.src_ref 3 "rmsnorm_row_major.h" 130 16 first +.src_ref 3 "rmsnorm_row_major.h" 173 25 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 14576 "00111010" // ST r11, [sp, #-24]; MOVX crRnd, r14; VINSERT.32 x0, x0, #0, r8 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14577 "10111001" // /* MW 9 */ + 14578 "10001000" // /* MW 8 */ + 14579 "00000000" // /* MW 7 */ + 14580 "00000000" // /* MW 6 */ + 14581 "11010100" // /* MW 5 */ + 14582 "00011101" // /* MW 4 */ + 14583 "10110000" // /* MW 3 */ + 14584 "00101110" // /* MW 2 */ + 14585 "11111101" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 129 26 +.src_ref 3 "rmsnorm_row_major.h" 129 26 +.src_ref 3 "rmsnorm_row_major.h" 130 16 +.src_ref 3 "rmsnorm_row_major.h" 139 49 first +.src_ref 3 "rmsnorm_row_major.h" 144 36 +.delay_slot + 14586 "01110110" // MOVA r7, #-64; VCONV.bf16.fp32 wl3, bmlh1; LSHL r8, r11, r1; VMOV bmll1, x0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 14587 "01111000" // /* MW 11 */ + 14588 "01001001" // /* MW 10 */ + 14589 "10000000" // /* MW 9 */ + 14590 "11101100" // /* MW 8 */ + 14591 "10000000" // /* MW 7 */ + 14592 "00010110" // /* MW 6 */ + 14593 "10110110" // /* MW 5 */ + 14594 "11000000" // /* MW 4 */ + 14595 "00000001" // /* MW 3 */ + 14596 "00000111" // /* MW 2 */ + 14597 "11111000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 139 15 +.src_ref 3 "rmsnorm_row_major.h" 144 36 first +.delay_slot + 14598 "00111010" // ST r10, [sp, #-20]; AND r10, r15, r7; MOV m0, r8 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14599 "01111001" // /* MW 9 */ + 14600 "00010000" // /* MW 8 */ + 14601 "00000010" // /* MW 7 */ + 14602 "10100100" // /* MW 6 */ + 14603 "10100011" // /* MW 5 */ + 14604 "00011110" // /* MW 4 */ + 14605 "10110000" // /* MW 3 */ + 14606 "10101010" // /* MW 2 */ + 14607 "11111101" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 129 26 first +.src_ref 3 "rmsnorm_row_major.h" 130 16 first +.src_ref 3 "rmsnorm_row_major.h" 139 15 first +.src_ref 3 "rmsnorm_row_major.h" 144 36 +.delay_slot + 14608 "11010010" // PADDB [p3], m0; VCONV.bf16.fp32 wl0, bmll1; SUB r12, r13, r10 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 14609 "10100001" // /* MW 7 */ + 14610 "01011000" // /* MW 6 */ + 14611 "00100011" // /* MW 5 */ + 14612 "00010111" // /* MW 4 */ + 14613 "11000110" // /* MW 3 */ + 14614 "00010010" // /* MW 2 */ + 14615 "00001000" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 10 "aie_core.h" 81 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "rmsnorm_row_major.h" 161 8 first + 14616 "01110110" // MOVA dc0, #0; MOVS dn0, r9; MOVXM ls, #14976 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 14617 "00010000" // /* MW 11 */ + 14618 "01000000" // /* MW 10 */ + 14619 "01111101" // /* MW 9 */ + 14620 "00001100" // /* MW 8 */ + 14621 "00000000" // /* MW 7 */ + 14622 "00000000" // /* MW 6 */ + 14623 "00001011" // /* MW 5 */ + 14624 "01001001" // /* MW 4 */ + 14625 "10000000" // /* MW 3 */ + 14626 "00000011" // /* MW 2 */ + 14627 "00000000" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "rmsnorm_row_major.h" 161 8 + 14628 "10111010" // MOVA r10, #828; MOVXM le, #15040 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14629 "00010000" // /* MW 9 */ + 14630 "01100000" // /* MW 8 */ + 14631 "10111101" // /* MW 7 */ + 14632 "00001101" // /* MW 6 */ + 14633 "00000000" // /* MW 5 */ + 14634 "00000000" // /* MW 4 */ + 14635 "00000000" // /* MW 3 */ + 14636 "10001010" // /* MW 2 */ + 14637 "01100111" // /* MW 1 */ +.src_ref 5 "add_accum.hpp" 19 92 + 14638 "10111010" // MOVA r11, #60; ADD r8, r2, #-1; VEXTBCST.16 x1, x0, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14639 "10101000" // /* MW 9 */ + 14640 "10000001" // /* MW 8 */ + 14641 "01000000" // /* MW 7 */ + 14642 "11111000" // /* MW 6 */ + 14643 "10001111" // /* MW 5 */ + 14644 "00000100" // /* MW 4 */ + 14645 "00000000" // /* MW 3 */ + 14646 "10001011" // /* MW 2 */ + 14647 "00000111" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 1139 17 + 14648 "11100100" // MOVX vaddSign0, #1; MOV m0, r12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14649 "01000001" // /* MW 5 */ + 14650 "00001100" // /* MW 4 */ + 14651 "00000000" // /* MW 3 */ + 14652 "01001000" // /* MW 2 */ + 14653 "00001011" // /* MW 1 */ + 14654 "01011000" // VEXTBCST.16 x0, x3, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14655 "00000011" // /* MW 3 */ + 14656 "00011001" // /* MW 2 */ + 14657 "00011000" // /* MW 1 */ + 14658 "11111000" // VCONV.fp32.bf16 cml0, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14659 "10001010" // /* MW 3 */ + 14660 "00000011" // /* MW 2 */ + 14661 "00011000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 147 4 + 14662 "10111010" // NOPA; MOVXM p4, #14672 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14663 "00010000" // /* MW 9 */ + 14664 "10101000" // /* MW 8 */ + 14665 "00110100" // /* MW 7 */ + 14666 "00001110" // /* MW 6 */ + 14667 "00000000" // /* MW 5 */ + 14668 "00000000" // /* MW 4 */ + 14669 "11110000" // /* MW 3 */ + 14670 "00101100" // /* MW 2 */ + 14671 "00000000" // /* MW 1 */ +.label TGT_F_Z23rmsnorm_row_major_part2I8bfloat16EvPT_S2_S2_S2_R33rmsnorm_row_major_internal_params_208 +.src_ref 3 "rmsnorm_row_major.h" 150 38 first +.src_ref 3 "rmsnorm_row_major.h" 151 23 first +.src_ref 3 "rmsnorm_row_major.h" 161 8 first +.loop_nesting 1 + 14672 "00010100" // LDA.s16 r12, [p3], #4; ADD.NC lc, r9, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14673 "00000001" // /* MW 5 */ + 14674 "11101001" // /* MW 4 */ + 14675 "01011010" // /* MW 3 */ + 14676 "10110010" // /* MW 2 */ + 14677 "01100101" // /* MW 1 */ + 14678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14679 "00000000" // /* MW 1 */ + 14680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14681 "00000000" // /* MW 1 */ + 14682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14683 "00000000" // /* MW 1 */ + 14684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14685 "00000000" // /* MW 1 */ + 14686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14687 "00000000" // /* MW 1 */ + 14688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14689 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 first + 14690 "11111000" // VBCST.16 x1, r12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14691 "01110010" // /* MW 3 */ + 14692 "10110001" // /* MW 2 */ + 14693 "00011000" // /* MW 1 */ + 14694 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14695 "00000000" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first + 14696 "01001000" // VMUL.f dm3, x1, x0, r10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14697 "00000001" // /* MW 3 */ + 14698 "11100010" // /* MW 2 */ + 14699 "01010011" // /* MW 1 */ + 14700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14701 "00000000" // /* MW 1 */ + 14702 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14703 "00000000" // /* MW 1 */ +.src_ref 5 "add_accum.hpp" 19 92 first + 14704 "01001000" // VADD.f dm3, dm3, dm0, r11 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14705 "00111101" // /* MW 3 */ + 14706 "01100000" // /* MW 2 */ + 14707 "01011011" // /* MW 1 */ + 14708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14709 "00000000" // /* MW 1 */ + 14710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14711 "00000000" // /* MW 1 */ + 14712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14713 "00000000" // /* MW 1 */ + 14714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14715 "00000000" // /* MW 1 */ + 14716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14717 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 first + 14718 "00011000" // VCONV.bf16.fp32 x1, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14719 "10010110" // /* MW 3 */ + 14720 "10010001" // /* MW 2 */ + 14721 "00001000" // /* MW 1 */ + 14722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14723 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 940 83 first + 14724 "11111000" // VCONV.fp32.bf16 cml4, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14725 "10001010" // /* MW 3 */ + 14726 "00000011" // /* MW 2 */ + 14727 "00011100" // /* MW 1 */ + 14728 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14729 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first + 14730 "11111000" // VMOV x6, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14731 "00010010" // /* MW 3 */ + 14732 "00110000" // /* MW 2 */ + 14733 "00011011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first + 14734 "10111000" // VEXTRACT.64 r13:r12, x6, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14735 "00000001" // /* MW 3 */ + 14736 "00011011" // /* MW 2 */ + 14737 "00011011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 + 14738 "10111000" // VEXTRACT.64 r17:r16, x6, #1, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14739 "00000011" // /* MW 3 */ + 14740 "00011011" // /* MW 2 */ + 14741 "00011100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 +.src_ref 5 "accum.hpp" 153 115 first +.src_ref 5 "elementary.hpp" 342 19 first +.src_ref 5 "elementary.hpp" 381 23 first + 14742 "11100100" // INVSQRT r3, r13; VMOV x1, bmlh4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14743 "00100101" // /* MW 5 */ + 14744 "01100010" // /* MW 4 */ + 14745 "10000001" // /* MW 3 */ + 14746 "11010000" // /* MW 2 */ + 14747 "01101000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 5 "elementary.hpp" 342 19 +.src_ref 5 "elementary.hpp" 381 23 + 14748 "01100100" // INVSQRT r2, r12; VEXTRACT.64 r5:r4, x1, #0, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14749 "00000011" // /* MW 5 */ + 14750 "00001110" // /* MW 4 */ + 14751 "10000010" // /* MW 3 */ + 14752 "10010000" // /* MW 2 */ + 14753 "01100000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 5 "elementary.hpp" 342 19 first +.src_ref 5 "elementary.hpp" 381 23 first + 14754 "01100100" // INVSQRT r7, r17; VEXTRACT.64 r1:r0, x6, #2, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14755 "00001011" // /* MW 5 */ + 14756 "00110110" // /* MW 4 */ + 14757 "10000000" // /* MW 3 */ + 14758 "11010000" // /* MW 2 */ + 14759 "10001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 5 "elementary.hpp" 342 19 +.src_ref 5 "elementary.hpp" 381 23 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 14760 "01100100" // INVSQRT r6, r16; VEXTRACT.64 r15:r14, x6, #3, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14761 "00001111" // /* MW 5 */ + 14762 "00110110" // /* MW 4 */ + 14763 "10000111" // /* MW 3 */ + 14764 "10010000" // /* MW 2 */ + 14765 "10000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 5 "elementary.hpp" 342 19 first +.src_ref 5 "elementary.hpp" 381 23 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 14766 "01100100" // INVSQRT r13, r1; VEXTRACT.64 r3:r2, x1, #1, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14767 "00000111" // /* MW 5 */ + 14768 "00001110" // /* MW 4 */ + 14769 "10000001" // /* MW 3 */ + 14770 "01010000" // /* MW 2 */ + 14771 "00001011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 5 "elementary.hpp" 342 19 +.src_ref 5 "elementary.hpp" 381 23 +.src_ref 5 "elementary.hpp" 381 23 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 14772 "11100100" // INVSQRT r25, r5; VPUSH.hi.64 x10, x0, r3:r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14773 "01101110" // /* MW 5 */ + 14774 "00000001" // /* MW 4 */ + 14775 "10001010" // /* MW 3 */ + 14776 "01010000" // /* MW 2 */ + 14777 "00101110" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 5 "elementary.hpp" 342 19 first +.src_ref 5 "elementary.hpp" 381 23 first + 14778 "01100100" // INVSQRT r12, r0; VEXTRACT.64 r21:r20, x6, #4, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14779 "00010011" // /* MW 5 */ + 14780 "00110110" // /* MW 4 */ + 14781 "10001010" // /* MW 3 */ + 14782 "00010000" // /* MW 2 */ + 14783 "00000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 5 "elementary.hpp" 342 19 +.src_ref 5 "elementary.hpp" 381 23 + 14784 "01100100" // INVSQRT r1, r15; VEXTRACT.64 r17:r16, x1, #2, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14785 "00001011" // /* MW 5 */ + 14786 "00001110" // /* MW 4 */ + 14787 "10001000" // /* MW 3 */ + 14788 "01010000" // /* MW 2 */ + 14789 "01111000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 +.src_ref 5 "elementary.hpp" 342 19 first +.src_ref 5 "elementary.hpp" 381 23 first +.src_ref 5 "elementary.hpp" 381 23 first +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 14790 "11100100" // INVSQRT r24, r4; VPUSH.hi.64 x10, x10, r7:r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14791 "01101110" // /* MW 5 */ + 14792 "10100011" // /* MW 4 */ + 14793 "10001010" // /* MW 3 */ + 14794 "00010000" // /* MW 2 */ + 14795 "00100110" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 5 "elementary.hpp" 342 19 +.src_ref 5 "elementary.hpp" 381 23 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 14796 "01100100" // INVSQRT r5, r3; VEXTRACT.64 r15:r14, x6, #5, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14797 "00010111" // /* MW 5 */ + 14798 "00110110" // /* MW 4 */ + 14799 "10000111" // /* MW 3 */ + 14800 "01010000" // /* MW 2 */ + 14801 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 5 "elementary.hpp" 342 19 first +.src_ref 5 "elementary.hpp" 381 23 first +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 14802 "01100100" // INVSQRT r0, r14; VEXTRACT.64 r7:r6, x6, #6, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14803 "00011011" // /* MW 5 */ + 14804 "00110110" // /* MW 4 */ + 14805 "10000011" // /* MW 3 */ + 14806 "00010000" // /* MW 2 */ + 14807 "01110000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 5 "elementary.hpp" 342 19 +.src_ref 5 "elementary.hpp" 381 23 + 14808 "01100100" // INVSQRT r4, r2; VEXTRACT.64 r3:r2, x1, #3, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14809 "00001111" // /* MW 5 */ + 14810 "00001110" // /* MW 4 */ + 14811 "10000001" // /* MW 3 */ + 14812 "00010000" // /* MW 2 */ + 14813 "00010001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 +.src_ref 5 "elementary.hpp" 342 19 first +.src_ref 5 "elementary.hpp" 381 23 first +.src_ref 5 "elementary.hpp" 381 23 first + 14814 "11100100" // INVSQRT r19, r21; VPUSH.hi.64 x10, x10, r13:r12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14815 "01101110" // /* MW 5 */ + 14816 "10100110" // /* MW 4 */ + 14817 "10001010" // /* MW 3 */ + 14818 "11010000" // /* MW 2 */ + 14819 "10101100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 5 "elementary.hpp" 342 19 +.src_ref 5 "elementary.hpp" 381 23 + 14820 "01100100" // INVSQRT r13, r17; VEXTRACT.64 r23:r22, x1, #4, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14821 "00010011" // /* MW 5 */ + 14822 "00001110" // /* MW 4 */ + 14823 "10001011" // /* MW 3 */ + 14824 "01010000" // /* MW 2 */ + 14825 "10001011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 +.src_ref 5 "elementary.hpp" 342 19 first +.src_ref 5 "elementary.hpp" 381 23 first +.src_ref 5 "elementary.hpp" 381 23 first + 14826 "11100100" // INVSQRT r18, r20; VPUSH.hi.64 x10, x10, r1:r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14827 "01101110" // /* MW 5 */ + 14828 "10100000" // /* MW 4 */ + 14829 "10001010" // /* MW 3 */ + 14830 "10010000" // /* MW 2 */ + 14831 "10100100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 5 "elementary.hpp" 342 19 +.src_ref 5 "elementary.hpp" 381 23 +.src_ref 5 "elementary.hpp" 381 23 + 14832 "11100100" // INVSQRT r12, r16; VPUSH.hi.64 x8, x0, r25:r24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14833 "01101110" // /* MW 5 */ + 14834 "00001100" // /* MW 4 */ + 14835 "10001000" // /* MW 3 */ + 14836 "00010000" // /* MW 2 */ + 14837 "10000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 +.src_ref 5 "elementary.hpp" 342 19 first +.src_ref 5 "elementary.hpp" 381 23 first +.src_ref 5 "elementary.hpp" 381 23 first + 14838 "11100100" // INVSQRT r17, r15; VPUSH.hi.64 x8, x8, r5:r4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14839 "01101110" // /* MW 5 */ + 14840 "10000010" // /* MW 4 */ + 14841 "10001000" // /* MW 3 */ + 14842 "01010000" // /* MW 2 */ + 14843 "01111100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 5 "elementary.hpp" 342 19 +.src_ref 5 "elementary.hpp" 381 23 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 14844 "01100100" // INVSQRT r1, r7; VEXTRACT.64 r5:r4, x1, #5, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14845 "00010111" // /* MW 5 */ + 14846 "00001110" // /* MW 4 */ + 14847 "10000010" // /* MW 3 */ + 14848 "01010000" // /* MW 2 */ + 14849 "00111000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 5 "elementary.hpp" 342 19 first +.src_ref 5 "elementary.hpp" 381 23 first +.aggressive_scheduled_block_id 4 +.noswbrkpt + 14850 "01100100" // INVSQRT r16, r14; VEXTRACT.64 r7:r6, x6, #7, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14851 "00011111" // /* MW 5 */ + 14852 "00110110" // /* MW 4 */ + 14853 "10000011" // /* MW 3 */ + 14854 "00010000" // /* MW 2 */ + 14855 "01110100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 5 "elementary.hpp" 342 19 +.src_ref 5 "elementary.hpp" 381 23 +.src_ref 5 "elementary.hpp" 381 23 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 14856 "11100100" // INVSQRT r0, r6; VPUSH.hi.64 x6, x8, r13:r12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14857 "01101110" // /* MW 5 */ + 14858 "10000110" // /* MW 4 */ + 14859 "10000110" // /* MW 3 */ + 14860 "00010000" // /* MW 2 */ + 14861 "00110000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 5 "elementary.hpp" 342 19 first +.src_ref 5 "elementary.hpp" 381 23 first + 14862 "01100100" // INVSQRT r13, r3; VEXTRACT.64 r15:r14, x1, #6, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14863 "00011011" // /* MW 5 */ + 14864 "00001110" // /* MW 4 */ + 14865 "10000111" // /* MW 3 */ + 14866 "01010000" // /* MW 2 */ + 14867 "00011011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 5 "elementary.hpp" 342 19 +.src_ref 5 "elementary.hpp" 381 23 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 14868 "01100100" // INVSQRT r12, r2; VEXTRACT.64 r3:r2, x1, #7, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14869 "00011111" // /* MW 5 */ + 14870 "00001110" // /* MW 4 */ + 14871 "10000001" // /* MW 3 */ + 14872 "00010000" // /* MW 2 */ + 14873 "00010011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 +.src_ref 5 "elementary.hpp" 342 19 first +.src_ref 5 "elementary.hpp" 381 23 first +.src_ref 5 "elementary.hpp" 381 23 first +.aggressive_scheduled_block_id 5 +.noswbrkpt + 14874 "11100100" // INVSQRT r1, r23; VPUSH.hi.64 x10, x10, r19:r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14875 "01101110" // /* MW 5 */ + 14876 "10101001" // /* MW 4 */ + 14877 "10001010" // /* MW 3 */ + 14878 "01010000" // /* MW 2 */ + 14879 "10111000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 5 "elementary.hpp" 342 19 +.src_ref 5 "elementary.hpp" 381 23 +.src_ref 5 "elementary.hpp" 381 23 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 14880 "11100100" // INVSQRT r0, r22; VPUSH.hi.64 x10, x10, r17:r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14881 "01101110" // /* MW 5 */ + 14882 "10101000" // /* MW 4 */ + 14883 "10001010" // /* MW 3 */ + 14884 "00010000" // /* MW 2 */ + 14885 "10110000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 +.src_ref 5 "elementary.hpp" 342 19 first +.src_ref 5 "elementary.hpp" 381 23 first +.src_ref 5 "elementary.hpp" 381 23 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 14886 "11100100" // INVSQRT r19, r7; VPUSH.hi.64 x1, x10, r1:r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14887 "01101110" // /* MW 5 */ + 14888 "10100000" // /* MW 4 */ + 14889 "10000001" // /* MW 3 */ + 14890 "11010000" // /* MW 2 */ + 14891 "00111100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 5 "elementary.hpp" 342 19 +.src_ref 5 "elementary.hpp" 381 23 +.src_ref 5 "elementary.hpp" 381 23 + 14892 "11100100" // INVSQRT r18, r6; VPUSH.hi.64 x6, x6, r13:r12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14893 "01101110" // /* MW 5 */ + 14894 "01100110" // /* MW 4 */ + 14895 "10000110" // /* MW 3 */ + 14896 "10010000" // /* MW 2 */ + 14897 "00110100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 +.src_ref 5 "elementary.hpp" 342 19 first +.src_ref 5 "elementary.hpp" 381 23 first + 14898 "00011000" // INVSQRT r7, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14899 "10000100" // /* MW 3 */ + 14900 "01001110" // /* MW 2 */ + 14901 "00010001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 5 "elementary.hpp" 342 19 +.src_ref 5 "elementary.hpp" 381 23 +.src_ref 5 "elementary.hpp" 381 23 + 14902 "11100100" // INVSQRT r13, r15; VPUSH.hi.64 x6, x6, r1:r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14903 "01101110" // /* MW 5 */ + 14904 "01100000" // /* MW 4 */ + 14905 "10000110" // /* MW 3 */ + 14906 "01010000" // /* MW 2 */ + 14907 "01111011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 +.src_ref 5 "elementary.hpp" 342 19 first +.src_ref 5 "elementary.hpp" 381 23 first + 14908 "00011000" // INVSQRT r6, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14909 "10000100" // /* MW 3 */ + 14910 "00001100" // /* MW 2 */ + 14911 "00010001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 5 "elementary.hpp" 342 19 +.src_ref 5 "elementary.hpp" 381 23 +.src_ref 5 "elementary.hpp" 381 23 + 14912 "11100100" // INVSQRT r1, r3; VPUSH.hi.64 x1, x1, r19:r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14913 "01101110" // /* MW 5 */ + 14914 "00011001" // /* MW 4 */ + 14915 "10000001" // /* MW 3 */ + 14916 "01010000" // /* MW 2 */ + 14917 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 1454 19 +.src_ref 5 "accum.hpp" 1117 103 +.src_ref 5 "elementary.hpp" 342 19 first +.src_ref 5 "elementary.hpp" 381 23 first + 14918 "11100100" // INVSQRT r12, r14; VMOV bmll4, x1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14919 "00100101" // /* MW 5 */ + 14920 "00000101" // /* MW 4 */ + 14921 "10001000" // /* MW 3 */ + 14922 "00010000" // /* MW 2 */ + 14923 "01110011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 5 "elementary.hpp" 342 19 +.src_ref 5 "elementary.hpp" 381 23 + 14924 "00011000" // INVSQRT r0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14925 "10000100" // /* MW 3 */ + 14926 "10000000" // /* MW 2 */ + 14927 "00010000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1454 19 first +.src_ref 5 "accum.hpp" 1117 103 first +.src_ref 5 "elementary.hpp" 381 23 first + 14928 "00000010" // VCONV.bf16.fp32 wl1, bmll4; VPUSH.hi.64 x6, x6, r7:r6 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 14929 "10110000" // /* MW 7 */ + 14930 "11011011" // /* MW 6 */ + 14931 "10011000" // /* MW 5 */ + 14932 "00000001" // /* MW 4 */ + 14933 "11000000" // /* MW 3 */ + 14934 "01000010" // /* MW 2 */ + 14935 "00011000" // /* MW 1 */ + 14936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14937 "00000000" // /* MW 1 */ +.src_ref 5 "elementary.hpp" 381 23 + 14938 "01111000" // VPUSH.hi.64 x6, x6, r13:r12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14939 "00110111" // /* MW 3 */ + 14940 "00110011" // /* MW 2 */ + 14941 "00011011" // /* MW 1 */ +.src_ref 5 "elementary.hpp" 381 23 + 14942 "01111000" // VPUSH.hi.64 x6, x6, r1:r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14943 "00110111" // /* MW 3 */ + 14944 "00110000" // /* MW 2 */ + 14945 "00011011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1454 19 +.src_ref 5 "accum.hpp" 1117 103 + 14946 "11111000" // VMOV bmll4, x6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14947 "10010010" // /* MW 3 */ + 14948 "00001100" // /* MW 2 */ + 14949 "00011100" // /* MW 1 */ + 14950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14951 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1454 19 +.src_ref 5 "accum.hpp" 1117 103 + 14952 "00011000" // VCONV.bf16.fp32 wh1, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14953 "00010110" // /* MW 3 */ + 14954 "10000010" // /* MW 2 */ + 14955 "00001000" // /* MW 1 */ + 14956 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14957 "01100111" // /* MW 3 */ + 14958 "00000001" // /* MW 2 */ + 14959 "00000000" // /* MW 1 */ + 14960 "11100001" // NOPA; NOPB; NOPS; NOPX; VEXTBCST.16 x1, x1, #0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14961 "00000000" // /* MW 15 */ + 14962 "00000000" // /* MW 14 */ + 14963 "10101000" // /* MW 13 */ + 14964 "10000001" // /* MW 12 */ + 14965 "01000100" // /* MW 11 */ + 14966 "00000000" // /* MW 10 */ + 14967 "00000000" // /* MW 9 */ + 14968 "00000000" // /* MW 8 */ + 14969 "01011011" // /* MW 7 */ + 14970 "00000001" // /* MW 6 */ + 14971 "00100000" // /* MW 5 */ + 14972 "00000000" // /* MW 4 */ + 14973 "11110000" // /* MW 3 */ + 14974 "00101100" // /* MW 2 */ + 14975 "00000000" // /* MW 1 */ +.label ZLS_F_Z23rmsnorm_row_major_part2I8bfloat16EvPT_S2_S2_S2_R33rmsnorm_row_major_internal_params_512 +.src_ref 10 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "rmsnorm_row_major.h" 165 25 first +.begin_of_loop +.loop_nesting 2 + 14976 "00111100" // VLDA.2D x2, [p1], d0; VLDB x3, [p0], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14977 "11101000" // /* MW 5 */ + 14978 "00111001" // /* MW 4 */ + 14979 "01110000" // /* MW 3 */ + 14980 "00010011" // /* MW 2 */ + 14981 "00100010" // /* MW 1 */ + 14982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14983 "00000000" // /* MW 1 */ + 14984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14985 "00000000" // /* MW 1 */ + 14986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14987 "00000000" // /* MW 1 */ + 14988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14989 "00000000" // /* MW 1 */ + 14990 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14991 "00000000" // /* MW 1 */ + 14992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14993 "00000000" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first + 14994 "01001000" // VMUL.f dm1, x3, x1, r10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14995 "00100001" // /* MW 3 */ + 14996 "11100110" // /* MW 2 */ + 14997 "01010001" // /* MW 1 */ + 14998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14999 "00000000" // /* MW 1 */ + 15000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15001 "00000000" // /* MW 1 */ + 15002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15003 "00000000" // /* MW 1 */ + 15004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15005 "00000000" // /* MW 1 */ + 15006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15007 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 first + 15008 "00011000" // VCONV.bf16.fp32 x4, cml1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15009 "10010110" // /* MW 3 */ + 15010 "00010000" // /* MW 2 */ + 15011 "00001010" // /* MW 1 */ + 15012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15013 "00000000" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first + 15014 "01001000" // VMUL.f dm2, x4, x2, r10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15015 "01000001" // /* MW 3 */ + 15016 "11101000" // /* MW 2 */ + 15017 "01010010" // /* MW 1 */ + 15018 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15019 "00000000" // /* MW 1 */ + 15020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15021 "00000000" // /* MW 1 */ + 15022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15023 "00000000" // /* MW 1 */ + 15024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15025 "00000000" // /* MW 1 */ + 15026 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 15027 "00011100" // /* MW 13 */ + 15028 "00000000" // /* MW 12 */ + 15029 "00000000" // /* MW 11 */ + 15030 "01010111" // /* MW 10 */ + 15031 "00011010" // /* MW 9 */ + 15032 "01000000" // /* MW 8 */ + 15033 "00000000" // /* MW 7 */ + 15034 "00000000" // /* MW 6 */ + 15035 "10110110" // /* MW 5 */ + 15036 "00000010" // /* MW 4 */ + 15037 "11110000" // /* MW 3 */ + 15038 "00101100" // /* MW 2 */ + 15039 "00000000" // /* MW 1 */ +.label ZLE_F_Z23rmsnorm_row_major_part2I8bfloat16EvPT_S2_S2_S2_R33rmsnorm_row_major_internal_params_576 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 3 "rmsnorm_row_major.h" 173 25 first +.end_of_loop + 15040 "11100001" // NOPA; NOPB; VST.CONV.bf16.fp32 cml2, [p2], #64;NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 15041 "00000000" // /* MW 15 */ + 15042 "00000000" // /* MW 14 */ + 15043 "01111000" // /* MW 13 */ + 15044 "10100101" // /* MW 12 */ + 15045 "00000001" // /* MW 11 */ + 15046 "00000000" // /* MW 10 */ + 15047 "00000000" // /* MW 9 */ + 15048 "00000000" // /* MW 8 */ + 15049 "00100011" // /* MW 7 */ + 15050 "00011101" // /* MW 6 */ + 15051 "00100010" // /* MW 5 */ + 15052 "00000000" // /* MW 4 */ + 15053 "11110000" // /* MW 3 */ + 15054 "00101100" // /* MW 2 */ + 15055 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 147 4 first +.loop_nesting 1 + 15056 "00011000" // JNZD r8, r8, p4 /* MW 4 */ /* control_operation: words=4 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 15057 "00100000" // /* MW 3 */ + 15058 "00010001" // /* MW 2 */ + 15059 "00010010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15061 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15063 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15065 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15066 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15067 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15068 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15069 "01100111" // /* MW 3 */ + 15070 "00000001" // /* MW 2 */ + 15071 "00000000" // /* MW 1 */ +.label TGT_F_Z23rmsnorm_row_major_part2I8bfloat16EvPT_S2_S2_S2_R33rmsnorm_row_major_internal_params_608 +.loop_nesting 0 + 15072 "00011000" // LDA r13, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15073 "10110001" // /* MW 3 */ + 15074 "11100101" // /* MW 2 */ + 15075 "00000111" // /* MW 1 */ + 15076 "00011000" // LDA r11, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15077 "01110001" // /* MW 3 */ + 15078 "11101001" // /* MW 2 */ + 15079 "00000111" // /* MW 1 */ + 15080 "00011000" // LDA r14, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15081 "11010001" // /* MW 3 */ + 15082 "11110001" // /* MW 2 */ + 15083 "00000111" // /* MW 1 */ + 15084 "00011000" // LDA r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15085 "11110001" // /* MW 3 */ + 15086 "11111001" // /* MW 2 */ + 15087 "00000111" // /* MW 1 */ + 15088 "00011000" // LDA r12, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15089 "10010001" // /* MW 3 */ + 15090 "11111101" // /* MW 2 */ + 15091 "00000111" // /* MW 1 */ + 15092 "00011000" // LDA r9, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15093 "00110001" // /* MW 3 */ + 15094 "11110101" // /* MW 2 */ + 15095 "00000111" // /* MW 1 */ + 15096 "00011000" // LDA r10, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15097 "01010001" // /* MW 3 */ + 15098 "11101101" // /* MW 2 */ + 15099 "00000111" // /* MW 1 */ + 15100 "00011000" // LDA r8, [sp, #-32] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15101 "00010001" // /* MW 3 */ + 15102 "11100001" // /* MW 2 */ + 15103 "00000111" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 176 first + 15104 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 15105 "00000000" // /* MW 3 */ + 15106 "00101000" // /* MW 2 */ + 15107 "00010000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 176 +.delay_slot + 15108 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15109 "00000001" // /* MW 5 */ + 15110 "00000000" // /* MW 4 */ + 15111 "00000000" // /* MW 3 */ + 15112 "11111000" // /* MW 2 */ + 15113 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15115 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15117 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15119 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z23rmsnorm_row_major_part2I8bfloat16EvPT_S2_S2_S2_R33rmsnorm_row_major_internal_params__end +.label __Z23rmsnorm_row_major_part2I8bfloat16EvPT_S2_S2_S2_R33rmsnorm_row_major_internal_params___func_end0 + 15121 "00000000" // /* MW 1 */ +.label __ZN12mllib_graphs35rmsnorm_row_major_part3_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE___func_begin0 +.label _ZN12mllib_graphs35rmsnorm_row_major_part3_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE +.function rmsnorm_row_major_part3_4x4_wrapper, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> > > _ZN12mllib_graphs35rmsnorm_row_major_part3_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE +.src_ref 12 "rms_norm_adf_wrapper.cpp" 237 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 243 5 +.function_start + 15136 "01000100" // MOVXM p4, #508932 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15137 "00001000" // /* MW 5 */ + 15138 "11001000" // /* MW 4 */ + 15139 "11001000" // /* MW 3 */ + 15140 "00000111" // /* MW 2 */ + 15141 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 243 5 first + 15142 "10011000" // LDA r16, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15143 "00010110" // /* MW 3 */ + 15144 "00000110" // /* MW 2 */ + 15145 "00000100" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 237 + 15146 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15147 "00000001" // /* MW 5 */ + 15148 "00000000" // /* MW 4 */ + 15149 "00000000" // /* MW 3 */ + 15150 "00010000" // /* MW 2 */ + 15151 "00000000" // /* MW 1 */ + 15152 "10011000" // ST p1, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15153 "10011101" // /* MW 3 */ + 15154 "11101000" // /* MW 2 */ + 15155 "00001111" // /* MW 1 */ + 15156 "10011000" // ST r15, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15157 "11110101" // /* MW 3 */ + 15158 "11110101" // /* MW 2 */ + 15159 "00001111" // /* MW 1 */ + 15160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15161 "00000000" // /* MW 1 */ + 15162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15163 "00000000" // /* MW 1 */ + 15164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15165 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 243 5 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 243 17 + 15166 "10000100" // JNZ r16, #15280 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=15280 delay_slots=5 */ + 15167 "00000001" // /* MW 5 */ + 15168 "01000000" // /* MW 4 */ + 15169 "11011000" // /* MW 3 */ + 15170 "00011101" // /* MW 2 */ + 15171 "10000000" // /* MW 1 */ +.delay_slot + 15172 "10011000" // ST p0, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15173 "00011101" // /* MW 3 */ + 15174 "11110000" // /* MW 2 */ + 15175 "00001111" // /* MW 1 */ +.delay_slot + 15176 "10011000" // ST p3, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15177 "10011101" // /* MW 3 */ + 15178 "11111001" // /* MW 2 */ + 15179 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 295 +.delay_slot + 15180 "00000010" // MOVS p4, p6; MOV p6, lr /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 15181 "01110000" // /* MW 7 */ + 15182 "11110000" // /* MW 6 */ + 15183 "00110000" // /* MW 5 */ + 15184 "00000011" // /* MW 4 */ + 15185 "01100000" // /* MW 3 */ + 15186 "00010001" // /* MW 2 */ + 15187 "10010011" // /* MW 1 */ +.delay_slot + 15188 "10011000" // ST p7, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15189 "10011101" // /* MW 3 */ + 15190 "11101111" // /* MW 2 */ + 15191 "00001111" // /* MW 1 */ +.delay_slot + 15192 "10011000" // ST p4, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15193 "00011101" // /* MW 3 */ + 15194 "11111110" // /* MW 2 */ + 15195 "00001111" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 + 15196 "10111010" // MOVA r19, #12; MOVX r20, #1; MOV r17, CORE_ID /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 15197 "01111000" // /* MW 9 */ + 15198 "01110000" // /* MW 8 */ + 15199 "00101101" // /* MW 7 */ + 15200 "00101010" // /* MW 6 */ + 15201 "01000000" // /* MW 5 */ + 15202 "00000001" // /* MW 4 */ + 15203 "00000000" // /* MW 3 */ + 15204 "10010011" // /* MW 2 */ + 15205 "00000001" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 244 14 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 245 36 + 15206 "10111010" // MOVA r18, #-16; MOVXM p7, #508920 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 15207 "00010000" // /* MW 9 */ + 15208 "11111100" // /* MW 8 */ + 15209 "10110001" // /* MW 7 */ + 15210 "11110011" // /* MW 6 */ + 15211 "00000001" // /* MW 5 */ + 15212 "00000000" // /* MW 4 */ + 15213 "00000000" // /* MW 3 */ + 15214 "00010010" // /* MW 2 */ + 15215 "11111110" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 244 14 first + 15216 "00111010" // ST r17, [p7]; MOVXM p7, #508940 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 15217 "00010001" // /* MW 9 */ + 15218 "00000110" // /* MW 8 */ + 15219 "10110010" // /* MW 7 */ + 15220 "11110011" // /* MW 6 */ + 15221 "00000001" // /* MW 5 */ + 15222 "00000000" // /* MW 4 */ + 15223 "00110000" // /* MW 3 */ + 15224 "11000110" // /* MW 2 */ + 15225 "11100000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.src_ref 4 "tile.hpp" 86 8 + 15226 "00111010" // ST r20, [p7]; MOVXM p7, #508944 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 15227 "00010001" // /* MW 9 */ + 15228 "00001000" // /* MW 8 */ + 15229 "10110010" // /* MW 7 */ + 15230 "11110011" // /* MW 6 */ + 15231 "00000001" // /* MW 5 */ + 15232 "00000000" // /* MW 4 */ + 15233 "00110000" // /* MW 3 */ + 15234 "11010010" // /* MW 2 */ + 15235 "11100000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 245 36 first + 15236 "00101100" // ST.s8 r19, [p7]; LSHL r18, r17, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15237 "01011011" // /* MW 5 */ + 15238 "11001010" // /* MW 4 */ + 15239 "11101000" // /* MW 3 */ + 15240 "11001100" // /* MW 2 */ + 15241 "11100000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 249 27 first + 15242 "00011000" // EXTEND.u8 r21, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15243 "10010000" // /* MW 3 */ + 15244 "01101010" // /* MW 2 */ + 15245 "00010100" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 245 27 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 249 44 + 15246 "00100100" // EXTEND.u8 r18, r18; ADD.NC r17, r21, #-2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15247 "11111110" // /* MW 5 */ + 15248 "10110101" // /* MW 4 */ + 15249 "00001000" // /* MW 3 */ + 15250 "10010010" // /* MW 2 */ + 15251 "10010100" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 249 15 + 15252 "01000100" // MOVXM p0, #508924 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15253 "11111000" // /* MW 5 */ + 15254 "11000111" // /* MW 4 */ + 15255 "11000000" // /* MW 3 */ + 15256 "00000111" // /* MW 2 */ + 15257 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 245 15 + 15258 "01000100" // MOVXM p7, #508928 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15259 "00000000" // /* MW 5 */ + 15260 "11001000" // /* MW 4 */ + 15261 "11001110" // /* MW 3 */ + 15262 "00000111" // /* MW 2 */ + 15263 "00000000" // /* MW 1 */ + 15264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15265 "00000000" // /* MW 1 */ + 15266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15267 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 249 15 first + 15268 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15269 "00110001" // /* MW 3 */ + 15270 "00000110" // /* MW 2 */ + 15271 "00001000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 245 15 first + 15272 "00000010" // ST r18, [p7]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 15273 "01110000" // /* MW 7 */ + 15274 "10100101" // /* MW 6 */ + 15275 "00000001" // /* MW 5 */ + 15276 "00000000" // /* MW 4 */ + 15277 "00110000" // /* MW 3 */ + 15278 "11001010" // /* MW 2 */ + 15279 "11100000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs35rmsnorm_row_major_part3_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE_144 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 256 33 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 257 30 + 15280 "10111010" // MOVA r17, #2; MOVXM p7, #508832 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 15281 "00010000" // /* MW 9 */ + 15282 "11010000" // /* MW 8 */ + 15283 "10110001" // /* MW 7 */ + 15284 "11110011" // /* MW 6 */ + 15285 "00000001" // /* MW 5 */ + 15286 "00000000" // /* MW 4 */ + 15287 "00000000" // /* MW 3 */ + 15288 "01010001" // /* MW 2 */ + 15289 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 256 33 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 259 24 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 267 35 + 15290 "01110110" // MOVA dj0, #40; ST r16, [p7], #20; MOVX r24, #0; MOV r18, sp /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 15291 "01111000" // /* MW 11 */ + 15292 "11110000" // /* MW 10 */ + 15293 "01001010" // /* MW 9 */ + 15294 "00001010" // /* MW 8 */ + 15295 "10000000" // /* MW 7 */ + 15296 "10000001" // /* MW 6 */ + 15297 "00010001" // /* MW 5 */ + 15298 "01011110" // /* MW 4 */ + 15299 "10000111" // /* MW 3 */ + 15300 "00000010" // /* MW 2 */ + 15301 "00000101" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 257 30 first + 15302 "00000010" // ST r17, [p7]; ADD.NC p0, r18, #-104 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 15303 "00000000" // /* MW 7 */ + 15304 "10100110" // /* MW 6 */ + 15305 "00110100" // /* MW 5 */ + 15306 "00000000" // /* MW 4 */ + 15307 "00110000" // /* MW 3 */ + 15308 "11000110" // /* MW 2 */ + 15309 "11100000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 259 24 first + 15310 "10011000" // LDA r15, [p2, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15311 "11110110" // /* MW 3 */ + 15312 "00000001" // /* MW 2 */ + 15313 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 261 33 first + 15314 "10011000" // LDA el0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15315 "00101110" // /* MW 3 */ + 15316 "00011100" // /* MW 2 */ + 15317 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 261 33 + 15318 "10011000" // LDA el3, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15319 "11101110" // /* MW 3 */ + 15320 "00011100" // /* MW 2 */ + 15321 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 261 67 + 15322 "10011000" // LDA el2, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15323 "10101110" // /* MW 3 */ + 15324 "00011100" // /* MW 2 */ + 15325 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 261 67 + 15326 "10011000" // LDA eh0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15327 "00001110" // /* MW 3 */ + 15328 "00011100" // /* MW 2 */ + 15329 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 261 101 + 15330 "10011000" // LDA el1, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15331 "01101110" // /* MW 3 */ + 15332 "00011100" // /* MW 2 */ + 15333 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 261 101 + 15334 "10011000" // LDA eh1, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15335 "01001110" // /* MW 3 */ + 15336 "00011100" // /* MW 2 */ + 15337 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 262 50 first + 15338 "10011000" // LDA eh2, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15339 "10001110" // /* MW 3 */ + 15340 "00011100" // /* MW 2 */ + 15341 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 261 32 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 262 50 + 15342 "00001100" // LDA el0, [p2], #4; ST el0, [sp, #-104] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15343 "01011011" // /* MW 5 */ + 15344 "00110000" // /* MW 4 */ + 15345 "11011111" // /* MW 3 */ + 15346 "10000101" // /* MW 2 */ + 15347 "01000011" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 261 32 + 15348 "10011000" // ST el3, [sp, #-100] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15349 "11101101" // /* MW 3 */ + 15350 "10011100" // /* MW 2 */ + 15351 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 261 32 + 15352 "10011000" // ST el2, [sp, #-96] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15353 "10101101" // /* MW 3 */ + 15354 "10100000" // /* MW 2 */ + 15355 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 261 32 + 15356 "10011000" // ST eh0, [sp, #-92] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15357 "00001101" // /* MW 3 */ + 15358 "10100100" // /* MW 2 */ + 15359 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 261 32 + 15360 "10011000" // ST el1, [sp, #-88] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15361 "01101101" // /* MW 3 */ + 15362 "10101000" // /* MW 2 */ + 15363 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 261 32 + 15364 "10011000" // ST eh1, [sp, #-84] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15365 "01001101" // /* MW 3 */ + 15366 "10101100" // /* MW 2 */ + 15367 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 261 32 + 15368 "10011000" // ST eh2, [sp, #-80] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15369 "10001101" // /* MW 3 */ + 15370 "10110000" // /* MW 2 */ + 15371 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 261 32 + 15372 "10011000" // ST el0, [sp, #-76] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15373 "00101101" // /* MW 3 */ + 15374 "10110100" // /* MW 2 */ + 15375 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 262 84 first + 15376 "10011000" // LDA eh0, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15377 "00001110" // /* MW 3 */ + 15378 "00000100" // /* MW 2 */ + 15379 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 262 84 + 15380 "10011000" // LDA el0, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15381 "00101110" // /* MW 3 */ + 15382 "00010100" // /* MW 2 */ + 15383 "00000010" // /* MW 1 */ + 15384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15385 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 264 6 first + 15386 "10000100" // JNZ r16, #15488 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=15488 delay_slots=5 */ + 15387 "00000001" // /* MW 5 */ + 15388 "01000000" // /* MW 4 */ + 15389 "01000000" // /* MW 3 */ + 15390 "00011110" // /* MW 2 */ + 15391 "10000000" // /* MW 1 */ +.delay_slot + 15392 "10011000" // ST p7, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15393 "10011101" // /* MW 3 */ + 15394 "11100111" // /* MW 2 */ + 15395 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15397 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15399 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 261 32 first +.delay_slot + 15400 "10011000" // ST eh0, [sp, #-72] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15401 "00001101" // /* MW 3 */ + 15402 "10111000" // /* MW 2 */ + 15403 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 261 32 +.delay_slot + 15404 "10011000" // ST el0, [sp, #-68] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15405 "00101101" // /* MW 3 */ + 15406 "10111100" // /* MW 2 */ + 15407 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 267 35 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 268 4 first +.no_stack_arguments + 15408 "10111010" // MOVA dj0, #16; JL #11744 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=11744 delay_slots=5 */ + 15409 "01000000" // /* MW 9 */ + 15410 "00000000" // /* MW 8 */ + 15411 "00000000" // /* MW 7 */ + 15412 "10111100" // /* MW 6 */ + 15413 "00000101" // /* MW 5 */ + 15414 "00000000" // /* MW 4 */ + 15415 "10000000" // /* MW 3 */ + 15416 "00000010" // /* MW 2 */ + 15417 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 267 35 +.delay_slot + 15418 "01000100" // MOVXM p7, #508840 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15419 "01010000" // /* MW 5 */ + 15420 "11000111" // /* MW 4 */ + 15421 "11001110" // /* MW 3 */ + 15422 "00000111" // /* MW 2 */ + 15423 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 267 35 first +.delay_slot + 15424 "10011000" // ST r24, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15425 "00010001" // /* MW 3 */ + 15426 "00011111" // /* MW 2 */ + 15427 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 267 35 +.delay_slot + 15428 "10011000" // ST dj0, [p7], #-20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15429 "01000001" // /* MW 3 */ + 15430 "10111100" // /* MW 2 */ + 15431 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15433 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15434 "00111100" // NOPA; NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15435 "00100000" // /* MW 5 */ + 15436 "00000000" // /* MW 4 */ + 15437 "11110000" // /* MW 3 */ + 15438 "00101100" // /* MW 2 */ + 15439 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 269 55 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 269 98 +.return_address + 15440 "10111010" // LDA r16, [p7], #4; MOVXM p2, #508924 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 15441 "00010000" // /* MW 9 */ + 15442 "11111110" // /* MW 8 */ + 15443 "00110001" // /* MW 7 */ + 15444 "11110001" // /* MW 6 */ + 15445 "00000001" // /* MW 5 */ + 15446 "00000000" // /* MW 4 */ + 15447 "11010000" // /* MW 3 */ + 15448 "11000010" // /* MW 2 */ + 15449 "11100011" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 269 86 + 15450 "10011000" // LDA r18, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15451 "01010110" // /* MW 3 */ + 15452 "00000110" // /* MW 2 */ + 15453 "00000111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 269 98 + 15454 "10011000" // LDA r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15455 "00110110" // /* MW 3 */ + 15456 "00000110" // /* MW 2 */ + 15457 "00000010" // /* MW 1 */ + 15458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15459 "00000000" // /* MW 1 */ + 15460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15461 "00000000" // /* MW 1 */ + 15462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15463 "00000000" // /* MW 1 */ + 15464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15465 "00000000" // /* MW 1 */ + 15466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15467 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 269 65 + 15468 "10011000" // MUL r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15469 "00001111" // /* MW 3 */ + 15470 "10100001" // /* MW 2 */ + 15471 "00010100" // /* MW 1 */ + 15472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15473 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 269 96 + 15474 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15475 "00001111" // /* MW 3 */ + 15476 "01100001" // /* MW 2 */ + 15477 "00010100" // /* MW 1 */ + 15478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15479 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 269 34 + 15480 "00000010" // ST r16, [p7, #20]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 15481 "01110000" // /* MW 7 */ + 15482 "10100101" // /* MW 6 */ + 15483 "00000001" // /* MW 5 */ + 15484 "00000000" // /* MW 4 */ + 15485 "00110000" // /* MW 3 */ + 15486 "11000010" // /* MW 2 */ + 15487 "11101010" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs35rmsnorm_row_major_part3_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE_352 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 279 58 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 279 89 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 281 42 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 282 48 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 282 59 + 15488 "10111010" // LDA p4, [sp, #-28]; MOVX r16, #1; MOV m0, #-80 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 15489 "01011000" // /* MW 9 */ + 15490 "10110000" // /* MW 8 */ + 15491 "00000111" // /* MW 7 */ + 15492 "00101000" // /* MW 6 */ + 15493 "00000000" // /* MW 5 */ + 15494 "00000001" // /* MW 4 */ + 15495 "00100000" // /* MW 3 */ + 15496 "11000011" // /* MW 2 */ + 15497 "11111100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 279 25 + 15498 "10111010" // LDA p2, [sp, #-16]; MOVXM p7, #508928 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 15499 "00010000" // /* MW 9 */ + 15500 "00000000" // /* MW 8 */ + 15501 "10110010" // /* MW 7 */ + 15502 "11110011" // /* MW 6 */ + 15503 "00000001" // /* MW 5 */ + 15504 "00000000" // /* MW 4 */ + 15505 "00100000" // /* MW 3 */ + 15506 "00100011" // /* MW 2 */ + 15507 "11111110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 279 136 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 280 33 + 15508 "10111010" // LDA p0, [sp, #-24]; MOVX r19, #-5; MOV r17, #6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 15509 "01011000" // /* MW 9 */ + 15510 "00000110" // /* MW 8 */ + 15511 "00101000" // /* MW 7 */ + 15512 "01101010" // /* MW 6 */ + 15513 "00110111" // /* MW 5 */ + 15514 "00111111" // /* MW 4 */ + 15515 "00100000" // /* MW 3 */ + 15516 "00000011" // /* MW 2 */ + 15517 "11111101" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 + 15518 "00011000" // LDA p3, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15519 "10011001" // /* MW 3 */ + 15520 "11111001" // /* MW 2 */ + 15521 "00000111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 279 25 first + 15522 "10011000" // LDA r18, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15523 "01010110" // /* MW 3 */ + 15524 "00000110" // /* MW 2 */ + 15525 "00000111" // /* MW 1 */ + 15526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15527 "00000000" // /* MW 1 */ + 15528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15529 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 279 58 + 15530 "10011000" // LDA r21, [p4, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15531 "10110110" // /* MW 3 */ + 15532 "10100110" // /* MW 2 */ + 15533 "00000100" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 279 89 + 15534 "10011000" // LDA r23, [p4], #-28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15535 "11110110" // /* MW 3 */ + 15536 "10011110" // /* MW 2 */ + 15537 "00000100" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 279 119 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 15538 "10011000" // LDA r20, [p4], #24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15539 "10010110" // /* MW 3 */ + 15540 "01101110" // /* MW 2 */ + 15541 "00000100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 15542 "10011000" // LDA p2, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15543 "00011110" // /* MW 3 */ + 15544 "00000101" // /* MW 2 */ + 15545 "00000011" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 282 48 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 15546 "10011000" // LDA r28, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15547 "10010110" // /* MW 3 */ + 15548 "00001011" // /* MW 2 */ + 15549 "00000100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 15550 "10011000" // LDA r26, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15551 "01010110" // /* MW 3 */ + 15552 "00000111" // /* MW 2 */ + 15553 "00000010" // /* MW 1 */ + 15554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15555 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 15556 "10011000" // LDA r22, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15557 "11010110" // /* MW 3 */ + 15558 "00000110" // /* MW 2 */ + 15559 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 279 68 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 279 98 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 15560 "00011000" // MAC r20, r20, r23, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15561 "01010110" // /* MW 3 */ + 15562 "11101001" // /* MW 2 */ + 15563 "00010101" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 281 42 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 15564 "10011000" // LSHL r29, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15565 "00001101" // /* MW 3 */ + 15566 "00111011" // /* MW 2 */ + 15567 "00010101" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 281 42 + 15568 "11111000" // MOV m0, r29 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15569 "10100000" // /* MW 3 */ + 15570 "00001110" // /* MW 2 */ + 15571 "00011000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 282 59 first + 15572 "10011000" // LSHL r16, r28, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15573 "00001101" // /* MW 3 */ + 15574 "00100001" // /* MW 2 */ + 15575 "00010111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 279 130 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 282 11 + 15576 "10100100" // ADD r16, r20, #31; ADD.NC p0, r26, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15577 "10000010" // /* MW 5 */ + 15578 "11011010" // /* MW 4 */ + 15579 "11100000" // /* MW 3 */ + 15580 "00001111" // /* MW 2 */ + 15581 "10100100" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 279 136 + 15582 "10011000" // LSHL r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15583 "00111101" // /* MW 3 */ + 15584 "00100001" // /* MW 2 */ + 15585 "00010100" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 284 2 first +.no_stack_arguments + 15586 "00000100" // JL #14464 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=14464 delay_slots=5 */ + 15587 "00000001" // /* MW 5 */ + 15588 "00000000" // /* MW 4 */ + 15589 "01000000" // /* MW 3 */ + 15590 "00011100" // /* MW 2 */ + 15591 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 279 142 first +.delay_slot + 15592 "10011000" // MUL r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15593 "00001111" // /* MW 3 */ + 15594 "10100001" // /* MW 2 */ + 15595 "00010100" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 289 14 +.delay_slot + 15596 "01000100" // MOVXM p7, #508932 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15597 "00001000" // /* MW 5 */ + 15598 "11001000" // /* MW 4 */ + 15599 "11001110" // /* MW 3 */ + 15600 "00000111" // /* MW 2 */ + 15601 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 280 33 first +.delay_slot + 15602 "10011000" // LSHL r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15603 "00011101" // /* MW 3 */ + 15604 "00100001" // /* MW 2 */ + 15605 "00010100" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 280 33 +.delay_slot + 15606 "01011000" // ADD.NC p3, r22, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15607 "01000001" // /* MW 3 */ + 15608 "01101011" // /* MW 2 */ + 15609 "00011011" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 281 42 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 284 2 +.delay_slot + 15610 "11110100" // PADDB [p3], m0; MOV p1, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15611 "10000001" // /* MW 5 */ + 15612 "11001101" // /* MW 4 */ + 15613 "00000010" // /* MW 3 */ + 15614 "01110010" // /* MW 2 */ + 15615 "01100001" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 289 14 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 291 6 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 295 +.return_address + 15616 "10111010" // LDA r17, [p7]; MOVX r16, #0; MOV lr, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 15617 "01111000" // /* MW 9 */ + 15618 "01100000" // /* MW 8 */ + 15619 "10111110" // /* MW 7 */ + 15620 "00001011" // /* MW 6 */ + 15621 "00000000" // /* MW 5 */ + 15622 "00000001" // /* MW 4 */ + 15623 "11010000" // /* MW 3 */ + 15624 "11000110" // /* MW 2 */ + 15625 "11100000" // /* MW 1 */ + 15626 "00011000" // LDA r0, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15627 "00010001" // /* MW 3 */ + 15628 "11110100" // /* MW 2 */ + 15629 "00000111" // /* MW 1 */ + 15630 "00011000" // LDA p2, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15631 "00011001" // /* MW 3 */ + 15632 "11101101" // /* MW 2 */ + 15633 "00000111" // /* MW 1 */ + 15634 "00011000" // LDA p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15635 "00011001" // /* MW 3 */ + 15636 "11111111" // /* MW 2 */ + 15637 "00000111" // /* MW 1 */ + 15638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15639 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 295 first + 15640 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 15641 "00000000" // /* MW 3 */ + 15642 "00101000" // /* MW 2 */ + 15643 "00010000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 295 +.delay_slot + 15644 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15645 "00000001" // /* MW 5 */ + 15646 "00000000" // /* MW 4 */ + 15647 "00000000" // /* MW 3 */ + 15648 "11110000" // /* MW 2 */ + 15649 "11111111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 289 14 first +.delay_slot + 15650 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15651 "00000111" // /* MW 3 */ + 15652 "01100010" // /* MW 2 */ + 15653 "00010100" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 291 18 first +.delay_slot + 15654 "11100100" // EQ r27, r15, r17; MOV r15, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15655 "01000001" // /* MW 5 */ + 15656 "10100000" // /* MW 4 */ + 15657 "11110111" // /* MW 3 */ + 15658 "11100010" // /* MW 2 */ + 15659 "01111110" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 291 6 +.delay_slot + 15660 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15661 "00000010" // /* MW 3 */ + 15662 "01100001" // /* MW 2 */ + 15663 "00010100" // /* MW 1 */ +.delay_slot + 15664 "00000010" // ST r16, [p7]; MOV p7, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 15665 "01110000" // /* MW 7 */ + 15666 "01100000" // /* MW 6 */ + 15667 "10110010" // /* MW 5 */ + 15668 "00000011" // /* MW 4 */ + 15669 "00110000" // /* MW 3 */ + 15670 "11000010" // /* MW 2 */ +.label _ZN12mllib_graphs35rmsnorm_row_major_part3_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE__end +.label __ZN12mllib_graphs35rmsnorm_row_major_part3_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE___func_end0 + 15671 "11100000" // /* MW 1 */ +.label __Z14_b8308_wrapperPPv___func_begin0 +.label _Z14_b8308_wrapperPPv +.function _b8308_wrapper _Z14_b8308_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 81 first +.src_ref 0 "0_0_reloadable5.cc" 83 79 +.function_start + 15680 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15681 "11000000" // /* MW 3 */ + 15682 "01100000" // /* MW 2 */ + 15683 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 83 79 first + 15684 "10011000" // LDA p0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15685 "00011110" // /* MW 3 */ + 15686 "00011100" // /* MW 2 */ + 15687 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 84 79 first + 15688 "10011000" // LDA p1, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15689 "10011110" // /* MW 3 */ + 15690 "00101100" // /* MW 2 */ + 15691 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 86 80 first + 15692 "10011000" // LDA p3, [p2, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15693 "10011110" // /* MW 3 */ + 15694 "11110101" // /* MW 2 */ + 15695 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 85 47 first + 15696 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15697 "00011110" // /* MW 3 */ + 15698 "00000101" // /* MW 2 */ + 15699 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 82 4 first +.tail_call + 15700 "10000100" // J #15136 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=15136 delay_slots=5 */ + 15701 "00000000" // /* MW 5 */ + 15702 "00000000" // /* MW 4 */ + 15703 "10010000" // /* MW 3 */ + 15704 "00011101" // /* MW 2 */ + 15705 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15707 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15709 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15711 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15713 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z14_b8308_wrapperPPv__end +.label __Z14_b8308_wrapperPPv___func_end0 + 15715 "00000000" // /* MW 1 */ +.label _ZN12me_primitive10udiv_dstepEjjRjS0_ +.function udiv_dstep _ZN12me_primitive10udiv_dstepEjjRjS0_ +.src_ref 13 "me_div.c" 108 19 +.src_ref 13 "me_div.c" 108 19 +.src_ref 13 "me_div.c" 115 4 first +.function_start + 15728 "11100100" // MOVX r3, #0; MOV r31, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15729 "01000001" // /* MW 5 */ + 15730 "10100000" // /* MW 4 */ + 15731 "00101111" // /* MW 3 */ + 15732 "11000000" // /* MW 2 */ + 15733 "00000000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15734 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15735 "00011100" // /* MW 3 */ + 15736 "11000110" // /* MW 2 */ + 15737 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15738 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15739 "00011100" // /* MW 3 */ + 15740 "11000110" // /* MW 2 */ + 15741 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15742 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15743 "00011100" // /* MW 3 */ + 15744 "11000110" // /* MW 2 */ + 15745 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15746 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15747 "00011100" // /* MW 3 */ + 15748 "11000110" // /* MW 2 */ + 15749 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15750 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15751 "00011100" // /* MW 3 */ + 15752 "11000110" // /* MW 2 */ + 15753 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15754 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15755 "00011100" // /* MW 3 */ + 15756 "11000110" // /* MW 2 */ + 15757 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15758 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15759 "00011100" // /* MW 3 */ + 15760 "11000110" // /* MW 2 */ + 15761 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15762 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15763 "00011100" // /* MW 3 */ + 15764 "11000110" // /* MW 2 */ + 15765 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15766 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15767 "00011100" // /* MW 3 */ + 15768 "11000110" // /* MW 2 */ + 15769 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15770 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15771 "00011100" // /* MW 3 */ + 15772 "11000110" // /* MW 2 */ + 15773 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15774 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15775 "00011100" // /* MW 3 */ + 15776 "11000110" // /* MW 2 */ + 15777 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15778 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15779 "00011100" // /* MW 3 */ + 15780 "11000110" // /* MW 2 */ + 15781 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15782 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15783 "00011100" // /* MW 3 */ + 15784 "11000110" // /* MW 2 */ + 15785 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15786 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15787 "00011100" // /* MW 3 */ + 15788 "11000110" // /* MW 2 */ + 15789 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15790 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15791 "00011100" // /* MW 3 */ + 15792 "11000110" // /* MW 2 */ + 15793 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15794 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15795 "00011100" // /* MW 3 */ + 15796 "11000110" // /* MW 2 */ + 15797 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15798 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15799 "00011100" // /* MW 3 */ + 15800 "11000110" // /* MW 2 */ + 15801 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15802 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15803 "00011100" // /* MW 3 */ + 15804 "11000110" // /* MW 2 */ + 15805 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15806 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15807 "00011100" // /* MW 3 */ + 15808 "11000110" // /* MW 2 */ + 15809 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15810 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15811 "00011100" // /* MW 3 */ + 15812 "11000110" // /* MW 2 */ + 15813 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15814 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15815 "00011100" // /* MW 3 */ + 15816 "11000110" // /* MW 2 */ + 15817 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15818 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15819 "00011100" // /* MW 3 */ + 15820 "11000110" // /* MW 2 */ + 15821 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15822 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15823 "00011100" // /* MW 3 */ + 15824 "11000110" // /* MW 2 */ + 15825 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15826 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15827 "00011100" // /* MW 3 */ + 15828 "11000110" // /* MW 2 */ + 15829 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15830 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15831 "00011100" // /* MW 3 */ + 15832 "11000110" // /* MW 2 */ + 15833 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15834 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15835 "00011100" // /* MW 3 */ + 15836 "11000110" // /* MW 2 */ + 15837 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15838 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15839 "00011100" // /* MW 3 */ + 15840 "11000110" // /* MW 2 */ + 15841 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15842 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15843 "00011100" // /* MW 3 */ + 15844 "11000110" // /* MW 2 */ + 15845 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 119 first + 15846 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 15847 "00000000" // /* MW 3 */ + 15848 "00101000" // /* MW 2 */ + 15849 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 first +.delay_slot + 15850 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15851 "00011100" // /* MW 3 */ + 15852 "11000110" // /* MW 2 */ + 15853 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 +.delay_slot + 15854 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15855 "00011100" // /* MW 3 */ + 15856 "11000110" // /* MW 2 */ + 15857 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 +.delay_slot + 15858 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15859 "00011100" // /* MW 3 */ + 15860 "11000110" // /* MW 2 */ + 15861 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 +.delay_slot + 15862 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15863 "00011100" // /* MW 3 */ + 15864 "11000110" // /* MW 2 */ + 15865 "00010000" // /* MW 1 */ +.delay_slot + 15866 "11111000" // MOV r2, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15867 "10100000" // /* MW 3 */ + 15868 "10011111" // /* MW 2 */ +.label _ZN12me_primitive10udiv_dstepEjjRjS0___end + 15869 "00011000" // /* MW 1 */ +.dir 0 "/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src" +.dir 1 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer" +.dir 2 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common" +.dir 3 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc" +.dir 4 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2" +.dir 5 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p" +.dir 6 "/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend" +.dir 7 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/gemm" +.dir 8 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api" +.dir 9 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include" +.dir 10 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib" +.dir 11 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail" +.dir 12 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf" +.dir 13 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21990/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib" diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable80/Release/0_0_reloadable80.cmico b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable80/Release/0_0_reloadable80.cmico new file mode 100644 index 0000000000000000000000000000000000000000..f377058758269f564988080a1597f499edc1b997 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable80/Release/0_0_reloadable80.cmico @@ -0,0 +1 @@ ++Mdec diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable80/Release/0_0_reloadable80.lst b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable80/Release/0_0_reloadable80.lst new file mode 100644 index 0000000000000000000000000000000000000000..61ed6932244250349b843adbddb703e85749ea01 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable80/Release/0_0_reloadable80.lst @@ -0,0 +1,5309 @@ + +// File generated by darts version V-2024.06#84922c0d9f#241219, Fri May 30 11:30:06 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// darts -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -d -h -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable5 me + +// Release: ipp V-2024.06-TGT-241219 + +.text_segment PM 2528 +.entry_point +.label __Z13kernelWrapperPPvjjjj___func_begin0 +.label _Z13kernelWrapperPPvjjjj +.function_start + 2528 0x00 0xc2 0xd0 0xe9 0xe0 0x2c LDA r16, [p0]; NEZ r26, r1 + 2534 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 2540 0x0f 0xef 0x1d 0x98 ST p6, [sp, #-20] + 2544 0xfe 0x3a 0xb0 0x01 0xc8 0xd0 0x70 0x02 ST r14, [sp, #-16]; MOV r14, r3 + 2552 0xff 0x3e 0xb0 0x01 0xe8 0x50 0x70 0x02 ST r15, [sp, #-8]; MOV r15, r1 + 2560 0x0f 0xf7 0x9d 0x98 ST p7, [sp, #-12] + 2564 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4] + 2568 0x1e 0x68 0x02 0x18 ADD.NC p6, r16, #4 + 2572 0x06 0x1e 0x16 0x98 LDA r16, [p6], #4 + 2576 0x06 0x3e 0x56 0x98 LDA r18, [p6], #12 + 2580 0x06 0xee 0x36 0x98 LDA r17, [p6], #-8 + 2584 0x06 0x07 0x76 0x98 LDA r27, [p6] + 2588 0x00 0x00 NOPX + 2590 0x00 0x00 NOPX + 2592 0x00 0x00 NOPX + 2594 0x00 0x00 NOPX + 2596 0x00 0x00 NOPX + 2598 0x00 0x00 NOPX + 2600 0x14 0x21 0x22 0x18 SEL.EQZ r16, r16, r18, r27 + 2604 0x0e 0xd6 0x11 0x98 ST r16, [p6, #-12] + 2608 0xfc 0x1f 0xa0 0x35 0x39 0xe4 MOVX r16, #-1; MOV el0, r26 + 2614 0x00 0x00 NOPX + 2616 0x00 0x00 NOPX + 2618 0x00 0x00 NOPX + 2620 0x14 0x57 0x08 0x18 ACQ.COND r17, r16, r26 + 2624 0x04 0x41 0x29 0xa0 0x05 0x64 MOVX r17, #2; MOV r19, #1 + 2630 0xd5 0x23 0xb9 0x21 0x81 0xe4 LSHL r20, r26, r17; MOV r18, p0 + 2636 0x9c 0x9f 0x9c 0xd2 0xa2 0xa4 LTU r18, r19, r15; ADD.NC p6, r18, r20 + 2642 0xc0 0xd2 0xd7 0xe6 0x95 0x82 0x6e 0x60 0x72 0xba LDA r20, [p6]; ST r20, [sp, #-28]; MOV r19, p6 + 2652 0xfd 0x4a 0xb0 0x03 0x4c 0x90 0x70 0x02 ST r18, [sp, #-24]; MOV r26, r18 + 2660 0x00 0x00 NOPX + 2662 0x00 0x00 NOPX + 2664 0x00 0x00 NOPX + 2666 0x00 0x00 NOPX + 2668 0x00 0x00 NOPX + 2670 0x1e 0x6a 0x02 0x18 ADD.NC p6, r20, #4 + 2674 0x06 0x1e 0x96 0x98 LDA r20, [p6], #4 + 2678 0x06 0x3e 0xd6 0x98 LDA r22, [p6], #12 + 2682 0x06 0xee 0xb6 0x98 LDA r21, [p6], #-8 + 2686 0x06 0x07 0x76 0x98 LDA r27, [p6] + 2690 0x00 0x00 NOPX + 2692 0x00 0x00 NOPX + 2694 0x00 0x00 NOPX + 2696 0x00 0x00 NOPX + 2698 0x00 0x00 NOPX + 2700 0x00 0x00 NOPX + 2702 0x15 0x29 0x62 0x18 SEL.EQZ r20, r20, r22, r27 + 2706 0x0e 0xd6 0x91 0x98 ST r20, [p6, #-12] + 2710 0x00 0x00 NOPX + 2712 0x00 0x00 NOPX + 2714 0x00 0x00 NOPX + 2716 0x00 0x00 NOPX + 2718 0x15 0x57 0x08 0x18 ACQ.COND r21, r16, r26 + 2722 0x14 0xa5 0x1d 0x98 LSHL r18, r18, r17 + 2726 0x14 0xa3 0xb9 0xb3 0x92 0xa4 LSHL r18, r2, r17; ADD.NC r19, r19, r18 + 2732 0x76 0x9e 0x0c 0xd3 0x92 0xa4 NEZ r26, r14; ADD.NC p6, r19, r18 + 2738 0xc0 0xca 0xdf 0xc6 0xab 0x0c LDA r18, [p6]; ST r26, [sp, #-32] + 2744 0x00 0x00 NOPX + 2746 0x00 0x00 NOPX + 2748 0x00 0x00 NOPX + 2750 0x00 0x00 NOPX + 2752 0x00 0x00 NOPX + 2754 0x00 0x00 NOPX + 2756 0x1f 0x69 0x02 0x18 ADD.NC p7, r18, #4 + 2760 0x07 0x3e 0x76 0x98 LDA r19, [p7], #12 + 2764 0x07 0xee 0x56 0x98 LDA r18, [p7], #-8 + 2768 0x07 0x1e 0x96 0x98 LDA r20, [p7], #4 + 2772 0x07 0x07 0x76 0x98 LDA r27, [p7] + 2776 0x00 0x00 NOPX + 2778 0x00 0x00 NOPX + 2780 0x00 0x00 NOPX + 2782 0x00 0x00 NOPX + 2784 0x00 0x00 NOPX + 2786 0x00 0x00 NOPX + 2788 0x14 0xe7 0x42 0x18 SEL.EQZ r19, r19, r20, r27 + 2792 0x0f 0xd6 0x71 0x98 ST r19, [p7, #-12] + 2796 0x00 0x00 NOPX + 2798 0x00 0x00 NOPX + 2800 0x00 0x00 NOPX + 2802 0x00 0x00 NOPX + 2804 0x14 0x97 0x08 0x18 ACQ.COND r18, r16, r26 + 2808 0x10 0x21 0x1d 0x98 LSHL r16, r0, r17 + 2812 0x18 0x88 0x20 0xf8 MOV dj0, r16 + 2816 0x00 0x07 0xce 0xc8 0x80 0x44 MOVXM p7, #508992 + 2822 0xe0 0x13 0xdf 0xb8 0x5b 0x0c LDA p1, [p7, dj0]; ST el0, [sp, #-36] + 2828 0x00 0x00 NOPX + 2830 0x00 0x00 NOPX + 2832 0x00 0x00 NOPX + 2834 0x00 0x00 NOPX + 2836 0x00 0x00 NOPX + 2838 0x00 0x00 NOPX +.no_stack_arguments + 2840 0x10 0x30 0x40 0x18 JL p1 +.delay_slot + 2844 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.delay_slot +.swstall delay_slot + 2848 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2850 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2852 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2854 0x00 0x2c 0xf0 0x00 0x10 0x00 0x01 0xa5 0x7e 0xba NOPA; NOPB; NOPM +.return_address + 2864 0xe0 0xc6 0xd0 0x40 0x0a 0x2c LDA r17, [p7]; MOVX r16, #1 + 2870 0x07 0xdf 0x51 0x18 LDA r26, [sp, #-36] + 2874 0x07 0xe4 0x41 0x18 LDA dj0, [sp, #-28] + 2878 0x07 0xe8 0x29 0x18 LDA el0, [sp, #-24] + 2882 0x07 0xe0 0x09 0x18 LDA eh0, [sp, #-32] + 2886 0x00 0x00 NOPX + 2888 0x00 0x00 NOPX + 2890 0x18 0x68 0x88 0x18 ADD.NC p0, r17, #16 + 2894 0x00 0x06 0x36 0x98 LDA r17, [p0] + 2898 0x00 0x00 NOPX + 2900 0x00 0x00 NOPX + 2902 0x00 0x00 NOPX + 2904 0x00 0x00 NOPX + 2906 0x00 0x00 NOPX + 2908 0x00 0x00 NOPX + 2910 0x14 0x55 0x08 0x18 REL.COND r17, r16, r26 + 2914 0x1e 0xc6 0xdd 0xaf 0x41 0xd4 LDA r17, [p0, #-4]; MOV r27, r15 + 2920 0xe0 0x4a 0xdd 0x40 0x39 0xd4 LDA r18, [p7, dj0]; MOV r26, el0 + 2926 0x00 0x00 NOPX + 2928 0x00 0x00 NOPX + 2930 0x00 0x00 NOPX + 2932 0x00 0x00 NOPX + 2934 0x00 0x00 NOPX + 2936 0x14 0x27 0x11 0x98 SUB r19, r16, r17 + 2940 0x8c 0x66 0x4e 0xd2 0x10 0x24 SEL.EQZ r17, r17, r19, r27; ADD.NC p7, r18, #16 + 2946 0xe0 0xc6 0xd1 0xec 0x63 0x0c LDA r17, [p7]; ST r17, [p0, #-4] + 2952 0x00 0x00 NOPX + 2954 0x00 0x00 NOPX + 2956 0x00 0x00 NOPX + 2958 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 2960 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 2962 0x1e 0xa1 0x1c 0xf8 MOV r26, eh0 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2966 0x14 0x55 0x08 0x18 REL.COND r17, r16, r26 + 2970 0xfe 0xc6 0xdd 0xc0 0x39 0xd4 LDA r17, [p7, #-4]; MOV r27, el0 + 2976 0x06 0x06 0x56 0x98 LDA r18, [p6] + 2980 0x00 0x00 NOPX + 2982 0x00 0x00 NOPX + 2984 0x00 0x00 NOPX + 2986 0x00 0x00 NOPX + 2988 0x00 0x00 NOPX + 2990 0x14 0x27 0x11 0x98 SUB r19, r16, r17 + 2994 0x8c 0x66 0x40 0xd2 0x14 0x24 SEL.EQZ r17, r17, r19, r27; ADD.NC p0, r18, #20 + 3000 0x00 0xc6 0xdf 0xec 0x63 0x0c LDA r17, [p0]; ST r17, [p7, #-4] + 3006 0x00 0x00 NOPX + 3008 0x00 0x00 NOPX + 3010 0x00 0x00 NOPX + 3012 0x00 0x00 NOPX + 3014 0x00 0x00 NOPX + 3016 0x00 0x00 NOPX + 3018 0x14 0x55 0x08 0x18 REL.COND r17, r16, r26 + 3022 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] + 3026 0x00 0xe6 0x36 0x98 LDA r17, [p0, #-8] + 3030 0x07 0xef 0x19 0x18 LDA p6, [sp, #-20] +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 3034 0x07 0xf7 0x99 0x18 LDA p7, [sp, #-12] +.aggressive_scheduled_block_id 2 +.noswbrkpt + 3038 0x07 0xf1 0xd1 0x18 LDA r14, [sp, #-16] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3042 0x07 0xf9 0xf1 0x18 LDA r15, [sp, #-8] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3046 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3052 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3056 0x14 0x21 0x11 0x98 SUB r16, r16, r17 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3060 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3062 0x1e 0xd7 0x20 0xf8 MOV r27, r14 +.delay_slot + 3066 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 +.delay_slot + 3070 0x08 0xe6 0x11 0x98 ST r16, [p0, #-8] +.label _Z13kernelWrapperPPvjjjj__end +.label __Z13kernelWrapperPPvjjjj___func_end0 + +.text_segment PM 3088 +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E +.function_start + 3088 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 3092 0x00 0x07 0xc0 0xc9 0x40 0x44 MOVXM p0, #509088 +.delay_slot + 3098 0x18 0x00 0x80 0xb8 MOV m0, #64 +.delay_slot + 3102 0x08 0x04 0x01 0x98 ST m0, [p0] +.delay_slot + 3106 0x08 0x14 0x01 0x98 ST m0, [p0, #4] +.delay_slot +.swstall delay_slot + 3110 0x00 0x00 NOPX +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_end0 + +.text_segment PM 3120 +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv +.function_start + 3120 0x23 0x85 0xd0 0x00 0x01 0xf0 0x32 0x40 0x10 0xba LDA el0, [p1], #4; MOVXM p0, #509056 + 3130 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 3136 0x0f 0xf8 0x3d 0x98 ST lr, [sp, #-8] + 3140 0x0f 0xfd 0xf5 0x98 ST r15, [sp, #-4] + 3144 0x00 0x00 NOPX + 3146 0x00 0x00 NOPX + 3148 0x00 0x00 NOPX + 3150 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 3154 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 3158 0x00 0x00 NOPX + 3160 0x00 0x00 NOPX + 3162 0x00 0x00 NOPX + 3164 0x00 0x00 NOPX + 3166 0x00 0x00 NOPX + 3168 0x00 0x00 NOPX + 3170 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 3174 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 3178 0x00 0x00 NOPX + 3180 0x00 0x00 NOPX + 3182 0x00 0x00 NOPX + 3184 0x00 0x00 NOPX + 3186 0x00 0x00 NOPX + 3188 0x00 0x00 NOPX + 3190 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 3194 0x01 0x14 0x2e 0x98 LDA el0, [p1, #4] + 3198 0x00 0x00 NOPX + 3200 0x00 0x00 NOPX +.no_stack_arguments + 3202 0x00 0x06 0x08 0x00 0x01 0x04 JL #3088 +.delay_slot +.swstall delay_slot + 3208 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3210 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3212 0x00 0x00 NOPX +.delay_slot + 3214 0x08 0xdc 0x29 0x98 ST el0, [p0], #-12 +.delay_slot + 3218 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x5e 0x86 0x07 0x00 0x00 0x1c 0x2e NOPA; NOPS; MOV r15, p0; NOPV +.return_address + 3232 0xff 0x07 0x20 0x01 0x00 0x68 0x33 0xc4 0x08 0xba LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16 + 3242 0x01 0xe2 0x80 0x01 0x80 0x08 0x07 0xfd 0x58 0xba MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 + 3252 0xff 0xbe 0x20 0x0a 0x11 0x80 0x07 0xa0 0x01 0x7a LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128 + 3262 0x00 0x06 0x4a 0x98 LDA.u8 r18, [p0] + 3266 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3268 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3270 0x00 0x02 0x17 0x18 ST.s16 r16, [p0, dj0] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3274 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3278 0x10 0x22 0x05 0x18 MOVX r17, #1 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3282 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3288 0x14 0x77 0x27 0x98 EQ r27, r17, r18 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3292 0x14 0x21 0x82 0x18 SEL.EQZ r16, r16, r24, r27 +.delay_slot +.swstall delay_slot + 3296 0x00 0x00 NOPX +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + +.text_segment PM 3312 +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E +.function_start + 3312 0x02 0x80 0x80 0x00 0x01 0xf0 0x32 0x46 0x10 0xba MOVA m0, #20; MOVXM p0, #509068 + 3322 0x01 0x01 0x50 0x00 0x20 0x28 0x28 0x06 0x58 0xba LDA.u8 r0, [p0], m0; MOVX r2, #1; MOV r1, #6 + 3332 0x00 0x00 NOPX + 3334 0x00 0x00 NOPX + 3336 0x00 0x00 NOPX + 3338 0x00 0x00 NOPX + 3340 0x00 0x00 NOPX + 3342 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 3346 0x10 0x06 0xf0 0x18 NEZ r3, r0 +.delay_slot + 3350 0x10 0x80 0x08 0x98 NE r0, r2, r0 +.delay_slot + 3354 0x10 0x00 0x1d 0x98 LSHL r0, r0, r1 +.delay_slot + 3358 0x02 0x82 0x31 0x88 0x3b 0x5c ST r0, [p0, #4]; LSHL r2, r3, r1 +.delay_slot + 3364 0x08 0x04 0x51 0x98 ST r2, [p0] +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_end0 + +.text_segment PM 3376 +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv +.function_start + 3376 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 3382 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4] +.no_stack_arguments + 3386 0x00 0x06 0x18 0x00 0x01 0x04 JL #3120 +.delay_slot + 3392 0x00 0x07 0xc0 0xc9 0x00 0x44 MOVXM p0, #509056 +.delay_slot +.swstall delay_slot + 3398 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3400 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3402 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3404 0x00 0x01 0x67 0x98 NOPA +.return_address + 3408 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] +.tail_call + 3412 0x00 0x06 0x78 0x00 0x00 0x84 J #3312 +.delay_slot + 3418 0x00 0x07 0xc0 0xc9 0x00 0x44 MOVXM p0, #509056 +.delay_slot + 3424 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 3430 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3432 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3434 0x00 0x00 NOPX +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + +.text_segment PM 3440 +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.function_start + 3440 0x02 0x80 0x80 0x00 0x01 0xf1 0xb2 0x40 0x10 0xba MOVA m0, #20; MOVXM p3, #509056 + 3450 0x03 0x3c 0x16 0x98 LDA r0, [p3], #12 + 3454 0x61 0x05 0x58 0xcd 0x81 0xd4 LDA.u8 r1, [p3], m0; MOV p4, p3 + 3460 0x00 0x00 NOPX + 3462 0x00 0x00 NOPX + 3464 0x00 0x00 NOPX + 3466 0x00 0x00 NOPX + 3468 0x00 0x00 NOPX + 3470 0x00 0x00 NOPX + 3472 0x08 0x06 0xe8 0x40 0x01 0x84 JNZ r1, #3536 +.delay_slot + 3478 0x17 0xc4 0xe9 0x18 MOVX r2, #-6 +.delay_slot + 3482 0x10 0x00 0x2d 0x98 LSHL r0, r0, r2 +.delay_slot +.swstall delay_slot + 3486 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3488 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3490 0x00 0x00 NOPX + 3492 0x00 0x04 0x32 0x98 LDA.s16 r1, [p0] + 3496 0x00 0x00 NOPX + 3498 0x00 0x00 NOPX + 3500 0x00 0x00 NOPX + 3502 0x00 0x06 0xf8 0x00 0x00 0x84 J #3568 +.delay_slot +.swstall delay_slot + 3508 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3510 0x00 0x00 NOPX +.delay_slot + 3512 0x18 0x05 0x72 0xf8 VBCST.16 x0, r1 +.delay_slot +.swstall delay_slot + 3516 0x00 0x01 0x67 0x98 NOPA +.delay_slot + 3520 0x00 0x2c 0xf0 0x00 0x20 0x04 0x13 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST x0, [p0]; NOPX; NOPM; NOPV +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_96 + 3536 0x01 0x04 0x32 0x98 LDA.s16 r1, [p1] + 3540 0x00 0x00 NOPX + 3542 0x00 0x00 NOPX + 3544 0x00 0x00 NOPX + 3546 0x00 0x00 NOPX + 3548 0x00 0x00 NOPX + 3550 0x00 0x00 NOPX + 3552 0x18 0x05 0x72 0xf8 VBCST.16 x0, r1 + 3556 0x00 0x00 NOPX + 3558 0x00 0x2c 0xf1 0x04 0x13 0x00 0x00 0x00 0x00 0x7a NOPA; VST x0, [p1]; NOPX +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_128 + 3568 0x8a 0x80 0xd0 0x00 0x07 0x8a 0xb8 0x3f 0x48 0xba LDA m0, [p4, #20]; MOVX r0, #60; ADD.NC lc, r0, #-3 + 3578 0x62 0x90 0xd0 0x00 0x00 0x00 0x7f 0x30 0x10 0xba LDA m1, [p3, #4]; MOVXM ls, #3680 + 3588 0x00 0x00 0x06 0xfd 0x00 0x44 MOVXM le, #3712 + 3594 0x00 0x07 0xc8 0xc8 0x20 0x44 MOVXM p4, #508944 + 3600 0x04 0x04 0x22 0x98 LDA.s8 r1, [p4] + 3604 0x00 0x00 NOPX + 3606 0x00 0x00 NOPX + 3608 0x00 0x08 0xab 0x98 VLDA.CONV.fp32.bf16 cml1, [p0], m0 + 3612 0x01 0x29 0x2b 0x98 VLDA.CONV.fp32.bf16 cml2, [p1], m1 + 3616 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3620 0x01 0x2a 0x2b 0x98 VLDA.CONV.fp32.bf16 cml4, [p1], m1 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3624 0x01 0x15 0x70 0xf5 0x00 0x2c VLDA.CONV.fp32.bf16 cml1, [p0], m0; MOVX crRnd, r1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3630 0x25 0x25 0x70 0x04 0x03 0x28 0x3d 0x62 VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3638 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3642 0x25 0x45 0x70 0x04 0x04 0x10 0x3d 0x62 VLDA.CONV.fp32.bf16 cml4, [p1], m1; VADD.f dm4, dm0, dm4, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3650 0x00 0x08 0xab 0x98 VLDA.CONV.fp32.bf16 cml1, [p0], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3654 0x25 0x25 0x70 0x04 0x03 0x28 0x3d 0x62 VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3662 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3666 0x25 0x45 0x74 0x3b 0x46 0x00 0x00 0x40 0x1a 0x57 0x04 0x10 0x3d 0x6e VLDA.CONV.fp32.bf16 cml4, [p1], m1; VST.CONV.bf16.fp32 cml3, [p2], #64; NOPM; VADD.f dm4, dm0, dm4, r0 +.label ZLS_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_240 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3680 0x00 0x08 0xab 0x98 VLDA.CONV.fp32.bf16 cml1, [p0], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3684 0x25 0x25 0x70 0x00 0x21 0x0f 0x11 0x8e 0x03 0x28 0x3d 0x66 VLDA.CONV.fp32.bf16 cml2, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3696 0x01 0x05 0x70 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLE_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_272 +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3712 0x25 0x45 0x70 0x00 0x22 0x1d 0xa3 0x00 0x00 0x00 0x01 0xa5 0x78 0x20 0x81 0xeb VLDA.CONV.fp32.bf16 cml4, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml3, [p2], #64;NOPX; NOPM; VADD.f dm4, dm0, dm4, r0 +.loop_nesting 0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3728 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3730 0x43 0xc4 0x60 0x02 0x03 0x28 0x3d 0x62 VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3738 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3740 0x43 0xb4 0x60 0x02 0x04 0x10 0x3d 0x62 VST.CONV.bf16.fp32 cml3, [p2], #64; VADD.f dm4, dm0, dm4, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3748 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3750 0x43 0xc4 0x60 0x50 0x00 0x5c VST.CONV.bf16.fp32 cml4, [p2], #64;RET lr +.delay_slot +.swstall delay_slot + 3756 0x00 0x00 NOPX +.delay_slot + 3758 0x0a 0x1d 0xa3 0x18 VST.CONV.bf16.fp32 cml3, [p2], #64 +.delay_slot +.swstall delay_slot + 3762 0x00 0x00 NOPX +.delay_slot + 3764 0x0a 0x1e 0x23 0x18 VST.CONV.bf16.fp32 cml4, [p2], #64 +.delay_slot +.swstall delay_slot + 3768 0x00 0x00 NOPX +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0 + +.text_segment PM 3776 +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E +.function_start + 3776 0x50 0x91 0x60 0x00 0x04 0x00 0x00 0x00 0x71 0x3a MOVS p2, p1; PADDXM [sp], #128 + 3786 0xff 0x87 0xb0 0x02 0x08 0x60 0x70 0x02 ST lr, [sp, #-4]; MOV r16, p0 + 3794 0x1c 0x55 0xe0 0xf8 MOV r17, sp + 3798 0x00 0x07 0xc6 0xc9 0x18 0x44 MOVXM p3, #509068 + 3804 0x65 0xed 0x50 0xd1 0x80 0x14 LDA.u8 r27, [p3], #2; ADD.NC p0, r17, #-128 + 3810 0x73 0xca 0x50 0x0e 0x56 0x0c LDA.s16 r18, [p3], #-14; VST sfh, [p0] + 3816 0x00 0x06 0x57 0x18 ST.s16 r18, [p0] + 3820 0x00 0x00 NOPX + 3822 0x00 0x00 NOPX +.no_stack_arguments + 3824 0x00 0x06 0xb8 0x00 0x01 0x04 JL #3440 +.delay_slot + 3830 0x1c 0x50 0xc0 0xf8 MOV r17, p0 +.delay_slot +.swstall delay_slot + 3834 0x00 0x00 NOPX +.delay_slot + 3836 0x14 0x25 0x12 0x18 SEL.EQZ r18, r16, r17, r27 +.delay_slot + 3840 0x8c 0x20 0x42 0xd2 0x41 0xe4 SEL.EQZ r16, r17, r16, r27; MOV p1, r18 +.delay_slot + 3846 0x00 0x2c 0xf0 0x00 0x10 0x00 0x34 0x10 0x7e 0xba NOPA; NOPB; MOV p0, r16 +.return_address + 3856 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] + 3860 0x00 0x00 NOPX + 3862 0x00 0x00 NOPX + 3864 0x00 0x00 NOPX + 3866 0x00 0x00 NOPX + 3868 0x00 0x00 NOPX + 3870 0x00 0x00 NOPX + 3872 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 3876 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128 +.delay_slot +.swstall delay_slot + 3882 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3884 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3886 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3888 0x00 0x00 NOPX +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_end0 + +.text_segment PM 3904 +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function_start + 3904 0x00 0x07 0xc6 0xc7 0x80 0x44 MOVXM p3, #508864 + 3910 0x60 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p3]; MOV r17, CORE_ID + 3916 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 3922 0xff 0x63 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p6, [sp, #-8]; MOV r0, r15 + 3930 0xff 0x82 0xb0 0x00 0x01 0xf3 0x31 0xe6 0x11 0x3a ST r0, [sp, #-4]; MOVXM p6, #508876 + 3940 0x1b 0xd4 0xc0 0xf8 MOV r15, p2 + 3944 0x00 0x00 NOPX + 3946 0x00 0x00 NOPX + 3948 0x80 0x08 0x08 0x40 0x01 0x84 JNZ r16, #4112 +.delay_slot + 3954 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17 +.delay_slot + 3958 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 3962 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12] +.delay_slot + 3966 0xc0 0xc6 0x30 0x03 0x30 0x60 0x70 0x02 ST r17, [p6]; MOV p6, p0 +.delay_slot + 3974 0x00 0x07 0xc0 0xc9 0x00 0x44 MOVXM p0, #509056 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3980 0x00 0x07 0xc4 0xc8 0x20 0x44 MOVXM p2, #508944 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3986 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x32 0x06 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #508940 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3996 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 3998 0x00 0x06 0x98 0x00 0x01 0x04 JL #3376 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4004 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4006 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4008 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 4012 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 4016 0x00 0x2c 0xf0 0x00 0x22 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV +.return_address + 4032 0x00 0x07 0xc4 0xc7 0x98 0x44 MOVXM p2, #508876 + 4038 0x40 0xc2 0xd0 0x00 0x01 0xf1 0x32 0x40 0x10 0xba LDA r16, [p2]; MOVXM p2, #509056 + 4048 0x40 0xc6 0xd0 0x00 0x01 0xf1 0x32 0x40 0x10 0xba LDA r17, [p2]; MOVXM p2, #509056 + 4058 0x4a 0xcb 0x50 0x00 0x01 0xf0 0xb1 0xe8 0x10 0xba LDA.u16 r18, [p2, #10]; MOVXM p1, #508880 + 4068 0x00 0x00 NOPX + 4070 0x00 0x00 NOPX + 4072 0x00 0x08 0x10 0x00 0x00 0x84 J #4128 +.delay_slot + 4078 0x00 0x07 0xc0 0xc8 0x10 0x44 MOVXM p0, #508936 +.delay_slot +.swstall delay_slot + 4084 0x00 0x00 NOPX +.delay_slot + 4086 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 +.delay_slot + 4090 0x00 0x2c 0xf0 0x0c 0xa3 0x0c NOPA; ST r18, [p0] +.delay_slot + 4096 0x00 0x2c 0xf0 0x00 0x21 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 + 4112 0x00 0x2c 0xf0 0x00 0x22 0x80 0x8b 0x00 0x01 0xf0 0xb1 0xe8 0x10 0x00 0x00 0xe1 NOPA; NOPB; MOVS p2, p0; MOVXM p1, #508880; NOPV +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 + 4128 0x73 0x91 0x60 0x03 0xb3 0xc3 0x00 0x02 MOVS p3, p7; ADD.NC p7, r15, #12 + 4136 0xff 0xee 0xd0 0x00 0x01 0xf0 0x31 0xe0 0x10 0xba LDA r27, [p7], #-4; MOVXM p0, #508864 + 4146 0x07 0xfe 0x16 0x98 LDA r16, [p7], #-4 + 4150 0x07 0xfe 0x36 0x98 LDA r17, [p7], #-4 + 4154 0x07 0x46 0x56 0x98 LDA r18, [p7, #16] + 4158 0x00 0x00 NOPX + 4160 0x00 0x00 NOPX + 4162 0x00 0x00 NOPX + 4164 0x00 0x00 NOPX + 4166 0x00 0x00 NOPX + 4168 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 4172 0x0f 0x06 0x11 0x98 ST r16, [p7] + 4176 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 4180 0x00 0x00 NOPX + 4182 0x00 0x00 NOPX + 4184 0x00 0x00 NOPX + 4186 0x14 0x93 0x08 0x18 ACQ r18, r16 + 4190 0x04 0x00 0xa7 0xad 0x81 0xe4 MOVX r16, #1; MOV r15, p3 + 4196 0x00 0x00 NOPX + 4198 0x00 0x00 NOPX + 4200 0x00 0x06 0x36 0x98 LDA r17, [p0] + 4204 0xc0 0xca 0xdc 0xdd 0x81 0xd4 LDA r18, [p6]; MOV p6, p7 + 4210 0x01 0x06 0x76 0x98 LDA r19, [p1] + 4214 0x07 0x5c 0x9e 0x98 LDA p1, [p7], #20 + 4218 0x00 0x00 NOPX +.no_stack_arguments + 4220 0x00 0x07 0x60 0x00 0x01 0x04 JL #3776 +.delay_slot +.swstall delay_slot + 4226 0x00 0x00 NOPX +.delay_slot + 4228 0x14 0x62 0x07 0x18 ADD r17, r17, #1 +.delay_slot + 4232 0x08 0x06 0x31 0x98 ST r17, [p0] +.delay_slot + 4236 0x14 0xe1 0x0d 0x98 LSHL r16, r19, r16 +.delay_slot + 4240 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xa0 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV +.return_address + 4256 0xca 0xc6 0xd0 0x00 0x01 0xf3 0x31 0xe0 0x10 0xba LDA r17, [p6, #20]; MOVXM p6, #508864 + 4266 0x10 0x20 0x05 0x18 MOVX r16, #1 + 4270 0x00 0x00 NOPX + 4272 0x00 0x00 NOPX + 4274 0x00 0x00 NOPX + 4276 0x00 0x00 NOPX + 4278 0x00 0x00 NOPX + 4280 0x14 0x51 0x08 0x18 REL r17, r16 + 4284 0xfc 0xce 0xd0 0x00 0x01 0xf1 0x32 0x04 0x10 0xba LDA r19, [p7, #-8]; MOVXM p2, #508936 + 4294 0x06 0x06 0x36 0x98 LDA r17, [p6] + 4298 0x02 0x06 0x56 0x98 LDA r18, [p2] + 4302 0x00 0x00 NOPX + 4304 0x00 0x00 NOPX + 4306 0x00 0x00 NOPX + 4308 0x00 0x00 NOPX + 4310 0x14 0x21 0x31 0x98 SUB r16, r16, r19 + 4314 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8] + 4318 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 4322 0x80 0x08 0x80 0x40 0x01 0x84 JNZ r16, #4352 +.delay_slot +.swstall delay_slot + 4328 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4330 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4332 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4334 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4336 0x00 0x00 NOPX + 4338 0x10 0x20 0x01 0x18 MOVX r16, #0 + 4342 0x00 0x2c 0xf6 0x06 0x11 0x80 0x00 0x00 0x00 0x7a NOPA; ST r16, [p6]; NOPX +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 + 4352 0x07 0xf4 0x39 0x18 LDA lr, [sp, #-12] + 4356 0x07 0xfb 0x19 0x18 LDA p6, [sp, #-8] + 4360 0x00 0x00 NOPX + 4362 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 4364 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4366 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4370 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4372 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4376 0x1f 0x67 0xa0 0xf8 MOV p7, r15 +.delay_slot + 4380 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 4386 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4388 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4390 0x00 0x00 NOPX +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + +.text_segment PM 4400 +.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE +.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_begin0 +.function_start + 4400 0x18 0x96 0xc0 0xf8 MOV r2, p3 + 4404 0x00 0x00 0x2a 0xc2 0x0e 0x24 MOVX r0, #0; ADD.NC p5, r2, #14 + 4410 0xa3 0x82 0x50 0x02 0xe5 0xd4 LDA.s16 r0, [p5], #2; VBCST.16 x0, r0 + 4416 0x05 0x04 0x56 0x98 LDA r2, [p5] + 4420 0x00 0x00 NOPX + 4422 0x00 0x00 NOPX + 4424 0x00 0x00 NOPX + 4426 0x00 0x00 NOPX + 4428 0x00 0x00 NOPX + 4430 0x10 0x02 0x09 0x18 MOVX r1, #2 + 4434 0x10 0x42 0x2c 0x98 LTU r1, r1, r2 + 4438 0x08 0x08 0xf0 0x40 0x01 0x84 JNZ r1, #4576 +.delay_slot + 4444 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 +.delay_slot + 4450 0x1c 0x65 0xe0 0xf8 MOV p4, sp +.delay_slot + 4454 0x3c 0xff 0x90 0x18 PADDB [p4], #-64 +.delay_slot + 4458 0x0c 0x04 0x13 0x18 VST x0, [p4] +.delay_slot +.swstall delay_slot + 4462 0x00 0x00 NOPX + 4464 0x01 0x82 0x80 0x02 0xe5 0xd4 MOVA dj0, #12; VBCST.16 x0, r0 + 4470 0x03 0x00 0x0a 0x98 LDA.u8 r0, [p3, dj0] + 4474 0x00 0x00 NOPX + 4476 0x00 0x00 NOPX + 4478 0x00 0x00 NOPX + 4480 0x00 0x00 NOPX + 4482 0x00 0x00 NOPX + 4484 0x00 0x00 NOPX + 4486 0x00 0x08 0xd8 0x40 0x01 0x84 JNZ r0, #4528 +.delay_slot + 4492 0x18 0x00 0x00 0xb8 MOV m0, #0 +.delay_slot + 4496 0x1a 0x00 0x80 0xb8 MOV m2, #64 +.delay_slot +.swstall delay_slot + 4500 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4502 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4504 0x00 0x00 NOPX + 4506 0x00 0x04 0x80 0x00 0x02 0x38 0x00 0x00 0x20 0xba MOVA m1, #0; J #4544 +.delay_slot +.swstall delay_slot + 4516 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4518 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4520 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4522 0x00 0x00 NOPX +.delay_slot + 4524 0x08 0x04 0x13 0x18 VST x0, [p0] +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_128 + 4528 0x19 0x00 0x80 0xb8 MOV m1, #64 + 4532 0x00 0x2c 0xf0 0x00 0x21 0x04 0x13 0x01 0x00 0x00 0x50 0xf6 NOPA; NOPB; VST x0, [p1]; MOV m2, #0 +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_144 + 4544 0x00 0x09 0x20 0x00 0x00 0x84 J #4672 +.delay_slot + 4550 0x12 0x11 0x60 0x02 0x30 0x60 0x70 0x02 MOVS p0, p4; MOV p4, p0 +.delay_slot +.swstall delay_slot + 4558 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4560 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4562 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4564 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_176 + 4576 0x10 0x02 0x0d 0x18 MOVX r1, #3 + 4580 0x10 0x42 0x27 0x98 EQ r1, r1, r2 + 4584 0x08 0x09 0x08 0x40 0x01 0x84 JNZ r1, #4624 +.delay_slot + 4590 0x00 0x07 0xc6 0xc8 0x20 0x44 MOVXM p3, #508944 +.delay_slot +.swstall delay_slot + 4596 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4598 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4600 0x00 0x00 NOPX +.delay_slot + 4602 0x3f 0x80 0x00 0x20 0x00 0x44 MOVXM r0, #1065353216 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4608 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x2f 0xe0 0x00 0x08 0x00 0x10 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVXM r0, #-1082130432; NOPV +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_224 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4624 0x60 0x80 0x50 0x01 0x01 0x54 LDA.s8 r0, [p3]; MOV m0, #64 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4630 0x19 0x00 0x00 0xb8 MOV m1, #0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4634 0x1a 0x00 0x80 0xb8 MOV m2, #64 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4638 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4640 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4642 0x18 0x00 0x11 0x78 VINSERT.32 x0, x0, #0, r0 + 4646 0x19 0x00 0x92 0xf8 VMOV bmll1, x0 + 4650 0x10 0x3a 0x80 0x18 MOVX crRnd, r0 + 4654 0x08 0x40 0x96 0x18 VCONV.bf16.fp32 wl0, bmll1 + 4658 0x00 0x00 NOPX + 4660 0x18 0x01 0x03 0x58 VEXTBCST.16 x0, x0, #0 + 4664 0x00 0x00 NOPX + 4666 0x00 0x2c 0xff 0xf8 0x66 0x0c NOPA; VST x0, [sp, #-64] +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_272 + 4672 0xb8 0x86 0xd8 0x50 0xe8 0x00 0x00 0x04 0x79 0x58 0x10 0xb6 LDA r1, [p5, #-16]; VLDB x1, [p4], m1; MOVXM ls, #4784 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 4684 0xff 0x63 0x02 0x90 0x68 0x00 0x00 0x05 0xb9 0x70 0x10 0xb6 MOVA r3, #-5; VLDB x0, [p1], m2; MOVXM le, #4832 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4696 0x01 0x05 0x78 0x50 0xe8 0x00 0xf1 0x12 VLDA.CONV.fp32.bf16 cml0, [p0], m0;VLDB x1, [p4], m1; MOVX r0, #60 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4704 0x29 0x03 0x70 0x00 0x01 0xf1 0xb2 0x08 0x10 0xba VLDA x0, [p1], m2; MOVXM p3, #508944 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4714 0x03 0x04 0x42 0x98 LDA.s8 r2, [p3] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4718 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4722 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4724 0x10 0x42 0x3d 0x98 LSHL r1, r1, r3 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4728 0x05 0x70 0xfe 0x86 0x01 0x02 0x01 0x62 ADD.NC lc, r1, #-3; VMAC.f dm1, dm0, x1, x0, r0 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4736 0x29 0x03 0x78 0x50 0xe8 0x3c VLDA x0, [p1], m2; VLDB x1, [p4], m1 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4742 0x01 0x05 0x70 0x00 0x10 0x00 0x01 0xa5 0x7e 0xba VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPM +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4752 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x05 0xd4 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVX crRnd, r2; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4768 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x08 0x10 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 +.label ZLS_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_384 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4784 0x29 0x03 0x78 0x50 0xe8 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA x0, [p1], m2; VLDB x1, [p4], m1; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4800 0x01 0x05 0x70 0x00 0x22 0x1c 0xa3 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4816 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLE_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_432 +.end_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4832 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x08 0x10 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 +.loop_nesting 0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4848 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4850 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 4854 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.noswbrkpt + 4856 0x01 0x02 0x01 0x48 VMAC.f dm1, dm0, x1, x0, r0 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4860 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4862 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4866 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64 +.delay_slot + 4870 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 4876 0x00 0x00 NOPX +.delay_slot + 4878 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64 +.delay_slot +.swstall delay_slot + 4882 0x00 0x00 NOPX +.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE__end +.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_end0 + +.text_segment PM 4896 +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv +.function_start + 4896 0x23 0x85 0xd0 0x00 0x01 0xf0 0x32 0xa0 0x10 0xba LDA el0, [p1], #4; MOVXM p0, #509248 + 4906 0x00 0x00 NOPX + 4908 0x00 0x00 NOPX + 4910 0x00 0x00 NOPX + 4912 0x00 0x00 NOPX + 4914 0x00 0x00 NOPX + 4916 0x00 0x00 NOPX + 4918 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 4922 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 4926 0x00 0x00 NOPX + 4928 0x00 0x00 NOPX + 4930 0x00 0x00 NOPX + 4932 0x00 0x00 NOPX + 4934 0x00 0x00 NOPX + 4936 0x00 0x00 NOPX + 4938 0x08 0x04 0x29 0x98 ST el0, [p0] + 4942 0x01 0x14 0x2e 0x98 LDA el0, [p1, #4] + 4946 0x00 0x00 NOPX + 4948 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot + 4952 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4954 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4956 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4958 0x00 0x00 NOPX +.delay_slot + 4960 0x08 0x14 0x29 0x98 ST el0, [p0, #4] +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv__end +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_end0 + +.text_segment PM 4976 +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E +.function_start + 4976 0xff 0x40 0x00 0x3d 0x68 0x00 0x01 0xf1 0x32 0xa0 0x10 0xb6 MOVA r0, #-6; VLDB x10, [p0], #64; MOVXM p2, #509248 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4988 0x40 0x8a 0xd0 0x3b 0xe8 0x00 0x01 0xf1 0x32 0x08 0x10 0xb6 LDA r2, [p2]; VLDB x7, [p0], #64; MOVXM p2, #508944 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5000 0x40 0x84 0x50 0x3d 0x68 0x00 0x00 0x10 0xc8 0x40 0x10 0xb6 LDA.s8 r1, [p2]; VLDB x10, [p0], #64; MOVXM r6, #16512 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5012 0x03 0xbe 0x80 0x32 0xe5 0xf4 VLDB x7, [p0], #64; VBCST.16 x0, r6 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5018 0x00 0x00 0xc2 0x21 0x00 0x44 MOVXM r4, #49280 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5024 0x18 0x91 0x72 0xf8 VBCST.16 x1, r4 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5028 0x00 0x00 0x71 0xbf 0xfe 0x44 MOVXM r3, #32767 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5034 0x1c 0x50 0x2c 0xf8 VMIN_GE.bf16 x8, r16, x10, x0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5038 0x10 0x01 0xb6 0x81 0xd9 0xe4 LSHL r0, r2, r0; VMAX_LT.bf16 x6, r16, x8, x1 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5044 0x0f 0x50 0x08 0x70 0x59 0xe4 MOVX crRnd, r1; VMIN_GE.bf16 x8, r16, x7, x0 + 5050 0x19 0x0d 0x72 0xf8 VBCST.16 x2, r3 + 5054 0x00 0x00 0x32 0xba 0x00 0x44 MOVXM r5, #15616 + 5060 0x19 0x95 0x72 0xf8 VBCST.16 x3, r5 + 5064 0x00 0x00 0x38 0xbe 0x00 0x44 MOVXM r17, #16128 + 5070 0x1d 0xb1 0x2b 0x78 VBAND x11, x6, x2 + 5074 0x64 0x5e 0x25 0x8a 0xe5 0xe4 MOVX r17, #828; VBCST.16 x5, r17 + 5080 0x04 0xc0 0xec 0xe6 0x8c 0xe7 0x61 0x62 VMAX_LT.bf16 x9, r16, x8, x1; VMUL.f dm4, x3, x11, r17 + 5088 0x1c 0x49 0x2b 0x78 VBAND x8, x9, x2 + 5092 0x00 0x00 0x31 0x3d 0x00 0x44 MOVXM r2, #16000 + 5098 0x02 0x09 0x72 0xe6 0x8a 0xe7 0x01 0x62 VBCST.16 x4, r2; VMUL.f dm2, x3, x8, r17 + 5106 0x18 0x0b 0x8a 0xf8 VCONV.fp32.bf16 cml0, x5 + 5110 0x04 0x50 0x2c 0xe6 0x8b 0x0c 0x81 0x62 VMIN_GE.bf16 x8, r16, x10, x0; VMAC.f dm3, dm0, x6, x4, r17 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 5118 0xb2 0x42 0xc0 0x00 0x00 0x8f 0x43 0x02 0x89 0x12 0x81 0x56 VCONV.bf16.fp32 x11, cml4; MOVXM ls, #5168; VMAC.f dm1, dm0, x9, x4, r17 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 5130 0x1b 0x40 0xec 0xf8 VMAX_LT.bf16 x6, r16, x8, x1 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5134 0x00 0x00 0x00 0xb7 0x49 0x02 0x8a 0x76 0xc3 0x5a MOVXM le, #5264; VMSC.f dm2, dm3, x11, x6, r17 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5144 0x52 0x22 0xc0 0x02 0xb8 0x3f 0x80 0x02 VCONV.bf16.fp32 x5, cml2; ADD.NC lc, r0, #-2 + 5152 0x1c 0x38 0x2c 0xf8 VMIN_GE.bf16 x8, r16, x7, x0 + 5156 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x02 0xd8 0x95 0xb0 0xf6 NOPA; NOPB; NOPS; VBAND x11, x6, x2 +.label ZLS_F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_192 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 5168 0x00 0x3d 0x6c 0xc0 0xec 0xe6 0x8c 0x2b 0x23 0x4a VLDB x10, [p0], #64; VMAX_LT.bf16 x9, r16, x8, x1; VMSC.f dm4, dm1, x5, x9, r17 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 5178 0x00 0x3b 0xec 0x49 0x2b 0x66 0x8c 0xe7 0x61 0x4a VLDB x7, [p0], #64; VBAND x8, x9, x2; VMUL.f dm4, x3, x11, r17 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 5188 0x8b 0x0c 0x81 0x48 VMAC.f dm3, dm0, x6, x4, r17 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 5192 0x8a 0xe7 0x01 0x48 VMUL.f dm2, x3, x8, r17 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 5196 0x23 0xa4 0x60 0x02 0x89 0x12 0x81 0x62 VST.CONV.bf16.fp32 cml2, [p1], #64; VMAC.f dm1, dm0, x9, x4, r17 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 5204 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5206 0x00 0x2c 0xf1 0x1e 0x23 0x00 0x00 0x00 0x00 0x7a NOPA; VST.CONV.bf16.fp32 cml4, [p1], #64;NOPX +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 5216 0x00 0x2c 0xf0 0x00 0x25 0x92 0x16 0x00 0x00 0x02 0x28 0x16 0x78 0x00 0x00 0xe1 NOPA; NOPB; VCONV.bf16.fp32 x11, cml4; NOPX; VMIN_GE.bf16 x8, r16, x10, x0; NOPV +.aggressive_scheduled_block_id 4 +.noswbrkpt + 5232 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x01 0xa0 0x76 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x8, x1; NOPV +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5248 0x00 0x2c 0xf0 0x00 0x22 0x91 0x16 0x00 0x00 0x02 0x1c 0x16 0x7c 0x53 0xb6 0x1b NOPA; NOPB; VCONV.bf16.fp32 x5, cml2; NOPX; VMIN_GE.bf16 x8, r16, x7, x0; VMSC.f dm2, dm3, x11, x6, r17 +.label ZLE_F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_288 +.end_of_loop + 5264 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x02 0xd8 0x95 0xb8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VBAND x11, x6, x2; NOPV +.loop_nesting 0 + 5280 0x04 0xc0 0xec 0xe6 0x8c 0x2b 0x23 0x62 VMAX_LT.bf16 x9, r16, x8, x1; VMSC.f dm4, dm1, x5, x9, r17 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 5288 0x1c 0x49 0x2b 0x78 VBAND x8, x9, x2 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 5292 0x8c 0xe7 0x61 0x48 VMUL.f dm4, x3, x11, r17 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 5296 0x8a 0xe7 0x01 0x48 VMUL.f dm2, x3, x8, r17 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 5300 0x09 0x1d 0x23 0x18 VST.CONV.bf16.fp32 cml2, [p1], #64 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 5304 0x00 0x00 NOPX +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5306 0x09 0x1e 0x23 0x18 VST.CONV.bf16.fp32 cml4, [p1], #64 + 5310 0x8b 0x0c 0x81 0x48 VMAC.f dm3, dm0, x6, x4, r17 + 5314 0xb2 0x42 0xc0 0x02 0x89 0x12 0x81 0x62 VCONV.bf16.fp32 x11, cml4; VMAC.f dm1, dm0, x9, x4, r17 + 5322 0x0a 0x91 0x16 0x18 VCONV.bf16.fp32 x5, cml2 + 5326 0x8a 0x76 0xc3 0x48 VMSC.f dm2, dm3, x11, x6, r17 + 5330 0x8c 0x2b 0x23 0x48 VMSC.f dm4, dm1, x5, x9, r17 + 5334 0x00 0x00 NOPX + 5336 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot + 5340 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5342 0x00 0x00 NOPX +.delay_slot + 5344 0x09 0x1d 0x23 0x18 VST.CONV.bf16.fp32 cml2, [p1], #64 +.delay_slot + 5348 0x09 0x1e 0x23 0x18 VST.CONV.bf16.fp32 cml4, [p1], #64 +.delay_slot +.swstall delay_slot + 5352 0x00 0x00 NOPX +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E__end +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_end0 + +.text_segment PM 5360 +.label __Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function_start + 5360 0x00 0x07 0xc6 0xc7 0x80 0x44 MOVXM p3, #508864 + 5366 0x60 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p3]; MOV r17, CORE_ID + 5372 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 5378 0xff 0x63 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p6, [sp, #-8]; MOV r0, r15 + 5386 0xff 0x82 0xb0 0x00 0x01 0xf3 0x31 0xe6 0x11 0x3a ST r0, [sp, #-4]; MOVXM p6, #508876 + 5396 0x1b 0xd4 0xc0 0xf8 MOV r15, p2 + 5400 0x00 0x00 NOPX + 5402 0x00 0x00 NOPX + 5404 0x80 0x0a 0xe0 0x40 0x01 0x84 JNZ r16, #5568 +.delay_slot + 5410 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17 +.delay_slot + 5414 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 5418 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12] +.delay_slot + 5422 0xc0 0xc6 0x30 0x03 0x30 0x60 0x70 0x02 ST r17, [p6]; MOV p6, p0 +.delay_slot + 5430 0x00 0x07 0xc0 0xca 0x80 0x44 MOVXM p0, #509248 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5436 0x00 0x07 0xc4 0xc8 0x20 0x44 MOVXM p2, #508944 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5442 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x32 0x06 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #508940 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5452 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 5454 0x00 0x09 0x90 0x00 0x01 0x04 JL #4896 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5460 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5462 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5464 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 5468 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 5472 0x00 0x2c 0xf0 0x00 0x22 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV +.return_address + 5488 0x00 0x07 0xc4 0xc7 0x98 0x44 MOVXM p2, #508876 + 5494 0x40 0xc2 0xd0 0x00 0x01 0xf1 0x32 0xa0 0x10 0xba LDA r16, [p2]; MOVXM p2, #509248 + 5504 0x40 0xc6 0xd0 0x00 0x01 0xf1 0x32 0xa0 0x10 0xba LDA r17, [p2]; MOVXM p2, #509248 + 5514 0x48 0xcb 0x50 0x00 0x01 0xf0 0xb1 0xe8 0x10 0xba LDA.u16 r18, [p2, #8]; MOVXM p1, #508880 + 5524 0x00 0x00 NOPX + 5526 0x00 0x00 NOPX + 5528 0x00 0x0a 0xe8 0x00 0x00 0x84 J #5584 +.delay_slot + 5534 0x00 0x07 0xc0 0xc8 0x10 0x44 MOVXM p0, #508936 +.delay_slot +.swstall delay_slot + 5540 0x00 0x00 NOPX +.delay_slot + 5542 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 +.delay_slot + 5546 0x00 0x2c 0xf0 0x0c 0xa3 0x0c NOPA; ST r18, [p0] +.delay_slot + 5552 0x00 0x2c 0xf0 0x00 0x21 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV +.label TGT_F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 + 5568 0x00 0x2c 0xf0 0x00 0x22 0x80 0x8b 0x00 0x01 0xf0 0xb1 0xe8 0x10 0x00 0x00 0xe1 NOPA; NOPB; MOVS p2, p0; MOVXM p1, #508880; NOPV +.label TGT_F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 + 5584 0x73 0x91 0x60 0x03 0xb3 0xc3 0x00 0x02 MOVS p3, p7; ADD.NC p7, r15, #12 + 5592 0xff 0xee 0xd0 0x00 0x01 0xf0 0x31 0xe0 0x10 0xba LDA r27, [p7], #-4; MOVXM p0, #508864 + 5602 0x07 0xfe 0x16 0x98 LDA r16, [p7], #-4 + 5606 0x07 0xfe 0x36 0x98 LDA r17, [p7], #-4 + 5610 0x07 0x46 0x56 0x98 LDA r18, [p7, #16] + 5614 0x00 0x00 NOPX + 5616 0x00 0x00 NOPX + 5618 0x00 0x00 NOPX + 5620 0x00 0x00 NOPX + 5622 0x00 0x00 NOPX + 5624 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 5628 0x0f 0x06 0x11 0x98 ST r16, [p7] + 5632 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 5636 0x00 0x00 NOPX + 5638 0x00 0x00 NOPX + 5640 0x00 0x00 NOPX + 5642 0x14 0x93 0x08 0x18 ACQ r18, r16 + 5646 0x04 0x00 0xa7 0xad 0x81 0xe4 MOVX r16, #1; MOV r15, p3 + 5652 0x00 0x00 NOPX + 5654 0x00 0x00 NOPX + 5656 0x00 0x06 0x36 0x98 LDA r17, [p0] + 5660 0xc0 0xca 0xdc 0xdd 0x81 0xd4 LDA r18, [p6]; MOV p6, p7 + 5666 0x01 0x06 0x76 0x98 LDA r19, [p1] + 5670 0x07 0x5c 0x9e 0x98 LDA p1, [p7], #20 + 5674 0x00 0x00 NOPX +.no_stack_arguments + 5676 0x00 0x09 0xb8 0x00 0x01 0x04 JL #4976 +.delay_slot +.swstall delay_slot + 5682 0x00 0x00 NOPX +.delay_slot + 5684 0x14 0x62 0x07 0x18 ADD r17, r17, #1 +.delay_slot + 5688 0x08 0x06 0x31 0x98 ST r17, [p0] +.delay_slot + 5692 0x14 0xe1 0x0d 0x98 LSHL r16, r19, r16 +.delay_slot + 5696 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xa0 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV +.return_address + 5712 0xca 0xc6 0xd0 0x00 0x01 0xf3 0x31 0xe0 0x10 0xba LDA r17, [p6, #20]; MOVXM p6, #508864 + 5722 0x10 0x20 0x05 0x18 MOVX r16, #1 + 5726 0x00 0x00 NOPX + 5728 0x00 0x00 NOPX + 5730 0x00 0x00 NOPX + 5732 0x00 0x00 NOPX + 5734 0x00 0x00 NOPX + 5736 0x14 0x51 0x08 0x18 REL r17, r16 + 5740 0xfc 0xce 0xd0 0x00 0x01 0xf1 0x32 0x04 0x10 0xba LDA r19, [p7, #-8]; MOVXM p2, #508936 + 5750 0x06 0x06 0x36 0x98 LDA r17, [p6] + 5754 0x02 0x06 0x56 0x98 LDA r18, [p2] + 5758 0x00 0x00 NOPX + 5760 0x00 0x00 NOPX + 5762 0x00 0x00 NOPX + 5764 0x00 0x00 NOPX + 5766 0x14 0x21 0x31 0x98 SUB r16, r16, r19 + 5770 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8] + 5774 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 5778 0x80 0x0b 0x58 0x40 0x01 0x84 JNZ r16, #5808 +.delay_slot +.swstall delay_slot + 5784 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5786 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5788 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5790 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5792 0x00 0x00 NOPX + 5794 0x10 0x20 0x01 0x18 MOVX r16, #0 + 5798 0x00 0x2c 0xf6 0x06 0x11 0x80 0x00 0x00 0x00 0x7a NOPA; ST r16, [p6]; NOPX +.label TGT_F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 + 5808 0x07 0xf4 0x39 0x18 LDA lr, [sp, #-12] + 5812 0x07 0xfb 0x19 0x18 LDA p6, [sp, #-8] + 5816 0x00 0x00 NOPX + 5818 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 5820 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.noswbrkpt + 5822 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5826 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5828 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5832 0x1f 0x67 0xa0 0xf8 MOV p7, r15 +.delay_slot + 5836 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 5842 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5844 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5846 0x00 0x00 NOPX +.label _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + +.text_segment PM 5856 +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv +.function_start + 5856 0x23 0x85 0xd0 0x00 0x01 0xf0 0x32 0x60 0x10 0xba LDA el0, [p1], #4; MOVXM p0, #509120 + 5866 0xf0 0x00 0x00 0x08 0x10 0x0b 0x08 0x00 0x58 0xba MOVA r0, #-128; MOVX r1, #256; MOV r24, #0 + 5876 0x00 0x00 NOPX + 5878 0x00 0x00 NOPX + 5880 0x00 0x00 NOPX + 5882 0x00 0x00 NOPX + 5884 0x00 0x00 NOPX + 5886 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 5890 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 5894 0x00 0x00 NOPX + 5896 0x00 0x00 NOPX + 5898 0x00 0x00 NOPX + 5900 0x00 0x00 NOPX + 5902 0x00 0x00 NOPX + 5904 0x00 0x00 NOPX + 5906 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 5910 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 5914 0x00 0x00 NOPX + 5916 0x00 0x00 NOPX + 5918 0x00 0x00 NOPX + 5920 0x00 0x00 NOPX + 5922 0x00 0x00 NOPX + 5924 0x00 0x00 NOPX + 5926 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 5930 0x01 0x14 0x76 0x98 LDA r3, [p1, #4] + 5934 0x00 0x00 NOPX + 5936 0x00 0x00 NOPX + 5938 0x00 0x00 NOPX + 5940 0x00 0x00 NOPX + 5942 0x00 0x00 NOPX + 5944 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5946 0x08 0x4c 0x71 0x98 ST r3, [p0], #16 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5950 0x00 0x04 0x17 0x18 ST.s16 r0, [p0] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5954 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5958 0x00 0x00 0xf1 0x3e 0x00 0x44 MOVXM r2, #65280 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5964 0x10 0xc4 0x24 0x98 AND r2, r3, r2 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5968 0x10 0x76 0x27 0x98 EQ r27, r1, r2 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5972 0x10 0x01 0x82 0x18 SEL.EQZ r0, r0, r24, r27 +.delay_slot +.swstall delay_slot + 5976 0x00 0x00 NOPX +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_end0 + +.text_segment PM 5984 +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv +.function_start + 5984 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 5990 0x0f 0xf8 0x3d 0x98 ST lr, [sp, #-8] +.no_stack_arguments + 5994 0x00 0x0b 0x70 0x00 0x01 0x04 JL #5856 +.delay_slot + 6000 0x18 0x17 0xa0 0xf8 MOV r0, r15 +.delay_slot + 6004 0xff 0x82 0xb0 0x00 0x01 0xf1 0xea 0x60 0x11 0x3a ST r0, [sp, #-4]; MOVXM r15, #509120 +.delay_slot + 6014 0x18 0x67 0xa0 0xf8 MOV p0, r15 +.delay_slot +.swstall delay_slot + 6018 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6020 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.return_address + 6032 0xff 0x07 0x20 0x01 0x00 0x68 0xb3 0xc4 0x08 0xba LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p1, r15, #16 + 6042 0x01 0xe2 0x80 0x01 0x80 0x08 0x07 0xfd 0x58 0xba MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 + 6052 0xff 0xbe 0x21 0x0a 0x11 0x80 0x07 0xa0 0x01 0x7a LDA r15, [sp, #-4]; ST r16, [p1], m0; MOVX r16, #-128 + 6062 0x01 0x06 0x4a 0x98 LDA.u8 r18, [p1] + 6066 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 6068 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 6070 0x01 0x02 0x17 0x18 ST.s16 r16, [p1, dj0] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6074 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6078 0x10 0x22 0x05 0x18 MOVX r17, #1 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6082 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6088 0x14 0x77 0x27 0x98 EQ r27, r17, r18 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6092 0x14 0x21 0x82 0x18 SEL.EQZ r16, r16, r24, r27 +.delay_slot +.swstall delay_slot + 6096 0x00 0x00 NOPX +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + +.text_segment PM 6112 +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.tail_call +.function_start + 6112 0x00 0x08 0x98 0x00 0x00 0x84 J #4400 +.delay_slot + 6118 0x00 0x07 0xc6 0xc9 0x80 0x44 MOVXM p3, #509120 +.delay_slot +.swstall delay_slot + 6124 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6126 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6128 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6130 0x00 0x00 NOPX +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0 + +.text_segment PM 6144 +.label __Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0 +.label _Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.function_start + 6144 0x00 0x07 0xc8 0xc7 0x80 0x44 MOVXM p4, #508864 + 6150 0x80 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p4]; MOV r17, CORE_ID + 6156 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 6162 0xff 0x3a 0xb0 0x23 0x14 0x81 0xca 0x60 0x79 0x3a ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 + 6172 0xfd 0x83 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p0, [sp, #-20]; MOV r0, r15 + 6180 0x0f 0xfc 0x15 0x98 ST r0, [sp, #-4] + 6184 0x0f 0xf0 0x3d 0x98 ST lr, [sp, #-16] + 6188 0x00 0x00 NOPX + 6190 0x80 0x0c 0x60 0x40 0x01 0x84 JNZ r16, #6336 +.delay_slot + 6196 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 6200 0x00 0x07 0xc4 0xc7 0x98 0x44 MOVXM p2, #508876 +.delay_slot + 6206 0x40 0xc6 0x30 0x01 0x37 0x60 0x70 0x02 ST r17, [p2]; MOV p2, p7 +.delay_slot + 6214 0x1b 0xd6 0xc0 0xf8 MOV r15, p3 +.delay_slot + 6218 0xfe 0xa3 0xb0 0x00 0x01 0xf3 0xb2 0x60 0x11 0x3a ST p2, [sp, #-12]; MOVXM p7, #509120 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 6228 0x13 0x91 0x60 0x00 0x01 0xf1 0x32 0x08 0x11 0x3a MOVS p0, p7; MOVXM p2, #508944 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 6238 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x32 0x06 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #508940 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6248 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 6250 0x00 0x0b 0xb0 0x00 0x01 0x04 JL #5984 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6256 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6258 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6260 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 6264 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 6268 0x0a 0x06 0x11 0x98 ST r16, [p2] +.return_address + 6272 0xe0 0xc2 0xd0 0x00 0x01 0xf0 0xb1 0xe6 0x10 0xba LDA r16, [p7]; MOVXM p1, #508876 + 6282 0x20 0xc6 0xd0 0x00 0x01 0xf1 0xb1 0xe8 0x10 0xba LDA r17, [p1]; MOVXM p3, #508880 + 6292 0xea 0xcb 0x50 0x00 0x01 0xf0 0xb1 0xea 0x10 0xba LDA.u16 r18, [p7, #10]; MOVXM p1, #508884 + 6302 0x00 0x00 NOPX + 6304 0x00 0x00 NOPX + 6306 0x00 0x00 NOPX + 6308 0x00 0x0c 0x68 0x00 0x00 0x84 J #6352 +.delay_slot + 6314 0x00 0x07 0xc4 0xc8 0x10 0x44 MOVXM p2, #508936 +.delay_slot + 6320 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 +.delay_slot + 6324 0x0a 0x06 0x51 0x98 ST r18, [p2] +.delay_slot + 6328 0x0b 0x06 0x11 0x98 ST r16, [p3] +.delay_slot + 6332 0x09 0x06 0x11 0x98 ST r16, [p1] +.label TGT_F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 + 6336 0x00 0x07 0xc6 0xc7 0xa0 0x44 MOVXM p3, #508880 + 6342 0x00 0x2c 0xf0 0x00 0x01 0xf0 0xb1 0xea 0x10 0xba NOPA; MOVXM p1, #508884 +.label TGT_F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 + 6352 0x18 0x67 0x86 0x18 ADD.NC p0, r15, #12 + 6356 0x1f 0xee 0xd0 0x00 0x01 0xf1 0x31 0xe0 0x10 0xba LDA r27, [p0], #-4; MOVXM p2, #508864 + 6366 0x00 0xfe 0x16 0x98 LDA r16, [p0], #-4 + 6370 0x00 0xfe 0x36 0x98 LDA r17, [p0], #-4 + 6374 0x02 0x06 0x56 0x98 LDA r18, [p2] + 6378 0x00 0x46 0x76 0x98 LDA r19, [p0, #16] + 6382 0x00 0x00 NOPX + 6384 0x00 0x00 NOPX + 6386 0x00 0x00 NOPX + 6388 0x00 0x00 NOPX + 6390 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 6394 0x00 0xc2 0x39 0x40 0x0e 0x5c ST r16, [p0]; ADD r16, r18, #1 + 6400 0x0a 0x06 0x11 0x98 ST r16, [p2] + 6404 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 6408 0x00 0x00 NOPX + 6410 0x00 0x00 NOPX + 6412 0x00 0x00 NOPX + 6414 0x14 0xd3 0x08 0x18 ACQ r19, r16 + 6418 0x1a 0x67 0x06 0x18 ADD.NC p2, r14, #12 + 6422 0x00 0x00 NOPX + 6424 0x00 0x00 NOPX + 6426 0x02 0xff 0x76 0x98 LDA r27, [p2], #-4 + 6430 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4 + 6434 0x02 0xfe 0x56 0x98 LDA r18, [p2], #-4 + 6438 0x02 0x56 0x76 0x98 LDA r19, [p2, #20] + 6442 0x00 0x00 NOPX + 6444 0x00 0x00 NOPX + 6446 0x00 0x00 NOPX + 6448 0x00 0x00 NOPX + 6450 0x00 0x00 NOPX + 6452 0x14 0xa3 0x12 0x18 SEL.EQZ r17, r18, r17, r27 + 6456 0x0a 0x06 0x31 0x98 ST r17, [p2] + 6460 0x00 0x00 NOPX + 6462 0x00 0x00 NOPX + 6464 0x00 0x00 NOPX + 6466 0x00 0x00 NOPX + 6468 0x14 0xd3 0x08 0x18 ACQ r19, r16 + 6472 0xd1 0x11 0x60 0x01 0x00 0x29 0xce 0x60 0x79 0x3a MOVS p6, p2; MOVX r16, #1; MOV r14, p6 + 6482 0x00 0x00 NOPX + 6484 0x00 0x00 NOPX + 6486 0x07 0xee 0x19 0x18 LDA p4, [sp, #-20] + 6490 0x60 0xc6 0xdf 0xd8 0x3b 0x0c LDA r17, [p3]; ST p0, [sp, #-20] + 6496 0x20 0xd2 0xd6 0xdd 0x81 0xd4 LDA r20, [p1]; MOV p3, p7 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 6502 0x02 0x4e 0x56 0x98 LDA r18, [p2], #16 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 6506 0x00 0x5d 0x1e 0x98 LDA p2, [p0], #20 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6510 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6514 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6516 0x04 0x06 0x76 0x98 LDA r19, [p4] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6520 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 6522 0x00 0x0b 0xf0 0x00 0x01 0x04 JL #6112 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6528 0x1b 0xd4 0xc0 0xf8 MOV r15, p2 +.delay_slot + 6532 0x14 0x63 0x0d 0x98 LSHL r17, r17, r16 +.delay_slot + 6536 0x15 0x21 0x0d 0x98 LSHL r16, r20, r16 +.delay_slot + 6540 0x19 0x69 0x41 0x58 ADD.NC p1, r18, r16 +.delay_slot + 6544 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xe2 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV +.return_address + 6560 0xc8 0xc6 0xd0 0x01 0x00 0x28 0xb3 0xd0 0x78 0xba LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 + 6570 0x00 0x07 0xcc 0xc8 0x10 0x44 MOVXM p6, #508936 + 6576 0x00 0x00 NOPX + 6578 0x00 0x00 NOPX + 6580 0x00 0x00 NOPX + 6582 0x00 0x00 NOPX + 6584 0x00 0x00 NOPX + 6586 0x14 0x51 0x08 0x18 REL r17, r16 + 6590 0x01 0xf6 0x36 0x98 LDA r17, [p1, #-4] + 6594 0x07 0xed 0x19 0x18 LDA p2, [sp, #-20] + 6598 0x00 0x00 NOPX + 6600 0x00 0x00 NOPX + 6602 0x00 0x00 NOPX + 6604 0x00 0x00 NOPX + 6606 0x00 0x00 NOPX + 6608 0x14 0x23 0x11 0x98 SUB r17, r16, r17 + 6612 0x4a 0xc6 0xd3 0xec 0x63 0x0c LDA r17, [p2, #20]; ST r17, [p1, #-4] + 6618 0x00 0x00 NOPX + 6620 0x00 0x00 NOPX + 6622 0x00 0x00 NOPX + 6624 0x00 0x00 NOPX + 6626 0x00 0x00 NOPX + 6628 0x00 0x00 NOPX + 6630 0x14 0x51 0x08 0x18 REL r17, r16 + 6634 0xfc 0xce 0xd0 0x00 0x01 0xf0 0xb1 0xe0 0x10 0xba LDA r19, [p7, #-8]; MOVXM p1, #508864 + 6644 0x06 0x06 0x56 0x98 LDA r18, [p6] + 6648 0x01 0x06 0x36 0x98 LDA r17, [p1] + 6652 0x00 0x00 NOPX + 6654 0x00 0x00 NOPX + 6656 0x00 0x00 NOPX + 6658 0x00 0x00 NOPX + 6660 0x14 0x21 0x31 0x98 SUB r16, r16, r19 + 6664 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8] + 6668 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 6672 0x80 0x0d 0x18 0x40 0x01 0x84 JNZ r16, #6704 +.delay_slot +.swstall delay_slot + 6678 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6680 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6682 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6684 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6686 0x00 0x00 NOPX + 6688 0x10 0x20 0x01 0x18 MOVX r16, #0 + 6692 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x83 0x08 0xc1 0x36 NOPA; NOPB; ST r16, [p1]; NOPX +.label TGT_F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 + 6704 0x07 0xf0 0x39 0x18 LDA lr, [sp, #-16] + 6708 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] + 6712 0x07 0xf7 0x99 0x18 LDA p7, [sp, #-12] +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 6716 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.noswbrkpt + 6718 0x07 0xf9 0xd1 0x18 LDA r14, [sp, #-8] +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 6722 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 6724 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 6726 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6730 0x0e 0x8e 0x0b 0x18 MOVS p6, r14 +.delay_slot + 6734 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 6740 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6742 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6744 0x00 0x00 NOPX +.label _Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end +.label __Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0 + +.text_segment PM 6752 +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.function_start + 6752 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 6756 0x00 0x07 0xc0 0xca 0x40 0x44 MOVXM p0, #509216 +.delay_slot + 6762 0x18 0x00 0x80 0xb8 MOV m0, #64 +.delay_slot + 6766 0x08 0x04 0x01 0x98 ST m0, [p0] +.delay_slot + 6770 0x08 0x14 0x01 0x98 ST m0, [p0, #4] +.delay_slot +.swstall delay_slot + 6774 0x00 0x00 NOPX +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_end0 + +.text_segment PM 6784 +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.function_start + 6784 0x23 0x85 0xd0 0x00 0x01 0xf0 0x32 0x80 0x10 0xba LDA el0, [p1], #4; MOVXM p0, #509184 + 6794 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 6800 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4] + 6804 0x00 0x00 NOPX + 6806 0x00 0x00 NOPX + 6808 0x00 0x00 NOPX + 6810 0x00 0x00 NOPX + 6812 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 6816 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 6820 0x00 0x00 NOPX + 6822 0x00 0x00 NOPX + 6824 0x00 0x00 NOPX + 6826 0x00 0x00 NOPX + 6828 0x00 0x00 NOPX + 6830 0x00 0x00 NOPX + 6832 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 6836 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 6840 0x00 0x00 NOPX + 6842 0x00 0x00 NOPX + 6844 0x00 0x00 NOPX + 6846 0x00 0x00 NOPX + 6848 0x00 0x00 NOPX + 6850 0x00 0x00 NOPX + 6852 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 6856 0x01 0x14 0x2e 0x98 LDA el0, [p1, #4] + 6860 0x00 0x00 NOPX + 6862 0x00 0x00 NOPX +.no_stack_arguments + 6864 0x00 0x0d 0x30 0x00 0x01 0x04 JL #6752 +.delay_slot + 6870 0x0f 0xfb 0x9d 0x98 ST p7, [sp, #-8] +.delay_slot +.swstall delay_slot + 6874 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6876 0x00 0x00 NOPX +.delay_slot + 6878 0x08 0xdc 0x29 0x98 ST el0, [p0], #-12 +.delay_slot + 6882 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x7b 0x06 0x07 0x00 0x00 0x1c 0x2e NOPA; NOPS; MOV p7, p0; NOPV +.return_address + 6896 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] + 6900 0x00 0x00 NOPX + 6902 0x00 0x00 NOPX + 6904 0x00 0x00 NOPX + 6906 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 6908 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 6910 0x07 0xfb 0x99 0x18 LDA p7, [sp, #-8] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6914 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6918 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6920 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6922 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6924 0x10 0x20 0x01 0x18 MOVX r16, #0 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6928 0xe8 0xc2 0x30 0x3f 0xfe 0x00 0x00 0x00 0x71 0x3a ST r16, [p7, #16]; PADDXM [sp], #-64 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + +.text_segment PM 6944 +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.function_start + 6944 0x04 0x00 0x80 0x00 0x01 0xf1 0xb2 0x80 0x10 0xba MOVA m0, #32; MOVXM p3, #509184 + 6954 0x61 0x06 0xd0 0x00 0x01 0xf2 0x32 0x08 0x10 0xba LDA r1, [p3], m0; MOVXM p4, #508944 + 6964 0x60 0x90 0xd0 0x18 0x07 0x88 0x6f 0xfa 0x58 0xba LDA m1, [p3]; MOVX r0, #828; MOV r3, #-6 + 6974 0x62 0x80 0xd0 0x00 0x00 0x04 0x7d 0xf0 0x10 0xba LDA m0, [p3, #4]; MOVXM ls, #7136 + 6984 0x80 0x88 0x50 0x00 0x00 0x05 0xbd 0xf8 0x10 0xba LDA.s8 r2, [p4]; MOVXM le, #7152 + 6994 0x00 0x00 NOPX + 6996 0x00 0x00 NOPX + 6998 0x00 0x00 NOPX + 7000 0x10 0x42 0x3d 0x98 LSHL r1, r1, r3 + 7004 0x1d 0x70 0xfc 0x98 ADD.NC lc, r1, #-7 + 7008 0x21 0x13 0x70 0x50 0xe8 0x3c VLDA x2, [p1], m0; VLDB x1, [p0], m1 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7014 0x21 0x1b 0x70 0x50 0x68 0xba 0x80 0x12 VLDA x3, [p1], m0; VLDB x0, [p0], m1; MOVX crRnd, r2 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7022 0x21 0x13 0x70 0x50 0xe8 0x3c VLDA x2, [p1], m0; VLDB x1, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7028 0x21 0x1b 0x70 0x50 0x68 0x3c VLDA x3, [p1], m0; VLDB x0, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7034 0x21 0x13 0x70 0x50 0xe8 0x3c VLDA x2, [p1], m0; VLDB x1, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7040 0x21 0x1b 0x70 0x50 0x68 0x3c VLDA x3, [p1], m0; VLDB x0, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7046 0x21 0x13 0x70 0x50 0xe8 0x3c VLDA x2, [p1], m0; VLDB x1, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7052 0x21 0x1b 0x70 0x28 0x34 0x1d 0x00 0xe2 0x41 0x4a VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7062 0x21 0x13 0x70 0x28 0x74 0x1d 0x01 0xe0 0x61 0x4a VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7072 0x21 0x1b 0x70 0x28 0x34 0x1d 0x00 0xe2 0x41 0x4a VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7082 0x21 0x13 0x70 0x28 0x74 0x1d 0x01 0xe0 0x61 0x4a VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7092 0x21 0x1b 0x70 0x50 0x68 0x00 0xad 0x8e 0x00 0xe2 0x41 0x66 VLDA x3, [p1], m0; VLDB x0, [p0], m1; NOPS; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7104 0x21 0x13 0x70 0x50 0xe8 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x0f 0x03 0x0b VLDA x2, [p1], m0; VLDB x1, [p0], m1; NOPS; NOPX; NOPM; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7120 0x21 0x1b 0x70 0x50 0x6a 0x1c 0x23 0x00 0x00 0x00 0x01 0xa5 0x78 0x07 0x12 0x0b VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 +.label ZLS_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_192 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7136 0x21 0x13 0x70 0x50 0xea 0x1c 0xa3 0x00 0x00 0x00 0x01 0xa5 0x78 0x0f 0x03 0x0b VLDA x2, [p1], m0; VLDB x1, [p0], m1; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; VMUL.f dm1, x0, x3, r0 +.label ZLE_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_208 +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7152 0x21 0x1b 0x70 0x50 0x6a 0x1c 0x23 0x00 0x00 0x00 0x01 0xa5 0x78 0x07 0x12 0x0b VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 +.loop_nesting 0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7168 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7176 0x43 0x84 0x60 0x02 0x00 0xe2 0x41 0x62 VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7184 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7192 0x43 0x84 0x60 0x02 0x00 0xe2 0x41 0x62 VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7200 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7208 0x43 0x84 0x60 0x02 0x00 0xe2 0x41 0x62 VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7216 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7224 0x0a 0x1c 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p2], #64 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7228 0x43 0x94 0x60 0x50 0x00 0x5c VST.CONV.bf16.fp32 cml1, [p2], #64;RET lr +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7234 0x0a 0x1c 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p2], #64 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7238 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64 +.delay_slot + 7242 0x0a 0x1c 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p2], #64 +.delay_slot + 7246 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64 +.delay_slot +.swstall delay_slot + 7250 0x00 0x00 NOPX +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_end0 + +.text_segment PM 7264 +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0 +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.function_start + 7264 0x00 0x07 0xc8 0xc7 0x80 0x44 MOVXM p4, #508864 + 7270 0x80 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p4]; MOV r17, CORE_ID + 7276 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 7282 0xff 0x3a 0xb0 0x23 0x14 0x81 0xca 0x60 0x79 0x3a ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 + 7292 0xfd 0x83 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p0, [sp, #-20]; MOV r0, r15 + 7300 0x0f 0xfc 0x15 0x98 ST r0, [sp, #-4] + 7304 0x0f 0xf0 0x3d 0x98 ST lr, [sp, #-16] + 7308 0x00 0x00 NOPX + 7310 0x80 0x0e 0x90 0x40 0x01 0x84 JNZ r16, #7456 +.delay_slot + 7316 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 7320 0x00 0x07 0xc4 0xc7 0x98 0x44 MOVXM p2, #508876 +.delay_slot + 7326 0x40 0xc6 0x30 0x01 0x37 0x60 0x70 0x02 ST r17, [p2]; MOV p2, p7 +.delay_slot + 7334 0x1b 0xd6 0xc0 0xf8 MOV r15, p3 +.delay_slot + 7338 0xfe 0xa3 0xb0 0x00 0x01 0xf3 0xb2 0x80 0x11 0x3a ST p2, [sp, #-12]; MOVXM p7, #509184 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7348 0x13 0x91 0x60 0x00 0x01 0xf1 0x32 0x08 0x11 0x3a MOVS p0, p7; MOVXM p2, #508944 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7358 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x32 0x06 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #508940 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7368 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 7370 0x00 0x0d 0x40 0x00 0x01 0x04 JL #6784 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7376 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7378 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7380 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 7384 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 7388 0x0a 0x06 0x11 0x98 ST r16, [p2] +.return_address + 7392 0xe0 0xc2 0xd0 0x00 0x01 0xf0 0xb1 0xe6 0x10 0xba LDA r16, [p7]; MOVXM p1, #508876 + 7402 0x20 0xc6 0xd0 0x00 0x01 0xf1 0xb1 0xe8 0x10 0xba LDA r17, [p1]; MOVXM p3, #508880 + 7412 0xea 0xcb 0x50 0x00 0x01 0xf0 0xb1 0xea 0x10 0xba LDA.u16 r18, [p7, #10]; MOVXM p1, #508884 + 7422 0x00 0x00 NOPX + 7424 0x00 0x00 NOPX + 7426 0x00 0x00 NOPX + 7428 0x00 0x0e 0x98 0x00 0x00 0x84 J #7472 +.delay_slot + 7434 0x00 0x07 0xc4 0xc8 0x10 0x44 MOVXM p2, #508936 +.delay_slot + 7440 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 +.delay_slot + 7444 0x0a 0x06 0x51 0x98 ST r18, [p2] +.delay_slot + 7448 0x0b 0x06 0x11 0x98 ST r16, [p3] +.delay_slot + 7452 0x09 0x06 0x11 0x98 ST r16, [p1] +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 + 7456 0x00 0x07 0xc6 0xc7 0xa0 0x44 MOVXM p3, #508880 + 7462 0x00 0x2c 0xf0 0x00 0x01 0xf0 0xb1 0xea 0x10 0xba NOPA; MOVXM p1, #508884 +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 + 7472 0x18 0x67 0x86 0x18 ADD.NC p0, r15, #12 + 7476 0x1f 0xee 0xd0 0x00 0x01 0xf1 0x31 0xe0 0x10 0xba LDA r27, [p0], #-4; MOVXM p2, #508864 + 7486 0x00 0xfe 0x16 0x98 LDA r16, [p0], #-4 + 7490 0x00 0xfe 0x36 0x98 LDA r17, [p0], #-4 + 7494 0x02 0x06 0x56 0x98 LDA r18, [p2] + 7498 0x00 0x46 0x76 0x98 LDA r19, [p0, #16] + 7502 0x00 0x00 NOPX + 7504 0x00 0x00 NOPX + 7506 0x00 0x00 NOPX + 7508 0x00 0x00 NOPX + 7510 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 7514 0x00 0xc2 0x39 0x40 0x0e 0x5c ST r16, [p0]; ADD r16, r18, #1 + 7520 0x0a 0x06 0x11 0x98 ST r16, [p2] + 7524 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 7528 0x00 0x00 NOPX + 7530 0x00 0x00 NOPX + 7532 0x00 0x00 NOPX + 7534 0x14 0xd3 0x08 0x18 ACQ r19, r16 + 7538 0x1a 0x67 0x06 0x18 ADD.NC p2, r14, #12 + 7542 0x00 0x00 NOPX + 7544 0x00 0x00 NOPX + 7546 0x02 0xff 0x76 0x98 LDA r27, [p2], #-4 + 7550 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4 + 7554 0x02 0xfe 0x56 0x98 LDA r18, [p2], #-4 + 7558 0x02 0x56 0x76 0x98 LDA r19, [p2, #20] + 7562 0x00 0x00 NOPX + 7564 0x00 0x00 NOPX + 7566 0x00 0x00 NOPX + 7568 0x00 0x00 NOPX + 7570 0x00 0x00 NOPX + 7572 0x14 0xa3 0x12 0x18 SEL.EQZ r17, r18, r17, r27 + 7576 0x0a 0x06 0x31 0x98 ST r17, [p2] + 7580 0x00 0x00 NOPX + 7582 0x00 0x00 NOPX + 7584 0x00 0x00 NOPX + 7586 0x00 0x00 NOPX + 7588 0x14 0xd3 0x08 0x18 ACQ r19, r16 + 7592 0xd1 0x11 0x60 0x01 0x00 0x29 0xce 0x60 0x79 0x3a MOVS p6, p2; MOVX r16, #1; MOV r14, p6 + 7602 0x00 0x00 NOPX + 7604 0x00 0x00 NOPX + 7606 0x07 0xee 0x19 0x18 LDA p4, [sp, #-20] + 7610 0x60 0xc6 0xdf 0xd8 0x3b 0x0c LDA r17, [p3]; ST p0, [sp, #-20] + 7616 0x20 0xd2 0xd6 0xdd 0x81 0xd4 LDA r20, [p1]; MOV p3, p7 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 7622 0x02 0x4e 0x56 0x98 LDA r18, [p2], #16 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 7626 0x00 0x5d 0x1e 0x98 LDA p2, [p0], #20 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7630 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7634 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7636 0x04 0x06 0x76 0x98 LDA r19, [p4] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7640 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 7642 0x00 0x0d 0x90 0x00 0x01 0x04 JL #6944 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7648 0x1b 0xd4 0xc0 0xf8 MOV r15, p2 +.delay_slot + 7652 0x14 0x63 0x0d 0x98 LSHL r17, r17, r16 +.delay_slot + 7656 0x15 0x21 0x0d 0x98 LSHL r16, r20, r16 +.delay_slot + 7660 0x19 0x69 0x41 0x58 ADD.NC p1, r18, r16 +.delay_slot + 7664 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xe2 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV +.return_address + 7680 0xc8 0xc6 0xd0 0x01 0x00 0x28 0xb3 0xd0 0x78 0xba LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 + 7690 0x00 0x07 0xcc 0xc8 0x10 0x44 MOVXM p6, #508936 + 7696 0x00 0x00 NOPX + 7698 0x00 0x00 NOPX + 7700 0x00 0x00 NOPX + 7702 0x00 0x00 NOPX + 7704 0x00 0x00 NOPX + 7706 0x14 0x51 0x08 0x18 REL r17, r16 + 7710 0x01 0xf6 0x36 0x98 LDA r17, [p1, #-4] + 7714 0x07 0xed 0x19 0x18 LDA p2, [sp, #-20] + 7718 0x00 0x00 NOPX + 7720 0x00 0x00 NOPX + 7722 0x00 0x00 NOPX + 7724 0x00 0x00 NOPX + 7726 0x00 0x00 NOPX + 7728 0x14 0x23 0x11 0x98 SUB r17, r16, r17 + 7732 0x4a 0xc6 0xd3 0xec 0x63 0x0c LDA r17, [p2, #20]; ST r17, [p1, #-4] + 7738 0x00 0x00 NOPX + 7740 0x00 0x00 NOPX + 7742 0x00 0x00 NOPX + 7744 0x00 0x00 NOPX + 7746 0x00 0x00 NOPX + 7748 0x00 0x00 NOPX + 7750 0x14 0x51 0x08 0x18 REL r17, r16 + 7754 0xfc 0xce 0xd0 0x00 0x01 0xf0 0xb1 0xe0 0x10 0xba LDA r19, [p7, #-8]; MOVXM p1, #508864 + 7764 0x06 0x06 0x56 0x98 LDA r18, [p6] + 7768 0x01 0x06 0x36 0x98 LDA r17, [p1] + 7772 0x00 0x00 NOPX + 7774 0x00 0x00 NOPX + 7776 0x00 0x00 NOPX + 7778 0x00 0x00 NOPX + 7780 0x14 0x21 0x31 0x98 SUB r16, r16, r19 + 7784 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8] + 7788 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 7792 0x80 0x0f 0x48 0x40 0x01 0x84 JNZ r16, #7824 +.delay_slot +.swstall delay_slot + 7798 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7800 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7802 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7804 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7806 0x00 0x00 NOPX + 7808 0x10 0x20 0x01 0x18 MOVX r16, #0 + 7812 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x83 0x08 0xc1 0x36 NOPA; NOPB; ST r16, [p1]; NOPX +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 + 7824 0x07 0xf0 0x39 0x18 LDA lr, [sp, #-16] + 7828 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] + 7832 0x07 0xf7 0x99 0x18 LDA p7, [sp, #-12] +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 7836 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.noswbrkpt + 7838 0x07 0xf9 0xd1 0x18 LDA r14, [sp, #-8] +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7842 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7844 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7846 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7850 0x0e 0x8e 0x0b 0x18 MOVS p6, r14 +.delay_slot + 7854 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 7860 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7862 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7864 0x00 0x00 NOPX +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0 + +.text_segment PM 7872 +.label _ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv +.label __ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv___func_begin0 +.function_start + 7872 0x03 0x8e 0xd0 0x00 0x01 0xf0 0xb1 0x00 0x10 0xba LDA r3, [p0], #4; MOVXM p1, #508416 + 7882 0xff 0xdd 0x00 0x3e 0x67 0xa8 0xaf 0xfc 0x58 0xba MOVA r29, #-2; MOVX r6, #-3; MOV r5, #-4 + 7892 0x00 0x18 0x00 0x00 0x10 0x28 0x08 0x08 0x58 0xba MOVA r24, #0; MOVX r1, #1; MOV r0, #8 + 7902 0x20 0x04 0x00 0x00 0x3f 0xff 0x8f 0xff 0x10 0xba MOVA r4, #256; MOVXM r28, #16777214 + 7912 0x00 0xf0 0x00 0x01 0x31 0x28 0x4a 0x00 0x58 0xba MOVA r16, #7; MOVX r19, #9; MOV r2, #512 + 7922 0x11 0xc0 0x20 0x00 0xd1 0x64 MOVX r7, #128; MOV m0, #52 + 7928 0x18 0x80 0x00 0xf8 MOV dj0, m0 + 7932 0x23 0x8e 0x31 0xec 0xdb 0x5c ST r3, [p1], #4; LSHL r27, r3, r6 + 7938 0x03 0x8e 0xd1 0xc4 0xbb 0x2c LDA r3, [p0], #4; LSHL r17, r3, r5 + 7944 0xdf 0xc1 0xb8 0xb1 0xff 0x24 LSHL r31, r27, r0; ADD.NC r17, r17, #-1 + 7950 0xdc 0xe7 0xb9 0x3f 0x22 0xa4 LSHL r19, r27, r19; ADD.NC r18, r31, r4 + 7956 0x16 0xed 0x0d 0x98 LSHL r22, r27, r16 + 7960 0x00 0x00 NOPX + 7962 0x00 0x00 NOPX + 7964 0x00 0x00 NOPX + 7966 0x23 0x8e 0x31 0xe8 0xdb 0x5c ST r3, [p1], #4; LSHL r26, r3, r6 + 7972 0x03 0xd6 0xdd 0x53 0xfe 0x2c LDA r21, [p0], #4; ADD r20, r26, #-1 + 7978 0x15 0xaf 0x4f 0x98 MUL r23, r22, r20 + 7982 0x00 0x00 NOPX + 7984 0x11 0xfd 0x71 0x98 SUB r30, r7, r23 + 7988 0x00 0x00 NOPX + 7990 0x00 0x00 NOPX + 7992 0x00 0x00 NOPX + 7994 0x23 0xd6 0x31 0x8e 0xbf 0x5c ST r21, [p1], #4; MUL r3, r3, r21 + 8000 0x03 0x85 0xda 0x98 0xdb 0x2c LDA el0, [p0], #4; LSHL r6, r21, r6 + 8006 0xde 0x4d 0xfe 0x26 0xe2 0xa4 MUL r25, r27, r6; ADD.NC r28, r6, r28 + 8012 0xa9 0x4b 0xba 0xba 0xa2 0xa4 LSHL r5, r21, r5; ADD.NC r21, r26, r20 + 8018 0x10 0x19 0x00 0x33 0xde 0xec 0xa9 0x7f 0xc8 0xba MOVA r25, #128; LSHL r29, r25, r29; ADD.NC r5, r5, #-1 + 8028 0x17 0xc5 0xce 0x18 MSC r2, r2, r31, r28 + 8032 0x11 0x8d 0x0d 0x98 LSHL r6, r6, r16 + 8036 0x15 0x6b 0x0d 0x98 LSHL r21, r21, r16 + 8040 0x23 0x85 0x3d 0x70 0x1b 0x5c ST el0, [p1], #4; LSHL r28, r26, r0 + 8046 0x00 0x85 0xdd 0x42 0x1b 0x2c LDA el0, [p0]; LSHL r16, r26, r16 + 8052 0x17 0x37 0x51 0x98 SUB r27, r28, r21 + 8056 0x11 0x40 0x0d 0x98 LSHL r0, r5, r0 + 8060 0x11 0x81 0x46 0x18 MAC r0, r0, r6, r20 + 8064 0x11 0xb3 0x4e 0x18 MSC r25, r25, r6, r20 + 8068 0x00 0x00 NOPX + 8070 0x00 0x00 NOPX + 8072 0x09 0x1c 0x29 0x98 ST el0, [p1], #4 + 8076 0x00 0x14 0x2e 0x98 LDA el0, [p0, #4] + 8080 0x00 0x00 NOPX + 8082 0x00 0x00 NOPX + 8084 0x00 0x00 NOPX + 8086 0x00 0x00 NOPX + 8088 0x00 0x00 NOPX + 8090 0x00 0x00 NOPX + 8092 0x09 0x3c 0x29 0x98 ST el0, [p1], #12 + 8096 0x23 0x8e 0x30 0x00 0x69 0xa0 0x00 0x02 ST r3, [p1], #4; ADD.NC r3, r6, #-128 + 8104 0x23 0xf6 0x33 0xf6 0x03 0x5c ST r29, [p1], #4; SUB r29, r7, r16 + 8110 0x23 0xea 0x30 0x03 0x4d 0xa0 0x00 0x02 ST r26, [p1], #4; ADD.NC r26, r22, #-128 + 8118 0x09 0x1c 0x31 0x98 ST r1, [p1], #4 + 8122 0x09 0x1f 0x11 0x98 ST r24, [p1], #4 + 8126 0x09 0x1f 0xf1 0x98 ST r31, [p1], #4 + 8130 0x09 0x1c 0x91 0x98 ST r4, [p1], #4 + 8134 0x23 0xca 0x3d 0xca 0x01 0x5c ST r18, [p1], #4; ADD r18, r27, r16 + 8140 0x23 0xce 0x32 0x4e 0xe3 0x5c ST r19, [p1], #4; SUB r19, r4, r23 + 8146 0x23 0x96 0x33 0x12 0x9c 0x5c ST r5, [p1], #4; MSC r4, r4, r6, r20 + 8152 0x29 0x8a 0x3f 0x08 0x02 0x5c ST r2, [p1], #16; MOVX r2, #-128 + 8158 0x23 0xe2 0x3b 0xfc 0x4d 0x5c ST r24, [p1], #4; XOR r31, r23, r2 + 8164 0x3b 0xe2 0x3c 0x5e 0xe3 0x5c ST r24, [p1], #-12; SUB r23, r24, r23 + 8170 0x23 0xe2 0x31 0x08 0x0d 0x5c ST r24, [p1], #4; XOR r2, r2, r0 + 8176 0x3d 0xe2 0x3c 0x00 0x03 0x5c ST r24, [p1], #-8; SUB r0, r24, r0 + 8182 0x09 0x5f 0x11 0x98 ST r24, [p1], #20 + 8186 0x09 0x1e 0x31 0x98 ST r17, [p1], #4 + 8190 0x09 0x1f 0xd1 0x98 ST r30, [p1], #4 + 8194 0x09 0x1c 0xb1 0x98 ST r5, [p1], #4 + 8198 0x09 0x1f 0xf1 0x98 ST r31, [p1], #4 + 8202 0x09 0x1e 0x91 0x98 ST r20, [p1], #4 + 8206 0x09 0x1f 0x51 0x98 ST r26, [p1], #4 + 8210 0x09 0x1c 0x31 0x98 ST r1, [p1], #4 + 8214 0x09 0x1c 0xf1 0x98 ST r7, [p1], #4 + 8218 0x09 0x1e 0x91 0x98 ST r20, [p1], #4 + 8222 0x09 0x1e 0xd1 0x98 ST r22, [p1], #4 + 8226 0x09 0x1c 0xb1 0x98 ST r5, [p1], #4 + 8230 0x09 0x1e 0xf1 0x98 ST r23, [p1], #4 + 8234 0x23 0xce 0x32 0xd7 0x8c 0x5c ST r19, [p1], #4; MAC r21, r21, r5, r28 + 8240 0x09 0x1c 0xf1 0x98 ST r7, [p1], #4 + 8244 0x23 0xc6 0x3c 0x72 0xa3 0x5c ST r17, [p1], #4; SUB r28, r24, r21 + 8250 0x23 0xf2 0x38 0x56 0xa3 0x5c ST r28, [p1], #4; SUB r21, r16, r21 + 8256 0x09 0x1c 0xb1 0x98 ST r5, [p1], #4 + 8260 0x09 0x1f 0x71 0x98 ST r27, [p1], #4 + 8264 0x09 0x1e 0x91 0x98 ST r20, [p1], #4 + 8268 0x09 0x1f 0xb1 0x98 ST r29, [p1], #4 + 8272 0x09 0x1c 0x31 0x98 ST r1, [p1], #4 + 8276 0x09 0x1e 0x11 0x98 ST r16, [p1], #4 + 8280 0x09 0x1e 0x91 0x98 ST r20, [p1], #4 + 8284 0x09 0x1c 0xf1 0x98 ST r7, [p1], #4 + 8288 0x09 0x1c 0xb1 0x98 ST r5, [p1], #4 + 8292 0x09 0x1e 0x51 0x98 ST r18, [p1], #4 + 8296 0x09 0x1e 0xb1 0x98 ST r21, [p1], #4 + 8300 0x09 0x1e 0x11 0x98 ST r16, [p1], #4 + 8304 0x09 0x1e 0x31 0x98 ST r17, [p1], #4 + 8308 0x09 0x1c 0x51 0x98 ST r2, [p1], #4 + 8312 0x09 0x1c 0xb1 0x98 ST r5, [p1], #4 + 8316 0x09 0x1f 0x31 0x98 ST r25, [p1], #4 + 8320 0x09 0x1e 0x91 0x98 ST r20, [p1], #4 + 8324 0x09 0x1c 0x71 0x98 ST r3, [p1], #4 + 8328 0x09 0x1c 0x31 0x98 ST r1, [p1], #4 + 8332 0x09 0x1c 0xf1 0x98 ST r7, [p1], #4 + 8336 0x09 0x1e 0x91 0x98 ST r20, [p1], #4 + 8340 0x09 0x1c 0xd1 0x98 ST r6, [p1], #4 + 8344 0x09 0x1c 0xb1 0x98 ST r5, [p1], #4 + 8348 0x09 0x1c 0x91 0x98 ST r4, [p1], #4 + 8352 0x09 0x1c 0x11 0x98 ST r0, [p1], #4 + 8356 0x09 0x08 0xf1 0x98 ST r7, [p1], m0 + 8360 0x09 0x1f 0x11 0x98 ST r24, [p1], #4 + 8364 0x09 0xdf 0x11 0x98 ST r24, [p1], #-12 + 8368 0x09 0x1f 0x11 0x98 ST r24, [p1], #4 + 8372 0x09 0xdf 0x11 0x98 ST r24, [p1], #-12 + 8376 0x09 0x1f 0x11 0x98 ST r24, [p1], #4 + 8380 0x09 0xdf 0x11 0x98 ST r24, [p1], #-12 + 8384 0x09 0x1f 0x11 0x98 ST r24, [p1], #4 + 8388 0x09 0xdf 0x11 0x98 ST r24, [p1], #-12 + 8392 0x09 0x1f 0x11 0x98 ST r24, [p1], #4 + 8396 0x3b 0xe2 0x30 0x50 0x00 0x5c ST r24, [p1], #-12; RET lr +.delay_slot + 8402 0x09 0x1f 0x11 0x98 ST r24, [p1], #4 +.delay_slot + 8406 0x09 0xdf 0x11 0x98 ST r24, [p1], #-12 +.delay_slot + 8410 0x09 0x1f 0x11 0x98 ST r24, [p1], #4 +.delay_slot + 8414 0x09 0x07 0x11 0x98 ST r24, [p1] +.delay_slot + 8418 0x09 0x03 0x11 0x98 ST r24, [p1, dj0] +.label _ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv__end +.label __ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv___func_end0 + +.text_segment PM 8432 +.label __Z8init_accILt1EEvPaS0_iii___func_begin0 +.label _Z8init_accILt1EEvPaS0_iii +.function_start + 8432 0x00 0x07 0xc4 0xc8 0x20 0x44 MOVXM p2, #508944 + 8438 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 8444 0x02 0x04 0x82 0x98 LDA.s8 r4, [p2] + 8448 0x00 0x00 0x21 0xe3 0x40 0x44 MOVXM ls, #8608 + 8454 0x00 0x1a 0x00 0x00 0x00 0x09 0xb8 0xf0 0x10 0xba MOVA r26, #0; MOVXM le, #8672 + 8464 0xff 0x85 0x00 0x00 0x00 0x09 0xb0 0xb8 0x10 0xba MOVA r5, #-4; MOVXM p3, #8560 + 8474 0x00 0xa3 0x00 0x02 0x52 0xec 0x41 0xa8 0xb8 0xba MOVA r3, #5; LSHL r5, r1, r5; VINSERT.32 x1, x0, #0, r26 + 8484 0x01 0xc7 0xb4 0xcb 0xc1 0xe4 LSHL r7, r0, r3; MOV p2, sp + 8490 0x10 0xc7 0xb0 0x05 0x25 0xe4 LSHL r3, r2, r3; VMOV bmll0, x1 + 8496 0x27 0x50 0x00 0xa5 0x81 0xe4 MOVX crRnd, r4; MOV r1, p1 + 8502 0x08 0x02 0xc5 0xff 0x20 0x01 0x00 0x28 0x80 0xd0 0x78 0x36 PADDB [p2], #-64; VCONV.bf16.fp32 wl0, bmll0; MOVX r16, #1; MOV m1, r3 + 8514 0xff 0xa6 0x00 0x05 0xb8 0x3c 0x6a 0x60 0x78 0xba MOVA r6, #-3; EQ r27, r2, r16; MOV r3, p2 + 8524 0x00 0x0d 0xb1 0x02 0x06 0xa4 LSHL r0, r0, r6; VEXTBCST.16 x1, x0, #0 + 8530 0x00 0x2c 0xf0 0x00 0x20 0xc2 0x12 0x00 0x3a 0x0f 0x30 0x11 0x60 0x7e NOPA; NOPB; MOVS p1, p0; SEL.EQZ r1, r3, r1, r27; MOV m0, r7 + 8544 0x00 0x2c 0xf0 0x17 0x22 0x04 0x53 0x0a 0x2f 0xf9 0x30 0x50 0x78 0x00 0x00 0xe1 NOPA; PADDB [p0], m0; VST x1, [p2]; ADD r2, r5, #-1; MOV p2, r1; NOPV +.label TGT_F_Z8init_accILt1EEvPaS0_iii_128 +.loop_nesting 1 + 8560 0x40 0x84 0x8a 0xe0 0x41 0xf4 VLDB wl0, [p2]; MOV lc, r0 + 8566 0x00 0x00 NOPX + 8568 0x00 0x00 NOPX + 8570 0x00 0x00 NOPX + 8572 0x00 0x00 NOPX + 8574 0x00 0x00 NOPX + 8576 0x00 0x00 NOPX + 8578 0x18 0x01 0x22 0xf8 VMOV wh0, wl0 + 8582 0x19 0x84 0x03 0x58 VEXTBCST.128 x3, x0, #0 + 8586 0x18 0x84 0x07 0x58 VEXTBCST.128 x1, x0, #1 + 8590 0x18 0x07 0x8a 0xf8 VCONV.fp32.bf16 cml0, x3 + 8594 0x18 0x83 0x8a 0xf8 VCONV.fp32.bf16 cmh0, x1 + 8598 0x19 0x00 0x12 0xf8 VMOV bmll1, bmll0 + 8602 0x00 0x2c 0xf2 0x84 0x25 0xd4 NOPA; VMOV bmlh1, bmhl0 +.label ZLS_F_Z8init_accILt1EEvPaS0_iii_176 +.loop_nesting 2 +.begin_of_loop + 8608 0x09 0x14 0x26 0x98 VST bmlh0, [p1, #64] + 8612 0x09 0x2c 0x86 0x98 VST bmll1, [p1], #128 + 8616 0x09 0x14 0x26 0x98 VST bmlh0, [p1, #64] + 8620 0x09 0x2c 0x86 0x98 VST bmll1, [p1], #128 + 8624 0x00 0x2c 0xf0 0x00 0x20 0x14 0x66 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmhh0, [p0, #64]; NOPX; NOPM; NOPV + 8640 0x00 0x2c 0xf0 0x00 0x20 0x2c 0xa6 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmlh1, [p0], #128; NOPX; NOPM; NOPV + 8656 0x00 0x2c 0xf0 0x00 0x20 0x14 0x66 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmhh0, [p0, #64]; NOPX; NOPM; NOPV +.label ZLE_F_Z8init_accILt1EEvPaS0_iii_240 +.end_of_loop + 8672 0x00 0x2c 0xf0 0x00 0x20 0x2c 0xa6 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmlh1, [p0], #128; NOPX; NOPM; NOPV +.loop_nesting 1 + 8688 0x21 0x0c 0xf4 0x57 0x20 0x84 0xe0 0x12 PADDA [p1], m0; PADDB [p2], m1; JNZD r2, r2, p3 +.delay_slot + 8696 0x38 0x0b 0x90 0x18 PADDB [p0], m0 +.delay_slot +.swstall delay_slot + 8700 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8702 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8704 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8706 0x00 0x00 NOPX +.loop_nesting 0 + 8708 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 8712 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 8718 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8720 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8722 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8724 0x00 0x00 NOPX +.label _Z8init_accILt1EEvPaS0_iii__end +.label __Z8init_accILt1EEvPaS0_iii___func_end0 + +.text_segment PM 8736 +.label __Z12post_processPai___func_begin0 +.label _Z12post_processPai +.function_start + 8736 0x40 0x00 0x82 0x80 0x8b 0x00 0x01 0xf0 0xb2 0x08 0x10 0x76 MOVA m0, #512; MOVS p2, p0; MOVXM p1, #508944 + 8748 0xff 0x21 0x00 0x00 0x20 0x08 0x88 0x01 0x58 0xba MOVA r1, #-7; MOVX r2, #0; MOV r4, #1 + 8758 0x20 0xe0 0x51 0x80 0x8b 0x05 0x02 0x2c 0x68 0x07 0x58 0x76 LDA.s8 r24, [p1]; MOVS p1, p0; OR r16, r2, r4; MOV r3, #7 + 8770 0x22 0x96 0xb0 0x00 0x10 0xec 0xa8 0x02 0x58 0xba VLDA bmlh1, [p1, #64]; LSHL r1, r0, r1; MOV r5, #2 + 8780 0x21 0x12 0xb0 0x21 0x21 0xec 0x08 0x60 0x78 0xba VLDA bmll1, [p1], m0; LSHL r18, r16, r3; MOV r0, p0 + 8790 0x00 0x66 0x00 0x0a 0x71 0x2d 0xb4 0x80 0xa8 0xba MOVA r6, #3; OR r7, r5, r2; ADD.NC p3, r18, r0 + 8800 0x62 0x8e 0xb0 0x0f 0x31 0xee 0xb8 0x7f 0xc8 0xba VLDA bmhh0, [p3, #64]; LSHL r19, r7, r3; ADD.NC lc, r1, #-1 + 8810 0x60 0x8a 0xb0 0x0d 0x11 0x2e 0x34 0xc0 0xa8 0xba VLDA bmhl0, [p3]; OR r17, r6, r2; ADD.NC p4, r19, r0 + 8820 0x82 0x86 0xb0 0x23 0x41 0xec 0x48 0x81 0x08 0xba VLDA bmlh0, [p4, #64]; LSHL r20, r17, r3; ADD.NC r2, r2, #4 + 8830 0x80 0x82 0xb0 0x31 0xd4 0x02 0xb5 0x00 0xa8 0xba VLDA bmll0, [p4]; MOVX crRnd, r24; ADD.NC p5, r20, r0 + 8840 0xa2 0x9e 0xb0 0x00 0x00 0x08 0x79 0x58 0x10 0xba VLDA bmhh1, [p5, #64]; MOVXM ls, #8880 + 8850 0xa0 0x9a 0xb0 0x00 0x00 0x09 0xb9 0x88 0x10 0xba VLDA bmhl1, [p5]; MOVXM le, #8976 + 8860 0x10 0x02 0x19 0x18 MOVX r1, #6 + 8864 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLS_F_Z12post_processPai_144 +.loop_nesting 1 +.begin_of_loop + 8880 0x49 0x94 0x68 0x54 0x3b 0x5c VST.CONV.bf16.fp32 cml1, [p2], #256;LSHL r21, r16, r1 + 8886 0x3d 0x83 0xb5 0x15 0x41 0xe4 LSHL r22, r7, r1; MOV dj2, r21 + 8892 0x08 0x0c 0x60 0x23 0x70 0xec 0x45 0x90 0x79 0x3a VST.CONV.bf16.fp32 cmh0, [p0, dj2];LSHL r23, r17, r1; MOV dj0, r22 + 8902 0x00 0x04 0x60 0x05 0x02 0x2c 0xc5 0xd0 0x79 0x3a VST.CONV.bf16.fp32 cml0, [p0, dj0];OR r16, r2, r4; MOV dj1, r23 + 8912 0x11 0x4e 0x25 0x98 OR r7, r5, r2 + 8916 0x04 0x1c 0x60 0x0d 0x11 0x2c 0x48 0x81 0x09 0x3a VST.CONV.bf16.fp32 cmh1, [p0, dj1];OR r17, r6, r2; ADD.NC r2, r2, #4 + 8926 0x22 0x96 0xb8 0x48 0x7b 0x2c VLDA bmlh1, [p1, #64]; LSHL r18, r16, r3 + 8932 0x21 0x12 0xb0 0x0f 0x31 0xed 0xb4 0x80 0xa8 0xba VLDA bmll1, [p1], m0; LSHL r19, r7, r3; ADD.NC p3, r18, r0 + 8942 0x62 0x8e 0xb0 0x23 0x41 0xee 0x34 0xc0 0xa8 0xba VLDA bmhh0, [p3, #64]; LSHL r20, r17, r3; ADD.NC p4, r19, r0 + 8952 0x60 0x8a 0xba 0xd4 0x02 0x94 VLDA bmhl0, [p3]; ADD.NC p5, r20, r0 + 8958 0x04 0x14 0x35 0x98 VLDA bmlh0, [p4, #64] + 8962 0x04 0x04 0x15 0x98 VLDA bmll0, [p4] + 8966 0x05 0x14 0xf5 0x98 VLDA bmhh1, [p5, #64] + 8970 0xa0 0x9a 0xb0 0x00 0x20 0x3c VLDA bmhl1, [p5]; NOPB +.label ZLE_F_Z12post_processPai_240 +.end_of_loop + 8976 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.loop_nesting 0 + 8992 0x49 0x94 0x68 0x54 0x3b 0x5c VST.CONV.bf16.fp32 cml1, [p2], #256;LSHL r21, r16, r1 + 8998 0x05 0x00 0x05 0x15 0x41 0xe4 RET lr; MOV dj2, r21 +.delay_slot + 9004 0x08 0x0c 0x63 0xd8 0x3b 0x5c VST.CONV.bf16.fp32 cmh0, [p0, dj2];LSHL r22, r7, r1 +.delay_slot + 9010 0x8d 0xc3 0xb1 0x16 0x41 0xe4 LSHL r23, r17, r1; MOV dj0, r22 +.delay_slot + 9016 0x00 0x04 0x60 0x00 0xc5 0xd0 0x70 0x02 VST.CONV.bf16.fp32 cml0, [p0, dj0]; MOV dj1, r23 +.delay_slot + 9024 0x08 0x20 0xe3 0x18 VST.CONV.bf16.fp32 cmh1, [p0, dj1] +.delay_slot +.swstall delay_slot + 9028 0x00 0x00 NOPX +.label _Z12post_processPai__end +.label __Z12post_processPai___func_end0 + +.text_segment PM 9040 +.label __Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params___func_begin0 +.label _Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params +.function_start + 9040 0xda 0x90 0x84 0x9c 0x8b 0x00 0x01 0xf3 0xb1 0xa0 0x10 0x76 MOVA m4, #-300; MOVS p4, p7; MOVXM p7, #508736 + 9052 0x07 0x8a 0x16 0x98 LDA r16, [p7], m4 + 9056 0x00 0x00 NOPX + 9058 0x00 0x00 NOPX + 9060 0x00 0x00 NOPX + 9062 0x13 0x11 0x60 0x03 0x30 0x60 0x70 0x02 MOVS p0, p6; MOV p6, p0 + 9070 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 9076 0x0f 0xfc 0x1d 0x98 ST p0, [sp, #-4] + 9080 0xfe 0x43 0xb0 0x20 0x04 0x7a 0x10 0x00 0x61 0x3a ST p4, [sp, #-16]; JNZ r16, #9168 +.delay_slot + 9090 0x0f 0xf5 0x1d 0x98 ST p2, [sp, #-12] +.delay_slot + 9094 0x0f 0xec 0x9d 0x98 ST p1, [sp, #-20] +.delay_slot + 9098 0x00 0x32 0x07 0xf8 0x3d 0x80 0x01 0xf0 0x32 0x06 0x10 0x76 MOVA r18, #1; ST lr, [sp, #-8]; MOVXM p0, #508940 +.delay_slot + 9110 0x01 0x71 0x00 0x06 0x51 0x80 0x01 0xf0 0x32 0x08 0x10 0x76 MOVA r17, #11; ST r18, [p0]; MOVXM p0, #508944 +.delay_slot + 9122 0x00 0xc4 0xe0 0x00 0x01 0xf2 0xb1 0x00 0x10 0xba ST.s8 r17, [p0]; MOVXM p5, #508416 + 9132 0xa5 0x82 0xd0 0xc9 0x81 0xd4 LDA r0, [p5], #8; MOV p0, p2 + 9138 0x05 0x04 0x36 0x98 LDA r1, [p5] + 9142 0x05 0x14 0x56 0x98 LDA r2, [p5, #4] +.no_stack_arguments + 9146 0x00 0x10 0x78 0x00 0x01 0x04 JL #8432 +.delay_slot + 9152 0x19 0x66 0xc0 0xf8 MOV p1, p3 +.delay_slot +.swstall delay_slot + 9156 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9158 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9160 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9162 0x00 0x2c 0xf0 0x00 0x20 0x3c NOPA; NOPB +.label TGT_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_128 +.return_address + 9168 0x17 0x10 0x00 0x3b 0x21 0x0a 0x00 0xdc 0x58 0xba MOVA r16, #184; MOVX r18, #-184; MOV m4, #220 + 9178 0x07 0x8b 0x76 0x98 LDA r27, [p7], m4 + 9182 0x1c 0xde 0xc0 0xf8 MOV r19, p7 + 9186 0x1d 0x19 0xe4 0x18 ADD.NC r20, r19, #-56 + 9190 0x1f 0x69 0x51 0x58 ADD.NC p7, r18, r20 + 9194 0x10 0xe2 0xc1 0x18 MOVX r17, #240 + 9198 0x00 0x00 NOPX + 9200 0x00 0x00 NOPX + 9202 0x15 0x25 0x32 0x18 SEL.EQZ r18, r20, r19, r27 + 9206 0x84 0x22 0x46 0xd2 0x04 0x24 SEL.EQZ r16, r16, r17, r27; ADD.NC p3, r18, #4 + 9212 0x63 0x98 0xd0 0x36 0x04 0xa0 0x00 0x00 0x60 0xba LDA dj1, [p3], #4; JZ r27, #9472 +.delay_slot + 9222 0x63 0xd4 0xd7 0x10 0x41 0xd4 LDA dn5, [p3], #4; MOV dj3, r16 +.delay_slot + 9228 0x03 0x1e 0xc6 0x98 LDA dj5, [p3], #4 +.delay_slot + 9232 0x07 0x60 0xa6 0x98 LDA dn1, [p7, dj3] +.delay_slot + 9236 0x03 0x06 0x16 0x98 LDA r16, [p3] +.delay_slot + 9240 0x03 0x16 0x06 0x98 LDA m4, [p3, #4] + 9244 0xfd 0xb3 0x20 0x00 0x01 0xf1 0x31 0x10 0x10 0xba LDA p3, [sp, #-20]; MOVXM p2, #508448 + 9254 0x40 0xce 0xd0 0x00 0x00 0x08 0x7a 0x40 0x10 0xba LDA r19, [p2]; MOVXM ls, #9344 + 9264 0xff 0x54 0x00 0x00 0x00 0x09 0xba 0x60 0x10 0xba MOVA r20, #-6; MOVXM le, #9408 + 9274 0x04 0x5a 0x29 0x20 0xd5 0x64 MOVX r17, #52; MOV r18, #53 + 9280 0x00 0x00 NOPX + 9282 0x00 0x00 NOPX + 9284 0x00 0x00 NOPX + 9286 0x1a 0x66 0xc0 0xf8 MOV p2, p3 + 9290 0x42 0x80 0xf9 0xce 0x9b 0x2c VLDA lfh0, [p2, #64]; LSHL r19, r19, r20 + 9296 0x45 0x90 0xfa 0xf3 0xfe 0x14 VLDA lfl0, [p2], #128; ADD.NC lc, r19, #-2 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9302 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9304 0x02 0x14 0x07 0x98 VLDA lfh0, [p2, #64] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9308 0x02 0x2c 0x87 0x98 VLDA lfl0, [p2], #128 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9312 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9314 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9316 0x1c 0x21 0x92 0xf8 VMOV x8, lfh0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9320 0x00 0x2b 0x60 0x00 0x50 0xc9 0x70 0x02 NOPS; VMOV x1, lfh0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9328 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x12 0xc9 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VMOV x0, lfl0; NOPV +.label ZLS_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_304 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9344 0x42 0x80 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0xc2 0x24 0x38 0x00 0x00 0xe1 VLDA lfh0, [p2, #64]; NOPB; NOPS; NOPX; VSHUFFLE x3, x0, x8, r18; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9360 0x45 0x90 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x82 0x22 0x38 0x00 0x00 0xe1 VLDA lfl0, [p2], #128; NOPB; NOPS; NOPX; VSHUFFLE x2, x0, x8, r17; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9376 0x00 0x2c 0xf0 0x00 0x23 0x14 0xd3 0x00 0x00 0x02 0x10 0xc9 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST x3, [p3, #64]; NOPX; VMOV x8, lfh0; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9392 0x00 0x2c 0xf0 0x00 0x23 0x2c 0x93 0x00 0x00 0x00 0x50 0xc9 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST x2, [p3], #128; NOPX; VMOV x1, lfh0; NOPV +.label ZLE_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_368 +.end_of_loop +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9408 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x12 0xc9 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VMOV x0, lfl0; NOPV +.loop_nesting 0 + 9424 0x19 0x84 0x48 0x78 VSHUFFLE x3, x0, x8, r18 + 9428 0x19 0x04 0x44 0x78 VSHUFFLE x2, x0, x8, r17 + 9432 0x62 0x9a 0x60 0x02 0x10 0xc9 0x70 0x02 VST x3, [p3, #64]; VMOV x8, lfh0 + 9440 0x65 0x92 0x60 0x00 0x12 0xc9 0x70 0x02 VST x2, [p3], #128; VMOV x0, lfl0 + 9448 0x19 0x84 0x48 0x78 VSHUFFLE x3, x0, x8, r18 + 9452 0x19 0x04 0x44 0x78 VSHUFFLE x2, x0, x8, r17 + 9456 0x62 0x9a 0x60 0x00 0x50 0xc9 0x70 0x02 VST x3, [p3, #64]; VMOV x1, lfh0 + 9464 0x65 0x92 0x60 0x00 0x01 0xa5 0x70 0x02 VST x2, [p3], #128; NOPM +.label TGT_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_432 + 9472 0x3f 0x1f 0x90 0x18 PADDB [p7], #64 + 9476 0x07 0x1d 0xc6 0x98 LDA dj3, [p7], #4 + 9480 0xe3 0xb4 0xda 0x1f 0x71 0x54 LDA dn3, [p7], #4; MOV m5, #-36 + 9486 0x07 0xaa 0x56 0x98 LDA r18, [p7], m5 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 9490 0xe7 0xd2 0xd0 0x00 0x01 0xf1 0xb2 0x08 0x10 0xba LDA r20, [p7], #12; MOVXM p3, #508944 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 9500 0x60 0xd0 0x50 0x00 0x00 0x0e 0xef 0xc0 0x10 0xba LDA.s8 r20, [p3]; MOVXM r23, #16256 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9510 0xfe 0xb3 0x25 0xba 0xe5 0xd4 LDA p3, [sp, #-12]; VBCST.16 x5, r23 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9516 0xc2 0xcd 0x7c 0x01 0x51 0x54 VLDA.CONV.fp32.bf16 cmh4, [p6, #64]; MOV m6, #84 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9522 0xe3 0xf0 0xd4 0xba 0xe5 0xd4 LDA m7, [p7], #4; VBCST.16 x4, r23 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9528 0xe3 0xb0 0xda 0x51 0x25 0xd4 LDA m3, [p7], #4; VMOV x10, x4 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9534 0xe3 0x90 0xdb 0x55 0x25 0xd4 LDA m1, [p7], #4; VMOV x11, x5 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9540 0xf9 0x60 0xda 0x5f 0xf6 0x2c LDA m6, [p7], m6; ADD r23, r20, #-2 + 9546 0xf9 0x80 0xd4 0x04 0x61 0x54 LDA m0, [p7], #-16; MOV m2, #280 + 9552 0xe3 0x84 0xda 0x1e 0x51 0x54 LDA dn0, [p7], #4; MOV m5, #-108 + 9558 0x07 0x1c 0x46 0x98 LDA dj0, [p7], #4 + 9562 0x07 0x1e 0x26 0x98 LDA dn4, [p7], #4 + 9566 0x07 0x2e 0x46 0x98 LDA dj4, [p7], #8 + 9570 0xf5 0x50 0xd9 0x80 0x01 0x54 LDA m5, [p7], m5; MOV dc4, #0 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 9576 0xe9 0x6a 0xd0 0x8c 0x8b 0x03 0x2f 0x60 0x72 0xba LDA r26, [p7], m2; MOVS p0, p3; MOV r25, p7 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 9586 0xfd 0xf3 0x20 0xd7 0x20 0x00 0x01 0x26 0x5e 0x02 0x02 0x09 0x60 0x7e LDA p7, [sp, #-20]; PADDB [p0], m3; MOVS dc0, dc4; MOVXM p2, #9696 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 9600 0x02 0xa6 0xb2 0x10 0x4b 0x02 0xe4 0xc0 0x72 0xba VLDA bmlh2, [p0, #64]; MOVS dc2, dc4; MOV dc5, dc4 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 9610 0x04 0xaa 0xb4 0x8c 0x8b 0x01 0x43 0x80 0x72 0xba VLDA bmhl2, [p0, #128]; MOVS p4, p3; MOV dj2, dj3 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 9620 0x06 0xae 0xb7 0x97 0x23 0x22 0x31 0x61 0x20 0x0f 0x31 0x91 0x60 0x7e VLDA bmhh2, [p0, #192]; PADDB [p3], m6; MOVS p1, p3; MOVX r17, #780; MOV r24, m1 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9634 0x62 0x86 0xb0 0x00 0x25 0x3a 0x80 0x51 0xec 0x0f 0x60 0x09 0x60 0x7e VLDA bmlh0, [p3, #64]; NOPB; MOVS dc3, dc0; MOVX crRnd, r20; MOV r20, p7 + 9648 0x64 0x8a 0xb0 0x00 0x22 0x4e 0x4b 0x01 0x36 0x89 0x03 0x00 0x78 0x00 0x00 0xe1 VLDA bmhl0, [p3, #128]; NOPB; MOVS dn2, dn3; MOVX r19, #52; MOV m2, m3; NOPV +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 9664 0x66 0x8e 0xb0 0x00 0x21 0x0c 0x4b 0x01 0x56 0xa9 0x84 0x90 0x78 0x00 0x00 0xe1 VLDA bmhh0, [p3, #192]; NOPB; MOVS dc1, dc3; MOVX r21, #53; MOV m3, r18; NOPV +.aggressive_scheduled_block_id 4 +.noswbrkpt + 9680 0x60 0x82 0xbe 0x2a 0x6c 0x28 0x5b 0x01 0x67 0x8a 0xb4 0x60 0x78 0x00 0x00 0xe1 VLDA bmll0, [p3]; VLDB x4, [p7, #64]; PADDS [p4], m1; MOVX r22, #60; MOV p5, p4; NOPV +.label TGT_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_656 +.loop_nesting 1 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9696 0x82 0x96 0xbb 0xd7 0x23 0x94 0x8b 0x00 0x84 0x10 0x70 0xf6 VLDA bmlh1, [p4, #64]; PADDB [p5], m7; MOVS p3, p5; MOV m1, r16 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9708 0x84 0x9a 0xb3 0x73 0x90 0x02 0xbe 0xbf 0x4e 0xba VLDA bmhl1, [p4, #128]; PADDB.2D [p3], d3; ADD.NC lc, r26, #-3 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9718 0x86 0x9e 0xb0 0x00 0x00 0x08 0x7b 0x58 0x10 0xba VLDA bmhh1, [p4, #192]; MOVXM ls, #9904 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9728 0x80 0x92 0xbe 0x73 0xe8 0x00 0x01 0x37 0x70 0x02 0x93 0x91 0x60 0x7e VLDA bmll1, [p4]; VLDB.3D x7, [p7], d1; MOVS p4, p7; MOVXM le, #9984 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9742 0xa2 0xb6 0xb4 0x8b 0x90 0x01 0x82 0x00 0x7e 0xba VLDA bmlh3, [p5, #64]; PADDB [p4], m4; MOV m3, m2 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9752 0x80 0xcb 0x78 0x2a 0xec 0x9c 0x8b 0x32 VLDA x9, [p4]; VLDB x5, [p4, #64]; MOVS p4, p7 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9760 0xa4 0xba 0xbe 0x2a 0x6c 0x88 0x5b 0x32 VLDA bmhl3, [p5, #128]; VLDB x4, [p7, #64]; PADDS [p4], m4 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9768 0xa6 0xbe 0xbe 0x73 0xe8 0x3c VLDA bmhh3, [p5, #192]; VLDB.3D x7, [p7], d1 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9774 0xa0 0xb2 0xb4 0x15 0x74 0x02 0xb6 0x60 0x7e 0xba VLDA bmll3, [p5]; VLDB x5, [p4, #64]; MOV p5, p6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9784 0xc3 0x45 0x7b 0x57 0x20 0x3c VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;PADDB [p5], m5 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9790 0x00 0xa2 0xb4 0x06 0x74 0x01 0x9d 0x26 0x3e 0xba VLDA bmll2, [p0]; VLDB x9, [p4]; VSHUFFLE x6, x7, x4, r19 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9800 0x1b 0xba 0x54 0x78 VSHUFFLE x7, x7, x4, r21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9804 0x93 0x91 0x67 0x15 0x34 0x02 0x25 0x66 0x36 0xba VLDB x4, [p7, #64]; MOVS p4, p7; VSHUFFLE x8, x9, x5, r19 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9814 0xa0 0xc5 0x79 0x17 0x24 0xca 0xd4 0x62 0xb4 0xed 0x51 0x66 VLDA.CONV.fp32.bf16 cml4, [p5];PADDB [p4], m4; VSHUFFLE x9, x9, x5, r21; VMUL.f dm4, y3, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9826 0xa2 0xcd 0x78 0x2a 0xed 0x98 0x8b 0x01 0x9d 0x26 0x30 0xf6 VLDA.CONV.fp32.bf16 cmh4, [p5, #64];VLDB x5, [p4, #64]; MOVS p5, p6; VSHUFFLE x6, x7, x4, r19 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9838 0xb5 0x0c 0xf7 0x74 0xa8 0xd4 PADDA [p5], m5; VSHUFFLE x7, x7, x4, r21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9844 0xc2 0xcd 0x70 0x0a 0x36 0x09 0xb4 0xf1 0x51 0x4a VLDA.CONV.fp32.bf16 cmh4, [p6, #64]; VCONV.bfp16ebs8.fp32 ex0, dm4; VMUL.f dm4, y4, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9854 0xc3 0x45 0x77 0x39 0xf4 0x02 0x25 0x66 0x3e 0xba VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;VLDB.3D x7, [p7], d1; VSHUFFLE x8, x9, x5, r19 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9864 0x1c 0xca 0xd4 0x78 VSHUFFLE x9, x9, x5, r21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9868 0x08 0x8a 0x36 0x18 VCONV.bfp16ebs8.fp32 ex1, dm4 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9872 0xa0 0xc5 0x74 0x06 0x74 0x1d 0xb4 0xed 0x51 0x4a VLDA.CONV.fp32.bf16 cml4, [p5];VLDB x9, [p4]; VMUL.f dm4, y3, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9882 0xa2 0xcd 0x72 0x14 0x6c 0x0c VLDA.CONV.fp32.bf16 cmh4, [p5, #64]; VCONV.bfp16ebs8.fp32 ex2, dm4 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9888 0x00 0x2c 0xf0 0x00 0x21 0x8a 0x36 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VCONV.bfp16ebs8.fp32 ex3, dm4;NOPX; NOPM; NOPV +.label ZLS_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_864 +.loop_nesting 2 +.begin_of_loop +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9904 0x0e 0x2a 0x6d 0x6c 0xc0 0xe6 0xb4 0xf1 0x51 0x4a VLDB x4, [p7, #64]; MOV p5, p6; VMUL.f dm4, y4, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9914 0xc2 0xcd 0x7e 0x73 0xec 0x9c 0x8b 0x00 0x00 0x01 0x9d 0x26 0x3c 0x5b 0x01 0x4b VLDA.CONV.fp32.bf16 cmh4, [p6, #64];VLDB.3D x7, [p7], d1; MOVS p4, p7; NOPX; VSHUFFLE x6, x7, x4, r19; VMAC.f dm3, dm3, ex0, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9930 0xc3 0x45 0x79 0x17 0x20 0x0a 0x36 0x00 0x00 0x01 0xdd 0x2a 0x3c 0x49 0x21 0x4b VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;PADDB [p4], m4; VCONV.bfp16ebs8.fp32 ex0, dm4;NOPX; VSHUFFLE x7, x7, x4, r21; VMAC.f dm1, dm1, ex2, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9946 0xb5 0x0c 0xf8 0x2a 0xe8 0x45 0x1b 0x0e 0x8a 0x40 0x69 0x66 PADDA [p5], m5; VLDB x5, [p4, #64]; VCONV.bfp16ebs8.fp32 ex1, dm4; VMAC.f dm2, dm2, ex0, ex3, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9958 0xa0 0xc5 0x78 0x0c 0xec 0x4a 0xcc 0x62 0xb4 0xed 0x51 0x66 VLDA.CONV.fp32.bf16 cml4, [p5];VLDB x9, [p4]; VSHUFFLE x8, x9, x5, r19; VMUL.f dm4, y3, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9970 0xa2 0xcd 0x72 0x14 0x6c 0x00 0x00 0x66 0x56 0xa3 0x88 0x04 0x69 0x6e VLDA.CONV.fp32.bf16 cmh4, [p5, #64]; VCONV.bfp16ebs8.fp32 ex2, dm4; VSHUFFLE x9, x9, x5, r21; VMAC.f dm0, dm0, ex2, ex3, r17 +.label ZLE_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_944 +.end_of_loop +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9984 0x00 0x2c 0xf0 0x00 0x21 0x8a 0x36 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VCONV.bfp16ebs8.fp32 ex3, dm4;NOPX; NOPM; NOPV +.loop_nesting 1 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10000 0xc2 0xcd 0x7e 0x2a 0x6c 0x84 0x8b 0x00 0x00 0x02 0xb6 0x60 0x7d 0xa7 0x8a 0x8b VLDA.CONV.fp32.bf16 cmh4, [p6, #64];VLDB x4, [p7, #64]; MOVS p4, p1; NOPX; MOV p5, p6; VMUL.f dm4, y4, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10016 0xc3 0x45 0x7b 0x57 0x20 0x84 0x8b 0x00 0x00 0x01 0x9d 0x26 0x3c 0x5b 0x01 0x4b VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;PADDB [p5], m5; MOVS p0, p1; NOPX; VSHUFFLE x6, x7, x4, r19; VMAC.f dm3, dm3, ex0, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10032 0x0d 0x0c 0xf3 0xd7 0x20 0x0a 0x36 0x00 0x00 0x01 0xdd 0x2a 0x3c 0x49 0x21 0x4b PADDA [p0], m3; PADDB [p1], m7; VCONV.bfp16ebs8.fp32 ex0, dm4;NOPX; VSHUFFLE x7, x7, x4, r21; VMAC.f dm1, dm1, ex2, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10048 0x11 0x46 0xc1 0x0c 0x20 0xe4 0x8a 0x40 0x69 0x4a VCONV.bfp16ebs8.fp32 ex1, dm4; MOV m1, r24; VMAC.f dm2, dm2, ex0, ex3, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10058 0xa0 0xc5 0x74 0x4a 0xcc 0x62 0xb4 0xed 0x51 0x4a VLDA.CONV.fp32.bf16 cml4, [p5]; VSHUFFLE x8, x9, x5, r19; VMUL.f dm4, y3, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10068 0xa2 0xcd 0x72 0x14 0x6c 0x00 0x00 0x66 0x56 0xa3 0x88 0x04 0x69 0x6e VLDA.CONV.fp32.bf16 cmh4, [p5, #64]; VCONV.bfp16ebs8.fp32 ex2, dm4; VSHUFFLE x9, x9, x5, r21; VMAC.f dm0, dm0, ex2, ex3, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10082 0x31 0x46 0xc4 0x2b 0x90 0x02 0xb4 0x60 0x76 0xba PADDB [p4], m1; VCONV.bfp16ebs8.fp32 ex3, dm4; MOV p5, p4 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10092 0x02 0x09 0x20 0xe6 0x8b 0x60 0x29 0x62 MOV m2, r18; VMAC.f dm3, dm3, ex0, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10100 0xc2 0xcd 0x70 0x0a 0x36 0x09 0xb4 0xf1 0x51 0x4a VLDA.CONV.fp32.bf16 cmh4, [p6, #64]; VCONV.bfp16ebs8.fp32 ex0, dm4; VMUL.f dm4, y4, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10110 0x89 0x24 0x29 0x48 VMAC.f dm1, dm1, ex2, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10114 0x11 0x46 0xc0 0x02 0x8a 0x40 0x69 0x62 VCONV.bfp16ebs8.fp32 ex1, dm4; VMAC.f dm2, dm2, ex0, ex3, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10122 0x88 0x04 0x69 0x48 VMAC.f dm0, dm0, ex2, ex3, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10126 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10128 0x09 0x0a 0x36 0x18 VCONV.bfp16ebs8.fp32 ex2, dm4 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10132 0x31 0x46 0xc0 0x02 0x8b 0x60 0x29 0x62 VCONV.bfp16ebs8.fp32 ex3, dm4; VMAC.f dm3, dm3, ex0, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10140 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10142 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10144 0x89 0x24 0x29 0x48 VMAC.f dm1, dm1, ex2, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10148 0x8a 0x40 0x69 0x48 VMAC.f dm2, dm2, ex0, ex3, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10152 0x88 0x04 0x69 0x48 VMAC.f dm0, dm0, ex2, ex3, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10156 0x09 0x15 0xa6 0x98 VST bmlh3, [p1, #64] +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10160 0x09 0x25 0xc6 0x98 VST bmhl3, [p1, #128] +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10164 0x09 0x35 0xe6 0x98 VST bmhh3, [p1, #192] +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10168 0x20 0xb0 0xd5 0xcb 0x90 0x00 0xb5 0x60 0x76 0xba PADDB [p5], m6; VST bmll3, [p1]; MOV p1, p5 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10178 0x02 0xa4 0xd1 0x53 0x90 0x01 0x03 0x00 0x76 0xba PADDB.2D [p1], d2; VST bmlh2, [p0, #64]; MOV m2, m3 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10188 0x08 0x25 0x46 0x98 VST bmhl2, [p0, #128] +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10192 0x08 0x35 0x66 0x98 VST bmhh2, [p0, #192] +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10196 0x00 0xa0 0xd0 0x00 0x33 0x60 0x70 0x02 VST bmll2, [p0]; MOV p0, p3 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10204 0x82 0x94 0xd0 0x6b 0x90 0x01 0x84 0x90 0x76 0xba PADDB [p0], m3; VST bmlh1, [p4, #64]; MOV m3, r18 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10214 0x02 0xa6 0xb8 0x49 0x8d 0x0c VLDA bmlh2, [p0, #64]; VST bmhl1, [p4, #128] +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10220 0x04 0xaa 0xb4 0x34 0xe6 0x80 0x05 0xee 0xa0 0x7a VLDA bmhl2, [p0, #128]; VST bmhh1, [p4, #192]; JNZD r23, r23, p2 +.delay_slot +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10230 0x06 0xae 0xb7 0x97 0x24 0x04 0x86 0x82 0x33 0x60 0x70 0xf6 VLDA bmhh2, [p0, #192]; PADDB [p3], m6; VST bmll1, [p4]; MOV p4, p3 +.delay_slot +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10242 0x62 0x86 0xba 0x28 0x4d 0x0c VLDA bmlh0, [p3, #64]; VST bmlh0, [p5, #64] +.delay_slot +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10248 0x64 0x8a 0xba 0x48 0x8d 0x0c VLDA bmhl0, [p3, #128]; VST bmhl0, [p5, #128] +.delay_slot +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10254 0x66 0x8e 0xba 0x68 0xcd 0x0c VLDA bmhh0, [p3, #192]; VST bmhh0, [p5, #192] +.delay_slot +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10260 0x60 0x82 0xb8 0x57 0x25 0x04 0x06 0x82 0xb4 0x60 0x70 0xf6 VLDA bmll0, [p3]; PADDB [p4], m1; VST bmll0, [p5]; MOV p5, p4 +.loop_nesting 0 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10272 0x82 0x96 0xbb 0xd7 0x23 0x94 0x8b 0x00 0x84 0x10 0x70 0xf6 VLDA bmlh1, [p4, #64]; PADDB [p5], m7; MOVS p3, p5; MOV m1, r16 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10284 0x84 0x9a 0xb3 0x73 0x90 0x02 0xbe 0xbf 0x4e 0xba VLDA bmhl1, [p4, #128]; PADDB.2D [p3], d3; ADD.NC lc, r26, #-3 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10294 0x86 0x9e 0xb0 0x00 0x00 0x08 0x7c 0x78 0x10 0xba VLDA bmhh1, [p4, #192]; MOVXM ls, #10480 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10304 0x80 0x92 0xbe 0x73 0xe8 0x00 0x01 0x37 0x94 0x02 0x93 0x91 0x60 0x7e VLDA bmll1, [p4]; VLDB.3D x7, [p7], d1; MOVS p4, p7; MOVXM le, #10560 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10318 0xa2 0xb6 0xb4 0x8b 0x90 0x01 0x82 0x00 0x7e 0xba VLDA bmlh3, [p5, #64]; PADDB [p4], m4; MOV m3, m2 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10328 0x80 0xcb 0x78 0x2a 0xec 0x9c 0x8b 0x32 VLDA x9, [p4]; VLDB x5, [p4, #64]; MOVS p4, p7 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10336 0xa4 0xba 0xbe 0x2a 0x6c 0x88 0x5b 0x32 VLDA bmhl3, [p5, #128]; VLDB x4, [p7, #64]; PADDS [p4], m4 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10344 0xa6 0xbe 0xbe 0x73 0xe8 0x3c VLDA bmhh3, [p5, #192]; VLDB.3D x7, [p7], d1 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10350 0xa0 0xb2 0xb4 0x15 0x74 0x02 0xb6 0x60 0x7e 0xba VLDA bmll3, [p5]; VLDB x5, [p4, #64]; MOV p5, p6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10360 0xc3 0x45 0x7b 0x57 0x20 0x3c VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;PADDB [p5], m5 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10366 0x00 0xa2 0xb4 0x06 0x74 0x01 0x9d 0x26 0x3e 0xba VLDA bmll2, [p0]; VLDB x9, [p4]; VSHUFFLE x6, x7, x4, r19 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10376 0x1b 0xba 0x54 0x78 VSHUFFLE x7, x7, x4, r21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10380 0x93 0x91 0x67 0x15 0x34 0x02 0x25 0x66 0x36 0xba VLDB x4, [p7, #64]; MOVS p4, p7; VSHUFFLE x8, x9, x5, r19 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10390 0xa0 0xc5 0x79 0x17 0x24 0xca 0xd4 0x62 0xb4 0xed 0x51 0x66 VLDA.CONV.fp32.bf16 cml4, [p5];PADDB [p4], m4; VSHUFFLE x9, x9, x5, r21; VMUL.f dm4, y3, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10402 0xa2 0xcd 0x78 0x2a 0xed 0x98 0x8b 0x01 0x9d 0x26 0x30 0xf6 VLDA.CONV.fp32.bf16 cmh4, [p5, #64];VLDB x5, [p4, #64]; MOVS p5, p6; VSHUFFLE x6, x7, x4, r19 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10414 0xb5 0x0c 0xf7 0x74 0xa8 0xd4 PADDA [p5], m5; VSHUFFLE x7, x7, x4, r21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10420 0xc2 0xcd 0x70 0x0a 0x36 0x09 0xb4 0xf1 0x51 0x4a VLDA.CONV.fp32.bf16 cmh4, [p6, #64]; VCONV.bfp16ebs8.fp32 ex0, dm4; VMUL.f dm4, y4, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10430 0xc3 0x45 0x77 0x39 0xf4 0x02 0x25 0x66 0x3e 0xba VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;VLDB.3D x7, [p7], d1; VSHUFFLE x8, x9, x5, r19 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10440 0x1c 0xca 0xd4 0x78 VSHUFFLE x9, x9, x5, r21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10444 0x08 0x8a 0x36 0x18 VCONV.bfp16ebs8.fp32 ex1, dm4 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10448 0xa0 0xc5 0x74 0x06 0x74 0x1d 0xb4 0xed 0x51 0x4a VLDA.CONV.fp32.bf16 cml4, [p5];VLDB x9, [p4]; VMUL.f dm4, y3, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10458 0xa2 0xcd 0x72 0x14 0x6c 0x0c VLDA.CONV.fp32.bf16 cmh4, [p5, #64]; VCONV.bfp16ebs8.fp32 ex2, dm4 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10464 0x00 0x2c 0xf0 0x00 0x21 0x8a 0x36 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VCONV.bfp16ebs8.fp32 ex3, dm4;NOPX; NOPM; NOPV +.label ZLS_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_1440 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10480 0x0e 0x2a 0x6d 0x6c 0xc0 0xe6 0xb4 0xf1 0x51 0x4a VLDB x4, [p7, #64]; MOV p5, p6; VMUL.f dm4, y4, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10490 0xc2 0xcd 0x7e 0x73 0xec 0x9c 0x8b 0x00 0x00 0x01 0x9d 0x26 0x3c 0x5b 0x01 0x4b VLDA.CONV.fp32.bf16 cmh4, [p6, #64];VLDB.3D x7, [p7], d1; MOVS p4, p7; NOPX; VSHUFFLE x6, x7, x4, r19; VMAC.f dm3, dm3, ex0, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10506 0xc3 0x45 0x79 0x17 0x20 0x0a 0x36 0x00 0x00 0x01 0xdd 0x2a 0x3c 0x49 0x21 0x4b VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;PADDB [p4], m4; VCONV.bfp16ebs8.fp32 ex0, dm4;NOPX; VSHUFFLE x7, x7, x4, r21; VMAC.f dm1, dm1, ex2, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10522 0xb5 0x0c 0xf8 0x2a 0xe8 0x45 0x1b 0x0e 0x8a 0x40 0x69 0x66 PADDA [p5], m5; VLDB x5, [p4, #64]; VCONV.bfp16ebs8.fp32 ex1, dm4; VMAC.f dm2, dm2, ex0, ex3, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10534 0xa0 0xc5 0x78 0x0c 0xec 0x4a 0xcc 0x62 0xb4 0xed 0x51 0x66 VLDA.CONV.fp32.bf16 cml4, [p5];VLDB x9, [p4]; VSHUFFLE x8, x9, x5, r19; VMUL.f dm4, y3, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10546 0xa2 0xcd 0x72 0x14 0x6c 0x00 0x00 0x66 0x56 0xa3 0x88 0x04 0x69 0x6e VLDA.CONV.fp32.bf16 cmh4, [p5, #64]; VCONV.bfp16ebs8.fp32 ex2, dm4; VSHUFFLE x9, x9, x5, r21; VMAC.f dm0, dm0, ex2, ex3, r17 +.label ZLE_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_1520 +.end_of_loop +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10560 0x00 0x2c 0xf0 0x00 0x21 0x8a 0x36 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VCONV.bfp16ebs8.fp32 ex3, dm4;NOPX; NOPM; NOPV +.loop_nesting 0 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10576 0xc2 0xcd 0x79 0x09 0x16 0x00 0x00 0x6b 0x66 0x07 0xb4 0xf1 0x51 0x6e VLDA.CONV.fp32.bf16 cmh4, [p6, #64]; MOVS p4, p1; MOV p5, p6; VMUL.f dm4, y4, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10590 0xc3 0x45 0x7b 0x57 0x20 0x84 0x8b 0x00 0x00 0x01 0x9d 0x26 0x3c 0x5b 0x01 0x4b VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;PADDB [p5], m5; MOVS p0, p1; NOPX; VSHUFFLE x6, x7, x4, r19; VMAC.f dm3, dm3, ex0, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10606 0xda 0x06 0x83 0xd7 0x20 0x0a 0x36 0x00 0x00 0x01 0xdd 0x2a 0x3c 0x49 0x21 0x4b MOVA dj1, #-304; PADDB [p1], m7; VCONV.bfp16ebs8.fp32 ex0, dm4;NOPX; VSHUFFLE x7, x7, x4, r21; VMAC.f dm1, dm1, ex2, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10622 0x0d 0x0c 0xf1 0x14 0x6c 0x00 0x00 0x7b 0x51 0x07 0x8a 0x40 0x69 0x6e PADDA [p0], m3; VCONV.bfp16ebs8.fp32 ex1, dm4; MOV p7, r20; VMAC.f dm2, dm2, ex0, ex3, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10636 0xa0 0xc5 0x7d 0x32 0x16 0x00 0x00 0x62 0x56 0x63 0xb4 0xed 0x51 0x6e VLDA.CONV.fp32.bf16 cml4, [p5]; MOVS p6, r25; VSHUFFLE x8, x9, x5, r19; VMUL.f dm4, y3, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10650 0xa2 0xcd 0x72 0x14 0x6c 0x00 0x00 0x66 0x56 0xa3 0x88 0x04 0x69 0x6e VLDA.CONV.fp32.bf16 cmh4, [p5, #64]; VCONV.bfp16ebs8.fp32 ex2, dm4; VSHUFFLE x9, x9, x5, r21; VMAC.f dm0, dm0, ex2, ex3, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10664 0x31 0x46 0xc0 0x00 0x86 0x10 0x70 0x02 VCONV.bfp16ebs8.fp32 ex3, dm4; MOV m1, r24 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10672 0x08 0x57 0x25 0x68 0xc0 0xe6 0x8b 0x60 0x29 0x4a PADDB [p4], m1; MOV p5, p4; VMAC.f dm3, dm3, ex0, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10682 0x01 0x46 0xc0 0x02 0xb4 0xf1 0x51 0x62 VCONV.bfp16ebs8.fp32 ex0, dm4; VMUL.f dm4, y4, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10690 0x89 0x24 0x29 0x48 VMAC.f dm1, dm1, ex2, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10694 0x11 0x46 0xc0 0x02 0x8a 0x40 0x69 0x62 VCONV.bfp16ebs8.fp32 ex1, dm4; VMAC.f dm2, dm2, ex0, ex3, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10702 0x88 0x04 0x69 0x48 VMAC.f dm0, dm0, ex2, ex3, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10706 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10708 0xe4 0x46 0xd1 0x0a 0x36 0x00 0xc1 0x18 0x52 0xba LDA r17, [p7, dj1]; VCONV.bfp16ebs8.fp32 ex2, dm4; MOV dj1, #280 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10718 0xc4 0x42 0xd0 0x00 0x21 0x8a 0x36 0x00 0x01 0xf3 0xb1 0x00 0x14 0x5b 0x01 0x4b LDA r16, [p6, dj1]; NOPB; VCONV.bfp16ebs8.fp32 ex3, dm4;MOVXM p7, #508416; VMAC.f dm3, dm3, ex0, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10734 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10736 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10738 0x89 0x24 0x29 0x48 VMAC.f dm1, dm1, ex2, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10742 0x8a 0x40 0x69 0x48 VMAC.f dm2, dm2, ex0, ex3, r17 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10746 0x88 0x04 0x69 0x48 VMAC.f dm0, dm0, ex2, ex3, r17 + 10750 0x22 0xb4 0xd8 0xc7 0xfe 0x5c VST bmlh3, [p1, #64]; ADD r17, r17, #-1 + 10756 0x24 0xb8 0xd8 0xc6 0x11 0x5c VST bmhl3, [p1, #128]; NE r17, r17, r16 + 10762 0x09 0x35 0xe6 0x98 VST bmhh3, [p1, #192] + 10766 0x20 0xb0 0xd5 0xcb 0x90 0x00 0xb5 0x60 0x76 0xba PADDB [p5], m6; VST bmll3, [p1]; MOV p1, p5 + 10776 0x08 0x15 0x26 0x98 VST bmlh2, [p0, #64] + 10780 0x08 0x25 0x46 0x98 VST bmhl2, [p0, #128] + 10784 0x08 0x35 0x66 0x98 VST bmhh2, [p0, #192] + 10788 0x08 0x05 0x06 0x98 VST bmll2, [p0] + 10792 0x0c 0x14 0xa6 0x98 VST bmlh1, [p4, #64] + 10796 0x0c 0x24 0xc6 0x98 VST bmhl1, [p4, #128] + 10800 0x86 0x9c 0xd0 0x22 0x05 0x54 0x10 0x00 0x61 0x3a VST bmhh1, [p4, #192]; JNZ r17, #10912 +.delay_slot + 10810 0x0c 0x04 0x86 0x98 VST bmll1, [p4] +.delay_slot + 10814 0x0d 0x14 0x26 0x98 VST bmlh0, [p5, #64] +.delay_slot + 10818 0x0d 0x24 0x46 0x98 VST bmhl0, [p5, #128] +.delay_slot + 10822 0xa6 0x8c 0xd0 0x01 0x04 0x90 0x70 0x02 VST bmhh0, [p5, #192]; MOV m2, r18 +.delay_slot + 10830 0x2a 0x72 0x0a 0x08 0x0d 0x4c PADDB.2D [p1], d2; VST bmll0, [p5] + 10836 0x07 0x06 0x16 0x98 LDA r16, [p7] + 10840 0x07 0x26 0x36 0x98 LDA r17, [p7, #8] + 10844 0x07 0xf4 0x19 0x18 LDA p0, [sp, #-12] + 10848 0x00 0x00 NOPX +.no_stack_arguments + 10850 0x00 0x11 0x10 0x00 0x01 0x04 JL #8736 +.delay_slot +.swstall delay_slot + 10856 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10858 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10860 0x00 0x00 NOPX +.delay_slot + 10862 0x14 0x41 0x0f 0x98 MUL r0, r17, r16 +.delay_slot +.swstall delay_slot + 10866 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV +.return_address + 10880 0x00 0x15 0x58 0x00 0x00 0x84 J #10928 +.delay_slot + 10886 0x10 0x20 0x01 0x18 MOVX r16, #0 +.delay_slot +.swstall delay_slot + 10890 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10892 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10894 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10896 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_1872 + 10912 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x21 0x00 0x38 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; ADD r16, r16, #1; NOPM; NOPV +.label TGT_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_1888 + 10928 0x07 0xf8 0x39 0x18 LDA lr, [sp, #-8] + 10932 0x07 0xf3 0x99 0x18 LDA p7, [sp, #-16] + 10936 0x00 0x00 NOPX + 10938 0x00 0x00 NOPX + 10940 0x00 0x00 NOPX +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 10942 0x00 0x00 NOPX +.aggressive_scheduled_block_id 5 +.noswbrkpt + 10944 0x07 0xff 0x19 0x18 LDA p6, [sp, #-4] +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 10948 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 10952 0x19 0x82 0x30 0xb8 MOV dj1, #280 +.delay_slot +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10956 0x0e 0x22 0x11 0x98 ST r16, [p6, dj1] +.delay_slot + 10960 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 10966 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10968 0x00 0x00 NOPX +.label _Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params__end +.label __Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params___func_end0 + +.text_segment PM 10976 +.label __Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function_start + 10976 0x90 0x91 0x60 0x00 0x01 0xf2 0xb1 0xe0 0x11 0x3a MOVS p4, p1; MOVXM p5, #508864 + 10986 0x05 0x06 0x16 0x98 LDA r16, [p5] + 10990 0x00 0x00 NOPX + 10992 0x00 0x00 NOPX + 10994 0x00 0x00 NOPX + 10996 0x00 0x00 NOPX + 10998 0x00 0x00 NOPX + 11000 0x00 0x00 NOPX + 11002 0x80 0x15 0xb8 0x40 0x01 0x84 JNZ r16, #11120 +.delay_slot + 11008 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 +.delay_slot + 11014 0x0f 0xff 0x1d 0x98 ST p6, [sp, #-4] +.delay_slot + 11018 0x0f 0xf7 0x9d 0x98 ST p7, [sp, #-12] +.delay_slot + 11022 0xff 0x07 0xb0 0x03 0xb0 0x60 0x70 0x02 ST lr, [sp, #-8]; MOV p7, p0 +.delay_slot + 11030 0x1e 0x66 0xc0 0xf8 MOV p6, p3 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11034 0x11 0x11 0x60 0x00 0x01 0xf1 0xb2 0x08 0x11 0x3a MOVS p0, p2; MOVXM p3, #508944 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11044 0x60 0xc0 0xe0 0x00 0x01 0xf1 0xb2 0x06 0x10 0xba ST.s8 r16, [p3]; MOVXM p3, #508940 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11054 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 11056 0x00 0x0f 0x60 0x00 0x01 0x04 JL #7872 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11062 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11064 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11066 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 11070 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 11074 0x00 0x2c 0xf6 0x0c 0x23 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; ST r16, [p3]; NOPM; NOPV +.return_address + 11088 0x4c 0x85 0xd0 0x00 0x01 0xf1 0x31 0xe4 0x10 0xba LDA el0, [p2, #24]; MOVXM p2, #508872 + 11098 0x00 0x00 NOPX + 11100 0x00 0x00 NOPX + 11102 0x00 0x00 NOPX + 11104 0x00 0x00 NOPX + 11106 0x00 0x00 NOPX + 11108 0x00 0x00 NOPX + 11110 0x00 0x2c 0xf2 0x04 0x29 0x80 0x00 0x00 0x00 0x7a NOPA; ST el0, [p2]; NOPX +.label TGT_F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_144 + 11120 0xa0 0xc2 0xd0 0x00 0x01 0xf1 0x31 0xe2 0x10 0xba LDA r16, [p5]; MOVXM p2, #508868 + 11130 0x02 0x06 0x36 0x98 LDA r17, [p2] + 11134 0x00 0x00 NOPX + 11136 0x00 0x00 NOPX + 11138 0x00 0x00 NOPX + 11140 0x00 0x00 NOPX + 11142 0x00 0x00 NOPX + 11144 0x00 0x00 NOPX + 11146 0x88 0x15 0xf0 0x40 0x01 0x84 JNZ r17, #11232 +.delay_slot + 11152 0x14 0x20 0x07 0x18 ADD r16, r16, #1 +.delay_slot + 11156 0x0d 0x06 0x11 0x98 ST r16, [p5] +.delay_slot + 11160 0x14 0x60 0x07 0x18 ADD r16, r17, #1 +.delay_slot + 11164 0x0a 0x06 0x11 0x98 ST r16, [p2] +.delay_slot +.swstall delay_slot + 11168 0x00 0x00 NOPX + 11170 0x1c 0x1c 0xc0 0xf8 MOV r16, p6 + 11174 0x1a 0x68 0x06 0x18 ADD.NC p2, r16, #12 + 11178 0x02 0xff 0x76 0x98 LDA r27, [p2], #-4 + 11182 0x02 0xfe 0x16 0x98 LDA r16, [p2], #-4 + 11186 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 11190 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.noswbrkpt + 11192 0x02 0x46 0x16 0x98 LDA r16, [p2, #16] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11196 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11198 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11200 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11202 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11204 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11208 0x0a 0x06 0x11 0x98 ST r16, [p2] + 11212 0x17 0xe2 0xfd 0x18 MOVX r17, #-1 + 11216 0x00 0x00 NOPX + 11218 0x00 0x00 NOPX + 11220 0x00 0x00 NOPX + 11222 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x04 0x13 0x18 0x7a NOPA; NOPS; ACQ r16, r17 +.label TGT_F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_256 + 11232 0x00 0x18 0x00 0x1f 0xff 0xfe 0x0f 0xf0 0x10 0xba MOVA r24, #0; MOVXM r16, #2147483616 + 11242 0x10 0x22 0x05 0x18 MOVX r17, #1 + 11246 0x00 0x00 NOPX + 11248 0x80 0xb3 0xd0 0x00 0x01 0xf2 0x31 0x06 0x10 0xba LDA p3, [p4]; MOVXM p4, #508428 + 11258 0x04 0xff 0x76 0x98 LDA r27, [p4], #-4 + 11262 0x04 0xee 0x56 0x98 LDA r18, [p4], #-8 + 11266 0xe0 0x83 0xde 0xd1 0x81 0xd4 LDA p0, [p7]; MOV p7, p4 + 11272 0x06 0x05 0x1e 0x98 LDA p2, [p6] + 11276 0x00 0x00 NOPX + 11278 0x00 0x00 NOPX + 11280 0x1c 0xd6 0xc0 0xf8 MOV r19, p3 +.no_stack_arguments + 11284 0x00 0x11 0xa8 0x00 0x01 0x04 JL #9040 +.delay_slot + 11290 0x14 0xa4 0x7f 0x18 ADD r18, r18, #31 +.delay_slot + 11294 0x14 0xa1 0x04 0x98 AND r16, r18, r16 +.delay_slot + 11298 0x16 0x21 0x02 0x18 SEL.EQZ r16, r24, r16, r27 +.delay_slot + 11302 0x14 0x21 0x1d 0x98 LSHL r16, r16, r17 +.delay_slot + 11306 0x00 0x2c 0xf2 0xd3 0x82 0x94 NOPA; ADD.NC p1, r19, r16 +.return_address + 11312 0xe8 0xc2 0xd0 0x00 0x01 0xf1 0x31 0xe2 0x10 0xba LDA r16, [p7, #16]; MOVXM p2, #508868 + 11322 0x40 0xca 0xd8 0xb9 0x81 0xd4 LDA r18, [p2]; MOV r17, p6 + 11328 0x07 0xf7 0x99 0x18 LDA p7, [sp, #-12] + 11332 0x00 0x00 NOPX + 11334 0x00 0x00 NOPX + 11336 0x00 0x00 NOPX + 11338 0x00 0x00 NOPX + 11340 0x00 0x00 NOPX + 11342 0x14 0xa1 0x08 0x98 NE r16, r18, r16 + 11346 0x80 0x16 0x50 0x40 0x01 0x84 JNZ r16, #11424 +.delay_slot + 11352 0x10 0x30 0x01 0x18 MOVX r24, #0 +.delay_slot +.swstall delay_slot + 11356 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11358 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11360 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11362 0x00 0x00 NOPX + 11364 0x40 0xe2 0x30 0x01 0x00 0x2b 0x34 0x45 0x09 0x3a ST r24, [p2]; MOVX r16, #1; ADD.NC p6, r17, #20 + 11374 0x06 0x06 0x36 0x98 LDA r17, [p6] + 11378 0x00 0x00 NOPX + 11380 0x00 0x00 NOPX + 11382 0x00 0x00 NOPX + 11384 0x00 0x00 NOPX + 11386 0x00 0x00 NOPX + 11388 0x00 0x00 NOPX + 11390 0x14 0x51 0x08 0x18 REL r17, r16 + 11394 0x06 0xe6 0x36 0x98 LDA r17, [p6, #-8] + 11398 0x00 0x00 NOPX + 11400 0x00 0x00 NOPX + 11402 0x00 0x00 NOPX + 11404 0x00 0x00 NOPX + 11406 0x00 0x00 NOPX + 11408 0x00 0x00 NOPX + 11410 0x14 0x21 0x11 0x98 SUB r16, r16, r17 + 11414 0x00 0x2c 0xf6 0xe6 0x11 0x80 0x00 0x00 0x00 0x7a NOPA; ST r16, [p6, #-8]; NOPX +.label TGT_F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 + 11424 0x00 0x07 0xc4 0xc7 0x80 0x44 MOVXM p2, #508864 + 11430 0x40 0xc2 0xd0 0x00 0x01 0xf1 0xb1 0xe4 0x10 0xba LDA r16, [p2]; MOVXM p3, #508872 + 11440 0x03 0x06 0x36 0x98 LDA r17, [p3] + 11444 0x00 0x00 NOPX + 11446 0x00 0x00 NOPX + 11448 0x00 0x00 NOPX + 11450 0x00 0x00 NOPX + 11452 0x00 0x00 NOPX + 11454 0x00 0x00 NOPX + 11456 0x14 0x61 0x08 0x98 NE r16, r17, r16 + 11460 0x80 0x16 0x70 0x40 0x01 0x84 JNZ r16, #11488 +.delay_slot + 11466 0x07 0xff 0x19 0x18 LDA p6, [sp, #-4] +.delay_slot +.swstall delay_slot + 11470 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11472 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11474 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11476 0x00 0x00 NOPX + 11478 0x00 0x2c 0xf2 0x07 0x11 0x80 0x00 0x00 0x00 0x7a NOPA; ST r24, [p2]; NOPX +.label TGT_F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_512 + 11488 0x07 0xf8 0x39 0x18 LDA lr, [sp, #-8] + 11492 0x00 0x00 NOPX + 11494 0x00 0x00 NOPX + 11496 0x00 0x00 NOPX + 11498 0x00 0x00 NOPX + 11500 0x00 0x00 NOPX + 11502 0x00 0x00 NOPX + 11504 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 11508 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 11514 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11516 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11518 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11520 0x00 0x00 NOPX +.label _Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + +.text_segment PM 11536 +.label __Z15_b13786_wrapperPPv___func_begin0 +.label _Z15_b13786_wrapperPPv +.function_start + 11536 0x1a 0x60 0xc0 0xf8 MOV p2, p0 + 11540 0x02 0x1c 0x1e 0x98 LDA p0, [p2], #4 + 11544 0x02 0x2c 0x9e 0x98 LDA p1, [p2], #8 + 11548 0x02 0xf5 0x9e 0x98 LDA p3, [p2, #-4] + 11552 0x02 0x05 0x1e 0x98 LDA p2, [p2] +.tail_call + 11556 0x00 0x15 0x70 0x00 0x00 0x84 J #10976 +.delay_slot +.swstall delay_slot + 11562 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11564 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11566 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11568 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11570 0x00 0x00 NOPX +.label _Z15_b13786_wrapperPPv__end +.label __Z15_b13786_wrapperPPv___func_end0 + +.text_segment PM 11584 +.label __Z15_b13811_wrapperPPv___func_begin0 +.label _Z15_b13811_wrapperPPv +.function_start + 11584 0x1a 0x60 0xc0 0xf8 MOV p2, p0 + 11588 0x02 0x3c 0x1e 0x98 LDA p0, [p2], #12 + 11592 0x02 0xec 0x9e 0x98 LDA p1, [p2], #-8 + 11596 0x02 0x15 0x9e 0x98 LDA p3, [p2, #4] + 11600 0x02 0x05 0x1e 0x98 LDA p2, [p2] +.tail_call + 11604 0x00 0x0c 0x00 0x00 0x00 0x84 J #6144 +.delay_slot +.swstall delay_slot + 11610 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11612 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11614 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11616 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11618 0x00 0x00 NOPX +.label _Z15_b13811_wrapperPPv__end +.label __Z15_b13811_wrapperPPv___func_end0 + +.text_segment PM 11632 +.label __Z15_b13739_wrapperPPv___func_begin0 +.label _Z15_b13739_wrapperPPv +.function_start + 11632 0x19 0x60 0xc0 0xf8 MOV p1, p0 + 11636 0x01 0x2c 0x1e 0x98 LDA p0, [p1], #8 + 11640 0x01 0xf5 0x1e 0x98 LDA p2, [p1, #-4] + 11644 0x01 0x04 0x9e 0x98 LDA p1, [p1] +.tail_call + 11648 0x00 0x07 0xa0 0x00 0x00 0x84 J #3904 +.delay_slot +.swstall delay_slot + 11654 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11656 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11658 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11660 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11662 0x00 0x00 NOPX +.label _Z15_b13739_wrapperPPv__end +.label __Z15_b13739_wrapperPPv___func_end0 +.label __Z15_b13744_wrapperPPv___func_begin0 +.label _Z15_b13744_wrapperPPv +.function_start + 11664 0x19 0x60 0xc0 0xf8 MOV p1, p0 + 11668 0x01 0x2c 0x1e 0x98 LDA p0, [p1], #8 + 11672 0x01 0xf5 0x1e 0x98 LDA p2, [p1, #-4] + 11676 0x01 0x04 0x9e 0x98 LDA p1, [p1] +.tail_call + 11680 0x00 0x0a 0x78 0x00 0x00 0x84 J #5360 +.delay_slot +.swstall delay_slot + 11686 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11688 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11690 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11692 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11694 0x00 0x00 NOPX +.label _Z15_b13744_wrapperPPv__end +.label __Z15_b13744_wrapperPPv___func_end0 +.label __Z15_b13749_wrapperPPv___func_begin0 +.label _Z15_b13749_wrapperPPv +.function_start + 11696 0x1a 0x60 0xc0 0xf8 MOV p2, p0 + 11700 0x02 0x3c 0x1e 0x98 LDA p0, [p2], #12 + 11704 0x02 0xec 0x9e 0x98 LDA p1, [p2], #-8 + 11708 0x02 0x15 0x9e 0x98 LDA p3, [p2, #4] + 11712 0x02 0x05 0x1e 0x98 LDA p2, [p2] +.tail_call + 11716 0x00 0x0e 0x30 0x00 0x00 0x84 J #7264 +.delay_slot +.swstall delay_slot + 11722 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11724 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11726 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11728 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11730 0x00 0x00 NOPX +.label _Z15_b13749_wrapperPPv__end +.label __Z15_b13749_wrapperPPv___func_end0 + +.text_segment PM 11744 +.label _ZL30setup_rmsnorm_row_major_paramsR33rmsnorm_row_major_internal_paramsRA10_Kj +.label __ZL30setup_rmsnorm_row_major_paramsR33rmsnorm_row_major_internal_paramsRA10_Kj___func_begin0 +.function_start + 11744 0xfd 0x00 0x81 0x9c 0x8b 0x3f 0x47 0xea 0x08 0x60 0x78 0x76 MOVA m0, #-24; MOVS p1, p7; MOVX r20, #-1; MOV r16, p0 + 11756 0x06 0xc4 0x80 0x01 0x00 0x28 0x34 0x05 0x08 0xba MOVA m1, #54; MOVX r16, #1; ADD.NC p0, r16, #20 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11766 0x05 0xca 0x50 0x00 0x01 0xf3 0xb1 0xba 0x10 0xba LDA.s16 r18, [p0], #4; MOVXM p7, #508788 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11776 0xe3 0xca 0xe0 0x2f 0x41 0xd4 ST.s16 r18, [p7], #2; MOV r0, r15 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11782 0x01 0x46 0x50 0x1e 0xd1 0x54 LDA.s16 r17, [p0], m0; MOV m0, #-76 + 11788 0x00 0x1c 0x2e 0x98 LDA el0, [p0], #4 + 11792 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 11798 0x00 0x00 NOPX + 11800 0x00 0x00 NOPX + 11802 0x00 0x00 NOPX + 11804 0x07 0x2a 0x37 0x18 ST.s16 r17, [p7], m1 + 11808 0x00 0x00 NOPX + 11810 0x00 0x00 NOPX + 11812 0x00 0x00 NOPX + 11814 0x00 0x00 NOPX + 11816 0x00 0x00 NOPX + 11818 0x00 0x00 NOPX + 11820 0xe1 0x3e 0xdf 0xfb 0x6b 0x0c LDA r15, [p7], m0; ST r13, [sp, #-4] + 11826 0x0f 0x1c 0x29 0x98 ST el0, [p7], #4 + 11830 0x03 0xc6 0xdf 0xd8 0x7b 0x0c LDA r17, [p0], #4; ST lr, [sp, #-20] + 11836 0x0f 0xf8 0x15 0x98 ST r0, [sp, #-8] + 11840 0x0f 0xf1 0xd5 0x98 ST r14, [sp, #-16] + 11844 0x0f 0xf4 0x9d 0x98 ST p1, [sp, #-12] + 11848 0x00 0x00 NOPX + 11850 0x00 0x00 NOPX + 11852 0x00 0x00 NOPX + 11854 0x0f 0x1e 0x31 0x98 ST r17, [p7], #4 + 11858 0x00 0x1e 0x56 0x98 LDA r18, [p0], #4 + 11862 0x00 0x00 NOPX + 11864 0x00 0x00 NOPX + 11866 0x00 0x00 NOPX + 11868 0x00 0x00 NOPX + 11870 0x00 0x00 NOPX + 11872 0x00 0x00 NOPX + 11874 0x0f 0x1e 0x51 0x98 ST r18, [p7], #4 + 11878 0x00 0x1e 0x76 0x98 LDA r19, [p0], #4 + 11882 0x00 0x00 NOPX + 11884 0x00 0x00 NOPX + 11886 0x00 0x00 NOPX + 11888 0x00 0x00 NOPX + 11890 0x00 0x00 NOPX + 11892 0x00 0x00 NOPX + 11894 0x0f 0x1e 0x71 0x98 ST r19, [p7], #4 + 11898 0x00 0x3e 0xb6 0x98 LDA r21, [p0], #12 + 11902 0x00 0x00 NOPX + 11904 0x00 0x00 NOPX + 11906 0x00 0x00 NOPX + 11908 0x00 0x00 NOPX + 11910 0x00 0x00 NOPX + 11912 0x00 0x00 NOPX + 11914 0xe5 0xd6 0x3a 0xee 0x8f 0x5c ST r21, [p7], #8; EQ r27, r21, r20 + 11920 0x03 0xd2 0xd9 0x56 0x04 0x2c LDA r20, [p0], #4; SEL.EQZ r21, r18, r16, r27 + 11926 0x14 0x25 0x22 0x18 SEL.EQZ r18, r16, r18, r27 + 11930 0x14 0x5b 0x2f 0x98 MUL r13, r17, r18 + 11934 0x00 0x00 NOPX + 11936 0x00 0x00 NOPX + 11938 0x00 0x00 NOPX + 11940 0x00 0x00 NOPX + 11942 0x0f 0x1e 0x91 0x98 ST r20, [p7], #4 + 11946 0x00 0x06 0x36 0x98 LDA r17, [p0] + 11950 0x00 0x00 NOPX + 11952 0x00 0x00 NOPX + 11954 0x00 0x00 NOPX + 11956 0x00 0x00 NOPX + 11958 0x00 0x00 NOPX + 11960 0x00 0x00 NOPX + 11962 0xe3 0xc6 0x38 0xca 0x04 0x5c ST r17, [p7], #4; SEL.EQZ r18, r17, r16, r27 + 11968 0x02 0xc2 0xd8 0x46 0x24 0x2c LDA r16, [p0, #4]; SEL.EQZ r17, r16, r17, r27 + 11974 0x14 0x5d 0x4f 0x98 MUL r14, r17, r20 + 11978 0x00 0x00 NOPX + 11980 0x14 0xc1 0x5f 0x98 MUL r0, r19, r21 + 11984 0x00 0x00 NOPX + 11986 0x00 0x00 NOPX +.no_stack_arguments + 11988 0x00 0x1e 0xb8 0x00 0x01 0x04 JL #15728 +.delay_slot + 11994 0xe3 0xc2 0x39 0x06 0x1f 0x5c ST r16, [p7], #4; MUL r1, r18, r16 +.delay_slot + 12000 0x0f 0x1c 0x11 0x98 ST r0, [p7], #4 +.delay_slot + 12004 0x0f 0x1d 0xb1 0x98 ST r13, [p7], #4 +.delay_slot + 12008 0x0f 0x1c 0x31 0x98 ST r1, [p7], #4 +.delay_slot + 12012 0x0f 0x1d 0xd1 0x98 ST r14, [p7], #4 +.return_address + 12016 0xfd 0xb6 0x27 0x1c 0x51 0x85 0x07 0xf8 0x0b 0x50 0x78 0x76 LDA r13, [sp, #-20]; ST r2, [p7], #4; ADD r16, r2, #63; MOV r0, r13 +.no_stack_arguments + 12028 0x00 0x1e 0xb8 0x00 0x01 0x04 JL #15728 +.delay_slot + 12034 0xfc 0x40 0x20 0xae 0x41 0xe4 MOVX r17, #-64; MOV r1, r14 +.delay_slot + 12040 0x14 0x61 0x04 0x98 AND r16, r17, r16 +.delay_slot + 12044 0x0f 0x06 0x11 0x98 ST r16, [p7] +.delay_slot +.swstall delay_slot + 12048 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12050 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV +.return_address +.no_stack_arguments + 12064 0x00 0x1e 0xb8 0x00 0x01 0x04 JL #15728 +.delay_slot + 12070 0x18 0x57 0xa0 0xf8 MOV r1, r15 +.delay_slot + 12074 0x18 0x11 0x20 0xf8 MOV r0, r2 +.delay_slot +.swstall delay_slot + 12078 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12080 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12082 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV +.return_address + 12096 0xff 0x3e 0x2e 0xed 0x41 0xd4 LDA r15, [sp, #-8]; MOV lr, r13 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 12102 0x07 0xfd 0xb1 0x18 LDA r13, [sp, #-4] +.aggressive_scheduled_block_id 2 +.noswbrkpt + 12106 0x07 0xf7 0x99 0x18 LDA p7, [sp, #-12] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12110 0x07 0xf1 0xd1 0x18 LDA r14, [sp, #-16] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12114 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12118 0x0f 0x14 0x51 0x98 ST r2, [p7, #4] +.delay_slot + 12122 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 12128 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12130 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12132 0x00 0x00 NOPX +.label _ZL30setup_rmsnorm_row_major_paramsR33rmsnorm_row_major_internal_paramsRA10_Kj__end +.label __ZL30setup_rmsnorm_row_major_paramsR33rmsnorm_row_major_internal_paramsRA10_Kj___func_end0 + +.text_segment PM 12144 +.label __Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params___func_begin0 +.label _Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params +.function_start + 12144 0x00 0x18 0x06 0x84 0x8b 0x02 0xd2 0x01 0x36 0x60 0x78 0x76 MOVA r24, #0; MOVS p6, p1; MOVX vaddSign0, #1; MOV p2, p6 + 12156 0x00 0x30 0x00 0x00 0x01 0xf0 0xb2 0x08 0x10 0xba MOVA r16, #1; MOVXM p1, #508944 + 12166 0x20 0xc4 0x51 0x9c 0x8b 0x00 0x01 0xf3 0xb1 0xcc 0x10 0x76 LDA.s8 r17, [p1]; MOVS p1, p7; MOVXM p7, #508824 + 12178 0xe5 0x85 0xd0 0x06 0x22 0xd4 LDA el0, [p7], #8; VINSERT.32 x0, x0, #0, r24 + 12184 0xe5 0x82 0xd0 0x01 0x25 0xd4 LDA r0, [p7], #8; VMOV bmll0, x0 + 12190 0x07 0x9e 0x56 0x98 LDA r18, [p7], #-28 + 12194 0x00 0x10 0x00 0x00 0x01 0xc4 PADDXM [sp], #128 + 12200 0xe0 0x86 0xdf 0xd8 0x7b 0x0c LDA r1, [p7]; ST lr, [sp, #-20] + 12206 0x0f 0xfd 0x1d 0x98 ST p2, [sp, #-4] + 12210 0xff 0x46 0xb8 0xf5 0x00 0x5c ST r17, [sp, #-8]; MOVX crRnd, r17 + 12216 0x08 0x40 0x16 0x18 VCONV.bf16.fp32 wl0, bmll0 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first +.no_stack_arguments + 12220 0xfe 0x83 0xb0 0x00 0x07 0xae 0x00 0x00 0x41 0x3a ST p0, [sp, #-12]; JL #15728 +.delay_slot +.aggressive_scheduled_block_id 1 +.noswbrkpt + 12230 0xfd 0x13 0xb0 0x02 0x00 0x80 0xd0 0x02 ST p1, [sp, #-24]; VEXTRACT.16 r16, x0, #0, vaddSign0 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12238 0xfe 0x05 0xb9 0x46 0x1b 0x5c ST el0, [sp, #-16]; LSHL r17, r18, r16 +.delay_slot + 12244 0xfc 0xc2 0xb0 0x00 0x20 0xb9 0x70 0x02 ST r16, [sp, #-28]; VBCST.16 x0, r16 +.delay_slot + 12252 0x18 0x08 0xa0 0xf8 MOV m0, r17 +.delay_slot + 12256 0x00 0x2c 0xfc 0x17 0x27 0xf8 0x33 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; PADDB [p6], m0; VST x0, [sp, #-128]; NOPX; NOPM; NOPV +.return_address + 12272 0x07 0x46 0x16 0x98 LDA r16, [p7, #16] + 12276 0x18 0x18 0xf0 0x00 0x01 0x84 JZ r3, #12768 +.delay_slot +.swstall delay_slot + 12282 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12284 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12286 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12288 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12290 0x00 0x00 NOPX + 12292 0x07 0xec 0x39 0x18 LDA lr, [sp, #-20] + 12296 0xfd 0x73 0x20 0x04 0x00 0x00 0x1c 0x22 LDA p7, [sp, #-24]; NOPV +.label __ll6__Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params + 12304 0x80 0x18 0xe0 0x00 0x01 0x84 JZ r16, #12736 +.delay_slot +.swstall delay_slot + 12310 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12312 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12314 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12316 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12318 0x00 0x00 NOPX +.label __ll14__Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params + 12320 0xff 0x07 0x70 0x00 0x00 0x0c 0x78 0x58 0x10 0xba VLDA x0, [sp, #-128]; MOVXM ls, #12464 + 12330 0xfe 0x6e 0x20 0x00 0x00 0x0d 0xb8 0x70 0x10 0xba LDA r27, [sp, #-16]; MOVXM le, #12512 + 12340 0xff 0x6a 0x20 0x01 0x27 0x8a 0xa8 0x10 0x58 0xba LDA r26, [sp, #-8]; MOVX r18, #60; MOV r21, #16 + 12350 0xfe 0x83 0x20 0x19 0x47 0x8a 0x68 0x20 0x58 0xba LDA p0, [sp, #-12]; MOVX r20, #828; MOV r19, #32 + 12360 0x01 0x17 0x00 0x3f 0x07 0x6a 0x2c 0x3f 0xc8 0xba MOVA r23, #8; MOVX r16, #-5; ADD.NC r17, r16, #-1 + 12370 0x00 0x96 0x00 0x00 0x00 0x0c 0xb0 0x38 0x10 0xba MOVA r22, #4; MOVXM p1, #12400 + 12380 0x10 0x5a 0x40 0x18 MOVX vaddSign0, #1 + 12384 0x18 0x01 0x8a 0xf8 VCONV.fp32.bf16 cml0, x0 + 12388 0x00 0x2c 0xfd 0xc2 0x1b 0x2c NOPA; LSHL r16, r27, r16 + 12394 0xd7 0x50 0x02 0x01 0x15 0xe4 MOVX crRnd, r26; VMOV cml1, cml0 +.label TGT_F_Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params_256 +.loop_nesting 1 + 12400 0x00 0x2c 0xf0 0x39 0x68 0x01 0x5b 0x00 0x00 0x02 0xbc 0x3f 0xc8 0x00 0x00 0xe1 NOPA; VLDB x2, [p0], #64; NOPS; NOPX; ADD.NC lc, r16, #-1; NOPV + 12416 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 12432 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 12448 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLS_F_Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params_320 +.loop_nesting 2 +.begin_of_loop +.aggressive_scheduled_block_id 2 +.noswbrkpt + 12464 0x00 0x2c 0xf0 0x39 0x68 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB x2, [p0], #64; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12480 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12496 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLE_F_Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params_368 +.end_of_loop +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12512 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x7d 0x00 0x22 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm0, dm0, x2, x2, r20 +.loop_nesting 1 + 12528 0x06 0x07 0x52 0x98 LDA.s16 r26, [p6] + 12532 0x00 0x00 NOPX + 12534 0x00 0x00 NOPX + 12536 0xa0 0x04 0x41 0x48 VMAC.f dm0, dm0, x2, x2, r20 + 12540 0x00 0x00 NOPX + 12542 0x00 0x00 NOPX + 12544 0x00 0x00 NOPX + 12546 0x16 0xb5 0x5e 0x98 ASHL r26, r26, r21 + 12550 0x00 0x00 NOPX + 12552 0x08 0x10 0x16 0x18 VCONV.bf16.fp32 x0, cml0 + 12556 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 12558 0x1a 0x01 0x8a 0xf8 VCONV.fp32.bf16 cml2, x0 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 12562 0x00 0x03 0x51 0x66 0x92 0x4c 0x3d 0x62 VINSERT.32 x0, x0, #0, r26; VADD.f dm2, dm2, dm3, r18 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12570 0x1b 0x09 0x12 0xf8 VMOV bmll3, bmlh2 + 12574 0x1b 0x00 0x92 0xf8 VMOV bmll3, x0 + 12578 0x00 0x00 NOPX + 12580 0x00 0x00 NOPX + 12582 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 12584 0x18 0x28 0x12 0xf8 VMOV x0, bmll2 +.aggressive_scheduled_block_id 4 +.noswbrkpt + 12588 0x00 0x00 0x4e 0xc6 0x92 0x50 0x3d 0x62 VSHIFT x0, x0, x0, r19; VADD.f dm2, dm2, dm4, r18 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12596 0x1c 0x00 0x92 0xf8 VMOV bmll4, x0 + 12600 0x00 0x00 NOPX + 12602 0x00 0x00 NOPX + 12604 0x00 0x00 NOPX + 12606 0x00 0x00 NOPX +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 12608 0x18 0x28 0x12 0xf8 VMOV x0, bmll2 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 12612 0x00 0x00 0x56 0xc6 0x92 0x50 0x3d 0x62 VSHIFT x0, x0, x0, r21; VADD.f dm2, dm2, dm4, r18 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12620 0x1c 0x00 0x92 0xf8 VMOV bmll4, x0 + 12624 0x00 0x00 NOPX + 12626 0x00 0x00 NOPX + 12628 0x00 0x00 NOPX + 12630 0x00 0x00 NOPX +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 12632 0x18 0x28 0x12 0xf8 VMOV x0, bmll2 +.aggressive_scheduled_block_id 6 +.noswbrkpt + 12636 0x00 0x00 0x5e 0xc6 0x92 0x50 0x3d 0x62 VSHIFT x0, x0, x0, r23; VADD.f dm2, dm2, dm4, r18 +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12644 0x1c 0x00 0x92 0xf8 VMOV bmll4, x0 + 12648 0x00 0x00 NOPX + 12650 0x00 0x00 NOPX + 12652 0x00 0x00 NOPX + 12654 0x00 0x00 NOPX +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 12656 0x18 0x28 0x12 0xf8 VMOV x0, bmll2 +.aggressive_scheduled_block_id 7 +.noswbrkpt + 12660 0x00 0x00 0x5a 0xc6 0x92 0x50 0x3d 0x62 VSHIFT x0, x0, x0, r22; VADD.f dm2, dm2, dm4, r18 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12668 0x1c 0x00 0x92 0xf8 VMOV bmll4, x0 + 12672 0x00 0x00 NOPX + 12674 0x00 0x00 NOPX + 12676 0x00 0x00 NOPX + 12678 0x00 0x00 NOPX + 12680 0x08 0x11 0x16 0x18 VCONV.bf16.fp32 x0, cml2 + 12684 0x00 0x00 NOPX + 12686 0x1e 0x81 0x01 0xb8 VEXTRACT.16 r26, x0, #0, vaddSign0 + 12690 0x00 0x00 NOPX +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id first + 12692 0x16 0xb5 0x5e 0x98 ASHL r26, r26, r21 +.aggressive_scheduled_block_id 8 +.noswbrkpt + 12696 0x00 0x03 0x51 0x66 0x92 0x4c 0x3d 0x62 VINSERT.32 x0, x0, #0, r26; VADD.f dm2, dm2, dm3, r18 +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12704 0x1a 0x00 0x92 0xf8 VMOV bmll2, x0 + 12708 0x00 0x00 NOPX +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id first + 12710 0x14 0x62 0x60 0x18 JNZD r17, r17, p1 +.delay_slot +.aggressive_scheduled_block_id 9 +.noswbrkpt + 12714 0x06 0x2f 0x57 0x18 ST.s16 r26, [p6], #4 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 12718 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 12720 0x08 0x41 0x16 0x18 VCONV.bf16.fp32 wl0, bmll2 +.delay_slot +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 12724 0x18 0x04 0x8a 0xf8 VMOV cml0, cml1 +.delay_slot +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12728 0x00 0x2b 0x60 0x03 0x40 0x80 0xd0 0x02 NOPS; VEXTRACT.16 r26, x0, #0, vaddSign0 +.label __ll61__Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params +.loop_nesting 0 + 12736 0x07 0xff 0x19 0x18 LDA p6, [sp, #-4] + 12740 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 12744 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128 +.delay_slot +.swstall delay_slot + 12750 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12752 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12754 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12756 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.label TGT_F_Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params_624 + 12768 0x80 0x19 0x58 0x00 0x01 0x84 JZ r16, #12976 +.delay_slot +.swstall delay_slot + 12774 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12776 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12778 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12780 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12782 0x00 0x00 NOPX + 12784 0xfc 0xc6 0x20 0x98 0x8b 0x00 0x00 0x0c 0x79 0x08 0x10 0x76 LDA r17, [sp, #-28]; MOVS p0, p6; MOVXM ls, #12816 + 12796 0xfd 0x73 0x20 0x00 0x00 0x0d 0xb9 0x40 0x10 0xba LDA p7, [sp, #-24]; MOVXM le, #12928 + 12806 0x00 0x2c 0xf0 0x00 0x10 0x02 0xbc 0x10 0x7e 0xba NOPA; NOPB; MOV lc, r16 +.label ZLS_F_Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params_672 +.loop_nesting 1 +.begin_of_loop + 12816 0x00 0x1e 0x37 0x18 ST.s16 r17, [p0], #2 + 12820 0x00 0x00 NOPX + 12822 0x00 0x00 NOPX + 12824 0x00 0x00 NOPX + 12826 0x00 0x00 NOPX + 12828 0x00 0x00 NOPX + 12830 0x00 0x00 NOPX + 12832 0x03 0xc6 0xe0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 ST.s16 r17, [p0], #2; NOPB; NOPS; NOPX; NOPM; NOPV + 12848 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 12864 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 12880 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 12896 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 12912 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLE_F_Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params_784 +.end_of_loop + 12928 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.loop_nesting 0 + 12944 0x00 0x18 0x10 0x00 0x00 0x84 J #12320 +.delay_slot + 12950 0x07 0xec 0x39 0x18 LDA lr, [sp, #-20] +.delay_slot +.swstall delay_slot + 12954 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12956 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12958 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12960 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params_832 + 12976 0x00 0x18 0x08 0x00 0x00 0x84 J #12304 +.delay_slot + 12982 0x07 0xec 0x39 0x18 LDA lr, [sp, #-20] +.delay_slot + 12986 0x07 0xeb 0x99 0x18 LDA p7, [sp, #-24] +.delay_slot +.swstall delay_slot + 12990 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12992 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12994 0x00 0x00 NOPX +.label _Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params__end +.label __Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params___func_end0 + +.text_segment PM 13008 +.label __ZN12mllib_graphs35rmsnorm_row_major_part1_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EEEEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSF_ISG_SI_T1_EERA11_KjRNSF_ISG_NSH_3outET2_EE___func_begin0 +.label _ZN12mllib_graphs35rmsnorm_row_major_part1_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EEEEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSF_ISG_SI_T1_EERA11_KjRNSF_ISG_NSH_3outET2_EE +.function_start + 13008 0x00 0x07 0xc2 0xc7 0xc8 0x44 MOVXM p1, #508900 + 13014 0x20 0xc2 0xd0 0x98 0x8b 0x03 0x30 0x60 0x72 0xba LDA r16, [p1]; MOVS p0, p6; MOV p6, p0 + 13024 0x18 0x17 0xa0 0xf8 MOV r0, r15 + 13028 0x1b 0xd6 0xc0 0xf8 MOV r15, p3 + 13032 0x00 0x00 NOPX + 13034 0x00 0x00 NOPX + 13036 0x00 0x00 NOPX + 13038 0x00 0x00 NOPX + 13040 0x80 0x19 0xb0 0x40 0x01 0x84 JNZ r16, #13152 +.delay_slot + 13046 0x00 0x10 0x00 0x00 0x01 0xc4 PADDXM [sp], #128 +.delay_slot + 13052 0x0f 0xf4 0x1d 0x98 ST p0, [sp, #-12] +.delay_slot + 13056 0x0f 0xfb 0x9d 0x98 ST p7, [sp, #-8] +.delay_slot + 13060 0x0f 0xfc 0x15 0x98 ST r0, [sp, #-4] +.delay_slot + 13064 0x0f 0xf0 0x3d 0x98 ST lr, [sp, #-16] + 13068 0x01 0x92 0x00 0x3f 0x46 0x0a 0x2d 0x70 0x78 0xba MOVA r18, #12; MOVX r20, #-16; MOV r17, CORE_ID + 13078 0x00 0x33 0x00 0x00 0x01 0xf3 0xb1 0xec 0x10 0xba MOVA r19, #1; MOVXM p7, #508888 + 13088 0xe0 0xc6 0x38 0xd5 0x20 0x5c ST r17, [p7]; EXTEND.u8 r21, r17 + 13094 0x8c 0x69 0xba 0x35 0xfe 0x24 LSHL r17, r17, r20; ADD.NC r20, r21, #-2 + 13100 0x00 0x07 0xce 0xc7 0xb8 0x44 MOVXM p7, #508892 + 13106 0xe0 0xd2 0x30 0x00 0x01 0xf3 0xb2 0x08 0x11 0x3a ST r20, [p7]; MOVXM p7, #508944 + 13116 0xe0 0xc8 0xe0 0x00 0x01 0xf0 0x32 0x06 0x10 0xba ST.s8 r18, [p7]; MOVXM p0, #508940 + 13126 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17 + 13130 0x00 0x07 0xce 0xc7 0xc0 0x44 MOVXM p7, #508896 + 13136 0x00 0x00 NOPX + 13138 0x00 0x00 NOPX + 13140 0x00 0x00 NOPX + 13142 0x00 0x00 NOPX + 13144 0x08 0x06 0x71 0x98 ST r19, [p0] + 13148 0x0f 0x06 0x31 0x98 ST r17, [p7] +.label TGT_F_ZN12mllib_graphs35rmsnorm_row_major_part1_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EEEEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSF_ISG_SI_T1_EERA11_KjRNSF_ISG_NSH_3outET2_EE_144 + 13152 0x05 0x02 0x80 0x00 0x01 0xf3 0xb1 0xd0 0x10 0xba MOVA dj0, #40; MOVXM p7, #508832 + 13162 0xf7 0xc2 0x30 0x01 0x80 0x0a 0x2a 0xf0 0x79 0x3a ST r16, [p7], #-20; MOVX r24, #0; MOV r17, sp + 13172 0x40 0x05 0xd0 0xd1 0x98 0x14 LDA el0, [p2, dj0]; ADD.NC p0, r17, #-104 + 13178 0x02 0x1c 0x0e 0x98 LDA eh0, [p2], #4 + 13182 0x02 0x1c 0xee 0x98 LDA el3, [p2], #4 + 13186 0x02 0x1c 0xae 0x98 LDA el2, [p2], #4 + 13190 0x02 0x1c 0x6e 0x98 LDA el1, [p2], #4 + 13194 0x02 0x1c 0x4e 0x98 LDA eh1, [p2], #4 + 13198 0x02 0x1c 0x8e 0x98 LDA eh2, [p2], #4 + 13202 0x43 0x85 0xdf 0xd8 0x5b 0x0c LDA el0, [p2], #4; ST el0, [sp, #-20] + 13208 0x43 0x81 0xdf 0x30 0x1b 0x0c LDA eh0, [p2], #4; ST eh0, [sp, #-104] + 13214 0x0f 0x9c 0xed 0x98 ST el3, [sp, #-100] + 13218 0x0f 0xa0 0xad 0x98 ST el2, [sp, #-96] + 13222 0x0f 0xa4 0x6d 0x98 ST el1, [sp, #-92] + 13226 0x0f 0xa8 0x4d 0x98 ST eh1, [sp, #-88] + 13230 0x0f 0xac 0x8d 0x98 ST eh2, [sp, #-84] + 13234 0x0f 0xb0 0x2d 0x98 ST el0, [sp, #-80] + 13238 0x0f 0xb4 0x0d 0x98 ST eh0, [sp, #-76] + 13242 0x02 0x04 0x0e 0x98 LDA eh0, [p2] + 13246 0x02 0x14 0x2e 0x98 LDA el0, [p2, #4] + 13250 0x00 0x00 NOPX + 13252 0x80 0x1a 0x18 0x40 0x01 0x84 JNZ r16, #13360 +.delay_slot + 13258 0x0f 0xeb 0x9d 0x98 ST p7, [sp, #-24] +.delay_slot +.swstall delay_slot + 13262 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13264 0x00 0x00 NOPX +.delay_slot + 13266 0x0f 0xb8 0x0d 0x98 ST eh0, [sp, #-72] +.delay_slot + 13270 0x0f 0xbc 0x2d 0x98 ST el0, [sp, #-68] +.no_stack_arguments + 13274 0x02 0x02 0x80 0x00 0x05 0xbc 0x00 0x00 0x40 0xba MOVA dj0, #16; JL #11744 +.delay_slot + 13284 0x00 0x07 0xce 0xc7 0x50 0x44 MOVXM p7, #508840 +.delay_slot + 13290 0x0f 0x1f 0x11 0x98 ST r24, [p7], #4 +.delay_slot + 13294 0x0f 0xbc 0x41 0x98 ST dj0, [p7], #-20 +.delay_slot +.swstall delay_slot + 13298 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13300 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.return_address + 13312 0xe3 0xc2 0xd0 0x00 0x01 0xf1 0x31 0xee 0x10 0xba LDA r16, [p7], #4; MOVXM p2, #508892 + 13322 0x07 0x06 0x56 0x98 LDA r18, [p7] + 13326 0x02 0x06 0x36 0x98 LDA r17, [p2] + 13330 0x00 0x00 NOPX + 13332 0x00 0x00 NOPX + 13334 0x00 0x00 NOPX + 13336 0x00 0x00 NOPX + 13338 0x00 0x00 NOPX + 13340 0x14 0xa1 0x0f 0x98 MUL r16, r18, r16 + 13344 0x00 0x00 NOPX + 13346 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 + 13350 0x00 0x00 NOPX + 13352 0xea 0xc2 0x30 0x00 0x01 0xa5 0x70 0x02 ST r16, [p7, #20]; NOPM +.label TGT_F_ZN12mllib_graphs35rmsnorm_row_major_part1_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EEEEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSF_ISG_SI_T1_EERA11_KjRNSF_ISG_NSH_3outET2_EE_352 + 13360 0xfd 0x73 0x20 0x00 0x01 0xf1 0x31 0xf2 0x10 0xba LDA p7, [sp, #-24]; MOVXM p2, #508900 + 13370 0x40 0x82 0xd0 0x00 0x91 0x54 LDA r0, [p2]; MOV m0, #36 + 13376 0x00 0x00 NOPX + 13378 0x00 0x00 NOPX + 13380 0x00 0x00 NOPX + 13382 0x00 0x00 NOPX + 13384 0x00 0x00 NOPX + 13386 0x07 0x08 0x36 0x98 LDA r1, [p7], m0 +.no_stack_arguments + 13390 0x00 0x1e 0xb8 0x00 0x01 0x04 JL #15728 +.delay_slot +.swstall delay_slot + 13396 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13398 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13400 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13402 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13404 0x00 0x01 0x67 0x98 NOPA +.return_address + 13408 0x1e 0x9a 0x04 0xcf 0x0c 0x24 EQZ r26, r3; ADD.NC p2, r15, #12 + 13414 0x02 0xff 0x76 0x98 LDA r27, [p2], #-4 + 13418 0x02 0xfe 0x16 0x98 LDA r16, [p2], #-4 + 13422 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4 + 13426 0x02 0x46 0x56 0x98 LDA r18, [p2, #16] + 13430 0x00 0x00 NOPX + 13432 0x00 0x00 NOPX + 13434 0x00 0x00 NOPX + 13436 0x00 0x00 NOPX + 13438 0x00 0x00 NOPX + 13440 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 13444 0x0a 0x06 0x11 0x98 ST r16, [p2] + 13448 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 13452 0x00 0x00 NOPX + 13454 0x00 0x00 NOPX + 13456 0x00 0x00 NOPX + 13458 0x14 0x97 0x08 0x18 ACQ.COND r18, r16, r26 + 13462 0xf6 0x00 0x80 0x01 0x00 0x29 0xea 0x60 0x78 0xba MOVA m0, #-80; MOVX r16, #1; MOV r15, p2 + 13472 0x00 0x00 NOPX + 13474 0x00 0x00 NOPX + 13476 0x07 0x0a 0x56 0x98 LDA r18, [p7], m0 + 13480 0x06 0x06 0x36 0x98 LDA r17, [p6] + 13484 0x02 0x5c 0x9e 0x98 LDA p1, [p2], #20 + 13488 0x00 0x00 NOPX +.no_stack_arguments + 13490 0x00 0x17 0xb8 0x00 0x01 0x04 JL #12144 +.delay_slot + 13496 0x53 0x91 0x60 0x03 0xb2 0x60 0x70 0x02 MOVS p2, p7; MOV p7, p2 +.delay_slot + 13504 0x1e 0x64 0xc0 0xf8 MOV p6, p2 +.delay_slot + 13508 0x14 0xa1 0x0d 0x98 LSHL r16, r18, r16 +.delay_slot +.swstall delay_slot + 13512 0x00 0x00 NOPX +.delay_slot + 13514 0x00 0x2c 0xf0 0xd1 0x82 0x94 NOPA; ADD.NC p0, r17, r16 +.return_address + 13520 0x18 0x80 0x58 0xb8 MOV dj0, #44 + 13524 0xc0 0x06 0xd0 0x00 0x01 0xf0 0x31 0xf2 0x10 0xba LDA r1, [p6, dj0]; MOVXM p0, #508900 + 13534 0x00 0x04 0x16 0x98 LDA r0, [p0] +.no_stack_arguments + 13538 0x00 0x1e 0xb8 0x00 0x01 0x04 JL #15728 +.delay_slot + 13544 0x1e 0x6e 0xc0 0xf8 MOV p6, p7 +.delay_slot +.swstall delay_slot + 13548 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13550 0x00 0x00 NOPX +.delay_slot + 13552 0x1a 0x67 0xa0 0xf8 MOV p2, r15 +.delay_slot + 13556 0x00 0x2c 0xf0 0x00 0x27 0x88 0x8b 0x01 0xe8 0x50 0x70 0xf6 NOPA; NOPB; MOVS p7, p2; MOV r15, r1 +.return_address + 13568 0xea 0xc6 0xd0 0x1f 0x2f 0xfa 0x08 0x01 0x58 0xba LDA r17, [p7, #20]; ADD r18, r15, #-1; MOV r16, #1 + 13578 0x10 0xf5 0x27 0x98 EQ r26, r3, r18 + 13582 0x00 0x00 NOPX + 13584 0x00 0x00 NOPX + 13586 0x00 0x00 NOPX + 13588 0x00 0x00 NOPX + 13590 0x00 0x00 NOPX + 13592 0x14 0x55 0x08 0x18 REL.COND r17, r16, r26 + 13596 0xfe 0x07 0x20 0x00 0x01 0xf1 0x31 0xf2 0x10 0xba LDA lr, [sp, #-16]; MOVXM p2, #508900 + 13606 0xdc 0xc6 0xd0 0x01 0x80 0x0b 0x6e 0x90 0x78 0xba LDA r17, [p6, #-8]; MOVX r24, #0; MOV r27, r26 + 13616 0x07 0xee 0x51 0x18 LDA r18, [sp, #-20] + 13620 0x02 0x06 0x76 0x98 LDA r19, [p2] + 13624 0x07 0xf4 0x19 0x18 LDA p0, [sp, #-12] + 13628 0x07 0xfb 0x99 0x18 LDA p7, [sp, #-8] + 13632 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] + 13636 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 13640 0x14 0x21 0x11 0x98 SUB r16, r16, r17 +.delay_slot + 13644 0x8c 0x60 0x48 0x32 0xff 0x24 SEL.EQZ r17, r17, r16, r27; ADD.NC r16, r18, #-1 +.delay_slot + 13650 0xdc 0xc6 0x30 0x27 0xb8 0x3e 0x0c 0xc0 0x49 0x3a ST r17, [p6, #-8]; EQ r27, r19, r16; ADD.NC r16, r19, #1 +.delay_slot + 13660 0xd0 0x11 0x68 0x43 0x04 0x5c MOVS p6, p0; SEL.EQZ r16, r16, r24, r27 +.delay_slot + 13666 0x40 0xc2 0x30 0x3f 0xfc 0x00 0x00 0x00 0x71 0x3a ST r16, [p2]; PADDXM [sp], #-128 +.label _ZN12mllib_graphs35rmsnorm_row_major_part1_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EEEEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSF_ISG_SI_T1_EERA11_KjRNSF_ISG_NSH_3outET2_EE__end +.label __ZN12mllib_graphs35rmsnorm_row_major_part1_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EEEEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSF_ISG_SI_T1_EERA11_KjRNSF_ISG_NSH_3outET2_EE___func_end0 + +.text_segment PM 13680 +.label __Z14_b8292_wrapperPPv___func_begin0 +.label _Z14_b8292_wrapperPPv +.function_start + 13680 0x1a 0x60 0xc0 0xf8 MOV p2, p0 + 13684 0x02 0x1c 0x1e 0x98 LDA p0, [p2], #4 + 13688 0x02 0x2c 0x9e 0x98 LDA p1, [p2], #8 + 13692 0x02 0xf5 0x9e 0x98 LDA p3, [p2, #-4] + 13696 0x02 0x05 0x1e 0x98 LDA p2, [p2] +.tail_call + 13700 0x00 0x19 0x68 0x00 0x00 0x84 J #13008 +.delay_slot +.swstall delay_slot + 13706 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13708 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13710 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13712 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13714 0x00 0x00 NOPX +.label _Z14_b8292_wrapperPPv__end +.label __Z14_b8292_wrapperPPv___func_end0 + +.text_segment PM 13728 +.label __ZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE___func_begin0 +.label _ZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE +.function_start + 13728 0x00 0x07 0xc8 0xc7 0xe8 0x44 MOVXM p4, #508916 + 13734 0x04 0x06 0x16 0x98 LDA r16, [p4] + 13738 0x00 0x10 0x00 0x00 0x01 0xc4 PADDXM [sp], #128 + 13744 0x0f 0xff 0x9d 0x98 ST p7, [sp, #-4] + 13748 0x00 0x00 NOPX + 13750 0x00 0x00 NOPX + 13752 0x00 0x00 NOPX + 13754 0x00 0x00 NOPX + 13756 0x80 0x1b 0x18 0x40 0x01 0x84 JNZ r16, #13872 +.delay_slot + 13762 0x0f 0xe8 0x1d 0x98 ST p0, [sp, #-24] +.delay_slot + 13766 0x0f 0xf1 0x9d 0x98 ST p3, [sp, #-16] +.delay_slot + 13770 0x0f 0xec 0x9d 0x98 ST p1, [sp, #-20] +.delay_slot + 13774 0x0f 0xf9 0xf5 0x98 ST r15, [sp, #-8] +.delay_slot + 13778 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12] + 13782 0x01 0x91 0x00 0x3f 0x36 0x0a 0x0d 0x70 0x78 0xba MOVA r17, #12; MOVX r19, #-16; MOV r16, CORE_ID + 13792 0x00 0x32 0x00 0x00 0x01 0xf3 0xb1 0xf4 0x10 0xba MOVA r18, #1; MOVXM p7, #508904 + 13802 0xe0 0xc2 0x38 0x51 0x20 0x5c ST r16, [p7]; EXTEND.u8 r20, r16 + 13808 0x84 0x27 0xb9 0xb4 0xfe 0x24 LSHL r16, r16, r19; ADD.NC r19, r20, #-2 + 13814 0x00 0x07 0xce 0xc7 0xd8 0x44 MOVXM p7, #508908 + 13820 0xe0 0xce 0x30 0x00 0x01 0xf3 0xb2 0x08 0x11 0x3a ST r19, [p7]; MOVXM p7, #508944 + 13830 0xe0 0xc4 0xe0 0x00 0x01 0xf0 0x32 0x06 0x10 0xba ST.s8 r17, [p7]; MOVXM p0, #508940 + 13840 0x14 0x20 0x90 0x18 EXTEND.u8 r16, r16 + 13844 0x00 0x07 0xce 0xc7 0xe0 0x44 MOVXM p7, #508912 + 13850 0x00 0x00 NOPX + 13852 0x00 0x00 NOPX + 13854 0x00 0x00 NOPX + 13856 0x00 0x00 NOPX + 13858 0x08 0x06 0x51 0x98 ST r18, [p0] + 13862 0x00 0x2c 0xf7 0x06 0x11 0x80 0x00 0x00 0x00 0x7a NOPA; ST r16, [p7]; NOPX +.label TGT_F_ZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE_144 + 13872 0x43 0x85 0xd1 0x88 0x8b 0x01 0x10 0x48 0x40 0x28 0x58 0x76 LDA el0, [p2], #4; MOVS p1, p2; MOVX r17, #2; MOV dj0, #40 + 13884 0x43 0x9d 0xd0 0x00 0x01 0xf3 0xb1 0xda 0x10 0xba LDA el3, [p2], #4; MOVXM p7, #508852 + 13894 0x43 0x95 0xd9 0x2b 0xc1 0xd4 LDA el2, [p2], #4; MOV r18, sp + 13900 0x43 0x81 0xd0 0xd2 0x98 0x14 LDA eh0, [p2], #4; ADD.NC p0, r18, #-104 + 13906 0x02 0x1c 0x6e 0x98 LDA el1, [p2], #4 + 13910 0x02 0x1c 0x4e 0x98 LDA eh1, [p2], #4 + 13914 0x02 0x1c 0x8e 0x98 LDA eh2, [p2], #4 + 13918 0x43 0x85 0xdf 0x30 0x5b 0x0c LDA el0, [p2], #4; ST el0, [sp, #-104] + 13924 0x20 0x3e 0xdf 0x39 0xdb 0x0c LDA r15, [p1, dj0]; ST el3, [sp, #-100] + 13930 0x0f 0xa0 0xad 0x98 ST el2, [sp, #-96] + 13934 0x0f 0xa4 0x0d 0x98 ST eh0, [sp, #-92] + 13938 0x0f 0xa8 0x6d 0x98 ST el1, [sp, #-88] + 13942 0x0f 0xac 0x4d 0x98 ST eh1, [sp, #-84] + 13946 0x0f 0xb0 0x8d 0x98 ST eh2, [sp, #-80] + 13950 0x0f 0xb4 0x2d 0x98 ST el0, [sp, #-76] + 13954 0x02 0x04 0x0e 0x98 LDA eh0, [p2] + 13958 0x02 0x14 0x2e 0x98 LDA el0, [p2, #4] + 13962 0x00 0x00 NOPX + 13964 0x00 0x00 NOPX + 13966 0x00 0x00 NOPX +.no_stack_arguments + 13968 0x00 0x16 0xf0 0x00 0x01 0x04 JL #11744 +.delay_slot +.swstall delay_slot + 13974 0x00 0x00 NOPX +.delay_slot + 13976 0x0f 0xb8 0x0d 0x98 ST eh0, [sp, #-72] +.delay_slot + 13980 0x0f 0xbc 0x2d 0x98 ST el0, [sp, #-68] +.delay_slot + 13984 0xfd 0xc6 0x30 0x40 0x82 0x5c ST r17, [p7], #-8; MOVX r16, #16 +.delay_slot + 13990 0x00 0x2c 0xf7 0xde 0x11 0x80 0x00 0x00 0x00 0x7a NOPA; ST r16, [p7], #-12; NOPX +.return_address + 14000 0xfd 0x13 0x20 0x00 0x01 0xf1 0x31 0xfa 0x10 0xba LDA p1, [sp, #-24]; MOVXM p2, #508916 + 14010 0x40 0xc2 0xd0 0x00 0x01 0xf0 0x31 0xf6 0x10 0xba LDA r16, [p2]; MOVXM p0, #508908 + 14020 0xfd 0xb3 0x20 0x00 0x00 0x0c 0x7b 0x98 0x10 0xba LDA p3, [sp, #-20]; MOVXM ls, #14128 + 14030 0x00 0xd2 0xd0 0x3f 0x37 0x68 0xc8 0x01 0x58 0xba LDA r20, [p0]; MOVX r19, #-5; MOV r6, #1 + 14040 0xfe 0x03 0x20 0x00 0x00 0x0d 0xbb 0xc8 0x10 0xba LDA p0, [sp, #-16]; MOVXM le, #14224 + 14050 0x10 0x30 0x01 0x18 MOVX r24, #0 + 14054 0x00 0x00 NOPX + 14056 0x01 0x06 0xb6 0x98 LDA r21, [p1] + 14060 0x0f 0x2e 0x11 0x98 ST r16, [p7], #8 + 14064 0x60 0x93 0xde 0x7e 0x23 0x0c LDA p1, [p3]; ST r24, [p7], #12 + 14070 0x07 0x9e 0xd6 0x98 LDA r22, [p7], #-28 + 14074 0x07 0x1e 0xf6 0x98 LDA r23, [p7], #4 + 14078 0x07 0x06 0x36 0x98 LDA r17, [p7] + 14082 0x00 0x04 0x1e 0x98 LDA p0, [p0] + 14086 0x00 0x00 NOPX + 14088 0x00 0x00 NOPX + 14090 0x00 0x00 NOPX + 14092 0x00 0x00 NOPX + 14094 0x15 0xe7 0x3d 0x98 LSHL r19, r23, r19 + 14098 0x8c 0xed 0xfa 0xf3 0x00 0x24 MUL r19, r17, r22; ADD.NC lc, r19, #0 + 14104 0x00 0x00 NOPX + 14106 0x14 0xe7 0x4f 0x98 MUL r19, r19, r20 + 14110 0x00 0x00 NOPX + 14112 0xea 0xce 0x39 0x98 0xdb 0x5c ST r19, [p7, #20]; LSHL r6, r19, r6 + 14118 0x00 0x2c 0xf0 0x00 0x10 0x03 0xb5 0x4c 0xae 0xba NOPA; NOPB; ADD.NC p7, r21, r6 +.label ZLS_F_ZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE_400 +.loop_nesting 1 +.begin_of_loop + 14128 0x01 0x1c 0x15 0x98 VLDA bmll0, [p1], #64 + 14132 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX + 14144 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 14160 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 14176 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 14192 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 14208 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLE_F_ZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE_496 +.end_of_loop + 14224 0x00 0x2c 0xf0 0x00 0x20 0x1c 0x06 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmll0, [p0], #64; NOPX; NOPM; NOPV +.loop_nesting 0 + 14240 0x00 0x00 0x31 0xef 0x60 0x44 MOVXM ls, #14256 + 14246 0x00 0x00 0x36 0xf0 0x20 0x44 MOVXM le, #14352 + 14252 0x1d 0x78 0x80 0x18 ADD.NC lc, r17, #0 +.label ZLS_F_ZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE_528 +.loop_nesting 1 +.begin_of_loop + 14256 0xe5 0xca 0x50 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 LDA.s16 r18, [p7], #4; NOPB; NOPS; NOPX; NOPM; NOPV + 14272 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 14288 0x05 0xca 0xe0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 ST.s16 r18, [p0], #4; NOPB; NOPS; NOPX; NOPM; NOPV + 14304 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 14320 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 14336 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLE_F_ZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE_624 +.end_of_loop + 14352 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.loop_nesting 0 + 14368 0x07 0xf4 0x39 0x18 LDA lr, [sp, #-12] + 14372 0x00 0x00 NOPX + 14374 0x07 0xff 0x99 0x18 LDA p7, [sp, #-4] +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 14378 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 14380 0x07 0xf9 0xf1 0x18 LDA r15, [sp, #-8] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14384 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14390 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14392 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14396 0x14 0x20 0x07 0x18 ADD r16, r16, #1 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 14400 0x13 0xf7 0x07 0x98 EQ r27, r15, r16 +.delay_slot + 14404 0x14 0x21 0x82 0x18 SEL.EQZ r16, r16, r24, r27 +.delay_slot + 14408 0x0a 0x06 0x11 0x98 ST r16, [p2] +.delay_slot +.swstall delay_slot + 14412 0x00 0x00 NOPX +.label _ZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE__end +.label __ZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE___func_end0 + +.text_segment PM 14416 +.label __Z14_b8300_wrapperPPv___func_begin0 +.label _Z14_b8300_wrapperPPv +.function_start + 14416 0x1a 0x60 0xc0 0xf8 MOV p2, p0 + 14420 0x02 0x1c 0x1e 0x98 LDA p0, [p2], #4 + 14424 0x02 0x2c 0x9e 0x98 LDA p1, [p2], #8 + 14428 0x02 0xf5 0x9e 0x98 LDA p3, [p2, #-4] + 14432 0x02 0x05 0x1e 0x98 LDA p2, [p2] +.tail_call + 14436 0x00 0x1a 0xd0 0x00 0x00 0x84 J #13728 +.delay_slot +.swstall delay_slot + 14442 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14444 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14446 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14448 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14450 0x00 0x00 NOPX +.label _Z14_b8300_wrapperPPv__end +.label __Z14_b8300_wrapperPPv___func_end0 + +.text_segment PM 14464 +.label __Z23rmsnorm_row_major_part2I8bfloat16EvPT_S2_S2_S2_R33rmsnorm_row_major_internal_params___func_begin0 +.label _Z23rmsnorm_row_major_part2I8bfloat16EvPT_S2_S2_S2_R33rmsnorm_row_major_internal_params +.function_start + 14464 0x00 0x07 0xc8 0xc7 0x30 0x44 MOVXM p4, #508824 + 14470 0x83 0x9a 0xd0 0x1f 0x61 0x54 LDA r6, [p4], #4; MOV m0, #-40 + 14476 0x04 0x08 0x56 0x98 LDA r2, [p4], m0 + 14480 0x04 0x1c 0x12 0x98 LDA.s16 r0, [p4], #2 + 14484 0x00 0x00 NOPX + 14486 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 14492 0x80 0x96 0x50 0x00 0x01 0xf2 0xb2 0x08 0x10 0xba LDA.s16 r5, [p4]; MOVXM p5, #508944 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 14502 0xa0 0xb8 0x50 0x3e 0x47 0x68 0xc0 0x32 0x58 0xba LDA.s8 r14, [p5]; MOVX r4, #-5; MOV dj1, #50 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14512 0x84 0x2e 0xd7 0xfd 0x95 0x80 0x01 0x98 0x4d 0xfa LDA r11, [p4, dj1]; ST r12, [sp, #-4]; LSHL r12, r6, r4 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14522 0x08 0x02 0x87 0xf5 0x35 0x80 0x32 0x09 0x2b 0x3f 0xc8 0x76 MOVA dj0, #64; ST r9, [sp, #-12]; MOVX r3, #16; ADD.NC r9, r12, #-1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14534 0x00 0x21 0x07 0xe5 0xb5 0x80 0xc1 0xf5 0xa8 0x80 0x78 0x76 MOVA r1, #1; ST r13, [sp, #-28]; ASHL r12, r0, r3; MOV r13, dj0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14546 0xff 0x3e 0xb0 0x0c 0xf0 0xec 0x00 0xc8 0xb9 0x3a ST r15, [sp, #-8]; LSHL r15, r6, r1; VINSERT.32 x0, x0, #0, r12 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14556 0xfe 0x3a 0xb0 0x04 0x07 0x5c 0x00 0x00 0x61 0x3a ST r14, [sp, #-16]; JZ r2, #15072 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14566 0xfc 0x22 0xb0 0x0a 0x81 0xf4 0xa0 0x49 0x79 0x3a ST r8, [sp, #-32]; ASHL r8, r5, r3; VMOV bmlh1, x0 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 14576 0xfd 0x2e 0xb0 0x1d 0xd4 0x00 0x00 0x88 0xb9 0x3a ST r11, [sp, #-24]; MOVX crRnd, r14; VINSERT.32 x0, x0, #0, r8 +.delay_slot + 14586 0xf8 0x07 0x01 0xc0 0xb6 0x16 0x80 0xec 0x80 0x49 0x78 0x76 MOVA r7, #-64; VCONV.bf16.fp32 wl3, bmlh1; LSHL r8, r11, r1; VMOV bmll1, x0 +.delay_slot + 14598 0xfd 0xaa 0xb0 0x1e 0xa3 0xa4 0x02 0x10 0x79 0x3a ST r10, [sp, #-20]; AND r10, r15, r7; MOV m0, r8 +.delay_slot + 14608 0x08 0x12 0xc6 0x17 0x23 0x58 0xa1 0xd2 PADDB [p3], m0; VCONV.bf16.fp32 wl0, bmll1; SUB r12, r13, r10 + 14616 0x00 0x03 0x80 0x49 0x0b 0x00 0x00 0x0c 0x7d 0x40 0x10 0x76 MOVA dc0, #0; MOVS dn0, r9; MOVXM ls, #14976 + 14628 0x67 0x8a 0x00 0x00 0x00 0x0d 0xbd 0x60 0x10 0xba MOVA r10, #828; MOVXM le, #15040 + 14638 0x07 0x8b 0x00 0x04 0x8f 0xf8 0x40 0x81 0xa8 0xba MOVA r11, #60; ADD r8, r2, #-1; VEXTBCST.16 x1, x0, #0 + 14648 0x0b 0x48 0x00 0x0c 0x41 0xe4 MOVX vaddSign0, #1; MOV m0, r12 + 14654 0x18 0x19 0x03 0x58 VEXTBCST.16 x0, x3, #0 + 14658 0x18 0x03 0x8a 0xf8 VCONV.fp32.bf16 cml0, x1 + 14662 0x00 0x2c 0xf0 0x00 0x00 0x0e 0x34 0xa8 0x10 0xba NOPA; MOVXM p4, #14672 +.label TGT_F_Z23rmsnorm_row_major_part2I8bfloat16EvPT_S2_S2_S2_R33rmsnorm_row_major_internal_params_208 +.loop_nesting 1 + 14672 0x65 0xb2 0x5a 0xe9 0x01 0x14 LDA.s16 r12, [p3], #4; ADD.NC lc, r9, #1 + 14678 0x00 0x00 NOPX + 14680 0x00 0x00 NOPX + 14682 0x00 0x00 NOPX + 14684 0x00 0x00 NOPX + 14686 0x00 0x00 NOPX + 14688 0x00 0x00 NOPX + 14690 0x18 0xb1 0x72 0xf8 VBCST.16 x1, r12 + 14694 0x00 0x00 NOPX + 14696 0x53 0xe2 0x01 0x48 VMUL.f dm3, x1, x0, r10 + 14700 0x00 0x00 NOPX + 14702 0x00 0x00 NOPX + 14704 0x5b 0x60 0x3d 0x48 VADD.f dm3, dm3, dm0, r11 + 14708 0x00 0x00 NOPX + 14710 0x00 0x00 NOPX + 14712 0x00 0x00 NOPX + 14714 0x00 0x00 NOPX + 14716 0x00 0x00 NOPX + 14718 0x08 0x91 0x96 0x18 VCONV.bf16.fp32 x1, cml3 + 14722 0x00 0x00 NOPX + 14724 0x1c 0x03 0x8a 0xf8 VCONV.fp32.bf16 cml4, x1 + 14728 0x00 0x00 NOPX + 14730 0x1b 0x30 0x12 0xf8 VMOV x6, bmll4 + 14734 0x1b 0x1b 0x01 0xb8 VEXTRACT.64 r13:r12, x6, #0, vaddSign0 + 14738 0x1c 0x1b 0x03 0xb8 VEXTRACT.64 r17:r16, x6, #1, vaddSign0 + 14742 0x68 0xd0 0x81 0x62 0x25 0xe4 INVSQRT r3, r13; VMOV x1, bmlh4 + 14748 0x60 0x90 0x82 0x0e 0x03 0x64 INVSQRT r2, r12; VEXTRACT.64 r5:r4, x1, #0, vaddSign0 + 14754 0x89 0xd0 0x80 0x36 0x0b 0x64 INVSQRT r7, r17; VEXTRACT.64 r1:r0, x6, #2, vaddSign0 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 14760 0x81 0x90 0x87 0x36 0x0f 0x64 INVSQRT r6, r16; VEXTRACT.64 r15:r14, x6, #3, vaddSign0 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 14766 0x0b 0x50 0x81 0x0e 0x07 0x64 INVSQRT r13, r1; VEXTRACT.64 r3:r2, x1, #1, vaddSign0 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 14772 0x2e 0x50 0x8a 0x01 0x6e 0xe4 INVSQRT r25, r5; VPUSH.hi.64 x10, x0, r3:r2 + 14778 0x03 0x10 0x8a 0x36 0x13 0x64 INVSQRT r12, r0; VEXTRACT.64 r21:r20, x6, #4, vaddSign0 + 14784 0x78 0x50 0x88 0x0e 0x0b 0x64 INVSQRT r1, r15; VEXTRACT.64 r17:r16, x1, #2, vaddSign0 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 14790 0x26 0x10 0x8a 0xa3 0x6e 0xe4 INVSQRT r24, r4; VPUSH.hi.64 x10, x10, r7:r6 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 14796 0x19 0x50 0x87 0x36 0x17 0x64 INVSQRT r5, r3; VEXTRACT.64 r15:r14, x6, #5, vaddSign0 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 14802 0x70 0x10 0x83 0x36 0x1b 0x64 INVSQRT r0, r14; VEXTRACT.64 r7:r6, x6, #6, vaddSign0 + 14808 0x11 0x10 0x81 0x0e 0x0f 0x64 INVSQRT r4, r2; VEXTRACT.64 r3:r2, x1, #3, vaddSign0 + 14814 0xac 0xd0 0x8a 0xa6 0x6e 0xe4 INVSQRT r19, r21; VPUSH.hi.64 x10, x10, r13:r12 + 14820 0x8b 0x50 0x8b 0x0e 0x13 0x64 INVSQRT r13, r17; VEXTRACT.64 r23:r22, x1, #4, vaddSign0 + 14826 0xa4 0x90 0x8a 0xa0 0x6e 0xe4 INVSQRT r18, r20; VPUSH.hi.64 x10, x10, r1:r0 + 14832 0x83 0x10 0x88 0x0c 0x6e 0xe4 INVSQRT r12, r16; VPUSH.hi.64 x8, x0, r25:r24 + 14838 0x7c 0x50 0x88 0x82 0x6e 0xe4 INVSQRT r17, r15; VPUSH.hi.64 x8, x8, r5:r4 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 14844 0x38 0x50 0x82 0x0e 0x17 0x64 INVSQRT r1, r7; VEXTRACT.64 r5:r4, x1, #5, vaddSign0 +.aggressive_scheduled_block_id 4 +.noswbrkpt + 14850 0x74 0x10 0x83 0x36 0x1f 0x64 INVSQRT r16, r14; VEXTRACT.64 r7:r6, x6, #7, vaddSign0 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 14856 0x30 0x10 0x86 0x86 0x6e 0xe4 INVSQRT r0, r6; VPUSH.hi.64 x6, x8, r13:r12 + 14862 0x1b 0x50 0x87 0x0e 0x1b 0x64 INVSQRT r13, r3; VEXTRACT.64 r15:r14, x1, #6, vaddSign0 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 14868 0x13 0x10 0x81 0x0e 0x1f 0x64 INVSQRT r12, r2; VEXTRACT.64 r3:r2, x1, #7, vaddSign0 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 14874 0xb8 0x50 0x8a 0xa9 0x6e 0xe4 INVSQRT r1, r23; VPUSH.hi.64 x10, x10, r19:r18 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 14880 0xb0 0x10 0x8a 0xa8 0x6e 0xe4 INVSQRT r0, r22; VPUSH.hi.64 x10, x10, r17:r16 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 14886 0x3c 0xd0 0x81 0xa0 0x6e 0xe4 INVSQRT r19, r7; VPUSH.hi.64 x1, x10, r1:r0 + 14892 0x34 0x90 0x86 0x66 0x6e 0xe4 INVSQRT r18, r6; VPUSH.hi.64 x6, x6, r13:r12 + 14898 0x11 0x4e 0x84 0x18 INVSQRT r7, r5 + 14902 0x7b 0x50 0x86 0x60 0x6e 0xe4 INVSQRT r13, r15; VPUSH.hi.64 x6, x6, r1:r0 + 14908 0x11 0x0c 0x84 0x18 INVSQRT r6, r4 + 14912 0x18 0x50 0x81 0x19 0x6e 0xe4 INVSQRT r1, r3; VPUSH.hi.64 x1, x1, r19:r18 + 14918 0x73 0x10 0x88 0x05 0x25 0xe4 INVSQRT r12, r14; VMOV bmll4, x1 + 14924 0x10 0x80 0x84 0x18 INVSQRT r0, r2 + 14928 0x18 0x42 0xc0 0x01 0x98 0xdb 0xb0 0x02 VCONV.bf16.fp32 wl1, bmll4; VPUSH.hi.64 x6, x6, r7:r6 + 14936 0x00 0x00 NOPX + 14938 0x1b 0x33 0x37 0x78 VPUSH.hi.64 x6, x6, r13:r12 + 14942 0x1b 0x30 0x37 0x78 VPUSH.hi.64 x6, x6, r1:r0 + 14946 0x1c 0x0c 0x92 0xf8 VMOV bmll4, x6 + 14950 0x00 0x00 NOPX + 14952 0x08 0x82 0x16 0x18 VCONV.bf16.fp32 wh1, bmll4 + 14956 0x00 0x01 0x67 0x98 NOPA + 14960 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x44 0x81 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VEXTBCST.16 x1, x1, #0; NOPV +.label ZLS_F_Z23rmsnorm_row_major_part2I8bfloat16EvPT_S2_S2_S2_R33rmsnorm_row_major_internal_params_512 +.loop_nesting 2 +.begin_of_loop + 14976 0x22 0x13 0x70 0x39 0xe8 0x3c VLDA.2D x2, [p1], d0; VLDB x3, [p0], #64 + 14982 0x00 0x00 NOPX + 14984 0x00 0x00 NOPX + 14986 0x00 0x00 NOPX + 14988 0x00 0x00 NOPX + 14990 0x00 0x00 NOPX + 14992 0x00 0x00 NOPX + 14994 0x51 0xe6 0x21 0x48 VMUL.f dm1, x3, x1, r10 + 14998 0x00 0x00 NOPX + 15000 0x00 0x00 NOPX + 15002 0x00 0x00 NOPX + 15004 0x00 0x00 NOPX + 15006 0x00 0x00 NOPX + 15008 0x0a 0x10 0x96 0x18 VCONV.bf16.fp32 x4, cml1 + 15012 0x00 0x00 NOPX + 15014 0x52 0xe8 0x41 0x48 VMUL.f dm2, x4, x2, r10 + 15018 0x00 0x00 NOPX + 15020 0x00 0x00 NOPX + 15022 0x00 0x00 NOPX + 15024 0x00 0x00 NOPX + 15026 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV +.label ZLE_F_Z23rmsnorm_row_major_part2I8bfloat16EvPT_S2_S2_S2_R33rmsnorm_row_major_internal_params_576 +.end_of_loop + 15040 0x00 0x2c 0xf0 0x00 0x22 0x1d 0x23 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST.CONV.bf16.fp32 cml2, [p2], #64;NOPX; NOPM; NOPV +.loop_nesting 1 + 15056 0x12 0x11 0x20 0x18 JNZD r8, r8, p4 +.delay_slot +.swstall delay_slot + 15060 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15062 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15064 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15066 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15068 0x00 0x01 0x67 0x98 NOPA +.label TGT_F_Z23rmsnorm_row_major_part2I8bfloat16EvPT_S2_S2_S2_R33rmsnorm_row_major_internal_params_608 +.loop_nesting 0 + 15072 0x07 0xe5 0xb1 0x18 LDA r13, [sp, #-28] + 15076 0x07 0xe9 0x71 0x18 LDA r11, [sp, #-24] + 15080 0x07 0xf1 0xd1 0x18 LDA r14, [sp, #-16] + 15084 0x07 0xf9 0xf1 0x18 LDA r15, [sp, #-8] + 15088 0x07 0xfd 0x91 0x18 LDA r12, [sp, #-4] + 15092 0x07 0xf5 0x31 0x18 LDA r9, [sp, #-12] + 15096 0x07 0xed 0x51 0x18 LDA r10, [sp, #-20] + 15100 0x07 0xe1 0x11 0x18 LDA r8, [sp, #-32] + 15104 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 15108 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 15114 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15116 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15118 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15120 0x00 0x00 NOPX +.label _Z23rmsnorm_row_major_part2I8bfloat16EvPT_S2_S2_S2_R33rmsnorm_row_major_internal_params__end +.label __Z23rmsnorm_row_major_part2I8bfloat16EvPT_S2_S2_S2_R33rmsnorm_row_major_internal_params___func_end0 + +.text_segment PM 15136 +.label __ZN12mllib_graphs35rmsnorm_row_major_part3_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE___func_begin0 +.label _ZN12mllib_graphs35rmsnorm_row_major_part3_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE +.function_start + 15136 0x00 0x07 0xc8 0xc8 0x08 0x44 MOVXM p4, #508932 + 15142 0x04 0x06 0x16 0x98 LDA r16, [p4] + 15146 0x00 0x10 0x00 0x00 0x01 0xc4 PADDXM [sp], #128 + 15152 0x0f 0xe8 0x9d 0x98 ST p1, [sp, #-24] + 15156 0x0f 0xf5 0xf5 0x98 ST r15, [sp, #-12] + 15160 0x00 0x00 NOPX + 15162 0x00 0x00 NOPX + 15164 0x00 0x00 NOPX + 15166 0x80 0x1d 0xd8 0x40 0x01 0x84 JNZ r16, #15280 +.delay_slot + 15172 0x0f 0xf0 0x1d 0x98 ST p0, [sp, #-16] +.delay_slot + 15176 0x0f 0xf9 0x9d 0x98 ST p3, [sp, #-8] +.delay_slot + 15180 0x93 0x11 0x60 0x03 0x30 0xf0 0x70 0x02 MOVS p4, p6; MOV p6, lr +.delay_slot + 15188 0x0f 0xef 0x9d 0x98 ST p7, [sp, #-20] +.delay_slot + 15192 0x0f 0xfe 0x1d 0x98 ST p4, [sp, #-4] + 15196 0x01 0x93 0x00 0x01 0x40 0x2a 0x2d 0x70 0x78 0xba MOVA r19, #12; MOVX r20, #1; MOV r17, CORE_ID + 15206 0xfe 0x12 0x00 0x00 0x01 0xf3 0xb1 0xfc 0x10 0xba MOVA r18, #-16; MOVXM p7, #508920 + 15216 0xe0 0xc6 0x30 0x00 0x01 0xf3 0xb2 0x06 0x11 0x3a ST r17, [p7]; MOVXM p7, #508940 + 15226 0xe0 0xd2 0x30 0x00 0x01 0xf3 0xb2 0x08 0x11 0x3a ST r20, [p7]; MOVXM p7, #508944 + 15236 0xe0 0xcc 0xe8 0xca 0x5b 0x2c ST.s8 r19, [p7]; LSHL r18, r17, r18 + 15242 0x14 0x6a 0x90 0x18 EXTEND.u8 r21, r17 + 15246 0x94 0x92 0x08 0xb5 0xfe 0x24 EXTEND.u8 r18, r18; ADD.NC r17, r21, #-2 + 15252 0x00 0x07 0xc0 0xc7 0xf8 0x44 MOVXM p0, #508924 + 15258 0x00 0x07 0xce 0xc8 0x00 0x44 MOVXM p7, #508928 + 15264 0x00 0x00 NOPX + 15266 0x00 0x00 NOPX + 15268 0x08 0x06 0x31 0x98 ST r17, [p0] + 15272 0xe0 0xca 0x30 0x00 0x01 0xa5 0x70 0x02 ST r18, [p7]; NOPM +.label TGT_F_ZN12mllib_graphs35rmsnorm_row_major_part3_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE_144 + 15280 0x00 0x51 0x00 0x00 0x01 0xf3 0xb1 0xd0 0x10 0xba MOVA r17, #2; MOVXM p7, #508832 + 15290 0x05 0x02 0x87 0x5e 0x11 0x81 0x80 0x0a 0x4a 0xf0 0x78 0x76 MOVA dj0, #40; ST r16, [p7], #20; MOVX r24, #0; MOV r18, sp + 15302 0xe0 0xc6 0x30 0x00 0x34 0xa6 0x00 0x02 ST r17, [p7]; ADD.NC p0, r18, #-104 + 15310 0x02 0x01 0xf6 0x98 LDA r15, [p2, dj0] + 15314 0x02 0x1c 0x2e 0x98 LDA el0, [p2], #4 + 15318 0x02 0x1c 0xee 0x98 LDA el3, [p2], #4 + 15322 0x02 0x1c 0xae 0x98 LDA el2, [p2], #4 + 15326 0x02 0x1c 0x0e 0x98 LDA eh0, [p2], #4 + 15330 0x02 0x1c 0x6e 0x98 LDA el1, [p2], #4 + 15334 0x02 0x1c 0x4e 0x98 LDA eh1, [p2], #4 + 15338 0x02 0x1c 0x8e 0x98 LDA eh2, [p2], #4 + 15342 0x43 0x85 0xdf 0x30 0x5b 0x0c LDA el0, [p2], #4; ST el0, [sp, #-104] + 15348 0x0f 0x9c 0xed 0x98 ST el3, [sp, #-100] + 15352 0x0f 0xa0 0xad 0x98 ST el2, [sp, #-96] + 15356 0x0f 0xa4 0x0d 0x98 ST eh0, [sp, #-92] + 15360 0x0f 0xa8 0x6d 0x98 ST el1, [sp, #-88] + 15364 0x0f 0xac 0x4d 0x98 ST eh1, [sp, #-84] + 15368 0x0f 0xb0 0x8d 0x98 ST eh2, [sp, #-80] + 15372 0x0f 0xb4 0x2d 0x98 ST el0, [sp, #-76] + 15376 0x02 0x04 0x0e 0x98 LDA eh0, [p2] + 15380 0x02 0x14 0x2e 0x98 LDA el0, [p2, #4] + 15384 0x00 0x00 NOPX + 15386 0x80 0x1e 0x40 0x40 0x01 0x84 JNZ r16, #15488 +.delay_slot + 15392 0x0f 0xe7 0x9d 0x98 ST p7, [sp, #-28] +.delay_slot +.swstall delay_slot + 15396 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15398 0x00 0x00 NOPX +.delay_slot + 15400 0x0f 0xb8 0x0d 0x98 ST eh0, [sp, #-72] +.delay_slot + 15404 0x0f 0xbc 0x2d 0x98 ST el0, [sp, #-68] +.no_stack_arguments + 15408 0x02 0x02 0x80 0x00 0x05 0xbc 0x00 0x00 0x40 0xba MOVA dj0, #16; JL #11744 +.delay_slot + 15418 0x00 0x07 0xce 0xc7 0x50 0x44 MOVXM p7, #508840 +.delay_slot + 15424 0x0f 0x1f 0x11 0x98 ST r24, [p7], #4 +.delay_slot + 15428 0x0f 0xbc 0x41 0x98 ST dj0, [p7], #-20 +.delay_slot +.swstall delay_slot + 15432 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15434 0x00 0x2c 0xf0 0x00 0x20 0x3c NOPA; NOPB +.return_address + 15440 0xe3 0xc2 0xd0 0x00 0x01 0xf1 0x31 0xfe 0x10 0xba LDA r16, [p7], #4; MOVXM p2, #508924 + 15450 0x07 0x06 0x56 0x98 LDA r18, [p7] + 15454 0x02 0x06 0x36 0x98 LDA r17, [p2] + 15458 0x00 0x00 NOPX + 15460 0x00 0x00 NOPX + 15462 0x00 0x00 NOPX + 15464 0x00 0x00 NOPX + 15466 0x00 0x00 NOPX + 15468 0x14 0xa1 0x0f 0x98 MUL r16, r18, r16 + 15472 0x00 0x00 NOPX + 15474 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 + 15478 0x00 0x00 NOPX + 15480 0xea 0xc2 0x30 0x00 0x01 0xa5 0x70 0x02 ST r16, [p7, #20]; NOPM +.label TGT_F_ZN12mllib_graphs35rmsnorm_row_major_part3_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE_352 + 15488 0xfc 0xc3 0x20 0x01 0x00 0x28 0x07 0xb0 0x58 0xba LDA p4, [sp, #-28]; MOVX r16, #1; MOV m0, #-80 + 15498 0xfe 0x23 0x20 0x00 0x01 0xf3 0xb2 0x00 0x10 0xba LDA p2, [sp, #-16]; MOVXM p7, #508928 + 15508 0xfd 0x03 0x20 0x3f 0x37 0x6a 0x28 0x06 0x58 0xba LDA p0, [sp, #-24]; MOVX r19, #-5; MOV r17, #6 + 15518 0x07 0xf9 0x99 0x18 LDA p3, [sp, #-8] + 15522 0x07 0x06 0x56 0x98 LDA r18, [p7] + 15526 0x00 0x00 NOPX + 15528 0x00 0x00 NOPX + 15530 0x04 0xa6 0xb6 0x98 LDA r21, [p4, #-24] + 15534 0x04 0x9e 0xf6 0x98 LDA r23, [p4], #-28 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 15538 0x04 0x6e 0x96 0x98 LDA r20, [p4], #24 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 15542 0x03 0x05 0x1e 0x98 LDA p2, [p3] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 15546 0x04 0x0b 0x96 0x98 LDA r28, [p4], m0 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 15550 0x02 0x07 0x56 0x98 LDA r26, [p2] + 15554 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 15556 0x00 0x06 0xd6 0x98 LDA r22, [p0] +.aggressive_scheduled_block_id 2 +.noswbrkpt + 15560 0x15 0xe9 0x56 0x18 MAC r20, r20, r23, r21 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 15564 0x15 0x3b 0x0d 0x98 LSHL r29, r20, r16 + 15568 0x18 0x0e 0xa0 0xf8 MOV m0, r29 + 15572 0x17 0x21 0x0d 0x98 LSHL r16, r28, r16 + 15576 0xa4 0x0f 0xe0 0xda 0x82 0xa4 ADD r16, r20, #31; ADD.NC p0, r26, r16 + 15582 0x14 0x21 0x3d 0x98 LSHL r16, r16, r19 +.no_stack_arguments + 15586 0x00 0x1c 0x40 0x00 0x01 0x04 JL #14464 +.delay_slot + 15592 0x14 0xa1 0x0f 0x98 MUL r16, r18, r16 +.delay_slot + 15596 0x00 0x07 0xce 0xc8 0x08 0x44 MOVXM p7, #508932 +.delay_slot + 15602 0x14 0x21 0x1d 0x98 LSHL r16, r16, r17 +.delay_slot + 15606 0x1b 0x6b 0x41 0x58 ADD.NC p3, r22, r16 +.delay_slot + 15610 0x61 0x72 0x02 0xcd 0x81 0xf4 PADDB [p3], m0; MOV p1, p3 +.return_address + 15616 0xe0 0xc6 0xd0 0x01 0x00 0x0b 0xbe 0x60 0x78 0xba LDA r17, [p7]; MOVX r16, #0; MOV lr, p6 + 15626 0x07 0xf4 0x11 0x18 LDA r0, [sp, #-12] + 15630 0x07 0xed 0x19 0x18 LDA p2, [sp, #-20] + 15634 0x07 0xff 0x19 0x18 LDA p6, [sp, #-4] + 15638 0x00 0x00 NOPX + 15640 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 15644 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128 +.delay_slot + 15650 0x14 0x62 0x07 0x18 ADD r17, r17, #1 +.delay_slot + 15654 0x7e 0xe2 0xf7 0xa0 0x41 0xe4 EQ r27, r15, r17; MOV r15, r0 +.delay_slot + 15660 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 +.delay_slot + 15664 0xe0 0xc2 0x30 0x03 0xb2 0x60 0x70 0x02 ST r16, [p7]; MOV p7, p2 +.label _ZN12mllib_graphs35rmsnorm_row_major_part3_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE__end +.label __ZN12mllib_graphs35rmsnorm_row_major_part3_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE___func_end0 + +.text_segment PM 15680 +.label __Z14_b8308_wrapperPPv___func_begin0 +.label _Z14_b8308_wrapperPPv +.function_start + 15680 0x1a 0x60 0xc0 0xf8 MOV p2, p0 + 15684 0x02 0x1c 0x1e 0x98 LDA p0, [p2], #4 + 15688 0x02 0x2c 0x9e 0x98 LDA p1, [p2], #8 + 15692 0x02 0xf5 0x9e 0x98 LDA p3, [p2, #-4] + 15696 0x02 0x05 0x1e 0x98 LDA p2, [p2] +.tail_call + 15700 0x00 0x1d 0x90 0x00 0x00 0x84 J #15136 +.delay_slot +.swstall delay_slot + 15706 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15708 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15710 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15712 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15714 0x00 0x00 NOPX +.label _Z14_b8308_wrapperPPv__end +.label __Z14_b8308_wrapperPPv___func_end0 + +.text_segment PM 15728 +.label _ZN12me_primitive10udiv_dstepEjjRjS0_ +.function_start + 15728 0x00 0xc0 0x2f 0xa0 0x41 0xe4 MOVX r3, #0; MOV r31, r0 + 15734 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15738 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15742 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15746 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15750 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15754 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15758 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15762 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15766 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15770 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15774 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15778 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15782 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15786 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15790 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15794 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15798 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15802 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15806 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15810 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15814 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15818 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15822 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15826 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15830 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15834 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15838 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15842 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15846 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 15850 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 +.delay_slot + 15854 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 +.delay_slot + 15858 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 +.delay_slot + 15862 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 +.delay_slot + 15866 0x18 0x9f 0xa0 0xf8 MOV r2, r31 +.label _ZN12me_primitive10udiv_dstepEjjRjS0___end + +.bss_segment DMb 508416 448 + +.bss_segment DMb 508864 72 + +.data_segment DMb 508936 +.label _ZL8num_iter + 0x1 + 0x0 + 0x0 + 0x0 + +.bss_segment DMb 508940 4 + +.bss_segment DMb 508944 1 + +.rodata_segment DMb 508992 +.label _ZL20g_uniformKernelFuncs + 0x10 + 0x2d + 0x0 + 0x0 + 0x40 + 0x2d + 0x0 + 0x0 + 0x70 + 0x2d + 0x0 + 0x0 + 0x90 + 0x2d + 0x0 + 0x0 + 0xb0 + 0x2d + 0x0 + 0x0 + 0x70 + 0x35 + 0x0 + 0x0 + 0x50 + 0x38 + 0x0 + 0x0 + 0x40 + 0x3d + 0x0 + 0x0 + +.bss_segment DMb 509056 256 + +.stack DM_stack 507264 508352 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable80/Release/0_0_reloadable80.map b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable80/Release/0_0_reloadable80.map new file mode 100644 index 0000000000000000000000000000000000000000..528722a50fb4de14cf31d5c35bc591a879c8cf15 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable80/Release/0_0_reloadable80.map @@ -0,0 +1,382 @@ + +// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri May 30 11:30:05 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// bridge -o../Release/0_0_reloadable5 ../Release/0_0_reloadable5.o -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/isg -g -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable5.bcf -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/softfloat/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork3577694 -pme + +// Release: ipp V-2024.06-TGT-241219 + +Memory map for memory 'DM_stack': + + Size = 1048576 + Width = 8 bits + Offset = 0 + Used = 1088 + + 0x0007bd80..0x0007c1bf ( 1088 items) : Stack + +Memory map for memory 'DMb': + + Size = 1048576 + Width = 8 bits + Offset = 0 + Used = 1905 + + 0x00000000..0x0007bd7f ( 507264 items) : Reserved + 0x0007bd80..0x0007c1bf ( 1088 items) : Stack + 0x0007c1c0..0x0007c1ff ( 64 items) : Reserved + 0x0007c200..0x0007c35f ( 352 items) : ../Release/0_0_reloadable5.o::gem_bfp_param (Data, Global, .bss.DMb.32) + 0x0007c360..0x0007c3bf ( 96 items) : ../Release/0_0_reloadable5.o::_ZN11ns_rms_norm5paramE (Data, Global, .bss.DMb.32) + 0x0007c3c0..0x0007c3c3 ( 4 items) : ../Release/0_0_reloadable5.o::_ZL9curr_iter (Data, Local, .bss.DMb.4) + 0x0007c3c4..0x0007c3c7 ( 4 items) : ../Release/0_0_reloadable5.o::_ZL10depth_iter (Data, Local, .bss.DMb.4) + 0x0007c3c8..0x0007c3cb ( 4 items) : ../Release/0_0_reloadable5.o::_ZL11total_iters (Data, Local, .bss.DMb.4) + 0x0007c3cc..0x0007c3cf ( 4 items) : ../Release/0_0_reloadable5.o::_ZL8core_row (Data, Local, .bss.DMb.4) + 0x0007c3d0..0x0007c3d3 ( 4 items) : ../Release/0_0_reloadable5.o::_ZL11ifm1_offset (Data, Local, .bss.DMb.4) + 0x0007c3d4..0x0007c3d7 ( 4 items) : ../Release/0_0_reloadable5.o::_ZL11ifm2_offset (Data, Local, .bss.DMb.4) + 0x0007c3d8..0x0007c3db ( 4 items) : ../Release/0_0_reloadable5.o::_ZZN12mllib_graphs35rmsnorm_row_major_part1_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EEEEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSF_ISG_SI_T1_EERA11_KjRNSF_ISG_NSH_3outET2_EEE7core_id (Data, Weak, .bss.DMb.4) + 0x0007c3dc..0x0007c3df ( 4 items) : ../Release/0_0_reloadable5.o::_ZZN12mllib_graphs35rmsnorm_row_major_part1_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EEEEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSF_ISG_SI_T1_EERA11_KjRNSF_ISG_NSH_3outET2_EEE8core_row (Data, Weak, .bss.DMb.4) + 0x0007c3e0..0x0007c3e3 ( 4 items) : ../Release/0_0_reloadable5.o::_ZZN12mllib_graphs35rmsnorm_row_major_part1_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EEEEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSF_ISG_SI_T1_EERA11_KjRNSF_ISG_NSH_3outET2_EEE8core_col (Data, Weak, .bss.DMb.4) + 0x0007c3e4..0x0007c3e7 ( 4 items) : ../Release/0_0_reloadable5.o::_ZZN12mllib_graphs35rmsnorm_row_major_part1_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EEEEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSF_ISG_SI_T1_EERA11_KjRNSF_ISG_NSH_3outET2_EEE11kernel_iter (Data, Weak, .bss.DMb.4) + 0x0007c3e8..0x0007c3eb ( 4 items) : ../Release/0_0_reloadable5.o::_ZZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EEE7core_id (Data, Weak, .bss.DMb.4) + 0x0007c3ec..0x0007c3ef ( 4 items) : ../Release/0_0_reloadable5.o::_ZZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EEE8core_row (Data, Weak, .bss.DMb.4) + 0x0007c3f0..0x0007c3f3 ( 4 items) : ../Release/0_0_reloadable5.o::_ZZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EEE8core_col (Data, Weak, .bss.DMb.4) + 0x0007c3f4..0x0007c3f7 ( 4 items) : ../Release/0_0_reloadable5.o::_ZZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EEE11kernel_iter (Data, Weak, .bss.DMb.4) + 0x0007c3f8..0x0007c3fb ( 4 items) : ../Release/0_0_reloadable5.o::_ZZN12mllib_graphs35rmsnorm_row_major_part3_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EEE7core_id (Data, Weak, .bss.DMb.4) + 0x0007c3fc..0x0007c3ff ( 4 items) : ../Release/0_0_reloadable5.o::_ZZN12mllib_graphs35rmsnorm_row_major_part3_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EEE8core_row (Data, Weak, .bss.DMb.4) + 0x0007c400..0x0007c403 ( 4 items) : ../Release/0_0_reloadable5.o::_ZZN12mllib_graphs35rmsnorm_row_major_part3_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EEE8core_col (Data, Weak, .bss.DMb.4) + 0x0007c404..0x0007c407 ( 4 items) : ../Release/0_0_reloadable5.o::_ZZN12mllib_graphs35rmsnorm_row_major_part3_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EEE11kernel_iter (Data, Weak, .bss.DMb.4) + 0x0007c408..0x0007c40b ( 4 items) : ../Release/0_0_reloadable5.o::_ZL8num_iter (Data, Local, .data.DMb.4) + 0x0007c40c..0x0007c40f ( 4 items) : me_defs.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive11control_satE (Data, Global, .bss.DMb.4) + 0x0007c410..0x0007c410 ( 1 items) : me_defs.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive11control_rndE (Data, Global, .bss.DMb.1) + 0x0007c440..0x0007c45f ( 32 items) : ../Release/0_0_reloadable5.o::_ZL20g_uniformKernelFuncs (Data, Local, .rodata.DMb.64) + + Called functions : _Z15_b13786_wrapperPPv + _Z15_b13811_wrapperPPv + _Z15_b13739_wrapperPPv + _Z15_b13744_wrapperPPv + _Z15_b13749_wrapperPPv + _Z14_b8292_wrapperPPv + _Z14_b8300_wrapperPPv + _Z14_b8308_wrapperPPv + + 0x0007c480..0x0007c4bf ( 64 items) : ../Release/0_0_reloadable5.o::add1d_attribute_broadcasting_params (Data, Global, .bss.DMb.64) + 0x0007c4c0..0x0007c4ff ( 64 items) : ../Release/0_0_reloadable5.o::add1d_params (Data, Global, .bss.DMb.64) + 0x0007c500..0x0007c53f ( 64 items) : ../Release/0_0_reloadable5.o::mul1d_params (Data, Global, .bss.DMb.64) + 0x0007c540..0x0007c57f ( 64 items) : ../Release/0_0_reloadable5.o::sigmoid1d_params (Data, Global, .bss.DMb.64) + 0x0007ca00..0x000fffff ( 538112 items) : Reserved + +Memory map for memory 'PM': + + Size = 1048576 + Width = 8 bits + Offset = 0 + Used = 12986 + + 0x00000000..0x000009df ( 2528 items) : Reserved + 0x000009e0..0x00000c01 ( 546 items) : ../Release/0_0_reloadable5.o::_Z13kernelWrapperPPvjjjj (Function, Global, .text) (stack frame size = 64) + + Referenced symbols: _ZL20g_uniformKernelFuncs + + 0x00000c10..0x00000c27 ( 24 items) : ../Release/0_0_reloadable5.o::_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: add1d_attribute_broadcasting_params + + 0x00000c30..0x00000ce1 ( 178 items) : ../Release/0_0_reloadable5.o::_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv (Function, Weak, .text) (stack frame size = 64) + + Called functions : _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E + + Referenced symbols: add1d_attribute_broadcasting_params + + 0x00000cf0..0x00000d27 ( 56 items) : ../Release/0_0_reloadable5.o::_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: add1d_attribute_broadcasting_params + + 0x00000d30..0x00000d6b ( 60 items) : ../Release/0_0_reloadable5.o::_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv (Function, Weak, .text) (stack frame size = 64) + + Called functions : _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv + _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E + + Referenced symbols: add1d_attribute_broadcasting_params + + 0x00000d70..0x00000eb9 ( 330 items) : ../Release/0_0_reloadable5.o::_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: add1d_attribute_broadcasting_params + _ZN12me_primitive11control_rndE + + 0x00000ec0..0x00000f31 ( 114 items) : ../Release/0_0_reloadable5.o::_ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 128) + + Called functions : _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E + + Referenced symbols: add1d_attribute_broadcasting_params + + 0x00000f40..0x00001127 ( 488 items) : ../Release/0_0_reloadable5.o::_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 64) + + Called functions : _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv + _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + add1d_attribute_broadcasting_params + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL11ifm1_offset + _ZL8num_iter + + 0x00001130..0x00001313 ( 484 items) : ../Release/0_0_reloadable5.o::_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE (Function, Local, .text) (stack frame size = 64) + + Referenced symbols: _ZN12me_primitive11control_rndE + + 0x00001320..0x00001363 ( 68 items) : ../Release/0_0_reloadable5.o::_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: sigmoid1d_params + + 0x00001370..0x000014e9 ( 378 items) : ../Release/0_0_reloadable5.o::_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: sigmoid1d_params + _ZN12me_primitive11control_rndE + + 0x000014f0..0x000016d7 ( 488 items) : ../Release/0_0_reloadable5.o::_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 64) + + Called functions : _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv + _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + sigmoid1d_params + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL11ifm1_offset + _ZL8num_iter + + 0x000016e0..0x00001759 ( 122 items) : ../Release/0_0_reloadable5.o::_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: add1d_params + + 0x00001760..0x000017d1 ( 114 items) : ../Release/0_0_reloadable5.o::_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv (Function, Weak, .text) (stack frame size = 64) + + Called functions : _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv + + Referenced symbols: add1d_params + + 0x000017e0..0x000017f3 ( 20 items) : ../Release/0_0_reloadable5.o::_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 0) + + Called functions : _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE + + Referenced symbols: add1d_params + + 0x00001800..0x00001a59 ( 602 items) : ../Release/0_0_reloadable5.o::_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE (Function, Global, .text) (stack frame size = 64) + + Called functions : _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv + _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + add1d_params + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL11ifm1_offset + _ZL11ifm2_offset + _ZL8num_iter + + 0x00001a60..0x00001a77 ( 24 items) : ../Release/0_0_reloadable5.o::_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: mul1d_params + + 0x00001a80..0x00001b19 ( 154 items) : ../Release/0_0_reloadable5.o::_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 64) + + Called functions : _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E + + Referenced symbols: mul1d_params + + 0x00001b20..0x00001c53 ( 308 items) : ../Release/0_0_reloadable5.o::_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: mul1d_params + _ZN12me_primitive11control_rndE + + 0x00001c60..0x00001eb9 ( 602 items) : ../Release/0_0_reloadable5.o::_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE (Function, Global, .text) (stack frame size = 64) + + Called functions : _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + mul1d_params + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL11ifm1_offset + _ZL11ifm2_offset + _ZL8num_iter + + 0x00001ec0..0x000020e5 ( 550 items) : ../Release/0_0_reloadable5.o::_ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv (Function, Local, .text) (stack frame size = 0) + + Referenced symbols: gem_bfp_param + + 0x000020f0..0x00002215 ( 294 items) : ../Release/0_0_reloadable5.o::_Z8init_accILt1EEvPaS0_iii (Function, Weak, .text) (stack frame size = 64) + + Referenced symbols: _ZN12me_primitive11control_rndE + + 0x00002220..0x00002345 ( 294 items) : ../Release/0_0_reloadable5.o::_Z12post_processPai (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: _ZN12me_primitive11control_rndE + + 0x00002350..0x00002ad9 ( 1930 items) : ../Release/0_0_reloadable5.o::_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params (Function, Weak, .text) (stack frame size = 64) + + Called functions : _Z8init_accILt1EEvPaS0_iii + _Z12post_processPai + + Referenced symbols: gem_bfp_param + _ZN12me_primitive11control_satE + _ZN12me_primitive11control_rndE + + 0x00002ae0..0x00002d01 ( 546 items) : ../Release/0_0_reloadable5.o::_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 64) + + Called functions : _ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv + _Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params + + Referenced symbols: _ZL9curr_iter + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL11total_iters + _ZL10depth_iter + gem_bfp_param + + 0x00002d10..0x00002d33 ( 36 items) : ../Release/0_0_reloadable5.o::_Z15_b13786_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + + 0x00002d40..0x00002d63 ( 36 items) : ../Release/0_0_reloadable5.o::_Z15_b13811_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE + + 0x00002d70..0x00002d8f ( 32 items) : ../Release/0_0_reloadable5.o::_Z15_b13739_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + + 0x00002d90..0x00002daf ( 32 items) : ../Release/0_0_reloadable5.o::_Z15_b13744_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + + 0x00002db0..0x00002dd3 ( 36 items) : ../Release/0_0_reloadable5.o::_Z15_b13749_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE + + 0x00002de0..0x00002f65 ( 390 items) : ../Release/0_0_reloadable5.o::_ZL30setup_rmsnorm_row_major_paramsR33rmsnorm_row_major_internal_paramsRA10_Kj (Function, Local, .text) (stack frame size = 64) + + Called functions : _ZN12me_primitive10udiv_dstepEjjRjS0_ + + Referenced symbols: _ZN11ns_rms_norm5paramE + + 0x00002f70..0x000032c3 ( 852 items) : ../Release/0_0_reloadable5.o::_Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params (Function, Weak, .text) (stack frame size = 128) + + Called functions : _ZN12me_primitive10udiv_dstepEjjRjS0_ + + Referenced symbols: _ZN12me_primitive11control_rndE + _ZN11ns_rms_norm5paramE + + 0x000032d0..0x0000356b ( 668 items) : ../Release/0_0_reloadable5.o::_ZN12mllib_graphs35rmsnorm_row_major_part1_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EEEEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSF_ISG_SI_T1_EERA11_KjRNSF_ISG_NSH_3outET2_EE (Function, Weak, .text) (stack frame size = 128) + + Called functions : _ZL30setup_rmsnorm_row_major_paramsR33rmsnorm_row_major_internal_paramsRA10_Kj + _ZN12me_primitive10udiv_dstepEjjRjS0_ + _Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params + + Referenced symbols: _ZZN12mllib_graphs35rmsnorm_row_major_part1_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EEEEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSF_ISG_SI_T1_EERA11_KjRNSF_ISG_NSH_3outET2_EEE11kernel_iter + _ZZN12mllib_graphs35rmsnorm_row_major_part1_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EEEEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSF_ISG_SI_T1_EERA11_KjRNSF_ISG_NSH_3outET2_EEE7core_id + _ZZN12mllib_graphs35rmsnorm_row_major_part1_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EEEEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSF_ISG_SI_T1_EERA11_KjRNSF_ISG_NSH_3outET2_EEE8core_row + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZZN12mllib_graphs35rmsnorm_row_major_part1_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EEEEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSF_ISG_SI_T1_EERA11_KjRNSF_ISG_NSH_3outET2_EEE8core_col + _ZN11ns_rms_norm5paramE + + 0x00003570..0x00003593 ( 36 items) : ../Release/0_0_reloadable5.o::_Z14_b8292_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _ZN12mllib_graphs35rmsnorm_row_major_part1_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EEEEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSF_ISG_SI_T1_EERA11_KjRNSF_ISG_NSH_3outET2_EE + + 0x000035a0..0x0000384d ( 686 items) : ../Release/0_0_reloadable5.o::_ZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE (Function, Weak, .text) (stack frame size = 128) + + Called functions : _ZL30setup_rmsnorm_row_major_paramsR33rmsnorm_row_major_internal_paramsRA10_Kj + + Referenced symbols: _ZZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EEE11kernel_iter + _ZZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EEE7core_id + _ZZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EEE8core_row + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EEE8core_col + _ZN11ns_rms_norm5paramE + + 0x00003850..0x00003873 ( 36 items) : ../Release/0_0_reloadable5.o::_Z14_b8300_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _ZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE + + 0x00003880..0x00003b11 ( 658 items) : ../Release/0_0_reloadable5.o::_Z23rmsnorm_row_major_part2I8bfloat16EvPT_S2_S2_S2_R33rmsnorm_row_major_internal_params (Function, Weak, .text) (stack frame size = 64) + + Referenced symbols: _ZN11ns_rms_norm5paramE + _ZN12me_primitive11control_rndE + + 0x00003b20..0x00003d37 ( 536 items) : ../Release/0_0_reloadable5.o::_ZN12mllib_graphs35rmsnorm_row_major_part3_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE (Function, Weak, .text) (stack frame size = 128) + + Called functions : _ZL30setup_rmsnorm_row_major_paramsR33rmsnorm_row_major_internal_paramsRA10_Kj + _Z23rmsnorm_row_major_part2I8bfloat16EvPT_S2_S2_S2_R33rmsnorm_row_major_internal_params + + Referenced symbols: _ZZN12mllib_graphs35rmsnorm_row_major_part3_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EEE11kernel_iter + _ZZN12mllib_graphs35rmsnorm_row_major_part3_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EEE7core_id + _ZN12me_primitive11control_satE + _ZN12me_primitive11control_rndE + _ZZN12mllib_graphs35rmsnorm_row_major_part3_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EEE8core_row + _ZZN12mllib_graphs35rmsnorm_row_major_part3_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EEE8core_col + _ZN11ns_rms_norm5paramE + + 0x00003d40..0x00003d63 ( 36 items) : ../Release/0_0_reloadable5.o::_Z14_b8308_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _ZN12mllib_graphs35rmsnorm_row_major_part3_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE + + 0x00003d70..0x00003dfd ( 142 items) : me_div.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive10udiv_dstepEjjRjS0_ (Function, Global, .text) (stack frame size = 0) + +External symbols: + + __dso_handle = 0x0 + _ctors_end = 0x0 + _ctors_start = 0x0 + _dtors_end = 0x0 + _dtors_start = 0x0 + _pc_end = 0x3dfe + _pc_start = 0x9e0 + _sp_end_DM_stack = 0x7c1c0 + _sp_start_DM_stack = 0x7bd80 + +Section summary for memory 'DM_stack': + + .stack File + ---------- ---------- + 1088 + ---------- ---------- + 1088 Total + +Section summary for memory 'DMb': + + .bss .data .rodata File + ---------- ---------- ---------- ---------- + 776 4 32 ../Release/0_0_reloadable5.o + 5 0 0 me_defs.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release/libme.a) + ---------- ---------- ---------- ---------- + 781 4 32 Total + +Section summary for memory 'PM': + + .text File + ---------- ---------- + 12844 ../Release/0_0_reloadable5.o + 142 me_div.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release/libme.a) + ---------- ---------- + 12986 Total + +File summary: + +../Release/0_0_reloadable5.o + DMb 812 + PM 12844 + +me_defs.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release/libme.a) + DMb 5 + +me_div.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release/libme.a) + PM 142 + diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable80/Release/0_0_reloadable80.sdr b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable80/Release/0_0_reloadable80.sdr new file mode 100644 index 0000000000000000000000000000000000000000..51428f9fddddfb6a9cbdf2c4c42d734182eb62b3 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable80/Release/0_0_reloadable80.sdr @@ -0,0 +1,140 @@ + +// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri May 30 11:30:05 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// bridge -o../Release/0_0_reloadable5 ../Release/0_0_reloadable5.o -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/isg -g -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable5.bcf -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/softfloat/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork3577694 -pme + +// Release: ipp V-2024.06-TGT-241219 + +// Symbols in memory 'DM_bankA': +// Symbols in memory 'DM_bankAB': +// Symbols in memory 'DM_bankAC': +// Symbols in memory 'DM_bankAD': +// Symbols in memory 'DM_bankB': +// Symbols in memory 'DM_bankBC': +// Symbols in memory 'DM_bankBD': +// Symbols in memory 'DM_bankC': +// Symbols in memory 'DM_bankCD': +// Symbols in memory 'DM_bankD': +// Symbols in memory 'DM_stack': +// Symbols in memory 'DM_test': +// Symbols in memory 'DMb': +_symbol gem_bfp_param 0x0007c200 +_symbol _ZN11ns_rms_norm5paramE 0x0007c360 +_symbol _ZZN12mllib_graphs35rmsnorm_row_major_part1_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EEEEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSF_ISG_SI_T1_EERA11_KjRNSF_ISG_NSH_3outET2_EEE7core_id 0x0007c3d8 +_symbol _ZZN12mllib_graphs35rmsnorm_row_major_part1_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EEEEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSF_ISG_SI_T1_EERA11_KjRNSF_ISG_NSH_3outET2_EEE8core_row 0x0007c3dc +_symbol _ZZN12mllib_graphs35rmsnorm_row_major_part1_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EEEEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSF_ISG_SI_T1_EERA11_KjRNSF_ISG_NSH_3outET2_EEE8core_col 0x0007c3e0 +_symbol _ZZN12mllib_graphs35rmsnorm_row_major_part1_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EEEEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSF_ISG_SI_T1_EERA11_KjRNSF_ISG_NSH_3outET2_EEE11kernel_iter 0x0007c3e4 +_symbol _ZZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EEE7core_id 0x0007c3e8 +_symbol _ZZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EEE8core_row 0x0007c3ec +_symbol _ZZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EEE8core_col 0x0007c3f0 +_symbol _ZZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EEE11kernel_iter 0x0007c3f4 +_symbol _ZZN12mllib_graphs35rmsnorm_row_major_part3_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EEE7core_id 0x0007c3f8 +_symbol _ZZN12mllib_graphs35rmsnorm_row_major_part3_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EEE8core_row 0x0007c3fc +_symbol _ZZN12mllib_graphs35rmsnorm_row_major_part3_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EEE8core_col 0x0007c400 +_symbol _ZZN12mllib_graphs35rmsnorm_row_major_part3_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EEE11kernel_iter 0x0007c404 +_symbol _ZN12me_primitive11control_satE 0x0007c40c +_symbol _ZN12me_primitive11control_rndE 0x0007c410 +_symbol add1d_attribute_broadcasting_params 0x0007c480 +_symbol add1d_params 0x0007c4c0 +_symbol mul1d_params 0x0007c500 +_symbol sigmoid1d_params 0x0007c540 +// Symbols in memory 'DMh': +// Symbols in memory 'DMh_bankA': +// Symbols in memory 'DMh_bankAB': +// Symbols in memory 'DMh_bankAC': +// Symbols in memory 'DMh_bankAD': +// Symbols in memory 'DMh_bankB': +// Symbols in memory 'DMh_bankBC': +// Symbols in memory 'DMh_bankBD': +// Symbols in memory 'DMh_bankC': +// Symbols in memory 'DMh_bankCD': +// Symbols in memory 'DMh_bankD': +// Symbols in memory 'DMh_stack': +// Symbols in memory 'DMs': +// Symbols in memory 'DMs_bankA': +// Symbols in memory 'DMs_bankAB': +// Symbols in memory 'DMs_bankAC': +// Symbols in memory 'DMs_bankAD': +// Symbols in memory 'DMs_bankB': +// Symbols in memory 'DMs_bankBC': +// Symbols in memory 'DMs_bankBD': +// Symbols in memory 'DMs_bankC': +// Symbols in memory 'DMs_bankCD': +// Symbols in memory 'DMs_bankD': +// Symbols in memory 'DMs_stack': +// Symbols in memory 'DMv': +// Symbols in memory 'DMv_bankA': +// Symbols in memory 'DMv_bankAB': +// Symbols in memory 'DMv_bankAC': +// Symbols in memory 'DMv_bankAD': +// Symbols in memory 'DMv_bankB': +// Symbols in memory 'DMv_bankBC': +// Symbols in memory 'DMv_bankBD': +// Symbols in memory 'DMv_bankC': +// Symbols in memory 'DMv_bankCD': +// Symbols in memory 'DMv_bankD': +// Symbols in memory 'DMv_stack': +// Symbols in memory 'DMw': +// Symbols in memory 'DMw_bankA': +// Symbols in memory 'DMw_bankAB': +// Symbols in memory 'DMw_bankAC': +// Symbols in memory 'DMw_bankAD': +// Symbols in memory 'DMw_bankB': +// Symbols in memory 'DMw_bankBC': +// Symbols in memory 'DMw_bankBD': +// Symbols in memory 'DMw_bankC': +// Symbols in memory 'DMw_bankCD': +// Symbols in memory 'DMw_bankD': +// Symbols in memory 'DMw_stack': +// Symbols in memory 'DMx': +// Symbols in memory 'DMx_bankA': +// Symbols in memory 'DMx_bankAB': +// Symbols in memory 'DMx_bankAC': +// Symbols in memory 'DMx_bankAD': +// Symbols in memory 'DMx_bankB': +// Symbols in memory 'DMx_bankBC': +// Symbols in memory 'DMx_bankBD': +// Symbols in memory 'DMx_bankC': +// Symbols in memory 'DMx_bankCD': +// Symbols in memory 'DMx_bankD': +// Symbols in memory 'DMx_stack': +// Symbols in memory 'PM': +_symbol _Z13kernelWrapperPPvjjjj 0x000009e0 +_symbol _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E 0x00000c10 +_symbol _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv 0x00000c30 +_symbol _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E 0x00000cf0 +_symbol _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv 0x00000d30 +_symbol _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E 0x00000d70 +_symbol _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E 0x00000ec0 +_symbol _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x00000f40 +_symbol _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv 0x00001320 +_symbol _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E 0x00001370 +_symbol _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x000014f0 +_symbol _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv 0x000016e0 +_symbol _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv 0x00001760 +_symbol _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E 0x000017e0 +_symbol _Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE 0x00001800 +_symbol _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E 0x00001a60 +_symbol _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv 0x00001a80 +_symbol _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E 0x00001b20 +_symbol _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE 0x00001c60 +_symbol _Z8init_accILt1EEvPaS0_iii 0x000020f0 +_symbol _Z12post_processPai 0x00002220 +_symbol _Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params 0x00002350 +_symbol _Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x00002ae0 +_symbol _Z15_b13786_wrapperPPv 0x00002d10 +_symbol _Z15_b13811_wrapperPPv 0x00002d40 +_symbol _Z15_b13739_wrapperPPv 0x00002d70 +_symbol _Z15_b13744_wrapperPPv 0x00002d90 +_symbol _Z15_b13749_wrapperPPv 0x00002db0 +_symbol _Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params 0x00002f70 +_symbol _ZN12mllib_graphs35rmsnorm_row_major_part1_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EEEEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSF_ISG_SI_T1_EERA11_KjRNSF_ISG_NSH_3outET2_EE 0x000032d0 +_symbol _Z14_b8292_wrapperPPv 0x00003570 +_symbol _ZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE 0x000035a0 +_symbol _Z14_b8300_wrapperPPv 0x00003850 +_symbol _Z23rmsnorm_row_major_part2I8bfloat16EvPT_S2_S2_S2_R33rmsnorm_row_major_internal_params 0x00003880 +_symbol _ZN12mllib_graphs35rmsnorm_row_major_part3_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE 0x00003b20 +_symbol _Z14_b8308_wrapperPPv 0x00003d40 +_symbol _ZN12me_primitive10udiv_dstepEjjRjS0_ 0x00003d70 +// Symbols in memory 'PMw': +// Symbols in memory 'TM4': diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable80/Release/0_0_reloadable80.srv b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable80/Release/0_0_reloadable80.srv new file mode 100644 index 0000000000000000000000000000000000000000..9c124c0a9693c0611e9ab76c180f2bbbe7f0b969 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable80/Release/0_0_reloadable80.srv @@ -0,0 +1,18572 @@ + +// File generated by darts version V-2024.06#84922c0d9f#241219, Fri May 30 11:30:06 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// darts -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -d -h -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable5 me + +// Release: ipp V-2024.06-TGT-241219 +.label __Z13kernelWrapperPPvjjjj___func_begin0 +.label _Z13kernelWrapperPPvjjjj +.function kernelWrapper _Z13kernelWrapperPPvjjjj +.src_ref 0 "0_0_reloadable5.cc" 103 first +.src_ref 0 "0_0_reloadable5.cc" 105 60 +.src_ref 0 "0_0_reloadable5.cc" 105 110 first +.function_start + 2528 "00101100" // LDA r16, [p0]; NEZ r26, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2529 "11100000" // /* MW 5 */ + 2530 "11101001" // /* MW 4 */ + 2531 "11010000" // /* MW 3 */ + 2532 "11000010" // /* MW 2 */ + 2533 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 103 + 2534 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2535 "00000001" // /* MW 5 */ + 2536 "00000000" // /* MW 4 */ + 2537 "00000000" // /* MW 3 */ + 2538 "00001000" // /* MW 2 */ + 2539 "00000000" // /* MW 1 */ + 2540 "10011000" // ST p6, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2541 "00011101" // /* MW 3 */ + 2542 "11101111" // /* MW 2 */ + 2543 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 0 "0_0_reloadable5.cc" 110 112 + 2544 "00000010" // ST r14, [sp, #-16]; MOV r14, r3 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2545 "01110000" // /* MW 7 */ + 2546 "11010000" // /* MW 6 */ + 2547 "11001000" // /* MW 5 */ + 2548 "00000001" // /* MW 4 */ + 2549 "10110000" // /* MW 3 */ + 2550 "00111010" // /* MW 2 */ + 2551 "11111110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 0 "0_0_reloadable5.cc" 107 110 + 2552 "00000010" // ST r15, [sp, #-8]; MOV r15, r1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2553 "01110000" // /* MW 7 */ + 2554 "01010000" // /* MW 6 */ + 2555 "11101000" // /* MW 5 */ + 2556 "00000001" // /* MW 4 */ + 2557 "10110000" // /* MW 3 */ + 2558 "00111110" // /* MW 2 */ + 2559 "11111111" // /* MW 1 */ + 2560 "10011000" // ST p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2561 "10011101" // /* MW 3 */ + 2562 "11110111" // /* MW 2 */ + 2563 "00001111" // /* MW 1 */ + 2564 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2565 "00111101" // /* MW 3 */ + 2566 "11111100" // /* MW 2 */ + 2567 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 first + 2568 "00011000" // ADD.NC p6, r16, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2569 "00000010" // /* MW 3 */ + 2570 "01101000" // /* MW 2 */ + 2571 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 + 2572 "10011000" // LDA r16, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2573 "00010110" // /* MW 3 */ + 2574 "00011110" // /* MW 2 */ + 2575 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 36 + 2576 "10011000" // LDA r18, [p6], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2577 "01010110" // /* MW 3 */ + 2578 "00111110" // /* MW 2 */ + 2579 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 46 + 2580 "10011000" // LDA r17, [p6], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2581 "00110110" // /* MW 3 */ + 2582 "11101110" // /* MW 2 */ + 2583 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 60 + 2584 "10011000" // LDA r27, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2585 "01110110" // /* MW 3 */ + 2586 "00000111" // /* MW 2 */ + 2587 "00000110" // /* MW 1 */ + 2588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2589 "00000000" // /* MW 1 */ + 2590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2591 "00000000" // /* MW 1 */ + 2592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2593 "00000000" // /* MW 1 */ + 2594 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2595 "00000000" // /* MW 1 */ + 2596 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2597 "00000000" // /* MW 1 */ + 2598 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2599 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 219 30 first +.src_ref 1 "io_buffer_compiler.h" 219 37 first + 2600 "00011000" // SEL.EQZ r16, r16, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2601 "00100010" // /* MW 3 */ + 2602 "00100001" // /* MW 2 */ + 2603 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 218 23 first + 2604 "10011000" // ST r16, [p6, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2605 "00010001" // /* MW 3 */ + 2606 "11010110" // /* MW 2 */ + 2607 "00001110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 +.src_ref 1 "io_buffer_main.h" 434 8 +.src_ref 1 "io_buffer_main.h" 434 8 + 2608 "11100100" // MOVX r16, #-1; MOV el0, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2609 "00111001" // /* MW 5 */ + 2610 "00110101" // /* MW 4 */ + 2611 "10100000" // /* MW 3 */ + 2612 "00011111" // /* MW 2 */ + 2613 "11111100" // /* MW 1 */ + 2614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2615 "00000000" // /* MW 1 */ + 2616 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2617 "00000000" // /* MW 1 */ + 2618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2619 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 first + 2620 "00011000" // ACQ.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2621 "00001000" // /* MW 3 */ + 2622 "01010111" // /* MW 2 */ + 2623 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 107 60 +.src_ref 0 "0_0_reloadable5.cc" 107 110 +.src_ref 0 "0_0_reloadable5.cc" 110 60 +.src_ref 0 "0_0_reloadable5.cc" 113 7 + 2624 "01100100" // MOVX r17, #2; MOV r19, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2625 "00000101" // /* MW 5 */ + 2626 "10100000" // /* MW 4 */ + 2627 "00101001" // /* MW 3 */ + 2628 "01000001" // /* MW 2 */ + 2629 "00000100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 107 60 +.src_ref 0 "0_0_reloadable5.cc" 107 60 first + 2630 "11100100" // LSHL r20, r26, r17; MOV r18, p0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2631 "10000001" // /* MW 5 */ + 2632 "00100001" // /* MW 4 */ + 2633 "10111001" // /* MW 3 */ + 2634 "00100011" // /* MW 2 */ + 2635 "11010101" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 107 60 +.src_ref 0 "0_0_reloadable5.cc" 107 110 + 2636 "10100100" // LTU r18, r19, r15; ADD.NC p6, r18, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2637 "10100010" // /* MW 5 */ + 2638 "11010010" // /* MW 4 */ + 2639 "10011100" // /* MW 3 */ + 2640 "10011111" // /* MW 2 */ + 2641 "10011100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 107 60 +.src_ref 0 "0_0_reloadable5.cc" 110 60 + 2642 "10111010" // LDA r20, [p6]; ST r20, [sp, #-28]; MOV r19, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2643 "01110010" // /* MW 9 */ + 2644 "01100000" // /* MW 8 */ + 2645 "01101110" // /* MW 7 */ + 2646 "10000010" // /* MW 6 */ + 2647 "10010101" // /* MW 5 */ + 2648 "11100110" // /* MW 4 */ + 2649 "11010111" // /* MW 3 */ + 2650 "11010010" // /* MW 2 */ + 2651 "11000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 + 2652 "00000010" // ST r18, [sp, #-24]; MOV r26, r18 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2653 "01110000" // /* MW 7 */ + 2654 "10010000" // /* MW 6 */ + 2655 "01001100" // /* MW 5 */ + 2656 "00000011" // /* MW 4 */ + 2657 "10110000" // /* MW 3 */ + 2658 "01001010" // /* MW 2 */ + 2659 "11111101" // /* MW 1 */ + 2660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2661 "00000000" // /* MW 1 */ + 2662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2663 "00000000" // /* MW 1 */ + 2664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2665 "00000000" // /* MW 1 */ + 2666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2667 "00000000" // /* MW 1 */ + 2668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2669 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 first + 2670 "00011000" // ADD.NC p6, r20, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2671 "00000010" // /* MW 3 */ + 2672 "01101010" // /* MW 2 */ + 2673 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 + 2674 "10011000" // LDA r20, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2675 "10010110" // /* MW 3 */ + 2676 "00011110" // /* MW 2 */ + 2677 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 36 + 2678 "10011000" // LDA r22, [p6], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2679 "11010110" // /* MW 3 */ + 2680 "00111110" // /* MW 2 */ + 2681 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 46 + 2682 "10011000" // LDA r21, [p6], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2683 "10110110" // /* MW 3 */ + 2684 "11101110" // /* MW 2 */ + 2685 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 60 + 2686 "10011000" // LDA r27, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2687 "01110110" // /* MW 3 */ + 2688 "00000111" // /* MW 2 */ + 2689 "00000110" // /* MW 1 */ + 2690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2691 "00000000" // /* MW 1 */ + 2692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2693 "00000000" // /* MW 1 */ + 2694 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2695 "00000000" // /* MW 1 */ + 2696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2697 "00000000" // /* MW 1 */ + 2698 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2699 "00000000" // /* MW 1 */ + 2700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2701 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 219 30 first +.src_ref 1 "io_buffer_compiler.h" 219 37 first + 2702 "00011000" // SEL.EQZ r20, r20, r22, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2703 "01100010" // /* MW 3 */ + 2704 "00101001" // /* MW 2 */ + 2705 "00010101" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 218 23 first + 2706 "10011000" // ST r20, [p6, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2707 "10010001" // /* MW 3 */ + 2708 "11010110" // /* MW 2 */ + 2709 "00001110" // /* MW 1 */ + 2710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2711 "00000000" // /* MW 1 */ + 2712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2713 "00000000" // /* MW 1 */ + 2714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2715 "00000000" // /* MW 1 */ + 2716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2717 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 first + 2718 "00011000" // ACQ.COND r21, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2719 "00001000" // /* MW 3 */ + 2720 "01010111" // /* MW 2 */ + 2721 "00010101" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 110 60 first + 2722 "10011000" // LSHL r18, r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2723 "00011101" // /* MW 3 */ + 2724 "10100101" // /* MW 2 */ + 2725 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 110 60 +.src_ref 0 "0_0_reloadable5.cc" 110 60 + 2726 "10100100" // LSHL r18, r2, r17; ADD.NC r19, r19, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2727 "10010010" // /* MW 5 */ + 2728 "10110011" // /* MW 4 */ + 2729 "10111001" // /* MW 3 */ + 2730 "10100011" // /* MW 2 */ + 2731 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 110 60 +.src_ref 0 "0_0_reloadable5.cc" 110 112 + 2732 "10100100" // NEZ r26, r14; ADD.NC p6, r19, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2733 "10010010" // /* MW 5 */ + 2734 "11010011" // /* MW 4 */ + 2735 "00001100" // /* MW 3 */ + 2736 "10011110" // /* MW 2 */ + 2737 "01110110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 110 60 + 2738 "00001100" // LDA r18, [p6]; ST r26, [sp, #-32] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2739 "10101011" // /* MW 5 */ + 2740 "11000110" // /* MW 4 */ + 2741 "11011111" // /* MW 3 */ + 2742 "11001010" // /* MW 2 */ + 2743 "11000000" // /* MW 1 */ + 2744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2745 "00000000" // /* MW 1 */ + 2746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2747 "00000000" // /* MW 1 */ + 2748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2749 "00000000" // /* MW 1 */ + 2750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2751 "00000000" // /* MW 1 */ + 2752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2753 "00000000" // /* MW 1 */ + 2754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2755 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 first + 2756 "00011000" // ADD.NC p7, r18, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2757 "00000010" // /* MW 3 */ + 2758 "01101001" // /* MW 2 */ + 2759 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 + 2760 "10011000" // LDA r19, [p7], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2761 "01110110" // /* MW 3 */ + 2762 "00111110" // /* MW 2 */ + 2763 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 23 + 2764 "10011000" // LDA r18, [p7], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2765 "01010110" // /* MW 3 */ + 2766 "11101110" // /* MW 2 */ + 2767 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 36 + 2768 "10011000" // LDA r20, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2769 "10010110" // /* MW 3 */ + 2770 "00011110" // /* MW 2 */ + 2771 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 60 + 2772 "10011000" // LDA r27, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2773 "01110110" // /* MW 3 */ + 2774 "00000111" // /* MW 2 */ + 2775 "00000111" // /* MW 1 */ + 2776 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2777 "00000000" // /* MW 1 */ + 2778 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2779 "00000000" // /* MW 1 */ + 2780 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2781 "00000000" // /* MW 1 */ + 2782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2783 "00000000" // /* MW 1 */ + 2784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2785 "00000000" // /* MW 1 */ + 2786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2787 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 219 30 first +.src_ref 1 "io_buffer_compiler.h" 219 37 first + 2788 "00011000" // SEL.EQZ r19, r19, r20, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2789 "01000010" // /* MW 3 */ + 2790 "11100111" // /* MW 2 */ + 2791 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 218 23 first + 2792 "10011000" // ST r19, [p7, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2793 "01110001" // /* MW 3 */ + 2794 "11010110" // /* MW 2 */ + 2795 "00001111" // /* MW 1 */ + 2796 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2797 "00000000" // /* MW 1 */ + 2798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2799 "00000000" // /* MW 1 */ + 2800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2801 "00000000" // /* MW 1 */ + 2802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2803 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 first + 2804 "00011000" // ACQ.COND r18, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2805 "00001000" // /* MW 3 */ + 2806 "10010111" // /* MW 2 */ + 2807 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 113 7 first + 2808 "10011000" // LSHL r16, r0, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2809 "00011101" // /* MW 3 */ + 2810 "00100001" // /* MW 2 */ + 2811 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 113 7 + 2812 "11111000" // MOV dj0, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2813 "00100000" // /* MW 3 */ + 2814 "10001000" // /* MW 2 */ + 2815 "00011000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 113 7 + 2816 "01000100" // MOVXM p7, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2817 "10000000" // /* MW 5 */ + 2818 "11001000" // /* MW 4 */ + 2819 "11001110" // /* MW 3 */ + 2820 "00000111" // /* MW 2 */ + 2821 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 113 7 + 2822 "00001100" // LDA p1, [p7, dj0]; ST el0, [sp, #-36] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2823 "01011011" // /* MW 5 */ + 2824 "10111000" // /* MW 4 */ + 2825 "11011111" // /* MW 3 */ + 2826 "00010011" // /* MW 2 */ + 2827 "11100000" // /* MW 1 */ + 2828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2829 "00000000" // /* MW 1 */ + 2830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2831 "00000000" // /* MW 1 */ + 2832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2833 "00000000" // /* MW 1 */ + 2834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2835 "00000000" // /* MW 1 */ + 2836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2837 "00000000" // /* MW 1 */ + 2838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2839 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 113 4 +.no_stack_arguments + 2840 "00011000" // JL p1 /* MW 4 */ /* control_operation: words=4 call unconditional cycles_taken=1 indirect absolute delay_slots=5 */ + 2841 "01000000" // /* MW 3 */ + 2842 "00110000" // /* MW 2 */ + 2843 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 116 60 +.src_ref 0 "0_0_reloadable5.cc" 118 60 +.delay_slot + 2844 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2845 "11000000" // /* MW 3 */ + 2846 "01100000" // /* MW 2 */ + 2847 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2849 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2851 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2853 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2854 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2855 "01111110" // /* MW 9 */ + 2856 "10100101" // /* MW 8 */ + 2857 "00000001" // /* MW 7 */ + 2858 "00000000" // /* MW 6 */ + 2859 "00010000" // /* MW 5 */ + 2860 "00000000" // /* MW 4 */ + 2861 "11110000" // /* MW 3 */ + 2862 "00101100" // /* MW 2 */ + 2863 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_main.h" 464 8 +.src_ref 1 "io_buffer_main.h" 464 8 +.src_ref 1 "io_buffer_main.h" 464 8 +.src_ref 0 "0_0_reloadable5.cc" 116 60 first +.return_address + 2864 "00101100" // LDA r17, [p7]; MOVX r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2865 "00001010" // /* MW 5 */ + 2866 "01000000" // /* MW 4 */ + 2867 "11010000" // /* MW 3 */ + 2868 "11000110" // /* MW 2 */ + 2869 "11100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 + 2870 "00011000" // LDA r26, [sp, #-36] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2871 "01010001" // /* MW 3 */ + 2872 "11011111" // /* MW 2 */ + 2873 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 118 60 + 2874 "00011000" // LDA dj0, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2875 "01000001" // /* MW 3 */ + 2876 "11100100" // /* MW 2 */ + 2877 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_main.h" 464 8 + 2878 "00011000" // LDA el0, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2879 "00101001" // /* MW 3 */ + 2880 "11101000" // /* MW 2 */ + 2881 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 + 2882 "00011000" // LDA eh0, [sp, #-32] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2883 "00001001" // /* MW 3 */ + 2884 "11100000" // /* MW 2 */ + 2885 "00000111" // /* MW 1 */ + 2886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2887 "00000000" // /* MW 1 */ + 2888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2889 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 12 first + 2890 "00011000" // ADD.NC p0, r17, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2891 "10001000" // /* MW 3 */ + 2892 "01101000" // /* MW 2 */ + 2893 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 12 + 2894 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2895 "00110110" // /* MW 3 */ + 2896 "00000110" // /* MW 2 */ + 2897 "00000000" // /* MW 1 */ + 2898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2899 "00000000" // /* MW 1 */ + 2900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2901 "00000000" // /* MW 1 */ + 2902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2903 "00000000" // /* MW 1 */ + 2904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2905 "00000000" // /* MW 1 */ + 2906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2907 "00000000" // /* MW 1 */ + 2908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2909 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 first + 2910 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2911 "00001000" // /* MW 3 */ + 2912 "01010101" // /* MW 2 */ + 2913 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 2914 "11010100" // LDA r17, [p0, #-4]; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2915 "01000001" // /* MW 5 */ + 2916 "10101111" // /* MW 4 */ + 2917 "11011101" // /* MW 3 */ + 2918 "11000110" // /* MW 2 */ + 2919 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 +.src_ref 0 "0_0_reloadable5.cc" 118 60 first + 2920 "11010100" // LDA r18, [p7, dj0]; MOV r26, el0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2921 "00111001" // /* MW 5 */ + 2922 "01000000" // /* MW 4 */ + 2923 "11011101" // /* MW 3 */ + 2924 "01001010" // /* MW 2 */ + 2925 "11100000" // /* MW 1 */ + 2926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2927 "00000000" // /* MW 1 */ + 2928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2929 "00000000" // /* MW 1 */ + 2930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2931 "00000000" // /* MW 1 */ + 2932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2933 "00000000" // /* MW 1 */ + 2934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2935 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 2936 "10011000" // SUB r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2937 "00010001" // /* MW 3 */ + 2938 "00100111" // /* MW 2 */ + 2939 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 12 first +.src_ref 1 "io_buffer_compiler.h" 630 24 + 2940 "00100100" // SEL.EQZ r17, r17, r19, r27; ADD.NC p7, r18, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2941 "00010000" // /* MW 5 */ + 2942 "11010010" // /* MW 4 */ + 2943 "01001110" // /* MW 3 */ + 2944 "01100110" // /* MW 2 */ + 2945 "10001100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 12 +.src_ref 1 "io_buffer_compiler.h" 630 22 first + 2946 "00001100" // LDA r17, [p7]; ST r17, [p0, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2947 "01100011" // /* MW 5 */ + 2948 "11101100" // /* MW 4 */ + 2949 "11010001" // /* MW 3 */ + 2950 "11000110" // /* MW 2 */ + 2951 "11100000" // /* MW 1 */ + 2952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2953 "00000000" // /* MW 1 */ + 2954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2955 "00000000" // /* MW 1 */ + 2956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2957 "00000000" // /* MW 1 */ + 2958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2959 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 2960 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2961 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 2962 "11111000" // MOV r26, eh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2963 "00011100" // /* MW 3 */ + 2964 "10100001" // /* MW 2 */ + 2965 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2966 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2967 "00001000" // /* MW 3 */ + 2968 "01010101" // /* MW 2 */ + 2969 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 2970 "11010100" // LDA r17, [p7, #-4]; MOV r27, el0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2971 "00111001" // /* MW 5 */ + 2972 "11000000" // /* MW 4 */ + 2973 "11011101" // /* MW 3 */ + 2974 "11000110" // /* MW 2 */ + 2975 "11111110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 121 60 first + 2976 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2977 "01010110" // /* MW 3 */ + 2978 "00000110" // /* MW 2 */ + 2979 "00000110" // /* MW 1 */ + 2980 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2981 "00000000" // /* MW 1 */ + 2982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2983 "00000000" // /* MW 1 */ + 2984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2985 "00000000" // /* MW 1 */ + 2986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2987 "00000000" // /* MW 1 */ + 2988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2989 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 2990 "10011000" // SUB r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2991 "00010001" // /* MW 3 */ + 2992 "00100111" // /* MW 2 */ + 2993 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 25 first +.src_ref 1 "io_buffer_compiler.h" 630 24 + 2994 "00100100" // SEL.EQZ r17, r17, r19, r27; ADD.NC p0, r18, #20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2995 "00010100" // /* MW 5 */ + 2996 "11010010" // /* MW 4 */ + 2997 "01000000" // /* MW 3 */ + 2998 "01100110" // /* MW 2 */ + 2999 "10001100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 25 +.src_ref 1 "io_buffer_compiler.h" 630 22 first + 3000 "00001100" // LDA r17, [p0]; ST r17, [p7, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3001 "01100011" // /* MW 5 */ + 3002 "11101100" // /* MW 4 */ + 3003 "11011111" // /* MW 3 */ + 3004 "11000110" // /* MW 2 */ + 3005 "00000000" // /* MW 1 */ + 3006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3007 "00000000" // /* MW 1 */ + 3008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3009 "00000000" // /* MW 1 */ + 3010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3011 "00000000" // /* MW 1 */ + 3012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3013 "00000000" // /* MW 1 */ + 3014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3015 "00000000" // /* MW 1 */ + 3016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3017 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 first + 3018 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3019 "00001000" // /* MW 3 */ + 3020 "01010101" // /* MW 2 */ + 3021 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 123 + 3022 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3023 "00111001" // /* MW 3 */ + 3024 "11111100" // /* MW 2 */ + 3025 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 3026 "10011000" // LDA r17, [p0, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3027 "00110110" // /* MW 3 */ + 3028 "11100110" // /* MW 2 */ + 3029 "00000000" // /* MW 1 */ + 3030 "00011000" // LDA p6, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3031 "00011001" // /* MW 3 */ + 3032 "11101111" // /* MW 2 */ + 3033 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 3034 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3035 "10011001" // /* MW 3 */ + 3036 "11110111" // /* MW 2 */ + 3037 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 3038 "00011000" // LDA r14, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3039 "11010001" // /* MW 3 */ + 3040 "11110001" // /* MW 2 */ + 3041 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3042 "00011000" // LDA r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3043 "11110001" // /* MW 3 */ + 3044 "11111001" // /* MW 2 */ + 3045 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 123 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3046 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3047 "00000001" // /* MW 5 */ + 3048 "00000000" // /* MW 4 */ + 3049 "00000000" // /* MW 3 */ + 3050 "11111000" // /* MW 2 */ + 3051 "11111111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 123 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3052 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3053 "00000000" // /* MW 3 */ + 3054 "00101000" // /* MW 2 */ + 3055 "00010000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 first +.delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3056 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3057 "00010001" // /* MW 3 */ + 3058 "00100001" // /* MW 2 */ + 3059 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3061 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3062 "11111000" // MOV r27, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3063 "00100000" // /* MW 3 */ + 3064 "11010111" // /* MW 2 */ + 3065 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.delay_slot + 3066 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3067 "00000010" // /* MW 3 */ + 3068 "01100001" // /* MW 2 */ + 3069 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 22 +.delay_slot + 3070 "10011000" // ST r16, [p0, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3071 "00010001" // /* MW 3 */ + 3072 "11100110" // /* MW 2 */ +.label _Z13kernelWrapperPPvjjjj__end +.label __Z13kernelWrapperPPvjjjj___func_end0 + 3073 "00001000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E +.function setup _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E +.src_ref 2 "elementwise_binary.h" 100 first +.src_ref 2 "elementwise_binary.h" 103 4 first +.function_start + 3088 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3089 "00000000" // /* MW 3 */ + 3090 "00101000" // /* MW 2 */ + 3091 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 3092 "01000100" // MOVXM p0, #509088 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3093 "01000000" // /* MW 5 */ + 3094 "11001001" // /* MW 4 */ + 3095 "11000000" // /* MW 3 */ + 3096 "00000111" // /* MW 2 */ + 3097 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 3098 "10111000" // MOV m0, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3099 "10000000" // /* MW 3 */ + 3100 "00000000" // /* MW 2 */ + 3101 "00011000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 first +.delay_slot + 3102 "10011000" // ST m0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3103 "00000001" // /* MW 3 */ + 3104 "00000100" // /* MW 2 */ + 3105 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 3106 "10011000" // ST m0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3107 "00000001" // /* MW 3 */ + 3108 "00010100" // /* MW 2 */ + 3109 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_end0 + 3111 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv +.function setup _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv +.src_ref 2 "elementwise_binary.h" 89 first +.src_ref 2 "elementwise_binary.h" 92 22 +.src_ref 2 "elementwise_binary.h" 92 24 first +.function_start + 3120 "10111010" // LDA el0, [p1], #4; MOVXM p0, #509056 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3121 "00010000" // /* MW 9 */ + 3122 "01000000" // /* MW 8 */ + 3123 "00110010" // /* MW 7 */ + 3124 "11110000" // /* MW 6 */ + 3125 "00000001" // /* MW 5 */ + 3126 "00000000" // /* MW 4 */ + 3127 "11010000" // /* MW 3 */ + 3128 "10000101" // /* MW 2 */ + 3129 "00100011" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 89 + 3130 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3131 "00000001" // /* MW 5 */ + 3132 "00000000" // /* MW 4 */ + 3133 "00000000" // /* MW 3 */ + 3134 "00001000" // /* MW 2 */ + 3135 "00000000" // /* MW 1 */ + 3136 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3137 "00111101" // /* MW 3 */ + 3138 "11111000" // /* MW 2 */ + 3139 "00001111" // /* MW 1 */ + 3140 "10011000" // ST r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3141 "11110101" // /* MW 3 */ + 3142 "11111101" // /* MW 2 */ + 3143 "00001111" // /* MW 1 */ + 3144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3145 "00000000" // /* MW 1 */ + 3146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3147 "00000000" // /* MW 1 */ + 3148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3149 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 92 22 first + 3150 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3151 "00101001" // /* MW 3 */ + 3152 "00011100" // /* MW 2 */ + 3153 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 93 24 first + 3154 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3155 "00101110" // /* MW 3 */ + 3156 "00011100" // /* MW 2 */ + 3157 "00000001" // /* MW 1 */ + 3158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3159 "00000000" // /* MW 1 */ + 3160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3161 "00000000" // /* MW 1 */ + 3162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3163 "00000000" // /* MW 1 */ + 3164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3165 "00000000" // /* MW 1 */ + 3166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3167 "00000000" // /* MW 1 */ + 3168 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3169 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 93 22 + 3170 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3171 "00101001" // /* MW 3 */ + 3172 "00011100" // /* MW 2 */ + 3173 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 94 24 first + 3174 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3175 "00101110" // /* MW 3 */ + 3176 "00000100" // /* MW 2 */ + 3177 "00000001" // /* MW 1 */ + 3178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3179 "00000000" // /* MW 1 */ + 3180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3181 "00000000" // /* MW 1 */ + 3182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3183 "00000000" // /* MW 1 */ + 3184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3185 "00000000" // /* MW 1 */ + 3186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3187 "00000000" // /* MW 1 */ + 3188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3189 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 94 22 + 3190 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3191 "00101001" // /* MW 3 */ + 3192 "00011100" // /* MW 2 */ + 3193 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 95 24 first + 3194 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3195 "00101110" // /* MW 3 */ + 3196 "00010100" // /* MW 2 */ + 3197 "00000001" // /* MW 1 */ + 3198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3199 "00000000" // /* MW 1 */ + 3200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3201 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 96 8 first +.no_stack_arguments + 3202 "00000100" // JL #3088 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3088 delay_slots=5 */ + 3203 "00000001" // /* MW 5 */ + 3204 "00000000" // /* MW 4 */ + 3205 "00001000" // /* MW 3 */ + 3206 "00000110" // /* MW 2 */ + 3207 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3209 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3211 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3212 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3213 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 95 22 first +.delay_slot + 3214 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3215 "00101001" // /* MW 3 */ + 3216 "11011100" // /* MW 2 */ + 3217 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 96 8 +.delay_slot + 3218 "00101110" // NOPA; NOPS; MOV r15, p0; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 3219 "00011100" // /* MW 13 */ + 3220 "00000000" // /* MW 12 */ + 3221 "00000000" // /* MW 11 */ + 3222 "00000111" // /* MW 10 */ + 3223 "10000110" // /* MW 9 */ + 3224 "01011110" // /* MW 8 */ + 3225 "00000000" // /* MW 7 */ + 3226 "00000000" // /* MW 6 */ + 3227 "10110110" // /* MW 5 */ + 3228 "00000010" // /* MW 4 */ + 3229 "11110000" // /* MW 3 */ + 3230 "00101100" // /* MW 2 */ + 3231 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 96 8 +.src_ref 2 "elementwise_binary.h" 98 4 +.src_ref 3 "add_impl.h" 105 29 +.return_address + 3232 "10111010" // LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3233 "00001000" // /* MW 9 */ + 3234 "11000100" // /* MW 8 */ + 3235 "00110011" // /* MW 7 */ + 3236 "01101000" // /* MW 6 */ + 3237 "00000000" // /* MW 5 */ + 3238 "00000001" // /* MW 4 */ + 3239 "00100000" // /* MW 3 */ + 3240 "00000111" // /* MW 2 */ + 3241 "11111111" // /* MW 1 */ +.src_ref 3 "add_impl.h" 105 29 +.src_ref 3 "add_impl.h" 106 37 +.src_ref 3 "add_impl.h" 106 39 + 3242 "10111010" // MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3243 "01011000" // /* MW 9 */ + 3244 "11111101" // /* MW 8 */ + 3245 "00000111" // /* MW 7 */ + 3246 "00001000" // /* MW 6 */ + 3247 "10000000" // /* MW 5 */ + 3248 "00000001" // /* MW 4 */ + 3249 "10000000" // /* MW 3 */ + 3250 "11100010" // /* MW 2 */ + 3251 "00000001" // /* MW 1 */ +.src_ref 3 "add_impl.h" 105 29 first +.src_ref 3 "add_impl.h" 106 39 + 3252 "01111010" // LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3253 "00000001" // /* MW 9 */ + 3254 "10100000" // /* MW 8 */ + 3255 "00000111" // /* MW 7 */ + 3256 "10000000" // /* MW 6 */ + 3257 "00010001" // /* MW 5 */ + 3258 "00001010" // /* MW 4 */ + 3259 "00100000" // /* MW 3 */ + 3260 "10111110" // /* MW 2 */ + 3261 "11111111" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 50 first + 3262 "10011000" // LDA.u8 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3263 "01001010" // /* MW 3 */ + 3264 "00000110" // /* MW 2 */ + 3265 "00000000" // /* MW 1 */ + 3266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3267 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3269 "00000000" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 37 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3270 "00011000" // ST.s16 r16, [p0, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3271 "00010111" // /* MW 3 */ + 3272 "00000010" // /* MW 2 */ + 3273 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 98 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3274 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3275 "00000000" // /* MW 3 */ + 3276 "00101000" // /* MW 2 */ + 3277 "00010000" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 54 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3278 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3279 "00000101" // /* MW 3 */ + 3280 "00100010" // /* MW 2 */ + 3281 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 98 4 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3282 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3283 "00000001" // /* MW 5 */ + 3284 "00000000" // /* MW 4 */ + 3285 "00000000" // /* MW 3 */ + 3286 "11111000" // /* MW 2 */ + 3287 "11111111" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 54 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3288 "10011000" // EQ r27, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3289 "00100111" // /* MW 3 */ + 3290 "01110111" // /* MW 2 */ + 3291 "00010100" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 39 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3292 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3293 "10000010" // /* MW 3 */ + 3294 "00100001" // /* MW 2 */ + 3295 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + 3297 "00000000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E +.function setup _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E +.src_ref 2 "elementwise_binary_broadcasting.h" 40 first +.src_ref 2 "elementwise_binary_broadcasting.h" 41 33 +.src_ref 2 "elementwise_binary_broadcasting.h" 41 33 +.function_start + 3312 "10111010" // MOVA m0, #20; MOVXM p0, #509068 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3313 "00010000" // /* MW 9 */ + 3314 "01000110" // /* MW 8 */ + 3315 "00110010" // /* MW 7 */ + 3316 "11110000" // /* MW 6 */ + 3317 "00000001" // /* MW 5 */ + 3318 "00000000" // /* MW 4 */ + 3319 "10000000" // /* MW 3 */ + 3320 "10000000" // /* MW 2 */ + 3321 "00000010" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 41 25 +.src_ref 2 "elementwise_binary_broadcasting.h" 41 33 +.src_ref 2 "elementwise_binary_broadcasting.h" 41 33 first +.src_ref 2 "elementwise_binary_broadcasting.h" 42 25 + 3322 "10111010" // LDA.u8 r0, [p0], m0; MOVX r2, #1; MOV r1, #6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3323 "01011000" // /* MW 9 */ + 3324 "00000110" // /* MW 8 */ + 3325 "00101000" // /* MW 7 */ + 3326 "00101000" // /* MW 6 */ + 3327 "00100000" // /* MW 5 */ + 3328 "00000000" // /* MW 4 */ + 3329 "01010000" // /* MW 3 */ + 3330 "00000001" // /* MW 2 */ + 3331 "00000001" // /* MW 1 */ + 3332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3333 "00000000" // /* MW 1 */ + 3334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3335 "00000000" // /* MW 1 */ + 3336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3337 "00000000" // /* MW 1 */ + 3338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3339 "00000000" // /* MW 1 */ + 3340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3341 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 43 4 first + 3342 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3343 "00000000" // /* MW 3 */ + 3344 "00101000" // /* MW 2 */ + 3345 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 41 33 first +.delay_slot + 3346 "00011000" // NEZ r3, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3347 "11110000" // /* MW 3 */ + 3348 "00000110" // /* MW 2 */ + 3349 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 41 33 +.delay_slot + 3350 "10011000" // NE r0, r2, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3351 "00001000" // /* MW 3 */ + 3352 "10000000" // /* MW 2 */ + 3353 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 42 25 first +.delay_slot + 3354 "10011000" // LSHL r0, r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3355 "00011101" // /* MW 3 */ + 3356 "00000000" // /* MW 2 */ + 3357 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 41 25 first +.src_ref 2 "elementwise_binary_broadcasting.h" 42 23 +.delay_slot + 3358 "01011100" // ST r0, [p0, #4]; LSHL r2, r3, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3359 "00111011" // /* MW 5 */ + 3360 "10001000" // /* MW 4 */ + 3361 "00110001" // /* MW 3 */ + 3362 "10000010" // /* MW 2 */ + 3363 "00000010" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 41 23 +.delay_slot + 3364 "10011000" // ST r2, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3365 "01010001" // /* MW 3 */ + 3366 "00000100" // /* MW 2 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_end0 + 3367 "00001000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv +.function setup _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv +.src_ref 2 "elementwise_binary_broadcasting.h" 35 +.src_ref 2 "elementwise_binary_broadcasting.h" 35 first +.function_start + 3376 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3377 "00000001" // /* MW 5 */ + 3378 "00000000" // /* MW 4 */ + 3379 "00000000" // /* MW 3 */ + 3380 "00001000" // /* MW 2 */ + 3381 "00000000" // /* MW 1 */ + 3382 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3383 "00111101" // /* MW 3 */ + 3384 "11111100" // /* MW 2 */ + 3385 "00001111" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 36 8 first +.no_stack_arguments + 3386 "00000100" // JL #3120 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3120 delay_slots=5 */ + 3387 "00000001" // /* MW 5 */ + 3388 "00000000" // /* MW 4 */ + 3389 "00011000" // /* MW 3 */ + 3390 "00000110" // /* MW 2 */ + 3391 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 36 8 +.delay_slot + 3392 "01000100" // MOVXM p0, #509056 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3393 "00000000" // /* MW 5 */ + 3394 "11001001" // /* MW 4 */ + 3395 "11000000" // /* MW 3 */ + 3396 "00000111" // /* MW 2 */ + 3397 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3399 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3401 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3403 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3404 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3405 "01100111" // /* MW 3 */ + 3406 "00000001" // /* MW 2 */ + 3407 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 37 8 +.return_address + 3408 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3409 "00111001" // /* MW 3 */ + 3410 "11111100" // /* MW 2 */ + 3411 "00000111" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 37 8 first +.tail_call + 3412 "10000100" // J #3312 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=3312 delay_slots=5 */ + 3413 "00000000" // /* MW 5 */ + 3414 "00000000" // /* MW 4 */ + 3415 "01111000" // /* MW 3 */ + 3416 "00000110" // /* MW 2 */ + 3417 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 37 8 +.delay_slot + 3418 "01000100" // MOVXM p0, #509056 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3419 "00000000" // /* MW 5 */ + 3420 "11001001" // /* MW 4 */ + 3421 "11000000" // /* MW 3 */ + 3422 "00000111" // /* MW 2 */ + 3423 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 38 4 first +.delay_slot + 3424 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3425 "00000001" // /* MW 5 */ + 3426 "00000000" // /* MW 4 */ + 3427 "00000000" // /* MW 3 */ + 3428 "11111000" // /* MW 2 */ + 3429 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3431 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3433 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + 3435 "00000000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.function run _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.src_ref 2 "elementwise_binary_broadcasting.h" 48 first +.src_ref 2 "elementwise_binary_broadcasting.h" 55 37 +.src_ref 2 "elementwise_binary_broadcasting.h" 61 19 +.function_start + 3440 "10111010" // MOVA m0, #20; MOVXM p3, #509056 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3441 "00010000" // /* MW 9 */ + 3442 "01000000" // /* MW 8 */ + 3443 "10110010" // /* MW 7 */ + 3444 "11110001" // /* MW 6 */ + 3445 "00000001" // /* MW 5 */ + 3446 "00000000" // /* MW 4 */ + 3447 "10000000" // /* MW 3 */ + 3448 "10000000" // /* MW 2 */ + 3449 "00000010" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 55 37 first + 3450 "10011000" // LDA r0, [p3], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3451 "00010110" // /* MW 3 */ + 3452 "00111100" // /* MW 2 */ + 3453 "00000011" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 61 19 first +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 + 3454 "11010100" // LDA.u8 r1, [p3], m0; MOV p4, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3455 "10000001" // /* MW 5 */ + 3456 "11001101" // /* MW 4 */ + 3457 "01011000" // /* MW 3 */ + 3458 "00000101" // /* MW 2 */ + 3459 "01100001" // /* MW 1 */ + 3460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3461 "00000000" // /* MW 1 */ + 3462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3463 "00000000" // /* MW 1 */ + 3464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3465 "00000000" // /* MW 1 */ + 3466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3467 "00000000" // /* MW 1 */ + 3468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3469 "00000000" // /* MW 1 */ + 3470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3471 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 61 12 +.src_ref 2 "elementwise_binary_broadcasting.h" 61 35 + 3472 "10000100" // JNZ r1, #3536 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3536 delay_slots=5 */ + 3473 "00000001" // /* MW 5 */ + 3474 "01000000" // /* MW 4 */ + 3475 "11101000" // /* MW 3 */ + 3476 "00000110" // /* MW 2 */ + 3477 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 55 78 +.delay_slot + 3478 "00011000" // MOVX r2, #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3479 "11101001" // /* MW 3 */ + 3480 "11000100" // /* MW 2 */ + 3481 "00010111" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 55 78 first +.delay_slot + 3482 "10011000" // LSHL r0, r0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3483 "00101101" // /* MW 3 */ + 3484 "00000000" // /* MW 2 */ + 3485 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3487 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3489 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3490 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3491 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 62 28 first + 3492 "10011000" // LDA.s16 r1, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3493 "00110010" // /* MW 3 */ + 3494 "00000100" // /* MW 2 */ + 3495 "00000000" // /* MW 1 */ + 3496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3497 "00000000" // /* MW 1 */ + 3498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3499 "00000000" // /* MW 1 */ + 3500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3501 "00000000" // /* MW 1 */ + 3502 "10000100" // J #3568 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3568 delay_slots=5 */ + 3503 "00000000" // /* MW 5 */ + 3504 "00000000" // /* MW 4 */ + 3505 "11111000" // /* MW 3 */ + 3506 "00000110" // /* MW 2 */ + 3507 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3509 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3511 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 first +.delay_slot + 3512 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3513 "01110010" // /* MW 3 */ + 3514 "00000101" // /* MW 2 */ + 3515 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3516 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3517 "01100111" // /* MW 3 */ + 3518 "00000001" // /* MW 2 */ + 3519 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.delay_slot + 3520 "11100001" // NOPA; NOPB; VST x0, [p0]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3521 "00000000" // /* MW 15 */ + 3522 "00000000" // /* MW 14 */ + 3523 "01111000" // /* MW 13 */ + 3524 "10100101" // /* MW 12 */ + 3525 "00000001" // /* MW 11 */ + 3526 "00000000" // /* MW 10 */ + 3527 "00000000" // /* MW 9 */ + 3528 "00000000" // /* MW 8 */ + 3529 "00010011" // /* MW 7 */ + 3530 "00000100" // /* MW 6 */ + 3531 "00100000" // /* MW 5 */ + 3532 "00000000" // /* MW 4 */ + 3533 "11110000" // /* MW 3 */ + 3534 "00101100" // /* MW 2 */ + 3535 "00000000" // /* MW 1 */ +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_96 +.src_ref 2 "elementwise_binary_broadcasting.h" 65 28 first + 3536 "10011000" // LDA.s16 r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3537 "00110010" // /* MW 3 */ + 3538 "00000100" // /* MW 2 */ + 3539 "00000001" // /* MW 1 */ + 3540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3541 "00000000" // /* MW 1 */ + 3542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3543 "00000000" // /* MW 1 */ + 3544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3545 "00000000" // /* MW 1 */ + 3546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3547 "00000000" // /* MW 1 */ + 3548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3549 "00000000" // /* MW 1 */ + 3550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3551 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 first + 3552 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3553 "01110010" // /* MW 3 */ + 3554 "00000101" // /* MW 2 */ + 3555 "00011000" // /* MW 1 */ + 3556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3557 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first + 3558 "01111010" // NOPA; VST x0, [p1]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3559 "00000000" // /* MW 9 */ + 3560 "00000000" // /* MW 8 */ + 3561 "00000000" // /* MW 7 */ + 3562 "00000000" // /* MW 6 */ + 3563 "00010011" // /* MW 5 */ + 3564 "00000100" // /* MW 4 */ + 3565 "11110001" // /* MW 3 */ + 3566 "00101100" // /* MW 2 */ + 3567 "00000000" // /* MW 1 */ +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_128 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 first +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 first + 3568 "10111010" // LDA m0, [p4, #20]; MOVX r0, #60; ADD.NC lc, r0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3569 "01001000" // /* MW 9 */ + 3570 "00111111" // /* MW 8 */ + 3571 "10111000" // /* MW 7 */ + 3572 "10001010" // /* MW 6 */ + 3573 "00000111" // /* MW 5 */ + 3574 "00000000" // /* MW 4 */ + 3575 "11010000" // /* MW 3 */ + 3576 "10000000" // /* MW 2 */ + 3577 "10001010" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 + 3578 "10111010" // LDA m1, [p3, #4]; MOVXM ls, #3680 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3579 "00010000" // /* MW 9 */ + 3580 "00110000" // /* MW 8 */ + 3581 "01111111" // /* MW 7 */ + 3582 "00000000" // /* MW 6 */ + 3583 "00000000" // /* MW 5 */ + 3584 "00000000" // /* MW 4 */ + 3585 "11010000" // /* MW 3 */ + 3586 "10010000" // /* MW 2 */ + 3587 "01100010" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 + 3588 "01000100" // MOVXM le, #3712 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3589 "00000000" // /* MW 5 */ + 3590 "11111101" // /* MW 4 */ + 3591 "00000110" // /* MW 3 */ + 3592 "00000000" // /* MW 2 */ + 3593 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 + 3594 "01000100" // MOVXM p4, #508944 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3595 "00100000" // /* MW 5 */ + 3596 "11001000" // /* MW 4 */ + 3597 "11001000" // /* MW 3 */ + 3598 "00000111" // /* MW 2 */ + 3599 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 + 3600 "10011000" // LDA.s8 r1, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3601 "00100010" // /* MW 3 */ + 3602 "00000100" // /* MW 2 */ + 3603 "00000100" // /* MW 1 */ + 3604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3605 "00000000" // /* MW 1 */ + 3606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3607 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 146 20 first + 3608 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3609 "10101011" // /* MW 3 */ + 3610 "00001000" // /* MW 2 */ + 3611 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 2 "elementwise_binary.h" 148 20 first + 3612 "10011000" // VLDA.CONV.fp32.bf16 cml2, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3613 "00101011" // /* MW 3 */ + 3614 "00101001" // /* MW 2 */ + 3615 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 170 20 first + 3616 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3617 "00101011" // /* MW 3 */ + 3618 "00001000" // /* MW 2 */ + 3619 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3620 "10011000" // VLDA.CONV.fp32.bf16 cml4, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3621 "00101011" // /* MW 3 */ + 3622 "00101010" // /* MW 2 */ + 3623 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 154 20 +.src_ref 2 "elementwise_binary.h" 177 20 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3624 "00101100" // VLDA.CONV.fp32.bf16 cml1, [p0], m0; MOVX crRnd, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3625 "00000000" // /* MW 5 */ + 3626 "11110101" // /* MW 4 */ + 3627 "01110000" // /* MW 3 */ + 3628 "00010101" // /* MW 2 */ + 3629 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3630 "01100010" // VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3631 "00111101" // /* MW 7 */ + 3632 "00101000" // /* MW 6 */ + 3633 "00000011" // /* MW 5 */ + 3634 "00000100" // /* MW 4 */ + 3635 "01110000" // /* MW 3 */ + 3636 "00100101" // /* MW 2 */ + 3637 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3638 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3639 "00101011" // /* MW 3 */ + 3640 "00001000" // /* MW 2 */ + 3641 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3642 "01100010" // VLDA.CONV.fp32.bf16 cml4, [p1], m1; VADD.f dm4, dm0, dm4, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3643 "00111101" // /* MW 7 */ + 3644 "00010000" // /* MW 6 */ + 3645 "00000100" // /* MW 5 */ + 3646 "00000100" // /* MW 4 */ + 3647 "01110000" // /* MW 3 */ + 3648 "01000101" // /* MW 2 */ + 3649 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3650 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3651 "10101011" // /* MW 3 */ + 3652 "00001000" // /* MW 2 */ + 3653 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3654 "01100010" // VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3655 "00111101" // /* MW 7 */ + 3656 "00101000" // /* MW 6 */ + 3657 "00000011" // /* MW 5 */ + 3658 "00000100" // /* MW 4 */ + 3659 "01110000" // /* MW 3 */ + 3660 "00100101" // /* MW 2 */ + 3661 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3662 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3663 "00101011" // /* MW 3 */ + 3664 "00001000" // /* MW 2 */ + 3665 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3666 "01101110" // VLDA.CONV.fp32.bf16 cml4, [p1], m1; VST.CONV.bf16.fp32 cml3, [p2], #64; NOPM; VADD.f dm4, dm0, dm4, r0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 3667 "00111101" // /* MW 13 */ + 3668 "00010000" // /* MW 12 */ + 3669 "00000100" // /* MW 11 */ + 3670 "01010111" // /* MW 10 */ + 3671 "00011010" // /* MW 9 */ + 3672 "01000000" // /* MW 8 */ + 3673 "00000000" // /* MW 7 */ + 3674 "00000000" // /* MW 6 */ + 3675 "01000110" // /* MW 5 */ + 3676 "00111011" // /* MW 4 */ + 3677 "01110100" // /* MW 3 */ + 3678 "01000101" // /* MW 2 */ + 3679 "00100101" // /* MW 1 */ +.label ZLS_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_240 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 3680 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3681 "10101011" // /* MW 3 */ + 3682 "00001000" // /* MW 2 */ + 3683 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3684 "01100110" // VLDA.CONV.fp32.bf16 cml2, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3685 "00111101" // /* MW 11 */ + 3686 "00101000" // /* MW 10 */ + 3687 "00000011" // /* MW 9 */ + 3688 "10001110" // /* MW 8 */ + 3689 "00010001" // /* MW 7 */ + 3690 "00001111" // /* MW 6 */ + 3691 "00100001" // /* MW 5 */ + 3692 "00000000" // /* MW 4 */ + 3693 "01110000" // /* MW 3 */ + 3694 "00100101" // /* MW 2 */ + 3695 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3696 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3697 "00000000" // /* MW 15 */ + 3698 "00000000" // /* MW 14 */ + 3699 "01111000" // /* MW 13 */ + 3700 "10100101" // /* MW 12 */ + 3701 "00000001" // /* MW 11 */ + 3702 "00000000" // /* MW 10 */ + 3703 "00000000" // /* MW 9 */ + 3704 "00000000" // /* MW 8 */ + 3705 "01011011" // /* MW 7 */ + 3706 "00000001" // /* MW 6 */ + 3707 "00100000" // /* MW 5 */ + 3708 "00000000" // /* MW 4 */ + 3709 "01110000" // /* MW 3 */ + 3710 "00000101" // /* MW 2 */ + 3711 "00000001" // /* MW 1 */ +.label ZLE_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_272 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3712 "11101011" // VLDA.CONV.fp32.bf16 cml4, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml3, [p2], #64;NOPX; NOPM; VADD.f dm4, dm0, dm4, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3713 "10000001" // /* MW 15 */ + 3714 "00100000" // /* MW 14 */ + 3715 "01111000" // /* MW 13 */ + 3716 "10100101" // /* MW 12 */ + 3717 "00000001" // /* MW 11 */ + 3718 "00000000" // /* MW 10 */ + 3719 "00000000" // /* MW 9 */ + 3720 "00000000" // /* MW 8 */ + 3721 "10100011" // /* MW 7 */ + 3722 "00011101" // /* MW 6 */ + 3723 "00100010" // /* MW 5 */ + 3724 "00000000" // /* MW 4 */ + 3725 "01110000" // /* MW 3 */ + 3726 "01000101" // /* MW 2 */ + 3727 "00100101" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 3728 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3729 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3730 "01100010" // VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3731 "00111101" // /* MW 7 */ + 3732 "00101000" // /* MW 6 */ + 3733 "00000011" // /* MW 5 */ + 3734 "00000010" // /* MW 4 */ + 3735 "01100000" // /* MW 3 */ + 3736 "11000100" // /* MW 2 */ + 3737 "01000011" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3738 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3739 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3740 "01100010" // VST.CONV.bf16.fp32 cml3, [p2], #64; VADD.f dm4, dm0, dm4, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3741 "00111101" // /* MW 7 */ + 3742 "00010000" // /* MW 6 */ + 3743 "00000100" // /* MW 5 */ + 3744 "00000010" // /* MW 4 */ + 3745 "01100000" // /* MW 3 */ + 3746 "10110100" // /* MW 2 */ + 3747 "01000011" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3749 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 154 20 first +.src_ref 2 "elementwise_binary_broadcasting.h" 80 4 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3750 "01011100" // VST.CONV.bf16.fp32 cml4, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 3751 "00000000" // /* MW 5 */ + 3752 "01010000" // /* MW 4 */ + 3753 "01100000" // /* MW 3 */ + 3754 "11000100" // /* MW 2 */ + 3755 "01000011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3756 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3757 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.delay_slot + 3758 "00011000" // VST.CONV.bf16.fp32 cml3, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3759 "10100011" // /* MW 3 */ + 3760 "00011101" // /* MW 2 */ + 3761 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3762 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3763 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 154 20 first +.delay_slot + 3764 "00011000" // VST.CONV.bf16.fp32 cml4, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3765 "00100011" // /* MW 3 */ + 3766 "00011110" // /* MW 2 */ + 3767 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0 + 3769 "00000000" // /* MW 1 */ +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E +.function run _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 41 +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 41 first +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 76 8 +.function_start + 3776 "00111010" // MOVS p2, p1; PADDXM [sp], #128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3777 "01110001" // /* MW 9 */ + 3778 "00000000" // /* MW 8 */ + 3779 "00000000" // /* MW 7 */ + 3780 "00000000" // /* MW 6 */ + 3781 "00000100" // /* MW 5 */ + 3782 "00000000" // /* MW 4 */ + 3783 "01100000" // /* MW 3 */ + 3784 "10010001" // /* MW 2 */ + 3785 "01010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 51 35 + 3786 "00000010" // ST lr, [sp, #-4]; MOV r16, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3787 "01110000" // /* MW 7 */ + 3788 "01100000" // /* MW 6 */ + 3789 "00001000" // /* MW 5 */ + 3790 "00000010" // /* MW 4 */ + 3791 "10110000" // /* MW 3 */ + 3792 "10000111" // /* MW 2 */ + 3793 "11111111" // /* MW 1 */ + 3794 "11111000" // MOV r17, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3795 "11100000" // /* MW 3 */ + 3796 "01010101" // /* MW 2 */ + 3797 "00011100" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 51 19 + 3798 "01000100" // MOVXM p3, #509068 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3799 "00011000" // /* MW 5 */ + 3800 "11001001" // /* MW 4 */ + 3801 "11000110" // /* MW 3 */ + 3802 "00000111" // /* MW 2 */ + 3803 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 51 19 first + 3804 "00010100" // LDA.u8 r27, [p3], #2; ADD.NC p0, r17, #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3805 "10000000" // /* MW 5 */ + 3806 "11010001" // /* MW 4 */ + 3807 "01010000" // /* MW 3 */ + 3808 "11101101" // /* MW 2 */ + 3809 "01100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 538 13 first +.src_ref 4 "vector_native_types.hpp" 374 137 first + 3810 "00001100" // LDA.s16 r18, [p3], #-14; VST sfh, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3811 "01010110" // /* MW 5 */ + 3812 "00001110" // /* MW 4 */ + 3813 "01010000" // /* MW 3 */ + 3814 "11001010" // /* MW 2 */ + 3815 "01110011" // /* MW 1 */ + 3816 "00011000" // ST.s16 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3817 "01010111" // /* MW 3 */ + 3818 "00000110" // /* MW 2 */ + 3819 "00000000" // /* MW 1 */ + 3820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3821 "00000000" // /* MW 1 */ + 3822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3823 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 76 8 first +.no_stack_arguments + 3824 "00000100" // JL #3440 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3440 delay_slots=5 */ + 3825 "00000001" // /* MW 5 */ + 3826 "00000000" // /* MW 4 */ + 3827 "10111000" // /* MW 3 */ + 3828 "00000110" // /* MW 2 */ + 3829 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 51 35 +.delay_slot + 3830 "11111000" // MOV r17, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3831 "11000000" // /* MW 3 */ + 3832 "01010000" // /* MW 2 */ + 3833 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3835 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 51 35 first +.delay_slot + 3836 "00011000" // SEL.EQZ r18, r16, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3837 "00010010" // /* MW 3 */ + 3838 "00100101" // /* MW 2 */ + 3839 "00010100" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 51 35 +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 76 8 +.delay_slot + 3840 "11100100" // SEL.EQZ r16, r17, r16, r27; MOV p1, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3841 "01000001" // /* MW 5 */ + 3842 "11010010" // /* MW 4 */ + 3843 "01000010" // /* MW 3 */ + 3844 "00100000" // /* MW 2 */ + 3845 "10001100" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 76 8 +.delay_slot + 3846 "10111010" // NOPA; NOPB; MOV p0, r16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3847 "01111110" // /* MW 9 */ + 3848 "00010000" // /* MW 8 */ + 3849 "00110100" // /* MW 7 */ + 3850 "00000000" // /* MW 6 */ + 3851 "00010000" // /* MW 5 */ + 3852 "00000000" // /* MW 4 */ + 3853 "11110000" // /* MW 3 */ + 3854 "00101100" // /* MW 2 */ + 3855 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 77 4 +.return_address + 3856 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3857 "00111001" // /* MW 3 */ + 3858 "11111100" // /* MW 2 */ + 3859 "00000111" // /* MW 1 */ + 3860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3861 "00000000" // /* MW 1 */ + 3862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3863 "00000000" // /* MW 1 */ + 3864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3865 "00000000" // /* MW 1 */ + 3866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3867 "00000000" // /* MW 1 */ + 3868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3869 "00000000" // /* MW 1 */ + 3870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3871 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 77 4 first + 3872 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3873 "00000000" // /* MW 3 */ + 3874 "00101000" // /* MW 2 */ + 3875 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 77 4 +.delay_slot + 3876 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3877 "00000001" // /* MW 5 */ + 3878 "00000000" // /* MW 4 */ + 3879 "00000000" // /* MW 3 */ + 3880 "11110000" // /* MW 2 */ + 3881 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3883 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3885 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3887 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_end0 + 3889 "00000000" // /* MW 1 */ +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_add1d_attribute_broadcasting _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 6 "superkernels.cpp" 152 first +.src_ref 6 "superkernels.cpp" 157 6 +.function_start + 3904 "01000100" // MOVXM p3, #508864 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3905 "10000000" // /* MW 5 */ + 3906 "11000111" // /* MW 4 */ + 3907 "11000110" // /* MW 3 */ + 3908 "00000111" // /* MW 2 */ + 3909 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 157 6 first + 3910 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3911 "11000001" // /* MW 5 */ + 3912 "10110101" // /* MW 4 */ + 3913 "11011000" // /* MW 3 */ + 3914 "11000010" // /* MW 2 */ + 3915 "01100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 152 + 3916 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3917 "00000001" // /* MW 5 */ + 3918 "00000000" // /* MW 4 */ + 3919 "00000000" // /* MW 3 */ + 3920 "00001000" // /* MW 2 */ + 3921 "00000000" // /* MW 1 */ + 3922 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3923 "01110000" // /* MW 7 */ + 3924 "11010000" // /* MW 6 */ + 3925 "00001011" // /* MW 5 */ + 3926 "00000000" // /* MW 4 */ + 3927 "10110000" // /* MW 3 */ + 3928 "01100011" // /* MW 2 */ + 3929 "11111111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 154 11 + 3930 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #508876 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3931 "00010001" // /* MW 9 */ + 3932 "11100110" // /* MW 8 */ + 3933 "00110001" // /* MW 7 */ + 3934 "11110011" // /* MW 6 */ + 3935 "00000001" // /* MW 5 */ + 3936 "00000000" // /* MW 4 */ + 3937 "10110000" // /* MW 3 */ + 3938 "10000010" // /* MW 2 */ + 3939 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 + 3940 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3941 "11000000" // /* MW 3 */ + 3942 "11010100" // /* MW 2 */ + 3943 "00011011" // /* MW 1 */ + 3944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3945 "00000000" // /* MW 1 */ + 3946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3947 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 157 6 +.src_ref 6 "superkernels.cpp" 157 16 + 3948 "10000100" // JNZ r16, #4112 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4112 delay_slots=5 */ + 3949 "00000001" // /* MW 5 */ + 3950 "01000000" // /* MW 4 */ + 3951 "00001000" // /* MW 3 */ + 3952 "00001000" // /* MW 2 */ + 3953 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 154 22 first +.delay_slot + 3954 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3955 "10010000" // /* MW 3 */ + 3956 "01100010" // /* MW 2 */ + 3957 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 154 30 +.delay_slot + 3958 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3959 "11111011" // /* MW 3 */ + 3960 "01100011" // /* MW 2 */ + 3961 "00010100" // /* MW 1 */ +.delay_slot + 3962 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3963 "00111101" // /* MW 3 */ + 3964 "11110100" // /* MW 2 */ + 3965 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 6 "superkernels.cpp" 154 11 +.delay_slot + 3966 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3967 "01110000" // /* MW 7 */ + 3968 "01100000" // /* MW 6 */ + 3969 "00110000" // /* MW 5 */ + 3970 "00000011" // /* MW 4 */ + 3971 "00110000" // /* MW 3 */ + 3972 "11000110" // /* MW 2 */ + 3973 "11000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 160 4 +.src_ref 6 "superkernels.cpp" 171 2 +.delay_slot + 3974 "01000100" // MOVXM p0, #509056 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3975 "00000000" // /* MW 5 */ + 3976 "11001001" // /* MW 4 */ + 3977 "11000000" // /* MW 3 */ + 3978 "00000111" // /* MW 2 */ + 3979 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3980 "01000100" // MOVXM p2, #508944 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3981 "00100000" // /* MW 5 */ + 3982 "11001000" // /* MW 4 */ + 3983 "11000100" // /* MW 3 */ + 3984 "00000111" // /* MW 2 */ + 3985 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3986 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #508940 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3987 "00010000" // /* MW 9 */ + 3988 "00000110" // /* MW 8 */ + 3989 "00110010" // /* MW 7 */ + 3990 "11110001" // /* MW 6 */ + 3991 "00000001" // /* MW 5 */ + 3992 "00000000" // /* MW 4 */ + 3993 "11100000" // /* MW 3 */ + 3994 "11000000" // /* MW 2 */ + 3995 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3997 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 160 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 3998 "00000100" // JL #3376 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3376 delay_slots=5 */ + 3999 "00000001" // /* MW 5 */ + 4000 "00000000" // /* MW 4 */ + 4001 "10011000" // /* MW 3 */ + 4002 "00000110" // /* MW 2 */ + 4003 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4005 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4007 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4008 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4009 "00110001" // /* MW 3 */ + 4010 "00100000" // /* MW 2 */ + 4011 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 4012 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4013 "00000101" // /* MW 3 */ + 4014 "00100000" // /* MW 2 */ + 4015 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 4016 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4017 "00000000" // /* MW 15 */ + 4018 "00000000" // /* MW 14 */ + 4019 "01111000" // /* MW 13 */ + 4020 "10100101" // /* MW 12 */ + 4021 "00000001" // /* MW 11 */ + 4022 "00000000" // /* MW 10 */ + 4023 "00000000" // /* MW 9 */ + 4024 "10000000" // /* MW 8 */ + 4025 "00010001" // /* MW 7 */ + 4026 "00000110" // /* MW 6 */ + 4027 "00100010" // /* MW 5 */ + 4028 "00000000" // /* MW 4 */ + 4029 "11110000" // /* MW 3 */ + 4030 "00101100" // /* MW 2 */ + 4031 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 164 18 +.return_address + 4032 "01000100" // MOVXM p2, #508876 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4033 "10011000" // /* MW 5 */ + 4034 "11000111" // /* MW 4 */ + 4035 "11000100" // /* MW 3 */ + 4036 "00000111" // /* MW 2 */ + 4037 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 164 18 first +.src_ref 6 "superkernels.cpp" 164 65 + 4038 "10111010" // LDA r16, [p2]; MOVXM p2, #509056 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4039 "00010000" // /* MW 9 */ + 4040 "01000000" // /* MW 8 */ + 4041 "00110010" // /* MW 7 */ + 4042 "11110001" // /* MW 6 */ + 4043 "00000001" // /* MW 5 */ + 4044 "00000000" // /* MW 4 */ + 4045 "11010000" // /* MW 3 */ + 4046 "11000010" // /* MW 2 */ + 4047 "01000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 162 51 +.src_ref 6 "superkernels.cpp" 164 65 +.src_ref 6 "superkernels.cpp" 171 2 + 4048 "10111010" // LDA r17, [p2]; MOVXM p2, #509056 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4049 "00010000" // /* MW 9 */ + 4050 "01000000" // /* MW 8 */ + 4051 "00110010" // /* MW 7 */ + 4052 "11110001" // /* MW 6 */ + 4053 "00000001" // /* MW 5 */ + 4054 "00000000" // /* MW 4 */ + 4055 "11010000" // /* MW 3 */ + 4056 "11000110" // /* MW 2 */ + 4057 "01000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 162 51 first +.src_ref 6 "superkernels.cpp" 164 16 +.src_ref 6 "superkernels.cpp" 169 47 + 4058 "10111010" // LDA.u16 r18, [p2, #10]; MOVXM p1, #508880 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4059 "00010000" // /* MW 9 */ + 4060 "11101000" // /* MW 8 */ + 4061 "10110001" // /* MW 7 */ + 4062 "11110000" // /* MW 6 */ + 4063 "00000001" // /* MW 5 */ + 4064 "00000000" // /* MW 4 */ + 4065 "01010000" // /* MW 3 */ + 4066 "11001011" // /* MW 2 */ + 4067 "01001010" // /* MW 1 */ + 4068 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4069 "00000000" // /* MW 1 */ + 4070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4071 "00000000" // /* MW 1 */ + 4072 "10000100" // J #4128 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=4128 delay_slots=5 */ + 4073 "00000000" // /* MW 5 */ + 4074 "00000000" // /* MW 4 */ + 4075 "00010000" // /* MW 3 */ + 4076 "00001000" // /* MW 2 */ + 4077 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 162 13 +.delay_slot + 4078 "01000100" // MOVXM p0, #508936 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4079 "00010000" // /* MW 5 */ + 4080 "11001000" // /* MW 4 */ + 4081 "11000000" // /* MW 3 */ + 4082 "00000111" // /* MW 2 */ + 4083 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4085 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 164 27 first +.delay_slot + 4086 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4087 "00001111" // /* MW 3 */ + 4088 "01100001" // /* MW 2 */ + 4089 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 162 13 first +.delay_slot + 4090 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4091 "10100011" // /* MW 5 */ + 4092 "00001100" // /* MW 4 */ + 4093 "11110000" // /* MW 3 */ + 4094 "00101100" // /* MW 2 */ + 4095 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 164 16 first +.delay_slot + 4096 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4097 "00000000" // /* MW 15 */ + 4098 "00000000" // /* MW 14 */ + 4099 "01111000" // /* MW 13 */ + 4100 "10100101" // /* MW 12 */ + 4101 "00000001" // /* MW 11 */ + 4102 "00000000" // /* MW 10 */ + 4103 "00000000" // /* MW 9 */ + 4104 "10000000" // /* MW 8 */ + 4105 "00010001" // /* MW 7 */ + 4106 "00000110" // /* MW 6 */ + 4107 "00100001" // /* MW 5 */ + 4108 "00000000" // /* MW 4 */ + 4109 "11110000" // /* MW 3 */ + 4110 "00101100" // /* MW 2 */ + 4111 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 6 "superkernels.cpp" 169 47 +.src_ref 6 "superkernels.cpp" 171 2 + 4112 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #508880; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4113 "00000000" // /* MW 15 */ + 4114 "00000000" // /* MW 14 */ + 4115 "00010000" // /* MW 13 */ + 4116 "11101000" // /* MW 12 */ + 4117 "10110001" // /* MW 11 */ + 4118 "11110000" // /* MW 10 */ + 4119 "00000001" // /* MW 9 */ + 4120 "00000000" // /* MW 8 */ + 4121 "10001011" // /* MW 7 */ + 4122 "10000000" // /* MW 6 */ + 4123 "00100010" // /* MW 5 */ + 4124 "00000000" // /* MW 4 */ + 4125 "11110000" // /* MW 3 */ + 4126 "00101100" // /* MW 2 */ + 4127 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 +.src_ref 1 "io_buffer_main.h" 242 49 first + 4128 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4129 "00000000" // /* MW 7 */ + 4130 "11000011" // /* MW 6 */ + 4131 "10110011" // /* MW 5 */ + 4132 "00000011" // /* MW 4 */ + 4133 "01100000" // /* MW 3 */ + 4134 "10010001" // /* MW 2 */ + 4135 "01110011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 168 2 + 4136 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508864 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4137 "00010000" // /* MW 9 */ + 4138 "11100000" // /* MW 8 */ + 4139 "00110001" // /* MW 7 */ + 4140 "11110000" // /* MW 6 */ + 4141 "00000001" // /* MW 5 */ + 4142 "00000000" // /* MW 4 */ + 4143 "11010000" // /* MW 3 */ + 4144 "11101110" // /* MW 2 */ + 4145 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 4146 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4147 "00010110" // /* MW 3 */ + 4148 "11111110" // /* MW 2 */ + 4149 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 4150 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4151 "00110110" // /* MW 3 */ + 4152 "11111110" // /* MW 2 */ + 4153 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first + 4154 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4155 "01010110" // /* MW 3 */ + 4156 "01000110" // /* MW 2 */ + 4157 "00000111" // /* MW 1 */ + 4158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4159 "00000000" // /* MW 1 */ + 4160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4161 "00000000" // /* MW 1 */ + 4162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4163 "00000000" // /* MW 1 */ + 4164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4165 "00000000" // /* MW 1 */ + 4166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4167 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 4168 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4169 "00000010" // /* MW 3 */ + 4170 "01100001" // /* MW 2 */ + 4171 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 + 4172 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4173 "00010001" // /* MW 3 */ + 4174 "00000110" // /* MW 2 */ + 4175 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 + 4176 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4177 "11111101" // /* MW 3 */ + 4178 "11100000" // /* MW 2 */ + 4179 "00010111" // /* MW 1 */ + 4180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4181 "00000000" // /* MW 1 */ + 4182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4183 "00000000" // /* MW 1 */ + 4184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4185 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 4186 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4187 "00001000" // /* MW 3 */ + 4188 "10010011" // /* MW 2 */ + 4189 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 169 45 + 4190 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4191 "10000001" // /* MW 5 */ + 4192 "10101101" // /* MW 4 */ + 4193 "10100111" // /* MW 3 */ + 4194 "00000000" // /* MW 2 */ + 4195 "00000100" // /* MW 1 */ + 4196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4197 "00000000" // /* MW 1 */ + 4198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4199 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 168 2 first + 4200 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4201 "00110110" // /* MW 3 */ + 4202 "00000110" // /* MW 2 */ + 4203 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 1 "io_buffer_main.h" 348 51 + 4204 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4205 "10000001" // /* MW 5 */ + 4206 "11011101" // /* MW 4 */ + 4207 "11011100" // /* MW 3 */ + 4208 "11001010" // /* MW 2 */ + 4209 "11000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 169 47 first + 4210 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4211 "01110110" // /* MW 3 */ + 4212 "00000110" // /* MW 2 */ + 4213 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 4214 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4215 "10011110" // /* MW 3 */ + 4216 "01011100" // /* MW 2 */ + 4217 "00000111" // /* MW 1 */ + 4218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4219 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 171 2 first +.no_stack_arguments + 4220 "00000100" // JL #3776 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3776 delay_slots=5 */ + 4221 "00000001" // /* MW 5 */ + 4222 "00000000" // /* MW 4 */ + 4223 "01100000" // /* MW 3 */ + 4224 "00000111" // /* MW 2 */ + 4225 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4226 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4227 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 168 2 first +.delay_slot + 4228 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4229 "00000111" // /* MW 3 */ + 4230 "01100010" // /* MW 2 */ + 4231 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 168 2 +.delay_slot + 4232 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4233 "00110001" // /* MW 3 */ + 4234 "00000110" // /* MW 2 */ + 4235 "00001000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 169 45 first +.delay_slot + 4236 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4237 "00001101" // /* MW 3 */ + 4238 "11100001" // /* MW 2 */ + 4239 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 169 45 +.delay_slot + 4240 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4241 "00000000" // /* MW 15 */ + 4242 "00000000" // /* MW 14 */ + 4243 "10101000" // /* MW 13 */ + 4244 "10100000" // /* MW 12 */ + 4245 "00110100" // /* MW 11 */ + 4246 "00000000" // /* MW 10 */ + 4247 "00000000" // /* MW 9 */ + 4248 "00000000" // /* MW 8 */ + 4249 "01011011" // /* MW 7 */ + 4250 "00000001" // /* MW 6 */ + 4251 "00100000" // /* MW 5 */ + 4252 "00000000" // /* MW 4 */ + 4253 "11110000" // /* MW 3 */ + 4254 "00101100" // /* MW 2 */ + 4255 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first +.src_ref 6 "superkernels.cpp" 173 6 +.src_ref 6 "superkernels.cpp" 174 14 +.return_address + 4256 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508864 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4257 "00010000" // /* MW 9 */ + 4258 "11100000" // /* MW 8 */ + 4259 "00110001" // /* MW 7 */ + 4260 "11110011" // /* MW 6 */ + 4261 "00000001" // /* MW 5 */ + 4262 "00000000" // /* MW 4 */ + 4263 "11010000" // /* MW 3 */ + 4264 "11000110" // /* MW 2 */ + 4265 "11001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 449 8 + 4266 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4267 "00000101" // /* MW 3 */ + 4268 "00100000" // /* MW 2 */ + 4269 "00010000" // /* MW 1 */ + 4270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4271 "00000000" // /* MW 1 */ + 4272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4273 "00000000" // /* MW 1 */ + 4274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4275 "00000000" // /* MW 1 */ + 4276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4277 "00000000" // /* MW 1 */ + 4278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4279 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 4280 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4281 "00001000" // /* MW 3 */ + 4282 "01010001" // /* MW 2 */ + 4283 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first +.src_ref 6 "superkernels.cpp" 173 19 + 4284 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #508936 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4285 "00010000" // /* MW 9 */ + 4286 "00000100" // /* MW 8 */ + 4287 "00110010" // /* MW 7 */ + 4288 "11110001" // /* MW 6 */ + 4289 "00000001" // /* MW 5 */ + 4290 "00000000" // /* MW 4 */ + 4291 "11010000" // /* MW 3 */ + 4292 "11001110" // /* MW 2 */ + 4293 "11111100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 173 6 first + 4294 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4295 "00110110" // /* MW 3 */ + 4296 "00000110" // /* MW 2 */ + 4297 "00000110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 173 19 + 4298 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4299 "01010110" // /* MW 3 */ + 4300 "00000110" // /* MW 2 */ + 4301 "00000010" // /* MW 1 */ + 4302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4303 "00000000" // /* MW 1 */ + 4304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4305 "00000000" // /* MW 1 */ + 4306 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4307 "00000000" // /* MW 1 */ + 4308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4309 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 first + 4310 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4311 "00110001" // /* MW 3 */ + 4312 "00100001" // /* MW 2 */ + 4313 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 4314 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4315 "00010001" // /* MW 3 */ + 4316 "11100110" // /* MW 2 */ + 4317 "00001111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 173 16 first + 4318 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4319 "00101000" // /* MW 3 */ + 4320 "01100001" // /* MW 2 */ + 4321 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 173 6 + 4322 "10000100" // JNZ r16, #4352 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4352 delay_slots=5 */ + 4323 "00000001" // /* MW 5 */ + 4324 "01000000" // /* MW 4 */ + 4325 "10000000" // /* MW 3 */ + 4326 "00001000" // /* MW 2 */ + 4327 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4329 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4331 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4333 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4335 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4337 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 174 14 + 4338 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4339 "00000001" // /* MW 3 */ + 4340 "00100000" // /* MW 2 */ + 4341 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 174 14 first + 4342 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4343 "00000000" // /* MW 9 */ + 4344 "00000000" // /* MW 8 */ + 4345 "00000000" // /* MW 7 */ + 4346 "10000000" // /* MW 6 */ + 4347 "00010001" // /* MW 5 */ + 4348 "00000110" // /* MW 4 */ + 4349 "11110110" // /* MW 3 */ + 4350 "00101100" // /* MW 2 */ + 4351 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 6 "superkernels.cpp" 176 + 4352 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4353 "00111001" // /* MW 3 */ + 4354 "11110100" // /* MW 2 */ + 4355 "00000111" // /* MW 1 */ + 4356 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4357 "00011001" // /* MW 3 */ + 4358 "11111011" // /* MW 2 */ + 4359 "00000111" // /* MW 1 */ + 4360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4361 "00000000" // /* MW 1 */ + 4362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4363 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 4364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4365 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4366 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4367 "11110001" // /* MW 3 */ + 4368 "11111101" // /* MW 2 */ + 4369 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4371 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 176 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4372 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 4373 "00000000" // /* MW 3 */ + 4374 "00101000" // /* MW 2 */ + 4375 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4376 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4377 "10100000" // /* MW 3 */ + 4378 "01100111" // /* MW 2 */ + 4379 "00011111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 176 +.delay_slot + 4380 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4381 "00000001" // /* MW 5 */ + 4382 "00000000" // /* MW 4 */ + 4383 "00000000" // /* MW 3 */ + 4384 "11111000" // /* MW 2 */ + 4385 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4387 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4389 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 4391 "00000000" // /* MW 1 */ +.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE +.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_begin0 +.function shared_run_backbone _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE +.src_ref 2 "elementwise_binary_shared.h" 66 first +.src_ref 2 "elementwise_binary_shared.h" 78 37 +.function_start + 4400 "11111000" // MOV r2, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4401 "11000000" // /* MW 3 */ + 4402 "10010110" // /* MW 2 */ + 4403 "00011000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 134 58 +.src_ref 2 "elementwise_binary_shared.h" 78 37 first + 4404 "00100100" // MOVX r0, #0; ADD.NC p5, r2, #14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4405 "00001110" // /* MW 5 */ + 4406 "11000010" // /* MW 4 */ + 4407 "00101010" // /* MW 3 */ + 4408 "00000000" // /* MW 2 */ + 4409 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 134 58 first +.src_ref 2 "elementwise_binary_shared.h" 81 22 first + 4410 "11010100" // LDA.s16 r0, [p5], #2; VBCST.16 x0, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4411 "11100101" // /* MW 5 */ + 4412 "00000010" // /* MW 4 */ + 4413 "01010000" // /* MW 3 */ + 4414 "10000010" // /* MW 2 */ + 4415 "10100011" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 83 15 first + 4416 "10011000" // LDA r2, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4417 "01010110" // /* MW 3 */ + 4418 "00000100" // /* MW 2 */ + 4419 "00000101" // /* MW 1 */ + 4420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4421 "00000000" // /* MW 1 */ + 4422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4423 "00000000" // /* MW 1 */ + 4424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4425 "00000000" // /* MW 1 */ + 4426 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4427 "00000000" // /* MW 1 */ + 4428 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4429 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 83 26 + 4430 "00011000" // MOVX r1, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4431 "00001001" // /* MW 3 */ + 4432 "00000010" // /* MW 2 */ + 4433 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 83 26 + 4434 "10011000" // LTU r1, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4435 "00101100" // /* MW 3 */ + 4436 "01000010" // /* MW 2 */ + 4437 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 83 8 + 4438 "10000100" // JNZ r1, #4576 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4576 delay_slots=5 */ + 4439 "00000001" // /* MW 5 */ + 4440 "01000000" // /* MW 4 */ + 4441 "11110000" // /* MW 3 */ + 4442 "00001000" // /* MW 2 */ + 4443 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 66 +.delay_slot + 4444 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4445 "00000001" // /* MW 5 */ + 4446 "00000000" // /* MW 4 */ + 4447 "00000000" // /* MW 3 */ + 4448 "00001000" // /* MW 2 */ + 4449 "00000000" // /* MW 1 */ +.delay_slot + 4450 "11111000" // MOV p4, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4451 "11100000" // /* MW 3 */ + 4452 "01100101" // /* MW 2 */ + 4453 "00011100" // /* MW 1 */ +.delay_slot + 4454 "00011000" // PADDB [p4], #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4455 "10010000" // /* MW 3 */ + 4456 "11111111" // /* MW 2 */ + 4457 "00111100" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 78 37 first +.delay_slot + 4458 "00011000" // VST x0, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4459 "00010011" // /* MW 3 */ + 4460 "00000100" // /* MW 2 */ + 4461 "00001100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4463 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 first +.src_ref 2 "elementwise_binary_shared.h" 85 34 +.src_ref 2 "elementwise_binary_shared.h" 90 19 + 4464 "11010100" // MOVA dj0, #12; VBCST.16 x0, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4465 "11100101" // /* MW 5 */ + 4466 "00000010" // /* MW 4 */ + 4467 "10000000" // /* MW 3 */ + 4468 "10000010" // /* MW 2 */ + 4469 "00000001" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 85 34 first +.src_ref 2 "elementwise_binary_shared.h" 90 19 first + 4470 "10011000" // LDA.u8 r0, [p3, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4471 "00001010" // /* MW 3 */ + 4472 "00000000" // /* MW 2 */ + 4473 "00000011" // /* MW 1 */ + 4474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4475 "00000000" // /* MW 1 */ + 4476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4477 "00000000" // /* MW 1 */ + 4478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4479 "00000000" // /* MW 1 */ + 4480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4481 "00000000" // /* MW 1 */ + 4482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4483 "00000000" // /* MW 1 */ + 4484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4485 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 90 12 +.src_ref 2 "elementwise_binary_shared.h" 90 35 + 4486 "10000100" // JNZ r0, #4528 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4528 delay_slots=5 */ + 4487 "00000001" // /* MW 5 */ + 4488 "01000000" // /* MW 4 */ + 4489 "11011000" // /* MW 3 */ + 4490 "00001000" // /* MW 2 */ + 4491 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 2 "elementwise_binary_shared.h" 132 18 +.delay_slot + 4492 "10111000" // MOV m0, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4493 "00000000" // /* MW 3 */ + 4494 "00000000" // /* MW 2 */ + 4495 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary_shared.h" 130 16 +.delay_slot + 4496 "10111000" // MOV m2, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4497 "10000000" // /* MW 3 */ + 4498 "00000000" // /* MW 2 */ + 4499 "00011010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4501 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4503 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4505 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary_shared.h" 128 16 + 4506 "10111010" // MOVA m1, #0; J #4544 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=4544 delay_slots=5 */ + 4507 "00100000" // /* MW 9 */ + 4508 "00000000" // /* MW 8 */ + 4509 "00000000" // /* MW 7 */ + 4510 "00111000" // /* MW 6 */ + 4511 "00000010" // /* MW 5 */ + 4512 "00000000" // /* MW 4 */ + 4513 "10000000" // /* MW 3 */ + 4514 "00000100" // /* MW 2 */ + 4515 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4517 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4519 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4521 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4523 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.delay_slot + 4524 "00011000" // VST x0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4525 "00010011" // /* MW 3 */ + 4526 "00000100" // /* MW 2 */ + 4527 "00001000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_128 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary_shared.h" 128 16 + 4528 "10111000" // MOV m1, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4529 "10000000" // /* MW 3 */ + 4530 "00000000" // /* MW 2 */ + 4531 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 2 "elementwise_binary_shared.h" 130 16 + 4532 "11110110" // NOPA; NOPB; VST x0, [p1]; MOV m2, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4533 "01010000" // /* MW 11 */ + 4534 "00000000" // /* MW 10 */ + 4535 "00000000" // /* MW 9 */ + 4536 "00000001" // /* MW 8 */ + 4537 "00010011" // /* MW 7 */ + 4538 "00000100" // /* MW 6 */ + 4539 "00100001" // /* MW 5 */ + 4540 "00000000" // /* MW 4 */ + 4541 "11110000" // /* MW 3 */ + 4542 "00101100" // /* MW 2 */ + 4543 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_144 + 4544 "10000100" // J #4672 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=4672 delay_slots=5 */ + 4545 "00000000" // /* MW 5 */ + 4546 "00000000" // /* MW 4 */ + 4547 "00100000" // /* MW 3 */ + 4548 "00001001" // /* MW 2 */ + 4549 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 2 "elementwise_binary_shared.h" 128 16 +.src_ref 2 "elementwise_binary_shared.h" 132 18 +.delay_slot + 4550 "00000010" // MOVS p0, p4; MOV p4, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4551 "01110000" // /* MW 7 */ + 4552 "01100000" // /* MW 6 */ + 4553 "00110000" // /* MW 5 */ + 4554 "00000010" // /* MW 4 */ + 4555 "01100000" // /* MW 3 */ + 4556 "00010001" // /* MW 2 */ + 4557 "00010010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4558 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4559 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4561 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4563 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4564 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4565 "10000001" // /* MW 11 */ + 4566 "10101101" // /* MW 10 */ + 4567 "00000000" // /* MW 9 */ + 4568 "00000000" // /* MW 8 */ + 4569 "00000000" // /* MW 7 */ + 4570 "00000000" // /* MW 6 */ + 4571 "00100000" // /* MW 5 */ + 4572 "00000000" // /* MW 4 */ + 4573 "11110000" // /* MW 3 */ + 4574 "00101100" // /* MW 2 */ + 4575 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_176 +.src_ref 2 "elementwise_binary_shared.h" 109 97 + 4576 "00011000" // MOVX r1, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4577 "00001101" // /* MW 3 */ + 4578 "00000010" // /* MW 2 */ + 4579 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 109 97 first + 4580 "10011000" // EQ r1, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4581 "00100111" // /* MW 3 */ + 4582 "01000010" // /* MW 2 */ + 4583 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 109 78 + 4584 "10000100" // JNZ r1, #4624 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4624 delay_slots=5 */ + 4585 "00000001" // /* MW 5 */ + 4586 "01000000" // /* MW 4 */ + 4587 "00001000" // /* MW 3 */ + 4588 "00001001" // /* MW 2 */ + 4589 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 109 78 +.delay_slot + 4590 "01000100" // MOVXM p3, #508944 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4591 "00100000" // /* MW 5 */ + 4592 "11001000" // /* MW 4 */ + 4593 "11000110" // /* MW 3 */ + 4594 "00000111" // /* MW 2 */ + 4595 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4596 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4597 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4598 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4599 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4601 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 109 78 +.delay_slot + 4602 "01000100" // MOVXM r0, #1065353216 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4603 "00000000" // /* MW 5 */ + 4604 "00100000" // /* MW 4 */ + 4605 "00000000" // /* MW 3 */ + 4606 "10000000" // /* MW 2 */ + 4607 "00111111" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 109 78 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4608 "11100001" // NOPA; NOPB; NOPS; MOVXM r0, #-1082130432; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4609 "00000000" // /* MW 15 */ + 4610 "00000000" // /* MW 14 */ + 4611 "00010000" // /* MW 13 */ + 4612 "00000000" // /* MW 12 */ + 4613 "00001000" // /* MW 11 */ + 4614 "00000000" // /* MW 10 */ + 4615 "11100000" // /* MW 9 */ + 4616 "00101111" // /* MW 8 */ + 4617 "01011011" // /* MW 7 */ + 4618 "00000001" // /* MW 6 */ + 4619 "00100000" // /* MW 5 */ + 4620 "00000000" // /* MW 4 */ + 4621 "11110000" // /* MW 3 */ + 4622 "00101100" // /* MW 2 */ + 4623 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_224 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 2 "elementwise_binary_shared.h" 109 78 +.src_ref 2 "elementwise_binary_shared.h" 132 18 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4624 "01010100" // LDA.s8 r0, [p3]; MOV m0, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4625 "00000001" // /* MW 5 */ + 4626 "00000001" // /* MW 4 */ + 4627 "01010000" // /* MW 3 */ + 4628 "10000000" // /* MW 2 */ + 4629 "01100000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary_shared.h" 128 16 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4630 "10111000" // MOV m1, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4631 "00000000" // /* MW 3 */ + 4632 "00000000" // /* MW 2 */ + 4633 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary_shared.h" 130 16 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4634 "10111000" // MOV m2, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4635 "10000000" // /* MW 3 */ + 4636 "00000000" // /* MW 2 */ + 4637 "00011010" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4639 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4641 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 109 78 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4642 "01111000" // VINSERT.32 x0, x0, #0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4643 "00010001" // /* MW 3 */ + 4644 "00000000" // /* MW 2 */ + 4645 "00011000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 109 78 + 4646 "11111000" // VMOV bmll1, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4647 "10010010" // /* MW 3 */ + 4648 "00000000" // /* MW 2 */ + 4649 "00011001" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 109 78 + 4650 "00011000" // MOVX crRnd, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4651 "10000000" // /* MW 3 */ + 4652 "00111010" // /* MW 2 */ + 4653 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 109 78 + 4654 "00011000" // VCONV.bf16.fp32 wl0, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4655 "10010110" // /* MW 3 */ + 4656 "01000000" // /* MW 2 */ + 4657 "00001000" // /* MW 1 */ + 4658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4659 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 first +.src_ref 2 "elementwise_binary_shared.h" 109 78 + 4660 "01011000" // VEXTBCST.16 x0, x0, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4661 "00000011" // /* MW 3 */ + 4662 "00000001" // /* MW 2 */ + 4663 "00011000" // /* MW 1 */ + 4664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4665 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first + 4666 "00001100" // NOPA; VST x0, [sp, #-64] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4667 "01100110" // /* MW 5 */ + 4668 "11111000" // /* MW 4 */ + 4669 "11111111" // /* MW 3 */ + 4670 "00101100" // /* MW 2 */ + 4671 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_272 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary_shared.h" 125 4 first +.src_ref 2 "elementwise_binary_shared.h" 125 31 first +.src_ref 2 "elementwise_binary_shared.h" 128 16 first + 4672 "10110110" // LDA r1, [p5, #-16]; VLDB x1, [p4], m1; MOVXM ls, #4784 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4673 "00010000" // /* MW 11 */ + 4674 "01011000" // /* MW 10 */ + 4675 "01111001" // /* MW 9 */ + 4676 "00000100" // /* MW 8 */ + 4677 "00000000" // /* MW 7 */ + 4678 "00000000" // /* MW 6 */ + 4679 "11101000" // /* MW 5 */ + 4680 "01010000" // /* MW 4 */ + 4681 "11011000" // /* MW 3 */ + 4682 "10000110" // /* MW 2 */ + 4683 "10111000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary_shared.h" 125 4 +.src_ref 2 "elementwise_binary_shared.h" 125 31 +.src_ref 2 "elementwise_binary_shared.h" 130 16 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 4684 "10110110" // MOVA r3, #-5; VLDB x0, [p1], m2; MOVXM le, #4832 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4685 "00010000" // /* MW 11 */ + 4686 "01110000" // /* MW 10 */ + 4687 "10111001" // /* MW 9 */ + 4688 "00000101" // /* MW 8 */ + 4689 "00000000" // /* MW 7 */ + 4690 "00000000" // /* MW 6 */ + 4691 "01101000" // /* MW 5 */ + 4692 "10010000" // /* MW 4 */ + 4693 "00000010" // /* MW 3 */ + 4694 "01100011" // /* MW 2 */ + 4695 "11111111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary_shared.h" 128 16 first +.src_ref 2 "elementwise_binary_shared.h" 132 18 first +.src_ref 2 "elementwise_binary_shared.h" 136 44 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4696 "00010010" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;VLDB x1, [p4], m1; MOVX r0, #60 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4697 "11110001" // /* MW 7 */ + 4698 "00000000" // /* MW 6 */ + 4699 "11101000" // /* MW 5 */ + 4700 "01010000" // /* MW 4 */ + 4701 "01111000" // /* MW 3 */ + 4702 "00000101" // /* MW 2 */ + 4703 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary_shared.h" 125 31 +.src_ref 2 "elementwise_binary_shared.h" 130 16 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4704 "10111010" // VLDA x0, [p1], m2; MOVXM p3, #508944 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4705 "00010000" // /* MW 9 */ + 4706 "00001000" // /* MW 8 */ + 4707 "10110010" // /* MW 7 */ + 4708 "11110001" // /* MW 6 */ + 4709 "00000001" // /* MW 5 */ + 4710 "00000000" // /* MW 4 */ + 4711 "01110000" // /* MW 3 */ + 4712 "00000011" // /* MW 2 */ + 4713 "00101001" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 125 31 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4714 "10011000" // LDA.s8 r2, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4715 "01000010" // /* MW 3 */ + 4716 "00000100" // /* MW 2 */ + 4717 "00000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary_shared.h" 132 18 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4718 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4719 "00101011" // /* MW 3 */ + 4720 "00001000" // /* MW 2 */ + 4721 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4723 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 125 31 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4724 "10011000" // LSHL r1, r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4725 "00111101" // /* MW 3 */ + 4726 "01000010" // /* MW 2 */ + 4727 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 125 4 +.src_ref 2 "elementwise_binary_shared.h" 136 44 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4728 "01100010" // ADD.NC lc, r1, #-3; VMAC.f dm1, dm0, x1, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4729 "00000001" // /* MW 7 */ + 4730 "00000010" // /* MW 6 */ + 4731 "00000001" // /* MW 5 */ + 4732 "10000110" // /* MW 4 */ + 4733 "11111110" // /* MW 3 */ + 4734 "01110000" // /* MW 2 */ + 4735 "00000101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary_shared.h" 128 16 first +.src_ref 2 "elementwise_binary_shared.h" 130 16 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4736 "00111100" // VLDA x0, [p1], m2; VLDB x1, [p4], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4737 "11101000" // /* MW 5 */ + 4738 "01010000" // /* MW 4 */ + 4739 "01111000" // /* MW 3 */ + 4740 "00000011" // /* MW 2 */ + 4741 "00101001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary_shared.h" 132 18 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4742 "10111010" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4743 "01111110" // /* MW 9 */ + 4744 "10100101" // /* MW 8 */ + 4745 "00000001" // /* MW 7 */ + 4746 "00000000" // /* MW 6 */ + 4747 "00010000" // /* MW 5 */ + 4748 "00000000" // /* MW 4 */ + 4749 "01110000" // /* MW 3 */ + 4750 "00000101" // /* MW 2 */ + 4751 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary_shared.h" 144 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4752 "11100001" // NOPA; NOPB; NOPS; MOVX crRnd, r2; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4753 "00000000" // /* MW 15 */ + 4754 "00000000" // /* MW 14 */ + 4755 "01111000" // /* MW 13 */ + 4756 "10100101" // /* MW 12 */ + 4757 "00000001" // /* MW 11 */ + 4758 "00000000" // /* MW 10 */ + 4759 "11010100" // /* MW 9 */ + 4760 "00000101" // /* MW 8 */ + 4761 "01011011" // /* MW 7 */ + 4762 "00000001" // /* MW 6 */ + 4763 "00100000" // /* MW 5 */ + 4764 "00000000" // /* MW 4 */ + 4765 "11110000" // /* MW 3 */ + 4766 "00101100" // /* MW 2 */ + 4767 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 136 44 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4768 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4769 "00010000" // /* MW 15 */ + 4770 "00001000" // /* MW 14 */ + 4771 "01111000" // /* MW 13 */ + 4772 "10100101" // /* MW 12 */ + 4773 "00000001" // /* MW 11 */ + 4774 "00000000" // /* MW 10 */ + 4775 "00000000" // /* MW 9 */ + 4776 "00000000" // /* MW 8 */ + 4777 "01011011" // /* MW 7 */ + 4778 "00000001" // /* MW 6 */ + 4779 "00100000" // /* MW 5 */ + 4780 "00000000" // /* MW 4 */ + 4781 "11110000" // /* MW 3 */ + 4782 "00101100" // /* MW 2 */ + 4783 "00000000" // /* MW 1 */ +.label ZLS_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_384 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary_shared.h" 128 16 first +.src_ref 2 "elementwise_binary_shared.h" 130 16 first +.begin_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 4784 "11100001" // VLDA x0, [p1], m2; VLDB x1, [p4], m1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4785 "00000000" // /* MW 15 */ + 4786 "00000000" // /* MW 14 */ + 4787 "01111000" // /* MW 13 */ + 4788 "10100101" // /* MW 12 */ + 4789 "00000001" // /* MW 11 */ + 4790 "00000000" // /* MW 10 */ + 4791 "00000000" // /* MW 9 */ + 4792 "00000000" // /* MW 8 */ + 4793 "01011011" // /* MW 7 */ + 4794 "00000001" // /* MW 6 */ + 4795 "11101000" // /* MW 5 */ + 4796 "01010000" // /* MW 4 */ + 4797 "01111000" // /* MW 3 */ + 4798 "00000011" // /* MW 2 */ + 4799 "00101001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary_shared.h" 132 18 first +.src_ref 2 "elementwise_binary_shared.h" 144 18 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4800 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4801 "00000000" // /* MW 15 */ + 4802 "00000000" // /* MW 14 */ + 4803 "01111000" // /* MW 13 */ + 4804 "10100101" // /* MW 12 */ + 4805 "00000001" // /* MW 11 */ + 4806 "00000000" // /* MW 10 */ + 4807 "00000000" // /* MW 9 */ + 4808 "00000000" // /* MW 8 */ + 4809 "10100011" // /* MW 7 */ + 4810 "00011100" // /* MW 6 */ + 4811 "00100010" // /* MW 5 */ + 4812 "00000000" // /* MW 4 */ + 4813 "01110000" // /* MW 3 */ + 4814 "00000101" // /* MW 2 */ + 4815 "00000001" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4816 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4817 "00000000" // /* MW 15 */ + 4818 "00000000" // /* MW 14 */ + 4819 "01111000" // /* MW 13 */ + 4820 "10100101" // /* MW 12 */ + 4821 "00000001" // /* MW 11 */ + 4822 "00000000" // /* MW 10 */ + 4823 "00000000" // /* MW 9 */ + 4824 "00000000" // /* MW 8 */ + 4825 "01011011" // /* MW 7 */ + 4826 "00000001" // /* MW 6 */ + 4827 "00100000" // /* MW 5 */ + 4828 "00000000" // /* MW 4 */ + 4829 "11110000" // /* MW 3 */ + 4830 "00101100" // /* MW 2 */ + 4831 "00000000" // /* MW 1 */ +.label ZLE_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_432 +.src_ref 2 "elementwise_binary_shared.h" 136 44 first +.end_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4832 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4833 "00010000" // /* MW 15 */ + 4834 "00001000" // /* MW 14 */ + 4835 "01111000" // /* MW 13 */ + 4836 "10100101" // /* MW 12 */ + 4837 "00000001" // /* MW 11 */ + 4838 "00000000" // /* MW 10 */ + 4839 "00000000" // /* MW 9 */ + 4840 "00000000" // /* MW 8 */ + 4841 "01011011" // /* MW 7 */ + 4842 "00000001" // /* MW 6 */ + 4843 "00100000" // /* MW 5 */ + 4844 "00000000" // /* MW 4 */ + 4845 "11110000" // /* MW 3 */ + 4846 "00101100" // /* MW 2 */ + 4847 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 4848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4849 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary_shared.h" 144 18 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4850 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4851 "10100011" // /* MW 3 */ + 4852 "00011100" // /* MW 2 */ + 4853 "00001010" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 4854 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4855 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 136 44 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 4856 "01001000" // VMAC.f dm1, dm0, x1, x0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4857 "00000001" // /* MW 3 */ + 4858 "00000010" // /* MW 2 */ + 4859 "00000001" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4861 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 146 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4862 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 4863 "00000000" // /* MW 3 */ + 4864 "00101000" // /* MW 2 */ + 4865 "00010000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary_shared.h" 144 18 first +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4866 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4867 "10100011" // /* MW 3 */ + 4868 "00011100" // /* MW 2 */ + 4869 "00001010" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 146 first +.delay_slot + 4870 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4871 "00000001" // /* MW 5 */ + 4872 "00000000" // /* MW 4 */ + 4873 "00000000" // /* MW 3 */ + 4874 "11111000" // /* MW 2 */ + 4875 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4877 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary_shared.h" 144 18 first +.delay_slot + 4878 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4879 "10100011" // /* MW 3 */ + 4880 "00011100" // /* MW 2 */ + 4881 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE__end +.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_end0 + 4883 "00000000" // /* MW 1 */ +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv +.function setup _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv +.src_ref 2 "elementwise_unary.h" 95 first +.src_ref 2 "elementwise_unary.h" 97 22 +.src_ref 2 "elementwise_unary.h" 97 24 first +.function_start + 4896 "10111010" // LDA el0, [p1], #4; MOVXM p0, #509248 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4897 "00010000" // /* MW 9 */ + 4898 "10100000" // /* MW 8 */ + 4899 "00110010" // /* MW 7 */ + 4900 "11110000" // /* MW 6 */ + 4901 "00000001" // /* MW 5 */ + 4902 "00000000" // /* MW 4 */ + 4903 "11010000" // /* MW 3 */ + 4904 "10000101" // /* MW 2 */ + 4905 "00100011" // /* MW 1 */ + 4906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4907 "00000000" // /* MW 1 */ + 4908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4909 "00000000" // /* MW 1 */ + 4910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4911 "00000000" // /* MW 1 */ + 4912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4913 "00000000" // /* MW 1 */ + 4914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4915 "00000000" // /* MW 1 */ + 4916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4917 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 97 22 first + 4918 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4919 "00101001" // /* MW 3 */ + 4920 "00011100" // /* MW 2 */ + 4921 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 98 24 first + 4922 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4923 "00101110" // /* MW 3 */ + 4924 "00000100" // /* MW 2 */ + 4925 "00000001" // /* MW 1 */ + 4926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4927 "00000000" // /* MW 1 */ + 4928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4929 "00000000" // /* MW 1 */ + 4930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4931 "00000000" // /* MW 1 */ + 4932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4933 "00000000" // /* MW 1 */ + 4934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4935 "00000000" // /* MW 1 */ + 4936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4937 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 98 22 + 4938 "10011000" // ST el0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4939 "00101001" // /* MW 3 */ + 4940 "00000100" // /* MW 2 */ + 4941 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 99 24 first + 4942 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4943 "00101110" // /* MW 3 */ + 4944 "00010100" // /* MW 2 */ + 4945 "00000001" // /* MW 1 */ + 4946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4947 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 101 4 first + 4948 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 4949 "00000000" // /* MW 3 */ + 4950 "00101000" // /* MW 2 */ + 4951 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4953 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4955 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4957 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4959 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 99 22 first +.delay_slot + 4960 "10011000" // ST el0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4961 "00101001" // /* MW 3 */ + 4962 "00010100" // /* MW 2 */ +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv__end +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_end0 + 4963 "00001000" // /* MW 1 */ +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E +.function run _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_unary.h" 107 first +.src_ref 2 "elementwise_unary.h" 113 37 +.src_ref 2 "elementwise_unary.h" 113 78 +.src_ref 2 "elementwise_unary.h" 142 19 +.function_start + 4976 "10110110" // MOVA r0, #-6; VLDB x10, [p0], #64; MOVXM p2, #509248 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4977 "00010000" // /* MW 11 */ + 4978 "10100000" // /* MW 10 */ + 4979 "00110010" // /* MW 9 */ + 4980 "11110001" // /* MW 8 */ + 4981 "00000001" // /* MW 7 */ + 4982 "00000000" // /* MW 6 */ + 4983 "01101000" // /* MW 5 */ + 4984 "00111101" // /* MW 4 */ + 4985 "00000000" // /* MW 3 */ + 4986 "01000000" // /* MW 2 */ + 4987 "11111111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_unary.h" 113 37 first +.src_ref 2 "elementwise_unary.h" 161 19 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4988 "10110110" // LDA r2, [p2]; VLDB x7, [p0], #64; MOVXM p2, #508944 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4989 "00010000" // /* MW 11 */ + 4990 "00001000" // /* MW 10 */ + 4991 "00110010" // /* MW 9 */ + 4992 "11110001" // /* MW 8 */ + 4993 "00000001" // /* MW 7 */ + 4994 "00000000" // /* MW 6 */ + 4995 "11101000" // /* MW 5 */ + 4996 "00111011" // /* MW 4 */ + 4997 "11010000" // /* MW 3 */ + 4998 "10001010" // /* MW 2 */ + 4999 "01000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_unary.h" 142 19 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5000 "10110110" // LDA.s8 r1, [p2]; VLDB x10, [p0], #64; MOVXM r6, #16512 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5001 "00010000" // /* MW 11 */ + 5002 "01000000" // /* MW 10 */ + 5003 "11001000" // /* MW 9 */ + 5004 "00010000" // /* MW 8 */ + 5005 "00000000" // /* MW 7 */ + 5006 "00000000" // /* MW 6 */ + 5007 "01101000" // /* MW 5 */ + 5008 "00111101" // /* MW 4 */ + 5009 "01010000" // /* MW 3 */ + 5010 "10000100" // /* MW 2 */ + 5011 "01000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_unary.h" 161 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5012 "11110100" // VLDB x7, [p0], #64; VBCST.16 x0, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5013 "11100101" // /* MW 5 */ + 5014 "00110010" // /* MW 4 */ + 5015 "10000000" // /* MW 3 */ + 5016 "10111110" // /* MW 2 */ + 5017 "00000011" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5018 "01000100" // MOVXM r4, #49280 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5019 "00000000" // /* MW 5 */ + 5020 "00100001" // /* MW 4 */ + 5021 "11000010" // /* MW 3 */ + 5022 "00000000" // /* MW 2 */ + 5023 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5024 "11111000" // VBCST.16 x1, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5025 "01110010" // /* MW 3 */ + 5026 "10010001" // /* MW 2 */ + 5027 "00011000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5028 "01000100" // MOVXM r3, #32767 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5029 "11111110" // /* MW 5 */ + 5030 "10111111" // /* MW 4 */ + 5031 "01110001" // /* MW 3 */ + 5032 "00000000" // /* MW 2 */ + 5033 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5034 "11111000" // VMIN_GE.bf16 x8, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5035 "00101100" // /* MW 3 */ + 5036 "01010000" // /* MW 2 */ + 5037 "00011100" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 2 "elementwise_unary.h" 113 78 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5038 "11100100" // LSHL r0, r2, r0; VMAX_LT.bf16 x6, r16, x8, x1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5039 "11011001" // /* MW 5 */ + 5040 "10000001" // /* MW 4 */ + 5041 "10110110" // /* MW 3 */ + 5042 "00000001" // /* MW 2 */ + 5043 "00010000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_unary.h" 147 20 +.src_ref 2 "elementwise_unary.h" 166 20 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5044 "11100100" // MOVX crRnd, r1; VMIN_GE.bf16 x8, r16, x7, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5045 "01011001" // /* MW 5 */ + 5046 "01110000" // /* MW 4 */ + 5047 "00001000" // /* MW 3 */ + 5048 "01010000" // /* MW 2 */ + 5049 "00001111" // /* MW 1 */ + 5050 "11111000" // VBCST.16 x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5051 "01110010" // /* MW 3 */ + 5052 "00001101" // /* MW 2 */ + 5053 "00011001" // /* MW 1 */ + 5054 "01000100" // MOVXM r5, #15616 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5055 "00000000" // /* MW 5 */ + 5056 "10111010" // /* MW 4 */ + 5057 "00110010" // /* MW 3 */ + 5058 "00000000" // /* MW 2 */ + 5059 "00000000" // /* MW 1 */ + 5060 "11111000" // VBCST.16 x3, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5061 "01110010" // /* MW 3 */ + 5062 "10010101" // /* MW 2 */ + 5063 "00011001" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 + 5064 "01000100" // MOVXM r17, #16128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5065 "00000000" // /* MW 5 */ + 5066 "10111110" // /* MW 4 */ + 5067 "00111000" // /* MW 3 */ + 5068 "00000000" // /* MW 2 */ + 5069 "00000000" // /* MW 1 */ +.src_ref 4 "abs.hpp" 32 22 first + 5070 "01111000" // VBAND x11, x6, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5071 "00101011" // /* MW 3 */ + 5072 "10110001" // /* MW 2 */ + 5073 "00011101" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 5 "mul_acc32_fp.hpp" 38 105 +.src_ref 5 "mul_acc32_fp.hpp" 38 105 +.src_ref 5 "mul_acc32_fp.hpp" 39 105 +.src_ref 5 "mul_acc32_fp.hpp" 39 105 + 5074 "11100100" // MOVX r17, #828; VBCST.16 x5, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5075 "11100101" // /* MW 5 */ + 5076 "10001010" // /* MW 4 */ + 5077 "00100101" // /* MW 3 */ + 5078 "01011110" // /* MW 2 */ + 5079 "01100100" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first + 5080 "01100010" // VMAX_LT.bf16 x9, r16, x8, x1; VMUL.f dm4, x3, x11, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5081 "01100001" // /* MW 7 */ + 5082 "11100111" // /* MW 6 */ + 5083 "10001100" // /* MW 5 */ + 5084 "11100110" // /* MW 4 */ + 5085 "11101100" // /* MW 3 */ + 5086 "11000000" // /* MW 2 */ + 5087 "00000100" // /* MW 1 */ +.src_ref 4 "abs.hpp" 32 22 first + 5088 "01111000" // VBAND x8, x9, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5089 "00101011" // /* MW 3 */ + 5090 "01001001" // /* MW 2 */ + 5091 "00011100" // /* MW 1 */ + 5092 "01000100" // MOVXM r2, #16000 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5093 "00000000" // /* MW 5 */ + 5094 "00111101" // /* MW 4 */ + 5095 "00110001" // /* MW 3 */ + 5096 "00000000" // /* MW 2 */ + 5097 "00000000" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first + 5098 "01100010" // VBCST.16 x4, r2; VMUL.f dm2, x3, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5099 "00000001" // /* MW 7 */ + 5100 "11100111" // /* MW 6 */ + 5101 "10001010" // /* MW 5 */ + 5102 "11100110" // /* MW 4 */ + 5103 "01110010" // /* MW 3 */ + 5104 "00001001" // /* MW 2 */ + 5105 "00000010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 940 83 first + 5106 "11111000" // VCONV.fp32.bf16 cml0, x5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5107 "10001010" // /* MW 3 */ + 5108 "00001011" // /* MW 2 */ + 5109 "00011000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 5 "mul_acc32_fp.hpp" 38 105 first + 5110 "01100010" // VMIN_GE.bf16 x8, r16, x10, x0; VMAC.f dm3, dm0, x6, x4, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5111 "10000001" // /* MW 7 */ + 5112 "00001100" // /* MW 6 */ + 5113 "10001011" // /* MW 5 */ + 5114 "11100110" // /* MW 4 */ + 5115 "00101100" // /* MW 3 */ + 5116 "01010000" // /* MW 2 */ + 5117 "00000100" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 38 105 +.src_ref 2 "elementwise_unary.h" 125 8 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 5118 "01010110" // VCONV.bf16.fp32 x11, cml4; MOVXM ls, #5168; VMAC.f dm1, dm0, x9, x4, r17 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5119 "10000001" // /* MW 11 */ + 5120 "00010010" // /* MW 10 */ + 5121 "10001001" // /* MW 9 */ + 5122 "00000010" // /* MW 8 */ + 5123 "01000011" // /* MW 7 */ + 5124 "10001111" // /* MW 6 */ + 5125 "00000000" // /* MW 5 */ + 5126 "00000000" // /* MW 4 */ + 5127 "11000000" // /* MW 3 */ + 5128 "01000010" // /* MW 2 */ + 5129 "10110010" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 5130 "11111000" // VMAX_LT.bf16 x6, r16, x8, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5131 "11101100" // /* MW 3 */ + 5132 "01000000" // /* MW 2 */ + 5133 "00011011" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 39 105 first +.src_ref 2 "elementwise_unary.h" 125 8 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5134 "01011010" // MOVXM le, #5264; VMSC.f dm2, dm3, x11, x6, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5135 "11000011" // /* MW 9 */ + 5136 "01110110" // /* MW 8 */ + 5137 "10001010" // /* MW 7 */ + 5138 "00000010" // /* MW 6 */ + 5139 "01001001" // /* MW 5 */ + 5140 "10110111" // /* MW 4 */ + 5141 "00000000" // /* MW 3 */ + 5142 "00000000" // /* MW 2 */ + 5143 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_unary.h" 125 8 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5144 "00000010" // VCONV.bf16.fp32 x5, cml2; ADD.NC lc, r0, #-2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5145 "10000000" // /* MW 7 */ + 5146 "00111111" // /* MW 6 */ + 5147 "10111000" // /* MW 5 */ + 5148 "00000010" // /* MW 4 */ + 5149 "11000000" // /* MW 3 */ + 5150 "00100010" // /* MW 2 */ + 5151 "01010010" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first + 5152 "11111000" // VMIN_GE.bf16 x8, r16, x7, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5153 "00101100" // /* MW 3 */ + 5154 "00111000" // /* MW 2 */ + 5155 "00011100" // /* MW 1 */ +.src_ref 4 "abs.hpp" 32 22 first + 5156 "11110110" // NOPA; NOPB; NOPS; VBAND x11, x6, x2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5157 "10110000" // /* MW 11 */ + 5158 "10010101" // /* MW 10 */ + 5159 "11011000" // /* MW 9 */ + 5160 "00000010" // /* MW 8 */ + 5161 "01011011" // /* MW 7 */ + 5162 "00000001" // /* MW 6 */ + 5163 "00100000" // /* MW 5 */ + 5164 "00000000" // /* MW 4 */ + 5165 "11110000" // /* MW 3 */ + 5166 "00101100" // /* MW 2 */ + 5167 "00000000" // /* MW 1 */ +.label ZLS_F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_192 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 5 "mul_acc32_fp.hpp" 39 105 first +.src_ref 2 "elementwise_unary.h" 142 19 first +.begin_of_loop +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first +.loop_nesting 1 + 5168 "01001010" // VLDB x10, [p0], #64; VMAX_LT.bf16 x9, r16, x8, x1; VMSC.f dm4, dm1, x5, x9, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5169 "00100011" // /* MW 9 */ + 5170 "00101011" // /* MW 8 */ + 5171 "10001100" // /* MW 7 */ + 5172 "11100110" // /* MW 6 */ + 5173 "11101100" // /* MW 5 */ + 5174 "11000000" // /* MW 4 */ + 5175 "01101100" // /* MW 3 */ + 5176 "00111101" // /* MW 2 */ + 5177 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "abs.hpp" 32 22 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_unary.h" 161 19 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 5178 "01001010" // VLDB x7, [p0], #64; VBAND x8, x9, x2; VMUL.f dm4, x3, x11, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5179 "01100001" // /* MW 9 */ + 5180 "11100111" // /* MW 8 */ + 5181 "10001100" // /* MW 7 */ + 5182 "01100110" // /* MW 6 */ + 5183 "00101011" // /* MW 5 */ + 5184 "01001001" // /* MW 4 */ + 5185 "11101100" // /* MW 3 */ + 5186 "00111011" // /* MW 2 */ + 5187 "00000000" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 38 105 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 5188 "01001000" // VMAC.f dm3, dm0, x6, x4, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5189 "10000001" // /* MW 3 */ + 5190 "00001100" // /* MW 2 */ + 5191 "10001011" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 5192 "01001000" // VMUL.f dm2, x3, x8, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5193 "00000001" // /* MW 3 */ + 5194 "11100111" // /* MW 2 */ + 5195 "10001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 38 105 first +.src_ref 2 "elementwise_unary.h" 166 20 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 5196 "01100010" // VST.CONV.bf16.fp32 cml2, [p1], #64; VMAC.f dm1, dm0, x9, x4, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5197 "10000001" // /* MW 7 */ + 5198 "00010010" // /* MW 6 */ + 5199 "10001001" // /* MW 5 */ + 5200 "00000010" // /* MW 4 */ + 5201 "01100000" // /* MW 3 */ + 5202 "10100100" // /* MW 2 */ + 5203 "00100011" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 5204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5205 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_unary.h" 147 20 first +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5206 "01111010" // NOPA; VST.CONV.bf16.fp32 cml4, [p1], #64;NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5207 "00000000" // /* MW 9 */ + 5208 "00000000" // /* MW 8 */ + 5209 "00000000" // /* MW 7 */ + 5210 "00000000" // /* MW 6 */ + 5211 "00100011" // /* MW 5 */ + 5212 "00011110" // /* MW 4 */ + 5213 "11110001" // /* MW 3 */ + 5214 "00101100" // /* MW 2 */ + 5215 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 5 "accum.hpp" 1119 102 first +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 5216 "11100001" // NOPA; NOPB; VCONV.bf16.fp32 x11, cml4; NOPX; VMIN_GE.bf16 x8, r16, x10, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5217 "00000000" // /* MW 15 */ + 5218 "00000000" // /* MW 14 */ + 5219 "01111000" // /* MW 13 */ + 5220 "00010110" // /* MW 12 */ + 5221 "00101000" // /* MW 11 */ + 5222 "00000010" // /* MW 10 */ + 5223 "00000000" // /* MW 9 */ + 5224 "00000000" // /* MW 8 */ + 5225 "00010110" // /* MW 7 */ + 5226 "10010010" // /* MW 6 */ + 5227 "00100101" // /* MW 5 */ + 5228 "00000000" // /* MW 4 */ + 5229 "11110000" // /* MW 3 */ + 5230 "00101100" // /* MW 2 */ + 5231 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.aggressive_scheduled_block_id 4 +.noswbrkpt + 5232 "11100001" // NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x8, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5233 "00000000" // /* MW 15 */ + 5234 "00000000" // /* MW 14 */ + 5235 "01111000" // /* MW 13 */ + 5236 "01110110" // /* MW 12 */ + 5237 "10100000" // /* MW 11 */ + 5238 "00000001" // /* MW 10 */ + 5239 "00000000" // /* MW 9 */ + 5240 "00000000" // /* MW 8 */ + 5241 "01011011" // /* MW 7 */ + 5242 "00000001" // /* MW 6 */ + 5243 "00100000" // /* MW 5 */ + 5244 "00000000" // /* MW 4 */ + 5245 "11110000" // /* MW 3 */ + 5246 "00101100" // /* MW 2 */ + 5247 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 39 105 first +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5248 "00011011" // NOPA; NOPB; VCONV.bf16.fp32 x5, cml2; NOPX; VMIN_GE.bf16 x8, r16, x7, x0; VMSC.f dm2, dm3, x11, x6, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5249 "10110110" // /* MW 15 */ + 5250 "01010011" // /* MW 14 */ + 5251 "01111100" // /* MW 13 */ + 5252 "00010110" // /* MW 12 */ + 5253 "00011100" // /* MW 11 */ + 5254 "00000010" // /* MW 10 */ + 5255 "00000000" // /* MW 9 */ + 5256 "00000000" // /* MW 8 */ + 5257 "00010110" // /* MW 7 */ + 5258 "10010001" // /* MW 6 */ + 5259 "00100010" // /* MW 5 */ + 5260 "00000000" // /* MW 4 */ + 5261 "11110000" // /* MW 3 */ + 5262 "00101100" // /* MW 2 */ + 5263 "00000000" // /* MW 1 */ +.label ZLE_F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_288 +.src_ref 4 "abs.hpp" 32 22 first +.end_of_loop + 5264 "11100001" // NOPA; NOPB; NOPS; NOPX; VBAND x11, x6, x2; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5265 "00000000" // /* MW 15 */ + 5266 "00000000" // /* MW 14 */ + 5267 "10111000" // /* MW 13 */ + 5268 "10010101" // /* MW 12 */ + 5269 "11011000" // /* MW 11 */ + 5270 "00000010" // /* MW 10 */ + 5271 "00000000" // /* MW 9 */ + 5272 "00000000" // /* MW 8 */ + 5273 "01011011" // /* MW 7 */ + 5274 "00000001" // /* MW 6 */ + 5275 "00100000" // /* MW 5 */ + 5276 "00000000" // /* MW 4 */ + 5277 "11110000" // /* MW 3 */ + 5278 "00101100" // /* MW 2 */ + 5279 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 5 "mul_acc32_fp.hpp" 39 105 first +.loop_nesting 0 + 5280 "01100010" // VMAX_LT.bf16 x9, r16, x8, x1; VMSC.f dm4, dm1, x5, x9, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5281 "00100011" // /* MW 7 */ + 5282 "00101011" // /* MW 6 */ + 5283 "10001100" // /* MW 5 */ + 5284 "11100110" // /* MW 4 */ + 5285 "11101100" // /* MW 3 */ + 5286 "11000000" // /* MW 2 */ + 5287 "00000100" // /* MW 1 */ +.src_ref 4 "abs.hpp" 32 22 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 5288 "01111000" // VBAND x8, x9, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5289 "00101011" // /* MW 3 */ + 5290 "01001001" // /* MW 2 */ + 5291 "00011100" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.aggressive_scheduled_block_id 5 +.noswbrkpt + 5292 "01001000" // VMUL.f dm4, x3, x11, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5293 "01100001" // /* MW 3 */ + 5294 "11100111" // /* MW 2 */ + 5295 "10001100" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 5296 "01001000" // VMUL.f dm2, x3, x8, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5297 "00000001" // /* MW 3 */ + 5298 "11100111" // /* MW 2 */ + 5299 "10001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_unary.h" 166 20 first +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 5300 "00011000" // VST.CONV.bf16.fp32 cml2, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5301 "00100011" // /* MW 3 */ + 5302 "00011101" // /* MW 2 */ + 5303 "00001001" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 5304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5305 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_unary.h" 147 20 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5306 "00011000" // VST.CONV.bf16.fp32 cml4, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5307 "00100011" // /* MW 3 */ + 5308 "00011110" // /* MW 2 */ + 5309 "00001001" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 38 105 first + 5310 "01001000" // VMAC.f dm3, dm0, x6, x4, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5311 "10000001" // /* MW 3 */ + 5312 "00001100" // /* MW 2 */ + 5313 "10001011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 38 105 + 5314 "01100010" // VCONV.bf16.fp32 x11, cml4; VMAC.f dm1, dm0, x9, x4, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5315 "10000001" // /* MW 7 */ + 5316 "00010010" // /* MW 6 */ + 5317 "10001001" // /* MW 5 */ + 5318 "00000010" // /* MW 4 */ + 5319 "11000000" // /* MW 3 */ + 5320 "01000010" // /* MW 2 */ + 5321 "10110010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 + 5322 "00011000" // VCONV.bf16.fp32 x5, cml2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5323 "00010110" // /* MW 3 */ + 5324 "10010001" // /* MW 2 */ + 5325 "00001010" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 39 105 first + 5326 "01001000" // VMSC.f dm2, dm3, x11, x6, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5327 "11000011" // /* MW 3 */ + 5328 "01110110" // /* MW 2 */ + 5329 "10001010" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 39 105 + 5330 "01001000" // VMSC.f dm4, dm1, x5, x9, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5331 "00100011" // /* MW 3 */ + 5332 "00101011" // /* MW 2 */ + 5333 "10001100" // /* MW 1 */ + 5334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5335 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 129 4 first + 5336 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 5337 "00000000" // /* MW 3 */ + 5338 "00101000" // /* MW 2 */ + 5339 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5341 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5343 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_unary.h" 166 20 first +.delay_slot + 5344 "00011000" // VST.CONV.bf16.fp32 cml2, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5345 "00100011" // /* MW 3 */ + 5346 "00011101" // /* MW 2 */ + 5347 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_unary.h" 147 20 first +.delay_slot + 5348 "00011000" // VST.CONV.bf16.fp32 cml4, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5349 "00100011" // /* MW 3 */ + 5350 "00011110" // /* MW 2 */ + 5351 "00001001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E__end +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_end0 + 5353 "00000000" // /* MW 1 */ +.label __Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_sigmoid1d _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 6 "superkernels.cpp" 210 first +.src_ref 6 "superkernels.cpp" 215 6 +.function_start + 5360 "01000100" // MOVXM p3, #508864 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5361 "10000000" // /* MW 5 */ + 5362 "11000111" // /* MW 4 */ + 5363 "11000110" // /* MW 3 */ + 5364 "00000111" // /* MW 2 */ + 5365 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 215 6 first + 5366 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5367 "11000001" // /* MW 5 */ + 5368 "10110101" // /* MW 4 */ + 5369 "11011000" // /* MW 3 */ + 5370 "11000010" // /* MW 2 */ + 5371 "01100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 210 + 5372 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5373 "00000001" // /* MW 5 */ + 5374 "00000000" // /* MW 4 */ + 5375 "00000000" // /* MW 3 */ + 5376 "00001000" // /* MW 2 */ + 5377 "00000000" // /* MW 1 */ + 5378 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5379 "01110000" // /* MW 7 */ + 5380 "11010000" // /* MW 6 */ + 5381 "00001011" // /* MW 5 */ + 5382 "00000000" // /* MW 4 */ + 5383 "10110000" // /* MW 3 */ + 5384 "01100011" // /* MW 2 */ + 5385 "11111111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 212 11 + 5386 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #508876 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5387 "00010001" // /* MW 9 */ + 5388 "11100110" // /* MW 8 */ + 5389 "00110001" // /* MW 7 */ + 5390 "11110011" // /* MW 6 */ + 5391 "00000001" // /* MW 5 */ + 5392 "00000000" // /* MW 4 */ + 5393 "10110000" // /* MW 3 */ + 5394 "10000010" // /* MW 2 */ + 5395 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 + 5396 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5397 "11000000" // /* MW 3 */ + 5398 "11010100" // /* MW 2 */ + 5399 "00011011" // /* MW 1 */ + 5400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5401 "00000000" // /* MW 1 */ + 5402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5403 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 215 6 +.src_ref 6 "superkernels.cpp" 215 16 + 5404 "10000100" // JNZ r16, #5568 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5568 delay_slots=5 */ + 5405 "00000001" // /* MW 5 */ + 5406 "01000000" // /* MW 4 */ + 5407 "11100000" // /* MW 3 */ + 5408 "00001010" // /* MW 2 */ + 5409 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 212 22 first +.delay_slot + 5410 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5411 "10010000" // /* MW 3 */ + 5412 "01100010" // /* MW 2 */ + 5413 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 212 30 +.delay_slot + 5414 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5415 "11111011" // /* MW 3 */ + 5416 "01100011" // /* MW 2 */ + 5417 "00010100" // /* MW 1 */ +.delay_slot + 5418 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5419 "00111101" // /* MW 3 */ + 5420 "11110100" // /* MW 2 */ + 5421 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 6 "superkernels.cpp" 212 11 +.delay_slot + 5422 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5423 "01110000" // /* MW 7 */ + 5424 "01100000" // /* MW 6 */ + 5425 "00110000" // /* MW 5 */ + 5426 "00000011" // /* MW 4 */ + 5427 "00110000" // /* MW 3 */ + 5428 "11000110" // /* MW 2 */ + 5429 "11000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 218 4 +.src_ref 6 "superkernels.cpp" 229 2 +.delay_slot + 5430 "01000100" // MOVXM p0, #509248 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5431 "10000000" // /* MW 5 */ + 5432 "11001010" // /* MW 4 */ + 5433 "11000000" // /* MW 3 */ + 5434 "00000111" // /* MW 2 */ + 5435 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5436 "01000100" // MOVXM p2, #508944 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5437 "00100000" // /* MW 5 */ + 5438 "11001000" // /* MW 4 */ + 5439 "11000100" // /* MW 3 */ + 5440 "00000111" // /* MW 2 */ + 5441 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5442 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #508940 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5443 "00010000" // /* MW 9 */ + 5444 "00000110" // /* MW 8 */ + 5445 "00110010" // /* MW 7 */ + 5446 "11110001" // /* MW 6 */ + 5447 "00000001" // /* MW 5 */ + 5448 "00000000" // /* MW 4 */ + 5449 "11100000" // /* MW 3 */ + 5450 "11000000" // /* MW 2 */ + 5451 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5453 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 218 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 5454 "00000100" // JL #4896 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4896 delay_slots=5 */ + 5455 "00000001" // /* MW 5 */ + 5456 "00000000" // /* MW 4 */ + 5457 "10010000" // /* MW 3 */ + 5458 "00001001" // /* MW 2 */ + 5459 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5461 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5463 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5464 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5465 "00110001" // /* MW 3 */ + 5466 "00100000" // /* MW 2 */ + 5467 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 5468 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5469 "00000101" // /* MW 3 */ + 5470 "00100000" // /* MW 2 */ + 5471 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 5472 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5473 "00000000" // /* MW 15 */ + 5474 "00000000" // /* MW 14 */ + 5475 "01111000" // /* MW 13 */ + 5476 "10100101" // /* MW 12 */ + 5477 "00000001" // /* MW 11 */ + 5478 "00000000" // /* MW 10 */ + 5479 "00000000" // /* MW 9 */ + 5480 "10000000" // /* MW 8 */ + 5481 "00010001" // /* MW 7 */ + 5482 "00000110" // /* MW 6 */ + 5483 "00100010" // /* MW 5 */ + 5484 "00000000" // /* MW 4 */ + 5485 "11110000" // /* MW 3 */ + 5486 "00101100" // /* MW 2 */ + 5487 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 222 18 +.return_address + 5488 "01000100" // MOVXM p2, #508876 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5489 "10011000" // /* MW 5 */ + 5490 "11000111" // /* MW 4 */ + 5491 "11000100" // /* MW 3 */ + 5492 "00000111" // /* MW 2 */ + 5493 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 222 18 first +.src_ref 6 "superkernels.cpp" 222 46 + 5494 "10111010" // LDA r16, [p2]; MOVXM p2, #509248 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5495 "00010000" // /* MW 9 */ + 5496 "10100000" // /* MW 8 */ + 5497 "00110010" // /* MW 7 */ + 5498 "11110001" // /* MW 6 */ + 5499 "00000001" // /* MW 5 */ + 5500 "00000000" // /* MW 4 */ + 5501 "11010000" // /* MW 3 */ + 5502 "11000010" // /* MW 2 */ + 5503 "01000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 220 32 +.src_ref 6 "superkernels.cpp" 222 46 +.src_ref 6 "superkernels.cpp" 229 2 + 5504 "10111010" // LDA r17, [p2]; MOVXM p2, #509248 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5505 "00010000" // /* MW 9 */ + 5506 "10100000" // /* MW 8 */ + 5507 "00110010" // /* MW 7 */ + 5508 "11110001" // /* MW 6 */ + 5509 "00000001" // /* MW 5 */ + 5510 "00000000" // /* MW 4 */ + 5511 "11010000" // /* MW 3 */ + 5512 "11000110" // /* MW 2 */ + 5513 "01000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 220 32 first +.src_ref 6 "superkernels.cpp" 222 16 +.src_ref 6 "superkernels.cpp" 227 47 + 5514 "10111010" // LDA.u16 r18, [p2, #8]; MOVXM p1, #508880 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5515 "00010000" // /* MW 9 */ + 5516 "11101000" // /* MW 8 */ + 5517 "10110001" // /* MW 7 */ + 5518 "11110000" // /* MW 6 */ + 5519 "00000001" // /* MW 5 */ + 5520 "00000000" // /* MW 4 */ + 5521 "01010000" // /* MW 3 */ + 5522 "11001011" // /* MW 2 */ + 5523 "01001000" // /* MW 1 */ + 5524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5525 "00000000" // /* MW 1 */ + 5526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5527 "00000000" // /* MW 1 */ + 5528 "10000100" // J #5584 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=5584 delay_slots=5 */ + 5529 "00000000" // /* MW 5 */ + 5530 "00000000" // /* MW 4 */ + 5531 "11101000" // /* MW 3 */ + 5532 "00001010" // /* MW 2 */ + 5533 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 220 13 +.delay_slot + 5534 "01000100" // MOVXM p0, #508936 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5535 "00010000" // /* MW 5 */ + 5536 "11001000" // /* MW 4 */ + 5537 "11000000" // /* MW 3 */ + 5538 "00000111" // /* MW 2 */ + 5539 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5541 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 222 27 first +.delay_slot + 5542 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5543 "00001111" // /* MW 3 */ + 5544 "01100001" // /* MW 2 */ + 5545 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 220 13 first +.delay_slot + 5546 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5547 "10100011" // /* MW 5 */ + 5548 "00001100" // /* MW 4 */ + 5549 "11110000" // /* MW 3 */ + 5550 "00101100" // /* MW 2 */ + 5551 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 222 16 first +.delay_slot + 5552 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5553 "00000000" // /* MW 15 */ + 5554 "00000000" // /* MW 14 */ + 5555 "01111000" // /* MW 13 */ + 5556 "10100101" // /* MW 12 */ + 5557 "00000001" // /* MW 11 */ + 5558 "00000000" // /* MW 10 */ + 5559 "00000000" // /* MW 9 */ + 5560 "10000000" // /* MW 8 */ + 5561 "00010001" // /* MW 7 */ + 5562 "00000110" // /* MW 6 */ + 5563 "00100001" // /* MW 5 */ + 5564 "00000000" // /* MW 4 */ + 5565 "11110000" // /* MW 3 */ + 5566 "00101100" // /* MW 2 */ + 5567 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 6 "superkernels.cpp" 227 47 +.src_ref 6 "superkernels.cpp" 229 2 + 5568 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #508880; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5569 "00000000" // /* MW 15 */ + 5570 "00000000" // /* MW 14 */ + 5571 "00010000" // /* MW 13 */ + 5572 "11101000" // /* MW 12 */ + 5573 "10110001" // /* MW 11 */ + 5574 "11110000" // /* MW 10 */ + 5575 "00000001" // /* MW 9 */ + 5576 "00000000" // /* MW 8 */ + 5577 "10001011" // /* MW 7 */ + 5578 "10000000" // /* MW 6 */ + 5579 "00100010" // /* MW 5 */ + 5580 "00000000" // /* MW 4 */ + 5581 "11110000" // /* MW 3 */ + 5582 "00101100" // /* MW 2 */ + 5583 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 +.src_ref 1 "io_buffer_main.h" 242 49 first + 5584 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5585 "00000000" // /* MW 7 */ + 5586 "11000011" // /* MW 6 */ + 5587 "10110011" // /* MW 5 */ + 5588 "00000011" // /* MW 4 */ + 5589 "01100000" // /* MW 3 */ + 5590 "10010001" // /* MW 2 */ + 5591 "01110011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 226 2 + 5592 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508864 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5593 "00010000" // /* MW 9 */ + 5594 "11100000" // /* MW 8 */ + 5595 "00110001" // /* MW 7 */ + 5596 "11110000" // /* MW 6 */ + 5597 "00000001" // /* MW 5 */ + 5598 "00000000" // /* MW 4 */ + 5599 "11010000" // /* MW 3 */ + 5600 "11101110" // /* MW 2 */ + 5601 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 5602 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5603 "00010110" // /* MW 3 */ + 5604 "11111110" // /* MW 2 */ + 5605 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 5606 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5607 "00110110" // /* MW 3 */ + 5608 "11111110" // /* MW 2 */ + 5609 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first + 5610 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5611 "01010110" // /* MW 3 */ + 5612 "01000110" // /* MW 2 */ + 5613 "00000111" // /* MW 1 */ + 5614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5615 "00000000" // /* MW 1 */ + 5616 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5617 "00000000" // /* MW 1 */ + 5618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5619 "00000000" // /* MW 1 */ + 5620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5621 "00000000" // /* MW 1 */ + 5622 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5623 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 5624 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5625 "00000010" // /* MW 3 */ + 5626 "01100001" // /* MW 2 */ + 5627 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 + 5628 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5629 "00010001" // /* MW 3 */ + 5630 "00000110" // /* MW 2 */ + 5631 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 + 5632 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5633 "11111101" // /* MW 3 */ + 5634 "11100000" // /* MW 2 */ + 5635 "00010111" // /* MW 1 */ + 5636 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5637 "00000000" // /* MW 1 */ + 5638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5639 "00000000" // /* MW 1 */ + 5640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5641 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 5642 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5643 "00001000" // /* MW 3 */ + 5644 "10010011" // /* MW 2 */ + 5645 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 227 45 + 5646 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5647 "10000001" // /* MW 5 */ + 5648 "10101101" // /* MW 4 */ + 5649 "10100111" // /* MW 3 */ + 5650 "00000000" // /* MW 2 */ + 5651 "00000100" // /* MW 1 */ + 5652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5653 "00000000" // /* MW 1 */ + 5654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5655 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 226 2 first + 5656 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5657 "00110110" // /* MW 3 */ + 5658 "00000110" // /* MW 2 */ + 5659 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 1 "io_buffer_main.h" 348 51 + 5660 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5661 "10000001" // /* MW 5 */ + 5662 "11011101" // /* MW 4 */ + 5663 "11011100" // /* MW 3 */ + 5664 "11001010" // /* MW 2 */ + 5665 "11000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 227 47 first + 5666 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5667 "01110110" // /* MW 3 */ + 5668 "00000110" // /* MW 2 */ + 5669 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 5670 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5671 "10011110" // /* MW 3 */ + 5672 "01011100" // /* MW 2 */ + 5673 "00000111" // /* MW 1 */ + 5674 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5675 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 229 2 first +.no_stack_arguments + 5676 "00000100" // JL #4976 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4976 delay_slots=5 */ + 5677 "00000001" // /* MW 5 */ + 5678 "00000000" // /* MW 4 */ + 5679 "10111000" // /* MW 3 */ + 5680 "00001001" // /* MW 2 */ + 5681 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5683 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 226 2 first +.delay_slot + 5684 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5685 "00000111" // /* MW 3 */ + 5686 "01100010" // /* MW 2 */ + 5687 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 226 2 +.delay_slot + 5688 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5689 "00110001" // /* MW 3 */ + 5690 "00000110" // /* MW 2 */ + 5691 "00001000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 227 45 first +.delay_slot + 5692 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5693 "00001101" // /* MW 3 */ + 5694 "11100001" // /* MW 2 */ + 5695 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 227 45 +.delay_slot + 5696 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5697 "00000000" // /* MW 15 */ + 5698 "00000000" // /* MW 14 */ + 5699 "10101000" // /* MW 13 */ + 5700 "10100000" // /* MW 12 */ + 5701 "00110100" // /* MW 11 */ + 5702 "00000000" // /* MW 10 */ + 5703 "00000000" // /* MW 9 */ + 5704 "00000000" // /* MW 8 */ + 5705 "01011011" // /* MW 7 */ + 5706 "00000001" // /* MW 6 */ + 5707 "00100000" // /* MW 5 */ + 5708 "00000000" // /* MW 4 */ + 5709 "11110000" // /* MW 3 */ + 5710 "00101100" // /* MW 2 */ + 5711 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first +.src_ref 6 "superkernels.cpp" 231 6 +.src_ref 6 "superkernels.cpp" 232 14 +.return_address + 5712 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508864 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5713 "00010000" // /* MW 9 */ + 5714 "11100000" // /* MW 8 */ + 5715 "00110001" // /* MW 7 */ + 5716 "11110011" // /* MW 6 */ + 5717 "00000001" // /* MW 5 */ + 5718 "00000000" // /* MW 4 */ + 5719 "11010000" // /* MW 3 */ + 5720 "11000110" // /* MW 2 */ + 5721 "11001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 449 8 + 5722 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5723 "00000101" // /* MW 3 */ + 5724 "00100000" // /* MW 2 */ + 5725 "00010000" // /* MW 1 */ + 5726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5727 "00000000" // /* MW 1 */ + 5728 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5729 "00000000" // /* MW 1 */ + 5730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5731 "00000000" // /* MW 1 */ + 5732 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5733 "00000000" // /* MW 1 */ + 5734 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5735 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 5736 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5737 "00001000" // /* MW 3 */ + 5738 "01010001" // /* MW 2 */ + 5739 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first +.src_ref 6 "superkernels.cpp" 231 19 + 5740 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #508936 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5741 "00010000" // /* MW 9 */ + 5742 "00000100" // /* MW 8 */ + 5743 "00110010" // /* MW 7 */ + 5744 "11110001" // /* MW 6 */ + 5745 "00000001" // /* MW 5 */ + 5746 "00000000" // /* MW 4 */ + 5747 "11010000" // /* MW 3 */ + 5748 "11001110" // /* MW 2 */ + 5749 "11111100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 231 6 first + 5750 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5751 "00110110" // /* MW 3 */ + 5752 "00000110" // /* MW 2 */ + 5753 "00000110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 231 19 + 5754 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5755 "01010110" // /* MW 3 */ + 5756 "00000110" // /* MW 2 */ + 5757 "00000010" // /* MW 1 */ + 5758 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5759 "00000000" // /* MW 1 */ + 5760 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5761 "00000000" // /* MW 1 */ + 5762 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5763 "00000000" // /* MW 1 */ + 5764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5765 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 first + 5766 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5767 "00110001" // /* MW 3 */ + 5768 "00100001" // /* MW 2 */ + 5769 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 5770 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5771 "00010001" // /* MW 3 */ + 5772 "11100110" // /* MW 2 */ + 5773 "00001111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 231 16 first + 5774 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5775 "00101000" // /* MW 3 */ + 5776 "01100001" // /* MW 2 */ + 5777 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 231 6 + 5778 "10000100" // JNZ r16, #5808 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5808 delay_slots=5 */ + 5779 "00000001" // /* MW 5 */ + 5780 "01000000" // /* MW 4 */ + 5781 "01011000" // /* MW 3 */ + 5782 "00001011" // /* MW 2 */ + 5783 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5785 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5787 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5789 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5791 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5793 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 232 14 + 5794 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5795 "00000001" // /* MW 3 */ + 5796 "00100000" // /* MW 2 */ + 5797 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 232 14 first + 5798 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5799 "00000000" // /* MW 9 */ + 5800 "00000000" // /* MW 8 */ + 5801 "00000000" // /* MW 7 */ + 5802 "10000000" // /* MW 6 */ + 5803 "00010001" // /* MW 5 */ + 5804 "00000110" // /* MW 4 */ + 5805 "11110110" // /* MW 3 */ + 5806 "00101100" // /* MW 2 */ + 5807 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 6 "superkernels.cpp" 234 + 5808 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5809 "00111001" // /* MW 3 */ + 5810 "11110100" // /* MW 2 */ + 5811 "00000111" // /* MW 1 */ + 5812 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5813 "00011001" // /* MW 3 */ + 5814 "11111011" // /* MW 2 */ + 5815 "00000111" // /* MW 1 */ + 5816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5817 "00000000" // /* MW 1 */ + 5818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5819 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 5820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5821 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 5822 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5823 "11110001" // /* MW 3 */ + 5824 "11111101" // /* MW 2 */ + 5825 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5827 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 234 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5828 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 5829 "00000000" // /* MW 3 */ + 5830 "00101000" // /* MW 2 */ + 5831 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5832 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5833 "10100000" // /* MW 3 */ + 5834 "01100111" // /* MW 2 */ + 5835 "00011111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 234 +.delay_slot + 5836 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5837 "00000001" // /* MW 5 */ + 5838 "00000000" // /* MW 4 */ + 5839 "00000000" // /* MW 3 */ + 5840 "11111000" // /* MW 2 */ + 5841 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5843 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5845 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5846 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 5847 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv +.function shared_setup_backbone _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv +.src_ref 2 "elementwise_binary_shared.h" 164 first +.src_ref 2 "elementwise_binary_shared.h" 170 22 +.src_ref 2 "elementwise_binary_shared.h" 170 24 first +.function_start + 5856 "10111010" // LDA el0, [p1], #4; MOVXM p0, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5857 "00010000" // /* MW 9 */ + 5858 "01100000" // /* MW 8 */ + 5859 "00110010" // /* MW 7 */ + 5860 "11110000" // /* MW 6 */ + 5861 "00000001" // /* MW 5 */ + 5862 "00000000" // /* MW 4 */ + 5863 "11010000" // /* MW 3 */ + 5864 "10000101" // /* MW 2 */ + 5865 "00100011" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 36 +.src_ref 2 "elementwise_binary_shared.h" 175 36 +.src_ref 2 "elementwise_binary_shared.h" 175 48 + 5866 "10111010" // MOVA r0, #-128; MOVX r1, #256; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5867 "01011000" // /* MW 9 */ + 5868 "00000000" // /* MW 8 */ + 5869 "00001000" // /* MW 7 */ + 5870 "00001011" // /* MW 6 */ + 5871 "00010000" // /* MW 5 */ + 5872 "00001000" // /* MW 4 */ + 5873 "00000000" // /* MW 3 */ + 5874 "00000000" // /* MW 2 */ + 5875 "11110000" // /* MW 1 */ + 5876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5877 "00000000" // /* MW 1 */ + 5878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5879 "00000000" // /* MW 1 */ + 5880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5881 "00000000" // /* MW 1 */ + 5882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5883 "00000000" // /* MW 1 */ + 5884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5885 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 170 22 first + 5886 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5887 "00101001" // /* MW 3 */ + 5888 "00011100" // /* MW 2 */ + 5889 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 171 24 first + 5890 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5891 "00101110" // /* MW 3 */ + 5892 "00011100" // /* MW 2 */ + 5893 "00000001" // /* MW 1 */ + 5894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5895 "00000000" // /* MW 1 */ + 5896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5897 "00000000" // /* MW 1 */ + 5898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5899 "00000000" // /* MW 1 */ + 5900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5901 "00000000" // /* MW 1 */ + 5902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5903 "00000000" // /* MW 1 */ + 5904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5905 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 171 22 + 5906 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5907 "00101001" // /* MW 3 */ + 5908 "00011100" // /* MW 2 */ + 5909 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 172 24 first + 5910 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5911 "00101110" // /* MW 3 */ + 5912 "00000100" // /* MW 2 */ + 5913 "00000001" // /* MW 1 */ + 5914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5915 "00000000" // /* MW 1 */ + 5916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5917 "00000000" // /* MW 1 */ + 5918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5919 "00000000" // /* MW 1 */ + 5920 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5921 "00000000" // /* MW 1 */ + 5922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5923 "00000000" // /* MW 1 */ + 5924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5925 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 172 22 + 5926 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5927 "00101001" // /* MW 3 */ + 5928 "00011100" // /* MW 2 */ + 5929 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 173 24 first + 5930 "10011000" // LDA r3, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5931 "01110110" // /* MW 3 */ + 5932 "00010100" // /* MW 2 */ + 5933 "00000001" // /* MW 1 */ + 5934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5935 "00000000" // /* MW 1 */ + 5936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5937 "00000000" // /* MW 1 */ + 5938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5939 "00000000" // /* MW 1 */ + 5940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5941 "00000000" // /* MW 1 */ + 5942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5943 "00000000" // /* MW 1 */ + 5944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5945 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 173 22 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5946 "10011000" // ST r3, [p0], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5947 "01110001" // /* MW 3 */ + 5948 "01001100" // /* MW 2 */ + 5949 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 34 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5950 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5951 "00010111" // /* MW 3 */ + 5952 "00000100" // /* MW 2 */ + 5953 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 176 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5954 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 5955 "00000000" // /* MW 3 */ + 5956 "00101000" // /* MW 2 */ + 5957 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5958 "01000100" // MOVXM r2, #65280 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5959 "00000000" // /* MW 5 */ + 5960 "00111110" // /* MW 4 */ + 5961 "11110001" // /* MW 3 */ + 5962 "00000000" // /* MW 2 */ + 5963 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 48 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5964 "10011000" // AND r2, r3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5965 "00100100" // /* MW 3 */ + 5966 "11000100" // /* MW 2 */ + 5967 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5968 "10011000" // EQ r27, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5969 "00100111" // /* MW 3 */ + 5970 "01110110" // /* MW 2 */ + 5971 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 36 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5972 "00011000" // SEL.EQZ r0, r0, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5973 "10000010" // /* MW 3 */ + 5974 "00000001" // /* MW 2 */ + 5975 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_end0 + 5977 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv +.function setup _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv +.src_ref 2 "elementwise_binary_shared.h" 178 +.src_ref 2 "elementwise_binary_shared.h" 178 first +.function_start + 5984 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5985 "00000001" // /* MW 5 */ + 5986 "00000000" // /* MW 4 */ + 5987 "00000000" // /* MW 3 */ + 5988 "00001000" // /* MW 2 */ + 5989 "00000000" // /* MW 1 */ + 5990 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5991 "00111101" // /* MW 3 */ + 5992 "11111000" // /* MW 2 */ + 5993 "00001111" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 179 8 first +.no_stack_arguments + 5994 "00000100" // JL #5856 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=5856 delay_slots=5 */ + 5995 "00000001" // /* MW 5 */ + 5996 "00000000" // /* MW 4 */ + 5997 "01110000" // /* MW 3 */ + 5998 "00001011" // /* MW 2 */ + 5999 "00000000" // /* MW 1 */ +.delay_slot + 6000 "11111000" // MOV r0, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6001 "10100000" // /* MW 3 */ + 6002 "00010111" // /* MW 2 */ + 6003 "00011000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 179 8 +.delay_slot + 6004 "00111010" // ST r0, [sp, #-4]; MOVXM r15, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6005 "00010001" // /* MW 9 */ + 6006 "01100000" // /* MW 8 */ + 6007 "11101010" // /* MW 7 */ + 6008 "11110001" // /* MW 6 */ + 6009 "00000001" // /* MW 5 */ + 6010 "00000000" // /* MW 4 */ + 6011 "10110000" // /* MW 3 */ + 6012 "10000010" // /* MW 2 */ + 6013 "11111111" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 179 8 +.delay_slot + 6014 "11111000" // MOV p0, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6015 "10100000" // /* MW 3 */ + 6016 "01100111" // /* MW 2 */ + 6017 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6018 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6019 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6020 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6021 "10000001" // /* MW 11 */ + 6022 "10101101" // /* MW 10 */ + 6023 "00000000" // /* MW 9 */ + 6024 "00000000" // /* MW 8 */ + 6025 "00000000" // /* MW 7 */ + 6026 "00000000" // /* MW 6 */ + 6027 "00100000" // /* MW 5 */ + 6028 "00000000" // /* MW 4 */ + 6029 "11110000" // /* MW 3 */ + 6030 "00101100" // /* MW 2 */ + 6031 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 179 8 +.src_ref 2 "elementwise_binary_shared.h" 181 4 +.src_ref 3 "add_impl.h" 105 29 +.return_address + 6032 "10111010" // LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p1, r15, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6033 "00001000" // /* MW 9 */ + 6034 "11000100" // /* MW 8 */ + 6035 "10110011" // /* MW 7 */ + 6036 "01101000" // /* MW 6 */ + 6037 "00000000" // /* MW 5 */ + 6038 "00000001" // /* MW 4 */ + 6039 "00100000" // /* MW 3 */ + 6040 "00000111" // /* MW 2 */ + 6041 "11111111" // /* MW 1 */ +.src_ref 3 "add_impl.h" 105 29 +.src_ref 3 "add_impl.h" 106 37 +.src_ref 3 "add_impl.h" 106 39 + 6042 "10111010" // MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6043 "01011000" // /* MW 9 */ + 6044 "11111101" // /* MW 8 */ + 6045 "00000111" // /* MW 7 */ + 6046 "00001000" // /* MW 6 */ + 6047 "10000000" // /* MW 5 */ + 6048 "00000001" // /* MW 4 */ + 6049 "10000000" // /* MW 3 */ + 6050 "11100010" // /* MW 2 */ + 6051 "00000001" // /* MW 1 */ +.src_ref 3 "add_impl.h" 105 29 first +.src_ref 3 "add_impl.h" 106 39 + 6052 "01111010" // LDA r15, [sp, #-4]; ST r16, [p1], m0; MOVX r16, #-128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6053 "00000001" // /* MW 9 */ + 6054 "10100000" // /* MW 8 */ + 6055 "00000111" // /* MW 7 */ + 6056 "10000000" // /* MW 6 */ + 6057 "00010001" // /* MW 5 */ + 6058 "00001010" // /* MW 4 */ + 6059 "00100001" // /* MW 3 */ + 6060 "10111110" // /* MW 2 */ + 6061 "11111111" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 50 first + 6062 "10011000" // LDA.u8 r18, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6063 "01001010" // /* MW 3 */ + 6064 "00000110" // /* MW 2 */ + 6065 "00000001" // /* MW 1 */ + 6066 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6067 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 6068 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6069 "00000000" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 37 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 6070 "00011000" // ST.s16 r16, [p1, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6071 "00010111" // /* MW 3 */ + 6072 "00000010" // /* MW 2 */ + 6073 "00000001" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 181 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6074 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 6075 "00000000" // /* MW 3 */ + 6076 "00101000" // /* MW 2 */ + 6077 "00010000" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 54 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6078 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6079 "00000101" // /* MW 3 */ + 6080 "00100010" // /* MW 2 */ + 6081 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 181 4 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6082 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6083 "00000001" // /* MW 5 */ + 6084 "00000000" // /* MW 4 */ + 6085 "00000000" // /* MW 3 */ + 6086 "11111000" // /* MW 2 */ + 6087 "11111111" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 54 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6088 "10011000" // EQ r27, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6089 "00100111" // /* MW 3 */ + 6090 "01110111" // /* MW 2 */ + 6091 "00010100" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 39 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6092 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6093 "10000010" // /* MW 3 */ + 6094 "00100001" // /* MW 2 */ + 6095 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + 6097 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.function run _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.src_ref 2 "elementwise_binary_shared.h" 186 first +.src_ref 2 "elementwise_binary_shared.h" 191 8 first +.tail_call +.function_start + 6112 "10000100" // J #4400 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=4400 delay_slots=5 */ + 6113 "00000000" // /* MW 5 */ + 6114 "00000000" // /* MW 4 */ + 6115 "10011000" // /* MW 3 */ + 6116 "00001000" // /* MW 2 */ + 6117 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 191 8 +.delay_slot + 6118 "01000100" // MOVXM p3, #509120 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6119 "10000000" // /* MW 5 */ + 6120 "11001001" // /* MW 4 */ + 6121 "11000110" // /* MW 3 */ + 6122 "00000111" // /* MW 2 */ + 6123 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6125 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6127 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6129 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0 + 6131 "00000000" // /* MW 1 */ +.label __Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0 +.label _Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.function superkernel_add1d _Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.src_ref 6 "superkernels.cpp" 240 first +.src_ref 6 "superkernels.cpp" 245 6 +.function_start + 6144 "01000100" // MOVXM p4, #508864 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6145 "10000000" // /* MW 5 */ + 6146 "11000111" // /* MW 4 */ + 6147 "11001000" // /* MW 3 */ + 6148 "00000111" // /* MW 2 */ + 6149 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 245 6 first + 6150 "11010100" // LDA r16, [p4]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6151 "11000001" // /* MW 5 */ + 6152 "10110101" // /* MW 4 */ + 6153 "11011000" // /* MW 3 */ + 6154 "11000010" // /* MW 2 */ + 6155 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 240 + 6156 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6157 "00000001" // /* MW 5 */ + 6158 "00000000" // /* MW 4 */ + 6159 "00000000" // /* MW 3 */ + 6160 "00001000" // /* MW 2 */ + 6161 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 242 22 first + 6162 "00111010" // ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6163 "01111001" // /* MW 9 */ + 6164 "01100000" // /* MW 8 */ + 6165 "11001010" // /* MW 7 */ + 6166 "10000001" // /* MW 6 */ + 6167 "00010100" // /* MW 5 */ + 6168 "00100011" // /* MW 4 */ + 6169 "10110000" // /* MW 3 */ + 6170 "00111010" // /* MW 2 */ + 6171 "11111111" // /* MW 1 */ + 6172 "00000010" // ST p0, [sp, #-20]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6173 "01110000" // /* MW 7 */ + 6174 "11010000" // /* MW 6 */ + 6175 "00001011" // /* MW 5 */ + 6176 "00000000" // /* MW 4 */ + 6177 "10110000" // /* MW 3 */ + 6178 "10000011" // /* MW 2 */ + 6179 "11111101" // /* MW 1 */ + 6180 "10011000" // ST r0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6181 "00010101" // /* MW 3 */ + 6182 "11111100" // /* MW 2 */ + 6183 "00001111" // /* MW 1 */ + 6184 "10011000" // ST lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6185 "00111101" // /* MW 3 */ + 6186 "11110000" // /* MW 2 */ + 6187 "00001111" // /* MW 1 */ + 6188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6189 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 245 6 first +.src_ref 6 "superkernels.cpp" 245 16 first + 6190 "10000100" // JNZ r16, #6336 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6336 delay_slots=5 */ + 6191 "00000001" // /* MW 5 */ + 6192 "01000000" // /* MW 4 */ + 6193 "01100000" // /* MW 3 */ + 6194 "00001100" // /* MW 2 */ + 6195 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 242 30 first +.delay_slot + 6196 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6197 "11111011" // /* MW 3 */ + 6198 "01100011" // /* MW 2 */ + 6199 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 242 11 +.delay_slot + 6200 "01000100" // MOVXM p2, #508876 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6201 "10011000" // /* MW 5 */ + 6202 "11000111" // /* MW 4 */ + 6203 "11000100" // /* MW 3 */ + 6204 "00000111" // /* MW 2 */ + 6205 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 242 11 +.delay_slot + 6206 "00000010" // ST r17, [p2]; MOV p2, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6207 "01110000" // /* MW 7 */ + 6208 "01100000" // /* MW 6 */ + 6209 "00110111" // /* MW 5 */ + 6210 "00000001" // /* MW 4 */ + 6211 "00110000" // /* MW 3 */ + 6212 "11000110" // /* MW 2 */ + 6213 "01000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.delay_slot + 6214 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6215 "11000000" // /* MW 3 */ + 6216 "11010110" // /* MW 2 */ + 6217 "00011011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 248 4 +.src_ref 6 "superkernels.cpp" 250 28 +.src_ref 6 "superkernels.cpp" 252 42 +.src_ref 6 "superkernels.cpp" 264 2 +.delay_slot + 6218 "00111010" // ST p2, [sp, #-12]; MOVXM p7, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6219 "00010001" // /* MW 9 */ + 6220 "01100000" // /* MW 8 */ + 6221 "10110010" // /* MW 7 */ + 6222 "11110011" // /* MW 6 */ + 6223 "00000001" // /* MW 5 */ + 6224 "00000000" // /* MW 4 */ + 6225 "10110000" // /* MW 3 */ + 6226 "10100011" // /* MW 2 */ + 6227 "11111110" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.src_ref 6 "superkernels.cpp" 248 4 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 6228 "00111010" // MOVS p0, p7; MOVXM p2, #508944 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6229 "00010001" // /* MW 9 */ + 6230 "00001000" // /* MW 8 */ + 6231 "00110010" // /* MW 7 */ + 6232 "11110001" // /* MW 6 */ + 6233 "00000001" // /* MW 5 */ + 6234 "00000000" // /* MW 4 */ + 6235 "01100000" // /* MW 3 */ + 6236 "10010001" // /* MW 2 */ + 6237 "00010011" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 6238 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #508940 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6239 "00010000" // /* MW 9 */ + 6240 "00000110" // /* MW 8 */ + 6241 "00110010" // /* MW 7 */ + 6242 "11110001" // /* MW 6 */ + 6243 "00000001" // /* MW 5 */ + 6244 "00000000" // /* MW 4 */ + 6245 "11100000" // /* MW 3 */ + 6246 "11000000" // /* MW 2 */ + 6247 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6248 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6249 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 248 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 6250 "00000100" // JL #5984 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=5984 delay_slots=5 */ + 6251 "00000001" // /* MW 5 */ + 6252 "00000000" // /* MW 4 */ + 6253 "10110000" // /* MW 3 */ + 6254 "00001011" // /* MW 2 */ + 6255 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6256 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6257 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6259 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6260 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6261 "00110001" // /* MW 3 */ + 6262 "00100000" // /* MW 2 */ + 6263 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 6264 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6265 "00000101" // /* MW 3 */ + 6266 "00100000" // /* MW 2 */ + 6267 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 6268 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6269 "00010001" // /* MW 3 */ + 6270 "00000110" // /* MW 2 */ + 6271 "00001010" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 252 18 +.src_ref 6 "superkernels.cpp" 252 42 first +.return_address + 6272 "10111010" // LDA r16, [p7]; MOVXM p1, #508876 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6273 "00010000" // /* MW 9 */ + 6274 "11100110" // /* MW 8 */ + 6275 "10110001" // /* MW 7 */ + 6276 "11110000" // /* MW 6 */ + 6277 "00000001" // /* MW 5 */ + 6278 "00000000" // /* MW 4 */ + 6279 "11010000" // /* MW 3 */ + 6280 "11000010" // /* MW 2 */ + 6281 "11100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 252 16 +.src_ref 6 "superkernels.cpp" 252 18 +.src_ref 6 "superkernels.cpp" 261 48 + 6282 "10111010" // LDA r17, [p1]; MOVXM p3, #508880 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6283 "00010000" // /* MW 9 */ + 6284 "11101000" // /* MW 8 */ + 6285 "10110001" // /* MW 7 */ + 6286 "11110001" // /* MW 6 */ + 6287 "00000001" // /* MW 5 */ + 6288 "00000000" // /* MW 4 */ + 6289 "11010000" // /* MW 3 */ + 6290 "11000110" // /* MW 2 */ + 6291 "00100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 250 28 first +.src_ref 6 "superkernels.cpp" 253 16 +.src_ref 6 "superkernels.cpp" 262 48 + 6292 "10111010" // LDA.u16 r18, [p7, #10]; MOVXM p1, #508884 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6293 "00010000" // /* MW 9 */ + 6294 "11101010" // /* MW 8 */ + 6295 "10110001" // /* MW 7 */ + 6296 "11110000" // /* MW 6 */ + 6297 "00000001" // /* MW 5 */ + 6298 "00000000" // /* MW 4 */ + 6299 "01010000" // /* MW 3 */ + 6300 "11001011" // /* MW 2 */ + 6301 "11101010" // /* MW 1 */ + 6302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6303 "00000000" // /* MW 1 */ + 6304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6305 "00000000" // /* MW 1 */ + 6306 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6307 "00000000" // /* MW 1 */ + 6308 "10000100" // J #6352 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6352 delay_slots=5 */ + 6309 "00000000" // /* MW 5 */ + 6310 "00000000" // /* MW 4 */ + 6311 "01101000" // /* MW 3 */ + 6312 "00001100" // /* MW 2 */ + 6313 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 250 13 +.delay_slot + 6314 "01000100" // MOVXM p2, #508936 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6315 "00010000" // /* MW 5 */ + 6316 "11001000" // /* MW 4 */ + 6317 "11000100" // /* MW 3 */ + 6318 "00000111" // /* MW 2 */ + 6319 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 252 27 first +.delay_slot + 6320 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6321 "00001111" // /* MW 3 */ + 6322 "01100001" // /* MW 2 */ + 6323 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 250 13 first +.delay_slot + 6324 "10011000" // ST r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6325 "01010001" // /* MW 3 */ + 6326 "00000110" // /* MW 2 */ + 6327 "00001010" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 252 16 first +.delay_slot + 6328 "10011000" // ST r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6329 "00010001" // /* MW 3 */ + 6330 "00000110" // /* MW 2 */ + 6331 "00001011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 253 16 first +.delay_slot + 6332 "10011000" // ST r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6333 "00010001" // /* MW 3 */ + 6334 "00000110" // /* MW 2 */ + 6335 "00001001" // /* MW 1 */ +.label TGT_F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 +.src_ref 6 "superkernels.cpp" 261 48 + 6336 "01000100" // MOVXM p3, #508880 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6337 "10100000" // /* MW 5 */ + 6338 "11000111" // /* MW 4 */ + 6339 "11000110" // /* MW 3 */ + 6340 "00000111" // /* MW 2 */ + 6341 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 262 48 + 6342 "10111010" // NOPA; MOVXM p1, #508884 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6343 "00010000" // /* MW 9 */ + 6344 "11101010" // /* MW 8 */ + 6345 "10110001" // /* MW 7 */ + 6346 "11110000" // /* MW 6 */ + 6347 "00000001" // /* MW 5 */ + 6348 "00000000" // /* MW 4 */ + 6349 "11110000" // /* MW 3 */ + 6350 "00101100" // /* MW 2 */ + 6351 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 +.src_ref 1 "io_buffer_main.h" 242 49 first + 6352 "00011000" // ADD.NC p0, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6353 "10000110" // /* MW 3 */ + 6354 "01100111" // /* MW 2 */ + 6355 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 256 2 + 6356 "10111010" // LDA r27, [p0], #-4; MOVXM p2, #508864 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6357 "00010000" // /* MW 9 */ + 6358 "11100000" // /* MW 8 */ + 6359 "00110001" // /* MW 7 */ + 6360 "11110001" // /* MW 6 */ + 6361 "00000001" // /* MW 5 */ + 6362 "00000000" // /* MW 4 */ + 6363 "11010000" // /* MW 3 */ + 6364 "11101110" // /* MW 2 */ + 6365 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 6366 "10011000" // LDA r16, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6367 "00010110" // /* MW 3 */ + 6368 "11111110" // /* MW 2 */ + 6369 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 6370 "10011000" // LDA r17, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6371 "00110110" // /* MW 3 */ + 6372 "11111110" // /* MW 2 */ + 6373 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 256 2 first + 6374 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6375 "01010110" // /* MW 3 */ + 6376 "00000110" // /* MW 2 */ + 6377 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first + 6378 "10011000" // LDA r19, [p0, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6379 "01110110" // /* MW 3 */ + 6380 "01000110" // /* MW 2 */ + 6381 "00000000" // /* MW 1 */ + 6382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6383 "00000000" // /* MW 1 */ + 6384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6385 "00000000" // /* MW 1 */ + 6386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6387 "00000000" // /* MW 1 */ + 6388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6389 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 6390 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6391 "00000010" // /* MW 3 */ + 6392 "01100001" // /* MW 2 */ + 6393 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 +.src_ref 6 "superkernels.cpp" 256 2 first + 6394 "01011100" // ST r16, [p0]; ADD r16, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6395 "00001110" // /* MW 5 */ + 6396 "01000000" // /* MW 4 */ + 6397 "00111001" // /* MW 3 */ + 6398 "11000010" // /* MW 2 */ + 6399 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 256 2 + 6400 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6401 "00010001" // /* MW 3 */ + 6402 "00000110" // /* MW 2 */ + 6403 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 +.src_ref 1 "io_buffer_main.h" 419 8 + 6404 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6405 "11111101" // /* MW 3 */ + 6406 "11100000" // /* MW 2 */ + 6407 "00010111" // /* MW 1 */ + 6408 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6409 "00000000" // /* MW 1 */ + 6410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6411 "00000000" // /* MW 1 */ + 6412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6413 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 6414 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6415 "00001000" // /* MW 3 */ + 6416 "11010011" // /* MW 2 */ + 6417 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 first + 6418 "00011000" // ADD.NC p2, r14, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6419 "00000110" // /* MW 3 */ + 6420 "01100111" // /* MW 2 */ + 6421 "00011010" // /* MW 1 */ + 6422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6423 "00000000" // /* MW 1 */ + 6424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6425 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 + 6426 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6427 "01110110" // /* MW 3 */ + 6428 "11111111" // /* MW 2 */ + 6429 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 6430 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6431 "00110110" // /* MW 3 */ + 6432 "11111110" // /* MW 2 */ + 6433 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 6434 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6435 "01010110" // /* MW 3 */ + 6436 "11111110" // /* MW 2 */ + 6437 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 47 first + 6438 "10011000" // LDA r19, [p2, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6439 "01110110" // /* MW 3 */ + 6440 "01010110" // /* MW 2 */ + 6441 "00000010" // /* MW 1 */ + 6442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6443 "00000000" // /* MW 1 */ + 6444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6445 "00000000" // /* MW 1 */ + 6446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6447 "00000000" // /* MW 1 */ + 6448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6449 "00000000" // /* MW 1 */ + 6450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6451 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 6452 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6453 "00010010" // /* MW 3 */ + 6454 "10100011" // /* MW 2 */ + 6455 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 + 6456 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6457 "00110001" // /* MW 3 */ + 6458 "00000110" // /* MW 2 */ + 6459 "00001010" // /* MW 1 */ + 6460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6461 "00000000" // /* MW 1 */ + 6462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6463 "00000000" // /* MW 1 */ + 6464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6465 "00000000" // /* MW 1 */ + 6466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6467 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 6468 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6469 "00001000" // /* MW 3 */ + 6470 "11010011" // /* MW 2 */ + 6471 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 32 +.src_ref 6 "superkernels.cpp" 261 46 +.src_ref 6 "superkernels.cpp" 262 46 + 6472 "00111010" // MOVS p6, p2; MOVX r16, #1; MOV r14, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6473 "01111001" // /* MW 9 */ + 6474 "01100000" // /* MW 8 */ + 6475 "11001110" // /* MW 7 */ + 6476 "00101001" // /* MW 6 */ + 6477 "00000000" // /* MW 5 */ + 6478 "00000001" // /* MW 4 */ + 6479 "01100000" // /* MW 3 */ + 6480 "00010001" // /* MW 2 */ + 6481 "11010001" // /* MW 1 */ + 6482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6483 "00000000" // /* MW 1 */ + 6484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6485 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 + 6486 "00011000" // LDA p4, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6487 "00011001" // /* MW 3 */ + 6488 "11101110" // /* MW 2 */ + 6489 "00000111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 261 48 first + 6490 "00001100" // LDA r17, [p3]; ST p0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6491 "00111011" // /* MW 5 */ + 6492 "11011000" // /* MW 4 */ + 6493 "11011111" // /* MW 3 */ + 6494 "11000110" // /* MW 2 */ + 6495 "01100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 262 48 first +.src_ref 6 "superkernels.cpp" 264 2 + 6496 "11010100" // LDA r20, [p1]; MOV p3, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6497 "10000001" // /* MW 5 */ + 6498 "11011101" // /* MW 4 */ + 6499 "11010110" // /* MW 3 */ + 6500 "11010010" // /* MW 2 */ + 6501 "00100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 6502 "10011000" // LDA r18, [p2], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6503 "01010110" // /* MW 3 */ + 6504 "01001110" // /* MW 2 */ + 6505 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 6506 "10011000" // LDA p2, [p0], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6507 "00011110" // /* MW 3 */ + 6508 "01011101" // /* MW 2 */ + 6509 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 40 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6510 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6511 "11000000" // /* MW 3 */ + 6512 "01100000" // /* MW 2 */ + 6513 "00011111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6515 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6516 "10011000" // LDA r19, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6517 "01110110" // /* MW 3 */ + 6518 "00000110" // /* MW 2 */ + 6519 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6521 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 264 2 first +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 6522 "00000100" // JL #6112 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=6112 delay_slots=5 */ + 6523 "00000001" // /* MW 5 */ + 6524 "00000000" // /* MW 4 */ + 6525 "11110000" // /* MW 3 */ + 6526 "00001011" // /* MW 2 */ + 6527 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 40 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6528 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6529 "11000000" // /* MW 3 */ + 6530 "11010100" // /* MW 2 */ + 6531 "00011011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 261 46 first +.delay_slot + 6532 "10011000" // LSHL r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6533 "00001101" // /* MW 3 */ + 6534 "01100011" // /* MW 2 */ + 6535 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 262 46 first +.delay_slot + 6536 "10011000" // LSHL r16, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6537 "00001101" // /* MW 3 */ + 6538 "00100001" // /* MW 2 */ + 6539 "00010101" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 262 46 +.delay_slot + 6540 "01011000" // ADD.NC p1, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6541 "01000001" // /* MW 3 */ + 6542 "01101001" // /* MW 2 */ + 6543 "00011001" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 261 46 first +.delay_slot + 6544 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6545 "00000000" // /* MW 15 */ + 6546 "00000000" // /* MW 14 */ + 6547 "10101000" // /* MW 13 */ + 6548 "11100010" // /* MW 12 */ + 6549 "00110100" // /* MW 11 */ + 6550 "00000000" // /* MW 10 */ + 6551 "00000000" // /* MW 9 */ + 6552 "00000000" // /* MW 8 */ + 6553 "01011011" // /* MW 7 */ + 6554 "00000001" // /* MW 6 */ + 6555 "00100000" // /* MW 5 */ + 6556 "00000000" // /* MW 4 */ + 6557 "11110000" // /* MW 3 */ + 6558 "00101100" // /* MW 2 */ + 6559 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 32 first +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 351 40 +.src_ref 1 "io_buffer_main.h" 449 8 +.src_ref 1 "io_buffer_main.h" 449 8 +.return_address + 6560 "10111010" // LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6561 "01111000" // /* MW 9 */ + 6562 "11010000" // /* MW 8 */ + 6563 "10110011" // /* MW 7 */ + 6564 "00101000" // /* MW 6 */ + 6565 "00000000" // /* MW 5 */ + 6566 "00000001" // /* MW 4 */ + 6567 "11010000" // /* MW 3 */ + 6568 "11000110" // /* MW 2 */ + 6569 "11001000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 268 19 + 6570 "01000100" // MOVXM p6, #508936 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6571 "00010000" // /* MW 5 */ + 6572 "11001000" // /* MW 4 */ + 6573 "11001100" // /* MW 3 */ + 6574 "00000111" // /* MW 2 */ + 6575 "00000000" // /* MW 1 */ + 6576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6577 "00000000" // /* MW 1 */ + 6578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6579 "00000000" // /* MW 1 */ + 6580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6581 "00000000" // /* MW 1 */ + 6582 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6583 "00000000" // /* MW 1 */ + 6584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6585 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 6586 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6587 "00001000" // /* MW 3 */ + 6588 "01010001" // /* MW 2 */ + 6589 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first + 6590 "10011000" // LDA r17, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6591 "00110110" // /* MW 3 */ + 6592 "11110110" // /* MW 2 */ + 6593 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 + 6594 "00011000" // LDA p2, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6595 "00011001" // /* MW 3 */ + 6596 "11101101" // /* MW 2 */ + 6597 "00000111" // /* MW 1 */ + 6598 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6599 "00000000" // /* MW 1 */ + 6600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6601 "00000000" // /* MW 1 */ + 6602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6603 "00000000" // /* MW 1 */ + 6604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6605 "00000000" // /* MW 1 */ + 6606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6607 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 + 6608 "10011000" // SUB r17, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6609 "00010001" // /* MW 3 */ + 6610 "00100011" // /* MW 2 */ + 6611 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first +.src_ref 1 "io_buffer_main.h" 351 28 + 6612 "00001100" // LDA r17, [p2, #20]; ST r17, [p1, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6613 "01100011" // /* MW 5 */ + 6614 "11101100" // /* MW 4 */ + 6615 "11010011" // /* MW 3 */ + 6616 "11000110" // /* MW 2 */ + 6617 "01001010" // /* MW 1 */ + 6618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6619 "00000000" // /* MW 1 */ + 6620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6621 "00000000" // /* MW 1 */ + 6622 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6623 "00000000" // /* MW 1 */ + 6624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6625 "00000000" // /* MW 1 */ + 6626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6627 "00000000" // /* MW 1 */ + 6628 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6629 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 6630 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6631 "00001000" // /* MW 3 */ + 6632 "01010001" // /* MW 2 */ + 6633 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first +.src_ref 6 "superkernels.cpp" 268 6 +.src_ref 6 "superkernels.cpp" 269 14 + 6634 "10111010" // LDA r19, [p7, #-8]; MOVXM p1, #508864 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6635 "00010000" // /* MW 9 */ + 6636 "11100000" // /* MW 8 */ + 6637 "10110001" // /* MW 7 */ + 6638 "11110000" // /* MW 6 */ + 6639 "00000001" // /* MW 5 */ + 6640 "00000000" // /* MW 4 */ + 6641 "11010000" // /* MW 3 */ + 6642 "11001110" // /* MW 2 */ + 6643 "11111100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 268 19 first + 6644 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6645 "01010110" // /* MW 3 */ + 6646 "00000110" // /* MW 2 */ + 6647 "00000110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 268 6 + 6648 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6649 "00110110" // /* MW 3 */ + 6650 "00000110" // /* MW 2 */ + 6651 "00000001" // /* MW 1 */ + 6652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6653 "00000000" // /* MW 1 */ + 6654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6655 "00000000" // /* MW 1 */ + 6656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6657 "00000000" // /* MW 1 */ + 6658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6659 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 first + 6660 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6661 "00110001" // /* MW 3 */ + 6662 "00100001" // /* MW 2 */ + 6663 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 6664 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6665 "00010001" // /* MW 3 */ + 6666 "11100110" // /* MW 2 */ + 6667 "00001111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 268 16 first + 6668 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6669 "00101000" // /* MW 3 */ + 6670 "01100001" // /* MW 2 */ + 6671 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 268 6 + 6672 "10000100" // JNZ r16, #6704 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6704 delay_slots=5 */ + 6673 "00000001" // /* MW 5 */ + 6674 "01000000" // /* MW 4 */ + 6675 "00011000" // /* MW 3 */ + 6676 "00001101" // /* MW 2 */ + 6677 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6679 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6681 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6683 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6685 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6687 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 269 14 + 6688 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6689 "00000001" // /* MW 3 */ + 6690 "00100000" // /* MW 2 */ + 6691 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 269 14 first + 6692 "00110110" // NOPA; NOPB; ST r16, [p1]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6693 "11000001" // /* MW 11 */ + 6694 "00001000" // /* MW 10 */ + 6695 "10000011" // /* MW 9 */ + 6696 "00000000" // /* MW 8 */ + 6697 "00000000" // /* MW 7 */ + 6698 "00000000" // /* MW 6 */ + 6699 "00100000" // /* MW 5 */ + 6700 "00000000" // /* MW 4 */ + 6701 "11110000" // /* MW 3 */ + 6702 "00101100" // /* MW 2 */ + 6703 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 +.src_ref 6 "superkernels.cpp" 271 + 6704 "00011000" // LDA lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6705 "00111001" // /* MW 3 */ + 6706 "11110000" // /* MW 2 */ + 6707 "00000111" // /* MW 1 */ + 6708 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6709 "11110001" // /* MW 3 */ + 6710 "11111101" // /* MW 2 */ + 6711 "00000111" // /* MW 1 */ + 6712 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6713 "10011001" // /* MW 3 */ + 6714 "11110111" // /* MW 2 */ + 6715 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 6716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6717 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.noswbrkpt + 6718 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6719 "11010001" // /* MW 3 */ + 6720 "11111001" // /* MW 2 */ + 6721 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 6722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6723 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 6724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6725 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 271 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 6726 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 6727 "00000000" // /* MW 3 */ + 6728 "00101000" // /* MW 2 */ + 6729 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6730 "00011000" // MOVS p6, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6731 "00001011" // /* MW 3 */ + 6732 "10001110" // /* MW 2 */ + 6733 "00001110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 271 +.delay_slot + 6734 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6735 "00000001" // /* MW 5 */ + 6736 "00000000" // /* MW 4 */ + 6737 "00000000" // /* MW 3 */ + 6738 "11111000" // /* MW 2 */ + 6739 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6741 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6743 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end +.label __Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0 + 6745 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.src_ref 2 "elementwise_binary.h" 100 first +.src_ref 2 "elementwise_binary.h" 103 4 first +.function_start + 6752 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 6753 "00000000" // /* MW 3 */ + 6754 "00101000" // /* MW 2 */ + 6755 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 6756 "01000100" // MOVXM p0, #509216 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6757 "01000000" // /* MW 5 */ + 6758 "11001010" // /* MW 4 */ + 6759 "11000000" // /* MW 3 */ + 6760 "00000111" // /* MW 2 */ + 6761 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 6762 "10111000" // MOV m0, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6763 "10000000" // /* MW 3 */ + 6764 "00000000" // /* MW 2 */ + 6765 "00011000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 first +.delay_slot + 6766 "10011000" // ST m0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6767 "00000001" // /* MW 3 */ + 6768 "00000100" // /* MW 2 */ + 6769 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 6770 "10011000" // ST m0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6771 "00000001" // /* MW 3 */ + 6772 "00010100" // /* MW 2 */ + 6773 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_end0 + 6775 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.src_ref 2 "elementwise_binary.h" 89 first +.src_ref 2 "elementwise_binary.h" 92 22 +.src_ref 2 "elementwise_binary.h" 92 24 first +.function_start + 6784 "10111010" // LDA el0, [p1], #4; MOVXM p0, #509184 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6785 "00010000" // /* MW 9 */ + 6786 "10000000" // /* MW 8 */ + 6787 "00110010" // /* MW 7 */ + 6788 "11110000" // /* MW 6 */ + 6789 "00000001" // /* MW 5 */ + 6790 "00000000" // /* MW 4 */ + 6791 "11010000" // /* MW 3 */ + 6792 "10000101" // /* MW 2 */ + 6793 "00100011" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 89 + 6794 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6795 "00000001" // /* MW 5 */ + 6796 "00000000" // /* MW 4 */ + 6797 "00000000" // /* MW 3 */ + 6798 "00001000" // /* MW 2 */ + 6799 "00000000" // /* MW 1 */ + 6800 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6801 "00111101" // /* MW 3 */ + 6802 "11111100" // /* MW 2 */ + 6803 "00001111" // /* MW 1 */ + 6804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6805 "00000000" // /* MW 1 */ + 6806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6807 "00000000" // /* MW 1 */ + 6808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6809 "00000000" // /* MW 1 */ + 6810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6811 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 92 22 first + 6812 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6813 "00101001" // /* MW 3 */ + 6814 "00011100" // /* MW 2 */ + 6815 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 93 24 first + 6816 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6817 "00101110" // /* MW 3 */ + 6818 "00011100" // /* MW 2 */ + 6819 "00000001" // /* MW 1 */ + 6820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6821 "00000000" // /* MW 1 */ + 6822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6823 "00000000" // /* MW 1 */ + 6824 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6825 "00000000" // /* MW 1 */ + 6826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6827 "00000000" // /* MW 1 */ + 6828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6829 "00000000" // /* MW 1 */ + 6830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6831 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 93 22 + 6832 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6833 "00101001" // /* MW 3 */ + 6834 "00011100" // /* MW 2 */ + 6835 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 94 24 first + 6836 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6837 "00101110" // /* MW 3 */ + 6838 "00000100" // /* MW 2 */ + 6839 "00000001" // /* MW 1 */ + 6840 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6841 "00000000" // /* MW 1 */ + 6842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6843 "00000000" // /* MW 1 */ + 6844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6845 "00000000" // /* MW 1 */ + 6846 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6847 "00000000" // /* MW 1 */ + 6848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6849 "00000000" // /* MW 1 */ + 6850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6851 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 94 22 + 6852 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6853 "00101001" // /* MW 3 */ + 6854 "00011100" // /* MW 2 */ + 6855 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 95 24 first + 6856 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6857 "00101110" // /* MW 3 */ + 6858 "00010100" // /* MW 2 */ + 6859 "00000001" // /* MW 1 */ + 6860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6861 "00000000" // /* MW 1 */ + 6862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6863 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 96 8 first +.no_stack_arguments + 6864 "00000100" // JL #6752 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=6752 delay_slots=5 */ + 6865 "00000001" // /* MW 5 */ + 6866 "00000000" // /* MW 4 */ + 6867 "00110000" // /* MW 3 */ + 6868 "00001101" // /* MW 2 */ + 6869 "00000000" // /* MW 1 */ +.delay_slot + 6870 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6871 "10011101" // /* MW 3 */ + 6872 "11111011" // /* MW 2 */ + 6873 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6874 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6875 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6877 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 95 22 first +.delay_slot + 6878 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6879 "00101001" // /* MW 3 */ + 6880 "11011100" // /* MW 2 */ + 6881 "00001000" // /* MW 1 */ +.src_ref 3 "mul_impl.h" 93 25 +.delay_slot + 6882 "00101110" // NOPA; NOPS; MOV p7, p0; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 6883 "00011100" // /* MW 13 */ + 6884 "00000000" // /* MW 12 */ + 6885 "00000000" // /* MW 11 */ + 6886 "00000111" // /* MW 10 */ + 6887 "00000110" // /* MW 9 */ + 6888 "01111011" // /* MW 8 */ + 6889 "00000000" // /* MW 7 */ + 6890 "00000000" // /* MW 6 */ + 6891 "10110110" // /* MW 5 */ + 6892 "00000010" // /* MW 4 */ + 6893 "11110000" // /* MW 3 */ + 6894 "00101100" // /* MW 2 */ + 6895 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 98 4 +.return_address + 6896 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6897 "00111001" // /* MW 3 */ + 6898 "11111100" // /* MW 2 */ + 6899 "00000111" // /* MW 1 */ + 6900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6901 "00000000" // /* MW 1 */ + 6902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6903 "00000000" // /* MW 1 */ + 6904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6905 "00000000" // /* MW 1 */ + 6906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6907 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 6908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6909 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.noswbrkpt + 6910 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6911 "10011001" // /* MW 3 */ + 6912 "11111011" // /* MW 2 */ + 6913 "00000111" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 98 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6914 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 6915 "00000000" // /* MW 3 */ + 6916 "00101000" // /* MW 2 */ + 6917 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6919 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6920 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6921 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6923 "00000000" // /* MW 1 */ +.src_ref 3 "mul_impl.h" 93 25 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6924 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6925 "00000001" // /* MW 3 */ + 6926 "00100000" // /* MW 2 */ + 6927 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 98 4 +.src_ref 3 "mul_impl.h" 93 25 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6928 "00111010" // ST r16, [p7, #16]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6929 "01110001" // /* MW 9 */ + 6930 "00000000" // /* MW 8 */ + 6931 "00000000" // /* MW 7 */ + 6932 "00000000" // /* MW 6 */ + 6933 "11111110" // /* MW 5 */ + 6934 "00111111" // /* MW 4 */ + 6935 "00110000" // /* MW 3 */ + 6936 "11000010" // /* MW 2 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + 6937 "11101000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.function run _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.src_ref 2 "elementwise_binary.h" 108 first +.src_ref 2 "elementwise_binary.h" 115 37 +.src_ref 2 "elementwise_binary.h" 115 37 +.function_start + 6944 "10111010" // MOVA m0, #32; MOVXM p3, #509184 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6945 "00010000" // /* MW 9 */ + 6946 "10000000" // /* MW 8 */ + 6947 "10110010" // /* MW 7 */ + 6948 "11110001" // /* MW 6 */ + 6949 "00000001" // /* MW 5 */ + 6950 "00000000" // /* MW 4 */ + 6951 "10000000" // /* MW 3 */ + 6952 "00000000" // /* MW 2 */ + 6953 "00000100" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 115 37 first +.src_ref 2 "elementwise_binary.h" 115 78 + 6954 "10111010" // LDA r1, [p3], m0; MOVXM p4, #508944 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6955 "00010000" // /* MW 9 */ + 6956 "00001000" // /* MW 8 */ + 6957 "00110010" // /* MW 7 */ + 6958 "11110010" // /* MW 6 */ + 6959 "00000001" // /* MW 5 */ + 6960 "00000000" // /* MW 4 */ + 6961 "11010000" // /* MW 3 */ + 6962 "00000110" // /* MW 2 */ + 6963 "01100001" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 115 78 +.src_ref 2 "elementwise_binary.h" 115 78 + 6964 "10111010" // LDA m1, [p3]; MOVX r0, #828; MOV r3, #-6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6965 "01011000" // /* MW 9 */ + 6966 "11111010" // /* MW 8 */ + 6967 "01101111" // /* MW 7 */ + 6968 "10001000" // /* MW 6 */ + 6969 "00000111" // /* MW 5 */ + 6970 "00011000" // /* MW 4 */ + 6971 "11010000" // /* MW 3 */ + 6972 "10010000" // /* MW 2 */ + 6973 "01100000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 115 78 +.src_ref 2 "elementwise_binary.h" 127 8 first + 6974 "10111010" // LDA m0, [p3, #4]; MOVXM ls, #7136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6975 "00010000" // /* MW 9 */ + 6976 "11110000" // /* MW 8 */ + 6977 "01111101" // /* MW 7 */ + 6978 "00000100" // /* MW 6 */ + 6979 "00000000" // /* MW 5 */ + 6980 "00000000" // /* MW 4 */ + 6981 "11010000" // /* MW 3 */ + 6982 "10000000" // /* MW 2 */ + 6983 "01100010" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 115 78 first +.src_ref 2 "elementwise_binary.h" 127 8 + 6984 "10111010" // LDA.s8 r2, [p4]; MOVXM le, #7152 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6985 "00010000" // /* MW 9 */ + 6986 "11111000" // /* MW 8 */ + 6987 "10111101" // /* MW 7 */ + 6988 "00000101" // /* MW 6 */ + 6989 "00000000" // /* MW 5 */ + 6990 "00000000" // /* MW 4 */ + 6991 "01010000" // /* MW 3 */ + 6992 "10001000" // /* MW 2 */ + 6993 "10000000" // /* MW 1 */ + 6994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6995 "00000000" // /* MW 1 */ + 6996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6997 "00000000" // /* MW 1 */ + 6998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6999 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 115 78 + 7000 "10011000" // LSHL r1, r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7001 "00111101" // /* MW 3 */ + 7002 "01000010" // /* MW 2 */ + 7003 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 127 8 first + 7004 "10011000" // ADD.NC lc, r1, #-7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7005 "11111100" // /* MW 3 */ + 7006 "01110000" // /* MW 2 */ + 7007 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first + 7008 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7009 "11101000" // /* MW 5 */ + 7010 "01010000" // /* MW 4 */ + 7011 "01110000" // /* MW 3 */ + 7012 "00010011" // /* MW 2 */ + 7013 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 154 20 +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.src_ref 2 "elementwise_binary.h" 177 20 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7014 "00010010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; MOVX crRnd, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7015 "10000000" // /* MW 7 */ + 7016 "10111010" // /* MW 6 */ + 7017 "01101000" // /* MW 5 */ + 7018 "01010000" // /* MW 4 */ + 7019 "01110000" // /* MW 3 */ + 7020 "00011011" // /* MW 2 */ + 7021 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7022 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7023 "11101000" // /* MW 5 */ + 7024 "01010000" // /* MW 4 */ + 7025 "01110000" // /* MW 3 */ + 7026 "00010011" // /* MW 2 */ + 7027 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7028 "00111100" // VLDA x3, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7029 "01101000" // /* MW 5 */ + 7030 "01010000" // /* MW 4 */ + 7031 "01110000" // /* MW 3 */ + 7032 "00011011" // /* MW 2 */ + 7033 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7034 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7035 "11101000" // /* MW 5 */ + 7036 "01010000" // /* MW 4 */ + 7037 "01110000" // /* MW 3 */ + 7038 "00010011" // /* MW 2 */ + 7039 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7040 "00111100" // VLDA x3, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7041 "01101000" // /* MW 5 */ + 7042 "01010000" // /* MW 4 */ + 7043 "01110000" // /* MW 3 */ + 7044 "00011011" // /* MW 2 */ + 7045 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7046 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7047 "11101000" // /* MW 5 */ + 7048 "01010000" // /* MW 4 */ + 7049 "01110000" // /* MW 3 */ + 7050 "00010011" // /* MW 2 */ + 7051 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7052 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7053 "01000001" // /* MW 9 */ + 7054 "11100010" // /* MW 8 */ + 7055 "00000000" // /* MW 7 */ + 7056 "00011101" // /* MW 6 */ + 7057 "00110100" // /* MW 5 */ + 7058 "00101000" // /* MW 4 */ + 7059 "01110000" // /* MW 3 */ + 7060 "00011011" // /* MW 2 */ + 7061 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7062 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7063 "01100001" // /* MW 9 */ + 7064 "11100000" // /* MW 8 */ + 7065 "00000001" // /* MW 7 */ + 7066 "00011101" // /* MW 6 */ + 7067 "01110100" // /* MW 5 */ + 7068 "00101000" // /* MW 4 */ + 7069 "01110000" // /* MW 3 */ + 7070 "00010011" // /* MW 2 */ + 7071 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7072 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7073 "01000001" // /* MW 9 */ + 7074 "11100010" // /* MW 8 */ + 7075 "00000000" // /* MW 7 */ + 7076 "00011101" // /* MW 6 */ + 7077 "00110100" // /* MW 5 */ + 7078 "00101000" // /* MW 4 */ + 7079 "01110000" // /* MW 3 */ + 7080 "00011011" // /* MW 2 */ + 7081 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7082 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7083 "01100001" // /* MW 9 */ + 7084 "11100000" // /* MW 8 */ + 7085 "00000001" // /* MW 7 */ + 7086 "00011101" // /* MW 6 */ + 7087 "01110100" // /* MW 5 */ + 7088 "00101000" // /* MW 4 */ + 7089 "01110000" // /* MW 3 */ + 7090 "00010011" // /* MW 2 */ + 7091 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7092 "01100110" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; NOPS; VMUL.f dm0, x1, x2, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7093 "01000001" // /* MW 11 */ + 7094 "11100010" // /* MW 10 */ + 7095 "00000000" // /* MW 9 */ + 7096 "10001110" // /* MW 8 */ + 7097 "10101101" // /* MW 7 */ + 7098 "00000000" // /* MW 6 */ + 7099 "01101000" // /* MW 5 */ + 7100 "01010000" // /* MW 4 */ + 7101 "01110000" // /* MW 3 */ + 7102 "00011011" // /* MW 2 */ + 7103 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7104 "00001011" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; NOPS; NOPX; NOPM; VMUL.f dm1, x0, x3, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7105 "00000011" // /* MW 15 */ + 7106 "00001111" // /* MW 14 */ + 7107 "01111000" // /* MW 13 */ + 7108 "10100101" // /* MW 12 */ + 7109 "00000001" // /* MW 11 */ + 7110 "00000000" // /* MW 10 */ + 7111 "00000000" // /* MW 9 */ + 7112 "00000000" // /* MW 8 */ + 7113 "01011011" // /* MW 7 */ + 7114 "00000001" // /* MW 6 */ + 7115 "11101000" // /* MW 5 */ + 7116 "01010000" // /* MW 4 */ + 7117 "01110000" // /* MW 3 */ + 7118 "00010011" // /* MW 2 */ + 7119 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7120 "00001011" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7121 "00010010" // /* MW 15 */ + 7122 "00000111" // /* MW 14 */ + 7123 "01111000" // /* MW 13 */ + 7124 "10100101" // /* MW 12 */ + 7125 "00000001" // /* MW 11 */ + 7126 "00000000" // /* MW 10 */ + 7127 "00000000" // /* MW 9 */ + 7128 "00000000" // /* MW 8 */ + 7129 "00100011" // /* MW 7 */ + 7130 "00011100" // /* MW 6 */ + 7131 "01101010" // /* MW 5 */ + 7132 "01010000" // /* MW 4 */ + 7133 "01110000" // /* MW 3 */ + 7134 "00011011" // /* MW 2 */ + 7135 "00100001" // /* MW 1 */ +.label ZLS_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_192 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.src_ref 2 "elementwise_binary.h" 154 20 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 7136 "00001011" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; VMUL.f dm1, x0, x3, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7137 "00000011" // /* MW 15 */ + 7138 "00001111" // /* MW 14 */ + 7139 "01111000" // /* MW 13 */ + 7140 "10100101" // /* MW 12 */ + 7141 "00000001" // /* MW 11 */ + 7142 "00000000" // /* MW 10 */ + 7143 "00000000" // /* MW 9 */ + 7144 "00000000" // /* MW 8 */ + 7145 "10100011" // /* MW 7 */ + 7146 "00011100" // /* MW 6 */ + 7147 "11101010" // /* MW 5 */ + 7148 "01010000" // /* MW 4 */ + 7149 "01110000" // /* MW 3 */ + 7150 "00010011" // /* MW 2 */ + 7151 "00100001" // /* MW 1 */ +.label ZLE_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_208 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7152 "00001011" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7153 "00010010" // /* MW 15 */ + 7154 "00000111" // /* MW 14 */ + 7155 "01111000" // /* MW 13 */ + 7156 "10100101" // /* MW 12 */ + 7157 "00000001" // /* MW 11 */ + 7158 "00000000" // /* MW 10 */ + 7159 "00000000" // /* MW 9 */ + 7160 "00000000" // /* MW 8 */ + 7161 "00100011" // /* MW 7 */ + 7162 "00011100" // /* MW 6 */ + 7163 "01101010" // /* MW 5 */ + 7164 "01010000" // /* MW 4 */ + 7165 "01110000" // /* MW 3 */ + 7166 "00011011" // /* MW 2 */ + 7167 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 7168 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7169 "01100001" // /* MW 7 */ + 7170 "11100000" // /* MW 6 */ + 7171 "00000001" // /* MW 5 */ + 7172 "00000010" // /* MW 4 */ + 7173 "01100000" // /* MW 3 */ + 7174 "10010100" // /* MW 2 */ + 7175 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7176 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7177 "01000001" // /* MW 7 */ + 7178 "11100010" // /* MW 6 */ + 7179 "00000000" // /* MW 5 */ + 7180 "00000010" // /* MW 4 */ + 7181 "01100000" // /* MW 3 */ + 7182 "10000100" // /* MW 2 */ + 7183 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7184 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7185 "01100001" // /* MW 7 */ + 7186 "11100000" // /* MW 6 */ + 7187 "00000001" // /* MW 5 */ + 7188 "00000010" // /* MW 4 */ + 7189 "01100000" // /* MW 3 */ + 7190 "10010100" // /* MW 2 */ + 7191 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7192 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7193 "01000001" // /* MW 7 */ + 7194 "11100010" // /* MW 6 */ + 7195 "00000000" // /* MW 5 */ + 7196 "00000010" // /* MW 4 */ + 7197 "01100000" // /* MW 3 */ + 7198 "10000100" // /* MW 2 */ + 7199 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7200 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7201 "01100001" // /* MW 7 */ + 7202 "11100000" // /* MW 6 */ + 7203 "00000001" // /* MW 5 */ + 7204 "00000010" // /* MW 4 */ + 7205 "01100000" // /* MW 3 */ + 7206 "10010100" // /* MW 2 */ + 7207 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7208 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7209 "01000001" // /* MW 7 */ + 7210 "11100010" // /* MW 6 */ + 7211 "00000000" // /* MW 5 */ + 7212 "00000010" // /* MW 4 */ + 7213 "01100000" // /* MW 3 */ + 7214 "10000100" // /* MW 2 */ + 7215 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7216 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7217 "01100001" // /* MW 7 */ + 7218 "11100000" // /* MW 6 */ + 7219 "00000001" // /* MW 5 */ + 7220 "00000010" // /* MW 4 */ + 7221 "01100000" // /* MW 3 */ + 7222 "10010100" // /* MW 2 */ + 7223 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7224 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7225 "00100011" // /* MW 3 */ + 7226 "00011100" // /* MW 2 */ + 7227 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 131 4 first +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7228 "01011100" // VST.CONV.bf16.fp32 cml1, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 7229 "00000000" // /* MW 5 */ + 7230 "01010000" // /* MW 4 */ + 7231 "01100000" // /* MW 3 */ + 7232 "10010100" // /* MW 2 */ + 7233 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7234 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7235 "00100011" // /* MW 3 */ + 7236 "00011100" // /* MW 2 */ + 7237 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 154 20 first +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7238 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7239 "10100011" // /* MW 3 */ + 7240 "00011100" // /* MW 2 */ + 7241 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.delay_slot + 7242 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7243 "00100011" // /* MW 3 */ + 7244 "00011100" // /* MW 2 */ + 7245 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 154 20 first +.delay_slot + 7246 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7247 "10100011" // /* MW 3 */ + 7248 "00011100" // /* MW 2 */ + 7249 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_end0 + 7251 "00000000" // /* MW 1 */ +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0 +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.function superkernel_mul1d _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.src_ref 6 "superkernels.cpp" 277 first +.src_ref 6 "superkernels.cpp" 282 6 +.function_start + 7264 "01000100" // MOVXM p4, #508864 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7265 "10000000" // /* MW 5 */ + 7266 "11000111" // /* MW 4 */ + 7267 "11001000" // /* MW 3 */ + 7268 "00000111" // /* MW 2 */ + 7269 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 282 6 first + 7270 "11010100" // LDA r16, [p4]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7271 "11000001" // /* MW 5 */ + 7272 "10110101" // /* MW 4 */ + 7273 "11011000" // /* MW 3 */ + 7274 "11000010" // /* MW 2 */ + 7275 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 277 + 7276 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7277 "00000001" // /* MW 5 */ + 7278 "00000000" // /* MW 4 */ + 7279 "00000000" // /* MW 3 */ + 7280 "00001000" // /* MW 2 */ + 7281 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 279 22 first + 7282 "00111010" // ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7283 "01111001" // /* MW 9 */ + 7284 "01100000" // /* MW 8 */ + 7285 "11001010" // /* MW 7 */ + 7286 "10000001" // /* MW 6 */ + 7287 "00010100" // /* MW 5 */ + 7288 "00100011" // /* MW 4 */ + 7289 "10110000" // /* MW 3 */ + 7290 "00111010" // /* MW 2 */ + 7291 "11111111" // /* MW 1 */ + 7292 "00000010" // ST p0, [sp, #-20]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7293 "01110000" // /* MW 7 */ + 7294 "11010000" // /* MW 6 */ + 7295 "00001011" // /* MW 5 */ + 7296 "00000000" // /* MW 4 */ + 7297 "10110000" // /* MW 3 */ + 7298 "10000011" // /* MW 2 */ + 7299 "11111101" // /* MW 1 */ + 7300 "10011000" // ST r0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7301 "00010101" // /* MW 3 */ + 7302 "11111100" // /* MW 2 */ + 7303 "00001111" // /* MW 1 */ + 7304 "10011000" // ST lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7305 "00111101" // /* MW 3 */ + 7306 "11110000" // /* MW 2 */ + 7307 "00001111" // /* MW 1 */ + 7308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7309 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 282 6 first +.src_ref 6 "superkernels.cpp" 282 16 first + 7310 "10000100" // JNZ r16, #7456 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7456 delay_slots=5 */ + 7311 "00000001" // /* MW 5 */ + 7312 "01000000" // /* MW 4 */ + 7313 "10010000" // /* MW 3 */ + 7314 "00001110" // /* MW 2 */ + 7315 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 279 30 first +.delay_slot + 7316 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7317 "11111011" // /* MW 3 */ + 7318 "01100011" // /* MW 2 */ + 7319 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 279 11 +.delay_slot + 7320 "01000100" // MOVXM p2, #508876 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7321 "10011000" // /* MW 5 */ + 7322 "11000111" // /* MW 4 */ + 7323 "11000100" // /* MW 3 */ + 7324 "00000111" // /* MW 2 */ + 7325 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 279 11 +.delay_slot + 7326 "00000010" // ST r17, [p2]; MOV p2, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7327 "01110000" // /* MW 7 */ + 7328 "01100000" // /* MW 6 */ + 7329 "00110111" // /* MW 5 */ + 7330 "00000001" // /* MW 4 */ + 7331 "00110000" // /* MW 3 */ + 7332 "11000110" // /* MW 2 */ + 7333 "01000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.delay_slot + 7334 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7335 "11000000" // /* MW 3 */ + 7336 "11010110" // /* MW 2 */ + 7337 "00011011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 285 4 +.src_ref 6 "superkernels.cpp" 287 28 +.src_ref 6 "superkernels.cpp" 289 42 +.src_ref 6 "superkernels.cpp" 301 2 +.delay_slot + 7338 "00111010" // ST p2, [sp, #-12]; MOVXM p7, #509184 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7339 "00010001" // /* MW 9 */ + 7340 "10000000" // /* MW 8 */ + 7341 "10110010" // /* MW 7 */ + 7342 "11110011" // /* MW 6 */ + 7343 "00000001" // /* MW 5 */ + 7344 "00000000" // /* MW 4 */ + 7345 "10110000" // /* MW 3 */ + 7346 "10100011" // /* MW 2 */ + 7347 "11111110" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.src_ref 6 "superkernels.cpp" 285 4 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7348 "00111010" // MOVS p0, p7; MOVXM p2, #508944 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7349 "00010001" // /* MW 9 */ + 7350 "00001000" // /* MW 8 */ + 7351 "00110010" // /* MW 7 */ + 7352 "11110001" // /* MW 6 */ + 7353 "00000001" // /* MW 5 */ + 7354 "00000000" // /* MW 4 */ + 7355 "01100000" // /* MW 3 */ + 7356 "10010001" // /* MW 2 */ + 7357 "00010011" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7358 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #508940 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7359 "00010000" // /* MW 9 */ + 7360 "00000110" // /* MW 8 */ + 7361 "00110010" // /* MW 7 */ + 7362 "11110001" // /* MW 6 */ + 7363 "00000001" // /* MW 5 */ + 7364 "00000000" // /* MW 4 */ + 7365 "11100000" // /* MW 3 */ + 7366 "11000000" // /* MW 2 */ + 7367 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7369 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 285 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 7370 "00000100" // JL #6784 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=6784 delay_slots=5 */ + 7371 "00000001" // /* MW 5 */ + 7372 "00000000" // /* MW 4 */ + 7373 "01000000" // /* MW 3 */ + 7374 "00001101" // /* MW 2 */ + 7375 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7377 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7379 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7380 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7381 "00110001" // /* MW 3 */ + 7382 "00100000" // /* MW 2 */ + 7383 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 7384 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7385 "00000101" // /* MW 3 */ + 7386 "00100000" // /* MW 2 */ + 7387 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 7388 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7389 "00010001" // /* MW 3 */ + 7390 "00000110" // /* MW 2 */ + 7391 "00001010" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 289 18 +.src_ref 6 "superkernels.cpp" 289 42 first +.return_address + 7392 "10111010" // LDA r16, [p7]; MOVXM p1, #508876 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7393 "00010000" // /* MW 9 */ + 7394 "11100110" // /* MW 8 */ + 7395 "10110001" // /* MW 7 */ + 7396 "11110000" // /* MW 6 */ + 7397 "00000001" // /* MW 5 */ + 7398 "00000000" // /* MW 4 */ + 7399 "11010000" // /* MW 3 */ + 7400 "11000010" // /* MW 2 */ + 7401 "11100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 289 16 +.src_ref 6 "superkernels.cpp" 289 18 +.src_ref 6 "superkernels.cpp" 298 48 + 7402 "10111010" // LDA r17, [p1]; MOVXM p3, #508880 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7403 "00010000" // /* MW 9 */ + 7404 "11101000" // /* MW 8 */ + 7405 "10110001" // /* MW 7 */ + 7406 "11110001" // /* MW 6 */ + 7407 "00000001" // /* MW 5 */ + 7408 "00000000" // /* MW 4 */ + 7409 "11010000" // /* MW 3 */ + 7410 "11000110" // /* MW 2 */ + 7411 "00100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 287 28 first +.src_ref 6 "superkernels.cpp" 290 16 +.src_ref 6 "superkernels.cpp" 299 48 + 7412 "10111010" // LDA.u16 r18, [p7, #10]; MOVXM p1, #508884 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7413 "00010000" // /* MW 9 */ + 7414 "11101010" // /* MW 8 */ + 7415 "10110001" // /* MW 7 */ + 7416 "11110000" // /* MW 6 */ + 7417 "00000001" // /* MW 5 */ + 7418 "00000000" // /* MW 4 */ + 7419 "01010000" // /* MW 3 */ + 7420 "11001011" // /* MW 2 */ + 7421 "11101010" // /* MW 1 */ + 7422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7423 "00000000" // /* MW 1 */ + 7424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7425 "00000000" // /* MW 1 */ + 7426 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7427 "00000000" // /* MW 1 */ + 7428 "10000100" // J #7472 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=7472 delay_slots=5 */ + 7429 "00000000" // /* MW 5 */ + 7430 "00000000" // /* MW 4 */ + 7431 "10011000" // /* MW 3 */ + 7432 "00001110" // /* MW 2 */ + 7433 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 287 13 +.delay_slot + 7434 "01000100" // MOVXM p2, #508936 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7435 "00010000" // /* MW 5 */ + 7436 "11001000" // /* MW 4 */ + 7437 "11000100" // /* MW 3 */ + 7438 "00000111" // /* MW 2 */ + 7439 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 289 27 first +.delay_slot + 7440 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7441 "00001111" // /* MW 3 */ + 7442 "01100001" // /* MW 2 */ + 7443 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 287 13 first +.delay_slot + 7444 "10011000" // ST r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7445 "01010001" // /* MW 3 */ + 7446 "00000110" // /* MW 2 */ + 7447 "00001010" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 289 16 first +.delay_slot + 7448 "10011000" // ST r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7449 "00010001" // /* MW 3 */ + 7450 "00000110" // /* MW 2 */ + 7451 "00001011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 290 16 first +.delay_slot + 7452 "10011000" // ST r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7453 "00010001" // /* MW 3 */ + 7454 "00000110" // /* MW 2 */ + 7455 "00001001" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 +.src_ref 6 "superkernels.cpp" 298 48 + 7456 "01000100" // MOVXM p3, #508880 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7457 "10100000" // /* MW 5 */ + 7458 "11000111" // /* MW 4 */ + 7459 "11000110" // /* MW 3 */ + 7460 "00000111" // /* MW 2 */ + 7461 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 299 48 + 7462 "10111010" // NOPA; MOVXM p1, #508884 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7463 "00010000" // /* MW 9 */ + 7464 "11101010" // /* MW 8 */ + 7465 "10110001" // /* MW 7 */ + 7466 "11110000" // /* MW 6 */ + 7467 "00000001" // /* MW 5 */ + 7468 "00000000" // /* MW 4 */ + 7469 "11110000" // /* MW 3 */ + 7470 "00101100" // /* MW 2 */ + 7471 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 +.src_ref 1 "io_buffer_main.h" 242 49 first + 7472 "00011000" // ADD.NC p0, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7473 "10000110" // /* MW 3 */ + 7474 "01100111" // /* MW 2 */ + 7475 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 293 2 + 7476 "10111010" // LDA r27, [p0], #-4; MOVXM p2, #508864 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7477 "00010000" // /* MW 9 */ + 7478 "11100000" // /* MW 8 */ + 7479 "00110001" // /* MW 7 */ + 7480 "11110001" // /* MW 6 */ + 7481 "00000001" // /* MW 5 */ + 7482 "00000000" // /* MW 4 */ + 7483 "11010000" // /* MW 3 */ + 7484 "11101110" // /* MW 2 */ + 7485 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 7486 "10011000" // LDA r16, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7487 "00010110" // /* MW 3 */ + 7488 "11111110" // /* MW 2 */ + 7489 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 7490 "10011000" // LDA r17, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7491 "00110110" // /* MW 3 */ + 7492 "11111110" // /* MW 2 */ + 7493 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 293 2 first + 7494 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7495 "01010110" // /* MW 3 */ + 7496 "00000110" // /* MW 2 */ + 7497 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first + 7498 "10011000" // LDA r19, [p0, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7499 "01110110" // /* MW 3 */ + 7500 "01000110" // /* MW 2 */ + 7501 "00000000" // /* MW 1 */ + 7502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7503 "00000000" // /* MW 1 */ + 7504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7505 "00000000" // /* MW 1 */ + 7506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7507 "00000000" // /* MW 1 */ + 7508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7509 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 7510 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7511 "00000010" // /* MW 3 */ + 7512 "01100001" // /* MW 2 */ + 7513 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 +.src_ref 6 "superkernels.cpp" 293 2 first + 7514 "01011100" // ST r16, [p0]; ADD r16, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7515 "00001110" // /* MW 5 */ + 7516 "01000000" // /* MW 4 */ + 7517 "00111001" // /* MW 3 */ + 7518 "11000010" // /* MW 2 */ + 7519 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 293 2 + 7520 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7521 "00010001" // /* MW 3 */ + 7522 "00000110" // /* MW 2 */ + 7523 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 +.src_ref 1 "io_buffer_main.h" 419 8 + 7524 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7525 "11111101" // /* MW 3 */ + 7526 "11100000" // /* MW 2 */ + 7527 "00010111" // /* MW 1 */ + 7528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7529 "00000000" // /* MW 1 */ + 7530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7531 "00000000" // /* MW 1 */ + 7532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7533 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 7534 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7535 "00001000" // /* MW 3 */ + 7536 "11010011" // /* MW 2 */ + 7537 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 first + 7538 "00011000" // ADD.NC p2, r14, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7539 "00000110" // /* MW 3 */ + 7540 "01100111" // /* MW 2 */ + 7541 "00011010" // /* MW 1 */ + 7542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7543 "00000000" // /* MW 1 */ + 7544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7545 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 + 7546 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7547 "01110110" // /* MW 3 */ + 7548 "11111111" // /* MW 2 */ + 7549 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 7550 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7551 "00110110" // /* MW 3 */ + 7552 "11111110" // /* MW 2 */ + 7553 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 7554 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7555 "01010110" // /* MW 3 */ + 7556 "11111110" // /* MW 2 */ + 7557 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 47 first + 7558 "10011000" // LDA r19, [p2, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7559 "01110110" // /* MW 3 */ + 7560 "01010110" // /* MW 2 */ + 7561 "00000010" // /* MW 1 */ + 7562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7563 "00000000" // /* MW 1 */ + 7564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7565 "00000000" // /* MW 1 */ + 7566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7567 "00000000" // /* MW 1 */ + 7568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7569 "00000000" // /* MW 1 */ + 7570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7571 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 7572 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7573 "00010010" // /* MW 3 */ + 7574 "10100011" // /* MW 2 */ + 7575 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 + 7576 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7577 "00110001" // /* MW 3 */ + 7578 "00000110" // /* MW 2 */ + 7579 "00001010" // /* MW 1 */ + 7580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7581 "00000000" // /* MW 1 */ + 7582 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7583 "00000000" // /* MW 1 */ + 7584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7585 "00000000" // /* MW 1 */ + 7586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7587 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 7588 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7589 "00001000" // /* MW 3 */ + 7590 "11010011" // /* MW 2 */ + 7591 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 32 +.src_ref 6 "superkernels.cpp" 298 46 +.src_ref 6 "superkernels.cpp" 299 46 + 7592 "00111010" // MOVS p6, p2; MOVX r16, #1; MOV r14, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7593 "01111001" // /* MW 9 */ + 7594 "01100000" // /* MW 8 */ + 7595 "11001110" // /* MW 7 */ + 7596 "00101001" // /* MW 6 */ + 7597 "00000000" // /* MW 5 */ + 7598 "00000001" // /* MW 4 */ + 7599 "01100000" // /* MW 3 */ + 7600 "00010001" // /* MW 2 */ + 7601 "11010001" // /* MW 1 */ + 7602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7603 "00000000" // /* MW 1 */ + 7604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7605 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 + 7606 "00011000" // LDA p4, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7607 "00011001" // /* MW 3 */ + 7608 "11101110" // /* MW 2 */ + 7609 "00000111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 298 48 first + 7610 "00001100" // LDA r17, [p3]; ST p0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7611 "00111011" // /* MW 5 */ + 7612 "11011000" // /* MW 4 */ + 7613 "11011111" // /* MW 3 */ + 7614 "11000110" // /* MW 2 */ + 7615 "01100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 299 48 first +.src_ref 6 "superkernels.cpp" 301 2 + 7616 "11010100" // LDA r20, [p1]; MOV p3, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7617 "10000001" // /* MW 5 */ + 7618 "11011101" // /* MW 4 */ + 7619 "11010110" // /* MW 3 */ + 7620 "11010010" // /* MW 2 */ + 7621 "00100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 7622 "10011000" // LDA r18, [p2], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7623 "01010110" // /* MW 3 */ + 7624 "01001110" // /* MW 2 */ + 7625 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 7626 "10011000" // LDA p2, [p0], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7627 "00011110" // /* MW 3 */ + 7628 "01011101" // /* MW 2 */ + 7629 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 40 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7630 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7631 "11000000" // /* MW 3 */ + 7632 "01100000" // /* MW 2 */ + 7633 "00011111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7634 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7635 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7636 "10011000" // LDA r19, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7637 "01110110" // /* MW 3 */ + 7638 "00000110" // /* MW 2 */ + 7639 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7641 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 301 2 first +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 7642 "00000100" // JL #6944 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=6944 delay_slots=5 */ + 7643 "00000001" // /* MW 5 */ + 7644 "00000000" // /* MW 4 */ + 7645 "10010000" // /* MW 3 */ + 7646 "00001101" // /* MW 2 */ + 7647 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 40 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7648 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7649 "11000000" // /* MW 3 */ + 7650 "11010100" // /* MW 2 */ + 7651 "00011011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 298 46 first +.delay_slot + 7652 "10011000" // LSHL r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7653 "00001101" // /* MW 3 */ + 7654 "01100011" // /* MW 2 */ + 7655 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 299 46 first +.delay_slot + 7656 "10011000" // LSHL r16, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7657 "00001101" // /* MW 3 */ + 7658 "00100001" // /* MW 2 */ + 7659 "00010101" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 299 46 +.delay_slot + 7660 "01011000" // ADD.NC p1, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7661 "01000001" // /* MW 3 */ + 7662 "01101001" // /* MW 2 */ + 7663 "00011001" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 298 46 first +.delay_slot + 7664 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7665 "00000000" // /* MW 15 */ + 7666 "00000000" // /* MW 14 */ + 7667 "10101000" // /* MW 13 */ + 7668 "11100010" // /* MW 12 */ + 7669 "00110100" // /* MW 11 */ + 7670 "00000000" // /* MW 10 */ + 7671 "00000000" // /* MW 9 */ + 7672 "00000000" // /* MW 8 */ + 7673 "01011011" // /* MW 7 */ + 7674 "00000001" // /* MW 6 */ + 7675 "00100000" // /* MW 5 */ + 7676 "00000000" // /* MW 4 */ + 7677 "11110000" // /* MW 3 */ + 7678 "00101100" // /* MW 2 */ + 7679 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 32 first +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 351 40 +.src_ref 1 "io_buffer_main.h" 449 8 +.src_ref 1 "io_buffer_main.h" 449 8 +.return_address + 7680 "10111010" // LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7681 "01111000" // /* MW 9 */ + 7682 "11010000" // /* MW 8 */ + 7683 "10110011" // /* MW 7 */ + 7684 "00101000" // /* MW 6 */ + 7685 "00000000" // /* MW 5 */ + 7686 "00000001" // /* MW 4 */ + 7687 "11010000" // /* MW 3 */ + 7688 "11000110" // /* MW 2 */ + 7689 "11001000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 19 + 7690 "01000100" // MOVXM p6, #508936 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7691 "00010000" // /* MW 5 */ + 7692 "11001000" // /* MW 4 */ + 7693 "11001100" // /* MW 3 */ + 7694 "00000111" // /* MW 2 */ + 7695 "00000000" // /* MW 1 */ + 7696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7697 "00000000" // /* MW 1 */ + 7698 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7699 "00000000" // /* MW 1 */ + 7700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7701 "00000000" // /* MW 1 */ + 7702 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7703 "00000000" // /* MW 1 */ + 7704 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7705 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 7706 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7707 "00001000" // /* MW 3 */ + 7708 "01010001" // /* MW 2 */ + 7709 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first + 7710 "10011000" // LDA r17, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7711 "00110110" // /* MW 3 */ + 7712 "11110110" // /* MW 2 */ + 7713 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 + 7714 "00011000" // LDA p2, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7715 "00011001" // /* MW 3 */ + 7716 "11101101" // /* MW 2 */ + 7717 "00000111" // /* MW 1 */ + 7718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7719 "00000000" // /* MW 1 */ + 7720 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7721 "00000000" // /* MW 1 */ + 7722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7723 "00000000" // /* MW 1 */ + 7724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7725 "00000000" // /* MW 1 */ + 7726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7727 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 + 7728 "10011000" // SUB r17, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7729 "00010001" // /* MW 3 */ + 7730 "00100011" // /* MW 2 */ + 7731 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first +.src_ref 1 "io_buffer_main.h" 351 28 + 7732 "00001100" // LDA r17, [p2, #20]; ST r17, [p1, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7733 "01100011" // /* MW 5 */ + 7734 "11101100" // /* MW 4 */ + 7735 "11010011" // /* MW 3 */ + 7736 "11000110" // /* MW 2 */ + 7737 "01001010" // /* MW 1 */ + 7738 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7739 "00000000" // /* MW 1 */ + 7740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7741 "00000000" // /* MW 1 */ + 7742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7743 "00000000" // /* MW 1 */ + 7744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7745 "00000000" // /* MW 1 */ + 7746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7747 "00000000" // /* MW 1 */ + 7748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7749 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 7750 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7751 "00001000" // /* MW 3 */ + 7752 "01010001" // /* MW 2 */ + 7753 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first +.src_ref 6 "superkernels.cpp" 305 6 +.src_ref 6 "superkernels.cpp" 306 14 + 7754 "10111010" // LDA r19, [p7, #-8]; MOVXM p1, #508864 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7755 "00010000" // /* MW 9 */ + 7756 "11100000" // /* MW 8 */ + 7757 "10110001" // /* MW 7 */ + 7758 "11110000" // /* MW 6 */ + 7759 "00000001" // /* MW 5 */ + 7760 "00000000" // /* MW 4 */ + 7761 "11010000" // /* MW 3 */ + 7762 "11001110" // /* MW 2 */ + 7763 "11111100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 19 first + 7764 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7765 "01010110" // /* MW 3 */ + 7766 "00000110" // /* MW 2 */ + 7767 "00000110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 6 + 7768 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7769 "00110110" // /* MW 3 */ + 7770 "00000110" // /* MW 2 */ + 7771 "00000001" // /* MW 1 */ + 7772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7773 "00000000" // /* MW 1 */ + 7774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7775 "00000000" // /* MW 1 */ + 7776 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7777 "00000000" // /* MW 1 */ + 7778 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7779 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 first + 7780 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7781 "00110001" // /* MW 3 */ + 7782 "00100001" // /* MW 2 */ + 7783 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 7784 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7785 "00010001" // /* MW 3 */ + 7786 "11100110" // /* MW 2 */ + 7787 "00001111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 16 first + 7788 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7789 "00101000" // /* MW 3 */ + 7790 "01100001" // /* MW 2 */ + 7791 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 6 + 7792 "10000100" // JNZ r16, #7824 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7824 delay_slots=5 */ + 7793 "00000001" // /* MW 5 */ + 7794 "01000000" // /* MW 4 */ + 7795 "01001000" // /* MW 3 */ + 7796 "00001111" // /* MW 2 */ + 7797 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7799 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7801 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7803 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7805 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7807 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 306 14 + 7808 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7809 "00000001" // /* MW 3 */ + 7810 "00100000" // /* MW 2 */ + 7811 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 306 14 first + 7812 "00110110" // NOPA; NOPB; ST r16, [p1]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7813 "11000001" // /* MW 11 */ + 7814 "00001000" // /* MW 10 */ + 7815 "10000011" // /* MW 9 */ + 7816 "00000000" // /* MW 8 */ + 7817 "00000000" // /* MW 7 */ + 7818 "00000000" // /* MW 6 */ + 7819 "00100000" // /* MW 5 */ + 7820 "00000000" // /* MW 4 */ + 7821 "11110000" // /* MW 3 */ + 7822 "00101100" // /* MW 2 */ + 7823 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 +.src_ref 6 "superkernels.cpp" 308 + 7824 "00011000" // LDA lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7825 "00111001" // /* MW 3 */ + 7826 "11110000" // /* MW 2 */ + 7827 "00000111" // /* MW 1 */ + 7828 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7829 "11110001" // /* MW 3 */ + 7830 "11111101" // /* MW 2 */ + 7831 "00000111" // /* MW 1 */ + 7832 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7833 "10011001" // /* MW 3 */ + 7834 "11110111" // /* MW 2 */ + 7835 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 7836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7837 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.noswbrkpt + 7838 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7839 "11010001" // /* MW 3 */ + 7840 "11111001" // /* MW 2 */ + 7841 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7843 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7845 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 308 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7846 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 7847 "00000000" // /* MW 3 */ + 7848 "00101000" // /* MW 2 */ + 7849 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7850 "00011000" // MOVS p6, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7851 "00001011" // /* MW 3 */ + 7852 "10001110" // /* MW 2 */ + 7853 "00001110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 308 +.delay_slot + 7854 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7855 "00000001" // /* MW 5 */ + 7856 "00000000" // /* MW 4 */ + 7857 "00000000" // /* MW 3 */ + 7858 "11111000" // /* MW 2 */ + 7859 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7861 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7863 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0 + 7865 "00000000" // /* MW 1 */ +.label _ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv +.label __ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv___func_begin0 +.function setup_gemm_bfp16_params _ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv +.src_ref 7 "gemm_bfp16_params.h" 128 first +.src_ref 7 "gemm_bfp16_params.h" 130 24 +.src_ref 7 "gemm_bfp16_params.h" 130 26 first +.function_start + 7872 "10111010" // LDA r3, [p0], #4; MOVXM p1, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7873 "00010000" // /* MW 9 */ + 7874 "00000000" // /* MW 8 */ + 7875 "10110001" // /* MW 7 */ + 7876 "11110000" // /* MW 6 */ + 7877 "00000001" // /* MW 5 */ + 7878 "00000000" // /* MW 4 */ + 7879 "11010000" // /* MW 3 */ + 7880 "10001110" // /* MW 2 */ + 7881 "00000011" // /* MW 1 */ +.src_ref 7 "gemm_bfp16_params.h" 58 39 +.src_ref 7 "gemm_bfp16_params.h" 59 38 +.src_ref 7 "gemm_bfp16_params.h" 61 39 +.src_ref 7 "gemm_bfp16_params.h" 71 52 +.src_ref 7 "gemm_bfp16_params.h" 86 29 +.src_ref 7 "gemm_bfp16_params.h" 93 56 + 7882 "10111010" // MOVA r29, #-2; MOVX r6, #-3; MOV r5, #-4 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7883 "01011000" // /* MW 9 */ + 7884 "11111100" // /* MW 8 */ + 7885 "10101111" // /* MW 7 */ + 7886 "10101000" // /* MW 6 */ + 7887 "01100111" // /* MW 5 */ + 7888 "00111110" // /* MW 4 */ + 7889 "00000000" // /* MW 3 */ + 7890 "11011101" // /* MW 2 */ + 7891 "11111111" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 +.src_ref 8 "aie.hpp" 6982 6 +.src_ref 8 "aie.hpp" 6982 6 +.src_ref 8 "aie.hpp" 6982 6 +.src_ref 8 "aie.hpp" 6982 6 +.src_ref 8 "aie.hpp" 7054 44 +.src_ref 8 "aie.hpp" 7056 79 +.src_ref 8 "aie.hpp" 7057 21 +.src_ref 8 "aie.hpp" 7072 95 +.src_ref 7 "gemm_bfp16_params.h" 44 26 +.src_ref 7 "gemm_bfp16_params.h" 44 26 +.src_ref 7 "gemm_bfp16_params.h" 80 39 +.src_ref 7 "gemm_bfp16_params.h" 99 73 +.src_ref 7 "gemm_bfp16_params.h" 138 24 + 7892 "10111010" // MOVA r24, #0; MOVX r1, #1; MOV r0, #8 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7893 "01011000" // /* MW 9 */ + 7894 "00001000" // /* MW 8 */ + 7895 "00001000" // /* MW 7 */ + 7896 "00101000" // /* MW 6 */ + 7897 "00010000" // /* MW 5 */ + 7898 "00000000" // /* MW 4 */ + 7899 "00000000" // /* MW 3 */ + 7900 "00011000" // /* MW 2 */ + 7901 "00000000" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7054 44 +.src_ref 8 "aie.hpp" 7072 95 +.src_ref 8 "aie.hpp" 7073 95 +.src_ref 7 "gemm_bfp16_params.h" 44 26 +.src_ref 7 "gemm_bfp16_params.h" 88 55 + 7902 "10111010" // MOVA r4, #256; MOVXM r28, #16777214 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7903 "00010000" // /* MW 9 */ + 7904 "11111111" // /* MW 8 */ + 7905 "10001111" // /* MW 7 */ + 7906 "11111111" // /* MW 6 */ + 7907 "00111111" // /* MW 5 */ + 7908 "00000000" // /* MW 4 */ + 7909 "00000000" // /* MW 3 */ + 7910 "00000100" // /* MW 2 */ + 7911 "00100000" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7053 42 +.src_ref 8 "aie.hpp" 7053 42 +.src_ref 8 "aie.hpp" 7053 42 +.src_ref 8 "aie.hpp" 7057 21 +.src_ref 7 "gemm_bfp16_params.h" 85 38 +.src_ref 7 "gemm_bfp16_params.h" 88 66 + 7912 "10111010" // MOVA r16, #7; MOVX r19, #9; MOV r2, #512 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7913 "01011000" // /* MW 9 */ + 7914 "00000000" // /* MW 8 */ + 7915 "01001010" // /* MW 7 */ + 7916 "00101000" // /* MW 6 */ + 7917 "00110001" // /* MW 5 */ + 7918 "00000001" // /* MW 4 */ + 7919 "00000000" // /* MW 3 */ + 7920 "11110000" // /* MW 2 */ + 7921 "00000000" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 +.src_ref 9 "tuple" 562 47 +.src_ref 9 "tuple" 562 47 +.src_ref 9 "tuple" 562 47 +.src_ref 8 "aie.hpp" 6982 6 +.src_ref 8 "aie.hpp" 6982 6 +.src_ref 8 "aie.hpp" 7056 79 +.src_ref 8 "aie.hpp" 7056 79 +.src_ref 7 "gemm_bfp16_params.h" 138 24 + 7922 "01100100" // MOVX r7, #128; MOV m0, #52 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7923 "11010001" // /* MW 5 */ + 7924 "00000000" // /* MW 4 */ + 7925 "00100000" // /* MW 3 */ + 7926 "11000000" // /* MW 2 */ + 7927 "00010001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16_params.h" 138 24 + 7928 "11111000" // MOV dj0, m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7929 "00000000" // /* MW 3 */ + 7930 "10000000" // /* MW 2 */ + 7931 "00011000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16_params.h" 58 39 first +.src_ref 7 "gemm_bfp16_params.h" 130 24 first + 7932 "01011100" // ST r3, [p1], #4; LSHL r27, r3, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7933 "11011011" // /* MW 5 */ + 7934 "11101100" // /* MW 4 */ + 7935 "00110001" // /* MW 3 */ + 7936 "10001110" // /* MW 2 */ + 7937 "00100011" // /* MW 1 */ +.src_ref 7 "gemm_bfp16_params.h" 93 56 first +.src_ref 7 "gemm_bfp16_params.h" 131 26 first + 7938 "00101100" // LDA r3, [p0], #4; LSHL r17, r3, r5 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7939 "10111011" // /* MW 5 */ + 7940 "11000100" // /* MW 4 */ + 7941 "11010001" // /* MW 3 */ + 7942 "10001110" // /* MW 2 */ + 7943 "00000011" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7054 59 first +.src_ref 7 "gemm_bfp16_params.h" 80 39 first + 7944 "00100100" // LSHL r31, r27, r0; ADD.NC r17, r17, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7945 "11111111" // /* MW 5 */ + 7946 "10110001" // /* MW 4 */ + 7947 "10111000" // /* MW 3 */ + 7948 "11000001" // /* MW 2 */ + 7949 "11011111" // /* MW 1 */ +.src_ref 7 "gemm_bfp16_params.h" 82 45 +.src_ref 7 "gemm_bfp16_params.h" 85 38 first + 7950 "10100100" // LSHL r19, r27, r19; ADD.NC r18, r31, r4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7951 "00100010" // /* MW 5 */ + 7952 "00111111" // /* MW 4 */ + 7953 "10111001" // /* MW 3 */ + 7954 "11100111" // /* MW 2 */ + 7955 "11011100" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7053 42 first + 7956 "10011000" // LSHL r22, r27, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7957 "00001101" // /* MW 3 */ + 7958 "11101101" // /* MW 2 */ + 7959 "00010110" // /* MW 1 */ + 7960 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7961 "00000000" // /* MW 1 */ + 7962 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7963 "00000000" // /* MW 1 */ + 7964 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7965 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16_params.h" 59 38 first +.src_ref 7 "gemm_bfp16_params.h" 131 24 first + 7966 "01011100" // ST r3, [p1], #4; LSHL r26, r3, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7967 "11011011" // /* MW 5 */ + 7968 "11101000" // /* MW 4 */ + 7969 "00110001" // /* MW 3 */ + 7970 "10001110" // /* MW 2 */ + 7971 "00100011" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7054 59 first +.src_ref 7 "gemm_bfp16_params.h" 132 26 first + 7972 "00101100" // LDA r21, [p0], #4; ADD r20, r26, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7973 "11111110" // /* MW 5 */ + 7974 "01010011" // /* MW 4 */ + 7975 "11011101" // /* MW 3 */ + 7976 "11010110" // /* MW 2 */ + 7977 "00000011" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7054 44 + 7978 "10011000" // MUL r23, r22, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7979 "01001111" // /* MW 3 */ + 7980 "10101111" // /* MW 2 */ + 7981 "00010101" // /* MW 1 */ + 7982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7983 "00000000" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7056 79 first + 7984 "10011000" // SUB r30, r7, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7985 "01110001" // /* MW 3 */ + 7986 "11111101" // /* MW 2 */ + 7987 "00010001" // /* MW 1 */ + 7988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7989 "00000000" // /* MW 1 */ + 7990 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7991 "00000000" // /* MW 1 */ + 7992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7993 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16_params.h" 65 24 first +.src_ref 7 "gemm_bfp16_params.h" 132 24 first + 7994 "01011100" // ST r21, [p1], #4; MUL r3, r3, r21 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7995 "10111111" // /* MW 5 */ + 7996 "10001110" // /* MW 4 */ + 7997 "00110001" // /* MW 3 */ + 7998 "11010110" // /* MW 2 */ + 7999 "00100011" // /* MW 1 */ +.src_ref 7 "gemm_bfp16_params.h" 61 39 first +.src_ref 7 "gemm_bfp16_params.h" 133 26 first + 8000 "00101100" // LDA el0, [p0], #4; LSHL r6, r21, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8001 "11011011" // /* MW 5 */ + 8002 "10011000" // /* MW 4 */ + 8003 "11011010" // /* MW 3 */ + 8004 "10000101" // /* MW 2 */ + 8005 "00000011" // /* MW 1 */ +.src_ref 7 "gemm_bfp16_params.h" 71 36 first +.src_ref 7 "gemm_bfp16_params.h" 88 55 + 8006 "10100100" // MUL r25, r27, r6; ADD.NC r28, r6, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8007 "11100010" // /* MW 5 */ + 8008 "00100110" // /* MW 4 */ + 8009 "11111110" // /* MW 3 */ + 8010 "01001101" // /* MW 2 */ + 8011 "11011110" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7057 21 first +.src_ref 7 "gemm_bfp16_params.h" 86 29 first + 8012 "10100100" // LSHL r5, r21, r5; ADD.NC r21, r26, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8013 "10100010" // /* MW 5 */ + 8014 "10111010" // /* MW 4 */ + 8015 "10111010" // /* MW 3 */ + 8016 "01001011" // /* MW 2 */ + 8017 "10101001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7054 44 +.src_ref 8 "aie.hpp" 7056 79 +.src_ref 7 "gemm_bfp16_params.h" 71 52 first +.src_ref 7 "gemm_bfp16_params.h" 86 38 + 8018 "10111010" // MOVA r25, #128; LSHL r29, r25, r29; ADD.NC r5, r5, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8019 "11001000" // /* MW 9 */ + 8020 "01111111" // /* MW 8 */ + 8021 "10101001" // /* MW 7 */ + 8022 "11101100" // /* MW 6 */ + 8023 "11011110" // /* MW 5 */ + 8024 "00110011" // /* MW 4 */ + 8025 "00000000" // /* MW 3 */ + 8026 "00011001" // /* MW 2 */ + 8027 "00010000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16_params.h" 88 66 first + 8028 "00011000" // MSC r2, r2, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8029 "11001110" // /* MW 3 */ + 8030 "11000101" // /* MW 2 */ + 8031 "00010111" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7053 42 first + 8032 "10011000" // LSHL r6, r6, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8033 "00001101" // /* MW 3 */ + 8034 "10001101" // /* MW 2 */ + 8035 "00010001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7057 21 first + 8036 "10011000" // LSHL r21, r21, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8037 "00001101" // /* MW 3 */ + 8038 "01101011" // /* MW 2 */ + 8039 "00010101" // /* MW 1 */ +.src_ref 7 "gemm_bfp16_params.h" 99 73 first +.src_ref 7 "gemm_bfp16_params.h" 133 24 first + 8040 "01011100" // ST el0, [p1], #4; LSHL r28, r26, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8041 "00011011" // /* MW 5 */ + 8042 "01110000" // /* MW 4 */ + 8043 "00111101" // /* MW 3 */ + 8044 "10000101" // /* MW 2 */ + 8045 "00100011" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7053 42 first +.src_ref 7 "gemm_bfp16_params.h" 134 26 first + 8046 "00101100" // LDA el0, [p0]; LSHL r16, r26, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8047 "00011011" // /* MW 5 */ + 8048 "01000010" // /* MW 4 */ + 8049 "11011101" // /* MW 3 */ + 8050 "10000101" // /* MW 2 */ + 8051 "00000000" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7056 79 first + 8052 "10011000" // SUB r27, r28, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8053 "01010001" // /* MW 3 */ + 8054 "00110111" // /* MW 2 */ + 8055 "00010111" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7054 44 first + 8056 "10011000" // LSHL r0, r5, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8057 "00001101" // /* MW 3 */ + 8058 "01000000" // /* MW 2 */ + 8059 "00010001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7054 44 +.src_ref 8 "aie.hpp" 7057 21 first + 8060 "00011000" // MAC r0, r0, r6, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8061 "01000110" // /* MW 3 */ + 8062 "10000001" // /* MW 2 */ + 8063 "00010001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7054 44 first +.src_ref 8 "aie.hpp" 7056 79 first + 8064 "00011000" // MSC r25, r25, r6, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8065 "01001110" // /* MW 3 */ + 8066 "10110011" // /* MW 2 */ + 8067 "00010001" // /* MW 1 */ + 8068 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8069 "00000000" // /* MW 1 */ + 8070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8071 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16_params.h" 134 24 first + 8072 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8073 "00101001" // /* MW 3 */ + 8074 "00011100" // /* MW 2 */ + 8075 "00001001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16_params.h" 135 26 first + 8076 "10011000" // LDA el0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8077 "00101110" // /* MW 3 */ + 8078 "00010100" // /* MW 2 */ + 8079 "00000000" // /* MW 1 */ + 8080 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8081 "00000000" // /* MW 1 */ + 8082 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8083 "00000000" // /* MW 1 */ + 8084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8085 "00000000" // /* MW 1 */ + 8086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8087 "00000000" // /* MW 1 */ + 8088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8089 "00000000" // /* MW 1 */ + 8090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8091 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16_params.h" 135 24 + 8092 "10011000" // ST el0, [p1], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8093 "00101001" // /* MW 3 */ + 8094 "00111100" // /* MW 2 */ + 8095 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7056 79 +.src_ref 7 "gemm_bfp16_params.h" 44 26 first + 8096 "00000010" // ST r3, [p1], #4; ADD.NC r3, r6, #-128 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8097 "00000000" // /* MW 7 */ + 8098 "10100000" // /* MW 6 */ + 8099 "01101001" // /* MW 5 */ + 8100 "00000000" // /* MW 4 */ + 8101 "00110000" // /* MW 3 */ + 8102 "10001110" // /* MW 2 */ + 8103 "00100011" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7056 79 first +.src_ref 7 "gemm_bfp16_params.h" 44 26 + 8104 "01011100" // ST r29, [p1], #4; SUB r29, r7, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8105 "00000011" // /* MW 5 */ + 8106 "11110110" // /* MW 4 */ + 8107 "00110011" // /* MW 3 */ + 8108 "11110110" // /* MW 2 */ + 8109 "00100011" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7056 79 +.src_ref 7 "gemm_bfp16_params.h" 44 26 first + 8110 "00000010" // ST r26, [p1], #4; ADD.NC r26, r22, #-128 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8111 "00000000" // /* MW 7 */ + 8112 "10100000" // /* MW 6 */ + 8113 "01001101" // /* MW 5 */ + 8114 "00000011" // /* MW 4 */ + 8115 "00110000" // /* MW 3 */ + 8116 "11101010" // /* MW 2 */ + 8117 "00100011" // /* MW 1 */ +.src_ref 7 "gemm_bfp16_params.h" 44 26 + 8118 "10011000" // ST r1, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8119 "00110001" // /* MW 3 */ + 8120 "00011100" // /* MW 2 */ + 8121 "00001001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16_params.h" 44 26 + 8122 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8123 "00010001" // /* MW 3 */ + 8124 "00011111" // /* MW 2 */ + 8125 "00001001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16_params.h" 44 26 + 8126 "10011000" // ST r31, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8127 "11110001" // /* MW 3 */ + 8128 "00011111" // /* MW 2 */ + 8129 "00001001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16_params.h" 44 26 + 8130 "10011000" // ST r4, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8131 "10010001" // /* MW 3 */ + 8132 "00011100" // /* MW 2 */ + 8133 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7072 95 first +.src_ref 7 "gemm_bfp16_params.h" 44 26 + 8134 "01011100" // ST r18, [p1], #4; ADD r18, r27, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8135 "00000001" // /* MW 5 */ + 8136 "11001010" // /* MW 4 */ + 8137 "00111101" // /* MW 3 */ + 8138 "11001010" // /* MW 2 */ + 8139 "00100011" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7073 95 first +.src_ref 7 "gemm_bfp16_params.h" 44 26 first + 8140 "01011100" // ST r19, [p1], #4; SUB r19, r4, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8141 "11100011" // /* MW 5 */ + 8142 "01001110" // /* MW 4 */ + 8143 "00110010" // /* MW 3 */ + 8144 "11001110" // /* MW 2 */ + 8145 "00100011" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7054 44 first +.src_ref 8 "aie.hpp" 7072 95 first +.src_ref 7 "gemm_bfp16_params.h" 44 26 + 8146 "01011100" // ST r5, [p1], #4; MSC r4, r4, r6, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8147 "10011100" // /* MW 5 */ + 8148 "00010010" // /* MW 4 */ + 8149 "00110011" // /* MW 3 */ + 8150 "10010110" // /* MW 2 */ + 8151 "00100011" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7056 79 +.src_ref 8 "aie.hpp" 7057 21 +.src_ref 7 "gemm_bfp16_params.h" 44 26 first + 8152 "01011100" // ST r2, [p1], #16; MOVX r2, #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8153 "00000010" // /* MW 5 */ + 8154 "00001000" // /* MW 4 */ + 8155 "00111111" // /* MW 3 */ + 8156 "10001010" // /* MW 2 */ + 8157 "00101001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7056 79 first +.src_ref 7 "gemm_bfp16_params.h" 44 26 + 8158 "01011100" // ST r24, [p1], #4; XOR r31, r23, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8159 "01001101" // /* MW 5 */ + 8160 "11111100" // /* MW 4 */ + 8161 "00111011" // /* MW 3 */ + 8162 "11100010" // /* MW 2 */ + 8163 "00100011" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7072 95 first +.src_ref 7 "gemm_bfp16_params.h" 44 26 first + 8164 "01011100" // ST r24, [p1], #-12; SUB r23, r24, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8165 "11100011" // /* MW 5 */ + 8166 "01011110" // /* MW 4 */ + 8167 "00111100" // /* MW 3 */ + 8168 "11100010" // /* MW 2 */ + 8169 "00111011" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7057 21 first +.src_ref 7 "gemm_bfp16_params.h" 44 26 + 8170 "01011100" // ST r24, [p1], #4; XOR r2, r2, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8171 "00001101" // /* MW 5 */ + 8172 "00001000" // /* MW 4 */ + 8173 "00110001" // /* MW 3 */ + 8174 "11100010" // /* MW 2 */ + 8175 "00100011" // /* MW 1 */ +.src_ref 8 "aie.hpp" 7057 21 +.src_ref 7 "gemm_bfp16_params.h" 44 26 first + 8176 "01011100" // ST r24, [p1], #-8; SUB r0, r24, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8177 "00000011" // /* MW 5 */ + 8178 "00000000" // /* MW 4 */ + 8179 "00111100" // /* MW 3 */ + 8180 "11100010" // /* MW 2 */ + 8181 "00111101" // /* MW 1 */ +.src_ref 7 "gemm_bfp16_params.h" 44 26 + 8182 "10011000" // ST r24, [p1], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8183 "00010001" // /* MW 3 */ + 8184 "01011111" // /* MW 2 */ + 8185 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 first + 8186 "10011000" // ST r17, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8187 "00110001" // /* MW 3 */ + 8188 "00011110" // /* MW 2 */ + 8189 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8190 "10011000" // ST r30, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8191 "11010001" // /* MW 3 */ + 8192 "00011111" // /* MW 2 */ + 8193 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8194 "10011000" // ST r5, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8195 "10110001" // /* MW 3 */ + 8196 "00011100" // /* MW 2 */ + 8197 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8198 "10011000" // ST r31, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8199 "11110001" // /* MW 3 */ + 8200 "00011111" // /* MW 2 */ + 8201 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8202 "10011000" // ST r20, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8203 "10010001" // /* MW 3 */ + 8204 "00011110" // /* MW 2 */ + 8205 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8206 "10011000" // ST r26, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8207 "01010001" // /* MW 3 */ + 8208 "00011111" // /* MW 2 */ + 8209 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8210 "10011000" // ST r1, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8211 "00110001" // /* MW 3 */ + 8212 "00011100" // /* MW 2 */ + 8213 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8214 "10011000" // ST r7, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8215 "11110001" // /* MW 3 */ + 8216 "00011100" // /* MW 2 */ + 8217 "00001001" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 first + 8218 "10011000" // ST r20, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8219 "10010001" // /* MW 3 */ + 8220 "00011110" // /* MW 2 */ + 8221 "00001001" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 + 8222 "10011000" // ST r22, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8223 "11010001" // /* MW 3 */ + 8224 "00011110" // /* MW 2 */ + 8225 "00001001" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 + 8226 "10011000" // ST r5, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8227 "10110001" // /* MW 3 */ + 8228 "00011100" // /* MW 2 */ + 8229 "00001001" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 + 8230 "10011000" // ST r23, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8231 "11110001" // /* MW 3 */ + 8232 "00011110" // /* MW 2 */ + 8233 "00001001" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 +.src_ref 8 "aie.hpp" 7054 44 first +.src_ref 8 "aie.hpp" 7057 21 first + 8234 "01011100" // ST r19, [p1], #4; MAC r21, r21, r5, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8235 "10001100" // /* MW 5 */ + 8236 "11010111" // /* MW 4 */ + 8237 "00110010" // /* MW 3 */ + 8238 "11001110" // /* MW 2 */ + 8239 "00100011" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 first + 8240 "10011000" // ST r7, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8241 "11110001" // /* MW 3 */ + 8242 "00011100" // /* MW 2 */ + 8243 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 first +.src_ref 8 "aie.hpp" 7056 79 first + 8244 "01011100" // ST r17, [p1], #4; SUB r28, r24, r21 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8245 "10100011" // /* MW 5 */ + 8246 "01110010" // /* MW 4 */ + 8247 "00111100" // /* MW 3 */ + 8248 "11000110" // /* MW 2 */ + 8249 "00100011" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 +.src_ref 8 "aie.hpp" 7073 95 first + 8250 "01011100" // ST r28, [p1], #4; SUB r21, r16, r21 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8251 "10100011" // /* MW 5 */ + 8252 "01010110" // /* MW 4 */ + 8253 "00111000" // /* MW 3 */ + 8254 "11110010" // /* MW 2 */ + 8255 "00100011" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 first + 8256 "10011000" // ST r5, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8257 "10110001" // /* MW 3 */ + 8258 "00011100" // /* MW 2 */ + 8259 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8260 "10011000" // ST r27, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8261 "01110001" // /* MW 3 */ + 8262 "00011111" // /* MW 2 */ + 8263 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8264 "10011000" // ST r20, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8265 "10010001" // /* MW 3 */ + 8266 "00011110" // /* MW 2 */ + 8267 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8268 "10011000" // ST r29, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8269 "10110001" // /* MW 3 */ + 8270 "00011111" // /* MW 2 */ + 8271 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8272 "10011000" // ST r1, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8273 "00110001" // /* MW 3 */ + 8274 "00011100" // /* MW 2 */ + 8275 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8276 "10011000" // ST r16, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8277 "00010001" // /* MW 3 */ + 8278 "00011110" // /* MW 2 */ + 8279 "00001001" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 first + 8280 "10011000" // ST r20, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8281 "10010001" // /* MW 3 */ + 8282 "00011110" // /* MW 2 */ + 8283 "00001001" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 + 8284 "10011000" // ST r7, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8285 "11110001" // /* MW 3 */ + 8286 "00011100" // /* MW 2 */ + 8287 "00001001" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 + 8288 "10011000" // ST r5, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8289 "10110001" // /* MW 3 */ + 8290 "00011100" // /* MW 2 */ + 8291 "00001001" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 + 8292 "10011000" // ST r18, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8293 "01010001" // /* MW 3 */ + 8294 "00011110" // /* MW 2 */ + 8295 "00001001" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 + 8296 "10011000" // ST r21, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8297 "10110001" // /* MW 3 */ + 8298 "00011110" // /* MW 2 */ + 8299 "00001001" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 + 8300 "10011000" // ST r16, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8301 "00010001" // /* MW 3 */ + 8302 "00011110" // /* MW 2 */ + 8303 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 first + 8304 "10011000" // ST r17, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8305 "00110001" // /* MW 3 */ + 8306 "00011110" // /* MW 2 */ + 8307 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8308 "10011000" // ST r2, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8309 "01010001" // /* MW 3 */ + 8310 "00011100" // /* MW 2 */ + 8311 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8312 "10011000" // ST r5, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8313 "10110001" // /* MW 3 */ + 8314 "00011100" // /* MW 2 */ + 8315 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8316 "10011000" // ST r25, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8317 "00110001" // /* MW 3 */ + 8318 "00011111" // /* MW 2 */ + 8319 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8320 "10011000" // ST r20, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8321 "10010001" // /* MW 3 */ + 8322 "00011110" // /* MW 2 */ + 8323 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8324 "10011000" // ST r3, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8325 "01110001" // /* MW 3 */ + 8326 "00011100" // /* MW 2 */ + 8327 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8328 "10011000" // ST r1, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8329 "00110001" // /* MW 3 */ + 8330 "00011100" // /* MW 2 */ + 8331 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8332 "10011000" // ST r7, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8333 "11110001" // /* MW 3 */ + 8334 "00011100" // /* MW 2 */ + 8335 "00001001" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 first + 8336 "10011000" // ST r20, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8337 "10010001" // /* MW 3 */ + 8338 "00011110" // /* MW 2 */ + 8339 "00001001" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 + 8340 "10011000" // ST r6, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8341 "11010001" // /* MW 3 */ + 8342 "00011100" // /* MW 2 */ + 8343 "00001001" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 + 8344 "10011000" // ST r5, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8345 "10110001" // /* MW 3 */ + 8346 "00011100" // /* MW 2 */ + 8347 "00001001" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 + 8348 "10011000" // ST r4, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8349 "10010001" // /* MW 3 */ + 8350 "00011100" // /* MW 2 */ + 8351 "00001001" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 + 8352 "10011000" // ST r0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8353 "00010001" // /* MW 3 */ + 8354 "00011100" // /* MW 2 */ + 8355 "00001001" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 + 8356 "10011000" // ST r7, [p1], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8357 "11110001" // /* MW 3 */ + 8358 "00001000" // /* MW 2 */ + 8359 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 first + 8360 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8361 "00010001" // /* MW 3 */ + 8362 "00011111" // /* MW 2 */ + 8363 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8364 "10011000" // ST r24, [p1], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8365 "00010001" // /* MW 3 */ + 8366 "11011111" // /* MW 2 */ + 8367 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8368 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8369 "00010001" // /* MW 3 */ + 8370 "00011111" // /* MW 2 */ + 8371 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8372 "10011000" // ST r24, [p1], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8373 "00010001" // /* MW 3 */ + 8374 "11011111" // /* MW 2 */ + 8375 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8376 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8377 "00010001" // /* MW 3 */ + 8378 "00011111" // /* MW 2 */ + 8379 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8380 "10011000" // ST r24, [p1], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8381 "00010001" // /* MW 3 */ + 8382 "11011111" // /* MW 2 */ + 8383 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8384 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8385 "00010001" // /* MW 3 */ + 8386 "00011111" // /* MW 2 */ + 8387 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8388 "10011000" // ST r24, [p1], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8389 "00010001" // /* MW 3 */ + 8390 "11011111" // /* MW 2 */ + 8391 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 + 8392 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8393 "00010001" // /* MW 3 */ + 8394 "00011111" // /* MW 2 */ + 8395 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 +.src_ref 7 "gemm_bfp16_params.h" 139 first + 8396 "01011100" // ST r24, [p1], #-12; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 8397 "00000000" // /* MW 5 */ + 8398 "01010000" // /* MW 4 */ + 8399 "00110000" // /* MW 3 */ + 8400 "11100010" // /* MW 2 */ + 8401 "00111011" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 first +.delay_slot + 8402 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8403 "00010001" // /* MW 3 */ + 8404 "00011111" // /* MW 2 */ + 8405 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 +.delay_slot + 8406 "10011000" // ST r24, [p1], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8407 "00010001" // /* MW 3 */ + 8408 "11011111" // /* MW 2 */ + 8409 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 +.delay_slot + 8410 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8411 "00010001" // /* MW 3 */ + 8412 "00011111" // /* MW 2 */ + 8413 "00001001" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 +.delay_slot + 8414 "10011000" // ST r24, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8415 "00010001" // /* MW 3 */ + 8416 "00000111" // /* MW 2 */ + 8417 "00001001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16_params.h" 138 24 first +.delay_slot + 8418 "10011000" // ST r24, [p1, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8419 "00010001" // /* MW 3 */ + 8420 "00000011" // /* MW 2 */ +.label _ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv__end +.label __ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv___func_end0 + 8421 "00001001" // /* MW 1 */ +.label __Z8init_accILt1EEvPaS0_iii___func_begin0 +.label _Z8init_accILt1EEvPaS0_iii +.function init_acc<(unsigned short)1> _Z8init_accILt1EEvPaS0_iii +.src_ref 7 "gemm_bfp16.h" 38 first +.src_ref 7 "gemm_bfp16.h" 41 47 +.function_start + 8432 "01000100" // MOVXM p2, #508944 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8433 "00100000" // /* MW 5 */ + 8434 "11001000" // /* MW 4 */ + 8435 "11000100" // /* MW 3 */ + 8436 "00000111" // /* MW 2 */ + 8437 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 38 + 8438 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8439 "00000001" // /* MW 5 */ + 8440 "00000000" // /* MW 4 */ + 8441 "00000000" // /* MW 3 */ + 8442 "00001000" // /* MW 2 */ + 8443 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 41 47 first + 8444 "10011000" // LDA.s8 r4, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8445 "10000010" // /* MW 3 */ + 8446 "00000100" // /* MW 2 */ + 8447 "00000010" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 68 6 first + 8448 "01000100" // MOVXM ls, #8608 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8449 "01000000" // /* MW 5 */ + 8450 "11100011" // /* MW 4 */ + 8451 "00100001" // /* MW 3 */ + 8452 "00000000" // /* MW 2 */ + 8453 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 41 47 +.src_ref 7 "gemm_bfp16.h" 68 6 + 8454 "10111010" // MOVA r26, #0; MOVXM le, #8672 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8455 "00010000" // /* MW 9 */ + 8456 "11110000" // /* MW 8 */ + 8457 "10111000" // /* MW 7 */ + 8458 "00001001" // /* MW 6 */ + 8459 "00000000" // /* MW 5 */ + 8460 "00000000" // /* MW 4 */ + 8461 "00000000" // /* MW 3 */ + 8462 "00011010" // /* MW 2 */ + 8463 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 53 4 +.src_ref 7 "gemm_bfp16.h" 53 29 + 8464 "10111010" // MOVA r5, #-4; MOVXM p3, #8560 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8465 "00010000" // /* MW 9 */ + 8466 "10111000" // /* MW 8 */ + 8467 "10110000" // /* MW 7 */ + 8468 "00001001" // /* MW 6 */ + 8469 "00000000" // /* MW 5 */ + 8470 "00000000" // /* MW 4 */ + 8471 "00000000" // /* MW 3 */ + 8472 "10000101" // /* MW 2 */ + 8473 "11111111" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 39 33 +.src_ref 7 "gemm_bfp16.h" 41 47 first +.src_ref 7 "gemm_bfp16.h" 53 29 first +.src_ref 7 "gemm_bfp16.h" 75 43 + 8474 "10111010" // MOVA r3, #5; LSHL r5, r1, r5; VINSERT.32 x1, x0, #0, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8475 "10111000" // /* MW 9 */ + 8476 "10101000" // /* MW 8 */ + 8477 "01000001" // /* MW 7 */ + 8478 "11101100" // /* MW 6 */ + 8479 "01010010" // /* MW 5 */ + 8480 "00000010" // /* MW 4 */ + 8481 "00000000" // /* MW 3 */ + 8482 "10100011" // /* MW 2 */ + 8483 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 39 33 first + 8484 "11100100" // LSHL r7, r0, r3; MOV p2, sp /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8485 "11000001" // /* MW 5 */ + 8486 "11001011" // /* MW 4 */ + 8487 "10110100" // /* MW 3 */ + 8488 "11000111" // /* MW 2 */ + 8489 "00000001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 41 47 +.src_ref 7 "gemm_bfp16.h" 75 43 first + 8490 "11100100" // LSHL r3, r2, r3; VMOV bmll0, x1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8491 "00100101" // /* MW 5 */ + 8492 "00000101" // /* MW 4 */ + 8493 "10110000" // /* MW 3 */ + 8494 "11000111" // /* MW 2 */ + 8495 "00010000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 41 47 +.src_ref 7 "gemm_bfp16.h" 42 54 + 8496 "11100100" // MOVX crRnd, r4; MOV r1, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8497 "10000001" // /* MW 5 */ + 8498 "10100101" // /* MW 4 */ + 8499 "00000000" // /* MW 3 */ + 8500 "01010000" // /* MW 2 */ + 8501 "00100111" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 41 47 first +.src_ref 7 "gemm_bfp16.h" 42 69 +.src_ref 7 "gemm_bfp16.h" 75 14 + 8502 "00110110" // PADDB [p2], #-64; VCONV.bf16.fp32 wl0, bmll0; MOVX r16, #1; MOV m1, r3 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8503 "01111000" // /* MW 11 */ + 8504 "11010000" // /* MW 10 */ + 8505 "10000000" // /* MW 9 */ + 8506 "00101000" // /* MW 8 */ + 8507 "00000000" // /* MW 7 */ + 8508 "00000001" // /* MW 6 */ + 8509 "00100000" // /* MW 5 */ + 8510 "11111111" // /* MW 4 */ + 8511 "11000101" // /* MW 3 */ + 8512 "00000010" // /* MW 2 */ + 8513 "00001000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 42 54 +.src_ref 7 "gemm_bfp16.h" 42 69 first +.src_ref 7 "gemm_bfp16.h" 75 43 + 8514 "10111010" // MOVA r6, #-3; EQ r27, r2, r16; MOV r3, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8515 "01111000" // /* MW 9 */ + 8516 "01100000" // /* MW 8 */ + 8517 "01101010" // /* MW 7 */ + 8518 "00111100" // /* MW 6 */ + 8519 "10111000" // /* MW 5 */ + 8520 "00000101" // /* MW 4 */ + 8521 "00000000" // /* MW 3 */ + 8522 "10100110" // /* MW 2 */ + 8523 "11111111" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 41 22 first +.src_ref 7 "gemm_bfp16.h" 41 47 first +.src_ref 7 "gemm_bfp16.h" 75 43 first + 8524 "10100100" // LSHL r0, r0, r6; VEXTBCST.16 x1, x0, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8525 "00000110" // /* MW 5 */ + 8526 "00000010" // /* MW 4 */ + 8527 "10110001" // /* MW 3 */ + 8528 "00001101" // /* MW 2 */ + 8529 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 42 54 first +.src_ref 7 "gemm_bfp16.h" 44 44 +.src_ref 7 "gemm_bfp16.h" 69 17 +.src_ref 7 "gemm_bfp16.h" 76 14 +.src_ref 7 "gemm_bfp16.h" 77 16 + 8530 "01111110" // NOPA; NOPB; MOVS p1, p0; SEL.EQZ r1, r3, r1, r27; MOV m0, r7 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 8531 "01100000" // /* MW 13 */ + 8532 "00010001" // /* MW 12 */ + 8533 "00110000" // /* MW 11 */ + 8534 "00001111" // /* MW 10 */ + 8535 "00111010" // /* MW 9 */ + 8536 "00000000" // /* MW 8 */ + 8537 "00010010" // /* MW 7 */ + 8538 "11000010" // /* MW 6 */ + 8539 "00100000" // /* MW 5 */ + 8540 "00000000" // /* MW 4 */ + 8541 "11110000" // /* MW 3 */ + 8542 "00101100" // /* MW 2 */ + 8543 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 41 22 first +.src_ref 7 "gemm_bfp16.h" 44 44 first +.src_ref 7 "gemm_bfp16.h" 54 24 +.src_ref 7 "gemm_bfp16.h" 75 14 + 8544 "11100001" // NOPA; PADDB [p0], m0; VST x1, [p2]; ADD r2, r5, #-1; MOV p2, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8545 "00000000" // /* MW 15 */ + 8546 "00000000" // /* MW 14 */ + 8547 "01111000" // /* MW 13 */ + 8548 "01010000" // /* MW 12 */ + 8549 "00110000" // /* MW 11 */ + 8550 "11111001" // /* MW 10 */ + 8551 "00101111" // /* MW 9 */ + 8552 "00001010" // /* MW 8 */ + 8553 "01010011" // /* MW 7 */ + 8554 "00000100" // /* MW 6 */ + 8555 "00100010" // /* MW 5 */ + 8556 "00010111" // /* MW 4 */ + 8557 "11110000" // /* MW 3 */ + 8558 "00101100" // /* MW 2 */ + 8559 "00000000" // /* MW 1 */ +.label TGT_F_Z8init_accILt1EEvPaS0_iii_128 +.src_ref 7 "gemm_bfp16.h" 54 24 first +.src_ref 7 "gemm_bfp16.h" 68 6 first +.loop_nesting 1 + 8560 "11110100" // VLDB wl0, [p2]; MOV lc, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8561 "01000001" // /* MW 5 */ + 8562 "11100000" // /* MW 4 */ + 8563 "10001010" // /* MW 3 */ + 8564 "10000100" // /* MW 2 */ + 8565 "01000000" // /* MW 1 */ + 8566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8567 "00000000" // /* MW 1 */ + 8568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8569 "00000000" // /* MW 1 */ + 8570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8571 "00000000" // /* MW 1 */ + 8572 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8573 "00000000" // /* MW 1 */ + 8574 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8575 "00000000" // /* MW 1 */ + 8576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8577 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 54 24 +.src_ref 7 "gemm_bfp16.h" 63 39 +.src_ref 7 "gemm_bfp16.h" 64 39 + 8578 "11111000" // VMOV wh0, wl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8579 "00100010" // /* MW 3 */ + 8580 "00000001" // /* MW 2 */ + 8581 "00011000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 54 24 +.src_ref 7 "gemm_bfp16.h" 63 39 first + 8582 "01011000" // VEXTBCST.128 x3, x0, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8583 "00000011" // /* MW 3 */ + 8584 "10000100" // /* MW 2 */ + 8585 "00011001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 54 24 first +.src_ref 7 "gemm_bfp16.h" 64 39 first + 8586 "01011000" // VEXTBCST.128 x1, x0, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8587 "00000111" // /* MW 3 */ + 8588 "10000100" // /* MW 2 */ + 8589 "00011000" // /* MW 1 */ + 8590 "11111000" // VCONV.fp32.bf16 cml0, x3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8591 "10001010" // /* MW 3 */ + 8592 "00000111" // /* MW 2 */ + 8593 "00011000" // /* MW 1 */ + 8594 "11111000" // VCONV.fp32.bf16 cmh0, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8595 "10001010" // /* MW 3 */ + 8596 "10000011" // /* MW 2 */ + 8597 "00011000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 69 17 first + 8598 "11111000" // VMOV bmll1, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8599 "00010010" // /* MW 3 */ + 8600 "00000000" // /* MW 2 */ + 8601 "00011001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 71 19 first + 8602 "11010100" // NOPA; VMOV bmlh1, bmhl0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8603 "00100101" // /* MW 5 */ + 8604 "10000100" // /* MW 4 */ + 8605 "11110010" // /* MW 3 */ + 8606 "00101100" // /* MW 2 */ + 8607 "00000000" // /* MW 1 */ +.label ZLS_F_Z8init_accILt1EEvPaS0_iii_176 +.src_ref 7 "gemm_bfp16.h" 69 17 first +.begin_of_loop +.loop_nesting 2 + 8608 "10011000" // VST bmlh0, [p1, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8609 "00100110" // /* MW 3 */ + 8610 "00010100" // /* MW 2 */ + 8611 "00001001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 69 17 + 8612 "10011000" // VST bmll1, [p1], #128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8613 "10000110" // /* MW 3 */ + 8614 "00101100" // /* MW 2 */ + 8615 "00001001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 70 17 first + 8616 "10011000" // VST bmlh0, [p1, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8617 "00100110" // /* MW 3 */ + 8618 "00010100" // /* MW 2 */ + 8619 "00001001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 70 17 + 8620 "10011000" // VST bmll1, [p1], #128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8621 "10000110" // /* MW 3 */ + 8622 "00101100" // /* MW 2 */ + 8623 "00001001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 71 19 first + 8624 "11100001" // NOPA; NOPB; VST bmhh0, [p0, #64]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8625 "00000000" // /* MW 15 */ + 8626 "00000000" // /* MW 14 */ + 8627 "01111000" // /* MW 13 */ + 8628 "10100101" // /* MW 12 */ + 8629 "00000001" // /* MW 11 */ + 8630 "00000000" // /* MW 10 */ + 8631 "00000000" // /* MW 9 */ + 8632 "10000000" // /* MW 8 */ + 8633 "01100110" // /* MW 7 */ + 8634 "00010100" // /* MW 6 */ + 8635 "00100000" // /* MW 5 */ + 8636 "00000000" // /* MW 4 */ + 8637 "11110000" // /* MW 3 */ + 8638 "00101100" // /* MW 2 */ + 8639 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 71 19 + 8640 "11100001" // NOPA; NOPB; VST bmlh1, [p0], #128; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8641 "00000000" // /* MW 15 */ + 8642 "00000000" // /* MW 14 */ + 8643 "01111000" // /* MW 13 */ + 8644 "10100101" // /* MW 12 */ + 8645 "00000001" // /* MW 11 */ + 8646 "00000000" // /* MW 10 */ + 8647 "00000000" // /* MW 9 */ + 8648 "10000000" // /* MW 8 */ + 8649 "10100110" // /* MW 7 */ + 8650 "00101100" // /* MW 6 */ + 8651 "00100000" // /* MW 5 */ + 8652 "00000000" // /* MW 4 */ + 8653 "11110000" // /* MW 3 */ + 8654 "00101100" // /* MW 2 */ + 8655 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 72 19 first + 8656 "11100001" // NOPA; NOPB; VST bmhh0, [p0, #64]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8657 "00000000" // /* MW 15 */ + 8658 "00000000" // /* MW 14 */ + 8659 "01111000" // /* MW 13 */ + 8660 "10100101" // /* MW 12 */ + 8661 "00000001" // /* MW 11 */ + 8662 "00000000" // /* MW 10 */ + 8663 "00000000" // /* MW 9 */ + 8664 "10000000" // /* MW 8 */ + 8665 "01100110" // /* MW 7 */ + 8666 "00010100" // /* MW 6 */ + 8667 "00100000" // /* MW 5 */ + 8668 "00000000" // /* MW 4 */ + 8669 "11110000" // /* MW 3 */ + 8670 "00101100" // /* MW 2 */ + 8671 "00000000" // /* MW 1 */ +.label ZLE_F_Z8init_accILt1EEvPaS0_iii_240 +.src_ref 7 "gemm_bfp16.h" 72 19 +.end_of_loop + 8672 "11100001" // NOPA; NOPB; VST bmlh1, [p0], #128; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8673 "00000000" // /* MW 15 */ + 8674 "00000000" // /* MW 14 */ + 8675 "01111000" // /* MW 13 */ + 8676 "10100101" // /* MW 12 */ + 8677 "00000001" // /* MW 11 */ + 8678 "00000000" // /* MW 10 */ + 8679 "00000000" // /* MW 9 */ + 8680 "10000000" // /* MW 8 */ + 8681 "10100110" // /* MW 7 */ + 8682 "00101100" // /* MW 6 */ + 8683 "00100000" // /* MW 5 */ + 8684 "00000000" // /* MW 4 */ + 8685 "11110000" // /* MW 3 */ + 8686 "00101100" // /* MW 2 */ + 8687 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 53 4 first +.src_ref 7 "gemm_bfp16.h" 75 14 first +.src_ref 7 "gemm_bfp16.h" 76 14 first +.loop_nesting 1 + 8688 "00010010" // PADDA [p1], m0; PADDB [p2], m1; JNZD r2, r2, p3 /* MW 8 */ /* control_operation: words=8 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 8689 "11100000" // /* MW 7 */ + 8690 "10000100" // /* MW 6 */ + 8691 "00100000" // /* MW 5 */ + 8692 "01010111" // /* MW 4 */ + 8693 "11110100" // /* MW 3 */ + 8694 "00001100" // /* MW 2 */ + 8695 "00100001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 77 16 first +.delay_slot + 8696 "00011000" // PADDB [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8697 "10010000" // /* MW 3 */ + 8698 "00001011" // /* MW 2 */ + 8699 "00111000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8701 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8702 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8703 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8704 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8705 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8707 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 80 first +.loop_nesting 0 + 8708 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 8709 "00000000" // /* MW 3 */ + 8710 "00101000" // /* MW 2 */ + 8711 "00010000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 80 +.delay_slot + 8712 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8713 "00000001" // /* MW 5 */ + 8714 "00000000" // /* MW 4 */ + 8715 "00000000" // /* MW 3 */ + 8716 "11111000" // /* MW 2 */ + 8717 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8719 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8720 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8721 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8723 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z8init_accILt1EEvPaS0_iii__end +.label __Z8init_accILt1EEvPaS0_iii___func_end0 + 8725 "00000000" // /* MW 1 */ +.label __Z12post_processPai___func_begin0 +.label _Z12post_processPai +.function post_process _Z12post_processPai +.src_ref 7 "gemm_bfp16.h" 83 first +.src_ref 7 "gemm_bfp16.h" 92 26 +.src_ref 7 "gemm_bfp16.h" 93 12 +.src_ref 7 "gemm_bfp16.h" 97 21 +.src_ref 7 "gemm_bfp16.h" 97 23 +.function_start + 8736 "01110110" // MOVA m0, #512; MOVS p2, p0; MOVXM p1, #508944 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8737 "00010000" // /* MW 11 */ + 8738 "00001000" // /* MW 10 */ + 8739 "10110010" // /* MW 9 */ + 8740 "11110000" // /* MW 8 */ + 8741 "00000001" // /* MW 7 */ + 8742 "00000000" // /* MW 6 */ + 8743 "10001011" // /* MW 5 */ + 8744 "10000000" // /* MW 4 */ + 8745 "10000010" // /* MW 3 */ + 8746 "00000000" // /* MW 2 */ + 8747 "01000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 92 26 +.src_ref 7 "gemm_bfp16.h" 94 26 +.src_ref 7 "gemm_bfp16.h" 94 26 +.src_ref 7 "gemm_bfp16.h" 95 26 +.src_ref 7 "gemm_bfp16.h" 96 26 + 8748 "10111010" // MOVA r1, #-7; MOVX r2, #0; MOV r4, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8749 "01011000" // /* MW 9 */ + 8750 "00000001" // /* MW 8 */ + 8751 "10001000" // /* MW 7 */ + 8752 "00001000" // /* MW 6 */ + 8753 "00100000" // /* MW 5 */ + 8754 "00000000" // /* MW 4 */ + 8755 "00000000" // /* MW 3 */ + 8756 "00100001" // /* MW 2 */ + 8757 "11111111" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 92 26 first +.src_ref 7 "gemm_bfp16.h" 93 12 +.src_ref 7 "gemm_bfp16.h" 94 14 +.src_ref 7 "gemm_bfp16.h" 94 26 first +.src_ref 7 "gemm_bfp16.h" 95 14 +.src_ref 7 "gemm_bfp16.h" 96 14 + 8758 "01110110" // LDA.s8 r24, [p1]; MOVS p1, p0; OR r16, r2, r4; MOV r3, #7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8759 "01011000" // /* MW 11 */ + 8760 "00000111" // /* MW 10 */ + 8761 "01101000" // /* MW 9 */ + 8762 "00101100" // /* MW 8 */ + 8763 "00000010" // /* MW 7 */ + 8764 "00000101" // /* MW 6 */ + 8765 "10001011" // /* MW 5 */ + 8766 "10000000" // /* MW 4 */ + 8767 "01010001" // /* MW 3 */ + 8768 "11100000" // /* MW 2 */ + 8769 "00100000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 92 26 +.src_ref 7 "gemm_bfp16.h" 93 12 first +.src_ref 7 "gemm_bfp16.h" 95 26 + 8770 "10111010" // VLDA bmlh1, [p1, #64]; LSHL r1, r0, r1; MOV r5, #2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8771 "01011000" // /* MW 9 */ + 8772 "00000010" // /* MW 8 */ + 8773 "10101000" // /* MW 7 */ + 8774 "11101100" // /* MW 6 */ + 8775 "00010000" // /* MW 5 */ + 8776 "00000000" // /* MW 4 */ + 8777 "10110000" // /* MW 3 */ + 8778 "10010110" // /* MW 2 */ + 8779 "00100010" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 93 12 +.src_ref 7 "gemm_bfp16.h" 94 14 +.src_ref 7 "gemm_bfp16.h" 94 14 first +.src_ref 7 "gemm_bfp16.h" 95 14 +.src_ref 7 "gemm_bfp16.h" 96 14 + 8780 "10111010" // VLDA bmll1, [p1], m0; LSHL r18, r16, r3; MOV r0, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8781 "01111000" // /* MW 9 */ + 8782 "01100000" // /* MW 8 */ + 8783 "00001000" // /* MW 7 */ + 8784 "11101100" // /* MW 6 */ + 8785 "00100001" // /* MW 5 */ + 8786 "00100001" // /* MW 4 */ + 8787 "10110000" // /* MW 3 */ + 8788 "00010010" // /* MW 2 */ + 8789 "00100001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 94 14 +.src_ref 7 "gemm_bfp16.h" 95 26 first +.src_ref 7 "gemm_bfp16.h" 96 26 + 8790 "10111010" // MOVA r6, #3; OR r7, r5, r2; ADD.NC p3, r18, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8791 "10101000" // /* MW 9 */ + 8792 "10000000" // /* MW 8 */ + 8793 "10110100" // /* MW 7 */ + 8794 "00101101" // /* MW 6 */ + 8795 "01110001" // /* MW 5 */ + 8796 "00001010" // /* MW 4 */ + 8797 "00000000" // /* MW 3 */ + 8798 "01100110" // /* MW 2 */ + 8799 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 92 2 first +.src_ref 7 "gemm_bfp16.h" 94 12 first +.src_ref 7 "gemm_bfp16.h" 95 14 + 8800 "10111010" // VLDA bmhh0, [p3, #64]; LSHL r19, r7, r3; ADD.NC lc, r1, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8801 "11001000" // /* MW 9 */ + 8802 "01111111" // /* MW 8 */ + 8803 "10111000" // /* MW 7 */ + 8804 "11101110" // /* MW 6 */ + 8805 "00110001" // /* MW 5 */ + 8806 "00001111" // /* MW 4 */ + 8807 "10110000" // /* MW 3 */ + 8808 "10001110" // /* MW 2 */ + 8809 "01100010" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 94 12 +.src_ref 7 "gemm_bfp16.h" 95 14 first +.src_ref 7 "gemm_bfp16.h" 96 26 first + 8810 "10111010" // VLDA bmhl0, [p3]; OR r17, r6, r2; ADD.NC p4, r19, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8811 "10101000" // /* MW 9 */ + 8812 "11000000" // /* MW 8 */ + 8813 "00110100" // /* MW 7 */ + 8814 "00101110" // /* MW 6 */ + 8815 "00010001" // /* MW 5 */ + 8816 "00001101" // /* MW 4 */ + 8817 "10110000" // /* MW 3 */ + 8818 "10001010" // /* MW 2 */ + 8819 "01100000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 95 12 +.src_ref 7 "gemm_bfp16.h" 96 14 + 8820 "10111010" // VLDA bmlh0, [p4, #64]; LSHL r20, r17, r3; ADD.NC r2, r2, #4 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8821 "00001000" // /* MW 9 */ + 8822 "10000001" // /* MW 8 */ + 8823 "01001000" // /* MW 7 */ + 8824 "11101100" // /* MW 6 */ + 8825 "01000001" // /* MW 5 */ + 8826 "00100011" // /* MW 4 */ + 8827 "10110000" // /* MW 3 */ + 8828 "10000110" // /* MW 2 */ + 8829 "10000010" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 95 12 +.src_ref 7 "gemm_bfp16.h" 96 14 +.src_ref 7 "gemm_bfp16.h" 97 21 +.src_ref 7 "gemm_bfp16.h" 97 23 +.src_ref 7 "gemm_bfp16.h" 98 4 +.src_ref 7 "gemm_bfp16.h" 98 21 +.src_ref 7 "gemm_bfp16.h" 98 23 +.src_ref 7 "gemm_bfp16.h" 99 4 +.src_ref 7 "gemm_bfp16.h" 99 21 +.src_ref 7 "gemm_bfp16.h" 99 23 +.src_ref 7 "gemm_bfp16.h" 100 4 +.src_ref 7 "gemm_bfp16.h" 100 21 +.src_ref 7 "gemm_bfp16.h" 100 23 + 8830 "10111010" // VLDA bmll0, [p4]; MOVX crRnd, r24; ADD.NC p5, r20, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8831 "10101000" // /* MW 9 */ + 8832 "00000000" // /* MW 8 */ + 8833 "10110101" // /* MW 7 */ + 8834 "00000010" // /* MW 6 */ + 8835 "11010100" // /* MW 5 */ + 8836 "00110001" // /* MW 4 */ + 8837 "10110000" // /* MW 3 */ + 8838 "10000010" // /* MW 2 */ + 8839 "10000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 92 2 first +.src_ref 7 "gemm_bfp16.h" 96 12 + 8840 "10111010" // VLDA bmhh1, [p5, #64]; MOVXM ls, #8880 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8841 "00010000" // /* MW 9 */ + 8842 "01011000" // /* MW 8 */ + 8843 "01111001" // /* MW 7 */ + 8844 "00001000" // /* MW 6 */ + 8845 "00000000" // /* MW 5 */ + 8846 "00000000" // /* MW 4 */ + 8847 "10110000" // /* MW 3 */ + 8848 "10011110" // /* MW 2 */ + 8849 "10100010" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 92 2 +.src_ref 7 "gemm_bfp16.h" 96 12 first + 8850 "10111010" // VLDA bmhl1, [p5]; MOVXM le, #8976 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8851 "00010000" // /* MW 9 */ + 8852 "10001000" // /* MW 8 */ + 8853 "10111001" // /* MW 7 */ + 8854 "00001001" // /* MW 6 */ + 8855 "00000000" // /* MW 5 */ + 8856 "00000000" // /* MW 4 */ + 8857 "10110000" // /* MW 3 */ + 8858 "10011010" // /* MW 2 */ + 8859 "10100000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 98 4 +.src_ref 7 "gemm_bfp16.h" 99 4 +.src_ref 7 "gemm_bfp16.h" 100 4 + 8860 "00011000" // MOVX r1, #6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8861 "00011001" // /* MW 3 */ + 8862 "00000010" // /* MW 2 */ + 8863 "00010000" // /* MW 1 */ + 8864 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8865 "00000000" // /* MW 15 */ + 8866 "00000000" // /* MW 14 */ + 8867 "01111000" // /* MW 13 */ + 8868 "10100101" // /* MW 12 */ + 8869 "00000001" // /* MW 11 */ + 8870 "00000000" // /* MW 10 */ + 8871 "00000000" // /* MW 9 */ + 8872 "00000000" // /* MW 8 */ + 8873 "01011011" // /* MW 7 */ + 8874 "00000001" // /* MW 6 */ + 8875 "00100000" // /* MW 5 */ + 8876 "00000000" // /* MW 4 */ + 8877 "11110000" // /* MW 3 */ + 8878 "00101100" // /* MW 2 */ + 8879 "00000000" // /* MW 1 */ +.label ZLS_F_Z12post_processPai_144 +.src_ref 7 "gemm_bfp16.h" 97 21 first +.src_ref 7 "gemm_bfp16.h" 97 23 first +.src_ref 7 "gemm_bfp16.h" 98 4 first +.begin_of_loop +.loop_nesting 1 + 8880 "01011100" // VST.CONV.bf16.fp32 cml1, [p2], #256;LSHL r21, r16, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8881 "00111011" // /* MW 5 */ + 8882 "01010100" // /* MW 4 */ + 8883 "01101000" // /* MW 3 */ + 8884 "10010100" // /* MW 2 */ + 8885 "01001001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 98 4 +.src_ref 7 "gemm_bfp16.h" 98 21 +.src_ref 7 "gemm_bfp16.h" 98 23 +.src_ref 7 "gemm_bfp16.h" 99 4 first + 8886 "11100100" // LSHL r22, r7, r1; MOV dj2, r21 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8887 "01000001" // /* MW 5 */ + 8888 "00010101" // /* MW 4 */ + 8889 "10110101" // /* MW 3 */ + 8890 "10000011" // /* MW 2 */ + 8891 "00111101" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 98 4 first +.src_ref 7 "gemm_bfp16.h" 98 21 first +.src_ref 7 "gemm_bfp16.h" 98 23 first +.src_ref 7 "gemm_bfp16.h" 99 4 +.src_ref 7 "gemm_bfp16.h" 99 21 +.src_ref 7 "gemm_bfp16.h" 99 23 +.src_ref 7 "gemm_bfp16.h" 100 4 first + 8892 "00111010" // VST.CONV.bf16.fp32 cmh0, [p0, dj2];LSHL r23, r17, r1; MOV dj0, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8893 "01111001" // /* MW 9 */ + 8894 "10010000" // /* MW 8 */ + 8895 "01000101" // /* MW 7 */ + 8896 "11101100" // /* MW 6 */ + 8897 "01110000" // /* MW 5 */ + 8898 "00100011" // /* MW 4 */ + 8899 "01100000" // /* MW 3 */ + 8900 "00001100" // /* MW 2 */ + 8901 "00001000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 94 26 first +.src_ref 7 "gemm_bfp16.h" 99 4 first +.src_ref 7 "gemm_bfp16.h" 99 21 first +.src_ref 7 "gemm_bfp16.h" 99 23 first +.src_ref 7 "gemm_bfp16.h" 100 4 +.src_ref 7 "gemm_bfp16.h" 100 21 +.src_ref 7 "gemm_bfp16.h" 100 23 + 8902 "00111010" // VST.CONV.bf16.fp32 cml0, [p0, dj0];OR r16, r2, r4; MOV dj1, r23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8903 "01111001" // /* MW 9 */ + 8904 "11010000" // /* MW 8 */ + 8905 "11000101" // /* MW 7 */ + 8906 "00101100" // /* MW 6 */ + 8907 "00000010" // /* MW 5 */ + 8908 "00000101" // /* MW 4 */ + 8909 "01100000" // /* MW 3 */ + 8910 "00000100" // /* MW 2 */ + 8911 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 95 26 first + 8912 "10011000" // OR r7, r5, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8913 "00100101" // /* MW 3 */ + 8914 "01001110" // /* MW 2 */ + 8915 "00010001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 96 26 first +.src_ref 7 "gemm_bfp16.h" 100 4 first +.src_ref 7 "gemm_bfp16.h" 100 21 first +.src_ref 7 "gemm_bfp16.h" 100 23 first + 8916 "00111010" // VST.CONV.bf16.fp32 cmh1, [p0, dj1];OR r17, r6, r2; ADD.NC r2, r2, #4 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8917 "00001001" // /* MW 9 */ + 8918 "10000001" // /* MW 8 */ + 8919 "01001000" // /* MW 7 */ + 8920 "00101100" // /* MW 6 */ + 8921 "00010001" // /* MW 5 */ + 8922 "00001101" // /* MW 4 */ + 8923 "01100000" // /* MW 3 */ + 8924 "00011100" // /* MW 2 */ + 8925 "00000100" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 93 12 first +.src_ref 7 "gemm_bfp16.h" 94 14 first + 8926 "00101100" // VLDA bmlh1, [p1, #64]; LSHL r18, r16, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8927 "01111011" // /* MW 5 */ + 8928 "01001000" // /* MW 4 */ + 8929 "10111000" // /* MW 3 */ + 8930 "10010110" // /* MW 2 */ + 8931 "00100010" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 93 12 +.src_ref 7 "gemm_bfp16.h" 94 14 +.src_ref 7 "gemm_bfp16.h" 95 14 first + 8932 "10111010" // VLDA bmll1, [p1], m0; LSHL r19, r7, r3; ADD.NC p3, r18, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8933 "10101000" // /* MW 9 */ + 8934 "10000000" // /* MW 8 */ + 8935 "10110100" // /* MW 7 */ + 8936 "11101101" // /* MW 6 */ + 8937 "00110001" // /* MW 5 */ + 8938 "00001111" // /* MW 4 */ + 8939 "10110000" // /* MW 3 */ + 8940 "00010010" // /* MW 2 */ + 8941 "00100001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 94 12 first +.src_ref 7 "gemm_bfp16.h" 95 14 +.src_ref 7 "gemm_bfp16.h" 96 14 first + 8942 "10111010" // VLDA bmhh0, [p3, #64]; LSHL r20, r17, r3; ADD.NC p4, r19, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8943 "10101000" // /* MW 9 */ + 8944 "11000000" // /* MW 8 */ + 8945 "00110100" // /* MW 7 */ + 8946 "11101110" // /* MW 6 */ + 8947 "01000001" // /* MW 5 */ + 8948 "00100011" // /* MW 4 */ + 8949 "10110000" // /* MW 3 */ + 8950 "10001110" // /* MW 2 */ + 8951 "01100010" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 94 12 +.src_ref 7 "gemm_bfp16.h" 96 14 + 8952 "10010100" // VLDA bmhl0, [p3]; ADD.NC p5, r20, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8953 "00000010" // /* MW 5 */ + 8954 "11010100" // /* MW 4 */ + 8955 "10111010" // /* MW 3 */ + 8956 "10001010" // /* MW 2 */ + 8957 "01100000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 95 12 first + 8958 "10011000" // VLDA bmlh0, [p4, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8959 "00110101" // /* MW 3 */ + 8960 "00010100" // /* MW 2 */ + 8961 "00000100" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 95 12 + 8962 "10011000" // VLDA bmll0, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8963 "00010101" // /* MW 3 */ + 8964 "00000100" // /* MW 2 */ + 8965 "00000100" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 96 12 first + 8966 "10011000" // VLDA bmhh1, [p5, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8967 "11110101" // /* MW 3 */ + 8968 "00010100" // /* MW 2 */ + 8969 "00000101" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 96 12 + 8970 "00111100" // VLDA bmhl1, [p5]; NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8971 "00100000" // /* MW 5 */ + 8972 "00000000" // /* MW 4 */ + 8973 "10110000" // /* MW 3 */ + 8974 "10011010" // /* MW 2 */ + 8975 "10100000" // /* MW 1 */ +.label ZLE_F_Z12post_processPai_240 +.end_of_loop + 8976 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8977 "00000000" // /* MW 15 */ + 8978 "00000000" // /* MW 14 */ + 8979 "01111000" // /* MW 13 */ + 8980 "10100101" // /* MW 12 */ + 8981 "00000001" // /* MW 11 */ + 8982 "00000000" // /* MW 10 */ + 8983 "00000000" // /* MW 9 */ + 8984 "00000000" // /* MW 8 */ + 8985 "01011011" // /* MW 7 */ + 8986 "00000001" // /* MW 6 */ + 8987 "00100000" // /* MW 5 */ + 8988 "00000000" // /* MW 4 */ + 8989 "11110000" // /* MW 3 */ + 8990 "00101100" // /* MW 2 */ + 8991 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 97 21 first +.src_ref 7 "gemm_bfp16.h" 97 23 first +.src_ref 7 "gemm_bfp16.h" 98 4 first +.loop_nesting 0 + 8992 "01011100" // VST.CONV.bf16.fp32 cml1, [p2], #256;LSHL r21, r16, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8993 "00111011" // /* MW 5 */ + 8994 "01010100" // /* MW 4 */ + 8995 "01101000" // /* MW 3 */ + 8996 "10010100" // /* MW 2 */ + 8997 "01001001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 98 4 +.src_ref 7 "gemm_bfp16.h" 98 21 +.src_ref 7 "gemm_bfp16.h" 98 23 +.src_ref 7 "gemm_bfp16.h" 102 first + 8998 "11100100" // RET lr; MOV dj2, r21 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 8999 "01000001" // /* MW 5 */ + 9000 "00010101" // /* MW 4 */ + 9001 "00000101" // /* MW 3 */ + 9002 "00000000" // /* MW 2 */ + 9003 "00000101" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 98 4 first +.src_ref 7 "gemm_bfp16.h" 98 21 first +.src_ref 7 "gemm_bfp16.h" 98 23 first +.src_ref 7 "gemm_bfp16.h" 99 4 first +.delay_slot + 9004 "01011100" // VST.CONV.bf16.fp32 cmh0, [p0, dj2];LSHL r22, r7, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9005 "00111011" // /* MW 5 */ + 9006 "11011000" // /* MW 4 */ + 9007 "01100011" // /* MW 3 */ + 9008 "00001100" // /* MW 2 */ + 9009 "00001000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 99 4 +.src_ref 7 "gemm_bfp16.h" 99 21 +.src_ref 7 "gemm_bfp16.h" 99 23 +.src_ref 7 "gemm_bfp16.h" 100 4 first +.delay_slot + 9010 "11100100" // LSHL r23, r17, r1; MOV dj0, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9011 "01000001" // /* MW 5 */ + 9012 "00010110" // /* MW 4 */ + 9013 "10110001" // /* MW 3 */ + 9014 "11000011" // /* MW 2 */ + 9015 "10001101" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 99 4 first +.src_ref 7 "gemm_bfp16.h" 99 21 first +.src_ref 7 "gemm_bfp16.h" 99 23 first +.src_ref 7 "gemm_bfp16.h" 100 4 +.src_ref 7 "gemm_bfp16.h" 100 21 +.src_ref 7 "gemm_bfp16.h" 100 23 +.delay_slot + 9016 "00000010" // VST.CONV.bf16.fp32 cml0, [p0, dj0]; MOV dj1, r23 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9017 "01110000" // /* MW 7 */ + 9018 "11010000" // /* MW 6 */ + 9019 "11000101" // /* MW 5 */ + 9020 "00000000" // /* MW 4 */ + 9021 "01100000" // /* MW 3 */ + 9022 "00000100" // /* MW 2 */ + 9023 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 100 4 first +.src_ref 7 "gemm_bfp16.h" 100 21 first +.src_ref 7 "gemm_bfp16.h" 100 23 first +.delay_slot + 9024 "00011000" // VST.CONV.bf16.fp32 cmh1, [p0, dj1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9025 "11100011" // /* MW 3 */ + 9026 "00100000" // /* MW 2 */ + 9027 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z12post_processPai__end +.label __Z12post_processPai___func_end0 + 9029 "00000000" // /* MW 1 */ +.label __Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params___func_begin0 +.label _Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params +.function gemm_bfp16 _Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params +.src_ref 7 "gemm_bfp16.h" 225 first +.src_ref 7 "gemm_bfp16.h" 231 12 +.src_ref 7 "gemm_bfp16.h" 231 12 +.function_start + 9040 "01110110" // MOVA m4, #-300; MOVS p4, p7; MOVXM p7, #508736 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9041 "00010000" // /* MW 11 */ + 9042 "10100000" // /* MW 10 */ + 9043 "10110001" // /* MW 9 */ + 9044 "11110011" // /* MW 8 */ + 9045 "00000001" // /* MW 7 */ + 9046 "00000000" // /* MW 6 */ + 9047 "10001011" // /* MW 5 */ + 9048 "10011100" // /* MW 4 */ + 9049 "10000100" // /* MW 3 */ + 9050 "10010000" // /* MW 2 */ + 9051 "11011010" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 231 12 first + 9052 "10011000" // LDA r16, [p7], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9053 "00010110" // /* MW 3 */ + 9054 "10001010" // /* MW 2 */ + 9055 "00000111" // /* MW 1 */ + 9056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9057 "00000000" // /* MW 1 */ + 9058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9059 "00000000" // /* MW 1 */ + 9060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9061 "00000000" // /* MW 1 */ +.src_ref 10 "aie_core.h" 100 15 +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 11 "array_helpers.hpp" 313 19 +.src_ref 5 "accum.hpp" 903 19 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 940 83 + 9062 "00000010" // MOVS p0, p6; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9063 "01110000" // /* MW 7 */ + 9064 "01100000" // /* MW 6 */ + 9065 "00110000" // /* MW 5 */ + 9066 "00000011" // /* MW 4 */ + 9067 "01100000" // /* MW 3 */ + 9068 "00010001" // /* MW 2 */ + 9069 "00010011" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 225 + 9070 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9071 "00000001" // /* MW 5 */ + 9072 "00000000" // /* MW 4 */ + 9073 "00000000" // /* MW 3 */ + 9074 "00001000" // /* MW 2 */ + 9075 "00000000" // /* MW 1 */ + 9076 "10011000" // ST p0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9077 "00011101" // /* MW 3 */ + 9078 "11111100" // /* MW 2 */ + 9079 "00001111" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 231 6 +.src_ref 7 "gemm_bfp16.h" 231 28 + 9080 "00111010" // ST p4, [sp, #-16]; JNZ r16, #9168 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9168 delay_slots=5 */ + 9081 "01100001" // /* MW 9 */ + 9082 "00000000" // /* MW 8 */ + 9083 "00010000" // /* MW 7 */ + 9084 "01111010" // /* MW 6 */ + 9085 "00000100" // /* MW 5 */ + 9086 "00100000" // /* MW 4 */ + 9087 "10110000" // /* MW 3 */ + 9088 "01000011" // /* MW 2 */ + 9089 "11111110" // /* MW 1 */ +.delay_slot + 9090 "10011000" // ST p2, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9091 "00011101" // /* MW 3 */ + 9092 "11110101" // /* MW 2 */ + 9093 "00001111" // /* MW 1 */ +.delay_slot + 9094 "10011000" // ST p1, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9095 "10011101" // /* MW 3 */ + 9096 "11101100" // /* MW 2 */ + 9097 "00001111" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 9098 "01110110" // MOVA r18, #1; ST lr, [sp, #-8]; MOVXM p0, #508940 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9099 "00010000" // /* MW 11 */ + 9100 "00000110" // /* MW 10 */ + 9101 "00110010" // /* MW 9 */ + 9102 "11110000" // /* MW 8 */ + 9103 "00000001" // /* MW 7 */ + 9104 "10000000" // /* MW 6 */ + 9105 "00111101" // /* MW 5 */ + 9106 "11111000" // /* MW 4 */ + 9107 "00000111" // /* MW 3 */ + 9108 "00110010" // /* MW 2 */ + 9109 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.src_ref 4 "tile.hpp" 86 8 +.src_ref 4 "tile.hpp" 86 8 +.delay_slot + 9110 "01110110" // MOVA r17, #11; ST r18, [p0]; MOVXM p0, #508944 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9111 "00010000" // /* MW 11 */ + 9112 "00001000" // /* MW 10 */ + 9113 "00110010" // /* MW 9 */ + 9114 "11110000" // /* MW 8 */ + 9115 "00000001" // /* MW 7 */ + 9116 "10000000" // /* MW 6 */ + 9117 "01010001" // /* MW 5 */ + 9118 "00000110" // /* MW 4 */ + 9119 "00000000" // /* MW 3 */ + 9120 "01110001" // /* MW 2 */ + 9121 "00000001" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 first +.src_ref 7 "gemm_bfp16.h" 235 66 +.delay_slot + 9122 "10111010" // ST.s8 r17, [p0]; MOVXM p5, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9123 "00010000" // /* MW 9 */ + 9124 "00000000" // /* MW 8 */ + 9125 "10110001" // /* MW 7 */ + 9126 "11110010" // /* MW 6 */ + 9127 "00000001" // /* MW 5 */ + 9128 "00000000" // /* MW 4 */ + 9129 "11100000" // /* MW 3 */ + 9130 "11000100" // /* MW 2 */ + 9131 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 235 4 +.src_ref 7 "gemm_bfp16.h" 235 66 first + 9132 "11010100" // LDA r0, [p5], #8; MOV p0, p2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9133 "10000001" // /* MW 5 */ + 9134 "11001001" // /* MW 4 */ + 9135 "11010000" // /* MW 3 */ + 9136 "10000010" // /* MW 2 */ + 9137 "10100101" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 235 79 + 9138 "10011000" // LDA r1, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9139 "00110110" // /* MW 3 */ + 9140 "00000100" // /* MW 2 */ + 9141 "00000101" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 235 92 + 9142 "10011000" // LDA r2, [p5, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9143 "01010110" // /* MW 3 */ + 9144 "00010100" // /* MW 2 */ + 9145 "00000101" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 235 4 +.no_stack_arguments + 9146 "00000100" // JL #8432 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8432 delay_slots=5 */ + 9147 "00000001" // /* MW 5 */ + 9148 "00000000" // /* MW 4 */ + 9149 "01111000" // /* MW 3 */ + 9150 "00010000" // /* MW 2 */ + 9151 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 235 4 +.delay_slot + 9152 "11111000" // MOV p1, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9153 "11000000" // /* MW 3 */ + 9154 "01100110" // /* MW 2 */ + 9155 "00011001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9157 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9159 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9161 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9162 "00111100" // NOPA; NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9163 "00100000" // /* MW 5 */ + 9164 "00000000" // /* MW 4 */ + 9165 "11110000" // /* MW 3 */ + 9166 "00101100" // /* MW 2 */ + 9167 "00000000" // /* MW 1 */ +.label TGT_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_128 +.src_ref 9 "tuple" 562 47 +.src_ref 4 "tile.hpp" 86 8 +.src_ref 7 "gemm_bfp16.h" 252 79 +.src_ref 7 "gemm_bfp16.h" 252 85 +.return_address + 9168 "10111010" // MOVA r16, #184; MOVX r18, #-184; MOV m4, #220 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9169 "01011000" // /* MW 9 */ + 9170 "11011100" // /* MW 8 */ + 9171 "00000000" // /* MW 7 */ + 9172 "00001010" // /* MW 6 */ + 9173 "00100001" // /* MW 5 */ + 9174 "00111011" // /* MW 4 */ + 9175 "00000000" // /* MW 3 */ + 9176 "00010000" // /* MW 2 */ + 9177 "00010111" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 252 85 first + 9178 "10011000" // LDA r27, [p7], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9179 "01110110" // /* MW 3 */ + 9180 "10001011" // /* MW 2 */ + 9181 "00000111" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 +.src_ref 7 "gemm_bfp16.h" 252 79 + 9182 "11111000" // MOV r19, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9183 "11000000" // /* MW 3 */ + 9184 "11011110" // /* MW 2 */ + 9185 "00011100" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 126 19 + 9186 "00011000" // ADD.NC r20, r19, #-56 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9187 "11100100" // /* MW 3 */ + 9188 "00011001" // /* MW 2 */ + 9189 "00011101" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 + 9190 "01011000" // ADD.NC p7, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9191 "01010001" // /* MW 3 */ + 9192 "01101001" // /* MW 2 */ + 9193 "00011111" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 +.src_ref 7 "gemm_bfp16.h" 252 79 + 9194 "00011000" // MOVX r17, #240 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9195 "11000001" // /* MW 3 */ + 9196 "11100010" // /* MW 2 */ + 9197 "00010000" // /* MW 1 */ + 9198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9199 "00000000" // /* MW 1 */ + 9200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9201 "00000000" // /* MW 1 */ +.src_ref 8 "aie.hpp" 6982 6 first +.src_ref 7 "gemm_bfp16.h" 252 79 + 9202 "00011000" // SEL.EQZ r18, r20, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9203 "00110010" // /* MW 3 */ + 9204 "00100101" // /* MW 2 */ + 9205 "00010101" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 first +.src_ref 9 "tuple" 562 47 first +.src_ref 7 "gemm_bfp16.h" 252 79 first + 9206 "00100100" // SEL.EQZ r16, r16, r17, r27; ADD.NC p3, r18, #4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9207 "00000100" // /* MW 5 */ + 9208 "11010010" // /* MW 4 */ + 9209 "01000110" // /* MW 3 */ + 9210 "00100010" // /* MW 2 */ + 9211 "10000100" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 +.src_ref 7 "gemm_bfp16.h" 134 10 first +.src_ref 7 "gemm_bfp16.h" 252 79 + 9212 "10111010" // LDA dj1, [p3], #4; JZ r27, #9472 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9472 delay_slots=5 */ + 9213 "01100000" // /* MW 9 */ + 9214 "00000000" // /* MW 8 */ + 9215 "00000000" // /* MW 7 */ + 9216 "10100000" // /* MW 6 */ + 9217 "00000100" // /* MW 5 */ + 9218 "00110110" // /* MW 4 */ + 9219 "11010000" // /* MW 3 */ + 9220 "10011000" // /* MW 2 */ + 9221 "01100011" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 +.src_ref 9 "tuple" 562 47 +.src_ref 9 "tuple" 562 47 first +.delay_slot + 9222 "11010100" // LDA dn5, [p3], #4; MOV dj3, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9223 "01000001" // /* MW 5 */ + 9224 "00010000" // /* MW 4 */ + 9225 "11010111" // /* MW 3 */ + 9226 "11010100" // /* MW 2 */ + 9227 "01100011" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 +.delay_slot + 9228 "10011000" // LDA dj5, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9229 "11000110" // /* MW 3 */ + 9230 "00011110" // /* MW 2 */ + 9231 "00000011" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 +.src_ref 9 "tuple" 562 47 +.delay_slot + 9232 "10011000" // LDA dn1, [p7, dj3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9233 "10100110" // /* MW 3 */ + 9234 "01100000" // /* MW 2 */ + 9235 "00000111" // /* MW 1 */ +.src_ref 9 "tuple" 562 47 +.delay_slot + 9236 "10011000" // LDA r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9237 "00010110" // /* MW 3 */ + 9238 "00000110" // /* MW 2 */ + 9239 "00000011" // /* MW 1 */ +.src_ref 9 "tuple" 562 49 +.delay_slot + 9240 "10011000" // LDA m4, [p3, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9241 "00000110" // /* MW 3 */ + 9242 "00010110" // /* MW 2 */ + 9243 "00000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 109 101 +.src_ref 7 "gemm_bfp16.h" 113 16 +.src_ref 7 "gemm_bfp16.h" 135 60 + 9244 "10111010" // LDA p3, [sp, #-20]; MOVXM p2, #508448 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9245 "00010000" // /* MW 9 */ + 9246 "00010000" // /* MW 8 */ + 9247 "00110001" // /* MW 7 */ + 9248 "11110001" // /* MW 6 */ + 9249 "00000001" // /* MW 5 */ + 9250 "00000000" // /* MW 4 */ + 9251 "00100000" // /* MW 3 */ + 9252 "10110011" // /* MW 2 */ + 9253 "11111101" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 110 4 first +.src_ref 7 "gemm_bfp16.h" 135 60 first + 9254 "10111010" // LDA r19, [p2]; MOVXM ls, #9344 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9255 "00010000" // /* MW 9 */ + 9256 "01000000" // /* MW 8 */ + 9257 "01111010" // /* MW 7 */ + 9258 "00001000" // /* MW 6 */ + 9259 "00000000" // /* MW 5 */ + 9260 "00000000" // /* MW 4 */ + 9261 "11010000" // /* MW 3 */ + 9262 "11001110" // /* MW 2 */ + 9263 "01000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 110 4 +.src_ref 7 "gemm_bfp16.h" 135 68 + 9264 "10111010" // MOVA r20, #-6; MOVXM le, #9408 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9265 "00010000" // /* MW 9 */ + 9266 "01100000" // /* MW 8 */ + 9267 "10111010" // /* MW 7 */ + 9268 "00001001" // /* MW 6 */ + 9269 "00000000" // /* MW 5 */ + 9270 "00000000" // /* MW 4 */ + 9271 "00000000" // /* MW 3 */ + 9272 "01010100" // /* MW 2 */ + 9273 "11111111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 109 101 +.src_ref 4 "vector.hpp" 109 101 +.src_ref 4 "vector.hpp" 1365 19 +.src_ref 4 "transpose.hpp" 224 15 +.src_ref 4 "transpose.hpp" 225 15 + 9274 "01100100" // MOVX r17, #52; MOV r18, #53 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9275 "11010101" // /* MW 5 */ + 9276 "00100000" // /* MW 4 */ + 9277 "00101001" // /* MW 3 */ + 9278 "01011010" // /* MW 2 */ + 9279 "00000100" // /* MW 1 */ + 9280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9281 "00000000" // /* MW 1 */ + 9282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9283 "00000000" // /* MW 1 */ + 9284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9285 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 109 101 + 9286 "11111000" // MOV p2, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9287 "11000000" // /* MW 3 */ + 9288 "01100110" // /* MW 2 */ + 9289 "00011010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 109 101 first +.src_ref 7 "gemm_bfp16.h" 135 68 + 9290 "00101100" // VLDA lfh0, [p2, #64]; LSHL r19, r19, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9291 "10011011" // /* MW 5 */ + 9292 "11001110" // /* MW 4 */ + 9293 "11111001" // /* MW 3 */ + 9294 "10000000" // /* MW 2 */ + 9295 "01000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 109 101 +.src_ref 7 "gemm_bfp16.h" 110 4 first + 9296 "00010100" // VLDA lfl0, [p2], #128; ADD.NC lc, r19, #-2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9297 "11111110" // /* MW 5 */ + 9298 "11110011" // /* MW 4 */ + 9299 "11111010" // /* MW 3 */ + 9300 "10010000" // /* MW 2 */ + 9301 "01000101" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9303 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 109 101 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9304 "10011000" // VLDA lfh0, [p2, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9305 "00000111" // /* MW 3 */ + 9306 "00010100" // /* MW 2 */ + 9307 "00000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 109 101 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9308 "10011000" // VLDA lfl0, [p2], #128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9309 "10000111" // /* MW 3 */ + 9310 "00101100" // /* MW 2 */ + 9311 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9313 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9315 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 109 101 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9316 "11111000" // VMOV x8, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9317 "10010010" // /* MW 3 */ + 9318 "00100001" // /* MW 2 */ + 9319 "00011100" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9320 "00000010" // NOPS; VMOV x1, lfh0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9321 "01110000" // /* MW 7 */ + 9322 "11001001" // /* MW 6 */ + 9323 "01010000" // /* MW 5 */ + 9324 "00000000" // /* MW 4 */ + 9325 "01100000" // /* MW 3 */ + 9326 "00101011" // /* MW 2 */ + 9327 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9328 "11100001" // NOPA; NOPB; NOPS; NOPX; VMOV x0, lfl0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9329 "00000000" // /* MW 15 */ + 9330 "00000000" // /* MW 14 */ + 9331 "01111000" // /* MW 13 */ + 9332 "11001001" // /* MW 12 */ + 9333 "00010010" // /* MW 11 */ + 9334 "00000000" // /* MW 10 */ + 9335 "00000000" // /* MW 9 */ + 9336 "00000000" // /* MW 8 */ + 9337 "01011011" // /* MW 7 */ + 9338 "00000001" // /* MW 6 */ + 9339 "00100000" // /* MW 5 */ + 9340 "00000000" // /* MW 4 */ + 9341 "11110000" // /* MW 3 */ + 9342 "00101100" // /* MW 2 */ + 9343 "00000000" // /* MW 1 */ +.label ZLS_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_304 +.src_ref 4 "vector.hpp" 109 101 +.src_ref 4 "vector.hpp" 109 101 +.src_ref 4 "vector.hpp" 1365 19 first +.src_ref 4 "transpose.hpp" 225 15 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.noswbrkpt +.loop_nesting 1 + 9344 "11100001" // VLDA lfh0, [p2, #64]; NOPB; NOPS; NOPX; VSHUFFLE x3, x0, x8, r18; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9345 "00000000" // /* MW 15 */ + 9346 "00000000" // /* MW 14 */ + 9347 "00111000" // /* MW 13 */ + 9348 "00100100" // /* MW 12 */ + 9349 "11000010" // /* MW 11 */ + 9350 "00000000" // /* MW 10 */ + 9351 "00000000" // /* MW 9 */ + 9352 "00000000" // /* MW 8 */ + 9353 "01011011" // /* MW 7 */ + 9354 "00000001" // /* MW 6 */ + 9355 "00100000" // /* MW 5 */ + 9356 "00000000" // /* MW 4 */ + 9357 "11110000" // /* MW 3 */ + 9358 "10000000" // /* MW 2 */ + 9359 "01000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 109 101 first +.src_ref 4 "vector.hpp" 109 101 first +.src_ref 4 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9360 "11100001" // VLDA lfl0, [p2], #128; NOPB; NOPS; NOPX; VSHUFFLE x2, x0, x8, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9361 "00000000" // /* MW 15 */ + 9362 "00000000" // /* MW 14 */ + 9363 "00111000" // /* MW 13 */ + 9364 "00100010" // /* MW 12 */ + 9365 "10000010" // /* MW 11 */ + 9366 "00000000" // /* MW 10 */ + 9367 "00000000" // /* MW 9 */ + 9368 "00000000" // /* MW 8 */ + 9369 "01011011" // /* MW 7 */ + 9370 "00000001" // /* MW 6 */ + 9371 "00100000" // /* MW 5 */ + 9372 "00000000" // /* MW 4 */ + 9373 "11110000" // /* MW 3 */ + 9374 "10010000" // /* MW 2 */ + 9375 "01000101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 109 101 +.src_ref 7 "gemm_bfp16.h" 113 16 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9376 "11100001" // NOPA; NOPB; VST x3, [p3, #64]; NOPX; VMOV x8, lfh0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9377 "00000000" // /* MW 15 */ + 9378 "00000000" // /* MW 14 */ + 9379 "01111000" // /* MW 13 */ + 9380 "11001001" // /* MW 12 */ + 9381 "00010000" // /* MW 11 */ + 9382 "00000010" // /* MW 10 */ + 9383 "00000000" // /* MW 9 */ + 9384 "00000000" // /* MW 8 */ + 9385 "11010011" // /* MW 7 */ + 9386 "00010100" // /* MW 6 */ + 9387 "00100011" // /* MW 5 */ + 9388 "00000000" // /* MW 4 */ + 9389 "11110000" // /* MW 3 */ + 9390 "00101100" // /* MW 2 */ + 9391 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 113 16 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9392 "11100001" // NOPA; NOPB; VST x2, [p3], #128; NOPX; VMOV x1, lfh0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9393 "00000000" // /* MW 15 */ + 9394 "00000000" // /* MW 14 */ + 9395 "01111000" // /* MW 13 */ + 9396 "11001001" // /* MW 12 */ + 9397 "01010000" // /* MW 11 */ + 9398 "00000000" // /* MW 10 */ + 9399 "00000000" // /* MW 9 */ + 9400 "00000000" // /* MW 8 */ + 9401 "10010011" // /* MW 7 */ + 9402 "00101100" // /* MW 6 */ + 9403 "00100011" // /* MW 5 */ + 9404 "00000000" // /* MW 4 */ + 9405 "11110000" // /* MW 3 */ + 9406 "00101100" // /* MW 2 */ + 9407 "00000000" // /* MW 1 */ +.label ZLE_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_368 +.end_of_loop +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9408 "11100001" // NOPA; NOPB; NOPS; NOPX; VMOV x0, lfl0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9409 "00000000" // /* MW 15 */ + 9410 "00000000" // /* MW 14 */ + 9411 "01111000" // /* MW 13 */ + 9412 "11001001" // /* MW 12 */ + 9413 "00010010" // /* MW 11 */ + 9414 "00000000" // /* MW 10 */ + 9415 "00000000" // /* MW 9 */ + 9416 "00000000" // /* MW 8 */ + 9417 "01011011" // /* MW 7 */ + 9418 "00000001" // /* MW 6 */ + 9419 "00100000" // /* MW 5 */ + 9420 "00000000" // /* MW 4 */ + 9421 "11110000" // /* MW 3 */ + 9422 "00101100" // /* MW 2 */ + 9423 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 109 101 first +.src_ref 4 "vector.hpp" 1365 19 first +.src_ref 4 "transpose.hpp" 225 15 first +.loop_nesting 0 + 9424 "01111000" // VSHUFFLE x3, x0, x8, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9425 "01001000" // /* MW 3 */ + 9426 "10000100" // /* MW 2 */ + 9427 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 109 101 +.src_ref 4 "transpose.hpp" 224 15 first + 9428 "01111000" // VSHUFFLE x2, x0, x8, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9429 "01000100" // /* MW 3 */ + 9430 "00000100" // /* MW 2 */ + 9431 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 109 101 first +.src_ref 7 "gemm_bfp16.h" 113 16 first + 9432 "00000010" // VST x3, [p3, #64]; VMOV x8, lfh0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9433 "01110000" // /* MW 7 */ + 9434 "11001001" // /* MW 6 */ + 9435 "00010000" // /* MW 5 */ + 9436 "00000010" // /* MW 4 */ + 9437 "01100000" // /* MW 3 */ + 9438 "10011010" // /* MW 2 */ + 9439 "01100010" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 113 16 + 9440 "00000010" // VST x2, [p3], #128; VMOV x0, lfl0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9441 "01110000" // /* MW 7 */ + 9442 "11001001" // /* MW 6 */ + 9443 "00010010" // /* MW 5 */ + 9444 "00000000" // /* MW 4 */ + 9445 "01100000" // /* MW 3 */ + 9446 "10010010" // /* MW 2 */ + 9447 "01100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 109 101 +.src_ref 4 "vector.hpp" 1365 19 first +.src_ref 4 "transpose.hpp" 225 15 first + 9448 "01111000" // VSHUFFLE x3, x0, x8, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9449 "01001000" // /* MW 3 */ + 9450 "10000100" // /* MW 2 */ + 9451 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 109 101 first +.src_ref 4 "transpose.hpp" 224 15 first + 9452 "01111000" // VSHUFFLE x2, x0, x8, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9453 "01000100" // /* MW 3 */ + 9454 "00000100" // /* MW 2 */ + 9455 "00011001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 113 16 first + 9456 "00000010" // VST x3, [p3, #64]; VMOV x1, lfh0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9457 "01110000" // /* MW 7 */ + 9458 "11001001" // /* MW 6 */ + 9459 "01010000" // /* MW 5 */ + 9460 "00000000" // /* MW 4 */ + 9461 "01100000" // /* MW 3 */ + 9462 "10011010" // /* MW 2 */ + 9463 "01100010" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 113 16 + 9464 "00000010" // VST x2, [p3], #128; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9465 "01110000" // /* MW 7 */ + 9466 "10100101" // /* MW 6 */ + 9467 "00000001" // /* MW 5 */ + 9468 "00000000" // /* MW 4 */ + 9469 "01100000" // /* MW 3 */ + 9470 "10010010" // /* MW 2 */ + 9471 "01100101" // /* MW 1 */ +.label TGT_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_432 +.src_ref 7 "gemm_bfp16.h" 141 44 first + 9472 "00011000" // PADDB [p7], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9473 "10010000" // /* MW 3 */ + 9474 "00011111" // /* MW 2 */ + 9475 "00111111" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 148 2 first + 9476 "10011000" // LDA dj3, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9477 "11000110" // /* MW 3 */ + 9478 "00011101" // /* MW 2 */ + 9479 "00000111" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 148 2 +.src_ref 7 "gemm_bfp16.h" 148 2 + 9480 "01010100" // LDA dn3, [p7], #4; MOV m5, #-36 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9481 "01110001" // /* MW 5 */ + 9482 "00011111" // /* MW 4 */ + 9483 "11011010" // /* MW 3 */ + 9484 "10110100" // /* MW 2 */ + 9485 "11100011" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 148 2 + 9486 "10011000" // LDA r18, [p7], m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9487 "01010110" // /* MW 3 */ + 9488 "10101010" // /* MW 2 */ + 9489 "00000111" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 172 37 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 9490 "10111010" // LDA r20, [p7], #12; MOVXM p3, #508944 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9491 "00010000" // /* MW 9 */ + 9492 "00001000" // /* MW 8 */ + 9493 "10110010" // /* MW 7 */ + 9494 "11110001" // /* MW 6 */ + 9495 "00000001" // /* MW 5 */ + 9496 "00000000" // /* MW 4 */ + 9497 "11010000" // /* MW 3 */ + 9498 "11010010" // /* MW 2 */ + 9499 "11100111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 9500 "10111010" // LDA.s8 r20, [p3]; MOVXM r23, #16256 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9501 "00010000" // /* MW 9 */ + 9502 "11000000" // /* MW 8 */ + 9503 "11101111" // /* MW 7 */ + 9504 "00001110" // /* MW 6 */ + 9505 "00000000" // /* MW 5 */ + 9506 "00000000" // /* MW 4 */ + 9507 "01010000" // /* MW 3 */ + 9508 "11010000" // /* MW 2 */ + 9509 "01100000" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 10 "aie_core.h" 81 15 +.src_ref 7 "gemm_bfp16.h" 174 6 +.src_ref 7 "gemm_bfp16.h" 175 6 +.src_ref 7 "gemm_bfp16.h" 176 6 +.src_ref 7 "gemm_bfp16.h" 177 6 +.src_ref 7 "gemm_bfp16.h" 202 6 +.src_ref 7 "gemm_bfp16.h" 203 6 +.src_ref 7 "gemm_bfp16.h" 204 6 +.src_ref 7 "gemm_bfp16.h" 205 6 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9510 "11010100" // LDA p3, [sp, #-12]; VBCST.16 x5, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9511 "11100101" // /* MW 5 */ + 9512 "10111010" // /* MW 4 */ + 9513 "00100101" // /* MW 3 */ + 9514 "10110011" // /* MW 2 */ + 9515 "11111110" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 7 "gemm_bfp16.h" 172 37 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9516 "01010100" // VLDA.CONV.fp32.bf16 cmh4, [p6, #64]; MOV m6, #84 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9517 "01010001" // /* MW 5 */ + 9518 "00000001" // /* MW 4 */ + 9519 "01111100" // /* MW 3 */ + 9520 "11001101" // /* MW 2 */ + 9521 "11000010" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 172 37 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9522 "11010100" // LDA m7, [p7], #4; VBCST.16 x4, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9523 "11100101" // /* MW 5 */ + 9524 "10111010" // /* MW 4 */ + 9525 "11010100" // /* MW 3 */ + 9526 "11110000" // /* MW 2 */ + 9527 "11100011" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 172 37 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9528 "11010100" // LDA m3, [p7], #4; VMOV x10, x4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9529 "00100101" // /* MW 5 */ + 9530 "01010001" // /* MW 4 */ + 9531 "11011010" // /* MW 3 */ + 9532 "10110000" // /* MW 2 */ + 9533 "11100011" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 172 37 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9534 "11010100" // LDA m1, [p7], #4; VMOV x11, x5 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9535 "00100101" // /* MW 5 */ + 9536 "01010101" // /* MW 4 */ + 9537 "11011011" // /* MW 3 */ + 9538 "10010000" // /* MW 2 */ + 9539 "11100011" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 172 37 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9540 "00101100" // LDA m6, [p7], m6; ADD r23, r20, #-2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9541 "11110110" // /* MW 5 */ + 9542 "01011111" // /* MW 4 */ + 9543 "11011010" // /* MW 3 */ + 9544 "01100000" // /* MW 2 */ + 9545 "11111001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 172 37 +.src_ref 7 "gemm_bfp16.h" 172 37 + 9546 "01010100" // LDA m0, [p7], #-16; MOV m2, #280 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9547 "01100001" // /* MW 5 */ + 9548 "00000100" // /* MW 4 */ + 9549 "11010100" // /* MW 3 */ + 9550 "10000000" // /* MW 2 */ + 9551 "11111001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 172 37 +.src_ref 7 "gemm_bfp16.h" 172 37 + 9552 "01010100" // LDA dn0, [p7], #4; MOV m5, #-108 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9553 "01010001" // /* MW 5 */ + 9554 "00011110" // /* MW 4 */ + 9555 "11011010" // /* MW 3 */ + 9556 "10000100" // /* MW 2 */ + 9557 "11100011" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 172 37 + 9558 "10011000" // LDA dj0, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9559 "01000110" // /* MW 3 */ + 9560 "00011100" // /* MW 2 */ + 9561 "00000111" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 172 37 + 9562 "10011000" // LDA dn4, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9563 "00100110" // /* MW 3 */ + 9564 "00011110" // /* MW 2 */ + 9565 "00000111" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 172 37 + 9566 "10011000" // LDA dj4, [p7], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9567 "01000110" // /* MW 3 */ + 9568 "00101110" // /* MW 2 */ + 9569 "00000111" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 10 "aie_core.h" 81 15 +.src_ref 10 "aie_core.h" 100 15 +.src_ref 10 "aie_core.h" 100 15 +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 5 "accum.hpp" 903 19 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 7 "gemm_bfp16.h" 172 37 + 9570 "01010100" // LDA m5, [p7], m5; MOV dc4, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9571 "00000001" // /* MW 5 */ + 9572 "10000000" // /* MW 4 */ + 9573 "11011001" // /* MW 3 */ + 9574 "01010000" // /* MW 2 */ + 9575 "11110101" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 172 37 +.src_ref 7 "gemm_bfp16.h" 175 6 +.src_ref 7 "gemm_bfp16.h" 268 12 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 9576 "10111010" // LDA r26, [p7], m2; MOVS p0, p3; MOV r25, p7 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9577 "01110010" // /* MW 9 */ + 9578 "01100000" // /* MW 8 */ + 9579 "00101111" // /* MW 7 */ + 9580 "00000011" // /* MW 6 */ + 9581 "10001011" // /* MW 5 */ + 9582 "10001100" // /* MW 4 */ + 9583 "11010000" // /* MW 3 */ + 9584 "01101010" // /* MW 2 */ + 9585 "11101001" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 10 "aie_core.h" 100 15 +.src_ref 10 "aie_core.h" 100 15 +.src_ref 10 "aie_core.h" 100 15 +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 11 "array_helpers.hpp" 313 19 +.src_ref 5 "accum.hpp" 903 19 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 7 "gemm_bfp16.h" 172 2 +.src_ref 7 "gemm_bfp16.h" 175 6 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 9586 "01111110" // LDA p7, [sp, #-20]; PADDB [p0], m3; MOVS dc0, dc4; MOVXM p2, #9696 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 9587 "01100000" // /* MW 13 */ + 9588 "00001001" // /* MW 12 */ + 9589 "00000010" // /* MW 11 */ + 9590 "00000010" // /* MW 10 */ + 9591 "01011110" // /* MW 9 */ + 9592 "00100110" // /* MW 8 */ + 9593 "00000001" // /* MW 7 */ + 9594 "00000000" // /* MW 6 */ + 9595 "00100000" // /* MW 5 */ + 9596 "11010111" // /* MW 4 */ + 9597 "00100000" // /* MW 3 */ + 9598 "11110011" // /* MW 2 */ + 9599 "11111101" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 10 "aie_core.h" 100 15 +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 7 "gemm_bfp16.h" 175 6 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 9600 "10111010" // VLDA bmlh2, [p0, #64]; MOVS dc2, dc4; MOV dc5, dc4 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9601 "01110010" // /* MW 9 */ + 9602 "11000000" // /* MW 8 */ + 9603 "11100100" // /* MW 7 */ + 9604 "00000010" // /* MW 6 */ + 9605 "01001011" // /* MW 5 */ + 9606 "00010000" // /* MW 4 */ + 9607 "10110010" // /* MW 3 */ + 9608 "10100110" // /* MW 2 */ + 9609 "00000010" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 10 "aie_core.h" 81 15 +.src_ref 7 "gemm_bfp16.h" 174 6 +.src_ref 7 "gemm_bfp16.h" 175 6 +.src_ref 7 "gemm_bfp16.h" 176 6 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 9610 "10111010" // VLDA bmhl2, [p0, #128]; MOVS p4, p3; MOV dj2, dj3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9611 "01110010" // /* MW 9 */ + 9612 "10000000" // /* MW 8 */ + 9613 "01000011" // /* MW 7 */ + 9614 "00000001" // /* MW 6 */ + 9615 "10001011" // /* MW 5 */ + 9616 "10001100" // /* MW 4 */ + 9617 "10110100" // /* MW 3 */ + 9618 "10101010" // /* MW 2 */ + 9619 "00000100" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 +.src_ref 7 "gemm_bfp16.h" 175 6 +.src_ref 7 "gemm_bfp16.h" 176 6 +.src_ref 7 "gemm_bfp16.h" 177 6 first +.src_ref 7 "gemm_bfp16.h" 202 6 +.src_ref 7 "gemm_bfp16.h" 203 6 +.src_ref 7 "gemm_bfp16.h" 204 6 +.src_ref 7 "gemm_bfp16.h" 204 6 +.src_ref 7 "gemm_bfp16.h" 205 6 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 9620 "01111110" // VLDA bmhh2, [p0, #192]; PADDB [p3], m6; MOVS p1, p3; MOVX r17, #780; MOV r24, m1 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 9621 "01100000" // /* MW 13 */ + 9622 "10010001" // /* MW 12 */ + 9623 "00110001" // /* MW 11 */ + 9624 "00001111" // /* MW 10 */ + 9625 "00100000" // /* MW 9 */ + 9626 "01100001" // /* MW 8 */ + 9627 "00110001" // /* MW 7 */ + 9628 "00100010" // /* MW 6 */ + 9629 "00100011" // /* MW 5 */ + 9630 "10010111" // /* MW 4 */ + 9631 "10110111" // /* MW 3 */ + 9632 "10101110" // /* MW 2 */ + 9633 "00000110" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 10 "aie_core.h" 100 15 +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 5 "mmul_bf16_bf16.hpp" 113 29 +.src_ref 5 "mmul_bf16_bf16.hpp" 113 29 +.src_ref 5 "mmul_bf16_bf16.hpp" 114 29 +.src_ref 5 "mmul_bf16_bf16.hpp" 114 29 +.src_ref 7 "gemm_bfp16.h" 177 6 +.src_ref 7 "gemm_bfp16.h" 268 37 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9634 "01111110" // VLDA bmlh0, [p3, #64]; NOPB; MOVS dc3, dc0; MOVX crRnd, r20; MOV r20, p7 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 9635 "01100000" // /* MW 13 */ + 9636 "00001001" // /* MW 12 */ + 9637 "01100000" // /* MW 11 */ + 9638 "00001111" // /* MW 10 */ + 9639 "11101100" // /* MW 9 */ + 9640 "01010001" // /* MW 8 */ + 9641 "10000000" // /* MW 7 */ + 9642 "00111010" // /* MW 6 */ + 9643 "00100101" // /* MW 5 */ + 9644 "00000000" // /* MW 4 */ + 9645 "10110000" // /* MW 3 */ + 9646 "10000110" // /* MW 2 */ + 9647 "01100010" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 4 "transpose.hpp" 224 15 +.src_ref 4 "transpose.hpp" 224 15 +.src_ref 7 "gemm_bfp16.h" 175 6 +.src_ref 7 "gemm_bfp16.h" 177 6 +.src_ref 7 "gemm_bfp16.h" 203 6 + 9648 "11100001" // VLDA bmhl0, [p3, #128]; NOPB; MOVS dn2, dn3; MOVX r19, #52; MOV m2, m3; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9649 "00000000" // /* MW 15 */ + 9650 "00000000" // /* MW 14 */ + 9651 "01111000" // /* MW 13 */ + 9652 "00000000" // /* MW 12 */ + 9653 "00000011" // /* MW 11 */ + 9654 "10001001" // /* MW 10 */ + 9655 "00110110" // /* MW 9 */ + 9656 "00000001" // /* MW 8 */ + 9657 "01001011" // /* MW 7 */ + 9658 "01001110" // /* MW 6 */ + 9659 "00100010" // /* MW 5 */ + 9660 "00000000" // /* MW 4 */ + 9661 "10110000" // /* MW 3 */ + 9662 "10001010" // /* MW 2 */ + 9663 "01100100" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 10 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1365 19 +.src_ref 4 "vector.hpp" 1365 19 +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 4 "transpose.hpp" 225 15 +.src_ref 4 "transpose.hpp" 225 15 +.src_ref 7 "gemm_bfp16.h" 177 6 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 9664 "11100001" // VLDA bmhh0, [p3, #192]; NOPB; MOVS dc1, dc3; MOVX r21, #53; MOV m3, r18; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9665 "00000000" // /* MW 15 */ + 9666 "00000000" // /* MW 14 */ + 9667 "01111000" // /* MW 13 */ + 9668 "10010000" // /* MW 12 */ + 9669 "10000100" // /* MW 11 */ + 9670 "10101001" // /* MW 10 */ + 9671 "01010110" // /* MW 9 */ + 9672 "00000001" // /* MW 8 */ + 9673 "01001011" // /* MW 7 */ + 9674 "00001100" // /* MW 6 */ + 9675 "00100001" // /* MW 5 */ + 9676 "00000000" // /* MW 4 */ + 9677 "10110000" // /* MW 3 */ + 9678 "10001110" // /* MW 2 */ + 9679 "01100110" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 5 "mmul_bf16_bf16.hpp" 111 36 +.src_ref 5 "mmul_bf16_bf16.hpp" 111 36 +.src_ref 7 "gemm_bfp16.h" 174 6 +.src_ref 7 "gemm_bfp16.h" 176 6 first +.src_ref 7 "gemm_bfp16.h" 177 6 +.aggressive_scheduled_block_id 4 +.noswbrkpt + 9680 "11100001" // VLDA bmll0, [p3]; VLDB x4, [p7, #64]; PADDS [p4], m1; MOVX r22, #60; MOV p5, p4; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9681 "00000000" // /* MW 15 */ + 9682 "00000000" // /* MW 14 */ + 9683 "01111000" // /* MW 13 */ + 9684 "01100000" // /* MW 12 */ + 9685 "10110100" // /* MW 11 */ + 9686 "10001010" // /* MW 10 */ + 9687 "01100111" // /* MW 9 */ + 9688 "00000001" // /* MW 8 */ + 9689 "01011011" // /* MW 7 */ + 9690 "00101000" // /* MW 6 */ + 9691 "01101100" // /* MW 5 */ + 9692 "00101010" // /* MW 4 */ + 9693 "10111110" // /* MW 3 */ + 9694 "10000010" // /* MW 2 */ + 9695 "01100000" // /* MW 1 */ +.label TGT_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_656 +.src_ref 10 "aie_core.h" 81 15 +.src_ref 10 "aie_core.h" 100 15 +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 7 "gemm_bfp16.h" 174 6 first +.src_ref 7 "gemm_bfp16.h" 176 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 9696 "11110110" // VLDA bmlh1, [p4, #64]; PADDB [p5], m7; MOVS p3, p5; MOV m1, r16 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9697 "01110000" // /* MW 11 */ + 9698 "00010000" // /* MW 10 */ + 9699 "10000100" // /* MW 9 */ + 9700 "00000000" // /* MW 8 */ + 9701 "10001011" // /* MW 7 */ + 9702 "10010100" // /* MW 6 */ + 9703 "00100011" // /* MW 5 */ + 9704 "11010111" // /* MW 4 */ + 9705 "10111011" // /* MW 3 */ + 9706 "10010110" // /* MW 2 */ + 9707 "10000010" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 first +.src_ref 7 "gemm_bfp16.h" 176 6 first +.src_ref 7 "gemm_bfp16.h" 182 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9708 "10111010" // VLDA bmhl1, [p4, #128]; PADDB.2D [p3], d3; ADD.NC lc, r26, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9709 "01001110" // /* MW 9 */ + 9710 "10111111" // /* MW 8 */ + 9711 "10111110" // /* MW 7 */ + 9712 "00000010" // /* MW 6 */ + 9713 "10010000" // /* MW 5 */ + 9714 "01110011" // /* MW 4 */ + 9715 "10110011" // /* MW 3 */ + 9716 "10011010" // /* MW 2 */ + 9717 "10000100" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 176 6 +.src_ref 7 "gemm_bfp16.h" 182 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9718 "10111010" // VLDA bmhh1, [p4, #192]; MOVXM ls, #9904 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9719 "00010000" // /* MW 9 */ + 9720 "01011000" // /* MW 8 */ + 9721 "01111011" // /* MW 7 */ + 9722 "00001000" // /* MW 6 */ + 9723 "00000000" // /* MW 5 */ + 9724 "00000000" // /* MW 4 */ + 9725 "10110000" // /* MW 3 */ + 9726 "10011110" // /* MW 2 */ + 9727 "10000110" // /* MW 1 */ +.src_ref 10 "aie_core.h" 100 15 first +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 313 19 +.src_ref 7 "gemm_bfp16.h" 176 6 +.src_ref 7 "gemm_bfp16.h" 182 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9728 "01111110" // VLDA bmll1, [p4]; VLDB.3D x7, [p7], d1; MOVS p4, p7; MOVXM le, #9984 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 9729 "01100000" // /* MW 13 */ + 9730 "10010001" // /* MW 12 */ + 9731 "10010011" // /* MW 11 */ + 9732 "00000010" // /* MW 10 */ + 9733 "01110000" // /* MW 9 */ + 9734 "00110111" // /* MW 8 */ + 9735 "00000001" // /* MW 7 */ + 9736 "00000000" // /* MW 6 */ + 9737 "11101000" // /* MW 5 */ + 9738 "01110011" // /* MW 4 */ + 9739 "10111110" // /* MW 3 */ + 9740 "10010010" // /* MW 2 */ + 9741 "10000000" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 313 19 first +.src_ref 7 "gemm_bfp16.h" 174 6 first +.src_ref 7 "gemm_bfp16.h" 175 6 +.src_ref 7 "gemm_bfp16.h" 203 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9742 "10111010" // VLDA bmlh3, [p5, #64]; PADDB [p4], m4; MOV m3, m2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9743 "01111110" // /* MW 9 */ + 9744 "00000000" // /* MW 8 */ + 9745 "10000010" // /* MW 7 */ + 9746 "00000001" // /* MW 6 */ + 9747 "10010000" // /* MW 5 */ + 9748 "10001011" // /* MW 4 */ + 9749 "10110100" // /* MW 3 */ + 9750 "10110110" // /* MW 2 */ + 9751 "10100010" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 313 19 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9752 "00110010" // VLDA x9, [p4]; VLDB x5, [p4, #64]; MOVS p4, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9753 "10001011" // /* MW 7 */ + 9754 "10011100" // /* MW 6 */ + 9755 "11101100" // /* MW 5 */ + 9756 "00101010" // /* MW 4 */ + 9757 "01111000" // /* MW 3 */ + 9758 "11001011" // /* MW 2 */ + 9759 "10000000" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 11 "array_helpers.hpp" 313 19 first +.src_ref 7 "gemm_bfp16.h" 174 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9760 "00110010" // VLDA bmhl3, [p5, #128]; VLDB x4, [p7, #64]; PADDS [p4], m4 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9761 "01011011" // /* MW 7 */ + 9762 "10001000" // /* MW 6 */ + 9763 "01101100" // /* MW 5 */ + 9764 "00101010" // /* MW 4 */ + 9765 "10111110" // /* MW 3 */ + 9766 "10111010" // /* MW 2 */ + 9767 "10100100" // /* MW 1 */ +.src_ref 10 "aie_core.h" 100 15 first +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 7 "gemm_bfp16.h" 174 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9768 "00111100" // VLDA bmhh3, [p5, #192]; VLDB.3D x7, [p7], d1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9769 "11101000" // /* MW 5 */ + 9770 "01110011" // /* MW 4 */ + 9771 "10111110" // /* MW 3 */ + 9772 "10111110" // /* MW 2 */ + 9773 "10100110" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 11 "array_helpers.hpp" 313 19 +.src_ref 7 "gemm_bfp16.h" 174 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9774 "10111010" // VLDA bmll3, [p5]; VLDB x5, [p4, #64]; MOV p5, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9775 "01111110" // /* MW 9 */ + 9776 "01100000" // /* MW 8 */ + 9777 "10110110" // /* MW 7 */ + 9778 "00000010" // /* MW 6 */ + 9779 "01110100" // /* MW 5 */ + 9780 "00010101" // /* MW 4 */ + 9781 "10110100" // /* MW 3 */ + 9782 "10110010" // /* MW 2 */ + 9783 "10100000" // /* MW 1 */ +.src_ref 10 "aie_core.h" 100 15 first +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 313 19 first +.src_ref 5 "accum.hpp" 903 19 first +.src_ref 5 "accum.hpp" 940 83 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9784 "00111100" // VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;PADDB [p5], m5 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9785 "00100000" // /* MW 5 */ + 9786 "01010111" // /* MW 4 */ + 9787 "01111011" // /* MW 3 */ + 9788 "01000101" // /* MW 2 */ + 9789 "11000011" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 4 "transpose.hpp" 224 15 first +.src_ref 7 "gemm_bfp16.h" 175 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9790 "10111010" // VLDA bmll2, [p0]; VLDB x9, [p4]; VSHUFFLE x6, x7, x4, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9791 "00111110" // /* MW 9 */ + 9792 "00100110" // /* MW 8 */ + 9793 "10011101" // /* MW 7 */ + 9794 "00000001" // /* MW 6 */ + 9795 "01110100" // /* MW 5 */ + 9796 "00000110" // /* MW 4 */ + 9797 "10110100" // /* MW 3 */ + 9798 "10100010" // /* MW 2 */ + 9799 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1365 19 first +.src_ref 4 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9800 "01111000" // VSHUFFLE x7, x7, x4, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9801 "01010100" // /* MW 3 */ + 9802 "10111010" // /* MW 2 */ + 9803 "00011011" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 313 19 +.src_ref 4 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9804 "10111010" // VLDB x4, [p7, #64]; MOVS p4, p7; VSHUFFLE x8, x9, x5, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9805 "00110110" // /* MW 9 */ + 9806 "01100110" // /* MW 8 */ + 9807 "00100101" // /* MW 7 */ + 9808 "00000010" // /* MW 6 */ + 9809 "00110100" // /* MW 5 */ + 9810 "00010101" // /* MW 4 */ + 9811 "01100111" // /* MW 3 */ + 9812 "10010001" // /* MW 2 */ + 9813 "10010011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1365 19 first +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 11 "array_helpers.hpp" 313 19 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 5 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 4 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9814 "01100110" // VLDA.CONV.fp32.bf16 cml4, [p5];PADDB [p4], m4; VSHUFFLE x9, x9, x5, r21; VMUL.f dm4, y3, y5, r22 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9815 "01010001" // /* MW 11 */ + 9816 "11101101" // /* MW 10 */ + 9817 "10110100" // /* MW 9 */ + 9818 "01100010" // /* MW 8 */ + 9819 "11010100" // /* MW 7 */ + 9820 "11001010" // /* MW 6 */ + 9821 "00100100" // /* MW 5 */ + 9822 "00010111" // /* MW 4 */ + 9823 "01111001" // /* MW 3 */ + 9824 "11000101" // /* MW 2 */ + 9825 "10100000" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 313 19 +.src_ref 5 "accum.hpp" 903 19 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 4 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9826 "11110110" // VLDA.CONV.fp32.bf16 cmh4, [p5, #64];VLDB x5, [p4, #64]; MOVS p5, p6; VSHUFFLE x6, x7, x4, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9827 "00110000" // /* MW 11 */ + 9828 "00100110" // /* MW 10 */ + 9829 "10011101" // /* MW 9 */ + 9830 "00000001" // /* MW 8 */ + 9831 "10001011" // /* MW 7 */ + 9832 "10011000" // /* MW 6 */ + 9833 "11101101" // /* MW 5 */ + 9834 "00101010" // /* MW 4 */ + 9835 "01111000" // /* MW 3 */ + 9836 "11001101" // /* MW 2 */ + 9837 "10100010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1365 19 first +.src_ref 11 "array_helpers.hpp" 313 19 first +.src_ref 4 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9838 "11010100" // PADDA [p5], m5; VSHUFFLE x7, x7, x4, r21 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9839 "10101000" // /* MW 5 */ + 9840 "01110100" // /* MW 4 */ + 9841 "11110111" // /* MW 3 */ + 9842 "00001100" // /* MW 2 */ + 9843 "10110101" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 5 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 5 "mmul_bf16_bf16.hpp" 113 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9844 "01001010" // VLDA.CONV.fp32.bf16 cmh4, [p6, #64]; VCONV.bfp16ebs8.fp32 ex0, dm4; VMUL.f dm4, y4, y5, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9845 "01010001" // /* MW 9 */ + 9846 "11110001" // /* MW 8 */ + 9847 "10110100" // /* MW 7 */ + 9848 "00001001" // /* MW 6 */ + 9849 "00110110" // /* MW 5 */ + 9850 "00001010" // /* MW 4 */ + 9851 "01110000" // /* MW 3 */ + 9852 "11001101" // /* MW 2 */ + 9853 "11000010" // /* MW 1 */ +.src_ref 10 "aie_core.h" 100 15 first +.src_ref 10 "aie_core.h" 100 15 first +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 5 "accum.hpp" 903 19 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 4 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9854 "10111010" // VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;VLDB.3D x7, [p7], d1; VSHUFFLE x8, x9, x5, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9855 "00111110" // /* MW 9 */ + 9856 "01100110" // /* MW 8 */ + 9857 "00100101" // /* MW 7 */ + 9858 "00000010" // /* MW 6 */ + 9859 "11110100" // /* MW 5 */ + 9860 "00111001" // /* MW 4 */ + 9861 "01110111" // /* MW 3 */ + 9862 "01000101" // /* MW 2 */ + 9863 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1365 19 first +.src_ref 4 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9864 "01111000" // VSHUFFLE x9, x9, x5, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9865 "11010100" // /* MW 3 */ + 9866 "11001010" // /* MW 2 */ + 9867 "00011100" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 114 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9868 "00011000" // VCONV.bfp16ebs8.fp32 ex1, dm4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9869 "00110110" // /* MW 3 */ + 9870 "10001010" // /* MW 2 */ + 9871 "00001000" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 5 "mmul_bf16_bf16.hpp" 111 36 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9872 "01001010" // VLDA.CONV.fp32.bf16 cml4, [p5];VLDB x9, [p4]; VMUL.f dm4, y3, y5, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9873 "01010001" // /* MW 9 */ + 9874 "11101101" // /* MW 8 */ + 9875 "10110100" // /* MW 7 */ + 9876 "00011101" // /* MW 6 */ + 9877 "01110100" // /* MW 5 */ + 9878 "00000110" // /* MW 4 */ + 9879 "01110100" // /* MW 3 */ + 9880 "11000101" // /* MW 2 */ + 9881 "10100000" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 5 "accum.hpp" 903 19 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "mmul_bf16_bf16.hpp" 113 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9882 "00001100" // VLDA.CONV.fp32.bf16 cmh4, [p5, #64]; VCONV.bfp16ebs8.fp32 ex2, dm4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9883 "01101100" // /* MW 5 */ + 9884 "00010100" // /* MW 4 */ + 9885 "01110010" // /* MW 3 */ + 9886 "11001101" // /* MW 2 */ + 9887 "10100010" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 114 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9888 "11100001" // NOPA; NOPB; VCONV.bfp16ebs8.fp32 ex3, dm4;NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9889 "00000000" // /* MW 15 */ + 9890 "00000000" // /* MW 14 */ + 9891 "01111000" // /* MW 13 */ + 9892 "10100101" // /* MW 12 */ + 9893 "00000001" // /* MW 11 */ + 9894 "00000000" // /* MW 10 */ + 9895 "00000000" // /* MW 9 */ + 9896 "00000000" // /* MW 8 */ + 9897 "00110110" // /* MW 7 */ + 9898 "10001010" // /* MW 6 */ + 9899 "00100001" // /* MW 5 */ + 9900 "00000000" // /* MW 4 */ + 9901 "11110000" // /* MW 3 */ + 9902 "00101100" // /* MW 2 */ + 9903 "00000000" // /* MW 1 */ +.label ZLS_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_864 +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 313 19 +.src_ref 5 "mmul_bf16_bf16.hpp" 111 36 first +.begin_of_loop +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt +.loop_nesting 2 + 9904 "01001010" // VLDB x4, [p7, #64]; MOV p5, p6; VMUL.f dm4, y4, y5, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9905 "01010001" // /* MW 9 */ + 9906 "11110001" // /* MW 8 */ + 9907 "10110100" // /* MW 7 */ + 9908 "11100110" // /* MW 6 */ + 9909 "11000000" // /* MW 5 */ + 9910 "01101100" // /* MW 4 */ + 9911 "01101101" // /* MW 3 */ + 9912 "00101010" // /* MW 2 */ + 9913 "00001110" // /* MW 1 */ +.src_ref 10 "aie_core.h" 100 15 first +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 11 "array_helpers.hpp" 313 19 +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 4 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9914 "01001011" // VLDA.CONV.fp32.bf16 cmh4, [p6, #64];VLDB.3D x7, [p7], d1; MOVS p4, p7; NOPX; VSHUFFLE x6, x7, x4, r19; VMAC.f dm3, dm3, ex0, ex1, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9915 "00000001" // /* MW 15 */ + 9916 "01011011" // /* MW 14 */ + 9917 "00111100" // /* MW 13 */ + 9918 "00100110" // /* MW 12 */ + 9919 "10011101" // /* MW 11 */ + 9920 "00000001" // /* MW 10 */ + 9921 "00000000" // /* MW 9 */ + 9922 "00000000" // /* MW 8 */ + 9923 "10001011" // /* MW 7 */ + 9924 "10011100" // /* MW 6 */ + 9925 "11101100" // /* MW 5 */ + 9926 "01110011" // /* MW 4 */ + 9927 "01111110" // /* MW 3 */ + 9928 "11001101" // /* MW 2 */ + 9929 "11000010" // /* MW 1 */ +.src_ref 10 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1365 19 first +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 313 19 first +.src_ref 5 "accum.hpp" 903 19 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "mmul_bf16_bf16.hpp" 113 29 first +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 +.src_ref 4 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9930 "01001011" // VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;PADDB [p4], m4; VCONV.bfp16ebs8.fp32 ex0, dm4;NOPX; VSHUFFLE x7, x7, x4, r21; VMAC.f dm1, dm1, ex2, ex1, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9931 "00100001" // /* MW 15 */ + 9932 "01001001" // /* MW 14 */ + 9933 "00111100" // /* MW 13 */ + 9934 "00101010" // /* MW 12 */ + 9935 "11011101" // /* MW 11 */ + 9936 "00000001" // /* MW 10 */ + 9937 "00000000" // /* MW 9 */ + 9938 "00000000" // /* MW 8 */ + 9939 "00110110" // /* MW 7 */ + 9940 "00001010" // /* MW 6 */ + 9941 "00100000" // /* MW 5 */ + 9942 "00010111" // /* MW 4 */ + 9943 "01111001" // /* MW 3 */ + 9944 "01000101" // /* MW 2 */ + 9945 "11000011" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 11 "array_helpers.hpp" 313 19 +.src_ref 5 "mmul_bf16_bf16.hpp" 114 29 first +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9946 "01100110" // PADDA [p5], m5; VLDB x5, [p4, #64]; VCONV.bfp16ebs8.fp32 ex1, dm4; VMAC.f dm2, dm2, ex0, ex3, r17 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9947 "01101001" // /* MW 11 */ + 9948 "01000000" // /* MW 10 */ + 9949 "10001010" // /* MW 9 */ + 9950 "00001110" // /* MW 8 */ + 9951 "00011011" // /* MW 7 */ + 9952 "01000101" // /* MW 6 */ + 9953 "11101000" // /* MW 5 */ + 9954 "00101010" // /* MW 4 */ + 9955 "11111000" // /* MW 3 */ + 9956 "00001100" // /* MW 2 */ + 9957 "10110101" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 5 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 4 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9958 "01100110" // VLDA.CONV.fp32.bf16 cml4, [p5];VLDB x9, [p4]; VSHUFFLE x8, x9, x5, r19; VMUL.f dm4, y3, y5, r22 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9959 "01010001" // /* MW 11 */ + 9960 "11101101" // /* MW 10 */ + 9961 "10110100" // /* MW 9 */ + 9962 "01100010" // /* MW 8 */ + 9963 "11001100" // /* MW 7 */ + 9964 "01001010" // /* MW 6 */ + 9965 "11101100" // /* MW 5 */ + 9966 "00001100" // /* MW 4 */ + 9967 "01111000" // /* MW 3 */ + 9968 "11000101" // /* MW 2 */ + 9969 "10100000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1365 19 first +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 5 "accum.hpp" 903 19 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "mmul_bf16_bf16.hpp" 113 29 first +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 4 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9970 "01101110" // VLDA.CONV.fp32.bf16 cmh4, [p5, #64]; VCONV.bfp16ebs8.fp32 ex2, dm4; VSHUFFLE x9, x9, x5, r21; VMAC.f dm0, dm0, ex2, ex3, r17 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 9971 "01101001" // /* MW 13 */ + 9972 "00000100" // /* MW 12 */ + 9973 "10001000" // /* MW 11 */ + 9974 "10100011" // /* MW 10 */ + 9975 "01010110" // /* MW 9 */ + 9976 "01100110" // /* MW 8 */ + 9977 "00000000" // /* MW 7 */ + 9978 "00000000" // /* MW 6 */ + 9979 "01101100" // /* MW 5 */ + 9980 "00010100" // /* MW 4 */ + 9981 "01110010" // /* MW 3 */ + 9982 "11001101" // /* MW 2 */ + 9983 "10100010" // /* MW 1 */ +.label ZLE_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_944 +.src_ref 5 "mmul_bf16_bf16.hpp" 114 29 first +.end_of_loop +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 9984 "11100001" // NOPA; NOPB; VCONV.bfp16ebs8.fp32 ex3, dm4;NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9985 "00000000" // /* MW 15 */ + 9986 "00000000" // /* MW 14 */ + 9987 "01111000" // /* MW 13 */ + 9988 "10100101" // /* MW 12 */ + 9989 "00000001" // /* MW 11 */ + 9990 "00000000" // /* MW 10 */ + 9991 "00000000" // /* MW 9 */ + 9992 "00000000" // /* MW 8 */ + 9993 "00110110" // /* MW 7 */ + 9994 "10001010" // /* MW 6 */ + 9995 "00100001" // /* MW 5 */ + 9996 "00000000" // /* MW 4 */ + 9997 "11110000" // /* MW 3 */ + 9998 "00101100" // /* MW 2 */ + 9999 "00000000" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 313 19 +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 5 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 7 "gemm_bfp16.h" 204 6 +.src_ref 7 "gemm_bfp16.h" 205 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 10000 "10001011" // VLDA.CONV.fp32.bf16 cmh4, [p6, #64];VLDB x4, [p7, #64]; MOVS p4, p1; NOPX; MOV p5, p6; VMUL.f dm4, y4, y5, r22 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10001 "10001010" // /* MW 15 */ + 10002 "10100111" // /* MW 14 */ + 10003 "01111101" // /* MW 13 */ + 10004 "01100000" // /* MW 12 */ + 10005 "10110110" // /* MW 11 */ + 10006 "00000010" // /* MW 10 */ + 10007 "00000000" // /* MW 9 */ + 10008 "00000000" // /* MW 8 */ + 10009 "10001011" // /* MW 7 */ + 10010 "10000100" // /* MW 6 */ + 10011 "01101100" // /* MW 5 */ + 10012 "00101010" // /* MW 4 */ + 10013 "01111110" // /* MW 3 */ + 10014 "11001101" // /* MW 2 */ + 10015 "11000010" // /* MW 1 */ +.src_ref 10 "aie_core.h" 100 15 first +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 11 "array_helpers.hpp" 313 19 first +.src_ref 5 "accum.hpp" 903 19 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 4 "transpose.hpp" 224 15 first +.src_ref 7 "gemm_bfp16.h" 203 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10016 "01001011" // VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;PADDB [p5], m5; MOVS p0, p1; NOPX; VSHUFFLE x6, x7, x4, r19; VMAC.f dm3, dm3, ex0, ex1, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10017 "00000001" // /* MW 15 */ + 10018 "01011011" // /* MW 14 */ + 10019 "00111100" // /* MW 13 */ + 10020 "00100110" // /* MW 12 */ + 10021 "10011101" // /* MW 11 */ + 10022 "00000001" // /* MW 10 */ + 10023 "00000000" // /* MW 9 */ + 10024 "00000000" // /* MW 8 */ + 10025 "10001011" // /* MW 7 */ + 10026 "10000100" // /* MW 6 */ + 10027 "00100000" // /* MW 5 */ + 10028 "01010111" // /* MW 4 */ + 10029 "01111011" // /* MW 3 */ + 10030 "01000101" // /* MW 2 */ + 10031 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1365 19 first +.src_ref 5 "mmul_bf16_bf16.hpp" 113 29 first +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 +.src_ref 4 "transpose.hpp" 225 15 first +.src_ref 7 "gemm_bfp16.h" 202 6 first +.src_ref 7 "gemm_bfp16.h" 203 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10032 "01001011" // PADDA [p0], m3; PADDB [p1], m7; VCONV.bfp16ebs8.fp32 ex0, dm4;NOPX; VSHUFFLE x7, x7, x4, r21; VMAC.f dm1, dm1, ex2, ex1, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10033 "00100001" // /* MW 15 */ + 10034 "01001001" // /* MW 14 */ + 10035 "00111100" // /* MW 13 */ + 10036 "00101010" // /* MW 12 */ + 10037 "11011101" // /* MW 11 */ + 10038 "00000001" // /* MW 10 */ + 10039 "00000000" // /* MW 9 */ + 10040 "00000000" // /* MW 8 */ + 10041 "00110110" // /* MW 7 */ + 10042 "00001010" // /* MW 6 */ + 10043 "00100000" // /* MW 5 */ + 10044 "11010111" // /* MW 4 */ + 10045 "11110011" // /* MW 3 */ + 10046 "00001100" // /* MW 2 */ + 10047 "00001101" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 114 29 first +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 7 "gemm_bfp16.h" 176 6 +.src_ref 7 "gemm_bfp16.h" 204 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10048 "01001010" // VCONV.bfp16ebs8.fp32 ex1, dm4; MOV m1, r24; VMAC.f dm2, dm2, ex0, ex3, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10049 "01101001" // /* MW 9 */ + 10050 "01000000" // /* MW 8 */ + 10051 "10001010" // /* MW 7 */ + 10052 "11100100" // /* MW 6 */ + 10053 "00100000" // /* MW 5 */ + 10054 "00001100" // /* MW 4 */ + 10055 "11000001" // /* MW 3 */ + 10056 "01000110" // /* MW 2 */ + 10057 "00010001" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 5 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 4 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10058 "01001010" // VLDA.CONV.fp32.bf16 cml4, [p5]; VSHUFFLE x8, x9, x5, r19; VMUL.f dm4, y3, y5, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10059 "01010001" // /* MW 9 */ + 10060 "11101101" // /* MW 8 */ + 10061 "10110100" // /* MW 7 */ + 10062 "01100010" // /* MW 6 */ + 10063 "11001100" // /* MW 5 */ + 10064 "01001010" // /* MW 4 */ + 10065 "01110100" // /* MW 3 */ + 10066 "11000101" // /* MW 2 */ + 10067 "10100000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1365 19 first +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 5 "accum.hpp" 903 19 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "mmul_bf16_bf16.hpp" 113 29 first +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 4 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10068 "01101110" // VLDA.CONV.fp32.bf16 cmh4, [p5, #64]; VCONV.bfp16ebs8.fp32 ex2, dm4; VSHUFFLE x9, x9, x5, r21; VMAC.f dm0, dm0, ex2, ex3, r17 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 10069 "01101001" // /* MW 13 */ + 10070 "00000100" // /* MW 12 */ + 10071 "10001000" // /* MW 11 */ + 10072 "10100011" // /* MW 10 */ + 10073 "01010110" // /* MW 9 */ + 10074 "01100110" // /* MW 8 */ + 10075 "00000000" // /* MW 7 */ + 10076 "00000000" // /* MW 6 */ + 10077 "01101100" // /* MW 5 */ + 10078 "00010100" // /* MW 4 */ + 10079 "01110010" // /* MW 3 */ + 10080 "11001101" // /* MW 2 */ + 10081 "10100010" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 5 "mmul_bf16_bf16.hpp" 114 29 first +.src_ref 7 "gemm_bfp16.h" 204 6 first +.src_ref 7 "gemm_bfp16.h" 205 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10082 "10111010" // PADDB [p4], m1; VCONV.bfp16ebs8.fp32 ex3, dm4; MOV p5, p4 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10083 "01110110" // /* MW 9 */ + 10084 "01100000" // /* MW 8 */ + 10085 "10110100" // /* MW 7 */ + 10086 "00000010" // /* MW 6 */ + 10087 "10010000" // /* MW 5 */ + 10088 "00101011" // /* MW 4 */ + 10089 "11000100" // /* MW 3 */ + 10090 "01000110" // /* MW 2 */ + 10091 "00110001" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10092 "01100010" // MOV m2, r18; VMAC.f dm3, dm3, ex0, ex1, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10093 "00101001" // /* MW 7 */ + 10094 "01100000" // /* MW 6 */ + 10095 "10001011" // /* MW 5 */ + 10096 "11100110" // /* MW 4 */ + 10097 "00100000" // /* MW 3 */ + 10098 "00001001" // /* MW 2 */ + 10099 "00000010" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 5 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 5 "mmul_bf16_bf16.hpp" 113 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10100 "01001010" // VLDA.CONV.fp32.bf16 cmh4, [p6, #64]; VCONV.bfp16ebs8.fp32 ex0, dm4; VMUL.f dm4, y4, y5, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10101 "01010001" // /* MW 9 */ + 10102 "11110001" // /* MW 8 */ + 10103 "10110100" // /* MW 7 */ + 10104 "00001001" // /* MW 6 */ + 10105 "00110110" // /* MW 5 */ + 10106 "00001010" // /* MW 4 */ + 10107 "01110000" // /* MW 3 */ + 10108 "11001101" // /* MW 2 */ + 10109 "11000010" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10110 "01001000" // VMAC.f dm1, dm1, ex2, ex1, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10111 "00101001" // /* MW 3 */ + 10112 "00100100" // /* MW 2 */ + 10113 "10001001" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 114 29 first +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10114 "01100010" // VCONV.bfp16ebs8.fp32 ex1, dm4; VMAC.f dm2, dm2, ex0, ex3, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10115 "01101001" // /* MW 7 */ + 10116 "01000000" // /* MW 6 */ + 10117 "10001010" // /* MW 5 */ + 10118 "00000010" // /* MW 4 */ + 10119 "11000000" // /* MW 3 */ + 10120 "01000110" // /* MW 2 */ + 10121 "00010001" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10122 "01001000" // VMAC.f dm0, dm0, ex2, ex3, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10123 "01101001" // /* MW 3 */ + 10124 "00000100" // /* MW 2 */ + 10125 "10001000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10127 "00000000" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 113 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10128 "00011000" // VCONV.bfp16ebs8.fp32 ex2, dm4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10129 "00110110" // /* MW 3 */ + 10130 "00001010" // /* MW 2 */ + 10131 "00001001" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 114 29 first +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10132 "01100010" // VCONV.bfp16ebs8.fp32 ex3, dm4; VMAC.f dm3, dm3, ex0, ex1, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10133 "00101001" // /* MW 7 */ + 10134 "01100000" // /* MW 6 */ + 10135 "10001011" // /* MW 5 */ + 10136 "00000010" // /* MW 4 */ + 10137 "11000000" // /* MW 3 */ + 10138 "01000110" // /* MW 2 */ + 10139 "00110001" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10141 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10143 "00000000" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10144 "01001000" // VMAC.f dm1, dm1, ex2, ex1, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10145 "00101001" // /* MW 3 */ + 10146 "00100100" // /* MW 2 */ + 10147 "10001001" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10148 "01001000" // VMAC.f dm2, dm2, ex0, ex3, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10149 "01101001" // /* MW 3 */ + 10150 "01000000" // /* MW 2 */ + 10151 "10001010" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10152 "01001000" // VMAC.f dm0, dm0, ex2, ex3, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10153 "01101001" // /* MW 3 */ + 10154 "00000100" // /* MW 2 */ + 10155 "10001000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 202 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10156 "10011000" // VST bmlh3, [p1, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10157 "10100110" // /* MW 3 */ + 10158 "00010101" // /* MW 2 */ + 10159 "00001001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 202 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10160 "10011000" // VST bmhl3, [p1, #128] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10161 "11000110" // /* MW 3 */ + 10162 "00100101" // /* MW 2 */ + 10163 "00001001" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 202 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10164 "10011000" // VST bmhh3, [p1, #192] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10165 "11100110" // /* MW 3 */ + 10166 "00110101" // /* MW 2 */ + 10167 "00001001" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 7 "gemm_bfp16.h" 202 6 +.src_ref 7 "gemm_bfp16.h" 205 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10168 "10111010" // PADDB [p5], m6; VST bmll3, [p1]; MOV p1, p5 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10169 "01110110" // /* MW 9 */ + 10170 "01100000" // /* MW 8 */ + 10171 "10110101" // /* MW 7 */ + 10172 "00000000" // /* MW 6 */ + 10173 "10010000" // /* MW 5 */ + 10174 "11001011" // /* MW 4 */ + 10175 "11010101" // /* MW 3 */ + 10176 "10110000" // /* MW 2 */ + 10177 "00100000" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 first +.src_ref 7 "gemm_bfp16.h" 175 6 +.src_ref 7 "gemm_bfp16.h" 203 6 +.src_ref 7 "gemm_bfp16.h" 203 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10178 "10111010" // PADDB.2D [p1], d2; VST bmlh2, [p0, #64]; MOV m2, m3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10179 "01110110" // /* MW 9 */ + 10180 "00000000" // /* MW 8 */ + 10181 "00000011" // /* MW 7 */ + 10182 "00000001" // /* MW 6 */ + 10183 "10010000" // /* MW 5 */ + 10184 "01010011" // /* MW 4 */ + 10185 "11010001" // /* MW 3 */ + 10186 "10100100" // /* MW 2 */ + 10187 "00000010" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 203 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10188 "10011000" // VST bmhl2, [p0, #128] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10189 "01000110" // /* MW 3 */ + 10190 "00100101" // /* MW 2 */ + 10191 "00001000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 203 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10192 "10011000" // VST bmhh2, [p0, #192] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10193 "01100110" // /* MW 3 */ + 10194 "00110101" // /* MW 2 */ + 10195 "00001000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 175 6 +.src_ref 7 "gemm_bfp16.h" 203 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10196 "00000010" // VST bmll2, [p0]; MOV p0, p3 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10197 "01110000" // /* MW 7 */ + 10198 "01100000" // /* MW 6 */ + 10199 "00110011" // /* MW 5 */ + 10200 "00000000" // /* MW 4 */ + 10201 "11010000" // /* MW 3 */ + 10202 "10100000" // /* MW 2 */ + 10203 "00000000" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 7 "gemm_bfp16.h" 175 6 first +.src_ref 7 "gemm_bfp16.h" 204 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10204 "10111010" // PADDB [p0], m3; VST bmlh1, [p4, #64]; MOV m3, r18 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10205 "01110110" // /* MW 9 */ + 10206 "10010000" // /* MW 8 */ + 10207 "10000100" // /* MW 7 */ + 10208 "00000001" // /* MW 6 */ + 10209 "10010000" // /* MW 5 */ + 10210 "01101011" // /* MW 4 */ + 10211 "11010000" // /* MW 3 */ + 10212 "10010100" // /* MW 2 */ + 10213 "10000010" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 175 6 +.src_ref 7 "gemm_bfp16.h" 204 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10214 "00001100" // VLDA bmlh2, [p0, #64]; VST bmhl1, [p4, #128] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10215 "10001101" // /* MW 5 */ + 10216 "01001001" // /* MW 4 */ + 10217 "10111000" // /* MW 3 */ + 10218 "10100110" // /* MW 2 */ + 10219 "00000010" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 172 2 first +.src_ref 7 "gemm_bfp16.h" 175 6 +.src_ref 7 "gemm_bfp16.h" 204 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10220 "01111010" // VLDA bmhl2, [p0, #128]; VST bmhh1, [p4, #192]; JNZD r23, r23, p2 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 10221 "10100000" // /* MW 9 */ + 10222 "11101110" // /* MW 8 */ + 10223 "00000101" // /* MW 7 */ + 10224 "10000000" // /* MW 6 */ + 10225 "11100110" // /* MW 5 */ + 10226 "00110100" // /* MW 4 */ + 10227 "10110100" // /* MW 3 */ + 10228 "10101010" // /* MW 2 */ + 10229 "00000100" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 7 "gemm_bfp16.h" 174 6 +.src_ref 7 "gemm_bfp16.h" 175 6 first +.src_ref 7 "gemm_bfp16.h" 176 6 +.src_ref 7 "gemm_bfp16.h" 177 6 first +.src_ref 7 "gemm_bfp16.h" 204 6 first +.delay_slot +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10230 "11110110" // VLDA bmhh2, [p0, #192]; PADDB [p3], m6; VST bmll1, [p4]; MOV p4, p3 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10231 "01110000" // /* MW 11 */ + 10232 "01100000" // /* MW 10 */ + 10233 "00110011" // /* MW 9 */ + 10234 "10000010" // /* MW 8 */ + 10235 "10000110" // /* MW 7 */ + 10236 "00000100" // /* MW 6 */ + 10237 "00100100" // /* MW 5 */ + 10238 "10010111" // /* MW 4 */ + 10239 "10110111" // /* MW 3 */ + 10240 "10101110" // /* MW 2 */ + 10241 "00000110" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 177 6 +.src_ref 7 "gemm_bfp16.h" 205 6 first +.delay_slot +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10242 "00001100" // VLDA bmlh0, [p3, #64]; VST bmlh0, [p5, #64] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10243 "01001101" // /* MW 5 */ + 10244 "00101000" // /* MW 4 */ + 10245 "10111010" // /* MW 3 */ + 10246 "10000110" // /* MW 2 */ + 10247 "01100010" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 177 6 first +.src_ref 7 "gemm_bfp16.h" 205 6 +.delay_slot +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10248 "00001100" // VLDA bmhl0, [p3, #128]; VST bmhl0, [p5, #128] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10249 "10001101" // /* MW 5 */ + 10250 "01001000" // /* MW 4 */ + 10251 "10111010" // /* MW 3 */ + 10252 "10001010" // /* MW 2 */ + 10253 "01100100" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 177 6 +.src_ref 7 "gemm_bfp16.h" 205 6 first +.delay_slot +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10254 "00001100" // VLDA bmhh0, [p3, #192]; VST bmhh0, [p5, #192] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10255 "11001101" // /* MW 5 */ + 10256 "01101000" // /* MW 4 */ + 10257 "10111010" // /* MW 3 */ + 10258 "10001110" // /* MW 2 */ + 10259 "01100110" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 7 "gemm_bfp16.h" 174 6 +.src_ref 7 "gemm_bfp16.h" 176 6 first +.src_ref 7 "gemm_bfp16.h" 177 6 first +.src_ref 7 "gemm_bfp16.h" 205 6 +.delay_slot +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10260 "11110110" // VLDA bmll0, [p3]; PADDB [p4], m1; VST bmll0, [p5]; MOV p5, p4 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10261 "01110000" // /* MW 11 */ + 10262 "01100000" // /* MW 10 */ + 10263 "10110100" // /* MW 9 */ + 10264 "10000010" // /* MW 8 */ + 10265 "00000110" // /* MW 7 */ + 10266 "00000100" // /* MW 6 */ + 10267 "00100101" // /* MW 5 */ + 10268 "01010111" // /* MW 4 */ + 10269 "10111000" // /* MW 3 */ + 10270 "10000010" // /* MW 2 */ + 10271 "01100000" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 10 "aie_core.h" 100 15 +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 7 "gemm_bfp16.h" 174 6 first +.src_ref 7 "gemm_bfp16.h" 176 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 10272 "11110110" // VLDA bmlh1, [p4, #64]; PADDB [p5], m7; MOVS p3, p5; MOV m1, r16 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10273 "01110000" // /* MW 11 */ + 10274 "00010000" // /* MW 10 */ + 10275 "10000100" // /* MW 9 */ + 10276 "00000000" // /* MW 8 */ + 10277 "10001011" // /* MW 7 */ + 10278 "10010100" // /* MW 6 */ + 10279 "00100011" // /* MW 5 */ + 10280 "11010111" // /* MW 4 */ + 10281 "10111011" // /* MW 3 */ + 10282 "10010110" // /* MW 2 */ + 10283 "10000010" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 first +.src_ref 7 "gemm_bfp16.h" 176 6 first +.src_ref 7 "gemm_bfp16.h" 182 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10284 "10111010" // VLDA bmhl1, [p4, #128]; PADDB.2D [p3], d3; ADD.NC lc, r26, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10285 "01001110" // /* MW 9 */ + 10286 "10111111" // /* MW 8 */ + 10287 "10111110" // /* MW 7 */ + 10288 "00000010" // /* MW 6 */ + 10289 "10010000" // /* MW 5 */ + 10290 "01110011" // /* MW 4 */ + 10291 "10110011" // /* MW 3 */ + 10292 "10011010" // /* MW 2 */ + 10293 "10000100" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 176 6 +.src_ref 7 "gemm_bfp16.h" 182 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10294 "10111010" // VLDA bmhh1, [p4, #192]; MOVXM ls, #10480 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10295 "00010000" // /* MW 9 */ + 10296 "01111000" // /* MW 8 */ + 10297 "01111100" // /* MW 7 */ + 10298 "00001000" // /* MW 6 */ + 10299 "00000000" // /* MW 5 */ + 10300 "00000000" // /* MW 4 */ + 10301 "10110000" // /* MW 3 */ + 10302 "10011110" // /* MW 2 */ + 10303 "10000110" // /* MW 1 */ +.src_ref 10 "aie_core.h" 100 15 first +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 313 19 +.src_ref 7 "gemm_bfp16.h" 176 6 +.src_ref 7 "gemm_bfp16.h" 182 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10304 "01111110" // VLDA bmll1, [p4]; VLDB.3D x7, [p7], d1; MOVS p4, p7; MOVXM le, #10560 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 10305 "01100000" // /* MW 13 */ + 10306 "10010001" // /* MW 12 */ + 10307 "10010011" // /* MW 11 */ + 10308 "00000010" // /* MW 10 */ + 10309 "10010100" // /* MW 9 */ + 10310 "00110111" // /* MW 8 */ + 10311 "00000001" // /* MW 7 */ + 10312 "00000000" // /* MW 6 */ + 10313 "11101000" // /* MW 5 */ + 10314 "01110011" // /* MW 4 */ + 10315 "10111110" // /* MW 3 */ + 10316 "10010010" // /* MW 2 */ + 10317 "10000000" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 313 19 first +.src_ref 7 "gemm_bfp16.h" 174 6 first +.src_ref 7 "gemm_bfp16.h" 175 6 +.src_ref 7 "gemm_bfp16.h" 203 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10318 "10111010" // VLDA bmlh3, [p5, #64]; PADDB [p4], m4; MOV m3, m2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10319 "01111110" // /* MW 9 */ + 10320 "00000000" // /* MW 8 */ + 10321 "10000010" // /* MW 7 */ + 10322 "00000001" // /* MW 6 */ + 10323 "10010000" // /* MW 5 */ + 10324 "10001011" // /* MW 4 */ + 10325 "10110100" // /* MW 3 */ + 10326 "10110110" // /* MW 2 */ + 10327 "10100010" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 313 19 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10328 "00110010" // VLDA x9, [p4]; VLDB x5, [p4, #64]; MOVS p4, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10329 "10001011" // /* MW 7 */ + 10330 "10011100" // /* MW 6 */ + 10331 "11101100" // /* MW 5 */ + 10332 "00101010" // /* MW 4 */ + 10333 "01111000" // /* MW 3 */ + 10334 "11001011" // /* MW 2 */ + 10335 "10000000" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 11 "array_helpers.hpp" 313 19 first +.src_ref 7 "gemm_bfp16.h" 174 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10336 "00110010" // VLDA bmhl3, [p5, #128]; VLDB x4, [p7, #64]; PADDS [p4], m4 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10337 "01011011" // /* MW 7 */ + 10338 "10001000" // /* MW 6 */ + 10339 "01101100" // /* MW 5 */ + 10340 "00101010" // /* MW 4 */ + 10341 "10111110" // /* MW 3 */ + 10342 "10111010" // /* MW 2 */ + 10343 "10100100" // /* MW 1 */ +.src_ref 10 "aie_core.h" 100 15 first +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 7 "gemm_bfp16.h" 174 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10344 "00111100" // VLDA bmhh3, [p5, #192]; VLDB.3D x7, [p7], d1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10345 "11101000" // /* MW 5 */ + 10346 "01110011" // /* MW 4 */ + 10347 "10111110" // /* MW 3 */ + 10348 "10111110" // /* MW 2 */ + 10349 "10100110" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 11 "array_helpers.hpp" 313 19 +.src_ref 7 "gemm_bfp16.h" 174 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10350 "10111010" // VLDA bmll3, [p5]; VLDB x5, [p4, #64]; MOV p5, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10351 "01111110" // /* MW 9 */ + 10352 "01100000" // /* MW 8 */ + 10353 "10110110" // /* MW 7 */ + 10354 "00000010" // /* MW 6 */ + 10355 "01110100" // /* MW 5 */ + 10356 "00010101" // /* MW 4 */ + 10357 "10110100" // /* MW 3 */ + 10358 "10110010" // /* MW 2 */ + 10359 "10100000" // /* MW 1 */ +.src_ref 10 "aie_core.h" 100 15 first +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 313 19 first +.src_ref 5 "accum.hpp" 903 19 first +.src_ref 5 "accum.hpp" 940 83 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10360 "00111100" // VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;PADDB [p5], m5 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10361 "00100000" // /* MW 5 */ + 10362 "01010111" // /* MW 4 */ + 10363 "01111011" // /* MW 3 */ + 10364 "01000101" // /* MW 2 */ + 10365 "11000011" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 4 "transpose.hpp" 224 15 first +.src_ref 7 "gemm_bfp16.h" 175 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10366 "10111010" // VLDA bmll2, [p0]; VLDB x9, [p4]; VSHUFFLE x6, x7, x4, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10367 "00111110" // /* MW 9 */ + 10368 "00100110" // /* MW 8 */ + 10369 "10011101" // /* MW 7 */ + 10370 "00000001" // /* MW 6 */ + 10371 "01110100" // /* MW 5 */ + 10372 "00000110" // /* MW 4 */ + 10373 "10110100" // /* MW 3 */ + 10374 "10100010" // /* MW 2 */ + 10375 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1365 19 first +.src_ref 4 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10376 "01111000" // VSHUFFLE x7, x7, x4, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10377 "01010100" // /* MW 3 */ + 10378 "10111010" // /* MW 2 */ + 10379 "00011011" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 313 19 +.src_ref 4 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10380 "10111010" // VLDB x4, [p7, #64]; MOVS p4, p7; VSHUFFLE x8, x9, x5, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10381 "00110110" // /* MW 9 */ + 10382 "01100110" // /* MW 8 */ + 10383 "00100101" // /* MW 7 */ + 10384 "00000010" // /* MW 6 */ + 10385 "00110100" // /* MW 5 */ + 10386 "00010101" // /* MW 4 */ + 10387 "01100111" // /* MW 3 */ + 10388 "10010001" // /* MW 2 */ + 10389 "10010011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1365 19 first +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 11 "array_helpers.hpp" 313 19 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 5 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 4 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10390 "01100110" // VLDA.CONV.fp32.bf16 cml4, [p5];PADDB [p4], m4; VSHUFFLE x9, x9, x5, r21; VMUL.f dm4, y3, y5, r22 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10391 "01010001" // /* MW 11 */ + 10392 "11101101" // /* MW 10 */ + 10393 "10110100" // /* MW 9 */ + 10394 "01100010" // /* MW 8 */ + 10395 "11010100" // /* MW 7 */ + 10396 "11001010" // /* MW 6 */ + 10397 "00100100" // /* MW 5 */ + 10398 "00010111" // /* MW 4 */ + 10399 "01111001" // /* MW 3 */ + 10400 "11000101" // /* MW 2 */ + 10401 "10100000" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 313 19 +.src_ref 5 "accum.hpp" 903 19 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 4 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10402 "11110110" // VLDA.CONV.fp32.bf16 cmh4, [p5, #64];VLDB x5, [p4, #64]; MOVS p5, p6; VSHUFFLE x6, x7, x4, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10403 "00110000" // /* MW 11 */ + 10404 "00100110" // /* MW 10 */ + 10405 "10011101" // /* MW 9 */ + 10406 "00000001" // /* MW 8 */ + 10407 "10001011" // /* MW 7 */ + 10408 "10011000" // /* MW 6 */ + 10409 "11101101" // /* MW 5 */ + 10410 "00101010" // /* MW 4 */ + 10411 "01111000" // /* MW 3 */ + 10412 "11001101" // /* MW 2 */ + 10413 "10100010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1365 19 first +.src_ref 11 "array_helpers.hpp" 313 19 first +.src_ref 4 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10414 "11010100" // PADDA [p5], m5; VSHUFFLE x7, x7, x4, r21 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10415 "10101000" // /* MW 5 */ + 10416 "01110100" // /* MW 4 */ + 10417 "11110111" // /* MW 3 */ + 10418 "00001100" // /* MW 2 */ + 10419 "10110101" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 5 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 5 "mmul_bf16_bf16.hpp" 113 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10420 "01001010" // VLDA.CONV.fp32.bf16 cmh4, [p6, #64]; VCONV.bfp16ebs8.fp32 ex0, dm4; VMUL.f dm4, y4, y5, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10421 "01010001" // /* MW 9 */ + 10422 "11110001" // /* MW 8 */ + 10423 "10110100" // /* MW 7 */ + 10424 "00001001" // /* MW 6 */ + 10425 "00110110" // /* MW 5 */ + 10426 "00001010" // /* MW 4 */ + 10427 "01110000" // /* MW 3 */ + 10428 "11001101" // /* MW 2 */ + 10429 "11000010" // /* MW 1 */ +.src_ref 10 "aie_core.h" 100 15 first +.src_ref 10 "aie_core.h" 100 15 first +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 5 "accum.hpp" 903 19 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 4 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10430 "10111010" // VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;VLDB.3D x7, [p7], d1; VSHUFFLE x8, x9, x5, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10431 "00111110" // /* MW 9 */ + 10432 "01100110" // /* MW 8 */ + 10433 "00100101" // /* MW 7 */ + 10434 "00000010" // /* MW 6 */ + 10435 "11110100" // /* MW 5 */ + 10436 "00111001" // /* MW 4 */ + 10437 "01110111" // /* MW 3 */ + 10438 "01000101" // /* MW 2 */ + 10439 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1365 19 first +.src_ref 4 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10440 "01111000" // VSHUFFLE x9, x9, x5, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10441 "11010100" // /* MW 3 */ + 10442 "11001010" // /* MW 2 */ + 10443 "00011100" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 114 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10444 "00011000" // VCONV.bfp16ebs8.fp32 ex1, dm4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10445 "00110110" // /* MW 3 */ + 10446 "10001010" // /* MW 2 */ + 10447 "00001000" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 5 "mmul_bf16_bf16.hpp" 111 36 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10448 "01001010" // VLDA.CONV.fp32.bf16 cml4, [p5];VLDB x9, [p4]; VMUL.f dm4, y3, y5, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10449 "01010001" // /* MW 9 */ + 10450 "11101101" // /* MW 8 */ + 10451 "10110100" // /* MW 7 */ + 10452 "00011101" // /* MW 6 */ + 10453 "01110100" // /* MW 5 */ + 10454 "00000110" // /* MW 4 */ + 10455 "01110100" // /* MW 3 */ + 10456 "11000101" // /* MW 2 */ + 10457 "10100000" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 5 "accum.hpp" 903 19 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "mmul_bf16_bf16.hpp" 113 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10458 "00001100" // VLDA.CONV.fp32.bf16 cmh4, [p5, #64]; VCONV.bfp16ebs8.fp32 ex2, dm4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10459 "01101100" // /* MW 5 */ + 10460 "00010100" // /* MW 4 */ + 10461 "01110010" // /* MW 3 */ + 10462 "11001101" // /* MW 2 */ + 10463 "10100010" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 114 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10464 "11100001" // NOPA; NOPB; VCONV.bfp16ebs8.fp32 ex3, dm4;NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10465 "00000000" // /* MW 15 */ + 10466 "00000000" // /* MW 14 */ + 10467 "01111000" // /* MW 13 */ + 10468 "10100101" // /* MW 12 */ + 10469 "00000001" // /* MW 11 */ + 10470 "00000000" // /* MW 10 */ + 10471 "00000000" // /* MW 9 */ + 10472 "00000000" // /* MW 8 */ + 10473 "00110110" // /* MW 7 */ + 10474 "10001010" // /* MW 6 */ + 10475 "00100001" // /* MW 5 */ + 10476 "00000000" // /* MW 4 */ + 10477 "11110000" // /* MW 3 */ + 10478 "00101100" // /* MW 2 */ + 10479 "00000000" // /* MW 1 */ +.label ZLS_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_1440 +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 313 19 +.src_ref 5 "mmul_bf16_bf16.hpp" 111 36 first +.begin_of_loop +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 10480 "01001010" // VLDB x4, [p7, #64]; MOV p5, p6; VMUL.f dm4, y4, y5, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10481 "01010001" // /* MW 9 */ + 10482 "11110001" // /* MW 8 */ + 10483 "10110100" // /* MW 7 */ + 10484 "11100110" // /* MW 6 */ + 10485 "11000000" // /* MW 5 */ + 10486 "01101100" // /* MW 4 */ + 10487 "01101101" // /* MW 3 */ + 10488 "00101010" // /* MW 2 */ + 10489 "00001110" // /* MW 1 */ +.src_ref 10 "aie_core.h" 100 15 first +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 11 "array_helpers.hpp" 313 19 +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 4 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10490 "01001011" // VLDA.CONV.fp32.bf16 cmh4, [p6, #64];VLDB.3D x7, [p7], d1; MOVS p4, p7; NOPX; VSHUFFLE x6, x7, x4, r19; VMAC.f dm3, dm3, ex0, ex1, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10491 "00000001" // /* MW 15 */ + 10492 "01011011" // /* MW 14 */ + 10493 "00111100" // /* MW 13 */ + 10494 "00100110" // /* MW 12 */ + 10495 "10011101" // /* MW 11 */ + 10496 "00000001" // /* MW 10 */ + 10497 "00000000" // /* MW 9 */ + 10498 "00000000" // /* MW 8 */ + 10499 "10001011" // /* MW 7 */ + 10500 "10011100" // /* MW 6 */ + 10501 "11101100" // /* MW 5 */ + 10502 "01110011" // /* MW 4 */ + 10503 "01111110" // /* MW 3 */ + 10504 "11001101" // /* MW 2 */ + 10505 "11000010" // /* MW 1 */ +.src_ref 10 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1365 19 first +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 313 19 first +.src_ref 5 "accum.hpp" 903 19 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "mmul_bf16_bf16.hpp" 113 29 first +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 +.src_ref 4 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10506 "01001011" // VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;PADDB [p4], m4; VCONV.bfp16ebs8.fp32 ex0, dm4;NOPX; VSHUFFLE x7, x7, x4, r21; VMAC.f dm1, dm1, ex2, ex1, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10507 "00100001" // /* MW 15 */ + 10508 "01001001" // /* MW 14 */ + 10509 "00111100" // /* MW 13 */ + 10510 "00101010" // /* MW 12 */ + 10511 "11011101" // /* MW 11 */ + 10512 "00000001" // /* MW 10 */ + 10513 "00000000" // /* MW 9 */ + 10514 "00000000" // /* MW 8 */ + 10515 "00110110" // /* MW 7 */ + 10516 "00001010" // /* MW 6 */ + 10517 "00100000" // /* MW 5 */ + 10518 "00010111" // /* MW 4 */ + 10519 "01111001" // /* MW 3 */ + 10520 "01000101" // /* MW 2 */ + 10521 "11000011" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 11 "array_helpers.hpp" 313 19 +.src_ref 5 "mmul_bf16_bf16.hpp" 114 29 first +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10522 "01100110" // PADDA [p5], m5; VLDB x5, [p4, #64]; VCONV.bfp16ebs8.fp32 ex1, dm4; VMAC.f dm2, dm2, ex0, ex3, r17 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10523 "01101001" // /* MW 11 */ + 10524 "01000000" // /* MW 10 */ + 10525 "10001010" // /* MW 9 */ + 10526 "00001110" // /* MW 8 */ + 10527 "00011011" // /* MW 7 */ + 10528 "01000101" // /* MW 6 */ + 10529 "11101000" // /* MW 5 */ + 10530 "00101010" // /* MW 4 */ + 10531 "11111000" // /* MW 3 */ + 10532 "00001100" // /* MW 2 */ + 10533 "10110101" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 5 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 4 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10534 "01100110" // VLDA.CONV.fp32.bf16 cml4, [p5];VLDB x9, [p4]; VSHUFFLE x8, x9, x5, r19; VMUL.f dm4, y3, y5, r22 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10535 "01010001" // /* MW 11 */ + 10536 "11101101" // /* MW 10 */ + 10537 "10110100" // /* MW 9 */ + 10538 "01100010" // /* MW 8 */ + 10539 "11001100" // /* MW 7 */ + 10540 "01001010" // /* MW 6 */ + 10541 "11101100" // /* MW 5 */ + 10542 "00001100" // /* MW 4 */ + 10543 "01111000" // /* MW 3 */ + 10544 "11000101" // /* MW 2 */ + 10545 "10100000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1365 19 first +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 5 "accum.hpp" 903 19 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "mmul_bf16_bf16.hpp" 113 29 first +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 4 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10546 "01101110" // VLDA.CONV.fp32.bf16 cmh4, [p5, #64]; VCONV.bfp16ebs8.fp32 ex2, dm4; VSHUFFLE x9, x9, x5, r21; VMAC.f dm0, dm0, ex2, ex3, r17 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 10547 "01101001" // /* MW 13 */ + 10548 "00000100" // /* MW 12 */ + 10549 "10001000" // /* MW 11 */ + 10550 "10100011" // /* MW 10 */ + 10551 "01010110" // /* MW 9 */ + 10552 "01100110" // /* MW 8 */ + 10553 "00000000" // /* MW 7 */ + 10554 "00000000" // /* MW 6 */ + 10555 "01101100" // /* MW 5 */ + 10556 "00010100" // /* MW 4 */ + 10557 "01110010" // /* MW 3 */ + 10558 "11001101" // /* MW 2 */ + 10559 "10100010" // /* MW 1 */ +.label ZLE_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_1520 +.src_ref 5 "mmul_bf16_bf16.hpp" 114 29 first +.end_of_loop +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10560 "11100001" // NOPA; NOPB; VCONV.bfp16ebs8.fp32 ex3, dm4;NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10561 "00000000" // /* MW 15 */ + 10562 "00000000" // /* MW 14 */ + 10563 "01111000" // /* MW 13 */ + 10564 "10100101" // /* MW 12 */ + 10565 "00000001" // /* MW 11 */ + 10566 "00000000" // /* MW 10 */ + 10567 "00000000" // /* MW 9 */ + 10568 "00000000" // /* MW 8 */ + 10569 "00110110" // /* MW 7 */ + 10570 "10001010" // /* MW 6 */ + 10571 "00100001" // /* MW 5 */ + 10572 "00000000" // /* MW 4 */ + 10573 "11110000" // /* MW 3 */ + 10574 "00101100" // /* MW 2 */ + 10575 "00000000" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 11 "array_helpers.hpp" 313 19 +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 5 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 7 "gemm_bfp16.h" 204 6 +.src_ref 7 "gemm_bfp16.h" 205 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 10576 "01101110" // VLDA.CONV.fp32.bf16 cmh4, [p6, #64]; MOVS p4, p1; MOV p5, p6; VMUL.f dm4, y4, y5, r22 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 10577 "01010001" // /* MW 13 */ + 10578 "11110001" // /* MW 12 */ + 10579 "10110100" // /* MW 11 */ + 10580 "00000111" // /* MW 10 */ + 10581 "01100110" // /* MW 9 */ + 10582 "01101011" // /* MW 8 */ + 10583 "00000000" // /* MW 7 */ + 10584 "00000000" // /* MW 6 */ + 10585 "00010110" // /* MW 5 */ + 10586 "00001001" // /* MW 4 */ + 10587 "01111001" // /* MW 3 */ + 10588 "11001101" // /* MW 2 */ + 10589 "11000010" // /* MW 1 */ +.src_ref 10 "aie_core.h" 100 15 first +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 11 "array_helpers.hpp" 313 19 first +.src_ref 5 "accum.hpp" 903 19 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 4 "transpose.hpp" 224 15 first +.src_ref 7 "gemm_bfp16.h" 203 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10590 "01001011" // VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;PADDB [p5], m5; MOVS p0, p1; NOPX; VSHUFFLE x6, x7, x4, r19; VMAC.f dm3, dm3, ex0, ex1, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10591 "00000001" // /* MW 15 */ + 10592 "01011011" // /* MW 14 */ + 10593 "00111100" // /* MW 13 */ + 10594 "00100110" // /* MW 12 */ + 10595 "10011101" // /* MW 11 */ + 10596 "00000001" // /* MW 10 */ + 10597 "00000000" // /* MW 9 */ + 10598 "00000000" // /* MW 8 */ + 10599 "10001011" // /* MW 7 */ + 10600 "10000100" // /* MW 6 */ + 10601 "00100000" // /* MW 5 */ + 10602 "01010111" // /* MW 4 */ + 10603 "01111011" // /* MW 3 */ + 10604 "01000101" // /* MW 2 */ + 10605 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1365 19 first +.src_ref 5 "mmul_bf16_bf16.hpp" 113 29 first +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 +.src_ref 4 "transpose.hpp" 225 15 first +.src_ref 7 "gemm_bfp16.h" 202 6 first +.src_ref 7 "gemm_bfp16.h" 268 37 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10606 "01001011" // MOVA dj1, #-304; PADDB [p1], m7; VCONV.bfp16ebs8.fp32 ex0, dm4;NOPX; VSHUFFLE x7, x7, x4, r21; VMAC.f dm1, dm1, ex2, ex1, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10607 "00100001" // /* MW 15 */ + 10608 "01001001" // /* MW 14 */ + 10609 "00111100" // /* MW 13 */ + 10610 "00101010" // /* MW 12 */ + 10611 "11011101" // /* MW 11 */ + 10612 "00000001" // /* MW 10 */ + 10613 "00000000" // /* MW 9 */ + 10614 "00000000" // /* MW 8 */ + 10615 "00110110" // /* MW 7 */ + 10616 "00001010" // /* MW 6 */ + 10617 "00100000" // /* MW 5 */ + 10618 "11010111" // /* MW 4 */ + 10619 "10000011" // /* MW 3 */ + 10620 "00000110" // /* MW 2 */ + 10621 "11011010" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 114 29 first +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 7 "gemm_bfp16.h" 203 6 first +.src_ref 7 "gemm_bfp16.h" 268 37 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10622 "01101110" // PADDA [p0], m3; VCONV.bfp16ebs8.fp32 ex1, dm4; MOV p7, r20; VMAC.f dm2, dm2, ex0, ex3, r17 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 10623 "01101001" // /* MW 13 */ + 10624 "01000000" // /* MW 12 */ + 10625 "10001010" // /* MW 11 */ + 10626 "00000111" // /* MW 10 */ + 10627 "01010001" // /* MW 9 */ + 10628 "01111011" // /* MW 8 */ + 10629 "00000000" // /* MW 7 */ + 10630 "00000000" // /* MW 6 */ + 10631 "01101100" // /* MW 5 */ + 10632 "00010100" // /* MW 4 */ + 10633 "11110001" // /* MW 3 */ + 10634 "00001100" // /* MW 2 */ + 10635 "00001101" // /* MW 1 */ +.src_ref 11 "array_helpers.hpp" 252 27 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 5 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 4 "transpose.hpp" 224 15 first +.src_ref 7 "gemm_bfp16.h" 268 12 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10636 "01101110" // VLDA.CONV.fp32.bf16 cml4, [p5]; MOVS p6, r25; VSHUFFLE x8, x9, x5, r19; VMUL.f dm4, y3, y5, r22 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 10637 "01010001" // /* MW 13 */ + 10638 "11101101" // /* MW 12 */ + 10639 "10110100" // /* MW 11 */ + 10640 "01100011" // /* MW 10 */ + 10641 "01010110" // /* MW 9 */ + 10642 "01100010" // /* MW 8 */ + 10643 "00000000" // /* MW 7 */ + 10644 "00000000" // /* MW 6 */ + 10645 "00010110" // /* MW 5 */ + 10646 "00110010" // /* MW 4 */ + 10647 "01111101" // /* MW 3 */ + 10648 "11000101" // /* MW 2 */ + 10649 "10100000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1365 19 first +.src_ref 11 "array_helpers.hpp" 252 27 +.src_ref 5 "accum.hpp" 903 19 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "mmul_bf16_bf16.hpp" 113 29 first +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 4 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10650 "01101110" // VLDA.CONV.fp32.bf16 cmh4, [p5, #64]; VCONV.bfp16ebs8.fp32 ex2, dm4; VSHUFFLE x9, x9, x5, r21; VMAC.f dm0, dm0, ex2, ex3, r17 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 10651 "01101001" // /* MW 13 */ + 10652 "00000100" // /* MW 12 */ + 10653 "10001000" // /* MW 11 */ + 10654 "10100011" // /* MW 10 */ + 10655 "01010110" // /* MW 9 */ + 10656 "01100110" // /* MW 8 */ + 10657 "00000000" // /* MW 7 */ + 10658 "00000000" // /* MW 6 */ + 10659 "01101100" // /* MW 5 */ + 10660 "00010100" // /* MW 4 */ + 10661 "01110010" // /* MW 3 */ + 10662 "11001101" // /* MW 2 */ + 10663 "10100010" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 114 29 first +.src_ref 7 "gemm_bfp16.h" 176 6 +.src_ref 7 "gemm_bfp16.h" 204 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10664 "00000010" // VCONV.bfp16ebs8.fp32 ex3, dm4; MOV m1, r24 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10665 "01110000" // /* MW 7 */ + 10666 "00010000" // /* MW 6 */ + 10667 "10000110" // /* MW 5 */ + 10668 "00000000" // /* MW 4 */ + 10669 "11000000" // /* MW 3 */ + 10670 "01000110" // /* MW 2 */ + 10671 "00110001" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 7 "gemm_bfp16.h" 204 6 first +.src_ref 7 "gemm_bfp16.h" 205 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10672 "01001010" // PADDB [p4], m1; MOV p5, p4; VMAC.f dm3, dm3, ex0, ex1, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10673 "00101001" // /* MW 9 */ + 10674 "01100000" // /* MW 8 */ + 10675 "10001011" // /* MW 7 */ + 10676 "11100110" // /* MW 6 */ + 10677 "11000000" // /* MW 5 */ + 10678 "01101000" // /* MW 4 */ + 10679 "00100101" // /* MW 3 */ + 10680 "01010111" // /* MW 2 */ + 10681 "00001000" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 5 "mmul_bf16_bf16.hpp" 113 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10682 "01100010" // VCONV.bfp16ebs8.fp32 ex0, dm4; VMUL.f dm4, y4, y5, r22 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10683 "01010001" // /* MW 7 */ + 10684 "11110001" // /* MW 6 */ + 10685 "10110100" // /* MW 5 */ + 10686 "00000010" // /* MW 4 */ + 10687 "11000000" // /* MW 3 */ + 10688 "01000110" // /* MW 2 */ + 10689 "00000001" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10690 "01001000" // VMAC.f dm1, dm1, ex2, ex1, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10691 "00101001" // /* MW 3 */ + 10692 "00100100" // /* MW 2 */ + 10693 "10001001" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 114 29 first +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10694 "01100010" // VCONV.bfp16ebs8.fp32 ex1, dm4; VMAC.f dm2, dm2, ex0, ex3, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10695 "01101001" // /* MW 7 */ + 10696 "01000000" // /* MW 6 */ + 10697 "10001010" // /* MW 5 */ + 10698 "00000010" // /* MW 4 */ + 10699 "11000000" // /* MW 3 */ + 10700 "01000110" // /* MW 2 */ + 10701 "00010001" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10702 "01001000" // VMAC.f dm0, dm0, ex2, ex3, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10703 "01101001" // /* MW 3 */ + 10704 "00000100" // /* MW 2 */ + 10705 "10001000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10707 "00000000" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 113 29 first +.src_ref 7 "gemm_bfp16.h" 268 12 +.src_ref 7 "gemm_bfp16.h" 268 37 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10708 "10111010" // LDA r17, [p7, dj1]; VCONV.bfp16ebs8.fp32 ex2, dm4; MOV dj1, #280 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10709 "01010010" // /* MW 9 */ + 10710 "00011000" // /* MW 8 */ + 10711 "11000001" // /* MW 7 */ + 10712 "00000000" // /* MW 6 */ + 10713 "00110110" // /* MW 5 */ + 10714 "00001010" // /* MW 4 */ + 10715 "11010001" // /* MW 3 */ + 10716 "01000110" // /* MW 2 */ + 10717 "11100100" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 114 29 first +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 7 "gemm_bfp16.h" 268 12 +.src_ref 7 "gemm_bfp16.h" 269 34 +.src_ref 7 "gemm_bfp16.h" 269 48 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10718 "01001011" // LDA r16, [p6, dj1]; NOPB; VCONV.bfp16ebs8.fp32 ex3, dm4;MOVXM p7, #508416; VMAC.f dm3, dm3, ex0, ex1, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10719 "00000001" // /* MW 15 */ + 10720 "01011011" // /* MW 14 */ + 10721 "00010100" // /* MW 13 */ + 10722 "00000000" // /* MW 12 */ + 10723 "10110001" // /* MW 11 */ + 10724 "11110011" // /* MW 10 */ + 10725 "00000001" // /* MW 9 */ + 10726 "00000000" // /* MW 8 */ + 10727 "00110110" // /* MW 7 */ + 10728 "10001010" // /* MW 6 */ + 10729 "00100001" // /* MW 5 */ + 10730 "00000000" // /* MW 4 */ + 10731 "11010000" // /* MW 3 */ + 10732 "01000010" // /* MW 2 */ + 10733 "11000100" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10734 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10735 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10736 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10737 "00000000" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10738 "01001000" // VMAC.f dm1, dm1, ex2, ex1, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10739 "00101001" // /* MW 3 */ + 10740 "00100100" // /* MW 2 */ + 10741 "10001001" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10742 "01001000" // VMAC.f dm2, dm2, ex0, ex3, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10743 "01101001" // /* MW 3 */ + 10744 "01000000" // /* MW 2 */ + 10745 "10001010" // /* MW 1 */ +.src_ref 5 "mmul_bf16_bf16.hpp" 116 21 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10746 "01001000" // VMAC.f dm0, dm0, ex2, ex3, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10747 "01101001" // /* MW 3 */ + 10748 "00000100" // /* MW 2 */ + 10749 "10001000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 202 6 first +.src_ref 7 "gemm_bfp16.h" 268 45 first + 10750 "01011100" // VST bmlh3, [p1, #64]; ADD r17, r17, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10751 "11111110" // /* MW 5 */ + 10752 "11000111" // /* MW 4 */ + 10753 "11011000" // /* MW 3 */ + 10754 "10110100" // /* MW 2 */ + 10755 "00100010" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 202 6 +.src_ref 7 "gemm_bfp16.h" 268 28 + 10756 "01011100" // VST bmhl3, [p1, #128]; NE r17, r17, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10757 "00010001" // /* MW 5 */ + 10758 "11000110" // /* MW 4 */ + 10759 "11011000" // /* MW 3 */ + 10760 "10111000" // /* MW 2 */ + 10761 "00100100" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 202 6 + 10762 "10011000" // VST bmhh3, [p1, #192] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10763 "11100110" // /* MW 3 */ + 10764 "00110101" // /* MW 2 */ + 10765 "00001001" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 7 "gemm_bfp16.h" 202 6 +.src_ref 7 "gemm_bfp16.h" 205 6 first + 10766 "10111010" // PADDB [p5], m6; VST bmll3, [p1]; MOV p1, p5 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10767 "01110110" // /* MW 9 */ + 10768 "01100000" // /* MW 8 */ + 10769 "10110101" // /* MW 7 */ + 10770 "00000000" // /* MW 6 */ + 10771 "10010000" // /* MW 5 */ + 10772 "11001011" // /* MW 4 */ + 10773 "11010101" // /* MW 3 */ + 10774 "10110000" // /* MW 2 */ + 10775 "00100000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 203 6 first + 10776 "10011000" // VST bmlh2, [p0, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10777 "00100110" // /* MW 3 */ + 10778 "00010101" // /* MW 2 */ + 10779 "00001000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 203 6 + 10780 "10011000" // VST bmhl2, [p0, #128] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10781 "01000110" // /* MW 3 */ + 10782 "00100101" // /* MW 2 */ + 10783 "00001000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 203 6 + 10784 "10011000" // VST bmhh2, [p0, #192] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10785 "01100110" // /* MW 3 */ + 10786 "00110101" // /* MW 2 */ + 10787 "00001000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 203 6 + 10788 "10011000" // VST bmll2, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10789 "00000110" // /* MW 3 */ + 10790 "00000101" // /* MW 2 */ + 10791 "00001000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 204 6 first + 10792 "10011000" // VST bmlh1, [p4, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10793 "10100110" // /* MW 3 */ + 10794 "00010100" // /* MW 2 */ + 10795 "00001100" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 204 6 + 10796 "10011000" // VST bmhl1, [p4, #128] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10797 "11000110" // /* MW 3 */ + 10798 "00100100" // /* MW 2 */ + 10799 "00001100" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 204 6 +.src_ref 7 "gemm_bfp16.h" 268 6 first + 10800 "00111010" // VST bmhh1, [p4, #192]; JNZ r17, #10912 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10912 delay_slots=5 */ + 10801 "01100001" // /* MW 9 */ + 10802 "00000000" // /* MW 8 */ + 10803 "00010000" // /* MW 7 */ + 10804 "01010100" // /* MW 6 */ + 10805 "00000101" // /* MW 5 */ + 10806 "00100010" // /* MW 4 */ + 10807 "11010000" // /* MW 3 */ + 10808 "10011100" // /* MW 2 */ + 10809 "10000110" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 204 6 first +.delay_slot + 10810 "10011000" // VST bmll1, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10811 "10000110" // /* MW 3 */ + 10812 "00000100" // /* MW 2 */ + 10813 "00001100" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 205 6 first +.delay_slot + 10814 "10011000" // VST bmlh0, [p5, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10815 "00100110" // /* MW 3 */ + 10816 "00010100" // /* MW 2 */ + 10817 "00001101" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 205 6 +.delay_slot + 10818 "10011000" // VST bmhl0, [p5, #128] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10819 "01000110" // /* MW 3 */ + 10820 "00100100" // /* MW 2 */ + 10821 "00001101" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 7 "gemm_bfp16.h" 205 6 +.delay_slot + 10822 "00000010" // VST bmhh0, [p5, #192]; MOV m2, r18 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10823 "01110000" // /* MW 7 */ + 10824 "10010000" // /* MW 6 */ + 10825 "00000100" // /* MW 5 */ + 10826 "00000001" // /* MW 4 */ + 10827 "11010000" // /* MW 3 */ + 10828 "10001100" // /* MW 2 */ + 10829 "10100110" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 first +.src_ref 7 "gemm_bfp16.h" 205 6 +.delay_slot + 10830 "01001100" // PADDB.2D [p1], d2; VST bmll0, [p5] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10831 "00001101" // /* MW 5 */ + 10832 "00001000" // /* MW 4 */ + 10833 "00001010" // /* MW 3 */ + 10834 "01110010" // /* MW 2 */ + 10835 "00101010" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 269 34 first + 10836 "10011000" // LDA r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10837 "00010110" // /* MW 3 */ + 10838 "00000110" // /* MW 2 */ + 10839 "00000111" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 269 48 + 10840 "10011000" // LDA r17, [p7, #8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10841 "00110110" // /* MW 3 */ + 10842 "00100110" // /* MW 2 */ + 10843 "00000111" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 269 4 + 10844 "00011000" // LDA p0, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10845 "00011001" // /* MW 3 */ + 10846 "11110100" // /* MW 2 */ + 10847 "00000111" // /* MW 1 */ + 10848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10849 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 269 4 +.no_stack_arguments + 10850 "00000100" // JL #8736 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8736 delay_slots=5 */ + 10851 "00000001" // /* MW 5 */ + 10852 "00000000" // /* MW 4 */ + 10853 "00010000" // /* MW 3 */ + 10854 "00010001" // /* MW 2 */ + 10855 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10856 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10857 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10859 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10861 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 269 40 +.delay_slot + 10862 "10011000" // MUL r0, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10863 "00001111" // /* MW 3 */ + 10864 "01000001" // /* MW 2 */ + 10865 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10866 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 10867 "00011100" // /* MW 13 */ + 10868 "00000000" // /* MW 12 */ + 10869 "00000000" // /* MW 11 */ + 10870 "01010111" // /* MW 10 */ + 10871 "00011010" // /* MW 9 */ + 10872 "01000000" // /* MW 8 */ + 10873 "00000000" // /* MW 7 */ + 10874 "00000000" // /* MW 6 */ + 10875 "10110110" // /* MW 5 */ + 10876 "00000010" // /* MW 4 */ + 10877 "11110000" // /* MW 3 */ + 10878 "00101100" // /* MW 2 */ + 10879 "00000000" // /* MW 1 */ +.return_address + 10880 "10000100" // J #10928 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10928 delay_slots=5 */ + 10881 "00000000" // /* MW 5 */ + 10882 "00000000" // /* MW 4 */ + 10883 "01011000" // /* MW 3 */ + 10884 "00010101" // /* MW 2 */ + 10885 "00000000" // /* MW 1 */ +.delay_slot + 10886 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10887 "00000001" // /* MW 3 */ + 10888 "00100000" // /* MW 2 */ + 10889 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10891 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10892 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10893 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10895 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10896 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10897 "00000000" // /* MW 15 */ + 10898 "00000000" // /* MW 14 */ + 10899 "01111000" // /* MW 13 */ + 10900 "10100101" // /* MW 12 */ + 10901 "00000001" // /* MW 11 */ + 10902 "00000000" // /* MW 10 */ + 10903 "00000000" // /* MW 9 */ + 10904 "00000000" // /* MW 8 */ + 10905 "01011011" // /* MW 7 */ + 10906 "00000001" // /* MW 6 */ + 10907 "00100000" // /* MW 5 */ + 10908 "00000000" // /* MW 4 */ + 10909 "11110000" // /* MW 3 */ + 10910 "00101100" // /* MW 2 */ + 10911 "00000000" // /* MW 1 */ +.label TGT_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_1872 +.src_ref 7 "gemm_bfp16.h" 272 25 first + 10912 "11100001" // NOPA; NOPB; NOPS; ADD r16, r16, #1; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10913 "00000000" // /* MW 15 */ + 10914 "00000000" // /* MW 14 */ + 10915 "01111000" // /* MW 13 */ + 10916 "10100101" // /* MW 12 */ + 10917 "00000001" // /* MW 11 */ + 10918 "00111000" // /* MW 10 */ + 10919 "00000000" // /* MW 9 */ + 10920 "00100001" // /* MW 8 */ + 10921 "01011011" // /* MW 7 */ + 10922 "00000001" // /* MW 6 */ + 10923 "00100000" // /* MW 5 */ + 10924 "00000000" // /* MW 4 */ + 10925 "11110000" // /* MW 3 */ + 10926 "00101100" // /* MW 2 */ + 10927 "00000000" // /* MW 1 */ +.label TGT_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_1888 +.src_ref 7 "gemm_bfp16.h" 274 + 10928 "00011000" // LDA lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10929 "00111001" // /* MW 3 */ + 10930 "11111000" // /* MW 2 */ + 10931 "00000111" // /* MW 1 */ + 10932 "00011000" // LDA p7, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10933 "10011001" // /* MW 3 */ + 10934 "11110011" // /* MW 2 */ + 10935 "00000111" // /* MW 1 */ + 10936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10937 "00000000" // /* MW 1 */ + 10938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10939 "00000000" // /* MW 1 */ + 10940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10941 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 10942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10943 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.noswbrkpt + 10944 "00011000" // LDA p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10945 "00011001" // /* MW 3 */ + 10946 "11111111" // /* MW 2 */ + 10947 "00000111" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 274 first +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 10948 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10949 "00000000" // /* MW 3 */ + 10950 "00101000" // /* MW 2 */ + 10951 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 10952 "10111000" // MOV dj1, #280 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10953 "00110000" // /* MW 3 */ + 10954 "10000010" // /* MW 2 */ + 10955 "00011001" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10956 "10011000" // ST r16, [p6, dj1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10957 "00010001" // /* MW 3 */ + 10958 "00100010" // /* MW 2 */ + 10959 "00001110" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 274 first +.delay_slot + 10960 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10961 "00000001" // /* MW 5 */ + 10962 "00000000" // /* MW 4 */ + 10963 "00000000" // /* MW 3 */ + 10964 "11111000" // /* MW 2 */ + 10965 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10967 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params__end +.label __Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params___func_end0 + 10969 "00000000" // /* MW 1 */ +.label __Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_GemmBfp16 _Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 6 "superkernels.cpp" 381 first +.src_ref 6 "superkernels.cpp" 382 6 +.src_ref 6 "superkernels.cpp" 388 11 +.function_start + 10976 "00111010" // MOVS p4, p1; MOVXM p5, #508864 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10977 "00010001" // /* MW 9 */ + 10978 "11100000" // /* MW 8 */ + 10979 "10110001" // /* MW 7 */ + 10980 "11110010" // /* MW 6 */ + 10981 "00000001" // /* MW 5 */ + 10982 "00000000" // /* MW 4 */ + 10983 "01100000" // /* MW 3 */ + 10984 "10010001" // /* MW 2 */ + 10985 "10010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 382 6 first + 10986 "10011000" // LDA r16, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10987 "00010110" // /* MW 3 */ + 10988 "00000110" // /* MW 2 */ + 10989 "00000101" // /* MW 1 */ + 10990 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10991 "00000000" // /* MW 1 */ + 10992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10993 "00000000" // /* MW 1 */ + 10994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10995 "00000000" // /* MW 1 */ + 10996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10997 "00000000" // /* MW 1 */ + 10998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10999 "00000000" // /* MW 1 */ + 11000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11001 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 382 6 +.src_ref 6 "superkernels.cpp" 382 16 + 11002 "10000100" // JNZ r16, #11120 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11120 delay_slots=5 */ + 11003 "00000001" // /* MW 5 */ + 11004 "01000000" // /* MW 4 */ + 11005 "10111000" // /* MW 3 */ + 11006 "00010101" // /* MW 2 */ + 11007 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 381 +.delay_slot + 11008 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11009 "00000001" // /* MW 5 */ + 11010 "00000000" // /* MW 4 */ + 11011 "00000000" // /* MW 3 */ + 11012 "00001000" // /* MW 2 */ + 11013 "00000000" // /* MW 1 */ +.delay_slot + 11014 "10011000" // ST p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11015 "00011101" // /* MW 3 */ + 11016 "11111111" // /* MW 2 */ + 11017 "00001111" // /* MW 1 */ +.delay_slot + 11018 "10011000" // ST p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11019 "10011101" // /* MW 3 */ + 11020 "11110111" // /* MW 2 */ + 11021 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.delay_slot + 11022 "00000010" // ST lr, [sp, #-8]; MOV p7, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11023 "01110000" // /* MW 7 */ + 11024 "01100000" // /* MW 6 */ + 11025 "10110000" // /* MW 5 */ + 11026 "00000011" // /* MW 4 */ + 11027 "10110000" // /* MW 3 */ + 11028 "00000111" // /* MW 2 */ + 11029 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 1 "io_buffer_main.h" 348 51 +.delay_slot + 11030 "11111000" // MOV p6, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11031 "11000000" // /* MW 3 */ + 11032 "01100110" // /* MW 2 */ + 11033 "00011110" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.src_ref 6 "superkernels.cpp" 384 6 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11034 "00111010" // MOVS p0, p2; MOVXM p3, #508944 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11035 "00010001" // /* MW 9 */ + 11036 "00001000" // /* MW 8 */ + 11037 "10110010" // /* MW 7 */ + 11038 "11110001" // /* MW 6 */ + 11039 "00000001" // /* MW 5 */ + 11040 "00000000" // /* MW 4 */ + 11041 "01100000" // /* MW 3 */ + 11042 "00010001" // /* MW 2 */ + 11043 "00010001" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11044 "10111010" // ST.s8 r16, [p3]; MOVXM p3, #508940 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11045 "00010000" // /* MW 9 */ + 11046 "00000110" // /* MW 8 */ + 11047 "10110010" // /* MW 7 */ + 11048 "11110001" // /* MW 6 */ + 11049 "00000001" // /* MW 5 */ + 11050 "00000000" // /* MW 4 */ + 11051 "11100000" // /* MW 3 */ + 11052 "11000000" // /* MW 2 */ + 11053 "01100000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11055 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 384 6 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 11056 "00000100" // JL #7872 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7872 delay_slots=5 */ + 11057 "00000001" // /* MW 5 */ + 11058 "00000000" // /* MW 4 */ + 11059 "01100000" // /* MW 3 */ + 11060 "00001111" // /* MW 2 */ + 11061 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11063 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11065 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11066 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11067 "00110001" // /* MW 3 */ + 11068 "00100000" // /* MW 2 */ + 11069 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 11070 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11071 "00000101" // /* MW 3 */ + 11072 "00100000" // /* MW 2 */ + 11073 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 11074 "00101110" // NOPA; ST r16, [p3]; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 11075 "00011100" // /* MW 13 */ + 11076 "00000000" // /* MW 12 */ + 11077 "00000000" // /* MW 11 */ + 11078 "01010111" // /* MW 10 */ + 11079 "00011010" // /* MW 9 */ + 11080 "01000000" // /* MW 8 */ + 11081 "00000000" // /* MW 7 */ + 11082 "00000000" // /* MW 6 */ + 11083 "00100011" // /* MW 5 */ + 11084 "00001100" // /* MW 4 */ + 11085 "11110110" // /* MW 3 */ + 11086 "00101100" // /* MW 2 */ + 11087 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 385 18 +.src_ref 6 "superkernels.cpp" 385 20 first +.return_address + 11088 "10111010" // LDA el0, [p2, #24]; MOVXM p2, #508872 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11089 "00010000" // /* MW 9 */ + 11090 "11100100" // /* MW 8 */ + 11091 "00110001" // /* MW 7 */ + 11092 "11110001" // /* MW 6 */ + 11093 "00000001" // /* MW 5 */ + 11094 "00000000" // /* MW 4 */ + 11095 "11010000" // /* MW 3 */ + 11096 "10000101" // /* MW 2 */ + 11097 "01001100" // /* MW 1 */ + 11098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11099 "00000000" // /* MW 1 */ + 11100 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11101 "00000000" // /* MW 1 */ + 11102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11103 "00000000" // /* MW 1 */ + 11104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11105 "00000000" // /* MW 1 */ + 11106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11107 "00000000" // /* MW 1 */ + 11108 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11109 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 385 18 + 11110 "01111010" // NOPA; ST el0, [p2]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11111 "00000000" // /* MW 9 */ + 11112 "00000000" // /* MW 8 */ + 11113 "00000000" // /* MW 7 */ + 11114 "10000000" // /* MW 6 */ + 11115 "00101001" // /* MW 5 */ + 11116 "00000100" // /* MW 4 */ + 11117 "11110010" // /* MW 3 */ + 11118 "00101100" // /* MW 2 */ + 11119 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_144 +.src_ref 6 "superkernels.cpp" 387 12 +.src_ref 6 "superkernels.cpp" 388 11 first + 11120 "10111010" // LDA r16, [p5]; MOVXM p2, #508868 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11121 "00010000" // /* MW 9 */ + 11122 "11100010" // /* MW 8 */ + 11123 "00110001" // /* MW 7 */ + 11124 "11110001" // /* MW 6 */ + 11125 "00000001" // /* MW 5 */ + 11126 "00000000" // /* MW 4 */ + 11127 "11010000" // /* MW 3 */ + 11128 "11000010" // /* MW 2 */ + 11129 "10100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 387 12 first + 11130 "10011000" // LDA r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11131 "00110110" // /* MW 3 */ + 11132 "00000110" // /* MW 2 */ + 11133 "00000010" // /* MW 1 */ + 11134 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11135 "00000000" // /* MW 1 */ + 11136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11137 "00000000" // /* MW 1 */ + 11138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11139 "00000000" // /* MW 1 */ + 11140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11141 "00000000" // /* MW 1 */ + 11142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11143 "00000000" // /* MW 1 */ + 11144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11145 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 389 6 first +.src_ref 6 "superkernels.cpp" 389 17 first + 11146 "10000100" // JNZ r17, #11232 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11232 delay_slots=5 */ + 11147 "00000001" // /* MW 5 */ + 11148 "01000000" // /* MW 4 */ + 11149 "11110000" // /* MW 3 */ + 11150 "00010101" // /* MW 2 */ + 11151 "10001000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 388 11 first +.delay_slot + 11152 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11153 "00000111" // /* MW 3 */ + 11154 "00100000" // /* MW 2 */ + 11155 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 388 11 +.delay_slot + 11156 "10011000" // ST r16, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11157 "00010001" // /* MW 3 */ + 11158 "00000110" // /* MW 2 */ + 11159 "00001101" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 387 12 first +.delay_slot + 11160 "00011000" // ADD r16, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11161 "00000111" // /* MW 3 */ + 11162 "01100000" // /* MW 2 */ + 11163 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 387 12 +.delay_slot + 11164 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11165 "00010001" // /* MW 3 */ + 11166 "00000110" // /* MW 2 */ + 11167 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11168 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11169 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 + 11170 "11111000" // MOV r16, p6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11171 "11000000" // /* MW 3 */ + 11172 "00011100" // /* MW 2 */ + 11173 "00011100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 first + 11174 "00011000" // ADD.NC p2, r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11175 "00000110" // /* MW 3 */ + 11176 "01101000" // /* MW 2 */ + 11177 "00011010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 + 11178 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11179 "01110110" // /* MW 3 */ + 11180 "11111111" // /* MW 2 */ + 11181 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 11182 "10011000" // LDA r16, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11183 "00010110" // /* MW 3 */ + 11184 "11111110" // /* MW 2 */ + 11185 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 11186 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11187 "00110110" // /* MW 3 */ + 11188 "11111110" // /* MW 2 */ + 11189 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 11190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11191 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 11192 "10011000" // LDA r16, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11193 "00010110" // /* MW 3 */ + 11194 "01000110" // /* MW 2 */ + 11195 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11197 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11199 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11201 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11203 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11204 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11205 "00000010" // /* MW 3 */ + 11206 "01100001" // /* MW 2 */ + 11207 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11208 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11209 "00010001" // /* MW 3 */ + 11210 "00000110" // /* MW 2 */ + 11211 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 + 11212 "00011000" // MOVX r17, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11213 "11111101" // /* MW 3 */ + 11214 "11100010" // /* MW 2 */ + 11215 "00010111" // /* MW 1 */ + 11216 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11217 "00000000" // /* MW 1 */ + 11218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11219 "00000000" // /* MW 1 */ + 11220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11221 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 11222 "01111010" // NOPA; NOPS; ACQ r16, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11223 "00011000" // /* MW 9 */ + 11224 "00010011" // /* MW 8 */ + 11225 "00000100" // /* MW 7 */ + 11226 "00000000" // /* MW 6 */ + 11227 "01011011" // /* MW 5 */ + 11228 "00000001" // /* MW 4 */ + 11229 "11110000" // /* MW 3 */ + 11230 "00101100" // /* MW 2 */ + 11231 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_256 +.src_ref 7 "gemm_bfp16.h" 285 80 +.src_ref 7 "gemm_bfp16.h" 285 80 + 11232 "10111010" // MOVA r24, #0; MOVXM r16, #2147483616 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11233 "00010000" // /* MW 9 */ + 11234 "11110000" // /* MW 8 */ + 11235 "00001111" // /* MW 7 */ + 11236 "11111110" // /* MW 6 */ + 11237 "11111111" // /* MW 5 */ + 11238 "00011111" // /* MW 4 */ + 11239 "00000000" // /* MW 3 */ + 11240 "00011000" // /* MW 2 */ + 11241 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 285 74 + 11242 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11243 "00000101" // /* MW 3 */ + 11244 "00100010" // /* MW 2 */ + 11245 "00010000" // /* MW 1 */ + 11246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11247 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 7 "gemm_bfp16.h" 285 86 + 11248 "10111010" // LDA p3, [p4]; MOVXM p4, #508428 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11249 "00010000" // /* MW 9 */ + 11250 "00000110" // /* MW 8 */ + 11251 "00110001" // /* MW 7 */ + 11252 "11110010" // /* MW 6 */ + 11253 "00000001" // /* MW 5 */ + 11254 "00000000" // /* MW 4 */ + 11255 "11010000" // /* MW 3 */ + 11256 "10110011" // /* MW 2 */ + 11257 "10000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 285 86 first + 11258 "10011000" // LDA r27, [p4], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11259 "01110110" // /* MW 3 */ + 11260 "11111111" // /* MW 2 */ + 11261 "00000100" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 285 80 + 11262 "10011000" // LDA r18, [p4], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11263 "01010110" // /* MW 3 */ + 11264 "11101110" // /* MW 2 */ + 11265 "00000100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 6 "superkernels.cpp" 393 34 + 11266 "11010100" // LDA p0, [p7]; MOV p7, p4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11267 "10000001" // /* MW 5 */ + 11268 "11010001" // /* MW 4 */ + 11269 "11011110" // /* MW 3 */ + 11270 "10000011" // /* MW 2 */ + 11271 "11100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 + 11272 "10011000" // LDA p2, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11273 "00011110" // /* MW 3 */ + 11274 "00000101" // /* MW 2 */ + 11275 "00000110" // /* MW 1 */ + 11276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11277 "00000000" // /* MW 1 */ + 11278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11279 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 285 74 + 11280 "11111000" // MOV r19, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11281 "11000000" // /* MW 3 */ + 11282 "11010110" // /* MW 2 */ + 11283 "00011100" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 285 4 first +.no_stack_arguments + 11284 "00000100" // JL #9040 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9040 delay_slots=5 */ + 11285 "00000001" // /* MW 5 */ + 11286 "00000000" // /* MW 4 */ + 11287 "10101000" // /* MW 3 */ + 11288 "00010001" // /* MW 2 */ + 11289 "00000000" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 285 80 +.delay_slot + 11290 "00011000" // ADD r18, r18, #31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11291 "01111111" // /* MW 3 */ + 11292 "10100100" // /* MW 2 */ + 11293 "00010100" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 285 80 +.delay_slot + 11294 "10011000" // AND r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11295 "00000100" // /* MW 3 */ + 11296 "10100001" // /* MW 2 */ + 11297 "00010100" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 285 80 +.delay_slot + 11298 "00011000" // SEL.EQZ r16, r24, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11299 "00000010" // /* MW 3 */ + 11300 "00100001" // /* MW 2 */ + 11301 "00010110" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 285 74 +.delay_slot + 11302 "10011000" // LSHL r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11303 "00011101" // /* MW 3 */ + 11304 "00100001" // /* MW 2 */ + 11305 "00010100" // /* MW 1 */ +.src_ref 7 "gemm_bfp16.h" 285 74 +.delay_slot + 11306 "10010100" // NOPA; ADD.NC p1, r19, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11307 "10000010" // /* MW 5 */ + 11308 "11010011" // /* MW 4 */ + 11309 "11110010" // /* MW 3 */ + 11310 "00101100" // /* MW 2 */ + 11311 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 393 6 +.src_ref 6 "superkernels.cpp" 393 34 first +.src_ref 6 "superkernels.cpp" 394 17 +.return_address + 11312 "10111010" // LDA r16, [p7, #16]; MOVXM p2, #508868 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11313 "00010000" // /* MW 9 */ + 11314 "11100010" // /* MW 8 */ + 11315 "00110001" // /* MW 7 */ + 11316 "11110001" // /* MW 6 */ + 11317 "00000001" // /* MW 5 */ + 11318 "00000000" // /* MW 4 */ + 11319 "11010000" // /* MW 3 */ + 11320 "11000010" // /* MW 2 */ + 11321 "11101000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 +.src_ref 6 "superkernels.cpp" 393 6 + 11322 "11010100" // LDA r18, [p2]; MOV r17, p6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11323 "10000001" // /* MW 5 */ + 11324 "10111001" // /* MW 4 */ + 11325 "11011000" // /* MW 3 */ + 11326 "11001010" // /* MW 2 */ + 11327 "01000000" // /* MW 1 */ + 11328 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11329 "10011001" // /* MW 3 */ + 11330 "11110111" // /* MW 2 */ + 11331 "00000111" // /* MW 1 */ + 11332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11333 "00000000" // /* MW 1 */ + 11334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11335 "00000000" // /* MW 1 */ + 11336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11337 "00000000" // /* MW 1 */ + 11338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11339 "00000000" // /* MW 1 */ + 11340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11341 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 393 17 + 11342 "10011000" // NE r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11343 "00001000" // /* MW 3 */ + 11344 "10100001" // /* MW 2 */ + 11345 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 393 6 + 11346 "10000100" // JNZ r16, #11424 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11424 delay_slots=5 */ + 11347 "00000001" // /* MW 5 */ + 11348 "01000000" // /* MW 4 */ + 11349 "01010000" // /* MW 3 */ + 11350 "00010110" // /* MW 2 */ + 11351 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 394 17 +.src_ref 6 "superkernels.cpp" 398 16 +.delay_slot + 11352 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11353 "00000001" // /* MW 3 */ + 11354 "00110000" // /* MW 2 */ + 11355 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11357 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11359 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11361 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11363 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 449 8 +.src_ref 6 "superkernels.cpp" 394 17 first + 11364 "00111010" // ST r24, [p2]; MOVX r16, #1; ADD.NC p6, r17, #20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11365 "00001001" // /* MW 9 */ + 11366 "01000101" // /* MW 8 */ + 11367 "00110100" // /* MW 7 */ + 11368 "00101011" // /* MW 6 */ + 11369 "00000000" // /* MW 5 */ + 11370 "00000001" // /* MW 4 */ + 11371 "00110000" // /* MW 3 */ + 11372 "11100010" // /* MW 2 */ + 11373 "01000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 + 11374 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11375 "00110110" // /* MW 3 */ + 11376 "00000110" // /* MW 2 */ + 11377 "00000110" // /* MW 1 */ + 11378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11379 "00000000" // /* MW 1 */ + 11380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11381 "00000000" // /* MW 1 */ + 11382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11383 "00000000" // /* MW 1 */ + 11384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11385 "00000000" // /* MW 1 */ + 11386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11387 "00000000" // /* MW 1 */ + 11388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11389 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 11390 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11391 "00001000" // /* MW 3 */ + 11392 "01010001" // /* MW 2 */ + 11393 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first + 11394 "10011000" // LDA r17, [p6, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11395 "00110110" // /* MW 3 */ + 11396 "11100110" // /* MW 2 */ + 11397 "00000110" // /* MW 1 */ + 11398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11399 "00000000" // /* MW 1 */ + 11400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11401 "00000000" // /* MW 1 */ + 11402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11403 "00000000" // /* MW 1 */ + 11404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11405 "00000000" // /* MW 1 */ + 11406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11407 "00000000" // /* MW 1 */ + 11408 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11409 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 + 11410 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11411 "00010001" // /* MW 3 */ + 11412 "00100001" // /* MW 2 */ + 11413 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 11414 "01111010" // NOPA; ST r16, [p6, #-8]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11415 "00000000" // /* MW 9 */ + 11416 "00000000" // /* MW 8 */ + 11417 "00000000" // /* MW 7 */ + 11418 "10000000" // /* MW 6 */ + 11419 "00010001" // /* MW 5 */ + 11420 "11100110" // /* MW 4 */ + 11421 "11110110" // /* MW 3 */ + 11422 "00101100" // /* MW 2 */ + 11423 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 6 "superkernels.cpp" 397 6 +.src_ref 6 "superkernels.cpp" 398 16 + 11424 "01000100" // MOVXM p2, #508864 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11425 "10000000" // /* MW 5 */ + 11426 "11000111" // /* MW 4 */ + 11427 "11000100" // /* MW 3 */ + 11428 "00000111" // /* MW 2 */ + 11429 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 397 6 first +.src_ref 6 "superkernels.cpp" 397 19 + 11430 "10111010" // LDA r16, [p2]; MOVXM p3, #508872 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11431 "00010000" // /* MW 9 */ + 11432 "11100100" // /* MW 8 */ + 11433 "10110001" // /* MW 7 */ + 11434 "11110001" // /* MW 6 */ + 11435 "00000001" // /* MW 5 */ + 11436 "00000000" // /* MW 4 */ + 11437 "11010000" // /* MW 3 */ + 11438 "11000010" // /* MW 2 */ + 11439 "01000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 397 19 + 11440 "10011000" // LDA r17, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11441 "00110110" // /* MW 3 */ + 11442 "00000110" // /* MW 2 */ + 11443 "00000011" // /* MW 1 */ + 11444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11445 "00000000" // /* MW 1 */ + 11446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11447 "00000000" // /* MW 1 */ + 11448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11449 "00000000" // /* MW 1 */ + 11450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11451 "00000000" // /* MW 1 */ + 11452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11453 "00000000" // /* MW 1 */ + 11454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11455 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 397 16 + 11456 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11457 "00001000" // /* MW 3 */ + 11458 "01100001" // /* MW 2 */ + 11459 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 397 6 + 11460 "10000100" // JNZ r16, #11488 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11488 delay_slots=5 */ + 11461 "00000001" // /* MW 5 */ + 11462 "01000000" // /* MW 4 */ + 11463 "01110000" // /* MW 3 */ + 11464 "00010110" // /* MW 2 */ + 11465 "10000000" // /* MW 1 */ +.delay_slot + 11466 "00011000" // LDA p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11467 "00011001" // /* MW 3 */ + 11468 "11111111" // /* MW 2 */ + 11469 "00000111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11471 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11473 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11475 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11477 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 398 16 first + 11478 "01111010" // NOPA; ST r24, [p2]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11479 "00000000" // /* MW 9 */ + 11480 "00000000" // /* MW 8 */ + 11481 "00000000" // /* MW 7 */ + 11482 "10000000" // /* MW 6 */ + 11483 "00010001" // /* MW 5 */ + 11484 "00000111" // /* MW 4 */ + 11485 "11110010" // /* MW 3 */ + 11486 "00101100" // /* MW 2 */ + 11487 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_512 +.src_ref 6 "superkernels.cpp" 400 + 11488 "00011000" // LDA lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11489 "00111001" // /* MW 3 */ + 11490 "11111000" // /* MW 2 */ + 11491 "00000111" // /* MW 1 */ + 11492 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11493 "00000000" // /* MW 1 */ + 11494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11495 "00000000" // /* MW 1 */ + 11496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11497 "00000000" // /* MW 1 */ + 11498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11499 "00000000" // /* MW 1 */ + 11500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11501 "00000000" // /* MW 1 */ + 11502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11503 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 400 first + 11504 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11505 "00000000" // /* MW 3 */ + 11506 "00101000" // /* MW 2 */ + 11507 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 400 +.delay_slot + 11508 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11509 "00000001" // /* MW 5 */ + 11510 "00000000" // /* MW 4 */ + 11511 "00000000" // /* MW 3 */ + 11512 "11111000" // /* MW 2 */ + 11513 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11515 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11517 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11519 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 11521 "00000000" // /* MW 1 */ +.label __Z15_b13786_wrapperPPv___func_begin0 +.label _Z15_b13786_wrapperPPv +.function _b13786_wrapper _Z15_b13786_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 20 first +.src_ref 0 "0_0_reloadable5.cc" 22 79 +.function_start + 11536 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11537 "11000000" // /* MW 3 */ + 11538 "01100000" // /* MW 2 */ + 11539 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 22 79 first + 11540 "10011000" // LDA p0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11541 "00011110" // /* MW 3 */ + 11542 "00011100" // /* MW 2 */ + 11543 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 23 79 first + 11544 "10011000" // LDA p1, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11545 "10011110" // /* MW 3 */ + 11546 "00101100" // /* MW 2 */ + 11547 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 25 81 first + 11548 "10011000" // LDA p3, [p2, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11549 "10011110" // /* MW 3 */ + 11550 "11110101" // /* MW 2 */ + 11551 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 24 46 first + 11552 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11553 "00011110" // /* MW 3 */ + 11554 "00000101" // /* MW 2 */ + 11555 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 21 4 first +.tail_call + 11556 "10000100" // J #10976 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10976 delay_slots=5 */ + 11557 "00000000" // /* MW 5 */ + 11558 "00000000" // /* MW 4 */ + 11559 "01110000" // /* MW 3 */ + 11560 "00010101" // /* MW 2 */ + 11561 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11563 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11565 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11567 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11569 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z15_b13786_wrapperPPv__end +.label __Z15_b13786_wrapperPPv___func_end0 + 11571 "00000000" // /* MW 1 */ +.label __Z15_b13811_wrapperPPv___func_begin0 +.label _Z15_b13811_wrapperPPv +.function _b13811_wrapper _Z15_b13811_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 29 first +.src_ref 0 "0_0_reloadable5.cc" 31 79 +.function_start + 11584 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11585 "11000000" // /* MW 3 */ + 11586 "01100000" // /* MW 2 */ + 11587 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 31 79 first + 11588 "10011000" // LDA p0, [p2], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11589 "00011110" // /* MW 3 */ + 11590 "00111100" // /* MW 2 */ + 11591 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 32 47 first + 11592 "10011000" // LDA p1, [p2], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11593 "10011110" // /* MW 3 */ + 11594 "11101100" // /* MW 2 */ + 11595 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 34 81 first + 11596 "10011000" // LDA p3, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11597 "10011110" // /* MW 3 */ + 11598 "00010101" // /* MW 2 */ + 11599 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 33 80 first + 11600 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11601 "00011110" // /* MW 3 */ + 11602 "00000101" // /* MW 2 */ + 11603 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 30 4 first +.tail_call + 11604 "10000100" // J #6144 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=6144 delay_slots=5 */ + 11605 "00000000" // /* MW 5 */ + 11606 "00000000" // /* MW 4 */ + 11607 "00000000" // /* MW 3 */ + 11608 "00001100" // /* MW 2 */ + 11609 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11611 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11613 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11615 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11616 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11617 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z15_b13811_wrapperPPv__end +.label __Z15_b13811_wrapperPPv___func_end0 + 11619 "00000000" // /* MW 1 */ +.label __Z15_b13739_wrapperPPv___func_begin0 +.label _Z15_b13739_wrapperPPv +.function _b13739_wrapper _Z15_b13739_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 38 first +.src_ref 0 "0_0_reloadable5.cc" 40 79 +.function_start + 11632 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11633 "11000000" // /* MW 3 */ + 11634 "01100000" // /* MW 2 */ + 11635 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 40 79 first + 11636 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11637 "00011110" // /* MW 3 */ + 11638 "00101100" // /* MW 2 */ + 11639 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 42 81 first + 11640 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11641 "00011110" // /* MW 3 */ + 11642 "11110101" // /* MW 2 */ + 11643 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 41 47 first + 11644 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11645 "10011110" // /* MW 3 */ + 11646 "00000100" // /* MW 2 */ + 11647 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 39 4 first +.tail_call + 11648 "10000100" // J #3904 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=3904 delay_slots=5 */ + 11649 "00000000" // /* MW 5 */ + 11650 "00000000" // /* MW 4 */ + 11651 "10100000" // /* MW 3 */ + 11652 "00000111" // /* MW 2 */ + 11653 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11655 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11657 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11659 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11661 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z15_b13739_wrapperPPv__end +.label __Z15_b13739_wrapperPPv___func_end0 + 11663 "00000000" // /* MW 1 */ +.label __Z15_b13744_wrapperPPv___func_begin0 +.label _Z15_b13744_wrapperPPv +.function _b13744_wrapper _Z15_b13744_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 46 first +.src_ref 0 "0_0_reloadable5.cc" 48 79 +.function_start + 11664 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11665 "11000000" // /* MW 3 */ + 11666 "01100000" // /* MW 2 */ + 11667 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 48 79 first + 11668 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11669 "00011110" // /* MW 3 */ + 11670 "00101100" // /* MW 2 */ + 11671 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 50 81 first + 11672 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11673 "00011110" // /* MW 3 */ + 11674 "11110101" // /* MW 2 */ + 11675 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 49 47 first + 11676 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11677 "10011110" // /* MW 3 */ + 11678 "00000100" // /* MW 2 */ + 11679 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 47 4 first +.tail_call + 11680 "10000100" // J #5360 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=5360 delay_slots=5 */ + 11681 "00000000" // /* MW 5 */ + 11682 "00000000" // /* MW 4 */ + 11683 "01111000" // /* MW 3 */ + 11684 "00001010" // /* MW 2 */ + 11685 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11687 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11689 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11691 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11693 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11694 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z15_b13744_wrapperPPv__end +.label __Z15_b13744_wrapperPPv___func_end0 + 11695 "00000000" // /* MW 1 */ +.label __Z15_b13749_wrapperPPv___func_begin0 +.label _Z15_b13749_wrapperPPv +.function _b13749_wrapper _Z15_b13749_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 54 first +.src_ref 0 "0_0_reloadable5.cc" 56 79 +.function_start + 11696 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11697 "11000000" // /* MW 3 */ + 11698 "01100000" // /* MW 2 */ + 11699 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 56 79 first + 11700 "10011000" // LDA p0, [p2], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11701 "00011110" // /* MW 3 */ + 11702 "00111100" // /* MW 2 */ + 11703 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 57 47 first + 11704 "10011000" // LDA p1, [p2], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11705 "10011110" // /* MW 3 */ + 11706 "11101100" // /* MW 2 */ + 11707 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 59 81 first + 11708 "10011000" // LDA p3, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11709 "10011110" // /* MW 3 */ + 11710 "00010101" // /* MW 2 */ + 11711 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 58 80 first + 11712 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11713 "00011110" // /* MW 3 */ + 11714 "00000101" // /* MW 2 */ + 11715 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 55 4 first +.tail_call + 11716 "10000100" // J #7264 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=7264 delay_slots=5 */ + 11717 "00000000" // /* MW 5 */ + 11718 "00000000" // /* MW 4 */ + 11719 "00110000" // /* MW 3 */ + 11720 "00001110" // /* MW 2 */ + 11721 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11723 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11725 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11727 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11728 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11729 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z15_b13749_wrapperPPv__end +.label __Z15_b13749_wrapperPPv___func_end0 + 11731 "00000000" // /* MW 1 */ +.label _ZL30setup_rmsnorm_row_major_paramsR33rmsnorm_row_major_internal_paramsRA10_Kj +.label __ZL30setup_rmsnorm_row_major_paramsR33rmsnorm_row_major_internal_paramsRA10_Kj___func_begin0 +.function setup_rmsnorm_row_major_params _ZL30setup_rmsnorm_row_major_paramsR33rmsnorm_row_major_internal_paramsRA10_Kj +.src_ref 3 "rmsnorm_row_major_params.h" 45 first +.src_ref 3 "rmsnorm_row_major_params.h" 48 34 +.src_ref 3 "rmsnorm_row_major_params.h" 49 21 +.src_ref 3 "rmsnorm_row_major_params.h" 62 38 +.function_start + 11744 "01110110" // MOVA m0, #-24; MOVS p1, p7; MOVX r20, #-1; MOV r16, p0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 11745 "01111000" // /* MW 11 */ + 11746 "01100000" // /* MW 10 */ + 11747 "00001000" // /* MW 9 */ + 11748 "11101010" // /* MW 8 */ + 11749 "01000111" // /* MW 7 */ + 11750 "00111111" // /* MW 6 */ + 11751 "10001011" // /* MW 5 */ + 11752 "10011100" // /* MW 4 */ + 11753 "10000001" // /* MW 3 */ + 11754 "00000000" // /* MW 2 */ + 11755 "11111101" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 48 34 first +.src_ref 3 "rmsnorm_row_major_params.h" 51 19 +.src_ref 3 "rmsnorm_row_major_params.h" 62 27 +.src_ref 3 "rmsnorm_row_major_params.h" 63 27 +.src_ref 3 "rmsnorm_row_major_params.h" 64 23 +.src_ref 3 "rmsnorm_row_major_params.h" 65 23 + 11756 "10111010" // MOVA m1, #54; MOVX r16, #1; ADD.NC p0, r16, #20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11757 "00001000" // /* MW 9 */ + 11758 "00000101" // /* MW 8 */ + 11759 "00110100" // /* MW 7 */ + 11760 "00101000" // /* MW 6 */ + 11761 "00000000" // /* MW 5 */ + 11762 "00000001" // /* MW 4 */ + 11763 "10000000" // /* MW 3 */ + 11764 "11000100" // /* MW 2 */ + 11765 "00000110" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 48 21 +.src_ref 3 "rmsnorm_row_major_params.h" 50 29 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11766 "10111010" // LDA.s16 r18, [p0], #4; MOVXM p7, #508788 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11767 "00010000" // /* MW 9 */ + 11768 "10111010" // /* MW 8 */ + 11769 "10110001" // /* MW 7 */ + 11770 "11110011" // /* MW 6 */ + 11771 "00000001" // /* MW 5 */ + 11772 "00000000" // /* MW 4 */ + 11773 "01010000" // /* MW 3 */ + 11774 "11001010" // /* MW 2 */ + 11775 "00000101" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 50 29 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11776 "11010100" // ST.s16 r18, [p7], #2; MOV r0, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11777 "01000001" // /* MW 5 */ + 11778 "00101111" // /* MW 4 */ + 11779 "11100000" // /* MW 3 */ + 11780 "11001010" // /* MW 2 */ + 11781 "11100011" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 49 21 first +.src_ref 3 "rmsnorm_row_major_params.h" 68 70 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11782 "01010100" // LDA.s16 r17, [p0], m0; MOV m0, #-76 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11783 "11010001" // /* MW 5 */ + 11784 "00011110" // /* MW 4 */ + 11785 "01010000" // /* MW 3 */ + 11786 "01000110" // /* MW 2 */ + 11787 "00000001" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 53 23 first + 11788 "10011000" // LDA el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11789 "00101110" // /* MW 3 */ + 11790 "00011100" // /* MW 2 */ + 11791 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 45 + 11792 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11793 "00000001" // /* MW 5 */ + 11794 "00000000" // /* MW 4 */ + 11795 "00000000" // /* MW 3 */ + 11796 "00001000" // /* MW 2 */ + 11797 "00000000" // /* MW 1 */ + 11798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11799 "00000000" // /* MW 1 */ + 11800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11801 "00000000" // /* MW 1 */ + 11802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11803 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 51 19 first + 11804 "00011000" // ST.s16 r17, [p7], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11805 "00110111" // /* MW 3 */ + 11806 "00101010" // /* MW 2 */ + 11807 "00000111" // /* MW 1 */ + 11808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11809 "00000000" // /* MW 1 */ + 11810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11811 "00000000" // /* MW 1 */ + 11812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11813 "00000000" // /* MW 1 */ + 11814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11815 "00000000" // /* MW 1 */ + 11816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11817 "00000000" // /* MW 1 */ + 11818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11819 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 68 70 first + 11820 "00001100" // LDA r15, [p7], m0; ST r13, [sp, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11821 "01101011" // /* MW 5 */ + 11822 "11111011" // /* MW 4 */ + 11823 "11011111" // /* MW 3 */ + 11824 "00111110" // /* MW 2 */ + 11825 "11100001" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 53 21 first + 11826 "10011000" // ST el0, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11827 "00101001" // /* MW 3 */ + 11828 "00011100" // /* MW 2 */ + 11829 "00001111" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 54 24 first + 11830 "00001100" // LDA r17, [p0], #4; ST lr, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11831 "01111011" // /* MW 5 */ + 11832 "11011000" // /* MW 4 */ + 11833 "11011111" // /* MW 3 */ + 11834 "11000110" // /* MW 2 */ + 11835 "00000011" // /* MW 1 */ + 11836 "10011000" // ST r0, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11837 "00010101" // /* MW 3 */ + 11838 "11111000" // /* MW 2 */ + 11839 "00001111" // /* MW 1 */ + 11840 "10011000" // ST r14, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11841 "11010101" // /* MW 3 */ + 11842 "11110001" // /* MW 2 */ + 11843 "00001111" // /* MW 1 */ + 11844 "10011000" // ST p1, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11845 "10011101" // /* MW 3 */ + 11846 "11110100" // /* MW 2 */ + 11847 "00001111" // /* MW 1 */ + 11848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11849 "00000000" // /* MW 1 */ + 11850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11851 "00000000" // /* MW 1 */ + 11852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11853 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 54 22 + 11854 "10011000" // ST r17, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11855 "00110001" // /* MW 3 */ + 11856 "00011110" // /* MW 2 */ + 11857 "00001111" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 55 23 first + 11858 "10011000" // LDA r18, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11859 "01010110" // /* MW 3 */ + 11860 "00011110" // /* MW 2 */ + 11861 "00000000" // /* MW 1 */ + 11862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11863 "00000000" // /* MW 1 */ + 11864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11865 "00000000" // /* MW 1 */ + 11866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11867 "00000000" // /* MW 1 */ + 11868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11869 "00000000" // /* MW 1 */ + 11870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11871 "00000000" // /* MW 1 */ + 11872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11873 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 55 21 + 11874 "10011000" // ST r18, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11875 "01010001" // /* MW 3 */ + 11876 "00011110" // /* MW 2 */ + 11877 "00001111" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 56 25 first + 11878 "10011000" // LDA r19, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11879 "01110110" // /* MW 3 */ + 11880 "00011110" // /* MW 2 */ + 11881 "00000000" // /* MW 1 */ + 11882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11883 "00000000" // /* MW 1 */ + 11884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11885 "00000000" // /* MW 1 */ + 11886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11887 "00000000" // /* MW 1 */ + 11888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11889 "00000000" // /* MW 1 */ + 11890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11891 "00000000" // /* MW 1 */ + 11892 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11893 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 56 23 + 11894 "10011000" // ST r19, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11895 "01110001" // /* MW 3 */ + 11896 "00011110" // /* MW 2 */ + 11897 "00001111" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 57 17 first + 11898 "10011000" // LDA r21, [p0], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11899 "10110110" // /* MW 3 */ + 11900 "00111110" // /* MW 2 */ + 11901 "00000000" // /* MW 1 */ + 11902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11903 "00000000" // /* MW 1 */ + 11904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11905 "00000000" // /* MW 1 */ + 11906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11907 "00000000" // /* MW 1 */ + 11908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11909 "00000000" // /* MW 1 */ + 11910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11911 "00000000" // /* MW 1 */ + 11912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11913 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 57 15 +.src_ref 3 "rmsnorm_row_major_params.h" 62 38 first + 11914 "01011100" // ST r21, [p7], #8; EQ r27, r21, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11915 "10001111" // /* MW 5 */ + 11916 "11101110" // /* MW 4 */ + 11917 "00111010" // /* MW 3 */ + 11918 "11010110" // /* MW 2 */ + 11919 "11100101" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 58 24 first +.src_ref 3 "rmsnorm_row_major_params.h" 62 27 + 11920 "00101100" // LDA r20, [p0], #4; SEL.EQZ r21, r18, r16, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11921 "00000100" // /* MW 5 */ + 11922 "01010110" // /* MW 4 */ + 11923 "11011001" // /* MW 3 */ + 11924 "11010010" // /* MW 2 */ + 11925 "00000011" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 62 27 first +.src_ref 3 "rmsnorm_row_major_params.h" 63 27 first + 11926 "00011000" // SEL.EQZ r18, r16, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11927 "00100010" // /* MW 3 */ + 11928 "00100101" // /* MW 2 */ + 11929 "00010100" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 63 27 + 11930 "10011000" // MUL r13, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11931 "00101111" // /* MW 3 */ + 11932 "01011011" // /* MW 2 */ + 11933 "00010100" // /* MW 1 */ + 11934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11935 "00000000" // /* MW 1 */ + 11936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11937 "00000000" // /* MW 1 */ + 11938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11939 "00000000" // /* MW 1 */ + 11940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11941 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 58 22 first + 11942 "10011000" // ST r20, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11943 "10010001" // /* MW 3 */ + 11944 "00011110" // /* MW 2 */ + 11945 "00001111" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 59 23 first + 11946 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11947 "00110110" // /* MW 3 */ + 11948 "00000110" // /* MW 2 */ + 11949 "00000000" // /* MW 1 */ + 11950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11951 "00000000" // /* MW 1 */ + 11952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11953 "00000000" // /* MW 1 */ + 11954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11955 "00000000" // /* MW 1 */ + 11956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11957 "00000000" // /* MW 1 */ + 11958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11959 "00000000" // /* MW 1 */ + 11960 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11961 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 59 21 +.src_ref 3 "rmsnorm_row_major_params.h" 62 27 first +.src_ref 3 "rmsnorm_row_major_params.h" 64 23 first + 11962 "01011100" // ST r17, [p7], #4; SEL.EQZ r18, r17, r16, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11963 "00000100" // /* MW 5 */ + 11964 "11001010" // /* MW 4 */ + 11965 "00111000" // /* MW 3 */ + 11966 "11000110" // /* MW 2 */ + 11967 "11100011" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 60 25 first +.src_ref 3 "rmsnorm_row_major_params.h" 62 27 +.src_ref 3 "rmsnorm_row_major_params.h" 65 23 first + 11968 "00101100" // LDA r16, [p0, #4]; SEL.EQZ r17, r16, r17, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11969 "00100100" // /* MW 5 */ + 11970 "01000110" // /* MW 4 */ + 11971 "11011000" // /* MW 3 */ + 11972 "11000010" // /* MW 2 */ + 11973 "00000010" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 65 23 + 11974 "10011000" // MUL r14, r17, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11975 "01001111" // /* MW 3 */ + 11976 "01011101" // /* MW 2 */ + 11977 "00010100" // /* MW 1 */ + 11978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11979 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 62 27 first + 11980 "10011000" // MUL r0, r19, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11981 "01011111" // /* MW 3 */ + 11982 "11000001" // /* MW 2 */ + 11983 "00010100" // /* MW 1 */ + 11984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11985 "00000000" // /* MW 1 */ + 11986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11987 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 66 49 first +.no_stack_arguments + 11988 "00000100" // JL #15728 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=15728 delay_slots=5 */ + 11989 "00000001" // /* MW 5 */ + 11990 "00000000" // /* MW 4 */ + 11991 "10111000" // /* MW 3 */ + 11992 "00011110" // /* MW 2 */ + 11993 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 60 23 first +.src_ref 3 "rmsnorm_row_major_params.h" 64 23 first +.delay_slot + 11994 "01011100" // ST r16, [p7], #4; MUL r1, r18, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11995 "00011111" // /* MW 5 */ + 11996 "00000110" // /* MW 4 */ + 11997 "00111001" // /* MW 3 */ + 11998 "11000010" // /* MW 2 */ + 11999 "11100011" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 62 25 first +.delay_slot + 12000 "10011000" // ST r0, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12001 "00010001" // /* MW 3 */ + 12002 "00011100" // /* MW 2 */ + 12003 "00001111" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 63 25 first +.delay_slot + 12004 "10011000" // ST r13, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12005 "10110001" // /* MW 3 */ + 12006 "00011101" // /* MW 2 */ + 12007 "00001111" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 64 21 first +.delay_slot + 12008 "10011000" // ST r1, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12009 "00110001" // /* MW 3 */ + 12010 "00011100" // /* MW 2 */ + 12011 "00001111" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 65 21 first +.delay_slot + 12012 "10011000" // ST r14, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12013 "11010001" // /* MW 3 */ + 12014 "00011101" // /* MW 2 */ + 12015 "00001111" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 66 26 first +.src_ref 3 "rmsnorm_row_major_params.h" 67 51 +.src_ref 3 "rmsnorm_row_major_params.h" 68 43 +.src_ref 3 "rmsnorm_row_major_params.h" 75 +.return_address + 12016 "01110110" // LDA r13, [sp, #-20]; ST r2, [p7], #4; ADD r16, r2, #63; MOV r0, r13 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12017 "01111000" // /* MW 11 */ + 12018 "01010000" // /* MW 10 */ + 12019 "00001011" // /* MW 9 */ + 12020 "11111000" // /* MW 8 */ + 12021 "00000111" // /* MW 7 */ + 12022 "10000101" // /* MW 6 */ + 12023 "01010001" // /* MW 5 */ + 12024 "00011100" // /* MW 4 */ + 12025 "00100111" // /* MW 3 */ + 12026 "10110110" // /* MW 2 */ + 12027 "11111101" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 68 43 first +.no_stack_arguments + 12028 "00000100" // JL #15728 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=15728 delay_slots=5 */ + 12029 "00000001" // /* MW 5 */ + 12030 "00000000" // /* MW 4 */ + 12031 "10111000" // /* MW 3 */ + 12032 "00011110" // /* MW 2 */ + 12033 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 67 25 +.src_ref 3 "rmsnorm_row_major_params.h" 68 43 +.delay_slot + 12034 "11100100" // MOVX r17, #-64; MOV r1, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12035 "01000001" // /* MW 5 */ + 12036 "10101110" // /* MW 4 */ + 12037 "00100000" // /* MW 3 */ + 12038 "01000000" // /* MW 2 */ + 12039 "11111100" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 67 25 first +.delay_slot + 12040 "10011000" // AND r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12041 "00000100" // /* MW 3 */ + 12042 "01100001" // /* MW 2 */ + 12043 "00010100" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 67 20 +.delay_slot + 12044 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12045 "00010001" // /* MW 3 */ + 12046 "00000110" // /* MW 2 */ + 12047 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12049 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12050 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 12051 "00011100" // /* MW 13 */ + 12052 "00000000" // /* MW 12 */ + 12053 "00000000" // /* MW 11 */ + 12054 "01010111" // /* MW 10 */ + 12055 "00011010" // /* MW 9 */ + 12056 "01000000" // /* MW 8 */ + 12057 "00000000" // /* MW 7 */ + 12058 "00000000" // /* MW 6 */ + 12059 "10110110" // /* MW 5 */ + 12060 "00000010" // /* MW 4 */ + 12061 "11110000" // /* MW 3 */ + 12062 "00101100" // /* MW 2 */ + 12063 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 68 62 first +.return_address +.no_stack_arguments + 12064 "00000100" // JL #15728 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=15728 delay_slots=5 */ + 12065 "00000001" // /* MW 5 */ + 12066 "00000000" // /* MW 4 */ + 12067 "10111000" // /* MW 3 */ + 12068 "00011110" // /* MW 2 */ + 12069 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 68 62 +.delay_slot + 12070 "11111000" // MOV r1, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12071 "10100000" // /* MW 3 */ + 12072 "01010111" // /* MW 2 */ + 12073 "00011000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 68 62 +.delay_slot + 12074 "11111000" // MOV r0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12075 "00100000" // /* MW 3 */ + 12076 "00010001" // /* MW 2 */ + 12077 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12078 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12079 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12080 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12081 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12082 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 12083 "00011100" // /* MW 13 */ + 12084 "00000000" // /* MW 12 */ + 12085 "00000000" // /* MW 11 */ + 12086 "01010111" // /* MW 10 */ + 12087 "00011010" // /* MW 9 */ + 12088 "01000000" // /* MW 8 */ + 12089 "00000000" // /* MW 7 */ + 12090 "00000000" // /* MW 6 */ + 12091 "10110110" // /* MW 5 */ + 12092 "00000010" // /* MW 4 */ + 12093 "11110000" // /* MW 3 */ + 12094 "00101100" // /* MW 2 */ + 12095 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 75 +.return_address + 12096 "11010100" // LDA r15, [sp, #-8]; MOV lr, r13 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12097 "01000001" // /* MW 5 */ + 12098 "11101101" // /* MW 4 */ + 12099 "00101110" // /* MW 3 */ + 12100 "00111110" // /* MW 2 */ + 12101 "11111111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 12102 "00011000" // LDA r13, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12103 "10110001" // /* MW 3 */ + 12104 "11111101" // /* MW 2 */ + 12105 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 12106 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12107 "10011001" // /* MW 3 */ + 12108 "11110111" // /* MW 2 */ + 12109 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12110 "00011000" // LDA r14, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12111 "11010001" // /* MW 3 */ + 12112 "11110001" // /* MW 2 */ + 12113 "00000111" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 75 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12114 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12115 "00000000" // /* MW 3 */ + 12116 "00101000" // /* MW 2 */ + 12117 "00010000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 68 20 first +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12118 "10011000" // ST r2, [p7, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12119 "01010001" // /* MW 3 */ + 12120 "00010100" // /* MW 2 */ + 12121 "00001111" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major_params.h" 75 first +.delay_slot + 12122 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12123 "00000001" // /* MW 5 */ + 12124 "00000000" // /* MW 4 */ + 12125 "00000000" // /* MW 3 */ + 12126 "11111000" // /* MW 2 */ + 12127 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12129 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12131 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZL30setup_rmsnorm_row_major_paramsR33rmsnorm_row_major_internal_paramsRA10_Kj__end +.label __ZL30setup_rmsnorm_row_major_paramsR33rmsnorm_row_major_internal_paramsRA10_Kj___func_end0 + 12133 "00000000" // /* MW 1 */ +.label __Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params___func_begin0 +.label _Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params +.function rmsnorm_row_major_part1 _Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params +.src_ref 3 "rmsnorm_row_major.h" 48 first +.src_ref 3 "rmsnorm_row_major.h" 60 15 +.src_ref 3 "rmsnorm_row_major.h" 65 51 +.src_ref 3 "rmsnorm_row_major.h" 65 51 +.function_start + 12144 "01110110" // MOVA r24, #0; MOVS p6, p1; MOVX vaddSign0, #1; MOV p2, p6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12145 "01111000" // /* MW 11 */ + 12146 "01100000" // /* MW 10 */ + 12147 "00110110" // /* MW 9 */ + 12148 "00000001" // /* MW 8 */ + 12149 "11010010" // /* MW 7 */ + 12150 "00000010" // /* MW 6 */ + 12151 "10001011" // /* MW 5 */ + 12152 "10000100" // /* MW 4 */ + 12153 "00000110" // /* MW 3 */ + 12154 "00011000" // /* MW 2 */ + 12155 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 60 49 +.src_ref 3 "rmsnorm_row_major.h" 65 51 + 12156 "10111010" // MOVA r16, #1; MOVXM p1, #508944 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12157 "00010000" // /* MW 9 */ + 12158 "00001000" // /* MW 8 */ + 12159 "10110010" // /* MW 7 */ + 12160 "11110000" // /* MW 6 */ + 12161 "00000001" // /* MW 5 */ + 12162 "00000000" // /* MW 4 */ + 12163 "00000000" // /* MW 3 */ + 12164 "00110000" // /* MW 2 */ + 12165 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 53 31 +.src_ref 3 "rmsnorm_row_major.h" 65 51 first + 12166 "01110110" // LDA.s8 r17, [p1]; MOVS p1, p7; MOVXM p7, #508824 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12167 "00010000" // /* MW 11 */ + 12168 "11001100" // /* MW 10 */ + 12169 "10110001" // /* MW 9 */ + 12170 "11110011" // /* MW 8 */ + 12171 "00000001" // /* MW 7 */ + 12172 "00000000" // /* MW 6 */ + 12173 "10001011" // /* MW 5 */ + 12174 "10011100" // /* MW 4 */ + 12175 "01010001" // /* MW 3 */ + 12176 "11000100" // /* MW 2 */ + 12177 "00100000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 53 31 first +.src_ref 3 "rmsnorm_row_major.h" 65 51 + 12178 "11010100" // LDA el0, [p7], #8; VINSERT.32 x0, x0, #0, r24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12179 "00100010" // /* MW 5 */ + 12180 "00000110" // /* MW 4 */ + 12181 "11010000" // /* MW 3 */ + 12182 "10000101" // /* MW 2 */ + 12183 "11100101" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 55 33 first +.src_ref 3 "rmsnorm_row_major.h" 65 51 + 12184 "11010100" // LDA r0, [p7], #8; VMOV bmll0, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12185 "00100101" // /* MW 5 */ + 12186 "00000001" // /* MW 4 */ + 12187 "11010000" // /* MW 3 */ + 12188 "10000010" // /* MW 2 */ + 12189 "11100101" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 58 35 first + 12190 "10011000" // LDA r18, [p7], #-28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12191 "01010110" // /* MW 3 */ + 12192 "10011110" // /* MW 2 */ + 12193 "00000111" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 48 + 12194 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12195 "00000001" // /* MW 5 */ + 12196 "00000000" // /* MW 4 */ + 12197 "00000000" // /* MW 3 */ + 12198 "00010000" // /* MW 2 */ + 12199 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 67 28 first + 12200 "00001100" // LDA r1, [p7]; ST lr, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12201 "01111011" // /* MW 5 */ + 12202 "11011000" // /* MW 4 */ + 12203 "11011111" // /* MW 3 */ + 12204 "10000110" // /* MW 2 */ + 12205 "11100000" // /* MW 1 */ + 12206 "10011000" // ST p2, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12207 "00011101" // /* MW 3 */ + 12208 "11111101" // /* MW 2 */ + 12209 "00001111" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 65 51 + 12210 "01011100" // ST r17, [sp, #-8]; MOVX crRnd, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12211 "00000000" // /* MW 5 */ + 12212 "11110101" // /* MW 4 */ + 12213 "10111000" // /* MW 3 */ + 12214 "01000110" // /* MW 2 */ + 12215 "11111111" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 65 51 first + 12216 "00011000" // VCONV.bf16.fp32 wl0, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12217 "00010110" // /* MW 3 */ + 12218 "01000000" // /* MW 2 */ + 12219 "00001000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 67 20 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first +.no_stack_arguments + 12220 "00111010" // ST p0, [sp, #-12]; JL #15728 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=15728 delay_slots=5 */ + 12221 "01000001" // /* MW 9 */ + 12222 "00000000" // /* MW 8 */ + 12223 "00000000" // /* MW 7 */ + 12224 "10101110" // /* MW 6 */ + 12225 "00000111" // /* MW 5 */ + 12226 "00000000" // /* MW 4 */ + 12227 "10110000" // /* MW 3 */ + 12228 "10000011" // /* MW 2 */ + 12229 "11111110" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 65 51 first +.delay_slot +.aggressive_scheduled_block_id 1 +.noswbrkpt + 12230 "00000010" // ST p1, [sp, #-24]; VEXTRACT.16 r16, x0, #0, vaddSign0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12231 "11010000" // /* MW 7 */ + 12232 "10000000" // /* MW 6 */ + 12233 "00000000" // /* MW 5 */ + 12234 "00000010" // /* MW 4 */ + 12235 "10110000" // /* MW 3 */ + 12236 "00010011" // /* MW 2 */ + 12237 "11111101" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 60 49 first +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12238 "01011100" // ST el0, [sp, #-16]; LSHL r17, r18, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12239 "00011011" // /* MW 5 */ + 12240 "01000110" // /* MW 4 */ + 12241 "10111001" // /* MW 3 */ + 12242 "00000101" // /* MW 2 */ + 12243 "11111110" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 first +.delay_slot + 12244 "00000010" // ST r16, [sp, #-28]; VBCST.16 x0, r16 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12245 "01110000" // /* MW 7 */ + 12246 "10111001" // /* MW 6 */ + 12247 "00100000" // /* MW 5 */ + 12248 "00000000" // /* MW 4 */ + 12249 "10110000" // /* MW 3 */ + 12250 "11000010" // /* MW 2 */ + 12251 "11111100" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 60 15 +.delay_slot + 12252 "11111000" // MOV m0, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12253 "10100000" // /* MW 3 */ + 12254 "00001000" // /* MW 2 */ + 12255 "00011000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 60 15 first +.delay_slot + 12256 "11100001" // NOPA; PADDB [p6], m0; VST x0, [sp, #-128]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12257 "00000000" // /* MW 15 */ + 12258 "00000000" // /* MW 14 */ + 12259 "01111000" // /* MW 13 */ + 12260 "10100101" // /* MW 12 */ + 12261 "00000001" // /* MW 11 */ + 12262 "00000000" // /* MW 10 */ + 12263 "00000000" // /* MW 9 */ + 12264 "00000000" // /* MW 8 */ + 12265 "00110011" // /* MW 7 */ + 12266 "11111000" // /* MW 6 */ + 12267 "00100111" // /* MW 5 */ + 12268 "00010111" // /* MW 4 */ + 12269 "11111100" // /* MW 3 */ + 12270 "00101100" // /* MW 2 */ + 12271 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 54 31 first +.return_address + 12272 "10011000" // LDA r16, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12273 "00010110" // /* MW 3 */ + 12274 "01000110" // /* MW 2 */ + 12275 "00000111" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 67 8 first +.src_ref 3 "rmsnorm_row_major.h" 67 39 first + 12276 "10000100" // JZ r3, #12768 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12768 delay_slots=5 */ + 12277 "00000001" // /* MW 5 */ + 12278 "00000000" // /* MW 4 */ + 12279 "11110000" // /* MW 3 */ + 12280 "00011000" // /* MW 2 */ + 12281 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12283 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12285 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12287 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12289 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12291 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 107 + 12292 "00011000" // LDA lr, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12293 "00111001" // /* MW 3 */ + 12294 "11101100" // /* MW 2 */ + 12295 "00000111" // /* MW 1 */ + 12296 "00100010" // LDA p7, [sp, #-24]; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12297 "00011100" // /* MW 7 */ + 12298 "00000000" // /* MW 6 */ + 12299 "00000000" // /* MW 5 */ + 12300 "00000100" // /* MW 4 */ + 12301 "00100000" // /* MW 3 */ + 12302 "01110011" // /* MW 2 */ + 12303 "11111101" // /* MW 1 */ +.label __ll6__Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params +.src_ref 3 "rmsnorm_row_major.h" 80 4 first + 12304 "10000100" // JZ r16, #12736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12736 delay_slots=5 */ + 12305 "00000001" // /* MW 5 */ + 12306 "00000000" // /* MW 4 */ + 12307 "11100000" // /* MW 3 */ + 12308 "00011000" // /* MW 2 */ + 12309 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12311 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12313 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12315 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12317 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12318 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12319 "00000000" // /* MW 1 */ +.label __ll14__Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params +.src_ref 3 "rmsnorm_row_major.h" 87 8 first + 12320 "10111010" // VLDA x0, [sp, #-128]; MOVXM ls, #12464 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12321 "00010000" // /* MW 9 */ + 12322 "01011000" // /* MW 8 */ + 12323 "01111000" // /* MW 7 */ + 12324 "00001100" // /* MW 6 */ + 12325 "00000000" // /* MW 5 */ + 12326 "00000000" // /* MW 4 */ + 12327 "01110000" // /* MW 3 */ + 12328 "00000111" // /* MW 2 */ + 12329 "11111111" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 87 8 + 12330 "10111010" // LDA r27, [sp, #-16]; MOVXM le, #12512 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12331 "00010000" // /* MW 9 */ + 12332 "01110000" // /* MW 8 */ + 12333 "10111000" // /* MW 7 */ + 12334 "00001101" // /* MW 6 */ + 12335 "00000000" // /* MW 5 */ + 12336 "00000000" // /* MW 4 */ + 12337 "00100000" // /* MW 3 */ + 12338 "01101110" // /* MW 2 */ + 12339 "11111110" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 4 "add_reduce.hpp" 332 18 +.src_ref 4 "add_reduce.hpp" 335 47 +.src_ref 4 "add_reduce.hpp" 337 22 +.src_ref 4 "add_reduce.hpp" 337 22 +.src_ref 4 "add_reduce.hpp" 337 22 +.src_ref 4 "add_reduce.hpp" 337 22 +.src_ref 3 "rmsnorm_row_major.h" 99 36 +.src_ref 3 "rmsnorm_row_major.h" 99 36 +.src_ref 3 "rmsnorm_row_major.h" 99 36 + 12340 "10111010" // LDA r26, [sp, #-8]; MOVX r18, #60; MOV r21, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12341 "01011000" // /* MW 9 */ + 12342 "00010000" // /* MW 8 */ + 12343 "10101000" // /* MW 7 */ + 12344 "10001010" // /* MW 6 */ + 12345 "00100111" // /* MW 5 */ + 12346 "00000001" // /* MW 4 */ + 12347 "00100000" // /* MW 3 */ + 12348 "01101010" // /* MW 2 */ + 12349 "11111111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 38 105 +.src_ref 4 "add_reduce.hpp" 335 47 +.src_ref 3 "rmsnorm_row_major.h" 89 21 + 12350 "10111010" // LDA p0, [sp, #-12]; MOVX r20, #828; MOV r19, #32 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12351 "01011000" // /* MW 9 */ + 12352 "00100000" // /* MW 8 */ + 12353 "01101000" // /* MW 7 */ + 12354 "10001010" // /* MW 6 */ + 12355 "01000111" // /* MW 5 */ + 12356 "00011001" // /* MW 4 */ + 12357 "00100000" // /* MW 3 */ + 12358 "10000011" // /* MW 2 */ + 12359 "11111110" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 + 12360 "10111010" // MOVA r23, #8; MOVX r16, #-5; ADD.NC r17, r16, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12361 "11001000" // /* MW 9 */ + 12362 "00111111" // /* MW 8 */ + 12363 "00101100" // /* MW 7 */ + 12364 "01101010" // /* MW 6 */ + 12365 "00000111" // /* MW 5 */ + 12366 "00111111" // /* MW 4 */ + 12367 "00000000" // /* MW 3 */ + 12368 "00010111" // /* MW 2 */ + 12369 "00000001" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 +.src_ref 3 "rmsnorm_row_major.h" 80 4 + 12370 "10111010" // MOVA r22, #4; MOVXM p1, #12400 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12371 "00010000" // /* MW 9 */ + 12372 "00111000" // /* MW 8 */ + 12373 "10110000" // /* MW 7 */ + 12374 "00001100" // /* MW 6 */ + 12375 "00000000" // /* MW 5 */ + 12376 "00000000" // /* MW 4 */ + 12377 "00000000" // /* MW 3 */ + 12378 "10010110" // /* MW 2 */ + 12379 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 +.src_ref 3 "rmsnorm_row_major.h" 99 36 + 12380 "00011000" // MOVX vaddSign0, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12381 "01000000" // /* MW 3 */ + 12382 "01011010" // /* MW 2 */ + 12383 "00010000" // /* MW 1 */ + 12384 "11111000" // VCONV.fp32.bf16 cml0, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12385 "10001010" // /* MW 3 */ + 12386 "00000001" // /* MW 2 */ + 12387 "00011000" // /* MW 1 */ + 12388 "00101100" // NOPA; LSHL r16, r27, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12389 "00011011" // /* MW 5 */ + 12390 "11000010" // /* MW 4 */ + 12391 "11111101" // /* MW 3 */ + 12392 "00101100" // /* MW 2 */ + 12393 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 38 105 +.src_ref 3 "rmsnorm_row_major.h" 99 36 + 12394 "11100100" // MOVX crRnd, r26; VMOV cml1, cml0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12395 "00010101" // /* MW 5 */ + 12396 "00000001" // /* MW 4 */ + 12397 "00000010" // /* MW 3 */ + 12398 "01010000" // /* MW 2 */ + 12399 "11010111" // /* MW 1 */ +.label TGT_F_Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params_256 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "rmsnorm_row_major.h" 87 8 first +.src_ref 3 "rmsnorm_row_major.h" 89 21 first +.loop_nesting 1 + 12400 "11100001" // NOPA; VLDB x2, [p0], #64; NOPS; NOPX; ADD.NC lc, r16, #-1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12401 "00000000" // /* MW 15 */ + 12402 "00000000" // /* MW 14 */ + 12403 "11001000" // /* MW 13 */ + 12404 "00111111" // /* MW 12 */ + 12405 "10111100" // /* MW 11 */ + 12406 "00000010" // /* MW 10 */ + 12407 "00000000" // /* MW 9 */ + 12408 "00000000" // /* MW 8 */ + 12409 "01011011" // /* MW 7 */ + 12410 "00000001" // /* MW 6 */ + 12411 "01101000" // /* MW 5 */ + 12412 "00111001" // /* MW 4 */ + 12413 "11110000" // /* MW 3 */ + 12414 "00101100" // /* MW 2 */ + 12415 "00000000" // /* MW 1 */ + 12416 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12417 "00000000" // /* MW 15 */ + 12418 "00000000" // /* MW 14 */ + 12419 "01111000" // /* MW 13 */ + 12420 "10100101" // /* MW 12 */ + 12421 "00000001" // /* MW 11 */ + 12422 "00000000" // /* MW 10 */ + 12423 "00000000" // /* MW 9 */ + 12424 "00000000" // /* MW 8 */ + 12425 "01011011" // /* MW 7 */ + 12426 "00000001" // /* MW 6 */ + 12427 "00100000" // /* MW 5 */ + 12428 "00000000" // /* MW 4 */ + 12429 "11110000" // /* MW 3 */ + 12430 "00101100" // /* MW 2 */ + 12431 "00000000" // /* MW 1 */ + 12432 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12433 "00000000" // /* MW 15 */ + 12434 "00000000" // /* MW 14 */ + 12435 "01111000" // /* MW 13 */ + 12436 "10100101" // /* MW 12 */ + 12437 "00000001" // /* MW 11 */ + 12438 "00000000" // /* MW 10 */ + 12439 "00000000" // /* MW 9 */ + 12440 "00000000" // /* MW 8 */ + 12441 "01011011" // /* MW 7 */ + 12442 "00000001" // /* MW 6 */ + 12443 "00100000" // /* MW 5 */ + 12444 "00000000" // /* MW 4 */ + 12445 "11110000" // /* MW 3 */ + 12446 "00101100" // /* MW 2 */ + 12447 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 12448 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12449 "00000000" // /* MW 15 */ + 12450 "00000000" // /* MW 14 */ + 12451 "01111000" // /* MW 13 */ + 12452 "10100101" // /* MW 12 */ + 12453 "00000001" // /* MW 11 */ + 12454 "00000000" // /* MW 10 */ + 12455 "00000000" // /* MW 9 */ + 12456 "00000000" // /* MW 8 */ + 12457 "01011011" // /* MW 7 */ + 12458 "00000001" // /* MW 6 */ + 12459 "00100000" // /* MW 5 */ + 12460 "00000000" // /* MW 4 */ + 12461 "11110000" // /* MW 3 */ + 12462 "00101100" // /* MW 2 */ + 12463 "00000000" // /* MW 1 */ +.label ZLS_F_Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params_320 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "rmsnorm_row_major.h" 89 21 +.begin_of_loop +.aggressive_scheduled_block_id 2 +.noswbrkpt +.loop_nesting 2 + 12464 "11100001" // NOPA; VLDB x2, [p0], #64; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12465 "00000000" // /* MW 15 */ + 12466 "00000000" // /* MW 14 */ + 12467 "01111000" // /* MW 13 */ + 12468 "10100101" // /* MW 12 */ + 12469 "00000001" // /* MW 11 */ + 12470 "00000000" // /* MW 10 */ + 12471 "00000000" // /* MW 9 */ + 12472 "00000000" // /* MW 8 */ + 12473 "01011011" // /* MW 7 */ + 12474 "00000001" // /* MW 6 */ + 12475 "01101000" // /* MW 5 */ + 12476 "00111001" // /* MW 4 */ + 12477 "11110000" // /* MW 3 */ + 12478 "00101100" // /* MW 2 */ + 12479 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12480 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12481 "00000000" // /* MW 15 */ + 12482 "00000000" // /* MW 14 */ + 12483 "01111000" // /* MW 13 */ + 12484 "10100101" // /* MW 12 */ + 12485 "00000001" // /* MW 11 */ + 12486 "00000000" // /* MW 10 */ + 12487 "00000000" // /* MW 9 */ + 12488 "00000000" // /* MW 8 */ + 12489 "01011011" // /* MW 7 */ + 12490 "00000001" // /* MW 6 */ + 12491 "00100000" // /* MW 5 */ + 12492 "00000000" // /* MW 4 */ + 12493 "11110000" // /* MW 3 */ + 12494 "00101100" // /* MW 2 */ + 12495 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12496 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12497 "00000000" // /* MW 15 */ + 12498 "00000000" // /* MW 14 */ + 12499 "01111000" // /* MW 13 */ + 12500 "10100101" // /* MW 12 */ + 12501 "00000001" // /* MW 11 */ + 12502 "00000000" // /* MW 10 */ + 12503 "00000000" // /* MW 9 */ + 12504 "00000000" // /* MW 8 */ + 12505 "01011011" // /* MW 7 */ + 12506 "00000001" // /* MW 6 */ + 12507 "00100000" // /* MW 5 */ + 12508 "00000000" // /* MW 4 */ + 12509 "11110000" // /* MW 3 */ + 12510 "00101100" // /* MW 2 */ + 12511 "00000000" // /* MW 1 */ +.label ZLE_F_Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params_368 +.src_ref 5 "mul_acc32_fp.hpp" 38 105 first +.end_of_loop +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12512 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm0, dm0, x2, x2, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12513 "00100010" // /* MW 15 */ + 12514 "00000000" // /* MW 14 */ + 12515 "01111101" // /* MW 13 */ + 12516 "10100101" // /* MW 12 */ + 12517 "00000001" // /* MW 11 */ + 12518 "00000000" // /* MW 10 */ + 12519 "00000000" // /* MW 9 */ + 12520 "00000000" // /* MW 8 */ + 12521 "01011011" // /* MW 7 */ + 12522 "00000001" // /* MW 6 */ + 12523 "00100000" // /* MW 5 */ + 12524 "00000000" // /* MW 4 */ + 12525 "11110000" // /* MW 3 */ + 12526 "00101100" // /* MW 2 */ + 12527 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 97 26 first +.loop_nesting 1 + 12528 "10011000" // LDA.s16 r26, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12529 "01010010" // /* MW 3 */ + 12530 "00000111" // /* MW 2 */ + 12531 "00000110" // /* MW 1 */ + 12532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12533 "00000000" // /* MW 1 */ + 12534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12535 "00000000" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 38 105 first + 12536 "01001000" // VMAC.f dm0, dm0, x2, x2, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12537 "01000001" // /* MW 3 */ + 12538 "00000100" // /* MW 2 */ + 12539 "10100000" // /* MW 1 */ + 12540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12541 "00000000" // /* MW 1 */ + 12542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12543 "00000000" // /* MW 1 */ + 12544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12545 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 99 36 first + 12546 "10011000" // ASHL r26, r26, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12547 "01011110" // /* MW 3 */ + 12548 "10110101" // /* MW 2 */ + 12549 "00010110" // /* MW 1 */ + 12550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12551 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 first + 12552 "00011000" // VCONV.bf16.fp32 x0, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12553 "00010110" // /* MW 3 */ + 12554 "00010000" // /* MW 2 */ + 12555 "00001000" // /* MW 1 */ + 12556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12557 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 940 83 first +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 12558 "11111000" // VCONV.fp32.bf16 cml2, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12559 "10001010" // /* MW 3 */ + 12560 "00000001" // /* MW 2 */ + 12561 "00011010" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 332 18 first +.src_ref 3 "rmsnorm_row_major.h" 99 36 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 12562 "01100010" // VINSERT.32 x0, x0, #0, r26; VADD.f dm2, dm2, dm3, r18 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12563 "00111101" // /* MW 7 */ + 12564 "01001100" // /* MW 6 */ + 12565 "10010010" // /* MW 5 */ + 12566 "01100110" // /* MW 4 */ + 12567 "01010001" // /* MW 3 */ + 12568 "00000011" // /* MW 2 */ + 12569 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.src_ref 5 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12570 "11111000" // VMOV bmll3, bmlh2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12571 "00010010" // /* MW 3 */ + 12572 "00001001" // /* MW 2 */ + 12573 "00011011" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 99 36 first + 12574 "11111000" // VMOV bmll3, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12575 "10010010" // /* MW 3 */ + 12576 "00000000" // /* MW 2 */ + 12577 "00011011" // /* MW 1 */ + 12578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12579 "00000000" // /* MW 1 */ + 12580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12581 "00000000" // /* MW 1 */ + 12582 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12583 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 12584 "11111000" // VMOV x0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12585 "00010010" // /* MW 3 */ + 12586 "00101000" // /* MW 2 */ + 12587 "00011000" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 first +.src_ref 4 "add_reduce.hpp" 337 22 first +.aggressive_scheduled_block_id 4 +.noswbrkpt + 12588 "01100010" // VSHIFT x0, x0, x0, r19; VADD.f dm2, dm2, dm4, r18 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12589 "00111101" // /* MW 7 */ + 12590 "01010000" // /* MW 6 */ + 12591 "10010010" // /* MW 5 */ + 12592 "11000110" // /* MW 4 */ + 12593 "01001110" // /* MW 3 */ + 12594 "00000000" // /* MW 2 */ + 12595 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12596 "11111000" // VMOV bmll4, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12597 "10010010" // /* MW 3 */ + 12598 "00000000" // /* MW 2 */ + 12599 "00011100" // /* MW 1 */ + 12600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12601 "00000000" // /* MW 1 */ + 12602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12603 "00000000" // /* MW 1 */ + 12604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12605 "00000000" // /* MW 1 */ + 12606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12607 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 12608 "11111000" // VMOV x0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12609 "00010010" // /* MW 3 */ + 12610 "00101000" // /* MW 2 */ + 12611 "00011000" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 first +.src_ref 4 "add_reduce.hpp" 337 22 first +.aggressive_scheduled_block_id 5 +.noswbrkpt + 12612 "01100010" // VSHIFT x0, x0, x0, r21; VADD.f dm2, dm2, dm4, r18 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12613 "00111101" // /* MW 7 */ + 12614 "01010000" // /* MW 6 */ + 12615 "10010010" // /* MW 5 */ + 12616 "11000110" // /* MW 4 */ + 12617 "01010110" // /* MW 3 */ + 12618 "00000000" // /* MW 2 */ + 12619 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12620 "11111000" // VMOV bmll4, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12621 "10010010" // /* MW 3 */ + 12622 "00000000" // /* MW 2 */ + 12623 "00011100" // /* MW 1 */ + 12624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12625 "00000000" // /* MW 1 */ + 12626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12627 "00000000" // /* MW 1 */ + 12628 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12629 "00000000" // /* MW 1 */ + 12630 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12631 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 12632 "11111000" // VMOV x0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12633 "00010010" // /* MW 3 */ + 12634 "00101000" // /* MW 2 */ + 12635 "00011000" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 first +.src_ref 4 "add_reduce.hpp" 337 22 first +.aggressive_scheduled_block_id 6 +.noswbrkpt + 12636 "01100010" // VSHIFT x0, x0, x0, r23; VADD.f dm2, dm2, dm4, r18 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12637 "00111101" // /* MW 7 */ + 12638 "01010000" // /* MW 6 */ + 12639 "10010010" // /* MW 5 */ + 12640 "11000110" // /* MW 4 */ + 12641 "01011110" // /* MW 3 */ + 12642 "00000000" // /* MW 2 */ + 12643 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12644 "11111000" // VMOV bmll4, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12645 "10010010" // /* MW 3 */ + 12646 "00000000" // /* MW 2 */ + 12647 "00011100" // /* MW 1 */ + 12648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12649 "00000000" // /* MW 1 */ + 12650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12651 "00000000" // /* MW 1 */ + 12652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12653 "00000000" // /* MW 1 */ + 12654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12655 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 12656 "11111000" // VMOV x0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12657 "00010010" // /* MW 3 */ + 12658 "00101000" // /* MW 2 */ + 12659 "00011000" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 first +.src_ref 4 "add_reduce.hpp" 337 22 first +.aggressive_scheduled_block_id 7 +.noswbrkpt + 12660 "01100010" // VSHIFT x0, x0, x0, r22; VADD.f dm2, dm2, dm4, r18 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12661 "00111101" // /* MW 7 */ + 12662 "01010000" // /* MW 6 */ + 12663 "10010010" // /* MW 5 */ + 12664 "11000110" // /* MW 4 */ + 12665 "01011010" // /* MW 3 */ + 12666 "00000000" // /* MW 2 */ + 12667 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12668 "11111000" // VMOV bmll4, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12669 "10010010" // /* MW 3 */ + 12670 "00000000" // /* MW 2 */ + 12671 "00011100" // /* MW 1 */ + 12672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12673 "00000000" // /* MW 1 */ + 12674 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12675 "00000000" // /* MW 1 */ + 12676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12677 "00000000" // /* MW 1 */ + 12678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12679 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 first + 12680 "00011000" // VCONV.bf16.fp32 x0, cml2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12681 "00010110" // /* MW 3 */ + 12682 "00010001" // /* MW 2 */ + 12683 "00001000" // /* MW 1 */ + 12684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12685 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first + 12686 "10111000" // VEXTRACT.16 r26, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12687 "00000001" // /* MW 3 */ + 12688 "10000001" // /* MW 2 */ + 12689 "00011110" // /* MW 1 */ + 12690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12691 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 99 36 first +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id first + 12692 "10011000" // ASHL r26, r26, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12693 "01011110" // /* MW 3 */ + 12694 "10110101" // /* MW 2 */ + 12695 "00010110" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 99 36 +.src_ref 3 "rmsnorm_row_major.h" 99 36 +.aggressive_scheduled_block_id 8 +.noswbrkpt + 12696 "01100010" // VINSERT.32 x0, x0, #0, r26; VADD.f dm2, dm2, dm3, r18 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12697 "00111101" // /* MW 7 */ + 12698 "01001100" // /* MW 6 */ + 12699 "10010010" // /* MW 5 */ + 12700 "01100110" // /* MW 4 */ + 12701 "01010001" // /* MW 3 */ + 12702 "00000011" // /* MW 2 */ + 12703 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 99 36 +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12704 "11111000" // VMOV bmll2, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12705 "10010010" // /* MW 3 */ + 12706 "00000000" // /* MW 2 */ + 12707 "00011010" // /* MW 1 */ + 12708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12709 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 80 4 first +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id first + 12710 "00011000" // JNZD r17, r17, p1 /* MW 4 */ /* control_operation: words=4 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 12711 "01100000" // /* MW 3 */ + 12712 "01100010" // /* MW 2 */ + 12713 "00010100" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 99 24 first +.src_ref 3 "rmsnorm_row_major.h" 104 23 first +.delay_slot +.aggressive_scheduled_block_id 9 +.noswbrkpt + 12714 "00011000" // ST.s16 r26, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12715 "01010111" // /* MW 3 */ + 12716 "00101111" // /* MW 2 */ + 12717 "00000110" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 12718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12719 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 99 36 +.delay_slot +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 12720 "00011000" // VCONV.bf16.fp32 wl0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12721 "00010110" // /* MW 3 */ + 12722 "01000001" // /* MW 2 */ + 12723 "00001000" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 38 105 +.delay_slot +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 12724 "11111000" // VMOV cml0, cml1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12725 "10001010" // /* MW 3 */ + 12726 "00000100" // /* MW 2 */ + 12727 "00011000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 99 36 +.delay_slot +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12728 "00000010" // NOPS; VEXTRACT.16 r26, x0, #0, vaddSign0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12729 "11010000" // /* MW 7 */ + 12730 "10000000" // /* MW 6 */ + 12731 "01000000" // /* MW 5 */ + 12732 "00000011" // /* MW 4 */ + 12733 "01100000" // /* MW 3 */ + 12734 "00101011" // /* MW 2 */ + 12735 "00000000" // /* MW 1 */ +.label __ll61__Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params +.loop_nesting 0 + 12736 "00011000" // LDA p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12737 "00011001" // /* MW 3 */ + 12738 "11111111" // /* MW 2 */ + 12739 "00000111" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 107 first + 12740 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12741 "00000000" // /* MW 3 */ + 12742 "00101000" // /* MW 2 */ + 12743 "00010000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 107 +.delay_slot + 12744 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12745 "00000001" // /* MW 5 */ + 12746 "00000000" // /* MW 4 */ + 12747 "00000000" // /* MW 3 */ + 12748 "11110000" // /* MW 2 */ + 12749 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12751 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12753 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12755 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12756 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12757 "10000001" // /* MW 11 */ + 12758 "10101101" // /* MW 10 */ + 12759 "00000000" // /* MW 9 */ + 12760 "00000000" // /* MW 8 */ + 12761 "00000000" // /* MW 7 */ + 12762 "00000000" // /* MW 6 */ + 12763 "00100000" // /* MW 5 */ + 12764 "00000000" // /* MW 4 */ + 12765 "11110000" // /* MW 3 */ + 12766 "00101100" // /* MW 2 */ + 12767 "00000000" // /* MW 1 */ +.label TGT_F_Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params_624 +.src_ref 3 "rmsnorm_row_major.h" 67 8 first + 12768 "10000100" // JZ r16, #12976 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12976 delay_slots=5 */ + 12769 "00000001" // /* MW 5 */ + 12770 "00000000" // /* MW 4 */ + 12771 "01011000" // /* MW 3 */ + 12772 "00011001" // /* MW 2 */ + 12773 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12775 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12776 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12777 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12778 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12779 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12780 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12781 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12783 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 70 8 first +.src_ref 3 "rmsnorm_row_major.h" 72 35 +.src_ref 3 "rmsnorm_row_major.h" 72 35 +.src_ref 3 "rmsnorm_row_major.h" 73 35 + 12784 "01110110" // LDA r17, [sp, #-28]; MOVS p0, p6; MOVXM ls, #12816 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12785 "00010000" // /* MW 11 */ + 12786 "00001000" // /* MW 10 */ + 12787 "01111001" // /* MW 9 */ + 12788 "00001100" // /* MW 8 */ + 12789 "00000000" // /* MW 7 */ + 12790 "00000000" // /* MW 6 */ + 12791 "10001011" // /* MW 5 */ + 12792 "10011000" // /* MW 4 */ + 12793 "00100000" // /* MW 3 */ + 12794 "11000110" // /* MW 2 */ + 12795 "11111100" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 70 8 + 12796 "10111010" // LDA p7, [sp, #-24]; MOVXM le, #12928 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12797 "00010000" // /* MW 9 */ + 12798 "01000000" // /* MW 8 */ + 12799 "10111001" // /* MW 7 */ + 12800 "00001101" // /* MW 6 */ + 12801 "00000000" // /* MW 5 */ + 12802 "00000000" // /* MW 4 */ + 12803 "00100000" // /* MW 3 */ + 12804 "01110011" // /* MW 2 */ + 12805 "11111101" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 70 8 + 12806 "10111010" // NOPA; NOPB; MOV lc, r16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12807 "01111110" // /* MW 9 */ + 12808 "00010000" // /* MW 8 */ + 12809 "10111100" // /* MW 7 */ + 12810 "00000010" // /* MW 6 */ + 12811 "00010000" // /* MW 5 */ + 12812 "00000000" // /* MW 4 */ + 12813 "11110000" // /* MW 3 */ + 12814 "00101100" // /* MW 2 */ + 12815 "00000000" // /* MW 1 */ +.label ZLS_F_Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params_672 +.src_ref 3 "rmsnorm_row_major.h" 72 35 first +.begin_of_loop +.loop_nesting 1 + 12816 "00011000" // ST.s16 r17, [p0], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12817 "00110111" // /* MW 3 */ + 12818 "00011110" // /* MW 2 */ + 12819 "00000000" // /* MW 1 */ + 12820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12821 "00000000" // /* MW 1 */ + 12822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12823 "00000000" // /* MW 1 */ + 12824 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12825 "00000000" // /* MW 1 */ + 12826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12827 "00000000" // /* MW 1 */ + 12828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12829 "00000000" // /* MW 1 */ + 12830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12831 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 73 35 first + 12832 "11100001" // ST.s16 r17, [p0], #2; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12833 "00000000" // /* MW 15 */ + 12834 "00000000" // /* MW 14 */ + 12835 "01111000" // /* MW 13 */ + 12836 "10100101" // /* MW 12 */ + 12837 "00000001" // /* MW 11 */ + 12838 "00000000" // /* MW 10 */ + 12839 "00000000" // /* MW 9 */ + 12840 "00000000" // /* MW 8 */ + 12841 "01011011" // /* MW 7 */ + 12842 "00000001" // /* MW 6 */ + 12843 "00100000" // /* MW 5 */ + 12844 "00000000" // /* MW 4 */ + 12845 "11100000" // /* MW 3 */ + 12846 "11000110" // /* MW 2 */ + 12847 "00000011" // /* MW 1 */ + 12848 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12849 "00000000" // /* MW 15 */ + 12850 "00000000" // /* MW 14 */ + 12851 "01111000" // /* MW 13 */ + 12852 "10100101" // /* MW 12 */ + 12853 "00000001" // /* MW 11 */ + 12854 "00000000" // /* MW 10 */ + 12855 "00000000" // /* MW 9 */ + 12856 "00000000" // /* MW 8 */ + 12857 "01011011" // /* MW 7 */ + 12858 "00000001" // /* MW 6 */ + 12859 "00100000" // /* MW 5 */ + 12860 "00000000" // /* MW 4 */ + 12861 "11110000" // /* MW 3 */ + 12862 "00101100" // /* MW 2 */ + 12863 "00000000" // /* MW 1 */ + 12864 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12865 "00000000" // /* MW 15 */ + 12866 "00000000" // /* MW 14 */ + 12867 "01111000" // /* MW 13 */ + 12868 "10100101" // /* MW 12 */ + 12869 "00000001" // /* MW 11 */ + 12870 "00000000" // /* MW 10 */ + 12871 "00000000" // /* MW 9 */ + 12872 "00000000" // /* MW 8 */ + 12873 "01011011" // /* MW 7 */ + 12874 "00000001" // /* MW 6 */ + 12875 "00100000" // /* MW 5 */ + 12876 "00000000" // /* MW 4 */ + 12877 "11110000" // /* MW 3 */ + 12878 "00101100" // /* MW 2 */ + 12879 "00000000" // /* MW 1 */ + 12880 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12881 "00000000" // /* MW 15 */ + 12882 "00000000" // /* MW 14 */ + 12883 "01111000" // /* MW 13 */ + 12884 "10100101" // /* MW 12 */ + 12885 "00000001" // /* MW 11 */ + 12886 "00000000" // /* MW 10 */ + 12887 "00000000" // /* MW 9 */ + 12888 "00000000" // /* MW 8 */ + 12889 "01011011" // /* MW 7 */ + 12890 "00000001" // /* MW 6 */ + 12891 "00100000" // /* MW 5 */ + 12892 "00000000" // /* MW 4 */ + 12893 "11110000" // /* MW 3 */ + 12894 "00101100" // /* MW 2 */ + 12895 "00000000" // /* MW 1 */ + 12896 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12897 "00000000" // /* MW 15 */ + 12898 "00000000" // /* MW 14 */ + 12899 "01111000" // /* MW 13 */ + 12900 "10100101" // /* MW 12 */ + 12901 "00000001" // /* MW 11 */ + 12902 "00000000" // /* MW 10 */ + 12903 "00000000" // /* MW 9 */ + 12904 "00000000" // /* MW 8 */ + 12905 "01011011" // /* MW 7 */ + 12906 "00000001" // /* MW 6 */ + 12907 "00100000" // /* MW 5 */ + 12908 "00000000" // /* MW 4 */ + 12909 "11110000" // /* MW 3 */ + 12910 "00101100" // /* MW 2 */ + 12911 "00000000" // /* MW 1 */ + 12912 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12913 "00000000" // /* MW 15 */ + 12914 "00000000" // /* MW 14 */ + 12915 "01111000" // /* MW 13 */ + 12916 "10100101" // /* MW 12 */ + 12917 "00000001" // /* MW 11 */ + 12918 "00000000" // /* MW 10 */ + 12919 "00000000" // /* MW 9 */ + 12920 "00000000" // /* MW 8 */ + 12921 "01011011" // /* MW 7 */ + 12922 "00000001" // /* MW 6 */ + 12923 "00100000" // /* MW 5 */ + 12924 "00000000" // /* MW 4 */ + 12925 "11110000" // /* MW 3 */ + 12926 "00101100" // /* MW 2 */ + 12927 "00000000" // /* MW 1 */ +.label ZLE_F_Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params_784 +.end_of_loop + 12928 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12929 "00000000" // /* MW 15 */ + 12930 "00000000" // /* MW 14 */ + 12931 "01111000" // /* MW 13 */ + 12932 "10100101" // /* MW 12 */ + 12933 "00000001" // /* MW 11 */ + 12934 "00000000" // /* MW 10 */ + 12935 "00000000" // /* MW 9 */ + 12936 "00000000" // /* MW 8 */ + 12937 "01011011" // /* MW 7 */ + 12938 "00000001" // /* MW 6 */ + 12939 "00100000" // /* MW 5 */ + 12940 "00000000" // /* MW 4 */ + 12941 "11110000" // /* MW 3 */ + 12942 "00101100" // /* MW 2 */ + 12943 "00000000" // /* MW 1 */ +.loop_nesting 0 + 12944 "10000100" // J #12320 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=12320 delay_slots=5 */ + 12945 "00000000" // /* MW 5 */ + 12946 "00000000" // /* MW 4 */ + 12947 "00010000" // /* MW 3 */ + 12948 "00011000" // /* MW 2 */ + 12949 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 107 +.delay_slot + 12950 "00011000" // LDA lr, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12951 "00111001" // /* MW 3 */ + 12952 "11101100" // /* MW 2 */ + 12953 "00000111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12955 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12957 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12959 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12960 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12961 "00000000" // /* MW 15 */ + 12962 "00000000" // /* MW 14 */ + 12963 "01111000" // /* MW 13 */ + 12964 "10100101" // /* MW 12 */ + 12965 "00000001" // /* MW 11 */ + 12966 "00000000" // /* MW 10 */ + 12967 "00000000" // /* MW 9 */ + 12968 "00000000" // /* MW 8 */ + 12969 "01011011" // /* MW 7 */ + 12970 "00000001" // /* MW 6 */ + 12971 "00100000" // /* MW 5 */ + 12972 "00000000" // /* MW 4 */ + 12973 "11110000" // /* MW 3 */ + 12974 "00101100" // /* MW 2 */ + 12975 "00000000" // /* MW 1 */ +.label TGT_F_Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params_832 + 12976 "10000100" // J #12304 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=12304 delay_slots=5 */ + 12977 "00000000" // /* MW 5 */ + 12978 "00000000" // /* MW 4 */ + 12979 "00001000" // /* MW 3 */ + 12980 "00011000" // /* MW 2 */ + 12981 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 107 +.delay_slot + 12982 "00011000" // LDA lr, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12983 "00111001" // /* MW 3 */ + 12984 "11101100" // /* MW 2 */ + 12985 "00000111" // /* MW 1 */ +.delay_slot + 12986 "00011000" // LDA p7, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12987 "10011001" // /* MW 3 */ + 12988 "11101011" // /* MW 2 */ + 12989 "00000111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12990 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12991 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12993 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params__end +.label __Z23rmsnorm_row_major_part1I8bfloat16EvPT_S2_R33rmsnorm_row_major_internal_params___func_end0 + 12995 "00000000" // /* MW 1 */ +.label __ZN12mllib_graphs35rmsnorm_row_major_part1_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EEEEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSF_ISG_SI_T1_EERA11_KjRNSF_ISG_NSH_3outET2_EE___func_begin0 +.label _ZN12mllib_graphs35rmsnorm_row_major_part1_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EEEEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSF_ISG_SI_T1_EERA11_KjRNSF_ISG_NSH_3outET2_EE +.function rmsnorm_row_major_part1_4x4_wrapper, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::async, adf::addressing::linear, adf::margin<0U> > > _ZN12mllib_graphs35rmsnorm_row_major_part1_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EEEEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSF_ISG_SI_T1_EERA11_KjRNSF_ISG_NSH_3outET2_EE +.src_ref 12 "rms_norm_adf_wrapper.cpp" 76 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 83 5 +.function_start + 13008 "01000100" // MOVXM p1, #508900 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13009 "11001000" // /* MW 5 */ + 13010 "11000111" // /* MW 4 */ + 13011 "11000010" // /* MW 3 */ + 13012 "00000111" // /* MW 2 */ + 13013 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 83 5 first + 13014 "10111010" // LDA r16, [p1]; MOVS p0, p6; MOV p6, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13015 "01110010" // /* MW 9 */ + 13016 "01100000" // /* MW 8 */ + 13017 "00110000" // /* MW 7 */ + 13018 "00000011" // /* MW 6 */ + 13019 "10001011" // /* MW 5 */ + 13020 "10011000" // /* MW 4 */ + 13021 "11010000" // /* MW 3 */ + 13022 "11000010" // /* MW 2 */ + 13023 "00100000" // /* MW 1 */ + 13024 "11111000" // MOV r0, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13025 "10100000" // /* MW 3 */ + 13026 "00010111" // /* MW 2 */ + 13027 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 285 49 + 13028 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13029 "11000000" // /* MW 3 */ + 13030 "11010110" // /* MW 2 */ + 13031 "00011011" // /* MW 1 */ + 13032 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13033 "00000000" // /* MW 1 */ + 13034 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13035 "00000000" // /* MW 1 */ + 13036 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13037 "00000000" // /* MW 1 */ + 13038 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13039 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 83 5 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 83 17 + 13040 "10000100" // JNZ r16, #13152 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=13152 delay_slots=5 */ + 13041 "00000001" // /* MW 5 */ + 13042 "01000000" // /* MW 4 */ + 13043 "10110000" // /* MW 3 */ + 13044 "00011001" // /* MW 2 */ + 13045 "10000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 76 +.delay_slot + 13046 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13047 "00000001" // /* MW 5 */ + 13048 "00000000" // /* MW 4 */ + 13049 "00000000" // /* MW 3 */ + 13050 "00010000" // /* MW 2 */ + 13051 "00000000" // /* MW 1 */ +.delay_slot + 13052 "10011000" // ST p0, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13053 "00011101" // /* MW 3 */ + 13054 "11110100" // /* MW 2 */ + 13055 "00001111" // /* MW 1 */ +.delay_slot + 13056 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13057 "10011101" // /* MW 3 */ + 13058 "11111011" // /* MW 2 */ + 13059 "00001111" // /* MW 1 */ +.delay_slot + 13060 "10011000" // ST r0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13061 "00010101" // /* MW 3 */ + 13062 "11111100" // /* MW 2 */ + 13063 "00001111" // /* MW 1 */ +.delay_slot + 13064 "10011000" // ST lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13065 "00111101" // /* MW 3 */ + 13066 "11110000" // /* MW 2 */ + 13067 "00001111" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 85 36 + 13068 "10111010" // MOVA r18, #12; MOVX r20, #-16; MOV r17, CORE_ID /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13069 "01111000" // /* MW 9 */ + 13070 "01110000" // /* MW 8 */ + 13071 "00101101" // /* MW 7 */ + 13072 "00001010" // /* MW 6 */ + 13073 "01000110" // /* MW 5 */ + 13074 "00111111" // /* MW 4 */ + 13075 "00000000" // /* MW 3 */ + 13076 "10010010" // /* MW 2 */ + 13077 "00000001" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 84 14 + 13078 "10111010" // MOVA r19, #1; MOVXM p7, #508888 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13079 "00010000" // /* MW 9 */ + 13080 "11101100" // /* MW 8 */ + 13081 "10110001" // /* MW 7 */ + 13082 "11110011" // /* MW 6 */ + 13083 "00000001" // /* MW 5 */ + 13084 "00000000" // /* MW 4 */ + 13085 "00000000" // /* MW 3 */ + 13086 "00110011" // /* MW 2 */ + 13087 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 84 14 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 89 27 first + 13088 "01011100" // ST r17, [p7]; EXTEND.u8 r21, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13089 "00100000" // /* MW 5 */ + 13090 "11010101" // /* MW 4 */ + 13091 "00111000" // /* MW 3 */ + 13092 "11000110" // /* MW 2 */ + 13093 "11100000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 85 36 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 89 44 + 13094 "00100100" // LSHL r17, r17, r20; ADD.NC r20, r21, #-2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13095 "11111110" // /* MW 5 */ + 13096 "00110101" // /* MW 4 */ + 13097 "10111010" // /* MW 3 */ + 13098 "01101001" // /* MW 2 */ + 13099 "10001100" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 89 15 + 13100 "01000100" // MOVXM p7, #508892 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13101 "10111000" // /* MW 5 */ + 13102 "11000111" // /* MW 4 */ + 13103 "11001110" // /* MW 3 */ + 13104 "00000111" // /* MW 2 */ + 13105 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 89 15 first + 13106 "00111010" // ST r20, [p7]; MOVXM p7, #508944 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13107 "00010001" // /* MW 9 */ + 13108 "00001000" // /* MW 8 */ + 13109 "10110010" // /* MW 7 */ + 13110 "11110011" // /* MW 6 */ + 13111 "00000001" // /* MW 5 */ + 13112 "00000000" // /* MW 4 */ + 13113 "00110000" // /* MW 3 */ + 13114 "11010010" // /* MW 2 */ + 13115 "11100000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first + 13116 "10111010" // ST.s8 r18, [p7]; MOVXM p0, #508940 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13117 "00010000" // /* MW 9 */ + 13118 "00000110" // /* MW 8 */ + 13119 "00110010" // /* MW 7 */ + 13120 "11110000" // /* MW 6 */ + 13121 "00000001" // /* MW 5 */ + 13122 "00000000" // /* MW 4 */ + 13123 "11100000" // /* MW 3 */ + 13124 "11001000" // /* MW 2 */ + 13125 "11100000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 85 27 first + 13126 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13127 "10010000" // /* MW 3 */ + 13128 "01100010" // /* MW 2 */ + 13129 "00010100" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 85 15 + 13130 "01000100" // MOVXM p7, #508896 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13131 "11000000" // /* MW 5 */ + 13132 "11000111" // /* MW 4 */ + 13133 "11001110" // /* MW 3 */ + 13134 "00000111" // /* MW 2 */ + 13135 "00000000" // /* MW 1 */ + 13136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13137 "00000000" // /* MW 1 */ + 13138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13139 "00000000" // /* MW 1 */ + 13140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13141 "00000000" // /* MW 1 */ + 13142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13143 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first + 13144 "10011000" // ST r19, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13145 "01110001" // /* MW 3 */ + 13146 "00000110" // /* MW 2 */ + 13147 "00001000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 85 15 first + 13148 "10011000" // ST r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13149 "00110001" // /* MW 3 */ + 13150 "00000110" // /* MW 2 */ + 13151 "00001111" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs35rmsnorm_row_major_part1_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EEEEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSF_ISG_SI_T1_EERA11_KjRNSF_ISG_NSH_3outET2_EE_144 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 95 33 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 97 24 + 13152 "10111010" // MOVA dj0, #40; MOVXM p7, #508832 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13153 "00010000" // /* MW 9 */ + 13154 "11010000" // /* MW 8 */ + 13155 "10110001" // /* MW 7 */ + 13156 "11110011" // /* MW 6 */ + 13157 "00000001" // /* MW 5 */ + 13158 "00000000" // /* MW 4 */ + 13159 "10000000" // /* MW 3 */ + 13160 "00000010" // /* MW 2 */ + 13161 "00000101" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 95 33 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 105 35 + 13162 "00111010" // ST r16, [p7], #-20; MOVX r24, #0; MOV r17, sp /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13163 "01111001" // /* MW 9 */ + 13164 "11110000" // /* MW 8 */ + 13165 "00101010" // /* MW 7 */ + 13166 "00001010" // /* MW 6 */ + 13167 "10000000" // /* MW 5 */ + 13168 "00000001" // /* MW 4 */ + 13169 "00110000" // /* MW 3 */ + 13170 "11000010" // /* MW 2 */ + 13171 "11110111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 97 24 first + 13172 "00010100" // LDA el0, [p2, dj0]; ADD.NC p0, r17, #-104 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13173 "10011000" // /* MW 5 */ + 13174 "11010001" // /* MW 4 */ + 13175 "11010000" // /* MW 3 */ + 13176 "00000101" // /* MW 2 */ + 13177 "01000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 99 33 first + 13178 "10011000" // LDA eh0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13179 "00001110" // /* MW 3 */ + 13180 "00011100" // /* MW 2 */ + 13181 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 99 33 + 13182 "10011000" // LDA el3, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13183 "11101110" // /* MW 3 */ + 13184 "00011100" // /* MW 2 */ + 13185 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 99 67 + 13186 "10011000" // LDA el2, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13187 "10101110" // /* MW 3 */ + 13188 "00011100" // /* MW 2 */ + 13189 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 99 67 + 13190 "10011000" // LDA el1, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13191 "01101110" // /* MW 3 */ + 13192 "00011100" // /* MW 2 */ + 13193 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 99 101 + 13194 "10011000" // LDA eh1, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13195 "01001110" // /* MW 3 */ + 13196 "00011100" // /* MW 2 */ + 13197 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 99 101 + 13198 "10011000" // LDA eh2, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13199 "10001110" // /* MW 3 */ + 13200 "00011100" // /* MW 2 */ + 13201 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 100 50 first + 13202 "00001100" // LDA el0, [p2], #4; ST el0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13203 "01011011" // /* MW 5 */ + 13204 "11011000" // /* MW 4 */ + 13205 "11011111" // /* MW 3 */ + 13206 "10000101" // /* MW 2 */ + 13207 "01000011" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 99 32 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 100 50 + 13208 "00001100" // LDA eh0, [p2], #4; ST eh0, [sp, #-104] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13209 "00011011" // /* MW 5 */ + 13210 "00110000" // /* MW 4 */ + 13211 "11011111" // /* MW 3 */ + 13212 "10000001" // /* MW 2 */ + 13213 "01000011" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 99 32 + 13214 "10011000" // ST el3, [sp, #-100] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13215 "11101101" // /* MW 3 */ + 13216 "10011100" // /* MW 2 */ + 13217 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 99 32 + 13218 "10011000" // ST el2, [sp, #-96] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13219 "10101101" // /* MW 3 */ + 13220 "10100000" // /* MW 2 */ + 13221 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 99 32 + 13222 "10011000" // ST el1, [sp, #-92] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13223 "01101101" // /* MW 3 */ + 13224 "10100100" // /* MW 2 */ + 13225 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 99 32 + 13226 "10011000" // ST eh1, [sp, #-88] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13227 "01001101" // /* MW 3 */ + 13228 "10101000" // /* MW 2 */ + 13229 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 99 32 + 13230 "10011000" // ST eh2, [sp, #-84] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13231 "10001101" // /* MW 3 */ + 13232 "10101100" // /* MW 2 */ + 13233 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 99 32 + 13234 "10011000" // ST el0, [sp, #-80] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13235 "00101101" // /* MW 3 */ + 13236 "10110000" // /* MW 2 */ + 13237 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 99 32 + 13238 "10011000" // ST eh0, [sp, #-76] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13239 "00001101" // /* MW 3 */ + 13240 "10110100" // /* MW 2 */ + 13241 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 100 84 first + 13242 "10011000" // LDA eh0, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13243 "00001110" // /* MW 3 */ + 13244 "00000100" // /* MW 2 */ + 13245 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 100 84 + 13246 "10011000" // LDA el0, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13247 "00101110" // /* MW 3 */ + 13248 "00010100" // /* MW 2 */ + 13249 "00000010" // /* MW 1 */ + 13250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13251 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 102 6 first + 13252 "10000100" // JNZ r16, #13360 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=13360 delay_slots=5 */ + 13253 "00000001" // /* MW 5 */ + 13254 "01000000" // /* MW 4 */ + 13255 "00011000" // /* MW 3 */ + 13256 "00011010" // /* MW 2 */ + 13257 "10000000" // /* MW 1 */ +.delay_slot + 13258 "10011000" // ST p7, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13259 "10011101" // /* MW 3 */ + 13260 "11101011" // /* MW 2 */ + 13261 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13263 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13265 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 99 32 first +.delay_slot + 13266 "10011000" // ST eh0, [sp, #-72] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13267 "00001101" // /* MW 3 */ + 13268 "10111000" // /* MW 2 */ + 13269 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 99 32 +.delay_slot + 13270 "10011000" // ST el0, [sp, #-68] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13271 "00101101" // /* MW 3 */ + 13272 "10111100" // /* MW 2 */ + 13273 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 105 35 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 106 4 first +.no_stack_arguments + 13274 "10111010" // MOVA dj0, #16; JL #11744 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=11744 delay_slots=5 */ + 13275 "01000000" // /* MW 9 */ + 13276 "00000000" // /* MW 8 */ + 13277 "00000000" // /* MW 7 */ + 13278 "10111100" // /* MW 6 */ + 13279 "00000101" // /* MW 5 */ + 13280 "00000000" // /* MW 4 */ + 13281 "10000000" // /* MW 3 */ + 13282 "00000010" // /* MW 2 */ + 13283 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 105 35 +.delay_slot + 13284 "01000100" // MOVXM p7, #508840 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13285 "01010000" // /* MW 5 */ + 13286 "11000111" // /* MW 4 */ + 13287 "11001110" // /* MW 3 */ + 13288 "00000111" // /* MW 2 */ + 13289 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 105 35 first +.delay_slot + 13290 "10011000" // ST r24, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13291 "00010001" // /* MW 3 */ + 13292 "00011111" // /* MW 2 */ + 13293 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 105 35 +.delay_slot + 13294 "10011000" // ST dj0, [p7], #-20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13295 "01000001" // /* MW 3 */ + 13296 "10111100" // /* MW 2 */ + 13297 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13298 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13299 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13300 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 13301 "10000001" // /* MW 11 */ + 13302 "10101101" // /* MW 10 */ + 13303 "00000000" // /* MW 9 */ + 13304 "00000000" // /* MW 8 */ + 13305 "00000000" // /* MW 7 */ + 13306 "00000000" // /* MW 6 */ + 13307 "00100000" // /* MW 5 */ + 13308 "00000000" // /* MW 4 */ + 13309 "11110000" // /* MW 3 */ + 13310 "00101100" // /* MW 2 */ + 13311 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 107 55 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 107 98 +.return_address + 13312 "10111010" // LDA r16, [p7], #4; MOVXM p2, #508892 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13313 "00010000" // /* MW 9 */ + 13314 "11101110" // /* MW 8 */ + 13315 "00110001" // /* MW 7 */ + 13316 "11110001" // /* MW 6 */ + 13317 "00000001" // /* MW 5 */ + 13318 "00000000" // /* MW 4 */ + 13319 "11010000" // /* MW 3 */ + 13320 "11000010" // /* MW 2 */ + 13321 "11100011" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 107 86 + 13322 "10011000" // LDA r18, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13323 "01010110" // /* MW 3 */ + 13324 "00000110" // /* MW 2 */ + 13325 "00000111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 107 98 + 13326 "10011000" // LDA r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13327 "00110110" // /* MW 3 */ + 13328 "00000110" // /* MW 2 */ + 13329 "00000010" // /* MW 1 */ + 13330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13331 "00000000" // /* MW 1 */ + 13332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13333 "00000000" // /* MW 1 */ + 13334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13335 "00000000" // /* MW 1 */ + 13336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13337 "00000000" // /* MW 1 */ + 13338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13339 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 107 65 + 13340 "10011000" // MUL r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13341 "00001111" // /* MW 3 */ + 13342 "10100001" // /* MW 2 */ + 13343 "00010100" // /* MW 1 */ + 13344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13345 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 107 96 + 13346 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13347 "00001111" // /* MW 3 */ + 13348 "01100001" // /* MW 2 */ + 13349 "00010100" // /* MW 1 */ + 13350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13351 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 107 34 + 13352 "00000010" // ST r16, [p7, #20]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13353 "01110000" // /* MW 7 */ + 13354 "10100101" // /* MW 6 */ + 13355 "00000001" // /* MW 5 */ + 13356 "00000000" // /* MW 4 */ + 13357 "00110000" // /* MW 3 */ + 13358 "11000010" // /* MW 2 */ + 13359 "11101010" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs35rmsnorm_row_major_part1_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EEEEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSF_ISG_SI_T1_EERA11_KjRNSF_ISG_NSH_3outET2_EE_352 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 110 14 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 110 47 + 13360 "10111010" // LDA p7, [sp, #-24]; MOVXM p2, #508900 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13361 "00010000" // /* MW 9 */ + 13362 "11110010" // /* MW 8 */ + 13363 "00110001" // /* MW 7 */ + 13364 "11110001" // /* MW 6 */ + 13365 "00000001" // /* MW 5 */ + 13366 "00000000" // /* MW 4 */ + 13367 "00100000" // /* MW 3 */ + 13368 "01110011" // /* MW 2 */ + 13369 "11111101" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 110 14 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 110 47 + 13370 "01010100" // LDA r0, [p2]; MOV m0, #36 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13371 "10010001" // /* MW 5 */ + 13372 "00000000" // /* MW 4 */ + 13373 "11010000" // /* MW 3 */ + 13374 "10000010" // /* MW 2 */ + 13375 "01000000" // /* MW 1 */ + 13376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13377 "00000000" // /* MW 1 */ + 13378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13379 "00000000" // /* MW 1 */ + 13380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13381 "00000000" // /* MW 1 */ + 13382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13383 "00000000" // /* MW 1 */ + 13384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13385 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 110 47 + 13386 "10011000" // LDA r1, [p7], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13387 "00110110" // /* MW 3 */ + 13388 "00001000" // /* MW 2 */ + 13389 "00000111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 110 26 +.no_stack_arguments + 13390 "00000100" // JL #15728 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=15728 delay_slots=5 */ + 13391 "00000001" // /* MW 5 */ + 13392 "00000000" // /* MW 4 */ + 13393 "10111000" // /* MW 3 */ + 13394 "00011110" // /* MW 2 */ + 13395 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13397 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13399 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13401 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13403 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13404 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13405 "01100111" // /* MW 3 */ + 13406 "00000001" // /* MW 2 */ + 13407 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 285 49 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 110 58 +.return_address + 13408 "00100100" // EQZ r26, r3; ADD.NC p2, r15, #12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13409 "00001100" // /* MW 5 */ + 13410 "11001111" // /* MW 4 */ + 13411 "00000100" // /* MW 3 */ + 13412 "10011010" // /* MW 2 */ + 13413 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 285 49 + 13414 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13415 "01110110" // /* MW 3 */ + 13416 "11111111" // /* MW 2 */ + 13417 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 285 64 + 13418 "10011000" // LDA r16, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13419 "00010110" // /* MW 3 */ + 13420 "11111110" // /* MW 2 */ + 13421 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 285 80 + 13422 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13423 "00110110" // /* MW 3 */ + 13424 "11111110" // /* MW 2 */ + 13425 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 302 28 first + 13426 "10011000" // LDA r18, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13427 "01010110" // /* MW 3 */ + 13428 "01000110" // /* MW 2 */ + 13429 "00000010" // /* MW 1 */ + 13430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13431 "00000000" // /* MW 1 */ + 13432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13433 "00000000" // /* MW 1 */ + 13434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13435 "00000000" // /* MW 1 */ + 13436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13437 "00000000" // /* MW 1 */ + 13438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13439 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 285 36 first +.src_ref 1 "io_buffer_main.h" 285 43 first + 13440 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13441 "00000010" // /* MW 3 */ + 13442 "01100001" // /* MW 2 */ + 13443 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 285 20 + 13444 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13445 "00010001" // /* MW 3 */ + 13446 "00000110" // /* MW 2 */ + 13447 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 + 13448 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13449 "11111101" // /* MW 3 */ + 13450 "11100000" // /* MW 2 */ + 13451 "00010111" // /* MW 1 */ + 13452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13453 "00000000" // /* MW 1 */ + 13454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13455 "00000000" // /* MW 1 */ + 13456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13457 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 first + 13458 "00011000" // ACQ.COND r18, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13459 "00001000" // /* MW 3 */ + 13460 "10010111" // /* MW 2 */ + 13461 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 397 51 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 143 48 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 143 59 + 13462 "10111010" // MOVA m0, #-80; MOVX r16, #1; MOV r15, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13463 "01111000" // /* MW 9 */ + 13464 "01100000" // /* MW 8 */ + 13465 "11101010" // /* MW 7 */ + 13466 "00101001" // /* MW 6 */ + 13467 "00000000" // /* MW 5 */ + 13468 "00000001" // /* MW 4 */ + 13469 "10000000" // /* MW 3 */ + 13470 "00000000" // /* MW 2 */ + 13471 "11110110" // /* MW 1 */ + 13472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13473 "00000000" // /* MW 1 */ + 13474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13475 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 143 48 first + 13476 "10011000" // LDA r18, [p7], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13477 "01010110" // /* MW 3 */ + 13478 "00001010" // /* MW 2 */ + 13479 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 13480 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13481 "00110110" // /* MW 3 */ + 13482 "00000110" // /* MW 2 */ + 13483 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 + 13484 "10011000" // LDA p1, [p2], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13485 "10011110" // /* MW 3 */ + 13486 "01011100" // /* MW 2 */ + 13487 "00000010" // /* MW 1 */ + 13488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13489 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 145 2 first +.no_stack_arguments + 13490 "00000100" // JL #12144 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12144 delay_slots=5 */ + 13491 "00000001" // /* MW 5 */ + 13492 "00000000" // /* MW 4 */ + 13493 "10111000" // /* MW 3 */ + 13494 "00010111" // /* MW 2 */ + 13495 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 400 28 +.src_ref 1 "io_buffer_main.h" 400 30 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 145 2 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 159 47 +.delay_slot + 13496 "00000010" // MOVS p2, p7; MOV p7, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13497 "01110000" // /* MW 7 */ + 13498 "01100000" // /* MW 6 */ + 13499 "10110010" // /* MW 5 */ + 13500 "00000011" // /* MW 4 */ + 13501 "01100000" // /* MW 3 */ + 13502 "10010001" // /* MW 2 */ + 13503 "01010011" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 159 47 +.delay_slot + 13504 "11111000" // MOV p6, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13505 "11000000" // /* MW 3 */ + 13506 "01100100" // /* MW 2 */ + 13507 "00011110" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 143 59 first +.delay_slot + 13508 "10011000" // LSHL r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13509 "00001101" // /* MW 3 */ + 13510 "10100001" // /* MW 2 */ + 13511 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13513 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 143 11 +.delay_slot + 13514 "10010100" // NOPA; ADD.NC p0, r17, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13515 "10000010" // /* MW 5 */ + 13516 "11010001" // /* MW 4 */ + 13517 "11110000" // /* MW 3 */ + 13518 "00101100" // /* MW 2 */ + 13519 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 159 47 +.return_address + 13520 "10111000" // MOV dj0, #44 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13521 "01011000" // /* MW 3 */ + 13522 "10000000" // /* MW 2 */ + 13523 "00011000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 159 14 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 159 47 first + 13524 "10111010" // LDA r1, [p6, dj0]; MOVXM p0, #508900 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13525 "00010000" // /* MW 9 */ + 13526 "11110010" // /* MW 8 */ + 13527 "00110001" // /* MW 7 */ + 13528 "11110000" // /* MW 6 */ + 13529 "00000001" // /* MW 5 */ + 13530 "00000000" // /* MW 4 */ + 13531 "11010000" // /* MW 3 */ + 13532 "00000110" // /* MW 2 */ + 13533 "11000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 159 14 + 13534 "10011000" // LDA r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13535 "00010110" // /* MW 3 */ + 13536 "00000100" // /* MW 2 */ + 13537 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 159 26 +.no_stack_arguments + 13538 "00000100" // JL #15728 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=15728 delay_slots=5 */ + 13539 "00000001" // /* MW 5 */ + 13540 "00000000" // /* MW 4 */ + 13541 "10111000" // /* MW 3 */ + 13542 "00011110" // /* MW 2 */ + 13543 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 400 28 +.src_ref 1 "io_buffer_main.h" 400 30 +.delay_slot + 13544 "11111000" // MOV p6, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13545 "11000000" // /* MW 3 */ + 13546 "01101110" // /* MW 2 */ + 13547 "00011110" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13549 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13551 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 397 51 +.delay_slot + 13552 "11111000" // MOV p2, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13553 "10100000" // /* MW 3 */ + 13554 "01100111" // /* MW 2 */ + 13555 "00011010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 397 51 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 159 91 +.delay_slot + 13556 "11110110" // NOPA; NOPB; MOVS p7, p2; MOV r15, r1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 13557 "01110000" // /* MW 11 */ + 13558 "01010000" // /* MW 10 */ + 13559 "11101000" // /* MW 9 */ + 13560 "00000001" // /* MW 8 */ + 13561 "10001011" // /* MW 7 */ + 13562 "10001000" // /* MW 6 */ + 13563 "00100111" // /* MW 5 */ + 13564 "00000000" // /* MW 4 */ + 13565 "11110000" // /* MW 3 */ + 13566 "00101100" // /* MW 2 */ + 13567 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 397 51 first +.src_ref 1 "io_buffer_main.h" 400 30 +.src_ref 1 "io_buffer_main.h" 464 8 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 159 91 +.return_address + 13568 "10111010" // LDA r17, [p7, #20]; ADD r18, r15, #-1; MOV r16, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13569 "01011000" // /* MW 9 */ + 13570 "00000001" // /* MW 8 */ + 13571 "00001000" // /* MW 7 */ + 13572 "11111010" // /* MW 6 */ + 13573 "00101111" // /* MW 5 */ + 13574 "00011111" // /* MW 4 */ + 13575 "11010000" // /* MW 3 */ + 13576 "11000110" // /* MW 2 */ + 13577 "11101010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 159 58 first + 13578 "10011000" // EQ r26, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13579 "00100111" // /* MW 3 */ + 13580 "11110101" // /* MW 2 */ + 13581 "00010000" // /* MW 1 */ + 13582 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13583 "00000000" // /* MW 1 */ + 13584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13585 "00000000" // /* MW 1 */ + 13586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13587 "00000000" // /* MW 1 */ + 13588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13589 "00000000" // /* MW 1 */ + 13590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13591 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 first + 13592 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13593 "00001000" // /* MW 3 */ + 13594 "01010101" // /* MW 2 */ + 13595 "00010100" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 161 6 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 169 + 13596 "10111010" // LDA lr, [sp, #-16]; MOVXM p2, #508900 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13597 "00010000" // /* MW 9 */ + 13598 "11110010" // /* MW 8 */ + 13599 "00110001" // /* MW 7 */ + 13600 "11110001" // /* MW 6 */ + 13601 "00000001" // /* MW 5 */ + 13602 "00000000" // /* MW 4 */ + 13603 "00100000" // /* MW 3 */ + 13604 "00000111" // /* MW 2 */ + 13605 "11111110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 400 30 +.src_ref 1 "io_buffer_main.h" 400 30 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 161 6 + 13606 "10111010" // LDA r17, [p6, #-8]; MOVX r24, #0; MOV r27, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13607 "01111000" // /* MW 9 */ + 13608 "10010000" // /* MW 8 */ + 13609 "01101110" // /* MW 7 */ + 13610 "00001011" // /* MW 6 */ + 13611 "10000000" // /* MW 5 */ + 13612 "00000001" // /* MW 4 */ + 13613 "11010000" // /* MW 3 */ + 13614 "11000110" // /* MW 2 */ + 13615 "11011100" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 161 32 + 13616 "00011000" // LDA r18, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13617 "01010001" // /* MW 3 */ + 13618 "11101110" // /* MW 2 */ + 13619 "00000111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 161 6 first + 13620 "10011000" // LDA r19, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13621 "01110110" // /* MW 3 */ + 13622 "00000110" // /* MW 2 */ + 13623 "00000010" // /* MW 1 */ + 13624 "00011000" // LDA p0, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13625 "00011001" // /* MW 3 */ + 13626 "11110100" // /* MW 2 */ + 13627 "00000111" // /* MW 1 */ + 13628 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13629 "10011001" // /* MW 3 */ + 13630 "11111011" // /* MW 2 */ + 13631 "00000111" // /* MW 1 */ + 13632 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13633 "11110001" // /* MW 3 */ + 13634 "11111101" // /* MW 2 */ + 13635 "00000111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 169 first + 13636 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 13637 "00000000" // /* MW 3 */ + 13638 "00101000" // /* MW 2 */ + 13639 "00010000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 400 30 first +.delay_slot + 13640 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13641 "00010001" // /* MW 3 */ + 13642 "00100001" // /* MW 2 */ + 13643 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 400 30 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 161 32 first +.delay_slot + 13644 "00100100" // SEL.EQZ r17, r17, r16, r27; ADD.NC r16, r18, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13645 "11111111" // /* MW 5 */ + 13646 "00110010" // /* MW 4 */ + 13647 "01001000" // /* MW 3 */ + 13648 "01100000" // /* MW 2 */ + 13649 "10001100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 400 28 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 161 6 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 161 18 +.delay_slot + 13650 "00111010" // ST r17, [p6, #-8]; EQ r27, r19, r16; ADD.NC r16, r19, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13651 "01001001" // /* MW 9 */ + 13652 "11000000" // /* MW 8 */ + 13653 "00001100" // /* MW 7 */ + 13654 "00111110" // /* MW 6 */ + 13655 "10111000" // /* MW 5 */ + 13656 "00100111" // /* MW 4 */ + 13657 "00110000" // /* MW 3 */ + 13658 "11000110" // /* MW 2 */ + 13659 "11011100" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 161 6 first +.delay_slot + 13660 "01011100" // MOVS p6, p0; SEL.EQZ r16, r16, r24, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13661 "00000100" // /* MW 5 */ + 13662 "01000011" // /* MW 4 */ + 13663 "01101000" // /* MW 3 */ + 13664 "00010001" // /* MW 2 */ + 13665 "11010000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 169 first +.delay_slot + 13666 "00111010" // ST r16, [p2]; PADDXM [sp], #-128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13667 "01110001" // /* MW 9 */ + 13668 "00000000" // /* MW 8 */ + 13669 "00000000" // /* MW 7 */ + 13670 "00000000" // /* MW 6 */ + 13671 "11111100" // /* MW 5 */ + 13672 "00111111" // /* MW 4 */ + 13673 "00110000" // /* MW 3 */ + 13674 "11000010" // /* MW 2 */ +.label _ZN12mllib_graphs35rmsnorm_row_major_part1_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EEEEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSF_ISG_SI_T1_EERA11_KjRNSF_ISG_NSH_3outET2_EE__end +.label __ZN12mllib_graphs35rmsnorm_row_major_part1_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EEEEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSF_ISG_SI_T1_EERA11_KjRNSF_ISG_NSH_3outET2_EE___func_end0 + 13675 "01000000" // /* MW 1 */ +.label __Z14_b8292_wrapperPPv___func_begin0 +.label _Z14_b8292_wrapperPPv +.function _b8292_wrapper _Z14_b8292_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 63 first +.src_ref 0 "0_0_reloadable5.cc" 65 79 +.function_start + 13680 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13681 "11000000" // /* MW 3 */ + 13682 "01100000" // /* MW 2 */ + 13683 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 65 79 first + 13684 "10011000" // LDA p0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13685 "00011110" // /* MW 3 */ + 13686 "00011100" // /* MW 2 */ + 13687 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 66 79 first + 13688 "10011000" // LDA p1, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13689 "10011110" // /* MW 3 */ + 13690 "00101100" // /* MW 2 */ + 13691 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 68 81 first + 13692 "10011000" // LDA p3, [p2, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13693 "10011110" // /* MW 3 */ + 13694 "11110101" // /* MW 2 */ + 13695 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 67 47 first + 13696 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13697 "00011110" // /* MW 3 */ + 13698 "00000101" // /* MW 2 */ + 13699 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 64 4 first +.tail_call + 13700 "10000100" // J #13008 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=13008 delay_slots=5 */ + 13701 "00000000" // /* MW 5 */ + 13702 "00000000" // /* MW 4 */ + 13703 "01101000" // /* MW 3 */ + 13704 "00011001" // /* MW 2 */ + 13705 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13707 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13709 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13711 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13713 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z14_b8292_wrapperPPv__end +.label __Z14_b8292_wrapperPPv___func_end0 + 13715 "00000000" // /* MW 1 */ +.label __ZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE___func_begin0 +.label _ZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE +.function rmsnorm_row_major_part2_4x4_wrapper, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> > > _ZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE +.src_ref 12 "rms_norm_adf_wrapper.cpp" 178 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 184 5 +.function_start + 13728 "01000100" // MOVXM p4, #508916 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13729 "11101000" // /* MW 5 */ + 13730 "11000111" // /* MW 4 */ + 13731 "11001000" // /* MW 3 */ + 13732 "00000111" // /* MW 2 */ + 13733 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 184 5 first + 13734 "10011000" // LDA r16, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13735 "00010110" // /* MW 3 */ + 13736 "00000110" // /* MW 2 */ + 13737 "00000100" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 178 + 13738 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13739 "00000001" // /* MW 5 */ + 13740 "00000000" // /* MW 4 */ + 13741 "00000000" // /* MW 3 */ + 13742 "00010000" // /* MW 2 */ + 13743 "00000000" // /* MW 1 */ + 13744 "10011000" // ST p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13745 "10011101" // /* MW 3 */ + 13746 "11111111" // /* MW 2 */ + 13747 "00001111" // /* MW 1 */ + 13748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13749 "00000000" // /* MW 1 */ + 13750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13751 "00000000" // /* MW 1 */ + 13752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13753 "00000000" // /* MW 1 */ + 13754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13755 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 184 5 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 184 17 + 13756 "10000100" // JNZ r16, #13872 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=13872 delay_slots=5 */ + 13757 "00000001" // /* MW 5 */ + 13758 "01000000" // /* MW 4 */ + 13759 "00011000" // /* MW 3 */ + 13760 "00011011" // /* MW 2 */ + 13761 "10000000" // /* MW 1 */ +.delay_slot + 13762 "10011000" // ST p0, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13763 "00011101" // /* MW 3 */ + 13764 "11101000" // /* MW 2 */ + 13765 "00001111" // /* MW 1 */ +.delay_slot + 13766 "10011000" // ST p3, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13767 "10011101" // /* MW 3 */ + 13768 "11110001" // /* MW 2 */ + 13769 "00001111" // /* MW 1 */ +.delay_slot + 13770 "10011000" // ST p1, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13771 "10011101" // /* MW 3 */ + 13772 "11101100" // /* MW 2 */ + 13773 "00001111" // /* MW 1 */ +.delay_slot + 13774 "10011000" // ST r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13775 "11110101" // /* MW 3 */ + 13776 "11111001" // /* MW 2 */ + 13777 "00001111" // /* MW 1 */ +.delay_slot + 13778 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13779 "00111101" // /* MW 3 */ + 13780 "11110100" // /* MW 2 */ + 13781 "00001111" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 186 36 + 13782 "10111010" // MOVA r17, #12; MOVX r19, #-16; MOV r16, CORE_ID /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13783 "01111000" // /* MW 9 */ + 13784 "01110000" // /* MW 8 */ + 13785 "00001101" // /* MW 7 */ + 13786 "00001010" // /* MW 6 */ + 13787 "00110110" // /* MW 5 */ + 13788 "00111111" // /* MW 4 */ + 13789 "00000000" // /* MW 3 */ + 13790 "10010001" // /* MW 2 */ + 13791 "00000001" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 185 14 + 13792 "10111010" // MOVA r18, #1; MOVXM p7, #508904 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13793 "00010000" // /* MW 9 */ + 13794 "11110100" // /* MW 8 */ + 13795 "10110001" // /* MW 7 */ + 13796 "11110011" // /* MW 6 */ + 13797 "00000001" // /* MW 5 */ + 13798 "00000000" // /* MW 4 */ + 13799 "00000000" // /* MW 3 */ + 13800 "00110010" // /* MW 2 */ + 13801 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 185 14 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 190 27 first + 13802 "01011100" // ST r16, [p7]; EXTEND.u8 r20, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13803 "00100000" // /* MW 5 */ + 13804 "01010001" // /* MW 4 */ + 13805 "00111000" // /* MW 3 */ + 13806 "11000010" // /* MW 2 */ + 13807 "11100000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 186 36 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 190 44 + 13808 "00100100" // LSHL r16, r16, r19; ADD.NC r19, r20, #-2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13809 "11111110" // /* MW 5 */ + 13810 "10110100" // /* MW 4 */ + 13811 "10111001" // /* MW 3 */ + 13812 "00100111" // /* MW 2 */ + 13813 "10000100" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 190 15 + 13814 "01000100" // MOVXM p7, #508908 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13815 "11011000" // /* MW 5 */ + 13816 "11000111" // /* MW 4 */ + 13817 "11001110" // /* MW 3 */ + 13818 "00000111" // /* MW 2 */ + 13819 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 190 15 first + 13820 "00111010" // ST r19, [p7]; MOVXM p7, #508944 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13821 "00010001" // /* MW 9 */ + 13822 "00001000" // /* MW 8 */ + 13823 "10110010" // /* MW 7 */ + 13824 "11110011" // /* MW 6 */ + 13825 "00000001" // /* MW 5 */ + 13826 "00000000" // /* MW 4 */ + 13827 "00110000" // /* MW 3 */ + 13828 "11001110" // /* MW 2 */ + 13829 "11100000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first + 13830 "10111010" // ST.s8 r17, [p7]; MOVXM p0, #508940 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13831 "00010000" // /* MW 9 */ + 13832 "00000110" // /* MW 8 */ + 13833 "00110010" // /* MW 7 */ + 13834 "11110000" // /* MW 6 */ + 13835 "00000001" // /* MW 5 */ + 13836 "00000000" // /* MW 4 */ + 13837 "11100000" // /* MW 3 */ + 13838 "11000100" // /* MW 2 */ + 13839 "11100000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 186 27 first + 13840 "00011000" // EXTEND.u8 r16, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13841 "10010000" // /* MW 3 */ + 13842 "00100000" // /* MW 2 */ + 13843 "00010100" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 186 15 + 13844 "01000100" // MOVXM p7, #508912 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13845 "11100000" // /* MW 5 */ + 13846 "11000111" // /* MW 4 */ + 13847 "11001110" // /* MW 3 */ + 13848 "00000111" // /* MW 2 */ + 13849 "00000000" // /* MW 1 */ + 13850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13851 "00000000" // /* MW 1 */ + 13852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13853 "00000000" // /* MW 1 */ + 13854 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13855 "00000000" // /* MW 1 */ + 13856 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13857 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first + 13858 "10011000" // ST r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13859 "01010001" // /* MW 3 */ + 13860 "00000110" // /* MW 2 */ + 13861 "00001000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 186 15 first + 13862 "01111010" // NOPA; ST r16, [p7]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13863 "00000000" // /* MW 9 */ + 13864 "00000000" // /* MW 8 */ + 13865 "00000000" // /* MW 7 */ + 13866 "10000000" // /* MW 6 */ + 13867 "00010001" // /* MW 5 */ + 13868 "00000110" // /* MW 4 */ + 13869 "11110111" // /* MW 3 */ + 13870 "00101100" // /* MW 2 */ + 13871 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE_144 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 197 24 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 197 24 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 199 33 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 201 30 + 13872 "01110110" // LDA el0, [p2], #4; MOVS p1, p2; MOVX r17, #2; MOV dj0, #40 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 13873 "01011000" // /* MW 11 */ + 13874 "00101000" // /* MW 10 */ + 13875 "01000000" // /* MW 9 */ + 13876 "01001000" // /* MW 8 */ + 13877 "00010000" // /* MW 7 */ + 13878 "00000001" // /* MW 6 */ + 13879 "10001011" // /* MW 5 */ + 13880 "10001000" // /* MW 4 */ + 13881 "11010001" // /* MW 3 */ + 13882 "10000101" // /* MW 2 */ + 13883 "01000011" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 199 33 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 201 30 + 13884 "10111010" // LDA el3, [p2], #4; MOVXM p7, #508852 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13885 "00010000" // /* MW 9 */ + 13886 "11011010" // /* MW 8 */ + 13887 "10110001" // /* MW 7 */ + 13888 "11110011" // /* MW 6 */ + 13889 "00000001" // /* MW 5 */ + 13890 "00000000" // /* MW 4 */ + 13891 "11010000" // /* MW 3 */ + 13892 "10011101" // /* MW 2 */ + 13893 "01000011" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 199 67 + 13894 "11010100" // LDA el2, [p2], #4; MOV r18, sp /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13895 "11000001" // /* MW 5 */ + 13896 "00101011" // /* MW 4 */ + 13897 "11011001" // /* MW 3 */ + 13898 "10010101" // /* MW 2 */ + 13899 "01000011" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 199 67 + 13900 "00010100" // LDA eh0, [p2], #4; ADD.NC p0, r18, #-104 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13901 "10011000" // /* MW 5 */ + 13902 "11010010" // /* MW 4 */ + 13903 "11010000" // /* MW 3 */ + 13904 "10000001" // /* MW 2 */ + 13905 "01000011" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 199 101 + 13906 "10011000" // LDA el1, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13907 "01101110" // /* MW 3 */ + 13908 "00011100" // /* MW 2 */ + 13909 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 199 101 + 13910 "10011000" // LDA eh1, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13911 "01001110" // /* MW 3 */ + 13912 "00011100" // /* MW 2 */ + 13913 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 200 50 first + 13914 "10011000" // LDA eh2, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13915 "10001110" // /* MW 3 */ + 13916 "00011100" // /* MW 2 */ + 13917 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 199 32 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 200 50 + 13918 "00001100" // LDA el0, [p2], #4; ST el0, [sp, #-104] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13919 "01011011" // /* MW 5 */ + 13920 "00110000" // /* MW 4 */ + 13921 "11011111" // /* MW 3 */ + 13922 "10000101" // /* MW 2 */ + 13923 "01000011" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 197 24 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 199 32 + 13924 "00001100" // LDA r15, [p1, dj0]; ST el3, [sp, #-100] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13925 "11011011" // /* MW 5 */ + 13926 "00111001" // /* MW 4 */ + 13927 "11011111" // /* MW 3 */ + 13928 "00111110" // /* MW 2 */ + 13929 "00100000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 199 32 first + 13930 "10011000" // ST el2, [sp, #-96] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13931 "10101101" // /* MW 3 */ + 13932 "10100000" // /* MW 2 */ + 13933 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 199 32 + 13934 "10011000" // ST eh0, [sp, #-92] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13935 "00001101" // /* MW 3 */ + 13936 "10100100" // /* MW 2 */ + 13937 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 199 32 + 13938 "10011000" // ST el1, [sp, #-88] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13939 "01101101" // /* MW 3 */ + 13940 "10101000" // /* MW 2 */ + 13941 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 199 32 + 13942 "10011000" // ST eh1, [sp, #-84] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13943 "01001101" // /* MW 3 */ + 13944 "10101100" // /* MW 2 */ + 13945 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 199 32 + 13946 "10011000" // ST eh2, [sp, #-80] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13947 "10001101" // /* MW 3 */ + 13948 "10110000" // /* MW 2 */ + 13949 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 199 32 + 13950 "10011000" // ST el0, [sp, #-76] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13951 "00101101" // /* MW 3 */ + 13952 "10110100" // /* MW 2 */ + 13953 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 200 84 first + 13954 "10011000" // LDA eh0, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13955 "00001110" // /* MW 3 */ + 13956 "00000100" // /* MW 2 */ + 13957 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 200 84 + 13958 "10011000" // LDA el0, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13959 "00101110" // /* MW 3 */ + 13960 "00010100" // /* MW 2 */ + 13961 "00000010" // /* MW 1 */ + 13962 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13963 "00000000" // /* MW 1 */ + 13964 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13965 "00000000" // /* MW 1 */ + 13966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13967 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 203 2 first +.no_stack_arguments + 13968 "00000100" // JL #11744 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11744 delay_slots=5 */ + 13969 "00000001" // /* MW 5 */ + 13970 "00000000" // /* MW 4 */ + 13971 "11110000" // /* MW 3 */ + 13972 "00010110" // /* MW 2 */ + 13973 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13975 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 199 32 first +.delay_slot + 13976 "10011000" // ST eh0, [sp, #-72] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13977 "00001101" // /* MW 3 */ + 13978 "10111000" // /* MW 2 */ + 13979 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 199 32 +.delay_slot + 13980 "10011000" // ST el0, [sp, #-68] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13981 "00101101" // /* MW 3 */ + 13982 "10111100" // /* MW 2 */ + 13983 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 201 30 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 202 31 +.delay_slot + 13984 "01011100" // ST r17, [p7], #-8; MOVX r16, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13985 "10000010" // /* MW 5 */ + 13986 "01000000" // /* MW 4 */ + 13987 "00110000" // /* MW 3 */ + 13988 "11000110" // /* MW 2 */ + 13989 "11111101" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 202 31 first +.delay_slot + 13990 "01111010" // NOPA; ST r16, [p7], #-12; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13991 "00000000" // /* MW 9 */ + 13992 "00000000" // /* MW 8 */ + 13993 "00000000" // /* MW 7 */ + 13994 "10000000" // /* MW 6 */ + 13995 "00010001" // /* MW 5 */ + 13996 "11011110" // /* MW 4 */ + 13997 "11110111" // /* MW 3 */ + 13998 "00101100" // /* MW 2 */ + 13999 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 204 35 +.return_address + 14000 "10111010" // LDA p1, [sp, #-24]; MOVXM p2, #508916 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14001 "00010000" // /* MW 9 */ + 14002 "11111010" // /* MW 8 */ + 14003 "00110001" // /* MW 7 */ + 14004 "11110001" // /* MW 6 */ + 14005 "00000001" // /* MW 5 */ + 14006 "00000000" // /* MW 4 */ + 14007 "00100000" // /* MW 3 */ + 14008 "00010011" // /* MW 2 */ + 14009 "11111101" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 204 35 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 206 95 + 14010 "10111010" // LDA r16, [p2]; MOVXM p0, #508908 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14011 "00010000" // /* MW 9 */ + 14012 "11110110" // /* MW 8 */ + 14013 "00110001" // /* MW 7 */ + 14014 "11110000" // /* MW 6 */ + 14015 "00000001" // /* MW 5 */ + 14016 "00000000" // /* MW 4 */ + 14017 "11010000" // /* MW 3 */ + 14018 "11000010" // /* MW 2 */ + 14019 "01000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 51 2 first + 14020 "10111010" // LDA p3, [sp, #-20]; MOVXM ls, #14128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14021 "00010000" // /* MW 9 */ + 14022 "10011000" // /* MW 8 */ + 14023 "01111011" // /* MW 7 */ + 14024 "00001100" // /* MW 6 */ + 14025 "00000000" // /* MW 5 */ + 14026 "00000000" // /* MW 4 */ + 14027 "00100000" // /* MW 3 */ + 14028 "10110011" // /* MW 2 */ + 14029 "11111101" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 51 37 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 206 95 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 215 59 + 14030 "10111010" // LDA r20, [p0]; MOVX r19, #-5; MOV r6, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14031 "01011000" // /* MW 9 */ + 14032 "00000001" // /* MW 8 */ + 14033 "11001000" // /* MW 7 */ + 14034 "01101000" // /* MW 6 */ + 14035 "00110111" // /* MW 5 */ + 14036 "00111111" // /* MW 4 */ + 14037 "11010000" // /* MW 3 */ + 14038 "11010010" // /* MW 2 */ + 14039 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 51 2 first + 14040 "10111010" // LDA p0, [sp, #-16]; MOVXM le, #14224 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14041 "00010000" // /* MW 9 */ + 14042 "11001000" // /* MW 8 */ + 14043 "10111011" // /* MW 7 */ + 14044 "00001101" // /* MW 6 */ + 14045 "00000000" // /* MW 5 */ + 14046 "00000000" // /* MW 4 */ + 14047 "00100000" // /* MW 3 */ + 14048 "00000011" // /* MW 2 */ + 14049 "11111110" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 205 33 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 224 6 + 14050 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14051 "00000001" // /* MW 3 */ + 14052 "00110000" // /* MW 2 */ + 14053 "00010000" // /* MW 1 */ + 14054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14055 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 14056 "10011000" // LDA r21, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14057 "10110110" // /* MW 3 */ + 14058 "00000110" // /* MW 2 */ + 14059 "00000001" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 204 33 first + 14060 "10011000" // ST r16, [p7], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14061 "00010001" // /* MW 3 */ + 14062 "00101110" // /* MW 2 */ + 14063 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 205 33 first + 14064 "00001100" // LDA p1, [p3]; ST r24, [p7], #12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14065 "00100011" // /* MW 5 */ + 14066 "01111110" // /* MW 4 */ + 14067 "11011110" // /* MW 3 */ + 14068 "10010011" // /* MW 2 */ + 14069 "01100000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 206 84 first + 14070 "10011000" // LDA r22, [p7], #-28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14071 "11010110" // /* MW 3 */ + 14072 "10011110" // /* MW 2 */ + 14073 "00000111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 43 29 first + 14074 "10011000" // LDA r23, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14075 "11110110" // /* MW 3 */ + 14076 "00011110" // /* MW 2 */ + 14077 "00000111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 206 53 first + 14078 "10011000" // LDA r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14079 "00110110" // /* MW 3 */ + 14080 "00000110" // /* MW 2 */ + 14081 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 14082 "10011000" // LDA p0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14083 "00011110" // /* MW 3 */ + 14084 "00000100" // /* MW 2 */ + 14085 "00000000" // /* MW 1 */ + 14086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14087 "00000000" // /* MW 1 */ + 14088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14089 "00000000" // /* MW 1 */ + 14090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14091 "00000000" // /* MW 1 */ + 14092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14093 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 51 37 first + 14094 "10011000" // LSHL r19, r23, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14095 "00111101" // /* MW 3 */ + 14096 "11100111" // /* MW 2 */ + 14097 "00010101" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 51 2 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 206 63 first + 14098 "00100100" // MUL r19, r17, r22; ADD.NC lc, r19, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14099 "00000000" // /* MW 5 */ + 14100 "11110011" // /* MW 4 */ + 14101 "11111010" // /* MW 3 */ + 14102 "11101101" // /* MW 2 */ + 14103 "10001100" // /* MW 1 */ + 14104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14105 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 206 93 + 14106 "10011000" // MUL r19, r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14107 "01001111" // /* MW 3 */ + 14108 "11100111" // /* MW 2 */ + 14109 "00010100" // /* MW 1 */ + 14110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14111 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 206 32 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 215 59 first + 14112 "01011100" // ST r19, [p7, #20]; LSHL r6, r19, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14113 "11011011" // /* MW 5 */ + 14114 "10011000" // /* MW 4 */ + 14115 "00111001" // /* MW 3 */ + 14116 "11001110" // /* MW 2 */ + 14117 "11101010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 215 11 + 14118 "10111010" // NOPA; NOPB; ADD.NC p7, r21, r6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14119 "10101110" // /* MW 9 */ + 14120 "01001100" // /* MW 8 */ + 14121 "10110101" // /* MW 7 */ + 14122 "00000011" // /* MW 6 */ + 14123 "00010000" // /* MW 5 */ + 14124 "00000000" // /* MW 4 */ + 14125 "11110000" // /* MW 3 */ + 14126 "00101100" // /* MW 2 */ + 14127 "00000000" // /* MW 1 */ +.label ZLS_F_ZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE_400 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 54 13 first +.begin_of_loop +.loop_nesting 1 + 14128 "10011000" // VLDA bmll0, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14129 "00010101" // /* MW 3 */ + 14130 "00011100" // /* MW 2 */ + 14131 "00000001" // /* MW 1 */ + 14132 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 14133 "10000001" // /* MW 11 */ + 14134 "10101101" // /* MW 10 */ + 14135 "00000000" // /* MW 9 */ + 14136 "00000000" // /* MW 8 */ + 14137 "00000000" // /* MW 7 */ + 14138 "00000000" // /* MW 6 */ + 14139 "00100000" // /* MW 5 */ + 14140 "00000000" // /* MW 4 */ + 14141 "11110000" // /* MW 3 */ + 14142 "00101100" // /* MW 2 */ + 14143 "00000000" // /* MW 1 */ + 14144 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14145 "00000000" // /* MW 15 */ + 14146 "00000000" // /* MW 14 */ + 14147 "01111000" // /* MW 13 */ + 14148 "10100101" // /* MW 12 */ + 14149 "00000001" // /* MW 11 */ + 14150 "00000000" // /* MW 10 */ + 14151 "00000000" // /* MW 9 */ + 14152 "00000000" // /* MW 8 */ + 14153 "01011011" // /* MW 7 */ + 14154 "00000001" // /* MW 6 */ + 14155 "00100000" // /* MW 5 */ + 14156 "00000000" // /* MW 4 */ + 14157 "11110000" // /* MW 3 */ + 14158 "00101100" // /* MW 2 */ + 14159 "00000000" // /* MW 1 */ + 14160 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14161 "00000000" // /* MW 15 */ + 14162 "00000000" // /* MW 14 */ + 14163 "01111000" // /* MW 13 */ + 14164 "10100101" // /* MW 12 */ + 14165 "00000001" // /* MW 11 */ + 14166 "00000000" // /* MW 10 */ + 14167 "00000000" // /* MW 9 */ + 14168 "00000000" // /* MW 8 */ + 14169 "01011011" // /* MW 7 */ + 14170 "00000001" // /* MW 6 */ + 14171 "00100000" // /* MW 5 */ + 14172 "00000000" // /* MW 4 */ + 14173 "11110000" // /* MW 3 */ + 14174 "00101100" // /* MW 2 */ + 14175 "00000000" // /* MW 1 */ + 14176 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14177 "00000000" // /* MW 15 */ + 14178 "00000000" // /* MW 14 */ + 14179 "01111000" // /* MW 13 */ + 14180 "10100101" // /* MW 12 */ + 14181 "00000001" // /* MW 11 */ + 14182 "00000000" // /* MW 10 */ + 14183 "00000000" // /* MW 9 */ + 14184 "00000000" // /* MW 8 */ + 14185 "01011011" // /* MW 7 */ + 14186 "00000001" // /* MW 6 */ + 14187 "00100000" // /* MW 5 */ + 14188 "00000000" // /* MW 4 */ + 14189 "11110000" // /* MW 3 */ + 14190 "00101100" // /* MW 2 */ + 14191 "00000000" // /* MW 1 */ + 14192 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14193 "00000000" // /* MW 15 */ + 14194 "00000000" // /* MW 14 */ + 14195 "01111000" // /* MW 13 */ + 14196 "10100101" // /* MW 12 */ + 14197 "00000001" // /* MW 11 */ + 14198 "00000000" // /* MW 10 */ + 14199 "00000000" // /* MW 9 */ + 14200 "00000000" // /* MW 8 */ + 14201 "01011011" // /* MW 7 */ + 14202 "00000001" // /* MW 6 */ + 14203 "00100000" // /* MW 5 */ + 14204 "00000000" // /* MW 4 */ + 14205 "11110000" // /* MW 3 */ + 14206 "00101100" // /* MW 2 */ + 14207 "00000000" // /* MW 1 */ + 14208 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14209 "00000000" // /* MW 15 */ + 14210 "00000000" // /* MW 14 */ + 14211 "01111000" // /* MW 13 */ + 14212 "10100101" // /* MW 12 */ + 14213 "00000001" // /* MW 11 */ + 14214 "00000000" // /* MW 10 */ + 14215 "00000000" // /* MW 9 */ + 14216 "00000000" // /* MW 8 */ + 14217 "01011011" // /* MW 7 */ + 14218 "00000001" // /* MW 6 */ + 14219 "00100000" // /* MW 5 */ + 14220 "00000000" // /* MW 4 */ + 14221 "11110000" // /* MW 3 */ + 14222 "00101100" // /* MW 2 */ + 14223 "00000000" // /* MW 1 */ +.label ZLE_F_ZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE_496 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 56 13 first +.end_of_loop + 14224 "11100001" // NOPA; NOPB; VST bmll0, [p0], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14225 "00000000" // /* MW 15 */ + 14226 "00000000" // /* MW 14 */ + 14227 "01111000" // /* MW 13 */ + 14228 "10100101" // /* MW 12 */ + 14229 "00000001" // /* MW 11 */ + 14230 "00000000" // /* MW 10 */ + 14231 "00000000" // /* MW 9 */ + 14232 "10000000" // /* MW 8 */ + 14233 "00000110" // /* MW 7 */ + 14234 "00011100" // /* MW 6 */ + 14235 "00100000" // /* MW 5 */ + 14236 "00000000" // /* MW 4 */ + 14237 "11110000" // /* MW 3 */ + 14238 "00101100" // /* MW 2 */ + 14239 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 61 2 first +.loop_nesting 0 + 14240 "01000100" // MOVXM ls, #14256 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14241 "01100000" // /* MW 5 */ + 14242 "11101111" // /* MW 4 */ + 14243 "00110001" // /* MW 3 */ + 14244 "00000000" // /* MW 2 */ + 14245 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 61 2 + 14246 "01000100" // MOVXM le, #14352 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14247 "00100000" // /* MW 5 */ + 14248 "11110000" // /* MW 4 */ + 14249 "00110110" // /* MW 3 */ + 14250 "00000000" // /* MW 2 */ + 14251 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 61 2 + 14252 "00011000" // ADD.NC lc, r17, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14253 "10000000" // /* MW 3 */ + 14254 "01111000" // /* MW 2 */ + 14255 "00011101" // /* MW 1 */ +.label ZLS_F_ZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE_528 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 63 14 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 64 13 first +.begin_of_loop +.loop_nesting 1 + 14256 "11100001" // LDA.s16 r18, [p7], #4; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14257 "00000000" // /* MW 15 */ + 14258 "00000000" // /* MW 14 */ + 14259 "01111000" // /* MW 13 */ + 14260 "10100101" // /* MW 12 */ + 14261 "00000001" // /* MW 11 */ + 14262 "00000000" // /* MW 10 */ + 14263 "00000000" // /* MW 9 */ + 14264 "00000000" // /* MW 8 */ + 14265 "01011011" // /* MW 7 */ + 14266 "00000001" // /* MW 6 */ + 14267 "00100000" // /* MW 5 */ + 14268 "00000000" // /* MW 4 */ + 14269 "01010000" // /* MW 3 */ + 14270 "11001010" // /* MW 2 */ + 14271 "11100101" // /* MW 1 */ + 14272 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14273 "00000000" // /* MW 15 */ + 14274 "00000000" // /* MW 14 */ + 14275 "01111000" // /* MW 13 */ + 14276 "10100101" // /* MW 12 */ + 14277 "00000001" // /* MW 11 */ + 14278 "00000000" // /* MW 10 */ + 14279 "00000000" // /* MW 9 */ + 14280 "00000000" // /* MW 8 */ + 14281 "01011011" // /* MW 7 */ + 14282 "00000001" // /* MW 6 */ + 14283 "00100000" // /* MW 5 */ + 14284 "00000000" // /* MW 4 */ + 14285 "11110000" // /* MW 3 */ + 14286 "00101100" // /* MW 2 */ + 14287 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 63 14 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 65 13 first + 14288 "11100001" // ST.s16 r18, [p0], #4; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14289 "00000000" // /* MW 15 */ + 14290 "00000000" // /* MW 14 */ + 14291 "01111000" // /* MW 13 */ + 14292 "10100101" // /* MW 12 */ + 14293 "00000001" // /* MW 11 */ + 14294 "00000000" // /* MW 10 */ + 14295 "00000000" // /* MW 9 */ + 14296 "00000000" // /* MW 8 */ + 14297 "01011011" // /* MW 7 */ + 14298 "00000001" // /* MW 6 */ + 14299 "00100000" // /* MW 5 */ + 14300 "00000000" // /* MW 4 */ + 14301 "11100000" // /* MW 3 */ + 14302 "11001010" // /* MW 2 */ + 14303 "00000101" // /* MW 1 */ + 14304 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14305 "00000000" // /* MW 15 */ + 14306 "00000000" // /* MW 14 */ + 14307 "01111000" // /* MW 13 */ + 14308 "10100101" // /* MW 12 */ + 14309 "00000001" // /* MW 11 */ + 14310 "00000000" // /* MW 10 */ + 14311 "00000000" // /* MW 9 */ + 14312 "00000000" // /* MW 8 */ + 14313 "01011011" // /* MW 7 */ + 14314 "00000001" // /* MW 6 */ + 14315 "00100000" // /* MW 5 */ + 14316 "00000000" // /* MW 4 */ + 14317 "11110000" // /* MW 3 */ + 14318 "00101100" // /* MW 2 */ + 14319 "00000000" // /* MW 1 */ + 14320 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14321 "00000000" // /* MW 15 */ + 14322 "00000000" // /* MW 14 */ + 14323 "01111000" // /* MW 13 */ + 14324 "10100101" // /* MW 12 */ + 14325 "00000001" // /* MW 11 */ + 14326 "00000000" // /* MW 10 */ + 14327 "00000000" // /* MW 9 */ + 14328 "00000000" // /* MW 8 */ + 14329 "01011011" // /* MW 7 */ + 14330 "00000001" // /* MW 6 */ + 14331 "00100000" // /* MW 5 */ + 14332 "00000000" // /* MW 4 */ + 14333 "11110000" // /* MW 3 */ + 14334 "00101100" // /* MW 2 */ + 14335 "00000000" // /* MW 1 */ + 14336 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14337 "00000000" // /* MW 15 */ + 14338 "00000000" // /* MW 14 */ + 14339 "01111000" // /* MW 13 */ + 14340 "10100101" // /* MW 12 */ + 14341 "00000001" // /* MW 11 */ + 14342 "00000000" // /* MW 10 */ + 14343 "00000000" // /* MW 9 */ + 14344 "00000000" // /* MW 8 */ + 14345 "01011011" // /* MW 7 */ + 14346 "00000001" // /* MW 6 */ + 14347 "00100000" // /* MW 5 */ + 14348 "00000000" // /* MW 4 */ + 14349 "11110000" // /* MW 3 */ + 14350 "00101100" // /* MW 2 */ + 14351 "00000000" // /* MW 1 */ +.label ZLE_F_ZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE_624 +.end_of_loop + 14352 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14353 "00000000" // /* MW 15 */ + 14354 "00000000" // /* MW 14 */ + 14355 "01111000" // /* MW 13 */ + 14356 "10100101" // /* MW 12 */ + 14357 "00000001" // /* MW 11 */ + 14358 "00000000" // /* MW 10 */ + 14359 "00000000" // /* MW 9 */ + 14360 "00000000" // /* MW 8 */ + 14361 "01011011" // /* MW 7 */ + 14362 "00000001" // /* MW 6 */ + 14363 "00100000" // /* MW 5 */ + 14364 "00000000" // /* MW 4 */ + 14365 "11110000" // /* MW 3 */ + 14366 "00101100" // /* MW 2 */ + 14367 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 228 +.loop_nesting 0 + 14368 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14369 "00111001" // /* MW 3 */ + 14370 "11110100" // /* MW 2 */ + 14371 "00000111" // /* MW 1 */ + 14372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14373 "00000000" // /* MW 1 */ + 14374 "00011000" // LDA p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14375 "10011001" // /* MW 3 */ + 14376 "11111111" // /* MW 2 */ + 14377 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 14378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14379 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.noswbrkpt + 14380 "00011000" // LDA r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14381 "11110001" // /* MW 3 */ + 14382 "11111001" // /* MW 2 */ + 14383 "00000111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 228 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14384 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14385 "00000001" // /* MW 5 */ + 14386 "00000000" // /* MW 4 */ + 14387 "00000000" // /* MW 3 */ + 14388 "11110000" // /* MW 2 */ + 14389 "11111111" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14391 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 228 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14392 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 14393 "00000000" // /* MW 3 */ + 14394 "00101000" // /* MW 2 */ + 14395 "00010000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 222 14 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14396 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14397 "00000111" // /* MW 3 */ + 14398 "00100000" // /* MW 2 */ + 14399 "00010100" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 224 18 first +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 14400 "10011000" // EQ r27, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14401 "00000111" // /* MW 3 */ + 14402 "11110111" // /* MW 2 */ + 14403 "00010011" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 224 6 +.delay_slot + 14404 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14405 "10000010" // /* MW 3 */ + 14406 "00100001" // /* MW 2 */ + 14407 "00010100" // /* MW 1 */ +.delay_slot + 14408 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14409 "00010001" // /* MW 3 */ + 14410 "00000110" // /* MW 2 */ + 14411 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE__end +.label __ZN12mllib_graphs35rmsnorm_row_major_part2_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE___func_end0 + 14413 "00000000" // /* MW 1 */ +.label __Z14_b8300_wrapperPPv___func_begin0 +.label _Z14_b8300_wrapperPPv +.function _b8300_wrapper _Z14_b8300_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 72 first +.src_ref 0 "0_0_reloadable5.cc" 74 79 +.function_start + 14416 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14417 "11000000" // /* MW 3 */ + 14418 "01100000" // /* MW 2 */ + 14419 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 74 79 first + 14420 "10011000" // LDA p0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14421 "00011110" // /* MW 3 */ + 14422 "00011100" // /* MW 2 */ + 14423 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 75 79 first + 14424 "10011000" // LDA p1, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14425 "10011110" // /* MW 3 */ + 14426 "00101100" // /* MW 2 */ + 14427 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 77 80 first + 14428 "10011000" // LDA p3, [p2, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14429 "10011110" // /* MW 3 */ + 14430 "11110101" // /* MW 2 */ + 14431 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 76 47 first + 14432 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14433 "00011110" // /* MW 3 */ + 14434 "00000101" // /* MW 2 */ + 14435 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 73 4 first +.tail_call + 14436 "10000100" // J #13728 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=13728 delay_slots=5 */ + 14437 "00000000" // /* MW 5 */ + 14438 "00000000" // /* MW 4 */ + 14439 "11010000" // /* MW 3 */ + 14440 "00011010" // /* MW 2 */ + 14441 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14443 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14445 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14447 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14449 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z14_b8300_wrapperPPv__end +.label __Z14_b8300_wrapperPPv___func_end0 + 14451 "00000000" // /* MW 1 */ +.label __Z23rmsnorm_row_major_part2I8bfloat16EvPT_S2_S2_S2_R33rmsnorm_row_major_internal_params___func_begin0 +.label _Z23rmsnorm_row_major_part2I8bfloat16EvPT_S2_S2_S2_R33rmsnorm_row_major_internal_params +.function rmsnorm_row_major_part2 _Z23rmsnorm_row_major_part2I8bfloat16EvPT_S2_S2_S2_R33rmsnorm_row_major_internal_params +.src_ref 3 "rmsnorm_row_major.h" 118 first +.src_ref 3 "rmsnorm_row_major.h" 125 31 +.function_start + 14464 "01000100" // MOVXM p4, #508824 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14465 "00110000" // /* MW 5 */ + 14466 "11000111" // /* MW 4 */ + 14467 "11001000" // /* MW 3 */ + 14468 "00000111" // /* MW 2 */ + 14469 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 125 31 first +.src_ref 3 "rmsnorm_row_major.h" 126 31 + 14470 "01010100" // LDA r6, [p4], #4; MOV m0, #-40 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14471 "01100001" // /* MW 5 */ + 14472 "00011111" // /* MW 4 */ + 14473 "11010000" // /* MW 3 */ + 14474 "10011010" // /* MW 2 */ + 14475 "10000011" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 126 31 first + 14476 "10011000" // LDA r2, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14477 "01010110" // /* MW 3 */ + 14478 "00001000" // /* MW 2 */ + 14479 "00000100" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 127 44 first + 14480 "10011000" // LDA.s16 r0, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14481 "00010010" // /* MW 3 */ + 14482 "00011100" // /* MW 2 */ + 14483 "00000100" // /* MW 1 */ + 14484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14485 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 118 + 14486 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14487 "00000001" // /* MW 5 */ + 14488 "00000000" // /* MW 4 */ + 14489 "00000000" // /* MW 3 */ + 14490 "00001000" // /* MW 2 */ + 14491 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 128 34 first +.src_ref 3 "rmsnorm_row_major.h" 129 26 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 14492 "10111010" // LDA.s16 r5, [p4]; MOVXM p5, #508944 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14493 "00010000" // /* MW 9 */ + 14494 "00001000" // /* MW 8 */ + 14495 "10110010" // /* MW 7 */ + 14496 "11110010" // /* MW 6 */ + 14497 "00000001" // /* MW 5 */ + 14498 "00000000" // /* MW 4 */ + 14499 "01010000" // /* MW 3 */ + 14500 "10010110" // /* MW 2 */ + 14501 "10000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 129 26 first +.src_ref 3 "rmsnorm_row_major.h" 134 35 +.src_ref 3 "rmsnorm_row_major.h" 143 25 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 14502 "10111010" // LDA.s8 r14, [p5]; MOVX r4, #-5; MOV dj1, #50 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14503 "01011000" // /* MW 9 */ + 14504 "00110010" // /* MW 8 */ + 14505 "11000000" // /* MW 7 */ + 14506 "01101000" // /* MW 6 */ + 14507 "01000111" // /* MW 5 */ + 14508 "00111110" // /* MW 4 */ + 14509 "01010000" // /* MW 3 */ + 14510 "10111000" // /* MW 2 */ + 14511 "10100000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 134 35 first +.src_ref 3 "rmsnorm_row_major.h" 143 25 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14512 "11111010" // LDA r11, [p4, dj1]; ST r12, [sp, #-4]; LSHL r12, r6, r4 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14513 "01001101" // /* MW 9 */ + 14514 "10011000" // /* MW 8 */ + 14515 "00000001" // /* MW 7 */ + 14516 "10000000" // /* MW 6 */ + 14517 "10010101" // /* MW 5 */ + 14518 "11111101" // /* MW 4 */ + 14519 "11010111" // /* MW 3 */ + 14520 "00101110" // /* MW 2 */ + 14521 "10000100" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "rmsnorm_row_major.h" 127 38 +.src_ref 3 "rmsnorm_row_major.h" 128 28 +.src_ref 3 "rmsnorm_row_major.h" 143 34 +.src_ref 3 "rmsnorm_row_major.h" 144 36 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14522 "01110110" // MOVA dj0, #64; ST r9, [sp, #-12]; MOVX r3, #16; ADD.NC r9, r12, #-1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 14523 "11001000" // /* MW 11 */ + 14524 "00111111" // /* MW 10 */ + 14525 "00101011" // /* MW 9 */ + 14526 "00001001" // /* MW 8 */ + 14527 "00110010" // /* MW 7 */ + 14528 "10000000" // /* MW 6 */ + 14529 "00110101" // /* MW 5 */ + 14530 "11110101" // /* MW 4 */ + 14531 "10000111" // /* MW 3 */ + 14532 "00000010" // /* MW 2 */ + 14533 "00001000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 127 38 first +.src_ref 3 "rmsnorm_row_major.h" 139 49 +.src_ref 3 "rmsnorm_row_major.h" 144 36 +.src_ref 3 "rmsnorm_row_major.h" 144 36 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14534 "01110110" // MOVA r1, #1; ST r13, [sp, #-28]; ASHL r12, r0, r3; MOV r13, dj0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 14535 "01111000" // /* MW 11 */ + 14536 "10000000" // /* MW 10 */ + 14537 "10101000" // /* MW 9 */ + 14538 "11110101" // /* MW 8 */ + 14539 "11000001" // /* MW 7 */ + 14540 "10000000" // /* MW 6 */ + 14541 "10110101" // /* MW 5 */ + 14542 "11100101" // /* MW 4 */ + 14543 "00000111" // /* MW 3 */ + 14544 "00100001" // /* MW 2 */ + 14545 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 129 26 first +.src_ref 3 "rmsnorm_row_major.h" 144 36 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14546 "00111010" // ST r15, [sp, #-8]; LSHL r15, r6, r1; VINSERT.32 x0, x0, #0, r12 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14547 "10111001" // /* MW 9 */ + 14548 "11001000" // /* MW 8 */ + 14549 "00000000" // /* MW 7 */ + 14550 "11101100" // /* MW 6 */ + 14551 "11110000" // /* MW 5 */ + 14552 "00001100" // /* MW 4 */ + 14553 "10110000" // /* MW 3 */ + 14554 "00111110" // /* MW 2 */ + 14555 "11111111" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 147 4 first +.src_ref 3 "rmsnorm_row_major.h" 147 27 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14556 "00111010" // ST r14, [sp, #-16]; JZ r2, #15072 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=15072 delay_slots=5 */ + 14557 "01100001" // /* MW 9 */ + 14558 "00000000" // /* MW 8 */ + 14559 "00000000" // /* MW 7 */ + 14560 "01011100" // /* MW 6 */ + 14561 "00000111" // /* MW 5 */ + 14562 "00000100" // /* MW 4 */ + 14563 "10110000" // /* MW 3 */ + 14564 "00111010" // /* MW 2 */ + 14565 "11111110" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 128 28 first +.src_ref 3 "rmsnorm_row_major.h" 129 26 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14566 "00111010" // ST r8, [sp, #-32]; ASHL r8, r5, r3; VMOV bmlh1, x0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14567 "01111001" // /* MW 9 */ + 14568 "01001001" // /* MW 8 */ + 14569 "10100000" // /* MW 7 */ + 14570 "11110100" // /* MW 6 */ + 14571 "10000001" // /* MW 5 */ + 14572 "00001010" // /* MW 4 */ + 14573 "10110000" // /* MW 3 */ + 14574 "00100010" // /* MW 2 */ + 14575 "11111100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1454 19 +.src_ref 4 "vector.hpp" 1454 19 +.src_ref 5 "accum.hpp" 1117 103 +.src_ref 5 "accum.hpp" 1117 103 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 3 "rmsnorm_row_major.h" 129 26 +.src_ref 3 "rmsnorm_row_major.h" 129 26 first +.src_ref 3 "rmsnorm_row_major.h" 130 16 +.src_ref 3 "rmsnorm_row_major.h" 130 16 first +.src_ref 3 "rmsnorm_row_major.h" 173 25 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 14576 "00111010" // ST r11, [sp, #-24]; MOVX crRnd, r14; VINSERT.32 x0, x0, #0, r8 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14577 "10111001" // /* MW 9 */ + 14578 "10001000" // /* MW 8 */ + 14579 "00000000" // /* MW 7 */ + 14580 "00000000" // /* MW 6 */ + 14581 "11010100" // /* MW 5 */ + 14582 "00011101" // /* MW 4 */ + 14583 "10110000" // /* MW 3 */ + 14584 "00101110" // /* MW 2 */ + 14585 "11111101" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 129 26 +.src_ref 3 "rmsnorm_row_major.h" 129 26 +.src_ref 3 "rmsnorm_row_major.h" 130 16 +.src_ref 3 "rmsnorm_row_major.h" 139 49 first +.src_ref 3 "rmsnorm_row_major.h" 144 36 +.delay_slot + 14586 "01110110" // MOVA r7, #-64; VCONV.bf16.fp32 wl3, bmlh1; LSHL r8, r11, r1; VMOV bmll1, x0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 14587 "01111000" // /* MW 11 */ + 14588 "01001001" // /* MW 10 */ + 14589 "10000000" // /* MW 9 */ + 14590 "11101100" // /* MW 8 */ + 14591 "10000000" // /* MW 7 */ + 14592 "00010110" // /* MW 6 */ + 14593 "10110110" // /* MW 5 */ + 14594 "11000000" // /* MW 4 */ + 14595 "00000001" // /* MW 3 */ + 14596 "00000111" // /* MW 2 */ + 14597 "11111000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 139 15 +.src_ref 3 "rmsnorm_row_major.h" 144 36 first +.delay_slot + 14598 "00111010" // ST r10, [sp, #-20]; AND r10, r15, r7; MOV m0, r8 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14599 "01111001" // /* MW 9 */ + 14600 "00010000" // /* MW 8 */ + 14601 "00000010" // /* MW 7 */ + 14602 "10100100" // /* MW 6 */ + 14603 "10100011" // /* MW 5 */ + 14604 "00011110" // /* MW 4 */ + 14605 "10110000" // /* MW 3 */ + 14606 "10101010" // /* MW 2 */ + 14607 "11111101" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 129 26 first +.src_ref 3 "rmsnorm_row_major.h" 130 16 first +.src_ref 3 "rmsnorm_row_major.h" 139 15 first +.src_ref 3 "rmsnorm_row_major.h" 144 36 +.delay_slot + 14608 "11010010" // PADDB [p3], m0; VCONV.bf16.fp32 wl0, bmll1; SUB r12, r13, r10 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 14609 "10100001" // /* MW 7 */ + 14610 "01011000" // /* MW 6 */ + 14611 "00100011" // /* MW 5 */ + 14612 "00010111" // /* MW 4 */ + 14613 "11000110" // /* MW 3 */ + 14614 "00010010" // /* MW 2 */ + 14615 "00001000" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 10 "aie_core.h" 81 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "rmsnorm_row_major.h" 161 8 first + 14616 "01110110" // MOVA dc0, #0; MOVS dn0, r9; MOVXM ls, #14976 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 14617 "00010000" // /* MW 11 */ + 14618 "01000000" // /* MW 10 */ + 14619 "01111101" // /* MW 9 */ + 14620 "00001100" // /* MW 8 */ + 14621 "00000000" // /* MW 7 */ + 14622 "00000000" // /* MW 6 */ + 14623 "00001011" // /* MW 5 */ + 14624 "01001001" // /* MW 4 */ + 14625 "10000000" // /* MW 3 */ + 14626 "00000011" // /* MW 2 */ + 14627 "00000000" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "rmsnorm_row_major.h" 161 8 + 14628 "10111010" // MOVA r10, #828; MOVXM le, #15040 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14629 "00010000" // /* MW 9 */ + 14630 "01100000" // /* MW 8 */ + 14631 "10111101" // /* MW 7 */ + 14632 "00001101" // /* MW 6 */ + 14633 "00000000" // /* MW 5 */ + 14634 "00000000" // /* MW 4 */ + 14635 "00000000" // /* MW 3 */ + 14636 "10001010" // /* MW 2 */ + 14637 "01100111" // /* MW 1 */ +.src_ref 5 "add_accum.hpp" 19 92 + 14638 "10111010" // MOVA r11, #60; ADD r8, r2, #-1; VEXTBCST.16 x1, x0, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14639 "10101000" // /* MW 9 */ + 14640 "10000001" // /* MW 8 */ + 14641 "01000000" // /* MW 7 */ + 14642 "11111000" // /* MW 6 */ + 14643 "10001111" // /* MW 5 */ + 14644 "00000100" // /* MW 4 */ + 14645 "00000000" // /* MW 3 */ + 14646 "10001011" // /* MW 2 */ + 14647 "00000111" // /* MW 1 */ +.src_ref 10 "aie_core.h" 81 15 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 1139 17 + 14648 "11100100" // MOVX vaddSign0, #1; MOV m0, r12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14649 "01000001" // /* MW 5 */ + 14650 "00001100" // /* MW 4 */ + 14651 "00000000" // /* MW 3 */ + 14652 "01001000" // /* MW 2 */ + 14653 "00001011" // /* MW 1 */ + 14654 "01011000" // VEXTBCST.16 x0, x3, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14655 "00000011" // /* MW 3 */ + 14656 "00011001" // /* MW 2 */ + 14657 "00011000" // /* MW 1 */ + 14658 "11111000" // VCONV.fp32.bf16 cml0, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14659 "10001010" // /* MW 3 */ + 14660 "00000011" // /* MW 2 */ + 14661 "00011000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 147 4 + 14662 "10111010" // NOPA; MOVXM p4, #14672 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14663 "00010000" // /* MW 9 */ + 14664 "10101000" // /* MW 8 */ + 14665 "00110100" // /* MW 7 */ + 14666 "00001110" // /* MW 6 */ + 14667 "00000000" // /* MW 5 */ + 14668 "00000000" // /* MW 4 */ + 14669 "11110000" // /* MW 3 */ + 14670 "00101100" // /* MW 2 */ + 14671 "00000000" // /* MW 1 */ +.label TGT_F_Z23rmsnorm_row_major_part2I8bfloat16EvPT_S2_S2_S2_R33rmsnorm_row_major_internal_params_208 +.src_ref 3 "rmsnorm_row_major.h" 150 38 first +.src_ref 3 "rmsnorm_row_major.h" 151 23 first +.src_ref 3 "rmsnorm_row_major.h" 161 8 first +.loop_nesting 1 + 14672 "00010100" // LDA.s16 r12, [p3], #4; ADD.NC lc, r9, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14673 "00000001" // /* MW 5 */ + 14674 "11101001" // /* MW 4 */ + 14675 "01011010" // /* MW 3 */ + 14676 "10110010" // /* MW 2 */ + 14677 "01100101" // /* MW 1 */ + 14678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14679 "00000000" // /* MW 1 */ + 14680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14681 "00000000" // /* MW 1 */ + 14682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14683 "00000000" // /* MW 1 */ + 14684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14685 "00000000" // /* MW 1 */ + 14686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14687 "00000000" // /* MW 1 */ + 14688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14689 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 first + 14690 "11111000" // VBCST.16 x1, r12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14691 "01110010" // /* MW 3 */ + 14692 "10110001" // /* MW 2 */ + 14693 "00011000" // /* MW 1 */ + 14694 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14695 "00000000" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first + 14696 "01001000" // VMUL.f dm3, x1, x0, r10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14697 "00000001" // /* MW 3 */ + 14698 "11100010" // /* MW 2 */ + 14699 "01010011" // /* MW 1 */ + 14700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14701 "00000000" // /* MW 1 */ + 14702 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14703 "00000000" // /* MW 1 */ +.src_ref 5 "add_accum.hpp" 19 92 first + 14704 "01001000" // VADD.f dm3, dm3, dm0, r11 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14705 "00111101" // /* MW 3 */ + 14706 "01100000" // /* MW 2 */ + 14707 "01011011" // /* MW 1 */ + 14708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14709 "00000000" // /* MW 1 */ + 14710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14711 "00000000" // /* MW 1 */ + 14712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14713 "00000000" // /* MW 1 */ + 14714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14715 "00000000" // /* MW 1 */ + 14716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14717 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 first + 14718 "00011000" // VCONV.bf16.fp32 x1, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14719 "10010110" // /* MW 3 */ + 14720 "10010001" // /* MW 2 */ + 14721 "00001000" // /* MW 1 */ + 14722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14723 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 940 83 first + 14724 "11111000" // VCONV.fp32.bf16 cml4, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14725 "10001010" // /* MW 3 */ + 14726 "00000011" // /* MW 2 */ + 14727 "00011100" // /* MW 1 */ + 14728 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14729 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first + 14730 "11111000" // VMOV x6, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14731 "00010010" // /* MW 3 */ + 14732 "00110000" // /* MW 2 */ + 14733 "00011011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first + 14734 "10111000" // VEXTRACT.64 r13:r12, x6, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14735 "00000001" // /* MW 3 */ + 14736 "00011011" // /* MW 2 */ + 14737 "00011011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 + 14738 "10111000" // VEXTRACT.64 r17:r16, x6, #1, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14739 "00000011" // /* MW 3 */ + 14740 "00011011" // /* MW 2 */ + 14741 "00011100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 +.src_ref 5 "accum.hpp" 153 115 first +.src_ref 5 "elementary.hpp" 342 19 first +.src_ref 5 "elementary.hpp" 381 23 first + 14742 "11100100" // INVSQRT r3, r13; VMOV x1, bmlh4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14743 "00100101" // /* MW 5 */ + 14744 "01100010" // /* MW 4 */ + 14745 "10000001" // /* MW 3 */ + 14746 "11010000" // /* MW 2 */ + 14747 "01101000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 5 "elementary.hpp" 342 19 +.src_ref 5 "elementary.hpp" 381 23 + 14748 "01100100" // INVSQRT r2, r12; VEXTRACT.64 r5:r4, x1, #0, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14749 "00000011" // /* MW 5 */ + 14750 "00001110" // /* MW 4 */ + 14751 "10000010" // /* MW 3 */ + 14752 "10010000" // /* MW 2 */ + 14753 "01100000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 5 "elementary.hpp" 342 19 first +.src_ref 5 "elementary.hpp" 381 23 first + 14754 "01100100" // INVSQRT r7, r17; VEXTRACT.64 r1:r0, x6, #2, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14755 "00001011" // /* MW 5 */ + 14756 "00110110" // /* MW 4 */ + 14757 "10000000" // /* MW 3 */ + 14758 "11010000" // /* MW 2 */ + 14759 "10001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 5 "elementary.hpp" 342 19 +.src_ref 5 "elementary.hpp" 381 23 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 14760 "01100100" // INVSQRT r6, r16; VEXTRACT.64 r15:r14, x6, #3, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14761 "00001111" // /* MW 5 */ + 14762 "00110110" // /* MW 4 */ + 14763 "10000111" // /* MW 3 */ + 14764 "10010000" // /* MW 2 */ + 14765 "10000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 5 "elementary.hpp" 342 19 first +.src_ref 5 "elementary.hpp" 381 23 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 14766 "01100100" // INVSQRT r13, r1; VEXTRACT.64 r3:r2, x1, #1, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14767 "00000111" // /* MW 5 */ + 14768 "00001110" // /* MW 4 */ + 14769 "10000001" // /* MW 3 */ + 14770 "01010000" // /* MW 2 */ + 14771 "00001011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 5 "elementary.hpp" 342 19 +.src_ref 5 "elementary.hpp" 381 23 +.src_ref 5 "elementary.hpp" 381 23 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 14772 "11100100" // INVSQRT r25, r5; VPUSH.hi.64 x10, x0, r3:r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14773 "01101110" // /* MW 5 */ + 14774 "00000001" // /* MW 4 */ + 14775 "10001010" // /* MW 3 */ + 14776 "01010000" // /* MW 2 */ + 14777 "00101110" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 5 "elementary.hpp" 342 19 first +.src_ref 5 "elementary.hpp" 381 23 first + 14778 "01100100" // INVSQRT r12, r0; VEXTRACT.64 r21:r20, x6, #4, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14779 "00010011" // /* MW 5 */ + 14780 "00110110" // /* MW 4 */ + 14781 "10001010" // /* MW 3 */ + 14782 "00010000" // /* MW 2 */ + 14783 "00000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 5 "elementary.hpp" 342 19 +.src_ref 5 "elementary.hpp" 381 23 + 14784 "01100100" // INVSQRT r1, r15; VEXTRACT.64 r17:r16, x1, #2, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14785 "00001011" // /* MW 5 */ + 14786 "00001110" // /* MW 4 */ + 14787 "10001000" // /* MW 3 */ + 14788 "01010000" // /* MW 2 */ + 14789 "01111000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 +.src_ref 5 "elementary.hpp" 342 19 first +.src_ref 5 "elementary.hpp" 381 23 first +.src_ref 5 "elementary.hpp" 381 23 first +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 14790 "11100100" // INVSQRT r24, r4; VPUSH.hi.64 x10, x10, r7:r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14791 "01101110" // /* MW 5 */ + 14792 "10100011" // /* MW 4 */ + 14793 "10001010" // /* MW 3 */ + 14794 "00010000" // /* MW 2 */ + 14795 "00100110" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 5 "elementary.hpp" 342 19 +.src_ref 5 "elementary.hpp" 381 23 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 14796 "01100100" // INVSQRT r5, r3; VEXTRACT.64 r15:r14, x6, #5, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14797 "00010111" // /* MW 5 */ + 14798 "00110110" // /* MW 4 */ + 14799 "10000111" // /* MW 3 */ + 14800 "01010000" // /* MW 2 */ + 14801 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 5 "elementary.hpp" 342 19 first +.src_ref 5 "elementary.hpp" 381 23 first +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 14802 "01100100" // INVSQRT r0, r14; VEXTRACT.64 r7:r6, x6, #6, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14803 "00011011" // /* MW 5 */ + 14804 "00110110" // /* MW 4 */ + 14805 "10000011" // /* MW 3 */ + 14806 "00010000" // /* MW 2 */ + 14807 "01110000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 5 "elementary.hpp" 342 19 +.src_ref 5 "elementary.hpp" 381 23 + 14808 "01100100" // INVSQRT r4, r2; VEXTRACT.64 r3:r2, x1, #3, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14809 "00001111" // /* MW 5 */ + 14810 "00001110" // /* MW 4 */ + 14811 "10000001" // /* MW 3 */ + 14812 "00010000" // /* MW 2 */ + 14813 "00010001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 +.src_ref 5 "elementary.hpp" 342 19 first +.src_ref 5 "elementary.hpp" 381 23 first +.src_ref 5 "elementary.hpp" 381 23 first + 14814 "11100100" // INVSQRT r19, r21; VPUSH.hi.64 x10, x10, r13:r12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14815 "01101110" // /* MW 5 */ + 14816 "10100110" // /* MW 4 */ + 14817 "10001010" // /* MW 3 */ + 14818 "11010000" // /* MW 2 */ + 14819 "10101100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 5 "elementary.hpp" 342 19 +.src_ref 5 "elementary.hpp" 381 23 + 14820 "01100100" // INVSQRT r13, r17; VEXTRACT.64 r23:r22, x1, #4, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14821 "00010011" // /* MW 5 */ + 14822 "00001110" // /* MW 4 */ + 14823 "10001011" // /* MW 3 */ + 14824 "01010000" // /* MW 2 */ + 14825 "10001011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 +.src_ref 5 "elementary.hpp" 342 19 first +.src_ref 5 "elementary.hpp" 381 23 first +.src_ref 5 "elementary.hpp" 381 23 first + 14826 "11100100" // INVSQRT r18, r20; VPUSH.hi.64 x10, x10, r1:r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14827 "01101110" // /* MW 5 */ + 14828 "10100000" // /* MW 4 */ + 14829 "10001010" // /* MW 3 */ + 14830 "10010000" // /* MW 2 */ + 14831 "10100100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 5 "elementary.hpp" 342 19 +.src_ref 5 "elementary.hpp" 381 23 +.src_ref 5 "elementary.hpp" 381 23 + 14832 "11100100" // INVSQRT r12, r16; VPUSH.hi.64 x8, x0, r25:r24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14833 "01101110" // /* MW 5 */ + 14834 "00001100" // /* MW 4 */ + 14835 "10001000" // /* MW 3 */ + 14836 "00010000" // /* MW 2 */ + 14837 "10000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 +.src_ref 5 "elementary.hpp" 342 19 first +.src_ref 5 "elementary.hpp" 381 23 first +.src_ref 5 "elementary.hpp" 381 23 first + 14838 "11100100" // INVSQRT r17, r15; VPUSH.hi.64 x8, x8, r5:r4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14839 "01101110" // /* MW 5 */ + 14840 "10000010" // /* MW 4 */ + 14841 "10001000" // /* MW 3 */ + 14842 "01010000" // /* MW 2 */ + 14843 "01111100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 5 "elementary.hpp" 342 19 +.src_ref 5 "elementary.hpp" 381 23 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 14844 "01100100" // INVSQRT r1, r7; VEXTRACT.64 r5:r4, x1, #5, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14845 "00010111" // /* MW 5 */ + 14846 "00001110" // /* MW 4 */ + 14847 "10000010" // /* MW 3 */ + 14848 "01010000" // /* MW 2 */ + 14849 "00111000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 5 "elementary.hpp" 342 19 first +.src_ref 5 "elementary.hpp" 381 23 first +.aggressive_scheduled_block_id 4 +.noswbrkpt + 14850 "01100100" // INVSQRT r16, r14; VEXTRACT.64 r7:r6, x6, #7, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14851 "00011111" // /* MW 5 */ + 14852 "00110110" // /* MW 4 */ + 14853 "10000011" // /* MW 3 */ + 14854 "00010000" // /* MW 2 */ + 14855 "01110100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 5 "elementary.hpp" 342 19 +.src_ref 5 "elementary.hpp" 381 23 +.src_ref 5 "elementary.hpp" 381 23 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 14856 "11100100" // INVSQRT r0, r6; VPUSH.hi.64 x6, x8, r13:r12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14857 "01101110" // /* MW 5 */ + 14858 "10000110" // /* MW 4 */ + 14859 "10000110" // /* MW 3 */ + 14860 "00010000" // /* MW 2 */ + 14861 "00110000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 5 "elementary.hpp" 342 19 first +.src_ref 5 "elementary.hpp" 381 23 first + 14862 "01100100" // INVSQRT r13, r3; VEXTRACT.64 r15:r14, x1, #6, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14863 "00011011" // /* MW 5 */ + 14864 "00001110" // /* MW 4 */ + 14865 "10000111" // /* MW 3 */ + 14866 "01010000" // /* MW 2 */ + 14867 "00011011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 5 "elementary.hpp" 342 19 +.src_ref 5 "elementary.hpp" 381 23 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 14868 "01100100" // INVSQRT r12, r2; VEXTRACT.64 r3:r2, x1, #7, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14869 "00011111" // /* MW 5 */ + 14870 "00001110" // /* MW 4 */ + 14871 "10000001" // /* MW 3 */ + 14872 "00010000" // /* MW 2 */ + 14873 "00010011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 +.src_ref 5 "elementary.hpp" 342 19 first +.src_ref 5 "elementary.hpp" 381 23 first +.src_ref 5 "elementary.hpp" 381 23 first +.aggressive_scheduled_block_id 5 +.noswbrkpt + 14874 "11100100" // INVSQRT r1, r23; VPUSH.hi.64 x10, x10, r19:r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14875 "01101110" // /* MW 5 */ + 14876 "10101001" // /* MW 4 */ + 14877 "10001010" // /* MW 3 */ + 14878 "01010000" // /* MW 2 */ + 14879 "10111000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 5 "elementary.hpp" 342 19 +.src_ref 5 "elementary.hpp" 381 23 +.src_ref 5 "elementary.hpp" 381 23 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 14880 "11100100" // INVSQRT r0, r22; VPUSH.hi.64 x10, x10, r17:r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14881 "01101110" // /* MW 5 */ + 14882 "10101000" // /* MW 4 */ + 14883 "10001010" // /* MW 3 */ + 14884 "00010000" // /* MW 2 */ + 14885 "10110000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 +.src_ref 5 "elementary.hpp" 342 19 first +.src_ref 5 "elementary.hpp" 381 23 first +.src_ref 5 "elementary.hpp" 381 23 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 14886 "11100100" // INVSQRT r19, r7; VPUSH.hi.64 x1, x10, r1:r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14887 "01101110" // /* MW 5 */ + 14888 "10100000" // /* MW 4 */ + 14889 "10000001" // /* MW 3 */ + 14890 "11010000" // /* MW 2 */ + 14891 "00111100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 5 "elementary.hpp" 342 19 +.src_ref 5 "elementary.hpp" 381 23 +.src_ref 5 "elementary.hpp" 381 23 + 14892 "11100100" // INVSQRT r18, r6; VPUSH.hi.64 x6, x6, r13:r12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14893 "01101110" // /* MW 5 */ + 14894 "01100110" // /* MW 4 */ + 14895 "10000110" // /* MW 3 */ + 14896 "10010000" // /* MW 2 */ + 14897 "00110100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 +.src_ref 5 "elementary.hpp" 342 19 first +.src_ref 5 "elementary.hpp" 381 23 first + 14898 "00011000" // INVSQRT r7, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14899 "10000100" // /* MW 3 */ + 14900 "01001110" // /* MW 2 */ + 14901 "00010001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 5 "elementary.hpp" 342 19 +.src_ref 5 "elementary.hpp" 381 23 +.src_ref 5 "elementary.hpp" 381 23 + 14902 "11100100" // INVSQRT r13, r15; VPUSH.hi.64 x6, x6, r1:r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14903 "01101110" // /* MW 5 */ + 14904 "01100000" // /* MW 4 */ + 14905 "10000110" // /* MW 3 */ + 14906 "01010000" // /* MW 2 */ + 14907 "01111011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 +.src_ref 5 "elementary.hpp" 342 19 first +.src_ref 5 "elementary.hpp" 381 23 first + 14908 "00011000" // INVSQRT r6, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14909 "10000100" // /* MW 3 */ + 14910 "00001100" // /* MW 2 */ + 14911 "00010001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 5 "elementary.hpp" 342 19 +.src_ref 5 "elementary.hpp" 381 23 +.src_ref 5 "elementary.hpp" 381 23 + 14912 "11100100" // INVSQRT r1, r3; VPUSH.hi.64 x1, x1, r19:r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14913 "01101110" // /* MW 5 */ + 14914 "00011001" // /* MW 4 */ + 14915 "10000001" // /* MW 3 */ + 14916 "01010000" // /* MW 2 */ + 14917 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 1454 19 +.src_ref 5 "accum.hpp" 1117 103 +.src_ref 5 "elementary.hpp" 342 19 first +.src_ref 5 "elementary.hpp" 381 23 first + 14918 "11100100" // INVSQRT r12, r14; VMOV bmll4, x1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14919 "00100101" // /* MW 5 */ + 14920 "00000101" // /* MW 4 */ + 14921 "10001000" // /* MW 3 */ + 14922 "00010000" // /* MW 2 */ + 14923 "01110011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 5 "elementary.hpp" 342 19 +.src_ref 5 "elementary.hpp" 381 23 + 14924 "00011000" // INVSQRT r0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14925 "10000100" // /* MW 3 */ + 14926 "10000000" // /* MW 2 */ + 14927 "00010000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1454 19 first +.src_ref 5 "accum.hpp" 1117 103 first +.src_ref 5 "elementary.hpp" 381 23 first + 14928 "00000010" // VCONV.bf16.fp32 wl1, bmll4; VPUSH.hi.64 x6, x6, r7:r6 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 14929 "10110000" // /* MW 7 */ + 14930 "11011011" // /* MW 6 */ + 14931 "10011000" // /* MW 5 */ + 14932 "00000001" // /* MW 4 */ + 14933 "11000000" // /* MW 3 */ + 14934 "01000010" // /* MW 2 */ + 14935 "00011000" // /* MW 1 */ + 14936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14937 "00000000" // /* MW 1 */ +.src_ref 5 "elementary.hpp" 381 23 + 14938 "01111000" // VPUSH.hi.64 x6, x6, r13:r12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14939 "00110111" // /* MW 3 */ + 14940 "00110011" // /* MW 2 */ + 14941 "00011011" // /* MW 1 */ +.src_ref 5 "elementary.hpp" 381 23 + 14942 "01111000" // VPUSH.hi.64 x6, x6, r1:r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14943 "00110111" // /* MW 3 */ + 14944 "00110000" // /* MW 2 */ + 14945 "00011011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1454 19 +.src_ref 5 "accum.hpp" 1117 103 + 14946 "11111000" // VMOV bmll4, x6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14947 "10010010" // /* MW 3 */ + 14948 "00001100" // /* MW 2 */ + 14949 "00011100" // /* MW 1 */ + 14950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14951 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1454 19 +.src_ref 5 "accum.hpp" 1117 103 + 14952 "00011000" // VCONV.bf16.fp32 wh1, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14953 "00010110" // /* MW 3 */ + 14954 "10000010" // /* MW 2 */ + 14955 "00001000" // /* MW 1 */ + 14956 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14957 "01100111" // /* MW 3 */ + 14958 "00000001" // /* MW 2 */ + 14959 "00000000" // /* MW 1 */ + 14960 "11100001" // NOPA; NOPB; NOPS; NOPX; VEXTBCST.16 x1, x1, #0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14961 "00000000" // /* MW 15 */ + 14962 "00000000" // /* MW 14 */ + 14963 "10101000" // /* MW 13 */ + 14964 "10000001" // /* MW 12 */ + 14965 "01000100" // /* MW 11 */ + 14966 "00000000" // /* MW 10 */ + 14967 "00000000" // /* MW 9 */ + 14968 "00000000" // /* MW 8 */ + 14969 "01011011" // /* MW 7 */ + 14970 "00000001" // /* MW 6 */ + 14971 "00100000" // /* MW 5 */ + 14972 "00000000" // /* MW 4 */ + 14973 "11110000" // /* MW 3 */ + 14974 "00101100" // /* MW 2 */ + 14975 "00000000" // /* MW 1 */ +.label ZLS_F_Z23rmsnorm_row_major_part2I8bfloat16EvPT_S2_S2_S2_R33rmsnorm_row_major_internal_params_512 +.src_ref 10 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "rmsnorm_row_major.h" 165 25 first +.begin_of_loop +.loop_nesting 2 + 14976 "00111100" // VLDA.2D x2, [p1], d0; VLDB x3, [p0], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14977 "11101000" // /* MW 5 */ + 14978 "00111001" // /* MW 4 */ + 14979 "01110000" // /* MW 3 */ + 14980 "00010011" // /* MW 2 */ + 14981 "00100010" // /* MW 1 */ + 14982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14983 "00000000" // /* MW 1 */ + 14984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14985 "00000000" // /* MW 1 */ + 14986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14987 "00000000" // /* MW 1 */ + 14988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14989 "00000000" // /* MW 1 */ + 14990 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14991 "00000000" // /* MW 1 */ + 14992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14993 "00000000" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first + 14994 "01001000" // VMUL.f dm1, x3, x1, r10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14995 "00100001" // /* MW 3 */ + 14996 "11100110" // /* MW 2 */ + 14997 "01010001" // /* MW 1 */ + 14998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14999 "00000000" // /* MW 1 */ + 15000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15001 "00000000" // /* MW 1 */ + 15002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15003 "00000000" // /* MW 1 */ + 15004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15005 "00000000" // /* MW 1 */ + 15006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15007 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 first + 15008 "00011000" // VCONV.bf16.fp32 x4, cml1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15009 "10010110" // /* MW 3 */ + 15010 "00010000" // /* MW 2 */ + 15011 "00001010" // /* MW 1 */ + 15012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15013 "00000000" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first + 15014 "01001000" // VMUL.f dm2, x4, x2, r10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15015 "01000001" // /* MW 3 */ + 15016 "11101000" // /* MW 2 */ + 15017 "01010010" // /* MW 1 */ + 15018 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15019 "00000000" // /* MW 1 */ + 15020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15021 "00000000" // /* MW 1 */ + 15022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15023 "00000000" // /* MW 1 */ + 15024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15025 "00000000" // /* MW 1 */ + 15026 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 15027 "00011100" // /* MW 13 */ + 15028 "00000000" // /* MW 12 */ + 15029 "00000000" // /* MW 11 */ + 15030 "01010111" // /* MW 10 */ + 15031 "00011010" // /* MW 9 */ + 15032 "01000000" // /* MW 8 */ + 15033 "00000000" // /* MW 7 */ + 15034 "00000000" // /* MW 6 */ + 15035 "10110110" // /* MW 5 */ + 15036 "00000010" // /* MW 4 */ + 15037 "11110000" // /* MW 3 */ + 15038 "00101100" // /* MW 2 */ + 15039 "00000000" // /* MW 1 */ +.label ZLE_F_Z23rmsnorm_row_major_part2I8bfloat16EvPT_S2_S2_S2_R33rmsnorm_row_major_internal_params_576 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 3 "rmsnorm_row_major.h" 173 25 first +.end_of_loop + 15040 "11100001" // NOPA; NOPB; VST.CONV.bf16.fp32 cml2, [p2], #64;NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 15041 "00000000" // /* MW 15 */ + 15042 "00000000" // /* MW 14 */ + 15043 "01111000" // /* MW 13 */ + 15044 "10100101" // /* MW 12 */ + 15045 "00000001" // /* MW 11 */ + 15046 "00000000" // /* MW 10 */ + 15047 "00000000" // /* MW 9 */ + 15048 "00000000" // /* MW 8 */ + 15049 "00100011" // /* MW 7 */ + 15050 "00011101" // /* MW 6 */ + 15051 "00100010" // /* MW 5 */ + 15052 "00000000" // /* MW 4 */ + 15053 "11110000" // /* MW 3 */ + 15054 "00101100" // /* MW 2 */ + 15055 "00000000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 147 4 first +.loop_nesting 1 + 15056 "00011000" // JNZD r8, r8, p4 /* MW 4 */ /* control_operation: words=4 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 15057 "00100000" // /* MW 3 */ + 15058 "00010001" // /* MW 2 */ + 15059 "00010010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15061 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15063 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15065 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15066 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15067 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15068 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15069 "01100111" // /* MW 3 */ + 15070 "00000001" // /* MW 2 */ + 15071 "00000000" // /* MW 1 */ +.label TGT_F_Z23rmsnorm_row_major_part2I8bfloat16EvPT_S2_S2_S2_R33rmsnorm_row_major_internal_params_608 +.loop_nesting 0 + 15072 "00011000" // LDA r13, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15073 "10110001" // /* MW 3 */ + 15074 "11100101" // /* MW 2 */ + 15075 "00000111" // /* MW 1 */ + 15076 "00011000" // LDA r11, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15077 "01110001" // /* MW 3 */ + 15078 "11101001" // /* MW 2 */ + 15079 "00000111" // /* MW 1 */ + 15080 "00011000" // LDA r14, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15081 "11010001" // /* MW 3 */ + 15082 "11110001" // /* MW 2 */ + 15083 "00000111" // /* MW 1 */ + 15084 "00011000" // LDA r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15085 "11110001" // /* MW 3 */ + 15086 "11111001" // /* MW 2 */ + 15087 "00000111" // /* MW 1 */ + 15088 "00011000" // LDA r12, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15089 "10010001" // /* MW 3 */ + 15090 "11111101" // /* MW 2 */ + 15091 "00000111" // /* MW 1 */ + 15092 "00011000" // LDA r9, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15093 "00110001" // /* MW 3 */ + 15094 "11110101" // /* MW 2 */ + 15095 "00000111" // /* MW 1 */ + 15096 "00011000" // LDA r10, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15097 "01010001" // /* MW 3 */ + 15098 "11101101" // /* MW 2 */ + 15099 "00000111" // /* MW 1 */ + 15100 "00011000" // LDA r8, [sp, #-32] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15101 "00010001" // /* MW 3 */ + 15102 "11100001" // /* MW 2 */ + 15103 "00000111" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 176 first + 15104 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 15105 "00000000" // /* MW 3 */ + 15106 "00101000" // /* MW 2 */ + 15107 "00010000" // /* MW 1 */ +.src_ref 3 "rmsnorm_row_major.h" 176 +.delay_slot + 15108 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15109 "00000001" // /* MW 5 */ + 15110 "00000000" // /* MW 4 */ + 15111 "00000000" // /* MW 3 */ + 15112 "11111000" // /* MW 2 */ + 15113 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15115 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15117 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15119 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z23rmsnorm_row_major_part2I8bfloat16EvPT_S2_S2_S2_R33rmsnorm_row_major_internal_params__end +.label __Z23rmsnorm_row_major_part2I8bfloat16EvPT_S2_S2_S2_R33rmsnorm_row_major_internal_params___func_end0 + 15121 "00000000" // /* MW 1 */ +.label __ZN12mllib_graphs35rmsnorm_row_major_part3_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE___func_begin0 +.label _ZN12mllib_graphs35rmsnorm_row_major_part3_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE +.function rmsnorm_row_major_part3_4x4_wrapper, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> > > _ZN12mllib_graphs35rmsnorm_row_major_part3_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE +.src_ref 12 "rms_norm_adf_wrapper.cpp" 237 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 243 5 +.function_start + 15136 "01000100" // MOVXM p4, #508932 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15137 "00001000" // /* MW 5 */ + 15138 "11001000" // /* MW 4 */ + 15139 "11001000" // /* MW 3 */ + 15140 "00000111" // /* MW 2 */ + 15141 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 243 5 first + 15142 "10011000" // LDA r16, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15143 "00010110" // /* MW 3 */ + 15144 "00000110" // /* MW 2 */ + 15145 "00000100" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 237 + 15146 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15147 "00000001" // /* MW 5 */ + 15148 "00000000" // /* MW 4 */ + 15149 "00000000" // /* MW 3 */ + 15150 "00010000" // /* MW 2 */ + 15151 "00000000" // /* MW 1 */ + 15152 "10011000" // ST p1, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15153 "10011101" // /* MW 3 */ + 15154 "11101000" // /* MW 2 */ + 15155 "00001111" // /* MW 1 */ + 15156 "10011000" // ST r15, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15157 "11110101" // /* MW 3 */ + 15158 "11110101" // /* MW 2 */ + 15159 "00001111" // /* MW 1 */ + 15160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15161 "00000000" // /* MW 1 */ + 15162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15163 "00000000" // /* MW 1 */ + 15164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15165 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 243 5 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 243 17 + 15166 "10000100" // JNZ r16, #15280 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=15280 delay_slots=5 */ + 15167 "00000001" // /* MW 5 */ + 15168 "01000000" // /* MW 4 */ + 15169 "11011000" // /* MW 3 */ + 15170 "00011101" // /* MW 2 */ + 15171 "10000000" // /* MW 1 */ +.delay_slot + 15172 "10011000" // ST p0, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15173 "00011101" // /* MW 3 */ + 15174 "11110000" // /* MW 2 */ + 15175 "00001111" // /* MW 1 */ +.delay_slot + 15176 "10011000" // ST p3, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15177 "10011101" // /* MW 3 */ + 15178 "11111001" // /* MW 2 */ + 15179 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 295 +.delay_slot + 15180 "00000010" // MOVS p4, p6; MOV p6, lr /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 15181 "01110000" // /* MW 7 */ + 15182 "11110000" // /* MW 6 */ + 15183 "00110000" // /* MW 5 */ + 15184 "00000011" // /* MW 4 */ + 15185 "01100000" // /* MW 3 */ + 15186 "00010001" // /* MW 2 */ + 15187 "10010011" // /* MW 1 */ +.delay_slot + 15188 "10011000" // ST p7, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15189 "10011101" // /* MW 3 */ + 15190 "11101111" // /* MW 2 */ + 15191 "00001111" // /* MW 1 */ +.delay_slot + 15192 "10011000" // ST p4, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15193 "00011101" // /* MW 3 */ + 15194 "11111110" // /* MW 2 */ + 15195 "00001111" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 + 15196 "10111010" // MOVA r19, #12; MOVX r20, #1; MOV r17, CORE_ID /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 15197 "01111000" // /* MW 9 */ + 15198 "01110000" // /* MW 8 */ + 15199 "00101101" // /* MW 7 */ + 15200 "00101010" // /* MW 6 */ + 15201 "01000000" // /* MW 5 */ + 15202 "00000001" // /* MW 4 */ + 15203 "00000000" // /* MW 3 */ + 15204 "10010011" // /* MW 2 */ + 15205 "00000001" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 244 14 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 245 36 + 15206 "10111010" // MOVA r18, #-16; MOVXM p7, #508920 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 15207 "00010000" // /* MW 9 */ + 15208 "11111100" // /* MW 8 */ + 15209 "10110001" // /* MW 7 */ + 15210 "11110011" // /* MW 6 */ + 15211 "00000001" // /* MW 5 */ + 15212 "00000000" // /* MW 4 */ + 15213 "00000000" // /* MW 3 */ + 15214 "00010010" // /* MW 2 */ + 15215 "11111110" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 244 14 first + 15216 "00111010" // ST r17, [p7]; MOVXM p7, #508940 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 15217 "00010001" // /* MW 9 */ + 15218 "00000110" // /* MW 8 */ + 15219 "10110010" // /* MW 7 */ + 15220 "11110011" // /* MW 6 */ + 15221 "00000001" // /* MW 5 */ + 15222 "00000000" // /* MW 4 */ + 15223 "00110000" // /* MW 3 */ + 15224 "11000110" // /* MW 2 */ + 15225 "11100000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.src_ref 4 "tile.hpp" 86 8 + 15226 "00111010" // ST r20, [p7]; MOVXM p7, #508944 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 15227 "00010001" // /* MW 9 */ + 15228 "00001000" // /* MW 8 */ + 15229 "10110010" // /* MW 7 */ + 15230 "11110011" // /* MW 6 */ + 15231 "00000001" // /* MW 5 */ + 15232 "00000000" // /* MW 4 */ + 15233 "00110000" // /* MW 3 */ + 15234 "11010010" // /* MW 2 */ + 15235 "11100000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 245 36 first + 15236 "00101100" // ST.s8 r19, [p7]; LSHL r18, r17, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15237 "01011011" // /* MW 5 */ + 15238 "11001010" // /* MW 4 */ + 15239 "11101000" // /* MW 3 */ + 15240 "11001100" // /* MW 2 */ + 15241 "11100000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 249 27 first + 15242 "00011000" // EXTEND.u8 r21, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15243 "10010000" // /* MW 3 */ + 15244 "01101010" // /* MW 2 */ + 15245 "00010100" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 245 27 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 249 44 + 15246 "00100100" // EXTEND.u8 r18, r18; ADD.NC r17, r21, #-2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15247 "11111110" // /* MW 5 */ + 15248 "10110101" // /* MW 4 */ + 15249 "00001000" // /* MW 3 */ + 15250 "10010010" // /* MW 2 */ + 15251 "10010100" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 249 15 + 15252 "01000100" // MOVXM p0, #508924 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15253 "11111000" // /* MW 5 */ + 15254 "11000111" // /* MW 4 */ + 15255 "11000000" // /* MW 3 */ + 15256 "00000111" // /* MW 2 */ + 15257 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 245 15 + 15258 "01000100" // MOVXM p7, #508928 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15259 "00000000" // /* MW 5 */ + 15260 "11001000" // /* MW 4 */ + 15261 "11001110" // /* MW 3 */ + 15262 "00000111" // /* MW 2 */ + 15263 "00000000" // /* MW 1 */ + 15264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15265 "00000000" // /* MW 1 */ + 15266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15267 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 249 15 first + 15268 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15269 "00110001" // /* MW 3 */ + 15270 "00000110" // /* MW 2 */ + 15271 "00001000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 245 15 first + 15272 "00000010" // ST r18, [p7]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 15273 "01110000" // /* MW 7 */ + 15274 "10100101" // /* MW 6 */ + 15275 "00000001" // /* MW 5 */ + 15276 "00000000" // /* MW 4 */ + 15277 "00110000" // /* MW 3 */ + 15278 "11001010" // /* MW 2 */ + 15279 "11100000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs35rmsnorm_row_major_part3_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE_144 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 256 33 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 257 30 + 15280 "10111010" // MOVA r17, #2; MOVXM p7, #508832 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 15281 "00010000" // /* MW 9 */ + 15282 "11010000" // /* MW 8 */ + 15283 "10110001" // /* MW 7 */ + 15284 "11110011" // /* MW 6 */ + 15285 "00000001" // /* MW 5 */ + 15286 "00000000" // /* MW 4 */ + 15287 "00000000" // /* MW 3 */ + 15288 "01010001" // /* MW 2 */ + 15289 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 256 33 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 259 24 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 267 35 + 15290 "01110110" // MOVA dj0, #40; ST r16, [p7], #20; MOVX r24, #0; MOV r18, sp /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 15291 "01111000" // /* MW 11 */ + 15292 "11110000" // /* MW 10 */ + 15293 "01001010" // /* MW 9 */ + 15294 "00001010" // /* MW 8 */ + 15295 "10000000" // /* MW 7 */ + 15296 "10000001" // /* MW 6 */ + 15297 "00010001" // /* MW 5 */ + 15298 "01011110" // /* MW 4 */ + 15299 "10000111" // /* MW 3 */ + 15300 "00000010" // /* MW 2 */ + 15301 "00000101" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 257 30 first + 15302 "00000010" // ST r17, [p7]; ADD.NC p0, r18, #-104 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 15303 "00000000" // /* MW 7 */ + 15304 "10100110" // /* MW 6 */ + 15305 "00110100" // /* MW 5 */ + 15306 "00000000" // /* MW 4 */ + 15307 "00110000" // /* MW 3 */ + 15308 "11000110" // /* MW 2 */ + 15309 "11100000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 259 24 first + 15310 "10011000" // LDA r15, [p2, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15311 "11110110" // /* MW 3 */ + 15312 "00000001" // /* MW 2 */ + 15313 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 261 33 first + 15314 "10011000" // LDA el0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15315 "00101110" // /* MW 3 */ + 15316 "00011100" // /* MW 2 */ + 15317 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 261 33 + 15318 "10011000" // LDA el3, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15319 "11101110" // /* MW 3 */ + 15320 "00011100" // /* MW 2 */ + 15321 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 261 67 + 15322 "10011000" // LDA el2, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15323 "10101110" // /* MW 3 */ + 15324 "00011100" // /* MW 2 */ + 15325 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 261 67 + 15326 "10011000" // LDA eh0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15327 "00001110" // /* MW 3 */ + 15328 "00011100" // /* MW 2 */ + 15329 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 261 101 + 15330 "10011000" // LDA el1, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15331 "01101110" // /* MW 3 */ + 15332 "00011100" // /* MW 2 */ + 15333 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 261 101 + 15334 "10011000" // LDA eh1, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15335 "01001110" // /* MW 3 */ + 15336 "00011100" // /* MW 2 */ + 15337 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 262 50 first + 15338 "10011000" // LDA eh2, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15339 "10001110" // /* MW 3 */ + 15340 "00011100" // /* MW 2 */ + 15341 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 261 32 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 262 50 + 15342 "00001100" // LDA el0, [p2], #4; ST el0, [sp, #-104] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15343 "01011011" // /* MW 5 */ + 15344 "00110000" // /* MW 4 */ + 15345 "11011111" // /* MW 3 */ + 15346 "10000101" // /* MW 2 */ + 15347 "01000011" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 261 32 + 15348 "10011000" // ST el3, [sp, #-100] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15349 "11101101" // /* MW 3 */ + 15350 "10011100" // /* MW 2 */ + 15351 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 261 32 + 15352 "10011000" // ST el2, [sp, #-96] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15353 "10101101" // /* MW 3 */ + 15354 "10100000" // /* MW 2 */ + 15355 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 261 32 + 15356 "10011000" // ST eh0, [sp, #-92] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15357 "00001101" // /* MW 3 */ + 15358 "10100100" // /* MW 2 */ + 15359 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 261 32 + 15360 "10011000" // ST el1, [sp, #-88] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15361 "01101101" // /* MW 3 */ + 15362 "10101000" // /* MW 2 */ + 15363 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 261 32 + 15364 "10011000" // ST eh1, [sp, #-84] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15365 "01001101" // /* MW 3 */ + 15366 "10101100" // /* MW 2 */ + 15367 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 261 32 + 15368 "10011000" // ST eh2, [sp, #-80] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15369 "10001101" // /* MW 3 */ + 15370 "10110000" // /* MW 2 */ + 15371 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 261 32 + 15372 "10011000" // ST el0, [sp, #-76] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15373 "00101101" // /* MW 3 */ + 15374 "10110100" // /* MW 2 */ + 15375 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 262 84 first + 15376 "10011000" // LDA eh0, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15377 "00001110" // /* MW 3 */ + 15378 "00000100" // /* MW 2 */ + 15379 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 262 84 + 15380 "10011000" // LDA el0, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15381 "00101110" // /* MW 3 */ + 15382 "00010100" // /* MW 2 */ + 15383 "00000010" // /* MW 1 */ + 15384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15385 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 264 6 first + 15386 "10000100" // JNZ r16, #15488 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=15488 delay_slots=5 */ + 15387 "00000001" // /* MW 5 */ + 15388 "01000000" // /* MW 4 */ + 15389 "01000000" // /* MW 3 */ + 15390 "00011110" // /* MW 2 */ + 15391 "10000000" // /* MW 1 */ +.delay_slot + 15392 "10011000" // ST p7, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15393 "10011101" // /* MW 3 */ + 15394 "11100111" // /* MW 2 */ + 15395 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15397 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15399 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 261 32 first +.delay_slot + 15400 "10011000" // ST eh0, [sp, #-72] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15401 "00001101" // /* MW 3 */ + 15402 "10111000" // /* MW 2 */ + 15403 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 261 32 +.delay_slot + 15404 "10011000" // ST el0, [sp, #-68] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15405 "00101101" // /* MW 3 */ + 15406 "10111100" // /* MW 2 */ + 15407 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 267 35 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 268 4 first +.no_stack_arguments + 15408 "10111010" // MOVA dj0, #16; JL #11744 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=11744 delay_slots=5 */ + 15409 "01000000" // /* MW 9 */ + 15410 "00000000" // /* MW 8 */ + 15411 "00000000" // /* MW 7 */ + 15412 "10111100" // /* MW 6 */ + 15413 "00000101" // /* MW 5 */ + 15414 "00000000" // /* MW 4 */ + 15415 "10000000" // /* MW 3 */ + 15416 "00000010" // /* MW 2 */ + 15417 "00000010" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 267 35 +.delay_slot + 15418 "01000100" // MOVXM p7, #508840 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15419 "01010000" // /* MW 5 */ + 15420 "11000111" // /* MW 4 */ + 15421 "11001110" // /* MW 3 */ + 15422 "00000111" // /* MW 2 */ + 15423 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 267 35 first +.delay_slot + 15424 "10011000" // ST r24, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15425 "00010001" // /* MW 3 */ + 15426 "00011111" // /* MW 2 */ + 15427 "00001111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 267 35 +.delay_slot + 15428 "10011000" // ST dj0, [p7], #-20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15429 "01000001" // /* MW 3 */ + 15430 "10111100" // /* MW 2 */ + 15431 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15433 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15434 "00111100" // NOPA; NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15435 "00100000" // /* MW 5 */ + 15436 "00000000" // /* MW 4 */ + 15437 "11110000" // /* MW 3 */ + 15438 "00101100" // /* MW 2 */ + 15439 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 269 55 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 269 98 +.return_address + 15440 "10111010" // LDA r16, [p7], #4; MOVXM p2, #508924 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 15441 "00010000" // /* MW 9 */ + 15442 "11111110" // /* MW 8 */ + 15443 "00110001" // /* MW 7 */ + 15444 "11110001" // /* MW 6 */ + 15445 "00000001" // /* MW 5 */ + 15446 "00000000" // /* MW 4 */ + 15447 "11010000" // /* MW 3 */ + 15448 "11000010" // /* MW 2 */ + 15449 "11100011" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 269 86 + 15450 "10011000" // LDA r18, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15451 "01010110" // /* MW 3 */ + 15452 "00000110" // /* MW 2 */ + 15453 "00000111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 269 98 + 15454 "10011000" // LDA r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15455 "00110110" // /* MW 3 */ + 15456 "00000110" // /* MW 2 */ + 15457 "00000010" // /* MW 1 */ + 15458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15459 "00000000" // /* MW 1 */ + 15460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15461 "00000000" // /* MW 1 */ + 15462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15463 "00000000" // /* MW 1 */ + 15464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15465 "00000000" // /* MW 1 */ + 15466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15467 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 269 65 + 15468 "10011000" // MUL r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15469 "00001111" // /* MW 3 */ + 15470 "10100001" // /* MW 2 */ + 15471 "00010100" // /* MW 1 */ + 15472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15473 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 269 96 + 15474 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15475 "00001111" // /* MW 3 */ + 15476 "01100001" // /* MW 2 */ + 15477 "00010100" // /* MW 1 */ + 15478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15479 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 269 34 + 15480 "00000010" // ST r16, [p7, #20]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 15481 "01110000" // /* MW 7 */ + 15482 "10100101" // /* MW 6 */ + 15483 "00000001" // /* MW 5 */ + 15484 "00000000" // /* MW 4 */ + 15485 "00110000" // /* MW 3 */ + 15486 "11000010" // /* MW 2 */ + 15487 "11101010" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs35rmsnorm_row_major_part3_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE_352 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 279 58 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 279 89 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 281 42 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 282 48 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 282 59 + 15488 "10111010" // LDA p4, [sp, #-28]; MOVX r16, #1; MOV m0, #-80 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 15489 "01011000" // /* MW 9 */ + 15490 "10110000" // /* MW 8 */ + 15491 "00000111" // /* MW 7 */ + 15492 "00101000" // /* MW 6 */ + 15493 "00000000" // /* MW 5 */ + 15494 "00000001" // /* MW 4 */ + 15495 "00100000" // /* MW 3 */ + 15496 "11000011" // /* MW 2 */ + 15497 "11111100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 279 25 + 15498 "10111010" // LDA p2, [sp, #-16]; MOVXM p7, #508928 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 15499 "00010000" // /* MW 9 */ + 15500 "00000000" // /* MW 8 */ + 15501 "10110010" // /* MW 7 */ + 15502 "11110011" // /* MW 6 */ + 15503 "00000001" // /* MW 5 */ + 15504 "00000000" // /* MW 4 */ + 15505 "00100000" // /* MW 3 */ + 15506 "00100011" // /* MW 2 */ + 15507 "11111110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 279 136 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 280 33 + 15508 "10111010" // LDA p0, [sp, #-24]; MOVX r19, #-5; MOV r17, #6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 15509 "01011000" // /* MW 9 */ + 15510 "00000110" // /* MW 8 */ + 15511 "00101000" // /* MW 7 */ + 15512 "01101010" // /* MW 6 */ + 15513 "00110111" // /* MW 5 */ + 15514 "00111111" // /* MW 4 */ + 15515 "00100000" // /* MW 3 */ + 15516 "00000011" // /* MW 2 */ + 15517 "11111101" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 + 15518 "00011000" // LDA p3, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15519 "10011001" // /* MW 3 */ + 15520 "11111001" // /* MW 2 */ + 15521 "00000111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 279 25 first + 15522 "10011000" // LDA r18, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15523 "01010110" // /* MW 3 */ + 15524 "00000110" // /* MW 2 */ + 15525 "00000111" // /* MW 1 */ + 15526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15527 "00000000" // /* MW 1 */ + 15528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15529 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 279 58 + 15530 "10011000" // LDA r21, [p4, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15531 "10110110" // /* MW 3 */ + 15532 "10100110" // /* MW 2 */ + 15533 "00000100" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 279 89 + 15534 "10011000" // LDA r23, [p4], #-28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15535 "11110110" // /* MW 3 */ + 15536 "10011110" // /* MW 2 */ + 15537 "00000100" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 279 119 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 15538 "10011000" // LDA r20, [p4], #24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15539 "10010110" // /* MW 3 */ + 15540 "01101110" // /* MW 2 */ + 15541 "00000100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 15542 "10011000" // LDA p2, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15543 "00011110" // /* MW 3 */ + 15544 "00000101" // /* MW 2 */ + 15545 "00000011" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 282 48 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 15546 "10011000" // LDA r28, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15547 "10010110" // /* MW 3 */ + 15548 "00001011" // /* MW 2 */ + 15549 "00000100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 15550 "10011000" // LDA r26, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15551 "01010110" // /* MW 3 */ + 15552 "00000111" // /* MW 2 */ + 15553 "00000010" // /* MW 1 */ + 15554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15555 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 15556 "10011000" // LDA r22, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15557 "11010110" // /* MW 3 */ + 15558 "00000110" // /* MW 2 */ + 15559 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 279 68 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 279 98 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 15560 "00011000" // MAC r20, r20, r23, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15561 "01010110" // /* MW 3 */ + 15562 "11101001" // /* MW 2 */ + 15563 "00010101" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 281 42 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 15564 "10011000" // LSHL r29, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15565 "00001101" // /* MW 3 */ + 15566 "00111011" // /* MW 2 */ + 15567 "00010101" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 281 42 + 15568 "11111000" // MOV m0, r29 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15569 "10100000" // /* MW 3 */ + 15570 "00001110" // /* MW 2 */ + 15571 "00011000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 282 59 first + 15572 "10011000" // LSHL r16, r28, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15573 "00001101" // /* MW 3 */ + 15574 "00100001" // /* MW 2 */ + 15575 "00010111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 279 130 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 282 11 + 15576 "10100100" // ADD r16, r20, #31; ADD.NC p0, r26, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15577 "10000010" // /* MW 5 */ + 15578 "11011010" // /* MW 4 */ + 15579 "11100000" // /* MW 3 */ + 15580 "00001111" // /* MW 2 */ + 15581 "10100100" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 279 136 + 15582 "10011000" // LSHL r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15583 "00111101" // /* MW 3 */ + 15584 "00100001" // /* MW 2 */ + 15585 "00010100" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 284 2 first +.no_stack_arguments + 15586 "00000100" // JL #14464 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=14464 delay_slots=5 */ + 15587 "00000001" // /* MW 5 */ + 15588 "00000000" // /* MW 4 */ + 15589 "01000000" // /* MW 3 */ + 15590 "00011100" // /* MW 2 */ + 15591 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 279 142 first +.delay_slot + 15592 "10011000" // MUL r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15593 "00001111" // /* MW 3 */ + 15594 "10100001" // /* MW 2 */ + 15595 "00010100" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 289 14 +.delay_slot + 15596 "01000100" // MOVXM p7, #508932 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15597 "00001000" // /* MW 5 */ + 15598 "11001000" // /* MW 4 */ + 15599 "11001110" // /* MW 3 */ + 15600 "00000111" // /* MW 2 */ + 15601 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 280 33 first +.delay_slot + 15602 "10011000" // LSHL r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15603 "00011101" // /* MW 3 */ + 15604 "00100001" // /* MW 2 */ + 15605 "00010100" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 280 33 +.delay_slot + 15606 "01011000" // ADD.NC p3, r22, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15607 "01000001" // /* MW 3 */ + 15608 "01101011" // /* MW 2 */ + 15609 "00011011" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 281 42 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 284 2 +.delay_slot + 15610 "11110100" // PADDB [p3], m0; MOV p1, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15611 "10000001" // /* MW 5 */ + 15612 "11001101" // /* MW 4 */ + 15613 "00000010" // /* MW 3 */ + 15614 "01110010" // /* MW 2 */ + 15615 "01100001" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 289 14 first +.src_ref 12 "rms_norm_adf_wrapper.cpp" 291 6 +.src_ref 12 "rms_norm_adf_wrapper.cpp" 295 +.return_address + 15616 "10111010" // LDA r17, [p7]; MOVX r16, #0; MOV lr, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 15617 "01111000" // /* MW 9 */ + 15618 "01100000" // /* MW 8 */ + 15619 "10111110" // /* MW 7 */ + 15620 "00001011" // /* MW 6 */ + 15621 "00000000" // /* MW 5 */ + 15622 "00000001" // /* MW 4 */ + 15623 "11010000" // /* MW 3 */ + 15624 "11000110" // /* MW 2 */ + 15625 "11100000" // /* MW 1 */ + 15626 "00011000" // LDA r0, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15627 "00010001" // /* MW 3 */ + 15628 "11110100" // /* MW 2 */ + 15629 "00000111" // /* MW 1 */ + 15630 "00011000" // LDA p2, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15631 "00011001" // /* MW 3 */ + 15632 "11101101" // /* MW 2 */ + 15633 "00000111" // /* MW 1 */ + 15634 "00011000" // LDA p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15635 "00011001" // /* MW 3 */ + 15636 "11111111" // /* MW 2 */ + 15637 "00000111" // /* MW 1 */ + 15638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15639 "00000000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 295 first + 15640 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 15641 "00000000" // /* MW 3 */ + 15642 "00101000" // /* MW 2 */ + 15643 "00010000" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 295 +.delay_slot + 15644 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15645 "00000001" // /* MW 5 */ + 15646 "00000000" // /* MW 4 */ + 15647 "00000000" // /* MW 3 */ + 15648 "11110000" // /* MW 2 */ + 15649 "11111111" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 289 14 first +.delay_slot + 15650 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15651 "00000111" // /* MW 3 */ + 15652 "01100010" // /* MW 2 */ + 15653 "00010100" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 291 18 first +.delay_slot + 15654 "11100100" // EQ r27, r15, r17; MOV r15, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15655 "01000001" // /* MW 5 */ + 15656 "10100000" // /* MW 4 */ + 15657 "11110111" // /* MW 3 */ + 15658 "11100010" // /* MW 2 */ + 15659 "01111110" // /* MW 1 */ +.src_ref 12 "rms_norm_adf_wrapper.cpp" 291 6 +.delay_slot + 15660 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15661 "00000010" // /* MW 3 */ + 15662 "01100001" // /* MW 2 */ + 15663 "00010100" // /* MW 1 */ +.delay_slot + 15664 "00000010" // ST r16, [p7]; MOV p7, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 15665 "01110000" // /* MW 7 */ + 15666 "01100000" // /* MW 6 */ + 15667 "10110010" // /* MW 5 */ + 15668 "00000011" // /* MW 4 */ + 15669 "00110000" // /* MW 3 */ + 15670 "11000010" // /* MW 2 */ +.label _ZN12mllib_graphs35rmsnorm_row_major_part3_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE__end +.label __ZN12mllib_graphs35rmsnorm_row_major_part3_4x4_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_SC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_SG_T1_EERA11_KjRNSD_ISE_NSF_3outET2_EE___func_end0 + 15671 "11100000" // /* MW 1 */ +.label __Z14_b8308_wrapperPPv___func_begin0 +.label _Z14_b8308_wrapperPPv +.function _b8308_wrapper _Z14_b8308_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 81 first +.src_ref 0 "0_0_reloadable5.cc" 83 79 +.function_start + 15680 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15681 "11000000" // /* MW 3 */ + 15682 "01100000" // /* MW 2 */ + 15683 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 83 79 first + 15684 "10011000" // LDA p0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15685 "00011110" // /* MW 3 */ + 15686 "00011100" // /* MW 2 */ + 15687 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 84 79 first + 15688 "10011000" // LDA p1, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15689 "10011110" // /* MW 3 */ + 15690 "00101100" // /* MW 2 */ + 15691 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 86 80 first + 15692 "10011000" // LDA p3, [p2, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15693 "10011110" // /* MW 3 */ + 15694 "11110101" // /* MW 2 */ + 15695 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 85 47 first + 15696 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15697 "00011110" // /* MW 3 */ + 15698 "00000101" // /* MW 2 */ + 15699 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 82 4 first +.tail_call + 15700 "10000100" // J #15136 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=15136 delay_slots=5 */ + 15701 "00000000" // /* MW 5 */ + 15702 "00000000" // /* MW 4 */ + 15703 "10010000" // /* MW 3 */ + 15704 "00011101" // /* MW 2 */ + 15705 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15707 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15709 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15711 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15713 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z14_b8308_wrapperPPv__end +.label __Z14_b8308_wrapperPPv___func_end0 + 15715 "00000000" // /* MW 1 */ +.label _ZN12me_primitive10udiv_dstepEjjRjS0_ +.function udiv_dstep _ZN12me_primitive10udiv_dstepEjjRjS0_ +.src_ref 13 "me_div.c" 108 19 +.src_ref 13 "me_div.c" 108 19 +.src_ref 13 "me_div.c" 115 4 first +.function_start + 15728 "11100100" // MOVX r3, #0; MOV r31, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15729 "01000001" // /* MW 5 */ + 15730 "10100000" // /* MW 4 */ + 15731 "00101111" // /* MW 3 */ + 15732 "11000000" // /* MW 2 */ + 15733 "00000000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15734 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15735 "00011100" // /* MW 3 */ + 15736 "11000110" // /* MW 2 */ + 15737 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15738 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15739 "00011100" // /* MW 3 */ + 15740 "11000110" // /* MW 2 */ + 15741 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15742 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15743 "00011100" // /* MW 3 */ + 15744 "11000110" // /* MW 2 */ + 15745 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15746 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15747 "00011100" // /* MW 3 */ + 15748 "11000110" // /* MW 2 */ + 15749 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15750 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15751 "00011100" // /* MW 3 */ + 15752 "11000110" // /* MW 2 */ + 15753 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15754 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15755 "00011100" // /* MW 3 */ + 15756 "11000110" // /* MW 2 */ + 15757 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15758 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15759 "00011100" // /* MW 3 */ + 15760 "11000110" // /* MW 2 */ + 15761 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15762 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15763 "00011100" // /* MW 3 */ + 15764 "11000110" // /* MW 2 */ + 15765 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15766 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15767 "00011100" // /* MW 3 */ + 15768 "11000110" // /* MW 2 */ + 15769 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15770 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15771 "00011100" // /* MW 3 */ + 15772 "11000110" // /* MW 2 */ + 15773 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15774 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15775 "00011100" // /* MW 3 */ + 15776 "11000110" // /* MW 2 */ + 15777 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15778 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15779 "00011100" // /* MW 3 */ + 15780 "11000110" // /* MW 2 */ + 15781 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15782 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15783 "00011100" // /* MW 3 */ + 15784 "11000110" // /* MW 2 */ + 15785 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15786 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15787 "00011100" // /* MW 3 */ + 15788 "11000110" // /* MW 2 */ + 15789 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15790 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15791 "00011100" // /* MW 3 */ + 15792 "11000110" // /* MW 2 */ + 15793 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15794 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15795 "00011100" // /* MW 3 */ + 15796 "11000110" // /* MW 2 */ + 15797 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15798 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15799 "00011100" // /* MW 3 */ + 15800 "11000110" // /* MW 2 */ + 15801 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15802 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15803 "00011100" // /* MW 3 */ + 15804 "11000110" // /* MW 2 */ + 15805 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15806 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15807 "00011100" // /* MW 3 */ + 15808 "11000110" // /* MW 2 */ + 15809 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15810 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15811 "00011100" // /* MW 3 */ + 15812 "11000110" // /* MW 2 */ + 15813 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15814 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15815 "00011100" // /* MW 3 */ + 15816 "11000110" // /* MW 2 */ + 15817 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15818 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15819 "00011100" // /* MW 3 */ + 15820 "11000110" // /* MW 2 */ + 15821 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15822 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15823 "00011100" // /* MW 3 */ + 15824 "11000110" // /* MW 2 */ + 15825 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15826 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15827 "00011100" // /* MW 3 */ + 15828 "11000110" // /* MW 2 */ + 15829 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15830 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15831 "00011100" // /* MW 3 */ + 15832 "11000110" // /* MW 2 */ + 15833 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15834 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15835 "00011100" // /* MW 3 */ + 15836 "11000110" // /* MW 2 */ + 15837 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15838 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15839 "00011100" // /* MW 3 */ + 15840 "11000110" // /* MW 2 */ + 15841 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 + 15842 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15843 "00011100" // /* MW 3 */ + 15844 "11000110" // /* MW 2 */ + 15845 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 119 first + 15846 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 15847 "00000000" // /* MW 3 */ + 15848 "00101000" // /* MW 2 */ + 15849 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 first +.delay_slot + 15850 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15851 "00011100" // /* MW 3 */ + 15852 "11000110" // /* MW 2 */ + 15853 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 +.delay_slot + 15854 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15855 "00011100" // /* MW 3 */ + 15856 "11000110" // /* MW 2 */ + 15857 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 +.delay_slot + 15858 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15859 "00011100" // /* MW 3 */ + 15860 "11000110" // /* MW 2 */ + 15861 "00010000" // /* MW 1 */ +.src_ref 13 "me_div.c" 108 19 +.delay_slot + 15862 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15863 "00011100" // /* MW 3 */ + 15864 "11000110" // /* MW 2 */ + 15865 "00010000" // /* MW 1 */ +.delay_slot + 15866 "11111000" // MOV r2, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15867 "10100000" // /* MW 3 */ + 15868 "10011111" // /* MW 2 */ +.label _ZN12me_primitive10udiv_dstepEjjRjS0___end + 15869 "00011000" // /* MW 1 */ +.dir 0 "/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src" +.dir 1 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer" +.dir 2 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common" +.dir 3 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc" +.dir 4 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2" +.dir 5 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p" +.dir 6 "/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend" +.dir 7 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/gemm" +.dir 8 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api" +.dir 9 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include" +.dir 10 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib" +.dir 11 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail" +.dir 12 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf" +.dir 13 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21990/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib" diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable80/Release/0_0_reloadable80.txt b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable80/Release/0_0_reloadable80.txt new file mode 100644 index 0000000000000000000000000000000000000000..b0b6d1657d838a3e4a3255791f4a7be47f2ee7fc --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable80/Release/0_0_reloadable80.txt @@ -0,0 +1,5296 @@ +Contents of the .debug_line section: + +sigmoid_carf_templated_lut.h: +File name Line number Starting address View Stmt + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 66 0x1130 x +elementwise_binary_shared.h 78 0x1130 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 134 0x1134 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 78 0x1134 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 134 0x113a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 81 0x113a 1 x +elementwise_binary_shared.h 83 0x1140 x +elementwise_binary_shared.h 83 0x114e +elementwise_binary_shared.h 83 0x1152 +elementwise_binary_shared.h 83 0x1156 +elementwise_binary_shared.h 66 0x115c +elementwise_binary_shared.h 78 0x116a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 102 0x1170 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 85 0x1170 1 +elementwise_binary_shared.h 90 0x1170 2 +elementwise_binary_shared.h 85 0x1176 x +elementwise_binary_shared.h 90 0x1176 1 x +elementwise_binary_shared.h 90 0x1186 +elementwise_binary_shared.h 90 0x1186 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x118c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x118c 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 132 0x118c 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1190 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 130 0x1190 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x119a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 128 0x119a 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x11ac x +vector.hpp 1139 0x11b0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 128 0x11b0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x11b4 +vector.hpp 1159 0x11b4 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 130 0x11b4 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x11c6 +vector.hpp 1139 0x11c6 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x11c6 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 128 0x11c6 3 +elementwise_binary_shared.h 132 0x11c6 4 +elementwise_binary_shared.h 109 0x11e0 +elementwise_binary_shared.h 109 0x11e4 x +elementwise_binary_shared.h 109 0x11e8 +elementwise_binary_shared.h 109 0x11ee +elementwise_binary_shared.h 109 0x11fa +elementwise_binary_shared.h 109 0x1200 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1210 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x1210 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 109 0x1210 2 +elementwise_binary_shared.h 132 0x1210 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1216 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 128 0x1216 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x121a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 130 0x121a 1 +elementwise_binary_shared.h 109 0x1222 +elementwise_binary_shared.h 109 0x1226 +elementwise_binary_shared.h 109 0x122a +elementwise_binary_shared.h 109 0x122e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 102 0x1234 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 109 0x1234 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x123a x +vector.hpp 1139 0x1240 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 125 0x1240 1 x +elementwise_binary_shared.h 125 0x1240 2 x +elementwise_binary_shared.h 128 0x1240 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x124c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 125 0x124c 1 +elementwise_binary_shared.h 125 0x124c 2 +elementwise_binary_shared.h 130 0x124c 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1258 x +vector.hpp 1139 0x1258 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x1258 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 128 0x1258 3 x +elementwise_binary_shared.h 132 0x1258 4 x +elementwise_binary_shared.h 136 0x1258 5 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1260 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 125 0x1260 1 +elementwise_binary_shared.h 130 0x1260 2 x +elementwise_binary_shared.h 125 0x126a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x126e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x126e 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 132 0x126e 2 x +elementwise_binary_shared.h 125 0x1274 x +elementwise_binary_shared.h 125 0x1278 +elementwise_binary_shared.h 136 0x1278 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1280 x +vector.hpp 1139 0x1280 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 128 0x1280 2 x +elementwise_binary_shared.h 130 0x1280 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1286 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x1286 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 132 0x1286 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1290 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1290 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 144 0x1290 2 +elementwise_binary_shared.h 136 0x12a0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x12b0 x +vector.hpp 1139 0x12b0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 128 0x12b0 2 x +elementwise_binary_shared.h 130 0x12b0 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x12c0 +vector.hpp 1159 0x12c0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x12c0 2 x +accum.hpp 1119 0x12c0 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 132 0x12c0 4 x +elementwise_binary_shared.h 144 0x12c0 5 x +elementwise_binary_shared.h 136 0x12e0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x12f2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x12f2 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 144 0x12f2 2 x +elementwise_binary_shared.h 136 0x12f8 x +elementwise_binary_shared.h 146 0x12fe x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1302 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1302 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 144 0x1302 2 x +elementwise_binary_shared.h 146 0x1306 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x130e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x130e 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 144 0x130e 2 x +elementwise_binary_shared.h 164 0x16e0 x +elementwise_binary_shared.h 170 0x16e0 1 +elementwise_binary_shared.h 170 0x16e0 2 x +elementwise_binary_shared.h 175 0x16ea +elementwise_binary_shared.h 175 0x16ea 1 +elementwise_binary_shared.h 175 0x16ea 2 +elementwise_binary_shared.h 170 0x16fe x +elementwise_binary_shared.h 171 0x1702 x +elementwise_binary_shared.h 171 0x1712 +elementwise_binary_shared.h 172 0x1716 x +elementwise_binary_shared.h 172 0x1726 +elementwise_binary_shared.h 173 0x172a x +elementwise_binary_shared.h 173 0x173a +elementwise_binary_shared.h 175 0x173e x +elementwise_binary_shared.h 176 0x1742 x +elementwise_binary_shared.h 175 0x1746 +elementwise_binary_shared.h 175 0x174c x +elementwise_binary_shared.h 175 0x1750 +elementwise_binary_shared.h 175 0x1754 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 100 0xc10 x +elementwise_binary.h 103 0xc10 1 x +elementwise_binary.h 101 0xc14 +elementwise_binary.h 101 0xc1a +elementwise_binary.h 101 0xc1e x +elementwise_binary.h 101 0xc22 +elementwise_binary.h 89 0xc30 x +elementwise_binary.h 92 0xc30 1 +elementwise_binary.h 92 0xc30 2 x +elementwise_binary.h 89 0xc3a +elementwise_binary.h 92 0xc4e x +elementwise_binary.h 93 0xc52 x +elementwise_binary.h 93 0xc62 +elementwise_binary.h 94 0xc66 x +elementwise_binary.h 94 0xc76 +elementwise_binary.h 95 0xc7a x +elementwise_binary.h 96 0xc82 x +elementwise_binary.h 95 0xc8e x +elementwise_binary.h 96 0xc92 +elementwise_binary.h 96 0xca0 +elementwise_binary.h 98 0xca0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h: +add_impl.h 105 0xca0 2 +add_impl.h 105 0xcaa +add_impl.h 106 0xcaa 1 +add_impl.h 106 0xcaa 2 +add_impl.h 105 0xcb4 x +add_impl.h 106 0xcb4 1 +add_impl.h 106 0xcbe x +add_impl.h 106 0xcc6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 98 0xcca x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h: +add_impl.h 106 0xcce + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 98 0xcd2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h: +add_impl.h 106 0xcd8 x +add_impl.h 106 0xcdc + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_broadcasting.h: +elementwise_binary_broadcasting.h 40 0xcf0 x +elementwise_binary_broadcasting.h 41 0xcf0 1 +elementwise_binary_broadcasting.h 41 0xcf0 2 +elementwise_binary_broadcasting.h 41 0xcfa +elementwise_binary_broadcasting.h 41 0xcfa 1 +elementwise_binary_broadcasting.h 41 0xcfa 2 x +elementwise_binary_broadcasting.h 42 0xcfa 3 +elementwise_binary_broadcasting.h 43 0xd0e x +elementwise_binary_broadcasting.h 41 0xd12 x +elementwise_binary_broadcasting.h 41 0xd16 +elementwise_binary_broadcasting.h 42 0xd1a x +elementwise_binary_broadcasting.h 41 0xd1e x +elementwise_binary_broadcasting.h 42 0xd1e 1 +elementwise_binary_broadcasting.h 41 0xd24 +elementwise_binary_broadcasting.h 35 0xd30 +elementwise_binary_broadcasting.h 35 0xd30 1 x +elementwise_binary_broadcasting.h 36 0xd3a x +elementwise_binary_broadcasting.h 36 0xd40 +elementwise_binary_broadcasting.h 37 0xd50 +elementwise_binary_broadcasting.h 37 0xd54 x +elementwise_binary_broadcasting.h 37 0xd5a +elementwise_binary_broadcasting.h 38 0xd60 x +elementwise_binary_broadcasting.h 48 0xd70 x +elementwise_binary_broadcasting.h 55 0xd70 1 +elementwise_binary_broadcasting.h 61 0xd70 2 +elementwise_binary_broadcasting.h 55 0xd7a x +elementwise_binary_broadcasting.h 61 0xd7e x +elementwise_binary_broadcasting.h 76 0xd7e 1 +elementwise_binary_broadcasting.h 61 0xd90 +elementwise_binary_broadcasting.h 61 0xd90 1 +elementwise_binary_broadcasting.h 55 0xd96 +elementwise_binary_broadcasting.h 55 0xd9a x +elementwise_binary_broadcasting.h 62 0xda4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 102 0xdb8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0xdc0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_broadcasting.h: +elementwise_binary_broadcasting.h 65 0xdd0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 102 0xde0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0xde6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0xdf0 +add_accum.hpp 19 0xdf0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_broadcasting.h: +elementwise_binary_broadcasting.h 76 0xdf0 2 x +elementwise_binary_broadcasting.h 76 0xdf0 3 x +elementwise_binary_broadcasting.h 76 0xdfa +elementwise_binary_broadcasting.h 76 0xdfa 1 +elementwise_binary_broadcasting.h 76 0xe04 +elementwise_binary_broadcasting.h 76 0xe0a +elementwise_binary_broadcasting.h 76 0xe10 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe18 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe18 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0xe18 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe1c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe1c 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 148 0xe1c 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe20 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe20 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 170 0xe20 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe24 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe24 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 172 0xe24 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe28 x +vector.hpp 1159 0xe28 1 +vector.hpp 1159 0xe28 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe28 3 x +accum.hpp 1119 0xe28 4 +accum.hpp 1119 0xe28 5 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0xe28 6 x +elementwise_binary.h 154 0xe28 7 +elementwise_binary.h 177 0xe28 8 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe2e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe2e 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0xe2e 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 148 0xe2e 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe36 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe36 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 170 0xe36 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe3a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe3a 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0xe3a 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 172 0xe3a 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe42 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe42 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0xe42 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe46 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe46 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0xe46 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 148 0xe46 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe4e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe4e 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 170 0xe4e 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe52 +vector.hpp 1159 0xe52 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe52 2 +accum.hpp 1119 0xe52 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0xe52 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 172 0xe52 5 x +elementwise_binary.h 177 0xe52 6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe60 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe60 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0xe60 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe64 +vector.hpp 1159 0xe64 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe64 2 +accum.hpp 1119 0xe64 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0xe64 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 148 0xe64 5 x +elementwise_binary.h 154 0xe64 6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe70 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe70 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 170 0xe70 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe80 +vector.hpp 1159 0xe80 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe80 2 +accum.hpp 1119 0xe80 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0xe80 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 172 0xe80 5 x +elementwise_binary.h 177 0xe80 6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0xe92 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0xe92 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0xe92 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 154 0xe92 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0xe9c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0xe9c 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0xe9c 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 177 0xe9c 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0xea6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0xea6 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 154 0xea6 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_broadcasting.h: +elementwise_binary_broadcasting.h 80 0xea6 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0xeae x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0xeae 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 177 0xeae 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0xeb4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0xeb4 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 154 0xeb4 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_attribute_broadcasting.h: +ise_binary_attribute_broadcasting.h 41 0xec0 +ise_binary_attribute_broadcasting.h 41 0xec0 1 x +ise_binary_attribute_broadcasting.h 76 0xec0 2 +ise_binary_attribute_broadcasting.h 51 0xeca +ise_binary_attribute_broadcasting.h 51 0xed6 +ise_binary_attribute_broadcasting.h 51 0xedc x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 538 0xee2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector_native_types.hpp: +vector_native_types.hpp 374 0xee2 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_attribute_broadcasting.h: +ise_binary_attribute_broadcasting.h 76 0xef0 x +ise_binary_attribute_broadcasting.h 51 0xef6 +ise_binary_attribute_broadcasting.h 51 0xefc x +ise_binary_attribute_broadcasting.h 51 0xf00 +ise_binary_attribute_broadcasting.h 76 0xf00 1 +ise_binary_attribute_broadcasting.h 76 0xf06 +ise_binary_attribute_broadcasting.h 77 0xf10 +ise_binary_attribute_broadcasting.h 77 0xf20 x +ise_binary_attribute_broadcasting.h 77 0xf24 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 152 0xf40 x +superkernels.cpp 157 0xf40 1 +superkernels.cpp 157 0xf46 x +superkernels.cpp 152 0xf4c +superkernels.cpp 154 0xf5a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0xf64 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 157 0xf6c +superkernels.cpp 157 0xf6c 1 +superkernels.cpp 154 0xf72 x +superkernels.cpp 154 0xf76 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0xf7e + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 154 0xf7e 1 +superkernels.cpp 160 0xf86 +superkernels.cpp 171 0xf86 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0xf8c +tile.hpp 74 0xf92 +tile.hpp 86 0xf92 1 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 160 0xf9e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0xfa8 +tile.hpp 74 0xfac +tile.hpp 74 0xfb0 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 164 0xfc0 +superkernels.cpp 164 0xfc6 x +superkernels.cpp 164 0xfc6 1 +superkernels.cpp 162 0xfd0 +superkernels.cpp 164 0xfd0 1 +superkernels.cpp 171 0xfd0 2 +superkernels.cpp 162 0xfda x +superkernels.cpp 164 0xfda 1 +superkernels.cpp 169 0xfda 2 +superkernels.cpp 162 0xfee +superkernels.cpp 164 0xff6 x +superkernels.cpp 162 0xffa x +superkernels.cpp 164 0x1000 x +superkernels.cpp 169 0x1010 +superkernels.cpp 171 0x1010 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x1020 x +io_buffer_main.h 242 0x1028 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 168 0x1028 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x1032 +io_buffer_main.h 242 0x1036 +io_buffer_main.h 259 0x103a x +io_buffer_main.h 242 0x1048 x +io_buffer_main.h 242 0x1048 1 x +io_buffer_main.h 242 0x104c +io_buffer_main.h 419 0x1050 +io_buffer_main.h 419 0x105a x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 169 0x105e +superkernels.cpp 168 0x1068 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x106c x +io_buffer_main.h 348 0x106c 1 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 169 0x1072 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x1076 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 171 0x107c x +superkernels.cpp 168 0x1084 x +superkernels.cpp 168 0x1088 +superkernels.cpp 169 0x108c x +superkernels.cpp 169 0x1090 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 348 0x10a0 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 173 0x10a0 1 +superkernels.cpp 174 0x10a0 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 351 0x10aa +io_buffer_main.h 449 0x10aa 1 +io_buffer_main.h 449 0x10b8 x +io_buffer_main.h 351 0x10bc x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 173 0x10bc 1 +superkernels.cpp 173 0x10c6 x +superkernels.cpp 173 0x10ca + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 351 0x10d6 x +io_buffer_main.h 351 0x10da + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 173 0x10de x +superkernels.cpp 173 0x10e2 +superkernels.cpp 174 0x10f2 +superkernels.cpp 174 0x10f6 x +superkernels.cpp 176 0x1100 +superkernels.cpp 176 0x1114 x +superkernels.cpp 176 0x111c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 95 0x1320 x +elementwise_unary.h 97 0x1320 1 +elementwise_unary.h 97 0x1320 2 x +elementwise_unary.h 97 0x1336 x +elementwise_unary.h 98 0x133a x +elementwise_unary.h 98 0x134a +elementwise_unary.h 99 0x134e x +elementwise_unary.h 101 0x1354 x +elementwise_unary.h 99 0x1360 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1370 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 107 0x1370 1 x +elementwise_unary.h 113 0x1370 2 +elementwise_unary.h 113 0x1370 3 +elementwise_unary.h 142 0x1370 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x137c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 113 0x137c 1 x +elementwise_unary.h 161 0x137c 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1388 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 142 0x1388 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1394 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 161 0x1394 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 21 0x13aa x +max_min.hpp 20 0x13ae x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 113 0x13ae 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x13b4 +vector.hpp 1159 0x13b4 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 21 0x13b4 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x13b4 3 +accum.hpp 1119 0x13b4 4 +accum.hpp 1119 0x13b4 5 +accum.hpp 1119 0x13b4 6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 147 0x13b4 7 +elementwise_unary.h 166 0x13b4 8 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 102 0x13c8 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/abs.hpp: +abs.hpp 32 0x13ce x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 102 0x13d2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x13d2 1 +mul_acc32_fp.hpp 36 0x13d2 2 +mul_acc32_fp.hpp 38 0x13d2 3 +mul_acc32_fp.hpp 38 0x13d2 4 +mul_acc32_fp.hpp 39 0x13d2 5 +mul_acc32_fp.hpp 39 0x13d2 6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x13d8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x13d8 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/abs.hpp: +abs.hpp 32 0x13e0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x13ea x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x13f2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 21 0x13f6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 38 0x13f6 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x13fe x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 38 0x13fe 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 125 0x13fe 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x140a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 39 0x140e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 125 0x140e 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1418 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 125 0x1418 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 21 0x1420 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/abs.hpp: +abs.hpp 32 0x1424 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1430 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1430 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 39 0x1430 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 142 0x1430 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x143a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/abs.hpp: +abs.hpp 32 0x143a 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x143a 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 161 0x143a 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 38 0x1444 x +mul_acc32_fp.hpp 36 0x1448 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x144c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x144c 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 38 0x144c 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 166 0x144c 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1456 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1456 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 147 0x1456 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 21 0x1460 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1460 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1470 x +max_min.hpp 21 0x1480 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1480 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 39 0x1480 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/abs.hpp: +abs.hpp 32 0x1490 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x14a0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 39 0x14a0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/abs.hpp: +abs.hpp 32 0x14a8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x14ac x +mul_acc32_fp.hpp 36 0x14b0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x14b4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x14b4 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 166 0x14b4 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x14ba + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x14ba 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 147 0x14ba 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 38 0x14be x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x14c2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 38 0x14c2 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x14ca + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 39 0x14ce x +mul_acc32_fp.hpp 39 0x14d2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 129 0x14d8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x14e0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x14e0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 166 0x14e0 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x14e4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x14e4 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 147 0x14e4 2 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 210 0x14f0 x +superkernels.cpp 215 0x14f0 1 +superkernels.cpp 215 0x14f6 x +superkernels.cpp 210 0x14fc +superkernels.cpp 212 0x150a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x1514 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 215 0x151c +superkernels.cpp 215 0x151c 1 +superkernels.cpp 212 0x1522 x +superkernels.cpp 212 0x1526 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x152e + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 212 0x152e 1 +superkernels.cpp 218 0x1536 +superkernels.cpp 229 0x1536 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x153c +tile.hpp 74 0x1542 +tile.hpp 86 0x1542 1 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 218 0x154e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x1558 +tile.hpp 74 0x155c +tile.hpp 74 0x1560 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 222 0x1570 +superkernels.cpp 222 0x1576 x +superkernels.cpp 222 0x1576 1 +superkernels.cpp 220 0x1580 +superkernels.cpp 222 0x1580 1 +superkernels.cpp 229 0x1580 2 +superkernels.cpp 220 0x158a x +superkernels.cpp 222 0x158a 1 +superkernels.cpp 227 0x158a 2 +superkernels.cpp 220 0x159e +superkernels.cpp 222 0x15a6 x +superkernels.cpp 220 0x15aa x +superkernels.cpp 222 0x15b0 x +superkernels.cpp 227 0x15c0 +superkernels.cpp 229 0x15c0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x15d0 x +io_buffer_main.h 242 0x15d8 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 226 0x15d8 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x15e2 +io_buffer_main.h 242 0x15e6 +io_buffer_main.h 259 0x15ea x +io_buffer_main.h 242 0x15f8 x +io_buffer_main.h 242 0x15f8 1 x +io_buffer_main.h 242 0x15fc +io_buffer_main.h 419 0x1600 +io_buffer_main.h 419 0x160a x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 227 0x160e +superkernels.cpp 226 0x1618 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x161c x +io_buffer_main.h 348 0x161c 1 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 227 0x1622 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x1626 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 229 0x162c x +superkernels.cpp 226 0x1634 x +superkernels.cpp 226 0x1638 +superkernels.cpp 227 0x163c x +superkernels.cpp 227 0x1640 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 348 0x1650 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 231 0x1650 1 +superkernels.cpp 232 0x1650 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 351 0x165a +io_buffer_main.h 449 0x165a 1 +io_buffer_main.h 449 0x1668 x +io_buffer_main.h 351 0x166c x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 231 0x166c 1 +superkernels.cpp 231 0x1676 x +superkernels.cpp 231 0x167a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 351 0x1686 x +io_buffer_main.h 351 0x168a + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 231 0x168e x +superkernels.cpp 231 0x1692 +superkernels.cpp 232 0x16a2 +superkernels.cpp 232 0x16a6 x +superkernels.cpp 234 0x16b0 +superkernels.cpp 234 0x16c4 x +superkernels.cpp 234 0x16cc + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 178 0x1760 +elementwise_binary_shared.h 178 0x1760 1 x +elementwise_binary_shared.h 179 0x176a x +elementwise_binary_shared.h 179 0x1774 +elementwise_binary_shared.h 179 0x177e +elementwise_binary_shared.h 179 0x1790 +elementwise_binary_shared.h 181 0x1790 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h: +add_impl.h 105 0x1790 2 +add_impl.h 105 0x179a +add_impl.h 106 0x179a 1 +add_impl.h 106 0x179a 2 +add_impl.h 105 0x17a4 x +add_impl.h 106 0x17a4 1 +add_impl.h 106 0x17ae x +add_impl.h 106 0x17b6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 181 0x17ba x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h: +add_impl.h 106 0x17be + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 181 0x17c2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h: +add_impl.h 106 0x17c8 x +add_impl.h 106 0x17cc + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 186 0x17e0 x +elementwise_binary_shared.h 191 0x17e0 1 x +elementwise_binary_shared.h 191 0x17e6 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 240 0x1800 x +superkernels.cpp 245 0x1800 1 +superkernels.cpp 245 0x1806 x +superkernels.cpp 240 0x180c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x1812 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 242 0x1812 1 x +superkernels.cpp 245 0x182e x +superkernels.cpp 245 0x182e 1 x +superkernels.cpp 242 0x1834 x +superkernels.cpp 242 0x1838 +superkernels.cpp 242 0x183e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x1846 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 248 0x184a +superkernels.cpp 250 0x184a 1 +superkernels.cpp 252 0x184a 2 +superkernels.cpp 264 0x184a 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x1854 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 248 0x1854 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 74 0x185e +tile.hpp 86 0x185e 1 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 248 0x186a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x1874 +tile.hpp 74 0x1878 +tile.hpp 74 0x187c x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 252 0x1880 +superkernels.cpp 252 0x1880 1 x +superkernels.cpp 252 0x188a +superkernels.cpp 252 0x188a 1 +superkernels.cpp 261 0x188a 2 +superkernels.cpp 250 0x1894 x +superkernels.cpp 253 0x1894 1 +superkernels.cpp 262 0x1894 2 +superkernels.cpp 250 0x18aa +superkernels.cpp 252 0x18b0 x +superkernels.cpp 250 0x18b4 x +superkernels.cpp 252 0x18b8 x +superkernels.cpp 253 0x18bc x +superkernels.cpp 261 0x18c0 +superkernels.cpp 262 0x18c6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x18d0 x +io_buffer_main.h 242 0x18d4 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 256 0x18d4 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x18de +io_buffer_main.h 242 0x18e2 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 256 0x18e6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 259 0x18ea x +io_buffer_main.h 242 0x18f6 x +io_buffer_main.h 242 0x18f6 1 x +io_buffer_main.h 242 0x18fa + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 256 0x18fa 1 x +superkernels.cpp 256 0x1900 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 419 0x1904 +io_buffer_main.h 419 0x1904 1 +io_buffer_main.h 419 0x190e x +io_buffer_main.h 242 0x1912 x +io_buffer_main.h 242 0x191a +io_buffer_main.h 242 0x191e +io_buffer_main.h 242 0x1922 +io_buffer_main.h 259 0x1926 x +io_buffer_main.h 242 0x1934 x +io_buffer_main.h 242 0x1934 1 x +io_buffer_main.h 242 0x1938 +io_buffer_main.h 419 0x1944 x +io_buffer_main.h 348 0x1948 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 261 0x1948 1 +superkernels.cpp 262 0x1948 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x1956 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 261 0x195a x +superkernels.cpp 262 0x1960 x +superkernels.cpp 264 0x1960 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x1966 x +io_buffer_main.h 149 0x196a +io_buffer_main.h 351 0x196e +io_buffer_main.h 351 0x196e 1 +io_buffer_main.h 149 0x1974 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 264 0x197a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 351 0x1980 +io_buffer_main.h 351 0x1980 1 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 261 0x1984 x +superkernels.cpp 262 0x1988 x +superkernels.cpp 262 0x198c +superkernels.cpp 261 0x1990 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 348 0x19a0 x +io_buffer_main.h 351 0x19a0 1 +io_buffer_main.h 351 0x19a0 2 +io_buffer_main.h 351 0x19a0 3 +io_buffer_main.h 351 0x19a0 4 +io_buffer_main.h 449 0x19a0 5 +io_buffer_main.h 449 0x19a0 6 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 268 0x19aa + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 449 0x19ba x +io_buffer_main.h 351 0x19be x +io_buffer_main.h 348 0x19c2 +io_buffer_main.h 351 0x19d0 +io_buffer_main.h 348 0x19d4 x +io_buffer_main.h 351 0x19d4 1 +io_buffer_main.h 449 0x19e6 x +io_buffer_main.h 351 0x19ea x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 268 0x19ea 1 +superkernels.cpp 269 0x19ea 2 +superkernels.cpp 268 0x19f4 x +superkernels.cpp 268 0x19f8 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 351 0x1a04 x +io_buffer_main.h 351 0x1a08 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 268 0x1a0c x +superkernels.cpp 268 0x1a10 +superkernels.cpp 269 0x1a20 +superkernels.cpp 269 0x1a24 x +superkernels.cpp 271 0x1a30 +superkernels.cpp 271 0x1a46 x +superkernels.cpp 271 0x1a4e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 100 0x1a60 x +elementwise_binary.h 103 0x1a60 1 x +elementwise_binary.h 101 0x1a64 +elementwise_binary.h 101 0x1a6a +elementwise_binary.h 101 0x1a6e x +elementwise_binary.h 101 0x1a72 +elementwise_binary.h 89 0x1a80 x +elementwise_binary.h 92 0x1a80 1 +elementwise_binary.h 92 0x1a80 2 x +elementwise_binary.h 89 0x1a8a +elementwise_binary.h 92 0x1a9c x +elementwise_binary.h 93 0x1aa0 x +elementwise_binary.h 93 0x1ab0 +elementwise_binary.h 94 0x1ab4 x +elementwise_binary.h 94 0x1ac4 +elementwise_binary.h 95 0x1ac8 x +elementwise_binary.h 96 0x1ad0 x +elementwise_binary.h 95 0x1ade x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h: +mul_impl.h 93 0x1ae2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 98 0x1af0 +elementwise_binary.h 98 0x1b02 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h: +mul_impl.h 93 0x1b0c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 98 0x1b10 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h: +mul_impl.h 93 0x1b10 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 108 0x1b20 x +elementwise_binary.h 115 0x1b20 1 +elementwise_binary.h 115 0x1b20 2 +elementwise_binary.h 115 0x1b2a x +elementwise_binary.h 115 0x1b2a 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x1b34 +mul_acc32_fp.hpp 36 0x1b34 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 115 0x1b34 2 +elementwise_binary.h 115 0x1b34 3 +elementwise_binary.h 115 0x1b3e +elementwise_binary.h 127 0x1b3e 1 x +elementwise_binary.h 115 0x1b48 x +elementwise_binary.h 127 0x1b48 1 +elementwise_binary.h 115 0x1b58 +elementwise_binary.h 127 0x1b5c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1b60 x +vector.hpp 1139 0x1b60 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0x1b60 2 x +elementwise_binary.h 148 0x1b60 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1b66 +vector.hpp 1139 0x1b66 1 +vector.hpp 1159 0x1b66 2 +vector.hpp 1159 0x1b66 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1b66 4 +accum.hpp 1119 0x1b66 5 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 154 0x1b66 6 +elementwise_binary.h 170 0x1b66 7 x +elementwise_binary.h 172 0x1b66 8 x +elementwise_binary.h 177 0x1b66 9 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1b6e x +vector.hpp 1139 0x1b6e 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0x1b6e 2 x +elementwise_binary.h 148 0x1b6e 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1b74 +vector.hpp 1139 0x1b74 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 170 0x1b74 2 x +elementwise_binary.h 172 0x1b74 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1b7a x +vector.hpp 1139 0x1b7a 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0x1b7a 2 x +elementwise_binary.h 148 0x1b7a 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1b80 +vector.hpp 1139 0x1b80 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 170 0x1b80 2 x +elementwise_binary.h 172 0x1b80 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1b86 x +vector.hpp 1139 0x1b86 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0x1b86 2 x +elementwise_binary.h 148 0x1b86 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1b8c +vector.hpp 1139 0x1b8c 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x1b8c 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 170 0x1b8c 3 x +elementwise_binary.h 172 0x1b8c 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1b96 x +vector.hpp 1139 0x1b96 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x1b96 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0x1b96 3 x +elementwise_binary.h 148 0x1b96 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1ba0 +vector.hpp 1139 0x1ba0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x1ba0 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 170 0x1ba0 3 x +elementwise_binary.h 172 0x1ba0 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1baa x +vector.hpp 1139 0x1baa 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x1baa 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0x1baa 3 x +elementwise_binary.h 148 0x1baa 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1bb4 +vector.hpp 1139 0x1bb4 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x1bb4 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 170 0x1bb4 3 x +elementwise_binary.h 172 0x1bb4 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1bc0 x +vector.hpp 1139 0x1bc0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x1bc0 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0x1bc0 3 x +elementwise_binary.h 148 0x1bc0 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1bd0 +vector.hpp 1139 0x1bd0 1 +vector.hpp 1159 0x1bd0 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1bd0 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x1bd0 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 170 0x1bd0 5 x +elementwise_binary.h 172 0x1bd0 6 x +elementwise_binary.h 177 0x1bd0 7 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1be0 x +vector.hpp 1139 0x1be0 1 x +vector.hpp 1159 0x1be0 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1be0 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x1be0 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0x1be0 5 x +elementwise_binary.h 148 0x1be0 6 x +elementwise_binary.h 154 0x1be0 7 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1bf0 +vector.hpp 1139 0x1bf0 1 +vector.hpp 1159 0x1bf0 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1bf0 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x1bf0 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 170 0x1bf0 5 x +elementwise_binary.h 172 0x1bf0 6 x +elementwise_binary.h 177 0x1bf0 7 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1c00 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1c00 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x1c00 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 154 0x1c00 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1c08 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1c08 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x1c08 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 177 0x1c08 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1c10 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1c10 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x1c10 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 154 0x1c10 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1c18 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1c18 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x1c18 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 177 0x1c18 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1c20 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1c20 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x1c20 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 154 0x1c20 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1c28 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1c28 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x1c28 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 177 0x1c28 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1c30 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1c30 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x1c30 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 154 0x1c30 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1c38 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1c38 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 177 0x1c38 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1c3c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1c3c 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 131 0x1c3c 2 x +elementwise_binary.h 154 0x1c3c 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1c42 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1c42 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 177 0x1c42 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1c46 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1c46 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 154 0x1c46 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1c4a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1c4a 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 177 0x1c4a 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1c4e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1c4e 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 154 0x1c4e 2 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 277 0x1c60 x +superkernels.cpp 282 0x1c60 1 +superkernels.cpp 282 0x1c66 x +superkernels.cpp 277 0x1c6c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x1c72 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 279 0x1c72 1 x +superkernels.cpp 282 0x1c8e x +superkernels.cpp 282 0x1c8e 1 x +superkernels.cpp 279 0x1c94 x +superkernels.cpp 279 0x1c98 +superkernels.cpp 279 0x1c9e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x1ca6 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 285 0x1caa +superkernels.cpp 287 0x1caa 1 +superkernels.cpp 289 0x1caa 2 +superkernels.cpp 301 0x1caa 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x1cb4 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 285 0x1cb4 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 74 0x1cbe +tile.hpp 86 0x1cbe 1 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 285 0x1cca x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x1cd4 +tile.hpp 74 0x1cd8 +tile.hpp 74 0x1cdc x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 289 0x1ce0 +superkernels.cpp 289 0x1ce0 1 x +superkernels.cpp 289 0x1cea +superkernels.cpp 289 0x1cea 1 +superkernels.cpp 298 0x1cea 2 +superkernels.cpp 287 0x1cf4 x +superkernels.cpp 290 0x1cf4 1 +superkernels.cpp 299 0x1cf4 2 +superkernels.cpp 287 0x1d0a +superkernels.cpp 289 0x1d10 x +superkernels.cpp 287 0x1d14 x +superkernels.cpp 289 0x1d18 x +superkernels.cpp 290 0x1d1c x +superkernels.cpp 298 0x1d20 +superkernels.cpp 299 0x1d26 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x1d30 x +io_buffer_main.h 242 0x1d34 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 293 0x1d34 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x1d3e +io_buffer_main.h 242 0x1d42 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 293 0x1d46 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 259 0x1d4a x +io_buffer_main.h 242 0x1d56 x +io_buffer_main.h 242 0x1d56 1 x +io_buffer_main.h 242 0x1d5a + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 293 0x1d5a 1 x +superkernels.cpp 293 0x1d60 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 419 0x1d64 +io_buffer_main.h 419 0x1d64 1 +io_buffer_main.h 419 0x1d6e x +io_buffer_main.h 242 0x1d72 x +io_buffer_main.h 242 0x1d7a +io_buffer_main.h 242 0x1d7e +io_buffer_main.h 242 0x1d82 +io_buffer_main.h 259 0x1d86 x +io_buffer_main.h 242 0x1d94 x +io_buffer_main.h 242 0x1d94 1 x +io_buffer_main.h 242 0x1d98 +io_buffer_main.h 419 0x1da4 x +io_buffer_main.h 348 0x1da8 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 298 0x1da8 1 +superkernels.cpp 299 0x1da8 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x1db6 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 298 0x1dba x +superkernels.cpp 299 0x1dc0 x +superkernels.cpp 301 0x1dc0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x1dc6 x +io_buffer_main.h 149 0x1dca +io_buffer_main.h 351 0x1dce +io_buffer_main.h 351 0x1dce 1 +io_buffer_main.h 149 0x1dd4 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 301 0x1dda x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 351 0x1de0 +io_buffer_main.h 351 0x1de0 1 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 298 0x1de4 x +superkernels.cpp 299 0x1de8 x +superkernels.cpp 299 0x1dec +superkernels.cpp 298 0x1df0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 348 0x1e00 x +io_buffer_main.h 351 0x1e00 1 +io_buffer_main.h 351 0x1e00 2 +io_buffer_main.h 351 0x1e00 3 +io_buffer_main.h 351 0x1e00 4 +io_buffer_main.h 449 0x1e00 5 +io_buffer_main.h 449 0x1e00 6 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 305 0x1e0a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 449 0x1e1a x +io_buffer_main.h 351 0x1e1e x +io_buffer_main.h 348 0x1e22 +io_buffer_main.h 351 0x1e30 +io_buffer_main.h 348 0x1e34 x +io_buffer_main.h 351 0x1e34 1 +io_buffer_main.h 449 0x1e46 x +io_buffer_main.h 351 0x1e4a x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 305 0x1e4a 1 +superkernels.cpp 306 0x1e4a 2 +superkernels.cpp 305 0x1e54 x +superkernels.cpp 305 0x1e58 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 351 0x1e64 x +io_buffer_main.h 351 0x1e68 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 305 0x1e6c x +superkernels.cpp 305 0x1e70 +superkernels.cpp 306 0x1e80 +superkernels.cpp 306 0x1e84 x +superkernels.cpp 308 0x1e90 +superkernels.cpp 308 0x1ea6 x +superkernels.cpp 308 0x1eae + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 128 0x1ec0 x +gemm_bfp16_params.h 130 0x1ec0 1 +gemm_bfp16_params.h 130 0x1ec0 2 x +gemm_bfp16_params.h 58 0x1eca +gemm_bfp16_params.h 59 0x1eca 1 +gemm_bfp16_params.h 61 0x1eca 2 +gemm_bfp16_params.h 71 0x1eca 3 +gemm_bfp16_params.h 86 0x1eca 4 +gemm_bfp16_params.h 93 0x1eca 5 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 6982 0x1ed4 +aie.hpp 6982 0x1ed4 1 +aie.hpp 6982 0x1ed4 2 +aie.hpp 6982 0x1ed4 3 +aie.hpp 6982 0x1ed4 4 +aie.hpp 7054 0x1ed4 5 +aie.hpp 7056 0x1ed4 6 +aie.hpp 7057 0x1ed4 7 +aie.hpp 7072 0x1ed4 8 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 44 0x1ed4 9 +gemm_bfp16_params.h 44 0x1ed4 10 +gemm_bfp16_params.h 80 0x1ed4 11 +gemm_bfp16_params.h 99 0x1ed4 12 +gemm_bfp16_params.h 138 0x1ed4 13 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7054 0x1ede +aie.hpp 7072 0x1ede 1 +aie.hpp 7073 0x1ede 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 44 0x1ede 3 +gemm_bfp16_params.h 88 0x1ede 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7053 0x1ee8 +aie.hpp 7053 0x1ee8 1 +aie.hpp 7053 0x1ee8 2 +aie.hpp 7057 0x1ee8 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 85 0x1ee8 4 +gemm_bfp16_params.h 88 0x1ee8 5 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include-lite/../include/tuple: +tuple 562 0x1ef2 +tuple 562 0x1ef2 1 +tuple 562 0x1ef2 2 +tuple 562 0x1ef2 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 6982 0x1ef2 4 +aie.hpp 6982 0x1ef2 5 +aie.hpp 7056 0x1ef2 6 +aie.hpp 7056 0x1ef2 7 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 138 0x1ef2 8 +gemm_bfp16_params.h 138 0x1ef8 +gemm_bfp16_params.h 58 0x1efc x +gemm_bfp16_params.h 130 0x1efc 1 x +gemm_bfp16_params.h 93 0x1f02 x +gemm_bfp16_params.h 131 0x1f02 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7054 0x1f08 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 80 0x1f08 1 x +gemm_bfp16_params.h 82 0x1f0e +gemm_bfp16_params.h 85 0x1f0e 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7053 0x1f14 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 59 0x1f1e x +gemm_bfp16_params.h 131 0x1f1e 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7054 0x1f24 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 132 0x1f24 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7054 0x1f2a +aie.hpp 7056 0x1f30 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 65 0x1f3a x +gemm_bfp16_params.h 132 0x1f3a 1 x +gemm_bfp16_params.h 61 0x1f40 x +gemm_bfp16_params.h 133 0x1f40 1 x +gemm_bfp16_params.h 71 0x1f46 x +gemm_bfp16_params.h 88 0x1f46 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7057 0x1f4c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 86 0x1f4c 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7054 0x1f52 +aie.hpp 7056 0x1f52 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 71 0x1f52 2 x +gemm_bfp16_params.h 86 0x1f52 3 +gemm_bfp16_params.h 88 0x1f5c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7053 0x1f60 x +aie.hpp 7057 0x1f64 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 99 0x1f68 x +gemm_bfp16_params.h 133 0x1f68 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7053 0x1f6e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 134 0x1f6e 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7056 0x1f74 x +aie.hpp 7054 0x1f78 x +aie.hpp 7054 0x1f7c +aie.hpp 7057 0x1f7c 1 x +aie.hpp 7054 0x1f80 x +aie.hpp 7056 0x1f80 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 134 0x1f88 x +gemm_bfp16_params.h 135 0x1f8c x +gemm_bfp16_params.h 135 0x1f9c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7056 0x1fa0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 44 0x1fa0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7056 0x1fa8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 44 0x1fa8 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7056 0x1fae + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 44 0x1fae 1 x +gemm_bfp16_params.h 44 0x1fb6 +gemm_bfp16_params.h 44 0x1fba +gemm_bfp16_params.h 44 0x1fbe +gemm_bfp16_params.h 44 0x1fc2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7072 0x1fc6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 44 0x1fc6 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7073 0x1fcc x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 44 0x1fcc 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7054 0x1fd2 x +aie.hpp 7072 0x1fd2 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 44 0x1fd2 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7056 0x1fd8 +aie.hpp 7057 0x1fd8 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 44 0x1fd8 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7056 0x1fde x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 44 0x1fde 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7072 0x1fe4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 44 0x1fe4 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7057 0x1fea x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 44 0x1fea 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7057 0x1ff0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 44 0x1ff0 1 x +gemm_bfp16_params.h 44 0x1ff6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 6982 0x1ffa x +aie.hpp 6982 0x1ffe +aie.hpp 6982 0x2002 +aie.hpp 6982 0x2006 +aie.hpp 6982 0x200a +aie.hpp 6982 0x200e +aie.hpp 6982 0x2012 +aie.hpp 6982 0x2016 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include-lite/../include/tuple: +tuple 562 0x201a x +tuple 562 0x201e +tuple 562 0x2022 +tuple 562 0x2026 +tuple 562 0x202a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7054 0x202a 1 x +aie.hpp 7057 0x202a 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include-lite/../include/tuple: +tuple 562 0x2030 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 6982 0x2034 x +aie.hpp 7056 0x2034 1 x +aie.hpp 6982 0x203a +aie.hpp 7073 0x203a 1 x +aie.hpp 6982 0x2040 x +aie.hpp 6982 0x2044 +aie.hpp 6982 0x2048 +aie.hpp 6982 0x204c +aie.hpp 6982 0x2050 +aie.hpp 6982 0x2054 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include-lite/../include/tuple: +tuple 562 0x2058 x +tuple 562 0x205c +tuple 562 0x2060 +tuple 562 0x2064 +tuple 562 0x2068 +tuple 562 0x206c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 6982 0x2070 x +aie.hpp 6982 0x2074 +aie.hpp 6982 0x2078 +aie.hpp 6982 0x207c +aie.hpp 6982 0x2080 +aie.hpp 6982 0x2084 +aie.hpp 6982 0x2088 +aie.hpp 6982 0x208c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include-lite/../include/tuple: +tuple 562 0x2090 x +tuple 562 0x2094 +tuple 562 0x2098 +tuple 562 0x209c +tuple 562 0x20a0 +tuple 562 0x20a4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 6982 0x20a8 x +aie.hpp 6982 0x20ac +aie.hpp 6982 0x20b0 +aie.hpp 6982 0x20b4 +aie.hpp 6982 0x20b8 +aie.hpp 6982 0x20bc +aie.hpp 6982 0x20c0 +aie.hpp 6982 0x20c4 +aie.hpp 6982 0x20c8 +aie.hpp 6982 0x20cc + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 139 0x20cc 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 6982 0x20d2 x +aie.hpp 6982 0x20d6 +aie.hpp 6982 0x20da +aie.hpp 6982 0x20de + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 138 0x20e2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 38 0x20f0 x +gemm_bfp16.h 41 0x20f0 1 +gemm_bfp16.h 38 0x20f6 +gemm_bfp16.h 41 0x20fc x +gemm_bfp16.h 68 0x2100 x +gemm_bfp16.h 41 0x2106 +gemm_bfp16.h 68 0x2106 1 +gemm_bfp16.h 53 0x2110 +gemm_bfp16.h 53 0x2110 1 +gemm_bfp16.h 39 0x211a +gemm_bfp16.h 41 0x211a 1 x +gemm_bfp16.h 53 0x211a 2 x +gemm_bfp16.h 75 0x211a 3 +gemm_bfp16.h 39 0x2124 x +gemm_bfp16.h 41 0x212a +gemm_bfp16.h 75 0x212a 1 x +gemm_bfp16.h 41 0x2130 +gemm_bfp16.h 42 0x2130 1 +gemm_bfp16.h 41 0x2136 x +gemm_bfp16.h 42 0x2136 1 +gemm_bfp16.h 75 0x2136 2 +gemm_bfp16.h 42 0x2142 +gemm_bfp16.h 42 0x2142 1 x +gemm_bfp16.h 75 0x2142 2 +gemm_bfp16.h 41 0x214c x +gemm_bfp16.h 41 0x214c 1 x +gemm_bfp16.h 75 0x214c 2 x +gemm_bfp16.h 42 0x2152 x +gemm_bfp16.h 44 0x2152 1 +gemm_bfp16.h 69 0x2152 2 +gemm_bfp16.h 76 0x2152 3 +gemm_bfp16.h 77 0x2152 4 +gemm_bfp16.h 41 0x2160 x +gemm_bfp16.h 44 0x2160 1 x +gemm_bfp16.h 54 0x2160 2 +gemm_bfp16.h 75 0x2160 3 +gemm_bfp16.h 54 0x2170 x +gemm_bfp16.h 68 0x2170 1 x +gemm_bfp16.h 54 0x2182 +gemm_bfp16.h 63 0x2182 1 +gemm_bfp16.h 64 0x2182 2 +gemm_bfp16.h 54 0x2186 +gemm_bfp16.h 63 0x2186 1 x +gemm_bfp16.h 54 0x218a x +gemm_bfp16.h 64 0x218a 1 x +gemm_bfp16.h 69 0x2196 x +gemm_bfp16.h 71 0x219a x +gemm_bfp16.h 69 0x21a0 x +gemm_bfp16.h 69 0x21a4 +gemm_bfp16.h 70 0x21a8 x +gemm_bfp16.h 70 0x21ac +gemm_bfp16.h 71 0x21b0 x +gemm_bfp16.h 71 0x21c0 +gemm_bfp16.h 72 0x21d0 x +gemm_bfp16.h 72 0x21e0 +gemm_bfp16.h 53 0x21f0 x +gemm_bfp16.h 75 0x21f0 1 x +gemm_bfp16.h 76 0x21f0 2 x +gemm_bfp16.h 77 0x21f8 x +gemm_bfp16.h 80 0x2204 x +gemm_bfp16.h 80 0x2208 +gemm_bfp16.h 83 0x2220 x +gemm_bfp16.h 92 0x2220 1 +gemm_bfp16.h 93 0x2220 2 +gemm_bfp16.h 97 0x2220 3 +gemm_bfp16.h 97 0x2220 4 +gemm_bfp16.h 92 0x222c +gemm_bfp16.h 94 0x222c 1 +gemm_bfp16.h 94 0x222c 2 +gemm_bfp16.h 95 0x222c 3 +gemm_bfp16.h 96 0x222c 4 +gemm_bfp16.h 92 0x2236 x +gemm_bfp16.h 93 0x2236 1 +gemm_bfp16.h 94 0x2236 2 +gemm_bfp16.h 94 0x2236 3 x +gemm_bfp16.h 95 0x2236 4 +gemm_bfp16.h 96 0x2236 5 +gemm_bfp16.h 92 0x2242 +gemm_bfp16.h 93 0x2242 1 x +gemm_bfp16.h 95 0x2242 2 +gemm_bfp16.h 93 0x224c +gemm_bfp16.h 94 0x224c 1 +gemm_bfp16.h 94 0x224c 2 x +gemm_bfp16.h 95 0x224c 3 +gemm_bfp16.h 96 0x224c 4 +gemm_bfp16.h 94 0x2256 +gemm_bfp16.h 95 0x2256 1 x +gemm_bfp16.h 96 0x2256 2 +gemm_bfp16.h 92 0x2260 x +gemm_bfp16.h 94 0x2260 1 x +gemm_bfp16.h 95 0x2260 2 +gemm_bfp16.h 94 0x226a +gemm_bfp16.h 95 0x226a 1 x +gemm_bfp16.h 96 0x226a 2 x +gemm_bfp16.h 95 0x2274 +gemm_bfp16.h 96 0x2274 1 +gemm_bfp16.h 95 0x227e +gemm_bfp16.h 96 0x227e 1 +gemm_bfp16.h 97 0x227e 2 +gemm_bfp16.h 97 0x227e 3 +gemm_bfp16.h 98 0x227e 4 +gemm_bfp16.h 98 0x227e 5 +gemm_bfp16.h 98 0x227e 6 +gemm_bfp16.h 99 0x227e 7 +gemm_bfp16.h 99 0x227e 8 +gemm_bfp16.h 99 0x227e 9 +gemm_bfp16.h 100 0x227e 10 +gemm_bfp16.h 100 0x227e 11 +gemm_bfp16.h 100 0x227e 12 +gemm_bfp16.h 92 0x2288 x +gemm_bfp16.h 96 0x2288 1 +gemm_bfp16.h 92 0x2292 +gemm_bfp16.h 96 0x2292 1 x +gemm_bfp16.h 98 0x229c +gemm_bfp16.h 99 0x229c 1 +gemm_bfp16.h 100 0x229c 2 +gemm_bfp16.h 97 0x22b0 x +gemm_bfp16.h 97 0x22b0 1 x +gemm_bfp16.h 98 0x22b0 2 x +gemm_bfp16.h 98 0x22b6 +gemm_bfp16.h 98 0x22b6 1 +gemm_bfp16.h 98 0x22b6 2 +gemm_bfp16.h 99 0x22b6 3 x +gemm_bfp16.h 98 0x22bc x +gemm_bfp16.h 98 0x22bc 1 x +gemm_bfp16.h 98 0x22bc 2 x +gemm_bfp16.h 99 0x22bc 3 +gemm_bfp16.h 99 0x22bc 4 +gemm_bfp16.h 99 0x22bc 5 +gemm_bfp16.h 100 0x22bc 6 x +gemm_bfp16.h 94 0x22c6 x +gemm_bfp16.h 99 0x22c6 1 x +gemm_bfp16.h 99 0x22c6 2 x +gemm_bfp16.h 99 0x22c6 3 x +gemm_bfp16.h 100 0x22c6 4 +gemm_bfp16.h 100 0x22c6 5 +gemm_bfp16.h 100 0x22c6 6 +gemm_bfp16.h 95 0x22d0 x +gemm_bfp16.h 96 0x22d4 x +gemm_bfp16.h 100 0x22d4 1 x +gemm_bfp16.h 100 0x22d4 2 x +gemm_bfp16.h 100 0x22d4 3 x +gemm_bfp16.h 93 0x22de x +gemm_bfp16.h 94 0x22de 1 x +gemm_bfp16.h 93 0x22e4 +gemm_bfp16.h 94 0x22e4 1 +gemm_bfp16.h 95 0x22e4 2 x +gemm_bfp16.h 94 0x22ee x +gemm_bfp16.h 95 0x22ee 1 +gemm_bfp16.h 96 0x22ee 2 x +gemm_bfp16.h 94 0x22f8 +gemm_bfp16.h 96 0x22f8 1 +gemm_bfp16.h 95 0x22fe x +gemm_bfp16.h 95 0x2302 +gemm_bfp16.h 96 0x2306 x +gemm_bfp16.h 96 0x230a +gemm_bfp16.h 97 0x2320 x +gemm_bfp16.h 97 0x2320 1 x +gemm_bfp16.h 98 0x2320 2 x +gemm_bfp16.h 98 0x2326 +gemm_bfp16.h 98 0x2326 1 +gemm_bfp16.h 98 0x2326 2 +gemm_bfp16.h 102 0x2326 3 x +gemm_bfp16.h 98 0x232c x +gemm_bfp16.h 98 0x232c 1 x +gemm_bfp16.h 98 0x232c 2 x +gemm_bfp16.h 99 0x232c 3 x +gemm_bfp16.h 99 0x2332 +gemm_bfp16.h 99 0x2332 1 +gemm_bfp16.h 99 0x2332 2 +gemm_bfp16.h 100 0x2332 3 x +gemm_bfp16.h 99 0x2338 x +gemm_bfp16.h 99 0x2338 1 x +gemm_bfp16.h 99 0x2338 2 x +gemm_bfp16.h 100 0x2338 3 +gemm_bfp16.h 100 0x2338 4 +gemm_bfp16.h 100 0x2338 5 +gemm_bfp16.h 100 0x2340 x +gemm_bfp16.h 100 0x2340 1 x +gemm_bfp16.h 100 0x2340 2 x +gemm_bfp16.h 225 0x2350 x +gemm_bfp16.h 231 0x2350 1 +gemm_bfp16.h 231 0x2350 2 +gemm_bfp16.h 231 0x235c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x2366 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x2366 1 +array_helpers.hpp 313 0x2366 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 903 0x2366 3 +accum.hpp 940 0x2366 4 +accum.hpp 940 0x2366 5 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 225 0x236e +gemm_bfp16.h 231 0x2378 +gemm_bfp16.h 231 0x2378 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 74 0x238a +tile.hpp 74 0x238a 1 +tile.hpp 74 0x2396 x +tile.hpp 86 0x2396 1 +tile.hpp 86 0x2396 2 +tile.hpp 86 0x23a2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 235 0x23a2 1 +gemm_bfp16.h 235 0x23ac +gemm_bfp16.h 235 0x23ac 1 x +gemm_bfp16.h 235 0x23b2 +gemm_bfp16.h 235 0x23b6 +gemm_bfp16.h 235 0x23ba +gemm_bfp16.h 235 0x23c0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include-lite/../include/tuple: +tuple 562 0x23d0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x23d0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 252 0x23d0 2 +gemm_bfp16.h 252 0x23d0 3 +gemm_bfp16.h 252 0x23da x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 6982 0x23de + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 252 0x23de 1 +gemm_bfp16.h 126 0x23e2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x23e6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include-lite/../include/tuple: +tuple 562 0x23ea + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 252 0x23ea 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 6982 0x23f2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 252 0x23f2 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include-lite/../include/tuple: +tuple 562 0x23f6 x +tuple 562 0x23f6 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 252 0x23f6 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include-lite/../include/tuple: +tuple 562 0x23fc + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 134 0x23fc 1 x +gemm_bfp16.h 252 0x23fc 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include-lite/../include/tuple: +tuple 562 0x2406 +tuple 562 0x2406 1 +tuple 562 0x2406 2 x +tuple 562 0x240c +tuple 562 0x2410 +tuple 562 0x2410 1 +tuple 562 0x2414 +tuple 562 0x2418 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 109 0x241c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 113 0x241c 1 +gemm_bfp16.h 135 0x241c 2 +gemm_bfp16.h 110 0x2426 x +gemm_bfp16.h 135 0x2426 1 x +gemm_bfp16.h 110 0x2430 +gemm_bfp16.h 135 0x2430 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 109 0x243a +vector.hpp 109 0x243a 1 +vector.hpp 1365 0x243a 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x243a 3 +transpose.hpp 225 0x243a 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 109 0x2446 +vector.hpp 109 0x244a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 135 0x244a 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 109 0x2450 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 110 0x2450 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 109 0x2458 x +vector.hpp 109 0x245c +vector.hpp 109 0x2464 +vector.hpp 109 0x2480 +vector.hpp 109 0x2480 1 +vector.hpp 1365 0x2480 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x2480 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 109 0x2490 x +vector.hpp 109 0x2490 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x2490 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 109 0x24a0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 113 0x24a0 1 x +gemm_bfp16.h 113 0x24b0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 109 0x24d0 x +vector.hpp 1365 0x24d0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x24d0 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 109 0x24d4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x24d4 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 109 0x24d8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 113 0x24d8 1 x +gemm_bfp16.h 113 0x24e0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 109 0x24e8 +vector.hpp 1365 0x24e8 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x24e8 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 109 0x24ec x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x24ec 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 113 0x24f0 x +gemm_bfp16.h 113 0x24f8 +gemm_bfp16.h 141 0x2500 x +gemm_bfp16.h 148 0x2504 x +gemm_bfp16.h 148 0x2508 +gemm_bfp16.h 148 0x2508 1 +gemm_bfp16.h 148 0x250e +gemm_bfp16.h 172 0x2512 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x2526 +aie_core.h 81 0x2526 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 174 0x2526 2 +gemm_bfp16.h 175 0x2526 3 +gemm_bfp16.h 176 0x2526 4 +gemm_bfp16.h 177 0x2526 5 +gemm_bfp16.h 202 0x2526 6 +gemm_bfp16.h 203 0x2526 7 +gemm_bfp16.h 204 0x2526 8 +gemm_bfp16.h 205 0x2526 9 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x252c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x252c 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 172 0x252c 2 +gemm_bfp16.h 172 0x2532 x +gemm_bfp16.h 172 0x2538 +gemm_bfp16.h 172 0x253e +gemm_bfp16.h 172 0x2544 +gemm_bfp16.h 172 0x254a +gemm_bfp16.h 172 0x254a 1 +gemm_bfp16.h 172 0x2550 +gemm_bfp16.h 172 0x2550 1 +gemm_bfp16.h 172 0x2556 +gemm_bfp16.h 172 0x255a +gemm_bfp16.h 172 0x255e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x2562 +aie_core.h 81 0x2562 1 +aie_core.h 100 0x2562 2 +aie_core.h 100 0x2562 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x2562 4 +array_helpers.hpp 252 0x2562 5 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 903 0x2562 6 +accum.hpp 940 0x2562 7 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 172 0x2562 8 +gemm_bfp16.h 172 0x2568 +gemm_bfp16.h 175 0x2568 1 +gemm_bfp16.h 268 0x2568 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x2572 +aie_core.h 100 0x2572 1 +aie_core.h 100 0x2572 2 +aie_core.h 100 0x2572 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x2572 4 +array_helpers.hpp 252 0x2572 5 +array_helpers.hpp 252 0x2572 6 +array_helpers.hpp 313 0x2572 7 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 903 0x2572 8 +accum.hpp 940 0x2572 9 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 172 0x2572 10 +gemm_bfp16.h 175 0x2572 11 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x2580 +aie_core.h 100 0x2580 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x2580 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 175 0x2580 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x258a +aie_core.h 81 0x258a 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 174 0x258a 2 +gemm_bfp16.h 175 0x258a 3 +gemm_bfp16.h 176 0x258a 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x2594 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 116 0x2594 1 +mmul_bf16_bf16.hpp 116 0x2594 2 +mmul_bf16_bf16.hpp 116 0x2594 3 +mmul_bf16_bf16.hpp 116 0x2594 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 175 0x2594 5 +gemm_bfp16.h 176 0x2594 6 +gemm_bfp16.h 177 0x2594 7 x +gemm_bfp16.h 202 0x2594 8 +gemm_bfp16.h 203 0x2594 9 +gemm_bfp16.h 204 0x2594 10 +gemm_bfp16.h 204 0x2594 11 +gemm_bfp16.h 205 0x2594 12 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x25a2 +aie_core.h 100 0x25a2 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x25a2 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 113 0x25a2 3 +mmul_bf16_bf16.hpp 113 0x25a2 4 +mmul_bf16_bf16.hpp 114 0x25a2 5 +mmul_bf16_bf16.hpp 114 0x25a2 6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 177 0x25a2 7 +gemm_bfp16.h 268 0x25a2 8 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x25b0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x25b0 1 +transpose.hpp 224 0x25b0 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 175 0x25b0 3 +gemm_bfp16.h 177 0x25b0 4 +gemm_bfp16.h 203 0x25b0 5 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x25c0 +aie_core.h 100 0x25c0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1365 0x25c0 2 +vector.hpp 1365 0x25c0 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x25c0 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x25c0 5 +transpose.hpp 225 0x25c0 6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 177 0x25c0 7 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x25d0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x25d0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 111 0x25d0 2 +mmul_bf16_bf16.hpp 111 0x25d0 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 174 0x25d0 4 +gemm_bfp16.h 176 0x25d0 5 x +gemm_bfp16.h 177 0x25d0 6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x25e0 +aie_core.h 100 0x25e0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x25e0 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 174 0x25e0 3 x +gemm_bfp16.h 176 0x25e0 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x25ec x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 176 0x25ec 1 x +gemm_bfp16.h 182 0x25ec 2 x +gemm_bfp16.h 176 0x25f6 +gemm_bfp16.h 182 0x25f6 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x2600 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x2600 1 x +array_helpers.hpp 313 0x2600 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 176 0x2600 3 +gemm_bfp16.h 182 0x2600 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 313 0x260e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 174 0x260e 1 x +gemm_bfp16.h 175 0x260e 2 +gemm_bfp16.h 203 0x260e 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x2618 x +array_helpers.hpp 252 0x2618 1 x +array_helpers.hpp 313 0x2618 2 +array_helpers.hpp 252 0x2620 +array_helpers.hpp 313 0x2620 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 174 0x2620 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x2628 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x2628 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 174 0x2628 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x262e +array_helpers.hpp 313 0x262e 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 174 0x262e 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x2638 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x2638 1 x +array_helpers.hpp 313 0x2638 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 903 0x2638 3 x +accum.hpp 940 0x2638 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x263e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x263e 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 175 0x263e 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1365 0x2648 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x2648 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x264c x +array_helpers.hpp 313 0x264c 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x264c 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1365 0x2656 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x2656 1 +array_helpers.hpp 313 0x2656 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x2656 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 111 0x2656 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x2656 5 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x2662 x +array_helpers.hpp 252 0x2662 1 x +array_helpers.hpp 313 0x2662 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 903 0x2662 3 x +accum.hpp 940 0x2662 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x2662 5 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1365 0x266e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 313 0x266e 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x266e 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x2674 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x2674 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 111 0x2674 2 x +mmul_bf16_bf16.hpp 113 0x2674 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x267e x +aie_core.h 100 0x267e 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x267e 2 +array_helpers.hpp 252 0x267e 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 903 0x267e 4 x +accum.hpp 940 0x267e 5 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x267e 6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1365 0x2688 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x2688 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 114 0x268c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x2690 x +array_helpers.hpp 252 0x2690 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x2690 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 111 0x2690 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x269a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 903 0x269a 1 x +accum.hpp 940 0x269a 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 113 0x269a 3 x +mmul_bf16_bf16.hpp 114 0x26a0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x26b0 x +array_helpers.hpp 313 0x26b0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 111 0x26b0 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x26ba x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x26ba 1 +array_helpers.hpp 252 0x26ba 2 +array_helpers.hpp 313 0x26ba 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x26ba 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 116 0x26ba 5 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x26ba 6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x26ca + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1365 0x26ca 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x26ca 2 x +array_helpers.hpp 313 0x26ca 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 903 0x26ca 4 x +accum.hpp 940 0x26ca 5 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 113 0x26ca 6 x +mmul_bf16_bf16.hpp 116 0x26ca 7 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x26ca 8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x26da +array_helpers.hpp 313 0x26da 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 114 0x26da 2 x +mmul_bf16_bf16.hpp 116 0x26da 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x26e6 x +array_helpers.hpp 252 0x26e6 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x26e6 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 111 0x26e6 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x26e6 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1365 0x26f2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x26f2 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 903 0x26f2 2 x +accum.hpp 940 0x26f2 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 113 0x26f2 4 x +mmul_bf16_bf16.hpp 116 0x26f2 5 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x26f2 6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 114 0x2700 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x2710 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x2710 1 x +array_helpers.hpp 252 0x2710 2 x +array_helpers.hpp 313 0x2710 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x2710 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 111 0x2710 5 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 204 0x2710 6 +gemm_bfp16.h 205 0x2710 7 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x2720 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x2720 1 +array_helpers.hpp 313 0x2720 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 903 0x2720 3 x +accum.hpp 940 0x2720 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 116 0x2720 5 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x2720 6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 203 0x2720 7 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1365 0x2730 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 113 0x2730 1 x +mmul_bf16_bf16.hpp 116 0x2730 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x2730 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 202 0x2730 4 x +gemm_bfp16.h 203 0x2730 5 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 114 0x2740 x +mmul_bf16_bf16.hpp 116 0x2740 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 176 0x2740 2 +gemm_bfp16.h 204 0x2740 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x274a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x274a 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 111 0x274a 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x274a 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1365 0x2754 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x2754 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 903 0x2754 2 x +accum.hpp 940 0x2754 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 113 0x2754 4 x +mmul_bf16_bf16.hpp 116 0x2754 5 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x2754 6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x2762 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 114 0x2762 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 204 0x2762 2 x +gemm_bfp16.h 205 0x2762 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x276c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 116 0x276c 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x2774 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x2774 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 111 0x2774 2 x +mmul_bf16_bf16.hpp 113 0x2774 3 x +mmul_bf16_bf16.hpp 116 0x277e x +mmul_bf16_bf16.hpp 114 0x2782 x +mmul_bf16_bf16.hpp 116 0x2782 1 +mmul_bf16_bf16.hpp 116 0x278a x +mmul_bf16_bf16.hpp 113 0x2790 x +mmul_bf16_bf16.hpp 114 0x2794 x +mmul_bf16_bf16.hpp 116 0x2794 1 x +mmul_bf16_bf16.hpp 116 0x27a0 +mmul_bf16_bf16.hpp 116 0x27a4 +mmul_bf16_bf16.hpp 116 0x27a8 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 202 0x27ac x +gemm_bfp16.h 202 0x27b0 +gemm_bfp16.h 202 0x27b4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x27b8 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 202 0x27b8 1 +gemm_bfp16.h 205 0x27b8 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x27c2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 175 0x27c2 1 +gemm_bfp16.h 203 0x27c2 2 +gemm_bfp16.h 203 0x27c2 3 x +gemm_bfp16.h 203 0x27cc +gemm_bfp16.h 203 0x27d0 +gemm_bfp16.h 175 0x27d4 +gemm_bfp16.h 203 0x27d4 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x27dc + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 175 0x27dc 1 x +gemm_bfp16.h 204 0x27dc 2 x +gemm_bfp16.h 175 0x27e6 +gemm_bfp16.h 204 0x27e6 1 +gemm_bfp16.h 172 0x27ec x +gemm_bfp16.h 175 0x27ec 1 +gemm_bfp16.h 204 0x27ec 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x27f6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 174 0x27f6 1 +gemm_bfp16.h 175 0x27f6 2 x +gemm_bfp16.h 176 0x27f6 3 +gemm_bfp16.h 177 0x27f6 4 x +gemm_bfp16.h 204 0x27f6 5 x +gemm_bfp16.h 177 0x2802 +gemm_bfp16.h 205 0x2802 1 x +gemm_bfp16.h 177 0x2808 x +gemm_bfp16.h 205 0x2808 1 +gemm_bfp16.h 177 0x280e +gemm_bfp16.h 205 0x280e 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x2814 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 174 0x2814 1 +gemm_bfp16.h 176 0x2814 2 x +gemm_bfp16.h 177 0x2814 3 x +gemm_bfp16.h 205 0x2814 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x2820 +aie_core.h 100 0x2820 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x2820 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 174 0x2820 3 x +gemm_bfp16.h 176 0x2820 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x282c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 176 0x282c 1 x +gemm_bfp16.h 182 0x282c 2 x +gemm_bfp16.h 176 0x2836 +gemm_bfp16.h 182 0x2836 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x2840 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x2840 1 x +array_helpers.hpp 313 0x2840 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 176 0x2840 3 +gemm_bfp16.h 182 0x2840 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 313 0x284e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 174 0x284e 1 x +gemm_bfp16.h 175 0x284e 2 +gemm_bfp16.h 203 0x284e 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x2858 x +array_helpers.hpp 252 0x2858 1 x +array_helpers.hpp 313 0x2858 2 +array_helpers.hpp 252 0x2860 +array_helpers.hpp 313 0x2860 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 174 0x2860 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x2868 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x2868 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 174 0x2868 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x286e +array_helpers.hpp 313 0x286e 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 174 0x286e 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x2878 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x2878 1 x +array_helpers.hpp 313 0x2878 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 903 0x2878 3 x +accum.hpp 940 0x2878 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x287e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x287e 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 175 0x287e 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1365 0x2888 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x2888 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x288c x +array_helpers.hpp 313 0x288c 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x288c 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1365 0x2896 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x2896 1 +array_helpers.hpp 313 0x2896 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x2896 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 111 0x2896 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x2896 5 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x28a2 x +array_helpers.hpp 252 0x28a2 1 x +array_helpers.hpp 313 0x28a2 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 903 0x28a2 3 x +accum.hpp 940 0x28a2 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x28a2 5 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1365 0x28ae x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 313 0x28ae 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x28ae 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x28b4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x28b4 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 111 0x28b4 2 x +mmul_bf16_bf16.hpp 113 0x28b4 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x28be x +aie_core.h 100 0x28be 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x28be 2 +array_helpers.hpp 252 0x28be 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 903 0x28be 4 x +accum.hpp 940 0x28be 5 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x28be 6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1365 0x28c8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x28c8 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 114 0x28cc x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x28d0 x +array_helpers.hpp 252 0x28d0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x28d0 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 111 0x28d0 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x28da + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 903 0x28da 1 x +accum.hpp 940 0x28da 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 113 0x28da 3 x +mmul_bf16_bf16.hpp 114 0x28e0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x28f0 x +array_helpers.hpp 313 0x28f0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 111 0x28f0 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x28fa x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x28fa 1 +array_helpers.hpp 252 0x28fa 2 +array_helpers.hpp 313 0x28fa 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x28fa 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 116 0x28fa 5 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x28fa 6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x290a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1365 0x290a 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x290a 2 x +array_helpers.hpp 313 0x290a 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 903 0x290a 4 x +accum.hpp 940 0x290a 5 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 113 0x290a 6 x +mmul_bf16_bf16.hpp 116 0x290a 7 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x290a 8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x291a +array_helpers.hpp 313 0x291a 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 114 0x291a 2 x +mmul_bf16_bf16.hpp 116 0x291a 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x2926 x +array_helpers.hpp 252 0x2926 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x2926 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 111 0x2926 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x2926 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1365 0x2932 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x2932 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 903 0x2932 2 x +accum.hpp 940 0x2932 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 113 0x2932 4 x +mmul_bf16_bf16.hpp 116 0x2932 5 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x2932 6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 114 0x2940 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x2950 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x2950 1 x +array_helpers.hpp 313 0x2950 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x2950 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 111 0x2950 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 204 0x2950 5 +gemm_bfp16.h 205 0x2950 6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x295e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x295e 1 +array_helpers.hpp 313 0x295e 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 903 0x295e 3 x +accum.hpp 940 0x295e 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 116 0x295e 5 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x295e 6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 203 0x295e 7 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1365 0x296e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 113 0x296e 1 x +mmul_bf16_bf16.hpp 116 0x296e 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x296e 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 202 0x296e 4 x +gemm_bfp16.h 268 0x296e 5 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 114 0x297e x +mmul_bf16_bf16.hpp 116 0x297e 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 203 0x297e 2 x +gemm_bfp16.h 268 0x297e 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x298c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x298c 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 111 0x298c 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x298c 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 268 0x298c 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1365 0x299a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x299a 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 903 0x299a 2 x +accum.hpp 940 0x299a 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 113 0x299a 4 x +mmul_bf16_bf16.hpp 116 0x299a 5 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x299a 6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 114 0x29a8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 176 0x29a8 1 +gemm_bfp16.h 204 0x29a8 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x29b0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 116 0x29b0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 204 0x29b0 2 x +gemm_bfp16.h 205 0x29b0 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 111 0x29ba x +mmul_bf16_bf16.hpp 113 0x29ba 1 x +mmul_bf16_bf16.hpp 116 0x29c2 x +mmul_bf16_bf16.hpp 114 0x29c6 x +mmul_bf16_bf16.hpp 116 0x29c6 1 +mmul_bf16_bf16.hpp 116 0x29ce x +mmul_bf16_bf16.hpp 113 0x29d4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 268 0x29d4 1 +gemm_bfp16.h 268 0x29d4 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 114 0x29de x +mmul_bf16_bf16.hpp 116 0x29de 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 268 0x29de 2 +gemm_bfp16.h 269 0x29de 3 +gemm_bfp16.h 269 0x29de 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 116 0x29f2 +mmul_bf16_bf16.hpp 116 0x29f6 +mmul_bf16_bf16.hpp 116 0x29fa + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 202 0x29fe x +gemm_bfp16.h 268 0x29fe 1 x +gemm_bfp16.h 202 0x2a04 +gemm_bfp16.h 268 0x2a04 1 +gemm_bfp16.h 202 0x2a0a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x2a0e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 202 0x2a0e 1 +gemm_bfp16.h 205 0x2a0e 2 x +gemm_bfp16.h 203 0x2a18 x +gemm_bfp16.h 203 0x2a1c +gemm_bfp16.h 203 0x2a20 +gemm_bfp16.h 203 0x2a24 +gemm_bfp16.h 204 0x2a28 x +gemm_bfp16.h 204 0x2a2c +gemm_bfp16.h 204 0x2a30 +gemm_bfp16.h 268 0x2a30 1 x +gemm_bfp16.h 204 0x2a3a x +gemm_bfp16.h 205 0x2a3e x +gemm_bfp16.h 205 0x2a42 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x2a46 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 205 0x2a46 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x2a4e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 205 0x2a4e 1 +gemm_bfp16.h 269 0x2a54 x +gemm_bfp16.h 269 0x2a58 +gemm_bfp16.h 269 0x2a5c +gemm_bfp16.h 269 0x2a62 +gemm_bfp16.h 269 0x2a6e +gemm_bfp16.h 272 0x2aa0 x +gemm_bfp16.h 274 0x2ab0 +gemm_bfp16.h 274 0x2ac4 x +gemm_bfp16.h 274 0x2ad0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x2ae0 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 381 0x2ae0 1 x +superkernels.cpp 382 0x2ae0 2 +superkernels.cpp 388 0x2ae0 3 +superkernels.cpp 382 0x2aea x +superkernels.cpp 382 0x2afa +superkernels.cpp 382 0x2afa 1 +superkernels.cpp 381 0x2b00 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x2b0e +io_buffer_main.h 149 0x2b16 +io_buffer_main.h 242 0x2b16 1 +io_buffer_main.h 348 0x2b16 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x2b1a + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 384 0x2b1a 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 74 0x2b24 +tile.hpp 86 0x2b24 1 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 384 0x2b30 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x2b3a +tile.hpp 74 0x2b3e +tile.hpp 74 0x2b42 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 385 0x2b50 +superkernels.cpp 385 0x2b50 1 x +superkernels.cpp 385 0x2b66 +superkernels.cpp 387 0x2b70 +superkernels.cpp 388 0x2b70 1 x +superkernels.cpp 387 0x2b7a x +superkernels.cpp 389 0x2b8a x +superkernels.cpp 389 0x2b8a 1 x +superkernels.cpp 388 0x2b90 x +superkernels.cpp 388 0x2b94 +superkernels.cpp 387 0x2b98 x +superkernels.cpp 387 0x2b9c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x2ba2 +io_buffer_main.h 242 0x2ba6 x +io_buffer_main.h 242 0x2baa +io_buffer_main.h 242 0x2bae +io_buffer_main.h 242 0x2bb2 +io_buffer_main.h 259 0x2bb8 x +io_buffer_main.h 242 0x2bc4 x +io_buffer_main.h 242 0x2bc4 1 x +io_buffer_main.h 242 0x2bc8 +io_buffer_main.h 419 0x2bcc +io_buffer_main.h 419 0x2bd6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 285 0x2be0 +gemm_bfp16.h 285 0x2be0 1 +gemm_bfp16.h 285 0x2bea + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x2bf0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 285 0x2bf0 1 +gemm_bfp16.h 285 0x2bfa x +gemm_bfp16.h 285 0x2bfe + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x2c02 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 393 0x2c02 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x2c08 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 285 0x2c10 +gemm_bfp16.h 285 0x2c14 x +gemm_bfp16.h 285 0x2c1a +gemm_bfp16.h 285 0x2c1e +gemm_bfp16.h 285 0x2c22 +gemm_bfp16.h 285 0x2c26 +gemm_bfp16.h 285 0x2c2a + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 393 0x2c30 +superkernels.cpp 393 0x2c30 1 x +superkernels.cpp 394 0x2c30 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 348 0x2c3a + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 393 0x2c3a 1 +superkernels.cpp 393 0x2c4e +superkernels.cpp 393 0x2c52 +superkernels.cpp 394 0x2c58 +superkernels.cpp 398 0x2c58 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 348 0x2c64 x +io_buffer_main.h 351 0x2c64 1 +io_buffer_main.h 449 0x2c64 2 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 394 0x2c64 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 348 0x2c6e +io_buffer_main.h 449 0x2c7e x +io_buffer_main.h 351 0x2c82 x +io_buffer_main.h 351 0x2c92 +io_buffer_main.h 351 0x2c96 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 397 0x2ca0 +superkernels.cpp 398 0x2ca0 1 +superkernels.cpp 397 0x2ca6 x +superkernels.cpp 397 0x2ca6 1 +superkernels.cpp 397 0x2cb0 +superkernels.cpp 397 0x2cc0 +superkernels.cpp 397 0x2cc4 +superkernels.cpp 398 0x2cd6 x +superkernels.cpp 400 0x2ce0 +superkernels.cpp 400 0x2cf0 x +superkernels.cpp 400 0x2cf4 +superkernels.cpp - 0x2cf5 + + +rmsnorm_row_major_params.h: +File name Line number Starting address View Stmt + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/rms_norm_adf_wrapper.cpp: +rms_norm_adf_wrapper.cpp 76 0x32d0 x +rms_norm_adf_wrapper.cpp 83 0x32d0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x32d6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/rms_norm_adf_wrapper.cpp: +rms_norm_adf_wrapper.cpp 83 0x32d6 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 285 0x32e4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/rms_norm_adf_wrapper.cpp: +rms_norm_adf_wrapper.cpp 83 0x32f0 +rms_norm_adf_wrapper.cpp 83 0x32f0 1 +rms_norm_adf_wrapper.cpp 76 0x32f6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x330c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/rms_norm_adf_wrapper.cpp: +rms_norm_adf_wrapper.cpp 85 0x330c 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 74 0x3316 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/rms_norm_adf_wrapper.cpp: +rms_norm_adf_wrapper.cpp 84 0x3316 1 +rms_norm_adf_wrapper.cpp 84 0x3320 x +rms_norm_adf_wrapper.cpp 89 0x3320 1 x +rms_norm_adf_wrapper.cpp 85 0x3326 x +rms_norm_adf_wrapper.cpp 89 0x3326 1 +rms_norm_adf_wrapper.cpp 89 0x332c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x3332 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/rms_norm_adf_wrapper.cpp: +rms_norm_adf_wrapper.cpp 89 0x3332 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 74 0x333c +tile.hpp 86 0x333c 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/rms_norm_adf_wrapper.cpp: +rms_norm_adf_wrapper.cpp 85 0x3346 x +rms_norm_adf_wrapper.cpp 85 0x334a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 74 0x3358 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/rms_norm_adf_wrapper.cpp: +rms_norm_adf_wrapper.cpp 85 0x335c x +rms_norm_adf_wrapper.cpp 95 0x3360 +rms_norm_adf_wrapper.cpp 97 0x3360 1 +rms_norm_adf_wrapper.cpp 95 0x336a x +rms_norm_adf_wrapper.cpp 105 0x336a 1 +rms_norm_adf_wrapper.cpp 97 0x3374 x +rms_norm_adf_wrapper.cpp 99 0x337a x +rms_norm_adf_wrapper.cpp 99 0x337e +rms_norm_adf_wrapper.cpp 99 0x3382 +rms_norm_adf_wrapper.cpp 99 0x3386 +rms_norm_adf_wrapper.cpp 99 0x338a +rms_norm_adf_wrapper.cpp 99 0x338e +rms_norm_adf_wrapper.cpp 100 0x3392 x +rms_norm_adf_wrapper.cpp 99 0x3398 x +rms_norm_adf_wrapper.cpp 100 0x3398 1 +rms_norm_adf_wrapper.cpp 99 0x339e +rms_norm_adf_wrapper.cpp 99 0x33a2 +rms_norm_adf_wrapper.cpp 99 0x33a6 +rms_norm_adf_wrapper.cpp 99 0x33aa +rms_norm_adf_wrapper.cpp 99 0x33ae +rms_norm_adf_wrapper.cpp 99 0x33b2 +rms_norm_adf_wrapper.cpp 99 0x33b6 +rms_norm_adf_wrapper.cpp 100 0x33ba x +rms_norm_adf_wrapper.cpp 100 0x33be +rms_norm_adf_wrapper.cpp 102 0x33c4 x +rms_norm_adf_wrapper.cpp 99 0x33d2 x +rms_norm_adf_wrapper.cpp 99 0x33d6 +rms_norm_adf_wrapper.cpp 105 0x33da +rms_norm_adf_wrapper.cpp 106 0x33da 1 x +rms_norm_adf_wrapper.cpp 105 0x33e4 +rms_norm_adf_wrapper.cpp 105 0x33ea x +rms_norm_adf_wrapper.cpp 105 0x33ee +rms_norm_adf_wrapper.cpp 107 0x3400 x +rms_norm_adf_wrapper.cpp 107 0x3400 1 +rms_norm_adf_wrapper.cpp 107 0x340a +rms_norm_adf_wrapper.cpp 107 0x340e +rms_norm_adf_wrapper.cpp 107 0x341c +rms_norm_adf_wrapper.cpp 107 0x3422 +rms_norm_adf_wrapper.cpp 107 0x3428 +rms_norm_adf_wrapper.cpp 110 0x3430 +rms_norm_adf_wrapper.cpp 110 0x3430 1 +rms_norm_adf_wrapper.cpp 110 0x343a x +rms_norm_adf_wrapper.cpp 110 0x343a 1 +rms_norm_adf_wrapper.cpp 110 0x344a +rms_norm_adf_wrapper.cpp 110 0x344e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 285 0x3460 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/rms_norm_adf_wrapper.cpp: +rms_norm_adf_wrapper.cpp 110 0x3460 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 285 0x3466 +io_buffer_main.h 285 0x346a +io_buffer_main.h 285 0x346e +io_buffer_main.h 302 0x3472 x +io_buffer_main.h 285 0x3480 x +io_buffer_main.h 285 0x3480 1 x +io_buffer_main.h 285 0x3484 +io_buffer_main.h 434 0x3488 +io_buffer_main.h 434 0x3492 x +io_buffer_main.h 397 0x3496 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/rms_norm_adf_wrapper.cpp: +rms_norm_adf_wrapper.cpp 143 0x3496 1 +rms_norm_adf_wrapper.cpp 143 0x3496 2 +rms_norm_adf_wrapper.cpp 143 0x34a4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x34a8 x +io_buffer_main.h 149 0x34ac + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/rms_norm_adf_wrapper.cpp: +rms_norm_adf_wrapper.cpp 145 0x34b2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 400 0x34b8 +io_buffer_main.h 400 0x34b8 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/rms_norm_adf_wrapper.cpp: +rms_norm_adf_wrapper.cpp 145 0x34b8 2 +rms_norm_adf_wrapper.cpp 159 0x34b8 3 +rms_norm_adf_wrapper.cpp 159 0x34c0 +rms_norm_adf_wrapper.cpp 143 0x34c4 x +rms_norm_adf_wrapper.cpp 143 0x34ca +rms_norm_adf_wrapper.cpp 159 0x34d0 +rms_norm_adf_wrapper.cpp 159 0x34d4 +rms_norm_adf_wrapper.cpp 159 0x34d4 1 x +rms_norm_adf_wrapper.cpp 159 0x34de +rms_norm_adf_wrapper.cpp 159 0x34e2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 400 0x34e8 +io_buffer_main.h 400 0x34e8 1 +io_buffer_main.h 397 0x34f0 +io_buffer_main.h 397 0x34f4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/rms_norm_adf_wrapper.cpp: +rms_norm_adf_wrapper.cpp 159 0x34f4 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 397 0x3500 x +io_buffer_main.h 400 0x3500 1 +io_buffer_main.h 464 0x3500 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/rms_norm_adf_wrapper.cpp: +rms_norm_adf_wrapper.cpp 159 0x3500 3 +rms_norm_adf_wrapper.cpp 159 0x350a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 464 0x3518 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/rms_norm_adf_wrapper.cpp: +rms_norm_adf_wrapper.cpp 161 0x351c +rms_norm_adf_wrapper.cpp 169 0x351c 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 400 0x3526 +io_buffer_main.h 400 0x3526 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/rms_norm_adf_wrapper.cpp: +rms_norm_adf_wrapper.cpp 161 0x3526 2 +rms_norm_adf_wrapper.cpp 161 0x3530 +rms_norm_adf_wrapper.cpp 161 0x3534 x +rms_norm_adf_wrapper.cpp 169 0x3544 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 400 0x3548 x +io_buffer_main.h 400 0x354c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/rms_norm_adf_wrapper.cpp: +rms_norm_adf_wrapper.cpp 161 0x354c 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 400 0x3552 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/rms_norm_adf_wrapper.cpp: +rms_norm_adf_wrapper.cpp 161 0x3552 1 +rms_norm_adf_wrapper.cpp 161 0x3552 2 +rms_norm_adf_wrapper.cpp 161 0x355c x +rms_norm_adf_wrapper.cpp 169 0x3562 x +rms_norm_adf_wrapper.cpp 178 0x35a0 x +rms_norm_adf_wrapper.cpp 184 0x35a0 1 +rms_norm_adf_wrapper.cpp 184 0x35a6 x +rms_norm_adf_wrapper.cpp 178 0x35aa +rms_norm_adf_wrapper.cpp 184 0x35bc +rms_norm_adf_wrapper.cpp 184 0x35bc 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x35d6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/rms_norm_adf_wrapper.cpp: +rms_norm_adf_wrapper.cpp 186 0x35d6 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 74 0x35e0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/rms_norm_adf_wrapper.cpp: +rms_norm_adf_wrapper.cpp 185 0x35e0 1 +rms_norm_adf_wrapper.cpp 185 0x35ea x +rms_norm_adf_wrapper.cpp 190 0x35ea 1 x +rms_norm_adf_wrapper.cpp 186 0x35f0 x +rms_norm_adf_wrapper.cpp 190 0x35f0 1 +rms_norm_adf_wrapper.cpp 190 0x35f6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x35fc + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/rms_norm_adf_wrapper.cpp: +rms_norm_adf_wrapper.cpp 190 0x35fc 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 74 0x3606 +tile.hpp 86 0x3606 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/rms_norm_adf_wrapper.cpp: +rms_norm_adf_wrapper.cpp 186 0x3610 x +rms_norm_adf_wrapper.cpp 186 0x3614 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 74 0x3622 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/rms_norm_adf_wrapper.cpp: +rms_norm_adf_wrapper.cpp 186 0x3626 x +rms_norm_adf_wrapper.cpp 197 0x3630 +rms_norm_adf_wrapper.cpp 197 0x3630 1 +rms_norm_adf_wrapper.cpp 199 0x3630 2 x +rms_norm_adf_wrapper.cpp 201 0x3630 3 +rms_norm_adf_wrapper.cpp 199 0x363c +rms_norm_adf_wrapper.cpp 201 0x363c 1 +rms_norm_adf_wrapper.cpp 199 0x3646 +rms_norm_adf_wrapper.cpp 199 0x364c +rms_norm_adf_wrapper.cpp 199 0x3652 +rms_norm_adf_wrapper.cpp 199 0x3656 +rms_norm_adf_wrapper.cpp 200 0x365a x +rms_norm_adf_wrapper.cpp 199 0x365e x +rms_norm_adf_wrapper.cpp 200 0x365e 1 +rms_norm_adf_wrapper.cpp 197 0x3664 x +rms_norm_adf_wrapper.cpp 199 0x3664 1 +rms_norm_adf_wrapper.cpp 199 0x366a x +rms_norm_adf_wrapper.cpp 199 0x366e +rms_norm_adf_wrapper.cpp 199 0x3672 +rms_norm_adf_wrapper.cpp 199 0x3676 +rms_norm_adf_wrapper.cpp 199 0x367a +rms_norm_adf_wrapper.cpp 199 0x367e +rms_norm_adf_wrapper.cpp 200 0x3682 x +rms_norm_adf_wrapper.cpp 200 0x3686 +rms_norm_adf_wrapper.cpp 203 0x3690 x +rms_norm_adf_wrapper.cpp 199 0x3698 x +rms_norm_adf_wrapper.cpp 199 0x369c +rms_norm_adf_wrapper.cpp 201 0x36a0 x +rms_norm_adf_wrapper.cpp 202 0x36a0 1 +rms_norm_adf_wrapper.cpp 202 0x36a6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x36b0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/rms_norm_adf_wrapper.cpp: +rms_norm_adf_wrapper.cpp 204 0x36b0 1 +rms_norm_adf_wrapper.cpp 204 0x36ba x +rms_norm_adf_wrapper.cpp 206 0x36ba 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x36c4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/rms_norm_adf_wrapper.cpp: +rms_norm_adf_wrapper.cpp 51 0x36c4 1 x +rms_norm_adf_wrapper.cpp 51 0x36ce +rms_norm_adf_wrapper.cpp 206 0x36ce 1 x +rms_norm_adf_wrapper.cpp 215 0x36ce 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x36d8 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/rms_norm_adf_wrapper.cpp: +rms_norm_adf_wrapper.cpp 51 0x36d8 1 x +rms_norm_adf_wrapper.cpp 205 0x36e2 +rms_norm_adf_wrapper.cpp 224 0x36e2 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x36e8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/rms_norm_adf_wrapper.cpp: +rms_norm_adf_wrapper.cpp 204 0x36ec x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x36f0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/rms_norm_adf_wrapper.cpp: +rms_norm_adf_wrapper.cpp 205 0x36f0 1 x +rms_norm_adf_wrapper.cpp 206 0x36f6 x +rms_norm_adf_wrapper.cpp 43 0x36fa x +rms_norm_adf_wrapper.cpp 206 0x36fe x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x3702 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/rms_norm_adf_wrapper.cpp: +rms_norm_adf_wrapper.cpp 51 0x370e x +rms_norm_adf_wrapper.cpp 51 0x3712 +rms_norm_adf_wrapper.cpp 206 0x3712 1 x +rms_norm_adf_wrapper.cpp 206 0x371a +rms_norm_adf_wrapper.cpp 206 0x3720 +rms_norm_adf_wrapper.cpp 215 0x3720 1 x +rms_norm_adf_wrapper.cpp 215 0x3726 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3730 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/rms_norm_adf_wrapper.cpp: +rms_norm_adf_wrapper.cpp 54 0x3730 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x3790 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/rms_norm_adf_wrapper.cpp: +rms_norm_adf_wrapper.cpp 56 0x3790 1 x +rms_norm_adf_wrapper.cpp 61 0x37a0 x +rms_norm_adf_wrapper.cpp 61 0x37a6 +rms_norm_adf_wrapper.cpp 61 0x37ac +rms_norm_adf_wrapper.cpp 63 0x37b0 x +rms_norm_adf_wrapper.cpp 64 0x37b0 1 x +rms_norm_adf_wrapper.cpp 63 0x37d0 +rms_norm_adf_wrapper.cpp 65 0x37d0 1 x +rms_norm_adf_wrapper.cpp 228 0x3820 +rms_norm_adf_wrapper.cpp 228 0x3830 x +rms_norm_adf_wrapper.cpp 228 0x3838 +rms_norm_adf_wrapper.cpp 222 0x383c x +rms_norm_adf_wrapper.cpp 224 0x3840 x +rms_norm_adf_wrapper.cpp 224 0x3844 +rms_norm_adf_wrapper.cpp 237 0x3b20 x +rms_norm_adf_wrapper.cpp 243 0x3b20 1 +rms_norm_adf_wrapper.cpp 243 0x3b26 x +rms_norm_adf_wrapper.cpp 237 0x3b2a +rms_norm_adf_wrapper.cpp 243 0x3b3e +rms_norm_adf_wrapper.cpp 243 0x3b3e 1 +rms_norm_adf_wrapper.cpp 295 0x3b4c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 74 0x3b5c +tile.hpp 86 0x3b5c 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/rms_norm_adf_wrapper.cpp: +rms_norm_adf_wrapper.cpp 244 0x3b66 +rms_norm_adf_wrapper.cpp 245 0x3b66 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 74 0x3b70 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/rms_norm_adf_wrapper.cpp: +rms_norm_adf_wrapper.cpp 244 0x3b70 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 74 0x3b7a x +tile.hpp 86 0x3b7a 1 +tile.hpp 86 0x3b84 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/rms_norm_adf_wrapper.cpp: +rms_norm_adf_wrapper.cpp 245 0x3b84 1 x +rms_norm_adf_wrapper.cpp 249 0x3b8a x +rms_norm_adf_wrapper.cpp 245 0x3b8e x +rms_norm_adf_wrapper.cpp 249 0x3b8e 1 +rms_norm_adf_wrapper.cpp 249 0x3b94 +rms_norm_adf_wrapper.cpp 245 0x3b9a +rms_norm_adf_wrapper.cpp 249 0x3ba4 x +rms_norm_adf_wrapper.cpp 245 0x3ba8 x +rms_norm_adf_wrapper.cpp 256 0x3bb0 +rms_norm_adf_wrapper.cpp 257 0x3bb0 1 +rms_norm_adf_wrapper.cpp 256 0x3bba x +rms_norm_adf_wrapper.cpp 259 0x3bba 1 +rms_norm_adf_wrapper.cpp 267 0x3bba 2 +rms_norm_adf_wrapper.cpp 257 0x3bc6 x +rms_norm_adf_wrapper.cpp 259 0x3bce x +rms_norm_adf_wrapper.cpp 261 0x3bd2 x +rms_norm_adf_wrapper.cpp 261 0x3bd6 +rms_norm_adf_wrapper.cpp 261 0x3bda +rms_norm_adf_wrapper.cpp 261 0x3bde +rms_norm_adf_wrapper.cpp 261 0x3be2 +rms_norm_adf_wrapper.cpp 261 0x3be6 +rms_norm_adf_wrapper.cpp 262 0x3bea x +rms_norm_adf_wrapper.cpp 261 0x3bee x +rms_norm_adf_wrapper.cpp 262 0x3bee 1 +rms_norm_adf_wrapper.cpp 261 0x3bf4 +rms_norm_adf_wrapper.cpp 261 0x3bf8 +rms_norm_adf_wrapper.cpp 261 0x3bfc +rms_norm_adf_wrapper.cpp 261 0x3c00 +rms_norm_adf_wrapper.cpp 261 0x3c04 +rms_norm_adf_wrapper.cpp 261 0x3c08 +rms_norm_adf_wrapper.cpp 261 0x3c0c +rms_norm_adf_wrapper.cpp 262 0x3c10 x +rms_norm_adf_wrapper.cpp 262 0x3c14 +rms_norm_adf_wrapper.cpp 264 0x3c1a x +rms_norm_adf_wrapper.cpp 261 0x3c28 x +rms_norm_adf_wrapper.cpp 261 0x3c2c +rms_norm_adf_wrapper.cpp 267 0x3c30 +rms_norm_adf_wrapper.cpp 268 0x3c30 1 x +rms_norm_adf_wrapper.cpp 267 0x3c3a +rms_norm_adf_wrapper.cpp 267 0x3c40 x +rms_norm_adf_wrapper.cpp 267 0x3c44 +rms_norm_adf_wrapper.cpp 269 0x3c50 x +rms_norm_adf_wrapper.cpp 269 0x3c50 1 +rms_norm_adf_wrapper.cpp 269 0x3c5a +rms_norm_adf_wrapper.cpp 269 0x3c5e +rms_norm_adf_wrapper.cpp 269 0x3c6c +rms_norm_adf_wrapper.cpp 269 0x3c72 +rms_norm_adf_wrapper.cpp 269 0x3c78 +rms_norm_adf_wrapper.cpp 279 0x3c80 +rms_norm_adf_wrapper.cpp 279 0x3c80 1 +rms_norm_adf_wrapper.cpp 281 0x3c80 2 +rms_norm_adf_wrapper.cpp 282 0x3c80 3 +rms_norm_adf_wrapper.cpp 282 0x3c80 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x3c8a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/rms_norm_adf_wrapper.cpp: +rms_norm_adf_wrapper.cpp 279 0x3c8a 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x3c94 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/rms_norm_adf_wrapper.cpp: +rms_norm_adf_wrapper.cpp 279 0x3c94 1 +rms_norm_adf_wrapper.cpp 280 0x3c94 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x3c9e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/rms_norm_adf_wrapper.cpp: +rms_norm_adf_wrapper.cpp 279 0x3ca2 x +rms_norm_adf_wrapper.cpp 279 0x3caa +rms_norm_adf_wrapper.cpp 279 0x3cae +rms_norm_adf_wrapper.cpp 279 0x3cb2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x3cb6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/rms_norm_adf_wrapper.cpp: +rms_norm_adf_wrapper.cpp 282 0x3cba x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x3cbe x +io_buffer_main.h 149 0x3cc4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/rms_norm_adf_wrapper.cpp: +rms_norm_adf_wrapper.cpp 279 0x3cc8 x +rms_norm_adf_wrapper.cpp 279 0x3cc8 1 x +rms_norm_adf_wrapper.cpp 281 0x3ccc x +rms_norm_adf_wrapper.cpp 281 0x3cd0 +rms_norm_adf_wrapper.cpp 282 0x3cd4 x +rms_norm_adf_wrapper.cpp 279 0x3cd8 x +rms_norm_adf_wrapper.cpp 282 0x3cd8 1 +rms_norm_adf_wrapper.cpp 279 0x3cde +rms_norm_adf_wrapper.cpp 284 0x3ce2 x +rms_norm_adf_wrapper.cpp 279 0x3ce8 x +rms_norm_adf_wrapper.cpp 289 0x3cec +rms_norm_adf_wrapper.cpp 280 0x3cf2 x +rms_norm_adf_wrapper.cpp 280 0x3cf6 +rms_norm_adf_wrapper.cpp 281 0x3cfa x +rms_norm_adf_wrapper.cpp 284 0x3cfa 1 +rms_norm_adf_wrapper.cpp 289 0x3d00 x +rms_norm_adf_wrapper.cpp 291 0x3d00 1 +rms_norm_adf_wrapper.cpp 295 0x3d00 2 +rms_norm_adf_wrapper.cpp 295 0x3d18 x +rms_norm_adf_wrapper.cpp 295 0x3d1c +rms_norm_adf_wrapper.cpp 289 0x3d22 x +rms_norm_adf_wrapper.cpp 291 0x3d26 x +rms_norm_adf_wrapper.cpp 291 0x3d2c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../misc/rmsnorm_row_major.h: +rmsnorm_row_major.h 48 0x2f70 x +rmsnorm_row_major.h 60 0x2f70 1 +rmsnorm_row_major.h 65 0x2f70 2 +rmsnorm_row_major.h 65 0x2f70 3 +rmsnorm_row_major.h 60 0x2f7c +rmsnorm_row_major.h 65 0x2f7c 1 +rmsnorm_row_major.h 53 0x2f86 +rmsnorm_row_major.h 65 0x2f86 1 x +rmsnorm_row_major.h 53 0x2f92 x +rmsnorm_row_major.h 65 0x2f92 1 +rmsnorm_row_major.h 55 0x2f98 x +rmsnorm_row_major.h 65 0x2f98 1 +rmsnorm_row_major.h 58 0x2f9e x +rmsnorm_row_major.h 48 0x2fa2 +rmsnorm_row_major.h 67 0x2fa8 x +rmsnorm_row_major.h 65 0x2fb2 +rmsnorm_row_major.h 65 0x2fb8 x +rmsnorm_row_major.h 67 0x2fbc x +rmsnorm_row_major.h 65 0x2fc6 x +rmsnorm_row_major.h 60 0x2fce x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 102 0x2fd4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../misc/rmsnorm_row_major.h: +rmsnorm_row_major.h 60 0x2fdc +rmsnorm_row_major.h 60 0x2fe0 x +rmsnorm_row_major.h 54 0x2ff0 x +rmsnorm_row_major.h 67 0x2ff4 x +rmsnorm_row_major.h 67 0x2ff4 1 x +rmsnorm_row_major.h 107 0x3004 +rmsnorm_row_major.h 80 0x3010 x +rmsnorm_row_major.h 87 0x3020 x +rmsnorm_row_major.h 87 0x302a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x3034 +accum.hpp 1119 0x3034 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 332 0x3034 2 +add_reduce.hpp 335 0x3034 3 +add_reduce.hpp 337 0x3034 4 +add_reduce.hpp 337 0x3034 5 +add_reduce.hpp 337 0x3034 6 +add_reduce.hpp 337 0x3034 7 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../misc/rmsnorm_row_major.h: +rmsnorm_row_major.h 99 0x3034 8 +rmsnorm_row_major.h 99 0x3034 9 +rmsnorm_row_major.h 99 0x3034 10 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x303e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 38 0x303e 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 335 0x303e 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../misc/rmsnorm_row_major.h: +rmsnorm_row_major.h 89 0x303e 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 335 0x3048 +add_reduce.hpp 335 0x3052 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../misc/rmsnorm_row_major.h: +rmsnorm_row_major.h 80 0x3052 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x305c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../misc/rmsnorm_row_major.h: +rmsnorm_row_major.h 99 0x305c 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x306a +accum.hpp 1119 0x306a 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 38 0x306a 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../misc/rmsnorm_row_major.h: +rmsnorm_row_major.h 99 0x306a 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3070 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../misc/rmsnorm_row_major.h: +rmsnorm_row_major.h 87 0x3070 1 x +rmsnorm_row_major.h 89 0x3070 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x30b0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../misc/rmsnorm_row_major.h: +rmsnorm_row_major.h 89 0x30b0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 38 0x30e0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../misc/rmsnorm_row_major.h: +rmsnorm_row_major.h 97 0x30f0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 38 0x30f8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../misc/rmsnorm_row_major.h: +rmsnorm_row_major.h 99 0x3102 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x3108 x +accum.hpp 940 0x310e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 332 0x3112 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../misc/rmsnorm_row_major.h: +rmsnorm_row_major.h 99 0x3112 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x311a x +accum.hpp 198 0x311a 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../misc/rmsnorm_row_major.h: +rmsnorm_row_major.h 99 0x311e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x3128 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 335 0x312c x +add_reduce.hpp 337 0x312c 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x3134 x +accum.hpp 153 0x3140 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 335 0x3144 x +add_reduce.hpp 337 0x3144 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x314c x +accum.hpp 153 0x3158 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 335 0x315c x +add_reduce.hpp 337 0x315c 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x3164 x +accum.hpp 153 0x3170 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 335 0x3174 x +add_reduce.hpp 337 0x3174 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x317c x +accum.hpp 1119 0x3188 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x318e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../misc/rmsnorm_row_major.h: +rmsnorm_row_major.h 99 0x3194 x +rmsnorm_row_major.h 99 0x3198 +rmsnorm_row_major.h 99 0x3198 1 +rmsnorm_row_major.h 99 0x31a0 +rmsnorm_row_major.h 80 0x31a6 x +rmsnorm_row_major.h 99 0x31aa x +rmsnorm_row_major.h 104 0x31aa 1 x +rmsnorm_row_major.h 99 0x31b0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 38 0x31b4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../misc/rmsnorm_row_major.h: +rmsnorm_row_major.h 99 0x31b8 +rmsnorm_row_major.h 107 0x31c4 x +rmsnorm_row_major.h 107 0x31c8 +rmsnorm_row_major.h 67 0x31e0 x +rmsnorm_row_major.h 70 0x31f0 x +rmsnorm_row_major.h 72 0x31f0 1 +rmsnorm_row_major.h 72 0x31f0 2 +rmsnorm_row_major.h 73 0x31f0 3 +rmsnorm_row_major.h 70 0x31fc +rmsnorm_row_major.h 70 0x3206 +rmsnorm_row_major.h 72 0x3210 x +rmsnorm_row_major.h 73 0x3220 x +rmsnorm_row_major.h 107 0x3296 +rmsnorm_row_major.h 107 0x32b6 +rmsnorm_row_major.h 118 0x3880 x +rmsnorm_row_major.h 125 0x3880 1 +rmsnorm_row_major.h 125 0x3886 x +rmsnorm_row_major.h 126 0x3886 1 +rmsnorm_row_major.h 126 0x388c x +rmsnorm_row_major.h 127 0x3890 x +rmsnorm_row_major.h 118 0x3896 +rmsnorm_row_major.h 128 0x389c x +rmsnorm_row_major.h 129 0x389c 1 +rmsnorm_row_major.h 129 0x38a6 x +rmsnorm_row_major.h 134 0x38a6 1 +rmsnorm_row_major.h 143 0x38a6 2 +rmsnorm_row_major.h 134 0x38b0 x +rmsnorm_row_major.h 143 0x38b0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x38ba + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x38ba 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../misc/rmsnorm_row_major.h: +rmsnorm_row_major.h 127 0x38ba 2 +rmsnorm_row_major.h 128 0x38ba 3 +rmsnorm_row_major.h 143 0x38ba 4 +rmsnorm_row_major.h 144 0x38ba 5 +rmsnorm_row_major.h 127 0x38c6 x +rmsnorm_row_major.h 139 0x38c6 1 +rmsnorm_row_major.h 144 0x38c6 2 +rmsnorm_row_major.h 144 0x38c6 3 +rmsnorm_row_major.h 129 0x38d2 x +rmsnorm_row_major.h 144 0x38d2 1 x +rmsnorm_row_major.h 147 0x38dc x +rmsnorm_row_major.h 147 0x38dc 1 x +rmsnorm_row_major.h 128 0x38e6 x +rmsnorm_row_major.h 129 0x38e6 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x38f0 +vector.hpp 1454 0x38f0 1 +vector.hpp 1454 0x38f0 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1117 0x38f0 3 +accum.hpp 1117 0x38f0 4 +accum.hpp 1119 0x38f0 5 +accum.hpp 1119 0x38f0 6 +accum.hpp 1119 0x38f0 7 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../misc/rmsnorm_row_major.h: +rmsnorm_row_major.h 129 0x38f0 8 +rmsnorm_row_major.h 129 0x38f0 9 x +rmsnorm_row_major.h 130 0x38f0 10 +rmsnorm_row_major.h 130 0x38f0 11 x +rmsnorm_row_major.h 173 0x38f0 12 +rmsnorm_row_major.h 129 0x38fa +rmsnorm_row_major.h 129 0x38fa 1 +rmsnorm_row_major.h 130 0x38fa 2 +rmsnorm_row_major.h 139 0x38fa 3 x +rmsnorm_row_major.h 144 0x38fa 4 +rmsnorm_row_major.h 139 0x3906 +rmsnorm_row_major.h 144 0x3906 1 x +rmsnorm_row_major.h 129 0x3910 x +rmsnorm_row_major.h 130 0x3910 1 x +rmsnorm_row_major.h 139 0x3910 2 x +rmsnorm_row_major.h 144 0x3910 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x3918 +aie_core.h 81 0x3918 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3918 2 +vector.hpp 1139 0x3918 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../misc/rmsnorm_row_major.h: +rmsnorm_row_major.h 161 0x3918 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x3924 +mul_acc32_fp.hpp 36 0x3924 1 +mul_acc32_fp.hpp 36 0x3924 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../misc/rmsnorm_row_major.h: +rmsnorm_row_major.h 161 0x3924 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x392e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x3938 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x3938 1 +vector.hpp 915 0x3938 2 +vector.hpp 915 0x3938 3 +vector.hpp 915 0x3938 4 +vector.hpp 915 0x3938 5 +vector.hpp 915 0x3938 6 +vector.hpp 915 0x3938 7 +vector.hpp 915 0x3938 8 +vector.hpp 915 0x3938 9 +vector.hpp 915 0x3938 10 +vector.hpp 915 0x3938 11 +vector.hpp 915 0x3938 12 +vector.hpp 915 0x3938 13 +vector.hpp 915 0x3938 14 +vector.hpp 915 0x3938 15 +vector.hpp 915 0x3938 16 +vector.hpp 1139 0x3938 17 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../misc/rmsnorm_row_major.h: +rmsnorm_row_major.h 147 0x3946 +rmsnorm_row_major.h 150 0x3950 x +rmsnorm_row_major.h 151 0x3950 1 x +rmsnorm_row_major.h 161 0x3950 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 102 0x3962 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x3968 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x3970 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x397e x +accum.hpp 940 0x3984 x +accum.hpp 153 0x398a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x398e x +vector.hpp 915 0x3992 +vector.hpp 915 0x3996 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x3996 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/elementary.hpp: +elementary.hpp 342 0x3996 2 x +elementary.hpp 381 0x3996 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x399c x +vector.hpp 915 0x399c 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/elementary.hpp: +elementary.hpp 342 0x399c 2 +elementary.hpp 381 0x399c 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x39a2 +vector.hpp 915 0x39a2 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/elementary.hpp: +elementary.hpp 342 0x39a2 2 x +elementary.hpp 381 0x39a2 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x39a8 x +vector.hpp 915 0x39a8 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/elementary.hpp: +elementary.hpp 342 0x39a8 2 +elementary.hpp 381 0x39a8 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x39ae +vector.hpp 915 0x39ae 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/elementary.hpp: +elementary.hpp 342 0x39ae 2 x +elementary.hpp 381 0x39ae 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x39b4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/elementary.hpp: +elementary.hpp 342 0x39b4 1 +elementary.hpp 381 0x39b4 2 +elementary.hpp 381 0x39b4 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x39ba +vector.hpp 915 0x39ba 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/elementary.hpp: +elementary.hpp 342 0x39ba 2 x +elementary.hpp 381 0x39ba 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x39c0 x +vector.hpp 915 0x39c0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/elementary.hpp: +elementary.hpp 342 0x39c0 2 +elementary.hpp 381 0x39c0 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x39c6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/elementary.hpp: +elementary.hpp 342 0x39c6 1 x +elementary.hpp 381 0x39c6 2 x +elementary.hpp 381 0x39c6 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x39cc x +vector.hpp 915 0x39cc 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/elementary.hpp: +elementary.hpp 342 0x39cc 2 +elementary.hpp 381 0x39cc 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x39d2 +vector.hpp 915 0x39d2 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/elementary.hpp: +elementary.hpp 342 0x39d2 2 x +elementary.hpp 381 0x39d2 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x39d8 x +vector.hpp 915 0x39d8 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/elementary.hpp: +elementary.hpp 342 0x39d8 2 +elementary.hpp 381 0x39d8 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x39de + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/elementary.hpp: +elementary.hpp 342 0x39de 1 x +elementary.hpp 381 0x39de 2 x +elementary.hpp 381 0x39de 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x39e4 x +vector.hpp 915 0x39e4 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/elementary.hpp: +elementary.hpp 342 0x39e4 2 +elementary.hpp 381 0x39e4 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x39ea + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/elementary.hpp: +elementary.hpp 342 0x39ea 1 x +elementary.hpp 381 0x39ea 2 x +elementary.hpp 381 0x39ea 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x39f0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/elementary.hpp: +elementary.hpp 342 0x39f0 1 +elementary.hpp 381 0x39f0 2 +elementary.hpp 381 0x39f0 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x39f6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/elementary.hpp: +elementary.hpp 342 0x39f6 1 x +elementary.hpp 381 0x39f6 2 x +elementary.hpp 381 0x39f6 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x39fc x +vector.hpp 915 0x39fc 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/elementary.hpp: +elementary.hpp 342 0x39fc 2 +elementary.hpp 381 0x39fc 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x3a02 +vector.hpp 915 0x3a02 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/elementary.hpp: +elementary.hpp 342 0x3a02 2 x +elementary.hpp 381 0x3a02 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x3a08 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/elementary.hpp: +elementary.hpp 342 0x3a08 1 +elementary.hpp 381 0x3a08 2 +elementary.hpp 381 0x3a08 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x3a0e +vector.hpp 915 0x3a0e 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/elementary.hpp: +elementary.hpp 342 0x3a0e 2 x +elementary.hpp 381 0x3a0e 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x3a14 x +vector.hpp 915 0x3a14 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/elementary.hpp: +elementary.hpp 342 0x3a14 2 +elementary.hpp 381 0x3a14 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x3a1a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/elementary.hpp: +elementary.hpp 342 0x3a1a 1 x +elementary.hpp 381 0x3a1a 2 x +elementary.hpp 381 0x3a1a 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x3a20 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/elementary.hpp: +elementary.hpp 342 0x3a20 1 +elementary.hpp 381 0x3a20 2 +elementary.hpp 381 0x3a20 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x3a26 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/elementary.hpp: +elementary.hpp 342 0x3a26 1 x +elementary.hpp 381 0x3a26 2 x +elementary.hpp 381 0x3a26 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x3a2c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/elementary.hpp: +elementary.hpp 342 0x3a2c 1 +elementary.hpp 381 0x3a2c 2 +elementary.hpp 381 0x3a2c 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x3a32 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/elementary.hpp: +elementary.hpp 342 0x3a32 1 x +elementary.hpp 381 0x3a32 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x3a36 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/elementary.hpp: +elementary.hpp 342 0x3a36 1 +elementary.hpp 381 0x3a36 2 +elementary.hpp 381 0x3a36 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x3a3c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/elementary.hpp: +elementary.hpp 342 0x3a3c 1 x +elementary.hpp 381 0x3a3c 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x3a40 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/elementary.hpp: +elementary.hpp 342 0x3a40 1 +elementary.hpp 381 0x3a40 2 +elementary.hpp 381 0x3a40 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x3a46 +vector.hpp 1454 0x3a46 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1117 0x3a46 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/elementary.hpp: +elementary.hpp 342 0x3a46 3 x +elementary.hpp 381 0x3a46 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x3a4c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/elementary.hpp: +elementary.hpp 342 0x3a4c 1 +elementary.hpp 381 0x3a4c 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1454 0x3a50 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1117 0x3a50 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/elementary.hpp: +elementary.hpp 381 0x3a50 2 x +elementary.hpp 381 0x3a5a +elementary.hpp 381 0x3a5e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1454 0x3a62 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1117 0x3a62 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1454 0x3a68 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1117 0x3a68 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x3a80 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3a80 1 x +vector.hpp 1139 0x3a80 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../misc/rmsnorm_row_major.h: +rmsnorm_row_major.h 165 0x3a80 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x3a92 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x3aa0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x3aa6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x3ac0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x3ac0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../misc/rmsnorm_row_major.h: +rmsnorm_row_major.h 173 0x3ac0 2 x +rmsnorm_row_major.h 147 0x3ad0 x +rmsnorm_row_major.h 176 0x3b00 x +rmsnorm_row_major.h 176 0x3b04 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc: +0_0_reloadable5.cc 20 0x2d10 x +0_0_reloadable5.cc 22 0x2d10 1 +0_0_reloadable5.cc 22 0x2d14 x +0_0_reloadable5.cc 23 0x2d18 x +0_0_reloadable5.cc 25 0x2d1c x +0_0_reloadable5.cc 24 0x2d20 x +0_0_reloadable5.cc 21 0x2d24 x +0_0_reloadable5.cc 29 0x2d40 x +0_0_reloadable5.cc 31 0x2d40 1 +0_0_reloadable5.cc 31 0x2d44 x +0_0_reloadable5.cc 32 0x2d48 x +0_0_reloadable5.cc 34 0x2d4c x +0_0_reloadable5.cc 33 0x2d50 x +0_0_reloadable5.cc 30 0x2d54 x +0_0_reloadable5.cc 38 0x2d70 x +0_0_reloadable5.cc 40 0x2d70 1 +0_0_reloadable5.cc 40 0x2d74 x +0_0_reloadable5.cc 42 0x2d78 x +0_0_reloadable5.cc 41 0x2d7c x +0_0_reloadable5.cc 39 0x2d80 x +0_0_reloadable5.cc 46 0x2d90 x +0_0_reloadable5.cc 48 0x2d90 1 +0_0_reloadable5.cc 48 0x2d94 x +0_0_reloadable5.cc 50 0x2d98 x +0_0_reloadable5.cc 49 0x2d9c x +0_0_reloadable5.cc 47 0x2da0 x +0_0_reloadable5.cc 54 0x2db0 x +0_0_reloadable5.cc 56 0x2db0 1 +0_0_reloadable5.cc 56 0x2db4 x +0_0_reloadable5.cc 57 0x2db8 x +0_0_reloadable5.cc 59 0x2dbc x +0_0_reloadable5.cc 58 0x2dc0 x +0_0_reloadable5.cc 55 0x2dc4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../misc/rmsnorm_row_major_params.h: +rmsnorm_row_major_params.h 45 0x2de0 x +rmsnorm_row_major_params.h 48 0x2de0 1 +rmsnorm_row_major_params.h 49 0x2de0 2 +rmsnorm_row_major_params.h 62 0x2de0 3 +rmsnorm_row_major_params.h 48 0x2dec x +rmsnorm_row_major_params.h 51 0x2dec 1 +rmsnorm_row_major_params.h 62 0x2dec 2 +rmsnorm_row_major_params.h 63 0x2dec 3 +rmsnorm_row_major_params.h 64 0x2dec 4 +rmsnorm_row_major_params.h 65 0x2dec 5 +rmsnorm_row_major_params.h 48 0x2df6 +rmsnorm_row_major_params.h 50 0x2df6 1 +rmsnorm_row_major_params.h 50 0x2e00 x +rmsnorm_row_major_params.h 49 0x2e06 x +rmsnorm_row_major_params.h 68 0x2e06 1 +rmsnorm_row_major_params.h 53 0x2e0c x +rmsnorm_row_major_params.h 45 0x2e10 +rmsnorm_row_major_params.h 51 0x2e1c x +rmsnorm_row_major_params.h 68 0x2e2c x +rmsnorm_row_major_params.h 53 0x2e32 x +rmsnorm_row_major_params.h 54 0x2e36 x +rmsnorm_row_major_params.h 54 0x2e4e +rmsnorm_row_major_params.h 55 0x2e52 x +rmsnorm_row_major_params.h 55 0x2e62 +rmsnorm_row_major_params.h 56 0x2e66 x +rmsnorm_row_major_params.h 56 0x2e76 +rmsnorm_row_major_params.h 57 0x2e7a x +rmsnorm_row_major_params.h 57 0x2e8a +rmsnorm_row_major_params.h 62 0x2e8a 1 x +rmsnorm_row_major_params.h 58 0x2e90 x +rmsnorm_row_major_params.h 62 0x2e90 1 +rmsnorm_row_major_params.h 62 0x2e96 x +rmsnorm_row_major_params.h 63 0x2e96 1 x +rmsnorm_row_major_params.h 63 0x2e9a +rmsnorm_row_major_params.h 58 0x2ea6 x +rmsnorm_row_major_params.h 59 0x2eaa x +rmsnorm_row_major_params.h 59 0x2eba +rmsnorm_row_major_params.h 62 0x2eba 1 x +rmsnorm_row_major_params.h 64 0x2eba 2 x +rmsnorm_row_major_params.h 60 0x2ec0 x +rmsnorm_row_major_params.h 62 0x2ec0 1 +rmsnorm_row_major_params.h 65 0x2ec0 2 x +rmsnorm_row_major_params.h 65 0x2ec6 +rmsnorm_row_major_params.h 62 0x2ecc x +rmsnorm_row_major_params.h 66 0x2ed4 x +rmsnorm_row_major_params.h 60 0x2eda x +rmsnorm_row_major_params.h 64 0x2eda 1 x +rmsnorm_row_major_params.h 62 0x2ee0 x +rmsnorm_row_major_params.h 63 0x2ee4 x +rmsnorm_row_major_params.h 64 0x2ee8 x +rmsnorm_row_major_params.h 65 0x2eec x +rmsnorm_row_major_params.h 66 0x2ef0 x +rmsnorm_row_major_params.h 67 0x2ef0 1 +rmsnorm_row_major_params.h 68 0x2ef0 2 +rmsnorm_row_major_params.h 75 0x2ef0 3 +rmsnorm_row_major_params.h 68 0x2efc x +rmsnorm_row_major_params.h 67 0x2f02 +rmsnorm_row_major_params.h 68 0x2f02 1 +rmsnorm_row_major_params.h 67 0x2f08 x +rmsnorm_row_major_params.h 67 0x2f0c +rmsnorm_row_major_params.h 68 0x2f20 x +rmsnorm_row_major_params.h 68 0x2f26 +rmsnorm_row_major_params.h 68 0x2f2a +rmsnorm_row_major_params.h 75 0x2f40 +rmsnorm_row_major_params.h 75 0x2f52 x +rmsnorm_row_major_params.h 68 0x2f56 x +rmsnorm_row_major_params.h 75 0x2f5a x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc: +0_0_reloadable5.cc 63 0x3570 x +0_0_reloadable5.cc 65 0x3570 1 +0_0_reloadable5.cc 65 0x3574 x +0_0_reloadable5.cc 66 0x3578 x +0_0_reloadable5.cc 68 0x357c x +0_0_reloadable5.cc 67 0x3580 x +0_0_reloadable5.cc 64 0x3584 x +0_0_reloadable5.cc 72 0x3850 x +0_0_reloadable5.cc 74 0x3850 1 +0_0_reloadable5.cc 74 0x3854 x +0_0_reloadable5.cc 75 0x3858 x +0_0_reloadable5.cc 77 0x385c x +0_0_reloadable5.cc 76 0x3860 x +0_0_reloadable5.cc 73 0x3864 x +0_0_reloadable5.cc 81 0x3d40 x +0_0_reloadable5.cc 83 0x3d40 1 +0_0_reloadable5.cc 83 0x3d44 x +0_0_reloadable5.cc 84 0x3d48 x +0_0_reloadable5.cc 86 0x3d4c x +0_0_reloadable5.cc 85 0x3d50 x +0_0_reloadable5.cc 82 0x3d54 x +0_0_reloadable5.cc 103 0x9e0 x +0_0_reloadable5.cc 105 0x9e0 1 +0_0_reloadable5.cc 105 0x9e0 2 x +0_0_reloadable5.cc 103 0x9e6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0x9f0 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc: +0_0_reloadable5.cc 110 0x9f0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0x9f8 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc: +0_0_reloadable5.cc 107 0x9f8 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 614 0xa08 x +io_buffer_compiler.h 614 0xa0c +io_buffer_compiler.h 614 0xa10 +io_buffer_compiler.h 614 0xa14 +io_buffer_compiler.h 614 0xa18 +io_buffer_compiler.h 219 0xa28 x +io_buffer_compiler.h 219 0xa28 1 x +io_buffer_compiler.h 218 0xa2c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 434 0xa30 +io_buffer_main.h 434 0xa30 1 +io_buffer_main.h 434 0xa30 2 +io_buffer_main.h 434 0xa3c x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc: +0_0_reloadable5.cc 107 0xa40 +0_0_reloadable5.cc 107 0xa40 1 +0_0_reloadable5.cc 110 0xa40 2 +0_0_reloadable5.cc 113 0xa40 3 +0_0_reloadable5.cc 107 0xa46 +0_0_reloadable5.cc 107 0xa46 1 x +0_0_reloadable5.cc 107 0xa4c +0_0_reloadable5.cc 107 0xa4c 1 +0_0_reloadable5.cc 107 0xa52 +0_0_reloadable5.cc 110 0xa52 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 434 0xa5c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 614 0xa6e x +io_buffer_compiler.h 614 0xa72 +io_buffer_compiler.h 614 0xa76 +io_buffer_compiler.h 614 0xa7a +io_buffer_compiler.h 614 0xa7e +io_buffer_compiler.h 219 0xa8e x +io_buffer_compiler.h 219 0xa8e 1 x +io_buffer_compiler.h 218 0xa92 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 434 0xa9e x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc: +0_0_reloadable5.cc 110 0xaa2 x +0_0_reloadable5.cc 110 0xaa6 +0_0_reloadable5.cc 110 0xaa6 1 +0_0_reloadable5.cc 110 0xaac +0_0_reloadable5.cc 110 0xaac 1 +0_0_reloadable5.cc 110 0xab2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 614 0xac4 x +io_buffer_compiler.h 614 0xac8 +io_buffer_compiler.h 614 0xacc +io_buffer_compiler.h 614 0xad0 +io_buffer_compiler.h 614 0xad4 +io_buffer_compiler.h 219 0xae4 x +io_buffer_compiler.h 219 0xae4 1 x +io_buffer_compiler.h 218 0xae8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 434 0xaf4 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc: +0_0_reloadable5.cc 113 0xaf8 x +0_0_reloadable5.cc 113 0xafc +0_0_reloadable5.cc 113 0xb00 +0_0_reloadable5.cc 113 0xb06 +0_0_reloadable5.cc 113 0xb18 +0_0_reloadable5.cc 116 0xb1c +0_0_reloadable5.cc 118 0xb1c 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0xb30 +io_buffer_compiler.h 630 0xb30 1 +io_buffer_compiler.h 630 0xb30 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 464 0xb30 3 +io_buffer_main.h 464 0xb30 4 +io_buffer_main.h 464 0xb30 5 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc: +0_0_reloadable5.cc 116 0xb30 6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 464 0xb36 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc: +0_0_reloadable5.cc 118 0xb3a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0xb3e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 464 0xb3e 1 +io_buffer_main.h 464 0xb42 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 629 0xb4a x +io_buffer_compiler.h 629 0xb4e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 464 0xb5e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0xb62 +io_buffer_compiler.h 630 0xb62 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 464 0xb68 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc: +0_0_reloadable5.cc 118 0xb68 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0xb78 x +io_buffer_compiler.h 629 0xb7c x +io_buffer_compiler.h 630 0xb7c 1 +io_buffer_compiler.h 629 0xb82 +io_buffer_compiler.h 630 0xb82 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 464 0xb92 +io_buffer_main.h 464 0xb96 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0xb9a +io_buffer_compiler.h 630 0xb9a 1 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc: +0_0_reloadable5.cc 121 0xba0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0xbae x +io_buffer_compiler.h 629 0xbb2 x +io_buffer_compiler.h 630 0xbb2 1 +io_buffer_compiler.h 629 0xbb8 +io_buffer_compiler.h 630 0xbb8 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 464 0xbca x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc: +0_0_reloadable5.cc 123 0xbce + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0xbd2 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc: +0_0_reloadable5.cc 123 0xbe6 x +0_0_reloadable5.cc 123 0xbec + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0xbf0 x +io_buffer_compiler.h 630 0xbf6 +io_buffer_compiler.h 630 0xbfa +io_buffer_compiler.h 630 0xbfe +io_buffer_compiler.h - 0xbff + + +CU: me_div.c: +File name Line number Starting address View Stmt + +./me_div.c:[++] +me_div.c 108 0x3d70 +me_div.c 108 0x3d70 1 +me_div.c 115 0x3d70 2 x +me_div.c 108 0x3d76 +me_div.c 108 0x3d7a +me_div.c 108 0x3d7e +me_div.c 108 0x3d82 +me_div.c 108 0x3d86 +me_div.c 108 0x3d8a +me_div.c 108 0x3d8e +me_div.c 108 0x3d92 +me_div.c 108 0x3d96 +me_div.c 108 0x3d9a +me_div.c 108 0x3d9e +me_div.c 108 0x3da2 +me_div.c 108 0x3da6 +me_div.c 108 0x3daa +me_div.c 108 0x3dae +me_div.c 108 0x3db2 +me_div.c 108 0x3db6 +me_div.c 108 0x3dba +me_div.c 108 0x3dbe +me_div.c 108 0x3dc2 +me_div.c 108 0x3dc6 +me_div.c 108 0x3dca +me_div.c 108 0x3dce +me_div.c 108 0x3dd2 +me_div.c 108 0x3dd6 +me_div.c 108 0x3dda +me_div.c 108 0x3dde +me_div.c 108 0x3de2 +me_div.c 119 0x3de6 x +me_div.c 108 0x3dea x +me_div.c 108 0x3dee +me_div.c 108 0x3df2 +me_div.c 108 0x3df6 +me_div.c - 0x3df7 + + +CU: No directory table +CU: Empty file name table + - 0x1 + + diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable80/scripts/0_0_reloadable80.bcf b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable80/scripts/0_0_reloadable80.bcf new file mode 100644 index 0000000000000000000000000000000000000000..b5025c34b99f02de39e461699cdc760aa2cbe456 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable80/scripts/0_0_reloadable80.bcf @@ -0,0 +1,16 @@ +_reserved DMb 0x0 0x40000 + +_reserved PM 0x0 0x9e0 //reserved for main elf + +_entry_point _Z13kernelWrapperPPvjjjj +_symbol _Z13kernelWrapperPPvjjjj 0x9e0 + +_reserved DMb 0x7b540 0x800 //reserved for lcp ping-pong buffers +_reserved DMb 0x7bd40 0x40 //reserved for sync buffer +_stack DM_stack 0x7bd80 0x440 //stack for core +_reserved DMb 0x7c1c0 0x40 //reserved for main elf heap +//space for synopsys compiler at 0x7c200 0x800//heap +_reserved DMb 0x40000 0x3b540 + +_reserved DMb 0x7ca00 0x3600 +_reserved DMb 0x80000 0x80000 // And everything else the core can't see diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable80/scripts/0_0_reloadable80.prx b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable80/scripts/0_0_reloadable80.prx new file mode 100644 index 0000000000000000000000000000000000000000..5bfde7f0f70432096dd58403f8ad8629802216d4 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable80/scripts/0_0_reloadable80.prx @@ -0,0 +1,13 @@ + + + \ No newline at end of file diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable80/src/0_0_reloadable80.cc b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable80/src/0_0_reloadable80.cc new file mode 100644 index 0000000000000000000000000000000000000000..5790d6a6d9a916eb5ef17aa4e43ca10c6b97da73 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable80/src/0_0_reloadable80.cc @@ -0,0 +1,123 @@ +// Automatically generated processor driver using AIEngine tool-chain + +#include +#include +#include + + +// Declare Kernel functions and initializers +void superkernel_GemmBfp16(adf::io_buffer>> &__restrict,adf::io_buffer>> &__restrict,const unsigned int (&)[7],adf::io_buffer, adf::locking::async>> &__restrict); +void superkernel_add1d(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict,adf::io_buffer, adf::locking::async>> &__restrict); +void superkernel_add1d_attribute_broadcasting(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict); +void superkernel_sigmoid1d(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict); +void superkernel_mul1d(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict,adf::io_buffer, adf::locking::async>> &__restrict); +#include "rms_norm_adf_wrapper.cpp" + +// Declare Kernel objects and external arrays + + +void _b13786_wrapper(void* args[]) +{ + superkernel_GemmBfp16( + *reinterpret_cast*>(args[0]), + *reinterpret_cast*>(args[1]), + *reinterpret_cast(args[3]), + *reinterpret_cast*>(args[2])); +} + +void _b13811_wrapper(void* args[]) +{ + superkernel_add1d( + *reinterpret_cast*>(args[0]), + *reinterpret_cast(args[3]), + *reinterpret_cast*>(args[1]), + *reinterpret_cast*>(args[2])); +} + +void _b13739_wrapper(void* args[]) +{ + superkernel_add1d_attribute_broadcasting( + *reinterpret_cast*>(args[0]), + *reinterpret_cast(args[2]), + *reinterpret_cast*>(args[1])); +} + +void _b13744_wrapper(void* args[]) +{ + superkernel_sigmoid1d( + *reinterpret_cast*>(args[0]), + *reinterpret_cast(args[2]), + *reinterpret_cast*>(args[1])); +} + +void _b13749_wrapper(void* args[]) +{ + superkernel_mul1d( + *reinterpret_cast*>(args[0]), + *reinterpret_cast(args[3]), + *reinterpret_cast*>(args[1]), + *reinterpret_cast*>(args[2])); +} + +void _b8292_wrapper(void* args[]) +{ + mllib_graphs::rmsnorm_row_major_part1_4x4_wrapper>, adf::io_buffer_config>, adf::io_buffer_config, adf::locking::async>>( + *reinterpret_cast*>(args[0]), + *reinterpret_cast*>(args[1]), + *reinterpret_cast(args[3]), + *reinterpret_cast*>(args[2])); +} + +void _b8300_wrapper(void* args[]) +{ + mllib_graphs::rmsnorm_row_major_part2_4x4_wrapper>, adf::io_buffer_config>, adf::io_buffer_config>>( + *reinterpret_cast*>(args[0]), + *reinterpret_cast*>(args[1]), + *reinterpret_cast(args[3]), + *reinterpret_cast*>(args[2])); +} + +void _b8308_wrapper(void* args[]) +{ + mllib_graphs::rmsnorm_row_major_part3_4x4_wrapper>, adf::io_buffer_config>, adf::io_buffer_config>>( + *reinterpret_cast*>(args[0]), + *reinterpret_cast*>(args[1]), + *reinterpret_cast(args[3]), + *reinterpret_cast*>(args[2])); +} + +using UniformKernelFunc = void (*)(void **); + +static UniformKernelFunc g_uniformKernelFuncs[8] = { + _b13786_wrapper, + _b13811_wrapper, + _b13739_wrapper, + _b13744_wrapper, + _b13749_wrapper, + _b8292_wrapper, + _b8300_wrapper, + _b8308_wrapper +}; + +__attribute__((always_inline)) void kernelWrapper(void* args[], uint32 kernelId, uint32 numSyncIn, uint32 numAsyncIn, uint32 numSyncOut) +{ + uint32 idx = 0; + reinterpret_cast(args[idx])->acquire(numSyncIn > 0); + idx += (numSyncIn > 0) ? 1 : 0; + reinterpret_cast(args[idx])->acquire(numSyncIn > 1); + idx += (numSyncIn > 1) ? 1 : 0; + idx += numAsyncIn; + reinterpret_cast(args[idx])->acquire(numSyncOut > 0); + idx += (numSyncOut > 0) ? 1 : 0; + + (*(g_uniformKernelFuncs[kernelId]))(args); + + idx = 0; + reinterpret_cast(args[idx])->release(numSyncIn > 0); + idx += (numSyncIn > 0) ? 1 : 0; + reinterpret_cast(args[idx])->release(numSyncIn > 1); + idx += (numSyncIn > 1) ? 1 : 0; + idx += numAsyncIn; + reinterpret_cast(args[idx])->release(numSyncOut > 0); + idx += (numSyncOut > 0) ? 1 : 0; +} diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable81/Release/0_0_reloadable81.calltree b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable81/Release/0_0_reloadable81.calltree new file mode 100644 index 0000000000000000000000000000000000000000..07c064f11266eeb7f83e03bc96d24db2685f524f --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable81/Release/0_0_reloadable81.calltree @@ -0,0 +1,90 @@ + +// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri May 30 11:31:54 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// bridge -o../Release/0_0_reloadable11 ../Release/0_0_reloadable11.o -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/isg -g -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable11.bcf -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/softfloat/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork3586024 -pme + + +// Release: ipp V-2024.06-TGT-241219 + +_Z13kernelWrapperPPvjjjj + _Z15_b14285_wrapperPPv (referenced text) + _Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE + _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv + _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv + _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS4_E + _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE + _Z15_b14290_wrapperPPv (referenced text) + _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv + _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv + _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E + _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE (*) + _Z15_b13811_wrapperPPv (referenced text) + _Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE + _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv + _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv + _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E + _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE (*) + _Z15_b13749_wrapperPPv (referenced text) + _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E + _Z15_b14811_wrapperPPv (referenced text) + _Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + _ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv + _ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E + _ZN12me_primitive10udiv_dstepEjjRjS0_ + _ZN12me_primitive10udiv_dstepEjjRjS0_ (*) + _Z14_b8134_wrapperPPv (referenced text) + _ZN12mllib_graphs19softmax_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERA8_KjRNSD_ISE_NSF_3outET1_EE + _Z17softmax_row_majorILi1E8bfloat16S0_Lt1EEvPT0_PT1_33softmax_row_major_internal_params + _Z14_b8096_wrapperPPv (referenced text) + _ZN12mllib_graphs14expand_wrapperI8bfloat16EEvRN3adf9io_bufferIT_NS2_9direction2inENS2_16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEEEERA3_KjRNS3_IS4_NS5_3outESG_EE + + +Call tree stack and functions sizes: + +stack stack stack call func func function name + desc level level desc +----- ----- ----- ----- ----- ----- -------------------------------------------------------------- + 64 256 0 0 408 10872 _Z13kernelWrapperPPvjjjj + 0 128 1 1 36 1340 _Z15_b14285_wrapperPPv + 64 128 1 2 602 1304 _Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE + 64 64 2 3 76 198 _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv + 0 0 3 4 122 122 _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv + 0 64 2 3 20 504 _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS4_E + 64 64 2 4 484 484 _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE + 0 192 1 1 32 1352 _Z15_b14290_wrapperPPv + 64 192 1 2 488 1320 _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + 64 64 2 3 76 198 _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv + 0 0 3 4 122 122 _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv + 64 128 2 3 150 634 _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E + 64 64 3 4 484 484 _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE + 0 128 1 1 36 1378 _Z15_b13811_wrapperPPv + 64 128 1 2 602 1342 _Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE + 64 64 2 3 114 236 _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv + 0 0 3 4 122 122 _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv + 0 64 2 3 20 504 _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E + 64 64 2 4 484 484 _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE + 0 128 1 1 36 1124 _Z15_b13749_wrapperPPv + 64 128 1 2 602 1088 _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE + 64 64 2 3 154 178 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv + 0 0 3 4 24 24 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E + 0 0 2 3 308 308 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E + 0 128 1 1 32 4276 _Z15_b14811_wrapperPPv + 64 128 1 2 1758 4244 _Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + 0 0 2 3 832 832 _ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv + 64 64 2 3 1512 1654 _ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E + 0 0 3 4 142 142 _ZN12me_primitive10udiv_dstepEjjRjS0_ + 0 0 2 3 142 142 _ZN12me_primitive10udiv_dstepEjjRjS0_ + 0 128 1 1 32 1528 _Z14_b8134_wrapperPPv + 128 128 1 2 456 1496 _ZN12mllib_graphs19softmax_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERA8_KjRNSD_ISE_NSF_3outET1_EE + 0 0 2 3 1040 1040 _Z17softmax_row_majorILi1E8bfloat16S0_Lt1EEvPT0_PT1_33softmax_row_major_internal_params + 0 0 1 1 32 434 _Z14_b8096_wrapperPPv + 0 0 1 2 402 402 _ZN12mllib_graphs14expand_wrapperI8bfloat16EEvRN3adf9io_bufferIT_NS2_9direction2inENS2_16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEEEERA3_KjRNS3_IS4_NS5_3outESG_EE + + +Maximum call level : 4 +Maximum stack level: 3 +Maximum stack size : 256 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable81/Release/0_0_reloadable81.cmic2 b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable81/Release/0_0_reloadable81.cmic2 new file mode 100644 index 0000000000000000000000000000000000000000..e82e9f9b441b7ccfddefec5dd999d4904993fe42 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable81/Release/0_0_reloadable81.cmic2 @@ -0,0 +1,15455 @@ + +// File generated by darts version V-2024.06#84922c0d9f#241219, Fri May 30 11:31:55 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// darts -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -d -h -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable11 me + +// Release: ipp V-2024.06-TGT-241219 +.label __Z13kernelWrapperPPvjjjj___func_begin0 +.label _Z13kernelWrapperPPvjjjj +.function kernelWrapper _Z13kernelWrapperPPvjjjj +.src_ref 0 "0_0_reloadable11.cc" 92 first +.src_ref 0 "0_0_reloadable11.cc" 94 60 +.src_ref 0 "0_0_reloadable11.cc" 94 110 first +.function_start + 2528 "10111010" // LDA r16, [p0]; NEZ r15, r1; MOV r4, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2529 "01111000" // /* MW 9 */ + 2530 "11010000" // /* MW 8 */ + 2531 "10001011" // /* MW 7 */ + 2532 "10000000" // /* MW 6 */ + 2533 "11110111" // /* MW 5 */ + 2534 "00000010" // /* MW 4 */ + 2535 "11010000" // /* MW 3 */ + 2536 "11000010" // /* MW 2 */ + 2537 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 92 + 2538 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2539 "00000001" // /* MW 5 */ + 2540 "00000000" // /* MW 4 */ + 2541 "00000000" // /* MW 3 */ + 2542 "00001000" // /* MW 2 */ + 2543 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 + 2544 "00000010" // ST p6, [sp, #-20]; MOV r26, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2545 "01110000" // /* MW 7 */ + 2546 "11010000" // /* MW 6 */ + 2547 "01001011" // /* MW 5 */ + 2548 "00000011" // /* MW 4 */ + 2549 "10110000" // /* MW 3 */ + 2550 "11100011" // /* MW 2 */ + 2551 "11111101" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 0 "0_0_reloadable11.cc" 97 112 + 2552 "00000010" // ST r13, [sp, #-12]; MOV r13, r3 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2553 "01110000" // /* MW 7 */ + 2554 "11010000" // /* MW 6 */ + 2555 "10101000" // /* MW 5 */ + 2556 "00000001" // /* MW 4 */ + 2557 "10110000" // /* MW 3 */ + 2558 "10110110" // /* MW 2 */ + 2559 "11111110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 + 2560 "00000010" // ST r14, [sp, #-8]; MOV r14, r1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2561 "01110000" // /* MW 7 */ + 2562 "01010000" // /* MW 6 */ + 2563 "11001000" // /* MW 5 */ + 2564 "00000001" // /* MW 4 */ + 2565 "10110000" // /* MW 3 */ + 2566 "00111010" // /* MW 2 */ + 2567 "11111111" // /* MW 1 */ + 2568 "10011000" // ST p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2569 "10011101" // /* MW 3 */ + 2570 "11111111" // /* MW 2 */ + 2571 "00001111" // /* MW 1 */ + 2572 "10011000" // ST r4, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2573 "10010101" // /* MW 3 */ + 2574 "11110000" // /* MW 2 */ + 2575 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 first + 2576 "00000010" // ST lr, [sp, #-24]; ADD.NC p6, r16, #4 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2577 "00000000" // /* MW 7 */ + 2578 "00000001" // /* MW 6 */ + 2579 "00110100" // /* MW 5 */ + 2580 "00000011" // /* MW 4 */ + 2581 "10110000" // /* MW 3 */ + 2582 "00000111" // /* MW 2 */ + 2583 "11111101" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 + 2584 "10011000" // LDA r16, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2585 "00010110" // /* MW 3 */ + 2586 "00011110" // /* MW 2 */ + 2587 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 36 + 2588 "10011000" // LDA r18, [p6], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2589 "01010110" // /* MW 3 */ + 2590 "00111110" // /* MW 2 */ + 2591 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 46 + 2592 "10011000" // LDA r17, [p6], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2593 "00110110" // /* MW 3 */ + 2594 "11101110" // /* MW 2 */ + 2595 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 60 + 2596 "10011000" // LDA r27, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2597 "01110110" // /* MW 3 */ + 2598 "00000111" // /* MW 2 */ + 2599 "00000110" // /* MW 1 */ + 2600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2601 "00000000" // /* MW 1 */ + 2602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2603 "00000000" // /* MW 1 */ + 2604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2605 "00000000" // /* MW 1 */ + 2606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2607 "00000000" // /* MW 1 */ + 2608 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2609 "00000000" // /* MW 1 */ + 2610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2611 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 219 30 first +.src_ref 1 "io_buffer_compiler.h" 219 37 first + 2612 "00011000" // SEL.EQZ r16, r16, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2613 "00100010" // /* MW 3 */ + 2614 "00100001" // /* MW 2 */ + 2615 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 218 23 first + 2616 "10011000" // ST r16, [p6, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2617 "00010001" // /* MW 3 */ + 2618 "11010110" // /* MW 2 */ + 2619 "00001110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 +.src_ref 1 "io_buffer_main.h" 434 8 + 2620 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2621 "11111101" // /* MW 3 */ + 2622 "11100000" // /* MW 2 */ + 2623 "00010111" // /* MW 1 */ + 2624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2625 "00000000" // /* MW 1 */ + 2626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2627 "00000000" // /* MW 1 */ + 2628 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2629 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 first + 2630 "00011000" // ACQ.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2631 "00001000" // /* MW 3 */ + 2632 "01010111" // /* MW 2 */ + 2633 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 97 60 +.src_ref 0 "0_0_reloadable11.cc" 97 112 first +.src_ref 0 "0_0_reloadable11.cc" 100 7 + 2634 "01100100" // NEZ r26, r13; MOV r17, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2635 "00001001" // /* MW 5 */ + 2636 "10100000" // /* MW 4 */ + 2637 "00001000" // /* MW 3 */ + 2638 "10011110" // /* MW 2 */ + 2639 "01101110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 97 60 +.src_ref 0 "0_0_reloadable11.cc" 97 60 + 2640 "11100100" // LSHL r19, r15, r17; MOV r18, p0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2641 "10000001" // /* MW 5 */ + 2642 "00100001" // /* MW 4 */ + 2643 "10111001" // /* MW 3 */ + 2644 "11100011" // /* MW 2 */ + 2645 "01111100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 97 60 +.src_ref 0 "0_0_reloadable11.cc" 97 60 + 2646 "10100100" // LSHL r18, r2, r17; ADD.NC r19, r19, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2647 "10010010" // /* MW 5 */ + 2648 "10110011" // /* MW 4 */ + 2649 "10111001" // /* MW 3 */ + 2650 "10100011" // /* MW 2 */ + 2651 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 97 60 + 2652 "01011000" // ADD.NC p6, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2653 "11001001" // /* MW 3 */ + 2654 "01101001" // /* MW 2 */ + 2655 "00011110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 97 60 + 2656 "00001100" // LDA r18, [p6]; ST r26, [sp, #-28] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2657 "10101011" // /* MW 5 */ + 2658 "11001110" // /* MW 4 */ + 2659 "11011111" // /* MW 3 */ + 2660 "11001010" // /* MW 2 */ + 2661 "11000000" // /* MW 1 */ + 2662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2663 "00000000" // /* MW 1 */ + 2664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2665 "00000000" // /* MW 1 */ + 2666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2667 "00000000" // /* MW 1 */ + 2668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2669 "00000000" // /* MW 1 */ + 2670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2671 "00000000" // /* MW 1 */ + 2672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2673 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 first + 2674 "00011000" // ADD.NC p7, r18, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2675 "00000010" // /* MW 3 */ + 2676 "01101001" // /* MW 2 */ + 2677 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 + 2678 "10011000" // LDA r19, [p7], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2679 "01110110" // /* MW 3 */ + 2680 "00111110" // /* MW 2 */ + 2681 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 23 + 2682 "10011000" // LDA r18, [p7], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2683 "01010110" // /* MW 3 */ + 2684 "11101110" // /* MW 2 */ + 2685 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 36 + 2686 "10011000" // LDA r20, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2687 "10010110" // /* MW 3 */ + 2688 "00011110" // /* MW 2 */ + 2689 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 60 + 2690 "10011000" // LDA r27, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2691 "01110110" // /* MW 3 */ + 2692 "00000111" // /* MW 2 */ + 2693 "00000111" // /* MW 1 */ + 2694 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2695 "00000000" // /* MW 1 */ + 2696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2697 "00000000" // /* MW 1 */ + 2698 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2699 "00000000" // /* MW 1 */ + 2700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2701 "00000000" // /* MW 1 */ + 2702 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2703 "00000000" // /* MW 1 */ + 2704 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2705 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 219 30 first +.src_ref 1 "io_buffer_compiler.h" 219 37 first + 2706 "00011000" // SEL.EQZ r19, r19, r20, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2707 "01000010" // /* MW 3 */ + 2708 "11100111" // /* MW 2 */ + 2709 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 218 23 first + 2710 "10011000" // ST r19, [p7, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2711 "01110001" // /* MW 3 */ + 2712 "11010110" // /* MW 2 */ + 2713 "00001111" // /* MW 1 */ + 2714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2715 "00000000" // /* MW 1 */ + 2716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2717 "00000000" // /* MW 1 */ + 2718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2719 "00000000" // /* MW 1 */ + 2720 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2721 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 first + 2722 "00011000" // ACQ.COND r18, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2723 "00001000" // /* MW 3 */ + 2724 "10010111" // /* MW 2 */ + 2725 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 100 7 first + 2726 "10011000" // LSHL r16, r0, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2727 "00011101" // /* MW 3 */ + 2728 "00100001" // /* MW 2 */ + 2729 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 100 7 + 2730 "11111000" // MOV dj0, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2731 "00100000" // /* MW 3 */ + 2732 "10001000" // /* MW 2 */ + 2733 "00011000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 100 7 + 2734 "01000100" // MOVXM p7, #508544 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2735 "00000000" // /* MW 5 */ + 2736 "11000101" // /* MW 4 */ + 2737 "11001110" // /* MW 3 */ + 2738 "00000111" // /* MW 2 */ + 2739 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 100 7 + 2740 "10011000" // LDA p1, [p7, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2741 "10011110" // /* MW 3 */ + 2742 "00000000" // /* MW 2 */ + 2743 "00000111" // /* MW 1 */ + 2744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2745 "00000000" // /* MW 1 */ + 2746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2747 "00000000" // /* MW 1 */ + 2748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2749 "00000000" // /* MW 1 */ + 2750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2751 "00000000" // /* MW 1 */ + 2752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2753 "00000000" // /* MW 1 */ + 2754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2755 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 100 4 +.no_stack_arguments + 2756 "00011000" // JL p1 /* MW 4 */ /* control_operation: words=4 call unconditional cycles_taken=1 indirect absolute delay_slots=5 */ + 2757 "01000000" // /* MW 3 */ + 2758 "00110000" // /* MW 2 */ + 2759 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 103 60 +.delay_slot + 2760 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2761 "11000000" // /* MW 3 */ + 2762 "01100000" // /* MW 2 */ + 2763 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2765 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2767 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2769 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2770 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 2771 "00011100" // /* MW 13 */ + 2772 "00000000" // /* MW 12 */ + 2773 "00000000" // /* MW 11 */ + 2774 "01010111" // /* MW 10 */ + 2775 "00011010" // /* MW 9 */ + 2776 "01000000" // /* MW 8 */ + 2777 "00000000" // /* MW 7 */ + 2778 "00000000" // /* MW 6 */ + 2779 "10110110" // /* MW 5 */ + 2780 "00000010" // /* MW 4 */ + 2781 "11110000" // /* MW 3 */ + 2782 "00101100" // /* MW 2 */ + 2783 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 103 60 first +.return_address + 2784 "10011000" // LDA r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2785 "00010110" // /* MW 3 */ + 2786 "00000110" // /* MW 2 */ + 2787 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 + 2788 "00011000" // LDA el0, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2789 "00101001" // /* MW 3 */ + 2790 "11100100" // /* MW 2 */ + 2791 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 108 + 2792 "00011000" // LDA lr, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2793 "00111001" // /* MW 3 */ + 2794 "11101000" // /* MW 2 */ + 2795 "00000111" // /* MW 1 */ + 2796 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2797 "00000000" // /* MW 1 */ + 2798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2799 "00000000" // /* MW 1 */ + 2800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2801 "00000000" // /* MW 1 */ + 2802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2803 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 12 first + 2804 "00011000" // ADD.NC p7, r16, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2805 "00001000" // /* MW 3 */ + 2806 "01101000" // /* MW 2 */ + 2807 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 12 + 2808 "10011000" // LDA r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2809 "00110110" // /* MW 3 */ + 2810 "00000110" // /* MW 2 */ + 2811 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_main.h" 464 8 +.src_ref 1 "io_buffer_main.h" 464 8 +.src_ref 1 "io_buffer_main.h" 464 8 + 2812 "11100100" // MOVX r16, #1; MOV r26, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2813 "01000001" // /* MW 5 */ + 2814 "00101111" // /* MW 4 */ + 2815 "10101101" // /* MW 3 */ + 2816 "00000000" // /* MW 2 */ + 2817 "00000100" // /* MW 1 */ + 2818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2819 "00000000" // /* MW 1 */ + 2820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2821 "00000000" // /* MW 1 */ + 2822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2823 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 2824 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2825 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 2826 "11111000" // MOV r26, el0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2827 "00011100" // /* MW 3 */ + 2828 "10100000" // /* MW 2 */ + 2829 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2830 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2831 "00001000" // /* MW 3 */ + 2832 "01010101" // /* MW 2 */ + 2833 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 2834 "11010100" // LDA r17, [p7, #-4]; MOV r27, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2835 "01000001" // /* MW 5 */ + 2836 "10101110" // /* MW 4 */ + 2837 "11011101" // /* MW 3 */ + 2838 "11000110" // /* MW 2 */ + 2839 "11111110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 106 60 first + 2840 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2841 "01010110" // /* MW 3 */ + 2842 "00000110" // /* MW 2 */ + 2843 "00000110" // /* MW 1 */ + 2844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2845 "00000000" // /* MW 1 */ + 2846 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2847 "00000000" // /* MW 1 */ + 2848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2849 "00000000" // /* MW 1 */ + 2850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2851 "00000000" // /* MW 1 */ + 2852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2853 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 2854 "10011000" // SUB r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2855 "00010001" // /* MW 3 */ + 2856 "00100111" // /* MW 2 */ + 2857 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 25 first +.src_ref 1 "io_buffer_compiler.h" 630 24 + 2858 "00100100" // SEL.EQZ r17, r17, r19, r27; ADD.NC p0, r18, #20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2859 "00010100" // /* MW 5 */ + 2860 "11010010" // /* MW 4 */ + 2861 "01000000" // /* MW 3 */ + 2862 "01100110" // /* MW 2 */ + 2863 "10001100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 25 +.src_ref 1 "io_buffer_compiler.h" 630 22 first + 2864 "00001100" // LDA r17, [p0]; ST r17, [p7, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2865 "01100011" // /* MW 5 */ + 2866 "11101100" // /* MW 4 */ + 2867 "11011111" // /* MW 3 */ + 2868 "11000110" // /* MW 2 */ + 2869 "00000000" // /* MW 1 */ + 2870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2871 "00000000" // /* MW 1 */ + 2872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2873 "00000000" // /* MW 1 */ + 2874 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2875 "00000000" // /* MW 1 */ + 2876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2877 "00000000" // /* MW 1 */ + 2878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2879 "00000000" // /* MW 1 */ + 2880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2881 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 first + 2882 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2883 "00001000" // /* MW 3 */ + 2884 "01010101" // /* MW 2 */ + 2885 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 2886 "10011000" // LDA r17, [p0, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2887 "00110110" // /* MW 3 */ + 2888 "11100110" // /* MW 2 */ + 2889 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 2890 "00011000" // LDA p6, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2891 "00011001" // /* MW 3 */ + 2892 "11101111" // /* MW 2 */ + 2893 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 2894 "00011000" // LDA r13, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2895 "10110001" // /* MW 3 */ + 2896 "11110101" // /* MW 2 */ + 2897 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 2898 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2899 "11010001" // /* MW 3 */ + 2900 "11111001" // /* MW 2 */ + 2901 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 2902 "00011000" // LDA p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2903 "10011001" // /* MW 3 */ + 2904 "11111111" // /* MW 2 */ + 2905 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 2906 "00011000" // LDA r15, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2907 "11110001" // /* MW 3 */ + 2908 "11110001" // /* MW 2 */ + 2909 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 108 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 2910 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 2911 "00000000" // /* MW 3 */ + 2912 "00101000" // /* MW 2 */ + 2913 "00010000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2914 "11111000" // MOV r27, r13 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2915 "10100000" // /* MW 3 */ + 2916 "11010110" // /* MW 2 */ + 2917 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 first +.delay_slot + 2918 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2919 "00010001" // /* MW 3 */ + 2920 "00100001" // /* MW 2 */ + 2921 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.delay_slot + 2922 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2923 "00000010" // /* MW 3 */ + 2924 "01100001" // /* MW 2 */ + 2925 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 108 first +.delay_slot + 2926 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2927 "00000001" // /* MW 5 */ + 2928 "00000000" // /* MW 4 */ + 2929 "00000000" // /* MW 3 */ + 2930 "11111000" // /* MW 2 */ + 2931 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 22 first +.delay_slot + 2932 "10011000" // ST r16, [p0, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2933 "00010001" // /* MW 3 */ + 2934 "11100110" // /* MW 2 */ +.label _Z13kernelWrapperPPvjjjj__end +.label __Z13kernelWrapperPPvjjjj___func_end0 + 2935 "00001000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv +.function shared_setup_backbone _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv +.src_ref 2 "elementwise_binary_shared.h" 164 first +.src_ref 2 "elementwise_binary_shared.h" 170 22 +.src_ref 2 "elementwise_binary_shared.h" 170 24 first +.function_start + 2944 "10111010" // LDA el0, [p1], #4; MOVXM p0, #508672 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2945 "00010000" // /* MW 9 */ + 2946 "10000000" // /* MW 8 */ + 2947 "00110001" // /* MW 7 */ + 2948 "11110000" // /* MW 6 */ + 2949 "00000001" // /* MW 5 */ + 2950 "00000000" // /* MW 4 */ + 2951 "11010000" // /* MW 3 */ + 2952 "10000101" // /* MW 2 */ + 2953 "00100011" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 36 +.src_ref 2 "elementwise_binary_shared.h" 175 36 +.src_ref 2 "elementwise_binary_shared.h" 175 48 + 2954 "10111010" // MOVA r0, #-128; MOVX r1, #256; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2955 "01011000" // /* MW 9 */ + 2956 "00000000" // /* MW 8 */ + 2957 "00001000" // /* MW 7 */ + 2958 "00001011" // /* MW 6 */ + 2959 "00010000" // /* MW 5 */ + 2960 "00001000" // /* MW 4 */ + 2961 "00000000" // /* MW 3 */ + 2962 "00000000" // /* MW 2 */ + 2963 "11110000" // /* MW 1 */ + 2964 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2965 "00000000" // /* MW 1 */ + 2966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2967 "00000000" // /* MW 1 */ + 2968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2969 "00000000" // /* MW 1 */ + 2970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2971 "00000000" // /* MW 1 */ + 2972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2973 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 170 22 first + 2974 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2975 "00101001" // /* MW 3 */ + 2976 "00011100" // /* MW 2 */ + 2977 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 171 24 first + 2978 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2979 "00101110" // /* MW 3 */ + 2980 "00011100" // /* MW 2 */ + 2981 "00000001" // /* MW 1 */ + 2982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2983 "00000000" // /* MW 1 */ + 2984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2985 "00000000" // /* MW 1 */ + 2986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2987 "00000000" // /* MW 1 */ + 2988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2989 "00000000" // /* MW 1 */ + 2990 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2991 "00000000" // /* MW 1 */ + 2992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2993 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 171 22 + 2994 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2995 "00101001" // /* MW 3 */ + 2996 "00011100" // /* MW 2 */ + 2997 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 172 24 first + 2998 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2999 "00101110" // /* MW 3 */ + 3000 "00000100" // /* MW 2 */ + 3001 "00000001" // /* MW 1 */ + 3002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3003 "00000000" // /* MW 1 */ + 3004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3005 "00000000" // /* MW 1 */ + 3006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3007 "00000000" // /* MW 1 */ + 3008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3009 "00000000" // /* MW 1 */ + 3010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3011 "00000000" // /* MW 1 */ + 3012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3013 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 172 22 + 3014 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3015 "00101001" // /* MW 3 */ + 3016 "00011100" // /* MW 2 */ + 3017 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 173 24 first + 3018 "10011000" // LDA r3, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3019 "01110110" // /* MW 3 */ + 3020 "00010100" // /* MW 2 */ + 3021 "00000001" // /* MW 1 */ + 3022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3023 "00000000" // /* MW 1 */ + 3024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3025 "00000000" // /* MW 1 */ + 3026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3027 "00000000" // /* MW 1 */ + 3028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3029 "00000000" // /* MW 1 */ + 3030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3031 "00000000" // /* MW 1 */ + 3032 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3033 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 173 22 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3034 "10011000" // ST r3, [p0], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3035 "01110001" // /* MW 3 */ + 3036 "01001100" // /* MW 2 */ + 3037 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 34 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3038 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3039 "00010111" // /* MW 3 */ + 3040 "00000100" // /* MW 2 */ + 3041 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 176 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3042 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3043 "00000000" // /* MW 3 */ + 3044 "00101000" // /* MW 2 */ + 3045 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3046 "01000100" // MOVXM r2, #65280 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3047 "00000000" // /* MW 5 */ + 3048 "00111110" // /* MW 4 */ + 3049 "11110001" // /* MW 3 */ + 3050 "00000000" // /* MW 2 */ + 3051 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 48 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3052 "10011000" // AND r2, r3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3053 "00100100" // /* MW 3 */ + 3054 "11000100" // /* MW 2 */ + 3055 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3056 "10011000" // EQ r27, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3057 "00100111" // /* MW 3 */ + 3058 "01110110" // /* MW 2 */ + 3059 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 36 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3060 "00011000" // SEL.EQZ r0, r0, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3061 "10000010" // /* MW 3 */ + 3062 "00000001" // /* MW 2 */ + 3063 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_end0 + 3065 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv +.function setup _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv +.src_ref 2 "elementwise_binary_shared.h" 178 +.src_ref 2 "elementwise_binary_shared.h" 178 first +.function_start + 3072 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3073 "00000001" // /* MW 5 */ + 3074 "00000000" // /* MW 4 */ + 3075 "00000000" // /* MW 3 */ + 3076 "00001000" // /* MW 2 */ + 3077 "00000000" // /* MW 1 */ + 3078 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3079 "00111101" // /* MW 3 */ + 3080 "11111100" // /* MW 2 */ + 3081 "00001111" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 179 8 first +.no_stack_arguments + 3082 "00000100" // JL #2944 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=2944 delay_slots=5 */ + 3083 "00000001" // /* MW 5 */ + 3084 "00000000" // /* MW 4 */ + 3085 "11000000" // /* MW 3 */ + 3086 "00000101" // /* MW 2 */ + 3087 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 179 8 +.delay_slot + 3088 "01000100" // MOVXM p0, #508672 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3089 "00000000" // /* MW 5 */ + 3090 "11000110" // /* MW 4 */ + 3091 "11000000" // /* MW 3 */ + 3092 "00000111" // /* MW 2 */ + 3093 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3095 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3097 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3099 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3100 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3101 "01100111" // /* MW 3 */ + 3102 "00000001" // /* MW 2 */ + 3103 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 181 4 +.src_ref 3 "mul_impl.h" 152 25 +.return_address + 3104 "10111010" // LDA lr, [sp, #-4]; MOVXM p1, #508672 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3105 "00010000" // /* MW 9 */ + 3106 "10000000" // /* MW 8 */ + 3107 "10110001" // /* MW 7 */ + 3108 "11110000" // /* MW 6 */ + 3109 "00000001" // /* MW 5 */ + 3110 "00000000" // /* MW 4 */ + 3111 "00100000" // /* MW 3 */ + 3112 "10000111" // /* MW 2 */ + 3113 "11111111" // /* MW 1 */ + 3114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3115 "00000000" // /* MW 1 */ + 3116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3117 "00000000" // /* MW 1 */ + 3118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3119 "00000000" // /* MW 1 */ + 3120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3121 "00000000" // /* MW 1 */ + 3122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3123 "00000000" // /* MW 1 */ + 3124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3125 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 181 4 first + 3126 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3127 "00000000" // /* MW 3 */ + 3128 "00101000" // /* MW 2 */ + 3129 "00010000" // /* MW 1 */ +.src_ref 3 "mul_impl.h" 152 25 +.delay_slot + 3130 "00011000" // MOVX r16, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3131 "00001001" // /* MW 3 */ + 3132 "00100000" // /* MW 2 */ + 3133 "00010000" // /* MW 1 */ +.src_ref 3 "mul_impl.h" 152 25 first +.delay_slot + 3134 "10011000" // ST r16, [p1, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3135 "00010001" // /* MW 3 */ + 3136 "01000110" // /* MW 2 */ + 3137 "00001001" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 181 4 first +.delay_slot + 3138 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3139 "00000001" // /* MW 5 */ + 3140 "00000000" // /* MW 4 */ + 3141 "00000000" // /* MW 3 */ + 3142 "11111000" // /* MW 2 */ + 3143 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3145 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + 3147 "00000000" // /* MW 1 */ +.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE +.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_begin0 +.function shared_run_backbone _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE +.src_ref 2 "elementwise_binary_shared.h" 66 first +.src_ref 2 "elementwise_binary_shared.h" 78 37 +.function_start + 3152 "11111000" // MOV r2, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3153 "11000000" // /* MW 3 */ + 3154 "10010110" // /* MW 2 */ + 3155 "00011000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 134 58 +.src_ref 2 "elementwise_binary_shared.h" 78 37 first + 3156 "00100100" // MOVX r0, #0; ADD.NC p5, r2, #14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3157 "00001110" // /* MW 5 */ + 3158 "11000010" // /* MW 4 */ + 3159 "00101010" // /* MW 3 */ + 3160 "00000000" // /* MW 2 */ + 3161 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 134 58 first +.src_ref 2 "elementwise_binary_shared.h" 81 22 first + 3162 "11010100" // LDA.s16 r0, [p5], #2; VBCST.16 x0, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3163 "11100101" // /* MW 5 */ + 3164 "00000010" // /* MW 4 */ + 3165 "01010000" // /* MW 3 */ + 3166 "10000010" // /* MW 2 */ + 3167 "10100011" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 83 15 first + 3168 "10011000" // LDA r2, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3169 "01010110" // /* MW 3 */ + 3170 "00000100" // /* MW 2 */ + 3171 "00000101" // /* MW 1 */ + 3172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3173 "00000000" // /* MW 1 */ + 3174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3175 "00000000" // /* MW 1 */ + 3176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3177 "00000000" // /* MW 1 */ + 3178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3179 "00000000" // /* MW 1 */ + 3180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3181 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 83 26 + 3182 "00011000" // MOVX r1, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3183 "00001001" // /* MW 3 */ + 3184 "00000010" // /* MW 2 */ + 3185 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 83 26 + 3186 "10011000" // LTU r1, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3187 "00101100" // /* MW 3 */ + 3188 "01000010" // /* MW 2 */ + 3189 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 83 8 + 3190 "10000100" // JNZ r1, #3328 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3328 delay_slots=5 */ + 3191 "00000001" // /* MW 5 */ + 3192 "01000000" // /* MW 4 */ + 3193 "10000000" // /* MW 3 */ + 3194 "00000110" // /* MW 2 */ + 3195 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 66 +.delay_slot + 3196 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3197 "00000001" // /* MW 5 */ + 3198 "00000000" // /* MW 4 */ + 3199 "00000000" // /* MW 3 */ + 3200 "00001000" // /* MW 2 */ + 3201 "00000000" // /* MW 1 */ +.delay_slot + 3202 "11111000" // MOV p4, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3203 "11100000" // /* MW 3 */ + 3204 "01100101" // /* MW 2 */ + 3205 "00011100" // /* MW 1 */ +.delay_slot + 3206 "00011000" // PADDB [p4], #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3207 "10010000" // /* MW 3 */ + 3208 "11111111" // /* MW 2 */ + 3209 "00111100" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 78 37 first +.delay_slot + 3210 "00011000" // VST x0, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3211 "00010011" // /* MW 3 */ + 3212 "00000100" // /* MW 2 */ + 3213 "00001100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3215 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 first +.src_ref 2 "elementwise_binary_shared.h" 85 34 +.src_ref 2 "elementwise_binary_shared.h" 90 19 + 3216 "11010100" // MOVA dj0, #12; VBCST.16 x0, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3217 "11100101" // /* MW 5 */ + 3218 "00000010" // /* MW 4 */ + 3219 "10000000" // /* MW 3 */ + 3220 "10000010" // /* MW 2 */ + 3221 "00000001" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 85 34 first +.src_ref 2 "elementwise_binary_shared.h" 90 19 first + 3222 "10011000" // LDA.u8 r0, [p3, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3223 "00001010" // /* MW 3 */ + 3224 "00000000" // /* MW 2 */ + 3225 "00000011" // /* MW 1 */ + 3226 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3227 "00000000" // /* MW 1 */ + 3228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3229 "00000000" // /* MW 1 */ + 3230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3231 "00000000" // /* MW 1 */ + 3232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3233 "00000000" // /* MW 1 */ + 3234 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3235 "00000000" // /* MW 1 */ + 3236 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3237 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 90 12 +.src_ref 2 "elementwise_binary_shared.h" 90 35 + 3238 "10000100" // JNZ r0, #3280 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3280 delay_slots=5 */ + 3239 "00000001" // /* MW 5 */ + 3240 "01000000" // /* MW 4 */ + 3241 "01101000" // /* MW 3 */ + 3242 "00000110" // /* MW 2 */ + 3243 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 2 "elementwise_binary_shared.h" 132 18 +.delay_slot + 3244 "10111000" // MOV m0, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3245 "00000000" // /* MW 3 */ + 3246 "00000000" // /* MW 2 */ + 3247 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary_shared.h" 130 16 +.delay_slot + 3248 "10111000" // MOV m2, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3249 "10000000" // /* MW 3 */ + 3250 "00000000" // /* MW 2 */ + 3251 "00011010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3252 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3253 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3255 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3256 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3257 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary_shared.h" 128 16 + 3258 "10111010" // MOVA m1, #0; J #3296 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=3296 delay_slots=5 */ + 3259 "00100000" // /* MW 9 */ + 3260 "00000000" // /* MW 8 */ + 3261 "00000000" // /* MW 7 */ + 3262 "10011100" // /* MW 6 */ + 3263 "00000001" // /* MW 5 */ + 3264 "00000000" // /* MW 4 */ + 3265 "10000000" // /* MW 3 */ + 3266 "00000100" // /* MW 2 */ + 3267 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3269 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3271 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3273 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3275 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.delay_slot + 3276 "00011000" // VST x0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3277 "00010011" // /* MW 3 */ + 3278 "00000100" // /* MW 2 */ + 3279 "00001000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_128 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary_shared.h" 128 16 + 3280 "10111000" // MOV m1, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3281 "10000000" // /* MW 3 */ + 3282 "00000000" // /* MW 2 */ + 3283 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 2 "elementwise_binary_shared.h" 130 16 + 3284 "11110110" // NOPA; NOPB; VST x0, [p1]; MOV m2, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3285 "01010000" // /* MW 11 */ + 3286 "00000000" // /* MW 10 */ + 3287 "00000000" // /* MW 9 */ + 3288 "00000001" // /* MW 8 */ + 3289 "00010011" // /* MW 7 */ + 3290 "00000100" // /* MW 6 */ + 3291 "00100001" // /* MW 5 */ + 3292 "00000000" // /* MW 4 */ + 3293 "11110000" // /* MW 3 */ + 3294 "00101100" // /* MW 2 */ + 3295 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_144 + 3296 "10000100" // J #3424 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3424 delay_slots=5 */ + 3297 "00000000" // /* MW 5 */ + 3298 "00000000" // /* MW 4 */ + 3299 "10110000" // /* MW 3 */ + 3300 "00000110" // /* MW 2 */ + 3301 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 2 "elementwise_binary_shared.h" 128 16 +.src_ref 2 "elementwise_binary_shared.h" 132 18 +.delay_slot + 3302 "00000010" // MOVS p0, p4; MOV p4, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3303 "01110000" // /* MW 7 */ + 3304 "01100000" // /* MW 6 */ + 3305 "00110000" // /* MW 5 */ + 3306 "00000010" // /* MW 4 */ + 3307 "01100000" // /* MW 3 */ + 3308 "00010001" // /* MW 2 */ + 3309 "00010010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3311 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3313 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3315 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3316 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3317 "10000001" // /* MW 11 */ + 3318 "10101101" // /* MW 10 */ + 3319 "00000000" // /* MW 9 */ + 3320 "00000000" // /* MW 8 */ + 3321 "00000000" // /* MW 7 */ + 3322 "00000000" // /* MW 6 */ + 3323 "00100000" // /* MW 5 */ + 3324 "00000000" // /* MW 4 */ + 3325 "11110000" // /* MW 3 */ + 3326 "00101100" // /* MW 2 */ + 3327 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_176 +.src_ref 2 "elementwise_binary_shared.h" 109 97 + 3328 "00011000" // MOVX r1, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3329 "00001101" // /* MW 3 */ + 3330 "00000010" // /* MW 2 */ + 3331 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 109 97 first + 3332 "10011000" // EQ r1, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3333 "00100111" // /* MW 3 */ + 3334 "01000010" // /* MW 2 */ + 3335 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 109 78 + 3336 "10000100" // JNZ r1, #3376 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3376 delay_slots=5 */ + 3337 "00000001" // /* MW 5 */ + 3338 "01000000" // /* MW 4 */ + 3339 "10011000" // /* MW 3 */ + 3340 "00000110" // /* MW 2 */ + 3341 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 109 78 +.delay_slot + 3342 "01000100" // MOVXM p3, #508464 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3343 "01100000" // /* MW 5 */ + 3344 "11000100" // /* MW 4 */ + 3345 "11000110" // /* MW 3 */ + 3346 "00000111" // /* MW 2 */ + 3347 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3349 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3351 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3353 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 109 78 +.delay_slot + 3354 "01000100" // MOVXM r0, #1065353216 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3355 "00000000" // /* MW 5 */ + 3356 "00100000" // /* MW 4 */ + 3357 "00000000" // /* MW 3 */ + 3358 "10000000" // /* MW 2 */ + 3359 "00111111" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 109 78 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3360 "11100001" // NOPA; NOPB; NOPS; MOVXM r0, #-1082130432; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3361 "00000000" // /* MW 15 */ + 3362 "00000000" // /* MW 14 */ + 3363 "00010000" // /* MW 13 */ + 3364 "00000000" // /* MW 12 */ + 3365 "00001000" // /* MW 11 */ + 3366 "00000000" // /* MW 10 */ + 3367 "11100000" // /* MW 9 */ + 3368 "00101111" // /* MW 8 */ + 3369 "01011011" // /* MW 7 */ + 3370 "00000001" // /* MW 6 */ + 3371 "00100000" // /* MW 5 */ + 3372 "00000000" // /* MW 4 */ + 3373 "11110000" // /* MW 3 */ + 3374 "00101100" // /* MW 2 */ + 3375 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_224 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 2 "elementwise_binary_shared.h" 109 78 +.src_ref 2 "elementwise_binary_shared.h" 132 18 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3376 "01010100" // LDA.s8 r0, [p3]; MOV m0, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3377 "00000001" // /* MW 5 */ + 3378 "00000001" // /* MW 4 */ + 3379 "01010000" // /* MW 3 */ + 3380 "10000000" // /* MW 2 */ + 3381 "01100000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary_shared.h" 128 16 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3382 "10111000" // MOV m1, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3383 "00000000" // /* MW 3 */ + 3384 "00000000" // /* MW 2 */ + 3385 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary_shared.h" 130 16 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3386 "10111000" // MOV m2, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3387 "10000000" // /* MW 3 */ + 3388 "00000000" // /* MW 2 */ + 3389 "00011010" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3391 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3393 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 109 78 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3394 "01111000" // VINSERT.32 x0, x0, #0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3395 "00010001" // /* MW 3 */ + 3396 "00000000" // /* MW 2 */ + 3397 "00011000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 109 78 + 3398 "11111000" // VMOV bmll1, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3399 "10010010" // /* MW 3 */ + 3400 "00000000" // /* MW 2 */ + 3401 "00011001" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 109 78 + 3402 "00011000" // MOVX crRnd, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3403 "10000000" // /* MW 3 */ + 3404 "00111010" // /* MW 2 */ + 3405 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 109 78 + 3406 "00011000" // VCONV.bf16.fp32 wl0, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3407 "10010110" // /* MW 3 */ + 3408 "01000000" // /* MW 2 */ + 3409 "00001000" // /* MW 1 */ + 3410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3411 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 first +.src_ref 2 "elementwise_binary_shared.h" 109 78 + 3412 "01011000" // VEXTBCST.16 x0, x0, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3413 "00000011" // /* MW 3 */ + 3414 "00000001" // /* MW 2 */ + 3415 "00011000" // /* MW 1 */ + 3416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3417 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first + 3418 "00001100" // NOPA; VST x0, [sp, #-64] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3419 "01100110" // /* MW 5 */ + 3420 "11111000" // /* MW 4 */ + 3421 "11111111" // /* MW 3 */ + 3422 "00101100" // /* MW 2 */ + 3423 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_272 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary_shared.h" 125 4 first +.src_ref 2 "elementwise_binary_shared.h" 125 31 first +.src_ref 2 "elementwise_binary_shared.h" 128 16 first + 3424 "10110110" // LDA r1, [p5, #-16]; VLDB x1, [p4], m1; MOVXM ls, #3536 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3425 "00010000" // /* MW 11 */ + 3426 "11101000" // /* MW 10 */ + 3427 "01111110" // /* MW 9 */ + 3428 "00000000" // /* MW 8 */ + 3429 "00000000" // /* MW 7 */ + 3430 "00000000" // /* MW 6 */ + 3431 "11101000" // /* MW 5 */ + 3432 "01010000" // /* MW 4 */ + 3433 "11011000" // /* MW 3 */ + 3434 "10000110" // /* MW 2 */ + 3435 "10111000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary_shared.h" 125 4 +.src_ref 2 "elementwise_binary_shared.h" 125 31 +.src_ref 2 "elementwise_binary_shared.h" 130 16 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 3436 "10110110" // MOVA r3, #-5; VLDB x0, [p1], m2; MOVXM le, #3584 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3437 "00010000" // /* MW 11 */ + 3438 "00000000" // /* MW 10 */ + 3439 "10111111" // /* MW 9 */ + 3440 "00000001" // /* MW 8 */ + 3441 "00000000" // /* MW 7 */ + 3442 "00000000" // /* MW 6 */ + 3443 "01101000" // /* MW 5 */ + 3444 "10010000" // /* MW 4 */ + 3445 "00000010" // /* MW 3 */ + 3446 "01100011" // /* MW 2 */ + 3447 "11111111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary_shared.h" 128 16 first +.src_ref 2 "elementwise_binary_shared.h" 132 18 first +.src_ref 2 "elementwise_binary_shared.h" 136 44 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 3448 "00010010" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;VLDB x1, [p4], m1; MOVX r0, #60 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3449 "11110001" // /* MW 7 */ + 3450 "00000000" // /* MW 6 */ + 3451 "11101000" // /* MW 5 */ + 3452 "01010000" // /* MW 4 */ + 3453 "01111000" // /* MW 3 */ + 3454 "00000101" // /* MW 2 */ + 3455 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary_shared.h" 125 31 +.src_ref 2 "elementwise_binary_shared.h" 130 16 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3456 "10111010" // VLDA x0, [p1], m2; MOVXM p3, #508464 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3457 "00010000" // /* MW 9 */ + 3458 "00011000" // /* MW 8 */ + 3459 "10110001" // /* MW 7 */ + 3460 "11110001" // /* MW 6 */ + 3461 "00000001" // /* MW 5 */ + 3462 "00000000" // /* MW 4 */ + 3463 "01110000" // /* MW 3 */ + 3464 "00000011" // /* MW 2 */ + 3465 "00101001" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 125 31 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3466 "10011000" // LDA.s8 r2, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3467 "01000010" // /* MW 3 */ + 3468 "00000100" // /* MW 2 */ + 3469 "00000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary_shared.h" 132 18 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3470 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3471 "00101011" // /* MW 3 */ + 3472 "00001000" // /* MW 2 */ + 3473 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3475 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 125 31 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3476 "10011000" // LSHL r1, r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3477 "00111101" // /* MW 3 */ + 3478 "01000010" // /* MW 2 */ + 3479 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 125 4 +.src_ref 2 "elementwise_binary_shared.h" 136 44 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3480 "01100010" // ADD.NC lc, r1, #-3; VMAC.f dm1, dm0, x1, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3481 "00000001" // /* MW 7 */ + 3482 "00000010" // /* MW 6 */ + 3483 "00000001" // /* MW 5 */ + 3484 "10000110" // /* MW 4 */ + 3485 "11111110" // /* MW 3 */ + 3486 "01110000" // /* MW 2 */ + 3487 "00000101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary_shared.h" 128 16 first +.src_ref 2 "elementwise_binary_shared.h" 130 16 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 3488 "00111100" // VLDA x0, [p1], m2; VLDB x1, [p4], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3489 "11101000" // /* MW 5 */ + 3490 "01010000" // /* MW 4 */ + 3491 "01111000" // /* MW 3 */ + 3492 "00000011" // /* MW 2 */ + 3493 "00101001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary_shared.h" 132 18 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3494 "10111010" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3495 "01111110" // /* MW 9 */ + 3496 "10100101" // /* MW 8 */ + 3497 "00000001" // /* MW 7 */ + 3498 "00000000" // /* MW 6 */ + 3499 "00010000" // /* MW 5 */ + 3500 "00000000" // /* MW 4 */ + 3501 "01110000" // /* MW 3 */ + 3502 "00000101" // /* MW 2 */ + 3503 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary_shared.h" 144 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3504 "11100001" // NOPA; NOPB; NOPS; MOVX crRnd, r2; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3505 "00000000" // /* MW 15 */ + 3506 "00000000" // /* MW 14 */ + 3507 "01111000" // /* MW 13 */ + 3508 "10100101" // /* MW 12 */ + 3509 "00000001" // /* MW 11 */ + 3510 "00000000" // /* MW 10 */ + 3511 "11010100" // /* MW 9 */ + 3512 "00000101" // /* MW 8 */ + 3513 "01011011" // /* MW 7 */ + 3514 "00000001" // /* MW 6 */ + 3515 "00100000" // /* MW 5 */ + 3516 "00000000" // /* MW 4 */ + 3517 "11110000" // /* MW 3 */ + 3518 "00101100" // /* MW 2 */ + 3519 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 136 44 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3520 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3521 "00010000" // /* MW 15 */ + 3522 "00001000" // /* MW 14 */ + 3523 "01111000" // /* MW 13 */ + 3524 "10100101" // /* MW 12 */ + 3525 "00000001" // /* MW 11 */ + 3526 "00000000" // /* MW 10 */ + 3527 "00000000" // /* MW 9 */ + 3528 "00000000" // /* MW 8 */ + 3529 "01011011" // /* MW 7 */ + 3530 "00000001" // /* MW 6 */ + 3531 "00100000" // /* MW 5 */ + 3532 "00000000" // /* MW 4 */ + 3533 "11110000" // /* MW 3 */ + 3534 "00101100" // /* MW 2 */ + 3535 "00000000" // /* MW 1 */ +.label ZLS_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_384 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary_shared.h" 128 16 first +.src_ref 2 "elementwise_binary_shared.h" 130 16 first +.begin_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 3536 "11100001" // VLDA x0, [p1], m2; VLDB x1, [p4], m1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3537 "00000000" // /* MW 15 */ + 3538 "00000000" // /* MW 14 */ + 3539 "01111000" // /* MW 13 */ + 3540 "10100101" // /* MW 12 */ + 3541 "00000001" // /* MW 11 */ + 3542 "00000000" // /* MW 10 */ + 3543 "00000000" // /* MW 9 */ + 3544 "00000000" // /* MW 8 */ + 3545 "01011011" // /* MW 7 */ + 3546 "00000001" // /* MW 6 */ + 3547 "11101000" // /* MW 5 */ + 3548 "01010000" // /* MW 4 */ + 3549 "01111000" // /* MW 3 */ + 3550 "00000011" // /* MW 2 */ + 3551 "00101001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary_shared.h" 132 18 first +.src_ref 2 "elementwise_binary_shared.h" 144 18 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3552 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3553 "00000000" // /* MW 15 */ + 3554 "00000000" // /* MW 14 */ + 3555 "01111000" // /* MW 13 */ + 3556 "10100101" // /* MW 12 */ + 3557 "00000001" // /* MW 11 */ + 3558 "00000000" // /* MW 10 */ + 3559 "00000000" // /* MW 9 */ + 3560 "00000000" // /* MW 8 */ + 3561 "10100011" // /* MW 7 */ + 3562 "00011100" // /* MW 6 */ + 3563 "00100010" // /* MW 5 */ + 3564 "00000000" // /* MW 4 */ + 3565 "01110000" // /* MW 3 */ + 3566 "00000101" // /* MW 2 */ + 3567 "00000001" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3568 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3569 "00000000" // /* MW 15 */ + 3570 "00000000" // /* MW 14 */ + 3571 "01111000" // /* MW 13 */ + 3572 "10100101" // /* MW 12 */ + 3573 "00000001" // /* MW 11 */ + 3574 "00000000" // /* MW 10 */ + 3575 "00000000" // /* MW 9 */ + 3576 "00000000" // /* MW 8 */ + 3577 "01011011" // /* MW 7 */ + 3578 "00000001" // /* MW 6 */ + 3579 "00100000" // /* MW 5 */ + 3580 "00000000" // /* MW 4 */ + 3581 "11110000" // /* MW 3 */ + 3582 "00101100" // /* MW 2 */ + 3583 "00000000" // /* MW 1 */ +.label ZLE_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_432 +.src_ref 2 "elementwise_binary_shared.h" 136 44 first +.end_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3584 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3585 "00010000" // /* MW 15 */ + 3586 "00001000" // /* MW 14 */ + 3587 "01111000" // /* MW 13 */ + 3588 "10100101" // /* MW 12 */ + 3589 "00000001" // /* MW 11 */ + 3590 "00000000" // /* MW 10 */ + 3591 "00000000" // /* MW 9 */ + 3592 "00000000" // /* MW 8 */ + 3593 "01011011" // /* MW 7 */ + 3594 "00000001" // /* MW 6 */ + 3595 "00100000" // /* MW 5 */ + 3596 "00000000" // /* MW 4 */ + 3597 "11110000" // /* MW 3 */ + 3598 "00101100" // /* MW 2 */ + 3599 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 3600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3601 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary_shared.h" 144 18 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3602 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3603 "10100011" // /* MW 3 */ + 3604 "00011100" // /* MW 2 */ + 3605 "00001010" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 3606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3607 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 136 44 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 3608 "01001000" // VMAC.f dm1, dm0, x1, x0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3609 "00000001" // /* MW 3 */ + 3610 "00000010" // /* MW 2 */ + 3611 "00000001" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3613 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 146 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3614 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3615 "00000000" // /* MW 3 */ + 3616 "00101000" // /* MW 2 */ + 3617 "00010000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary_shared.h" 144 18 first +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3618 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3619 "10100011" // /* MW 3 */ + 3620 "00011100" // /* MW 2 */ + 3621 "00001010" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 146 first +.delay_slot + 3622 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3623 "00000001" // /* MW 5 */ + 3624 "00000000" // /* MW 4 */ + 3625 "00000000" // /* MW 3 */ + 3626 "11111000" // /* MW 2 */ + 3627 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3628 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3629 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary_shared.h" 144 18 first +.delay_slot + 3630 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3631 "10100011" // /* MW 3 */ + 3632 "00011100" // /* MW 2 */ + 3633 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3634 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE__end +.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_end0 + 3635 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E +.function run _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E +.src_ref 2 "elementwise_binary_shared.h" 196 first +.src_ref 2 "elementwise_binary_shared.h" 203 19 +.function_start + 3648 "01000100" // MOVXM p2, #508684 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3649 "00011000" // /* MW 5 */ + 3650 "11000110" // /* MW 4 */ + 3651 "11000100" // /* MW 3 */ + 3652 "00000111" // /* MW 2 */ + 3653 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 203 19 first +.src_ref 2 "elementwise_binary_shared.h" 204 12 +.src_ref 2 "elementwise_binary_shared.h" 204 12 +.src_ref 2 "elementwise_binary_shared.h" 206 12 +.src_ref 2 "elementwise_binary_shared.h" 206 12 + 3654 "01110110" // LDA.u8 r0, [p2]; MOVS p2, p1; MOVXM p3, #508672 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3655 "00010000" // /* MW 11 */ + 3656 "10000000" // /* MW 10 */ + 3657 "10110001" // /* MW 9 */ + 3658 "11110001" // /* MW 8 */ + 3659 "00000001" // /* MW 7 */ + 3660 "00000000" // /* MW 6 */ + 3661 "10001011" // /* MW 5 */ + 3662 "10000100" // /* MW 4 */ + 3663 "01010010" // /* MW 3 */ + 3664 "10000001" // /* MW 2 */ + 3665 "01000000" // /* MW 1 */ + 3666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3667 "00000000" // /* MW 1 */ + 3668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3669 "00000000" // /* MW 1 */ + 3670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3671 "00000000" // /* MW 1 */ + 3672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3673 "00000000" // /* MW 1 */ + 3674 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3675 "00000000" // /* MW 1 */ + 3676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3677 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 203 12 +.src_ref 2 "elementwise_binary_shared.h" 203 35 + 3678 "10000100" // JZ r0, #3744 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3744 delay_slots=5 */ + 3679 "00000001" // /* MW 5 */ + 3680 "00000000" // /* MW 4 */ + 3681 "01010000" // /* MW 3 */ + 3682 "00000111" // /* MW 2 */ + 3683 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 209 4 +.delay_slot + 3684 "11111000" // MOV dc0, lr /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3685 "11100000" // /* MW 3 */ + 3686 "11000001" // /* MW 2 */ + 3687 "00011000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 196 +.delay_slot + 3688 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3689 "00000001" // /* MW 5 */ + 3690 "00000000" // /* MW 4 */ + 3691 "00000000" // /* MW 3 */ + 3692 "00001000" // /* MW 2 */ + 3693 "00000000" // /* MW 1 */ +.delay_slot + 3694 "11111000" // MOV r1, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3695 "11100000" // /* MW 3 */ + 3696 "01010101" // /* MW 2 */ + 3697 "00011000" // /* MW 1 */ +.delay_slot + 3698 "00011000" // ADD.NC p1, r1, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3699 "11100000" // /* MW 3 */ + 3700 "01100000" // /* MW 2 */ + 3701 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 538 13 first +.src_ref 4 "vector_native_types.hpp" 374 137 first +.delay_slot + 3702 "00011000" // VST sfh, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3703 "00101011" // /* MW 3 */ + 3704 "00000111" // /* MW 2 */ + 3705 "00001001" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 206 12 first +.no_stack_arguments + 3706 "00000100" // JL #3152 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3152 delay_slots=5 */ + 3707 "00000001" // /* MW 5 */ + 3708 "00000000" // /* MW 4 */ + 3709 "00101000" // /* MW 3 */ + 3710 "00000110" // /* MW 2 */ + 3711 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3713 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3715 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3717 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3719 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3720 "00100010" // NOPA; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3721 "00011100" // /* MW 7 */ + 3722 "00000000" // /* MW 6 */ + 3723 "00000000" // /* MW 5 */ + 3724 "00000100" // /* MW 4 */ + 3725 "11110000" // /* MW 3 */ + 3726 "00101100" // /* MW 2 */ + 3727 "00000000" // /* MW 1 */ +.return_address + 3728 "10000100" // J #3776 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3776 delay_slots=5 */ + 3729 "00000000" // /* MW 5 */ + 3730 "00000000" // /* MW 4 */ + 3731 "01100000" // /* MW 3 */ + 3732 "00000111" // /* MW 2 */ + 3733 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3734 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3735 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3736 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3737 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3738 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3739 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3741 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3743 "00000000" // /* MW 1 */ +.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_96 +.src_ref 2 "elementwise_binary_shared.h" 204 12 first +.no_stack_arguments + 3744 "00000100" // JL #3152 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3152 delay_slots=5 */ + 3745 "00000001" // /* MW 5 */ + 3746 "00000000" // /* MW 4 */ + 3747 "00101000" // /* MW 3 */ + 3748 "00000110" // /* MW 2 */ + 3749 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 204 12 +.src_ref 2 "elementwise_binary_shared.h" 204 12 +.delay_slot + 3750 "00000010" // MOVS p0, p1; MOV p1, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3751 "01110000" // /* MW 7 */ + 3752 "01100000" // /* MW 6 */ + 3753 "10110000" // /* MW 5 */ + 3754 "00000000" // /* MW 4 */ + 3755 "01100000" // /* MW 3 */ + 3756 "10010001" // /* MW 2 */ + 3757 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3758 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3759 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3760 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3761 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3762 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3763 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3764 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3765 "10000001" // /* MW 11 */ + 3766 "10101101" // /* MW 10 */ + 3767 "00000000" // /* MW 9 */ + 3768 "00000000" // /* MW 8 */ + 3769 "00000000" // /* MW 7 */ + 3770 "00000000" // /* MW 6 */ + 3771 "00100000" // /* MW 5 */ + 3772 "00000000" // /* MW 4 */ + 3773 "11110000" // /* MW 3 */ + 3774 "00101100" // /* MW 2 */ + 3775 "00000000" // /* MW 1 */ +.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_128 +.src_ref 2 "elementwise_binary_shared.h" 209 4 +.return_address + 3776 "11111000" // MOV lr, dc0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3777 "10000000" // /* MW 3 */ + 3778 "01110001" // /* MW 2 */ + 3779 "00011111" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 209 4 first + 3780 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3781 "00000000" // /* MW 3 */ + 3782 "00101000" // /* MW 2 */ + 3783 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 209 4 +.delay_slot + 3784 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3785 "00000001" // /* MW 5 */ + 3786 "00000000" // /* MW 4 */ + 3787 "00000000" // /* MW 3 */ + 3788 "11111000" // /* MW 2 */ + 3789 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3791 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3793 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3794 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3795 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3796 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_end0 + 3797 "00000000" // /* MW 1 */ +.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_mul1d_attribute_broadcasting _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 6 "superkernels.cpp" 181 first +.src_ref 6 "superkernels.cpp" 186 6 +.function_start + 3808 "01000100" // MOVXM p3, #508416 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3809 "00000000" // /* MW 5 */ + 3810 "11000100" // /* MW 4 */ + 3811 "11000110" // /* MW 3 */ + 3812 "00000111" // /* MW 2 */ + 3813 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 186 6 first + 3814 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3815 "11000001" // /* MW 5 */ + 3816 "10110101" // /* MW 4 */ + 3817 "11011000" // /* MW 3 */ + 3818 "11000010" // /* MW 2 */ + 3819 "01100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 181 + 3820 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3821 "00000001" // /* MW 5 */ + 3822 "00000000" // /* MW 4 */ + 3823 "00000000" // /* MW 3 */ + 3824 "00001000" // /* MW 2 */ + 3825 "00000000" // /* MW 1 */ + 3826 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3827 "01110000" // /* MW 7 */ + 3828 "11010000" // /* MW 6 */ + 3829 "00001011" // /* MW 5 */ + 3830 "00000000" // /* MW 4 */ + 3831 "10110000" // /* MW 3 */ + 3832 "01100011" // /* MW 2 */ + 3833 "11111111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 183 11 + 3834 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #508424 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3835 "00010001" // /* MW 9 */ + 3836 "00000100" // /* MW 8 */ + 3837 "00110001" // /* MW 7 */ + 3838 "11110011" // /* MW 6 */ + 3839 "00000001" // /* MW 5 */ + 3840 "00000000" // /* MW 4 */ + 3841 "10110000" // /* MW 3 */ + 3842 "10000010" // /* MW 2 */ + 3843 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 + 3844 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3845 "11000000" // /* MW 3 */ + 3846 "11010100" // /* MW 2 */ + 3847 "00011011" // /* MW 1 */ + 3848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3849 "00000000" // /* MW 1 */ + 3850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3851 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 186 6 +.src_ref 6 "superkernels.cpp" 186 16 + 3852 "10000100" // JNZ r16, #4016 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4016 delay_slots=5 */ + 3853 "00000001" // /* MW 5 */ + 3854 "01000000" // /* MW 4 */ + 3855 "11011000" // /* MW 3 */ + 3856 "00000111" // /* MW 2 */ + 3857 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 183 22 first +.delay_slot + 3858 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3859 "10010000" // /* MW 3 */ + 3860 "01100010" // /* MW 2 */ + 3861 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 183 30 +.delay_slot + 3862 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3863 "11111011" // /* MW 3 */ + 3864 "01100011" // /* MW 2 */ + 3865 "00010100" // /* MW 1 */ +.delay_slot + 3866 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3867 "00111101" // /* MW 3 */ + 3868 "11110100" // /* MW 2 */ + 3869 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 6 "superkernels.cpp" 183 11 +.delay_slot + 3870 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3871 "01110000" // /* MW 7 */ + 3872 "01100000" // /* MW 6 */ + 3873 "00110000" // /* MW 5 */ + 3874 "00000011" // /* MW 4 */ + 3875 "00110000" // /* MW 3 */ + 3876 "11000110" // /* MW 2 */ + 3877 "11000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 189 4 +.src_ref 6 "superkernels.cpp" 200 2 +.delay_slot + 3878 "01000100" // MOVXM p0, #508672 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3879 "00000000" // /* MW 5 */ + 3880 "11000110" // /* MW 4 */ + 3881 "11000000" // /* MW 3 */ + 3882 "00000111" // /* MW 2 */ + 3883 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3884 "01000100" // MOVXM p2, #508464 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3885 "01100000" // /* MW 5 */ + 3886 "11000100" // /* MW 4 */ + 3887 "11000100" // /* MW 3 */ + 3888 "00000111" // /* MW 2 */ + 3889 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3890 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #508460 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3891 "00010000" // /* MW 9 */ + 3892 "00010110" // /* MW 8 */ + 3893 "00110001" // /* MW 7 */ + 3894 "11110001" // /* MW 6 */ + 3895 "00000001" // /* MW 5 */ + 3896 "00000000" // /* MW 4 */ + 3897 "11100000" // /* MW 3 */ + 3898 "11000000" // /* MW 2 */ + 3899 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3901 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 189 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 3902 "00000100" // JL #3072 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3072 delay_slots=5 */ + 3903 "00000001" // /* MW 5 */ + 3904 "00000000" // /* MW 4 */ + 3905 "00000000" // /* MW 3 */ + 3906 "00000110" // /* MW 2 */ + 3907 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3909 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3911 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3912 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3913 "00110001" // /* MW 3 */ + 3914 "00100000" // /* MW 2 */ + 3915 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 3916 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3917 "00000101" // /* MW 3 */ + 3918 "00100000" // /* MW 2 */ + 3919 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 3920 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3921 "00000000" // /* MW 15 */ + 3922 "00000000" // /* MW 14 */ + 3923 "01111000" // /* MW 13 */ + 3924 "10100101" // /* MW 12 */ + 3925 "00000001" // /* MW 11 */ + 3926 "00000000" // /* MW 10 */ + 3927 "00000000" // /* MW 9 */ + 3928 "10000000" // /* MW 8 */ + 3929 "00010001" // /* MW 7 */ + 3930 "00000110" // /* MW 6 */ + 3931 "00100010" // /* MW 5 */ + 3932 "00000000" // /* MW 4 */ + 3933 "11110000" // /* MW 3 */ + 3934 "00101100" // /* MW 2 */ + 3935 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 193 18 +.return_address + 3936 "01000100" // MOVXM p2, #508424 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3937 "00010000" // /* MW 5 */ + 3938 "11000100" // /* MW 4 */ + 3939 "11000100" // /* MW 3 */ + 3940 "00000111" // /* MW 2 */ + 3941 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 193 18 first +.src_ref 6 "superkernels.cpp" 193 65 + 3942 "10111010" // LDA r16, [p2]; MOVXM p2, #508672 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3943 "00010000" // /* MW 9 */ + 3944 "10000000" // /* MW 8 */ + 3945 "00110001" // /* MW 7 */ + 3946 "11110001" // /* MW 6 */ + 3947 "00000001" // /* MW 5 */ + 3948 "00000000" // /* MW 4 */ + 3949 "11010000" // /* MW 3 */ + 3950 "11000010" // /* MW 2 */ + 3951 "01000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 191 51 +.src_ref 6 "superkernels.cpp" 193 65 +.src_ref 6 "superkernels.cpp" 200 2 + 3952 "10111010" // LDA r17, [p2]; MOVXM p2, #508672 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3953 "00010000" // /* MW 9 */ + 3954 "10000000" // /* MW 8 */ + 3955 "00110001" // /* MW 7 */ + 3956 "11110001" // /* MW 6 */ + 3957 "00000001" // /* MW 5 */ + 3958 "00000000" // /* MW 4 */ + 3959 "11010000" // /* MW 3 */ + 3960 "11000110" // /* MW 2 */ + 3961 "01000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 191 51 first +.src_ref 6 "superkernels.cpp" 193 16 +.src_ref 6 "superkernels.cpp" 198 47 + 3962 "10111010" // LDA.u16 r18, [p2, #10]; MOVXM p1, #508428 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3963 "00010000" // /* MW 9 */ + 3964 "00000110" // /* MW 8 */ + 3965 "10110001" // /* MW 7 */ + 3966 "11110000" // /* MW 6 */ + 3967 "00000001" // /* MW 5 */ + 3968 "00000000" // /* MW 4 */ + 3969 "01010000" // /* MW 3 */ + 3970 "11001011" // /* MW 2 */ + 3971 "01001010" // /* MW 1 */ + 3972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3973 "00000000" // /* MW 1 */ + 3974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3975 "00000000" // /* MW 1 */ + 3976 "10000100" // J #4032 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=4032 delay_slots=5 */ + 3977 "00000000" // /* MW 5 */ + 3978 "00000000" // /* MW 4 */ + 3979 "11100000" // /* MW 3 */ + 3980 "00000111" // /* MW 2 */ + 3981 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 191 13 +.delay_slot + 3982 "01000100" // MOVXM p0, #508456 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3983 "01010000" // /* MW 5 */ + 3984 "11000100" // /* MW 4 */ + 3985 "11000000" // /* MW 3 */ + 3986 "00000111" // /* MW 2 */ + 3987 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3989 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 193 27 first +.delay_slot + 3990 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3991 "00001111" // /* MW 3 */ + 3992 "01100001" // /* MW 2 */ + 3993 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 191 13 first +.delay_slot + 3994 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3995 "10100011" // /* MW 5 */ + 3996 "00001100" // /* MW 4 */ + 3997 "11110000" // /* MW 3 */ + 3998 "00101100" // /* MW 2 */ + 3999 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 193 16 first +.delay_slot + 4000 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4001 "00000000" // /* MW 15 */ + 4002 "00000000" // /* MW 14 */ + 4003 "01111000" // /* MW 13 */ + 4004 "10100101" // /* MW 12 */ + 4005 "00000001" // /* MW 11 */ + 4006 "00000000" // /* MW 10 */ + 4007 "00000000" // /* MW 9 */ + 4008 "10000000" // /* MW 8 */ + 4009 "00010001" // /* MW 7 */ + 4010 "00000110" // /* MW 6 */ + 4011 "00100001" // /* MW 5 */ + 4012 "00000000" // /* MW 4 */ + 4013 "11110000" // /* MW 3 */ + 4014 "00101100" // /* MW 2 */ + 4015 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 6 "superkernels.cpp" 198 47 +.src_ref 6 "superkernels.cpp" 200 2 + 4016 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #508428; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4017 "00000000" // /* MW 15 */ + 4018 "00000000" // /* MW 14 */ + 4019 "00010000" // /* MW 13 */ + 4020 "00000110" // /* MW 12 */ + 4021 "10110001" // /* MW 11 */ + 4022 "11110000" // /* MW 10 */ + 4023 "00000001" // /* MW 9 */ + 4024 "00000000" // /* MW 8 */ + 4025 "10001011" // /* MW 7 */ + 4026 "10000000" // /* MW 6 */ + 4027 "00100010" // /* MW 5 */ + 4028 "00000000" // /* MW 4 */ + 4029 "11110000" // /* MW 3 */ + 4030 "00101100" // /* MW 2 */ + 4031 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 +.src_ref 1 "io_buffer_main.h" 242 49 first + 4032 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4033 "00000000" // /* MW 7 */ + 4034 "11000011" // /* MW 6 */ + 4035 "10110011" // /* MW 5 */ + 4036 "00000011" // /* MW 4 */ + 4037 "01100000" // /* MW 3 */ + 4038 "10010001" // /* MW 2 */ + 4039 "01110011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 197 2 + 4040 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4041 "00010000" // /* MW 9 */ + 4042 "00000000" // /* MW 8 */ + 4043 "00110001" // /* MW 7 */ + 4044 "11110000" // /* MW 6 */ + 4045 "00000001" // /* MW 5 */ + 4046 "00000000" // /* MW 4 */ + 4047 "11010000" // /* MW 3 */ + 4048 "11101110" // /* MW 2 */ + 4049 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 4050 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4051 "00010110" // /* MW 3 */ + 4052 "11111110" // /* MW 2 */ + 4053 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 4054 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4055 "00110110" // /* MW 3 */ + 4056 "11111110" // /* MW 2 */ + 4057 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first + 4058 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4059 "01010110" // /* MW 3 */ + 4060 "01000110" // /* MW 2 */ + 4061 "00000111" // /* MW 1 */ + 4062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4063 "00000000" // /* MW 1 */ + 4064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4065 "00000000" // /* MW 1 */ + 4066 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4067 "00000000" // /* MW 1 */ + 4068 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4069 "00000000" // /* MW 1 */ + 4070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4071 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 4072 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4073 "00000010" // /* MW 3 */ + 4074 "01100001" // /* MW 2 */ + 4075 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 + 4076 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4077 "00010001" // /* MW 3 */ + 4078 "00000110" // /* MW 2 */ + 4079 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 + 4080 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4081 "11111101" // /* MW 3 */ + 4082 "11100000" // /* MW 2 */ + 4083 "00010111" // /* MW 1 */ + 4084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4085 "00000000" // /* MW 1 */ + 4086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4087 "00000000" // /* MW 1 */ + 4088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4089 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 4090 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4091 "00001000" // /* MW 3 */ + 4092 "10010011" // /* MW 2 */ + 4093 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 198 45 + 4094 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4095 "10000001" // /* MW 5 */ + 4096 "10101101" // /* MW 4 */ + 4097 "10100111" // /* MW 3 */ + 4098 "00000000" // /* MW 2 */ + 4099 "00000100" // /* MW 1 */ + 4100 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4101 "00000000" // /* MW 1 */ + 4102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4103 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 197 2 first + 4104 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4105 "00110110" // /* MW 3 */ + 4106 "00000110" // /* MW 2 */ + 4107 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 1 "io_buffer_main.h" 348 51 + 4108 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4109 "10000001" // /* MW 5 */ + 4110 "11011101" // /* MW 4 */ + 4111 "11011100" // /* MW 3 */ + 4112 "11001010" // /* MW 2 */ + 4113 "11000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 198 47 first + 4114 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4115 "01110110" // /* MW 3 */ + 4116 "00000110" // /* MW 2 */ + 4117 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 4118 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4119 "10011110" // /* MW 3 */ + 4120 "01011100" // /* MW 2 */ + 4121 "00000111" // /* MW 1 */ + 4122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4123 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 200 2 first +.no_stack_arguments + 4124 "00000100" // JL #3648 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3648 delay_slots=5 */ + 4125 "00000001" // /* MW 5 */ + 4126 "00000000" // /* MW 4 */ + 4127 "00100000" // /* MW 3 */ + 4128 "00000111" // /* MW 2 */ + 4129 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4131 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 197 2 first +.delay_slot + 4132 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4133 "00000111" // /* MW 3 */ + 4134 "01100010" // /* MW 2 */ + 4135 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 197 2 +.delay_slot + 4136 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4137 "00110001" // /* MW 3 */ + 4138 "00000110" // /* MW 2 */ + 4139 "00001000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 198 45 first +.delay_slot + 4140 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4141 "00001101" // /* MW 3 */ + 4142 "11100001" // /* MW 2 */ + 4143 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 198 45 +.delay_slot + 4144 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4145 "00000000" // /* MW 15 */ + 4146 "00000000" // /* MW 14 */ + 4147 "10101000" // /* MW 13 */ + 4148 "10100000" // /* MW 12 */ + 4149 "00110100" // /* MW 11 */ + 4150 "00000000" // /* MW 10 */ + 4151 "00000000" // /* MW 9 */ + 4152 "00000000" // /* MW 8 */ + 4153 "01011011" // /* MW 7 */ + 4154 "00000001" // /* MW 6 */ + 4155 "00100000" // /* MW 5 */ + 4156 "00000000" // /* MW 4 */ + 4157 "11110000" // /* MW 3 */ + 4158 "00101100" // /* MW 2 */ + 4159 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first +.src_ref 6 "superkernels.cpp" 202 6 +.src_ref 6 "superkernels.cpp" 203 14 +.return_address + 4160 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4161 "00010000" // /* MW 9 */ + 4162 "00000000" // /* MW 8 */ + 4163 "00110001" // /* MW 7 */ + 4164 "11110011" // /* MW 6 */ + 4165 "00000001" // /* MW 5 */ + 4166 "00000000" // /* MW 4 */ + 4167 "11010000" // /* MW 3 */ + 4168 "11000110" // /* MW 2 */ + 4169 "11001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 449 8 + 4170 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4171 "00000101" // /* MW 3 */ + 4172 "00100000" // /* MW 2 */ + 4173 "00010000" // /* MW 1 */ + 4174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4175 "00000000" // /* MW 1 */ + 4176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4177 "00000000" // /* MW 1 */ + 4178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4179 "00000000" // /* MW 1 */ + 4180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4181 "00000000" // /* MW 1 */ + 4182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4183 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 4184 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4185 "00001000" // /* MW 3 */ + 4186 "01010001" // /* MW 2 */ + 4187 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first +.src_ref 6 "superkernels.cpp" 202 19 + 4188 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #508456 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4189 "00010000" // /* MW 9 */ + 4190 "00010100" // /* MW 8 */ + 4191 "00110001" // /* MW 7 */ + 4192 "11110001" // /* MW 6 */ + 4193 "00000001" // /* MW 5 */ + 4194 "00000000" // /* MW 4 */ + 4195 "11010000" // /* MW 3 */ + 4196 "11001110" // /* MW 2 */ + 4197 "11111100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 202 6 first + 4198 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4199 "00110110" // /* MW 3 */ + 4200 "00000110" // /* MW 2 */ + 4201 "00000110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 202 19 + 4202 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4203 "01010110" // /* MW 3 */ + 4204 "00000110" // /* MW 2 */ + 4205 "00000010" // /* MW 1 */ + 4206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4207 "00000000" // /* MW 1 */ + 4208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4209 "00000000" // /* MW 1 */ + 4210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4211 "00000000" // /* MW 1 */ + 4212 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4213 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 first + 4214 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4215 "00110001" // /* MW 3 */ + 4216 "00100001" // /* MW 2 */ + 4217 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 4218 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4219 "00010001" // /* MW 3 */ + 4220 "11100110" // /* MW 2 */ + 4221 "00001111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 202 16 first + 4222 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4223 "00101000" // /* MW 3 */ + 4224 "01100001" // /* MW 2 */ + 4225 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 202 6 + 4226 "10000100" // JNZ r16, #4256 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4256 delay_slots=5 */ + 4227 "00000001" // /* MW 5 */ + 4228 "01000000" // /* MW 4 */ + 4229 "01010000" // /* MW 3 */ + 4230 "00001000" // /* MW 2 */ + 4231 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4233 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4234 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4235 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4236 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4237 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4238 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4239 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4240 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4241 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 203 14 + 4242 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4243 "00000001" // /* MW 3 */ + 4244 "00100000" // /* MW 2 */ + 4245 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 203 14 first + 4246 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4247 "00000000" // /* MW 9 */ + 4248 "00000000" // /* MW 8 */ + 4249 "00000000" // /* MW 7 */ + 4250 "10000000" // /* MW 6 */ + 4251 "00010001" // /* MW 5 */ + 4252 "00000110" // /* MW 4 */ + 4253 "11110110" // /* MW 3 */ + 4254 "00101100" // /* MW 2 */ + 4255 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 6 "superkernels.cpp" 205 + 4256 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4257 "00111001" // /* MW 3 */ + 4258 "11110100" // /* MW 2 */ + 4259 "00000111" // /* MW 1 */ + 4260 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4261 "00011001" // /* MW 3 */ + 4262 "11111011" // /* MW 2 */ + 4263 "00000111" // /* MW 1 */ + 4264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4265 "00000000" // /* MW 1 */ + 4266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4267 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 4268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4269 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4270 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4271 "11110001" // /* MW 3 */ + 4272 "11111101" // /* MW 2 */ + 4273 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4275 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 205 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4276 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 4277 "00000000" // /* MW 3 */ + 4278 "00101000" // /* MW 2 */ + 4279 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4280 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4281 "10100000" // /* MW 3 */ + 4282 "01100111" // /* MW 2 */ + 4283 "00011111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 205 +.delay_slot + 4284 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4285 "00000001" // /* MW 5 */ + 4286 "00000000" // /* MW 4 */ + 4287 "00000000" // /* MW 3 */ + 4288 "11111000" // /* MW 2 */ + 4289 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4291 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4293 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4294 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 4295 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv +.function shared_setup_backbone _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv +.src_ref 2 "elementwise_binary_shared.h" 164 first +.src_ref 2 "elementwise_binary_shared.h" 170 22 +.src_ref 2 "elementwise_binary_shared.h" 170 24 first +.function_start + 4304 "10111010" // LDA el0, [p1], #4; MOVXM p0, #508736 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4305 "00010000" // /* MW 9 */ + 4306 "10100000" // /* MW 8 */ + 4307 "00110001" // /* MW 7 */ + 4308 "11110000" // /* MW 6 */ + 4309 "00000001" // /* MW 5 */ + 4310 "00000000" // /* MW 4 */ + 4311 "11010000" // /* MW 3 */ + 4312 "10000101" // /* MW 2 */ + 4313 "00100011" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 36 +.src_ref 2 "elementwise_binary_shared.h" 175 36 +.src_ref 2 "elementwise_binary_shared.h" 175 48 + 4314 "10111010" // MOVA r0, #-128; MOVX r1, #256; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4315 "01011000" // /* MW 9 */ + 4316 "00000000" // /* MW 8 */ + 4317 "00001000" // /* MW 7 */ + 4318 "00001011" // /* MW 6 */ + 4319 "00010000" // /* MW 5 */ + 4320 "00001000" // /* MW 4 */ + 4321 "00000000" // /* MW 3 */ + 4322 "00000000" // /* MW 2 */ + 4323 "11110000" // /* MW 1 */ + 4324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4325 "00000000" // /* MW 1 */ + 4326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4327 "00000000" // /* MW 1 */ + 4328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4329 "00000000" // /* MW 1 */ + 4330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4331 "00000000" // /* MW 1 */ + 4332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4333 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 170 22 first + 4334 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4335 "00101001" // /* MW 3 */ + 4336 "00011100" // /* MW 2 */ + 4337 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 171 24 first + 4338 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4339 "00101110" // /* MW 3 */ + 4340 "00011100" // /* MW 2 */ + 4341 "00000001" // /* MW 1 */ + 4342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4343 "00000000" // /* MW 1 */ + 4344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4345 "00000000" // /* MW 1 */ + 4346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4347 "00000000" // /* MW 1 */ + 4348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4349 "00000000" // /* MW 1 */ + 4350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4351 "00000000" // /* MW 1 */ + 4352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4353 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 171 22 + 4354 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4355 "00101001" // /* MW 3 */ + 4356 "00011100" // /* MW 2 */ + 4357 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 172 24 first + 4358 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4359 "00101110" // /* MW 3 */ + 4360 "00000100" // /* MW 2 */ + 4361 "00000001" // /* MW 1 */ + 4362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4363 "00000000" // /* MW 1 */ + 4364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4365 "00000000" // /* MW 1 */ + 4366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4367 "00000000" // /* MW 1 */ + 4368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4369 "00000000" // /* MW 1 */ + 4370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4371 "00000000" // /* MW 1 */ + 4372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4373 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 172 22 + 4374 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4375 "00101001" // /* MW 3 */ + 4376 "00011100" // /* MW 2 */ + 4377 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 173 24 first + 4378 "10011000" // LDA r3, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4379 "01110110" // /* MW 3 */ + 4380 "00010100" // /* MW 2 */ + 4381 "00000001" // /* MW 1 */ + 4382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4383 "00000000" // /* MW 1 */ + 4384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4385 "00000000" // /* MW 1 */ + 4386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4387 "00000000" // /* MW 1 */ + 4388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4389 "00000000" // /* MW 1 */ + 4390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4391 "00000000" // /* MW 1 */ + 4392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4393 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 173 22 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4394 "10011000" // ST r3, [p0], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4395 "01110001" // /* MW 3 */ + 4396 "01001100" // /* MW 2 */ + 4397 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 34 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4398 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4399 "00010111" // /* MW 3 */ + 4400 "00000100" // /* MW 2 */ + 4401 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 176 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4402 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 4403 "00000000" // /* MW 3 */ + 4404 "00101000" // /* MW 2 */ + 4405 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4406 "01000100" // MOVXM r2, #65280 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4407 "00000000" // /* MW 5 */ + 4408 "00111110" // /* MW 4 */ + 4409 "11110001" // /* MW 3 */ + 4410 "00000000" // /* MW 2 */ + 4411 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 48 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4412 "10011000" // AND r2, r3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4413 "00100100" // /* MW 3 */ + 4414 "11000100" // /* MW 2 */ + 4415 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4416 "10011000" // EQ r27, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4417 "00100111" // /* MW 3 */ + 4418 "01110110" // /* MW 2 */ + 4419 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 36 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4420 "00011000" // SEL.EQZ r0, r0, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4421 "10000010" // /* MW 3 */ + 4422 "00000001" // /* MW 2 */ + 4423 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_end0 + 4425 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv +.function setup _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv +.src_ref 2 "elementwise_binary_shared.h" 178 +.src_ref 2 "elementwise_binary_shared.h" 178 first +.function_start + 4432 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4433 "00000001" // /* MW 5 */ + 4434 "00000000" // /* MW 4 */ + 4435 "00000000" // /* MW 3 */ + 4436 "00001000" // /* MW 2 */ + 4437 "00000000" // /* MW 1 */ + 4438 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4439 "00111101" // /* MW 3 */ + 4440 "11111000" // /* MW 2 */ + 4441 "00001111" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 179 8 first +.no_stack_arguments + 4442 "00000100" // JL #4304 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4304 delay_slots=5 */ + 4443 "00000001" // /* MW 5 */ + 4444 "00000000" // /* MW 4 */ + 4445 "01101000" // /* MW 3 */ + 4446 "00001000" // /* MW 2 */ + 4447 "00000000" // /* MW 1 */ +.delay_slot + 4448 "11111000" // MOV r0, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4449 "10100000" // /* MW 3 */ + 4450 "00010111" // /* MW 2 */ + 4451 "00011000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 179 8 +.delay_slot + 4452 "00111010" // ST r0, [sp, #-4]; MOVXM r15, #508736 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4453 "00010001" // /* MW 9 */ + 4454 "10100000" // /* MW 8 */ + 4455 "11101001" // /* MW 7 */ + 4456 "11110001" // /* MW 6 */ + 4457 "00000001" // /* MW 5 */ + 4458 "00000000" // /* MW 4 */ + 4459 "10110000" // /* MW 3 */ + 4460 "10000010" // /* MW 2 */ + 4461 "11111111" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 179 8 +.delay_slot + 4462 "11111000" // MOV p0, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4463 "10100000" // /* MW 3 */ + 4464 "01100111" // /* MW 2 */ + 4465 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4467 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4468 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4469 "10000001" // /* MW 11 */ + 4470 "10101101" // /* MW 10 */ + 4471 "00000000" // /* MW 9 */ + 4472 "00000000" // /* MW 8 */ + 4473 "00000000" // /* MW 7 */ + 4474 "00000000" // /* MW 6 */ + 4475 "00100000" // /* MW 5 */ + 4476 "00000000" // /* MW 4 */ + 4477 "11110000" // /* MW 3 */ + 4478 "00101100" // /* MW 2 */ + 4479 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 179 8 +.src_ref 2 "elementwise_binary_shared.h" 181 4 +.src_ref 3 "add_impl.h" 105 29 +.return_address + 4480 "10111010" // LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p1, r15, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4481 "00001000" // /* MW 9 */ + 4482 "11000100" // /* MW 8 */ + 4483 "10110011" // /* MW 7 */ + 4484 "01101000" // /* MW 6 */ + 4485 "00000000" // /* MW 5 */ + 4486 "00000001" // /* MW 4 */ + 4487 "00100000" // /* MW 3 */ + 4488 "00000111" // /* MW 2 */ + 4489 "11111111" // /* MW 1 */ +.src_ref 3 "add_impl.h" 105 29 +.src_ref 3 "add_impl.h" 106 37 +.src_ref 3 "add_impl.h" 106 39 + 4490 "10111010" // MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4491 "01011000" // /* MW 9 */ + 4492 "11111101" // /* MW 8 */ + 4493 "00000111" // /* MW 7 */ + 4494 "00001000" // /* MW 6 */ + 4495 "10000000" // /* MW 5 */ + 4496 "00000001" // /* MW 4 */ + 4497 "10000000" // /* MW 3 */ + 4498 "11100010" // /* MW 2 */ + 4499 "00000001" // /* MW 1 */ +.src_ref 3 "add_impl.h" 105 29 first +.src_ref 3 "add_impl.h" 106 39 + 4500 "01111010" // LDA r15, [sp, #-4]; ST r16, [p1], m0; MOVX r16, #-128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4501 "00000001" // /* MW 9 */ + 4502 "10100000" // /* MW 8 */ + 4503 "00000111" // /* MW 7 */ + 4504 "10000000" // /* MW 6 */ + 4505 "00010001" // /* MW 5 */ + 4506 "00001010" // /* MW 4 */ + 4507 "00100001" // /* MW 3 */ + 4508 "10111110" // /* MW 2 */ + 4509 "11111111" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 50 first + 4510 "10011000" // LDA.u8 r18, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4511 "01001010" // /* MW 3 */ + 4512 "00000110" // /* MW 2 */ + 4513 "00000001" // /* MW 1 */ + 4514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4515 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4517 "00000000" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 37 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4518 "00011000" // ST.s16 r16, [p1, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4519 "00010111" // /* MW 3 */ + 4520 "00000010" // /* MW 2 */ + 4521 "00000001" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 181 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4522 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 4523 "00000000" // /* MW 3 */ + 4524 "00101000" // /* MW 2 */ + 4525 "00010000" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 54 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4526 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4527 "00000101" // /* MW 3 */ + 4528 "00100010" // /* MW 2 */ + 4529 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 181 4 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4530 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4531 "00000001" // /* MW 5 */ + 4532 "00000000" // /* MW 4 */ + 4533 "00000000" // /* MW 3 */ + 4534 "11111000" // /* MW 2 */ + 4535 "11111111" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 54 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4536 "10011000" // EQ r27, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4537 "00100111" // /* MW 3 */ + 4538 "01110111" // /* MW 2 */ + 4539 "00010100" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 39 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4540 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4541 "10000010" // /* MW 3 */ + 4542 "00100001" // /* MW 2 */ + 4543 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + 4545 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.function run _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.src_ref 2 "elementwise_binary_shared.h" 186 first +.src_ref 2 "elementwise_binary_shared.h" 191 8 first +.tail_call +.function_start + 4560 "10000100" // J #3152 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=3152 delay_slots=5 */ + 4561 "00000000" // /* MW 5 */ + 4562 "00000000" // /* MW 4 */ + 4563 "00101000" // /* MW 3 */ + 4564 "00000110" // /* MW 2 */ + 4565 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 191 8 +.delay_slot + 4566 "01000100" // MOVXM p3, #508736 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4567 "10000000" // /* MW 5 */ + 4568 "11000110" // /* MW 4 */ + 4569 "11000110" // /* MW 3 */ + 4570 "00000111" // /* MW 2 */ + 4571 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4572 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4573 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4574 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4575 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4577 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0 + 4579 "00000000" // /* MW 1 */ +.label __Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0 +.label _Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.function superkernel_add1d _Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.src_ref 6 "superkernels.cpp" 240 first +.src_ref 6 "superkernels.cpp" 245 6 +.function_start + 4592 "01000100" // MOVXM p4, #508416 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4593 "00000000" // /* MW 5 */ + 4594 "11000100" // /* MW 4 */ + 4595 "11001000" // /* MW 3 */ + 4596 "00000111" // /* MW 2 */ + 4597 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 245 6 first + 4598 "11010100" // LDA r16, [p4]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4599 "11000001" // /* MW 5 */ + 4600 "10110101" // /* MW 4 */ + 4601 "11011000" // /* MW 3 */ + 4602 "11000010" // /* MW 2 */ + 4603 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 240 + 4604 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4605 "00000001" // /* MW 5 */ + 4606 "00000000" // /* MW 4 */ + 4607 "00000000" // /* MW 3 */ + 4608 "00001000" // /* MW 2 */ + 4609 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 242 22 first + 4610 "00111010" // ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4611 "01111001" // /* MW 9 */ + 4612 "01100000" // /* MW 8 */ + 4613 "11001010" // /* MW 7 */ + 4614 "10000001" // /* MW 6 */ + 4615 "00010100" // /* MW 5 */ + 4616 "00100011" // /* MW 4 */ + 4617 "10110000" // /* MW 3 */ + 4618 "00111010" // /* MW 2 */ + 4619 "11111111" // /* MW 1 */ + 4620 "00000010" // ST p0, [sp, #-20]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4621 "01110000" // /* MW 7 */ + 4622 "11010000" // /* MW 6 */ + 4623 "00001011" // /* MW 5 */ + 4624 "00000000" // /* MW 4 */ + 4625 "10110000" // /* MW 3 */ + 4626 "10000011" // /* MW 2 */ + 4627 "11111101" // /* MW 1 */ + 4628 "10011000" // ST r0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4629 "00010101" // /* MW 3 */ + 4630 "11111100" // /* MW 2 */ + 4631 "00001111" // /* MW 1 */ + 4632 "10011000" // ST lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4633 "00111101" // /* MW 3 */ + 4634 "11110000" // /* MW 2 */ + 4635 "00001111" // /* MW 1 */ + 4636 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4637 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 245 6 first +.src_ref 6 "superkernels.cpp" 245 16 first + 4638 "10000100" // JNZ r16, #4784 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4784 delay_slots=5 */ + 4639 "00000001" // /* MW 5 */ + 4640 "01000000" // /* MW 4 */ + 4641 "01011000" // /* MW 3 */ + 4642 "00001001" // /* MW 2 */ + 4643 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 242 30 first +.delay_slot + 4644 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4645 "11111011" // /* MW 3 */ + 4646 "01100011" // /* MW 2 */ + 4647 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 242 11 +.delay_slot + 4648 "01000100" // MOVXM p2, #508424 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4649 "00010000" // /* MW 5 */ + 4650 "11000100" // /* MW 4 */ + 4651 "11000100" // /* MW 3 */ + 4652 "00000111" // /* MW 2 */ + 4653 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 242 11 +.delay_slot + 4654 "00000010" // ST r17, [p2]; MOV p2, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4655 "01110000" // /* MW 7 */ + 4656 "01100000" // /* MW 6 */ + 4657 "00110111" // /* MW 5 */ + 4658 "00000001" // /* MW 4 */ + 4659 "00110000" // /* MW 3 */ + 4660 "11000110" // /* MW 2 */ + 4661 "01000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.delay_slot + 4662 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4663 "11000000" // /* MW 3 */ + 4664 "11010110" // /* MW 2 */ + 4665 "00011011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 248 4 +.src_ref 6 "superkernels.cpp" 250 28 +.src_ref 6 "superkernels.cpp" 252 42 +.src_ref 6 "superkernels.cpp" 264 2 +.delay_slot + 4666 "00111010" // ST p2, [sp, #-12]; MOVXM p7, #508736 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4667 "00010001" // /* MW 9 */ + 4668 "10100000" // /* MW 8 */ + 4669 "10110001" // /* MW 7 */ + 4670 "11110011" // /* MW 6 */ + 4671 "00000001" // /* MW 5 */ + 4672 "00000000" // /* MW 4 */ + 4673 "10110000" // /* MW 3 */ + 4674 "10100011" // /* MW 2 */ + 4675 "11111110" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.src_ref 6 "superkernels.cpp" 248 4 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4676 "00111010" // MOVS p0, p7; MOVXM p2, #508464 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4677 "00010001" // /* MW 9 */ + 4678 "00011000" // /* MW 8 */ + 4679 "00110001" // /* MW 7 */ + 4680 "11110001" // /* MW 6 */ + 4681 "00000001" // /* MW 5 */ + 4682 "00000000" // /* MW 4 */ + 4683 "01100000" // /* MW 3 */ + 4684 "10010001" // /* MW 2 */ + 4685 "00010011" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4686 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #508460 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4687 "00010000" // /* MW 9 */ + 4688 "00010110" // /* MW 8 */ + 4689 "00110001" // /* MW 7 */ + 4690 "11110001" // /* MW 6 */ + 4691 "00000001" // /* MW 5 */ + 4692 "00000000" // /* MW 4 */ + 4693 "11100000" // /* MW 3 */ + 4694 "11000000" // /* MW 2 */ + 4695 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4697 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 248 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 4698 "00000100" // JL #4432 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4432 delay_slots=5 */ + 4699 "00000001" // /* MW 5 */ + 4700 "00000000" // /* MW 4 */ + 4701 "10101000" // /* MW 3 */ + 4702 "00001000" // /* MW 2 */ + 4703 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4704 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4705 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4707 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4708 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4709 "00110001" // /* MW 3 */ + 4710 "00100000" // /* MW 2 */ + 4711 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 4712 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4713 "00000101" // /* MW 3 */ + 4714 "00100000" // /* MW 2 */ + 4715 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 4716 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4717 "00010001" // /* MW 3 */ + 4718 "00000110" // /* MW 2 */ + 4719 "00001010" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 252 18 +.src_ref 6 "superkernels.cpp" 252 42 first +.return_address + 4720 "10111010" // LDA r16, [p7]; MOVXM p1, #508424 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4721 "00010000" // /* MW 9 */ + 4722 "00000100" // /* MW 8 */ + 4723 "10110001" // /* MW 7 */ + 4724 "11110000" // /* MW 6 */ + 4725 "00000001" // /* MW 5 */ + 4726 "00000000" // /* MW 4 */ + 4727 "11010000" // /* MW 3 */ + 4728 "11000010" // /* MW 2 */ + 4729 "11100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 252 16 +.src_ref 6 "superkernels.cpp" 252 18 +.src_ref 6 "superkernels.cpp" 261 48 + 4730 "10111010" // LDA r17, [p1]; MOVXM p3, #508428 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4731 "00010000" // /* MW 9 */ + 4732 "00000110" // /* MW 8 */ + 4733 "10110001" // /* MW 7 */ + 4734 "11110001" // /* MW 6 */ + 4735 "00000001" // /* MW 5 */ + 4736 "00000000" // /* MW 4 */ + 4737 "11010000" // /* MW 3 */ + 4738 "11000110" // /* MW 2 */ + 4739 "00100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 250 28 first +.src_ref 6 "superkernels.cpp" 253 16 +.src_ref 6 "superkernels.cpp" 262 48 + 4740 "10111010" // LDA.u16 r18, [p7, #10]; MOVXM p1, #508432 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4741 "00010000" // /* MW 9 */ + 4742 "00001000" // /* MW 8 */ + 4743 "10110001" // /* MW 7 */ + 4744 "11110000" // /* MW 6 */ + 4745 "00000001" // /* MW 5 */ + 4746 "00000000" // /* MW 4 */ + 4747 "01010000" // /* MW 3 */ + 4748 "11001011" // /* MW 2 */ + 4749 "11101010" // /* MW 1 */ + 4750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4751 "00000000" // /* MW 1 */ + 4752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4753 "00000000" // /* MW 1 */ + 4754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4755 "00000000" // /* MW 1 */ + 4756 "10000100" // J #4800 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=4800 delay_slots=5 */ + 4757 "00000000" // /* MW 5 */ + 4758 "00000000" // /* MW 4 */ + 4759 "01100000" // /* MW 3 */ + 4760 "00001001" // /* MW 2 */ + 4761 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 250 13 +.delay_slot + 4762 "01000100" // MOVXM p2, #508456 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4763 "01010000" // /* MW 5 */ + 4764 "11000100" // /* MW 4 */ + 4765 "11000100" // /* MW 3 */ + 4766 "00000111" // /* MW 2 */ + 4767 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 252 27 first +.delay_slot + 4768 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4769 "00001111" // /* MW 3 */ + 4770 "01100001" // /* MW 2 */ + 4771 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 250 13 first +.delay_slot + 4772 "10011000" // ST r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4773 "01010001" // /* MW 3 */ + 4774 "00000110" // /* MW 2 */ + 4775 "00001010" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 252 16 first +.delay_slot + 4776 "10011000" // ST r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4777 "00010001" // /* MW 3 */ + 4778 "00000110" // /* MW 2 */ + 4779 "00001011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 253 16 first +.delay_slot + 4780 "10011000" // ST r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4781 "00010001" // /* MW 3 */ + 4782 "00000110" // /* MW 2 */ + 4783 "00001001" // /* MW 1 */ +.label TGT_F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 +.src_ref 6 "superkernels.cpp" 261 48 + 4784 "01000100" // MOVXM p3, #508428 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4785 "00011000" // /* MW 5 */ + 4786 "11000100" // /* MW 4 */ + 4787 "11000110" // /* MW 3 */ + 4788 "00000111" // /* MW 2 */ + 4789 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 262 48 + 4790 "10111010" // NOPA; MOVXM p1, #508432 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4791 "00010000" // /* MW 9 */ + 4792 "00001000" // /* MW 8 */ + 4793 "10110001" // /* MW 7 */ + 4794 "11110000" // /* MW 6 */ + 4795 "00000001" // /* MW 5 */ + 4796 "00000000" // /* MW 4 */ + 4797 "11110000" // /* MW 3 */ + 4798 "00101100" // /* MW 2 */ + 4799 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 +.src_ref 1 "io_buffer_main.h" 242 49 first + 4800 "00011000" // ADD.NC p0, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4801 "10000110" // /* MW 3 */ + 4802 "01100111" // /* MW 2 */ + 4803 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 256 2 + 4804 "10111010" // LDA r27, [p0], #-4; MOVXM p2, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4805 "00010000" // /* MW 9 */ + 4806 "00000000" // /* MW 8 */ + 4807 "00110001" // /* MW 7 */ + 4808 "11110001" // /* MW 6 */ + 4809 "00000001" // /* MW 5 */ + 4810 "00000000" // /* MW 4 */ + 4811 "11010000" // /* MW 3 */ + 4812 "11101110" // /* MW 2 */ + 4813 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 4814 "10011000" // LDA r16, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4815 "00010110" // /* MW 3 */ + 4816 "11111110" // /* MW 2 */ + 4817 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 4818 "10011000" // LDA r17, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4819 "00110110" // /* MW 3 */ + 4820 "11111110" // /* MW 2 */ + 4821 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 256 2 first + 4822 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4823 "01010110" // /* MW 3 */ + 4824 "00000110" // /* MW 2 */ + 4825 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first + 4826 "10011000" // LDA r19, [p0, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4827 "01110110" // /* MW 3 */ + 4828 "01000110" // /* MW 2 */ + 4829 "00000000" // /* MW 1 */ + 4830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4831 "00000000" // /* MW 1 */ + 4832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4833 "00000000" // /* MW 1 */ + 4834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4835 "00000000" // /* MW 1 */ + 4836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4837 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 4838 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4839 "00000010" // /* MW 3 */ + 4840 "01100001" // /* MW 2 */ + 4841 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 +.src_ref 6 "superkernels.cpp" 256 2 first + 4842 "01011100" // ST r16, [p0]; ADD r16, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4843 "00001110" // /* MW 5 */ + 4844 "01000000" // /* MW 4 */ + 4845 "00111001" // /* MW 3 */ + 4846 "11000010" // /* MW 2 */ + 4847 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 256 2 + 4848 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4849 "00010001" // /* MW 3 */ + 4850 "00000110" // /* MW 2 */ + 4851 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 +.src_ref 1 "io_buffer_main.h" 419 8 + 4852 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4853 "11111101" // /* MW 3 */ + 4854 "11100000" // /* MW 2 */ + 4855 "00010111" // /* MW 1 */ + 4856 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4857 "00000000" // /* MW 1 */ + 4858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4859 "00000000" // /* MW 1 */ + 4860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4861 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 4862 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4863 "00001000" // /* MW 3 */ + 4864 "11010011" // /* MW 2 */ + 4865 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 first + 4866 "00011000" // ADD.NC p2, r14, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4867 "00000110" // /* MW 3 */ + 4868 "01100111" // /* MW 2 */ + 4869 "00011010" // /* MW 1 */ + 4870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4871 "00000000" // /* MW 1 */ + 4872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4873 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 + 4874 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4875 "01110110" // /* MW 3 */ + 4876 "11111111" // /* MW 2 */ + 4877 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 4878 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4879 "00110110" // /* MW 3 */ + 4880 "11111110" // /* MW 2 */ + 4881 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 4882 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4883 "01010110" // /* MW 3 */ + 4884 "11111110" // /* MW 2 */ + 4885 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 47 first + 4886 "10011000" // LDA r19, [p2, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4887 "01110110" // /* MW 3 */ + 4888 "01010110" // /* MW 2 */ + 4889 "00000010" // /* MW 1 */ + 4890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4891 "00000000" // /* MW 1 */ + 4892 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4893 "00000000" // /* MW 1 */ + 4894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4895 "00000000" // /* MW 1 */ + 4896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4897 "00000000" // /* MW 1 */ + 4898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4899 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 4900 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4901 "00010010" // /* MW 3 */ + 4902 "10100011" // /* MW 2 */ + 4903 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 + 4904 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4905 "00110001" // /* MW 3 */ + 4906 "00000110" // /* MW 2 */ + 4907 "00001010" // /* MW 1 */ + 4908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4909 "00000000" // /* MW 1 */ + 4910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4911 "00000000" // /* MW 1 */ + 4912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4913 "00000000" // /* MW 1 */ + 4914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4915 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 4916 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4917 "00001000" // /* MW 3 */ + 4918 "11010011" // /* MW 2 */ + 4919 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 32 +.src_ref 6 "superkernels.cpp" 261 46 +.src_ref 6 "superkernels.cpp" 262 46 + 4920 "00111010" // MOVS p6, p2; MOVX r16, #1; MOV r14, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4921 "01111001" // /* MW 9 */ + 4922 "01100000" // /* MW 8 */ + 4923 "11001110" // /* MW 7 */ + 4924 "00101001" // /* MW 6 */ + 4925 "00000000" // /* MW 5 */ + 4926 "00000001" // /* MW 4 */ + 4927 "01100000" // /* MW 3 */ + 4928 "00010001" // /* MW 2 */ + 4929 "11010001" // /* MW 1 */ + 4930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4931 "00000000" // /* MW 1 */ + 4932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4933 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 + 4934 "00011000" // LDA p4, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4935 "00011001" // /* MW 3 */ + 4936 "11101110" // /* MW 2 */ + 4937 "00000111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 261 48 first + 4938 "00001100" // LDA r17, [p3]; ST p0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4939 "00111011" // /* MW 5 */ + 4940 "11011000" // /* MW 4 */ + 4941 "11011111" // /* MW 3 */ + 4942 "11000110" // /* MW 2 */ + 4943 "01100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 262 48 first +.src_ref 6 "superkernels.cpp" 264 2 + 4944 "11010100" // LDA r20, [p1]; MOV p3, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4945 "10000001" // /* MW 5 */ + 4946 "11011101" // /* MW 4 */ + 4947 "11010110" // /* MW 3 */ + 4948 "11010010" // /* MW 2 */ + 4949 "00100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 4950 "10011000" // LDA r18, [p2], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4951 "01010110" // /* MW 3 */ + 4952 "01001110" // /* MW 2 */ + 4953 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4954 "10011000" // LDA p2, [p0], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4955 "00011110" // /* MW 3 */ + 4956 "01011101" // /* MW 2 */ + 4957 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 40 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4958 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4959 "11000000" // /* MW 3 */ + 4960 "01100000" // /* MW 2 */ + 4961 "00011111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4962 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4963 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4964 "10011000" // LDA r19, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4965 "01110110" // /* MW 3 */ + 4966 "00000110" // /* MW 2 */ + 4967 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4969 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 264 2 first +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 4970 "00000100" // JL #4560 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4560 delay_slots=5 */ + 4971 "00000001" // /* MW 5 */ + 4972 "00000000" // /* MW 4 */ + 4973 "11101000" // /* MW 3 */ + 4974 "00001000" // /* MW 2 */ + 4975 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 40 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4976 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4977 "11000000" // /* MW 3 */ + 4978 "11010100" // /* MW 2 */ + 4979 "00011011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 261 46 first +.delay_slot + 4980 "10011000" // LSHL r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4981 "00001101" // /* MW 3 */ + 4982 "01100011" // /* MW 2 */ + 4983 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 262 46 first +.delay_slot + 4984 "10011000" // LSHL r16, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4985 "00001101" // /* MW 3 */ + 4986 "00100001" // /* MW 2 */ + 4987 "00010101" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 262 46 +.delay_slot + 4988 "01011000" // ADD.NC p1, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4989 "01000001" // /* MW 3 */ + 4990 "01101001" // /* MW 2 */ + 4991 "00011001" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 261 46 first +.delay_slot + 4992 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4993 "00000000" // /* MW 15 */ + 4994 "00000000" // /* MW 14 */ + 4995 "10101000" // /* MW 13 */ + 4996 "11100010" // /* MW 12 */ + 4997 "00110100" // /* MW 11 */ + 4998 "00000000" // /* MW 10 */ + 4999 "00000000" // /* MW 9 */ + 5000 "00000000" // /* MW 8 */ + 5001 "01011011" // /* MW 7 */ + 5002 "00000001" // /* MW 6 */ + 5003 "00100000" // /* MW 5 */ + 5004 "00000000" // /* MW 4 */ + 5005 "11110000" // /* MW 3 */ + 5006 "00101100" // /* MW 2 */ + 5007 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 32 first +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 351 40 +.src_ref 1 "io_buffer_main.h" 449 8 +.src_ref 1 "io_buffer_main.h" 449 8 +.return_address + 5008 "10111010" // LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5009 "01111000" // /* MW 9 */ + 5010 "11010000" // /* MW 8 */ + 5011 "10110011" // /* MW 7 */ + 5012 "00101000" // /* MW 6 */ + 5013 "00000000" // /* MW 5 */ + 5014 "00000001" // /* MW 4 */ + 5015 "11010000" // /* MW 3 */ + 5016 "11000110" // /* MW 2 */ + 5017 "11001000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 268 19 + 5018 "01000100" // MOVXM p6, #508456 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5019 "01010000" // /* MW 5 */ + 5020 "11000100" // /* MW 4 */ + 5021 "11001100" // /* MW 3 */ + 5022 "00000111" // /* MW 2 */ + 5023 "00000000" // /* MW 1 */ + 5024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5025 "00000000" // /* MW 1 */ + 5026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5027 "00000000" // /* MW 1 */ + 5028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5029 "00000000" // /* MW 1 */ + 5030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5031 "00000000" // /* MW 1 */ + 5032 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5033 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 5034 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5035 "00001000" // /* MW 3 */ + 5036 "01010001" // /* MW 2 */ + 5037 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first + 5038 "10011000" // LDA r17, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5039 "00110110" // /* MW 3 */ + 5040 "11110110" // /* MW 2 */ + 5041 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 + 5042 "00011000" // LDA p2, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5043 "00011001" // /* MW 3 */ + 5044 "11101101" // /* MW 2 */ + 5045 "00000111" // /* MW 1 */ + 5046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5047 "00000000" // /* MW 1 */ + 5048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5049 "00000000" // /* MW 1 */ + 5050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5051 "00000000" // /* MW 1 */ + 5052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5053 "00000000" // /* MW 1 */ + 5054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5055 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 + 5056 "10011000" // SUB r17, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5057 "00010001" // /* MW 3 */ + 5058 "00100011" // /* MW 2 */ + 5059 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first +.src_ref 1 "io_buffer_main.h" 351 28 + 5060 "00001100" // LDA r17, [p2, #20]; ST r17, [p1, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5061 "01100011" // /* MW 5 */ + 5062 "11101100" // /* MW 4 */ + 5063 "11010011" // /* MW 3 */ + 5064 "11000110" // /* MW 2 */ + 5065 "01001010" // /* MW 1 */ + 5066 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5067 "00000000" // /* MW 1 */ + 5068 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5069 "00000000" // /* MW 1 */ + 5070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5071 "00000000" // /* MW 1 */ + 5072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5073 "00000000" // /* MW 1 */ + 5074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5075 "00000000" // /* MW 1 */ + 5076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5077 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 5078 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5079 "00001000" // /* MW 3 */ + 5080 "01010001" // /* MW 2 */ + 5081 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first +.src_ref 6 "superkernels.cpp" 268 6 +.src_ref 6 "superkernels.cpp" 269 14 + 5082 "10111010" // LDA r19, [p7, #-8]; MOVXM p1, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5083 "00010000" // /* MW 9 */ + 5084 "00000000" // /* MW 8 */ + 5085 "10110001" // /* MW 7 */ + 5086 "11110000" // /* MW 6 */ + 5087 "00000001" // /* MW 5 */ + 5088 "00000000" // /* MW 4 */ + 5089 "11010000" // /* MW 3 */ + 5090 "11001110" // /* MW 2 */ + 5091 "11111100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 268 19 first + 5092 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5093 "01010110" // /* MW 3 */ + 5094 "00000110" // /* MW 2 */ + 5095 "00000110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 268 6 + 5096 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5097 "00110110" // /* MW 3 */ + 5098 "00000110" // /* MW 2 */ + 5099 "00000001" // /* MW 1 */ + 5100 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5101 "00000000" // /* MW 1 */ + 5102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5103 "00000000" // /* MW 1 */ + 5104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5105 "00000000" // /* MW 1 */ + 5106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5107 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 first + 5108 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5109 "00110001" // /* MW 3 */ + 5110 "00100001" // /* MW 2 */ + 5111 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 5112 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5113 "00010001" // /* MW 3 */ + 5114 "11100110" // /* MW 2 */ + 5115 "00001111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 268 16 first + 5116 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5117 "00101000" // /* MW 3 */ + 5118 "01100001" // /* MW 2 */ + 5119 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 268 6 + 5120 "10000100" // JNZ r16, #5152 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5152 delay_slots=5 */ + 5121 "00000001" // /* MW 5 */ + 5122 "01000000" // /* MW 4 */ + 5123 "00010000" // /* MW 3 */ + 5124 "00001010" // /* MW 2 */ + 5125 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5127 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5129 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5131 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5133 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5134 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5135 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 269 14 + 5136 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5137 "00000001" // /* MW 3 */ + 5138 "00100000" // /* MW 2 */ + 5139 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 269 14 first + 5140 "00110110" // NOPA; NOPB; ST r16, [p1]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5141 "11000001" // /* MW 11 */ + 5142 "00001000" // /* MW 10 */ + 5143 "10000011" // /* MW 9 */ + 5144 "00000000" // /* MW 8 */ + 5145 "00000000" // /* MW 7 */ + 5146 "00000000" // /* MW 6 */ + 5147 "00100000" // /* MW 5 */ + 5148 "00000000" // /* MW 4 */ + 5149 "11110000" // /* MW 3 */ + 5150 "00101100" // /* MW 2 */ + 5151 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 +.src_ref 6 "superkernels.cpp" 271 + 5152 "00011000" // LDA lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5153 "00111001" // /* MW 3 */ + 5154 "11110000" // /* MW 2 */ + 5155 "00000111" // /* MW 1 */ + 5156 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5157 "11110001" // /* MW 3 */ + 5158 "11111101" // /* MW 2 */ + 5159 "00000111" // /* MW 1 */ + 5160 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5161 "10011001" // /* MW 3 */ + 5162 "11110111" // /* MW 2 */ + 5163 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 5164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5165 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.noswbrkpt + 5166 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5167 "11010001" // /* MW 3 */ + 5168 "11111001" // /* MW 2 */ + 5169 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 5170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5171 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 5172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5173 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 271 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 5174 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 5175 "00000000" // /* MW 3 */ + 5176 "00101000" // /* MW 2 */ + 5177 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5178 "00011000" // MOVS p6, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5179 "00001011" // /* MW 3 */ + 5180 "10001110" // /* MW 2 */ + 5181 "00001110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 271 +.delay_slot + 5182 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5183 "00000001" // /* MW 5 */ + 5184 "00000000" // /* MW 4 */ + 5185 "00000000" // /* MW 3 */ + 5186 "11111000" // /* MW 2 */ + 5187 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5189 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5191 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end +.label __Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0 + 5193 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.src_ref 2 "elementwise_binary.h" 100 first +.src_ref 2 "elementwise_binary.h" 103 4 first +.function_start + 5200 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 5201 "00000000" // /* MW 3 */ + 5202 "00101000" // /* MW 2 */ + 5203 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 5204 "01000100" // MOVXM p0, #508832 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5205 "01000000" // /* MW 5 */ + 5206 "11000111" // /* MW 4 */ + 5207 "11000000" // /* MW 3 */ + 5208 "00000111" // /* MW 2 */ + 5209 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 5210 "10111000" // MOV m0, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5211 "10000000" // /* MW 3 */ + 5212 "00000000" // /* MW 2 */ + 5213 "00011000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 first +.delay_slot + 5214 "10011000" // ST m0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5215 "00000001" // /* MW 3 */ + 5216 "00000100" // /* MW 2 */ + 5217 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 5218 "10011000" // ST m0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5219 "00000001" // /* MW 3 */ + 5220 "00010100" // /* MW 2 */ + 5221 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5222 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_end0 + 5223 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.src_ref 2 "elementwise_binary.h" 89 first +.src_ref 2 "elementwise_binary.h" 92 22 +.src_ref 2 "elementwise_binary.h" 92 24 first +.function_start + 5232 "10111010" // LDA el0, [p1], #4; MOVXM p0, #508800 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5233 "00010000" // /* MW 9 */ + 5234 "11000000" // /* MW 8 */ + 5235 "00110001" // /* MW 7 */ + 5236 "11110000" // /* MW 6 */ + 5237 "00000001" // /* MW 5 */ + 5238 "00000000" // /* MW 4 */ + 5239 "11010000" // /* MW 3 */ + 5240 "10000101" // /* MW 2 */ + 5241 "00100011" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 89 + 5242 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5243 "00000001" // /* MW 5 */ + 5244 "00000000" // /* MW 4 */ + 5245 "00000000" // /* MW 3 */ + 5246 "00001000" // /* MW 2 */ + 5247 "00000000" // /* MW 1 */ + 5248 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5249 "00111101" // /* MW 3 */ + 5250 "11111100" // /* MW 2 */ + 5251 "00001111" // /* MW 1 */ + 5252 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5253 "00000000" // /* MW 1 */ + 5254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5255 "00000000" // /* MW 1 */ + 5256 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5257 "00000000" // /* MW 1 */ + 5258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5259 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 92 22 first + 5260 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5261 "00101001" // /* MW 3 */ + 5262 "00011100" // /* MW 2 */ + 5263 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 93 24 first + 5264 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5265 "00101110" // /* MW 3 */ + 5266 "00011100" // /* MW 2 */ + 5267 "00000001" // /* MW 1 */ + 5268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5269 "00000000" // /* MW 1 */ + 5270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5271 "00000000" // /* MW 1 */ + 5272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5273 "00000000" // /* MW 1 */ + 5274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5275 "00000000" // /* MW 1 */ + 5276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5277 "00000000" // /* MW 1 */ + 5278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5279 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 93 22 + 5280 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5281 "00101001" // /* MW 3 */ + 5282 "00011100" // /* MW 2 */ + 5283 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 94 24 first + 5284 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5285 "00101110" // /* MW 3 */ + 5286 "00000100" // /* MW 2 */ + 5287 "00000001" // /* MW 1 */ + 5288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5289 "00000000" // /* MW 1 */ + 5290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5291 "00000000" // /* MW 1 */ + 5292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5293 "00000000" // /* MW 1 */ + 5294 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5295 "00000000" // /* MW 1 */ + 5296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5297 "00000000" // /* MW 1 */ + 5298 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5299 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 94 22 + 5300 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5301 "00101001" // /* MW 3 */ + 5302 "00011100" // /* MW 2 */ + 5303 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 95 24 first + 5304 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5305 "00101110" // /* MW 3 */ + 5306 "00010100" // /* MW 2 */ + 5307 "00000001" // /* MW 1 */ + 5308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5309 "00000000" // /* MW 1 */ + 5310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5311 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 96 8 first +.no_stack_arguments + 5312 "00000100" // JL #5200 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=5200 delay_slots=5 */ + 5313 "00000001" // /* MW 5 */ + 5314 "00000000" // /* MW 4 */ + 5315 "00101000" // /* MW 3 */ + 5316 "00001010" // /* MW 2 */ + 5317 "00000000" // /* MW 1 */ +.delay_slot + 5318 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5319 "10011101" // /* MW 3 */ + 5320 "11111011" // /* MW 2 */ + 5321 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5323 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5325 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 95 22 first +.delay_slot + 5326 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5327 "00101001" // /* MW 3 */ + 5328 "11011100" // /* MW 2 */ + 5329 "00001000" // /* MW 1 */ +.src_ref 3 "mul_impl.h" 93 25 +.delay_slot + 5330 "00101110" // NOPA; NOPS; MOV p7, p0; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5331 "00011100" // /* MW 13 */ + 5332 "00000000" // /* MW 12 */ + 5333 "00000000" // /* MW 11 */ + 5334 "00000111" // /* MW 10 */ + 5335 "00000110" // /* MW 9 */ + 5336 "01111011" // /* MW 8 */ + 5337 "00000000" // /* MW 7 */ + 5338 "00000000" // /* MW 6 */ + 5339 "10110110" // /* MW 5 */ + 5340 "00000010" // /* MW 4 */ + 5341 "11110000" // /* MW 3 */ + 5342 "00101100" // /* MW 2 */ + 5343 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 98 4 +.return_address + 5344 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5345 "00111001" // /* MW 3 */ + 5346 "11111100" // /* MW 2 */ + 5347 "00000111" // /* MW 1 */ + 5348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5349 "00000000" // /* MW 1 */ + 5350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5351 "00000000" // /* MW 1 */ + 5352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5353 "00000000" // /* MW 1 */ + 5354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5355 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5357 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5358 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5359 "10011001" // /* MW 3 */ + 5360 "11111011" // /* MW 2 */ + 5361 "00000111" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 98 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5362 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 5363 "00000000" // /* MW 3 */ + 5364 "00101000" // /* MW 2 */ + 5365 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5367 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5369 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5371 "00000000" // /* MW 1 */ +.src_ref 3 "mul_impl.h" 93 25 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5372 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5373 "00000001" // /* MW 3 */ + 5374 "00100000" // /* MW 2 */ + 5375 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 98 4 +.src_ref 3 "mul_impl.h" 93 25 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5376 "00111010" // ST r16, [p7, #16]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5377 "01110001" // /* MW 9 */ + 5378 "00000000" // /* MW 8 */ + 5379 "00000000" // /* MW 7 */ + 5380 "00000000" // /* MW 6 */ + 5381 "11111110" // /* MW 5 */ + 5382 "00111111" // /* MW 4 */ + 5383 "00110000" // /* MW 3 */ + 5384 "11000010" // /* MW 2 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + 5385 "11101000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.function run _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.src_ref 2 "elementwise_binary.h" 108 first +.src_ref 2 "elementwise_binary.h" 115 37 +.src_ref 2 "elementwise_binary.h" 115 37 +.function_start + 5392 "10111010" // MOVA m0, #32; MOVXM p3, #508800 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5393 "00010000" // /* MW 9 */ + 5394 "11000000" // /* MW 8 */ + 5395 "10110001" // /* MW 7 */ + 5396 "11110001" // /* MW 6 */ + 5397 "00000001" // /* MW 5 */ + 5398 "00000000" // /* MW 4 */ + 5399 "10000000" // /* MW 3 */ + 5400 "00000000" // /* MW 2 */ + 5401 "00000100" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 115 37 first +.src_ref 2 "elementwise_binary.h" 115 78 + 5402 "10111010" // LDA r1, [p3], m0; MOVXM p4, #508464 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5403 "00010000" // /* MW 9 */ + 5404 "00011000" // /* MW 8 */ + 5405 "00110001" // /* MW 7 */ + 5406 "11110010" // /* MW 6 */ + 5407 "00000001" // /* MW 5 */ + 5408 "00000000" // /* MW 4 */ + 5409 "11010000" // /* MW 3 */ + 5410 "00000110" // /* MW 2 */ + 5411 "01100001" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 115 78 +.src_ref 2 "elementwise_binary.h" 115 78 + 5412 "10111010" // LDA m1, [p3]; MOVX r0, #828; MOV r3, #-6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5413 "01011000" // /* MW 9 */ + 5414 "11111010" // /* MW 8 */ + 5415 "01101111" // /* MW 7 */ + 5416 "10001000" // /* MW 6 */ + 5417 "00000111" // /* MW 5 */ + 5418 "00011000" // /* MW 4 */ + 5419 "11010000" // /* MW 3 */ + 5420 "10010000" // /* MW 2 */ + 5421 "01100000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 115 78 +.src_ref 2 "elementwise_binary.h" 127 8 first + 5422 "10111010" // LDA m0, [p3, #4]; MOVXM ls, #5584 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5423 "00010000" // /* MW 9 */ + 5424 "11101000" // /* MW 8 */ + 5425 "01111010" // /* MW 7 */ + 5426 "00000100" // /* MW 6 */ + 5427 "00000000" // /* MW 5 */ + 5428 "00000000" // /* MW 4 */ + 5429 "11010000" // /* MW 3 */ + 5430 "10000000" // /* MW 2 */ + 5431 "01100010" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 115 78 first +.src_ref 2 "elementwise_binary.h" 127 8 + 5432 "10111010" // LDA.s8 r2, [p4]; MOVXM le, #5600 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5433 "00010000" // /* MW 9 */ + 5434 "11110000" // /* MW 8 */ + 5435 "10111010" // /* MW 7 */ + 5436 "00000101" // /* MW 6 */ + 5437 "00000000" // /* MW 5 */ + 5438 "00000000" // /* MW 4 */ + 5439 "01010000" // /* MW 3 */ + 5440 "10001000" // /* MW 2 */ + 5441 "10000000" // /* MW 1 */ + 5442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5443 "00000000" // /* MW 1 */ + 5444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5445 "00000000" // /* MW 1 */ + 5446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5447 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 115 78 + 5448 "10011000" // LSHL r1, r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5449 "00111101" // /* MW 3 */ + 5450 "01000010" // /* MW 2 */ + 5451 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 127 8 first + 5452 "10011000" // ADD.NC lc, r1, #-7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5453 "11111100" // /* MW 3 */ + 5454 "01110000" // /* MW 2 */ + 5455 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first + 5456 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5457 "11101000" // /* MW 5 */ + 5458 "01010000" // /* MW 4 */ + 5459 "01110000" // /* MW 3 */ + 5460 "00010011" // /* MW 2 */ + 5461 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 154 20 +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.src_ref 2 "elementwise_binary.h" 177 20 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5462 "00010010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; MOVX crRnd, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5463 "10000000" // /* MW 7 */ + 5464 "10111010" // /* MW 6 */ + 5465 "01101000" // /* MW 5 */ + 5466 "01010000" // /* MW 4 */ + 5467 "01110000" // /* MW 3 */ + 5468 "00011011" // /* MW 2 */ + 5469 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5470 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5471 "11101000" // /* MW 5 */ + 5472 "01010000" // /* MW 4 */ + 5473 "01110000" // /* MW 3 */ + 5474 "00010011" // /* MW 2 */ + 5475 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5476 "00111100" // VLDA x3, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5477 "01101000" // /* MW 5 */ + 5478 "01010000" // /* MW 4 */ + 5479 "01110000" // /* MW 3 */ + 5480 "00011011" // /* MW 2 */ + 5481 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5482 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5483 "11101000" // /* MW 5 */ + 5484 "01010000" // /* MW 4 */ + 5485 "01110000" // /* MW 3 */ + 5486 "00010011" // /* MW 2 */ + 5487 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5488 "00111100" // VLDA x3, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5489 "01101000" // /* MW 5 */ + 5490 "01010000" // /* MW 4 */ + 5491 "01110000" // /* MW 3 */ + 5492 "00011011" // /* MW 2 */ + 5493 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5494 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5495 "11101000" // /* MW 5 */ + 5496 "01010000" // /* MW 4 */ + 5497 "01110000" // /* MW 3 */ + 5498 "00010011" // /* MW 2 */ + 5499 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5500 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5501 "01000001" // /* MW 9 */ + 5502 "11100010" // /* MW 8 */ + 5503 "00000000" // /* MW 7 */ + 5504 "00011101" // /* MW 6 */ + 5505 "00110100" // /* MW 5 */ + 5506 "00101000" // /* MW 4 */ + 5507 "01110000" // /* MW 3 */ + 5508 "00011011" // /* MW 2 */ + 5509 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5510 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5511 "01100001" // /* MW 9 */ + 5512 "11100000" // /* MW 8 */ + 5513 "00000001" // /* MW 7 */ + 5514 "00011101" // /* MW 6 */ + 5515 "01110100" // /* MW 5 */ + 5516 "00101000" // /* MW 4 */ + 5517 "01110000" // /* MW 3 */ + 5518 "00010011" // /* MW 2 */ + 5519 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5520 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5521 "01000001" // /* MW 9 */ + 5522 "11100010" // /* MW 8 */ + 5523 "00000000" // /* MW 7 */ + 5524 "00011101" // /* MW 6 */ + 5525 "00110100" // /* MW 5 */ + 5526 "00101000" // /* MW 4 */ + 5527 "01110000" // /* MW 3 */ + 5528 "00011011" // /* MW 2 */ + 5529 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5530 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5531 "01100001" // /* MW 9 */ + 5532 "11100000" // /* MW 8 */ + 5533 "00000001" // /* MW 7 */ + 5534 "00011101" // /* MW 6 */ + 5535 "01110100" // /* MW 5 */ + 5536 "00101000" // /* MW 4 */ + 5537 "01110000" // /* MW 3 */ + 5538 "00010011" // /* MW 2 */ + 5539 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5540 "01100110" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; NOPS; VMUL.f dm0, x1, x2, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5541 "01000001" // /* MW 11 */ + 5542 "11100010" // /* MW 10 */ + 5543 "00000000" // /* MW 9 */ + 5544 "10001110" // /* MW 8 */ + 5545 "10101101" // /* MW 7 */ + 5546 "00000000" // /* MW 6 */ + 5547 "01101000" // /* MW 5 */ + 5548 "01010000" // /* MW 4 */ + 5549 "01110000" // /* MW 3 */ + 5550 "00011011" // /* MW 2 */ + 5551 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5552 "00001011" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; NOPS; NOPX; NOPM; VMUL.f dm1, x0, x3, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5553 "00000011" // /* MW 15 */ + 5554 "00001111" // /* MW 14 */ + 5555 "01111000" // /* MW 13 */ + 5556 "10100101" // /* MW 12 */ + 5557 "00000001" // /* MW 11 */ + 5558 "00000000" // /* MW 10 */ + 5559 "00000000" // /* MW 9 */ + 5560 "00000000" // /* MW 8 */ + 5561 "01011011" // /* MW 7 */ + 5562 "00000001" // /* MW 6 */ + 5563 "11101000" // /* MW 5 */ + 5564 "01010000" // /* MW 4 */ + 5565 "01110000" // /* MW 3 */ + 5566 "00010011" // /* MW 2 */ + 5567 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5568 "00001011" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5569 "00010010" // /* MW 15 */ + 5570 "00000111" // /* MW 14 */ + 5571 "01111000" // /* MW 13 */ + 5572 "10100101" // /* MW 12 */ + 5573 "00000001" // /* MW 11 */ + 5574 "00000000" // /* MW 10 */ + 5575 "00000000" // /* MW 9 */ + 5576 "00000000" // /* MW 8 */ + 5577 "00100011" // /* MW 7 */ + 5578 "00011100" // /* MW 6 */ + 5579 "01101010" // /* MW 5 */ + 5580 "01010000" // /* MW 4 */ + 5581 "01110000" // /* MW 3 */ + 5582 "00011011" // /* MW 2 */ + 5583 "00100001" // /* MW 1 */ +.label ZLS_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_192 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.src_ref 2 "elementwise_binary.h" 154 20 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 5584 "00001011" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; VMUL.f dm1, x0, x3, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5585 "00000011" // /* MW 15 */ + 5586 "00001111" // /* MW 14 */ + 5587 "01111000" // /* MW 13 */ + 5588 "10100101" // /* MW 12 */ + 5589 "00000001" // /* MW 11 */ + 5590 "00000000" // /* MW 10 */ + 5591 "00000000" // /* MW 9 */ + 5592 "00000000" // /* MW 8 */ + 5593 "10100011" // /* MW 7 */ + 5594 "00011100" // /* MW 6 */ + 5595 "11101010" // /* MW 5 */ + 5596 "01010000" // /* MW 4 */ + 5597 "01110000" // /* MW 3 */ + 5598 "00010011" // /* MW 2 */ + 5599 "00100001" // /* MW 1 */ +.label ZLE_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_208 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5600 "00001011" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5601 "00010010" // /* MW 15 */ + 5602 "00000111" // /* MW 14 */ + 5603 "01111000" // /* MW 13 */ + 5604 "10100101" // /* MW 12 */ + 5605 "00000001" // /* MW 11 */ + 5606 "00000000" // /* MW 10 */ + 5607 "00000000" // /* MW 9 */ + 5608 "00000000" // /* MW 8 */ + 5609 "00100011" // /* MW 7 */ + 5610 "00011100" // /* MW 6 */ + 5611 "01101010" // /* MW 5 */ + 5612 "01010000" // /* MW 4 */ + 5613 "01110000" // /* MW 3 */ + 5614 "00011011" // /* MW 2 */ + 5615 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 5616 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5617 "01100001" // /* MW 7 */ + 5618 "11100000" // /* MW 6 */ + 5619 "00000001" // /* MW 5 */ + 5620 "00000010" // /* MW 4 */ + 5621 "01100000" // /* MW 3 */ + 5622 "10010100" // /* MW 2 */ + 5623 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5624 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5625 "01000001" // /* MW 7 */ + 5626 "11100010" // /* MW 6 */ + 5627 "00000000" // /* MW 5 */ + 5628 "00000010" // /* MW 4 */ + 5629 "01100000" // /* MW 3 */ + 5630 "10000100" // /* MW 2 */ + 5631 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5632 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5633 "01100001" // /* MW 7 */ + 5634 "11100000" // /* MW 6 */ + 5635 "00000001" // /* MW 5 */ + 5636 "00000010" // /* MW 4 */ + 5637 "01100000" // /* MW 3 */ + 5638 "10010100" // /* MW 2 */ + 5639 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5640 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5641 "01000001" // /* MW 7 */ + 5642 "11100010" // /* MW 6 */ + 5643 "00000000" // /* MW 5 */ + 5644 "00000010" // /* MW 4 */ + 5645 "01100000" // /* MW 3 */ + 5646 "10000100" // /* MW 2 */ + 5647 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5648 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5649 "01100001" // /* MW 7 */ + 5650 "11100000" // /* MW 6 */ + 5651 "00000001" // /* MW 5 */ + 5652 "00000010" // /* MW 4 */ + 5653 "01100000" // /* MW 3 */ + 5654 "10010100" // /* MW 2 */ + 5655 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5656 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5657 "01000001" // /* MW 7 */ + 5658 "11100010" // /* MW 6 */ + 5659 "00000000" // /* MW 5 */ + 5660 "00000010" // /* MW 4 */ + 5661 "01100000" // /* MW 3 */ + 5662 "10000100" // /* MW 2 */ + 5663 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5664 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5665 "01100001" // /* MW 7 */ + 5666 "11100000" // /* MW 6 */ + 5667 "00000001" // /* MW 5 */ + 5668 "00000010" // /* MW 4 */ + 5669 "01100000" // /* MW 3 */ + 5670 "10010100" // /* MW 2 */ + 5671 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5672 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5673 "00100011" // /* MW 3 */ + 5674 "00011100" // /* MW 2 */ + 5675 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 131 4 first +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5676 "01011100" // VST.CONV.bf16.fp32 cml1, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 5677 "00000000" // /* MW 5 */ + 5678 "01010000" // /* MW 4 */ + 5679 "01100000" // /* MW 3 */ + 5680 "10010100" // /* MW 2 */ + 5681 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5682 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5683 "00100011" // /* MW 3 */ + 5684 "00011100" // /* MW 2 */ + 5685 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 154 20 first +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5686 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5687 "10100011" // /* MW 3 */ + 5688 "00011100" // /* MW 2 */ + 5689 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.delay_slot + 5690 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5691 "00100011" // /* MW 3 */ + 5692 "00011100" // /* MW 2 */ + 5693 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 154 20 first +.delay_slot + 5694 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5695 "10100011" // /* MW 3 */ + 5696 "00011100" // /* MW 2 */ + 5697 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5698 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_end0 + 5699 "00000000" // /* MW 1 */ +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0 +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.function superkernel_mul1d _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.src_ref 6 "superkernels.cpp" 277 first +.src_ref 6 "superkernels.cpp" 282 6 +.function_start + 5712 "01000100" // MOVXM p4, #508416 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5713 "00000000" // /* MW 5 */ + 5714 "11000100" // /* MW 4 */ + 5715 "11001000" // /* MW 3 */ + 5716 "00000111" // /* MW 2 */ + 5717 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 282 6 first + 5718 "11010100" // LDA r16, [p4]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5719 "11000001" // /* MW 5 */ + 5720 "10110101" // /* MW 4 */ + 5721 "11011000" // /* MW 3 */ + 5722 "11000010" // /* MW 2 */ + 5723 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 277 + 5724 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5725 "00000001" // /* MW 5 */ + 5726 "00000000" // /* MW 4 */ + 5727 "00000000" // /* MW 3 */ + 5728 "00001000" // /* MW 2 */ + 5729 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 279 22 first + 5730 "00111010" // ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5731 "01111001" // /* MW 9 */ + 5732 "01100000" // /* MW 8 */ + 5733 "11001010" // /* MW 7 */ + 5734 "10000001" // /* MW 6 */ + 5735 "00010100" // /* MW 5 */ + 5736 "00100011" // /* MW 4 */ + 5737 "10110000" // /* MW 3 */ + 5738 "00111010" // /* MW 2 */ + 5739 "11111111" // /* MW 1 */ + 5740 "00000010" // ST p0, [sp, #-20]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5741 "01110000" // /* MW 7 */ + 5742 "11010000" // /* MW 6 */ + 5743 "00001011" // /* MW 5 */ + 5744 "00000000" // /* MW 4 */ + 5745 "10110000" // /* MW 3 */ + 5746 "10000011" // /* MW 2 */ + 5747 "11111101" // /* MW 1 */ + 5748 "10011000" // ST r0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5749 "00010101" // /* MW 3 */ + 5750 "11111100" // /* MW 2 */ + 5751 "00001111" // /* MW 1 */ + 5752 "10011000" // ST lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5753 "00111101" // /* MW 3 */ + 5754 "11110000" // /* MW 2 */ + 5755 "00001111" // /* MW 1 */ + 5756 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5757 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 282 6 first +.src_ref 6 "superkernels.cpp" 282 16 first + 5758 "10000100" // JNZ r16, #5904 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5904 delay_slots=5 */ + 5759 "00000001" // /* MW 5 */ + 5760 "01000000" // /* MW 4 */ + 5761 "10001000" // /* MW 3 */ + 5762 "00001011" // /* MW 2 */ + 5763 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 279 30 first +.delay_slot + 5764 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5765 "11111011" // /* MW 3 */ + 5766 "01100011" // /* MW 2 */ + 5767 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 279 11 +.delay_slot + 5768 "01000100" // MOVXM p2, #508424 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5769 "00010000" // /* MW 5 */ + 5770 "11000100" // /* MW 4 */ + 5771 "11000100" // /* MW 3 */ + 5772 "00000111" // /* MW 2 */ + 5773 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 279 11 +.delay_slot + 5774 "00000010" // ST r17, [p2]; MOV p2, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5775 "01110000" // /* MW 7 */ + 5776 "01100000" // /* MW 6 */ + 5777 "00110111" // /* MW 5 */ + 5778 "00000001" // /* MW 4 */ + 5779 "00110000" // /* MW 3 */ + 5780 "11000110" // /* MW 2 */ + 5781 "01000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.delay_slot + 5782 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5783 "11000000" // /* MW 3 */ + 5784 "11010110" // /* MW 2 */ + 5785 "00011011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 285 4 +.src_ref 6 "superkernels.cpp" 287 28 +.src_ref 6 "superkernels.cpp" 289 42 +.src_ref 6 "superkernels.cpp" 301 2 +.delay_slot + 5786 "00111010" // ST p2, [sp, #-12]; MOVXM p7, #508800 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5787 "00010001" // /* MW 9 */ + 5788 "11000000" // /* MW 8 */ + 5789 "10110001" // /* MW 7 */ + 5790 "11110011" // /* MW 6 */ + 5791 "00000001" // /* MW 5 */ + 5792 "00000000" // /* MW 4 */ + 5793 "10110000" // /* MW 3 */ + 5794 "10100011" // /* MW 2 */ + 5795 "11111110" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.src_ref 6 "superkernels.cpp" 285 4 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5796 "00111010" // MOVS p0, p7; MOVXM p2, #508464 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5797 "00010001" // /* MW 9 */ + 5798 "00011000" // /* MW 8 */ + 5799 "00110001" // /* MW 7 */ + 5800 "11110001" // /* MW 6 */ + 5801 "00000001" // /* MW 5 */ + 5802 "00000000" // /* MW 4 */ + 5803 "01100000" // /* MW 3 */ + 5804 "10010001" // /* MW 2 */ + 5805 "00010011" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5806 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #508460 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5807 "00010000" // /* MW 9 */ + 5808 "00010110" // /* MW 8 */ + 5809 "00110001" // /* MW 7 */ + 5810 "11110001" // /* MW 6 */ + 5811 "00000001" // /* MW 5 */ + 5812 "00000000" // /* MW 4 */ + 5813 "11100000" // /* MW 3 */ + 5814 "11000000" // /* MW 2 */ + 5815 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5817 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 285 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 5818 "00000100" // JL #5232 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=5232 delay_slots=5 */ + 5819 "00000001" // /* MW 5 */ + 5820 "00000000" // /* MW 4 */ + 5821 "00111000" // /* MW 3 */ + 5822 "00001010" // /* MW 2 */ + 5823 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5824 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5825 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5827 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5828 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5829 "00110001" // /* MW 3 */ + 5830 "00100000" // /* MW 2 */ + 5831 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 5832 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5833 "00000101" // /* MW 3 */ + 5834 "00100000" // /* MW 2 */ + 5835 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 5836 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5837 "00010001" // /* MW 3 */ + 5838 "00000110" // /* MW 2 */ + 5839 "00001010" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 289 18 +.src_ref 6 "superkernels.cpp" 289 42 first +.return_address + 5840 "10111010" // LDA r16, [p7]; MOVXM p1, #508424 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5841 "00010000" // /* MW 9 */ + 5842 "00000100" // /* MW 8 */ + 5843 "10110001" // /* MW 7 */ + 5844 "11110000" // /* MW 6 */ + 5845 "00000001" // /* MW 5 */ + 5846 "00000000" // /* MW 4 */ + 5847 "11010000" // /* MW 3 */ + 5848 "11000010" // /* MW 2 */ + 5849 "11100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 289 16 +.src_ref 6 "superkernels.cpp" 289 18 +.src_ref 6 "superkernels.cpp" 298 48 + 5850 "10111010" // LDA r17, [p1]; MOVXM p3, #508428 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5851 "00010000" // /* MW 9 */ + 5852 "00000110" // /* MW 8 */ + 5853 "10110001" // /* MW 7 */ + 5854 "11110001" // /* MW 6 */ + 5855 "00000001" // /* MW 5 */ + 5856 "00000000" // /* MW 4 */ + 5857 "11010000" // /* MW 3 */ + 5858 "11000110" // /* MW 2 */ + 5859 "00100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 287 28 first +.src_ref 6 "superkernels.cpp" 290 16 +.src_ref 6 "superkernels.cpp" 299 48 + 5860 "10111010" // LDA.u16 r18, [p7, #10]; MOVXM p1, #508432 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5861 "00010000" // /* MW 9 */ + 5862 "00001000" // /* MW 8 */ + 5863 "10110001" // /* MW 7 */ + 5864 "11110000" // /* MW 6 */ + 5865 "00000001" // /* MW 5 */ + 5866 "00000000" // /* MW 4 */ + 5867 "01010000" // /* MW 3 */ + 5868 "11001011" // /* MW 2 */ + 5869 "11101010" // /* MW 1 */ + 5870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5871 "00000000" // /* MW 1 */ + 5872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5873 "00000000" // /* MW 1 */ + 5874 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5875 "00000000" // /* MW 1 */ + 5876 "10000100" // J #5920 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=5920 delay_slots=5 */ + 5877 "00000000" // /* MW 5 */ + 5878 "00000000" // /* MW 4 */ + 5879 "10010000" // /* MW 3 */ + 5880 "00001011" // /* MW 2 */ + 5881 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 287 13 +.delay_slot + 5882 "01000100" // MOVXM p2, #508456 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5883 "01010000" // /* MW 5 */ + 5884 "11000100" // /* MW 4 */ + 5885 "11000100" // /* MW 3 */ + 5886 "00000111" // /* MW 2 */ + 5887 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 289 27 first +.delay_slot + 5888 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5889 "00001111" // /* MW 3 */ + 5890 "01100001" // /* MW 2 */ + 5891 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 287 13 first +.delay_slot + 5892 "10011000" // ST r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5893 "01010001" // /* MW 3 */ + 5894 "00000110" // /* MW 2 */ + 5895 "00001010" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 289 16 first +.delay_slot + 5896 "10011000" // ST r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5897 "00010001" // /* MW 3 */ + 5898 "00000110" // /* MW 2 */ + 5899 "00001011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 290 16 first +.delay_slot + 5900 "10011000" // ST r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5901 "00010001" // /* MW 3 */ + 5902 "00000110" // /* MW 2 */ + 5903 "00001001" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 +.src_ref 6 "superkernels.cpp" 298 48 + 5904 "01000100" // MOVXM p3, #508428 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5905 "00011000" // /* MW 5 */ + 5906 "11000100" // /* MW 4 */ + 5907 "11000110" // /* MW 3 */ + 5908 "00000111" // /* MW 2 */ + 5909 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 299 48 + 5910 "10111010" // NOPA; MOVXM p1, #508432 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5911 "00010000" // /* MW 9 */ + 5912 "00001000" // /* MW 8 */ + 5913 "10110001" // /* MW 7 */ + 5914 "11110000" // /* MW 6 */ + 5915 "00000001" // /* MW 5 */ + 5916 "00000000" // /* MW 4 */ + 5917 "11110000" // /* MW 3 */ + 5918 "00101100" // /* MW 2 */ + 5919 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 +.src_ref 1 "io_buffer_main.h" 242 49 first + 5920 "00011000" // ADD.NC p0, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5921 "10000110" // /* MW 3 */ + 5922 "01100111" // /* MW 2 */ + 5923 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 293 2 + 5924 "10111010" // LDA r27, [p0], #-4; MOVXM p2, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5925 "00010000" // /* MW 9 */ + 5926 "00000000" // /* MW 8 */ + 5927 "00110001" // /* MW 7 */ + 5928 "11110001" // /* MW 6 */ + 5929 "00000001" // /* MW 5 */ + 5930 "00000000" // /* MW 4 */ + 5931 "11010000" // /* MW 3 */ + 5932 "11101110" // /* MW 2 */ + 5933 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 5934 "10011000" // LDA r16, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5935 "00010110" // /* MW 3 */ + 5936 "11111110" // /* MW 2 */ + 5937 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 5938 "10011000" // LDA r17, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5939 "00110110" // /* MW 3 */ + 5940 "11111110" // /* MW 2 */ + 5941 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 293 2 first + 5942 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5943 "01010110" // /* MW 3 */ + 5944 "00000110" // /* MW 2 */ + 5945 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first + 5946 "10011000" // LDA r19, [p0, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5947 "01110110" // /* MW 3 */ + 5948 "01000110" // /* MW 2 */ + 5949 "00000000" // /* MW 1 */ + 5950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5951 "00000000" // /* MW 1 */ + 5952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5953 "00000000" // /* MW 1 */ + 5954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5955 "00000000" // /* MW 1 */ + 5956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5957 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 5958 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5959 "00000010" // /* MW 3 */ + 5960 "01100001" // /* MW 2 */ + 5961 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 +.src_ref 6 "superkernels.cpp" 293 2 first + 5962 "01011100" // ST r16, [p0]; ADD r16, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5963 "00001110" // /* MW 5 */ + 5964 "01000000" // /* MW 4 */ + 5965 "00111001" // /* MW 3 */ + 5966 "11000010" // /* MW 2 */ + 5967 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 293 2 + 5968 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5969 "00010001" // /* MW 3 */ + 5970 "00000110" // /* MW 2 */ + 5971 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 +.src_ref 1 "io_buffer_main.h" 419 8 + 5972 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5973 "11111101" // /* MW 3 */ + 5974 "11100000" // /* MW 2 */ + 5975 "00010111" // /* MW 1 */ + 5976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5977 "00000000" // /* MW 1 */ + 5978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5979 "00000000" // /* MW 1 */ + 5980 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5981 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 5982 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5983 "00001000" // /* MW 3 */ + 5984 "11010011" // /* MW 2 */ + 5985 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 first + 5986 "00011000" // ADD.NC p2, r14, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5987 "00000110" // /* MW 3 */ + 5988 "01100111" // /* MW 2 */ + 5989 "00011010" // /* MW 1 */ + 5990 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5991 "00000000" // /* MW 1 */ + 5992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5993 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 + 5994 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5995 "01110110" // /* MW 3 */ + 5996 "11111111" // /* MW 2 */ + 5997 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 5998 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5999 "00110110" // /* MW 3 */ + 6000 "11111110" // /* MW 2 */ + 6001 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 6002 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6003 "01010110" // /* MW 3 */ + 6004 "11111110" // /* MW 2 */ + 6005 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 47 first + 6006 "10011000" // LDA r19, [p2, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6007 "01110110" // /* MW 3 */ + 6008 "01010110" // /* MW 2 */ + 6009 "00000010" // /* MW 1 */ + 6010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6011 "00000000" // /* MW 1 */ + 6012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6013 "00000000" // /* MW 1 */ + 6014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6015 "00000000" // /* MW 1 */ + 6016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6017 "00000000" // /* MW 1 */ + 6018 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6019 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 6020 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6021 "00010010" // /* MW 3 */ + 6022 "10100011" // /* MW 2 */ + 6023 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 + 6024 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6025 "00110001" // /* MW 3 */ + 6026 "00000110" // /* MW 2 */ + 6027 "00001010" // /* MW 1 */ + 6028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6029 "00000000" // /* MW 1 */ + 6030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6031 "00000000" // /* MW 1 */ + 6032 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6033 "00000000" // /* MW 1 */ + 6034 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6035 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 6036 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6037 "00001000" // /* MW 3 */ + 6038 "11010011" // /* MW 2 */ + 6039 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 32 +.src_ref 6 "superkernels.cpp" 298 46 +.src_ref 6 "superkernels.cpp" 299 46 + 6040 "00111010" // MOVS p6, p2; MOVX r16, #1; MOV r14, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6041 "01111001" // /* MW 9 */ + 6042 "01100000" // /* MW 8 */ + 6043 "11001110" // /* MW 7 */ + 6044 "00101001" // /* MW 6 */ + 6045 "00000000" // /* MW 5 */ + 6046 "00000001" // /* MW 4 */ + 6047 "01100000" // /* MW 3 */ + 6048 "00010001" // /* MW 2 */ + 6049 "11010001" // /* MW 1 */ + 6050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6051 "00000000" // /* MW 1 */ + 6052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6053 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 + 6054 "00011000" // LDA p4, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6055 "00011001" // /* MW 3 */ + 6056 "11101110" // /* MW 2 */ + 6057 "00000111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 298 48 first + 6058 "00001100" // LDA r17, [p3]; ST p0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6059 "00111011" // /* MW 5 */ + 6060 "11011000" // /* MW 4 */ + 6061 "11011111" // /* MW 3 */ + 6062 "11000110" // /* MW 2 */ + 6063 "01100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 299 48 first +.src_ref 6 "superkernels.cpp" 301 2 + 6064 "11010100" // LDA r20, [p1]; MOV p3, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6065 "10000001" // /* MW 5 */ + 6066 "11011101" // /* MW 4 */ + 6067 "11010110" // /* MW 3 */ + 6068 "11010010" // /* MW 2 */ + 6069 "00100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 6070 "10011000" // LDA r18, [p2], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6071 "01010110" // /* MW 3 */ + 6072 "01001110" // /* MW 2 */ + 6073 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 6074 "10011000" // LDA p2, [p0], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6075 "00011110" // /* MW 3 */ + 6076 "01011101" // /* MW 2 */ + 6077 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 40 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6078 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6079 "11000000" // /* MW 3 */ + 6080 "01100000" // /* MW 2 */ + 6081 "00011111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6082 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6083 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6084 "10011000" // LDA r19, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6085 "01110110" // /* MW 3 */ + 6086 "00000110" // /* MW 2 */ + 6087 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6089 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 301 2 first +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 6090 "00000100" // JL #5392 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=5392 delay_slots=5 */ + 6091 "00000001" // /* MW 5 */ + 6092 "00000000" // /* MW 4 */ + 6093 "10001000" // /* MW 3 */ + 6094 "00001010" // /* MW 2 */ + 6095 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 40 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6096 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6097 "11000000" // /* MW 3 */ + 6098 "11010100" // /* MW 2 */ + 6099 "00011011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 298 46 first +.delay_slot + 6100 "10011000" // LSHL r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6101 "00001101" // /* MW 3 */ + 6102 "01100011" // /* MW 2 */ + 6103 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 299 46 first +.delay_slot + 6104 "10011000" // LSHL r16, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6105 "00001101" // /* MW 3 */ + 6106 "00100001" // /* MW 2 */ + 6107 "00010101" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 299 46 +.delay_slot + 6108 "01011000" // ADD.NC p1, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6109 "01000001" // /* MW 3 */ + 6110 "01101001" // /* MW 2 */ + 6111 "00011001" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 298 46 first +.delay_slot + 6112 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6113 "00000000" // /* MW 15 */ + 6114 "00000000" // /* MW 14 */ + 6115 "10101000" // /* MW 13 */ + 6116 "11100010" // /* MW 12 */ + 6117 "00110100" // /* MW 11 */ + 6118 "00000000" // /* MW 10 */ + 6119 "00000000" // /* MW 9 */ + 6120 "00000000" // /* MW 8 */ + 6121 "01011011" // /* MW 7 */ + 6122 "00000001" // /* MW 6 */ + 6123 "00100000" // /* MW 5 */ + 6124 "00000000" // /* MW 4 */ + 6125 "11110000" // /* MW 3 */ + 6126 "00101100" // /* MW 2 */ + 6127 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 32 first +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 351 40 +.src_ref 1 "io_buffer_main.h" 449 8 +.src_ref 1 "io_buffer_main.h" 449 8 +.return_address + 6128 "10111010" // LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6129 "01111000" // /* MW 9 */ + 6130 "11010000" // /* MW 8 */ + 6131 "10110011" // /* MW 7 */ + 6132 "00101000" // /* MW 6 */ + 6133 "00000000" // /* MW 5 */ + 6134 "00000001" // /* MW 4 */ + 6135 "11010000" // /* MW 3 */ + 6136 "11000110" // /* MW 2 */ + 6137 "11001000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 19 + 6138 "01000100" // MOVXM p6, #508456 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6139 "01010000" // /* MW 5 */ + 6140 "11000100" // /* MW 4 */ + 6141 "11001100" // /* MW 3 */ + 6142 "00000111" // /* MW 2 */ + 6143 "00000000" // /* MW 1 */ + 6144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6145 "00000000" // /* MW 1 */ + 6146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6147 "00000000" // /* MW 1 */ + 6148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6149 "00000000" // /* MW 1 */ + 6150 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6151 "00000000" // /* MW 1 */ + 6152 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6153 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 6154 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6155 "00001000" // /* MW 3 */ + 6156 "01010001" // /* MW 2 */ + 6157 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first + 6158 "10011000" // LDA r17, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6159 "00110110" // /* MW 3 */ + 6160 "11110110" // /* MW 2 */ + 6161 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 + 6162 "00011000" // LDA p2, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6163 "00011001" // /* MW 3 */ + 6164 "11101101" // /* MW 2 */ + 6165 "00000111" // /* MW 1 */ + 6166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6167 "00000000" // /* MW 1 */ + 6168 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6169 "00000000" // /* MW 1 */ + 6170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6171 "00000000" // /* MW 1 */ + 6172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6173 "00000000" // /* MW 1 */ + 6174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6175 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 + 6176 "10011000" // SUB r17, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6177 "00010001" // /* MW 3 */ + 6178 "00100011" // /* MW 2 */ + 6179 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first +.src_ref 1 "io_buffer_main.h" 351 28 + 6180 "00001100" // LDA r17, [p2, #20]; ST r17, [p1, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6181 "01100011" // /* MW 5 */ + 6182 "11101100" // /* MW 4 */ + 6183 "11010011" // /* MW 3 */ + 6184 "11000110" // /* MW 2 */ + 6185 "01001010" // /* MW 1 */ + 6186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6187 "00000000" // /* MW 1 */ + 6188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6189 "00000000" // /* MW 1 */ + 6190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6191 "00000000" // /* MW 1 */ + 6192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6193 "00000000" // /* MW 1 */ + 6194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6195 "00000000" // /* MW 1 */ + 6196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6197 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 6198 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6199 "00001000" // /* MW 3 */ + 6200 "01010001" // /* MW 2 */ + 6201 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first +.src_ref 6 "superkernels.cpp" 305 6 +.src_ref 6 "superkernels.cpp" 306 14 + 6202 "10111010" // LDA r19, [p7, #-8]; MOVXM p1, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6203 "00010000" // /* MW 9 */ + 6204 "00000000" // /* MW 8 */ + 6205 "10110001" // /* MW 7 */ + 6206 "11110000" // /* MW 6 */ + 6207 "00000001" // /* MW 5 */ + 6208 "00000000" // /* MW 4 */ + 6209 "11010000" // /* MW 3 */ + 6210 "11001110" // /* MW 2 */ + 6211 "11111100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 19 first + 6212 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6213 "01010110" // /* MW 3 */ + 6214 "00000110" // /* MW 2 */ + 6215 "00000110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 6 + 6216 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6217 "00110110" // /* MW 3 */ + 6218 "00000110" // /* MW 2 */ + 6219 "00000001" // /* MW 1 */ + 6220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6221 "00000000" // /* MW 1 */ + 6222 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6223 "00000000" // /* MW 1 */ + 6224 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6225 "00000000" // /* MW 1 */ + 6226 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6227 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 first + 6228 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6229 "00110001" // /* MW 3 */ + 6230 "00100001" // /* MW 2 */ + 6231 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 6232 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6233 "00010001" // /* MW 3 */ + 6234 "11100110" // /* MW 2 */ + 6235 "00001111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 16 first + 6236 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6237 "00101000" // /* MW 3 */ + 6238 "01100001" // /* MW 2 */ + 6239 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 6 + 6240 "10000100" // JNZ r16, #6272 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6272 delay_slots=5 */ + 6241 "00000001" // /* MW 5 */ + 6242 "01000000" // /* MW 4 */ + 6243 "01000000" // /* MW 3 */ + 6244 "00001100" // /* MW 2 */ + 6245 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6247 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6248 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6249 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6251 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6252 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6253 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6255 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 306 14 + 6256 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6257 "00000001" // /* MW 3 */ + 6258 "00100000" // /* MW 2 */ + 6259 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 306 14 first + 6260 "00110110" // NOPA; NOPB; ST r16, [p1]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6261 "11000001" // /* MW 11 */ + 6262 "00001000" // /* MW 10 */ + 6263 "10000011" // /* MW 9 */ + 6264 "00000000" // /* MW 8 */ + 6265 "00000000" // /* MW 7 */ + 6266 "00000000" // /* MW 6 */ + 6267 "00100000" // /* MW 5 */ + 6268 "00000000" // /* MW 4 */ + 6269 "11110000" // /* MW 3 */ + 6270 "00101100" // /* MW 2 */ + 6271 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 +.src_ref 6 "superkernels.cpp" 308 + 6272 "00011000" // LDA lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6273 "00111001" // /* MW 3 */ + 6274 "11110000" // /* MW 2 */ + 6275 "00000111" // /* MW 1 */ + 6276 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6277 "11110001" // /* MW 3 */ + 6278 "11111101" // /* MW 2 */ + 6279 "00000111" // /* MW 1 */ + 6280 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6281 "10011001" // /* MW 3 */ + 6282 "11110111" // /* MW 2 */ + 6283 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 6284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6285 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.noswbrkpt + 6286 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6287 "11010001" // /* MW 3 */ + 6288 "11111001" // /* MW 2 */ + 6289 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 6290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6291 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 6292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6293 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 308 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 6294 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 6295 "00000000" // /* MW 3 */ + 6296 "00101000" // /* MW 2 */ + 6297 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6298 "00011000" // MOVS p6, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6299 "00001011" // /* MW 3 */ + 6300 "10001110" // /* MW 2 */ + 6301 "00001110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 308 +.delay_slot + 6302 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6303 "00000001" // /* MW 5 */ + 6304 "00000000" // /* MW 4 */ + 6305 "00000000" // /* MW 3 */ + 6306 "11111000" // /* MW 2 */ + 6307 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6309 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6311 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0 + 6313 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv +.function shared_setup_backbone _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv +.src_ref 2 "elementwise_binary_shared.h" 164 first +.src_ref 2 "elementwise_binary_shared.h" 170 22 +.src_ref 2 "elementwise_binary_shared.h" 170 24 first +.function_start + 6320 "10111010" // LDA el0, [p1], #4; MOVXM p0, #508864 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6321 "00010000" // /* MW 9 */ + 6322 "11100000" // /* MW 8 */ + 6323 "00110001" // /* MW 7 */ + 6324 "11110000" // /* MW 6 */ + 6325 "00000001" // /* MW 5 */ + 6326 "00000000" // /* MW 4 */ + 6327 "11010000" // /* MW 3 */ + 6328 "10000101" // /* MW 2 */ + 6329 "00100011" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 36 +.src_ref 2 "elementwise_binary_shared.h" 175 36 +.src_ref 2 "elementwise_binary_shared.h" 175 48 + 6330 "10111010" // MOVA r0, #-128; MOVX r1, #256; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6331 "01011000" // /* MW 9 */ + 6332 "00000000" // /* MW 8 */ + 6333 "00001000" // /* MW 7 */ + 6334 "00001011" // /* MW 6 */ + 6335 "00010000" // /* MW 5 */ + 6336 "00001000" // /* MW 4 */ + 6337 "00000000" // /* MW 3 */ + 6338 "00000000" // /* MW 2 */ + 6339 "11110000" // /* MW 1 */ + 6340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6341 "00000000" // /* MW 1 */ + 6342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6343 "00000000" // /* MW 1 */ + 6344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6345 "00000000" // /* MW 1 */ + 6346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6347 "00000000" // /* MW 1 */ + 6348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6349 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 170 22 first + 6350 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6351 "00101001" // /* MW 3 */ + 6352 "00011100" // /* MW 2 */ + 6353 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 171 24 first + 6354 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6355 "00101110" // /* MW 3 */ + 6356 "00011100" // /* MW 2 */ + 6357 "00000001" // /* MW 1 */ + 6358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6359 "00000000" // /* MW 1 */ + 6360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6361 "00000000" // /* MW 1 */ + 6362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6363 "00000000" // /* MW 1 */ + 6364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6365 "00000000" // /* MW 1 */ + 6366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6367 "00000000" // /* MW 1 */ + 6368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6369 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 171 22 + 6370 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6371 "00101001" // /* MW 3 */ + 6372 "00011100" // /* MW 2 */ + 6373 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 172 24 first + 6374 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6375 "00101110" // /* MW 3 */ + 6376 "00000100" // /* MW 2 */ + 6377 "00000001" // /* MW 1 */ + 6378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6379 "00000000" // /* MW 1 */ + 6380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6381 "00000000" // /* MW 1 */ + 6382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6383 "00000000" // /* MW 1 */ + 6384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6385 "00000000" // /* MW 1 */ + 6386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6387 "00000000" // /* MW 1 */ + 6388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6389 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 172 22 + 6390 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6391 "00101001" // /* MW 3 */ + 6392 "00011100" // /* MW 2 */ + 6393 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 173 24 first + 6394 "10011000" // LDA r3, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6395 "01110110" // /* MW 3 */ + 6396 "00010100" // /* MW 2 */ + 6397 "00000001" // /* MW 1 */ + 6398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6399 "00000000" // /* MW 1 */ + 6400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6401 "00000000" // /* MW 1 */ + 6402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6403 "00000000" // /* MW 1 */ + 6404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6405 "00000000" // /* MW 1 */ + 6406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6407 "00000000" // /* MW 1 */ + 6408 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6409 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 173 22 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 6410 "10011000" // ST r3, [p0], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6411 "01110001" // /* MW 3 */ + 6412 "01001100" // /* MW 2 */ + 6413 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 34 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 6414 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6415 "00010111" // /* MW 3 */ + 6416 "00000100" // /* MW 2 */ + 6417 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 176 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6418 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 6419 "00000000" // /* MW 3 */ + 6420 "00101000" // /* MW 2 */ + 6421 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6422 "01000100" // MOVXM r2, #65280 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6423 "00000000" // /* MW 5 */ + 6424 "00111110" // /* MW 4 */ + 6425 "11110001" // /* MW 3 */ + 6426 "00000000" // /* MW 2 */ + 6427 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 48 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6428 "10011000" // AND r2, r3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6429 "00100100" // /* MW 3 */ + 6430 "11000100" // /* MW 2 */ + 6431 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6432 "10011000" // EQ r27, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6433 "00100111" // /* MW 3 */ + 6434 "01110110" // /* MW 2 */ + 6435 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 36 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6436 "00011000" // SEL.EQZ r0, r0, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6437 "10000010" // /* MW 3 */ + 6438 "00000001" // /* MW 2 */ + 6439 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_end0 + 6441 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv +.function setup _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv +.src_ref 2 "elementwise_binary_shared.h" 178 +.src_ref 2 "elementwise_binary_shared.h" 178 first +.function_start + 6448 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6449 "00000001" // /* MW 5 */ + 6450 "00000000" // /* MW 4 */ + 6451 "00000000" // /* MW 3 */ + 6452 "00001000" // /* MW 2 */ + 6453 "00000000" // /* MW 1 */ + 6454 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6455 "00111101" // /* MW 3 */ + 6456 "11111100" // /* MW 2 */ + 6457 "00001111" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 179 8 first +.no_stack_arguments + 6458 "00000100" // JL #6320 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=6320 delay_slots=5 */ + 6459 "00000001" // /* MW 5 */ + 6460 "00000000" // /* MW 4 */ + 6461 "01011000" // /* MW 3 */ + 6462 "00001100" // /* MW 2 */ + 6463 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 179 8 +.delay_slot + 6464 "01000100" // MOVXM p0, #508864 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6465 "10000000" // /* MW 5 */ + 6466 "11000111" // /* MW 4 */ + 6467 "11000000" // /* MW 3 */ + 6468 "00000111" // /* MW 2 */ + 6469 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6471 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6473 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6475 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6476 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6477 "01100111" // /* MW 3 */ + 6478 "00000001" // /* MW 2 */ + 6479 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 181 4 +.src_ref 3 "sub_impl.h" 88 27 +.return_address + 6480 "10111010" // LDA lr, [sp, #-4]; MOVXM p1, #508864 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6481 "00010000" // /* MW 9 */ + 6482 "11100000" // /* MW 8 */ + 6483 "10110001" // /* MW 7 */ + 6484 "11110000" // /* MW 6 */ + 6485 "00000001" // /* MW 5 */ + 6486 "00000000" // /* MW 4 */ + 6487 "00100000" // /* MW 3 */ + 6488 "10000111" // /* MW 2 */ + 6489 "11111111" // /* MW 1 */ + 6490 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6491 "00000000" // /* MW 1 */ + 6492 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6493 "00000000" // /* MW 1 */ + 6494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6495 "00000000" // /* MW 1 */ + 6496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6497 "00000000" // /* MW 1 */ + 6498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6499 "00000000" // /* MW 1 */ + 6500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6501 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 181 4 first + 6502 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 6503 "00000000" // /* MW 3 */ + 6504 "00101000" // /* MW 2 */ + 6505 "00010000" // /* MW 1 */ +.src_ref 3 "sub_impl.h" 88 27 +.delay_slot + 6506 "00011000" // MOVX r16, #6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6507 "00011001" // /* MW 3 */ + 6508 "00100000" // /* MW 2 */ + 6509 "00010000" // /* MW 1 */ +.src_ref 3 "sub_impl.h" 88 27 first +.delay_slot + 6510 "10011000" // ST r16, [p1, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6511 "00010001" // /* MW 3 */ + 6512 "01000110" // /* MW 2 */ + 6513 "00001001" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 181 4 first +.delay_slot + 6514 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6515 "00000001" // /* MW 5 */ + 6516 "00000000" // /* MW 4 */ + 6517 "00000000" // /* MW 3 */ + 6518 "11111000" // /* MW 2 */ + 6519 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6521 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + 6523 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS4_E +.function run _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS4_E +.src_ref 2 "elementwise_binary_shared.h" 186 first +.src_ref 2 "elementwise_binary_shared.h" 191 8 first +.tail_call +.function_start + 6528 "10000100" // J #3152 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=3152 delay_slots=5 */ + 6529 "00000000" // /* MW 5 */ + 6530 "00000000" // /* MW 4 */ + 6531 "00101000" // /* MW 3 */ + 6532 "00000110" // /* MW 2 */ + 6533 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 191 8 +.delay_slot + 6534 "01000100" // MOVXM p3, #508864 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6535 "10000000" // /* MW 5 */ + 6536 "11000111" // /* MW 4 */ + 6537 "11000110" // /* MW 3 */ + 6538 "00000111" // /* MW 2 */ + 6539 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6541 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6543 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6545 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS4_E__end +.label __ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS4_E___func_end0 + 6547 "00000000" // /* MW 1 */ +.label __Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0 +.label _Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.function superkernel_sub1d _Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.src_ref 6 "superkernels.cpp" 314 first +.src_ref 6 "superkernels.cpp" 319 6 +.function_start + 6560 "01000100" // MOVXM p4, #508416 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6561 "00000000" // /* MW 5 */ + 6562 "11000100" // /* MW 4 */ + 6563 "11001000" // /* MW 3 */ + 6564 "00000111" // /* MW 2 */ + 6565 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 319 6 first + 6566 "11010100" // LDA r16, [p4]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6567 "11000001" // /* MW 5 */ + 6568 "10110101" // /* MW 4 */ + 6569 "11011000" // /* MW 3 */ + 6570 "11000010" // /* MW 2 */ + 6571 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 314 + 6572 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6573 "00000001" // /* MW 5 */ + 6574 "00000000" // /* MW 4 */ + 6575 "00000000" // /* MW 3 */ + 6576 "00001000" // /* MW 2 */ + 6577 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 316 22 first + 6578 "00111010" // ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6579 "01111001" // /* MW 9 */ + 6580 "01100000" // /* MW 8 */ + 6581 "11001010" // /* MW 7 */ + 6582 "10000001" // /* MW 6 */ + 6583 "00010100" // /* MW 5 */ + 6584 "00100011" // /* MW 4 */ + 6585 "10110000" // /* MW 3 */ + 6586 "00111010" // /* MW 2 */ + 6587 "11111111" // /* MW 1 */ + 6588 "00000010" // ST p0, [sp, #-20]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6589 "01110000" // /* MW 7 */ + 6590 "11010000" // /* MW 6 */ + 6591 "00001011" // /* MW 5 */ + 6592 "00000000" // /* MW 4 */ + 6593 "10110000" // /* MW 3 */ + 6594 "10000011" // /* MW 2 */ + 6595 "11111101" // /* MW 1 */ + 6596 "10011000" // ST r0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6597 "00010101" // /* MW 3 */ + 6598 "11111100" // /* MW 2 */ + 6599 "00001111" // /* MW 1 */ + 6600 "10011000" // ST lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6601 "00111101" // /* MW 3 */ + 6602 "11110000" // /* MW 2 */ + 6603 "00001111" // /* MW 1 */ + 6604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6605 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 319 6 first +.src_ref 6 "superkernels.cpp" 319 16 first + 6606 "10000100" // JNZ r16, #6752 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6752 delay_slots=5 */ + 6607 "00000001" // /* MW 5 */ + 6608 "01000000" // /* MW 4 */ + 6609 "00110000" // /* MW 3 */ + 6610 "00001101" // /* MW 2 */ + 6611 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 316 30 first +.delay_slot + 6612 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6613 "11111011" // /* MW 3 */ + 6614 "01100011" // /* MW 2 */ + 6615 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 316 11 +.delay_slot + 6616 "01000100" // MOVXM p2, #508424 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6617 "00010000" // /* MW 5 */ + 6618 "11000100" // /* MW 4 */ + 6619 "11000100" // /* MW 3 */ + 6620 "00000111" // /* MW 2 */ + 6621 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 316 11 +.delay_slot + 6622 "00000010" // ST r17, [p2]; MOV p2, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6623 "01110000" // /* MW 7 */ + 6624 "01100000" // /* MW 6 */ + 6625 "00110111" // /* MW 5 */ + 6626 "00000001" // /* MW 4 */ + 6627 "00110000" // /* MW 3 */ + 6628 "11000110" // /* MW 2 */ + 6629 "01000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.delay_slot + 6630 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6631 "11000000" // /* MW 3 */ + 6632 "11010110" // /* MW 2 */ + 6633 "00011011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 322 4 +.src_ref 6 "superkernels.cpp" 324 28 +.src_ref 6 "superkernels.cpp" 326 42 +.src_ref 6 "superkernels.cpp" 338 2 +.delay_slot + 6634 "00111010" // ST p2, [sp, #-12]; MOVXM p7, #508864 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6635 "00010001" // /* MW 9 */ + 6636 "11100000" // /* MW 8 */ + 6637 "10110001" // /* MW 7 */ + 6638 "11110011" // /* MW 6 */ + 6639 "00000001" // /* MW 5 */ + 6640 "00000000" // /* MW 4 */ + 6641 "10110000" // /* MW 3 */ + 6642 "10100011" // /* MW 2 */ + 6643 "11111110" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.src_ref 6 "superkernels.cpp" 322 4 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 6644 "00111010" // MOVS p0, p7; MOVXM p2, #508464 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6645 "00010001" // /* MW 9 */ + 6646 "00011000" // /* MW 8 */ + 6647 "00110001" // /* MW 7 */ + 6648 "11110001" // /* MW 6 */ + 6649 "00000001" // /* MW 5 */ + 6650 "00000000" // /* MW 4 */ + 6651 "01100000" // /* MW 3 */ + 6652 "10010001" // /* MW 2 */ + 6653 "00010011" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 6654 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #508460 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6655 "00010000" // /* MW 9 */ + 6656 "00010110" // /* MW 8 */ + 6657 "00110001" // /* MW 7 */ + 6658 "11110001" // /* MW 6 */ + 6659 "00000001" // /* MW 5 */ + 6660 "00000000" // /* MW 4 */ + 6661 "11100000" // /* MW 3 */ + 6662 "11000000" // /* MW 2 */ + 6663 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6665 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 322 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 6666 "00000100" // JL #6448 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=6448 delay_slots=5 */ + 6667 "00000001" // /* MW 5 */ + 6668 "00000000" // /* MW 4 */ + 6669 "10011000" // /* MW 3 */ + 6670 "00001100" // /* MW 2 */ + 6671 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6673 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6674 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6675 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6676 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6677 "00110001" // /* MW 3 */ + 6678 "00100000" // /* MW 2 */ + 6679 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 6680 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6681 "00000101" // /* MW 3 */ + 6682 "00100000" // /* MW 2 */ + 6683 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 6684 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6685 "00010001" // /* MW 3 */ + 6686 "00000110" // /* MW 2 */ + 6687 "00001010" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 326 18 +.src_ref 6 "superkernels.cpp" 326 42 first +.return_address + 6688 "10111010" // LDA r16, [p7]; MOVXM p1, #508424 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6689 "00010000" // /* MW 9 */ + 6690 "00000100" // /* MW 8 */ + 6691 "10110001" // /* MW 7 */ + 6692 "11110000" // /* MW 6 */ + 6693 "00000001" // /* MW 5 */ + 6694 "00000000" // /* MW 4 */ + 6695 "11010000" // /* MW 3 */ + 6696 "11000010" // /* MW 2 */ + 6697 "11100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 326 16 +.src_ref 6 "superkernels.cpp" 326 18 +.src_ref 6 "superkernels.cpp" 335 48 + 6698 "10111010" // LDA r17, [p1]; MOVXM p3, #508428 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6699 "00010000" // /* MW 9 */ + 6700 "00000110" // /* MW 8 */ + 6701 "10110001" // /* MW 7 */ + 6702 "11110001" // /* MW 6 */ + 6703 "00000001" // /* MW 5 */ + 6704 "00000000" // /* MW 4 */ + 6705 "11010000" // /* MW 3 */ + 6706 "11000110" // /* MW 2 */ + 6707 "00100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 324 28 first +.src_ref 6 "superkernels.cpp" 327 16 +.src_ref 6 "superkernels.cpp" 336 48 + 6708 "10111010" // LDA.u16 r18, [p7, #10]; MOVXM p1, #508432 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6709 "00010000" // /* MW 9 */ + 6710 "00001000" // /* MW 8 */ + 6711 "10110001" // /* MW 7 */ + 6712 "11110000" // /* MW 6 */ + 6713 "00000001" // /* MW 5 */ + 6714 "00000000" // /* MW 4 */ + 6715 "01010000" // /* MW 3 */ + 6716 "11001011" // /* MW 2 */ + 6717 "11101010" // /* MW 1 */ + 6718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6719 "00000000" // /* MW 1 */ + 6720 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6721 "00000000" // /* MW 1 */ + 6722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6723 "00000000" // /* MW 1 */ + 6724 "10000100" // J #6768 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6768 delay_slots=5 */ + 6725 "00000000" // /* MW 5 */ + 6726 "00000000" // /* MW 4 */ + 6727 "00111000" // /* MW 3 */ + 6728 "00001101" // /* MW 2 */ + 6729 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 324 13 +.delay_slot + 6730 "01000100" // MOVXM p2, #508456 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6731 "01010000" // /* MW 5 */ + 6732 "11000100" // /* MW 4 */ + 6733 "11000100" // /* MW 3 */ + 6734 "00000111" // /* MW 2 */ + 6735 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 326 27 first +.delay_slot + 6736 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6737 "00001111" // /* MW 3 */ + 6738 "01100001" // /* MW 2 */ + 6739 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 324 13 first +.delay_slot + 6740 "10011000" // ST r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6741 "01010001" // /* MW 3 */ + 6742 "00000110" // /* MW 2 */ + 6743 "00001010" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 326 16 first +.delay_slot + 6744 "10011000" // ST r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6745 "00010001" // /* MW 3 */ + 6746 "00000110" // /* MW 2 */ + 6747 "00001011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 327 16 first +.delay_slot + 6748 "10011000" // ST r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6749 "00010001" // /* MW 3 */ + 6750 "00000110" // /* MW 2 */ + 6751 "00001001" // /* MW 1 */ +.label TGT_F_Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 +.src_ref 6 "superkernels.cpp" 335 48 + 6752 "01000100" // MOVXM p3, #508428 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6753 "00011000" // /* MW 5 */ + 6754 "11000100" // /* MW 4 */ + 6755 "11000110" // /* MW 3 */ + 6756 "00000111" // /* MW 2 */ + 6757 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 336 48 + 6758 "10111010" // NOPA; MOVXM p1, #508432 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6759 "00010000" // /* MW 9 */ + 6760 "00001000" // /* MW 8 */ + 6761 "10110001" // /* MW 7 */ + 6762 "11110000" // /* MW 6 */ + 6763 "00000001" // /* MW 5 */ + 6764 "00000000" // /* MW 4 */ + 6765 "11110000" // /* MW 3 */ + 6766 "00101100" // /* MW 2 */ + 6767 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 +.src_ref 1 "io_buffer_main.h" 242 49 first + 6768 "00011000" // ADD.NC p0, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6769 "10000110" // /* MW 3 */ + 6770 "01100111" // /* MW 2 */ + 6771 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 330 2 + 6772 "10111010" // LDA r27, [p0], #-4; MOVXM p2, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6773 "00010000" // /* MW 9 */ + 6774 "00000000" // /* MW 8 */ + 6775 "00110001" // /* MW 7 */ + 6776 "11110001" // /* MW 6 */ + 6777 "00000001" // /* MW 5 */ + 6778 "00000000" // /* MW 4 */ + 6779 "11010000" // /* MW 3 */ + 6780 "11101110" // /* MW 2 */ + 6781 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 6782 "10011000" // LDA r16, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6783 "00010110" // /* MW 3 */ + 6784 "11111110" // /* MW 2 */ + 6785 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 6786 "10011000" // LDA r17, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6787 "00110110" // /* MW 3 */ + 6788 "11111110" // /* MW 2 */ + 6789 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 330 2 first + 6790 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6791 "01010110" // /* MW 3 */ + 6792 "00000110" // /* MW 2 */ + 6793 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first + 6794 "10011000" // LDA r19, [p0, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6795 "01110110" // /* MW 3 */ + 6796 "01000110" // /* MW 2 */ + 6797 "00000000" // /* MW 1 */ + 6798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6799 "00000000" // /* MW 1 */ + 6800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6801 "00000000" // /* MW 1 */ + 6802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6803 "00000000" // /* MW 1 */ + 6804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6805 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 6806 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6807 "00000010" // /* MW 3 */ + 6808 "01100001" // /* MW 2 */ + 6809 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 +.src_ref 6 "superkernels.cpp" 330 2 first + 6810 "01011100" // ST r16, [p0]; ADD r16, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6811 "00001110" // /* MW 5 */ + 6812 "01000000" // /* MW 4 */ + 6813 "00111001" // /* MW 3 */ + 6814 "11000010" // /* MW 2 */ + 6815 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 330 2 + 6816 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6817 "00010001" // /* MW 3 */ + 6818 "00000110" // /* MW 2 */ + 6819 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 +.src_ref 1 "io_buffer_main.h" 419 8 + 6820 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6821 "11111101" // /* MW 3 */ + 6822 "11100000" // /* MW 2 */ + 6823 "00010111" // /* MW 1 */ + 6824 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6825 "00000000" // /* MW 1 */ + 6826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6827 "00000000" // /* MW 1 */ + 6828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6829 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 6830 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6831 "00001000" // /* MW 3 */ + 6832 "11010011" // /* MW 2 */ + 6833 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 first + 6834 "00011000" // ADD.NC p2, r14, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6835 "00000110" // /* MW 3 */ + 6836 "01100111" // /* MW 2 */ + 6837 "00011010" // /* MW 1 */ + 6838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6839 "00000000" // /* MW 1 */ + 6840 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6841 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 + 6842 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6843 "01110110" // /* MW 3 */ + 6844 "11111111" // /* MW 2 */ + 6845 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 6846 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6847 "00110110" // /* MW 3 */ + 6848 "11111110" // /* MW 2 */ + 6849 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 6850 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6851 "01010110" // /* MW 3 */ + 6852 "11111110" // /* MW 2 */ + 6853 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 47 first + 6854 "10011000" // LDA r19, [p2, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6855 "01110110" // /* MW 3 */ + 6856 "01010110" // /* MW 2 */ + 6857 "00000010" // /* MW 1 */ + 6858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6859 "00000000" // /* MW 1 */ + 6860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6861 "00000000" // /* MW 1 */ + 6862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6863 "00000000" // /* MW 1 */ + 6864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6865 "00000000" // /* MW 1 */ + 6866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6867 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 6868 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6869 "00010010" // /* MW 3 */ + 6870 "10100011" // /* MW 2 */ + 6871 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 + 6872 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6873 "00110001" // /* MW 3 */ + 6874 "00000110" // /* MW 2 */ + 6875 "00001010" // /* MW 1 */ + 6876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6877 "00000000" // /* MW 1 */ + 6878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6879 "00000000" // /* MW 1 */ + 6880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6881 "00000000" // /* MW 1 */ + 6882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6883 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 6884 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6885 "00001000" // /* MW 3 */ + 6886 "11010011" // /* MW 2 */ + 6887 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 32 +.src_ref 6 "superkernels.cpp" 335 46 +.src_ref 6 "superkernels.cpp" 336 46 + 6888 "00111010" // MOVS p6, p2; MOVX r16, #1; MOV r14, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6889 "01111001" // /* MW 9 */ + 6890 "01100000" // /* MW 8 */ + 6891 "11001110" // /* MW 7 */ + 6892 "00101001" // /* MW 6 */ + 6893 "00000000" // /* MW 5 */ + 6894 "00000001" // /* MW 4 */ + 6895 "01100000" // /* MW 3 */ + 6896 "00010001" // /* MW 2 */ + 6897 "11010001" // /* MW 1 */ + 6898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6899 "00000000" // /* MW 1 */ + 6900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6901 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 + 6902 "00011000" // LDA p4, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6903 "00011001" // /* MW 3 */ + 6904 "11101110" // /* MW 2 */ + 6905 "00000111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 335 48 first + 6906 "00001100" // LDA r17, [p3]; ST p0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6907 "00111011" // /* MW 5 */ + 6908 "11011000" // /* MW 4 */ + 6909 "11011111" // /* MW 3 */ + 6910 "11000110" // /* MW 2 */ + 6911 "01100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 336 48 first +.src_ref 6 "superkernels.cpp" 338 2 + 6912 "11010100" // LDA r20, [p1]; MOV p3, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6913 "10000001" // /* MW 5 */ + 6914 "11011101" // /* MW 4 */ + 6915 "11010110" // /* MW 3 */ + 6916 "11010010" // /* MW 2 */ + 6917 "00100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 6918 "10011000" // LDA r18, [p2], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6919 "01010110" // /* MW 3 */ + 6920 "01001110" // /* MW 2 */ + 6921 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 6922 "10011000" // LDA p2, [p0], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6923 "00011110" // /* MW 3 */ + 6924 "01011101" // /* MW 2 */ + 6925 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 40 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6926 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6927 "11000000" // /* MW 3 */ + 6928 "01100000" // /* MW 2 */ + 6929 "00011111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6931 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6932 "10011000" // LDA r19, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6933 "01110110" // /* MW 3 */ + 6934 "00000110" // /* MW 2 */ + 6935 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6937 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 338 2 first +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 6938 "00000100" // JL #6528 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=6528 delay_slots=5 */ + 6939 "00000001" // /* MW 5 */ + 6940 "00000000" // /* MW 4 */ + 6941 "11000000" // /* MW 3 */ + 6942 "00001100" // /* MW 2 */ + 6943 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 40 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6944 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6945 "11000000" // /* MW 3 */ + 6946 "11010100" // /* MW 2 */ + 6947 "00011011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 335 46 first +.delay_slot + 6948 "10011000" // LSHL r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6949 "00001101" // /* MW 3 */ + 6950 "01100011" // /* MW 2 */ + 6951 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 336 46 first +.delay_slot + 6952 "10011000" // LSHL r16, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6953 "00001101" // /* MW 3 */ + 6954 "00100001" // /* MW 2 */ + 6955 "00010101" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 336 46 +.delay_slot + 6956 "01011000" // ADD.NC p1, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6957 "01000001" // /* MW 3 */ + 6958 "01101001" // /* MW 2 */ + 6959 "00011001" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 335 46 first +.delay_slot + 6960 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6961 "00000000" // /* MW 15 */ + 6962 "00000000" // /* MW 14 */ + 6963 "10101000" // /* MW 13 */ + 6964 "11100010" // /* MW 12 */ + 6965 "00110100" // /* MW 11 */ + 6966 "00000000" // /* MW 10 */ + 6967 "00000000" // /* MW 9 */ + 6968 "00000000" // /* MW 8 */ + 6969 "01011011" // /* MW 7 */ + 6970 "00000001" // /* MW 6 */ + 6971 "00100000" // /* MW 5 */ + 6972 "00000000" // /* MW 4 */ + 6973 "11110000" // /* MW 3 */ + 6974 "00101100" // /* MW 2 */ + 6975 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 32 first +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 351 40 +.src_ref 1 "io_buffer_main.h" 449 8 +.src_ref 1 "io_buffer_main.h" 449 8 +.return_address + 6976 "10111010" // LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6977 "01111000" // /* MW 9 */ + 6978 "11010000" // /* MW 8 */ + 6979 "10110011" // /* MW 7 */ + 6980 "00101000" // /* MW 6 */ + 6981 "00000000" // /* MW 5 */ + 6982 "00000001" // /* MW 4 */ + 6983 "11010000" // /* MW 3 */ + 6984 "11000110" // /* MW 2 */ + 6985 "11001000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 342 19 + 6986 "01000100" // MOVXM p6, #508456 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6987 "01010000" // /* MW 5 */ + 6988 "11000100" // /* MW 4 */ + 6989 "11001100" // /* MW 3 */ + 6990 "00000111" // /* MW 2 */ + 6991 "00000000" // /* MW 1 */ + 6992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6993 "00000000" // /* MW 1 */ + 6994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6995 "00000000" // /* MW 1 */ + 6996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6997 "00000000" // /* MW 1 */ + 6998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6999 "00000000" // /* MW 1 */ + 7000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7001 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 7002 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7003 "00001000" // /* MW 3 */ + 7004 "01010001" // /* MW 2 */ + 7005 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first + 7006 "10011000" // LDA r17, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7007 "00110110" // /* MW 3 */ + 7008 "11110110" // /* MW 2 */ + 7009 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 + 7010 "00011000" // LDA p2, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7011 "00011001" // /* MW 3 */ + 7012 "11101101" // /* MW 2 */ + 7013 "00000111" // /* MW 1 */ + 7014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7015 "00000000" // /* MW 1 */ + 7016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7017 "00000000" // /* MW 1 */ + 7018 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7019 "00000000" // /* MW 1 */ + 7020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7021 "00000000" // /* MW 1 */ + 7022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7023 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 + 7024 "10011000" // SUB r17, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7025 "00010001" // /* MW 3 */ + 7026 "00100011" // /* MW 2 */ + 7027 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first +.src_ref 1 "io_buffer_main.h" 351 28 + 7028 "00001100" // LDA r17, [p2, #20]; ST r17, [p1, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7029 "01100011" // /* MW 5 */ + 7030 "11101100" // /* MW 4 */ + 7031 "11010011" // /* MW 3 */ + 7032 "11000110" // /* MW 2 */ + 7033 "01001010" // /* MW 1 */ + 7034 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7035 "00000000" // /* MW 1 */ + 7036 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7037 "00000000" // /* MW 1 */ + 7038 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7039 "00000000" // /* MW 1 */ + 7040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7041 "00000000" // /* MW 1 */ + 7042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7043 "00000000" // /* MW 1 */ + 7044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7045 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 7046 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7047 "00001000" // /* MW 3 */ + 7048 "01010001" // /* MW 2 */ + 7049 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first +.src_ref 6 "superkernels.cpp" 342 6 +.src_ref 6 "superkernels.cpp" 343 14 + 7050 "10111010" // LDA r19, [p7, #-8]; MOVXM p1, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7051 "00010000" // /* MW 9 */ + 7052 "00000000" // /* MW 8 */ + 7053 "10110001" // /* MW 7 */ + 7054 "11110000" // /* MW 6 */ + 7055 "00000001" // /* MW 5 */ + 7056 "00000000" // /* MW 4 */ + 7057 "11010000" // /* MW 3 */ + 7058 "11001110" // /* MW 2 */ + 7059 "11111100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 342 19 first + 7060 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7061 "01010110" // /* MW 3 */ + 7062 "00000110" // /* MW 2 */ + 7063 "00000110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 342 6 + 7064 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7065 "00110110" // /* MW 3 */ + 7066 "00000110" // /* MW 2 */ + 7067 "00000001" // /* MW 1 */ + 7068 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7069 "00000000" // /* MW 1 */ + 7070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7071 "00000000" // /* MW 1 */ + 7072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7073 "00000000" // /* MW 1 */ + 7074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7075 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 first + 7076 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7077 "00110001" // /* MW 3 */ + 7078 "00100001" // /* MW 2 */ + 7079 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 7080 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7081 "00010001" // /* MW 3 */ + 7082 "11100110" // /* MW 2 */ + 7083 "00001111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 342 16 first + 7084 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7085 "00101000" // /* MW 3 */ + 7086 "01100001" // /* MW 2 */ + 7087 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 342 6 + 7088 "10000100" // JNZ r16, #7120 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7120 delay_slots=5 */ + 7089 "00000001" // /* MW 5 */ + 7090 "01000000" // /* MW 4 */ + 7091 "11101000" // /* MW 3 */ + 7092 "00001101" // /* MW 2 */ + 7093 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7095 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7097 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7099 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7100 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7101 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7103 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 343 14 + 7104 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7105 "00000001" // /* MW 3 */ + 7106 "00100000" // /* MW 2 */ + 7107 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 343 14 first + 7108 "00110110" // NOPA; NOPB; ST r16, [p1]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7109 "11000001" // /* MW 11 */ + 7110 "00001000" // /* MW 10 */ + 7111 "10000011" // /* MW 9 */ + 7112 "00000000" // /* MW 8 */ + 7113 "00000000" // /* MW 7 */ + 7114 "00000000" // /* MW 6 */ + 7115 "00100000" // /* MW 5 */ + 7116 "00000000" // /* MW 4 */ + 7117 "11110000" // /* MW 3 */ + 7118 "00101100" // /* MW 2 */ + 7119 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 +.src_ref 6 "superkernels.cpp" 345 + 7120 "00011000" // LDA lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7121 "00111001" // /* MW 3 */ + 7122 "11110000" // /* MW 2 */ + 7123 "00000111" // /* MW 1 */ + 7124 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7125 "11110001" // /* MW 3 */ + 7126 "11111101" // /* MW 2 */ + 7127 "00000111" // /* MW 1 */ + 7128 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7129 "10011001" // /* MW 3 */ + 7130 "11110111" // /* MW 2 */ + 7131 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 7132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7133 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.noswbrkpt + 7134 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7135 "11010001" // /* MW 3 */ + 7136 "11111001" // /* MW 2 */ + 7137 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7139 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7141 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 345 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7142 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 7143 "00000000" // /* MW 3 */ + 7144 "00101000" // /* MW 2 */ + 7145 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7146 "00011000" // MOVS p6, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7147 "00001011" // /* MW 3 */ + 7148 "10001110" // /* MW 2 */ + 7149 "00001110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 345 +.delay_slot + 7150 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7151 "00000001" // /* MW 5 */ + 7152 "00000000" // /* MW 4 */ + 7153 "00000000" // /* MW 3 */ + 7154 "11111000" // /* MW 2 */ + 7155 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7157 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7159 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end +.label __Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0 + 7161 "00000000" // /* MW 1 */ +.label __ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv___func_begin0 +.label _ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv +.function setup _ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv +.src_ref 2 "reduce_base.h" 144 first +.src_ref 2 "reduce_base.h" 146 25 +.src_ref 2 "reduce_base.h" 146 27 first +.function_start + 7168 "10111010" // LDA r1, [p1], #4; MOVXM p0, #508480 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7169 "00010000" // /* MW 9 */ + 7170 "00100000" // /* MW 8 */ + 7171 "00110001" // /* MW 7 */ + 7172 "11110000" // /* MW 6 */ + 7173 "00000001" // /* MW 5 */ + 7174 "00000000" // /* MW 4 */ + 7175 "11010000" // /* MW 3 */ + 7176 "10000110" // /* MW 2 */ + 7177 "00100011" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 152 30 +.src_ref 2 "reduce_base.h" 154 31 +.src_ref 2 "reduce_base.h" 155 8 + 7178 "10111010" // MOVA r24, #0; MOVX r4, #4; MOV m0, #6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7179 "01011000" // /* MW 9 */ + 7180 "00000110" // /* MW 8 */ + 7181 "00000000" // /* MW 7 */ + 7182 "10001000" // /* MW 6 */ + 7183 "01000000" // /* MW 5 */ + 7184 "00000000" // /* MW 4 */ + 7185 "00000000" // /* MW 3 */ + 7186 "00011000" // /* MW 2 */ + 7187 "00000000" // /* MW 1 */ + 7188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7189 "00000000" // /* MW 1 */ + 7190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7191 "00000000" // /* MW 1 */ + 7192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7193 "00000000" // /* MW 1 */ + 7194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7195 "00000000" // /* MW 1 */ + 7196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7197 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 146 25 first + 7198 "10011000" // ST r1, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7199 "00110001" // /* MW 3 */ + 7200 "00011100" // /* MW 2 */ + 7201 "00001000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 147 28 first + 7202 "10011000" // LDA r6, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7203 "11010110" // /* MW 3 */ + 7204 "00011100" // /* MW 2 */ + 7205 "00000001" // /* MW 1 */ + 7206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7207 "00000000" // /* MW 1 */ + 7208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7209 "00000000" // /* MW 1 */ + 7210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7211 "00000000" // /* MW 1 */ + 7212 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7213 "00000000" // /* MW 1 */ + 7214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7215 "00000000" // /* MW 1 */ + 7216 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7217 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 147 26 +.src_ref 2 "reduce_base.h" 189 37 first + 7218 "01011100" // ST r6, [p0], #4; ADD r0, r6, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7219 "11111110" // /* MW 5 */ + 7220 "00000011" // /* MW 4 */ + 7221 "00110011" // /* MW 3 */ + 7222 "10011010" // /* MW 2 */ + 7223 "00000011" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 148 26 first + 7224 "10011000" // LDA r5, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7225 "10110110" // /* MW 3 */ + 7226 "00011100" // /* MW 2 */ + 7227 "00000001" // /* MW 1 */ + 7228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7229 "00000000" // /* MW 1 */ + 7230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7231 "00000000" // /* MW 1 */ + 7232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7233 "00000000" // /* MW 1 */ + 7234 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7235 "00000000" // /* MW 1 */ + 7236 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7237 "00000000" // /* MW 1 */ + 7238 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7239 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 148 24 +.src_ref 2 "reduce_base.h" 191 53 first + 7240 "01011100" // ST r5, [p0], #4; MUL r7, r5, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7241 "11011111" // /* MW 5 */ + 7242 "10011100" // /* MW 4 */ + 7243 "00110010" // /* MW 3 */ + 7244 "10010110" // /* MW 2 */ + 7245 "00000011" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 149 29 first + 7246 "10011000" // LDA r16, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7247 "00010110" // /* MW 3 */ + 7248 "00011110" // /* MW 2 */ + 7249 "00000001" // /* MW 1 */ + 7250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7251 "00000000" // /* MW 1 */ + 7252 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7253 "00000000" // /* MW 1 */ + 7254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7255 "00000000" // /* MW 1 */ + 7256 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7257 "00000000" // /* MW 1 */ + 7258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7259 "00000000" // /* MW 1 */ + 7260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7261 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 149 27 +.src_ref 2 "reduce_base.h" 155 8 first + 7262 "01011100" // ST r16, [p0], #4; EQ r4, r4, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7263 "00001111" // /* MW 5 */ + 7264 "00010010" // /* MW 4 */ + 7265 "00110010" // /* MW 3 */ + 7266 "11000010" // /* MW 2 */ + 7267 "00000011" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 150 33 first + 7268 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7269 "00101110" // /* MW 3 */ + 7270 "00011100" // /* MW 2 */ + 7271 "00000001" // /* MW 1 */ + 7272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7273 "00000000" // /* MW 1 */ + 7274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7275 "00000000" // /* MW 1 */ + 7276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7277 "00000000" // /* MW 1 */ + 7278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7279 "00000000" // /* MW 1 */ + 7280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7281 "00000000" // /* MW 1 */ + 7282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7283 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 150 31 +.src_ref 2 "reduce_base.h" 153 53 + 7284 "00000010" // ST el0, [p0], #4; MOV r18, el0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7285 "01110000" // /* MW 7 */ + 7286 "00001110" // /* MW 6 */ + 7287 "01010000" // /* MW 5 */ + 7288 "00000010" // /* MW 4 */ + 7289 "00110000" // /* MW 3 */ + 7290 "10000101" // /* MW 2 */ + 7291 "00000011" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 151 34 first + 7292 "10011000" // LDA r19, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7293 "01110110" // /* MW 3 */ + 7294 "00011110" // /* MW 2 */ + 7295 "00000001" // /* MW 1 */ + 7296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7297 "00000000" // /* MW 1 */ + 7298 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7299 "00000000" // /* MW 1 */ + 7300 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7301 "00000000" // /* MW 1 */ + 7302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7303 "00000000" // /* MW 1 */ + 7304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7305 "00000000" // /* MW 1 */ + 7306 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7307 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 151 32 +.src_ref 2 "reduce_base.h" 153 53 first + 7308 "01011100" // ST r19, [p0], #4; MUL r18, r19, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7309 "01011111" // /* MW 5 */ + 7310 "11001010" // /* MW 4 */ + 7311 "00111001" // /* MW 3 */ + 7312 "11001110" // /* MW 2 */ + 7313 "00000011" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 152 32 first + 7314 "10011000" // LDA eh0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7315 "00001110" // /* MW 3 */ + 7316 "00000100" // /* MW 2 */ + 7317 "00000001" // /* MW 1 */ + 7318 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7319 "00000000" // /* MW 1 */ + 7320 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7321 "00000000" // /* MW 1 */ + 7322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7323 "00000000" // /* MW 1 */ + 7324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7325 "00000000" // /* MW 1 */ + 7326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7327 "00000000" // /* MW 1 */ + 7328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7329 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 152 30 +.src_ref 2 "reduce_base.h" 153 79 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7330 "00000010" // ST eh0, [p0], m0; MOV r20, eh0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7331 "01110000" // /* MW 7 */ + 7332 "10001110" // /* MW 6 */ + 7333 "10010000" // /* MW 5 */ + 7334 "00000010" // /* MW 4 */ + 7335 "00110000" // /* MW 3 */ + 7336 "00000001" // /* MW 2 */ + 7337 "00000001" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 153 28 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7338 "00011000" // ST.s16 r18, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7339 "01010111" // /* MW 3 */ + 7340 "00101110" // /* MW 2 */ + 7341 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 153 79 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7342 "10011000" // MUL r18, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7343 "01001111" // /* MW 3 */ + 7344 "10100101" // /* MW 2 */ + 7345 "00010100" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 155 8 first + 7346 "10000100" // JNZ r4, #7440 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7440 delay_slots=5 */ + 7347 "00000001" // /* MW 5 */ + 7348 "01000000" // /* MW 4 */ + 7349 "10001000" // /* MW 3 */ + 7350 "00001110" // /* MW 2 */ + 7351 "00100000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 167 55 +.src_ref 2 "reduce_base.h" 172 89 +.src_ref 2 "reduce_base.h" 187 53 +.src_ref 2 "reduce_base.h" 193 89 +.src_ref 2 "reduce_base.h" 195 55 +.delay_slot + 7352 "00011000" // MOVX r2, #-5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7353 "11101101" // /* MW 3 */ + 7354 "11000100" // /* MW 2 */ + 7355 "00010111" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 187 53 first +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 7356 "10011000" // LSHL r3, r5, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7357 "00101101" // /* MW 3 */ + 7358 "01000110" // /* MW 2 */ + 7359 "00010001" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 193 70 first +.delay_slot +.aggressive_scheduled_block_id 2 +.noswbrkpt + 7360 "10011000" // MUL r3, r7, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7361 "00011111" // /* MW 3 */ + 7362 "11000110" // /* MW 2 */ + 7363 "00010001" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 187 37 first +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7364 "00011000" // ADD r17, r3, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7365 "11111111" // /* MW 3 */ + 7366 "11100011" // /* MW 2 */ + 7367 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 154 31 first +.src_ref 2 "reduce_base.h" 193 89 first +.delay_slot + 7368 "00101100" // ST.s16 r24, [p0]; LSHL r3, r3, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7369 "01011011" // /* MW 5 */ + 7370 "10001100" // /* MW 4 */ + 7371 "11100001" // /* MW 3 */ + 7372 "11100010" // /* MW 2 */ + 7373 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 155 8 + 7374 "00011000" // MOVX r18, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7375 "00001001" // /* MW 3 */ + 7376 "00100100" // /* MW 2 */ + 7377 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 155 8 first + 7378 "10011000" // EQ r18, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7379 "00000111" // /* MW 3 */ + 7380 "10100101" // /* MW 2 */ + 7381 "00010100" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 155 8 + 7382 "10000100" // JNZ r18, #7840 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7840 delay_slots=5 */ + 7383 "00000001" // /* MW 5 */ + 7384 "01000000" // /* MW 4 */ + 7385 "01010000" // /* MW 3 */ + 7386 "00001111" // /* MW 2 */ + 7387 "10010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7389 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7391 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7393 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7394 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7395 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 178 37 first +.delay_slot + 7396 "00011000" // ADD r4, r1, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7397 "11111111" // /* MW 3 */ + 7398 "01001001" // /* MW 2 */ + 7399 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 155 8 + 7400 "00011000" // MOVX r6, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7401 "00000101" // /* MW 3 */ + 7402 "00001100" // /* MW 2 */ + 7403 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 155 8 first + 7404 "10011000" // EQ r6, r6, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7405 "00000111" // /* MW 3 */ + 7406 "10001101" // /* MW 2 */ + 7407 "00010001" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 155 8 + 7408 "10000100" // JNZ r6, #7680 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7680 delay_slots=5 */ + 7409 "00000001" // /* MW 5 */ + 7410 "01000000" // /* MW 4 */ + 7411 "00000000" // /* MW 3 */ + 7412 "00001111" // /* MW 2 */ + 7413 "00110000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7415 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7417 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7419 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7421 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7423 "00000000" // /* MW 1 */ + 7424 "10000100" // J #7632 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=7632 delay_slots=5 */ + 7425 "00000000" // /* MW 5 */ + 7426 "00000000" // /* MW 4 */ + 7427 "11101000" // /* MW 3 */ + 7428 "00001110" // /* MW 2 */ + 7429 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7431 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7433 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7435 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7437 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7439 "00000000" // /* MW 1 */ +.label TGT_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv_272 +.src_ref 2 "reduce_base.h" 186 34 +.src_ref 2 "reduce_base.h" 186 34 +.src_ref 2 "reduce_base.h" 188 34 +.src_ref 2 "reduce_base.h" 190 36 + 7440 "10111010" // MOVA r1, #32; MOVXM p2, #508518 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7441 "00010000" // /* MW 9 */ + 7442 "00110011" // /* MW 8 */ + 7443 "00110001" // /* MW 7 */ + 7444 "11110001" // /* MW 6 */ + 7445 "00000001" // /* MW 5 */ + 7446 "00000000" // /* MW 4 */ + 7447 "00000000" // /* MW 3 */ + 7448 "00000001" // /* MW 2 */ + 7449 "00000100" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 192 34 +.src_ref 2 "reduce_base.h" 194 38 +.src_ref 2 "reduce_base.h" 195 55 first + 7450 "10111010" // MOVA r4, #32; LSHL r16, r7, r2; MOV r2, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7451 "01111000" // /* MW 9 */ + 7452 "00001110" // /* MW 8 */ + 7453 "01010000" // /* MW 7 */ + 7454 "01101100" // /* MW 6 */ + 7455 "00000001" // /* MW 5 */ + 7456 "00001111" // /* MW 4 */ + 7457 "00000000" // /* MW 3 */ + 7458 "00000100" // /* MW 2 */ + 7459 "00000100" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 194 36 +.src_ref 2 "reduce_base.h" 194 38 first + 7460 "01100100" // MSC r4, r4, r5, r6; MOV dj0, #26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7461 "01101001" // /* MW 5 */ + 7462 "00000000" // /* MW 4 */ + 7463 "11000001" // /* MW 3 */ + 7464 "00001101" // /* MW 2 */ + 7465 "00101001" // /* MW 1 */ + 7466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7467 "00000000" // /* MW 1 */ + 7468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7469 "00000000" // /* MW 1 */ + 7470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7471 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 186 34 first + 7472 "00011000" // ST.s16 r1, [p2], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7473 "00110111" // /* MW 3 */ + 7474 "00011100" // /* MW 2 */ + 7475 "00000010" // /* MW 1 */ + 7476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7477 "00000000" // /* MW 1 */ + 7478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7479 "00000000" // /* MW 1 */ + 7480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7481 "00000000" // /* MW 1 */ + 7482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7483 "00000000" // /* MW 1 */ + 7484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7485 "00000000" // /* MW 1 */ + 7486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7487 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 187 35 first + 7488 "00011000" // ST.s16 r17, [p2], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7489 "00110111" // /* MW 3 */ + 7490 "00011110" // /* MW 2 */ + 7491 "00000010" // /* MW 1 */ + 7492 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7493 "00000000" // /* MW 1 */ + 7494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7495 "00000000" // /* MW 1 */ + 7496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7497 "00000000" // /* MW 1 */ + 7498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7499 "00000000" // /* MW 1 */ + 7500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7501 "00000000" // /* MW 1 */ + 7502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7503 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 188 34 first + 7504 "00011000" // ST.s16 r1, [p2], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7505 "00110111" // /* MW 3 */ + 7506 "00011100" // /* MW 2 */ + 7507 "00000010" // /* MW 1 */ + 7508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7509 "00000000" // /* MW 1 */ + 7510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7511 "00000000" // /* MW 1 */ + 7512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7513 "00000000" // /* MW 1 */ + 7514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7515 "00000000" // /* MW 1 */ + 7516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7517 "00000000" // /* MW 1 */ + 7518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7519 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 189 35 first + 7520 "00011000" // ST.s16 r0, [p2], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7521 "00010111" // /* MW 3 */ + 7522 "00011100" // /* MW 2 */ + 7523 "00000010" // /* MW 1 */ + 7524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7525 "00000000" // /* MW 1 */ + 7526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7527 "00000000" // /* MW 1 */ + 7528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7529 "00000000" // /* MW 1 */ + 7530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7531 "00000000" // /* MW 1 */ + 7532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7533 "00000000" // /* MW 1 */ + 7534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7535 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 190 36 first + 7536 "00011000" // ST.s16 r1, [p2], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7537 "00110111" // /* MW 3 */ + 7538 "00011100" // /* MW 2 */ + 7539 "00000010" // /* MW 1 */ + 7540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7541 "00000000" // /* MW 1 */ + 7542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7543 "00000000" // /* MW 1 */ + 7544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7545 "00000000" // /* MW 1 */ + 7546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7547 "00000000" // /* MW 1 */ + 7548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7549 "00000000" // /* MW 1 */ + 7550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7551 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 191 35 first + 7552 "10011000" // ST r7, [p2], #-16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7553 "11110001" // /* MW 3 */ + 7554 "11001100" // /* MW 2 */ + 7555 "00001010" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 192 34 first + 7556 "00011000" // ST.s16 r2, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7557 "01010111" // /* MW 3 */ + 7558 "11101100" // /* MW 2 */ + 7559 "00000010" // /* MW 1 */ + 7560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7561 "00000000" // /* MW 1 */ + 7562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7563 "00000000" // /* MW 1 */ + 7564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7565 "00000000" // /* MW 1 */ + 7566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7567 "00000000" // /* MW 1 */ + 7568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7569 "00000000" // /* MW 1 */ + 7570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7571 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 194 36 first + 7572 "00011000" // ST.s16 r4, [p2, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7573 "10010111" // /* MW 3 */ + 7574 "00000000" // /* MW 2 */ + 7575 "00000010" // /* MW 1 */ + 7576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7577 "00000000" // /* MW 1 */ + 7578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7579 "00000000" // /* MW 1 */ + 7580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7581 "00000000" // /* MW 1 */ + 7582 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7583 "00000000" // /* MW 1 */ + 7584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7585 "00000000" // /* MW 1 */ + 7586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7587 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 193 32 first + 7588 "00110110" // ST.s16 r3, [p2]; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7589 "10000001" // /* MW 11 */ + 7590 "10101101" // /* MW 10 */ + 7591 "00000000" // /* MW 9 */ + 7592 "00000000" // /* MW 8 */ + 7593 "00000000" // /* MW 7 */ + 7594 "00000000" // /* MW 6 */ + 7595 "00100000" // /* MW 5 */ + 7596 "00000000" // /* MW 4 */ + 7597 "11100000" // /* MW 3 */ + 7598 "10001110" // /* MW 2 */ + 7599 "01000000" // /* MW 1 */ +.label __ll7__ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv + 7600 "00011000" // ADD r0, r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7601 "11111111" // /* MW 3 */ + 7602 "00000001" // /* MW 2 */ + 7603 "00010100" // /* MW 1 */ + 7604 "01000100" // MOVXM p2, #508532 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7605 "11101000" // /* MW 5 */ + 7606 "11000100" // /* MW 4 */ + 7607 "11000100" // /* MW 3 */ + 7608 "00000111" // /* MW 2 */ + 7609 "00000000" // /* MW 1 */ + 7610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7611 "00000000" // /* MW 1 */ + 7612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7613 "00000000" // /* MW 1 */ + 7614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7615 "00000000" // /* MW 1 */ + 7616 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7617 "00000000" // /* MW 1 */ + 7618 "00101110" // ST.s16 r0, [p2]; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 7619 "00011100" // /* MW 13 */ + 7620 "00000000" // /* MW 12 */ + 7621 "00000000" // /* MW 11 */ + 7622 "01010111" // /* MW 10 */ + 7623 "00011010" // /* MW 9 */ + 7624 "01000000" // /* MW 8 */ + 7625 "00000000" // /* MW 7 */ + 7626 "00000000" // /* MW 6 */ + 7627 "10110110" // /* MW 5 */ + 7628 "00000010" // /* MW 4 */ + 7629 "11100000" // /* MW 3 */ + 7630 "10000010" // /* MW 2 */ + 7631 "01000000" // /* MW 1 */ +.label TGT_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv_464 +.src_ref 3 "reducesum_impl.h" 95 38 +.src_ref 3 "reducesum_impl.h" 95 50 first + 7632 "01010100" // LDA r0, [p1, #4]; MOV dj0, #22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7633 "01011001" // /* MW 5 */ + 7634 "00000000" // /* MW 4 */ + 7635 "11010001" // /* MW 3 */ + 7636 "10000010" // /* MW 2 */ + 7637 "00100010" // /* MW 1 */ + 7638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7639 "00000000" // /* MW 1 */ + 7640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7641 "00000000" // /* MW 1 */ + 7642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7643 "00000000" // /* MW 1 */ + 7644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7645 "00000000" // /* MW 1 */ + 7646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7647 "00000000" // /* MW 1 */ +.src_ref 3 "reducesum_impl.h" 95 38 + 7648 "00011000" // ST.s8 r0, [p0, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7649 "00000111" // /* MW 3 */ + 7650 "00000000" // /* MW 2 */ + 7651 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 219 4 first + 7652 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 7653 "00000000" // /* MW 3 */ + 7654 "00101000" // /* MW 2 */ + 7655 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7657 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7659 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7661 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7663 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7664 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7665 "00000000" // /* MW 15 */ + 7666 "00000000" // /* MW 14 */ + 7667 "01111000" // /* MW 13 */ + 7668 "10100101" // /* MW 12 */ + 7669 "00000001" // /* MW 11 */ + 7670 "00000000" // /* MW 10 */ + 7671 "00000000" // /* MW 9 */ + 7672 "00000000" // /* MW 8 */ + 7673 "01011011" // /* MW 7 */ + 7674 "00000001" // /* MW 6 */ + 7675 "00100000" // /* MW 5 */ + 7676 "00000000" // /* MW 4 */ + 7677 "11110000" // /* MW 3 */ + 7678 "00101100" // /* MW 2 */ + 7679 "00000000" // /* MW 1 */ +.label TGT_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv_512 +.src_ref 2 "reduce_base.h" 158 34 +.src_ref 2 "reduce_base.h" 158 34 + 7680 "10111010" // MOVA r6, #32; MOVXM p2, #508518 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7681 "00010000" // /* MW 9 */ + 7682 "00110011" // /* MW 8 */ + 7683 "00110001" // /* MW 7 */ + 7684 "11110001" // /* MW 6 */ + 7685 "00000001" // /* MW 5 */ + 7686 "00000000" // /* MW 4 */ + 7687 "00000000" // /* MW 3 */ + 7688 "00000110" // /* MW 2 */ + 7689 "00000100" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 158 34 first +.src_ref 2 "reduce_base.h" 163 53 first + 7690 "00101100" // ST.s16 r6, [p2], #2; MUL r1, r5, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7691 "00111111" // /* MW 5 */ + 7692 "10000100" // /* MW 4 */ + 7693 "11100010" // /* MW 3 */ + 7694 "10011010" // /* MW 2 */ + 7695 "01000011" // /* MW 1 */ + 7696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7697 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 167 55 first + 7698 "10011000" // LSHL r16, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7699 "00101101" // /* MW 3 */ + 7700 "01100000" // /* MW 2 */ + 7701 "00010000" // /* MW 1 */ + 7702 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7703 "00000000" // /* MW 1 */ + 7704 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7705 "00000000" // /* MW 1 */ + 7706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7707 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 160 36 +.src_ref 2 "reduce_base.h" 160 36 first +.src_ref 2 "reduce_base.h" 160 63 +.src_ref 2 "reduce_base.h" 160 63 first +.src_ref 2 "reduce_base.h" 162 38 +.src_ref 2 "reduce_base.h" 162 90 +.src_ref 2 "reduce_base.h" 166 38 + 7708 "10111010" // MOVA r0, #32; MAC r6, r6, r5, r0; MOV r6, #32 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7709 "01011000" // /* MW 9 */ + 7710 "00100000" // /* MW 8 */ + 7711 "11001000" // /* MW 7 */ + 7712 "00110000" // /* MW 6 */ + 7713 "01100000" // /* MW 5 */ + 7714 "00001010" // /* MW 4 */ + 7715 "00000000" // /* MW 3 */ + 7716 "00000000" // /* MW 2 */ + 7717 "00000100" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 159 35 first +.src_ref 2 "reduce_base.h" 166 38 first + 7718 "00101100" // ST.s16 r17, [p2], #2; SUB r5, r0, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7719 "00100011" // /* MW 5 */ + 7720 "00010100" // /* MW 4 */ + 7721 "11100000" // /* MW 3 */ + 7722 "11000110" // /* MW 2 */ + 7723 "01000011" // /* MW 1 */ + 7724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7725 "00000000" // /* MW 1 */ + 7726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7727 "00000000" // /* MW 1 */ + 7728 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7729 "00000000" // /* MW 1 */ + 7730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7731 "00000000" // /* MW 1 */ + 7732 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7733 "00000000" // /* MW 1 */ + 7734 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7735 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 160 34 first + 7736 "00011000" // ST.s16 r6, [p2], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7737 "11010111" // /* MW 3 */ + 7738 "00011100" // /* MW 2 */ + 7739 "00000010" // /* MW 1 */ + 7740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7741 "00000000" // /* MW 1 */ + 7742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7743 "00000000" // /* MW 1 */ + 7744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7745 "00000000" // /* MW 1 */ + 7746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7747 "00000000" // /* MW 1 */ + 7748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7749 "00000000" // /* MW 1 */ + 7750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7751 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 161 35 first + 7752 "00011000" // ST.s16 r4, [p2], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7753 "10010111" // /* MW 3 */ + 7754 "00011100" // /* MW 2 */ + 7755 "00000010" // /* MW 1 */ + 7756 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7757 "00000000" // /* MW 1 */ + 7758 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7759 "00000000" // /* MW 1 */ + 7760 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7761 "00000000" // /* MW 1 */ + 7762 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7763 "00000000" // /* MW 1 */ + 7764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7765 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 7766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7767 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 162 36 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 7768 "00011000" // ST.s16 r0, [p2], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7769 "00010111" // /* MW 3 */ + 7770 "00011100" // /* MW 2 */ + 7771 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7773 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7775 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7776 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7777 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 162 38 +.src_ref 2 "reduce_base.h" 162 90 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7778 "00011000" // MSC r0, r0, r7, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7779 "01001110" // /* MW 3 */ + 7780 "11000000" // /* MW 2 */ + 7781 "00010001" // /* MW 1 */ + 7782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7783 "00000000" // /* MW 1 */ + 7784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7785 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 163 35 first + 7786 "10011000" // ST r1, [p2], #-16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7787 "00110001" // /* MW 3 */ + 7788 "11001100" // /* MW 2 */ + 7789 "00001010" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 164 34 first + 7790 "00011000" // ST.s16 r19, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7791 "01110111" // /* MW 3 */ + 7792 "11101110" // /* MW 2 */ + 7793 "00000010" // /* MW 1 */ + 7794 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7795 "00000000" // /* MW 1 */ + 7796 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7797 "00000000" // /* MW 1 */ + 7798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7799 "00000000" // /* MW 1 */ + 7800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7801 "00000000" // /* MW 1 */ + 7802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7803 "00000000" // /* MW 1 */ + 7804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7805 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 165 32 first + 7806 "00011000" // ST.s16 r3, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7807 "01110111" // /* MW 3 */ + 7808 "00000100" // /* MW 2 */ + 7809 "00000010" // /* MW 1 */ + 7810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7811 "00000000" // /* MW 1 */ + 7812 "10000100" // J #7600 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=7600 delay_slots=5 */ + 7813 "00000000" // /* MW 5 */ + 7814 "00000000" // /* MW 4 */ + 7815 "11011000" // /* MW 3 */ + 7816 "00001110" // /* MW 2 */ + 7817 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 166 36 +.delay_slot + 7818 "10111000" // MOV dj0, #26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7819 "00110100" // /* MW 3 */ + 7820 "10000000" // /* MW 2 */ + 7821 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7823 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7824 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7825 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7827 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 166 36 first +.delay_slot + 7828 "00110110" // ST.s16 r5, [p2, dj0]; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7829 "10000001" // /* MW 11 */ + 7830 "10101101" // /* MW 10 */ + 7831 "00000000" // /* MW 9 */ + 7832 "00000000" // /* MW 8 */ + 7833 "00000000" // /* MW 7 */ + 7834 "00000000" // /* MW 6 */ + 7835 "00100000" // /* MW 5 */ + 7836 "00000000" // /* MW 4 */ + 7837 "11100000" // /* MW 3 */ + 7838 "00010110" // /* MW 2 */ + 7839 "01000000" // /* MW 1 */ +.label TGT_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv_672 +.src_ref 2 "reduce_base.h" 172 32 +.src_ref 2 "reduce_base.h" 173 34 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 7840 "10111010" // MOVA m1, #16; MOVXM p2, #508508 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7841 "00010000" // /* MW 9 */ + 7842 "00101110" // /* MW 8 */ + 7843 "00110001" // /* MW 7 */ + 7844 "11110001" // /* MW 6 */ + 7845 "00000001" // /* MW 5 */ + 7846 "00000000" // /* MW 4 */ + 7847 "10000000" // /* MW 3 */ + 7848 "00000100" // /* MW 2 */ + 7849 "00000010" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 172 32 first +.src_ref 2 "reduce_base.h" 173 34 +.src_ref 2 "reduce_base.h" 180 38 +.aggressive_scheduled_block_id 4 +.noswbrkpt + 7850 "10111010" // ST.s16 r1, [p2], #4; MOVX r17, #5; MOV r7, eh0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7851 "01111000" // /* MW 9 */ + 7852 "10001110" // /* MW 8 */ + 7853 "11110000" // /* MW 7 */ + 7854 "10101000" // /* MW 6 */ + 7855 "00010000" // /* MW 5 */ + 7856 "00000001" // /* MW 4 */ + 7857 "11100000" // /* MW 3 */ + 7858 "10000110" // /* MW 2 */ + 7859 "01000101" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 174 35 +.src_ref 2 "reduce_base.h" 174 55 first +.src_ref 2 "reduce_base.h" 179 38 +.src_ref 2 "reduce_base.h" 179 110 +.src_ref 2 "reduce_base.h" 180 38 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 7860 "10111010" // MOVA r3, #32; MUL r16, r1, r6; MOV m0, #-10 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7861 "01011000" // /* MW 9 */ + 7862 "11110110" // /* MW 8 */ + 7863 "00000111" // /* MW 7 */ + 7864 "01111100" // /* MW 6 */ + 7865 "00000011" // /* MW 5 */ + 7866 "00000011" // /* MW 4 */ + 7867 "00000000" // /* MW 3 */ + 7868 "00000011" // /* MW 2 */ + 7869 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 7870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7871 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 172 70 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 7872 "10011000" // MUL r1, r5, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7873 "00001111" // /* MW 3 */ + 7874 "01000011" // /* MW 2 */ + 7875 "00010001" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 7876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7877 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 172 89 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7878 "10011000" // LSHL r1, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7879 "00101101" // /* MW 3 */ + 7880 "01000010" // /* MW 2 */ + 7881 "00010000" // /* MW 1 */ + 7882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7883 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 173 34 first + 7884 "00011000" // ST.s16 r7, [p2], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7885 "11110111" // /* MW 3 */ + 7886 "00101000" // /* MW 2 */ + 7887 "00000010" // /* MW 1 */ + 7888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7889 "00000000" // /* MW 1 */ + 7890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7891 "00000000" // /* MW 1 */ + 7892 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7893 "00000000" // /* MW 1 */ + 7894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7895 "00000000" // /* MW 1 */ + 7896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7897 "00000000" // /* MW 1 */ + 7898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7899 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 174 35 first + 7900 "10011000" // ST r16, [p2], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7901 "00010001" // /* MW 3 */ + 7902 "00001010" // /* MW 2 */ + 7903 "00001010" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 175 34 first + 7904 "00011000" // ST.s16 r5, [p2], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7905 "10110111" // /* MW 3 */ + 7906 "00011100" // /* MW 2 */ + 7907 "00000010" // /* MW 1 */ + 7908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7909 "00000000" // /* MW 1 */ + 7910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7911 "00000000" // /* MW 1 */ + 7912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7913 "00000000" // /* MW 1 */ + 7914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7915 "00000000" // /* MW 1 */ + 7916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7917 "00000000" // /* MW 1 */ + 7918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7919 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 176 35 first + 7920 "00011000" // ST.s16 r0, [p2], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7921 "00010111" // /* MW 3 */ + 7922 "00011100" // /* MW 2 */ + 7923 "00000010" // /* MW 1 */ + 7924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7925 "00000000" // /* MW 1 */ + 7926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7927 "00000000" // /* MW 1 */ + 7928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7929 "00000000" // /* MW 1 */ + 7930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7931 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 179 90 first + 7932 "00011000" // MAC r0, r0, r6, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7933 "01000110" // /* MW 3 */ + 7934 "10000000" // /* MW 2 */ + 7935 "00010001" // /* MW 1 */ + 7936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7937 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 175 34 first +.src_ref 2 "reduce_base.h" 177 34 first + 7938 "00011000" // ST.s16 r5, [p2], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7939 "10110111" // /* MW 3 */ + 7940 "00011100" // /* MW 2 */ + 7941 "00000010" // /* MW 1 */ + 7942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7943 "00000000" // /* MW 1 */ + 7944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7945 "00000000" // /* MW 1 */ + 7946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7947 "00000000" // /* MW 1 */ + 7948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7949 "00000000" // /* MW 1 */ + 7950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7951 "00000000" // /* MW 1 */ + 7952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7953 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 178 35 first + 7954 "00011000" // ST.s16 r4, [p2], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7955 "10010111" // /* MW 3 */ + 7956 "00011100" // /* MW 2 */ + 7957 "00000010" // /* MW 1 */ + 7958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7959 "00000000" // /* MW 1 */ + 7960 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7961 "00000000" // /* MW 1 */ + 7962 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7963 "00000000" // /* MW 1 */ + 7964 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7965 "00000000" // /* MW 1 */ + 7966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7967 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 7968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7969 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 179 36 first +.aggressive_scheduled_block_id 5 +.noswbrkpt + 7970 "00011000" // ST.s16 r3, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7971 "01110111" // /* MW 3 */ + 7972 "00000100" // /* MW 2 */ + 7973 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 7974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7975 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 7976 "10000100" // J #7600 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=7600 delay_slots=5 */ + 7977 "00000000" // /* MW 5 */ + 7978 "00000000" // /* MW 4 */ + 7979 "11011000" // /* MW 3 */ + 7980 "00001110" // /* MW 2 */ + 7981 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 180 38 first +.delay_slot +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 7982 "10011000" // LSHL r17, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7983 "00011101" // /* MW 3 */ + 7984 "00100011" // /* MW 2 */ + 7985 "00010100" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 179 38 first +.src_ref 2 "reduce_base.h" 179 110 first +.delay_slot +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 7986 "00011000" // MSC r3, r3, r5, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7987 "00001110" // /* MW 3 */ + 7988 "01000110" // /* MW 2 */ + 7989 "00010001" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 180 38 first +.delay_slot +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7990 "10011000" // SUB r1, r3, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7991 "00010001" // /* MW 3 */ + 7992 "11000011" // /* MW 2 */ + 7993 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7995 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 180 36 +.delay_slot + 7996 "00011000" // ST.s16 r1, [p2, #8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7997 "00110111" // /* MW 3 */ + 7998 "01000100" // /* MW 2 */ +.label _ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv__end +.label __ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv___func_end0 + 7999 "00000010" // /* MW 1 */ +.label __ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E___func_begin0 +.label _ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E +.function run _ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E +.src_ref 2 "reduce_base.h" 232 first +.src_ref 2 "reduce_base.h" 236 19 +.src_ref 2 "reduce_base.h" 236 19 +.function_start + 8000 "10111010" // MOVA m4, #-24; MOVXM p2, #508516 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8001 "00010000" // /* MW 9 */ + 8002 "00110010" // /* MW 8 */ + 8003 "00110001" // /* MW 7 */ + 8004 "11110001" // /* MW 6 */ + 8005 "00000001" // /* MW 5 */ + 8006 "00000000" // /* MW 4 */ + 8007 "10000000" // /* MW 3 */ + 8008 "00010000" // /* MW 2 */ + 8009 "11111101" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 236 19 first +.src_ref 2 "reduce_base.h" 240 69 + 8010 "00101100" // LDA.u16 r16, [p2], m4; MOVX r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8011 "00000010" // /* MW 5 */ + 8012 "01100000" // /* MW 4 */ + 8013 "01010000" // /* MW 3 */ + 8014 "01000011" // /* MW 2 */ + 8015 "01010001" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 240 69 first + 8016 "01111000" // VINSERT.32 x0, x0, #0, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8017 "00010001" // /* MW 3 */ + 8018 "00000011" // /* MW 2 */ + 8019 "00011000" // /* MW 1 */ + 8020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8021 "00000000" // /* MW 1 */ + 8022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8023 "00000000" // /* MW 1 */ + 8024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8025 "00000000" // /* MW 1 */ + 8026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8027 "00000000" // /* MW 1 */ + 8028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8029 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 236 12 first +.src_ref 2 "reduce_base.h" 236 27 first + 8030 "10000100" // JNZ r16, #8256 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8256 delay_slots=5 */ + 8031 "00000001" // /* MW 5 */ + 8032 "01000000" // /* MW 4 */ + 8033 "00100000" // /* MW 3 */ + 8034 "00010000" // /* MW 2 */ + 8035 "10000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 232 +.delay_slot + 8036 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8037 "00000001" // /* MW 5 */ + 8038 "00000000" // /* MW 4 */ + 8039 "00000000" // /* MW 3 */ + 8040 "00001000" // /* MW 2 */ + 8041 "00000000" // /* MW 1 */ +.delay_slot + 8042 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8043 "00111101" // /* MW 3 */ + 8044 "11111100" // /* MW 2 */ + 8045 "00001111" // /* MW 1 */ +.delay_slot + 8046 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8047 "10011101" // /* MW 3 */ + 8048 "11111011" // /* MW 2 */ + 8049 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8051 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8053 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 240 69 + 8054 "01000100" // MOVXM p7, #508464 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8055 "01100000" // /* MW 5 */ + 8056 "11000100" // /* MW 4 */ + 8057 "11001110" // /* MW 3 */ + 8058 "00000111" // /* MW 2 */ + 8059 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 240 69 first +.src_ref 2 "reduce_base.h" 241 39 + 8060 "10111010" // LDA.s8 r17, [p7]; MOVXM p7, #508528 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8061 "00010000" // /* MW 9 */ + 8062 "00111000" // /* MW 8 */ + 8063 "10110001" // /* MW 7 */ + 8064 "11110011" // /* MW 6 */ + 8065 "00000001" // /* MW 5 */ + 8066 "00000000" // /* MW 4 */ + 8067 "01010000" // /* MW 3 */ + 8068 "11000100" // /* MW 2 */ + 8069 "11100000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 241 39 first + 8070 "10011000" // LDA r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8071 "00010110" // /* MW 3 */ + 8072 "00000110" // /* MW 2 */ + 8073 "00000111" // /* MW 1 */ + 8074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8075 "00000000" // /* MW 1 */ + 8076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8077 "00000000" // /* MW 1 */ + 8078 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8079 "00000000" // /* MW 1 */ + 8080 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8081 "00000000" // /* MW 1 */ + 8082 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8083 "00000000" // /* MW 1 */ + 8084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8085 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 241 12 +.src_ref 2 "reduce_base.h" 241 30 + 8086 "10000100" // JZ r16, #8256 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8256 delay_slots=5 */ + 8087 "00000001" // /* MW 5 */ + 8088 "00000000" // /* MW 4 */ + 8089 "00100000" // /* MW 3 */ + 8090 "00010000" // /* MW 2 */ + 8091 "10000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 240 69 +.delay_slot + 8092 "11111000" // VMOV bmhl1, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8093 "10010010" // /* MW 3 */ + 8094 "10000000" // /* MW 2 */ + 8095 "00011001" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 240 69 +.delay_slot + 8096 "00011000" // MOVX crRnd, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8097 "10000000" // /* MW 3 */ + 8098 "01111010" // /* MW 2 */ + 8099 "00010100" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 240 69 first +.delay_slot + 8100 "00011000" // VCONV.bf16.fp32 wl2, bmhl1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8101 "11010110" // /* MW 3 */ + 8102 "01000000" // /* MW 2 */ + 8103 "00001001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8105 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 first +.src_ref 2 "reduce_base.h" 240 69 +.delay_slot + 8106 "01011000" // VEXTBCST.16 x1, x2, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8107 "00000011" // /* MW 3 */ + 8108 "10010001" // /* MW 2 */ + 8109 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 2 "reduce_base.h" 241 12 +.src_ref 2 "reduce_base.h" 241 12 first +.src_ref 2 "reduce_base.h" 243 29 + 8110 "01110110" // MOVA r17, #-5; MOVS p7, p1; MOVXM ls, #8240 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8111 "00010000" // /* MW 11 */ + 8112 "00011000" // /* MW 10 */ + 8113 "01111000" // /* MW 9 */ + 8114 "00001000" // /* MW 8 */ + 8115 "00000000" // /* MW 7 */ + 8116 "00000000" // /* MW 6 */ + 8117 "10001011" // /* MW 5 */ + 8118 "10000100" // /* MW 4 */ + 8119 "00000111" // /* MW 3 */ + 8120 "01110001" // /* MW 2 */ + 8121 "11111111" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 241 12 + 8122 "01000100" // MOVXM le, #8240 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8123 "01100000" // /* MW 5 */ + 8124 "11100000" // /* MW 4 */ + 8125 "00100110" // /* MW 3 */ + 8126 "00000000" // /* MW 2 */ + 8127 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 2 "reduce_base.h" 241 12 +.src_ref 2 "reduce_base.h" 243 29 + 8128 "11100100" // ADD r16, r16, #-1; VMOV bmhl1, x1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8129 "00100101" // /* MW 5 */ + 8130 "00000101" // /* MW 4 */ + 8131 "11100011" // /* MW 3 */ + 8132 "00111111" // /* MW 2 */ + 8133 "10000100" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 241 12 + 8134 "10011000" // LSHL r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8135 "00011101" // /* MW 3 */ + 8136 "00100001" // /* MW 2 */ + 8137 "00010100" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 241 12 + 8138 "00010100" // NOPA; ADD.NC lc, r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8139 "00000001" // /* MW 5 */ + 8140 "11110000" // /* MW 4 */ + 8141 "11111010" // /* MW 3 */ + 8142 "00101100" // /* MW 2 */ + 8143 "00000000" // /* MW 1 */ + 8144 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8145 "00000000" // /* MW 15 */ + 8146 "00000000" // /* MW 14 */ + 8147 "01111000" // /* MW 13 */ + 8148 "10100101" // /* MW 12 */ + 8149 "00000001" // /* MW 11 */ + 8150 "00000000" // /* MW 10 */ + 8151 "00000000" // /* MW 9 */ + 8152 "00000000" // /* MW 8 */ + 8153 "01011011" // /* MW 7 */ + 8154 "00000001" // /* MW 6 */ + 8155 "00100000" // /* MW 5 */ + 8156 "00000000" // /* MW 4 */ + 8157 "11110000" // /* MW 3 */ + 8158 "00101100" // /* MW 2 */ + 8159 "00000000" // /* MW 1 */ + 8160 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8161 "00000000" // /* MW 15 */ + 8162 "00000000" // /* MW 14 */ + 8163 "01111000" // /* MW 13 */ + 8164 "10100101" // /* MW 12 */ + 8165 "00000001" // /* MW 11 */ + 8166 "00000000" // /* MW 10 */ + 8167 "00000000" // /* MW 9 */ + 8168 "00000000" // /* MW 8 */ + 8169 "01011011" // /* MW 7 */ + 8170 "00000001" // /* MW 6 */ + 8171 "00100000" // /* MW 5 */ + 8172 "00000000" // /* MW 4 */ + 8173 "11110000" // /* MW 3 */ + 8174 "00101100" // /* MW 2 */ + 8175 "00000000" // /* MW 1 */ + 8176 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8177 "00000000" // /* MW 15 */ + 8178 "00000000" // /* MW 14 */ + 8179 "01111000" // /* MW 13 */ + 8180 "10100101" // /* MW 12 */ + 8181 "00000001" // /* MW 11 */ + 8182 "00000000" // /* MW 10 */ + 8183 "00000000" // /* MW 9 */ + 8184 "00000000" // /* MW 8 */ + 8185 "01011011" // /* MW 7 */ + 8186 "00000001" // /* MW 6 */ + 8187 "00100000" // /* MW 5 */ + 8188 "00000000" // /* MW 4 */ + 8189 "11110000" // /* MW 3 */ + 8190 "00101100" // /* MW 2 */ + 8191 "00000000" // /* MW 1 */ + 8192 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8193 "00000000" // /* MW 15 */ + 8194 "00000000" // /* MW 14 */ + 8195 "01111000" // /* MW 13 */ + 8196 "10100101" // /* MW 12 */ + 8197 "00000001" // /* MW 11 */ + 8198 "00000000" // /* MW 10 */ + 8199 "00000000" // /* MW 9 */ + 8200 "00000000" // /* MW 8 */ + 8201 "01011011" // /* MW 7 */ + 8202 "00000001" // /* MW 6 */ + 8203 "00100000" // /* MW 5 */ + 8204 "00000000" // /* MW 4 */ + 8205 "11110000" // /* MW 3 */ + 8206 "00101100" // /* MW 2 */ + 8207 "00000000" // /* MW 1 */ + 8208 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8209 "00000000" // /* MW 15 */ + 8210 "00000000" // /* MW 14 */ + 8211 "01111000" // /* MW 13 */ + 8212 "10100101" // /* MW 12 */ + 8213 "00000001" // /* MW 11 */ + 8214 "00000000" // /* MW 10 */ + 8215 "00000000" // /* MW 9 */ + 8216 "00000000" // /* MW 8 */ + 8217 "01011011" // /* MW 7 */ + 8218 "00000001" // /* MW 6 */ + 8219 "00100000" // /* MW 5 */ + 8220 "00000000" // /* MW 4 */ + 8221 "11110000" // /* MW 3 */ + 8222 "00101100" // /* MW 2 */ + 8223 "00000000" // /* MW 1 */ + 8224 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8225 "00000000" // /* MW 15 */ + 8226 "00000000" // /* MW 14 */ + 8227 "01111000" // /* MW 13 */ + 8228 "10100101" // /* MW 12 */ + 8229 "00000001" // /* MW 11 */ + 8230 "00000000" // /* MW 10 */ + 8231 "00000000" // /* MW 9 */ + 8232 "00000000" // /* MW 8 */ + 8233 "01011011" // /* MW 7 */ + 8234 "00000001" // /* MW 6 */ + 8235 "00100000" // /* MW 5 */ + 8236 "00000000" // /* MW 4 */ + 8237 "11110000" // /* MW 3 */ + 8238 "00101100" // /* MW 2 */ + 8239 "00000000" // /* MW 1 */ +.label ZLS_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_240 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 2 "reduce_base.h" 243 29 first +.begin_of_loop +.end_of_loop +.loop_nesting 1 + 8240 "11100001" // NOPA; NOPB; VST bmhl1, [p7], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8241 "00000000" // /* MW 15 */ + 8242 "00000000" // /* MW 14 */ + 8243 "01111000" // /* MW 13 */ + 8244 "10100101" // /* MW 12 */ + 8245 "00000001" // /* MW 11 */ + 8246 "00000000" // /* MW 10 */ + 8247 "00000000" // /* MW 9 */ + 8248 "10000000" // /* MW 8 */ + 8249 "11000110" // /* MW 7 */ + 8250 "00011100" // /* MW 6 */ + 8251 "00100111" // /* MW 5 */ + 8252 "00000000" // /* MW 4 */ + 8253 "11110000" // /* MW 3 */ + 8254 "00101100" // /* MW 2 */ + 8255 "00000000" // /* MW 1 */ +.label TGT_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_256 +.src_ref 7 "aie_core.h" 73 15 +.src_ref 7 "aie_core.h" 73 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 199 120 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "reduce_base.h" 267 19 first +.src_ref 2 "reduce_base.h" 267 31 +.loop_nesting 0 + 8256 "10111010" // LDA r16, [p2], #-12; MOVX r18, #2; MOV p3, p1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8257 "01111000" // /* MW 9 */ + 8258 "01100000" // /* MW 8 */ + 8259 "10110001" // /* MW 7 */ + 8260 "01001001" // /* MW 6 */ + 8261 "00100000" // /* MW 5 */ + 8262 "00000001" // /* MW 4 */ + 8263 "11010000" // /* MW 3 */ + 8264 "11000010" // /* MW 2 */ + 8265 "01011011" // /* MW 1 */ + 8266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8267 "00000000" // /* MW 1 */ + 8268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8269 "00000000" // /* MW 1 */ + 8270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8271 "00000000" // /* MW 1 */ + 8272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8273 "00000000" // /* MW 1 */ + 8274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8275 "00000000" // /* MW 1 */ + 8276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8277 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 267 31 + 8278 "10011000" // NE r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8279 "00001000" // /* MW 3 */ + 8280 "10100001" // /* MW 2 */ + 8281 "00010100" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 267 12 + 8282 "10000100" // JNZ r16, #8576 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8576 delay_slots=5 */ + 8283 "00000001" // /* MW 5 */ + 8284 "01000000" // /* MW 4 */ + 8285 "11000000" // /* MW 3 */ + 8286 "00010000" // /* MW 2 */ + 8287 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8289 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8291 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8293 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8294 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8295 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 405 45 +.delay_slot + 8296 "11111000" // MOV r17, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8297 "11000000" // /* MW 3 */ + 8298 "01010100" // /* MW 2 */ + 8299 "00011100" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 269 41 + 8300 "01000100" // MOVXM p7, #508528 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8301 "11100000" // /* MW 5 */ + 8302 "11000100" // /* MW 4 */ + 8303 "11001110" // /* MW 3 */ + 8304 "00000111" // /* MW 2 */ + 8305 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 269 41 first + 8306 "10011000" // LDA r18, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8307 "01010110" // /* MW 3 */ + 8308 "00000110" // /* MW 2 */ + 8309 "00000111" // /* MW 1 */ + 8310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8311 "00000000" // /* MW 1 */ + 8312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8313 "00000000" // /* MW 1 */ + 8314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8315 "00000000" // /* MW 1 */ + 8316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8317 "00000000" // /* MW 1 */ + 8318 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8319 "00000000" // /* MW 1 */ + 8320 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8321 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 274 12 first +.src_ref 2 "reduce_base.h" 274 30 first + 8322 "10000100" // JZ r18, #8576 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8576 delay_slots=5 */ + 8323 "00000001" // /* MW 5 */ + 8324 "00000000" // /* MW 4 */ + 8325 "11000000" // /* MW 3 */ + 8326 "00010000" // /* MW 2 */ + 8327 "10010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8329 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 269 31 +.delay_slot + 8330 "00011000" // MOVX r19, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8331 "00000101" // /* MW 3 */ + 8332 "00100110" // /* MW 2 */ + 8333 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 269 31 +.src_ref 2 "reduce_base.h" 269 31 first +.delay_slot + 8334 "11100100" // LSHL r19, r18, r19; MOV r20, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8335 "10000001" // /* MW 5 */ + 8336 "00100101" // /* MW 4 */ + 8337 "10111010" // /* MW 3 */ + 8338 "11100111" // /* MW 2 */ + 8339 "10010100" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 269 31 +.delay_slot + 8340 "01011000" // ADD.NC p3, r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8341 "11010001" // /* MW 3 */ + 8342 "01101001" // /* MW 2 */ + 8343 "00011011" // /* MW 1 */ +.delay_slot + 8344 "01000100" // MOVXM p7, #508464 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8345 "01100000" // /* MW 5 */ + 8346 "11000100" // /* MW 4 */ + 8347 "11001110" // /* MW 3 */ + 8348 "00000111" // /* MW 2 */ + 8349 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 2 "reduce_base.h" 280 16 first +.src_ref 2 "reduce_base.h" 289 45 + 8350 "01110110" // LDA.s8 r20, [p7]; MOVS p2, p3; MOVXM ls, #8512 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8351 "00010000" // /* MW 11 */ + 8352 "10100000" // /* MW 10 */ + 8353 "01111000" // /* MW 9 */ + 8354 "00001000" // /* MW 8 */ + 8355 "00000000" // /* MW 7 */ + 8356 "00000000" // /* MW 6 */ + 8357 "10001011" // /* MW 5 */ + 8358 "10001100" // /* MW 4 */ + 8359 "01010010" // /* MW 3 */ + 8360 "11010000" // /* MW 2 */ + 8361 "11100000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "reduce_base.h" 274 12 +.src_ref 2 "reduce_base.h" 277 29 +.src_ref 2 "reduce_base.h" 280 16 + 8362 "01110110" // MOVA r19, #-5; MOVS p4, p1; MOVXM le, #8528 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8363 "00010000" // /* MW 11 */ + 8364 "10101000" // /* MW 10 */ + 8365 "10111000" // /* MW 9 */ + 8366 "00001001" // /* MW 8 */ + 8367 "00000000" // /* MW 7 */ + 8368 "00000000" // /* MW 6 */ + 8369 "10001011" // /* MW 5 */ + 8370 "10000100" // /* MW 4 */ + 8371 "00000100" // /* MW 3 */ + 8372 "01110011" // /* MW 2 */ + 8373 "11111111" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 274 12 + 8374 "01000100" // MOVXM p7, #8416 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8375 "11000000" // /* MW 5 */ + 8376 "11000001" // /* MW 4 */ + 8377 "00101110" // /* MW 3 */ + 8378 "00000000" // /* MW 2 */ + 8379 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 + 8380 "00011000" // MOVX vaddSign0, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8381 "01000000" // /* MW 3 */ + 8382 "01011010" // /* MW 2 */ + 8383 "00010000" // /* MW 1 */ + 8384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8385 "00000000" // /* MW 1 */ + 8386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8387 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 282 91 + 8388 "11111000" // VMOV bmhl1, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8389 "10010010" // /* MW 3 */ + 8390 "10000000" // /* MW 2 */ + 8391 "00011001" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 274 12 first +.src_ref 2 "reduce_base.h" 282 91 + 8392 "00100100" // MOVX crRnd, r20; ADD.NC r21, r18, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8393 "11111111" // /* MW 5 */ + 8394 "10110010" // /* MW 4 */ + 8395 "00001010" // /* MW 3 */ + 8396 "01010000" // /* MW 2 */ + 8397 "10100111" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 274 12 +.src_ref 2 "reduce_base.h" 282 91 first + 8398 "01011100" // VCONV.bf16.fp32 wl0, bmhl1; LSHL r19, r21, r19 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8399 "01111011" // /* MW 5 */ + 8400 "11001110" // /* MW 4 */ + 8401 "11001010" // /* MW 3 */ + 8402 "00011010" // /* MW 2 */ + 8403 "00001000" // /* MW 1 */ + 8404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8405 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 first +.src_ref 2 "reduce_base.h" 282 91 + 8406 "10111010" // NOPA; NOPB; VEXTBCST.16 x0, x0, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8407 "10101110" // /* MW 9 */ + 8408 "10000001" // /* MW 8 */ + 8409 "00000000" // /* MW 7 */ + 8410 "00000000" // /* MW 6 */ + 8411 "00010000" // /* MW 5 */ + 8412 "00000000" // /* MW 4 */ + 8413 "11110000" // /* MW 3 */ + 8414 "00101100" // /* MW 2 */ + 8415 "00000000" // /* MW 1 */ +.label TGT_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_416 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "reduce_base.h" 277 29 first +.src_ref 2 "reduce_base.h" 280 16 first +.loop_nesting 1 + 8416 "00011100" // VLDB x1, [p4], #64; MOVX lc, #30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8417 "11110010" // /* MW 5 */ + 8418 "00000010" // /* MW 4 */ + 8419 "10000000" // /* MW 3 */ + 8420 "10001110" // /* MW 2 */ + 8421 "10000011" // /* MW 1 */ + 8422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8423 "00000000" // /* MW 1 */ + 8424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8425 "00000000" // /* MW 1 */ + 8426 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8427 "00000000" // /* MW 1 */ + 8428 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8429 "01100111" // /* MW 3 */ + 8430 "00000001" // /* MW 2 */ + 8431 "00000000" // /* MW 1 */ + 8432 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8433 "00000000" // /* MW 15 */ + 8434 "00000000" // /* MW 14 */ + 8435 "01111000" // /* MW 13 */ + 8436 "10100101" // /* MW 12 */ + 8437 "00000001" // /* MW 11 */ + 8438 "00000000" // /* MW 10 */ + 8439 "00000000" // /* MW 9 */ + 8440 "00000000" // /* MW 8 */ + 8441 "01011011" // /* MW 7 */ + 8442 "00000001" // /* MW 6 */ + 8443 "00100000" // /* MW 5 */ + 8444 "00000000" // /* MW 4 */ + 8445 "11110000" // /* MW 3 */ + 8446 "00101100" // /* MW 2 */ + 8447 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 + 8448 "11100001" // NOPA; NOPB; NOPS; MOVX r20, #0; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8449 "00000000" // /* MW 15 */ + 8450 "00000000" // /* MW 14 */ + 8451 "01111000" // /* MW 13 */ + 8452 "10100101" // /* MW 12 */ + 8453 "00000001" // /* MW 11 */ + 8454 "00001000" // /* MW 10 */ + 8455 "01000000" // /* MW 9 */ + 8456 "00000001" // /* MW 8 */ + 8457 "01011011" // /* MW 7 */ + 8458 "00000001" // /* MW 6 */ + 8459 "00100000" // /* MW 5 */ + 8460 "00000000" // /* MW 4 */ + 8461 "11110000" // /* MW 3 */ + 8462 "00101100" // /* MW 2 */ + 8463 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 2 "reduce_base.h" 280 67 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8464 "11100001" // NOPA; NOPB; NOPS; ADD r20, r20, #1; VEXTRACT.16 r21, x1, r20, vaddSign0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8465 "00000000" // /* MW 15 */ + 8466 "00000000" // /* MW 14 */ + 8467 "01101000" // /* MW 13 */ + 8468 "10101000" // /* MW 12 */ + 8469 "10100010" // /* MW 11 */ + 8470 "00111010" // /* MW 10 */ + 8471 "01000000" // /* MW 9 */ + 8472 "00101001" // /* MW 8 */ + 8473 "01011011" // /* MW 7 */ + 8474 "00000001" // /* MW 6 */ + 8475 "00100000" // /* MW 5 */ + 8476 "00000000" // /* MW 4 */ + 8477 "11110000" // /* MW 3 */ + 8478 "00101100" // /* MW 2 */ + 8479 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 +.src_ref 2 "reduce_base.h" 280 67 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8480 "11100001" // NOPA; NOPB; NOPS; ADD r20, r20, #1; VEXTRACT.16 r21, x1, r20, vaddSign0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8481 "00000000" // /* MW 15 */ + 8482 "00000000" // /* MW 14 */ + 8483 "01101000" // /* MW 13 */ + 8484 "10101000" // /* MW 12 */ + 8485 "10100010" // /* MW 11 */ + 8486 "00111010" // /* MW 10 */ + 8487 "01000000" // /* MW 9 */ + 8488 "00101001" // /* MW 8 */ + 8489 "01011011" // /* MW 7 */ + 8490 "00000001" // /* MW 6 */ + 8491 "00100000" // /* MW 5 */ + 8492 "00000000" // /* MW 4 */ + 8493 "11110000" // /* MW 3 */ + 8494 "00101100" // /* MW 2 */ + 8495 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 856 23 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8496 "11100001" // NOPA; NOPB; NOPS; NOPX; VINSERT.16 x2, x0, #0, r21; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8497 "00000000" // /* MW 15 */ + 8498 "00000000" // /* MW 14 */ + 8499 "10111000" // /* MW 13 */ + 8500 "01010100" // /* MW 12 */ + 8501 "10000001" // /* MW 11 */ + 8502 "00000000" // /* MW 10 */ + 8503 "00000000" // /* MW 9 */ + 8504 "00000000" // /* MW 8 */ + 8505 "01011011" // /* MW 7 */ + 8506 "00000001" // /* MW 6 */ + 8507 "00100000" // /* MW 5 */ + 8508 "00000000" // /* MW 4 */ + 8509 "11110000" // /* MW 3 */ + 8510 "00101100" // /* MW 2 */ + 8511 "00000000" // /* MW 1 */ +.label ZLS_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_512 +.src_ref 4 "vector.hpp" 915 23 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.noswbrkpt +.loop_nesting 2 + 8512 "11100001" // NOPA; NOPB; NOPS; NOPX; VEXTRACT.16 r21, x1, r20, vaddSign0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8513 "00000000" // /* MW 15 */ + 8514 "00000000" // /* MW 14 */ + 8515 "01101000" // /* MW 13 */ + 8516 "10101000" // /* MW 12 */ + 8517 "10100010" // /* MW 11 */ + 8518 "00000010" // /* MW 10 */ + 8519 "00000000" // /* MW 9 */ + 8520 "00000000" // /* MW 8 */ + 8521 "01011011" // /* MW 7 */ + 8522 "00000001" // /* MW 6 */ + 8523 "00100000" // /* MW 5 */ + 8524 "00000000" // /* MW 4 */ + 8525 "11110000" // /* MW 3 */ + 8526 "00101100" // /* MW 2 */ + 8527 "00000000" // /* MW 1 */ +.label ZLE_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_528 +.src_ref 4 "vector.hpp" 856 23 first +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 2 "reduce_base.h" 280 67 +.src_ref 2 "reduce_base.h" 289 45 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8528 "11100001" // NOPA; NOPB; VST x2, [p2], #64; ADD r20, r20, #1; VINSERT.16 x2, x0, #0, r21; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8529 "00000000" // /* MW 15 */ + 8530 "00000000" // /* MW 14 */ + 8531 "10111000" // /* MW 13 */ + 8532 "01010100" // /* MW 12 */ + 8533 "10000001" // /* MW 11 */ + 8534 "00111000" // /* MW 10 */ + 8535 "01000000" // /* MW 9 */ + 8536 "00101001" // /* MW 8 */ + 8537 "10010011" // /* MW 7 */ + 8538 "00011100" // /* MW 6 */ + 8539 "00100010" // /* MW 5 */ + 8540 "00000000" // /* MW 4 */ + 8541 "11110000" // /* MW 3 */ + 8542 "00101100" // /* MW 2 */ + 8543 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 274 12 first +.loop_nesting 1 + 8544 "00011000" // JNZD r19, r19, p7 /* MW 4 */ /* control_operation: words=4 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 8545 "11100000" // /* MW 3 */ + 8546 "11100111" // /* MW 2 */ + 8547 "00010100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 2 "reduce_base.h" 289 45 first +.delay_slot + 8548 "00011000" // VST x2, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8549 "10010011" // /* MW 3 */ + 8550 "00011100" // /* MW 2 */ + 8551 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8552 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8553 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 856 23 first +.delay_slot + 8554 "01111000" // VINSERT.16 x2, x0, #0, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8555 "10101001" // /* MW 3 */ + 8556 "00000010" // /* MW 2 */ + 8557 "00011001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8558 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8559 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 2 "reduce_base.h" 289 45 first +.delay_slot + 8560 "11100001" // NOPA; NOPB; VST x2, [p2], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8561 "00000000" // /* MW 15 */ + 8562 "00000000" // /* MW 14 */ + 8563 "01111000" // /* MW 13 */ + 8564 "10100101" // /* MW 12 */ + 8565 "00000001" // /* MW 11 */ + 8566 "00000000" // /* MW 10 */ + 8567 "00000000" // /* MW 9 */ + 8568 "00000000" // /* MW 8 */ + 8569 "10010011" // /* MW 7 */ + 8570 "00011100" // /* MW 6 */ + 8571 "00100010" // /* MW 5 */ + 8572 "00000000" // /* MW 4 */ + 8573 "11110000" // /* MW 3 */ + 8574 "00101100" // /* MW 2 */ + 8575 "00000000" // /* MW 1 */ +.label TGT_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_576 +.src_ref 7 "aie_core.h" 73 15 +.src_ref 7 "aie_core.h" 73 15 +.src_ref 7 "aie_core.h" 73 15 +.src_ref 7 "aie_core.h" 90 15 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 199 120 +.src_ref 5 "accum.hpp" 199 120 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "reduce_base.h" 405 45 first +.loop_nesting 0 + 8576 "10111010" // MOVA dc2, #0; MOVX vaddSign0, #1; ADD.NC p4, r17, #54 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8577 "10001000" // /* MW 9 */ + 8578 "01001101" // /* MW 8 */ + 8579 "00110100" // /* MW 7 */ + 8580 "00000010" // /* MW 6 */ + 8581 "11010010" // /* MW 5 */ + 8582 "00000010" // /* MW 4 */ + 8583 "10000000" // /* MW 3 */ + 8584 "00001011" // /* MW 2 */ + 8585 "00000000" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 199 120 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 2 "reduce_base.h" 294 28 +.src_ref 2 "reduce_base.h" 405 45 + 8586 "01110110" // LDA.s16 r19, [p4], #-2; MOVS dc3, dc2; MOVXM p2, #508464 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8587 "00010000" // /* MW 11 */ + 8588 "00011000" // /* MW 10 */ + 8589 "00110001" // /* MW 9 */ + 8590 "11110001" // /* MW 8 */ + 8591 "00000001" // /* MW 7 */ + 8592 "00000000" // /* MW 6 */ + 8593 "01001011" // /* MW 5 */ + 8594 "00001000" // /* MW 4 */ + 8595 "01010011" // /* MW 3 */ + 8596 "11001110" // /* MW 2 */ + 8597 "10011111" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 +.src_ref 7 "aie_core.h" 90 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 199 120 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 4 "add.hpp" 28 49 +.src_ref 4 "add_reduce.hpp" 332 18 +.src_ref 4 "add_reduce.hpp" 337 22 +.src_ref 4 "add_reduce.hpp" 337 22 +.src_ref 4 "add_reduce.hpp" 337 22 +.src_ref 4 "add_reduce.hpp" 337 22 +.src_ref 2 "reduce_base.h" 294 28 +.src_ref 2 "reduce_base.h" 406 38 first + 8598 "01110110" // LDA.u16 r26, [p4], #-6; MOVS dc4, dc2; MOVX r17, #60; ADD.NC p7, r17, #28 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8599 "00001000" // /* MW 11 */ + 8600 "01000111" // /* MW 10 */ + 8601 "10110100" // /* MW 9 */ + 8602 "10001011" // /* MW 8 */ + 8603 "00010111" // /* MW 7 */ + 8604 "00000001" // /* MW 6 */ + 8605 "01001011" // /* MW 5 */ + 8606 "00001000" // /* MW 4 */ + 8607 "01010100" // /* MW 3 */ + 8608 "11101011" // /* MW 2 */ + 8609 "10011011" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 +.src_ref 7 "aie_core.h" 73 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 199 120 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 2 "reduce_base.h" 409 43 first + 8610 "10111010" // LDA.s16 r20, [p4], #-6; MOVS dc1, dc4; MOV dj3, #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8611 "01010010" // /* MW 9 */ + 8612 "01000000" // /* MW 8 */ + 8613 "11000000" // /* MW 7 */ + 8614 "00000001" // /* MW 6 */ + 8615 "01001011" // /* MW 5 */ + 8616 "00010000" // /* MW 4 */ + 8617 "01010001" // /* MW 3 */ + 8618 "11010010" // /* MW 2 */ + 8619 "10011011" // /* MW 1 */ +.src_ref 7 "aie_core.h" 90 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 199 120 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 2 "reduce_base.h" 409 64 + 8620 "01010100" // LDA.u16 r28, [p4], #-2; MOV dc0, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8621 "00000001" // /* MW 5 */ + 8622 "10000000" // /* MW 4 */ + 8623 "01010001" // /* MW 3 */ + 8624 "11110011" // /* MW 2 */ + 8625 "10011111" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 305 8 first +.src_ref 2 "reduce_base.h" 410 56 first + 8626 "10111010" // LDA.s16 r21, [p4], #6; MOVXM ls, #8816 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8627 "00010000" // /* MW 9 */ + 8628 "00111000" // /* MW 8 */ + 8629 "01111001" // /* MW 7 */ + 8630 "00001000" // /* MW 6 */ + 8631 "00000000" // /* MW 5 */ + 8632 "00000000" // /* MW 4 */ + 8633 "01010000" // /* MW 3 */ + 8634 "11010110" // /* MW 2 */ + 8635 "10000111" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 305 8 +.src_ref 2 "reduce_base.h" 411 56 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 8636 "10111010" // LDA.s16 r22, [p4, #-2]; MOVXM le, #8832 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8637 "00010000" // /* MW 9 */ + 8638 "01000000" // /* MW 8 */ + 8639 "10111001" // /* MW 7 */ + 8640 "00001001" // /* MW 6 */ + 8641 "00000000" // /* MW 5 */ + 8642 "00000000" // /* MW 4 */ + 8643 "01010000" // /* MW 3 */ + 8644 "11011010" // /* MW 2 */ + 8645 "10011110" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 +.src_ref 7 "aie_core.h" 73 15 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "reduce_base.h" 410 75 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 8646 "10111010" // LDA.u16 r26, [p4]; MOVS p4, p3; MOV dj2, #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8647 "01010010" // /* MW 9 */ + 8648 "01000000" // /* MW 8 */ + 8649 "01000000" // /* MW 7 */ + 8650 "00000001" // /* MW 6 */ + 8651 "10001011" // /* MW 5 */ + 8652 "10001100" // /* MW 4 */ + 8653 "01010100" // /* MW 3 */ + 8654 "11101011" // /* MW 2 */ + 8655 "10000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 222 30 +.src_ref 2 "reduce_base.h" 294 28 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 8656 "11010100" // LDA.s8 r23, [p2]; MOV p2, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8657 "10000001" // /* MW 5 */ + 8658 "11011101" // /* MW 4 */ + 8659 "01010100" // /* MW 3 */ + 8660 "11011100" // /* MW 2 */ + 8661 "01000000" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 +.src_ref 7 "aie_core.h" 73 15 +.src_ref 7 "aie_core.h" 90 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 199 120 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 2 "reduce_base.h" 294 28 +.src_ref 2 "reduce_base.h" 313 60 +.src_ref 2 "reduce_base.h" 314 27 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 8662 "10111010" // LDA.u16 r19, [p7], #6; MOVX r18, #1; MOV dn3, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8663 "01111000" // /* MW 9 */ + 8664 "10010000" // /* MW 8 */ + 8665 "10100110" // /* MW 7 */ + 8666 "00101001" // /* MW 6 */ + 8667 "00100000" // /* MW 5 */ + 8668 "00000001" // /* MW 4 */ + 8669 "01010000" // /* MW 3 */ + 8670 "11001111" // /* MW 2 */ + 8671 "11100111" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 +.src_ref 7 "aie_core.h" 73 15 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1119 102 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8672 "11100100" // LSHL r27, r19, r18; MOV dn2, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8673 "01000001" // /* MW 5 */ + 8674 "10011010" // /* MW 4 */ + 8675 "10110100" // /* MW 3 */ + 8676 "11100101" // /* MW 2 */ + 8677 "10011110" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 +.src_ref 7 "aie_core.h" 73 15 +.src_ref 7 "aie_core.h" 90 15 +.src_ref 7 "aie_core.h" 90 15 first +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 199 120 +.src_ref 5 "accum.hpp" 199 120 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 1119 102 + 8678 "00111010" // MOVS dn0, r28; LSHL r20, r20, r18; MOV m3, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8679 "01111001" // /* MW 9 */ + 8680 "11010000" // /* MW 8 */ + 8681 "10000110" // /* MW 7 */ + 8682 "01101101" // /* MW 6 */ + 8683 "01001001" // /* MW 5 */ + 8684 "00101001" // /* MW 4 */ + 8685 "01100000" // /* MW 3 */ + 8686 "10000001" // /* MW 2 */ + 8687 "00001011" // /* MW 1 */ +.src_ref 7 "aie_core.h" 90 15 +.src_ref 7 "aie_core.h" 90 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 199 120 +.src_ref 5 "accum.hpp" 940 83 + 8688 "11100100" // LSHL r20, r21, r18; MOV m0, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8689 "01000001" // /* MW 5 */ + 8690 "00010100" // /* MW 4 */ + 8691 "10110000" // /* MW 3 */ + 8692 "00100101" // /* MW 2 */ + 8693 "10101101" // /* MW 1 */ +.src_ref 7 "aie_core.h" 90 15 +.src_ref 7 "aie_core.h" 90 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 199 120 +.src_ref 5 "accum.hpp" 940 83 + 8694 "11100100" // LSHL r20, r22, r18; MOV dj0, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8695 "01000001" // /* MW 5 */ + 8696 "00010100" // /* MW 4 */ + 8697 "10110001" // /* MW 3 */ + 8698 "00100101" // /* MW 2 */ + 8699 "10110101" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 first +.src_ref 7 "aie_core.h" 90 15 +.src_ref 7 "aie_core.h" 90 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 199 120 +.src_ref 5 "accum.hpp" 199 120 +.src_ref 5 "accum.hpp" 199 120 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 940 83 first + 8700 "10111010" // VLDA.2D.CONV.fp32.bf16 cml1, [p3], d3; MOVS dn4, r26; MOV dj4, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8701 "01110010" // /* MW 9 */ + 8702 "00010000" // /* MW 8 */ + 8703 "01000101" // /* MW 7 */ + 8704 "00000010" // /* MW 6 */ + 8705 "00001011" // /* MW 5 */ + 8706 "01011010" // /* MW 4 */ + 8707 "01110100" // /* MW 3 */ + 8708 "00010101" // /* MW 2 */ + 8709 "01101110" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 +.src_ref 7 "aie_core.h" 73 15 +.src_ref 7 "aie_core.h" 90 15 first +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 199 120 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 8710 "10111010" // VLDA.3D.CONV.fp32.bf16 cml2, [p0], d0; MOVX crRnd, r23; MOV m2, m3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8711 "01111000" // /* MW 9 */ + 8712 "00000000" // /* MW 8 */ + 8713 "00000011" // /* MW 7 */ + 8714 "00000001" // /* MW 6 */ + 8715 "11010100" // /* MW 5 */ + 8716 "00101111" // /* MW 4 */ + 8717 "01110000" // /* MW 3 */ + 8718 "00100101" // /* MW 2 */ + 8719 "00000011" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 199 120 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "reduce_base.h" 305 8 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 8720 "00010100" // VLDA.2D.CONV.fp32.bf16 cml1, [p3], d3; ADD.NC lc, r19, #-5 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8721 "11111011" // /* MW 5 */ + 8722 "11110011" // /* MW 4 */ + 8723 "01111010" // /* MW 3 */ + 8724 "00010101" // /* MW 2 */ + 8725 "01101110" // /* MW 1 */ +.src_ref 7 "aie_core.h" 90 15 first +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 199 120 +.src_ref 5 "accum.hpp" 940 83 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8726 "10011000" // VLDA.3D.CONV.fp32.bf16 cml2, [p0], d0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8727 "00101011" // /* MW 3 */ + 8728 "00011001" // /* MW 2 */ + 8729 "00000000" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 199 120 first +.src_ref 5 "accum.hpp" 940 83 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8730 "00111100" // VLDA.2D.CONV.fp32.bf16 cml1, [p3], d3;NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8731 "00100000" // /* MW 5 */ + 8732 "00000000" // /* MW 4 */ + 8733 "01110000" // /* MW 3 */ + 8734 "00010101" // /* MW 2 */ + 8735 "01101110" // /* MW 1 */ +.src_ref 7 "aie_core.h" 90 15 first +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 199 120 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 4 "add.hpp" 28 49 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8736 "11101011" // VLDA.3D.CONV.fp32.bf16 cml2, [p0], d0;NOPB; NOPS; NOPX; NOPM; VADD.f dm0, dm1, dm2, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8737 "01000001" // /* MW 15 */ + 8738 "01000001" // /* MW 14 */ + 8739 "01111100" // /* MW 13 */ + 8740 "10100101" // /* MW 12 */ + 8741 "00000001" // /* MW 11 */ + 8742 "00000000" // /* MW 10 */ + 8743 "00000000" // /* MW 9 */ + 8744 "00000000" // /* MW 8 */ + 8745 "01011011" // /* MW 7 */ + 8746 "00000001" // /* MW 6 */ + 8747 "00100000" // /* MW 5 */ + 8748 "00000000" // /* MW 4 */ + 8749 "01110000" // /* MW 3 */ + 8750 "00100101" // /* MW 2 */ + 8751 "00000011" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 199 120 first +.src_ref 5 "accum.hpp" 940 83 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8752 "11100001" // VLDA.2D.CONV.fp32.bf16 cml1, [p3], d3;NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8753 "00000000" // /* MW 15 */ + 8754 "00000000" // /* MW 14 */ + 8755 "01111000" // /* MW 13 */ + 8756 "10100101" // /* MW 12 */ + 8757 "00000001" // /* MW 11 */ + 8758 "00000000" // /* MW 10 */ + 8759 "00000000" // /* MW 9 */ + 8760 "00000000" // /* MW 8 */ + 8761 "01011011" // /* MW 7 */ + 8762 "00000001" // /* MW 6 */ + 8763 "00100000" // /* MW 5 */ + 8764 "00000000" // /* MW 4 */ + 8765 "01110000" // /* MW 3 */ + 8766 "00010101" // /* MW 2 */ + 8767 "01101110" // /* MW 1 */ +.src_ref 7 "aie_core.h" 90 15 first +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 199 120 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 4 "add.hpp" 28 49 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8768 "11101011" // VLDA.3D.CONV.fp32.bf16 cml2, [p0], d0;NOPB; NOPS; NOPX; NOPM; VADD.f dm0, dm1, dm2, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8769 "01000001" // /* MW 15 */ + 8770 "01000001" // /* MW 14 */ + 8771 "01111100" // /* MW 13 */ + 8772 "10100101" // /* MW 12 */ + 8773 "00000001" // /* MW 11 */ + 8774 "00000000" // /* MW 10 */ + 8775 "00000000" // /* MW 9 */ + 8776 "00000000" // /* MW 8 */ + 8777 "01011011" // /* MW 7 */ + 8778 "00000001" // /* MW 6 */ + 8779 "00100000" // /* MW 5 */ + 8780 "00000000" // /* MW 4 */ + 8781 "01110000" // /* MW 3 */ + 8782 "00100101" // /* MW 2 */ + 8783 "00000011" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 199 120 first +.src_ref 5 "accum.hpp" 940 83 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8784 "11100001" // VLDA.2D.CONV.fp32.bf16 cml1, [p3], d3;NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8785 "00000000" // /* MW 15 */ + 8786 "00000000" // /* MW 14 */ + 8787 "01111000" // /* MW 13 */ + 8788 "10100101" // /* MW 12 */ + 8789 "00000001" // /* MW 11 */ + 8790 "00000000" // /* MW 10 */ + 8791 "00000000" // /* MW 9 */ + 8792 "00000000" // /* MW 8 */ + 8793 "01011011" // /* MW 7 */ + 8794 "00000001" // /* MW 6 */ + 8795 "00100000" // /* MW 5 */ + 8796 "00000000" // /* MW 4 */ + 8797 "01110000" // /* MW 3 */ + 8798 "00010101" // /* MW 2 */ + 8799 "01101110" // /* MW 1 */ +.src_ref 7 "aie_core.h" 90 15 first +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 199 120 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 4 "add.hpp" 28 49 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8800 "11101011" // VLDA.3D.CONV.fp32.bf16 cml2, [p0], d0;NOPB; NOPS; NOPX; NOPM; VADD.f dm0, dm1, dm2, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8801 "01000001" // /* MW 15 */ + 8802 "01000001" // /* MW 14 */ + 8803 "01111100" // /* MW 13 */ + 8804 "10100101" // /* MW 12 */ + 8805 "00000001" // /* MW 11 */ + 8806 "00000000" // /* MW 10 */ + 8807 "00000000" // /* MW 9 */ + 8808 "00000000" // /* MW 8 */ + 8809 "01011011" // /* MW 7 */ + 8810 "00000001" // /* MW 6 */ + 8811 "00100000" // /* MW 5 */ + 8812 "00000000" // /* MW 4 */ + 8813 "01110000" // /* MW 3 */ + 8814 "00100101" // /* MW 2 */ + 8815 "00000011" // /* MW 1 */ +.label ZLS_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_816 +.src_ref 7 "aie_core.h" 73 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 199 120 first +.src_ref 5 "accum.hpp" 940 83 first +.begin_of_loop +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 8816 "11100001" // VLDA.2D.CONV.fp32.bf16 cml1, [p3], d3;NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8817 "00000000" // /* MW 15 */ + 8818 "00000000" // /* MW 14 */ + 8819 "01111000" // /* MW 13 */ + 8820 "10100101" // /* MW 12 */ + 8821 "00000001" // /* MW 11 */ + 8822 "00000000" // /* MW 10 */ + 8823 "00000000" // /* MW 9 */ + 8824 "00000000" // /* MW 8 */ + 8825 "01011011" // /* MW 7 */ + 8826 "00000001" // /* MW 6 */ + 8827 "00100000" // /* MW 5 */ + 8828 "00000000" // /* MW 4 */ + 8829 "01110000" // /* MW 3 */ + 8830 "00010101" // /* MW 2 */ + 8831 "01101110" // /* MW 1 */ +.label ZLE_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_832 +.src_ref 7 "aie_core.h" 73 15 +.src_ref 7 "aie_core.h" 90 15 first +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 199 120 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 4 "add.hpp" 28 49 first +.end_of_loop +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8832 "11101011" // VLDA.3D.CONV.fp32.bf16 cml2, [p0], d0;NOPB; VST.2D.CONV.bf16.fp32 cml0, [p4], d2;NOPX; NOPM; VADD.f dm0, dm1, dm2, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8833 "01000001" // /* MW 15 */ + 8834 "01000001" // /* MW 14 */ + 8835 "01111100" // /* MW 13 */ + 8836 "10100101" // /* MW 12 */ + 8837 "00000001" // /* MW 11 */ + 8838 "00000000" // /* MW 10 */ + 8839 "00000000" // /* MW 9 */ + 8840 "00000000" // /* MW 8 */ + 8841 "00100011" // /* MW 7 */ + 8842 "01010000" // /* MW 6 */ + 8843 "00100100" // /* MW 5 */ + 8844 "00000000" // /* MW 4 */ + 8845 "01110000" // /* MW 3 */ + 8846 "00100101" // /* MW 2 */ + 8847 "00000011" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 8848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8849 "00000000" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 4 "add.hpp" 28 49 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8850 "01100010" // VST.2D.CONV.bf16.fp32 cml0, [p4], d2; VADD.f dm0, dm1, dm2, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8851 "00111101" // /* MW 7 */ + 8852 "00101000" // /* MW 6 */ + 8853 "10001000" // /* MW 5 */ + 8854 "00000010" // /* MW 4 */ + 8855 "01100000" // /* MW 3 */ + 8856 "00000100" // /* MW 2 */ + 8857 "10001010" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8859 "00000000" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 4 "add.hpp" 28 49 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8860 "01100010" // VST.2D.CONV.bf16.fp32 cml0, [p4], d2; VADD.f dm0, dm1, dm2, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8861 "00111101" // /* MW 7 */ + 8862 "00101000" // /* MW 6 */ + 8863 "10001000" // /* MW 5 */ + 8864 "00000010" // /* MW 4 */ + 8865 "01100000" // /* MW 3 */ + 8866 "00000100" // /* MW 2 */ + 8867 "10001010" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 312 12 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8868 "10000100" // JNZ r16, #9424 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9424 delay_slots=5 */ + 8869 "00000001" // /* MW 5 */ + 8870 "01000000" // /* MW 4 */ + 8871 "01101000" // /* MW 3 */ + 8872 "00010010" // /* MW 2 */ + 8873 "10000000" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 first +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1119 102 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8874 "00011000" // VST.2D.CONV.bf16.fp32 cml0, [p4], d2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8875 "00100011" // /* MW 3 */ + 8876 "01010000" // /* MW 2 */ + 8877 "00001100" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8879 "00000000" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1119 102 +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8880 "00011000" // VST.2D.CONV.bf16.fp32 cml0, [p4], d2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8881 "00100011" // /* MW 3 */ + 8882 "01010000" // /* MW 2 */ + 8883 "00001100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8885 "00000000" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1119 102 +.delay_slot + 8886 "00011000" // VST.2D.CONV.bf16.fp32 cml0, [p4], d2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8887 "00100011" // /* MW 3 */ + 8888 "01010000" // /* MW 2 */ + 8889 "00001100" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 2 "reduce_base.h" 314 27 + 8890 "10111010" // MOVA dj1, #64; MOVXM p0, #508534 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8891 "00010000" // /* MW 9 */ + 8892 "00111011" // /* MW 8 */ + 8893 "00110001" // /* MW 7 */ + 8894 "11110000" // /* MW 6 */ + 8895 "00000001" // /* MW 5 */ + 8896 "00000000" // /* MW 4 */ + 8897 "10000000" // /* MW 3 */ + 8898 "00000110" // /* MW 2 */ + 8899 "00001000" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 +.src_ref 2 "reduce_base.h" 313 60 +.src_ref 2 "reduce_base.h" 314 27 first + 8900 "10111010" // LDA.s16 r7, [p0], #-2; MOVX r16, #32; MOV r23, p1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8901 "01111000" // /* MW 9 */ + 8902 "01100000" // /* MW 8 */ + 8903 "11101001" // /* MW 7 */ + 8904 "00001010" // /* MW 6 */ + 8905 "00000100" // /* MW 5 */ + 8906 "00000001" // /* MW 4 */ + 8907 "01010000" // /* MW 3 */ + 8908 "10011110" // /* MW 2 */ + 8909 "00011111" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 +.src_ref 4 "add_reduce.hpp" 335 47 +.src_ref 2 "reduce_base.h" 313 69 first + 8910 "10111010" // LDA r29, [p0, #-4]; MOVX r19, #16; MOV r20, #8 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8911 "01011000" // /* MW 9 */ + 8912 "00001000" // /* MW 8 */ + 8913 "10001000" // /* MW 7 */ + 8914 "00001010" // /* MW 6 */ + 8915 "00110010" // /* MW 5 */ + 8916 "00000001" // /* MW 4 */ + 8917 "11010000" // /* MW 3 */ + 8918 "11110110" // /* MW 2 */ + 8919 "00011110" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 314 27 first +.src_ref 2 "reduce_base.h" 319 12 first + 8920 "10111010" // LDA.u16 r27, [p0]; MOVXM ls, #9168 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8921 "00010000" // /* MW 9 */ + 8922 "11101000" // /* MW 8 */ + 8923 "01111001" // /* MW 7 */ + 8924 "00001000" // /* MW 6 */ + 8925 "00000000" // /* MW 5 */ + 8926 "00000000" // /* MW 4 */ + 8927 "01010000" // /* MW 3 */ + 8928 "11101111" // /* MW 2 */ + 8929 "00000000" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 +.src_ref 2 "reduce_base.h" 319 12 + 8930 "10111010" // MOVA r22, #4; MOVXM le, #9264 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8931 "00010000" // /* MW 9 */ + 8932 "00011000" // /* MW 8 */ + 8933 "10111010" // /* MW 7 */ + 8934 "00001001" // /* MW 6 */ + 8935 "00000000" // /* MW 5 */ + 8936 "00000000" // /* MW 4 */ + 8937 "00000000" // /* MW 3 */ + 8938 "10010110" // /* MW 2 */ + 8939 "00000000" // /* MW 1 */ + 8940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8941 "00000000" // /* MW 1 */ + 8942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8943 "00000000" // /* MW 1 */ + 8944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8945 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 314 27 + 8946 "10011000" // LSHL r7, r7, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8947 "00101101" // /* MW 3 */ + 8948 "11001111" // /* MW 2 */ + 8949 "00010001" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 2 "reduce_base.h" 313 60 first + 8950 "11100100" // LSHL r18, r29, r18; MOV m1, r7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8951 "01000001" // /* MW 5 */ + 8952 "00000111" // /* MW 4 */ + 8953 "10110010" // /* MW 3 */ + 8954 "10100101" // /* MW 2 */ + 8955 "11101100" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 2 "reduce_base.h" 319 12 first + 8956 "00000010" // MOVS dn1, r27; ADD.NC lc, r29, #-3 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8957 "01000000" // /* MW 7 */ + 8958 "01111111" // /* MW 6 */ + 8959 "10111111" // /* MW 5 */ + 8960 "00000010" // /* MW 4 */ + 8961 "01100000" // /* MW 3 */ + 8962 "01100001" // /* MW 2 */ + 8963 "00101011" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 313 60 first + 8964 "01011000" // ADD.NC p0, r23, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8965 "11001001" // /* MW 3 */ + 8966 "01101011" // /* MW 2 */ + 8967 "00011000" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first + 8968 "10011000" // VLDA.2D.CONV.fp32.bf16 cml0, [p0], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8969 "00101011" // /* MW 3 */ + 8970 "00110000" // /* MW 2 */ + 8971 "00000000" // /* MW 1 */ + 8972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8973 "00000000" // /* MW 1 */ + 8974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8975 "00000000" // /* MW 1 */ + 8976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8977 "00000000" // /* MW 1 */ + 8978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8979 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 8980 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8981 "00000000" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 332 18 first +.aggressive_scheduled_block_id 4 +.noswbrkpt + 8982 "01001000" // VADD.f dm0, dm0, dm3, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8983 "00111101" // /* MW 3 */ + 8984 "00001100" // /* MW 2 */ + 8985 "10001000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.src_ref 5 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8986 "11111000" // VMOV bmll3, bmlh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8987 "00010010" // /* MW 3 */ + 8988 "00000001" // /* MW 2 */ + 8989 "00011011" // /* MW 1 */ + 8990 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8991 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 8992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8993 "00000000" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.aggressive_scheduled_block_id 5 +.noswbrkpt + 8994 "10011000" // VLDA.2D.CONV.fp32.bf16 cml0, [p0], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8995 "00101011" // /* MW 3 */ + 8996 "00110000" // /* MW 2 */ + 8997 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 8998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8999 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 9000 "11111000" // VMOV x0, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9001 "00010010" // /* MW 3 */ + 9002 "00100000" // /* MW 2 */ + 9003 "00011000" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 first +.src_ref 4 "add_reduce.hpp" 337 22 first +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 9004 "01100010" // VSHIFT x1, x0, x0, r16; VADD.f dm1, dm0, dm3, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9005 "00111101" // /* MW 7 */ + 9006 "00001100" // /* MW 6 */ + 9007 "10001001" // /* MW 5 */ + 9008 "11000110" // /* MW 4 */ + 9009 "01000010" // /* MW 3 */ + 9010 "10000000" // /* MW 2 */ + 9011 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9012 "11111000" // VMOV bmll3, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9013 "10010010" // /* MW 3 */ + 9014 "00000010" // /* MW 2 */ + 9015 "00011011" // /* MW 1 */ + 9016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9017 "00000000" // /* MW 1 */ + 9018 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9019 "00000000" // /* MW 1 */ + 9020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9021 "00000000" // /* MW 1 */ + 9022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9023 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 9024 "11111000" // VMOV x2, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9025 "00010010" // /* MW 3 */ + 9026 "00100100" // /* MW 2 */ + 9027 "00011001" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 first +.src_ref 4 "add_reduce.hpp" 337 22 first +.aggressive_scheduled_block_id 6 +.noswbrkpt + 9028 "01100010" // VSHIFT x3, x2, x0, r19; VADD.f dm1, dm1, dm3, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9029 "00111101" // /* MW 7 */ + 9030 "00101100" // /* MW 6 */ + 9031 "10001001" // /* MW 5 */ + 9032 "11000110" // /* MW 4 */ + 9033 "01001110" // /* MW 3 */ + 9034 "10010000" // /* MW 2 */ + 9035 "00000001" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9036 "11111000" // VMOV bmll3, x3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9037 "10010010" // /* MW 3 */ + 9038 "00000110" // /* MW 2 */ + 9039 "00011011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.src_ref 5 "accum.hpp" 198 120 +.src_ref 4 "add_reduce.hpp" 332 18 first + 9040 "01100010" // VMOV bmll3, bmlh0; VADD.f dm0, dm0, dm3, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9041 "00111101" // /* MW 7 */ + 9042 "00001100" // /* MW 6 */ + 9043 "10001000" // /* MW 5 */ + 9044 "11100110" // /* MW 4 */ + 9045 "00010010" // /* MW 3 */ + 9046 "00000001" // /* MW 2 */ + 9047 "00000011" // /* MW 1 */ + 9048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9049 "00000000" // /* MW 1 */ + 9050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9051 "00000000" // /* MW 1 */ + 9052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9053 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 9054 "11111000" // VMOV x4, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9055 "00010010" // /* MW 3 */ + 9056 "00100100" // /* MW 2 */ + 9057 "00011010" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 4 "add_reduce.hpp" 335 47 first +.aggressive_scheduled_block_id 7 +.noswbrkpt + 9058 "10010100" // VLDA.2D.CONV.fp32.bf16 cml0, [p0], d1; VSHIFT x5, x4, x0, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9059 "10100101" // /* MW 5 */ + 9060 "01000000" // /* MW 4 */ + 9061 "01110101" // /* MW 3 */ + 9062 "00000101" // /* MW 2 */ + 9063 "00000110" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 9064 "11111000" // VMOV x0, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9065 "00010010" // /* MW 3 */ + 9066 "00100000" // /* MW 2 */ + 9067 "00011000" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 first +.src_ref 4 "add_reduce.hpp" 337 22 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 9068 "01100010" // VSHIFT x1, x0, x0, r16; VADD.f dm1, dm0, dm3, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9069 "00111101" // /* MW 7 */ + 9070 "00001100" // /* MW 6 */ + 9071 "10001001" // /* MW 5 */ + 9072 "11000110" // /* MW 4 */ + 9073 "01000010" // /* MW 3 */ + 9074 "10000000" // /* MW 2 */ + 9075 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 9076 "11111000" // VMOV bmll3, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9077 "10010010" // /* MW 3 */ + 9078 "00000010" // /* MW 2 */ + 9079 "00011011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 +.src_ref 4 "add_reduce.hpp" 337 22 first +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9080 "01100010" // VMOV bmll4, x5; VADD.f dm2, dm1, dm4, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9081 "00111101" // /* MW 7 */ + 9082 "00110000" // /* MW 6 */ + 9083 "10001010" // /* MW 5 */ + 9084 "11100110" // /* MW 4 */ + 9085 "10010010" // /* MW 3 */ + 9086 "00001010" // /* MW 2 */ + 9087 "00000100" // /* MW 1 */ + 9088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9089 "00000000" // /* MW 1 */ + 9090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9091 "00000000" // /* MW 1 */ + 9092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9093 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id first + 9094 "11111000" // VMOV x2, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9095 "00010010" // /* MW 3 */ + 9096 "00100100" // /* MW 2 */ + 9097 "00011001" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 first +.src_ref 4 "add_reduce.hpp" 337 22 first +.aggressive_scheduled_block_id 8 +.noswbrkpt + 9098 "01100010" // VSHIFT x3, x2, x0, r19; VADD.f dm1, dm1, dm3, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9099 "00111101" // /* MW 7 */ + 9100 "00101100" // /* MW 6 */ + 9101 "10001001" // /* MW 5 */ + 9102 "11000110" // /* MW 4 */ + 9103 "01001110" // /* MW 3 */ + 9104 "10010000" // /* MW 2 */ + 9105 "00000001" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9106 "11111000" // VMOV bmll3, x3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9107 "10010010" // /* MW 3 */ + 9108 "00000110" // /* MW 2 */ + 9109 "00011011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.src_ref 5 "accum.hpp" 198 120 +.src_ref 4 "add_reduce.hpp" 332 18 first + 9110 "01100010" // VMOV bmll3, bmlh0; VADD.f dm0, dm0, dm3, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9111 "00111101" // /* MW 7 */ + 9112 "00001100" // /* MW 6 */ + 9113 "10001000" // /* MW 5 */ + 9114 "11100110" // /* MW 4 */ + 9115 "00010010" // /* MW 3 */ + 9116 "00000001" // /* MW 2 */ + 9117 "00000011" // /* MW 1 */ + 9118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9119 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 + 9120 "11111000" // VMOV x6, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9121 "00010010" // /* MW 3 */ + 9122 "00101000" // /* MW 2 */ + 9123 "00011011" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 first + 9124 "11011000" // VSHIFT x7, x6, x0, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9125 "01011010" // /* MW 3 */ + 9126 "10110000" // /* MW 2 */ + 9127 "00011011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first + 9128 "11111000" // VMOV bmll4, x7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9129 "10010010" // /* MW 3 */ + 9130 "00001110" // /* MW 2 */ + 9131 "00011100" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.src_ref 4 "add_reduce.hpp" 337 22 first + 9132 "01100010" // VMOV x4, bmll1; VADD.f dm2, dm2, dm4, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9133 "00111101" // /* MW 7 */ + 9134 "01010000" // /* MW 6 */ + 9135 "10001010" // /* MW 5 */ + 9136 "11100110" // /* MW 4 */ + 9137 "00010010" // /* MW 3 */ + 9138 "00100100" // /* MW 2 */ + 9139 "00000010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 + 9140 "11111000" // VMOV x0, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9141 "00010010" // /* MW 3 */ + 9142 "00100000" // /* MW 2 */ + 9143 "00011000" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 first +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id first + 9144 "11011000" // VSHIFT x1, x0, x0, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9145 "01000010" // /* MW 3 */ + 9146 "10000000" // /* MW 2 */ + 9147 "00011000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first +.src_ref 4 "add_reduce.hpp" 337 22 first +.aggressive_scheduled_block_id 9 +.noswbrkpt + 9148 "01100010" // VMOV bmll3, x1; VADD.f dm1, dm0, dm3, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9149 "00111101" // /* MW 7 */ + 9150 "00001100" // /* MW 6 */ + 9151 "10001001" // /* MW 5 */ + 9152 "11100110" // /* MW 4 */ + 9153 "10010010" // /* MW 3 */ + 9154 "00000010" // /* MW 2 */ + 9155 "00000011" // /* MW 1 */ +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9156 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9157 "10000001" // /* MW 11 */ + 9158 "10101101" // /* MW 10 */ + 9159 "00000000" // /* MW 9 */ + 9160 "00000000" // /* MW 8 */ + 9161 "00000000" // /* MW 7 */ + 9162 "00000000" // /* MW 6 */ + 9163 "00100000" // /* MW 5 */ + 9164 "00000000" // /* MW 4 */ + 9165 "11110000" // /* MW 3 */ + 9166 "00101100" // /* MW 2 */ + 9167 "00000000" // /* MW 1 */ +.label ZLS_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_1168 +.src_ref 7 "aie_core.h" 73 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 4 "add_reduce.hpp" 335 47 first +.src_ref 4 "add_reduce.hpp" 337 22 +.begin_of_loop +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 9168 "01001010" // VLDA.2D.CONV.fp32.bf16 cml0, [p0], d1; VSHIFT x5, x4, x0, r20; VADD.f dm2, dm1, dm4, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9169 "00111101" // /* MW 9 */ + 9170 "00110000" // /* MW 8 */ + 9171 "10001010" // /* MW 7 */ + 9172 "11000010" // /* MW 6 */ + 9173 "01010010" // /* MW 5 */ + 9174 "10100000" // /* MW 4 */ + 9175 "01110010" // /* MW 3 */ + 9176 "00000101" // /* MW 2 */ + 9177 "00000110" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first +.src_ref 5 "accum.hpp" 1119 102 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9178 "00000010" // VCONV.bf16.fp32 x8, cml2; VMOV bmll4, x5 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9179 "01110000" // /* MW 7 */ + 9180 "01001001" // /* MW 6 */ + 9181 "00000101" // /* MW 5 */ + 9182 "00000010" // /* MW 4 */ + 9183 "11000000" // /* MW 3 */ + 9184 "00100010" // /* MW 2 */ + 9185 "10000010" // /* MW 1 */ +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9187 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9188 "10111000" // VEXTRACT.16 r21, x8, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9189 "00000001" // /* MW 3 */ + 9190 "01100001" // /* MW 2 */ + 9191 "00011101" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9192 "11111000" // VMOV x2, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9193 "00010010" // /* MW 3 */ + 9194 "00100100" // /* MW 2 */ + 9195 "00011001" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9196 "11011000" // VSHIFT x3, x2, x0, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9197 "01001110" // /* MW 3 */ + 9198 "10010000" // /* MW 2 */ + 9199 "00011001" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first +.src_ref 4 "add_reduce.hpp" 337 22 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9200 "01100010" // VMOV bmll3, x3; VADD.f dm1, dm1, dm3, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9201 "00111101" // /* MW 7 */ + 9202 "00101100" // /* MW 6 */ + 9203 "10001001" // /* MW 5 */ + 9204 "11100110" // /* MW 4 */ + 9205 "10010010" // /* MW 3 */ + 9206 "00000110" // /* MW 2 */ + 9207 "00000011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.src_ref 4 "add_reduce.hpp" 332 18 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9208 "01100010" // VMOV x6, bmll2; VADD.f dm0, dm0, dm3, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9209 "00111101" // /* MW 7 */ + 9210 "00001100" // /* MW 6 */ + 9211 "10001000" // /* MW 5 */ + 9212 "11100110" // /* MW 4 */ + 9213 "00010010" // /* MW 3 */ + 9214 "00101000" // /* MW 2 */ + 9215 "00000011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 +.src_ref 5 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9216 "11111000" // VMOV bmll3, bmlh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9217 "00010010" // /* MW 3 */ + 9218 "00000001" // /* MW 2 */ + 9219 "00011011" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9220 "11011000" // VSHIFT x7, x6, x0, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9221 "01011010" // /* MW 3 */ + 9222 "10110000" // /* MW 2 */ + 9223 "00011011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9224 "11111000" // VMOV bmll4, x7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9225 "10010010" // /* MW 3 */ + 9226 "00001110" // /* MW 2 */ + 9227 "00011100" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 337 22 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9228 "01001000" // VADD.f dm2, dm2, dm4, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9229 "00111101" // /* MW 3 */ + 9230 "01010000" // /* MW 2 */ + 9231 "10001010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9232 "11111000" // VMOV x4, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9233 "00010010" // /* MW 3 */ + 9234 "00100100" // /* MW 2 */ + 9235 "00011010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9236 "11110110" // NOPA; NOPB; NOPS; VMOV x0, bmll0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9237 "01110000" // /* MW 11 */ + 9238 "00001001" // /* MW 10 */ + 9239 "00010000" // /* MW 9 */ + 9240 "00000000" // /* MW 8 */ + 9241 "01011011" // /* MW 7 */ + 9242 "00000001" // /* MW 6 */ + 9243 "00100000" // /* MW 5 */ + 9244 "00000000" // /* MW 4 */ + 9245 "11110000" // /* MW 3 */ + 9246 "00101100" // /* MW 2 */ + 9247 "00000000" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 first +.src_ref 4 "add_reduce.hpp" 337 22 first +.src_ref 2 "reduce_base.h" 326 30 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9248 "11101011" // ST.s16 r21, [p1], #2; NOPB; NOPS; NOPX; VSHIFT x1, x0, x0, r16; VADD.f dm1, dm0, dm3, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9249 "01100001" // /* MW 15 */ + 9250 "01001000" // /* MW 14 */ + 9251 "01101100" // /* MW 13 */ + 9252 "00100001" // /* MW 12 */ + 9253 "01000000" // /* MW 11 */ + 9254 "00000000" // /* MW 10 */ + 9255 "00000000" // /* MW 9 */ + 9256 "00000000" // /* MW 8 */ + 9257 "01011011" // /* MW 7 */ + 9258 "00000001" // /* MW 6 */ + 9259 "00100000" // /* MW 5 */ + 9260 "00000000" // /* MW 4 */ + 9261 "11100000" // /* MW 3 */ + 9262 "11010110" // /* MW 2 */ + 9263 "00100011" // /* MW 1 */ +.label ZLE_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_1264 +.src_ref 5 "accum.hpp" 198 120 first +.end_of_loop +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9264 "11100001" // NOPA; NOPB; NOPS; NOPX; VMOV bmll3, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9265 "00000000" // /* MW 15 */ + 9266 "00000000" // /* MW 14 */ + 9267 "01111000" // /* MW 13 */ + 9268 "01001001" // /* MW 12 */ + 9269 "10000001" // /* MW 11 */ + 9270 "00000001" // /* MW 10 */ + 9271 "00000000" // /* MW 9 */ + 9272 "00000000" // /* MW 8 */ + 9273 "01011011" // /* MW 7 */ + 9274 "00000001" // /* MW 6 */ + 9275 "00100000" // /* MW 5 */ + 9276 "00000000" // /* MW 4 */ + 9277 "11110000" // /* MW 3 */ + 9278 "00101100" // /* MW 2 */ + 9279 "00000000" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 first +.src_ref 4 "add_reduce.hpp" 337 22 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 9280 "01100010" // VSHIFT x5, x4, x0, r20; VADD.f dm2, dm1, dm4, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9281 "00111101" // /* MW 7 */ + 9282 "00110000" // /* MW 6 */ + 9283 "10001010" // /* MW 5 */ + 9284 "11000110" // /* MW 4 */ + 9285 "01010010" // /* MW 3 */ + 9286 "10100000" // /* MW 2 */ + 9287 "00000010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first +.src_ref 5 "accum.hpp" 1119 102 first +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9288 "00000010" // VCONV.bf16.fp32 x8, cml2; VMOV bmll4, x5 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9289 "01110000" // /* MW 7 */ + 9290 "01001001" // /* MW 6 */ + 9291 "00000101" // /* MW 5 */ + 9292 "00000010" // /* MW 4 */ + 9293 "11000000" // /* MW 3 */ + 9294 "00100010" // /* MW 2 */ + 9295 "10000010" // /* MW 1 */ + 9296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9297 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first + 9298 "10111000" // VEXTRACT.16 r21, x8, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9299 "00000001" // /* MW 3 */ + 9300 "01100001" // /* MW 2 */ + 9301 "00011101" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first + 9302 "11111000" // VMOV x2, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9303 "00010010" // /* MW 3 */ + 9304 "00100100" // /* MW 2 */ + 9305 "00011001" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 first +.src_ref 2 "reduce_base.h" 326 30 first + 9306 "10010100" // ST.s16 r21, [p1], #2; VSHIFT x3, x2, x0, r19 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9307 "10011101" // /* MW 5 */ + 9308 "00100000" // /* MW 4 */ + 9309 "11100011" // /* MW 3 */ + 9310 "11010110" // /* MW 2 */ + 9311 "00100011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id first + 9312 "11111000" // VMOV x6, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9313 "00010010" // /* MW 3 */ + 9314 "00101000" // /* MW 2 */ + 9315 "00011011" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 first +.src_ref 4 "add_reduce.hpp" 337 22 first +.aggressive_scheduled_block_id 10 +.noswbrkpt + 9316 "01100010" // VSHIFT x7, x6, x0, r22; VADD.f dm2, dm2, dm4, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9317 "00111101" // /* MW 7 */ + 9318 "01010000" // /* MW 6 */ + 9319 "10001010" // /* MW 5 */ + 9320 "11000110" // /* MW 4 */ + 9321 "01011010" // /* MW 3 */ + 9322 "10110000" // /* MW 2 */ + 9323 "00000011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9324 "11111000" // VMOV bmll4, x7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9325 "10010010" // /* MW 3 */ + 9326 "00001110" // /* MW 2 */ + 9327 "00011100" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 +.src_ref 4 "add_reduce.hpp" 337 22 first + 9328 "01100010" // VMOV bmll3, x3; VADD.f dm1, dm1, dm3, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9329 "00111101" // /* MW 7 */ + 9330 "00101100" // /* MW 6 */ + 9331 "10001001" // /* MW 5 */ + 9332 "11100110" // /* MW 4 */ + 9333 "10010010" // /* MW 3 */ + 9334 "00000110" // /* MW 2 */ + 9335 "00000011" // /* MW 1 */ + 9336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9337 "00000000" // /* MW 1 */ + 9338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9339 "00000000" // /* MW 1 */ + 9340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9341 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 first + 9342 "00011000" // VCONV.bf16.fp32 x8, cml2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9343 "00010110" // /* MW 3 */ + 9344 "00010001" // /* MW 2 */ + 9345 "00001100" // /* MW 1 */ +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id first + 9346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9347 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.src_ref 2 "reduce_base.h" 326 30 first +.aggressive_scheduled_block_id 11 +.noswbrkpt + 9348 "11010100" // ST.s16 r21, [p1], #2; VMOV x4, bmll1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9349 "00100101" // /* MW 5 */ + 9350 "01001000" // /* MW 4 */ + 9351 "11100100" // /* MW 3 */ + 9352 "11010110" // /* MW 2 */ + 9353 "00100011" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 first +.src_ref 4 "add_reduce.hpp" 337 22 first +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 9354 "01100010" // VSHIFT x5, x4, x0, r20; VADD.f dm2, dm1, dm4, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9355 "00111101" // /* MW 7 */ + 9356 "00110000" // /* MW 6 */ + 9357 "10001010" // /* MW 5 */ + 9358 "11000110" // /* MW 4 */ + 9359 "01010010" // /* MW 3 */ + 9360 "10100000" // /* MW 2 */ + 9361 "00000010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 9362 "11111000" // VMOV bmll4, x5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9363 "10010010" // /* MW 3 */ + 9364 "00001010" // /* MW 2 */ + 9365 "00011100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9366 "10111000" // VEXTRACT.16 r21, x8, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9367 "00000001" // /* MW 3 */ + 9368 "01100001" // /* MW 2 */ + 9369 "00011101" // /* MW 1 */ + 9370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9371 "00000000" // /* MW 1 */ + 9372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9373 "00000000" // /* MW 1 */ + 9374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9375 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.aggressive_scheduled_block_id 12 +.aggressive_scheduled_block_id first + 9376 "11111000" // VMOV x6, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9377 "00010010" // /* MW 3 */ + 9378 "00101000" // /* MW 2 */ + 9379 "00011011" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 first +.src_ref 4 "add_reduce.hpp" 337 22 first +.aggressive_scheduled_block_id 12 +.noswbrkpt + 9380 "01100010" // VSHIFT x7, x6, x0, r22; VADD.f dm2, dm2, dm4, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9381 "00111101" // /* MW 7 */ + 9382 "01010000" // /* MW 6 */ + 9383 "10001010" // /* MW 5 */ + 9384 "11000110" // /* MW 4 */ + 9385 "01011010" // /* MW 3 */ + 9386 "10110000" // /* MW 2 */ + 9387 "00000011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 12 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9388 "11111000" // VMOV bmll4, x7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9389 "10010010" // /* MW 3 */ + 9390 "00001110" // /* MW 2 */ + 9391 "00011100" // /* MW 1 */ + 9392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9393 "00000000" // /* MW 1 */ + 9394 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9395 "00000000" // /* MW 1 */ + 9396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9397 "00000000" // /* MW 1 */ + 9398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9399 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 first + 9400 "00011000" // VCONV.bf16.fp32 x8, cml2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9401 "00010110" // /* MW 3 */ + 9402 "00010001" // /* MW 2 */ + 9403 "00001100" // /* MW 1 */ + 9404 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9405 "01100111" // /* MW 3 */ + 9406 "00000001" // /* MW 2 */ + 9407 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 2 "reduce_base.h" 326 30 first + 9408 "11100001" // ST.s16 r21, [p1], #2; NOPB; NOPS; NOPX; VEXTRACT.16 r21, x8, #0, vaddSign0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9409 "00000000" // /* MW 15 */ + 9410 "00000000" // /* MW 14 */ + 9411 "11011000" // /* MW 13 */ + 9412 "10000000" // /* MW 12 */ + 9413 "10110000" // /* MW 11 */ + 9414 "00000010" // /* MW 10 */ + 9415 "00000000" // /* MW 9 */ + 9416 "00000000" // /* MW 8 */ + 9417 "01011011" // /* MW 7 */ + 9418 "00000001" // /* MW 6 */ + 9419 "00100000" // /* MW 5 */ + 9420 "00000000" // /* MW 4 */ + 9421 "11100000" // /* MW 3 */ + 9422 "11010110" // /* MW 2 */ + 9423 "00100011" // /* MW 1 */ +.label TGT_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_1424 +.src_ref 2 "reduce_base.h" 222 30 first + 9424 "10011000" // LDA.u16 r16, [p2, #6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9425 "00011010" // /* MW 3 */ + 9426 "00110110" // /* MW 2 */ + 9427 "00000010" // /* MW 1 */ + 9428 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9429 "00000000" // /* MW 1 */ + 9430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9431 "00000000" // /* MW 1 */ + 9432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9433 "00000000" // /* MW 1 */ + 9434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9435 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 13 +.aggressive_scheduled_block_id first + 9436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9437 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 222 30 +.aggressive_scheduled_block_id 13 +.noswbrkpt + 9438 "00011000" // ST.s16 r16, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9439 "00010111" // /* MW 3 */ + 9440 "11111110" // /* MW 2 */ + 9441 "00000111" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 223 57 first +.aggressive_scheduled_block_id 13 +.nohwbrkpt +.noswbrkpt + 9442 "10011000" // LDA.u16 r1, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9443 "00111010" // /* MW 3 */ + 9444 "00000100" // /* MW 2 */ + 9445 "00000111" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 223 57 +.aggressive_scheduled_block_id 13 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 9446 "00000100" // JL #13472 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=13472 delay_slots=5 */ + 9447 "00000001" // /* MW 5 */ + 9448 "00000000" // /* MW 4 */ + 9449 "01010000" // /* MW 3 */ + 9450 "00011010" // /* MW 2 */ + 9451 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 222 30 first +.delay_slot +.aggressive_scheduled_block_id 13 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9452 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9453 "00000111" // /* MW 3 */ + 9454 "00100000" // /* MW 2 */ + 9455 "00010100" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 222 30 +.delay_slot + 9456 "00011000" // EXTEND.u16 r0, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9457 "10110000" // /* MW 3 */ + 9458 "00000000" // /* MW 2 */ + 9459 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9461 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9463 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9464 "00100010" // NOPA; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9465 "00011100" // /* MW 7 */ + 9466 "00000000" // /* MW 6 */ + 9467 "00000000" // /* MW 5 */ + 9468 "00000100" // /* MW 4 */ + 9469 "11110000" // /* MW 3 */ + 9470 "00101100" // /* MW 2 */ + 9471 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 355 4 +.return_address + 9472 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9473 "00111001" // /* MW 3 */ + 9474 "11111100" // /* MW 2 */ + 9475 "00000111" // /* MW 1 */ + 9476 "00011000" // LDA p1, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9477 "10011001" // /* MW 3 */ + 9478 "11111000" // /* MW 2 */ + 9479 "00000111" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 223 23 first + 9480 "00011000" // ST.s16 r3, [p7, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9481 "01110111" // /* MW 3 */ + 9482 "00100100" // /* MW 2 */ + 9483 "00000111" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 355 4 first + 9484 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9485 "00000001" // /* MW 5 */ + 9486 "00000000" // /* MW 4 */ + 9487 "00000000" // /* MW 3 */ + 9488 "11111000" // /* MW 2 */ + 9489 "11111111" // /* MW 1 */ + 9490 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9491 "00000000" // /* MW 1 */ + 9492 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9493 "00000000" // /* MW 1 */ + 9494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9495 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 355 4 + 9496 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9497 "00000000" // /* MW 3 */ + 9498 "00101000" // /* MW 2 */ + 9499 "00010000" // /* MW 1 */ +.delay_slot + 9500 "11111000" // MOV p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9501 "11000000" // /* MW 3 */ + 9502 "01100010" // /* MW 2 */ + 9503 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9505 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9507 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9509 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E__end +.label __ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E___func_end0 + 9511 "00000000" // /* MW 1 */ +.label __Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_reducesum _Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 6 "superkernels.cpp" 405 first +.src_ref 6 "superkernels.cpp" 410 6 +.function_start + 9520 "01000100" // MOVXM p3, #508416 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9521 "00000000" // /* MW 5 */ + 9522 "11000100" // /* MW 4 */ + 9523 "11000110" // /* MW 3 */ + 9524 "00000111" // /* MW 2 */ + 9525 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 410 6 first + 9526 "11010100" // LDA r16, [p3]; MOV r3, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9527 "01000001" // /* MW 5 */ + 9528 "10101110" // /* MW 4 */ + 9529 "11010001" // /* MW 3 */ + 9530 "11000010" // /* MW 2 */ + 9531 "01100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 405 + 9532 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9533 "00000001" // /* MW 5 */ + 9534 "00000000" // /* MW 4 */ + 9535 "00000000" // /* MW 3 */ + 9536 "00001000" // /* MW 2 */ + 9537 "00000000" // /* MW 1 */ + 9538 "00000010" // ST p7, [sp, #-20]; MOV r1, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9539 "01110000" // /* MW 7 */ + 9540 "11010000" // /* MW 6 */ + 9541 "00101011" // /* MW 5 */ + 9542 "00000000" // /* MW 4 */ + 9543 "10110000" // /* MW 3 */ + 9544 "11110011" // /* MW 2 */ + 9545 "11111101" // /* MW 1 */ + 9546 "00000010" // ST p6, [sp, #-4]; MOV r17, CORE_ID /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9547 "01110000" // /* MW 7 */ + 9548 "01110000" // /* MW 6 */ + 9549 "00101101" // /* MW 5 */ + 9550 "00000010" // /* MW 4 */ + 9551 "10110000" // /* MW 3 */ + 9552 "11100011" // /* MW 2 */ + 9553 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 1 "io_buffer_main.h" 348 51 +.src_ref 6 "superkernels.cpp" 407 22 first + 9554 "00111010" // ST r13, [sp, #-8]; EXTEND.u8 r17, r17; MOV r15, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9555 "01111001" // /* MW 9 */ + 9556 "01100000" // /* MW 8 */ + 9557 "11101010" // /* MW 7 */ + 9558 "10000001" // /* MW 6 */ + 9559 "00010100" // /* MW 5 */ + 9560 "00100011" // /* MW 4 */ + 9561 "10110000" // /* MW 3 */ + 9562 "00110110" // /* MW 2 */ + 9563 "11111111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 517 + 9564 "00000010" // ST r1, [sp, #-16]; MOV r14, lr /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9565 "01110000" // /* MW 7 */ + 9566 "11110000" // /* MW 6 */ + 9567 "11001000" // /* MW 5 */ + 9568 "00000001" // /* MW 4 */ + 9569 "10110000" // /* MW 3 */ + 9570 "00000110" // /* MW 2 */ + 9571 "11111110" // /* MW 1 */ + 9572 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9573 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 410 6 first +.src_ref 6 "superkernels.cpp" 410 16 first + 9574 "10000100" // JNZ r16, #9776 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9776 delay_slots=5 */ + 9575 "00000001" // /* MW 5 */ + 9576 "01000000" // /* MW 4 */ + 9577 "00011000" // /* MW 3 */ + 9578 "00010011" // /* MW 2 */ + 9579 "10000000" // /* MW 1 */ +.delay_slot + 9580 "10011000" // ST r3, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9581 "01110101" // /* MW 3 */ + 9582 "11110100" // /* MW 2 */ + 9583 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 6 "superkernels.cpp" 413 4 +.delay_slot + 9584 "00111010" // MOVS p7, p0; MOVXM p0, #508480 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9585 "00010001" // /* MW 9 */ + 9586 "00100000" // /* MW 8 */ + 9587 "00110001" // /* MW 7 */ + 9588 "11110000" // /* MW 6 */ + 9589 "00000001" // /* MW 5 */ + 9590 "00000000" // /* MW 4 */ + 9591 "01100000" // /* MW 3 */ + 9592 "00010001" // /* MW 2 */ + 9593 "11110000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 407 11 +.delay_slot + 9594 "01000100" // MOVXM p6, #508424 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9595 "00010000" // /* MW 5 */ + 9596 "11000100" // /* MW 4 */ + 9597 "11001100" // /* MW 3 */ + 9598 "00000111" // /* MW 2 */ + 9599 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 407 30 first +.delay_slot + 9600 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9601 "11111011" // /* MW 3 */ + 9602 "01100011" // /* MW 2 */ + 9603 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 407 11 +.delay_slot + 9604 "10011000" // ST r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9605 "00110001" // /* MW 3 */ + 9606 "00000110" // /* MW 2 */ + 9607 "00001110" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9608 "01000100" // MOVXM p6, #508464 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9609 "01100000" // /* MW 5 */ + 9610 "11000100" // /* MW 4 */ + 9611 "11001100" // /* MW 3 */ + 9612 "00000111" // /* MW 2 */ + 9613 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9614 "10111010" // ST.s8 r16, [p6]; MOVXM p6, #508460 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9615 "00010000" // /* MW 9 */ + 9616 "00010110" // /* MW 8 */ + 9617 "00110001" // /* MW 7 */ + 9618 "11110011" // /* MW 6 */ + 9619 "00000001" // /* MW 5 */ + 9620 "00000000" // /* MW 4 */ + 9621 "11100000" // /* MW 3 */ + 9622 "11000000" // /* MW 2 */ + 9623 "11000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9625 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 413 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 9626 "00000100" // JL #7168 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7168 delay_slots=5 */ + 9627 "00000001" // /* MW 5 */ + 9628 "00000000" // /* MW 4 */ + 9629 "00000000" // /* MW 3 */ + 9630 "00001110" // /* MW 2 */ + 9631 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9633 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9634 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9635 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9636 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9637 "00110001" // /* MW 3 */ + 9638 "00100000" // /* MW 2 */ + 9639 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 9640 "00100010" // MOVX r16, #1; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9641 "00011100" // /* MW 7 */ + 9642 "00000000" // /* MW 6 */ + 9643 "00000000" // /* MW 5 */ + 9644 "00101001" // /* MW 4 */ + 9645 "00000000" // /* MW 3 */ + 9646 "00000001" // /* MW 2 */ + 9647 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.src_ref 6 "superkernels.cpp" 416 15 +.delay_slot + 9648 "11100001" // NOPA; NOPB; ST r16, [p6]; NOPX; MOV p6, p1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9649 "00000000" // /* MW 15 */ + 9650 "00000000" // /* MW 14 */ + 9651 "01111000" // /* MW 13 */ + 9652 "01100000" // /* MW 12 */ + 9653 "00110001" // /* MW 11 */ + 9654 "00000011" // /* MW 10 */ + 9655 "00000000" // /* MW 9 */ + 9656 "10000000" // /* MW 8 */ + 9657 "00010001" // /* MW 7 */ + 9658 "00000110" // /* MW 6 */ + 9659 "00100110" // /* MW 5 */ + 9660 "00000000" // /* MW 4 */ + 9661 "11110000" // /* MW 3 */ + 9662 "00101100" // /* MW 2 */ + 9663 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 414 46 +.return_address + 9664 "01000100" // MOVXM p3, #508480 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9665 "10000000" // /* MW 5 */ + 9666 "11000100" // /* MW 4 */ + 9667 "11000110" // /* MW 3 */ + 9668 "00000111" // /* MW 2 */ + 9669 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 414 18 +.src_ref 6 "superkernels.cpp" 414 46 first + 9670 "10111010" // LDA r18, [p3], #4; MOVXM p1, #508424 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9671 "00010000" // /* MW 9 */ + 9672 "00000100" // /* MW 8 */ + 9673 "10110001" // /* MW 7 */ + 9674 "11110000" // /* MW 6 */ + 9675 "00000001" // /* MW 5 */ + 9676 "00000000" // /* MW 4 */ + 9677 "11010000" // /* MW 3 */ + 9678 "11001010" // /* MW 2 */ + 9679 "01100011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 414 18 + 9680 "10011000" // LDA r20, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9681 "10010110" // /* MW 3 */ + 9682 "00000110" // /* MW 2 */ + 9683 "00000001" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 414 75 + 9684 "10011000" // LDA r17, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9685 "00110110" // /* MW 3 */ + 9686 "00011110" // /* MW 2 */ + 9687 "00000011" // /* MW 1 */ + 9688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9689 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 414 105 + 9690 "10011000" // LDA r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9691 "00010110" // /* MW 3 */ + 9692 "00000110" // /* MW 2 */ + 9693 "00000011" // /* MW 1 */ + 9694 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9695 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 415 35 first + 9696 "10011000" // LDA r19, [p3, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9697 "01110110" // /* MW 3 */ + 9698 "00010110" // /* MW 2 */ + 9699 "00000011" // /* MW 1 */ + 9700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9701 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 414 27 first + 9702 "10011000" // MUL r18, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9703 "01001111" // /* MW 3 */ + 9704 "10100101" // /* MW 2 */ + 9705 "00010100" // /* MW 1 */ + 9706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9707 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 414 56 + 9708 "10011000" // MUL r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9709 "00101111" // /* MW 3 */ + 9710 "01100011" // /* MW 2 */ + 9711 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 415 16 +.src_ref 6 "superkernels.cpp" 444 7 + 9712 "01000100" // MOVXM p1, #508436 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9713 "00101000" // /* MW 5 */ + 9714 "11000100" // /* MW 4 */ + 9715 "11000010" // /* MW 3 */ + 9716 "00000111" // /* MW 2 */ + 9717 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 414 86 + 9718 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9719 "00001111" // /* MW 3 */ + 9720 "01100001" // /* MW 2 */ + 9721 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 414 16 +.src_ref 6 "superkernels.cpp" 415 16 first + 9722 "00111010" // ST r19, [p1]; MOVXM p2, #508428 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9723 "00010001" // /* MW 9 */ + 9724 "00000110" // /* MW 8 */ + 9725 "00110001" // /* MW 7 */ + 9726 "11110001" // /* MW 6 */ + 9727 "00000001" // /* MW 5 */ + 9728 "00000000" // /* MW 4 */ + 9729 "00110000" // /* MW 3 */ + 9730 "11001110" // /* MW 2 */ + 9731 "00100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 414 16 first +.src_ref 6 "superkernels.cpp" 416 15 + 9732 "00000010" // ST r16, [p2]; MOV dj0, #40 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9733 "01010000" // /* MW 7 */ + 9734 "00101000" // /* MW 6 */ + 9735 "01000000" // /* MW 5 */ + 9736 "00000000" // /* MW 4 */ + 9737 "00110000" // /* MW 3 */ + 9738 "11000010" // /* MW 2 */ + 9739 "01000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 416 15 first + 9740 "10011000" // LDA el0, [p6, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9741 "00101110" // /* MW 3 */ + 9742 "00000000" // /* MW 2 */ + 9743 "00000110" // /* MW 1 */ + 9744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9745 "00000000" // /* MW 1 */ + 9746 "10000100" // J #9792 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9792 delay_slots=5 */ + 9747 "00000000" // /* MW 5 */ + 9748 "00000000" // /* MW 4 */ + 9749 "00100000" // /* MW 3 */ + 9750 "00010011" // /* MW 2 */ + 9751 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9753 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9755 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9756 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9757 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 416 13 +.delay_slot + 9758 "01000100" // MOVXM p0, #508456 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9759 "01010000" // /* MW 5 */ + 9760 "11000100" // /* MW 4 */ + 9761 "11000000" // /* MW 3 */ + 9762 "00000111" // /* MW 2 */ + 9763 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 416 13 +.delay_slot + 9764 "00110110" // NOPA; NOPB; ST el0, [p0]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9765 "11000001" // /* MW 11 */ + 9766 "00010100" // /* MW 10 */ + 9767 "00000010" // /* MW 9 */ + 9768 "00000000" // /* MW 8 */ + 9769 "00000000" // /* MW 7 */ + 9770 "00000000" // /* MW 6 */ + 9771 "00100000" // /* MW 5 */ + 9772 "00000000" // /* MW 4 */ + 9773 "11110000" // /* MW 3 */ + 9774 "00101100" // /* MW 2 */ + 9775 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_256 +.src_ref 6 "superkernels.cpp" 441 7 +.src_ref 6 "superkernels.cpp" 444 7 + 9776 "01000100" // MOVXM p1, #508436 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9777 "00101000" // /* MW 5 */ + 9778 "11000100" // /* MW 4 */ + 9779 "11000010" // /* MW 3 */ + 9780 "00000111" // /* MW 2 */ + 9781 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 441 7 first + 9782 "10111010" // LDA r19, [p1]; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9783 "01111110" // /* MW 9 */ + 9784 "10100101" // /* MW 8 */ + 9785 "00000001" // /* MW 7 */ + 9786 "00000000" // /* MW 6 */ + 9787 "00010000" // /* MW 5 */ + 9788 "00000000" // /* MW 4 */ + 9789 "11010000" // /* MW 3 */ + 9790 "11001110" // /* MW 2 */ + 9791 "00100000" // /* MW 1 */ +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_272 + 9792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9793 "00000000" // /* MW 1 */ + 9794 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9795 "00000000" // /* MW 1 */ + 9796 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9797 "00000000" // /* MW 1 */ + 9798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9799 "00000000" // /* MW 1 */ + 9800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9801 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 441 19 + 9802 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9803 "00000101" // /* MW 3 */ + 9804 "00100000" // /* MW 2 */ + 9805 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 441 19 + 9806 "10011000" // NE r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9807 "00001000" // /* MW 3 */ + 9808 "11100001" // /* MW 2 */ + 9809 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 441 25 + 9810 "10000100" // JNZ r16, #9984 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9984 delay_slots=5 */ + 9811 "00000001" // /* MW 5 */ + 9812 "01000000" // /* MW 4 */ + 9813 "10000000" // /* MW 3 */ + 9814 "00010011" // /* MW 2 */ + 9815 "10000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 first +.delay_slot + 9816 "00011000" // ADD.NC p6, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9817 "10000110" // /* MW 3 */ + 9818 "01100111" // /* MW 2 */ + 9819 "00011110" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9821 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9823 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9824 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9825 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9827 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 441 29 + 9828 "00111010" // ST r15, [sp, #-24]; MOVXM p1, #508420 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9829 "00010001" // /* MW 9 */ + 9830 "00000010" // /* MW 8 */ + 9831 "10110001" // /* MW 7 */ + 9832 "11110000" // /* MW 6 */ + 9833 "00000001" // /* MW 5 */ + 9834 "00000000" // /* MW 4 */ + 9835 "10110000" // /* MW 3 */ + 9836 "00111110" // /* MW 2 */ + 9837 "11111101" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 441 29 first +.src_ref 6 "superkernels.cpp" 441 60 + 9838 "10111010" // LDA r16, [p1]; MOVXM p1, #508512 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9839 "00010000" // /* MW 9 */ + 9840 "00110000" // /* MW 8 */ + 9841 "10110001" // /* MW 7 */ + 9842 "11110000" // /* MW 6 */ + 9843 "00000001" // /* MW 5 */ + 9844 "00000000" // /* MW 4 */ + 9845 "11010000" // /* MW 3 */ + 9846 "11000010" // /* MW 2 */ + 9847 "00100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 441 60 + 9848 "10011000" // LDA.u16 r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9849 "00111010" // /* MW 3 */ + 9850 "00000100" // /* MW 2 */ + 9851 "00000001" // /* MW 1 */ + 9852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9853 "00000000" // /* MW 1 */ + 9854 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9855 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 441 41 +.no_stack_arguments + 9856 "00000100" // JL #13472 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=13472 delay_slots=5 */ + 9857 "00000001" // /* MW 5 */ + 9858 "00000000" // /* MW 4 */ + 9859 "01010000" // /* MW 3 */ + 9860 "00011010" // /* MW 2 */ + 9861 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 441 41 +.delay_slot + 9862 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9863 "00000001" // /* MW 3 */ + 9864 "00011010" // /* MW 2 */ + 9865 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9867 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 441 41 +.delay_slot + 9868 "10011000" // LT r27, r16, r13 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9869 "11011010" // /* MW 3 */ + 9870 "00110110" // /* MW 2 */ + 9871 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 441 41 +.src_ref 6 "superkernels.cpp" 441 41 +.delay_slot + 9872 "11100100" // SUB r17, r13, r16; MOV r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9873 "01000001" // /* MW 5 */ + 9874 "10111011" // /* MW 4 */ + 9875 "00110111" // /* MW 3 */ + 9876 "01100000" // /* MW 2 */ + 9877 "01101100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 441 41 +.delay_slot + 9878 "01111010" // NOPA; NOPS; SEL.EQZ r0, r16, r17, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9879 "00010010" // /* MW 9 */ + 9880 "00000001" // /* MW 8 */ + 9881 "00000100" // /* MW 7 */ + 9882 "00000000" // /* MW 6 */ + 9883 "01011011" // /* MW 5 */ + 9884 "00000001" // /* MW 4 */ + 9885 "11110000" // /* MW 3 */ + 9886 "00101100" // /* MW 2 */ + 9887 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 441 41 +.src_ref 6 "superkernels.cpp" 441 41 +.return_address + 9888 "11100100" // SUB r16, r13, r3; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9889 "01000001" // /* MW 5 */ + 9890 "10101111" // /* MW 4 */ + 9891 "00111101" // /* MW 3 */ + 9892 "00000110" // /* MW 2 */ + 9893 "01101100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 441 41 + 9894 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9895 "00000010" // /* MW 3 */ + 9896 "11100001" // /* MW 2 */ + 9897 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 441 6 +.src_ref 6 "superkernels.cpp" 441 71 + 9898 "10000100" // JNZ r16, #9984 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9984 delay_slots=5 */ + 9899 "00000001" // /* MW 5 */ + 9900 "01000000" // /* MW 4 */ + 9901 "10000000" // /* MW 3 */ + 9902 "00010011" // /* MW 2 */ + 9903 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 444 7 +.delay_slot + 9904 "01000100" // MOVXM p1, #508436 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9905 "00101000" // /* MW 5 */ + 9906 "11000100" // /* MW 4 */ + 9907 "11000010" // /* MW 3 */ + 9908 "00000111" // /* MW 2 */ + 9909 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 1 "io_buffer_main.h" 348 51 +.delay_slot + 9910 "00011000" // LDA r15, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9911 "11110001" // /* MW 3 */ + 9912 "11101001" // /* MW 2 */ + 9913 "00000111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9915 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9917 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9919 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 1 "io_buffer_main.h" 419 8 + 9920 "11100100" // MOVX r16, #-1; MOV p2, p6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9921 "10000001" // /* MW 5 */ + 9922 "11011001" // /* MW 4 */ + 9923 "10100100" // /* MW 3 */ + 9924 "00011111" // /* MW 2 */ + 9925 "11111100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 first + 9926 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9927 "01110110" // /* MW 3 */ + 9928 "11111111" // /* MW 2 */ + 9929 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 9930 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9931 "00110110" // /* MW 3 */ + 9932 "11111110" // /* MW 2 */ + 9933 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 9934 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9935 "01010110" // /* MW 3 */ + 9936 "11111110" // /* MW 2 */ + 9937 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 9938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9939 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 9940 "10011000" // LDA r17, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9941 "00110110" // /* MW 3 */ + 9942 "01000110" // /* MW 2 */ + 9943 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9945 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9947 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9949 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9951 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9952 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9953 "00010010" // /* MW 3 */ + 9954 "10100011" // /* MW 2 */ + 9955 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9956 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9957 "00110001" // /* MW 3 */ + 9958 "00000110" // /* MW 2 */ + 9959 "00001010" // /* MW 1 */ + 9960 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9961 "00000000" // /* MW 1 */ + 9962 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9963 "00000000" // /* MW 1 */ + 9964 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9965 "00000000" // /* MW 1 */ + 9966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9967 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 9968 "11100001" // NOPA; NOPB; NOPS; ACQ r17, r16; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9969 "00000000" // /* MW 15 */ + 9970 "00000000" // /* MW 14 */ + 9971 "01111000" // /* MW 13 */ + 9972 "10100101" // /* MW 12 */ + 9973 "00000001" // /* MW 11 */ + 9974 "01000000" // /* MW 10 */ + 9975 "10011000" // /* MW 9 */ + 9976 "00100010" // /* MW 8 */ + 9977 "01011011" // /* MW 7 */ + 9978 "00000001" // /* MW 6 */ + 9979 "00100000" // /* MW 5 */ + 9980 "00000000" // /* MW 4 */ + 9981 "11110000" // /* MW 3 */ + 9982 "00101100" // /* MW 2 */ + 9983 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_464 +.src_ref 6 "superkernels.cpp" 444 19 + 9984 "00011000" // MOVX r17, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9985 "00001001" // /* MW 3 */ + 9986 "00100010" // /* MW 2 */ + 9987 "00010000" // /* MW 1 */ + 9988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9989 "00000000" // /* MW 1 */ + 9990 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9991 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 444 7 first + 9992 "10011000" // LDA r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9993 "00010110" // /* MW 3 */ + 9994 "00000110" // /* MW 2 */ + 9995 "00000001" // /* MW 1 */ + 9996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9997 "00000000" // /* MW 1 */ + 9998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9999 "00000000" // /* MW 1 */ + 10000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10001 "00000000" // /* MW 1 */ + 10002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10003 "00000000" // /* MW 1 */ + 10004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10005 "00000000" // /* MW 1 */ + 10006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10007 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 444 19 + 10008 "10011000" // NE r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10009 "00001000" // /* MW 3 */ + 10010 "01100011" // /* MW 2 */ + 10011 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 444 25 + 10012 "10000100" // JNZ r17, #10208 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10208 delay_slots=5 */ + 10013 "00000001" // /* MW 5 */ + 10014 "01000000" // /* MW 4 */ + 10015 "11110000" // /* MW 3 */ + 10016 "00010011" // /* MW 2 */ + 10017 "10001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10018 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10019 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10021 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10023 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10025 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10027 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 444 29 + 10028 "00111010" // ST r15, [sp, #-24]; MOVXM p1, #508440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10029 "00010001" // /* MW 9 */ + 10030 "00001100" // /* MW 8 */ + 10031 "10110001" // /* MW 7 */ + 10032 "11110000" // /* MW 6 */ + 10033 "00000001" // /* MW 5 */ + 10034 "00000000" // /* MW 4 */ + 10035 "10110000" // /* MW 3 */ + 10036 "00111110" // /* MW 2 */ + 10037 "11111101" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 444 29 +.src_ref 6 "superkernels.cpp" 444 60 + 10038 "10111010" // LDA r16, [p1]; MOVXM p1, #508512 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10039 "00010000" // /* MW 9 */ + 10040 "00110000" // /* MW 8 */ + 10041 "10110001" // /* MW 7 */ + 10042 "11110000" // /* MW 6 */ + 10043 "00000001" // /* MW 5 */ + 10044 "00000000" // /* MW 4 */ + 10045 "11010000" // /* MW 3 */ + 10046 "11000010" // /* MW 2 */ + 10047 "00100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 444 60 + 10048 "10011000" // LDA.u16 r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10049 "00111010" // /* MW 3 */ + 10050 "00000100" // /* MW 2 */ + 10051 "00000001" // /* MW 1 */ + 10052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10053 "00000000" // /* MW 1 */ + 10054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10055 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 444 41 +.no_stack_arguments + 10056 "00000100" // JL #13472 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=13472 delay_slots=5 */ + 10057 "00000001" // /* MW 5 */ + 10058 "00000000" // /* MW 4 */ + 10059 "01010000" // /* MW 3 */ + 10060 "00011010" // /* MW 2 */ + 10061 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 444 41 +.delay_slot + 10062 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10063 "00000001" // /* MW 3 */ + 10064 "00011010" // /* MW 2 */ + 10065 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10066 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10067 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 444 41 +.delay_slot + 10068 "00101100" // NOPA; LT r27, r16, r13 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10069 "10110101" // /* MW 5 */ + 10070 "01101101" // /* MW 4 */ + 10071 "11111000" // /* MW 3 */ + 10072 "00101100" // /* MW 2 */ + 10073 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 444 41 +.src_ref 6 "superkernels.cpp" 444 41 +.delay_slot + 10074 "11100100" // SUB r17, r13, r16; MOV r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10075 "01000001" // /* MW 5 */ + 10076 "10111011" // /* MW 4 */ + 10077 "00110111" // /* MW 3 */ + 10078 "01100000" // /* MW 2 */ + 10079 "01101100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 444 41 +.delay_slot + 10080 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r0, r16, r17, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10081 "00000000" // /* MW 15 */ + 10082 "00000000" // /* MW 14 */ + 10083 "01111000" // /* MW 13 */ + 10084 "10100101" // /* MW 12 */ + 10085 "00000001" // /* MW 11 */ + 10086 "10010000" // /* MW 10 */ + 10087 "00001000" // /* MW 9 */ + 10088 "00100000" // /* MW 8 */ + 10089 "01011011" // /* MW 7 */ + 10090 "00000001" // /* MW 6 */ + 10091 "00100000" // /* MW 5 */ + 10092 "00000000" // /* MW 4 */ + 10093 "11110000" // /* MW 3 */ + 10094 "00101100" // /* MW 2 */ + 10095 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 1 "io_buffer_main.h" 348 51 +.src_ref 6 "superkernels.cpp" 444 41 +.src_ref 6 "superkernels.cpp" 444 41 +.return_address + 10096 "10111010" // LDA r15, [sp, #-24]; SUB r16, r13, r3; MOV r27, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10097 "01111000" // /* MW 9 */ + 10098 "11010000" // /* MW 8 */ + 10099 "01101011" // /* MW 7 */ + 10100 "10001111" // /* MW 6 */ + 10101 "00000001" // /* MW 5 */ + 10102 "00011011" // /* MW 4 */ + 10103 "00100000" // /* MW 3 */ + 10104 "00111110" // /* MW 2 */ + 10105 "11111101" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 444 41 + 10106 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10107 "00000010" // /* MW 3 */ + 10108 "11100001" // /* MW 2 */ + 10109 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 444 6 +.src_ref 6 "superkernels.cpp" 444 71 + 10110 "10000100" // JNZ r16, #10192 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10192 delay_slots=5 */ + 10111 "00000001" // /* MW 5 */ + 10112 "01000000" // /* MW 4 */ + 10113 "11101000" // /* MW 3 */ + 10114 "00010011" // /* MW 2 */ + 10115 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 447 7 +.delay_slot + 10116 "01000100" // MOVXM p1, #508436 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10117 "00101000" // /* MW 5 */ + 10118 "11000100" // /* MW 4 */ + 10119 "11000010" // /* MW 3 */ + 10120 "00000111" // /* MW 2 */ + 10121 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10123 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10125 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10127 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10129 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 1 "io_buffer_main.h" 419 8 + 10130 "11100100" // MOVX r16, #-1; MOV p2, p6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10131 "10000001" // /* MW 5 */ + 10132 "11011001" // /* MW 4 */ + 10133 "10100100" // /* MW 3 */ + 10134 "00011111" // /* MW 2 */ + 10135 "11111100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 first + 10136 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10137 "01110110" // /* MW 3 */ + 10138 "11111111" // /* MW 2 */ + 10139 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 10140 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10141 "00110110" // /* MW 3 */ + 10142 "11111110" // /* MW 2 */ + 10143 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 10144 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10145 "01010110" // /* MW 3 */ + 10146 "11111110" // /* MW 2 */ + 10147 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 10148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10149 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 10150 "10011000" // LDA r17, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10151 "00110110" // /* MW 3 */ + 10152 "01000110" // /* MW 2 */ + 10153 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 10154 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10155 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 10156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10157 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 10158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10159 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 10160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10161 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 10162 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10163 "00010010" // /* MW 3 */ + 10164 "10100011" // /* MW 2 */ + 10165 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10166 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10167 "00110001" // /* MW 3 */ + 10168 "00000110" // /* MW 2 */ + 10169 "00001010" // /* MW 1 */ + 10170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10171 "00000000" // /* MW 1 */ + 10172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10173 "00000000" // /* MW 1 */ + 10174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10175 "00000000" // /* MW 1 */ + 10176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10177 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 10178 "01111110" // NOPA; NOPB; NOPS; ACQ r17, r16; NOPM /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 10179 "01100000" // /* MW 13 */ + 10180 "00101011" // /* MW 12 */ + 10181 "00000000" // /* MW 11 */ + 10182 "10101111" // /* MW 10 */ + 10183 "00110100" // /* MW 9 */ + 10184 "00000000" // /* MW 8 */ + 10185 "00001000" // /* MW 7 */ + 10186 "01010011" // /* MW 6 */ + 10187 "00100100" // /* MW 5 */ + 10188 "00000000" // /* MW 4 */ + 10189 "11110000" // /* MW 3 */ + 10190 "00101100" // /* MW 2 */ + 10191 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_672 + 10192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10193 "00000000" // /* MW 1 */ + 10194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10195 "00000000" // /* MW 1 */ + 10196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10197 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 447 7 first + 10198 "10111010" // LDA r16, [p1]; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10199 "01111110" // /* MW 9 */ + 10200 "10100101" // /* MW 8 */ + 10201 "00000001" // /* MW 7 */ + 10202 "00000000" // /* MW 6 */ + 10203 "00010000" // /* MW 5 */ + 10204 "00000000" // /* MW 4 */ + 10205 "11010000" // /* MW 3 */ + 10206 "11000010" // /* MW 2 */ + 10207 "00100000" // /* MW 1 */ +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_688 +.src_ref 6 "superkernels.cpp" 447 42 +.src_ref 6 "superkernels.cpp" 496 43 +.src_ref 6 "superkernels.cpp" 499 15 +.src_ref 6 "superkernels.cpp" 502 43 +.src_ref 6 "superkernels.cpp" 505 15 +.src_ref 6 "superkernels.cpp" 508 44 +.src_ref 6 "superkernels.cpp" 511 16 +.src_ref 6 "superkernels.cpp" 515 14 + 10208 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10209 "00000001" // /* MW 3 */ + 10210 "00011010" // /* MW 2 */ + 10211 "00010000" // /* MW 1 */ + 10212 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10213 "00000000" // /* MW 1 */ + 10214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10215 "00000000" // /* MW 1 */ + 10216 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10217 "00000000" // /* MW 1 */ + 10218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10219 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 447 19 + 10220 "00011000" // MOVX r17, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10221 "00010001" // /* MW 3 */ + 10222 "00100010" // /* MW 2 */ + 10223 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 447 19 + 10224 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10225 "00001000" // /* MW 3 */ + 10226 "01100001" // /* MW 2 */ + 10227 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 447 25 + 10228 "10000100" // JNZ r16, #10448 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10448 delay_slots=5 */ + 10229 "00000001" // /* MW 5 */ + 10230 "01000000" // /* MW 4 */ + 10231 "01101000" // /* MW 3 */ + 10232 "00010100" // /* MW 2 */ + 10233 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 447 29 +.delay_slot + 10234 "01000100" // MOVXM p1, #508444 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10235 "00111000" // /* MW 5 */ + 10236 "11000100" // /* MW 4 */ + 10237 "11000010" // /* MW 3 */ + 10238 "00000111" // /* MW 2 */ + 10239 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 447 61 +.delay_slot + 10240 "01000100" // MOVXM p2, #508512 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10241 "11000000" // /* MW 5 */ + 10242 "11000100" // /* MW 4 */ + 10243 "11000100" // /* MW 3 */ + 10244 "00000111" // /* MW 2 */ + 10245 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10247 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10248 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10249 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10251 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 447 29 + 10252 "10011000" // LDA r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10253 "00010110" // /* MW 3 */ + 10254 "00000110" // /* MW 2 */ + 10255 "00000001" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 447 61 + 10256 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10257 "00111010" // /* MW 3 */ + 10258 "00000100" // /* MW 2 */ + 10259 "00000010" // /* MW 1 */ + 10260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10261 "00000000" // /* MW 1 */ + 10262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10263 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 447 42 +.no_stack_arguments + 10264 "00000100" // JL #13472 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=13472 delay_slots=5 */ + 10265 "00000001" // /* MW 5 */ + 10266 "00000000" // /* MW 4 */ + 10267 "01010000" // /* MW 3 */ + 10268 "00011010" // /* MW 2 */ + 10269 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10271 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10273 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 447 42 +.delay_slot + 10274 "10011000" // LT r27, r16, r13 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10275 "11011010" // /* MW 3 */ + 10276 "00110110" // /* MW 2 */ + 10277 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 447 42 +.delay_slot + 10278 "01011100" // ST r27, [sp, #-24]; SUB r17, r13, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10279 "00000011" // /* MW 5 */ + 10280 "11000110" // /* MW 4 */ + 10281 "10110110" // /* MW 3 */ + 10282 "01101110" // /* MW 2 */ + 10283 "11111101" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 447 42 +.delay_slot + 10284 "00011000" // SEL.EQZ r0, r16, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10285 "00010010" // /* MW 3 */ + 10286 "00000001" // /* MW 2 */ + 10287 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 447 42 +.src_ref 6 "superkernels.cpp" 447 42 +.return_address + 10288 "00101100" // LDA r27, [sp, #-24]; SUB r16, r13, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10289 "01100011" // /* MW 5 */ + 10290 "11000000" // /* MW 4 */ + 10291 "00100110" // /* MW 3 */ + 10292 "01101110" // /* MW 2 */ + 10293 "11111101" // /* MW 1 */ + 10294 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10295 "00000000" // /* MW 1 */ + 10296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10297 "00000000" // /* MW 1 */ + 10298 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10299 "00000000" // /* MW 1 */ + 10300 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10301 "00000000" // /* MW 1 */ + 10302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10303 "00000000" // /* MW 1 */ + 10304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10305 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 447 42 + 10306 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10307 "00000010" // /* MW 3 */ + 10308 "11100001" // /* MW 2 */ + 10309 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 447 6 +.src_ref 6 "superkernels.cpp" 447 72 + 10310 "10000100" // JNZ r16, #10416 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10416 delay_slots=5 */ + 10311 "00000001" // /* MW 5 */ + 10312 "01000000" // /* MW 4 */ + 10313 "01011000" // /* MW 3 */ + 10314 "00010100" // /* MW 2 */ + 10315 "10000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.delay_slot + 10316 "11111000" // MOV p1, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10317 "11000000" // /* MW 3 */ + 10318 "01101110" // /* MW 2 */ + 10319 "00011001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10320 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10321 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10323 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10325 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10327 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 first +.src_ref 6 "superkernels.cpp" 494 2 + 10328 "10111010" // LDA r27, [p6], #-4; MOVXM p2, #508480 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10329 "00010000" // /* MW 9 */ + 10330 "00100000" // /* MW 8 */ + 10331 "00110001" // /* MW 7 */ + 10332 "11110001" // /* MW 6 */ + 10333 "00000001" // /* MW 5 */ + 10334 "00000000" // /* MW 4 */ + 10335 "11010000" // /* MW 3 */ + 10336 "11101110" // /* MW 2 */ + 10337 "11011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 +.src_ref 6 "superkernels.cpp" 496 7 + 10338 "10111010" // LDA r17, [p6], #-4; MOVXM p7, #508436 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10339 "00010000" // /* MW 9 */ + 10340 "00001010" // /* MW 8 */ + 10341 "10110001" // /* MW 7 */ + 10342 "11110011" // /* MW 6 */ + 10343 "00000001" // /* MW 5 */ + 10344 "00000000" // /* MW 4 */ + 10345 "11010000" // /* MW 3 */ + 10346 "11000110" // /* MW 2 */ + 10347 "11011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 1 "io_buffer_main.h" 242 80 +.src_ref 1 "io_buffer_main.h" 348 51 +.src_ref 6 "superkernels.cpp" 452 45 +.src_ref 6 "superkernels.cpp" 496 19 + 10348 "10111010" // LDA r18, [p6], #-4; MOVX r15, #1; MOV r1, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10349 "01111000" // /* MW 9 */ + 10350 "11010000" // /* MW 8 */ + 10351 "00101011" // /* MW 7 */ + 10352 "00101000" // /* MW 6 */ + 10353 "11110000" // /* MW 5 */ + 10354 "00000000" // /* MW 4 */ + 10355 "11010000" // /* MW 3 */ + 10356 "11001010" // /* MW 2 */ + 10357 "11011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 10358 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10359 "11111101" // /* MW 3 */ + 10360 "11100000" // /* MW 2 */ + 10361 "00010111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first +.aggressive_scheduled_block_id 4 +.noswbrkpt + 10362 "10011000" // LDA r17, [p6, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10363 "00110110" // /* MW 3 */ + 10364 "01000110" // /* MW 2 */ + 10365 "00000110" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10367 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10369 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10371 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10373 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10374 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10375 "00010010" // /* MW 3 */ + 10376 "10100011" // /* MW 2 */ + 10377 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10378 "10011000" // ST r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10379 "00110001" // /* MW 3 */ + 10380 "00000110" // /* MW 2 */ + 10381 "00001110" // /* MW 1 */ + 10382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10383 "00000000" // /* MW 1 */ + 10384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10385 "00000000" // /* MW 1 */ + 10386 "10000100" // J #10480 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10480 delay_slots=5 */ + 10387 "00000000" // /* MW 5 */ + 10388 "00000000" // /* MW 4 */ + 10389 "01111000" // /* MW 3 */ + 10390 "00010100" // /* MW 2 */ + 10391 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 1 "io_buffer_main.h" 348 51 +.delay_slot + 10392 "11111000" // MOV p6, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10393 "10100000" // /* MW 3 */ + 10394 "01100000" // /* MW 2 */ + 10395 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first +.delay_slot + 10396 "00011000" // ACQ r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10397 "00001000" // /* MW 3 */ + 10398 "01010011" // /* MW 2 */ + 10399 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10401 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10403 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10404 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10405 "10000001" // /* MW 11 */ + 10406 "10101101" // /* MW 10 */ + 10407 "00000000" // /* MW 9 */ + 10408 "00000000" // /* MW 8 */ + 10409 "00000000" // /* MW 7 */ + 10410 "00000000" // /* MW 6 */ + 10411 "00100000" // /* MW 5 */ + 10412 "00000000" // /* MW 4 */ + 10413 "11110000" // /* MW 3 */ + 10414 "00101100" // /* MW 2 */ + 10415 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_896 +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 1 "io_buffer_main.h" 348 51 + 10416 "00111010" // MOVS p6, r15; J #10480 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=10480 delay_slots=5 */ + 10417 "00100001" // /* MW 9 */ + 10418 "00000000" // /* MW 8 */ + 10419 "00000000" // /* MW 7 */ + 10420 "00011110" // /* MW 6 */ + 10421 "00000101" // /* MW 5 */ + 10422 "00000000" // /* MW 4 */ + 10423 "01100000" // /* MW 3 */ + 10424 "11100001" // /* MW 2 */ + 10425 "11010001" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 496 7 +.delay_slot + 10426 "01000100" // MOVXM p7, #508436 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10427 "00101000" // /* MW 5 */ + 10428 "11000100" // /* MW 4 */ + 10429 "11001110" // /* MW 3 */ + 10430 "00000111" // /* MW 2 */ + 10431 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 494 2 +.delay_slot + 10432 "01000100" // MOVXM p2, #508480 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10433 "10000000" // /* MW 5 */ + 10434 "11000100" // /* MW 4 */ + 10435 "11000100" // /* MW 3 */ + 10436 "00000111" // /* MW 2 */ + 10437 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 452 45 +.src_ref 6 "superkernels.cpp" 496 19 +.delay_slot + 10438 "00011000" // MOVX r15, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10439 "00000101" // /* MW 3 */ + 10440 "00011110" // /* MW 2 */ + 10441 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10443 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10444 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10445 "01100111" // /* MW 3 */ + 10446 "00000001" // /* MW 2 */ + 10447 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_928 +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 6 "superkernels.cpp" 494 2 + 10448 "11100001" // NOPA; NOPB; MOVS p1, p7; MOVXM p2, #508480; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10449 "00000000" // /* MW 15 */ + 10450 "00000000" // /* MW 14 */ + 10451 "00010000" // /* MW 13 */ + 10452 "00100000" // /* MW 12 */ + 10453 "00110001" // /* MW 11 */ + 10454 "11110001" // /* MW 10 */ + 10455 "00000001" // /* MW 9 */ + 10456 "00000000" // /* MW 8 */ + 10457 "10001011" // /* MW 7 */ + 10458 "10011100" // /* MW 6 */ + 10459 "00100001" // /* MW 5 */ + 10460 "00000000" // /* MW 4 */ + 10461 "11110000" // /* MW 3 */ + 10462 "00101100" // /* MW 2 */ + 10463 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 1 "io_buffer_main.h" 348 51 +.src_ref 6 "superkernels.cpp" 452 45 +.src_ref 6 "superkernels.cpp" 496 7 +.src_ref 6 "superkernels.cpp" 496 19 + 10464 "11100001" // MOVA r15, #1; NOPB; MOVS p6, r15; MOVXM p7, #508436; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10465 "00000000" // /* MW 15 */ + 10466 "00000000" // /* MW 14 */ + 10467 "00010000" // /* MW 13 */ + 10468 "00001010" // /* MW 12 */ + 10469 "10110001" // /* MW 11 */ + 10470 "11110011" // /* MW 10 */ + 10471 "00000001" // /* MW 9 */ + 10472 "00000000" // /* MW 8 */ + 10473 "00001011" // /* MW 7 */ + 10474 "10001111" // /* MW 6 */ + 10475 "00100110" // /* MW 5 */ + 10476 "00000000" // /* MW 4 */ + 10477 "00000000" // /* MW 3 */ + 10478 "00101111" // /* MW 2 */ + 10479 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_960 +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 6 "superkernels.cpp" 450 2 + 10480 "01110110" // LDA r16, [p1]; ST p6, [sp, #-24]; MOVXM p3, #508416 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10481 "00010000" // /* MW 11 */ + 10482 "00000000" // /* MW 10 */ + 10483 "10110001" // /* MW 9 */ + 10484 "11110001" // /* MW 8 */ + 10485 "00000001" // /* MW 7 */ + 10486 "10000000" // /* MW 6 */ + 10487 "00011101" // /* MW 5 */ + 10488 "11101011" // /* MW 4 */ + 10489 "11010111" // /* MW 3 */ + 10490 "11000010" // /* MW 2 */ + 10491 "00100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 450 2 first +.src_ref 6 "superkernels.cpp" 452 47 + 10492 "10111010" // LDA r17, [p3]; MOVXM p1, #508428 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10493 "00010000" // /* MW 9 */ + 10494 "00000110" // /* MW 8 */ + 10495 "10110001" // /* MW 7 */ + 10496 "11110000" // /* MW 6 */ + 10497 "00000001" // /* MW 5 */ + 10498 "00000000" // /* MW 4 */ + 10499 "11010000" // /* MW 3 */ + 10500 "11000110" // /* MW 2 */ + 10501 "01100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 452 47 first + 10502 "10011000" // LDA r18, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10503 "01010110" // /* MW 3 */ + 10504 "00000110" // /* MW 2 */ + 10505 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 1 "io_buffer_main.h" 348 51 first + 10506 "10011000" // LDA p1, [p6], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10507 "10011110" // /* MW 3 */ + 10508 "01011100" // /* MW 2 */ + 10509 "00000110" // /* MW 1 */ + 10510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10511 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 494 2 first +.no_stack_arguments + 10512 "00000100" // JL #8000 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8000 delay_slots=5 */ + 10513 "00000001" // /* MW 5 */ + 10514 "00000000" // /* MW 4 */ + 10515 "10100000" // /* MW 3 */ + 10516 "00001111" // /* MW 2 */ + 10517 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10519 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10521 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 450 2 first +.delay_slot + 10522 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10523 "00000111" // /* MW 3 */ + 10524 "01100010" // /* MW 2 */ + 10525 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 450 2 +.src_ref 6 "superkernels.cpp" 452 45 first +.delay_slot + 10526 "01011100" // ST r17, [p3]; LSHL r18, r18, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10527 "11111011" // /* MW 5 */ + 10528 "01001001" // /* MW 4 */ + 10529 "00111001" // /* MW 3 */ + 10530 "11000110" // /* MW 2 */ + 10531 "01100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 452 45 +.delay_slot + 10532 "11110110" // NOPA; NOPB; NOPS; ADD.NC p0, r18, r16 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10533 "10100000" // /* MW 11 */ + 10534 "10100000" // /* MW 10 */ + 10535 "00110100" // /* MW 9 */ + 10536 "00000000" // /* MW 8 */ + 10537 "01011011" // /* MW 7 */ + 10538 "00000001" // /* MW 6 */ + 10539 "00100000" // /* MW 5 */ + 10540 "00000000" // /* MW 4 */ + 10541 "11110000" // /* MW 3 */ + 10542 "00101100" // /* MW 2 */ + 10543 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 496 7 first +.return_address + 10544 "10011000" // LDA r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10545 "00110110" // /* MW 3 */ + 10546 "00000110" // /* MW 2 */ + 10547 "00000111" // /* MW 1 */ + 10548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10549 "00000000" // /* MW 1 */ + 10550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10551 "00000000" // /* MW 1 */ + 10552 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10553 "00000000" // /* MW 1 */ + 10554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10555 "00000000" // /* MW 1 */ + 10556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10557 "00000000" // /* MW 1 */ + 10558 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10559 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 496 19 + 10560 "10011000" // NE r18, r15, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10561 "00011000" // /* MW 3 */ + 10562 "11100101" // /* MW 2 */ + 10563 "00010011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 496 25 + 10564 "10000100" // JNZ r18, #10768 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10768 delay_slots=5 */ + 10565 "00000001" // /* MW 5 */ + 10566 "01000000" // /* MW 4 */ + 10567 "00001000" // /* MW 3 */ + 10568 "00010101" // /* MW 2 */ + 10569 "10010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 502 19 +.delay_slot + 10570 "00011000" // MOVX r16, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10571 "00001001" // /* MW 3 */ + 10572 "00100000" // /* MW 2 */ + 10573 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10574 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10575 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10577 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10579 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10581 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 496 29 +.src_ref 6 "superkernels.cpp" 499 15 + 10582 "01000100" // MOVXM p7, #508420 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10583 "00001000" // /* MW 5 */ + 10584 "11000100" // /* MW 4 */ + 10585 "11001110" // /* MW 3 */ + 10586 "00000111" // /* MW 2 */ + 10587 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 496 29 +.src_ref 6 "superkernels.cpp" 496 62 + 10588 "10111010" // LDA r16, [p7]; MOVXM p1, #508512 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10589 "00010000" // /* MW 9 */ + 10590 "00110000" // /* MW 8 */ + 10591 "10110001" // /* MW 7 */ + 10592 "11110000" // /* MW 6 */ + 10593 "00000001" // /* MW 5 */ + 10594 "00000000" // /* MW 4 */ + 10595 "11010000" // /* MW 3 */ + 10596 "11000010" // /* MW 2 */ + 10597 "11100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 496 62 + 10598 "10011000" // LDA.u16 r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10599 "00111010" // /* MW 3 */ + 10600 "00000100" // /* MW 2 */ + 10601 "00000001" // /* MW 1 */ + 10602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10603 "00000000" // /* MW 1 */ + 10604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10605 "00000000" // /* MW 1 */ + 10606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10607 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 496 43 +.no_stack_arguments + 10608 "00000100" // JL #13472 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=13472 delay_slots=5 */ + 10609 "00000001" // /* MW 5 */ + 10610 "00000000" // /* MW 4 */ + 10611 "01010000" // /* MW 3 */ + 10612 "00011010" // /* MW 2 */ + 10613 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10615 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 496 29 +.delay_slot + 10616 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10617 "00000111" // /* MW 3 */ + 10618 "00100000" // /* MW 2 */ + 10619 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 496 29 +.src_ref 6 "superkernels.cpp" 496 43 +.delay_slot + 10620 "01011100" // ST r16, [p7]; LT r27, r16, r13 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10621 "10110101" // /* MW 5 */ + 10622 "01101101" // /* MW 4 */ + 10623 "00111000" // /* MW 3 */ + 10624 "11000010" // /* MW 2 */ + 10625 "11100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 496 43 +.src_ref 6 "superkernels.cpp" 496 43 +.delay_slot + 10626 "11100100" // SUB r17, r13, r16; MOV r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10627 "01000001" // /* MW 5 */ + 10628 "10111011" // /* MW 4 */ + 10629 "00110111" // /* MW 3 */ + 10630 "01100000" // /* MW 2 */ + 10631 "01101100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 496 43 +.delay_slot + 10632 "00100010" // SEL.EQZ r0, r16, r17, r27; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10633 "00011100" // /* MW 7 */ + 10634 "00000000" // /* MW 6 */ + 10635 "00000000" // /* MW 5 */ + 10636 "10010001" // /* MW 4 */ + 10637 "00001000" // /* MW 3 */ + 10638 "00100000" // /* MW 2 */ + 10639 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 +.src_ref 6 "superkernels.cpp" 496 43 +.src_ref 6 "superkernels.cpp" 496 43 +.return_address + 10640 "10111010" // LDA p1, [sp, #-24]; SUB r16, r13, r3; MOV r27, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10641 "01111000" // /* MW 9 */ + 10642 "11010000" // /* MW 8 */ + 10643 "01101011" // /* MW 7 */ + 10644 "10001111" // /* MW 6 */ + 10645 "00000001" // /* MW 5 */ + 10646 "00011011" // /* MW 4 */ + 10647 "00100000" // /* MW 3 */ + 10648 "00010011" // /* MW 2 */ + 10649 "11111101" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 449 8 +.src_ref 6 "superkernels.cpp" 496 43 + 10650 "01100100" // SEL.EQZ r18, r3, r16, r27; MOV r17, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10651 "00000101" // /* MW 5 */ + 10652 "10100000" // /* MW 4 */ + 10653 "01001000" // /* MW 3 */ + 10654 "10100000" // /* MW 2 */ + 10655 "00011100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 496 6 +.src_ref 6 "superkernels.cpp" 496 73 + 10656 "10000100" // JNZ r18, #10736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10736 delay_slots=5 */ + 10657 "00000001" // /* MW 5 */ + 10658 "01000000" // /* MW 4 */ + 10659 "11111000" // /* MW 3 */ + 10660 "00010100" // /* MW 2 */ + 10661 "10010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 502 19 +.delay_slot + 10662 "00011000" // MOVX r16, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10663 "00001001" // /* MW 3 */ + 10664 "00100000" // /* MW 2 */ + 10665 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10667 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10669 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10671 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10673 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first +.src_ref 6 "superkernels.cpp" 499 15 first + 10674 "00001100" // LDA r18, [p1, #20]; ST r13, [p7] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10675 "01100011" // /* MW 5 */ + 10676 "00001011" // /* MW 4 */ + 10677 "11011110" // /* MW 3 */ + 10678 "11001010" // /* MW 2 */ + 10679 "00101010" // /* MW 1 */ + 10680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10681 "00000000" // /* MW 1 */ + 10682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10683 "00000000" // /* MW 1 */ + 10684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10685 "00000000" // /* MW 1 */ + 10686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10687 "00000000" // /* MW 1 */ + 10688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10689 "00000000" // /* MW 1 */ + 10690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10691 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 10692 "00011000" // REL r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10693 "00011000" // /* MW 3 */ + 10694 "10010001" // /* MW 2 */ + 10695 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first +.src_ref 6 "superkernels.cpp" 502 7 + 10696 "10111010" // LDA r18, [p6, #-8]; MOVXM p7, #508436 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10697 "00010000" // /* MW 9 */ + 10698 "00001010" // /* MW 8 */ + 10699 "10110001" // /* MW 7 */ + 10700 "11110011" // /* MW 6 */ + 10701 "00000001" // /* MW 5 */ + 10702 "00000000" // /* MW 4 */ + 10703 "11010000" // /* MW 3 */ + 10704 "11001010" // /* MW 2 */ + 10705 "11011100" // /* MW 1 */ + 10706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10707 "00000000" // /* MW 1 */ + 10708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10709 "00000000" // /* MW 1 */ + 10710 "10000100" // J #10752 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10752 delay_slots=5 */ + 10711 "00000000" // /* MW 5 */ + 10712 "00000000" // /* MW 4 */ + 10713 "00000000" // /* MW 3 */ + 10714 "00010101" // /* MW 2 */ + 10715 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10717 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10719 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10720 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10721 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 +.delay_slot + 10722 "10011000" // SUB r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10723 "00100001" // /* MW 3 */ + 10724 "01100011" // /* MW 2 */ + 10725 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 +.delay_slot + 10726 "01111010" // NOPA; ST r17, [p6, #-8]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10727 "00000000" // /* MW 9 */ + 10728 "00000000" // /* MW 8 */ + 10729 "00000000" // /* MW 7 */ + 10730 "10000000" // /* MW 6 */ + 10731 "00110001" // /* MW 5 */ + 10732 "11100110" // /* MW 4 */ + 10733 "11110110" // /* MW 3 */ + 10734 "00101100" // /* MW 2 */ + 10735 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1216 +.src_ref 6 "superkernels.cpp" 502 7 + 10736 "11100001" // NOPA; NOPB; NOPS; MOVXM p7, #508436; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10737 "00000000" // /* MW 15 */ + 10738 "00000000" // /* MW 14 */ + 10739 "00010000" // /* MW 13 */ + 10740 "00001010" // /* MW 12 */ + 10741 "10110001" // /* MW 11 */ + 10742 "11110011" // /* MW 10 */ + 10743 "00000001" // /* MW 9 */ + 10744 "00000000" // /* MW 8 */ + 10745 "01011011" // /* MW 7 */ + 10746 "00000001" // /* MW 6 */ + 10747 "00100000" // /* MW 5 */ + 10748 "00000000" // /* MW 4 */ + 10749 "11110000" // /* MW 3 */ + 10750 "00101100" // /* MW 2 */ + 10751 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1232 +.src_ref 6 "superkernels.cpp" 502 7 first + 10752 "11100001" // LDA r17, [p7]; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10753 "00000000" // /* MW 15 */ + 10754 "00000000" // /* MW 14 */ + 10755 "01111000" // /* MW 13 */ + 10756 "10100101" // /* MW 12 */ + 10757 "00000001" // /* MW 11 */ + 10758 "00000000" // /* MW 10 */ + 10759 "00000000" // /* MW 9 */ + 10760 "00000000" // /* MW 8 */ + 10761 "01011011" // /* MW 7 */ + 10762 "00000001" // /* MW 6 */ + 10763 "00100000" // /* MW 5 */ + 10764 "00000000" // /* MW 4 */ + 10765 "11010000" // /* MW 3 */ + 10766 "11000110" // /* MW 2 */ + 10767 "11100000" // /* MW 1 */ +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1248 + 10768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10769 "00000000" // /* MW 1 */ + 10770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10771 "00000000" // /* MW 1 */ + 10772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10773 "00000000" // /* MW 1 */ + 10774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10775 "00000000" // /* MW 1 */ + 10776 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10777 "00000000" // /* MW 1 */ + 10778 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10779 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 502 19 + 10780 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10781 "00001000" // /* MW 3 */ + 10782 "01100001" // /* MW 2 */ + 10783 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 502 25 + 10784 "10000100" // JNZ r16, #10992 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10992 delay_slots=5 */ + 10785 "00000001" // /* MW 5 */ + 10786 "01000000" // /* MW 4 */ + 10787 "01111000" // /* MW 3 */ + 10788 "00010101" // /* MW 2 */ + 10789 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 502 62 +.delay_slot + 10790 "01000100" // MOVXM p1, #508512 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10791 "11000000" // /* MW 5 */ + 10792 "11000100" // /* MW 4 */ + 10793 "11000010" // /* MW 3 */ + 10794 "00000111" // /* MW 2 */ + 10795 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 502 29 +.src_ref 6 "superkernels.cpp" 505 15 +.delay_slot + 10796 "01000100" // MOVXM p7, #508440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10797 "00110000" // /* MW 5 */ + 10798 "11000100" // /* MW 4 */ + 10799 "11001110" // /* MW 3 */ + 10800 "00000111" // /* MW 2 */ + 10801 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10803 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10805 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10807 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 502 29 + 10808 "10011000" // LDA r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10809 "00010110" // /* MW 3 */ + 10810 "00000110" // /* MW 2 */ + 10811 "00000111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 502 62 + 10812 "10011000" // LDA.u16 r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10813 "00111010" // /* MW 3 */ + 10814 "00000100" // /* MW 2 */ + 10815 "00000001" // /* MW 1 */ + 10816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10817 "00000000" // /* MW 1 */ + 10818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10819 "00000000" // /* MW 1 */ + 10820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10821 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 502 43 +.no_stack_arguments + 10822 "00000100" // JL #13472 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=13472 delay_slots=5 */ + 10823 "00000001" // /* MW 5 */ + 10824 "00000000" // /* MW 4 */ + 10825 "01010000" // /* MW 3 */ + 10826 "00011010" // /* MW 2 */ + 10827 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10829 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 502 29 +.delay_slot + 10830 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10831 "00000111" // /* MW 3 */ + 10832 "00100000" // /* MW 2 */ + 10833 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 502 29 +.src_ref 6 "superkernels.cpp" 502 43 +.delay_slot + 10834 "11010010" // NOPB; ST r16, [p7]; LT r27, r16, r13 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10835 "11011010" // /* MW 7 */ + 10836 "00110110" // /* MW 6 */ + 10837 "00100100" // /* MW 5 */ + 10838 "00000000" // /* MW 4 */ + 10839 "00110000" // /* MW 3 */ + 10840 "11000010" // /* MW 2 */ + 10841 "11100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 502 43 +.src_ref 6 "superkernels.cpp" 502 43 +.delay_slot + 10842 "11100100" // SUB r17, r13, r16; MOV r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10843 "01000001" // /* MW 5 */ + 10844 "10111011" // /* MW 4 */ + 10845 "00110111" // /* MW 3 */ + 10846 "01100000" // /* MW 2 */ + 10847 "01101100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 502 43 +.delay_slot + 10848 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r0, r16, r17, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10849 "00000000" // /* MW 15 */ + 10850 "00000000" // /* MW 14 */ + 10851 "01111000" // /* MW 13 */ + 10852 "10100101" // /* MW 12 */ + 10853 "00000001" // /* MW 11 */ + 10854 "10010000" // /* MW 10 */ + 10855 "00001000" // /* MW 9 */ + 10856 "00100000" // /* MW 8 */ + 10857 "01011011" // /* MW 7 */ + 10858 "00000001" // /* MW 6 */ + 10859 "00100000" // /* MW 5 */ + 10860 "00000000" // /* MW 4 */ + 10861 "11110000" // /* MW 3 */ + 10862 "00101100" // /* MW 2 */ + 10863 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 +.src_ref 1 "io_buffer_main.h" 348 51 +.src_ref 6 "superkernels.cpp" 502 43 +.src_ref 6 "superkernels.cpp" 502 43 +.return_address + 10864 "10111010" // LDA p2, [sp, #-24]; SUB r16, r13, r3; MOV r27, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10865 "01111000" // /* MW 9 */ + 10866 "11010000" // /* MW 8 */ + 10867 "01101011" // /* MW 7 */ + 10868 "10001111" // /* MW 6 */ + 10869 "00000001" // /* MW 5 */ + 10870 "00011011" // /* MW 4 */ + 10871 "00100000" // /* MW 3 */ + 10872 "00100011" // /* MW 2 */ + 10873 "11111101" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 502 43 + 10874 "00011000" // SEL.EQZ r17, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10875 "00000010" // /* MW 3 */ + 10876 "11100011" // /* MW 2 */ + 10877 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 502 6 +.src_ref 6 "superkernels.cpp" 502 73 + 10878 "10000100" // JNZ r17, #10960 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10960 delay_slots=5 */ + 10879 "00000001" // /* MW 5 */ + 10880 "01000000" // /* MW 4 */ + 10881 "01101000" // /* MW 3 */ + 10882 "00010101" // /* MW 2 */ + 10883 "10001000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 508 7 +.delay_slot + 10884 "01000100" // MOVXM p1, #508436 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10885 "00101000" // /* MW 5 */ + 10886 "11000100" // /* MW 4 */ + 10887 "11000010" // /* MW 3 */ + 10888 "00000111" // /* MW 2 */ + 10889 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 449 8 +.delay_slot + 10890 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10891 "00000101" // /* MW 3 */ + 10892 "00100000" // /* MW 2 */ + 10893 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10895 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10897 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10899 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first +.src_ref 6 "superkernels.cpp" 505 15 first + 10900 "00001100" // LDA r17, [p2, #20]; ST r13, [p7] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10901 "01100011" // /* MW 5 */ + 10902 "00001011" // /* MW 4 */ + 10903 "11011110" // /* MW 3 */ + 10904 "11000110" // /* MW 2 */ + 10905 "01001010" // /* MW 1 */ + 10906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10907 "00000000" // /* MW 1 */ + 10908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10909 "00000000" // /* MW 1 */ + 10910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10911 "00000000" // /* MW 1 */ + 10912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10913 "00000000" // /* MW 1 */ + 10914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10915 "00000000" // /* MW 1 */ + 10916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10917 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 10918 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10919 "00001000" // /* MW 3 */ + 10920 "01010001" // /* MW 2 */ + 10921 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first + 10922 "10011000" // LDA r17, [p6, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10923 "00110110" // /* MW 3 */ + 10924 "11100110" // /* MW 2 */ + 10925 "00000110" // /* MW 1 */ + 10926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10927 "00000000" // /* MW 1 */ + 10928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10929 "00000000" // /* MW 1 */ + 10930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10931 "00000000" // /* MW 1 */ + 10932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10933 "00000000" // /* MW 1 */ + 10934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10935 "00000000" // /* MW 1 */ + 10936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10937 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 + 10938 "00101100" // NOPA; SUB r16, r16, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10939 "00100011" // /* MW 5 */ + 10940 "01000010" // /* MW 4 */ + 10941 "11111000" // /* MW 3 */ + 10942 "00101100" // /* MW 2 */ + 10943 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 10944 "11100001" // NOPA; NOPB; ST r16, [p6, #-8]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10945 "00000000" // /* MW 15 */ + 10946 "00000000" // /* MW 14 */ + 10947 "01111000" // /* MW 13 */ + 10948 "10100101" // /* MW 12 */ + 10949 "00000001" // /* MW 11 */ + 10950 "00000000" // /* MW 10 */ + 10951 "00000000" // /* MW 9 */ + 10952 "10000000" // /* MW 8 */ + 10953 "00010001" // /* MW 7 */ + 10954 "11100110" // /* MW 6 */ + 10955 "00100110" // /* MW 5 */ + 10956 "00000000" // /* MW 4 */ + 10957 "11110000" // /* MW 3 */ + 10958 "00101100" // /* MW 2 */ + 10959 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1440 + 10960 "10000100" // J #11008 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11008 delay_slots=5 */ + 10961 "00000000" // /* MW 5 */ + 10962 "00000000" // /* MW 4 */ + 10963 "10000000" // /* MW 3 */ + 10964 "00010101" // /* MW 2 */ + 10965 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 +.delay_slot + 10966 "11111000" // MOV p7, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10967 "11000000" // /* MW 3 */ + 10968 "01100100" // /* MW 2 */ + 10969 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10971 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10973 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10975 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10976 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10977 "00000000" // /* MW 15 */ + 10978 "00000000" // /* MW 14 */ + 10979 "01111000" // /* MW 13 */ + 10980 "10100101" // /* MW 12 */ + 10981 "00000001" // /* MW 11 */ + 10982 "00000000" // /* MW 10 */ + 10983 "00000000" // /* MW 9 */ + 10984 "00000000" // /* MW 8 */ + 10985 "01011011" // /* MW 7 */ + 10986 "00000001" // /* MW 6 */ + 10987 "00100000" // /* MW 5 */ + 10988 "00000000" // /* MW 4 */ + 10989 "11110000" // /* MW 3 */ + 10990 "00101100" // /* MW 2 */ + 10991 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1472 +.src_ref 1 "io_buffer_main.h" 348 51 +.src_ref 6 "superkernels.cpp" 508 7 + 10992 "11100001" // LDA p7, [sp, #-24]; NOPB; NOPS; MOVXM p1, #508436; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10993 "00000000" // /* MW 15 */ + 10994 "00000000" // /* MW 14 */ + 10995 "00010000" // /* MW 13 */ + 10996 "00001010" // /* MW 12 */ + 10997 "10110001" // /* MW 11 */ + 10998 "11110000" // /* MW 10 */ + 10999 "00000001" // /* MW 9 */ + 11000 "00000000" // /* MW 8 */ + 11001 "01011011" // /* MW 7 */ + 11002 "00000001" // /* MW 6 */ + 11003 "00100000" // /* MW 5 */ + 11004 "00000000" // /* MW 4 */ + 11005 "00100000" // /* MW 3 */ + 11006 "01110011" // /* MW 2 */ + 11007 "11111101" // /* MW 1 */ +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1488 +.src_ref 6 "superkernels.cpp" 508 7 first + 11008 "10011000" // LDA r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11009 "00010110" // /* MW 3 */ + 11010 "00000110" // /* MW 2 */ + 11011 "00000001" // /* MW 1 */ + 11012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11013 "00000000" // /* MW 1 */ + 11014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11015 "00000000" // /* MW 1 */ + 11016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11017 "00000000" // /* MW 1 */ + 11018 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11019 "00000000" // /* MW 1 */ + 11020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11021 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 508 19 + 11022 "00011000" // MOVX r17, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11023 "00010001" // /* MW 3 */ + 11024 "00100010" // /* MW 2 */ + 11025 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 508 19 + 11026 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11027 "00001000" // /* MW 3 */ + 11028 "01100001" // /* MW 2 */ + 11029 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 508 25 + 11030 "10000100" // JNZ r16, #11184 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11184 delay_slots=5 */ + 11031 "00000001" // /* MW 5 */ + 11032 "01000000" // /* MW 4 */ + 11033 "11011000" // /* MW 3 */ + 11034 "00010101" // /* MW 2 */ + 11035 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 508 29 +.delay_slot + 11036 "01000100" // MOVXM p1, #508444 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11037 "00111000" // /* MW 5 */ + 11038 "11000100" // /* MW 4 */ + 11039 "11000010" // /* MW 3 */ + 11040 "00000111" // /* MW 2 */ + 11041 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11043 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11045 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11047 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11049 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 508 29 +.src_ref 6 "superkernels.cpp" 508 63 + 11050 "10111010" // LDA r16, [p1]; MOVXM p2, #508512 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11051 "00010000" // /* MW 9 */ + 11052 "00110000" // /* MW 8 */ + 11053 "00110001" // /* MW 7 */ + 11054 "11110001" // /* MW 6 */ + 11055 "00000001" // /* MW 5 */ + 11056 "00000000" // /* MW 4 */ + 11057 "11010000" // /* MW 3 */ + 11058 "11000010" // /* MW 2 */ + 11059 "00100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 508 63 + 11060 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11061 "00111010" // /* MW 3 */ + 11062 "00000100" // /* MW 2 */ + 11063 "00000010" // /* MW 1 */ + 11064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11065 "00000000" // /* MW 1 */ + 11066 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11067 "00000000" // /* MW 1 */ + 11068 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11069 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 508 44 +.no_stack_arguments + 11070 "00000100" // JL #13472 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=13472 delay_slots=5 */ + 11071 "00000001" // /* MW 5 */ + 11072 "00000000" // /* MW 4 */ + 11073 "01010000" // /* MW 3 */ + 11074 "00011010" // /* MW 2 */ + 11075 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11077 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 508 29 +.delay_slot + 11078 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11079 "00000111" // /* MW 3 */ + 11080 "00100000" // /* MW 2 */ + 11081 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 508 29 +.src_ref 6 "superkernels.cpp" 508 44 +.delay_slot + 11082 "01011100" // ST r16, [p1]; LT r27, r16, r13 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11083 "10110101" // /* MW 5 */ + 11084 "01101101" // /* MW 4 */ + 11085 "00111000" // /* MW 3 */ + 11086 "11000010" // /* MW 2 */ + 11087 "00100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 508 44 +.src_ref 6 "superkernels.cpp" 508 44 +.delay_slot + 11088 "11100100" // SUB r17, r13, r16; MOV r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11089 "01000001" // /* MW 5 */ + 11090 "10111011" // /* MW 4 */ + 11091 "00110111" // /* MW 3 */ + 11092 "01100000" // /* MW 2 */ + 11093 "01101100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 508 44 +.delay_slot + 11094 "01111010" // NOPA; NOPS; SEL.EQZ r0, r16, r17, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11095 "00010010" // /* MW 9 */ + 11096 "00000001" // /* MW 8 */ + 11097 "00000100" // /* MW 7 */ + 11098 "00000000" // /* MW 6 */ + 11099 "01011011" // /* MW 5 */ + 11100 "00000001" // /* MW 4 */ + 11101 "11110000" // /* MW 3 */ + 11102 "00101100" // /* MW 2 */ + 11103 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 508 44 +.src_ref 6 "superkernels.cpp" 508 44 +.return_address + 11104 "11100100" // SUB r16, r13, r3; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11105 "01000001" // /* MW 5 */ + 11106 "10101111" // /* MW 4 */ + 11107 "00111101" // /* MW 3 */ + 11108 "00000110" // /* MW 2 */ + 11109 "01101100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 508 44 + 11110 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11111 "00000010" // /* MW 3 */ + 11112 "11100001" // /* MW 2 */ + 11113 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 508 6 +.src_ref 6 "superkernels.cpp" 508 74 + 11114 "10000100" // JNZ r16, #11184 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11184 delay_slots=5 */ + 11115 "00000001" // /* MW 5 */ + 11116 "01000000" // /* MW 4 */ + 11117 "11011000" // /* MW 3 */ + 11118 "00010101" // /* MW 2 */ + 11119 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 511 16 +.delay_slot + 11120 "01000100" // MOVXM p1, #508444 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11121 "00111000" // /* MW 5 */ + 11122 "11000100" // /* MW 4 */ + 11123 "11000010" // /* MW 3 */ + 11124 "00000111" // /* MW 2 */ + 11125 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11127 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11129 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11131 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11133 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 449 8 +.src_ref 6 "superkernels.cpp" 511 16 first + 11134 "01111010" // LDA r17, [p7, #20]; ST r13, [p1]; MOVX r16, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11135 "00000101" // /* MW 9 */ + 11136 "00100000" // /* MW 8 */ + 11137 "00000000" // /* MW 7 */ + 11138 "10000000" // /* MW 6 */ + 11139 "10110001" // /* MW 5 */ + 11140 "00000101" // /* MW 4 */ + 11141 "11010001" // /* MW 3 */ + 11142 "11000110" // /* MW 2 */ + 11143 "11101010" // /* MW 1 */ + 11144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11145 "00000000" // /* MW 1 */ + 11146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11147 "00000000" // /* MW 1 */ + 11148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11149 "00000000" // /* MW 1 */ + 11150 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11151 "00000000" // /* MW 1 */ + 11152 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11153 "00000000" // /* MW 1 */ + 11154 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11155 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 11156 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11157 "00001000" // /* MW 3 */ + 11158 "01010001" // /* MW 2 */ + 11159 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first + 11160 "10011000" // LDA r17, [p6, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11161 "00110110" // /* MW 3 */ + 11162 "11100110" // /* MW 2 */ + 11163 "00000110" // /* MW 1 */ + 11164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11165 "00000000" // /* MW 1 */ + 11166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11167 "00000000" // /* MW 1 */ + 11168 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11169 "00000000" // /* MW 1 */ + 11170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11171 "00000000" // /* MW 1 */ + 11172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11173 "00000000" // /* MW 1 */ + 11174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11175 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 + 11176 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11177 "00010001" // /* MW 3 */ + 11178 "00100001" // /* MW 2 */ + 11179 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 11180 "10011000" // ST r16, [p6, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11181 "00010001" // /* MW 3 */ + 11182 "11100110" // /* MW 2 */ + 11183 "00001110" // /* MW 1 */ +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1664 +.src_ref 6 "superkernels.cpp" 514 6 +.src_ref 6 "superkernels.cpp" 515 14 + 11184 "10111010" // LDA r1, [sp, #-12]; MOVXM p6, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11185 "00010000" // /* MW 9 */ + 11186 "00000000" // /* MW 8 */ + 11187 "00110001" // /* MW 7 */ + 11188 "11110011" // /* MW 6 */ + 11189 "00000001" // /* MW 5 */ + 11190 "00000000" // /* MW 4 */ + 11191 "00100000" // /* MW 3 */ + 11192 "10000110" // /* MW 2 */ + 11193 "11111110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 514 6 first +.src_ref 6 "superkernels.cpp" 514 19 + 11194 "10111010" // LDA r16, [p6]; MOVXM p1, #508456 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11195 "00010000" // /* MW 9 */ + 11196 "00010100" // /* MW 8 */ + 11197 "10110001" // /* MW 7 */ + 11198 "11110000" // /* MW 6 */ + 11199 "00000001" // /* MW 5 */ + 11200 "00000000" // /* MW 4 */ + 11201 "11010000" // /* MW 3 */ + 11202 "11000010" // /* MW 2 */ + 11203 "11000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 514 19 + 11204 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11205 "00110110" // /* MW 3 */ + 11206 "00000110" // /* MW 2 */ + 11207 "00000001" // /* MW 1 */ + 11208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11209 "00000000" // /* MW 1 */ + 11210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11211 "00000000" // /* MW 1 */ + 11212 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11213 "00000000" // /* MW 1 */ + 11214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11215 "00000000" // /* MW 1 */ + 11216 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11217 "00000000" // /* MW 1 */ + 11218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11219 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 514 16 + 11220 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11221 "00001000" // /* MW 3 */ + 11222 "01100001" // /* MW 2 */ + 11223 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 514 6 + 11224 "10000100" // JNZ r16, #11248 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11248 delay_slots=5 */ + 11225 "00000001" // /* MW 5 */ + 11226 "01000000" // /* MW 4 */ + 11227 "11111000" // /* MW 3 */ + 11228 "00010101" // /* MW 2 */ + 11229 "10000000" // /* MW 1 */ +.delay_slot + 11230 "00011000" // LDA p7, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11231 "10011001" // /* MW 3 */ + 11232 "11101111" // /* MW 2 */ + 11233 "00000111" // /* MW 1 */ +.delay_slot + 11234 "00011000" // LDA r15, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11235 "11110001" // /* MW 3 */ + 11236 "11110001" // /* MW 2 */ + 11237 "00000111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11238 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11239 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11240 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11241 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11243 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 515 14 first + 11244 "10011000" // ST r13, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11245 "10110001" // /* MW 3 */ + 11246 "00000101" // /* MW 2 */ + 11247 "00001110" // /* MW 1 */ +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1728 +.src_ref 6 "superkernels.cpp" 517 + 11248 "11010100" // LDA p6, [sp, #-4]; MOV lr, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11249 "01000001" // /* MW 5 */ + 11250 "11101110" // /* MW 4 */ + 11251 "00101110" // /* MW 3 */ + 11252 "11100011" // /* MW 2 */ + 11253 "11111111" // /* MW 1 */ + 11254 "00011000" // LDA r13, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11255 "10110001" // /* MW 3 */ + 11256 "11111001" // /* MW 2 */ + 11257 "00000111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 517 first + 11258 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11259 "00000000" // /* MW 3 */ + 11260 "00101000" // /* MW 2 */ + 11261 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 517 +.delay_slot + 11262 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11263 "00000001" // /* MW 5 */ + 11264 "00000000" // /* MW 4 */ + 11265 "00000000" // /* MW 3 */ + 11266 "11111000" // /* MW 2 */ + 11267 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11269 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11271 "00000000" // /* MW 1 */ +.delay_slot + 11272 "11111000" // MOV r14, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11273 "10100000" // /* MW 3 */ + 11274 "10010000" // /* MW 2 */ + 11275 "00011011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 11277 "00000000" // /* MW 1 */ +.label __Z15_b14285_wrapperPPv___func_begin0 +.label _Z15_b14285_wrapperPPv +.function _b14285_wrapper _Z15_b14285_wrapperPPv +.src_ref 0 "0_0_reloadable11.cc" 21 first +.src_ref 0 "0_0_reloadable11.cc" 23 79 +.function_start + 11280 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11281 "11000000" // /* MW 3 */ + 11282 "01100000" // /* MW 2 */ + 11283 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 23 79 first + 11284 "10011000" // LDA p0, [p2], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11285 "00011110" // /* MW 3 */ + 11286 "00111100" // /* MW 2 */ + 11287 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 24 47 first + 11288 "10011000" // LDA p1, [p2], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11289 "10011110" // /* MW 3 */ + 11290 "11101100" // /* MW 2 */ + 11291 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 26 81 first + 11292 "10011000" // LDA p3, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11293 "10011110" // /* MW 3 */ + 11294 "00010101" // /* MW 2 */ + 11295 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 25 80 first + 11296 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11297 "00011110" // /* MW 3 */ + 11298 "00000101" // /* MW 2 */ + 11299 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 22 4 first +.tail_call + 11300 "10000100" // J #6560 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=6560 delay_slots=5 */ + 11301 "00000000" // /* MW 5 */ + 11302 "00000000" // /* MW 4 */ + 11303 "11010000" // /* MW 3 */ + 11304 "00001100" // /* MW 2 */ + 11305 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11306 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11307 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11309 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11311 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11313 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z15_b14285_wrapperPPv__end +.label __Z15_b14285_wrapperPPv___func_end0 + 11315 "00000000" // /* MW 1 */ +.label __Z15_b14290_wrapperPPv___func_begin0 +.label _Z15_b14290_wrapperPPv +.function _b14290_wrapper _Z15_b14290_wrapperPPv +.src_ref 0 "0_0_reloadable11.cc" 30 first +.src_ref 0 "0_0_reloadable11.cc" 32 79 +.function_start + 11328 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11329 "11000000" // /* MW 3 */ + 11330 "01100000" // /* MW 2 */ + 11331 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 32 79 first + 11332 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11333 "00011110" // /* MW 3 */ + 11334 "00101100" // /* MW 2 */ + 11335 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 34 81 first + 11336 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11337 "00011110" // /* MW 3 */ + 11338 "11110101" // /* MW 2 */ + 11339 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 33 47 first + 11340 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11341 "10011110" // /* MW 3 */ + 11342 "00000100" // /* MW 2 */ + 11343 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 31 4 first +.tail_call + 11344 "10000100" // J #3808 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=3808 delay_slots=5 */ + 11345 "00000000" // /* MW 5 */ + 11346 "00000000" // /* MW 4 */ + 11347 "01110000" // /* MW 3 */ + 11348 "00000111" // /* MW 2 */ + 11349 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11351 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11353 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11355 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11357 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z15_b14290_wrapperPPv__end +.label __Z15_b14290_wrapperPPv___func_end0 + 11359 "00000000" // /* MW 1 */ +.label __Z15_b13811_wrapperPPv___func_begin0 +.label _Z15_b13811_wrapperPPv +.function _b13811_wrapper _Z15_b13811_wrapperPPv +.src_ref 0 "0_0_reloadable11.cc" 38 first +.src_ref 0 "0_0_reloadable11.cc" 40 79 +.function_start + 11360 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11361 "11000000" // /* MW 3 */ + 11362 "01100000" // /* MW 2 */ + 11363 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 40 79 first + 11364 "10011000" // LDA p0, [p2], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11365 "00011110" // /* MW 3 */ + 11366 "00111100" // /* MW 2 */ + 11367 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 41 47 first + 11368 "10011000" // LDA p1, [p2], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11369 "10011110" // /* MW 3 */ + 11370 "11101100" // /* MW 2 */ + 11371 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 43 81 first + 11372 "10011000" // LDA p3, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11373 "10011110" // /* MW 3 */ + 11374 "00010101" // /* MW 2 */ + 11375 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 42 80 first + 11376 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11377 "00011110" // /* MW 3 */ + 11378 "00000101" // /* MW 2 */ + 11379 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 39 4 first +.tail_call + 11380 "10000100" // J #4592 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=4592 delay_slots=5 */ + 11381 "00000000" // /* MW 5 */ + 11382 "00000000" // /* MW 4 */ + 11383 "11111000" // /* MW 3 */ + 11384 "00001000" // /* MW 2 */ + 11385 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11387 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11389 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11391 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11393 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11394 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z15_b13811_wrapperPPv__end +.label __Z15_b13811_wrapperPPv___func_end0 + 11395 "00000000" // /* MW 1 */ +.label __Z15_b13749_wrapperPPv___func_begin0 +.label _Z15_b13749_wrapperPPv +.function _b13749_wrapper _Z15_b13749_wrapperPPv +.src_ref 0 "0_0_reloadable11.cc" 47 first +.src_ref 0 "0_0_reloadable11.cc" 49 79 +.function_start + 11408 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11409 "11000000" // /* MW 3 */ + 11410 "01100000" // /* MW 2 */ + 11411 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 49 79 first + 11412 "10011000" // LDA p0, [p2], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11413 "00011110" // /* MW 3 */ + 11414 "00111100" // /* MW 2 */ + 11415 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 50 47 first + 11416 "10011000" // LDA p1, [p2], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11417 "10011110" // /* MW 3 */ + 11418 "11101100" // /* MW 2 */ + 11419 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 52 81 first + 11420 "10011000" // LDA p3, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11421 "10011110" // /* MW 3 */ + 11422 "00010101" // /* MW 2 */ + 11423 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 51 80 first + 11424 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11425 "00011110" // /* MW 3 */ + 11426 "00000101" // /* MW 2 */ + 11427 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 48 4 first +.tail_call + 11428 "10000100" // J #5712 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=5712 delay_slots=5 */ + 11429 "00000000" // /* MW 5 */ + 11430 "00000000" // /* MW 4 */ + 11431 "00101000" // /* MW 3 */ + 11432 "00001011" // /* MW 2 */ + 11433 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11435 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11437 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11439 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11441 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z15_b13749_wrapperPPv__end +.label __Z15_b13749_wrapperPPv___func_end0 + 11443 "00000000" // /* MW 1 */ +.label __Z15_b14811_wrapperPPv___func_begin0 +.label _Z15_b14811_wrapperPPv +.function _b14811_wrapper _Z15_b14811_wrapperPPv +.src_ref 0 "0_0_reloadable11.cc" 56 first +.src_ref 0 "0_0_reloadable11.cc" 58 79 +.function_start + 11456 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11457 "11000000" // /* MW 3 */ + 11458 "01100000" // /* MW 2 */ + 11459 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 58 79 first + 11460 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11461 "00011110" // /* MW 3 */ + 11462 "00101100" // /* MW 2 */ + 11463 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 60 81 first + 11464 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11465 "00011110" // /* MW 3 */ + 11466 "11110101" // /* MW 2 */ + 11467 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 59 47 first + 11468 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11469 "10011110" // /* MW 3 */ + 11470 "00000100" // /* MW 2 */ + 11471 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 57 4 first +.tail_call + 11472 "10000100" // J #9520 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=9520 delay_slots=5 */ + 11473 "00000000" // /* MW 5 */ + 11474 "00000000" // /* MW 4 */ + 11475 "10011000" // /* MW 3 */ + 11476 "00010010" // /* MW 2 */ + 11477 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11479 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11481 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11483 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11485 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z15_b14811_wrapperPPv__end +.label __Z15_b14811_wrapperPPv___func_end0 + 11487 "00000000" // /* MW 1 */ +.label __Z17softmax_row_majorILi1E8bfloat16S0_Lt1EEvPT0_PT1_33softmax_row_major_internal_params___func_begin0 +.label _Z17softmax_row_majorILi1E8bfloat16S0_Lt1EEvPT0_PT1_33softmax_row_major_internal_params +.function softmax_row_major<1, bfloat16, bfloat16, (unsigned short)1> _Z17softmax_row_majorILi1E8bfloat16S0_Lt1EEvPT0_PT1_33softmax_row_major_internal_params +.src_ref 3 "softmax_row_major.h" 214 +.src_ref 3 "softmax_row_major.h" 214 first +.function_start + 11488 "11111000" // MOV r0, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11489 "11000000" // /* MW 3 */ + 11490 "00010100" // /* MW 2 */ + 11491 "00011000" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 214 +.src_ref 3 "softmax_row_major.h" 219 32 + 11492 "00010100" // MOVA m0, #-6; ADD.NC p2, r0, #4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11493 "00000100" // /* MW 5 */ + 11494 "11000000" // /* MW 4 */ + 11495 "10000100" // /* MW 3 */ + 11496 "01000000" // /* MW 2 */ + 11497 "11111111" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 52 87 +.src_ref 3 "softmax_row_major.h" 217 25 first + 11498 "10111010" // LDA r0, [p2], #4; MOVXM p3, #508464 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11499 "00010000" // /* MW 9 */ + 11500 "00011000" // /* MW 8 */ + 11501 "10110001" // /* MW 7 */ + 11502 "11110001" // /* MW 6 */ + 11503 "00000001" // /* MW 5 */ + 11504 "00000000" // /* MW 4 */ + 11505 "11010000" // /* MW 3 */ + 11506 "10000010" // /* MW 2 */ + 11507 "01000011" // /* MW 1 */ +.src_ref 5 "accum_native_types.hpp" 213 147 +.src_ref 3 "softmax_row_major.h" 52 87 +.src_ref 3 "softmax_row_major.h" 219 32 first + 11508 "10111010" // LDA r18, [p2], m0; MOVXM r2, #1069088768 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11509 "00010000" // /* MW 9 */ + 11510 "00000000" // /* MW 8 */ + 11511 "01001000" // /* MW 7 */ + 11512 "01000000" // /* MW 6 */ + 11513 "11101110" // /* MW 5 */ + 11514 "00001111" // /* MW 4 */ + 11515 "11010000" // /* MW 3 */ + 11516 "01001010" // /* MW 2 */ + 11517 "01000001" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 +.src_ref 3 "softmax_row_major.h" 256 76 first + 11518 "10111010" // LDA.u16 r22, [p2]; MOVXM r29, #65408 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11519 "00010000" // /* MW 9 */ + 11520 "11000000" // /* MW 8 */ + 11521 "10101111" // /* MW 7 */ + 11522 "00111111" // /* MW 6 */ + 11523 "00000000" // /* MW 5 */ + 11524 "00000000" // /* MW 4 */ + 11525 "01010000" // /* MW 3 */ + 11526 "11011011" // /* MW 2 */ + 11527 "01000000" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 52 87 first +.src_ref 3 "softmax_row_major.h" 83 8 first + 11528 "10111010" // LDA.s8 r17, [p3]; MOVXM ls, #11680 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11529 "00010000" // /* MW 9 */ + 11530 "11010000" // /* MW 8 */ + 11531 "01111110" // /* MW 7 */ + 11532 "00001000" // /* MW 6 */ + 11533 "00000000" // /* MW 5 */ + 11534 "00000000" // /* MW 4 */ + 11535 "01010000" // /* MW 3 */ + 11536 "11000100" // /* MW 2 */ + 11537 "01100000" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 83 8 + 11538 "01000100" // MOVXM le, #11744 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11539 "11000000" // /* MW 5 */ + 11540 "11111011" // /* MW 4 */ + 11541 "00100110" // /* MW 3 */ + 11542 "00000000" // /* MW 2 */ + 11543 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 first +.src_ref 3 "softmax_row_major.h" 68 40 + 11544 "11100100" // MOVX r5, #-31; VBCST.16 x9, r29 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11545 "11100101" // /* MW 5 */ + 11546 "11101010" // /* MW 4 */ + 11547 "10101001" // /* MW 3 */ + 11548 "01010000" // /* MW 2 */ + 11549 "11111001" // /* MW 1 */ +.src_ref 5 "accum_native_types.hpp" 213 147 first +.src_ref 3 "softmax_row_major.h" 52 87 first +.src_ref 3 "softmax_row_major.h" 55 37 +.src_ref 3 "softmax_row_major.h" 68 40 +.src_ref 3 "softmax_row_major.h" 264 29 + 11550 "11100100" // MOVX r7, #-4; VINSERT.32 x0, x0, #0, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11551 "10100010" // /* MW 5 */ + 11552 "00000000" // /* MW 4 */ + 11553 "00100000" // /* MW 3 */ + 11554 "11011110" // /* MW 2 */ + 11555 "11111001" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 52 87 +.src_ref 3 "softmax_row_major.h" 55 37 first + 11556 "11100100" // LSHL r29, r0, r7; VMOV bmll0, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11557 "00100101" // /* MW 5 */ + 11558 "00000001" // /* MW 4 */ + 11559 "10110000" // /* MW 3 */ + 11560 "01001111" // /* MW 2 */ + 11561 "00000111" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 68 40 first + 11562 "10011000" // ASHL r5, r18, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11563 "01011110" // /* MW 3 */ + 11564 "10001010" // /* MW 2 */ + 11565 "00010100" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 256 64 +.src_ref 3 "softmax_row_major.h" 264 29 first +.src_ref 3 "softmax_row_major.h" 269 47 + 11566 "01100100" // ASHL r0, r0, r7; MOV r2, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11567 "00000101" // /* MW 5 */ + 11568 "00100000" // /* MW 4 */ + 11569 "11010001" // /* MW 3 */ + 11570 "00001111" // /* MW 2 */ + 11571 "00000000" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 52 87 +.src_ref 3 "softmax_row_major.h" 69 53 +.src_ref 3 "softmax_row_major.h" 256 64 first + 11572 "10111010" // MOVA r6, #15; LSHL r22, r22, r2; MOV crRnd, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11573 "01111000" // /* MW 9 */ + 11574 "01010000" // /* MW 8 */ + 11575 "11011100" // /* MW 7 */ + 11576 "01101111" // /* MW 6 */ + 11577 "01100001" // /* MW 5 */ + 11578 "00101101" // /* MW 4 */ + 11579 "00000000" // /* MW 3 */ + 11580 "11100110" // /* MW 2 */ + 11581 "00000001" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 68 40 +.src_ref 3 "softmax_row_major.h" 69 53 first +.src_ref 3 "softmax_row_major.h" 256 64 + 11582 "10111010" // MOVA r25, #-28; AND r6, r6, r18; MOV m0, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11583 "01111000" // /* MW 9 */ + 11584 "10010000" // /* MW 8 */ + 11585 "00000101" // /* MW 7 */ + 11586 "00100100" // /* MW 6 */ + 11587 "01101001" // /* MW 5 */ + 11588 "00001100" // /* MW 4 */ + 11589 "00000000" // /* MW 3 */ + 11590 "10011001" // /* MW 2 */ + 11591 "11111100" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 52 87 first +.src_ref 3 "softmax_row_major.h" 68 40 first +.src_ref 3 "softmax_row_major.h" 70 65 +.src_ref 3 "softmax_row_major.h" 85 63 +.src_ref 3 "softmax_row_major.h" 176 59 +.src_ref 3 "softmax_row_major.h" 256 64 first + 11592 "00110110" // PADDB [p0], m0; VCONV.bf16.fp32 wl0, bmll0; LSHL r5, r5, r25; MOV r21, #-1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 11593 "01011000" // /* MW 11 */ + 11594 "11111111" // /* MW 10 */ + 11595 "10101111" // /* MW 9 */ + 11596 "11101110" // /* MW 8 */ + 11597 "01011100" // /* MW 7 */ + 11598 "00001010" // /* MW 6 */ + 11599 "00100000" // /* MW 5 */ + 11600 "00010111" // /* MW 4 */ + 11601 "11000000" // /* MW 3 */ + 11602 "00000010" // /* MW 2 */ + 11603 "00001000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 212 115 +.src_ref 3 "softmax_row_major.h" 68 40 +.src_ref 3 "softmax_row_major.h" 70 65 first +.src_ref 3 "softmax_row_major.h" 99 35 + 11604 "00111010" // MOVS p3, p0; LSHL r6, r21, r6; ADD.NC r5, r5, r18 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11605 "10101001" // /* MW 9 */ + 11606 "01100100" // /* MW 8 */ + 11607 "10101001" // /* MW 7 */ + 11608 "01101100" // /* MW 6 */ + 11609 "01100011" // /* MW 5 */ + 11610 "00101010" // /* MW 4 */ + 11611 "01100000" // /* MW 3 */ + 11612 "00010001" // /* MW 2 */ + 11613 "01110000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 212 115 first +.src_ref 4 "vector_native_types.hpp" 373 137 first +.src_ref 4 "broadcast.hpp" 102 58 first +.src_ref 3 "softmax_row_major.h" 52 87 first +.src_ref 3 "softmax_row_major.h" 70 65 +.src_ref 3 "softmax_row_major.h" 99 35 first + 11614 "10110110" // NOPA; VLDB wl10, [p3], #32; XOR r6, r21, r6; VEXTBCST.16 x0, x0, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 11615 "10101000" // /* MW 11 */ + 11616 "10000001" // /* MW 10 */ + 11617 "00000000" // /* MW 9 */ + 11618 "00110100" // /* MW 8 */ + 11619 "01100011" // /* MW 7 */ + 11620 "00101010" // /* MW 6 */ + 11621 "01001000" // /* MW 5 */ + 11622 "00111101" // /* MW 4 */ + 11623 "11110110" // /* MW 3 */ + 11624 "00101100" // /* MW 2 */ + 11625 "00000000" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 68 40 first +.src_ref 3 "softmax_row_major.h" 85 63 +.src_ref 3 "softmax_row_major.h" 85 65 + 11626 "01100100" // ASHL r5, r5, r7; MOV r7, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11627 "00000001" // /* MW 5 */ + 11628 "10100000" // /* MW 4 */ + 11629 "11010011" // /* MW 3 */ + 11630 "01001111" // /* MW 2 */ + 11631 "00101001" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 83 8 first +.src_ref 3 "softmax_row_major.h" 85 63 +.src_ref 3 "softmax_row_major.h" 85 63 first +.src_ref 3 "softmax_row_major.h" 176 59 + 11632 "11100001" // MOVA r23, #0; NOPB; NOPS; LT r27, r7, r5; ADD.NC lc, r29, #-1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11633 "00000000" // /* MW 15 */ + 11634 "00000000" // /* MW 14 */ + 11635 "11001000" // /* MW 13 */ + 11636 "01111111" // /* MW 12 */ + 11637 "10111111" // /* MW 11 */ + 11638 "11010110" // /* MW 10 */ + 11639 "10110010" // /* MW 9 */ + 11640 "00001111" // /* MW 8 */ + 11641 "01011011" // /* MW 7 */ + 11642 "00000001" // /* MW 6 */ + 11643 "00100000" // /* MW 5 */ + 11644 "00000000" // /* MW 4 */ + 11645 "00000000" // /* MW 3 */ + 11646 "00010111" // /* MW 2 */ + 11647 "00000000" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 85 63 + 11648 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r30, r23, r21, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11649 "00000000" // /* MW 15 */ + 11650 "00000000" // /* MW 14 */ + 11651 "01111000" // /* MW 13 */ + 11652 "10100101" // /* MW 12 */ + 11653 "00000001" // /* MW 11 */ + 11654 "10010000" // /* MW 10 */ + 11655 "11101010" // /* MW 9 */ + 11656 "00101111" // /* MW 8 */ + 11657 "01011011" // /* MW 7 */ + 11658 "00000001" // /* MW 6 */ + 11659 "00100000" // /* MW 5 */ + 11660 "00000000" // /* MW 4 */ + 11661 "11110000" // /* MW 3 */ + 11662 "00101100" // /* MW 2 */ + 11663 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 +.src_ref 3 "softmax_row_major.h" 85 65 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11664 "11100001" // NOPA; NOPB; NOPS; EQ r27, r7, r5; VMOV x8, x9; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11665 "00000000" // /* MW 15 */ + 11666 "00000000" // /* MW 14 */ + 11667 "01111000" // /* MW 13 */ + 11668 "01001001" // /* MW 12 */ + 11669 "00011001" // /* MW 11 */ + 11670 "10111110" // /* MW 10 */ + 11671 "10110010" // /* MW 9 */ + 11672 "00001111" // /* MW 8 */ + 11673 "01011011" // /* MW 7 */ + 11674 "00000001" // /* MW 6 */ + 11675 "00100000" // /* MW 5 */ + 11676 "00000000" // /* MW 4 */ + 11677 "11110000" // /* MW 3 */ + 11678 "00101100" // /* MW 2 */ + 11679 "00000000" // /* MW 1 */ +.label ZLS_F_Z17softmax_row_majorILi1E8bfloat16S0_Lt1EEvPT0_PT1_33softmax_row_major_internal_params_192 +.src_ref 4 "vector.hpp" 212 115 first +.src_ref 3 "softmax_row_major.h" 85 63 +.src_ref 3 "softmax_row_major.h" 99 35 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.noswbrkpt +.loop_nesting 1 + 11680 "11100001" // NOPA; VLDB wl10, [p3], #32; NOPS; SEL.EQZ r31, r30, r6, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11681 "00000000" // /* MW 15 */ + 11682 "00000000" // /* MW 14 */ + 11683 "01111000" // /* MW 13 */ + 11684 "10100101" // /* MW 12 */ + 11685 "00000001" // /* MW 11 */ + 11686 "00010000" // /* MW 10 */ + 11687 "11110011" // /* MW 9 */ + 11688 "00111101" // /* MW 8 */ + 11689 "01011011" // /* MW 7 */ + 11690 "00000001" // /* MW 6 */ + 11691 "01001000" // /* MW 5 */ + 11692 "00111101" // /* MW 4 */ + 11693 "11110110" // /* MW 3 */ + 11694 "00101100" // /* MW 2 */ + 11695 "00000000" // /* MW 1 */ +.src_ref 8 "mask.hpp" 57 33 first +.src_ref 3 "softmax_row_major.h" 83 41 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11696 "11100001" // NOPA; NOPB; NOPS; EXTEND.u16 r19, r31; ADD.NC r7, r7, #1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11697 "00000000" // /* MW 15 */ + 11698 "00000000" // /* MW 14 */ + 11699 "01001000" // /* MW 13 */ + 11700 "11000000" // /* MW 12 */ + 11701 "11101001" // /* MW 11 */ + 11702 "10000000" // /* MW 10 */ + 11703 "00110101" // /* MW 9 */ + 11704 "00111111" // /* MW 8 */ + 11705 "01011011" // /* MW 7 */ + 11706 "00000001" // /* MW 6 */ + 11707 "00100000" // /* MW 5 */ + 11708 "00000000" // /* MW 4 */ + 11709 "11110000" // /* MW 3 */ + 11710 "00101100" // /* MW 2 */ + 11711 "00000000" // /* MW 1 */ +.src_ref 4 "blend.hpp" 163 48 first +.src_ref 3 "softmax_row_major.h" 85 63 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11712 "11100001" // NOPA; NOPB; NOPS; LT r27, r7, r5; VSEL.16 x5, x9, x10, r19; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11713 "00000000" // /* MW 15 */ + 11714 "00000000" // /* MW 14 */ + 11715 "00011000" // /* MW 13 */ + 11716 "10001101" // /* MW 12 */ + 11717 "01100110" // /* MW 11 */ + 11718 "11010101" // /* MW 10 */ + 11719 "10110010" // /* MW 9 */ + 11720 "00001111" // /* MW 8 */ + 11721 "01011011" // /* MW 7 */ + 11722 "00000001" // /* MW 6 */ + 11723 "00100000" // /* MW 5 */ + 11724 "00000000" // /* MW 4 */ + 11725 "11110000" // /* MW 3 */ + 11726 "00101100" // /* MW 2 */ + 11727 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 111 101 first +.src_ref 4 "vector.hpp" 1454 19 first +.src_ref 3 "softmax_row_major.h" 85 63 + 11728 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r30, r23, r21, r27; VMOV wl3, wl5; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11729 "00000000" // /* MW 15 */ + 11730 "00000000" // /* MW 14 */ + 11731 "01111000" // /* MW 13 */ + 11732 "10010001" // /* MW 12 */ + 11733 "11100101" // /* MW 11 */ + 11734 "10010000" // /* MW 10 */ + 11735 "11101010" // /* MW 9 */ + 11736 "00101111" // /* MW 8 */ + 11737 "01011011" // /* MW 7 */ + 11738 "00000001" // /* MW 6 */ + 11739 "00100000" // /* MW 5 */ + 11740 "00000000" // /* MW 4 */ + 11741 "11110000" // /* MW 3 */ + 11742 "00101100" // /* MW 2 */ + 11743 "00000000" // /* MW 1 */ +.label ZLE_F_Z17softmax_row_majorILi1E8bfloat16S0_Lt1EEvPT0_PT1_33softmax_row_major_internal_params_256 +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 3 "softmax_row_major.h" 85 65 first +.end_of_loop + 11744 "11100001" // NOPA; NOPB; NOPS; EQ r27, r7, r5; VMAX_LT.bf16 x8, r16, x8, x3; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11745 "00000000" // /* MW 15 */ + 11746 "00000000" // /* MW 14 */ + 11747 "01111000" // /* MW 13 */ + 11748 "11110110" // /* MW 12 */ + 11749 "00100000" // /* MW 11 */ + 11750 "10111110" // /* MW 10 */ + 11751 "10110010" // /* MW 9 */ + 11752 "00001111" // /* MW 8 */ + 11753 "01011011" // /* MW 7 */ + 11754 "00000001" // /* MW 6 */ + 11755 "00100000" // /* MW 5 */ + 11756 "00000000" // /* MW 4 */ + 11757 "11110000" // /* MW 3 */ + 11758 "00101100" // /* MW 2 */ + 11759 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 111 101 +.src_ref 4 "max_min_reduce.hpp" 93 30 +.src_ref 4 "max_min_reduce.hpp" 93 30 +.src_ref 4 "add_reduce.hpp" 322 47 +.src_ref 4 "add_reduce.hpp" 322 47 +.src_ref 3 "softmax_row_major.h" 85 63 +.src_ref 3 "softmax_row_major.h" 180 25 +.loop_nesting 0 + 11760 "11101001" // MOVA r4, #8; NOPB; MOVS p4, p1; SEL.EQZ r31, r30, r6, r27; MOV r1, #16; VCLR dm0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11761 "00000000" // /* MW 15 */ + 11762 "00000111" // /* MW 14 */ + 11763 "01011000" // /* MW 13 */ + 11764 "00010000" // /* MW 12 */ + 11765 "00101000" // /* MW 11 */ + 11766 "00010000" // /* MW 10 */ + 11767 "11110011" // /* MW 9 */ + 11768 "00111101" // /* MW 8 */ + 11769 "10001011" // /* MW 7 */ + 11770 "10000100" // /* MW 6 */ + 11771 "00100100" // /* MW 5 */ + 11772 "00000000" // /* MW 4 */ + 11773 "00000000" // /* MW 3 */ + 11774 "00000100" // /* MW 2 */ + 11775 "00000001" // /* MW 1 */ +.src_ref 4 "max_min_reduce.hpp" 93 30 +.src_ref 5 "accum.hpp" 198 120 first +.src_ref 5 "accum.hpp" 938 83 first +.src_ref 8 "mask.hpp" 57 33 first +.src_ref 4 "add_reduce.hpp" 322 47 +.src_ref 3 "softmax_row_major.h" 148 30 first + 11776 "10111010" // VLDA.CONV.fp32.bf16 bmll2, [p0], #32; EXTEND.u16 r19, r31; MOV r3, #4 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11777 "01011000" // /* MW 9 */ + 11778 "00000100" // /* MW 8 */ + 11779 "01101000" // /* MW 7 */ + 11780 "10000000" // /* MW 6 */ + 11781 "00110101" // /* MW 5 */ + 11782 "00111111" // /* MW 4 */ + 11783 "00110000" // /* MW 3 */ + 11784 "10100001" // /* MW 2 */ + 11785 "00000011" // /* MW 1 */ +.src_ref 4 "max_min_reduce.hpp" 93 30 +.src_ref 4 "add_reduce.hpp" 324 44 +.src_ref 4 "add_reduce.hpp" 324 44 +.src_ref 4 "add_reduce.hpp" 324 44 +.src_ref 4 "add_reduce.hpp" 324 44 +.src_ref 4 "blend.hpp" 163 48 first +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 5 "add_accum.hpp" 20 92 + 11786 "10111010" // MOVA r20, #60; MOVX r26, #2; VSEL.16 x5, x9, x10, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11787 "00011000" // /* MW 9 */ + 11788 "10001101" // /* MW 8 */ + 11789 "01100110" // /* MW 7 */ + 11790 "01001001" // /* MW 6 */ + 11791 "10100000" // /* MW 5 */ + 11792 "00000001" // /* MW 4 */ + 11793 "00000000" // /* MW 3 */ + 11794 "10010100" // /* MW 2 */ + 11795 "00000111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 111 101 first +.src_ref 4 "vector.hpp" 1454 19 first + 11796 "11100100" // MOVX r24, #0; VMOV wl3, wl5 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11797 "01000101" // /* MW 5 */ + 11798 "10010110" // /* MW 4 */ + 11799 "00100011" // /* MW 3 */ + 11800 "00000000" // /* MW 2 */ + 11801 "00000110" // /* MW 1 */ +.src_ref 4 "vector.hpp" 212 115 +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 5 "accum.hpp" 153 115 +.src_ref 5 "accum.hpp" 153 115 +.src_ref 5 "accum.hpp" 1117 103 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "softmax_row_major.h" 153 29 +.src_ref 3 "softmax_row_major.h" 269 +.src_ref 3 "softmax_row_major.h" 277 31 + 11802 "10111010" // MOVA r17, #828; MOVX crRnd, r17; VMAX_LT.bf16 x8, r16, x8, x3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11803 "01111000" // /* MW 9 */ + 11804 "11110110" // /* MW 8 */ + 11805 "00100000" // /* MW 7 */ + 11806 "00000010" // /* MW 6 */ + 11807 "11010100" // /* MW 5 */ + 11808 "00100011" // /* MW 4 */ + 11809 "00000000" // /* MW 3 */ + 11810 "10010001" // /* MW 2 */ + 11811 "01100111" // /* MW 1 */ +.src_ref 4 "max_min_reduce.hpp" 93 30 first + 11812 "11011000" // VSHIFT x1, x8, x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11813 "00000110" // /* MW 3 */ + 11814 "11000000" // /* MW 2 */ + 11815 "00011000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first + 11816 "11111000" // VMAX_LT.bf16 x1, r16, x8, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11817 "11101100" // /* MW 3 */ + 11818 "11000000" // /* MW 2 */ + 11819 "00011000" // /* MW 1 */ +.src_ref 4 "max_min_reduce.hpp" 93 30 first + 11820 "11011000" // VSHIFT x8, x1, x0, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11821 "00010010" // /* MW 3 */ + 11822 "00001000" // /* MW 2 */ + 11823 "00011100" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first + 11824 "11111000" // VMAX_LT.bf16 x1, r16, x1, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11825 "01101100" // /* MW 3 */ + 11826 "10001100" // /* MW 2 */ + 11827 "00011000" // /* MW 1 */ +.src_ref 4 "max_min_reduce.hpp" 93 30 first + 11828 "11011000" // VSHIFT x8, x1, x0, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11829 "00001110" // /* MW 3 */ + 11830 "00001000" // /* MW 2 */ + 11831 "00011100" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first + 11832 "11111000" // VMAX_LT.bf16 x1, r16, x1, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11833 "01101100" // /* MW 3 */ + 11834 "10001100" // /* MW 2 */ + 11835 "00011000" // /* MW 1 */ +.src_ref 4 "max_min_reduce.hpp" 93 30 first +.src_ref 3 "softmax_row_major.h" 176 59 +.src_ref 3 "softmax_row_major.h" 176 61 + 11836 "10100100" // MOVX r26, #0; VSHIFT x8, x1, x0, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11837 "11010101" // /* MW 5 */ + 11838 "00010000" // /* MW 4 */ + 11839 "00101000" // /* MW 3 */ + 11840 "10000000" // /* MW 2 */ + 11841 "00000110" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 3 "softmax_row_major.h" 176 59 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 11842 "11100100" // LT r27, r26, r5; VMAX_LT.bf16 x1, r16, x1, x8 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11843 "11011001" // /* MW 5 */ + 11844 "00011000" // /* MW 4 */ + 11845 "01010001" // /* MW 3 */ + 11846 "11001011" // /* MW 2 */ + 11847 "11010110" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 5 "accum.hpp" 198 120 first +.src_ref 5 "accum.hpp" 938 83 first +.src_ref 4 "broadcast.hpp" 102 58 first +.src_ref 5 "add_accum.hpp" 20 92 +.src_ref 3 "softmax_row_major.h" 148 30 first +.src_ref 3 "softmax_row_major.h" 176 59 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 11848 "01000110" // VLDA.CONV.fp32.bf16 bmll2, [p0], #32; SEL.EQZ r28, r23, r21, r27; VEXTBCST.16 x8, x1, #0; VSUB.f dm2, dm2, dm0, r20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 11849 "00111111" // /* MW 11 */ + 11850 "01000000" // /* MW 10 */ + 11851 "10100010" // /* MW 9 */ + 11852 "00110101" // /* MW 8 */ + 11853 "10010000" // /* MW 7 */ + 11854 "01000000" // /* MW 6 */ + 11855 "01010010" // /* MW 5 */ + 11856 "11111001" // /* MW 4 */ + 11857 "00110101" // /* MW 3 */ + 11858 "10100001" // /* MW 2 */ + 11859 "00000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 111 101 first +.src_ref 3 "softmax_row_major.h" 176 61 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11860 "11100100" // EQ r27, r5, r26; VCONV.fp32.bf16 bmll0, wl8 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11861 "01100101" // /* MW 5 */ + 11862 "00100010" // /* MW 4 */ + 11863 "11110000" // /* MW 3 */ + 11864 "11110100" // /* MW 2 */ + 11865 "00101110" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 471 87 first +.src_ref 3 "softmax_row_major.h" 176 59 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11866 "11100100" // SEL.EQZ r25, r28, r6, r27; VMOV bmhl0, bmll0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11867 "00100101" // /* MW 5 */ + 11868 "00000000" // /* MW 4 */ + 11869 "01000001" // /* MW 3 */ + 11870 "01001100" // /* MW 2 */ + 11871 "11100110" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 112 37 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11872 "11100100" // ADD r26, r26, #1; VBCST.16 x1, r24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11873 "11100101" // /* MW 5 */ + 11874 "11000010" // /* MW 4 */ + 11875 "11100001" // /* MW 3 */ + 11876 "10000000" // /* MW 2 */ + 11877 "11010110" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first +.src_ref 5 "accum.hpp" 938 83 first +.src_ref 8 "mask.hpp" 57 33 first +.src_ref 5 "add_accum.hpp" 20 92 first +.src_ref 3 "softmax_row_major.h" 112 4 first +.src_ref 3 "softmax_row_major.h" 148 30 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11878 "01000110" // VLDA.CONV.fp32.bf16 bmll2, [p0], #32; EXTEND.u16 r22, r25; ADD.NC lc, r29, #-3; VSUB.f dm2, dm2, dm0, r20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 11879 "00111111" // /* MW 11 */ + 11880 "01000000" // /* MW 10 */ + 11881 "10100010" // /* MW 9 */ + 11882 "11101001" // /* MW 8 */ + 11883 "11101111" // /* MW 7 */ + 11884 "01010111" // /* MW 6 */ + 11885 "10110000" // /* MW 5 */ + 11886 "01101100" // /* MW 4 */ + 11887 "00110110" // /* MW 3 */ + 11888 "10100001" // /* MW 2 */ + 11889 "00000011" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 176 59 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11890 "10011000" // LT r27, r26, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11891 "01011010" // /* MW 3 */ + 11892 "10110110" // /* MW 2 */ + 11893 "00010110" // /* MW 1 */ +.src_ref 4 "vector.hpp" 212 115 first +.src_ref 5 "accum.hpp" 153 115 first +.src_ref 3 "softmax_row_major.h" 153 29 first +.src_ref 3 "softmax_row_major.h" 176 59 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11894 "01011100" // VCONV.bf16.fp32 wl6, bmll2; SEL.EQZ r28, r23, r21, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11895 "10100100" // /* MW 5 */ + 11896 "11110010" // /* MW 4 */ + 11897 "11001011" // /* MW 3 */ + 11898 "00100010" // /* MW 2 */ + 11899 "01101000" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 112 37 +.src_ref 3 "softmax_row_major.h" 176 61 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11900 "00100100" // EQ r27, r5, r26; ADD.NC r26, r26, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11901 "00000001" // /* MW 5 */ + 11902 "00111010" // /* MW 4 */ + 11903 "11111101" // /* MW 3 */ + 11904 "11110100" // /* MW 2 */ + 11905 "00101110" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "softmax_row_major.h" 176 59 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11906 "01100010" // SEL.EQZ r25, r28, r6, r27; VMUL.f dm3, x6, x0, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11907 "00000001" // /* MW 7 */ + 11908 "11101100" // /* MW 6 */ + 11909 "10001011" // /* MW 5 */ + 11910 "00010001" // /* MW 4 */ + 11911 "10010011" // /* MW 3 */ + 11912 "00111001" // /* MW 2 */ + 11913 "00000000" // /* MW 1 */ +.src_ref 5 "add_accum.hpp" 20 92 first +.src_ref 3 "softmax_row_major.h" 176 59 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11914 "01100010" // LT r27, r26, r5; VSUB.f dm2, dm2, dm0, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11915 "00111111" // /* MW 7 */ + 11916 "01000000" // /* MW 6 */ + 11917 "10100010" // /* MW 5 */ + 11918 "11010101" // /* MW 4 */ + 11919 "10110010" // /* MW 3 */ + 11920 "00110101" // /* MW 2 */ + 11921 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 212 115 first +.src_ref 5 "accum.hpp" 153 115 first +.src_ref 3 "softmax_row_major.h" 112 4 first +.src_ref 3 "softmax_row_major.h" 153 29 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11922 "00111010" // VCONV.bf16.fp32 wl6, bmll2; MOVXM ls, #11968 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11923 "00010001" // /* MW 9 */ + 11924 "01100000" // /* MW 8 */ + 11925 "01111111" // /* MW 7 */ + 11926 "00001000" // /* MW 6 */ + 11927 "00000000" // /* MW 5 */ + 11928 "00000000" // /* MW 4 */ + 11929 "11000000" // /* MW 3 */ + 11930 "00100010" // /* MW 2 */ + 11931 "01101000" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 112 4 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 11932 "01000100" // MOVXM le, #12048 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11933 "00100000" // /* MW 5 */ + 11934 "11111110" // /* MW 4 */ + 11935 "00100110" // /* MW 3 */ + 11936 "00000000" // /* MW 2 */ + 11937 "00000000" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "softmax_row_major.h" 176 59 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 11938 "01100010" // SEL.EQZ r28, r23, r21, r27; VMUL.f dm3, x6, x0, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11939 "00000001" // /* MW 7 */ + 11940 "11101100" // /* MW 6 */ + 11941 "10001011" // /* MW 5 */ + 11942 "10010001" // /* MW 4 */ + 11943 "11001010" // /* MW 3 */ + 11944 "00101111" // /* MW 2 */ + 11945 "00000000" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 176 61 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 11946 "10011000" // EQ r27, r5, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11947 "10100111" // /* MW 3 */ + 11948 "01110111" // /* MW 2 */ + 11949 "00010001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 212 115 first +.src_ref 5 "accum.hpp" 153 115 first +.src_ref 5 "elementary.hpp" 473 55 first +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11950 "11111000" // VEXP2 wl4, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11951 "11110010" // /* MW 3 */ + 11952 "01011000" // /* MW 2 */ + 11953 "00011010" // /* MW 1 */ +.src_ref 8 "mask.hpp" 57 33 first +.src_ref 4 "blend.hpp" 163 48 first + 11954 "01100100" // EXTEND.u16 r22, r25; VSEL.16 x7, x1, x4, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11955 "01100100" // /* MW 5 */ + 11956 "00010100" // /* MW 4 */ + 11957 "00000111" // /* MW 3 */ + 11958 "10010110" // /* MW 2 */ + 11959 "11001101" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 11960 "00100010" // NOPA; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11961 "00011100" // /* MW 7 */ + 11962 "00000000" // /* MW 6 */ + 11963 "00000000" // /* MW 5 */ + 11964 "00000100" // /* MW 4 */ + 11965 "11110000" // /* MW 3 */ + 11966 "00101100" // /* MW 2 */ + 11967 "00000000" // /* MW 1 */ +.label ZLS_F_Z17softmax_row_majorILi1E8bfloat16S0_Lt1EEvPT0_PT1_33softmax_row_major_internal_params_480 +.src_ref 4 "vector.hpp" 111 101 first +.src_ref 4 "vector.hpp" 111 101 first +.src_ref 5 "accum.hpp" 198 120 first +.src_ref 5 "accum.hpp" 198 120 first +.src_ref 5 "accum.hpp" 938 83 first +.src_ref 5 "accum.hpp" 938 83 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "softmax_row_major.h" 148 30 first +.src_ref 3 "softmax_row_major.h" 176 59 first +.src_ref 3 "softmax_row_major.h" 180 25 first +.begin_of_loop +.aggressive_scheduled_block_id 4 +.noswbrkpt +.loop_nesting 1 + 11968 "11101011" // VLDA.CONV.fp32.bf16 bmll2, [p0], #32;NOPB; VST wl7, [p4], #32; SEL.EQZ r25, r28, r6, r27; VCONV.fp32.bf16 bmll4, wl7; VADD.f dm4, dm1, dm4, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11969 "10000001" // /* MW 15 */ + 11970 "00100001" // /* MW 14 */ + 11971 "01111101" // /* MW 13 */ + 11972 "10011001" // /* MW 12 */ + 11973 "00000111" // /* MW 11 */ + 11974 "00010010" // /* MW 10 */ + 11975 "10010011" // /* MW 9 */ + 11976 "10111001" // /* MW 8 */ + 11977 "11101010" // /* MW 7 */ + 11978 "00011101" // /* MW 6 */ + 11979 "00100100" // /* MW 5 */ + 11980 "00000000" // /* MW 4 */ + 11981 "00110000" // /* MW 3 */ + 11982 "10100001" // /* MW 2 */ + 11983 "00000011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 +.src_ref 3 "softmax_row_major.h" 112 37 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 11984 "11100100" // ADD r26, r26, #1; VMOV bmll1, bmhl0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11985 "00100101" // /* MW 5 */ + 11986 "00000100" // /* MW 4 */ + 11987 "11100010" // /* MW 3 */ + 11988 "10000000" // /* MW 2 */ + 11989 "11010110" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 176 59 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 11990 "11111010" // NOPA; NOPS; LT r27, r26, r5 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11991 "01011010" // /* MW 9 */ + 11992 "10110110" // /* MW 8 */ + 11993 "00000110" // /* MW 7 */ + 11994 "00000000" // /* MW 6 */ + 11995 "01011011" // /* MW 5 */ + 11996 "00000001" // /* MW 4 */ + 11997 "11110000" // /* MW 3 */ + 11998 "00101100" // /* MW 2 */ + 11999 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 212 115 first +.src_ref 5 "accum.hpp" 153 115 first +.src_ref 3 "softmax_row_major.h" 153 29 first +.src_ref 3 "softmax_row_major.h" 176 59 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12000 "11100001" // NOPA; NOPB; VCONV.bf16.fp32 wl6, bmll2; SEL.EQZ r28, r23, r21, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12001 "00000000" // /* MW 15 */ + 12002 "00000000" // /* MW 14 */ + 12003 "01111000" // /* MW 13 */ + 12004 "10100101" // /* MW 12 */ + 12005 "00000001" // /* MW 11 */ + 12006 "10010000" // /* MW 10 */ + 12007 "11001010" // /* MW 9 */ + 12008 "00101111" // /* MW 8 */ + 12009 "00010110" // /* MW 7 */ + 12010 "01000001" // /* MW 6 */ + 12011 "00100011" // /* MW 5 */ + 12012 "00000000" // /* MW 4 */ + 12013 "11110000" // /* MW 3 */ + 12014 "00101100" // /* MW 2 */ + 12015 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 212 115 +.src_ref 5 "accum.hpp" 153 115 +.src_ref 5 "add_accum.hpp" 20 92 first +.src_ref 5 "elementary.hpp" 473 55 first +.src_ref 3 "softmax_row_major.h" 176 61 first + 12016 "11111011" // NOPA; NOPB; NOPS; EQ r27, r5, r26; VEXP2 wl4, bmll3; VSUB.f dm2, dm2, dm0, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12017 "00000001" // /* MW 15 */ + 12018 "00010010" // /* MW 14 */ + 12019 "01111101" // /* MW 13 */ + 12020 "01111001" // /* MW 12 */ + 12021 "00101100" // /* MW 11 */ + 12022 "00111101" // /* MW 10 */ + 12023 "10111101" // /* MW 9 */ + 12024 "00001011" // /* MW 8 */ + 12025 "01011011" // /* MW 7 */ + 12026 "00000001" // /* MW 6 */ + 12027 "00100000" // /* MW 5 */ + 12028 "00000000" // /* MW 4 */ + 12029 "11110000" // /* MW 3 */ + 12030 "00101100" // /* MW 2 */ + 12031 "00000000" // /* MW 1 */ +.src_ref 8 "mask.hpp" 57 33 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 4 "blend.hpp" 163 48 first + 12032 "00001011" // NOPA; NOPB; NOPS; EXTEND.u16 r22, r25; VSEL.16 x7, x1, x4, r22; VMUL.f dm3, x6, x0, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12033 "01100000" // /* MW 15 */ + 12034 "01011111" // /* MW 14 */ + 12035 "00011100" // /* MW 13 */ + 12036 "00011001" // /* MW 12 */ + 12037 "11000101" // /* MW 11 */ + 12038 "10000001" // /* MW 10 */ + 12039 "01100101" // /* MW 9 */ + 12040 "00110011" // /* MW 8 */ + 12041 "01011011" // /* MW 7 */ + 12042 "00000001" // /* MW 6 */ + 12043 "00100000" // /* MW 5 */ + 12044 "00000000" // /* MW 4 */ + 12045 "11110000" // /* MW 3 */ + 12046 "00101100" // /* MW 2 */ + 12047 "00000000" // /* MW 1 */ +.label ZLE_F_Z17softmax_row_majorILi1E8bfloat16S0_Lt1EEvPT0_PT1_33softmax_row_major_internal_params_560 +.src_ref 5 "accum.hpp" 153 115 first +.end_of_loop +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 12048 "11100001" // NOPA; NOPB; NOPS; NOPX; VMOV bmhl0, bmll4; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12049 "00000000" // /* MW 15 */ + 12050 "00000000" // /* MW 14 */ + 12051 "01111000" // /* MW 13 */ + 12052 "00001001" // /* MW 12 */ + 12053 "01001000" // /* MW 11 */ + 12054 "00000000" // /* MW 10 */ + 12055 "00000000" // /* MW 9 */ + 12056 "00000000" // /* MW 8 */ + 12057 "01011011" // /* MW 7 */ + 12058 "00000001" // /* MW 6 */ + 12059 "00100000" // /* MW 5 */ + 12060 "00000000" // /* MW 4 */ + 12061 "11110000" // /* MW 3 */ + 12062 "00101100" // /* MW 2 */ + 12063 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 111 101 first +.src_ref 4 "vector.hpp" 111 101 first +.src_ref 5 "accum.hpp" 198 120 first +.src_ref 5 "accum.hpp" 938 83 first +.src_ref 4 "add_reduce.hpp" 322 47 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "softmax_row_major.h" 176 59 first +.src_ref 3 "softmax_row_major.h" 180 25 first +.aggressive_scheduled_block_id 5 +.noswbrkpt +.loop_nesting 0 + 12064 "11101011" // MOVA r18, #32; NOPB; VST wl7, [p4], #32; SEL.EQZ r25, r28, r6, r27; VCONV.fp32.bf16 bmll4, wl7; VADD.f dm4, dm1, dm4, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12065 "10000001" // /* MW 15 */ + 12066 "00100001" // /* MW 14 */ + 12067 "01111101" // /* MW 13 */ + 12068 "10011001" // /* MW 12 */ + 12069 "00000111" // /* MW 11 */ + 12070 "00010010" // /* MW 10 */ + 12071 "10010011" // /* MW 9 */ + 12072 "10111001" // /* MW 8 */ + 12073 "11101010" // /* MW 7 */ + 12074 "00011101" // /* MW 6 */ + 12075 "00100100" // /* MW 5 */ + 12076 "00000000" // /* MW 4 */ + 12077 "00000000" // /* MW 3 */ + 12078 "00010010" // /* MW 2 */ + 12079 "00000100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 +.src_ref 5 "accum.hpp" 198 120 +.src_ref 3 "softmax_row_major.h" 269 36 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12080 "10111010" // LDA.u8 r7, [p2, #-2]; MOVX vaddSign0, #1; VMOV bmll1, bmhl0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12081 "01111000" // /* MW 9 */ + 12082 "00001001" // /* MW 8 */ + 12083 "10000001" // /* MW 7 */ + 12084 "00000000" // /* MW 6 */ + 12085 "11010010" // /* MW 5 */ + 12086 "00000010" // /* MW 4 */ + 12087 "01010000" // /* MW 3 */ + 12088 "10011101" // /* MW 2 */ + 12089 "01011100" // /* MW 1 */ + 12090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12091 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 212 115 first +.src_ref 5 "accum.hpp" 153 115 first +.src_ref 3 "softmax_row_major.h" 153 29 first + 12092 "00011000" // VCONV.bf16.fp32 wl6, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12093 "00010110" // /* MW 3 */ + 12094 "01000001" // /* MW 2 */ + 12095 "00001011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 212 115 +.src_ref 5 "accum.hpp" 153 115 +.src_ref 5 "elementary.hpp" 473 55 first + 12096 "11111000" // VEXP2 wl4, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12097 "11110010" // /* MW 3 */ + 12098 "01011000" // /* MW 2 */ + 12099 "00011010" // /* MW 1 */ +.src_ref 8 "mask.hpp" 57 33 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 4 "blend.hpp" 163 48 first + 12100 "01011010" // EXTEND.u16 r22, r25; VSEL.16 x7, x1, x4, r22; VMUL.f dm3, x6, x0, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12101 "00000001" // /* MW 9 */ + 12102 "11101100" // /* MW 8 */ + 12103 "10001011" // /* MW 7 */ + 12104 "00100011" // /* MW 6 */ + 12105 "10100011" // /* MW 5 */ + 12106 "00111000" // /* MW 4 */ + 12107 "10110000" // /* MW 3 */ + 12108 "01101100" // /* MW 2 */ + 12109 "00000110" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 12110 "11111000" // VMOV bmhl0, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12111 "00010010" // /* MW 3 */ + 12112 "10010000" // /* MW 2 */ + 12113 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 111 101 first +.src_ref 4 "vector.hpp" 111 101 first +.src_ref 5 "accum.hpp" 198 120 first +.src_ref 5 "accum.hpp" 938 83 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "softmax_row_major.h" 180 25 first +.aggressive_scheduled_block_id 6 +.noswbrkpt + 12114 "01001010" // VST wl7, [p4], #32; VCONV.fp32.bf16 bmll4, wl7; VADD.f dm4, dm1, dm4, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12115 "00111101" // /* MW 9 */ + 12116 "00110000" // /* MW 8 */ + 12117 "10100100" // /* MW 7 */ + 12118 "11100100" // /* MW 6 */ + 12119 "00110010" // /* MW 5 */ + 12120 "00001111" // /* MW 4 */ + 12121 "01010100" // /* MW 3 */ + 12122 "10111101" // /* MW 2 */ + 12123 "10000011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 +.src_ref 3 "softmax_row_major.h" 269 47 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12124 "11100100" // NE r7, r7, r2; VMOV bmll1, bmhl0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12125 "00100101" // /* MW 5 */ + 12126 "00000100" // /* MW 4 */ + 12127 "00010010" // /* MW 3 */ + 12128 "11000101" // /* MW 2 */ + 12129 "00111001" // /* MW 1 */ + 12130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12131 "00000000" // /* MW 1 */ + 12132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12133 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 212 115 first +.src_ref 5 "accum.hpp" 153 115 first +.src_ref 5 "elementary.hpp" 473 55 first + 12134 "11111000" // VEXP2 wl4, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12135 "11110010" // /* MW 3 */ + 12136 "01011000" // /* MW 2 */ + 12137 "00011010" // /* MW 1 */ +.src_ref 4 "blend.hpp" 163 48 first + 12138 "00111000" // VSEL.16 x7, x1, x4, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12139 "00110010" // /* MW 3 */ + 12140 "10001010" // /* MW 2 */ + 12141 "00011011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 12142 "11111000" // VMOV bmhl0, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12143 "00010010" // /* MW 3 */ + 12144 "10010000" // /* MW 2 */ + 12145 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 111 101 first +.src_ref 5 "accum.hpp" 198 120 first +.src_ref 5 "accum.hpp" 938 83 first +.src_ref 5 "add_accum.hpp" 19 92 first +.aggressive_scheduled_block_id 7 +.noswbrkpt + 12146 "01100010" // VCONV.fp32.bf16 bmll4, wl7; VADD.f dm4, dm1, dm4, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12147 "00111101" // /* MW 7 */ + 12148 "00110000" // /* MW 6 */ + 12149 "10100100" // /* MW 5 */ + 12150 "11100110" // /* MW 4 */ + 12151 "00110010" // /* MW 3 */ + 12152 "00001111" // /* MW 2 */ + 12153 "00000100" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12154 "11111000" // VMOV bmll1, bmhl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12155 "00010010" // /* MW 3 */ + 12156 "00000010" // /* MW 2 */ + 12157 "00011001" // /* MW 1 */ + 12158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12159 "00000000" // /* MW 1 */ + 12160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12161 "00000000" // /* MW 1 */ + 12162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12163 "00000000" // /* MW 1 */ + 12164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12165 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first + 12166 "11111000" // VMOV bmhl0, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12167 "00010010" // /* MW 3 */ + 12168 "10010000" // /* MW 2 */ + 12169 "00011000" // /* MW 1 */ + 12170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12171 "00000000" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 322 47 + 12172 "11111000" // VMOV x2, bmhl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12173 "00010010" // /* MW 3 */ + 12174 "00100010" // /* MW 2 */ + 12175 "00011001" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 322 47 first +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id first + 12176 "11011000" // VSHIFT x2, x2, x0, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12177 "01001010" // /* MW 3 */ + 12178 "00010000" // /* MW 2 */ + 12179 "00011001" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first +.src_ref 4 "add_reduce.hpp" 324 44 first +.aggressive_scheduled_block_id 8 +.noswbrkpt + 12180 "01100010" // VMOV bmll2, x2; VADD.f dm0, dm0, dm2, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12181 "00111101" // /* MW 7 */ + 12182 "00001000" // /* MW 6 */ + 12183 "10100000" // /* MW 5 */ + 12184 "11100110" // /* MW 4 */ + 12185 "10010010" // /* MW 3 */ + 12186 "00000100" // /* MW 2 */ + 12187 "00000010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12188 "11111000" // VMOV bmll0, bmhl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12189 "00010010" // /* MW 3 */ + 12190 "00000010" // /* MW 2 */ + 12191 "00011000" // /* MW 1 */ + 12192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12193 "00000000" // /* MW 1 */ + 12194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12195 "00000000" // /* MW 1 */ + 12196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12197 "00000000" // /* MW 1 */ + 12198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12199 "00000000" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 324 22 +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id first + 12200 "11111000" // VMOV x2, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12201 "00010010" // /* MW 3 */ + 12202 "00100000" // /* MW 2 */ + 12203 "00011001" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 322 47 first +.src_ref 4 "add_reduce.hpp" 324 44 +.aggressive_scheduled_block_id 9 +.noswbrkpt + 12204 "01100010" // VSHIFT x2, x2, x0, r1; VADD.f dm0, dm0, dm2, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12205 "00111101" // /* MW 7 */ + 12206 "00001000" // /* MW 6 */ + 12207 "10100000" // /* MW 5 */ + 12208 "11000110" // /* MW 4 */ + 12209 "00000110" // /* MW 3 */ + 12210 "00010000" // /* MW 2 */ + 12211 "00000001" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12212 "11111000" // VMOV bmll2, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12213 "10010010" // /* MW 3 */ + 12214 "00000100" // /* MW 2 */ + 12215 "00011010" // /* MW 1 */ + 12216 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12217 "00000000" // /* MW 1 */ + 12218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12219 "00000000" // /* MW 1 */ + 12220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12221 "00000000" // /* MW 1 */ + 12222 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12223 "00000000" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 324 22 first +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id first + 12224 "11111000" // VMOV x2, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12225 "00010010" // /* MW 3 */ + 12226 "00100000" // /* MW 2 */ + 12227 "00011001" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 322 47 first +.src_ref 4 "add_reduce.hpp" 324 44 +.aggressive_scheduled_block_id 10 +.noswbrkpt + 12228 "01100010" // VSHIFT x2, x2, x0, r4; VADD.f dm0, dm0, dm2, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12229 "00111101" // /* MW 7 */ + 12230 "00001000" // /* MW 6 */ + 12231 "10100000" // /* MW 5 */ + 12232 "11000110" // /* MW 4 */ + 12233 "00010010" // /* MW 3 */ + 12234 "00010000" // /* MW 2 */ + 12235 "00000001" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12236 "11111000" // VMOV bmll2, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12237 "10010010" // /* MW 3 */ + 12238 "00000100" // /* MW 2 */ + 12239 "00011010" // /* MW 1 */ + 12240 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12241 "00000000" // /* MW 1 */ + 12242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12243 "00000000" // /* MW 1 */ + 12244 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12245 "00000000" // /* MW 1 */ + 12246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12247 "00000000" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 324 22 first +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id first + 12248 "11111000" // VMOV x2, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12249 "00010010" // /* MW 3 */ + 12250 "00100000" // /* MW 2 */ + 12251 "00011001" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 322 47 first +.src_ref 4 "add_reduce.hpp" 324 44 +.aggressive_scheduled_block_id 11 +.noswbrkpt + 12252 "01100010" // VSHIFT x2, x2, x0, r3; VADD.f dm0, dm0, dm2, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12253 "00111101" // /* MW 7 */ + 12254 "00001000" // /* MW 6 */ + 12255 "10100000" // /* MW 5 */ + 12256 "11000110" // /* MW 4 */ + 12257 "00001110" // /* MW 3 */ + 12258 "00010000" // /* MW 2 */ + 12259 "00000001" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12260 "11111000" // VMOV bmll2, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12261 "10010010" // /* MW 3 */ + 12262 "00000100" // /* MW 2 */ + 12263 "00011010" // /* MW 1 */ + 12264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12265 "00000000" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 269 26 first + 12266 "10000100" // JNZ r7, #12320 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12320 delay_slots=5 */ + 12267 "00000001" // /* MW 5 */ + 12268 "01000000" // /* MW 4 */ + 12269 "00010000" // /* MW 3 */ + 12270 "00011000" // /* MW 2 */ + 12271 "00111000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 111 101 first +.src_ref 3 "softmax_row_major.h" 180 25 first +.delay_slot + 12272 "10011000" // VST wl7, [p4], #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12273 "11101010" // /* MW 3 */ + 12274 "00011101" // /* MW 2 */ + 12275 "00001100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12277 "00000000" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 324 22 first +.delay_slot + 12278 "11111000" // VMOV x2, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12279 "00010010" // /* MW 3 */ + 12280 "00100000" // /* MW 2 */ + 12281 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.delay_slot + 12282 "10111000" // VEXTRACT.32 r2, x2, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12283 "00000001" // /* MW 3 */ + 12284 "10001010" // /* MW 2 */ + 12285 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12287 "00000000" // /* MW 1 */ + 12288 "10000100" // J #12336 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=12336 delay_slots=5 */ + 12289 "00000000" // /* MW 5 */ + 12290 "00000000" // /* MW 4 */ + 12291 "00011000" // /* MW 3 */ + 12292 "00011000" // /* MW 2 */ + 12293 "00000000" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 269 54 +.delay_slot + 12294 "01000100" // MOVXM r1, #1065353216 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12295 "00000000" // /* MW 5 */ + 12296 "10100000" // /* MW 4 */ + 12297 "00000000" // /* MW 3 */ + 12298 "10000000" // /* MW 2 */ + 12299 "00111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12300 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12301 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12303 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12305 "00000000" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 269 54 first +.delay_slot + 12306 "00101110" // NOPA; NOPS; VINSERT.32 x0, x0, #0, r1; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 12307 "00011100" // /* MW 13 */ + 12308 "00000000" // /* MW 12 */ + 12309 "00000000" // /* MW 11 */ + 12310 "10001011" // /* MW 10 */ + 12311 "00000001" // /* MW 9 */ + 12312 "01000000" // /* MW 8 */ + 12313 "00000000" // /* MW 7 */ + 12314 "00000000" // /* MW 6 */ + 12315 "10110110" // /* MW 5 */ + 12316 "00000010" // /* MW 4 */ + 12317 "11110000" // /* MW 3 */ + 12318 "00101100" // /* MW 2 */ + 12319 "00000000" // /* MW 1 */ +.label TGT_F_Z17softmax_row_majorILi1E8bfloat16S0_Lt1EEvPT0_PT1_33softmax_row_major_internal_params_832 +.src_ref 5 "elementary.hpp" 618 15 first + 12320 "00011000" // INV r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12321 "00000100" // /* MW 3 */ + 12322 "10000011" // /* MW 2 */ + 12323 "00010000" // /* MW 1 */ + 12324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12325 "00000000" // /* MW 1 */ + 12326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12327 "00000000" // /* MW 1 */ + 12328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12329 "00000000" // /* MW 1 */ +.src_ref 3 "softmax.h" 166 25 first + 12330 "11010100" // NOPA; VINSERT.32 x0, x0, #0, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12331 "01100010" // /* MW 5 */ + 12332 "00000000" // /* MW 4 */ + 12333 "11110000" // /* MW 3 */ + 12334 "00101100" // /* MW 2 */ + 12335 "00000000" // /* MW 1 */ +.label TGT_F_Z17softmax_row_majorILi1E8bfloat16S0_Lt1EEvPT0_PT1_33softmax_row_major_internal_params_848 +.src_ref 4 "vector.hpp" 212 115 first +.src_ref 5 "accum.hpp" 153 115 +.src_ref 5 "accum.hpp" 1117 103 +.src_ref 3 "softmax_row_major.h" 269 +.src_ref 3 "softmax_row_major.h" 275 25 first +.src_ref 3 "softmax_row_major.h" 277 31 +.aggressive_scheduled_block_id 12 +.aggressive_scheduled_block_id first + 12336 "10111010" // VLDB wl1, [p1], #32; MOVS p0, p1; VMOV bmll0, x0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12337 "01110110" // /* MW 9 */ + 12338 "01001001" // /* MW 8 */ + 12339 "00000000" // /* MW 7 */ + 12340 "00000000" // /* MW 6 */ + 12341 "01100100" // /* MW 5 */ + 12342 "00011100" // /* MW 4 */ + 12343 "01100001" // /* MW 3 */ + 12344 "10010001" // /* MW 2 */ + 12345 "00010000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 212 115 +.src_ref 3 "softmax_row_major.h" 273 12 first +.src_ref 3 "softmax_row_major.h" 275 25 +.aggressive_scheduled_block_id 12 +.noswbrkpt + 12346 "00111010" // VLDB wl1, [p1], #32; MOVXM ls, #12432 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12347 "00010000" // /* MW 9 */ + 12348 "01001000" // /* MW 8 */ + 12349 "01111000" // /* MW 7 */ + 12350 "00001100" // /* MW 6 */ + 12351 "00000000" // /* MW 5 */ + 12352 "00000000" // /* MW 4 */ + 12353 "11001000" // /* MW 3 */ + 12354 "00111000" // /* MW 2 */ + 12355 "00000010" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 269 first +.src_ref 3 "softmax_row_major.h" 273 12 +.aggressive_scheduled_block_id 12 +.nohwbrkpt +.noswbrkpt + 12356 "00111010" // VCONV.bf16.fp32 wl0, bmll0; MOVXM le, #12480 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12357 "00010001" // /* MW 9 */ + 12358 "01100000" // /* MW 8 */ + 12359 "10111000" // /* MW 7 */ + 12360 "00001101" // /* MW 6 */ + 12361 "00000000" // /* MW 5 */ + 12362 "00000000" // /* MW 4 */ + 12363 "11000000" // /* MW 3 */ + 12364 "00000010" // /* MW 2 */ + 12365 "00001000" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 273 12 first +.aggressive_scheduled_block_id 12 +.nohwbrkpt +.noswbrkpt + 12366 "10011000" // ADD.NC lc, r0, #-3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12367 "01111110" // /* MW 3 */ + 12368 "01110000" // /* MW 2 */ + 12369 "00011101" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 269 first +.aggressive_scheduled_block_id 12 +.nohwbrkpt +.noswbrkpt + 12370 "01011000" // VEXTBCST.16 x0, x0, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12371 "00000011" // /* MW 3 */ + 12372 "00000001" // /* MW 2 */ + 12373 "00011000" // /* MW 1 */ +.aggressive_scheduled_block_id 12 +.nohwbrkpt +.noswbrkpt + 12374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12375 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 12 +.nohwbrkpt +.noswbrkpt + 12376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12377 "00000000" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.aggressive_scheduled_block_id 12 +.nohwbrkpt +.noswbrkpt + 12378 "01001000" // VMUL.f dm0, x1, x0, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12379 "00000001" // /* MW 3 */ + 12380 "11100010" // /* MW 2 */ + 12381 "10001000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 212 115 first +.src_ref 3 "softmax_row_major.h" 275 25 first +.aggressive_scheduled_block_id 12 +.noswbrkpt + 12382 "00011000" // VLDB wl1, [p1], #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12383 "01100100" // /* MW 3 */ + 12384 "00011100" // /* MW 2 */ + 12385 "00111001" // /* MW 1 */ +.aggressive_scheduled_block_id 12 +.nohwbrkpt +.noswbrkpt + 12386 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 12387 "00011100" // /* MW 13 */ + 12388 "00000000" // /* MW 12 */ + 12389 "00000000" // /* MW 11 */ + 12390 "01010111" // /* MW 10 */ + 12391 "00011010" // /* MW 9 */ + 12392 "01000000" // /* MW 8 */ + 12393 "00000000" // /* MW 7 */ + 12394 "00000000" // /* MW 6 */ + 12395 "10110110" // /* MW 5 */ + 12396 "00000010" // /* MW 4 */ + 12397 "11110000" // /* MW 3 */ + 12398 "00101100" // /* MW 2 */ + 12399 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 12 +.nohwbrkpt +.noswbrkpt + 12400 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12401 "00000000" // /* MW 15 */ + 12402 "00000000" // /* MW 14 */ + 12403 "01111000" // /* MW 13 */ + 12404 "10100101" // /* MW 12 */ + 12405 "00000001" // /* MW 11 */ + 12406 "00000000" // /* MW 10 */ + 12407 "00000000" // /* MW 9 */ + 12408 "00000000" // /* MW 8 */ + 12409 "01011011" // /* MW 7 */ + 12410 "00000001" // /* MW 6 */ + 12411 "00100000" // /* MW 5 */ + 12412 "00000000" // /* MW 4 */ + 12413 "11110000" // /* MW 3 */ + 12414 "00101100" // /* MW 2 */ + 12415 "00000000" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.aggressive_scheduled_block_id 12 +.nohwbrkpt +.noswbrkpt + 12416 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMUL.f dm0, x1, x0, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12417 "00010000" // /* MW 15 */ + 12418 "01000111" // /* MW 14 */ + 12419 "01111100" // /* MW 13 */ + 12420 "10100101" // /* MW 12 */ + 12421 "00000001" // /* MW 11 */ + 12422 "00000000" // /* MW 10 */ + 12423 "00000000" // /* MW 9 */ + 12424 "00000000" // /* MW 8 */ + 12425 "01011011" // /* MW 7 */ + 12426 "00000001" // /* MW 6 */ + 12427 "00100000" // /* MW 5 */ + 12428 "00000000" // /* MW 4 */ + 12429 "11110000" // /* MW 3 */ + 12430 "00101100" // /* MW 2 */ + 12431 "00000000" // /* MW 1 */ +.label ZLS_F_Z17softmax_row_majorILi1E8bfloat16S0_Lt1EEvPT0_PT1_33softmax_row_major_internal_params_944 +.src_ref 4 "vector.hpp" 212 115 first +.src_ref 3 "softmax_row_major.h" 275 25 first +.begin_of_loop +.aggressive_scheduled_block_id 12 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 12432 "11100001" // NOPA; VLDB wl1, [p1], #32; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12433 "00000000" // /* MW 15 */ + 12434 "00000000" // /* MW 14 */ + 12435 "01111000" // /* MW 13 */ + 12436 "10100101" // /* MW 12 */ + 12437 "00000001" // /* MW 11 */ + 12438 "00000000" // /* MW 10 */ + 12439 "00000000" // /* MW 9 */ + 12440 "00000000" // /* MW 8 */ + 12441 "01011011" // /* MW 7 */ + 12442 "00000001" // /* MW 6 */ + 12443 "11001000" // /* MW 5 */ + 12444 "00111000" // /* MW 4 */ + 12445 "11110010" // /* MW 3 */ + 12446 "00101100" // /* MW 2 */ + 12447 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.src_ref 5 "accum.hpp" 1117 103 first +.src_ref 3 "softmax_row_major.h" 277 31 first +.aggressive_scheduled_block_id 12 +.nohwbrkpt +.noswbrkpt + 12448 "11100001" // NOPA; NOPB; VST.CONV.bf16.fp32 bmll0, [p0], #32;NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12449 "00000000" // /* MW 15 */ + 12450 "00000000" // /* MW 14 */ + 12451 "01111000" // /* MW 13 */ + 12452 "10100101" // /* MW 12 */ + 12453 "00000001" // /* MW 11 */ + 12454 "00000000" // /* MW 10 */ + 12455 "00000000" // /* MW 9 */ + 12456 "10000000" // /* MW 8 */ + 12457 "00010010" // /* MW 7 */ + 12458 "00011100" // /* MW 6 */ + 12459 "00100000" // /* MW 5 */ + 12460 "00000000" // /* MW 4 */ + 12461 "11110000" // /* MW 3 */ + 12462 "00101100" // /* MW 2 */ + 12463 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 12 +.nohwbrkpt +.noswbrkpt + 12464 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12465 "00000000" // /* MW 15 */ + 12466 "00000000" // /* MW 14 */ + 12467 "01111000" // /* MW 13 */ + 12468 "10100101" // /* MW 12 */ + 12469 "00000001" // /* MW 11 */ + 12470 "00000000" // /* MW 10 */ + 12471 "00000000" // /* MW 9 */ + 12472 "00000000" // /* MW 8 */ + 12473 "01011011" // /* MW 7 */ + 12474 "00000001" // /* MW 6 */ + 12475 "00100000" // /* MW 5 */ + 12476 "00000000" // /* MW 4 */ + 12477 "11110000" // /* MW 3 */ + 12478 "00101100" // /* MW 2 */ + 12479 "00000000" // /* MW 1 */ +.label ZLE_F_Z17softmax_row_majorILi1E8bfloat16S0_Lt1EEvPT0_PT1_33softmax_row_major_internal_params_992 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.end_of_loop +.aggressive_scheduled_block_id 12 +.nohwbrkpt +.noswbrkpt + 12480 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMUL.f dm0, x1, x0, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12481 "00010000" // /* MW 15 */ + 12482 "01000111" // /* MW 14 */ + 12483 "01111100" // /* MW 13 */ + 12484 "10100101" // /* MW 12 */ + 12485 "00000001" // /* MW 11 */ + 12486 "00000000" // /* MW 10 */ + 12487 "00000000" // /* MW 9 */ + 12488 "00000000" // /* MW 8 */ + 12489 "01011011" // /* MW 7 */ + 12490 "00000001" // /* MW 6 */ + 12491 "00100000" // /* MW 5 */ + 12492 "00000000" // /* MW 4 */ + 12493 "11110000" // /* MW 3 */ + 12494 "00101100" // /* MW 2 */ + 12495 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 12 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 12496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12497 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.src_ref 5 "accum.hpp" 1117 103 first +.src_ref 3 "softmax_row_major.h" 277 31 first +.aggressive_scheduled_block_id 12 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12498 "10011000" // VST.CONV.bf16.fp32 bmll0, [p0], #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12499 "00010010" // /* MW 3 */ + 12500 "00011100" // /* MW 2 */ + 12501 "00001000" // /* MW 1 */ +.aggressive_scheduled_block_id 13 +.aggressive_scheduled_block_id first + 12502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12503 "00000000" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.aggressive_scheduled_block_id 13 +.noswbrkpt + 12504 "01001000" // VMUL.f dm0, x1, x0, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12505 "00000001" // /* MW 3 */ + 12506 "11100010" // /* MW 2 */ + 12507 "10001000" // /* MW 1 */ +.aggressive_scheduled_block_id 13 +.nohwbrkpt +.noswbrkpt + 12508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12509 "00000000" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 281 first +.aggressive_scheduled_block_id 13 +.nohwbrkpt +.noswbrkpt + 12510 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12511 "00000000" // /* MW 3 */ + 12512 "00101000" // /* MW 2 */ + 12513 "00010000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.src_ref 5 "accum.hpp" 1117 103 first +.src_ref 3 "softmax_row_major.h" 277 31 first +.delay_slot +.aggressive_scheduled_block_id 13 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12514 "10011000" // VST.CONV.bf16.fp32 bmll0, [p0], #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12515 "00010010" // /* MW 3 */ + 12516 "00011100" // /* MW 2 */ + 12517 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12519 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12521 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 +.src_ref 5 "accum.hpp" 1117 103 +.src_ref 3 "softmax_row_major.h" 277 31 +.delay_slot + 12522 "10011000" // VST.CONV.bf16.fp32 bmll0, [p0], #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12523 "00010010" // /* MW 3 */ + 12524 "00011100" // /* MW 2 */ + 12525 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z17softmax_row_majorILi1E8bfloat16S0_Lt1EEvPT0_PT1_33softmax_row_major_internal_params__end +.label __Z17softmax_row_majorILi1E8bfloat16S0_Lt1EEvPT0_PT1_33softmax_row_major_internal_params___func_end0 + 12527 "00000000" // /* MW 1 */ +.label __ZN12mllib_graphs19softmax_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERA8_KjRNSD_ISE_NSF_3outET1_EE___func_begin0 +.label _ZN12mllib_graphs19softmax_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERA8_KjRNSD_ISE_NSF_3outET1_EE +.function softmax_adf_wrapper, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> > > _ZN12mllib_graphs19softmax_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERA8_KjRNSD_ISE_NSF_3outET1_EE +.src_ref 9 "softmax_adf_wrapper.cpp" 34 first +.src_ref 9 "softmax_adf_wrapper.cpp" 46 30 +.src_ref 9 "softmax_adf_wrapper.cpp" 57 56 +.function_start + 12528 "00000010" // MOVS p7, p1; MOV p3, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12529 "01110000" // /* MW 7 */ + 12530 "01100000" // /* MW 6 */ + 12531 "10110111" // /* MW 5 */ + 12532 "00000001" // /* MW 4 */ + 12533 "01100000" // /* MW 3 */ + 12534 "10010001" // /* MW 2 */ + 12535 "11110000" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 37 17 + 12536 "00111010" // MOVS p1, p6; MOVXM p6, #508596 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12537 "00010001" // /* MW 9 */ + 12538 "01011010" // /* MW 8 */ + 12539 "00110001" // /* MW 7 */ + 12540 "11110011" // /* MW 6 */ + 12541 "00000001" // /* MW 5 */ + 12542 "00000000" // /* MW 4 */ + 12543 "01100000" // /* MW 3 */ + 12544 "00010001" // /* MW 2 */ + 12545 "00110011" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 37 17 first + 12546 "10011000" // LDA r16, [p6], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12547 "00010110" // /* MW 3 */ + 12548 "11011110" // /* MW 2 */ + 12549 "00000110" // /* MW 1 */ + 12550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12551 "00000000" // /* MW 1 */ + 12552 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12553 "00000000" // /* MW 1 */ + 12554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12555 "00000000" // /* MW 1 */ + 12556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12557 "00000000" // /* MW 1 */ + 12558 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12559 "00000000" // /* MW 1 */ + 12560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12561 "00000000" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 37 7 +.src_ref 9 "softmax_adf_wrapper.cpp" 37 27 + 12562 "10000100" // JNZ r16, #12784 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12784 delay_slots=5 */ + 12563 "00000001" // /* MW 5 */ + 12564 "01000000" // /* MW 4 */ + 12565 "11111000" // /* MW 3 */ + 12566 "00011000" // /* MW 2 */ + 12567 "10000000" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 34 +.delay_slot + 12568 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12569 "00000001" // /* MW 5 */ + 12570 "00000000" // /* MW 4 */ + 12571 "00000000" // /* MW 3 */ + 12572 "00010000" // /* MW 2 */ + 12573 "00000000" // /* MW 1 */ +.delay_slot + 12574 "10011000" // ST p1, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12575 "10011101" // /* MW 3 */ + 12576 "11111100" // /* MW 2 */ + 12577 "00001111" // /* MW 1 */ +.delay_slot + 12578 "10011000" // ST p3, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12579 "10011101" // /* MW 3 */ + 12580 "11111001" // /* MW 2 */ + 12581 "00001111" // /* MW 1 */ +.delay_slot + 12582 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12583 "00111101" // /* MW 3 */ + 12584 "11110100" // /* MW 2 */ + 12585 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.delay_slot + 12586 "11111000" // MOV p1, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12587 "11000000" // /* MW 3 */ + 12588 "01100100" // /* MW 2 */ + 12589 "00011001" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 42 26 +.src_ref 9 "softmax_adf_wrapper.cpp" 42 26 +.src_ref 9 "softmax_adf_wrapper.cpp" 46 30 + 12590 "01110110" // MOVA m0, #5; MOVS p3, p7; MOVXM p2, #508600 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12591 "00010000" // /* MW 11 */ + 12592 "01011100" // /* MW 10 */ + 12593 "00110001" // /* MW 9 */ + 12594 "11110001" // /* MW 8 */ + 12595 "00000001" // /* MW 7 */ + 12596 "00000000" // /* MW 6 */ + 12597 "10001011" // /* MW 5 */ + 12598 "10011100" // /* MW 4 */ + 12599 "10000011" // /* MW 3 */ + 12600 "10100000" // /* MW 2 */ + 12601 "00000000" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 43 58 +.src_ref 9 "softmax_adf_wrapper.cpp" 44 27 + 12602 "10111010" // MOVA m2, #-24; MOVX r17, #-16; MOV r16, CORE_ID /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12603 "01111000" // /* MW 9 */ + 12604 "01110000" // /* MW 8 */ + 12605 "00001101" // /* MW 7 */ + 12606 "00001010" // /* MW 6 */ + 12607 "00010110" // /* MW 5 */ + 12608 "00111111" // /* MW 4 */ + 12609 "10000000" // /* MW 3 */ + 12610 "00001000" // /* MW 2 */ + 12611 "11111101" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 42 26 first +.src_ref 9 "softmax_adf_wrapper.cpp" 43 58 first +.src_ref 9 "softmax_adf_wrapper.cpp" 44 40 +.src_ref 9 "softmax_adf_wrapper.cpp" 47 30 + 12612 "01110110" // MOVA m0, #-6; ST r16, [p2], m0; LSHL r16, r16, r17; ADD.NC r18, r16, #-2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12613 "10001000" // /* MW 11 */ + 12614 "00111111" // /* MW 10 */ + 12615 "01001100" // /* MW 9 */ + 12616 "11101110" // /* MW 8 */ + 12617 "00001000" // /* MW 7 */ + 12618 "10100001" // /* MW 6 */ + 12619 "00010001" // /* MW 5 */ + 12620 "00001010" // /* MW 4 */ + 12621 "10000010" // /* MW 3 */ + 12622 "01000000" // /* MW 2 */ + 12623 "11111111" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 9 "softmax_adf_wrapper.cpp" 43 27 +.src_ref 9 "softmax_adf_wrapper.cpp" 49 29 + 12624 "10111010" // ST.s8 r16, [p2], #-1; MOVX r17, #1; MOV m1, #12 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12625 "01011000" // /* MW 9 */ + 12626 "00001100" // /* MW 8 */ + 12627 "10000000" // /* MW 7 */ + 12628 "00101000" // /* MW 6 */ + 12629 "00010000" // /* MW 5 */ + 12630 "00000001" // /* MW 4 */ + 12631 "11100000" // /* MW 3 */ + 12632 "11000000" // /* MW 2 */ + 12633 "01011111" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.src_ref 9 "softmax_adf_wrapper.cpp" 46 30 first + 12634 "10111010" // LDA r19, [p3], #4; MOVXM p4, #508464 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12635 "00010000" // /* MW 9 */ + 12636 "00011000" // /* MW 8 */ + 12637 "00110001" // /* MW 7 */ + 12638 "11110010" // /* MW 6 */ + 12639 "00000001" // /* MW 5 */ + 12640 "00000000" // /* MW 4 */ + 12641 "11010000" // /* MW 3 */ + 12642 "11001110" // /* MW 2 */ + 12643 "01100011" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 + 12644 "10111010" // MOVA r25, #0; MOVXM p5, #508460 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12645 "00010000" // /* MW 9 */ + 12646 "00010110" // /* MW 8 */ + 12647 "10110001" // /* MW 7 */ + 12648 "11110010" // /* MW 6 */ + 12649 "00000001" // /* MW 5 */ + 12650 "00000000" // /* MW 4 */ + 12651 "00000000" // /* MW 3 */ + 12652 "00011001" // /* MW 2 */ + 12653 "00000000" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 49 29 + 12654 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12655 "00000001" // /* MW 3 */ + 12656 "00110000" // /* MW 2 */ + 12657 "00010000" // /* MW 1 */ + 12658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12659 "00000000" // /* MW 1 */ + 12660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12661 "00000000" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 48 31 first + 12662 "00011000" // EXTEND.u8 r16, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12663 "10010000" // /* MW 3 */ + 12664 "10100000" // /* MW 2 */ + 12665 "00010100" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 44 27 first + 12666 "00011000" // ST.s8 r18, [p2], m2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12667 "01000111" // /* MW 3 */ + 12668 "01001010" // /* MW 2 */ + 12669 "00000010" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 48 50 first + 12670 "10011000" // MUL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12671 "00001111" // /* MW 3 */ + 12672 "11100001" // /* MW 2 */ + 12673 "00010100" // /* MW 1 */ + 12674 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12675 "00000000" // /* MW 1 */ + 12676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12677 "00000000" // /* MW 1 */ + 12678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12679 "00000000" // /* MW 1 */ + 12680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12681 "00000000" // /* MW 1 */ + 12682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12683 "00000000" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 46 23 first + 12684 "10011000" // ST r19, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12685 "01110001" // /* MW 3 */ + 12686 "00011110" // /* MW 2 */ + 12687 "00001010" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.src_ref 9 "softmax_adf_wrapper.cpp" 47 37 first + 12688 "00001100" // LDA el0, [p3], #8; ST r17, [p5] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12689 "01100011" // /* MW 5 */ + 12690 "00001100" // /* MW 4 */ + 12691 "11011010" // /* MW 3 */ + 12692 "10000101" // /* MW 2 */ + 12693 "01100101" // /* MW 1 */ + 12694 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12695 "00000000" // /* MW 1 */ + 12696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12697 "00000000" // /* MW 1 */ + 12698 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12699 "00000000" // /* MW 1 */ + 12700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12701 "00000000" // /* MW 1 */ + 12702 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12703 "00000000" // /* MW 1 */ + 12704 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12705 "00000000" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 47 30 + 12706 "10011000" // ST el0, [p2], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12707 "00101001" // /* MW 3 */ + 12708 "00001000" // /* MW 2 */ + 12709 "00001010" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 48 29 first + 12710 "00011000" // ST.s16 r16, [p2], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12711 "00010111" // /* MW 3 */ + 12712 "11111110" // /* MW 2 */ + 12713 "00000010" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 50 34 first + 12714 "10011000" // LDA el0, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12715 "00101110" // /* MW 3 */ + 12716 "00000100" // /* MW 2 */ + 12717 "00000011" // /* MW 1 */ + 12718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12719 "00000000" // /* MW 1 */ + 12720 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12721 "00000000" // /* MW 1 */ + 12722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12723 "00000000" // /* MW 1 */ + 12724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12725 "00000000" // /* MW 1 */ + 12726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12727 "00000000" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 49 29 first + 12728 "00011000" // ST.s8 r24, [p2], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12729 "00000111" // /* MW 3 */ + 12730 "00101011" // /* MW 2 */ + 12731 "00000010" // /* MW 1 */ + 12732 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12733 "00000000" // /* MW 1 */ + 12734 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12735 "00000000" // /* MW 1 */ + 12736 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12737 "00000000" // /* MW 1 */ + 12738 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12739 "00000000" // /* MW 1 */ + 12740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12741 "00000000" // /* MW 1 */ + 12742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12743 "00000000" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 50 27 first + 12744 "10011000" // ST el0, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12745 "00101001" // /* MW 3 */ + 12746 "00000100" // /* MW 2 */ + 12747 "00001010" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 first + 12748 "00011000" // ST.s8 r25, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12749 "00100111" // /* MW 3 */ + 12750 "00000111" // /* MW 2 */ + 12751 "00000100" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 51 35 first + 12752 "10011000" // LDA el0, [p3, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12753 "00101110" // /* MW 3 */ + 12754 "00010100" // /* MW 2 */ + 12755 "00000011" // /* MW 1 */ + 12756 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12757 "00000000" // /* MW 1 */ + 12758 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12759 "00000000" // /* MW 1 */ + 12760 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12761 "00000000" // /* MW 1 */ + 12762 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12763 "00000000" // /* MW 1 */ + 12764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12765 "00000000" // /* MW 1 */ + 12766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12767 "00000000" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 51 28 + 12768 "11100001" // NOPA; NOPB; ST el0, [p2, #4]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12769 "00000000" // /* MW 15 */ + 12770 "00000000" // /* MW 14 */ + 12771 "01111000" // /* MW 13 */ + 12772 "10100101" // /* MW 12 */ + 12773 "00000001" // /* MW 11 */ + 12774 "00000000" // /* MW 10 */ + 12775 "00000000" // /* MW 9 */ + 12776 "10000000" // /* MW 8 */ + 12777 "00101001" // /* MW 7 */ + 12778 "00010100" // /* MW 6 */ + 12779 "00100010" // /* MW 5 */ + 12780 "00000000" // /* MW 4 */ + 12781 "11110000" // /* MW 3 */ + 12782 "00101100" // /* MW 2 */ + 12783 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs19softmax_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERA8_KjRNSD_ISE_NSF_3outET1_EE_256 +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 first + 12784 "01010100" // LDA eh0, [p6], #4; MOV m0, #-60 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12785 "00010001" // /* MW 5 */ + 12786 "00011111" // /* MW 4 */ + 12787 "11010000" // /* MW 3 */ + 12788 "10000001" // /* MW 2 */ + 12789 "11000011" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 + 12790 "11010100" // LDA el0, [p6], #4; MOV p2, sp /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12791 "11000001" // /* MW 5 */ + 12792 "11001011" // /* MW 4 */ + 12793 "11010100" // /* MW 3 */ + 12794 "10000101" // /* MW 2 */ + 12795 "11000011" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 + 12796 "00111100" // LDA el3, [p6], #4; PADDB [p2], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12797 "00100000" // /* MW 5 */ + 12798 "11011111" // /* MW 4 */ + 12799 "11010101" // /* MW 3 */ + 12800 "10011101" // /* MW 2 */ + 12801 "11000011" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 + 12802 "10011000" // LDA el2, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12803 "10101110" // /* MW 3 */ + 12804 "00011100" // /* MW 2 */ + 12805 "00000110" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 + 12806 "10011000" // LDA el1, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12807 "01101110" // /* MW 3 */ + 12808 "00011100" // /* MW 2 */ + 12809 "00000110" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 + 12810 "10011000" // LDA eh1, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12811 "01001110" // /* MW 3 */ + 12812 "00011100" // /* MW 2 */ + 12813 "00000110" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 + 12814 "10011000" // LDA eh2, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12815 "10001110" // /* MW 3 */ + 12816 "00011100" // /* MW 2 */ + 12817 "00000110" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 + 12818 "00001100" // LDA eh0, [p6], #4; ST eh0, [sp, #-120] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12819 "00011011" // /* MW 5 */ + 12820 "00010000" // /* MW 4 */ + 12821 "11011111" // /* MW 3 */ + 12822 "10000001" // /* MW 2 */ + 12823 "11000011" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 + 12824 "00001100" // LDA el0, [p6], #4; ST el0, [sp, #-116] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12825 "01011011" // /* MW 5 */ + 12826 "00011000" // /* MW 4 */ + 12827 "11011111" // /* MW 3 */ + 12828 "10000101" // /* MW 2 */ + 12829 "11000011" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 + 12830 "00001100" // LDA el3, [p6], #4; ST el3, [sp, #-112] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12831 "11011011" // /* MW 5 */ + 12832 "00100001" // /* MW 4 */ + 12833 "11011111" // /* MW 3 */ + 12834 "10011101" // /* MW 2 */ + 12835 "11000011" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 + 12836 "00001100" // LDA el2, [p6], #4; ST el2, [sp, #-108] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12837 "01011011" // /* MW 5 */ + 12838 "00101001" // /* MW 4 */ + 12839 "11011111" // /* MW 3 */ + 12840 "10010101" // /* MW 2 */ + 12841 "11000011" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 + 12842 "00001100" // LDA el1, [p6], #4; ST el1, [sp, #-104] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12843 "11011011" // /* MW 5 */ + 12844 "00110000" // /* MW 4 */ + 12845 "11011111" // /* MW 3 */ + 12846 "10001101" // /* MW 2 */ + 12847 "11000011" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 + 12848 "00001100" // LDA eh1, [p6], #4; ST eh1, [sp, #-100] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12849 "10011011" // /* MW 5 */ + 12850 "00111000" // /* MW 4 */ + 12851 "11011111" // /* MW 3 */ + 12852 "10001001" // /* MW 2 */ + 12853 "11000011" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 + 12854 "00001100" // LDA eh2, [p6], m0; ST eh2, [sp, #-96] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12855 "00011011" // /* MW 5 */ + 12856 "01000001" // /* MW 4 */ + 12857 "11011111" // /* MW 3 */ + 12858 "00010001" // /* MW 2 */ + 12859 "11000001" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 + 12860 "00001100" // LDA eh0, [p6], #4; ST eh0, [sp, #-92] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12861 "00011011" // /* MW 5 */ + 12862 "01001000" // /* MW 4 */ + 12863 "11011111" // /* MW 3 */ + 12864 "10000001" // /* MW 2 */ + 12865 "11000011" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 + 12866 "00001100" // LDA el0, [p6]; ST el0, [sp, #-88] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12867 "01011011" // /* MW 5 */ + 12868 "01010000" // /* MW 4 */ + 12869 "11011111" // /* MW 3 */ + 12870 "10000101" // /* MW 2 */ + 12871 "11000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 + 12872 "00001100" // LDA p0, [p0]; ST el3, [sp, #-84] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12873 "11011011" // /* MW 5 */ + 12874 "01011001" // /* MW 4 */ + 12875 "11011111" // /* MW 3 */ + 12876 "10000011" // /* MW 2 */ + 12877 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 first + 12878 "00001100" // LDA p1, [p1]; ST el2, [sp, #-80] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12879 "01011011" // /* MW 5 */ + 12880 "01100001" // /* MW 4 */ + 12881 "11011111" // /* MW 3 */ + 12882 "10010011" // /* MW 2 */ + 12883 "00100000" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 54 4 +.no_stack_arguments + 12884 "00000100" // JL #11488 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11488 delay_slots=5 */ + 12885 "00000001" // /* MW 5 */ + 12886 "00000000" // /* MW 4 */ + 12887 "01110000" // /* MW 3 */ + 12888 "00010110" // /* MW 2 */ + 12889 "00000000" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 +.delay_slot + 12890 "10011000" // ST el1, [sp, #-76] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12891 "01101101" // /* MW 3 */ + 12892 "10110100" // /* MW 2 */ + 12893 "00001111" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 +.delay_slot + 12894 "10011000" // ST eh1, [sp, #-72] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12895 "01001101" // /* MW 3 */ + 12896 "10111000" // /* MW 2 */ + 12897 "00001111" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 +.delay_slot + 12898 "10011000" // ST eh2, [sp, #-68] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12899 "10001101" // /* MW 3 */ + 12900 "10111100" // /* MW 2 */ + 12901 "00001111" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 +.delay_slot + 12902 "10011000" // ST eh0, [sp, #-128] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12903 "00001101" // /* MW 3 */ + 12904 "10000000" // /* MW 2 */ + 12905 "00001111" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 +.delay_slot + 12906 "00001100" // NOPA; ST el0, [sp, #-124] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12907 "01011011" // /* MW 5 */ + 12908 "00001000" // /* MW 4 */ + 12909 "11111111" // /* MW 3 */ + 12910 "00101100" // /* MW 2 */ + 12911 "00000000" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 56 23 first +.src_ref 9 "softmax_adf_wrapper.cpp" 57 26 +.return_address + 12912 "00101100" // LDA r17, [p6, #16]; MOVX r16, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12913 "00000010" // /* MW 5 */ + 12914 "01000000" // /* MW 4 */ + 12915 "11010000" // /* MW 3 */ + 12916 "11000110" // /* MW 2 */ + 12917 "11001000" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 58 + 12918 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12919 "00111001" // /* MW 3 */ + 12920 "11110100" // /* MW 2 */ + 12921 "00000111" // /* MW 1 */ + 12922 "00011000" // LDA p2, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12923 "00011001" // /* MW 3 */ + 12924 "11111001" // /* MW 2 */ + 12925 "00000111" // /* MW 1 */ + 12926 "00011000" // LDA p0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12927 "00011001" // /* MW 3 */ + 12928 "11111100" // /* MW 2 */ + 12929 "00000111" // /* MW 1 */ + 12930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12931 "00000000" // /* MW 1 */ + 12932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12933 "00000000" // /* MW 1 */ + 12934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12935 "00000000" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 56 23 + 12936 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12937 "00000111" // /* MW 3 */ + 12938 "01100010" // /* MW 2 */ + 12939 "00010100" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 56 23 + 12940 "10011000" // ST r17, [p6, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12941 "00110001" // /* MW 3 */ + 12942 "01000110" // /* MW 2 */ + 12943 "00001110" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 57 56 first + 12944 "00001100" // LDA r18, [p7, #8]; MOVS p7, p2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12945 "00010110" // /* MW 5 */ + 12946 "00010001" // /* MW 4 */ + 12947 "11011111" // /* MW 3 */ + 12948 "11001010" // /* MW 2 */ + 12949 "11100100" // /* MW 1 */ + 12950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12951 "00000000" // /* MW 1 */ + 12952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12953 "00000000" // /* MW 1 */ + 12954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12955 "00000000" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 58 first + 12956 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12957 "00000000" // /* MW 3 */ + 12958 "00101000" // /* MW 2 */ + 12959 "00010000" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 58 +.delay_slot + 12960 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12961 "00000001" // /* MW 5 */ + 12962 "00000000" // /* MW 4 */ + 12963 "00000000" // /* MW 3 */ + 12964 "11110000" // /* MW 2 */ + 12965 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12967 "00000000" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 57 53 first +.delay_slot + 12968 "10011000" // EQ r27, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12969 "00100111" // /* MW 3 */ + 12970 "01110111" // /* MW 2 */ + 12971 "00010100" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 57 26 +.delay_slot + 12972 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12973 "00000010" // /* MW 3 */ + 12974 "01100001" // /* MW 2 */ + 12975 "00010100" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 57 24 +.delay_slot + 12976 "00000010" // ST r16, [p6, #16]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12977 "01110000" // /* MW 7 */ + 12978 "01100000" // /* MW 6 */ + 12979 "00110000" // /* MW 5 */ + 12980 "00000011" // /* MW 4 */ + 12981 "00110000" // /* MW 3 */ + 12982 "11000010" // /* MW 2 */ +.label _ZN12mllib_graphs19softmax_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERA8_KjRNSD_ISE_NSF_3outET1_EE__end +.label __ZN12mllib_graphs19softmax_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERA8_KjRNSD_ISE_NSF_3outET1_EE___func_end0 + 12983 "11001000" // /* MW 1 */ +.label __Z14_b8134_wrapperPPv___func_begin0 +.label _Z14_b8134_wrapperPPv +.function _b8134_wrapper _Z14_b8134_wrapperPPv +.src_ref 0 "0_0_reloadable11.cc" 64 first +.src_ref 0 "0_0_reloadable11.cc" 66 79 +.function_start + 12992 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12993 "11000000" // /* MW 3 */ + 12994 "01100000" // /* MW 2 */ + 12995 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 66 79 first + 12996 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12997 "00011110" // /* MW 3 */ + 12998 "00101100" // /* MW 2 */ + 12999 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 68 80 first + 13000 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13001 "00011110" // /* MW 3 */ + 13002 "11110101" // /* MW 2 */ + 13003 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 67 46 first + 13004 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13005 "10011110" // /* MW 3 */ + 13006 "00000100" // /* MW 2 */ + 13007 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 65 4 first +.tail_call + 13008 "10000100" // J #12528 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=12528 delay_slots=5 */ + 13009 "00000000" // /* MW 5 */ + 13010 "00000000" // /* MW 4 */ + 13011 "01111000" // /* MW 3 */ + 13012 "00011000" // /* MW 2 */ + 13013 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13015 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13017 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13018 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13019 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13021 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z14_b8134_wrapperPPv__end +.label __Z14_b8134_wrapperPPv___func_end0 + 13023 "00000000" // /* MW 1 */ +.label __ZN12mllib_graphs14expand_wrapperI8bfloat16EEvRN3adf9io_bufferIT_NS2_9direction2inENS2_16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEEEERA3_KjRNS3_IS4_NS5_3outESG_EE___func_begin0 +.label _ZN12mllib_graphs14expand_wrapperI8bfloat16EEvRN3adf9io_bufferIT_NS2_9direction2inENS2_16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEEEERA3_KjRNS3_IS4_NS5_3outESG_EE +.function expand_wrapper _ZN12mllib_graphs14expand_wrapperI8bfloat16EEvRN3adf9io_bufferIT_NS2_9direction2inENS2_16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEEEERA3_KjRNS3_IS4_NS5_3outESG_EE +.src_ref 9 "expand_adf_wrapper.cpp" 25 first +.src_ref 9 "expand_adf_wrapper.cpp" 26 7 +.src_ref 9 "expand_adf_wrapper.cpp" 64 26 +.function_start + 13024 "01000100" // MOVXM p3, #508448 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13025 "01000000" // /* MW 5 */ + 13026 "11000100" // /* MW 4 */ + 13027 "11000110" // /* MW 3 */ + 13028 "00000111" // /* MW 2 */ + 13029 "00000000" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 26 7 first + 13030 "10011000" // LDA r0, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13031 "00010110" // /* MW 3 */ + 13032 "00000100" // /* MW 2 */ + 13033 "00000011" // /* MW 1 */ + 13034 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13035 "00000000" // /* MW 1 */ + 13036 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13037 "00000000" // /* MW 1 */ + 13038 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13039 "00000000" // /* MW 1 */ + 13040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13041 "00000000" // /* MW 1 */ + 13042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13043 "00000000" // /* MW 1 */ + 13044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13045 "00000000" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 26 7 +.src_ref 9 "expand_adf_wrapper.cpp" 26 30 + 13046 "10000100" // JNZ r0, #13088 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=13088 delay_slots=5 */ + 13047 "00000001" // /* MW 5 */ + 13048 "01000000" // /* MW 4 */ + 13049 "10010000" // /* MW 3 */ + 13050 "00011001" // /* MW 2 */ + 13051 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13053 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13055 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13057 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13059 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13061 "00000000" // /* MW 1 */ + 13062 "11111000" // MOV r1, CORE_ID /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13063 "11100000" // /* MW 3 */ + 13064 "01011010" // /* MW 2 */ + 13065 "00011000" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 29 41 first + 13066 "00011000" // EXTEND.u8 r1, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13067 "10010000" // /* MW 3 */ + 13068 "01000010" // /* MW 2 */ + 13069 "00010000" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 29 68 + 13070 "00011000" // ADD r1, r1, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13071 "11111011" // /* MW 3 */ + 13072 "01000011" // /* MW 2 */ + 13073 "00010000" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 29 28 + 13074 "01000100" // MOVXM p4, #508452 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13075 "01001000" // /* MW 5 */ + 13076 "11000100" // /* MW 4 */ + 13077 "11001000" // /* MW 3 */ + 13078 "00000111" // /* MW 2 */ + 13079 "00000000" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 29 28 + 13080 "00000010" // ST r1, [p4]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13081 "01110000" // /* MW 7 */ + 13082 "10100101" // /* MW 6 */ + 13083 "00000001" // /* MW 5 */ + 13084 "00000000" // /* MW 4 */ + 13085 "00110000" // /* MW 3 */ + 13086 "10000110" // /* MW 2 */ + 13087 "10000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs14expand_wrapperI8bfloat16EEvRN3adf9io_bufferIT_NS2_9direction2inENS2_16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEEEERA3_KjRNS3_IS4_NS5_3outESG_EE_64 +.src_ref 9 "expand_adf_wrapper.cpp" 36 19 first +.src_ref 9 "expand_adf_wrapper.cpp" 38 37 +.src_ref 9 "expand_adf_wrapper.cpp" 40 8 +.src_ref 9 "expand_adf_wrapper.cpp" 40 20 +.src_ref 9 "expand_adf_wrapper.cpp" 49 61 + 13088 "10111010" // LDA r2, [p1, #4]; MOVX r16, #-5; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13089 "01011000" // /* MW 9 */ + 13090 "00000000" // /* MW 8 */ + 13091 "00001000" // /* MW 7 */ + 13092 "01101011" // /* MW 6 */ + 13093 "00000111" // /* MW 5 */ + 13094 "00111111" // /* MW 4 */ + 13095 "11010000" // /* MW 3 */ + 13096 "10001010" // /* MW 2 */ + 13097 "00100010" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 37 19 first +.src_ref 9 "expand_adf_wrapper.cpp" 49 61 + 13098 "10111010" // LDA r1, [p1]; MOVXM p1, #508464 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13099 "00010000" // /* MW 9 */ + 13100 "00011000" // /* MW 8 */ + 13101 "10110001" // /* MW 7 */ + 13102 "11110000" // /* MW 6 */ + 13103 "00000001" // /* MW 5 */ + 13104 "00000000" // /* MW 4 */ + 13105 "11010000" // /* MW 3 */ + 13106 "10000110" // /* MW 2 */ + 13107 "00100000" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 46 33 +.src_ref 9 "expand_adf_wrapper.cpp" 49 61 first + 13108 "10111010" // LDA.s8 r7, [p1]; MOVXM p1, #508452 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13109 "00010000" // /* MW 9 */ + 13110 "00010010" // /* MW 8 */ + 13111 "10110001" // /* MW 7 */ + 13112 "11110000" // /* MW 6 */ + 13113 "00000001" // /* MW 5 */ + 13114 "00000000" // /* MW 4 */ + 13115 "01010000" // /* MW 3 */ + 13116 "10011100" // /* MW 2 */ + 13117 "00100000" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 39 36 +.src_ref 9 "expand_adf_wrapper.cpp" 46 33 first +.src_ref 9 "expand_adf_wrapper.cpp" 46 53 +.src_ref 9 "expand_adf_wrapper.cpp" 49 61 + 13118 "10111010" // LDA r4, [p1]; MOVX r3, #1; VINSERT.32 x0, x0, #0, r24 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13119 "10111000" // /* MW 9 */ + 13120 "10001000" // /* MW 8 */ + 13121 "00000001" // /* MW 7 */ + 13122 "00101000" // /* MW 6 */ + 13123 "00110000" // /* MW 5 */ + 13124 "00000000" // /* MW 4 */ + 13125 "11010000" // /* MW 3 */ + 13126 "10010010" // /* MW 2 */ + 13127 "00100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 9 "expand_adf_wrapper.cpp" 39 36 +.src_ref 9 "expand_adf_wrapper.cpp" 49 61 + 13128 "10111010" // LDA r6, [p0]; MOVX r5, #64; VMOV bmll0, x0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13129 "01111000" // /* MW 9 */ + 13130 "01001001" // /* MW 8 */ + 13131 "00000000" // /* MW 7 */ + 13132 "00001000" // /* MW 6 */ + 13133 "01010000" // /* MW 5 */ + 13134 "00000010" // /* MW 4 */ + 13135 "11010000" // /* MW 3 */ + 13136 "10011010" // /* MW 2 */ + 13137 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 9 "expand_adf_wrapper.cpp" 39 36 + 13138 "00101100" // LDA p2, [p2]; MOVX r17, #31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13139 "11111010" // /* MW 5 */ + 13140 "01000100" // /* MW 4 */ + 13141 "11010000" // /* MW 3 */ + 13142 "10100011" // /* MW 2 */ + 13143 "01000000" // /* MW 1 */ + 13144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13145 "00000000" // /* MW 1 */ + 13146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13147 "00000000" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 38 32 first +.src_ref 9 "expand_adf_wrapper.cpp" 39 36 first + 13148 "00100100" // AND r27, r17, r1; ADD.NC r1, r1, #31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13149 "00011111" // /* MW 5 */ + 13150 "10100001" // /* MW 4 */ + 13151 "10010000" // /* MW 3 */ + 13152 "11000010" // /* MW 2 */ + 13153 "10001110" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 51 4 first +.src_ref 9 "expand_adf_wrapper.cpp" 51 22 first + 13154 "10000100" // JZ r2, #13408 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=13408 delay_slots=5 */ + 13155 "00000001" // /* MW 5 */ + 13156 "00000000" // /* MW 4 */ + 13157 "00110000" // /* MW 3 */ + 13158 "00011010" // /* MW 2 */ + 13159 "00010000" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 46 53 first +.src_ref 9 "expand_adf_wrapper.cpp" 49 61 +.delay_slot + 13160 "11100100" // MUL r4, r2, r4; MOV crRnd, r7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13161 "01000001" // /* MW 5 */ + 13162 "01100111" // /* MW 4 */ + 13163 "11111111" // /* MW 3 */ + 13164 "00001001" // /* MW 2 */ + 13165 "00010001" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 39 36 first +.src_ref 9 "expand_adf_wrapper.cpp" 49 61 first +.delay_slot + 13166 "01011100" // VCONV.bf16.fp32 wl0, bmll0; LSHL r17, r27, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13167 "01111011" // /* MW 5 */ + 13168 "11000100" // /* MW 4 */ + 13169 "11001101" // /* MW 3 */ + 13170 "00000010" // /* MW 2 */ + 13171 "00001000" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 46 53 first +.delay_slot + 13172 "10011000" // LSHL r3, r4, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13173 "00111101" // /* MW 3 */ + 13174 "00000110" // /* MW 2 */ + 13175 "00010001" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 first +.src_ref 9 "expand_adf_wrapper.cpp" 38 37 first +.src_ref 9 "expand_adf_wrapper.cpp" 49 61 first +.delay_slot + 13176 "10100100" // LSHL r1, r1, r16; VEXTBCST.16 x0, x0, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13177 "00000110" // /* MW 5 */ + 13178 "00000010" // /* MW 4 */ + 13179 "10110000" // /* MW 3 */ + 13180 "01100001" // /* MW 2 */ + 13181 "00001000" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 39 36 first +.src_ref 9 "expand_adf_wrapper.cpp" 46 11 first +.delay_slot + 13182 "10100100" // SUB r5, r5, r17; ADD.NC p0, r3, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13183 "00110010" // /* MW 5 */ + 13184 "11000011" // /* MW 4 */ + 13185 "00110000" // /* MW 3 */ + 13186 "01100010" // /* MW 2 */ + 13187 "00101001" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 40 8 first +.src_ref 9 "expand_adf_wrapper.cpp" 40 20 first +.src_ref 9 "expand_adf_wrapper.cpp" 40 20 first + 13188 "00100100" // SEL.EQZ r4, r24, r5, r27; ADD.NC r3, r1, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13189 "11111111" // /* MW 5 */ + 13190 "10100001" // /* MW 4 */ + 13191 "01000001" // /* MW 3 */ + 13192 "00001010" // /* MW 2 */ + 13193 "11000001" // /* MW 1 */ + 13194 "00101100" // NOPA; ADD r2, r2, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13195 "11111110" // /* MW 5 */ + 13196 "00001011" // /* MW 4 */ + 13197 "11110001" // /* MW 3 */ + 13198 "00101100" // /* MW 2 */ + 13199 "00000000" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 51 4 + 13200 "11100001" // NOPA; NOPB; NOPS; MOVXM p1, #13216; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13201 "00000000" // /* MW 15 */ + 13202 "00000000" // /* MW 14 */ + 13203 "00010000" // /* MW 13 */ + 13204 "11010000" // /* MW 12 */ + 13205 "10110001" // /* MW 11 */ + 13206 "00001100" // /* MW 10 */ + 13207 "00000000" // /* MW 9 */ + 13208 "00000000" // /* MW 8 */ + 13209 "01011011" // /* MW 7 */ + 13210 "00000001" // /* MW 6 */ + 13211 "00100000" // /* MW 5 */ + 13212 "00000000" // /* MW 4 */ + 13213 "11110000" // /* MW 3 */ + 13214 "00101100" // /* MW 2 */ + 13215 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs14expand_wrapperI8bfloat16EEvRN3adf9io_bufferIT_NS2_9direction2inENS2_16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEEEERA3_KjRNS3_IS4_NS5_3outESG_EE_192 +.src_ref 9 "expand_adf_wrapper.cpp" 52 20 first +.loop_nesting 1 + 13216 "10011000" // LDA.s16 r6, [p0], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13217 "11010010" // /* MW 3 */ + 13218 "00011100" // /* MW 2 */ + 13219 "00000000" // /* MW 1 */ + 13220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13221 "00000000" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 55 8 first + 13222 "10000100" // JZ r3, #13376 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=13376 delay_slots=5 */ + 13223 "00000001" // /* MW 5 */ + 13224 "00000000" // /* MW 4 */ + 13225 "00100000" // /* MW 3 */ + 13226 "00011010" // /* MW 2 */ + 13227 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13229 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13231 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13233 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13234 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13235 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 first +.delay_slot + 13236 "11111000" // VBCST.16 x1, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13237 "01110010" // /* MW 3 */ + 13238 "10011001" // /* MW 2 */ + 13239 "00011000" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 55 8 first + 13240 "01000100" // MOVXM ls, #13360 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13241 "01100000" // /* MW 5 */ + 13242 "11101000" // /* MW 4 */ + 13243 "00110001" // /* MW 3 */ + 13244 "00000000" // /* MW 2 */ + 13245 "00000000" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 55 8 + 13246 "01000100" // MOVXM le, #13360 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13247 "01100000" // /* MW 5 */ + 13248 "11101000" // /* MW 4 */ + 13249 "00110110" // /* MW 3 */ + 13250 "00000000" // /* MW 2 */ + 13251 "00000000" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 55 8 + 13252 "11110110" // NOPA; NOPB; NOPS; ADD.NC lc, r1, #-1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 13253 "11000000" // /* MW 11 */ + 13254 "01111111" // /* MW 10 */ + 13255 "10111000" // /* MW 9 */ + 13256 "00000010" // /* MW 8 */ + 13257 "01011011" // /* MW 7 */ + 13258 "00000001" // /* MW 6 */ + 13259 "00100000" // /* MW 5 */ + 13260 "00000000" // /* MW 4 */ + 13261 "11110000" // /* MW 3 */ + 13262 "00101100" // /* MW 2 */ + 13263 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 9 "expand_adf_wrapper.cpp" 57 20 + 13264 "11100001" // NOPA; NOPB; NOPS; NOPX; VMOV bmll0, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13265 "00000000" // /* MW 15 */ + 13266 "00000000" // /* MW 14 */ + 13267 "01111000" // /* MW 13 */ + 13268 "01001001" // /* MW 12 */ + 13269 "00000001" // /* MW 11 */ + 13270 "00000000" // /* MW 10 */ + 13271 "00000000" // /* MW 9 */ + 13272 "00000000" // /* MW 8 */ + 13273 "01011011" // /* MW 7 */ + 13274 "00000001" // /* MW 6 */ + 13275 "00100000" // /* MW 5 */ + 13276 "00000000" // /* MW 4 */ + 13277 "11110000" // /* MW 3 */ + 13278 "00101100" // /* MW 2 */ + 13279 "00000000" // /* MW 1 */ + 13280 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13281 "00000000" // /* MW 15 */ + 13282 "00000000" // /* MW 14 */ + 13283 "01111000" // /* MW 13 */ + 13284 "10100101" // /* MW 12 */ + 13285 "00000001" // /* MW 11 */ + 13286 "00000000" // /* MW 10 */ + 13287 "00000000" // /* MW 9 */ + 13288 "00000000" // /* MW 8 */ + 13289 "01011011" // /* MW 7 */ + 13290 "00000001" // /* MW 6 */ + 13291 "00100000" // /* MW 5 */ + 13292 "00000000" // /* MW 4 */ + 13293 "11110000" // /* MW 3 */ + 13294 "00101100" // /* MW 2 */ + 13295 "00000000" // /* MW 1 */ + 13296 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13297 "00000000" // /* MW 15 */ + 13298 "00000000" // /* MW 14 */ + 13299 "01111000" // /* MW 13 */ + 13300 "10100101" // /* MW 12 */ + 13301 "00000001" // /* MW 11 */ + 13302 "00000000" // /* MW 10 */ + 13303 "00000000" // /* MW 9 */ + 13304 "00000000" // /* MW 8 */ + 13305 "01011011" // /* MW 7 */ + 13306 "00000001" // /* MW 6 */ + 13307 "00100000" // /* MW 5 */ + 13308 "00000000" // /* MW 4 */ + 13309 "11110000" // /* MW 3 */ + 13310 "00101100" // /* MW 2 */ + 13311 "00000000" // /* MW 1 */ + 13312 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13313 "00000000" // /* MW 15 */ + 13314 "00000000" // /* MW 14 */ + 13315 "01111000" // /* MW 13 */ + 13316 "10100101" // /* MW 12 */ + 13317 "00000001" // /* MW 11 */ + 13318 "00000000" // /* MW 10 */ + 13319 "00000000" // /* MW 9 */ + 13320 "00000000" // /* MW 8 */ + 13321 "01011011" // /* MW 7 */ + 13322 "00000001" // /* MW 6 */ + 13323 "00100000" // /* MW 5 */ + 13324 "00000000" // /* MW 4 */ + 13325 "11110000" // /* MW 3 */ + 13326 "00101100" // /* MW 2 */ + 13327 "00000000" // /* MW 1 */ + 13328 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13329 "00000000" // /* MW 15 */ + 13330 "00000000" // /* MW 14 */ + 13331 "01111000" // /* MW 13 */ + 13332 "10100101" // /* MW 12 */ + 13333 "00000001" // /* MW 11 */ + 13334 "00000000" // /* MW 10 */ + 13335 "00000000" // /* MW 9 */ + 13336 "00000000" // /* MW 8 */ + 13337 "01011011" // /* MW 7 */ + 13338 "00000001" // /* MW 6 */ + 13339 "00100000" // /* MW 5 */ + 13340 "00000000" // /* MW 4 */ + 13341 "11110000" // /* MW 3 */ + 13342 "00101100" // /* MW 2 */ + 13343 "00000000" // /* MW 1 */ + 13344 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13345 "00000000" // /* MW 15 */ + 13346 "00000000" // /* MW 14 */ + 13347 "01111000" // /* MW 13 */ + 13348 "10100101" // /* MW 12 */ + 13349 "00000001" // /* MW 11 */ + 13350 "00000000" // /* MW 10 */ + 13351 "00000000" // /* MW 9 */ + 13352 "00000000" // /* MW 8 */ + 13353 "01011011" // /* MW 7 */ + 13354 "00000001" // /* MW 6 */ + 13355 "00100000" // /* MW 5 */ + 13356 "00000000" // /* MW 4 */ + 13357 "11110000" // /* MW 3 */ + 13358 "00101100" // /* MW 2 */ + 13359 "00000000" // /* MW 1 */ +.label ZLS_F_ZN12mllib_graphs14expand_wrapperI8bfloat16EEvRN3adf9io_bufferIT_NS2_9direction2inENS2_16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEEEERA3_KjRNS3_IS4_NS5_3outESG_EE_336 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 9 "expand_adf_wrapper.cpp" 57 20 first +.begin_of_loop +.end_of_loop +.loop_nesting 2 + 13360 "11100001" // NOPA; NOPB; VST bmll0, [p2], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13361 "00000000" // /* MW 15 */ + 13362 "00000000" // /* MW 14 */ + 13363 "01111000" // /* MW 13 */ + 13364 "10100101" // /* MW 12 */ + 13365 "00000001" // /* MW 11 */ + 13366 "00000000" // /* MW 10 */ + 13367 "00000000" // /* MW 9 */ + 13368 "10000000" // /* MW 8 */ + 13369 "00000110" // /* MW 7 */ + 13370 "00011100" // /* MW 6 */ + 13371 "00100010" // /* MW 5 */ + 13372 "00000000" // /* MW 4 */ + 13373 "11110000" // /* MW 3 */ + 13374 "00101100" // /* MW 2 */ + 13375 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs14expand_wrapperI8bfloat16EEvRN3adf9io_bufferIT_NS2_9direction2inENS2_16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEEEERA3_KjRNS3_IS4_NS5_3outESG_EE_352 +.src_ref 9 "expand_adf_wrapper.cpp" 51 4 first +.loop_nesting 1 + 13376 "00011000" // JNZD r2, r2, p1 /* MW 4 */ /* control_operation: words=4 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 13377 "01100000" // /* MW 3 */ + 13378 "10000100" // /* MW 2 */ + 13379 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13381 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13383 "00000000" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first +.delay_slot + 13384 "11011000" // VSHIFT x1, x1, x0, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13385 "00010010" // /* MW 3 */ + 13386 "10001000" // /* MW 2 */ + 13387 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13388 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13389 "01100111" // /* MW 3 */ + 13390 "00000001" // /* MW 2 */ + 13391 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 9 "expand_adf_wrapper.cpp" 61 16 first +.delay_slot + 13392 "11100001" // NOPA; NOPB; VST x1, [p2], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13393 "00000000" // /* MW 15 */ + 13394 "00000000" // /* MW 14 */ + 13395 "01111000" // /* MW 13 */ + 13396 "10100101" // /* MW 12 */ + 13397 "00000001" // /* MW 11 */ + 13398 "00000000" // /* MW 10 */ + 13399 "00000000" // /* MW 9 */ + 13400 "00000000" // /* MW 8 */ + 13401 "01010011" // /* MW 7 */ + 13402 "00011100" // /* MW 6 */ + 13403 "00100010" // /* MW 5 */ + 13404 "00000000" // /* MW 4 */ + 13405 "11110000" // /* MW 3 */ + 13406 "00101100" // /* MW 2 */ + 13407 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs14expand_wrapperI8bfloat16EEvRN3adf9io_bufferIT_NS2_9direction2inENS2_16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEEEERA3_KjRNS3_IS4_NS5_3outESG_EE_384 +.src_ref 9 "expand_adf_wrapper.cpp" 65 first +.loop_nesting 0 + 13408 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 13409 "00000000" // /* MW 3 */ + 13410 "00101000" // /* MW 2 */ + 13411 "00010000" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 64 26 +.delay_slot + 13412 "00011000" // ADD r0, r0, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13413 "00000111" // /* MW 3 */ + 13414 "00000000" // /* MW 2 */ + 13415 "00010000" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 64 26 first +.delay_slot + 13416 "10011000" // ST r0, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13417 "00010001" // /* MW 3 */ + 13418 "00000100" // /* MW 2 */ + 13419 "00001011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13421 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13423 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN12mllib_graphs14expand_wrapperI8bfloat16EEvRN3adf9io_bufferIT_NS2_9direction2inENS2_16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEEEERA3_KjRNS3_IS4_NS5_3outESG_EE__end +.label __ZN12mllib_graphs14expand_wrapperI8bfloat16EEvRN3adf9io_bufferIT_NS2_9direction2inENS2_16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEEEERA3_KjRNS3_IS4_NS5_3outESG_EE___func_end0 + 13425 "00000000" // /* MW 1 */ +.label __Z14_b8096_wrapperPPv___func_begin0 +.label _Z14_b8096_wrapperPPv +.function _b8096_wrapper _Z14_b8096_wrapperPPv +.src_ref 0 "0_0_reloadable11.cc" 72 first +.src_ref 0 "0_0_reloadable11.cc" 74 79 +.function_start + 13440 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13441 "11000000" // /* MW 3 */ + 13442 "01100000" // /* MW 2 */ + 13443 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 74 79 first + 13444 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13445 "00011110" // /* MW 3 */ + 13446 "00101100" // /* MW 2 */ + 13447 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 76 80 first + 13448 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13449 "00011110" // /* MW 3 */ + 13450 "11110101" // /* MW 2 */ + 13451 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 75 46 first + 13452 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13453 "10011110" // /* MW 3 */ + 13454 "00000100" // /* MW 2 */ + 13455 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 73 4 first +.tail_call + 13456 "10000100" // J #13024 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=13024 delay_slots=5 */ + 13457 "00000000" // /* MW 5 */ + 13458 "00000000" // /* MW 4 */ + 13459 "01110000" // /* MW 3 */ + 13460 "00011001" // /* MW 2 */ + 13461 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13463 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13465 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13467 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13469 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z14_b8096_wrapperPPv__end +.label __Z14_b8096_wrapperPPv___func_end0 + 13471 "00000000" // /* MW 1 */ +.label _ZN12me_primitive10udiv_dstepEjjRjS0_ +.function udiv_dstep _ZN12me_primitive10udiv_dstepEjjRjS0_ +.src_ref 10 "me_div.c" 108 19 +.src_ref 10 "me_div.c" 108 19 +.src_ref 10 "me_div.c" 115 4 first +.function_start + 13472 "11100100" // MOVX r3, #0; MOV r31, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13473 "01000001" // /* MW 5 */ + 13474 "10100000" // /* MW 4 */ + 13475 "00101111" // /* MW 3 */ + 13476 "11000000" // /* MW 2 */ + 13477 "00000000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13478 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13479 "00011100" // /* MW 3 */ + 13480 "11000110" // /* MW 2 */ + 13481 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13482 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13483 "00011100" // /* MW 3 */ + 13484 "11000110" // /* MW 2 */ + 13485 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13486 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13487 "00011100" // /* MW 3 */ + 13488 "11000110" // /* MW 2 */ + 13489 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13490 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13491 "00011100" // /* MW 3 */ + 13492 "11000110" // /* MW 2 */ + 13493 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13494 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13495 "00011100" // /* MW 3 */ + 13496 "11000110" // /* MW 2 */ + 13497 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13498 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13499 "00011100" // /* MW 3 */ + 13500 "11000110" // /* MW 2 */ + 13501 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13502 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13503 "00011100" // /* MW 3 */ + 13504 "11000110" // /* MW 2 */ + 13505 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13506 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13507 "00011100" // /* MW 3 */ + 13508 "11000110" // /* MW 2 */ + 13509 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13510 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13511 "00011100" // /* MW 3 */ + 13512 "11000110" // /* MW 2 */ + 13513 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13514 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13515 "00011100" // /* MW 3 */ + 13516 "11000110" // /* MW 2 */ + 13517 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13518 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13519 "00011100" // /* MW 3 */ + 13520 "11000110" // /* MW 2 */ + 13521 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13522 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13523 "00011100" // /* MW 3 */ + 13524 "11000110" // /* MW 2 */ + 13525 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13526 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13527 "00011100" // /* MW 3 */ + 13528 "11000110" // /* MW 2 */ + 13529 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13530 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13531 "00011100" // /* MW 3 */ + 13532 "11000110" // /* MW 2 */ + 13533 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13534 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13535 "00011100" // /* MW 3 */ + 13536 "11000110" // /* MW 2 */ + 13537 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13538 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13539 "00011100" // /* MW 3 */ + 13540 "11000110" // /* MW 2 */ + 13541 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13542 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13543 "00011100" // /* MW 3 */ + 13544 "11000110" // /* MW 2 */ + 13545 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13546 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13547 "00011100" // /* MW 3 */ + 13548 "11000110" // /* MW 2 */ + 13549 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13550 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13551 "00011100" // /* MW 3 */ + 13552 "11000110" // /* MW 2 */ + 13553 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13554 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13555 "00011100" // /* MW 3 */ + 13556 "11000110" // /* MW 2 */ + 13557 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13558 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13559 "00011100" // /* MW 3 */ + 13560 "11000110" // /* MW 2 */ + 13561 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13562 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13563 "00011100" // /* MW 3 */ + 13564 "11000110" // /* MW 2 */ + 13565 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13566 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13567 "00011100" // /* MW 3 */ + 13568 "11000110" // /* MW 2 */ + 13569 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13570 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13571 "00011100" // /* MW 3 */ + 13572 "11000110" // /* MW 2 */ + 13573 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13574 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13575 "00011100" // /* MW 3 */ + 13576 "11000110" // /* MW 2 */ + 13577 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13578 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13579 "00011100" // /* MW 3 */ + 13580 "11000110" // /* MW 2 */ + 13581 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13582 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13583 "00011100" // /* MW 3 */ + 13584 "11000110" // /* MW 2 */ + 13585 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13586 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13587 "00011100" // /* MW 3 */ + 13588 "11000110" // /* MW 2 */ + 13589 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 119 first + 13590 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 13591 "00000000" // /* MW 3 */ + 13592 "00101000" // /* MW 2 */ + 13593 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 first +.delay_slot + 13594 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13595 "00011100" // /* MW 3 */ + 13596 "11000110" // /* MW 2 */ + 13597 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 +.delay_slot + 13598 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13599 "00011100" // /* MW 3 */ + 13600 "11000110" // /* MW 2 */ + 13601 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 +.delay_slot + 13602 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13603 "00011100" // /* MW 3 */ + 13604 "11000110" // /* MW 2 */ + 13605 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 +.delay_slot + 13606 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13607 "00011100" // /* MW 3 */ + 13608 "11000110" // /* MW 2 */ + 13609 "00010000" // /* MW 1 */ +.delay_slot + 13610 "11111000" // MOV r2, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13611 "10100000" // /* MW 3 */ + 13612 "10011111" // /* MW 2 */ +.label _ZN12me_primitive10udiv_dstepEjjRjS0___end + 13613 "00011000" // /* MW 1 */ +.dir 0 "/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable11/src" +.dir 1 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer" +.dir 2 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common" +.dir 3 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc" +.dir 4 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2" +.dir 5 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p" +.dir 6 "/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend" +.dir 7 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib" +.dir 8 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail" +.dir 9 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf" +.dir 10 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21990/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib" diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable81/Release/0_0_reloadable81.cmico b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable81/Release/0_0_reloadable81.cmico new file mode 100644 index 0000000000000000000000000000000000000000..f377058758269f564988080a1597f499edc1b997 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable81/Release/0_0_reloadable81.cmico @@ -0,0 +1 @@ ++Mdec diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable81/Release/0_0_reloadable81.lst b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable81/Release/0_0_reloadable81.lst new file mode 100644 index 0000000000000000000000000000000000000000..ebce05eb30c28220693e7e1c698a0539c2c1a642 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable81/Release/0_0_reloadable81.lst @@ -0,0 +1,5010 @@ + +// File generated by darts version V-2024.06#84922c0d9f#241219, Fri May 30 11:31:55 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// darts -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -d -h -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable11 me + +// Release: ipp V-2024.06-TGT-241219 + +.text_segment PM 2528 +.entry_point +.label __Z13kernelWrapperPPvjjjj___func_begin0 +.label _Z13kernelWrapperPPvjjjj +.function_start + 2528 0x00 0xc2 0xd0 0x02 0xf7 0x80 0x8b 0xd0 0x78 0xba LDA r16, [p0]; NEZ r15, r1; MOV r4, r15 + 2538 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 2544 0xfd 0xe3 0xb0 0x03 0x4b 0xd0 0x70 0x02 ST p6, [sp, #-20]; MOV r26, r15 + 2552 0xfe 0xb6 0xb0 0x01 0xa8 0xd0 0x70 0x02 ST r13, [sp, #-12]; MOV r13, r3 + 2560 0xff 0x3a 0xb0 0x01 0xc8 0x50 0x70 0x02 ST r14, [sp, #-8]; MOV r14, r1 + 2568 0x0f 0xff 0x9d 0x98 ST p7, [sp, #-4] + 2572 0x0f 0xf0 0x95 0x98 ST r4, [sp, #-16] + 2576 0xfd 0x07 0xb0 0x03 0x34 0x01 0x00 0x02 ST lr, [sp, #-24]; ADD.NC p6, r16, #4 + 2584 0x06 0x1e 0x16 0x98 LDA r16, [p6], #4 + 2588 0x06 0x3e 0x56 0x98 LDA r18, [p6], #12 + 2592 0x06 0xee 0x36 0x98 LDA r17, [p6], #-8 + 2596 0x06 0x07 0x76 0x98 LDA r27, [p6] + 2600 0x00 0x00 NOPX + 2602 0x00 0x00 NOPX + 2604 0x00 0x00 NOPX + 2606 0x00 0x00 NOPX + 2608 0x00 0x00 NOPX + 2610 0x00 0x00 NOPX + 2612 0x14 0x21 0x22 0x18 SEL.EQZ r16, r16, r18, r27 + 2616 0x0e 0xd6 0x11 0x98 ST r16, [p6, #-12] + 2620 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 2624 0x00 0x00 NOPX + 2626 0x00 0x00 NOPX + 2628 0x00 0x00 NOPX + 2630 0x14 0x57 0x08 0x18 ACQ.COND r17, r16, r26 + 2634 0x6e 0x9e 0x08 0xa0 0x09 0x64 NEZ r26, r13; MOV r17, #2 + 2640 0x7c 0xe3 0xb9 0x21 0x81 0xe4 LSHL r19, r15, r17; MOV r18, p0 + 2646 0x14 0xa3 0xb9 0xb3 0x92 0xa4 LSHL r18, r2, r17; ADD.NC r19, r19, r18 + 2652 0x1e 0x69 0xc9 0x58 ADD.NC p6, r19, r18 + 2656 0xc0 0xca 0xdf 0xce 0xab 0x0c LDA r18, [p6]; ST r26, [sp, #-28] + 2662 0x00 0x00 NOPX + 2664 0x00 0x00 NOPX + 2666 0x00 0x00 NOPX + 2668 0x00 0x00 NOPX + 2670 0x00 0x00 NOPX + 2672 0x00 0x00 NOPX + 2674 0x1f 0x69 0x02 0x18 ADD.NC p7, r18, #4 + 2678 0x07 0x3e 0x76 0x98 LDA r19, [p7], #12 + 2682 0x07 0xee 0x56 0x98 LDA r18, [p7], #-8 + 2686 0x07 0x1e 0x96 0x98 LDA r20, [p7], #4 + 2690 0x07 0x07 0x76 0x98 LDA r27, [p7] + 2694 0x00 0x00 NOPX + 2696 0x00 0x00 NOPX + 2698 0x00 0x00 NOPX + 2700 0x00 0x00 NOPX + 2702 0x00 0x00 NOPX + 2704 0x00 0x00 NOPX + 2706 0x14 0xe7 0x42 0x18 SEL.EQZ r19, r19, r20, r27 + 2710 0x0f 0xd6 0x71 0x98 ST r19, [p7, #-12] + 2714 0x00 0x00 NOPX + 2716 0x00 0x00 NOPX + 2718 0x00 0x00 NOPX + 2720 0x00 0x00 NOPX + 2722 0x14 0x97 0x08 0x18 ACQ.COND r18, r16, r26 + 2726 0x10 0x21 0x1d 0x98 LSHL r16, r0, r17 + 2730 0x18 0x88 0x20 0xf8 MOV dj0, r16 + 2734 0x00 0x07 0xce 0xc5 0x00 0x44 MOVXM p7, #508544 + 2740 0x07 0x00 0x9e 0x98 LDA p1, [p7, dj0] + 2744 0x00 0x00 NOPX + 2746 0x00 0x00 NOPX + 2748 0x00 0x00 NOPX + 2750 0x00 0x00 NOPX + 2752 0x00 0x00 NOPX + 2754 0x00 0x00 NOPX +.no_stack_arguments + 2756 0x10 0x30 0x40 0x18 JL p1 +.delay_slot + 2760 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.delay_slot +.swstall delay_slot + 2764 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2766 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2768 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2770 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV +.return_address + 2784 0x07 0x06 0x16 0x98 LDA r16, [p7] + 2788 0x07 0xe4 0x29 0x18 LDA el0, [sp, #-28] + 2792 0x07 0xe8 0x39 0x18 LDA lr, [sp, #-24] + 2796 0x00 0x00 NOPX + 2798 0x00 0x00 NOPX + 2800 0x00 0x00 NOPX + 2802 0x00 0x00 NOPX + 2804 0x1f 0x68 0x08 0x18 ADD.NC p7, r16, #16 + 2808 0x07 0x06 0x36 0x98 LDA r17, [p7] + 2812 0x04 0x00 0xad 0x2f 0x41 0xe4 MOVX r16, #1; MOV r26, r15 + 2818 0x00 0x00 NOPX + 2820 0x00 0x00 NOPX + 2822 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 2824 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 2826 0x1e 0xa0 0x1c 0xf8 MOV r26, el0 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2830 0x14 0x55 0x08 0x18 REL.COND r17, r16, r26 + 2834 0xfe 0xc6 0xdd 0xae 0x41 0xd4 LDA r17, [p7, #-4]; MOV r27, r14 + 2840 0x06 0x06 0x56 0x98 LDA r18, [p6] + 2844 0x00 0x00 NOPX + 2846 0x00 0x00 NOPX + 2848 0x00 0x00 NOPX + 2850 0x00 0x00 NOPX + 2852 0x00 0x00 NOPX + 2854 0x14 0x27 0x11 0x98 SUB r19, r16, r17 + 2858 0x8c 0x66 0x40 0xd2 0x14 0x24 SEL.EQZ r17, r17, r19, r27; ADD.NC p0, r18, #20 + 2864 0x00 0xc6 0xdf 0xec 0x63 0x0c LDA r17, [p0]; ST r17, [p7, #-4] + 2870 0x00 0x00 NOPX + 2872 0x00 0x00 NOPX + 2874 0x00 0x00 NOPX + 2876 0x00 0x00 NOPX + 2878 0x00 0x00 NOPX + 2880 0x00 0x00 NOPX + 2882 0x14 0x55 0x08 0x18 REL.COND r17, r16, r26 + 2886 0x00 0xe6 0x36 0x98 LDA r17, [p0, #-8] +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 2890 0x07 0xef 0x19 0x18 LDA p6, [sp, #-20] +.aggressive_scheduled_block_id 2 +.noswbrkpt + 2894 0x07 0xf5 0xb1 0x18 LDA r13, [sp, #-12] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 2898 0x07 0xf9 0xd1 0x18 LDA r14, [sp, #-8] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 2902 0x07 0xff 0x99 0x18 LDA p7, [sp, #-4] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 2906 0x07 0xf1 0xf1 0x18 LDA r15, [sp, #-16] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 2910 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2914 0x1e 0xd6 0xa0 0xf8 MOV r27, r13 +.delay_slot + 2918 0x14 0x21 0x11 0x98 SUB r16, r16, r17 +.delay_slot + 2922 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 +.delay_slot + 2926 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot + 2932 0x08 0xe6 0x11 0x98 ST r16, [p0, #-8] +.label _Z13kernelWrapperPPvjjjj__end +.label __Z13kernelWrapperPPvjjjj___func_end0 + +.text_segment PM 2944 +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv +.function_start + 2944 0x23 0x85 0xd0 0x00 0x01 0xf0 0x31 0x80 0x10 0xba LDA el0, [p1], #4; MOVXM p0, #508672 + 2954 0xf0 0x00 0x00 0x08 0x10 0x0b 0x08 0x00 0x58 0xba MOVA r0, #-128; MOVX r1, #256; MOV r24, #0 + 2964 0x00 0x00 NOPX + 2966 0x00 0x00 NOPX + 2968 0x00 0x00 NOPX + 2970 0x00 0x00 NOPX + 2972 0x00 0x00 NOPX + 2974 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 2978 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 2982 0x00 0x00 NOPX + 2984 0x00 0x00 NOPX + 2986 0x00 0x00 NOPX + 2988 0x00 0x00 NOPX + 2990 0x00 0x00 NOPX + 2992 0x00 0x00 NOPX + 2994 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 2998 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 3002 0x00 0x00 NOPX + 3004 0x00 0x00 NOPX + 3006 0x00 0x00 NOPX + 3008 0x00 0x00 NOPX + 3010 0x00 0x00 NOPX + 3012 0x00 0x00 NOPX + 3014 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 3018 0x01 0x14 0x76 0x98 LDA r3, [p1, #4] + 3022 0x00 0x00 NOPX + 3024 0x00 0x00 NOPX + 3026 0x00 0x00 NOPX + 3028 0x00 0x00 NOPX + 3030 0x00 0x00 NOPX + 3032 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3034 0x08 0x4c 0x71 0x98 ST r3, [p0], #16 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3038 0x00 0x04 0x17 0x18 ST.s16 r0, [p0] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3042 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3046 0x00 0x00 0xf1 0x3e 0x00 0x44 MOVXM r2, #65280 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3052 0x10 0xc4 0x24 0x98 AND r2, r3, r2 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3056 0x10 0x76 0x27 0x98 EQ r27, r1, r2 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3060 0x10 0x01 0x82 0x18 SEL.EQZ r0, r0, r24, r27 +.delay_slot +.swstall delay_slot + 3064 0x00 0x00 NOPX +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_end0 + +.text_segment PM 3072 +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv +.function_start + 3072 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 3078 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4] +.no_stack_arguments + 3082 0x00 0x05 0xc0 0x00 0x01 0x04 JL #2944 +.delay_slot + 3088 0x00 0x07 0xc0 0xc6 0x00 0x44 MOVXM p0, #508672 +.delay_slot +.swstall delay_slot + 3094 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3096 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3098 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3100 0x00 0x01 0x67 0x98 NOPA +.return_address + 3104 0xff 0x87 0x20 0x00 0x01 0xf0 0xb1 0x80 0x10 0xba LDA lr, [sp, #-4]; MOVXM p1, #508672 + 3114 0x00 0x00 NOPX + 3116 0x00 0x00 NOPX + 3118 0x00 0x00 NOPX + 3120 0x00 0x00 NOPX + 3122 0x00 0x00 NOPX + 3124 0x00 0x00 NOPX + 3126 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 3130 0x10 0x20 0x09 0x18 MOVX r16, #2 +.delay_slot + 3134 0x09 0x46 0x11 0x98 ST r16, [p1, #16] +.delay_slot + 3138 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 3144 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3146 0x00 0x00 NOPX +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + +.text_segment PM 3152 +.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE +.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_begin0 +.function_start + 3152 0x18 0x96 0xc0 0xf8 MOV r2, p3 + 3156 0x00 0x00 0x2a 0xc2 0x0e 0x24 MOVX r0, #0; ADD.NC p5, r2, #14 + 3162 0xa3 0x82 0x50 0x02 0xe5 0xd4 LDA.s16 r0, [p5], #2; VBCST.16 x0, r0 + 3168 0x05 0x04 0x56 0x98 LDA r2, [p5] + 3172 0x00 0x00 NOPX + 3174 0x00 0x00 NOPX + 3176 0x00 0x00 NOPX + 3178 0x00 0x00 NOPX + 3180 0x00 0x00 NOPX + 3182 0x10 0x02 0x09 0x18 MOVX r1, #2 + 3186 0x10 0x42 0x2c 0x98 LTU r1, r1, r2 + 3190 0x08 0x06 0x80 0x40 0x01 0x84 JNZ r1, #3328 +.delay_slot + 3196 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 +.delay_slot + 3202 0x1c 0x65 0xe0 0xf8 MOV p4, sp +.delay_slot + 3206 0x3c 0xff 0x90 0x18 PADDB [p4], #-64 +.delay_slot + 3210 0x0c 0x04 0x13 0x18 VST x0, [p4] +.delay_slot +.swstall delay_slot + 3214 0x00 0x00 NOPX + 3216 0x01 0x82 0x80 0x02 0xe5 0xd4 MOVA dj0, #12; VBCST.16 x0, r0 + 3222 0x03 0x00 0x0a 0x98 LDA.u8 r0, [p3, dj0] + 3226 0x00 0x00 NOPX + 3228 0x00 0x00 NOPX + 3230 0x00 0x00 NOPX + 3232 0x00 0x00 NOPX + 3234 0x00 0x00 NOPX + 3236 0x00 0x00 NOPX + 3238 0x00 0x06 0x68 0x40 0x01 0x84 JNZ r0, #3280 +.delay_slot + 3244 0x18 0x00 0x00 0xb8 MOV m0, #0 +.delay_slot + 3248 0x1a 0x00 0x80 0xb8 MOV m2, #64 +.delay_slot +.swstall delay_slot + 3252 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3254 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3256 0x00 0x00 NOPX + 3258 0x00 0x04 0x80 0x00 0x01 0x9c 0x00 0x00 0x20 0xba MOVA m1, #0; J #3296 +.delay_slot +.swstall delay_slot + 3268 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3270 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3272 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3274 0x00 0x00 NOPX +.delay_slot + 3276 0x08 0x04 0x13 0x18 VST x0, [p0] +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_128 + 3280 0x19 0x00 0x80 0xb8 MOV m1, #64 + 3284 0x00 0x2c 0xf0 0x00 0x21 0x04 0x13 0x01 0x00 0x00 0x50 0xf6 NOPA; NOPB; VST x0, [p1]; MOV m2, #0 +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_144 + 3296 0x00 0x06 0xb0 0x00 0x00 0x84 J #3424 +.delay_slot + 3302 0x12 0x11 0x60 0x02 0x30 0x60 0x70 0x02 MOVS p0, p4; MOV p4, p0 +.delay_slot +.swstall delay_slot + 3310 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3312 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3314 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3316 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_176 + 3328 0x10 0x02 0x0d 0x18 MOVX r1, #3 + 3332 0x10 0x42 0x27 0x98 EQ r1, r1, r2 + 3336 0x08 0x06 0x98 0x40 0x01 0x84 JNZ r1, #3376 +.delay_slot + 3342 0x00 0x07 0xc6 0xc4 0x60 0x44 MOVXM p3, #508464 +.delay_slot +.swstall delay_slot + 3348 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3350 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3352 0x00 0x00 NOPX +.delay_slot + 3354 0x3f 0x80 0x00 0x20 0x00 0x44 MOVXM r0, #1065353216 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3360 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x2f 0xe0 0x00 0x08 0x00 0x10 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVXM r0, #-1082130432; NOPV +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_224 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3376 0x60 0x80 0x50 0x01 0x01 0x54 LDA.s8 r0, [p3]; MOV m0, #64 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3382 0x19 0x00 0x00 0xb8 MOV m1, #0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3386 0x1a 0x00 0x80 0xb8 MOV m2, #64 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3390 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3392 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3394 0x18 0x00 0x11 0x78 VINSERT.32 x0, x0, #0, r0 + 3398 0x19 0x00 0x92 0xf8 VMOV bmll1, x0 + 3402 0x10 0x3a 0x80 0x18 MOVX crRnd, r0 + 3406 0x08 0x40 0x96 0x18 VCONV.bf16.fp32 wl0, bmll1 + 3410 0x00 0x00 NOPX + 3412 0x18 0x01 0x03 0x58 VEXTBCST.16 x0, x0, #0 + 3416 0x00 0x00 NOPX + 3418 0x00 0x2c 0xff 0xf8 0x66 0x0c NOPA; VST x0, [sp, #-64] +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_272 + 3424 0xb8 0x86 0xd8 0x50 0xe8 0x00 0x00 0x00 0x7e 0xe8 0x10 0xb6 LDA r1, [p5, #-16]; VLDB x1, [p4], m1; MOVXM ls, #3536 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 3436 0xff 0x63 0x02 0x90 0x68 0x00 0x00 0x01 0xbf 0x00 0x10 0xb6 MOVA r3, #-5; VLDB x0, [p1], m2; MOVXM le, #3584 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 3448 0x01 0x05 0x78 0x50 0xe8 0x00 0xf1 0x12 VLDA.CONV.fp32.bf16 cml0, [p0], m0;VLDB x1, [p4], m1; MOVX r0, #60 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3456 0x29 0x03 0x70 0x00 0x01 0xf1 0xb1 0x18 0x10 0xba VLDA x0, [p1], m2; MOVXM p3, #508464 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3466 0x03 0x04 0x42 0x98 LDA.s8 r2, [p3] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3470 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3474 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3476 0x10 0x42 0x3d 0x98 LSHL r1, r1, r3 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3480 0x05 0x70 0xfe 0x86 0x01 0x02 0x01 0x62 ADD.NC lc, r1, #-3; VMAC.f dm1, dm0, x1, x0, r0 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 3488 0x29 0x03 0x78 0x50 0xe8 0x3c VLDA x0, [p1], m2; VLDB x1, [p4], m1 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3494 0x01 0x05 0x70 0x00 0x10 0x00 0x01 0xa5 0x7e 0xba VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPM +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3504 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x05 0xd4 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVX crRnd, r2; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3520 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x08 0x10 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 +.label ZLS_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_384 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3536 0x29 0x03 0x78 0x50 0xe8 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA x0, [p1], m2; VLDB x1, [p4], m1; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3552 0x01 0x05 0x70 0x00 0x22 0x1c 0xa3 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3568 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLE_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_432 +.end_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3584 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x08 0x10 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 +.loop_nesting 0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3600 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3602 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 3606 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.noswbrkpt + 3608 0x01 0x02 0x01 0x48 VMAC.f dm1, dm0, x1, x0, r0 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3612 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3614 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3618 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64 +.delay_slot + 3622 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 3628 0x00 0x00 NOPX +.delay_slot + 3630 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64 +.delay_slot +.swstall delay_slot + 3634 0x00 0x00 NOPX +.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE__end +.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_end0 + +.text_segment PM 3648 +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E +.function_start + 3648 0x00 0x07 0xc4 0xc6 0x18 0x44 MOVXM p2, #508684 + 3654 0x40 0x81 0x52 0x84 0x8b 0x00 0x01 0xf1 0xb1 0x80 0x10 0x76 LDA.u8 r0, [p2]; MOVS p2, p1; MOVXM p3, #508672 + 3666 0x00 0x00 NOPX + 3668 0x00 0x00 NOPX + 3670 0x00 0x00 NOPX + 3672 0x00 0x00 NOPX + 3674 0x00 0x00 NOPX + 3676 0x00 0x00 NOPX + 3678 0x00 0x07 0x50 0x00 0x01 0x84 JZ r0, #3744 +.delay_slot + 3684 0x18 0xc1 0xe0 0xf8 MOV dc0, lr +.delay_slot + 3688 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 +.delay_slot + 3694 0x18 0x55 0xe0 0xf8 MOV r1, sp +.delay_slot + 3698 0x19 0x60 0xe0 0x18 ADD.NC p1, r1, #-64 +.delay_slot + 3702 0x09 0x07 0x2b 0x18 VST sfh, [p1] +.no_stack_arguments + 3706 0x00 0x06 0x28 0x00 0x01 0x04 JL #3152 +.delay_slot +.swstall delay_slot + 3712 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3714 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3716 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3718 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3720 0x00 0x2c 0xf0 0x04 0x00 0x00 0x1c 0x22 NOPA; NOPV +.return_address + 3728 0x00 0x07 0x60 0x00 0x00 0x84 J #3776 +.delay_slot +.swstall delay_slot + 3734 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3736 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3738 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3740 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3742 0x00 0x00 NOPX +.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_96 +.no_stack_arguments + 3744 0x00 0x06 0x28 0x00 0x01 0x04 JL #3152 +.delay_slot + 3750 0x10 0x91 0x60 0x00 0xb0 0x60 0x70 0x02 MOVS p0, p1; MOV p1, p0 +.delay_slot +.swstall delay_slot + 3758 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3760 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3762 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3764 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_128 +.return_address + 3776 0x1f 0x71 0x80 0xf8 MOV lr, dc0 + 3780 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 3784 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 3790 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3792 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3794 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3796 0x00 0x00 NOPX +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_end0 + +.text_segment PM 3808 +.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function_start + 3808 0x00 0x07 0xc6 0xc4 0x00 0x44 MOVXM p3, #508416 + 3814 0x60 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p3]; MOV r17, CORE_ID + 3820 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 3826 0xff 0x63 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p6, [sp, #-8]; MOV r0, r15 + 3834 0xff 0x82 0xb0 0x00 0x01 0xf3 0x31 0x04 0x11 0x3a ST r0, [sp, #-4]; MOVXM p6, #508424 + 3844 0x1b 0xd4 0xc0 0xf8 MOV r15, p2 + 3848 0x00 0x00 NOPX + 3850 0x00 0x00 NOPX + 3852 0x80 0x07 0xd8 0x40 0x01 0x84 JNZ r16, #4016 +.delay_slot + 3858 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17 +.delay_slot + 3862 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 3866 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12] +.delay_slot + 3870 0xc0 0xc6 0x30 0x03 0x30 0x60 0x70 0x02 ST r17, [p6]; MOV p6, p0 +.delay_slot + 3878 0x00 0x07 0xc0 0xc6 0x00 0x44 MOVXM p0, #508672 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3884 0x00 0x07 0xc4 0xc4 0x60 0x44 MOVXM p2, #508464 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3890 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x31 0x16 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #508460 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3900 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 3902 0x00 0x06 0x00 0x00 0x01 0x04 JL #3072 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3908 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3910 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3912 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 3916 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 3920 0x00 0x2c 0xf0 0x00 0x22 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV +.return_address + 3936 0x00 0x07 0xc4 0xc4 0x10 0x44 MOVXM p2, #508424 + 3942 0x40 0xc2 0xd0 0x00 0x01 0xf1 0x31 0x80 0x10 0xba LDA r16, [p2]; MOVXM p2, #508672 + 3952 0x40 0xc6 0xd0 0x00 0x01 0xf1 0x31 0x80 0x10 0xba LDA r17, [p2]; MOVXM p2, #508672 + 3962 0x4a 0xcb 0x50 0x00 0x01 0xf0 0xb1 0x06 0x10 0xba LDA.u16 r18, [p2, #10]; MOVXM p1, #508428 + 3972 0x00 0x00 NOPX + 3974 0x00 0x00 NOPX + 3976 0x00 0x07 0xe0 0x00 0x00 0x84 J #4032 +.delay_slot + 3982 0x00 0x07 0xc0 0xc4 0x50 0x44 MOVXM p0, #508456 +.delay_slot +.swstall delay_slot + 3988 0x00 0x00 NOPX +.delay_slot + 3990 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 +.delay_slot + 3994 0x00 0x2c 0xf0 0x0c 0xa3 0x0c NOPA; ST r18, [p0] +.delay_slot + 4000 0x00 0x2c 0xf0 0x00 0x21 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 + 4016 0x00 0x2c 0xf0 0x00 0x22 0x80 0x8b 0x00 0x01 0xf0 0xb1 0x06 0x10 0x00 0x00 0xe1 NOPA; NOPB; MOVS p2, p0; MOVXM p1, #508428; NOPV +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 + 4032 0x73 0x91 0x60 0x03 0xb3 0xc3 0x00 0x02 MOVS p3, p7; ADD.NC p7, r15, #12 + 4040 0xff 0xee 0xd0 0x00 0x01 0xf0 0x31 0x00 0x10 0xba LDA r27, [p7], #-4; MOVXM p0, #508416 + 4050 0x07 0xfe 0x16 0x98 LDA r16, [p7], #-4 + 4054 0x07 0xfe 0x36 0x98 LDA r17, [p7], #-4 + 4058 0x07 0x46 0x56 0x98 LDA r18, [p7, #16] + 4062 0x00 0x00 NOPX + 4064 0x00 0x00 NOPX + 4066 0x00 0x00 NOPX + 4068 0x00 0x00 NOPX + 4070 0x00 0x00 NOPX + 4072 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 4076 0x0f 0x06 0x11 0x98 ST r16, [p7] + 4080 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 4084 0x00 0x00 NOPX + 4086 0x00 0x00 NOPX + 4088 0x00 0x00 NOPX + 4090 0x14 0x93 0x08 0x18 ACQ r18, r16 + 4094 0x04 0x00 0xa7 0xad 0x81 0xe4 MOVX r16, #1; MOV r15, p3 + 4100 0x00 0x00 NOPX + 4102 0x00 0x00 NOPX + 4104 0x00 0x06 0x36 0x98 LDA r17, [p0] + 4108 0xc0 0xca 0xdc 0xdd 0x81 0xd4 LDA r18, [p6]; MOV p6, p7 + 4114 0x01 0x06 0x76 0x98 LDA r19, [p1] + 4118 0x07 0x5c 0x9e 0x98 LDA p1, [p7], #20 + 4122 0x00 0x00 NOPX +.no_stack_arguments + 4124 0x00 0x07 0x20 0x00 0x01 0x04 JL #3648 +.delay_slot +.swstall delay_slot + 4130 0x00 0x00 NOPX +.delay_slot + 4132 0x14 0x62 0x07 0x18 ADD r17, r17, #1 +.delay_slot + 4136 0x08 0x06 0x31 0x98 ST r17, [p0] +.delay_slot + 4140 0x14 0xe1 0x0d 0x98 LSHL r16, r19, r16 +.delay_slot + 4144 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xa0 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV +.return_address + 4160 0xca 0xc6 0xd0 0x00 0x01 0xf3 0x31 0x00 0x10 0xba LDA r17, [p6, #20]; MOVXM p6, #508416 + 4170 0x10 0x20 0x05 0x18 MOVX r16, #1 + 4174 0x00 0x00 NOPX + 4176 0x00 0x00 NOPX + 4178 0x00 0x00 NOPX + 4180 0x00 0x00 NOPX + 4182 0x00 0x00 NOPX + 4184 0x14 0x51 0x08 0x18 REL r17, r16 + 4188 0xfc 0xce 0xd0 0x00 0x01 0xf1 0x31 0x14 0x10 0xba LDA r19, [p7, #-8]; MOVXM p2, #508456 + 4198 0x06 0x06 0x36 0x98 LDA r17, [p6] + 4202 0x02 0x06 0x56 0x98 LDA r18, [p2] + 4206 0x00 0x00 NOPX + 4208 0x00 0x00 NOPX + 4210 0x00 0x00 NOPX + 4212 0x00 0x00 NOPX + 4214 0x14 0x21 0x31 0x98 SUB r16, r16, r19 + 4218 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8] + 4222 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 4226 0x80 0x08 0x50 0x40 0x01 0x84 JNZ r16, #4256 +.delay_slot +.swstall delay_slot + 4232 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4234 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4236 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4238 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4240 0x00 0x00 NOPX + 4242 0x10 0x20 0x01 0x18 MOVX r16, #0 + 4246 0x00 0x2c 0xf6 0x06 0x11 0x80 0x00 0x00 0x00 0x7a NOPA; ST r16, [p6]; NOPX +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 + 4256 0x07 0xf4 0x39 0x18 LDA lr, [sp, #-12] + 4260 0x07 0xfb 0x19 0x18 LDA p6, [sp, #-8] + 4264 0x00 0x00 NOPX + 4266 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 4268 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4270 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4274 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4276 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4280 0x1f 0x67 0xa0 0xf8 MOV p7, r15 +.delay_slot + 4284 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 4290 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4292 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4294 0x00 0x00 NOPX +.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + +.text_segment PM 4304 +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv +.function_start + 4304 0x23 0x85 0xd0 0x00 0x01 0xf0 0x31 0xa0 0x10 0xba LDA el0, [p1], #4; MOVXM p0, #508736 + 4314 0xf0 0x00 0x00 0x08 0x10 0x0b 0x08 0x00 0x58 0xba MOVA r0, #-128; MOVX r1, #256; MOV r24, #0 + 4324 0x00 0x00 NOPX + 4326 0x00 0x00 NOPX + 4328 0x00 0x00 NOPX + 4330 0x00 0x00 NOPX + 4332 0x00 0x00 NOPX + 4334 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 4338 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 4342 0x00 0x00 NOPX + 4344 0x00 0x00 NOPX + 4346 0x00 0x00 NOPX + 4348 0x00 0x00 NOPX + 4350 0x00 0x00 NOPX + 4352 0x00 0x00 NOPX + 4354 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 4358 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 4362 0x00 0x00 NOPX + 4364 0x00 0x00 NOPX + 4366 0x00 0x00 NOPX + 4368 0x00 0x00 NOPX + 4370 0x00 0x00 NOPX + 4372 0x00 0x00 NOPX + 4374 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 4378 0x01 0x14 0x76 0x98 LDA r3, [p1, #4] + 4382 0x00 0x00 NOPX + 4384 0x00 0x00 NOPX + 4386 0x00 0x00 NOPX + 4388 0x00 0x00 NOPX + 4390 0x00 0x00 NOPX + 4392 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4394 0x08 0x4c 0x71 0x98 ST r3, [p0], #16 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4398 0x00 0x04 0x17 0x18 ST.s16 r0, [p0] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4402 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4406 0x00 0x00 0xf1 0x3e 0x00 0x44 MOVXM r2, #65280 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4412 0x10 0xc4 0x24 0x98 AND r2, r3, r2 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4416 0x10 0x76 0x27 0x98 EQ r27, r1, r2 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4420 0x10 0x01 0x82 0x18 SEL.EQZ r0, r0, r24, r27 +.delay_slot +.swstall delay_slot + 4424 0x00 0x00 NOPX +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_end0 + +.text_segment PM 4432 +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv +.function_start + 4432 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 4438 0x0f 0xf8 0x3d 0x98 ST lr, [sp, #-8] +.no_stack_arguments + 4442 0x00 0x08 0x68 0x00 0x01 0x04 JL #4304 +.delay_slot + 4448 0x18 0x17 0xa0 0xf8 MOV r0, r15 +.delay_slot + 4452 0xff 0x82 0xb0 0x00 0x01 0xf1 0xe9 0xa0 0x11 0x3a ST r0, [sp, #-4]; MOVXM r15, #508736 +.delay_slot + 4462 0x18 0x67 0xa0 0xf8 MOV p0, r15 +.delay_slot +.swstall delay_slot + 4466 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4468 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.return_address + 4480 0xff 0x07 0x20 0x01 0x00 0x68 0xb3 0xc4 0x08 0xba LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p1, r15, #16 + 4490 0x01 0xe2 0x80 0x01 0x80 0x08 0x07 0xfd 0x58 0xba MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 + 4500 0xff 0xbe 0x21 0x0a 0x11 0x80 0x07 0xa0 0x01 0x7a LDA r15, [sp, #-4]; ST r16, [p1], m0; MOVX r16, #-128 + 4510 0x01 0x06 0x4a 0x98 LDA.u8 r18, [p1] + 4514 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4516 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4518 0x01 0x02 0x17 0x18 ST.s16 r16, [p1, dj0] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4522 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4526 0x10 0x22 0x05 0x18 MOVX r17, #1 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4530 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4536 0x14 0x77 0x27 0x98 EQ r27, r17, r18 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4540 0x14 0x21 0x82 0x18 SEL.EQZ r16, r16, r24, r27 +.delay_slot +.swstall delay_slot + 4544 0x00 0x00 NOPX +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + +.text_segment PM 4560 +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.tail_call +.function_start + 4560 0x00 0x06 0x28 0x00 0x00 0x84 J #3152 +.delay_slot + 4566 0x00 0x07 0xc6 0xc6 0x80 0x44 MOVXM p3, #508736 +.delay_slot +.swstall delay_slot + 4572 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4574 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4576 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4578 0x00 0x00 NOPX +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0 + +.text_segment PM 4592 +.label __Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0 +.label _Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.function_start + 4592 0x00 0x07 0xc8 0xc4 0x00 0x44 MOVXM p4, #508416 + 4598 0x80 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p4]; MOV r17, CORE_ID + 4604 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 4610 0xff 0x3a 0xb0 0x23 0x14 0x81 0xca 0x60 0x79 0x3a ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 + 4620 0xfd 0x83 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p0, [sp, #-20]; MOV r0, r15 + 4628 0x0f 0xfc 0x15 0x98 ST r0, [sp, #-4] + 4632 0x0f 0xf0 0x3d 0x98 ST lr, [sp, #-16] + 4636 0x00 0x00 NOPX + 4638 0x80 0x09 0x58 0x40 0x01 0x84 JNZ r16, #4784 +.delay_slot + 4644 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 4648 0x00 0x07 0xc4 0xc4 0x10 0x44 MOVXM p2, #508424 +.delay_slot + 4654 0x40 0xc6 0x30 0x01 0x37 0x60 0x70 0x02 ST r17, [p2]; MOV p2, p7 +.delay_slot + 4662 0x1b 0xd6 0xc0 0xf8 MOV r15, p3 +.delay_slot + 4666 0xfe 0xa3 0xb0 0x00 0x01 0xf3 0xb1 0xa0 0x11 0x3a ST p2, [sp, #-12]; MOVXM p7, #508736 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4676 0x13 0x91 0x60 0x00 0x01 0xf1 0x31 0x18 0x11 0x3a MOVS p0, p7; MOVXM p2, #508464 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4686 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x31 0x16 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #508460 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4696 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 4698 0x00 0x08 0xa8 0x00 0x01 0x04 JL #4432 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4704 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4706 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4708 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 4712 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 4716 0x0a 0x06 0x11 0x98 ST r16, [p2] +.return_address + 4720 0xe0 0xc2 0xd0 0x00 0x01 0xf0 0xb1 0x04 0x10 0xba LDA r16, [p7]; MOVXM p1, #508424 + 4730 0x20 0xc6 0xd0 0x00 0x01 0xf1 0xb1 0x06 0x10 0xba LDA r17, [p1]; MOVXM p3, #508428 + 4740 0xea 0xcb 0x50 0x00 0x01 0xf0 0xb1 0x08 0x10 0xba LDA.u16 r18, [p7, #10]; MOVXM p1, #508432 + 4750 0x00 0x00 NOPX + 4752 0x00 0x00 NOPX + 4754 0x00 0x00 NOPX + 4756 0x00 0x09 0x60 0x00 0x00 0x84 J #4800 +.delay_slot + 4762 0x00 0x07 0xc4 0xc4 0x50 0x44 MOVXM p2, #508456 +.delay_slot + 4768 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 +.delay_slot + 4772 0x0a 0x06 0x51 0x98 ST r18, [p2] +.delay_slot + 4776 0x0b 0x06 0x11 0x98 ST r16, [p3] +.delay_slot + 4780 0x09 0x06 0x11 0x98 ST r16, [p1] +.label TGT_F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 + 4784 0x00 0x07 0xc6 0xc4 0x18 0x44 MOVXM p3, #508428 + 4790 0x00 0x2c 0xf0 0x00 0x01 0xf0 0xb1 0x08 0x10 0xba NOPA; MOVXM p1, #508432 +.label TGT_F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 + 4800 0x18 0x67 0x86 0x18 ADD.NC p0, r15, #12 + 4804 0x1f 0xee 0xd0 0x00 0x01 0xf1 0x31 0x00 0x10 0xba LDA r27, [p0], #-4; MOVXM p2, #508416 + 4814 0x00 0xfe 0x16 0x98 LDA r16, [p0], #-4 + 4818 0x00 0xfe 0x36 0x98 LDA r17, [p0], #-4 + 4822 0x02 0x06 0x56 0x98 LDA r18, [p2] + 4826 0x00 0x46 0x76 0x98 LDA r19, [p0, #16] + 4830 0x00 0x00 NOPX + 4832 0x00 0x00 NOPX + 4834 0x00 0x00 NOPX + 4836 0x00 0x00 NOPX + 4838 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 4842 0x00 0xc2 0x39 0x40 0x0e 0x5c ST r16, [p0]; ADD r16, r18, #1 + 4848 0x0a 0x06 0x11 0x98 ST r16, [p2] + 4852 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 4856 0x00 0x00 NOPX + 4858 0x00 0x00 NOPX + 4860 0x00 0x00 NOPX + 4862 0x14 0xd3 0x08 0x18 ACQ r19, r16 + 4866 0x1a 0x67 0x06 0x18 ADD.NC p2, r14, #12 + 4870 0x00 0x00 NOPX + 4872 0x00 0x00 NOPX + 4874 0x02 0xff 0x76 0x98 LDA r27, [p2], #-4 + 4878 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4 + 4882 0x02 0xfe 0x56 0x98 LDA r18, [p2], #-4 + 4886 0x02 0x56 0x76 0x98 LDA r19, [p2, #20] + 4890 0x00 0x00 NOPX + 4892 0x00 0x00 NOPX + 4894 0x00 0x00 NOPX + 4896 0x00 0x00 NOPX + 4898 0x00 0x00 NOPX + 4900 0x14 0xa3 0x12 0x18 SEL.EQZ r17, r18, r17, r27 + 4904 0x0a 0x06 0x31 0x98 ST r17, [p2] + 4908 0x00 0x00 NOPX + 4910 0x00 0x00 NOPX + 4912 0x00 0x00 NOPX + 4914 0x00 0x00 NOPX + 4916 0x14 0xd3 0x08 0x18 ACQ r19, r16 + 4920 0xd1 0x11 0x60 0x01 0x00 0x29 0xce 0x60 0x79 0x3a MOVS p6, p2; MOVX r16, #1; MOV r14, p6 + 4930 0x00 0x00 NOPX + 4932 0x00 0x00 NOPX + 4934 0x07 0xee 0x19 0x18 LDA p4, [sp, #-20] + 4938 0x60 0xc6 0xdf 0xd8 0x3b 0x0c LDA r17, [p3]; ST p0, [sp, #-20] + 4944 0x20 0xd2 0xd6 0xdd 0x81 0xd4 LDA r20, [p1]; MOV p3, p7 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 4950 0x02 0x4e 0x56 0x98 LDA r18, [p2], #16 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4954 0x00 0x5d 0x1e 0x98 LDA p2, [p0], #20 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4958 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4962 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4964 0x04 0x06 0x76 0x98 LDA r19, [p4] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4968 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 4970 0x00 0x08 0xe8 0x00 0x01 0x04 JL #4560 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4976 0x1b 0xd4 0xc0 0xf8 MOV r15, p2 +.delay_slot + 4980 0x14 0x63 0x0d 0x98 LSHL r17, r17, r16 +.delay_slot + 4984 0x15 0x21 0x0d 0x98 LSHL r16, r20, r16 +.delay_slot + 4988 0x19 0x69 0x41 0x58 ADD.NC p1, r18, r16 +.delay_slot + 4992 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xe2 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV +.return_address + 5008 0xc8 0xc6 0xd0 0x01 0x00 0x28 0xb3 0xd0 0x78 0xba LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 + 5018 0x00 0x07 0xcc 0xc4 0x50 0x44 MOVXM p6, #508456 + 5024 0x00 0x00 NOPX + 5026 0x00 0x00 NOPX + 5028 0x00 0x00 NOPX + 5030 0x00 0x00 NOPX + 5032 0x00 0x00 NOPX + 5034 0x14 0x51 0x08 0x18 REL r17, r16 + 5038 0x01 0xf6 0x36 0x98 LDA r17, [p1, #-4] + 5042 0x07 0xed 0x19 0x18 LDA p2, [sp, #-20] + 5046 0x00 0x00 NOPX + 5048 0x00 0x00 NOPX + 5050 0x00 0x00 NOPX + 5052 0x00 0x00 NOPX + 5054 0x00 0x00 NOPX + 5056 0x14 0x23 0x11 0x98 SUB r17, r16, r17 + 5060 0x4a 0xc6 0xd3 0xec 0x63 0x0c LDA r17, [p2, #20]; ST r17, [p1, #-4] + 5066 0x00 0x00 NOPX + 5068 0x00 0x00 NOPX + 5070 0x00 0x00 NOPX + 5072 0x00 0x00 NOPX + 5074 0x00 0x00 NOPX + 5076 0x00 0x00 NOPX + 5078 0x14 0x51 0x08 0x18 REL r17, r16 + 5082 0xfc 0xce 0xd0 0x00 0x01 0xf0 0xb1 0x00 0x10 0xba LDA r19, [p7, #-8]; MOVXM p1, #508416 + 5092 0x06 0x06 0x56 0x98 LDA r18, [p6] + 5096 0x01 0x06 0x36 0x98 LDA r17, [p1] + 5100 0x00 0x00 NOPX + 5102 0x00 0x00 NOPX + 5104 0x00 0x00 NOPX + 5106 0x00 0x00 NOPX + 5108 0x14 0x21 0x31 0x98 SUB r16, r16, r19 + 5112 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8] + 5116 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 5120 0x80 0x0a 0x10 0x40 0x01 0x84 JNZ r16, #5152 +.delay_slot +.swstall delay_slot + 5126 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5128 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5130 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5132 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5134 0x00 0x00 NOPX + 5136 0x10 0x20 0x01 0x18 MOVX r16, #0 + 5140 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x83 0x08 0xc1 0x36 NOPA; NOPB; ST r16, [p1]; NOPX +.label TGT_F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 + 5152 0x07 0xf0 0x39 0x18 LDA lr, [sp, #-16] + 5156 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] + 5160 0x07 0xf7 0x99 0x18 LDA p7, [sp, #-12] +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 5164 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.noswbrkpt + 5166 0x07 0xf9 0xd1 0x18 LDA r14, [sp, #-8] +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 5170 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 5172 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 5174 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5178 0x0e 0x8e 0x0b 0x18 MOVS p6, r14 +.delay_slot + 5182 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 5188 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5190 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5192 0x00 0x00 NOPX +.label _Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end +.label __Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0 + +.text_segment PM 5200 +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.function_start + 5200 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 5204 0x00 0x07 0xc0 0xc7 0x40 0x44 MOVXM p0, #508832 +.delay_slot + 5210 0x18 0x00 0x80 0xb8 MOV m0, #64 +.delay_slot + 5214 0x08 0x04 0x01 0x98 ST m0, [p0] +.delay_slot + 5218 0x08 0x14 0x01 0x98 ST m0, [p0, #4] +.delay_slot +.swstall delay_slot + 5222 0x00 0x00 NOPX +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_end0 + +.text_segment PM 5232 +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.function_start + 5232 0x23 0x85 0xd0 0x00 0x01 0xf0 0x31 0xc0 0x10 0xba LDA el0, [p1], #4; MOVXM p0, #508800 + 5242 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 5248 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4] + 5252 0x00 0x00 NOPX + 5254 0x00 0x00 NOPX + 5256 0x00 0x00 NOPX + 5258 0x00 0x00 NOPX + 5260 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 5264 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 5268 0x00 0x00 NOPX + 5270 0x00 0x00 NOPX + 5272 0x00 0x00 NOPX + 5274 0x00 0x00 NOPX + 5276 0x00 0x00 NOPX + 5278 0x00 0x00 NOPX + 5280 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 5284 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 5288 0x00 0x00 NOPX + 5290 0x00 0x00 NOPX + 5292 0x00 0x00 NOPX + 5294 0x00 0x00 NOPX + 5296 0x00 0x00 NOPX + 5298 0x00 0x00 NOPX + 5300 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 5304 0x01 0x14 0x2e 0x98 LDA el0, [p1, #4] + 5308 0x00 0x00 NOPX + 5310 0x00 0x00 NOPX +.no_stack_arguments + 5312 0x00 0x0a 0x28 0x00 0x01 0x04 JL #5200 +.delay_slot + 5318 0x0f 0xfb 0x9d 0x98 ST p7, [sp, #-8] +.delay_slot +.swstall delay_slot + 5322 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5324 0x00 0x00 NOPX +.delay_slot + 5326 0x08 0xdc 0x29 0x98 ST el0, [p0], #-12 +.delay_slot + 5330 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x7b 0x06 0x07 0x00 0x00 0x1c 0x2e NOPA; NOPS; MOV p7, p0; NOPV +.return_address + 5344 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] + 5348 0x00 0x00 NOPX + 5350 0x00 0x00 NOPX + 5352 0x00 0x00 NOPX + 5354 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5356 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5358 0x07 0xfb 0x99 0x18 LDA p7, [sp, #-8] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5362 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5366 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5368 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5370 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5372 0x10 0x20 0x01 0x18 MOVX r16, #0 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5376 0xe8 0xc2 0x30 0x3f 0xfe 0x00 0x00 0x00 0x71 0x3a ST r16, [p7, #16]; PADDXM [sp], #-64 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + +.text_segment PM 5392 +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.function_start + 5392 0x04 0x00 0x80 0x00 0x01 0xf1 0xb1 0xc0 0x10 0xba MOVA m0, #32; MOVXM p3, #508800 + 5402 0x61 0x06 0xd0 0x00 0x01 0xf2 0x31 0x18 0x10 0xba LDA r1, [p3], m0; MOVXM p4, #508464 + 5412 0x60 0x90 0xd0 0x18 0x07 0x88 0x6f 0xfa 0x58 0xba LDA m1, [p3]; MOVX r0, #828; MOV r3, #-6 + 5422 0x62 0x80 0xd0 0x00 0x00 0x04 0x7a 0xe8 0x10 0xba LDA m0, [p3, #4]; MOVXM ls, #5584 + 5432 0x80 0x88 0x50 0x00 0x00 0x05 0xba 0xf0 0x10 0xba LDA.s8 r2, [p4]; MOVXM le, #5600 + 5442 0x00 0x00 NOPX + 5444 0x00 0x00 NOPX + 5446 0x00 0x00 NOPX + 5448 0x10 0x42 0x3d 0x98 LSHL r1, r1, r3 + 5452 0x1d 0x70 0xfc 0x98 ADD.NC lc, r1, #-7 + 5456 0x21 0x13 0x70 0x50 0xe8 0x3c VLDA x2, [p1], m0; VLDB x1, [p0], m1 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5462 0x21 0x1b 0x70 0x50 0x68 0xba 0x80 0x12 VLDA x3, [p1], m0; VLDB x0, [p0], m1; MOVX crRnd, r2 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5470 0x21 0x13 0x70 0x50 0xe8 0x3c VLDA x2, [p1], m0; VLDB x1, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5476 0x21 0x1b 0x70 0x50 0x68 0x3c VLDA x3, [p1], m0; VLDB x0, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5482 0x21 0x13 0x70 0x50 0xe8 0x3c VLDA x2, [p1], m0; VLDB x1, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5488 0x21 0x1b 0x70 0x50 0x68 0x3c VLDA x3, [p1], m0; VLDB x0, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5494 0x21 0x13 0x70 0x50 0xe8 0x3c VLDA x2, [p1], m0; VLDB x1, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5500 0x21 0x1b 0x70 0x28 0x34 0x1d 0x00 0xe2 0x41 0x4a VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5510 0x21 0x13 0x70 0x28 0x74 0x1d 0x01 0xe0 0x61 0x4a VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5520 0x21 0x1b 0x70 0x28 0x34 0x1d 0x00 0xe2 0x41 0x4a VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5530 0x21 0x13 0x70 0x28 0x74 0x1d 0x01 0xe0 0x61 0x4a VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5540 0x21 0x1b 0x70 0x50 0x68 0x00 0xad 0x8e 0x00 0xe2 0x41 0x66 VLDA x3, [p1], m0; VLDB x0, [p0], m1; NOPS; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5552 0x21 0x13 0x70 0x50 0xe8 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x0f 0x03 0x0b VLDA x2, [p1], m0; VLDB x1, [p0], m1; NOPS; NOPX; NOPM; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5568 0x21 0x1b 0x70 0x50 0x6a 0x1c 0x23 0x00 0x00 0x00 0x01 0xa5 0x78 0x07 0x12 0x0b VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 +.label ZLS_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_192 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5584 0x21 0x13 0x70 0x50 0xea 0x1c 0xa3 0x00 0x00 0x00 0x01 0xa5 0x78 0x0f 0x03 0x0b VLDA x2, [p1], m0; VLDB x1, [p0], m1; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; VMUL.f dm1, x0, x3, r0 +.label ZLE_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_208 +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5600 0x21 0x1b 0x70 0x50 0x6a 0x1c 0x23 0x00 0x00 0x00 0x01 0xa5 0x78 0x07 0x12 0x0b VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 +.loop_nesting 0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5616 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5624 0x43 0x84 0x60 0x02 0x00 0xe2 0x41 0x62 VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5632 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5640 0x43 0x84 0x60 0x02 0x00 0xe2 0x41 0x62 VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5648 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5656 0x43 0x84 0x60 0x02 0x00 0xe2 0x41 0x62 VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5664 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5672 0x0a 0x1c 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p2], #64 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5676 0x43 0x94 0x60 0x50 0x00 0x5c VST.CONV.bf16.fp32 cml1, [p2], #64;RET lr +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5682 0x0a 0x1c 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p2], #64 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5686 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64 +.delay_slot + 5690 0x0a 0x1c 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p2], #64 +.delay_slot + 5694 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64 +.delay_slot +.swstall delay_slot + 5698 0x00 0x00 NOPX +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_end0 + +.text_segment PM 5712 +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0 +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.function_start + 5712 0x00 0x07 0xc8 0xc4 0x00 0x44 MOVXM p4, #508416 + 5718 0x80 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p4]; MOV r17, CORE_ID + 5724 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 5730 0xff 0x3a 0xb0 0x23 0x14 0x81 0xca 0x60 0x79 0x3a ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 + 5740 0xfd 0x83 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p0, [sp, #-20]; MOV r0, r15 + 5748 0x0f 0xfc 0x15 0x98 ST r0, [sp, #-4] + 5752 0x0f 0xf0 0x3d 0x98 ST lr, [sp, #-16] + 5756 0x00 0x00 NOPX + 5758 0x80 0x0b 0x88 0x40 0x01 0x84 JNZ r16, #5904 +.delay_slot + 5764 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 5768 0x00 0x07 0xc4 0xc4 0x10 0x44 MOVXM p2, #508424 +.delay_slot + 5774 0x40 0xc6 0x30 0x01 0x37 0x60 0x70 0x02 ST r17, [p2]; MOV p2, p7 +.delay_slot + 5782 0x1b 0xd6 0xc0 0xf8 MOV r15, p3 +.delay_slot + 5786 0xfe 0xa3 0xb0 0x00 0x01 0xf3 0xb1 0xc0 0x11 0x3a ST p2, [sp, #-12]; MOVXM p7, #508800 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5796 0x13 0x91 0x60 0x00 0x01 0xf1 0x31 0x18 0x11 0x3a MOVS p0, p7; MOVXM p2, #508464 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5806 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x31 0x16 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #508460 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5816 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 5818 0x00 0x0a 0x38 0x00 0x01 0x04 JL #5232 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5824 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5826 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5828 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 5832 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 5836 0x0a 0x06 0x11 0x98 ST r16, [p2] +.return_address + 5840 0xe0 0xc2 0xd0 0x00 0x01 0xf0 0xb1 0x04 0x10 0xba LDA r16, [p7]; MOVXM p1, #508424 + 5850 0x20 0xc6 0xd0 0x00 0x01 0xf1 0xb1 0x06 0x10 0xba LDA r17, [p1]; MOVXM p3, #508428 + 5860 0xea 0xcb 0x50 0x00 0x01 0xf0 0xb1 0x08 0x10 0xba LDA.u16 r18, [p7, #10]; MOVXM p1, #508432 + 5870 0x00 0x00 NOPX + 5872 0x00 0x00 NOPX + 5874 0x00 0x00 NOPX + 5876 0x00 0x0b 0x90 0x00 0x00 0x84 J #5920 +.delay_slot + 5882 0x00 0x07 0xc4 0xc4 0x50 0x44 MOVXM p2, #508456 +.delay_slot + 5888 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 +.delay_slot + 5892 0x0a 0x06 0x51 0x98 ST r18, [p2] +.delay_slot + 5896 0x0b 0x06 0x11 0x98 ST r16, [p3] +.delay_slot + 5900 0x09 0x06 0x11 0x98 ST r16, [p1] +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 + 5904 0x00 0x07 0xc6 0xc4 0x18 0x44 MOVXM p3, #508428 + 5910 0x00 0x2c 0xf0 0x00 0x01 0xf0 0xb1 0x08 0x10 0xba NOPA; MOVXM p1, #508432 +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 + 5920 0x18 0x67 0x86 0x18 ADD.NC p0, r15, #12 + 5924 0x1f 0xee 0xd0 0x00 0x01 0xf1 0x31 0x00 0x10 0xba LDA r27, [p0], #-4; MOVXM p2, #508416 + 5934 0x00 0xfe 0x16 0x98 LDA r16, [p0], #-4 + 5938 0x00 0xfe 0x36 0x98 LDA r17, [p0], #-4 + 5942 0x02 0x06 0x56 0x98 LDA r18, [p2] + 5946 0x00 0x46 0x76 0x98 LDA r19, [p0, #16] + 5950 0x00 0x00 NOPX + 5952 0x00 0x00 NOPX + 5954 0x00 0x00 NOPX + 5956 0x00 0x00 NOPX + 5958 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 5962 0x00 0xc2 0x39 0x40 0x0e 0x5c ST r16, [p0]; ADD r16, r18, #1 + 5968 0x0a 0x06 0x11 0x98 ST r16, [p2] + 5972 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 5976 0x00 0x00 NOPX + 5978 0x00 0x00 NOPX + 5980 0x00 0x00 NOPX + 5982 0x14 0xd3 0x08 0x18 ACQ r19, r16 + 5986 0x1a 0x67 0x06 0x18 ADD.NC p2, r14, #12 + 5990 0x00 0x00 NOPX + 5992 0x00 0x00 NOPX + 5994 0x02 0xff 0x76 0x98 LDA r27, [p2], #-4 + 5998 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4 + 6002 0x02 0xfe 0x56 0x98 LDA r18, [p2], #-4 + 6006 0x02 0x56 0x76 0x98 LDA r19, [p2, #20] + 6010 0x00 0x00 NOPX + 6012 0x00 0x00 NOPX + 6014 0x00 0x00 NOPX + 6016 0x00 0x00 NOPX + 6018 0x00 0x00 NOPX + 6020 0x14 0xa3 0x12 0x18 SEL.EQZ r17, r18, r17, r27 + 6024 0x0a 0x06 0x31 0x98 ST r17, [p2] + 6028 0x00 0x00 NOPX + 6030 0x00 0x00 NOPX + 6032 0x00 0x00 NOPX + 6034 0x00 0x00 NOPX + 6036 0x14 0xd3 0x08 0x18 ACQ r19, r16 + 6040 0xd1 0x11 0x60 0x01 0x00 0x29 0xce 0x60 0x79 0x3a MOVS p6, p2; MOVX r16, #1; MOV r14, p6 + 6050 0x00 0x00 NOPX + 6052 0x00 0x00 NOPX + 6054 0x07 0xee 0x19 0x18 LDA p4, [sp, #-20] + 6058 0x60 0xc6 0xdf 0xd8 0x3b 0x0c LDA r17, [p3]; ST p0, [sp, #-20] + 6064 0x20 0xd2 0xd6 0xdd 0x81 0xd4 LDA r20, [p1]; MOV p3, p7 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 6070 0x02 0x4e 0x56 0x98 LDA r18, [p2], #16 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 6074 0x00 0x5d 0x1e 0x98 LDA p2, [p0], #20 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6078 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6082 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6084 0x04 0x06 0x76 0x98 LDA r19, [p4] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6088 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 6090 0x00 0x0a 0x88 0x00 0x01 0x04 JL #5392 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6096 0x1b 0xd4 0xc0 0xf8 MOV r15, p2 +.delay_slot + 6100 0x14 0x63 0x0d 0x98 LSHL r17, r17, r16 +.delay_slot + 6104 0x15 0x21 0x0d 0x98 LSHL r16, r20, r16 +.delay_slot + 6108 0x19 0x69 0x41 0x58 ADD.NC p1, r18, r16 +.delay_slot + 6112 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xe2 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV +.return_address + 6128 0xc8 0xc6 0xd0 0x01 0x00 0x28 0xb3 0xd0 0x78 0xba LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 + 6138 0x00 0x07 0xcc 0xc4 0x50 0x44 MOVXM p6, #508456 + 6144 0x00 0x00 NOPX + 6146 0x00 0x00 NOPX + 6148 0x00 0x00 NOPX + 6150 0x00 0x00 NOPX + 6152 0x00 0x00 NOPX + 6154 0x14 0x51 0x08 0x18 REL r17, r16 + 6158 0x01 0xf6 0x36 0x98 LDA r17, [p1, #-4] + 6162 0x07 0xed 0x19 0x18 LDA p2, [sp, #-20] + 6166 0x00 0x00 NOPX + 6168 0x00 0x00 NOPX + 6170 0x00 0x00 NOPX + 6172 0x00 0x00 NOPX + 6174 0x00 0x00 NOPX + 6176 0x14 0x23 0x11 0x98 SUB r17, r16, r17 + 6180 0x4a 0xc6 0xd3 0xec 0x63 0x0c LDA r17, [p2, #20]; ST r17, [p1, #-4] + 6186 0x00 0x00 NOPX + 6188 0x00 0x00 NOPX + 6190 0x00 0x00 NOPX + 6192 0x00 0x00 NOPX + 6194 0x00 0x00 NOPX + 6196 0x00 0x00 NOPX + 6198 0x14 0x51 0x08 0x18 REL r17, r16 + 6202 0xfc 0xce 0xd0 0x00 0x01 0xf0 0xb1 0x00 0x10 0xba LDA r19, [p7, #-8]; MOVXM p1, #508416 + 6212 0x06 0x06 0x56 0x98 LDA r18, [p6] + 6216 0x01 0x06 0x36 0x98 LDA r17, [p1] + 6220 0x00 0x00 NOPX + 6222 0x00 0x00 NOPX + 6224 0x00 0x00 NOPX + 6226 0x00 0x00 NOPX + 6228 0x14 0x21 0x31 0x98 SUB r16, r16, r19 + 6232 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8] + 6236 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 6240 0x80 0x0c 0x40 0x40 0x01 0x84 JNZ r16, #6272 +.delay_slot +.swstall delay_slot + 6246 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6248 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6250 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6252 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6254 0x00 0x00 NOPX + 6256 0x10 0x20 0x01 0x18 MOVX r16, #0 + 6260 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x83 0x08 0xc1 0x36 NOPA; NOPB; ST r16, [p1]; NOPX +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 + 6272 0x07 0xf0 0x39 0x18 LDA lr, [sp, #-16] + 6276 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] + 6280 0x07 0xf7 0x99 0x18 LDA p7, [sp, #-12] +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 6284 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.noswbrkpt + 6286 0x07 0xf9 0xd1 0x18 LDA r14, [sp, #-8] +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 6290 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 6292 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 6294 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6298 0x0e 0x8e 0x0b 0x18 MOVS p6, r14 +.delay_slot + 6302 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 6308 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6310 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6312 0x00 0x00 NOPX +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0 + +.text_segment PM 6320 +.label __ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv +.function_start + 6320 0x23 0x85 0xd0 0x00 0x01 0xf0 0x31 0xe0 0x10 0xba LDA el0, [p1], #4; MOVXM p0, #508864 + 6330 0xf0 0x00 0x00 0x08 0x10 0x0b 0x08 0x00 0x58 0xba MOVA r0, #-128; MOVX r1, #256; MOV r24, #0 + 6340 0x00 0x00 NOPX + 6342 0x00 0x00 NOPX + 6344 0x00 0x00 NOPX + 6346 0x00 0x00 NOPX + 6348 0x00 0x00 NOPX + 6350 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 6354 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 6358 0x00 0x00 NOPX + 6360 0x00 0x00 NOPX + 6362 0x00 0x00 NOPX + 6364 0x00 0x00 NOPX + 6366 0x00 0x00 NOPX + 6368 0x00 0x00 NOPX + 6370 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 6374 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 6378 0x00 0x00 NOPX + 6380 0x00 0x00 NOPX + 6382 0x00 0x00 NOPX + 6384 0x00 0x00 NOPX + 6386 0x00 0x00 NOPX + 6388 0x00 0x00 NOPX + 6390 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 6394 0x01 0x14 0x76 0x98 LDA r3, [p1, #4] + 6398 0x00 0x00 NOPX + 6400 0x00 0x00 NOPX + 6402 0x00 0x00 NOPX + 6404 0x00 0x00 NOPX + 6406 0x00 0x00 NOPX + 6408 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 6410 0x08 0x4c 0x71 0x98 ST r3, [p0], #16 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 6414 0x00 0x04 0x17 0x18 ST.s16 r0, [p0] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6418 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6422 0x00 0x00 0xf1 0x3e 0x00 0x44 MOVXM r2, #65280 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6428 0x10 0xc4 0x24 0x98 AND r2, r3, r2 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6432 0x10 0x76 0x27 0x98 EQ r27, r1, r2 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6436 0x10 0x01 0x82 0x18 SEL.EQZ r0, r0, r24, r27 +.delay_slot +.swstall delay_slot + 6440 0x00 0x00 NOPX +.label _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_end0 + +.text_segment PM 6448 +.label __ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv +.function_start + 6448 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 6454 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4] +.no_stack_arguments + 6458 0x00 0x0c 0x58 0x00 0x01 0x04 JL #6320 +.delay_slot + 6464 0x00 0x07 0xc0 0xc7 0x80 0x44 MOVXM p0, #508864 +.delay_slot +.swstall delay_slot + 6470 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6472 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6474 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6476 0x00 0x01 0x67 0x98 NOPA +.return_address + 6480 0xff 0x87 0x20 0x00 0x01 0xf0 0xb1 0xe0 0x10 0xba LDA lr, [sp, #-4]; MOVXM p1, #508864 + 6490 0x00 0x00 NOPX + 6492 0x00 0x00 NOPX + 6494 0x00 0x00 NOPX + 6496 0x00 0x00 NOPX + 6498 0x00 0x00 NOPX + 6500 0x00 0x00 NOPX + 6502 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 6506 0x10 0x20 0x19 0x18 MOVX r16, #6 +.delay_slot + 6510 0x09 0x46 0x11 0x98 ST r16, [p1, #16] +.delay_slot + 6514 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 6520 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6522 0x00 0x00 NOPX +.label _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + +.text_segment PM 6528 +.label __ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS4_E +.tail_call +.function_start + 6528 0x00 0x06 0x28 0x00 0x00 0x84 J #3152 +.delay_slot + 6534 0x00 0x07 0xc6 0xc7 0x80 0x44 MOVXM p3, #508864 +.delay_slot +.swstall delay_slot + 6540 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6542 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6544 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6546 0x00 0x00 NOPX +.label _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS4_E__end +.label __ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS4_E___func_end0 + +.text_segment PM 6560 +.label __Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0 +.label _Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.function_start + 6560 0x00 0x07 0xc8 0xc4 0x00 0x44 MOVXM p4, #508416 + 6566 0x80 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p4]; MOV r17, CORE_ID + 6572 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 6578 0xff 0x3a 0xb0 0x23 0x14 0x81 0xca 0x60 0x79 0x3a ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 + 6588 0xfd 0x83 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p0, [sp, #-20]; MOV r0, r15 + 6596 0x0f 0xfc 0x15 0x98 ST r0, [sp, #-4] + 6600 0x0f 0xf0 0x3d 0x98 ST lr, [sp, #-16] + 6604 0x00 0x00 NOPX + 6606 0x80 0x0d 0x30 0x40 0x01 0x84 JNZ r16, #6752 +.delay_slot + 6612 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 6616 0x00 0x07 0xc4 0xc4 0x10 0x44 MOVXM p2, #508424 +.delay_slot + 6622 0x40 0xc6 0x30 0x01 0x37 0x60 0x70 0x02 ST r17, [p2]; MOV p2, p7 +.delay_slot + 6630 0x1b 0xd6 0xc0 0xf8 MOV r15, p3 +.delay_slot + 6634 0xfe 0xa3 0xb0 0x00 0x01 0xf3 0xb1 0xe0 0x11 0x3a ST p2, [sp, #-12]; MOVXM p7, #508864 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 6644 0x13 0x91 0x60 0x00 0x01 0xf1 0x31 0x18 0x11 0x3a MOVS p0, p7; MOVXM p2, #508464 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 6654 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x31 0x16 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #508460 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6664 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 6666 0x00 0x0c 0x98 0x00 0x01 0x04 JL #6448 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6672 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6674 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6676 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 6680 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 6684 0x0a 0x06 0x11 0x98 ST r16, [p2] +.return_address + 6688 0xe0 0xc2 0xd0 0x00 0x01 0xf0 0xb1 0x04 0x10 0xba LDA r16, [p7]; MOVXM p1, #508424 + 6698 0x20 0xc6 0xd0 0x00 0x01 0xf1 0xb1 0x06 0x10 0xba LDA r17, [p1]; MOVXM p3, #508428 + 6708 0xea 0xcb 0x50 0x00 0x01 0xf0 0xb1 0x08 0x10 0xba LDA.u16 r18, [p7, #10]; MOVXM p1, #508432 + 6718 0x00 0x00 NOPX + 6720 0x00 0x00 NOPX + 6722 0x00 0x00 NOPX + 6724 0x00 0x0d 0x38 0x00 0x00 0x84 J #6768 +.delay_slot + 6730 0x00 0x07 0xc4 0xc4 0x50 0x44 MOVXM p2, #508456 +.delay_slot + 6736 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 +.delay_slot + 6740 0x0a 0x06 0x51 0x98 ST r18, [p2] +.delay_slot + 6744 0x0b 0x06 0x11 0x98 ST r16, [p3] +.delay_slot + 6748 0x09 0x06 0x11 0x98 ST r16, [p1] +.label TGT_F_Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 + 6752 0x00 0x07 0xc6 0xc4 0x18 0x44 MOVXM p3, #508428 + 6758 0x00 0x2c 0xf0 0x00 0x01 0xf0 0xb1 0x08 0x10 0xba NOPA; MOVXM p1, #508432 +.label TGT_F_Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 + 6768 0x18 0x67 0x86 0x18 ADD.NC p0, r15, #12 + 6772 0x1f 0xee 0xd0 0x00 0x01 0xf1 0x31 0x00 0x10 0xba LDA r27, [p0], #-4; MOVXM p2, #508416 + 6782 0x00 0xfe 0x16 0x98 LDA r16, [p0], #-4 + 6786 0x00 0xfe 0x36 0x98 LDA r17, [p0], #-4 + 6790 0x02 0x06 0x56 0x98 LDA r18, [p2] + 6794 0x00 0x46 0x76 0x98 LDA r19, [p0, #16] + 6798 0x00 0x00 NOPX + 6800 0x00 0x00 NOPX + 6802 0x00 0x00 NOPX + 6804 0x00 0x00 NOPX + 6806 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 6810 0x00 0xc2 0x39 0x40 0x0e 0x5c ST r16, [p0]; ADD r16, r18, #1 + 6816 0x0a 0x06 0x11 0x98 ST r16, [p2] + 6820 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 6824 0x00 0x00 NOPX + 6826 0x00 0x00 NOPX + 6828 0x00 0x00 NOPX + 6830 0x14 0xd3 0x08 0x18 ACQ r19, r16 + 6834 0x1a 0x67 0x06 0x18 ADD.NC p2, r14, #12 + 6838 0x00 0x00 NOPX + 6840 0x00 0x00 NOPX + 6842 0x02 0xff 0x76 0x98 LDA r27, [p2], #-4 + 6846 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4 + 6850 0x02 0xfe 0x56 0x98 LDA r18, [p2], #-4 + 6854 0x02 0x56 0x76 0x98 LDA r19, [p2, #20] + 6858 0x00 0x00 NOPX + 6860 0x00 0x00 NOPX + 6862 0x00 0x00 NOPX + 6864 0x00 0x00 NOPX + 6866 0x00 0x00 NOPX + 6868 0x14 0xa3 0x12 0x18 SEL.EQZ r17, r18, r17, r27 + 6872 0x0a 0x06 0x31 0x98 ST r17, [p2] + 6876 0x00 0x00 NOPX + 6878 0x00 0x00 NOPX + 6880 0x00 0x00 NOPX + 6882 0x00 0x00 NOPX + 6884 0x14 0xd3 0x08 0x18 ACQ r19, r16 + 6888 0xd1 0x11 0x60 0x01 0x00 0x29 0xce 0x60 0x79 0x3a MOVS p6, p2; MOVX r16, #1; MOV r14, p6 + 6898 0x00 0x00 NOPX + 6900 0x00 0x00 NOPX + 6902 0x07 0xee 0x19 0x18 LDA p4, [sp, #-20] + 6906 0x60 0xc6 0xdf 0xd8 0x3b 0x0c LDA r17, [p3]; ST p0, [sp, #-20] + 6912 0x20 0xd2 0xd6 0xdd 0x81 0xd4 LDA r20, [p1]; MOV p3, p7 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 6918 0x02 0x4e 0x56 0x98 LDA r18, [p2], #16 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 6922 0x00 0x5d 0x1e 0x98 LDA p2, [p0], #20 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6926 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6930 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6932 0x04 0x06 0x76 0x98 LDA r19, [p4] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6936 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 6938 0x00 0x0c 0xc0 0x00 0x01 0x04 JL #6528 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6944 0x1b 0xd4 0xc0 0xf8 MOV r15, p2 +.delay_slot + 6948 0x14 0x63 0x0d 0x98 LSHL r17, r17, r16 +.delay_slot + 6952 0x15 0x21 0x0d 0x98 LSHL r16, r20, r16 +.delay_slot + 6956 0x19 0x69 0x41 0x58 ADD.NC p1, r18, r16 +.delay_slot + 6960 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xe2 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV +.return_address + 6976 0xc8 0xc6 0xd0 0x01 0x00 0x28 0xb3 0xd0 0x78 0xba LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 + 6986 0x00 0x07 0xcc 0xc4 0x50 0x44 MOVXM p6, #508456 + 6992 0x00 0x00 NOPX + 6994 0x00 0x00 NOPX + 6996 0x00 0x00 NOPX + 6998 0x00 0x00 NOPX + 7000 0x00 0x00 NOPX + 7002 0x14 0x51 0x08 0x18 REL r17, r16 + 7006 0x01 0xf6 0x36 0x98 LDA r17, [p1, #-4] + 7010 0x07 0xed 0x19 0x18 LDA p2, [sp, #-20] + 7014 0x00 0x00 NOPX + 7016 0x00 0x00 NOPX + 7018 0x00 0x00 NOPX + 7020 0x00 0x00 NOPX + 7022 0x00 0x00 NOPX + 7024 0x14 0x23 0x11 0x98 SUB r17, r16, r17 + 7028 0x4a 0xc6 0xd3 0xec 0x63 0x0c LDA r17, [p2, #20]; ST r17, [p1, #-4] + 7034 0x00 0x00 NOPX + 7036 0x00 0x00 NOPX + 7038 0x00 0x00 NOPX + 7040 0x00 0x00 NOPX + 7042 0x00 0x00 NOPX + 7044 0x00 0x00 NOPX + 7046 0x14 0x51 0x08 0x18 REL r17, r16 + 7050 0xfc 0xce 0xd0 0x00 0x01 0xf0 0xb1 0x00 0x10 0xba LDA r19, [p7, #-8]; MOVXM p1, #508416 + 7060 0x06 0x06 0x56 0x98 LDA r18, [p6] + 7064 0x01 0x06 0x36 0x98 LDA r17, [p1] + 7068 0x00 0x00 NOPX + 7070 0x00 0x00 NOPX + 7072 0x00 0x00 NOPX + 7074 0x00 0x00 NOPX + 7076 0x14 0x21 0x31 0x98 SUB r16, r16, r19 + 7080 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8] + 7084 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 7088 0x80 0x0d 0xe8 0x40 0x01 0x84 JNZ r16, #7120 +.delay_slot +.swstall delay_slot + 7094 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7096 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7098 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7100 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7102 0x00 0x00 NOPX + 7104 0x10 0x20 0x01 0x18 MOVX r16, #0 + 7108 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x83 0x08 0xc1 0x36 NOPA; NOPB; ST r16, [p1]; NOPX +.label TGT_F_Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 + 7120 0x07 0xf0 0x39 0x18 LDA lr, [sp, #-16] + 7124 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] + 7128 0x07 0xf7 0x99 0x18 LDA p7, [sp, #-12] +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 7132 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.noswbrkpt + 7134 0x07 0xf9 0xd1 0x18 LDA r14, [sp, #-8] +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7138 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7140 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7142 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7146 0x0e 0x8e 0x0b 0x18 MOVS p6, r14 +.delay_slot + 7150 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 7156 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7158 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7160 0x00 0x00 NOPX +.label _Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end +.label __Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0 + +.text_segment PM 7168 +.label __ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv___func_begin0 +.label _ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv +.function_start + 7168 0x23 0x86 0xd0 0x00 0x01 0xf0 0x31 0x20 0x10 0xba LDA r1, [p1], #4; MOVXM p0, #508480 + 7178 0x00 0x18 0x00 0x00 0x40 0x88 0x00 0x06 0x58 0xba MOVA r24, #0; MOVX r4, #4; MOV m0, #6 + 7188 0x00 0x00 NOPX + 7190 0x00 0x00 NOPX + 7192 0x00 0x00 NOPX + 7194 0x00 0x00 NOPX + 7196 0x00 0x00 NOPX + 7198 0x08 0x1c 0x31 0x98 ST r1, [p0], #4 + 7202 0x01 0x1c 0xd6 0x98 LDA r6, [p1], #4 + 7206 0x00 0x00 NOPX + 7208 0x00 0x00 NOPX + 7210 0x00 0x00 NOPX + 7212 0x00 0x00 NOPX + 7214 0x00 0x00 NOPX + 7216 0x00 0x00 NOPX + 7218 0x03 0x9a 0x33 0x03 0xfe 0x5c ST r6, [p0], #4; ADD r0, r6, #-1 + 7224 0x01 0x1c 0xb6 0x98 LDA r5, [p1], #4 + 7228 0x00 0x00 NOPX + 7230 0x00 0x00 NOPX + 7232 0x00 0x00 NOPX + 7234 0x00 0x00 NOPX + 7236 0x00 0x00 NOPX + 7238 0x00 0x00 NOPX + 7240 0x03 0x96 0x32 0x9c 0xdf 0x5c ST r5, [p0], #4; MUL r7, r5, r6 + 7246 0x01 0x1e 0x16 0x98 LDA r16, [p1], #4 + 7250 0x00 0x00 NOPX + 7252 0x00 0x00 NOPX + 7254 0x00 0x00 NOPX + 7256 0x00 0x00 NOPX + 7258 0x00 0x00 NOPX + 7260 0x00 0x00 NOPX + 7262 0x03 0xc2 0x32 0x12 0x0f 0x5c ST r16, [p0], #4; EQ r4, r4, r16 + 7268 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 7272 0x00 0x00 NOPX + 7274 0x00 0x00 NOPX + 7276 0x00 0x00 NOPX + 7278 0x00 0x00 NOPX + 7280 0x00 0x00 NOPX + 7282 0x00 0x00 NOPX + 7284 0x03 0x85 0x30 0x02 0x50 0x0e 0x70 0x02 ST el0, [p0], #4; MOV r18, el0 + 7292 0x01 0x1e 0x76 0x98 LDA r19, [p1], #4 + 7296 0x00 0x00 NOPX + 7298 0x00 0x00 NOPX + 7300 0x00 0x00 NOPX + 7302 0x00 0x00 NOPX + 7304 0x00 0x00 NOPX + 7306 0x00 0x00 NOPX + 7308 0x03 0xce 0x39 0xca 0x5f 0x5c ST r19, [p0], #4; MUL r18, r19, r18 + 7314 0x01 0x04 0x0e 0x98 LDA eh0, [p1] + 7318 0x00 0x00 NOPX + 7320 0x00 0x00 NOPX + 7322 0x00 0x00 NOPX + 7324 0x00 0x00 NOPX + 7326 0x00 0x00 NOPX + 7328 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7330 0x01 0x01 0x30 0x02 0x90 0x8e 0x70 0x02 ST eh0, [p0], m0; MOV r20, eh0 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7338 0x00 0x2e 0x57 0x18 ST.s16 r18, [p0], #4 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7342 0x14 0xa5 0x4f 0x98 MUL r18, r18, r20 + 7346 0x20 0x0e 0x88 0x40 0x01 0x84 JNZ r4, #7440 +.delay_slot + 7352 0x17 0xc4 0xed 0x18 MOVX r2, #-5 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 7356 0x11 0x46 0x2d 0x98 LSHL r3, r5, r2 +.delay_slot +.aggressive_scheduled_block_id 2 +.noswbrkpt + 7360 0x11 0xc6 0x1f 0x98 MUL r3, r7, r1 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7364 0x10 0xe3 0xff 0x18 ADD r17, r3, #-1 +.delay_slot + 7368 0x00 0xe2 0xe1 0x8c 0x5b 0x2c ST.s16 r24, [p0]; LSHL r3, r3, r2 + 7374 0x10 0x24 0x09 0x18 MOVX r18, #2 + 7378 0x14 0xa5 0x07 0x98 EQ r18, r18, r16 + 7382 0x90 0x0f 0x50 0x40 0x01 0x84 JNZ r18, #7840 +.delay_slot +.swstall delay_slot + 7388 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7390 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7392 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7394 0x00 0x00 NOPX +.delay_slot + 7396 0x10 0x49 0xff 0x18 ADD r4, r1, #-1 + 7400 0x10 0x0c 0x05 0x18 MOVX r6, #1 + 7404 0x11 0x8d 0x07 0x98 EQ r6, r6, r16 + 7408 0x30 0x0f 0x00 0x40 0x01 0x84 JNZ r6, #7680 +.delay_slot +.swstall delay_slot + 7414 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7416 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7418 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7420 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7422 0x00 0x00 NOPX + 7424 0x00 0x0e 0xe8 0x00 0x00 0x84 J #7632 +.delay_slot +.swstall delay_slot + 7430 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7432 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7434 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7436 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7438 0x00 0x00 NOPX +.label TGT_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv_272 + 7440 0x04 0x01 0x00 0x00 0x01 0xf1 0x31 0x33 0x10 0xba MOVA r1, #32; MOVXM p2, #508518 + 7450 0x04 0x04 0x00 0x0f 0x01 0x6c 0x50 0x0e 0x78 0xba MOVA r4, #32; LSHL r16, r7, r2; MOV r2, el0 + 7460 0x29 0x0d 0xc1 0x00 0x69 0x64 MSC r4, r4, r5, r6; MOV dj0, #26 + 7466 0x00 0x00 NOPX + 7468 0x00 0x00 NOPX + 7470 0x00 0x00 NOPX + 7472 0x02 0x1c 0x37 0x18 ST.s16 r1, [p2], #2 + 7476 0x00 0x00 NOPX + 7478 0x00 0x00 NOPX + 7480 0x00 0x00 NOPX + 7482 0x00 0x00 NOPX + 7484 0x00 0x00 NOPX + 7486 0x00 0x00 NOPX + 7488 0x02 0x1e 0x37 0x18 ST.s16 r17, [p2], #2 + 7492 0x00 0x00 NOPX + 7494 0x00 0x00 NOPX + 7496 0x00 0x00 NOPX + 7498 0x00 0x00 NOPX + 7500 0x00 0x00 NOPX + 7502 0x00 0x00 NOPX + 7504 0x02 0x1c 0x37 0x18 ST.s16 r1, [p2], #2 + 7508 0x00 0x00 NOPX + 7510 0x00 0x00 NOPX + 7512 0x00 0x00 NOPX + 7514 0x00 0x00 NOPX + 7516 0x00 0x00 NOPX + 7518 0x00 0x00 NOPX + 7520 0x02 0x1c 0x17 0x18 ST.s16 r0, [p2], #2 + 7524 0x00 0x00 NOPX + 7526 0x00 0x00 NOPX + 7528 0x00 0x00 NOPX + 7530 0x00 0x00 NOPX + 7532 0x00 0x00 NOPX + 7534 0x00 0x00 NOPX + 7536 0x02 0x1c 0x37 0x18 ST.s16 r1, [p2], #2 + 7540 0x00 0x00 NOPX + 7542 0x00 0x00 NOPX + 7544 0x00 0x00 NOPX + 7546 0x00 0x00 NOPX + 7548 0x00 0x00 NOPX + 7550 0x00 0x00 NOPX + 7552 0x0a 0xcc 0xf1 0x98 ST r7, [p2], #-16 + 7556 0x02 0xec 0x57 0x18 ST.s16 r2, [p2], #-4 + 7560 0x00 0x00 NOPX + 7562 0x00 0x00 NOPX + 7564 0x00 0x00 NOPX + 7566 0x00 0x00 NOPX + 7568 0x00 0x00 NOPX + 7570 0x00 0x00 NOPX + 7572 0x02 0x00 0x97 0x18 ST.s16 r4, [p2, dj0] + 7576 0x00 0x00 NOPX + 7578 0x00 0x00 NOPX + 7580 0x00 0x00 NOPX + 7582 0x00 0x00 NOPX + 7584 0x00 0x00 NOPX + 7586 0x00 0x00 NOPX + 7588 0x40 0x8e 0xe0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 ST.s16 r3, [p2]; NOPB; NOPS; NOPX +.label __ll7__ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv + 7600 0x14 0x01 0xff 0x18 ADD r0, r16, #-1 + 7604 0x00 0x07 0xc4 0xc4 0xe8 0x44 MOVXM p2, #508532 + 7610 0x00 0x00 NOPX + 7612 0x00 0x00 NOPX + 7614 0x00 0x00 NOPX + 7616 0x00 0x00 NOPX + 7618 0x40 0x82 0xe0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e ST.s16 r0, [p2]; NOPS; NOPM; NOPV +.label TGT_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv_464 + 7632 0x22 0x82 0xd1 0x00 0x59 0x54 LDA r0, [p1, #4]; MOV dj0, #22 + 7638 0x00 0x00 NOPX + 7640 0x00 0x00 NOPX + 7642 0x00 0x00 NOPX + 7644 0x00 0x00 NOPX + 7646 0x00 0x00 NOPX + 7648 0x00 0x00 0x07 0x18 ST.s8 r0, [p0, dj0] + 7652 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot + 7656 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7658 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7660 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7662 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7664 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv_512 + 7680 0x04 0x06 0x00 0x00 0x01 0xf1 0x31 0x33 0x10 0xba MOVA r6, #32; MOVXM p2, #508518 + 7690 0x43 0x9a 0xe2 0x84 0x3f 0x2c ST.s16 r6, [p2], #2; MUL r1, r5, r1 + 7696 0x00 0x00 NOPX + 7698 0x10 0x60 0x2d 0x98 LSHL r16, r1, r2 + 7702 0x00 0x00 NOPX + 7704 0x00 0x00 NOPX + 7706 0x00 0x00 NOPX + 7708 0x04 0x00 0x00 0x0a 0x60 0x30 0xc8 0x20 0x58 0xba MOVA r0, #32; MAC r6, r6, r5, r0; MOV r6, #32 + 7718 0x43 0xc6 0xe0 0x14 0x23 0x2c ST.s16 r17, [p2], #2; SUB r5, r0, r1 + 7724 0x00 0x00 NOPX + 7726 0x00 0x00 NOPX + 7728 0x00 0x00 NOPX + 7730 0x00 0x00 NOPX + 7732 0x00 0x00 NOPX + 7734 0x00 0x00 NOPX + 7736 0x02 0x1c 0xd7 0x18 ST.s16 r6, [p2], #2 + 7740 0x00 0x00 NOPX + 7742 0x00 0x00 NOPX + 7744 0x00 0x00 NOPX + 7746 0x00 0x00 NOPX + 7748 0x00 0x00 NOPX + 7750 0x00 0x00 NOPX + 7752 0x02 0x1c 0x97 0x18 ST.s16 r4, [p2], #2 + 7756 0x00 0x00 NOPX + 7758 0x00 0x00 NOPX + 7760 0x00 0x00 NOPX + 7762 0x00 0x00 NOPX + 7764 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 7766 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.noswbrkpt + 7768 0x02 0x1c 0x17 0x18 ST.s16 r0, [p2], #2 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7772 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7774 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7776 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7778 0x11 0xc0 0x4e 0x18 MSC r0, r0, r7, r4 + 7782 0x00 0x00 NOPX + 7784 0x00 0x00 NOPX + 7786 0x0a 0xcc 0x31 0x98 ST r1, [p2], #-16 + 7790 0x02 0xee 0x77 0x18 ST.s16 r19, [p2], #-4 + 7794 0x00 0x00 NOPX + 7796 0x00 0x00 NOPX + 7798 0x00 0x00 NOPX + 7800 0x00 0x00 NOPX + 7802 0x00 0x00 NOPX + 7804 0x00 0x00 NOPX + 7806 0x02 0x04 0x77 0x18 ST.s16 r3, [p2] + 7810 0x00 0x00 NOPX + 7812 0x00 0x0e 0xd8 0x00 0x00 0x84 J #7600 +.delay_slot + 7818 0x18 0x80 0x34 0xb8 MOV dj0, #26 +.delay_slot +.swstall delay_slot + 7822 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7824 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7826 0x00 0x00 NOPX +.delay_slot + 7828 0x40 0x16 0xe0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 ST.s16 r5, [p2, dj0]; NOPB; NOPS; NOPX +.label TGT_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv_672 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 7840 0x02 0x04 0x80 0x00 0x01 0xf1 0x31 0x2e 0x10 0xba MOVA m1, #16; MOVXM p2, #508508 +.aggressive_scheduled_block_id 4 +.noswbrkpt + 7850 0x45 0x86 0xe0 0x01 0x10 0xa8 0xf0 0x8e 0x78 0xba ST.s16 r1, [p2], #4; MOVX r17, #5; MOV r7, eh0 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 7860 0x04 0x03 0x00 0x03 0x03 0x7c 0x07 0xf6 0x58 0xba MOVA r3, #32; MUL r16, r1, r6; MOV m0, #-10 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 7870 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 7872 0x11 0x43 0x0f 0x98 MUL r1, r5, r16 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 7876 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7878 0x10 0x42 0x2d 0x98 LSHL r1, r1, r2 + 7882 0x00 0x00 NOPX + 7884 0x02 0x28 0xf7 0x18 ST.s16 r7, [p2], m1 + 7888 0x00 0x00 NOPX + 7890 0x00 0x00 NOPX + 7892 0x00 0x00 NOPX + 7894 0x00 0x00 NOPX + 7896 0x00 0x00 NOPX + 7898 0x00 0x00 NOPX + 7900 0x0a 0x0a 0x11 0x98 ST r16, [p2], m0 + 7904 0x02 0x1c 0xb7 0x18 ST.s16 r5, [p2], #2 + 7908 0x00 0x00 NOPX + 7910 0x00 0x00 NOPX + 7912 0x00 0x00 NOPX + 7914 0x00 0x00 NOPX + 7916 0x00 0x00 NOPX + 7918 0x00 0x00 NOPX + 7920 0x02 0x1c 0x17 0x18 ST.s16 r0, [p2], #2 + 7924 0x00 0x00 NOPX + 7926 0x00 0x00 NOPX + 7928 0x00 0x00 NOPX + 7930 0x00 0x00 NOPX + 7932 0x11 0x80 0x46 0x18 MAC r0, r0, r6, r4 + 7936 0x00 0x00 NOPX + 7938 0x02 0x1c 0xb7 0x18 ST.s16 r5, [p2], #2 + 7942 0x00 0x00 NOPX + 7944 0x00 0x00 NOPX + 7946 0x00 0x00 NOPX + 7948 0x00 0x00 NOPX + 7950 0x00 0x00 NOPX + 7952 0x00 0x00 NOPX + 7954 0x02 0x1c 0x97 0x18 ST.s16 r4, [p2], #2 + 7958 0x00 0x00 NOPX + 7960 0x00 0x00 NOPX + 7962 0x00 0x00 NOPX + 7964 0x00 0x00 NOPX + 7966 0x00 0x00 NOPX +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 7968 0x00 0x00 NOPX +.aggressive_scheduled_block_id 5 +.noswbrkpt + 7970 0x02 0x04 0x77 0x18 ST.s16 r3, [p2] +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 7974 0x00 0x00 NOPX +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 7976 0x00 0x0e 0xd8 0x00 0x00 0x84 J #7600 +.delay_slot +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 7982 0x14 0x23 0x1d 0x98 LSHL r17, r16, r17 +.delay_slot +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 7986 0x11 0x46 0x0e 0x18 MSC r3, r3, r5, r0 +.delay_slot +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7990 0x10 0xc3 0x11 0x98 SUB r1, r3, r17 +.delay_slot +.swstall delay_slot + 7994 0x00 0x00 NOPX +.delay_slot + 7996 0x02 0x44 0x37 0x18 ST.s16 r1, [p2, #8] +.label _ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv__end +.label __ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv___func_end0 +.label __ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E___func_begin0 +.label _ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E +.function_start + 8000 0xfd 0x10 0x80 0x00 0x01 0xf1 0x31 0x32 0x10 0xba MOVA m4, #-24; MOVXM p2, #508516 + 8010 0x51 0x43 0x50 0x60 0x02 0x2c LDA.u16 r16, [p2], m4; MOVX r24, #0 + 8016 0x18 0x03 0x11 0x78 VINSERT.32 x0, x0, #0, r24 + 8020 0x00 0x00 NOPX + 8022 0x00 0x00 NOPX + 8024 0x00 0x00 NOPX + 8026 0x00 0x00 NOPX + 8028 0x00 0x00 NOPX + 8030 0x80 0x10 0x20 0x40 0x01 0x84 JNZ r16, #8256 +.delay_slot + 8036 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 +.delay_slot + 8042 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4] +.delay_slot + 8046 0x0f 0xfb 0x9d 0x98 ST p7, [sp, #-8] +.delay_slot +.swstall delay_slot + 8050 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8052 0x00 0x00 NOPX + 8054 0x00 0x07 0xce 0xc4 0x60 0x44 MOVXM p7, #508464 + 8060 0xe0 0xc4 0x50 0x00 0x01 0xf3 0xb1 0x38 0x10 0xba LDA.s8 r17, [p7]; MOVXM p7, #508528 + 8070 0x07 0x06 0x16 0x98 LDA r16, [p7] + 8074 0x00 0x00 NOPX + 8076 0x00 0x00 NOPX + 8078 0x00 0x00 NOPX + 8080 0x00 0x00 NOPX + 8082 0x00 0x00 NOPX + 8084 0x00 0x00 NOPX + 8086 0x80 0x10 0x20 0x00 0x01 0x84 JZ r16, #8256 +.delay_slot + 8092 0x19 0x80 0x92 0xf8 VMOV bmhl1, x0 +.delay_slot + 8096 0x14 0x7a 0x80 0x18 MOVX crRnd, r17 +.delay_slot + 8100 0x09 0x40 0xd6 0x18 VCONV.bf16.fp32 wl2, bmhl1 +.delay_slot +.swstall delay_slot + 8104 0x00 0x00 NOPX +.delay_slot + 8106 0x18 0x91 0x03 0x58 VEXTBCST.16 x1, x2, #0 + 8110 0xff 0x71 0x07 0x84 0x8b 0x00 0x00 0x08 0x78 0x18 0x10 0x76 MOVA r17, #-5; MOVS p7, p1; MOVXM ls, #8240 + 8122 0x00 0x00 0x26 0xe0 0x60 0x44 MOVXM le, #8240 + 8128 0x84 0x3f 0xe3 0x05 0x25 0xe4 ADD r16, r16, #-1; VMOV bmhl1, x1 + 8134 0x14 0x21 0x1d 0x98 LSHL r16, r16, r17 + 8138 0x00 0x2c 0xfa 0xf0 0x01 0x14 NOPA; ADD.NC lc, r16, #1 + 8144 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 8160 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 8176 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 8192 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 8208 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 8224 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLS_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_240 +.loop_nesting 1 +.begin_of_loop +.end_of_loop + 8240 0x00 0x2c 0xf0 0x00 0x27 0x1c 0xc6 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmhl1, [p7], #64; NOPX; NOPM; NOPV +.label TGT_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_256 +.loop_nesting 0 + 8256 0x5b 0xc2 0xd0 0x01 0x20 0x49 0xb1 0x60 0x78 0xba LDA r16, [p2], #-12; MOVX r18, #2; MOV p3, p1 + 8266 0x00 0x00 NOPX + 8268 0x00 0x00 NOPX + 8270 0x00 0x00 NOPX + 8272 0x00 0x00 NOPX + 8274 0x00 0x00 NOPX + 8276 0x00 0x00 NOPX + 8278 0x14 0xa1 0x08 0x98 NE r16, r18, r16 + 8282 0x80 0x10 0xc0 0x40 0x01 0x84 JNZ r16, #8576 +.delay_slot +.swstall delay_slot + 8288 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8290 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8292 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8294 0x00 0x00 NOPX +.delay_slot + 8296 0x1c 0x54 0xc0 0xf8 MOV r17, p2 + 8300 0x00 0x07 0xce 0xc4 0xe0 0x44 MOVXM p7, #508528 + 8306 0x07 0x06 0x56 0x98 LDA r18, [p7] + 8310 0x00 0x00 NOPX + 8312 0x00 0x00 NOPX + 8314 0x00 0x00 NOPX + 8316 0x00 0x00 NOPX + 8318 0x00 0x00 NOPX + 8320 0x00 0x00 NOPX + 8322 0x90 0x10 0xc0 0x00 0x01 0x84 JZ r18, #8576 +.delay_slot +.swstall delay_slot + 8328 0x00 0x00 NOPX +.delay_slot + 8330 0x10 0x26 0x05 0x18 MOVX r19, #1 +.delay_slot + 8334 0x94 0xe7 0xba 0x25 0x81 0xe4 LSHL r19, r18, r19; MOV r20, p1 +.delay_slot + 8340 0x1b 0x69 0xd1 0x58 ADD.NC p3, r19, r20 +.delay_slot + 8344 0x00 0x07 0xce 0xc4 0x60 0x44 MOVXM p7, #508464 + 8350 0xe0 0xd0 0x52 0x8c 0x8b 0x00 0x00 0x08 0x78 0xa0 0x10 0x76 LDA.s8 r20, [p7]; MOVS p2, p3; MOVXM ls, #8512 + 8362 0xff 0x73 0x04 0x84 0x8b 0x00 0x00 0x09 0xb8 0xa8 0x10 0x76 MOVA r19, #-5; MOVS p4, p1; MOVXM le, #8528 + 8374 0x00 0x00 0x2e 0xc1 0xc0 0x44 MOVXM p7, #8416 + 8380 0x10 0x5a 0x40 0x18 MOVX vaddSign0, #1 + 8384 0x00 0x00 NOPX + 8386 0x00 0x00 NOPX + 8388 0x19 0x80 0x92 0xf8 VMOV bmhl1, x0 + 8392 0xa7 0x50 0x0a 0xb2 0xff 0x24 MOVX crRnd, r20; ADD.NC r21, r18, #-1 + 8398 0x08 0x1a 0xca 0xce 0x7b 0x5c VCONV.bf16.fp32 wl0, bmhl1; LSHL r19, r21, r19 + 8404 0x00 0x00 NOPX + 8406 0x00 0x2c 0xf0 0x00 0x10 0x00 0x00 0x81 0xae 0xba NOPA; NOPB; VEXTBCST.16 x0, x0, #0 +.label TGT_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_416 +.loop_nesting 1 + 8416 0x83 0x8e 0x80 0x02 0xf2 0x1c VLDB x1, [p4], #64; MOVX lc, #30 + 8422 0x00 0x00 NOPX + 8424 0x00 0x00 NOPX + 8426 0x00 0x00 NOPX + 8428 0x00 0x01 0x67 0x98 NOPA + 8432 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 8448 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x01 0x40 0x08 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVX r20, #0; NOPM; NOPV +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8464 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x29 0x40 0x3a 0xa2 0xa8 0x68 0x00 0x00 0xe1 NOPA; NOPB; NOPS; ADD r20, r20, #1; VEXTRACT.16 r21, x1, r20, vaddSign0; NOPV +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8480 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x29 0x40 0x3a 0xa2 0xa8 0x68 0x00 0x00 0xe1 NOPA; NOPB; NOPS; ADD r20, r20, #1; VEXTRACT.16 r21, x1, r20, vaddSign0; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8496 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x81 0x54 0xb8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VINSERT.16 x2, x0, #0, r21; NOPV +.label ZLS_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_512 +.loop_nesting 2 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8512 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x02 0xa2 0xa8 0x68 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VEXTRACT.16 r21, x1, r20, vaddSign0; NOPV +.label ZLE_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_528 +.end_of_loop +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8528 0x00 0x2c 0xf0 0x00 0x22 0x1c 0x93 0x29 0x40 0x38 0x81 0x54 0xb8 0x00 0x00 0xe1 NOPA; NOPB; VST x2, [p2], #64; ADD r20, r20, #1; VINSERT.16 x2, x0, #0, r21; NOPV +.loop_nesting 1 + 8544 0x14 0xe7 0xe0 0x18 JNZD r19, r19, p7 +.delay_slot + 8548 0x0a 0x1c 0x93 0x18 VST x2, [p2], #64 +.delay_slot +.swstall delay_slot + 8552 0x00 0x00 NOPX +.delay_slot + 8554 0x19 0x02 0xa9 0x78 VINSERT.16 x2, x0, #0, r21 +.delay_slot +.swstall delay_slot + 8558 0x00 0x00 NOPX +.delay_slot + 8560 0x00 0x2c 0xf0 0x00 0x22 0x1c 0x93 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST x2, [p2], #64; NOPX; NOPM; NOPV +.label TGT_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_576 +.loop_nesting 0 + 8576 0x00 0x0b 0x80 0x02 0xd2 0x02 0x34 0x4d 0x88 0xba MOVA dc2, #0; MOVX vaddSign0, #1; ADD.NC p4, r17, #54 + 8586 0x9f 0xce 0x53 0x08 0x4b 0x00 0x01 0xf1 0x31 0x18 0x10 0x76 LDA.s16 r19, [p4], #-2; MOVS dc3, dc2; MOVXM p2, #508464 + 8598 0x9b 0xeb 0x54 0x08 0x4b 0x01 0x17 0x8b 0xb4 0x47 0x08 0x76 LDA.u16 r26, [p4], #-6; MOVS dc4, dc2; MOVX r17, #60; ADD.NC p7, r17, #28 + 8610 0x9b 0xd2 0x51 0x10 0x4b 0x01 0xc0 0x40 0x52 0xba LDA.s16 r20, [p4], #-6; MOVS dc1, dc4; MOV dj3, #64 + 8620 0x9f 0xf3 0x51 0x80 0x01 0x54 LDA.u16 r28, [p4], #-2; MOV dc0, #0 + 8626 0x87 0xd6 0x50 0x00 0x00 0x08 0x79 0x38 0x10 0xba LDA.s16 r21, [p4], #6; MOVXM ls, #8816 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 8636 0x9e 0xda 0x50 0x00 0x00 0x09 0xb9 0x40 0x10 0xba LDA.s16 r22, [p4, #-2]; MOVXM le, #8832 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 8646 0x80 0xeb 0x54 0x8c 0x8b 0x01 0x40 0x40 0x52 0xba LDA.u16 r26, [p4]; MOVS p4, p3; MOV dj2, #64 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 8656 0x40 0xdc 0x54 0xdd 0x81 0xd4 LDA.s8 r23, [p2]; MOV p2, p7 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 8662 0xe7 0xcf 0x50 0x01 0x20 0x29 0xa6 0x90 0x78 0xba LDA.u16 r19, [p7], #6; MOVX r18, #1; MOV dn3, r26 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8672 0x9e 0xe5 0xb4 0x9a 0x41 0xe4 LSHL r27, r19, r18; MOV dn2, r26 + 8678 0x0b 0x81 0x60 0x29 0x49 0x6d 0x86 0xd0 0x79 0x3a MOVS dn0, r28; LSHL r20, r20, r18; MOV m3, r27 + 8688 0xad 0x25 0xb0 0x14 0x41 0xe4 LSHL r20, r21, r18; MOV m0, r20 + 8694 0xb5 0x25 0xb1 0x14 0x41 0xe4 LSHL r20, r22, r18; MOV dj0, r20 + 8700 0x6e 0x15 0x74 0x5a 0x0b 0x02 0x45 0x10 0x72 0xba VLDA.2D.CONV.fp32.bf16 cml1, [p3], d3; MOVS dn4, r26; MOV dj4, r20 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 8710 0x03 0x25 0x70 0x2f 0xd4 0x01 0x03 0x00 0x78 0xba VLDA.3D.CONV.fp32.bf16 cml2, [p0], d0; MOVX crRnd, r23; MOV m2, m3 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 8720 0x6e 0x15 0x7a 0xf3 0xfb 0x14 VLDA.2D.CONV.fp32.bf16 cml1, [p3], d3; ADD.NC lc, r19, #-5 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8726 0x00 0x19 0x2b 0x98 VLDA.3D.CONV.fp32.bf16 cml2, [p0], d0 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8730 0x6e 0x15 0x70 0x00 0x20 0x3c VLDA.2D.CONV.fp32.bf16 cml1, [p3], d3;NOPB +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8736 0x03 0x25 0x70 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x7c 0x41 0x41 0xeb VLDA.3D.CONV.fp32.bf16 cml2, [p0], d0;NOPB; NOPS; NOPX; NOPM; VADD.f dm0, dm1, dm2, r17 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8752 0x6e 0x15 0x70 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA.2D.CONV.fp32.bf16 cml1, [p3], d3;NOPB; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8768 0x03 0x25 0x70 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x7c 0x41 0x41 0xeb VLDA.3D.CONV.fp32.bf16 cml2, [p0], d0;NOPB; NOPS; NOPX; NOPM; VADD.f dm0, dm1, dm2, r17 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8784 0x6e 0x15 0x70 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA.2D.CONV.fp32.bf16 cml1, [p3], d3;NOPB; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8800 0x03 0x25 0x70 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x7c 0x41 0x41 0xeb VLDA.3D.CONV.fp32.bf16 cml2, [p0], d0;NOPB; NOPS; NOPX; NOPM; VADD.f dm0, dm1, dm2, r17 +.label ZLS_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_816 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8816 0x6e 0x15 0x70 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA.2D.CONV.fp32.bf16 cml1, [p3], d3;NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLE_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_832 +.end_of_loop +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8832 0x03 0x25 0x70 0x00 0x24 0x50 0x23 0x00 0x00 0x00 0x01 0xa5 0x7c 0x41 0x41 0xeb VLDA.3D.CONV.fp32.bf16 cml2, [p0], d0;NOPB; VST.2D.CONV.bf16.fp32 cml0, [p4], d2;NOPX; NOPM; VADD.f dm0, dm1, dm2, r17 +.loop_nesting 0 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8848 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8850 0x8a 0x04 0x60 0x02 0x88 0x28 0x3d 0x62 VST.2D.CONV.bf16.fp32 cml0, [p4], d2; VADD.f dm0, dm1, dm2, r17 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8858 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8860 0x8a 0x04 0x60 0x02 0x88 0x28 0x3d 0x62 VST.2D.CONV.bf16.fp32 cml0, [p4], d2; VADD.f dm0, dm1, dm2, r17 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8868 0x80 0x12 0x68 0x40 0x01 0x84 JNZ r16, #9424 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8874 0x0c 0x50 0x23 0x18 VST.2D.CONV.bf16.fp32 cml0, [p4], d2 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8878 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8880 0x0c 0x50 0x23 0x18 VST.2D.CONV.bf16.fp32 cml0, [p4], d2 +.delay_slot +.swstall delay_slot + 8884 0x00 0x00 NOPX +.delay_slot + 8886 0x0c 0x50 0x23 0x18 VST.2D.CONV.bf16.fp32 cml0, [p4], d2 + 8890 0x08 0x06 0x80 0x00 0x01 0xf0 0x31 0x3b 0x10 0xba MOVA dj1, #64; MOVXM p0, #508534 + 8900 0x1f 0x9e 0x50 0x01 0x04 0x0a 0xe9 0x60 0x78 0xba LDA.s16 r7, [p0], #-2; MOVX r16, #32; MOV r23, p1 + 8910 0x1e 0xf6 0xd0 0x01 0x32 0x0a 0x88 0x08 0x58 0xba LDA r29, [p0, #-4]; MOVX r19, #16; MOV r20, #8 + 8920 0x00 0xef 0x50 0x00 0x00 0x08 0x79 0xe8 0x10 0xba LDA.u16 r27, [p0]; MOVXM ls, #9168 + 8930 0x00 0x96 0x00 0x00 0x00 0x09 0xba 0x18 0x10 0xba MOVA r22, #4; MOVXM le, #9264 + 8940 0x00 0x00 NOPX + 8942 0x00 0x00 NOPX + 8944 0x00 0x00 NOPX + 8946 0x11 0xcf 0x2d 0x98 LSHL r7, r7, r18 + 8950 0xec 0xa5 0xb2 0x07 0x41 0xe4 LSHL r18, r29, r18; MOV m1, r7 + 8956 0x2b 0x61 0x60 0x02 0xbf 0x7f 0x40 0x02 MOVS dn1, r27; ADD.NC lc, r29, #-3 + 8964 0x18 0x6b 0xc9 0x58 ADD.NC p0, r23, r18 + 8968 0x00 0x30 0x2b 0x98 VLDA.2D.CONV.fp32.bf16 cml0, [p0], d1 + 8972 0x00 0x00 NOPX + 8974 0x00 0x00 NOPX + 8976 0x00 0x00 NOPX + 8978 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 8980 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.noswbrkpt + 8982 0x88 0x0c 0x3d 0x48 VADD.f dm0, dm0, dm3, r17 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8986 0x1b 0x01 0x12 0xf8 VMOV bmll3, bmlh0 + 8990 0x00 0x00 NOPX +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 8992 0x00 0x00 NOPX +.aggressive_scheduled_block_id 5 +.noswbrkpt + 8994 0x00 0x30 0x2b 0x98 VLDA.2D.CONV.fp32.bf16 cml0, [p0], d1 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 8998 0x00 0x00 NOPX +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 9000 0x18 0x20 0x12 0xf8 VMOV x0, bmll0 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 9004 0x00 0x80 0x42 0xc6 0x89 0x0c 0x3d 0x62 VSHIFT x1, x0, x0, r16; VADD.f dm1, dm0, dm3, r17 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9012 0x1b 0x02 0x92 0xf8 VMOV bmll3, x1 + 9016 0x00 0x00 NOPX + 9018 0x00 0x00 NOPX + 9020 0x00 0x00 NOPX + 9022 0x00 0x00 NOPX +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 9024 0x19 0x24 0x12 0xf8 VMOV x2, bmll1 +.aggressive_scheduled_block_id 6 +.noswbrkpt + 9028 0x01 0x90 0x4e 0xc6 0x89 0x2c 0x3d 0x62 VSHIFT x3, x2, x0, r19; VADD.f dm1, dm1, dm3, r17 +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9036 0x1b 0x06 0x92 0xf8 VMOV bmll3, x3 + 9040 0x03 0x01 0x12 0xe6 0x88 0x0c 0x3d 0x62 VMOV bmll3, bmlh0; VADD.f dm0, dm0, dm3, r17 + 9048 0x00 0x00 NOPX + 9050 0x00 0x00 NOPX + 9052 0x00 0x00 NOPX +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 9054 0x1a 0x24 0x12 0xf8 VMOV x4, bmll1 +.aggressive_scheduled_block_id 7 +.noswbrkpt + 9058 0x06 0x05 0x75 0x40 0xa5 0x94 VLDA.2D.CONV.fp32.bf16 cml0, [p0], d1; VSHIFT x5, x4, x0, r20 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 9064 0x18 0x20 0x12 0xf8 VMOV x0, bmll0 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 9068 0x00 0x80 0x42 0xc6 0x89 0x0c 0x3d 0x62 VSHIFT x1, x0, x0, r16; VADD.f dm1, dm0, dm3, r17 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 9076 0x1b 0x02 0x92 0xf8 VMOV bmll3, x1 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9080 0x04 0x0a 0x92 0xe6 0x8a 0x30 0x3d 0x62 VMOV bmll4, x5; VADD.f dm2, dm1, dm4, r17 + 9088 0x00 0x00 NOPX + 9090 0x00 0x00 NOPX + 9092 0x00 0x00 NOPX +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id first + 9094 0x19 0x24 0x12 0xf8 VMOV x2, bmll1 +.aggressive_scheduled_block_id 8 +.noswbrkpt + 9098 0x01 0x90 0x4e 0xc6 0x89 0x2c 0x3d 0x62 VSHIFT x3, x2, x0, r19; VADD.f dm1, dm1, dm3, r17 +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9106 0x1b 0x06 0x92 0xf8 VMOV bmll3, x3 + 9110 0x03 0x01 0x12 0xe6 0x88 0x0c 0x3d 0x62 VMOV bmll3, bmlh0; VADD.f dm0, dm0, dm3, r17 + 9118 0x00 0x00 NOPX + 9120 0x1b 0x28 0x12 0xf8 VMOV x6, bmll2 + 9124 0x1b 0xb0 0x5a 0xd8 VSHIFT x7, x6, x0, r22 + 9128 0x1c 0x0e 0x92 0xf8 VMOV bmll4, x7 + 9132 0x02 0x24 0x12 0xe6 0x8a 0x50 0x3d 0x62 VMOV x4, bmll1; VADD.f dm2, dm2, dm4, r17 + 9140 0x18 0x20 0x12 0xf8 VMOV x0, bmll0 +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id first + 9144 0x18 0x80 0x42 0xd8 VSHIFT x1, x0, x0, r16 +.aggressive_scheduled_block_id 9 +.noswbrkpt + 9148 0x03 0x02 0x92 0xe6 0x89 0x0c 0x3d 0x62 VMOV bmll3, x1; VADD.f dm1, dm0, dm3, r17 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9156 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.label ZLS_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_1168 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9168 0x06 0x05 0x72 0xa0 0x52 0xc2 0x8a 0x30 0x3d 0x4a VLDA.2D.CONV.fp32.bf16 cml0, [p0], d1; VSHIFT x5, x4, x0, r20; VADD.f dm2, dm1, dm4, r17 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9178 0x82 0x22 0xc0 0x02 0x05 0x49 0x70 0x02 VCONV.bf16.fp32 x8, cml2; VMOV bmll4, x5 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9186 0x00 0x00 NOPX +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9188 0x1d 0x61 0x01 0xb8 VEXTRACT.16 r21, x8, #0, vaddSign0 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9192 0x19 0x24 0x12 0xf8 VMOV x2, bmll1 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9196 0x19 0x90 0x4e 0xd8 VSHIFT x3, x2, x0, r19 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9200 0x03 0x06 0x92 0xe6 0x89 0x2c 0x3d 0x62 VMOV bmll3, x3; VADD.f dm1, dm1, dm3, r17 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9208 0x03 0x28 0x12 0xe6 0x88 0x0c 0x3d 0x62 VMOV x6, bmll2; VADD.f dm0, dm0, dm3, r17 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9216 0x1b 0x01 0x12 0xf8 VMOV bmll3, bmlh0 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9220 0x1b 0xb0 0x5a 0xd8 VSHIFT x7, x6, x0, r22 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9224 0x1c 0x0e 0x92 0xf8 VMOV bmll4, x7 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9228 0x8a 0x50 0x3d 0x48 VADD.f dm2, dm2, dm4, r17 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9232 0x1a 0x24 0x12 0xf8 VMOV x4, bmll1 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9236 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x10 0x09 0x70 0xf6 NOPA; NOPB; NOPS; VMOV x0, bmll0 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9248 0x23 0xd6 0xe0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x40 0x21 0x6c 0x48 0x61 0xeb ST.s16 r21, [p1], #2; NOPB; NOPS; NOPX; VSHIFT x1, x0, x0, r16; VADD.f dm1, dm0, dm3, r17 +.label ZLE_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_1264 +.end_of_loop +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9264 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x01 0x81 0x49 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VMOV bmll3, x1; NOPV +.loop_nesting 0 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9280 0x02 0xa0 0x52 0xc6 0x8a 0x30 0x3d 0x62 VSHIFT x5, x4, x0, r20; VADD.f dm2, dm1, dm4, r17 +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9288 0x82 0x22 0xc0 0x02 0x05 0x49 0x70 0x02 VCONV.bf16.fp32 x8, cml2; VMOV bmll4, x5 + 9296 0x00 0x00 NOPX + 9298 0x1d 0x61 0x01 0xb8 VEXTRACT.16 r21, x8, #0, vaddSign0 + 9302 0x19 0x24 0x12 0xf8 VMOV x2, bmll1 + 9306 0x23 0xd6 0xe3 0x20 0x9d 0x94 ST.s16 r21, [p1], #2; VSHIFT x3, x2, x0, r19 +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id first + 9312 0x1b 0x28 0x12 0xf8 VMOV x6, bmll2 +.aggressive_scheduled_block_id 10 +.noswbrkpt + 9316 0x03 0xb0 0x5a 0xc6 0x8a 0x50 0x3d 0x62 VSHIFT x7, x6, x0, r22; VADD.f dm2, dm2, dm4, r17 +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9324 0x1c 0x0e 0x92 0xf8 VMOV bmll4, x7 + 9328 0x03 0x06 0x92 0xe6 0x89 0x2c 0x3d 0x62 VMOV bmll3, x3; VADD.f dm1, dm1, dm3, r17 + 9336 0x00 0x00 NOPX + 9338 0x00 0x00 NOPX + 9340 0x00 0x00 NOPX + 9342 0x0c 0x11 0x16 0x18 VCONV.bf16.fp32 x8, cml2 +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id first + 9346 0x00 0x00 NOPX +.aggressive_scheduled_block_id 11 +.noswbrkpt + 9348 0x23 0xd6 0xe4 0x48 0x25 0xd4 ST.s16 r21, [p1], #2; VMOV x4, bmll1 +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 9354 0x02 0xa0 0x52 0xc6 0x8a 0x30 0x3d 0x62 VSHIFT x5, x4, x0, r20; VADD.f dm2, dm1, dm4, r17 +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 9362 0x1c 0x0a 0x92 0xf8 VMOV bmll4, x5 +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9366 0x1d 0x61 0x01 0xb8 VEXTRACT.16 r21, x8, #0, vaddSign0 + 9370 0x00 0x00 NOPX + 9372 0x00 0x00 NOPX + 9374 0x00 0x00 NOPX +.aggressive_scheduled_block_id 12 +.aggressive_scheduled_block_id first + 9376 0x1b 0x28 0x12 0xf8 VMOV x6, bmll2 +.aggressive_scheduled_block_id 12 +.noswbrkpt + 9380 0x03 0xb0 0x5a 0xc6 0x8a 0x50 0x3d 0x62 VSHIFT x7, x6, x0, r22; VADD.f dm2, dm2, dm4, r17 +.aggressive_scheduled_block_id 12 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9388 0x1c 0x0e 0x92 0xf8 VMOV bmll4, x7 + 9392 0x00 0x00 NOPX + 9394 0x00 0x00 NOPX + 9396 0x00 0x00 NOPX + 9398 0x00 0x00 NOPX + 9400 0x0c 0x11 0x16 0x18 VCONV.bf16.fp32 x8, cml2 + 9404 0x00 0x01 0x67 0x98 NOPA + 9408 0x23 0xd6 0xe0 0x00 0x20 0x01 0x5b 0x00 0x00 0x02 0xb0 0x80 0xd8 0x00 0x00 0xe1 ST.s16 r21, [p1], #2; NOPB; NOPS; NOPX; VEXTRACT.16 r21, x8, #0, vaddSign0; NOPV +.label TGT_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_1424 + 9424 0x02 0x36 0x1a 0x98 LDA.u16 r16, [p2, #6] + 9428 0x00 0x00 NOPX + 9430 0x00 0x00 NOPX + 9432 0x00 0x00 NOPX + 9434 0x00 0x00 NOPX +.aggressive_scheduled_block_id 13 +.aggressive_scheduled_block_id first + 9436 0x00 0x00 NOPX +.aggressive_scheduled_block_id 13 +.noswbrkpt + 9438 0x07 0xfe 0x17 0x18 ST.s16 r16, [p7], #-2 +.aggressive_scheduled_block_id 13 +.nohwbrkpt +.noswbrkpt + 9442 0x07 0x04 0x3a 0x98 LDA.u16 r1, [p7] +.aggressive_scheduled_block_id 13 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 9446 0x00 0x1a 0x50 0x00 0x01 0x04 JL #13472 +.delay_slot +.aggressive_scheduled_block_id 13 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9452 0x14 0x20 0x07 0x18 ADD r16, r16, #1 +.delay_slot + 9456 0x14 0x00 0xb0 0x18 EXTEND.u16 r0, r16 +.delay_slot +.swstall delay_slot + 9460 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9462 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9464 0x00 0x2c 0xf0 0x04 0x00 0x00 0x1c 0x22 NOPA; NOPV +.return_address + 9472 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] + 9476 0x07 0xf8 0x99 0x18 LDA p1, [sp, #-8] + 9480 0x07 0x24 0x77 0x18 ST.s16 r3, [p7, #4] + 9484 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 + 9490 0x00 0x00 NOPX + 9492 0x00 0x00 NOPX + 9494 0x00 0x00 NOPX + 9496 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 9500 0x1f 0x62 0xc0 0xf8 MOV p7, p1 +.delay_slot +.swstall delay_slot + 9504 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9506 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9508 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9510 0x00 0x00 NOPX +.label _ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E__end +.label __ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E___func_end0 + +.text_segment PM 9520 +.label __Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function_start + 9520 0x00 0x07 0xc6 0xc4 0x00 0x44 MOVXM p3, #508416 + 9526 0x60 0xc2 0xd1 0xae 0x41 0xd4 LDA r16, [p3]; MOV r3, r14 + 9532 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 9538 0xfd 0xf3 0xb0 0x00 0x2b 0xd0 0x70 0x02 ST p7, [sp, #-20]; MOV r1, r15 + 9546 0xff 0xe3 0xb0 0x02 0x2d 0x70 0x70 0x02 ST p6, [sp, #-4]; MOV r17, CORE_ID + 9554 0xff 0x36 0xb0 0x23 0x14 0x81 0xea 0x60 0x79 0x3a ST r13, [sp, #-8]; EXTEND.u8 r17, r17; MOV r15, p2 + 9564 0xfe 0x06 0xb0 0x01 0xc8 0xf0 0x70 0x02 ST r1, [sp, #-16]; MOV r14, lr + 9572 0x00 0x00 NOPX + 9574 0x80 0x13 0x18 0x40 0x01 0x84 JNZ r16, #9776 +.delay_slot + 9580 0x0f 0xf4 0x75 0x98 ST r3, [sp, #-12] +.delay_slot + 9584 0xf0 0x11 0x60 0x00 0x01 0xf0 0x31 0x20 0x11 0x3a MOVS p7, p0; MOVXM p0, #508480 +.delay_slot + 9594 0x00 0x07 0xcc 0xc4 0x10 0x44 MOVXM p6, #508424 +.delay_slot + 9600 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 9604 0x0e 0x06 0x31 0x98 ST r17, [p6] +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9608 0x00 0x07 0xcc 0xc4 0x60 0x44 MOVXM p6, #508464 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9614 0xc0 0xc0 0xe0 0x00 0x01 0xf3 0x31 0x16 0x10 0xba ST.s8 r16, [p6]; MOVXM p6, #508460 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9624 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 9626 0x00 0x0e 0x00 0x00 0x01 0x04 JL #7168 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9632 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9634 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9636 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 9640 0x00 0x01 0x00 0x29 0x00 0x00 0x1c 0x22 MOVX r16, #1; NOPV +.delay_slot + 9648 0x00 0x2c 0xf0 0x00 0x26 0x06 0x11 0x80 0x00 0x03 0x31 0x60 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p6]; NOPX; MOV p6, p1; NOPV +.return_address + 9664 0x00 0x07 0xc6 0xc4 0x80 0x44 MOVXM p3, #508480 + 9670 0x63 0xca 0xd0 0x00 0x01 0xf0 0xb1 0x04 0x10 0xba LDA r18, [p3], #4; MOVXM p1, #508424 + 9680 0x01 0x06 0x96 0x98 LDA r20, [p1] + 9684 0x03 0x1e 0x36 0x98 LDA r17, [p3], #4 + 9688 0x00 0x00 NOPX + 9690 0x03 0x06 0x16 0x98 LDA r16, [p3] + 9694 0x00 0x00 NOPX + 9696 0x03 0x16 0x76 0x98 LDA r19, [p3, #4] + 9700 0x00 0x00 NOPX + 9702 0x14 0xa5 0x4f 0x98 MUL r18, r18, r20 + 9706 0x00 0x00 NOPX + 9708 0x14 0x63 0x2f 0x98 MUL r17, r17, r18 + 9712 0x00 0x07 0xc2 0xc4 0x28 0x44 MOVXM p1, #508436 + 9718 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 + 9722 0x20 0xce 0x30 0x00 0x01 0xf1 0x31 0x06 0x11 0x3a ST r19, [p1]; MOVXM p2, #508428 + 9732 0x40 0xc2 0x30 0x00 0x40 0x28 0x50 0x02 ST r16, [p2]; MOV dj0, #40 + 9740 0x06 0x00 0x2e 0x98 LDA el0, [p6, dj0] + 9744 0x00 0x00 NOPX + 9746 0x00 0x13 0x20 0x00 0x00 0x84 J #9792 +.delay_slot +.swstall delay_slot + 9752 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9754 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9756 0x00 0x00 NOPX +.delay_slot + 9758 0x00 0x07 0xc0 0xc4 0x50 0x44 MOVXM p0, #508456 +.delay_slot + 9764 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x02 0x14 0xc1 0x36 NOPA; NOPB; ST el0, [p0]; NOPX +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_256 + 9776 0x00 0x07 0xc2 0xc4 0x28 0x44 MOVXM p1, #508436 + 9782 0x20 0xce 0xd0 0x00 0x10 0x00 0x01 0xa5 0x7e 0xba LDA r19, [p1]; NOPB; NOPM +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_272 + 9792 0x00 0x00 NOPX + 9794 0x00 0x00 NOPX + 9796 0x00 0x00 NOPX + 9798 0x00 0x00 NOPX + 9800 0x00 0x00 NOPX + 9802 0x10 0x20 0x05 0x18 MOVX r16, #1 + 9806 0x14 0xe1 0x08 0x98 NE r16, r19, r16 + 9810 0x80 0x13 0x80 0x40 0x01 0x84 JNZ r16, #9984 +.delay_slot + 9816 0x1e 0x67 0x86 0x18 ADD.NC p6, r15, #12 +.delay_slot +.swstall delay_slot + 9820 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9822 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9824 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9826 0x00 0x00 NOPX + 9828 0xfd 0x3e 0xb0 0x00 0x01 0xf0 0xb1 0x02 0x11 0x3a ST r15, [sp, #-24]; MOVXM p1, #508420 + 9838 0x20 0xc2 0xd0 0x00 0x01 0xf0 0xb1 0x30 0x10 0xba LDA r16, [p1]; MOVXM p1, #508512 + 9848 0x01 0x04 0x3a 0x98 LDA.u16 r1, [p1] + 9852 0x00 0x00 NOPX + 9854 0x00 0x00 NOPX +.no_stack_arguments + 9856 0x00 0x1a 0x50 0x00 0x01 0x04 JL #13472 +.delay_slot + 9862 0x10 0x1a 0x01 0x18 MOVX r13, #0 +.delay_slot +.swstall delay_slot + 9866 0x00 0x00 NOPX +.delay_slot + 9868 0x14 0x36 0xda 0x98 LT r27, r16, r13 +.delay_slot + 9872 0x6c 0x60 0x37 0xbb 0x41 0xe4 SUB r17, r13, r16; MOV r15, r27 +.delay_slot + 9878 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x04 0x01 0x12 0x7a NOPA; NOPS; SEL.EQZ r0, r16, r17, r27 +.return_address + 9888 0x6c 0x06 0x3d 0xaf 0x41 0xe4 SUB r16, r13, r3; MOV r27, r15 + 9894 0x10 0xe1 0x02 0x18 SEL.EQZ r16, r3, r16, r27 + 9898 0x80 0x13 0x80 0x40 0x01 0x84 JNZ r16, #9984 +.delay_slot + 9904 0x00 0x07 0xc2 0xc4 0x28 0x44 MOVXM p1, #508436 +.delay_slot + 9910 0x07 0xe9 0xf1 0x18 LDA r15, [sp, #-24] +.delay_slot +.swstall delay_slot + 9914 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9916 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9918 0x00 0x00 NOPX + 9920 0xfc 0x1f 0xa4 0xd9 0x81 0xe4 MOVX r16, #-1; MOV p2, p6 + 9926 0x02 0xff 0x76 0x98 LDA r27, [p2], #-4 + 9930 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4 + 9934 0x02 0xfe 0x56 0x98 LDA r18, [p2], #-4 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 9938 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.noswbrkpt + 9940 0x02 0x46 0x36 0x98 LDA r17, [p2, #16] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9944 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9946 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9948 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9950 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9952 0x14 0xa3 0x12 0x18 SEL.EQZ r17, r18, r17, r27 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9956 0x0a 0x06 0x31 0x98 ST r17, [p2] + 9960 0x00 0x00 NOPX + 9962 0x00 0x00 NOPX + 9964 0x00 0x00 NOPX + 9966 0x00 0x00 NOPX + 9968 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x22 0x98 0x40 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; ACQ r17, r16; NOPM; NOPV +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_464 + 9984 0x10 0x22 0x09 0x18 MOVX r17, #2 + 9988 0x00 0x00 NOPX + 9990 0x00 0x00 NOPX + 9992 0x01 0x06 0x16 0x98 LDA r16, [p1] + 9996 0x00 0x00 NOPX + 9998 0x00 0x00 NOPX + 10000 0x00 0x00 NOPX + 10002 0x00 0x00 NOPX + 10004 0x00 0x00 NOPX + 10006 0x00 0x00 NOPX + 10008 0x14 0x63 0x08 0x98 NE r17, r17, r16 + 10012 0x88 0x13 0xf0 0x40 0x01 0x84 JNZ r17, #10208 +.delay_slot +.swstall delay_slot + 10018 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10020 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10022 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10024 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10026 0x00 0x00 NOPX + 10028 0xfd 0x3e 0xb0 0x00 0x01 0xf0 0xb1 0x0c 0x11 0x3a ST r15, [sp, #-24]; MOVXM p1, #508440 + 10038 0x20 0xc2 0xd0 0x00 0x01 0xf0 0xb1 0x30 0x10 0xba LDA r16, [p1]; MOVXM p1, #508512 + 10048 0x01 0x04 0x3a 0x98 LDA.u16 r1, [p1] + 10052 0x00 0x00 NOPX + 10054 0x00 0x00 NOPX +.no_stack_arguments + 10056 0x00 0x1a 0x50 0x00 0x01 0x04 JL #13472 +.delay_slot + 10062 0x10 0x1a 0x01 0x18 MOVX r13, #0 +.delay_slot +.swstall delay_slot + 10066 0x00 0x00 NOPX +.delay_slot + 10068 0x00 0x2c 0xf8 0x6d 0xb5 0x2c NOPA; LT r27, r16, r13 +.delay_slot + 10074 0x6c 0x60 0x37 0xbb 0x41 0xe4 SUB r17, r13, r16; MOV r15, r27 +.delay_slot + 10080 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x20 0x08 0x90 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; SEL.EQZ r0, r16, r17, r27; NOPM; NOPV +.return_address + 10096 0xfd 0x3e 0x20 0x1b 0x01 0x8f 0x6b 0xd0 0x78 0xba LDA r15, [sp, #-24]; SUB r16, r13, r3; MOV r27, r15 + 10106 0x10 0xe1 0x02 0x18 SEL.EQZ r16, r3, r16, r27 + 10110 0x80 0x13 0xe8 0x40 0x01 0x84 JNZ r16, #10192 +.delay_slot + 10116 0x00 0x07 0xc2 0xc4 0x28 0x44 MOVXM p1, #508436 +.delay_slot +.swstall delay_slot + 10122 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10124 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10126 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10128 0x00 0x00 NOPX + 10130 0xfc 0x1f 0xa4 0xd9 0x81 0xe4 MOVX r16, #-1; MOV p2, p6 + 10136 0x02 0xff 0x76 0x98 LDA r27, [p2], #-4 + 10140 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4 + 10144 0x02 0xfe 0x56 0x98 LDA r18, [p2], #-4 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 10148 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.noswbrkpt + 10150 0x02 0x46 0x36 0x98 LDA r17, [p2, #16] +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 10154 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 10156 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 10158 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 10160 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 10162 0x14 0xa3 0x12 0x18 SEL.EQZ r17, r18, r17, r27 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10166 0x0a 0x06 0x31 0x98 ST r17, [p2] + 10170 0x00 0x00 NOPX + 10172 0x00 0x00 NOPX + 10174 0x00 0x00 NOPX + 10176 0x00 0x00 NOPX + 10178 0x00 0x2c 0xf0 0x00 0x24 0x53 0x08 0x00 0x34 0xaf 0x00 0x2b 0x60 0x7e NOPA; NOPB; NOPS; ACQ r17, r16; NOPM +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_672 + 10192 0x00 0x00 NOPX + 10194 0x00 0x00 NOPX + 10196 0x00 0x00 NOPX + 10198 0x20 0xc2 0xd0 0x00 0x10 0x00 0x01 0xa5 0x7e 0xba LDA r16, [p1]; NOPB; NOPM +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_688 + 10208 0x10 0x1a 0x01 0x18 MOVX r13, #0 + 10212 0x00 0x00 NOPX + 10214 0x00 0x00 NOPX + 10216 0x00 0x00 NOPX + 10218 0x00 0x00 NOPX + 10220 0x10 0x22 0x11 0x18 MOVX r17, #4 + 10224 0x14 0x61 0x08 0x98 NE r16, r17, r16 + 10228 0x80 0x14 0x68 0x40 0x01 0x84 JNZ r16, #10448 +.delay_slot + 10234 0x00 0x07 0xc2 0xc4 0x38 0x44 MOVXM p1, #508444 +.delay_slot + 10240 0x00 0x07 0xc4 0xc4 0xc0 0x44 MOVXM p2, #508512 +.delay_slot +.swstall delay_slot + 10246 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10248 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10250 0x00 0x00 NOPX + 10252 0x01 0x06 0x16 0x98 LDA r16, [p1] + 10256 0x02 0x04 0x3a 0x98 LDA.u16 r1, [p2] + 10260 0x00 0x00 NOPX + 10262 0x00 0x00 NOPX +.no_stack_arguments + 10264 0x00 0x1a 0x50 0x00 0x01 0x04 JL #13472 +.delay_slot +.swstall delay_slot + 10270 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10272 0x00 0x00 NOPX +.delay_slot + 10274 0x14 0x36 0xda 0x98 LT r27, r16, r13 +.delay_slot + 10278 0xfd 0x6e 0xb6 0xc6 0x03 0x5c ST r27, [sp, #-24]; SUB r17, r13, r16 +.delay_slot + 10284 0x14 0x01 0x12 0x18 SEL.EQZ r0, r16, r17, r27 +.return_address + 10288 0xfd 0x6e 0x26 0xc0 0x63 0x2c LDA r27, [sp, #-24]; SUB r16, r13, r3 + 10294 0x00 0x00 NOPX + 10296 0x00 0x00 NOPX + 10298 0x00 0x00 NOPX + 10300 0x00 0x00 NOPX + 10302 0x00 0x00 NOPX + 10304 0x00 0x00 NOPX + 10306 0x10 0xe1 0x02 0x18 SEL.EQZ r16, r3, r16, r27 + 10310 0x80 0x14 0x58 0x40 0x01 0x84 JNZ r16, #10416 +.delay_slot + 10316 0x19 0x6e 0xc0 0xf8 MOV p1, p7 +.delay_slot +.swstall delay_slot + 10320 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10322 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10324 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10326 0x00 0x00 NOPX + 10328 0xdf 0xee 0xd0 0x00 0x01 0xf1 0x31 0x20 0x10 0xba LDA r27, [p6], #-4; MOVXM p2, #508480 + 10338 0xdf 0xc6 0xd0 0x00 0x01 0xf3 0xb1 0x0a 0x10 0xba LDA r17, [p6], #-4; MOVXM p7, #508436 + 10348 0xdf 0xca 0xd0 0x00 0xf0 0x28 0x2b 0xd0 0x78 0xba LDA r18, [p6], #-4; MOVX r15, #1; MOV r1, r15 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 10358 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 +.aggressive_scheduled_block_id 4 +.noswbrkpt + 10362 0x06 0x46 0x36 0x98 LDA r17, [p6, #16] +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10366 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10368 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10370 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10372 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10374 0x14 0xa3 0x12 0x18 SEL.EQZ r17, r18, r17, r27 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10378 0x0e 0x06 0x31 0x98 ST r17, [p6] + 10382 0x00 0x00 NOPX + 10384 0x00 0x00 NOPX + 10386 0x00 0x14 0x78 0x00 0x00 0x84 J #10480 +.delay_slot + 10392 0x1e 0x60 0xa0 0xf8 MOV p6, r1 +.delay_slot + 10396 0x14 0x53 0x08 0x18 ACQ r17, r16 +.delay_slot +.swstall delay_slot + 10400 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10402 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10404 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_896 + 10416 0xd1 0xe1 0x60 0x00 0x05 0x1e 0x00 0x00 0x21 0x3a MOVS p6, r15; J #10480 +.delay_slot + 10426 0x00 0x07 0xce 0xc4 0x28 0x44 MOVXM p7, #508436 +.delay_slot + 10432 0x00 0x07 0xc4 0xc4 0x80 0x44 MOVXM p2, #508480 +.delay_slot + 10438 0x10 0x1e 0x05 0x18 MOVX r15, #1 +.delay_slot +.swstall delay_slot + 10442 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10444 0x00 0x01 0x67 0x98 NOPA +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_928 + 10448 0x00 0x2c 0xf0 0x00 0x21 0x9c 0x8b 0x00 0x01 0xf1 0x31 0x20 0x10 0x00 0x00 0xe1 NOPA; NOPB; MOVS p1, p7; MOVXM p2, #508480; NOPV + 10464 0x00 0x2f 0x00 0x00 0x26 0x8f 0x0b 0x00 0x01 0xf3 0xb1 0x0a 0x10 0x00 0x00 0xe1 MOVA r15, #1; NOPB; MOVS p6, r15; MOVXM p7, #508436; NOPV +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_960 + 10480 0x20 0xc2 0xd7 0xeb 0x1d 0x80 0x01 0xf1 0xb1 0x00 0x10 0x76 LDA r16, [p1]; ST p6, [sp, #-24]; MOVXM p3, #508416 + 10492 0x60 0xc6 0xd0 0x00 0x01 0xf0 0xb1 0x06 0x10 0xba LDA r17, [p3]; MOVXM p1, #508428 + 10502 0x01 0x06 0x56 0x98 LDA r18, [p1] + 10506 0x06 0x5c 0x9e 0x98 LDA p1, [p6], #20 + 10510 0x00 0x00 NOPX +.no_stack_arguments + 10512 0x00 0x0f 0xa0 0x00 0x01 0x04 JL #8000 +.delay_slot +.swstall delay_slot + 10518 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10520 0x00 0x00 NOPX +.delay_slot + 10522 0x14 0x62 0x07 0x18 ADD r17, r17, #1 +.delay_slot + 10526 0x60 0xc6 0x39 0x49 0xfb 0x5c ST r17, [p3]; LSHL r18, r18, r15 +.delay_slot + 10532 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x34 0xa0 0xa0 0xf6 NOPA; NOPB; NOPS; ADD.NC p0, r18, r16 +.return_address + 10544 0x07 0x06 0x36 0x98 LDA r17, [p7] + 10548 0x00 0x00 NOPX + 10550 0x00 0x00 NOPX + 10552 0x00 0x00 NOPX + 10554 0x00 0x00 NOPX + 10556 0x00 0x00 NOPX + 10558 0x00 0x00 NOPX + 10560 0x13 0xe5 0x18 0x98 NE r18, r15, r17 + 10564 0x90 0x15 0x08 0x40 0x01 0x84 JNZ r18, #10768 +.delay_slot + 10570 0x10 0x20 0x09 0x18 MOVX r16, #2 +.delay_slot +.swstall delay_slot + 10574 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10576 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10578 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10580 0x00 0x00 NOPX + 10582 0x00 0x07 0xce 0xc4 0x08 0x44 MOVXM p7, #508420 + 10588 0xe0 0xc2 0xd0 0x00 0x01 0xf0 0xb1 0x30 0x10 0xba LDA r16, [p7]; MOVXM p1, #508512 + 10598 0x01 0x04 0x3a 0x98 LDA.u16 r1, [p1] + 10602 0x00 0x00 NOPX + 10604 0x00 0x00 NOPX + 10606 0x00 0x00 NOPX +.no_stack_arguments + 10608 0x00 0x1a 0x50 0x00 0x01 0x04 JL #13472 +.delay_slot +.swstall delay_slot + 10614 0x00 0x00 NOPX +.delay_slot + 10616 0x14 0x20 0x07 0x18 ADD r16, r16, #1 +.delay_slot + 10620 0xe0 0xc2 0x38 0x6d 0xb5 0x5c ST r16, [p7]; LT r27, r16, r13 +.delay_slot + 10626 0x6c 0x60 0x37 0xbb 0x41 0xe4 SUB r17, r13, r16; MOV r15, r27 +.delay_slot + 10632 0x00 0x20 0x08 0x91 0x00 0x00 0x1c 0x22 SEL.EQZ r0, r16, r17, r27; NOPV +.return_address + 10640 0xfd 0x13 0x20 0x1b 0x01 0x8f 0x6b 0xd0 0x78 0xba LDA p1, [sp, #-24]; SUB r16, r13, r3; MOV r27, r15 + 10650 0x1c 0xa0 0x48 0xa0 0x05 0x64 SEL.EQZ r18, r3, r16, r27; MOV r17, #1 + 10656 0x90 0x14 0xf8 0x40 0x01 0x84 JNZ r18, #10736 +.delay_slot + 10662 0x10 0x20 0x09 0x18 MOVX r16, #2 +.delay_slot +.swstall delay_slot + 10666 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10668 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10670 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10672 0x00 0x00 NOPX + 10674 0x2a 0xca 0xde 0x0b 0x63 0x0c LDA r18, [p1, #20]; ST r13, [p7] + 10680 0x00 0x00 NOPX + 10682 0x00 0x00 NOPX + 10684 0x00 0x00 NOPX + 10686 0x00 0x00 NOPX + 10688 0x00 0x00 NOPX + 10690 0x00 0x00 NOPX + 10692 0x14 0x91 0x18 0x18 REL r18, r17 + 10696 0xdc 0xca 0xd0 0x00 0x01 0xf3 0xb1 0x0a 0x10 0xba LDA r18, [p6, #-8]; MOVXM p7, #508436 + 10706 0x00 0x00 NOPX + 10708 0x00 0x00 NOPX + 10710 0x00 0x15 0x00 0x00 0x00 0x84 J #10752 +.delay_slot +.swstall delay_slot + 10716 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10718 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10720 0x00 0x00 NOPX +.delay_slot + 10722 0x14 0x63 0x21 0x98 SUB r17, r17, r18 +.delay_slot + 10726 0x00 0x2c 0xf6 0xe6 0x31 0x80 0x00 0x00 0x00 0x7a NOPA; ST r17, [p6, #-8]; NOPX +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1216 + 10736 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x01 0xf3 0xb1 0x0a 0x10 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVXM p7, #508436; NOPV +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1232 + 10752 0xe0 0xc6 0xd0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 LDA r17, [p7]; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1248 + 10768 0x00 0x00 NOPX + 10770 0x00 0x00 NOPX + 10772 0x00 0x00 NOPX + 10774 0x00 0x00 NOPX + 10776 0x00 0x00 NOPX + 10778 0x00 0x00 NOPX + 10780 0x14 0x61 0x08 0x98 NE r16, r17, r16 + 10784 0x80 0x15 0x78 0x40 0x01 0x84 JNZ r16, #10992 +.delay_slot + 10790 0x00 0x07 0xc2 0xc4 0xc0 0x44 MOVXM p1, #508512 +.delay_slot + 10796 0x00 0x07 0xce 0xc4 0x30 0x44 MOVXM p7, #508440 +.delay_slot +.swstall delay_slot + 10802 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10804 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10806 0x00 0x00 NOPX + 10808 0x07 0x06 0x16 0x98 LDA r16, [p7] + 10812 0x01 0x04 0x3a 0x98 LDA.u16 r1, [p1] + 10816 0x00 0x00 NOPX + 10818 0x00 0x00 NOPX + 10820 0x00 0x00 NOPX +.no_stack_arguments + 10822 0x00 0x1a 0x50 0x00 0x01 0x04 JL #13472 +.delay_slot +.swstall delay_slot + 10828 0x00 0x00 NOPX +.delay_slot + 10830 0x14 0x20 0x07 0x18 ADD r16, r16, #1 +.delay_slot + 10834 0xe0 0xc2 0x30 0x00 0x24 0x36 0xda 0xd2 NOPB; ST r16, [p7]; LT r27, r16, r13 +.delay_slot + 10842 0x6c 0x60 0x37 0xbb 0x41 0xe4 SUB r17, r13, r16; MOV r15, r27 +.delay_slot + 10848 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x20 0x08 0x90 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; SEL.EQZ r0, r16, r17, r27; NOPM; NOPV +.return_address + 10864 0xfd 0x23 0x20 0x1b 0x01 0x8f 0x6b 0xd0 0x78 0xba LDA p2, [sp, #-24]; SUB r16, r13, r3; MOV r27, r15 + 10874 0x10 0xe3 0x02 0x18 SEL.EQZ r17, r3, r16, r27 + 10878 0x88 0x15 0x68 0x40 0x01 0x84 JNZ r17, #10960 +.delay_slot + 10884 0x00 0x07 0xc2 0xc4 0x28 0x44 MOVXM p1, #508436 +.delay_slot + 10890 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot +.swstall delay_slot + 10894 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10896 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10898 0x00 0x00 NOPX + 10900 0x4a 0xc6 0xde 0x0b 0x63 0x0c LDA r17, [p2, #20]; ST r13, [p7] + 10906 0x00 0x00 NOPX + 10908 0x00 0x00 NOPX + 10910 0x00 0x00 NOPX + 10912 0x00 0x00 NOPX + 10914 0x00 0x00 NOPX + 10916 0x00 0x00 NOPX + 10918 0x14 0x51 0x08 0x18 REL r17, r16 + 10922 0x06 0xe6 0x36 0x98 LDA r17, [p6, #-8] + 10926 0x00 0x00 NOPX + 10928 0x00 0x00 NOPX + 10930 0x00 0x00 NOPX + 10932 0x00 0x00 NOPX + 10934 0x00 0x00 NOPX + 10936 0x00 0x00 NOPX + 10938 0x00 0x2c 0xf8 0x42 0x23 0x2c NOPA; SUB r16, r16, r17 + 10944 0x00 0x2c 0xf0 0x00 0x26 0xe6 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p6, #-8]; NOPX; NOPM; NOPV +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1440 + 10960 0x00 0x15 0x80 0x00 0x00 0x84 J #11008 +.delay_slot + 10966 0x1f 0x64 0xc0 0xf8 MOV p7, p2 +.delay_slot +.swstall delay_slot + 10970 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10972 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10974 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10976 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1472 + 10992 0xfd 0x73 0x20 0x00 0x20 0x01 0x5b 0x00 0x01 0xf0 0xb1 0x0a 0x10 0x00 0x00 0xe1 LDA p7, [sp, #-24]; NOPB; NOPS; MOVXM p1, #508436; NOPV +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1488 + 11008 0x01 0x06 0x16 0x98 LDA r16, [p1] + 11012 0x00 0x00 NOPX + 11014 0x00 0x00 NOPX + 11016 0x00 0x00 NOPX + 11018 0x00 0x00 NOPX + 11020 0x00 0x00 NOPX + 11022 0x10 0x22 0x11 0x18 MOVX r17, #4 + 11026 0x14 0x61 0x08 0x98 NE r16, r17, r16 + 11030 0x80 0x15 0xd8 0x40 0x01 0x84 JNZ r16, #11184 +.delay_slot + 11036 0x00 0x07 0xc2 0xc4 0x38 0x44 MOVXM p1, #508444 +.delay_slot +.swstall delay_slot + 11042 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11044 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11046 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11048 0x00 0x00 NOPX + 11050 0x20 0xc2 0xd0 0x00 0x01 0xf1 0x31 0x30 0x10 0xba LDA r16, [p1]; MOVXM p2, #508512 + 11060 0x02 0x04 0x3a 0x98 LDA.u16 r1, [p2] + 11064 0x00 0x00 NOPX + 11066 0x00 0x00 NOPX + 11068 0x00 0x00 NOPX +.no_stack_arguments + 11070 0x00 0x1a 0x50 0x00 0x01 0x04 JL #13472 +.delay_slot +.swstall delay_slot + 11076 0x00 0x00 NOPX +.delay_slot + 11078 0x14 0x20 0x07 0x18 ADD r16, r16, #1 +.delay_slot + 11082 0x20 0xc2 0x38 0x6d 0xb5 0x5c ST r16, [p1]; LT r27, r16, r13 +.delay_slot + 11088 0x6c 0x60 0x37 0xbb 0x41 0xe4 SUB r17, r13, r16; MOV r15, r27 +.delay_slot + 11094 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x04 0x01 0x12 0x7a NOPA; NOPS; SEL.EQZ r0, r16, r17, r27 +.return_address + 11104 0x6c 0x06 0x3d 0xaf 0x41 0xe4 SUB r16, r13, r3; MOV r27, r15 + 11110 0x10 0xe1 0x02 0x18 SEL.EQZ r16, r3, r16, r27 + 11114 0x80 0x15 0xd8 0x40 0x01 0x84 JNZ r16, #11184 +.delay_slot + 11120 0x00 0x07 0xc2 0xc4 0x38 0x44 MOVXM p1, #508444 +.delay_slot +.swstall delay_slot + 11126 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11128 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11130 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11132 0x00 0x00 NOPX + 11134 0xea 0xc6 0xd1 0x05 0xb1 0x80 0x00 0x20 0x05 0x7a LDA r17, [p7, #20]; ST r13, [p1]; MOVX r16, #1 + 11144 0x00 0x00 NOPX + 11146 0x00 0x00 NOPX + 11148 0x00 0x00 NOPX + 11150 0x00 0x00 NOPX + 11152 0x00 0x00 NOPX + 11154 0x00 0x00 NOPX + 11156 0x14 0x51 0x08 0x18 REL r17, r16 + 11160 0x06 0xe6 0x36 0x98 LDA r17, [p6, #-8] + 11164 0x00 0x00 NOPX + 11166 0x00 0x00 NOPX + 11168 0x00 0x00 NOPX + 11170 0x00 0x00 NOPX + 11172 0x00 0x00 NOPX + 11174 0x00 0x00 NOPX + 11176 0x14 0x21 0x11 0x98 SUB r16, r16, r17 + 11180 0x0e 0xe6 0x11 0x98 ST r16, [p6, #-8] +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1664 + 11184 0xfe 0x86 0x20 0x00 0x01 0xf3 0x31 0x00 0x10 0xba LDA r1, [sp, #-12]; MOVXM p6, #508416 + 11194 0xc0 0xc2 0xd0 0x00 0x01 0xf0 0xb1 0x14 0x10 0xba LDA r16, [p6]; MOVXM p1, #508456 + 11204 0x01 0x06 0x36 0x98 LDA r17, [p1] + 11208 0x00 0x00 NOPX + 11210 0x00 0x00 NOPX + 11212 0x00 0x00 NOPX + 11214 0x00 0x00 NOPX + 11216 0x00 0x00 NOPX + 11218 0x00 0x00 NOPX + 11220 0x14 0x61 0x08 0x98 NE r16, r17, r16 + 11224 0x80 0x15 0xf8 0x40 0x01 0x84 JNZ r16, #11248 +.delay_slot + 11230 0x07 0xef 0x99 0x18 LDA p7, [sp, #-20] +.delay_slot + 11234 0x07 0xf1 0xf1 0x18 LDA r15, [sp, #-16] +.delay_slot +.swstall delay_slot + 11238 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11240 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11242 0x00 0x00 NOPX + 11244 0x0e 0x05 0xb1 0x98 ST r13, [p6] +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1728 + 11248 0xff 0xe3 0x2e 0xee 0x41 0xd4 LDA p6, [sp, #-4]; MOV lr, r14 + 11254 0x07 0xf9 0xb1 0x18 LDA r13, [sp, #-8] + 11258 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 11262 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 11268 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11270 0x00 0x00 NOPX +.delay_slot + 11272 0x1b 0x90 0xa0 0xf8 MOV r14, r1 +.delay_slot +.swstall delay_slot + 11276 0x00 0x00 NOPX +.label _Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + +.text_segment PM 11280 +.label __Z15_b14285_wrapperPPv___func_begin0 +.label _Z15_b14285_wrapperPPv +.function_start + 11280 0x1a 0x60 0xc0 0xf8 MOV p2, p0 + 11284 0x02 0x3c 0x1e 0x98 LDA p0, [p2], #12 + 11288 0x02 0xec 0x9e 0x98 LDA p1, [p2], #-8 + 11292 0x02 0x15 0x9e 0x98 LDA p3, [p2, #4] + 11296 0x02 0x05 0x1e 0x98 LDA p2, [p2] +.tail_call + 11300 0x00 0x0c 0xd0 0x00 0x00 0x84 J #6560 +.delay_slot +.swstall delay_slot + 11306 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11308 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11310 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11312 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11314 0x00 0x00 NOPX +.label _Z15_b14285_wrapperPPv__end +.label __Z15_b14285_wrapperPPv___func_end0 + +.text_segment PM 11328 +.label __Z15_b14290_wrapperPPv___func_begin0 +.label _Z15_b14290_wrapperPPv +.function_start + 11328 0x19 0x60 0xc0 0xf8 MOV p1, p0 + 11332 0x01 0x2c 0x1e 0x98 LDA p0, [p1], #8 + 11336 0x01 0xf5 0x1e 0x98 LDA p2, [p1, #-4] + 11340 0x01 0x04 0x9e 0x98 LDA p1, [p1] +.tail_call + 11344 0x00 0x07 0x70 0x00 0x00 0x84 J #3808 +.delay_slot +.swstall delay_slot + 11350 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11352 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11354 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11356 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11358 0x00 0x00 NOPX +.label _Z15_b14290_wrapperPPv__end +.label __Z15_b14290_wrapperPPv___func_end0 +.label __Z15_b13811_wrapperPPv___func_begin0 +.label _Z15_b13811_wrapperPPv +.function_start + 11360 0x1a 0x60 0xc0 0xf8 MOV p2, p0 + 11364 0x02 0x3c 0x1e 0x98 LDA p0, [p2], #12 + 11368 0x02 0xec 0x9e 0x98 LDA p1, [p2], #-8 + 11372 0x02 0x15 0x9e 0x98 LDA p3, [p2, #4] + 11376 0x02 0x05 0x1e 0x98 LDA p2, [p2] +.tail_call + 11380 0x00 0x08 0xf8 0x00 0x00 0x84 J #4592 +.delay_slot +.swstall delay_slot + 11386 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11388 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11390 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11392 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11394 0x00 0x00 NOPX +.label _Z15_b13811_wrapperPPv__end +.label __Z15_b13811_wrapperPPv___func_end0 + +.text_segment PM 11408 +.label __Z15_b13749_wrapperPPv___func_begin0 +.label _Z15_b13749_wrapperPPv +.function_start + 11408 0x1a 0x60 0xc0 0xf8 MOV p2, p0 + 11412 0x02 0x3c 0x1e 0x98 LDA p0, [p2], #12 + 11416 0x02 0xec 0x9e 0x98 LDA p1, [p2], #-8 + 11420 0x02 0x15 0x9e 0x98 LDA p3, [p2, #4] + 11424 0x02 0x05 0x1e 0x98 LDA p2, [p2] +.tail_call + 11428 0x00 0x0b 0x28 0x00 0x00 0x84 J #5712 +.delay_slot +.swstall delay_slot + 11434 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11436 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11438 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11440 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11442 0x00 0x00 NOPX +.label _Z15_b13749_wrapperPPv__end +.label __Z15_b13749_wrapperPPv___func_end0 + +.text_segment PM 11456 +.label __Z15_b14811_wrapperPPv___func_begin0 +.label _Z15_b14811_wrapperPPv +.function_start + 11456 0x19 0x60 0xc0 0xf8 MOV p1, p0 + 11460 0x01 0x2c 0x1e 0x98 LDA p0, [p1], #8 + 11464 0x01 0xf5 0x1e 0x98 LDA p2, [p1, #-4] + 11468 0x01 0x04 0x9e 0x98 LDA p1, [p1] +.tail_call + 11472 0x00 0x12 0x98 0x00 0x00 0x84 J #9520 +.delay_slot +.swstall delay_slot + 11478 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11480 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11482 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11484 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11486 0x00 0x00 NOPX +.label _Z15_b14811_wrapperPPv__end +.label __Z15_b14811_wrapperPPv___func_end0 +.label __Z17softmax_row_majorILi1E8bfloat16S0_Lt1EEvPT0_PT1_33softmax_row_major_internal_params___func_begin0 +.label _Z17softmax_row_majorILi1E8bfloat16S0_Lt1EEvPT0_PT1_33softmax_row_major_internal_params +.function_start + 11488 0x18 0x14 0xc0 0xf8 MOV r0, p2 + 11492 0xff 0x40 0x84 0xc0 0x04 0x14 MOVA m0, #-6; ADD.NC p2, r0, #4 + 11498 0x43 0x82 0xd0 0x00 0x01 0xf1 0xb1 0x18 0x10 0xba LDA r0, [p2], #4; MOVXM p3, #508464 + 11508 0x41 0x4a 0xd0 0x0f 0xee 0x40 0x48 0x00 0x10 0xba LDA r18, [p2], m0; MOVXM r2, #1069088768 + 11518 0x40 0xdb 0x50 0x00 0x00 0x3f 0xaf 0xc0 0x10 0xba LDA.u16 r22, [p2]; MOVXM r29, #65408 + 11528 0x60 0xc4 0x50 0x00 0x00 0x08 0x7e 0xd0 0x10 0xba LDA.s8 r17, [p3]; MOVXM ls, #11680 + 11538 0x00 0x00 0x26 0xfb 0xc0 0x44 MOVXM le, #11744 + 11544 0xf9 0x50 0xa9 0xea 0xe5 0xe4 MOVX r5, #-31; VBCST.16 x9, r29 + 11550 0xf9 0xde 0x20 0x00 0xa2 0xe4 MOVX r7, #-4; VINSERT.32 x0, x0, #0, r2 + 11556 0x07 0x4f 0xb0 0x01 0x25 0xe4 LSHL r29, r0, r7; VMOV bmll0, x0 + 11562 0x14 0x8a 0x5e 0x98 ASHL r5, r18, r5 + 11566 0x00 0x0f 0xd1 0x20 0x05 0x64 ASHL r0, r0, r7; MOV r2, #1 + 11572 0x01 0xe6 0x00 0x2d 0x61 0x6f 0xdc 0x50 0x78 0xba MOVA r6, #15; LSHL r22, r22, r2; MOV crRnd, r17 + 11582 0xfc 0x99 0x00 0x0c 0x69 0x24 0x05 0x90 0x78 0xba MOVA r25, #-28; AND r6, r6, r18; MOV m0, r22 + 11592 0x08 0x02 0xc0 0x17 0x20 0x0a 0x5c 0xee 0xaf 0xff 0x58 0x36 PADDB [p0], m0; VCONV.bf16.fp32 wl0, bmll0; LSHL r5, r5, r25; MOV r21, #-1 + 11604 0x70 0x11 0x60 0x2a 0x63 0x6c 0xa9 0x64 0xa9 0x3a MOVS p3, p0; LSHL r6, r21, r6; ADD.NC r5, r5, r18 + 11614 0x00 0x2c 0xf6 0x3d 0x48 0x2a 0x63 0x34 0x00 0x81 0xa8 0xb6 NOPA; VLDB wl10, [p3], #32; XOR r6, r21, r6; VEXTBCST.16 x0, x0, #0 + 11626 0x29 0x4f 0xd3 0xa0 0x01 0x64 ASHL r5, r5, r7; MOV r7, #0 + 11632 0x00 0x17 0x00 0x00 0x20 0x01 0x5b 0x0f 0xb2 0xd6 0xbf 0x7f 0xc8 0x00 0x00 0xe1 MOVA r23, #0; NOPB; NOPS; LT r27, r7, r5; ADD.NC lc, r29, #-1; NOPV + 11648 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x2f 0xea 0x90 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; SEL.EQZ r30, r23, r21, r27; NOPM; NOPV +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11664 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x0f 0xb2 0xbe 0x19 0x49 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; EQ r27, r7, r5; VMOV x8, x9; NOPV +.label ZLS_F_Z17softmax_row_majorILi1E8bfloat16S0_Lt1EEvPT0_PT1_33softmax_row_major_internal_params_192 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11680 0x00 0x2c 0xf6 0x3d 0x48 0x01 0x5b 0x3d 0xf3 0x10 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB wl10, [p3], #32; NOPS; SEL.EQZ r31, r30, r6, r27; NOPM; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11696 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x3f 0x35 0x80 0xe9 0xc0 0x48 0x00 0x00 0xe1 NOPA; NOPB; NOPS; EXTEND.u16 r19, r31; ADD.NC r7, r7, #1; NOPV +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11712 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x0f 0xb2 0xd5 0x66 0x8d 0x18 0x00 0x00 0xe1 NOPA; NOPB; NOPS; LT r27, r7, r5; VSEL.16 x5, x9, x10, r19; NOPV + 11728 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x2f 0xea 0x90 0xe5 0x91 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; SEL.EQZ r30, r23, r21, r27; VMOV wl3, wl5; NOPV +.label ZLE_F_Z17softmax_row_majorILi1E8bfloat16S0_Lt1EEvPT0_PT1_33softmax_row_major_internal_params_256 +.end_of_loop + 11744 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x0f 0xb2 0xbe 0x20 0xf6 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; EQ r27, r7, r5; VMAX_LT.bf16 x8, r16, x8, x3; NOPV +.loop_nesting 0 + 11760 0x01 0x04 0x00 0x00 0x24 0x84 0x8b 0x3d 0xf3 0x10 0x28 0x10 0x58 0x07 0x00 0xe9 MOVA r4, #8; NOPB; MOVS p4, p1; SEL.EQZ r31, r30, r6, r27; MOV r1, #16; VCLR dm0 + 11776 0x03 0xa1 0x30 0x3f 0x35 0x80 0x68 0x04 0x58 0xba VLDA.CONV.fp32.bf16 bmll2, [p0], #32; EXTEND.u16 r19, r31; MOV r3, #4 + 11786 0x07 0x94 0x00 0x01 0xa0 0x49 0x66 0x8d 0x18 0xba MOVA r20, #60; MOVX r26, #2; VSEL.16 x5, x9, x10, r19 + 11796 0x06 0x00 0x23 0x96 0x45 0xe4 MOVX r24, #0; VMOV wl3, wl5 + 11802 0x67 0x91 0x00 0x23 0xd4 0x02 0x20 0xf6 0x78 0xba MOVA r17, #828; MOVX crRnd, r17; VMAX_LT.bf16 x8, r16, x8, x3 + 11812 0x18 0xc0 0x06 0xd8 VSHIFT x1, x8, x0, r1 + 11816 0x18 0xc0 0xec 0xf8 VMAX_LT.bf16 x1, r16, x8, x1 + 11820 0x1c 0x08 0x12 0xd8 VSHIFT x8, x1, x0, r4 + 11824 0x18 0x8c 0x6c 0xf8 VMAX_LT.bf16 x1, r16, x1, x8 + 11828 0x1c 0x08 0x0e 0xd8 VSHIFT x8, x1, x0, r3 + 11832 0x18 0x8c 0x6c 0xf8 VMAX_LT.bf16 x1, r16, x1, x8 + 11836 0x06 0x80 0x28 0x10 0xd5 0xa4 MOVX r26, #0; VSHIFT x8, x1, x0, r26 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 11842 0xd6 0xcb 0x51 0x18 0xd9 0xe4 LT r27, r26, r5; VMAX_LT.bf16 x1, r16, x1, x8 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 11848 0x03 0xa1 0x35 0xf9 0x52 0x40 0x90 0x35 0xa2 0x40 0x3f 0x46 VLDA.CONV.fp32.bf16 bmll2, [p0], #32; SEL.EQZ r28, r23, r21, r27; VEXTBCST.16 x8, x1, #0; VSUB.f dm2, dm2, dm0, r20 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11860 0x2e 0xf4 0xf0 0x22 0x65 0xe4 EQ r27, r5, r26; VCONV.fp32.bf16 bmll0, wl8 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11866 0xe6 0x4c 0x41 0x00 0x25 0xe4 SEL.EQZ r25, r28, r6, r27; VMOV bmhl0, bmll0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11872 0xd6 0x80 0xe1 0xc2 0xe5 0xe4 ADD r26, r26, #1; VBCST.16 x1, r24 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11878 0x03 0xa1 0x36 0x6c 0xb0 0x57 0xef 0xe9 0xa2 0x40 0x3f 0x46 VLDA.CONV.fp32.bf16 bmll2, [p0], #32; EXTEND.u16 r22, r25; ADD.NC lc, r29, #-3; VSUB.f dm2, dm2, dm0, r20 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11890 0x16 0xb6 0x5a 0x98 LT r27, r26, r5 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11894 0x68 0x22 0xcb 0xf2 0xa4 0x5c VCONV.bf16.fp32 wl6, bmll2; SEL.EQZ r28, r23, r21, r27 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11900 0x2e 0xf4 0xfd 0x3a 0x01 0x24 EQ r27, r5, r26; ADD.NC r26, r26, #1 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11906 0x00 0x39 0x93 0x11 0x8b 0xec 0x01 0x62 SEL.EQZ r25, r28, r6, r27; VMUL.f dm3, x6, x0, r17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11914 0x00 0x35 0xb2 0xd5 0xa2 0x40 0x3f 0x62 LT r27, r26, r5; VSUB.f dm2, dm2, dm0, r20 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11922 0x68 0x22 0xc0 0x00 0x00 0x08 0x7f 0x60 0x11 0x3a VCONV.bf16.fp32 wl6, bmll2; MOVXM ls, #11968 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 11932 0x00 0x00 0x26 0xfe 0x20 0x44 MOVXM le, #12048 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 11938 0x00 0x2f 0xca 0x91 0x8b 0xec 0x01 0x62 SEL.EQZ r28, r23, r21, r27; VMUL.f dm3, x6, x0, r17 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 11946 0x11 0x77 0xa7 0x98 EQ r27, r5, r26 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11950 0x1a 0x58 0xf2 0xf8 VEXP2 wl4, bmll3 + 11954 0xcd 0x96 0x07 0x14 0x64 0x64 EXTEND.u16 r22, r25; VSEL.16 x7, x1, x4, r22 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 11960 0x00 0x2c 0xf0 0x04 0x00 0x00 0x1c 0x22 NOPA; NOPV +.label ZLS_F_Z17softmax_row_majorILi1E8bfloat16S0_Lt1EEvPT0_PT1_33softmax_row_major_internal_params_480 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 4 +.noswbrkpt + 11968 0x03 0xa1 0x30 0x00 0x24 0x1d 0xea 0xb9 0x93 0x12 0x07 0x99 0x7d 0x21 0x81 0xeb VLDA.CONV.fp32.bf16 bmll2, [p0], #32;NOPB; VST wl7, [p4], #32; SEL.EQZ r25, r28, r6, r27; VCONV.fp32.bf16 bmll4, wl7; VADD.f dm4, dm1, dm4, r20 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 11984 0xd6 0x80 0xe2 0x04 0x25 0xe4 ADD r26, r26, #1; VMOV bmll1, bmhl0 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 11990 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x06 0xb6 0x5a 0xfa NOPA; NOPS; LT r27, r26, r5 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12000 0x00 0x2c 0xf0 0x00 0x23 0x41 0x16 0x2f 0xca 0x90 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VCONV.bf16.fp32 wl6, bmll2; SEL.EQZ r28, r23, r21, r27; NOPM; NOPV + 12016 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x0b 0xbd 0x3d 0x2c 0x79 0x7d 0x12 0x01 0xfb NOPA; NOPB; NOPS; EQ r27, r5, r26; VEXP2 wl4, bmll3; VSUB.f dm2, dm2, dm0, r20 + 12032 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x33 0x65 0x81 0xc5 0x19 0x1c 0x5f 0x60 0x0b NOPA; NOPB; NOPS; EXTEND.u16 r22, r25; VSEL.16 x7, x1, x4, r22; VMUL.f dm3, x6, x0, r17 +.label ZLE_F_Z17softmax_row_majorILi1E8bfloat16S0_Lt1EEvPT0_PT1_33softmax_row_major_internal_params_560 +.end_of_loop +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 12048 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x48 0x09 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VMOV bmhl0, bmll4; NOPV +.loop_nesting 0 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 12064 0x04 0x12 0x00 0x00 0x24 0x1d 0xea 0xb9 0x93 0x12 0x07 0x99 0x7d 0x21 0x81 0xeb MOVA r18, #32; NOPB; VST wl7, [p4], #32; SEL.EQZ r25, r28, r6, r27; VCONV.fp32.bf16 bmll4, wl7; VADD.f dm4, dm1, dm4, r20 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12080 0x5c 0x9d 0x50 0x02 0xd2 0x00 0x81 0x09 0x78 0xba LDA.u8 r7, [p2, #-2]; MOVX vaddSign0, #1; VMOV bmll1, bmhl0 + 12090 0x00 0x00 NOPX + 12092 0x0b 0x41 0x16 0x18 VCONV.bf16.fp32 wl6, bmll2 + 12096 0x1a 0x58 0xf2 0xf8 VEXP2 wl4, bmll3 + 12100 0x06 0x6c 0xb0 0x38 0xa3 0x23 0x8b 0xec 0x01 0x5a EXTEND.u16 r22, r25; VSEL.16 x7, x1, x4, r22; VMUL.f dm3, x6, x0, r17 +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 12110 0x18 0x90 0x12 0xf8 VMOV bmhl0, bmll4 +.aggressive_scheduled_block_id 6 +.noswbrkpt + 12114 0x83 0xbd 0x54 0x0f 0x32 0xe4 0xa4 0x30 0x3d 0x4a VST wl7, [p4], #32; VCONV.fp32.bf16 bmll4, wl7; VADD.f dm4, dm1, dm4, r20 +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12124 0x39 0xc5 0x12 0x04 0x25 0xe4 NE r7, r7, r2; VMOV bmll1, bmhl0 + 12130 0x00 0x00 NOPX + 12132 0x00 0x00 NOPX + 12134 0x1a 0x58 0xf2 0xf8 VEXP2 wl4, bmll3 + 12138 0x1b 0x8a 0x32 0x38 VSEL.16 x7, x1, x4, r22 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 12142 0x18 0x90 0x12 0xf8 VMOV bmhl0, bmll4 +.aggressive_scheduled_block_id 7 +.noswbrkpt + 12146 0x04 0x0f 0x32 0xe6 0xa4 0x30 0x3d 0x62 VCONV.fp32.bf16 bmll4, wl7; VADD.f dm4, dm1, dm4, r20 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12154 0x19 0x02 0x12 0xf8 VMOV bmll1, bmhl0 + 12158 0x00 0x00 NOPX + 12160 0x00 0x00 NOPX + 12162 0x00 0x00 NOPX + 12164 0x00 0x00 NOPX + 12166 0x18 0x90 0x12 0xf8 VMOV bmhl0, bmll4 + 12170 0x00 0x00 NOPX + 12172 0x19 0x22 0x12 0xf8 VMOV x2, bmhl0 +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id first + 12176 0x19 0x10 0x4a 0xd8 VSHIFT x2, x2, x0, r18 +.aggressive_scheduled_block_id 8 +.noswbrkpt + 12180 0x02 0x04 0x92 0xe6 0xa0 0x08 0x3d 0x62 VMOV bmll2, x2; VADD.f dm0, dm0, dm2, r20 +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12188 0x18 0x02 0x12 0xf8 VMOV bmll0, bmhl0 + 12192 0x00 0x00 NOPX + 12194 0x00 0x00 NOPX + 12196 0x00 0x00 NOPX + 12198 0x00 0x00 NOPX +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id first + 12200 0x19 0x20 0x12 0xf8 VMOV x2, bmll0 +.aggressive_scheduled_block_id 9 +.noswbrkpt + 12204 0x01 0x10 0x06 0xc6 0xa0 0x08 0x3d 0x62 VSHIFT x2, x2, x0, r1; VADD.f dm0, dm0, dm2, r20 +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12212 0x1a 0x04 0x92 0xf8 VMOV bmll2, x2 + 12216 0x00 0x00 NOPX + 12218 0x00 0x00 NOPX + 12220 0x00 0x00 NOPX + 12222 0x00 0x00 NOPX +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id first + 12224 0x19 0x20 0x12 0xf8 VMOV x2, bmll0 +.aggressive_scheduled_block_id 10 +.noswbrkpt + 12228 0x01 0x10 0x12 0xc6 0xa0 0x08 0x3d 0x62 VSHIFT x2, x2, x0, r4; VADD.f dm0, dm0, dm2, r20 +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12236 0x1a 0x04 0x92 0xf8 VMOV bmll2, x2 + 12240 0x00 0x00 NOPX + 12242 0x00 0x00 NOPX + 12244 0x00 0x00 NOPX + 12246 0x00 0x00 NOPX +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id first + 12248 0x19 0x20 0x12 0xf8 VMOV x2, bmll0 +.aggressive_scheduled_block_id 11 +.noswbrkpt + 12252 0x01 0x10 0x0e 0xc6 0xa0 0x08 0x3d 0x62 VSHIFT x2, x2, x0, r3; VADD.f dm0, dm0, dm2, r20 +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12260 0x1a 0x04 0x92 0xf8 VMOV bmll2, x2 + 12264 0x00 0x00 NOPX + 12266 0x38 0x18 0x10 0x40 0x01 0x84 JNZ r7, #12320 +.delay_slot + 12272 0x0c 0x1d 0xea 0x98 VST wl7, [p4], #32 +.delay_slot +.swstall delay_slot + 12276 0x00 0x00 NOPX +.delay_slot + 12278 0x19 0x20 0x12 0xf8 VMOV x2, bmll0 +.delay_slot + 12282 0x18 0x8a 0x01 0xb8 VEXTRACT.32 r2, x2, #0, vaddSign0 +.delay_slot +.swstall delay_slot + 12286 0x00 0x00 NOPX + 12288 0x00 0x18 0x18 0x00 0x00 0x84 J #12336 +.delay_slot + 12294 0x3f 0x80 0x00 0xa0 0x00 0x44 MOVXM r1, #1065353216 +.delay_slot +.swstall delay_slot + 12300 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12302 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12304 0x00 0x00 NOPX +.delay_slot + 12306 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x01 0x8b 0x00 0x00 0x1c 0x2e NOPA; NOPS; VINSERT.32 x0, x0, #0, r1; NOPV +.label TGT_F_Z17softmax_row_majorILi1E8bfloat16S0_Lt1EEvPT0_PT1_33softmax_row_major_internal_params_832 + 12320 0x10 0x83 0x04 0x18 INV r1, r2 + 12324 0x00 0x00 NOPX + 12326 0x00 0x00 NOPX + 12328 0x00 0x00 NOPX + 12330 0x00 0x2c 0xf0 0x00 0x62 0xd4 NOPA; VINSERT.32 x0, x0, #0, r1 +.label TGT_F_Z17softmax_row_majorILi1E8bfloat16S0_Lt1EEvPT0_PT1_33softmax_row_major_internal_params_848 +.aggressive_scheduled_block_id 12 +.aggressive_scheduled_block_id first + 12336 0x10 0x91 0x61 0x1c 0x64 0x00 0x00 0x49 0x76 0xba VLDB wl1, [p1], #32; MOVS p0, p1; VMOV bmll0, x0 +.aggressive_scheduled_block_id 12 +.noswbrkpt + 12346 0x02 0x38 0xc8 0x00 0x00 0x0c 0x78 0x48 0x10 0x3a VLDB wl1, [p1], #32; MOVXM ls, #12432 +.aggressive_scheduled_block_id 12 +.nohwbrkpt +.noswbrkpt + 12356 0x08 0x02 0xc0 0x00 0x00 0x0d 0xb8 0x60 0x11 0x3a VCONV.bf16.fp32 wl0, bmll0; MOVXM le, #12480 +.aggressive_scheduled_block_id 12 +.nohwbrkpt +.noswbrkpt + 12366 0x1d 0x70 0x7e 0x98 ADD.NC lc, r0, #-3 +.aggressive_scheduled_block_id 12 +.nohwbrkpt +.noswbrkpt + 12370 0x18 0x01 0x03 0x58 VEXTBCST.16 x0, x0, #0 +.aggressive_scheduled_block_id 12 +.nohwbrkpt +.noswbrkpt + 12374 0x00 0x00 NOPX +.aggressive_scheduled_block_id 12 +.nohwbrkpt +.noswbrkpt + 12376 0x00 0x00 NOPX +.aggressive_scheduled_block_id 12 +.nohwbrkpt +.noswbrkpt + 12378 0x88 0xe2 0x01 0x48 VMUL.f dm0, x1, x0, r17 +.aggressive_scheduled_block_id 12 +.noswbrkpt + 12382 0x39 0x1c 0x64 0x18 VLDB wl1, [p1], #32 +.aggressive_scheduled_block_id 12 +.nohwbrkpt +.noswbrkpt + 12386 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV +.aggressive_scheduled_block_id 12 +.nohwbrkpt +.noswbrkpt + 12400 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 12 +.nohwbrkpt +.noswbrkpt + 12416 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x7c 0x47 0x10 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMUL.f dm0, x1, x0, r17 +.label ZLS_F_Z17softmax_row_majorILi1E8bfloat16S0_Lt1EEvPT0_PT1_33softmax_row_major_internal_params_944 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 12 +.nohwbrkpt +.noswbrkpt + 12432 0x00 0x2c 0xf2 0x38 0xc8 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB wl1, [p1], #32; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 12 +.nohwbrkpt +.noswbrkpt + 12448 0x00 0x2c 0xf0 0x00 0x20 0x1c 0x12 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST.CONV.bf16.fp32 bmll0, [p0], #32;NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 12 +.nohwbrkpt +.noswbrkpt + 12464 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLE_F_Z17softmax_row_majorILi1E8bfloat16S0_Lt1EEvPT0_PT1_33softmax_row_major_internal_params_992 +.end_of_loop +.aggressive_scheduled_block_id 12 +.nohwbrkpt +.noswbrkpt + 12480 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x7c 0x47 0x10 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMUL.f dm0, x1, x0, r17 +.loop_nesting 0 +.aggressive_scheduled_block_id 12 +.nohwbrkpt +.noswbrkpt + 12496 0x00 0x00 NOPX +.aggressive_scheduled_block_id 12 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12498 0x08 0x1c 0x12 0x98 VST.CONV.bf16.fp32 bmll0, [p0], #32 +.aggressive_scheduled_block_id 13 +.aggressive_scheduled_block_id first + 12502 0x00 0x00 NOPX +.aggressive_scheduled_block_id 13 +.noswbrkpt + 12504 0x88 0xe2 0x01 0x48 VMUL.f dm0, x1, x0, r17 +.aggressive_scheduled_block_id 13 +.nohwbrkpt +.noswbrkpt + 12508 0x00 0x00 NOPX +.aggressive_scheduled_block_id 13 +.nohwbrkpt +.noswbrkpt + 12510 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 13 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12514 0x08 0x1c 0x12 0x98 VST.CONV.bf16.fp32 bmll0, [p0], #32 +.delay_slot +.swstall delay_slot + 12518 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12520 0x00 0x00 NOPX +.delay_slot + 12522 0x08 0x1c 0x12 0x98 VST.CONV.bf16.fp32 bmll0, [p0], #32 +.delay_slot +.swstall delay_slot + 12526 0x00 0x00 NOPX +.label _Z17softmax_row_majorILi1E8bfloat16S0_Lt1EEvPT0_PT1_33softmax_row_major_internal_params__end +.label __Z17softmax_row_majorILi1E8bfloat16S0_Lt1EEvPT0_PT1_33softmax_row_major_internal_params___func_end0 +.label __ZN12mllib_graphs19softmax_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERA8_KjRNSD_ISE_NSF_3outET1_EE___func_begin0 +.label _ZN12mllib_graphs19softmax_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERA8_KjRNSD_ISE_NSF_3outET1_EE +.function_start + 12528 0xf0 0x91 0x60 0x01 0xb7 0x60 0x70 0x02 MOVS p7, p1; MOV p3, p7 + 12536 0x33 0x11 0x60 0x00 0x01 0xf3 0x31 0x5a 0x11 0x3a MOVS p1, p6; MOVXM p6, #508596 + 12546 0x06 0xde 0x16 0x98 LDA r16, [p6], #-12 + 12550 0x00 0x00 NOPX + 12552 0x00 0x00 NOPX + 12554 0x00 0x00 NOPX + 12556 0x00 0x00 NOPX + 12558 0x00 0x00 NOPX + 12560 0x00 0x00 NOPX + 12562 0x80 0x18 0xf8 0x40 0x01 0x84 JNZ r16, #12784 +.delay_slot + 12568 0x00 0x10 0x00 0x00 0x01 0xc4 PADDXM [sp], #128 +.delay_slot + 12574 0x0f 0xfc 0x9d 0x98 ST p1, [sp, #-4] +.delay_slot + 12578 0x0f 0xf9 0x9d 0x98 ST p3, [sp, #-8] +.delay_slot + 12582 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12] +.delay_slot + 12586 0x19 0x64 0xc0 0xf8 MOV p1, p2 + 12590 0x00 0xa0 0x83 0x9c 0x8b 0x00 0x01 0xf1 0x31 0x5c 0x10 0x76 MOVA m0, #5; MOVS p3, p7; MOVXM p2, #508600 + 12602 0xfd 0x08 0x80 0x3f 0x16 0x0a 0x0d 0x70 0x78 0xba MOVA m2, #-24; MOVX r17, #-16; MOV r16, CORE_ID + 12612 0xff 0x40 0x82 0x0a 0x11 0xa1 0x08 0xee 0x4c 0x3f 0x88 0x76 MOVA m0, #-6; ST r16, [p2], m0; LSHL r16, r16, r17; ADD.NC r18, r16, #-2 + 12624 0x5f 0xc0 0xe0 0x01 0x10 0x28 0x80 0x0c 0x58 0xba ST.s8 r16, [p2], #-1; MOVX r17, #1; MOV m1, #12 + 12634 0x63 0xce 0xd0 0x00 0x01 0xf2 0x31 0x18 0x10 0xba LDA r19, [p3], #4; MOVXM p4, #508464 + 12644 0x00 0x19 0x00 0x00 0x01 0xf2 0xb1 0x16 0x10 0xba MOVA r25, #0; MOVXM p5, #508460 + 12654 0x10 0x30 0x01 0x18 MOVX r24, #0 + 12658 0x00 0x00 NOPX + 12660 0x00 0x00 NOPX + 12662 0x14 0xa0 0x90 0x18 EXTEND.u8 r16, r18 + 12666 0x02 0x4a 0x47 0x18 ST.s8 r18, [p2], m2 + 12670 0x14 0xe1 0x0f 0x98 MUL r16, r19, r16 + 12674 0x00 0x00 NOPX + 12676 0x00 0x00 NOPX + 12678 0x00 0x00 NOPX + 12680 0x00 0x00 NOPX + 12682 0x00 0x00 NOPX + 12684 0x0a 0x1e 0x71 0x98 ST r19, [p2], #4 + 12688 0x65 0x85 0xda 0x0c 0x63 0x0c LDA el0, [p3], #8; ST r17, [p5] + 12694 0x00 0x00 NOPX + 12696 0x00 0x00 NOPX + 12698 0x00 0x00 NOPX + 12700 0x00 0x00 NOPX + 12702 0x00 0x00 NOPX + 12704 0x00 0x00 NOPX + 12706 0x0a 0x08 0x29 0x98 ST el0, [p2], m0 + 12710 0x02 0xfe 0x17 0x18 ST.s16 r16, [p2], #-2 + 12714 0x03 0x04 0x2e 0x98 LDA el0, [p3] + 12718 0x00 0x00 NOPX + 12720 0x00 0x00 NOPX + 12722 0x00 0x00 NOPX + 12724 0x00 0x00 NOPX + 12726 0x00 0x00 NOPX + 12728 0x02 0x2b 0x07 0x18 ST.s8 r24, [p2], m1 + 12732 0x00 0x00 NOPX + 12734 0x00 0x00 NOPX + 12736 0x00 0x00 NOPX + 12738 0x00 0x00 NOPX + 12740 0x00 0x00 NOPX + 12742 0x00 0x00 NOPX + 12744 0x0a 0x04 0x29 0x98 ST el0, [p2] + 12748 0x04 0x07 0x27 0x18 ST.s8 r25, [p4] + 12752 0x03 0x14 0x2e 0x98 LDA el0, [p3, #4] + 12756 0x00 0x00 NOPX + 12758 0x00 0x00 NOPX + 12760 0x00 0x00 NOPX + 12762 0x00 0x00 NOPX + 12764 0x00 0x00 NOPX + 12766 0x00 0x00 NOPX + 12768 0x00 0x2c 0xf0 0x00 0x22 0x14 0x29 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST el0, [p2, #4]; NOPX; NOPM; NOPV +.label TGT_F_ZN12mllib_graphs19softmax_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERA8_KjRNSD_ISE_NSF_3outET1_EE_256 + 12784 0xc3 0x81 0xd0 0x1f 0x11 0x54 LDA eh0, [p6], #4; MOV m0, #-60 + 12790 0xc3 0x85 0xd4 0xcb 0xc1 0xd4 LDA el0, [p6], #4; MOV p2, sp + 12796 0xc3 0x9d 0xd5 0xdf 0x20 0x3c LDA el3, [p6], #4; PADDB [p2], #-128 + 12802 0x06 0x1c 0xae 0x98 LDA el2, [p6], #4 + 12806 0x06 0x1c 0x6e 0x98 LDA el1, [p6], #4 + 12810 0x06 0x1c 0x4e 0x98 LDA eh1, [p6], #4 + 12814 0x06 0x1c 0x8e 0x98 LDA eh2, [p6], #4 + 12818 0xc3 0x81 0xdf 0x10 0x1b 0x0c LDA eh0, [p6], #4; ST eh0, [sp, #-120] + 12824 0xc3 0x85 0xdf 0x18 0x5b 0x0c LDA el0, [p6], #4; ST el0, [sp, #-116] + 12830 0xc3 0x9d 0xdf 0x21 0xdb 0x0c LDA el3, [p6], #4; ST el3, [sp, #-112] + 12836 0xc3 0x95 0xdf 0x29 0x5b 0x0c LDA el2, [p6], #4; ST el2, [sp, #-108] + 12842 0xc3 0x8d 0xdf 0x30 0xdb 0x0c LDA el1, [p6], #4; ST el1, [sp, #-104] + 12848 0xc3 0x89 0xdf 0x38 0x9b 0x0c LDA eh1, [p6], #4; ST eh1, [sp, #-100] + 12854 0xc1 0x11 0xdf 0x41 0x1b 0x0c LDA eh2, [p6], m0; ST eh2, [sp, #-96] + 12860 0xc3 0x81 0xdf 0x48 0x1b 0x0c LDA eh0, [p6], #4; ST eh0, [sp, #-92] + 12866 0xc0 0x85 0xdf 0x50 0x5b 0x0c LDA el0, [p6]; ST el0, [sp, #-88] + 12872 0x00 0x83 0xdf 0x59 0xdb 0x0c LDA p0, [p0]; ST el3, [sp, #-84] + 12878 0x20 0x93 0xdf 0x61 0x5b 0x0c LDA p1, [p1]; ST el2, [sp, #-80] +.no_stack_arguments + 12884 0x00 0x16 0x70 0x00 0x01 0x04 JL #11488 +.delay_slot + 12890 0x0f 0xb4 0x6d 0x98 ST el1, [sp, #-76] +.delay_slot + 12894 0x0f 0xb8 0x4d 0x98 ST eh1, [sp, #-72] +.delay_slot + 12898 0x0f 0xbc 0x8d 0x98 ST eh2, [sp, #-68] +.delay_slot + 12902 0x0f 0x80 0x0d 0x98 ST eh0, [sp, #-128] +.delay_slot + 12906 0x00 0x2c 0xff 0x08 0x5b 0x0c NOPA; ST el0, [sp, #-124] +.return_address + 12912 0xc8 0xc6 0xd0 0x40 0x02 0x2c LDA r17, [p6, #16]; MOVX r16, #0 + 12918 0x07 0xf4 0x39 0x18 LDA lr, [sp, #-12] + 12922 0x07 0xf9 0x19 0x18 LDA p2, [sp, #-8] + 12926 0x07 0xfc 0x19 0x18 LDA p0, [sp, #-4] + 12930 0x00 0x00 NOPX + 12932 0x00 0x00 NOPX + 12934 0x00 0x00 NOPX + 12936 0x14 0x62 0x07 0x18 ADD r17, r17, #1 + 12940 0x0e 0x46 0x31 0x98 ST r17, [p6, #16] + 12944 0xe4 0xca 0xdf 0x11 0x16 0x0c LDA r18, [p7, #8]; MOVS p7, p2 + 12950 0x00 0x00 NOPX + 12952 0x00 0x00 NOPX + 12954 0x00 0x00 NOPX + 12956 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 12960 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128 +.delay_slot +.swstall delay_slot + 12966 0x00 0x00 NOPX +.delay_slot + 12968 0x14 0x77 0x27 0x98 EQ r27, r17, r18 +.delay_slot + 12972 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 +.delay_slot + 12976 0xc8 0xc2 0x30 0x03 0x30 0x60 0x70 0x02 ST r16, [p6, #16]; MOV p6, p0 +.label _ZN12mllib_graphs19softmax_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERA8_KjRNSD_ISE_NSF_3outET1_EE__end +.label __ZN12mllib_graphs19softmax_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERA8_KjRNSD_ISE_NSF_3outET1_EE___func_end0 + +.text_segment PM 12992 +.label __Z14_b8134_wrapperPPv___func_begin0 +.label _Z14_b8134_wrapperPPv +.function_start + 12992 0x19 0x60 0xc0 0xf8 MOV p1, p0 + 12996 0x01 0x2c 0x1e 0x98 LDA p0, [p1], #8 + 13000 0x01 0xf5 0x1e 0x98 LDA p2, [p1, #-4] + 13004 0x01 0x04 0x9e 0x98 LDA p1, [p1] +.tail_call + 13008 0x00 0x18 0x78 0x00 0x00 0x84 J #12528 +.delay_slot +.swstall delay_slot + 13014 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13016 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13018 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13020 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13022 0x00 0x00 NOPX +.label _Z14_b8134_wrapperPPv__end +.label __Z14_b8134_wrapperPPv___func_end0 +.label __ZN12mllib_graphs14expand_wrapperI8bfloat16EEvRN3adf9io_bufferIT_NS2_9direction2inENS2_16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEEEERA3_KjRNS3_IS4_NS5_3outESG_EE___func_begin0 +.label _ZN12mllib_graphs14expand_wrapperI8bfloat16EEvRN3adf9io_bufferIT_NS2_9direction2inENS2_16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEEEERA3_KjRNS3_IS4_NS5_3outESG_EE +.function_start + 13024 0x00 0x07 0xc6 0xc4 0x40 0x44 MOVXM p3, #508448 + 13030 0x03 0x04 0x16 0x98 LDA r0, [p3] + 13034 0x00 0x00 NOPX + 13036 0x00 0x00 NOPX + 13038 0x00 0x00 NOPX + 13040 0x00 0x00 NOPX + 13042 0x00 0x00 NOPX + 13044 0x00 0x00 NOPX + 13046 0x00 0x19 0x90 0x40 0x01 0x84 JNZ r0, #13088 +.delay_slot +.swstall delay_slot + 13052 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13054 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13056 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13058 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13060 0x00 0x00 NOPX + 13062 0x18 0x5a 0xe0 0xf8 MOV r1, CORE_ID + 13066 0x10 0x42 0x90 0x18 EXTEND.u8 r1, r1 + 13070 0x10 0x43 0xfb 0x18 ADD r1, r1, #-2 + 13074 0x00 0x07 0xc8 0xc4 0x48 0x44 MOVXM p4, #508452 + 13080 0x80 0x86 0x30 0x00 0x01 0xa5 0x70 0x02 ST r1, [p4]; NOPM +.label TGT_F_ZN12mllib_graphs14expand_wrapperI8bfloat16EEvRN3adf9io_bufferIT_NS2_9direction2inENS2_16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEEEERA3_KjRNS3_IS4_NS5_3outESG_EE_64 + 13088 0x22 0x8a 0xd0 0x3f 0x07 0x6b 0x08 0x00 0x58 0xba LDA r2, [p1, #4]; MOVX r16, #-5; MOV r24, #0 + 13098 0x20 0x86 0xd0 0x00 0x01 0xf0 0xb1 0x18 0x10 0xba LDA r1, [p1]; MOVXM p1, #508464 + 13108 0x20 0x9c 0x50 0x00 0x01 0xf0 0xb1 0x12 0x10 0xba LDA.s8 r7, [p1]; MOVXM p1, #508452 + 13118 0x20 0x92 0xd0 0x00 0x30 0x28 0x01 0x88 0xb8 0xba LDA r4, [p1]; MOVX r3, #1; VINSERT.32 x0, x0, #0, r24 + 13128 0x00 0x9a 0xd0 0x02 0x50 0x08 0x00 0x49 0x78 0xba LDA r6, [p0]; MOVX r5, #64; VMOV bmll0, x0 + 13138 0x40 0xa3 0xd0 0x44 0xfa 0x2c LDA p2, [p2]; MOVX r17, #31 + 13144 0x00 0x00 NOPX + 13146 0x00 0x00 NOPX + 13148 0x8e 0xc2 0x90 0xa1 0x1f 0x24 AND r27, r17, r1; ADD.NC r1, r1, #31 + 13154 0x10 0x1a 0x30 0x00 0x01 0x84 JZ r2, #13408 +.delay_slot + 13160 0x11 0x09 0xff 0x67 0x41 0xe4 MUL r4, r2, r4; MOV crRnd, r7 +.delay_slot + 13166 0x08 0x02 0xcd 0xc4 0x7b 0x5c VCONV.bf16.fp32 wl0, bmll0; LSHL r17, r27, r3 +.delay_slot + 13172 0x11 0x06 0x3d 0x98 LSHL r3, r4, r3 +.delay_slot + 13176 0x08 0x61 0xb0 0x02 0x06 0xa4 LSHL r1, r1, r16; VEXTBCST.16 x0, x0, #0 +.delay_slot + 13182 0x29 0x62 0x30 0xc3 0x32 0xa4 SUB r5, r5, r17; ADD.NC p0, r3, r6 + 13188 0xc1 0x0a 0x41 0xa1 0xff 0x24 SEL.EQZ r4, r24, r5, r27; ADD.NC r3, r1, #-1 + 13194 0x00 0x2c 0xf1 0x0b 0xfe 0x2c NOPA; ADD r2, r2, #-1 + 13200 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x0c 0xb1 0xd0 0x10 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVXM p1, #13216; NOPV +.label TGT_F_ZN12mllib_graphs14expand_wrapperI8bfloat16EEvRN3adf9io_bufferIT_NS2_9direction2inENS2_16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEEEERA3_KjRNS3_IS4_NS5_3outESG_EE_192 +.loop_nesting 1 + 13216 0x00 0x1c 0xd2 0x98 LDA.s16 r6, [p0], #2 + 13220 0x00 0x00 NOPX + 13222 0x18 0x1a 0x20 0x00 0x01 0x84 JZ r3, #13376 +.delay_slot +.swstall delay_slot + 13228 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13230 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13232 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13234 0x00 0x00 NOPX +.delay_slot + 13236 0x18 0x99 0x72 0xf8 VBCST.16 x1, r6 + 13240 0x00 0x00 0x31 0xe8 0x60 0x44 MOVXM ls, #13360 + 13246 0x00 0x00 0x36 0xe8 0x60 0x44 MOVXM le, #13360 + 13252 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x02 0xb8 0x7f 0xc0 0xf6 NOPA; NOPB; NOPS; ADD.NC lc, r1, #-1 + 13264 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0x49 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VMOV bmll0, x1; NOPV + 13280 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 13296 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 13312 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 13328 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 13344 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLS_F_ZN12mllib_graphs14expand_wrapperI8bfloat16EEvRN3adf9io_bufferIT_NS2_9direction2inENS2_16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEEEERA3_KjRNS3_IS4_NS5_3outESG_EE_336 +.loop_nesting 2 +.begin_of_loop +.end_of_loop + 13360 0x00 0x2c 0xf0 0x00 0x22 0x1c 0x06 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmll0, [p2], #64; NOPX; NOPM; NOPV +.label TGT_F_ZN12mllib_graphs14expand_wrapperI8bfloat16EEvRN3adf9io_bufferIT_NS2_9direction2inENS2_16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEEEERA3_KjRNS3_IS4_NS5_3outESG_EE_352 +.loop_nesting 1 + 13376 0x10 0x84 0x60 0x18 JNZD r2, r2, p1 +.delay_slot +.swstall delay_slot + 13380 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13382 0x00 0x00 NOPX +.delay_slot + 13384 0x18 0x88 0x12 0xd8 VSHIFT x1, x1, x0, r4 +.delay_slot +.swstall delay_slot + 13388 0x00 0x01 0x67 0x98 NOPA +.delay_slot + 13392 0x00 0x2c 0xf0 0x00 0x22 0x1c 0x53 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST x1, [p2], #64; NOPX; NOPM; NOPV +.label TGT_F_ZN12mllib_graphs14expand_wrapperI8bfloat16EEvRN3adf9io_bufferIT_NS2_9direction2inENS2_16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEEEERA3_KjRNS3_IS4_NS5_3outESG_EE_384 +.loop_nesting 0 + 13408 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 13412 0x10 0x00 0x07 0x18 ADD r0, r0, #1 +.delay_slot + 13416 0x0b 0x04 0x11 0x98 ST r0, [p3] +.delay_slot +.swstall delay_slot + 13420 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13422 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13424 0x00 0x00 NOPX +.label _ZN12mllib_graphs14expand_wrapperI8bfloat16EEvRN3adf9io_bufferIT_NS2_9direction2inENS2_16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEEEERA3_KjRNS3_IS4_NS5_3outESG_EE__end +.label __ZN12mllib_graphs14expand_wrapperI8bfloat16EEvRN3adf9io_bufferIT_NS2_9direction2inENS2_16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEEEERA3_KjRNS3_IS4_NS5_3outESG_EE___func_end0 + +.text_segment PM 13440 +.label __Z14_b8096_wrapperPPv___func_begin0 +.label _Z14_b8096_wrapperPPv +.function_start + 13440 0x19 0x60 0xc0 0xf8 MOV p1, p0 + 13444 0x01 0x2c 0x1e 0x98 LDA p0, [p1], #8 + 13448 0x01 0xf5 0x1e 0x98 LDA p2, [p1, #-4] + 13452 0x01 0x04 0x9e 0x98 LDA p1, [p1] +.tail_call + 13456 0x00 0x19 0x70 0x00 0x00 0x84 J #13024 +.delay_slot +.swstall delay_slot + 13462 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13464 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13466 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13468 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13470 0x00 0x00 NOPX +.label _Z14_b8096_wrapperPPv__end +.label __Z14_b8096_wrapperPPv___func_end0 +.label _ZN12me_primitive10udiv_dstepEjjRjS0_ +.function_start + 13472 0x00 0xc0 0x2f 0xa0 0x41 0xe4 MOVX r3, #0; MOV r31, r0 + 13478 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 13482 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 13486 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 13490 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 13494 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 13498 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 13502 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 13506 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 13510 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 13514 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 13518 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 13522 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 13526 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 13530 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 13534 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 13538 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 13542 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 13546 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 13550 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 13554 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 13558 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 13562 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 13566 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 13570 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 13574 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 13578 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 13582 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 13586 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 13590 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 13594 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 +.delay_slot + 13598 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 +.delay_slot + 13602 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 +.delay_slot + 13606 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 +.delay_slot + 13610 0x18 0x9f 0xa0 0xf8 MOV r2, r31 +.label _ZN12me_primitive10udiv_dstepEjjRjS0___end + +.bss_segment DMb 508416 40 + +.data_segment DMb 508456 +.label _ZL8num_iter + 0x1 + 0x0 + 0x0 + 0x0 + +.bss_segment DMb 508460 4 + +.bss_segment DMb 508464 1 + +.data_segment DMb 508480 +.label reducesum_params + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x1 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + +.rodata_segment DMb 508544 +.label _ZL20g_uniformKernelFuncs + 0x10 + 0x2c + 0x0 + 0x0 + 0x40 + 0x2c + 0x0 + 0x0 + 0x60 + 0x2c + 0x0 + 0x0 + 0x90 + 0x2c + 0x0 + 0x0 + 0xc0 + 0x2c + 0x0 + 0x0 + 0xc0 + 0x32 + 0x0 + 0x0 + 0x80 + 0x34 + 0x0 + 0x0 + +.data_segment DMb 508576 +.label _ZZN12mllib_graphs19softmax_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERA8_KjRNSD_ISE_NSF_3outET1_EEE9sm_params + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x1 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + +.bss_segment DMb 508672 256 + +.stack DM_stack 507264 508352 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable81/Release/0_0_reloadable81.map b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable81/Release/0_0_reloadable81.map new file mode 100644 index 0000000000000000000000000000000000000000..cda784158f38e483bb2ce7c79629139d2b3d175d --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable81/Release/0_0_reloadable81.map @@ -0,0 +1,318 @@ + +// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri May 30 11:31:54 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// bridge -o../Release/0_0_reloadable11 ../Release/0_0_reloadable11.o -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/isg -g -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable11.bcf -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/softfloat/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork3586024 -pme + +// Release: ipp V-2024.06-TGT-241219 + +Memory map for memory 'DM_stack': + + Size = 1048576 + Width = 8 bits + Offset = 0 + Used = 1088 + + 0x0007bd80..0x0007c1bf ( 1088 items) : Stack + +Memory map for memory 'DMb': + + Size = 1048576 + Width = 8 bits + Offset = 0 + Used = 1549 + + 0x00000000..0x0007bd7f ( 507264 items) : Reserved + 0x0007bd80..0x0007c1bf ( 1088 items) : Stack + 0x0007c1c0..0x0007c1ff ( 64 items) : Reserved + 0x0007c200..0x0007c203 ( 4 items) : ../Release/0_0_reloadable11.o::_ZL9curr_iter (Data, Local, .bss.DMb.4) + 0x0007c204..0x0007c207 ( 4 items) : ../Release/0_0_reloadable11.o::_ZL10depth_iter (Data, Local, .bss.DMb.4) + 0x0007c208..0x0007c20b ( 4 items) : ../Release/0_0_reloadable11.o::_ZL8core_row (Data, Local, .bss.DMb.4) + 0x0007c20c..0x0007c20f ( 4 items) : ../Release/0_0_reloadable11.o::_ZL11ifm1_offset (Data, Local, .bss.DMb.4) + 0x0007c210..0x0007c213 ( 4 items) : ../Release/0_0_reloadable11.o::_ZL11ifm2_offset (Data, Local, .bss.DMb.4) + 0x0007c214..0x0007c217 ( 4 items) : ../Release/0_0_reloadable11.o::_ZL11reduce_axis (Data, Local, .bss.DMb.4) + 0x0007c218..0x0007c21b ( 4 items) : ../Release/0_0_reloadable11.o::_ZL10width_iter (Data, Local, .bss.DMb.4) + 0x0007c21c..0x0007c21f ( 4 items) : ../Release/0_0_reloadable11.o::_ZL11height_iter (Data, Local, .bss.DMb.4) + 0x0007c220..0x0007c223 ( 4 items) : ../Release/0_0_reloadable11.o::_ZN12mllib_graphs9ns_expandL11kernel_iterE (Data, Local, .bss.DMb.4) + 0x0007c224..0x0007c227 ( 4 items) : ../Release/0_0_reloadable11.o::_ZN12mllib_graphs9ns_expandL8core_rowE (Data, Local, .bss.DMb.4) + 0x0007c228..0x0007c22b ( 4 items) : ../Release/0_0_reloadable11.o::_ZL8num_iter (Data, Local, .data.DMb.4) + 0x0007c22c..0x0007c22f ( 4 items) : me_defs.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive11control_satE (Data, Global, .bss.DMb.4) + 0x0007c230..0x0007c230 ( 1 items) : me_defs.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive11control_rndE (Data, Global, .bss.DMb.1) + 0x0007c240..0x0007c27f ( 64 items) : ../Release/0_0_reloadable11.o::reducesum_params (Data, Global, .data.DMb.64) + 0x0007c280..0x0007c29b ( 28 items) : ../Release/0_0_reloadable11.o::_ZL20g_uniformKernelFuncs (Data, Local, .rodata.DMb.64) + + Called functions : _Z15_b14285_wrapperPPv + _Z15_b14290_wrapperPPv + _Z15_b13811_wrapperPPv + _Z15_b13749_wrapperPPv + _Z15_b14811_wrapperPPv + _Z14_b8134_wrapperPPv + _Z14_b8096_wrapperPPv + + 0x0007c2a0..0x0007c2df ( 64 items) : ../Release/0_0_reloadable11.o::_ZZN12mllib_graphs19softmax_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERA8_KjRNSD_ISE_NSF_3outET1_EEE9sm_params (Data, Weak, .data.DMb.32) + 0x0007c300..0x0007c33f ( 64 items) : ../Release/0_0_reloadable11.o::mul1d_attribute_broadcasting_params (Data, Global, .bss.DMb.64) + 0x0007c340..0x0007c37f ( 64 items) : ../Release/0_0_reloadable11.o::add1d_params (Data, Global, .bss.DMb.64) + 0x0007c380..0x0007c3bf ( 64 items) : ../Release/0_0_reloadable11.o::mul1d_params (Data, Global, .bss.DMb.64) + 0x0007c3c0..0x0007c3ff ( 64 items) : ../Release/0_0_reloadable11.o::sub1d_params (Data, Global, .bss.DMb.64) + 0x0007ca00..0x000fffff ( 538112 items) : Reserved + +Memory map for memory 'PM': + + Size = 1048576 + Width = 8 bits + Offset = 0 + Used = 10872 + + 0x00000000..0x000009df ( 2528 items) : Reserved + 0x000009e0..0x00000b77 ( 408 items) : ../Release/0_0_reloadable11.o::_Z13kernelWrapperPPvjjjj (Function, Global, .text) (stack frame size = 64) + + Referenced symbols: _ZL20g_uniformKernelFuncs + + 0x00000b80..0x00000bf9 ( 122 items) : ../Release/0_0_reloadable11.o::_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: mul1d_attribute_broadcasting_params + + 0x00000c00..0x00000c4b ( 76 items) : ../Release/0_0_reloadable11.o::_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 64) + + Called functions : _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv + + Referenced symbols: mul1d_attribute_broadcasting_params + + 0x00000c50..0x00000e33 ( 484 items) : ../Release/0_0_reloadable11.o::_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE (Function, Local, .text) (stack frame size = 64) + + Referenced symbols: _ZN12me_primitive11control_rndE + + 0x00000e40..0x00000ed5 ( 150 items) : ../Release/0_0_reloadable11.o::_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E (Function, Weak, .text) (stack frame size = 64) + + Called functions : _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE + + Referenced symbols: mul1d_attribute_broadcasting_params + + 0x00000ee0..0x000010c7 ( 488 items) : ../Release/0_0_reloadable11.o::_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 64) + + Called functions : _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv + _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + mul1d_attribute_broadcasting_params + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL11ifm1_offset + _ZL8num_iter + + 0x000010d0..0x00001149 ( 122 items) : ../Release/0_0_reloadable11.o::_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: add1d_params + + 0x00001150..0x000011c1 ( 114 items) : ../Release/0_0_reloadable11.o::_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv (Function, Weak, .text) (stack frame size = 64) + + Called functions : _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv + + Referenced symbols: add1d_params + + 0x000011d0..0x000011e3 ( 20 items) : ../Release/0_0_reloadable11.o::_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 0) + + Called functions : _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE + + Referenced symbols: add1d_params + + 0x000011f0..0x00001449 ( 602 items) : ../Release/0_0_reloadable11.o::_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE (Function, Global, .text) (stack frame size = 64) + + Called functions : _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv + _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + add1d_params + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL11ifm1_offset + _ZL11ifm2_offset + _ZL8num_iter + + 0x00001450..0x00001467 ( 24 items) : ../Release/0_0_reloadable11.o::_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: mul1d_params + + 0x00001470..0x00001509 ( 154 items) : ../Release/0_0_reloadable11.o::_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 64) + + Called functions : _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E + + Referenced symbols: mul1d_params + + 0x00001510..0x00001643 ( 308 items) : ../Release/0_0_reloadable11.o::_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: mul1d_params + _ZN12me_primitive11control_rndE + + 0x00001650..0x000018a9 ( 602 items) : ../Release/0_0_reloadable11.o::_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE (Function, Global, .text) (stack frame size = 64) + + Called functions : _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + mul1d_params + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL11ifm1_offset + _ZL11ifm2_offset + _ZL8num_iter + + 0x000018b0..0x00001929 ( 122 items) : ../Release/0_0_reloadable11.o::_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: sub1d_params + + 0x00001930..0x0000197b ( 76 items) : ../Release/0_0_reloadable11.o::_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 64) + + Called functions : _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv + + Referenced symbols: sub1d_params + + 0x00001980..0x00001993 ( 20 items) : ../Release/0_0_reloadable11.o::_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS4_E (Function, Weak, .text) (stack frame size = 0) + + Called functions : _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE + + Referenced symbols: sub1d_params + + 0x000019a0..0x00001bf9 ( 602 items) : ../Release/0_0_reloadable11.o::_Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE (Function, Global, .text) (stack frame size = 64) + + Called functions : _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv + _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS4_E + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + sub1d_params + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL11ifm1_offset + _ZL11ifm2_offset + _ZL8num_iter + + 0x00001c00..0x00001f3f ( 832 items) : ../Release/0_0_reloadable11.o::_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: reducesum_params + + 0x00001f40..0x00002527 ( 1512 items) : ../Release/0_0_reloadable11.o::_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E (Function, Weak, .text) (stack frame size = 64) + + Called functions : _ZN12me_primitive10udiv_dstepEjjRjS0_ + + Referenced symbols: reducesum_params + _ZN12me_primitive11control_rndE + + 0x00002530..0x00002c0d ( 1758 items) : ../Release/0_0_reloadable11.o::_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 64) + + Called functions : _ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv + _ZN12me_primitive10udiv_dstepEjjRjS0_ + _ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E + + Referenced symbols: _ZL9curr_iter + reducesum_params + _ZL8core_row + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL11reduce_axis + _ZL11ifm1_offset + _ZL8num_iter + _ZL10depth_iter + _ZL10width_iter + _ZL11height_iter + + 0x00002c10..0x00002c33 ( 36 items) : ../Release/0_0_reloadable11.o::_Z15_b14285_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE + + 0x00002c40..0x00002c5f ( 32 items) : ../Release/0_0_reloadable11.o::_Z15_b14290_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + + 0x00002c60..0x00002c83 ( 36 items) : ../Release/0_0_reloadable11.o::_Z15_b13811_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE + + 0x00002c90..0x00002cb3 ( 36 items) : ../Release/0_0_reloadable11.o::_Z15_b13749_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE + + 0x00002cc0..0x00002cdf ( 32 items) : ../Release/0_0_reloadable11.o::_Z15_b14811_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + + 0x00002ce0..0x000030ef ( 1040 items) : ../Release/0_0_reloadable11.o::_Z17softmax_row_majorILi1E8bfloat16S0_Lt1EEvPT0_PT1_33softmax_row_major_internal_params (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: _ZN12me_primitive11control_rndE + + 0x000030f0..0x000032b7 ( 456 items) : ../Release/0_0_reloadable11.o::_ZN12mllib_graphs19softmax_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERA8_KjRNSD_ISE_NSF_3outET1_EE (Function, Weak, .text) (stack frame size = 128) + + Called functions : _Z17softmax_row_majorILi1E8bfloat16S0_Lt1EEvPT0_PT1_33softmax_row_major_internal_params + + Referenced symbols: _ZZN12mllib_graphs19softmax_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERA8_KjRNSD_ISE_NSF_3outET1_EEE9sm_params + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + + 0x000032c0..0x000032df ( 32 items) : ../Release/0_0_reloadable11.o::_Z14_b8134_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _ZN12mllib_graphs19softmax_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERA8_KjRNSD_ISE_NSF_3outET1_EE + + 0x000032e0..0x00003471 ( 402 items) : ../Release/0_0_reloadable11.o::_ZN12mllib_graphs14expand_wrapperI8bfloat16EEvRN3adf9io_bufferIT_NS2_9direction2inENS2_16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEEEERA3_KjRNS3_IS4_NS5_3outESG_EE (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: _ZN12mllib_graphs9ns_expandL11kernel_iterE + _ZN12mllib_graphs9ns_expandL8core_rowE + _ZN12me_primitive11control_rndE + + 0x00003480..0x0000349f ( 32 items) : ../Release/0_0_reloadable11.o::_Z14_b8096_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _ZN12mllib_graphs14expand_wrapperI8bfloat16EEvRN3adf9io_bufferIT_NS2_9direction2inENS2_16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEEEERA3_KjRNS3_IS4_NS5_3outESG_EE + + 0x000034a0..0x0000352d ( 142 items) : me_div.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive10udiv_dstepEjjRjS0_ (Function, Global, .text) (stack frame size = 0) + +External symbols: + + __dso_handle = 0x0 + _ctors_end = 0x0 + _ctors_start = 0x0 + _dtors_end = 0x0 + _dtors_start = 0x0 + _pc_end = 0x352e + _pc_start = 0x9e0 + _sp_end_DM_stack = 0x7c1c0 + _sp_start_DM_stack = 0x7bd80 + +Section summary for memory 'DM_stack': + + .stack File + ---------- ---------- + 1088 + ---------- ---------- + 1088 Total + +Section summary for memory 'DMb': + + .bss .data .rodata File + ---------- ---------- ---------- ---------- + 296 132 28 ../Release/0_0_reloadable11.o + 5 0 0 me_defs.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release/libme.a) + ---------- ---------- ---------- ---------- + 301 132 28 Total + +Section summary for memory 'PM': + + .text File + ---------- ---------- + 10730 ../Release/0_0_reloadable11.o + 142 me_div.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release/libme.a) + ---------- ---------- + 10872 Total + +File summary: + +../Release/0_0_reloadable11.o + DMb 456 + PM 10730 + +me_defs.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release/libme.a) + DMb 5 + +me_div.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release/libme.a) + PM 142 + diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable81/Release/0_0_reloadable81.sdr b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable81/Release/0_0_reloadable81.sdr new file mode 100644 index 0000000000000000000000000000000000000000..4452940207be37acab8ac1faa6114e1a9453be10 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable81/Release/0_0_reloadable81.sdr @@ -0,0 +1,122 @@ + +// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri May 30 11:31:54 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// bridge -o../Release/0_0_reloadable11 ../Release/0_0_reloadable11.o -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/isg -g -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable11.bcf -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/softfloat/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork3586024 -pme + +// Release: ipp V-2024.06-TGT-241219 + +// Symbols in memory 'DM_bankA': +// Symbols in memory 'DM_bankAB': +// Symbols in memory 'DM_bankAC': +// Symbols in memory 'DM_bankAD': +// Symbols in memory 'DM_bankB': +// Symbols in memory 'DM_bankBC': +// Symbols in memory 'DM_bankBD': +// Symbols in memory 'DM_bankC': +// Symbols in memory 'DM_bankCD': +// Symbols in memory 'DM_bankD': +// Symbols in memory 'DM_stack': +// Symbols in memory 'DM_test': +// Symbols in memory 'DMb': +_symbol _ZN12me_primitive11control_satE 0x0007c22c +_symbol _ZN12me_primitive11control_rndE 0x0007c230 +_symbol reducesum_params 0x0007c240 +_symbol _ZZN12mllib_graphs19softmax_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERA8_KjRNSD_ISE_NSF_3outET1_EEE9sm_params 0x0007c2a0 +_symbol mul1d_attribute_broadcasting_params 0x0007c300 +_symbol add1d_params 0x0007c340 +_symbol mul1d_params 0x0007c380 +_symbol sub1d_params 0x0007c3c0 +// Symbols in memory 'DMh': +// Symbols in memory 'DMh_bankA': +// Symbols in memory 'DMh_bankAB': +// Symbols in memory 'DMh_bankAC': +// Symbols in memory 'DMh_bankAD': +// Symbols in memory 'DMh_bankB': +// Symbols in memory 'DMh_bankBC': +// Symbols in memory 'DMh_bankBD': +// Symbols in memory 'DMh_bankC': +// Symbols in memory 'DMh_bankCD': +// Symbols in memory 'DMh_bankD': +// Symbols in memory 'DMh_stack': +// Symbols in memory 'DMs': +// Symbols in memory 'DMs_bankA': +// Symbols in memory 'DMs_bankAB': +// Symbols in memory 'DMs_bankAC': +// Symbols in memory 'DMs_bankAD': +// Symbols in memory 'DMs_bankB': +// Symbols in memory 'DMs_bankBC': +// Symbols in memory 'DMs_bankBD': +// Symbols in memory 'DMs_bankC': +// Symbols in memory 'DMs_bankCD': +// Symbols in memory 'DMs_bankD': +// Symbols in memory 'DMs_stack': +// Symbols in memory 'DMv': +// Symbols in memory 'DMv_bankA': +// Symbols in memory 'DMv_bankAB': +// Symbols in memory 'DMv_bankAC': +// Symbols in memory 'DMv_bankAD': +// Symbols in memory 'DMv_bankB': +// Symbols in memory 'DMv_bankBC': +// Symbols in memory 'DMv_bankBD': +// Symbols in memory 'DMv_bankC': +// Symbols in memory 'DMv_bankCD': +// Symbols in memory 'DMv_bankD': +// Symbols in memory 'DMv_stack': +// Symbols in memory 'DMw': +// Symbols in memory 'DMw_bankA': +// Symbols in memory 'DMw_bankAB': +// Symbols in memory 'DMw_bankAC': +// Symbols in memory 'DMw_bankAD': +// Symbols in memory 'DMw_bankB': +// Symbols in memory 'DMw_bankBC': +// Symbols in memory 'DMw_bankBD': +// Symbols in memory 'DMw_bankC': +// Symbols in memory 'DMw_bankCD': +// Symbols in memory 'DMw_bankD': +// Symbols in memory 'DMw_stack': +// Symbols in memory 'DMx': +// Symbols in memory 'DMx_bankA': +// Symbols in memory 'DMx_bankAB': +// Symbols in memory 'DMx_bankAC': +// Symbols in memory 'DMx_bankAD': +// Symbols in memory 'DMx_bankB': +// Symbols in memory 'DMx_bankBC': +// Symbols in memory 'DMx_bankBD': +// Symbols in memory 'DMx_bankC': +// Symbols in memory 'DMx_bankCD': +// Symbols in memory 'DMx_bankD': +// Symbols in memory 'DMx_stack': +// Symbols in memory 'PM': +_symbol _Z13kernelWrapperPPvjjjj 0x000009e0 +_symbol _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv 0x00000b80 +_symbol _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv 0x00000c00 +_symbol _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E 0x00000e40 +_symbol _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x00000ee0 +_symbol _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv 0x000010d0 +_symbol _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv 0x00001150 +_symbol _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E 0x000011d0 +_symbol _Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE 0x000011f0 +_symbol _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E 0x00001450 +_symbol _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv 0x00001470 +_symbol _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E 0x00001510 +_symbol _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE 0x00001650 +_symbol _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv 0x000018b0 +_symbol _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv 0x00001930 +_symbol _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS4_E 0x00001980 +_symbol _Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE 0x000019a0 +_symbol _ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv 0x00001c00 +_symbol _ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E 0x00001f40 +_symbol _Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x00002530 +_symbol _Z15_b14285_wrapperPPv 0x00002c10 +_symbol _Z15_b14290_wrapperPPv 0x00002c40 +_symbol _Z15_b13811_wrapperPPv 0x00002c60 +_symbol _Z15_b13749_wrapperPPv 0x00002c90 +_symbol _Z15_b14811_wrapperPPv 0x00002cc0 +_symbol _Z17softmax_row_majorILi1E8bfloat16S0_Lt1EEvPT0_PT1_33softmax_row_major_internal_params 0x00002ce0 +_symbol _ZN12mllib_graphs19softmax_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERA8_KjRNSD_ISE_NSF_3outET1_EE 0x000030f0 +_symbol _Z14_b8134_wrapperPPv 0x000032c0 +_symbol _ZN12mllib_graphs14expand_wrapperI8bfloat16EEvRN3adf9io_bufferIT_NS2_9direction2inENS2_16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEEEERA3_KjRNS3_IS4_NS5_3outESG_EE 0x000032e0 +_symbol _Z14_b8096_wrapperPPv 0x00003480 +_symbol _ZN12me_primitive10udiv_dstepEjjRjS0_ 0x000034a0 +// Symbols in memory 'PMw': +// Symbols in memory 'TM4': diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable81/Release/0_0_reloadable81.srv b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable81/Release/0_0_reloadable81.srv new file mode 100644 index 0000000000000000000000000000000000000000..e82e9f9b441b7ccfddefec5dd999d4904993fe42 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable81/Release/0_0_reloadable81.srv @@ -0,0 +1,15455 @@ + +// File generated by darts version V-2024.06#84922c0d9f#241219, Fri May 30 11:31:55 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// darts -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -d -h -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable11 me + +// Release: ipp V-2024.06-TGT-241219 +.label __Z13kernelWrapperPPvjjjj___func_begin0 +.label _Z13kernelWrapperPPvjjjj +.function kernelWrapper _Z13kernelWrapperPPvjjjj +.src_ref 0 "0_0_reloadable11.cc" 92 first +.src_ref 0 "0_0_reloadable11.cc" 94 60 +.src_ref 0 "0_0_reloadable11.cc" 94 110 first +.function_start + 2528 "10111010" // LDA r16, [p0]; NEZ r15, r1; MOV r4, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2529 "01111000" // /* MW 9 */ + 2530 "11010000" // /* MW 8 */ + 2531 "10001011" // /* MW 7 */ + 2532 "10000000" // /* MW 6 */ + 2533 "11110111" // /* MW 5 */ + 2534 "00000010" // /* MW 4 */ + 2535 "11010000" // /* MW 3 */ + 2536 "11000010" // /* MW 2 */ + 2537 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 92 + 2538 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2539 "00000001" // /* MW 5 */ + 2540 "00000000" // /* MW 4 */ + 2541 "00000000" // /* MW 3 */ + 2542 "00001000" // /* MW 2 */ + 2543 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 + 2544 "00000010" // ST p6, [sp, #-20]; MOV r26, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2545 "01110000" // /* MW 7 */ + 2546 "11010000" // /* MW 6 */ + 2547 "01001011" // /* MW 5 */ + 2548 "00000011" // /* MW 4 */ + 2549 "10110000" // /* MW 3 */ + 2550 "11100011" // /* MW 2 */ + 2551 "11111101" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 0 "0_0_reloadable11.cc" 97 112 + 2552 "00000010" // ST r13, [sp, #-12]; MOV r13, r3 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2553 "01110000" // /* MW 7 */ + 2554 "11010000" // /* MW 6 */ + 2555 "10101000" // /* MW 5 */ + 2556 "00000001" // /* MW 4 */ + 2557 "10110000" // /* MW 3 */ + 2558 "10110110" // /* MW 2 */ + 2559 "11111110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 + 2560 "00000010" // ST r14, [sp, #-8]; MOV r14, r1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2561 "01110000" // /* MW 7 */ + 2562 "01010000" // /* MW 6 */ + 2563 "11001000" // /* MW 5 */ + 2564 "00000001" // /* MW 4 */ + 2565 "10110000" // /* MW 3 */ + 2566 "00111010" // /* MW 2 */ + 2567 "11111111" // /* MW 1 */ + 2568 "10011000" // ST p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2569 "10011101" // /* MW 3 */ + 2570 "11111111" // /* MW 2 */ + 2571 "00001111" // /* MW 1 */ + 2572 "10011000" // ST r4, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2573 "10010101" // /* MW 3 */ + 2574 "11110000" // /* MW 2 */ + 2575 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 first + 2576 "00000010" // ST lr, [sp, #-24]; ADD.NC p6, r16, #4 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2577 "00000000" // /* MW 7 */ + 2578 "00000001" // /* MW 6 */ + 2579 "00110100" // /* MW 5 */ + 2580 "00000011" // /* MW 4 */ + 2581 "10110000" // /* MW 3 */ + 2582 "00000111" // /* MW 2 */ + 2583 "11111101" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 + 2584 "10011000" // LDA r16, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2585 "00010110" // /* MW 3 */ + 2586 "00011110" // /* MW 2 */ + 2587 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 36 + 2588 "10011000" // LDA r18, [p6], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2589 "01010110" // /* MW 3 */ + 2590 "00111110" // /* MW 2 */ + 2591 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 46 + 2592 "10011000" // LDA r17, [p6], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2593 "00110110" // /* MW 3 */ + 2594 "11101110" // /* MW 2 */ + 2595 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 60 + 2596 "10011000" // LDA r27, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2597 "01110110" // /* MW 3 */ + 2598 "00000111" // /* MW 2 */ + 2599 "00000110" // /* MW 1 */ + 2600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2601 "00000000" // /* MW 1 */ + 2602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2603 "00000000" // /* MW 1 */ + 2604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2605 "00000000" // /* MW 1 */ + 2606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2607 "00000000" // /* MW 1 */ + 2608 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2609 "00000000" // /* MW 1 */ + 2610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2611 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 219 30 first +.src_ref 1 "io_buffer_compiler.h" 219 37 first + 2612 "00011000" // SEL.EQZ r16, r16, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2613 "00100010" // /* MW 3 */ + 2614 "00100001" // /* MW 2 */ + 2615 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 218 23 first + 2616 "10011000" // ST r16, [p6, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2617 "00010001" // /* MW 3 */ + 2618 "11010110" // /* MW 2 */ + 2619 "00001110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 +.src_ref 1 "io_buffer_main.h" 434 8 + 2620 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2621 "11111101" // /* MW 3 */ + 2622 "11100000" // /* MW 2 */ + 2623 "00010111" // /* MW 1 */ + 2624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2625 "00000000" // /* MW 1 */ + 2626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2627 "00000000" // /* MW 1 */ + 2628 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2629 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 first + 2630 "00011000" // ACQ.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2631 "00001000" // /* MW 3 */ + 2632 "01010111" // /* MW 2 */ + 2633 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 97 60 +.src_ref 0 "0_0_reloadable11.cc" 97 112 first +.src_ref 0 "0_0_reloadable11.cc" 100 7 + 2634 "01100100" // NEZ r26, r13; MOV r17, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2635 "00001001" // /* MW 5 */ + 2636 "10100000" // /* MW 4 */ + 2637 "00001000" // /* MW 3 */ + 2638 "10011110" // /* MW 2 */ + 2639 "01101110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 97 60 +.src_ref 0 "0_0_reloadable11.cc" 97 60 + 2640 "11100100" // LSHL r19, r15, r17; MOV r18, p0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2641 "10000001" // /* MW 5 */ + 2642 "00100001" // /* MW 4 */ + 2643 "10111001" // /* MW 3 */ + 2644 "11100011" // /* MW 2 */ + 2645 "01111100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 97 60 +.src_ref 0 "0_0_reloadable11.cc" 97 60 + 2646 "10100100" // LSHL r18, r2, r17; ADD.NC r19, r19, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2647 "10010010" // /* MW 5 */ + 2648 "10110011" // /* MW 4 */ + 2649 "10111001" // /* MW 3 */ + 2650 "10100011" // /* MW 2 */ + 2651 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 97 60 + 2652 "01011000" // ADD.NC p6, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2653 "11001001" // /* MW 3 */ + 2654 "01101001" // /* MW 2 */ + 2655 "00011110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 97 60 + 2656 "00001100" // LDA r18, [p6]; ST r26, [sp, #-28] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2657 "10101011" // /* MW 5 */ + 2658 "11001110" // /* MW 4 */ + 2659 "11011111" // /* MW 3 */ + 2660 "11001010" // /* MW 2 */ + 2661 "11000000" // /* MW 1 */ + 2662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2663 "00000000" // /* MW 1 */ + 2664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2665 "00000000" // /* MW 1 */ + 2666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2667 "00000000" // /* MW 1 */ + 2668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2669 "00000000" // /* MW 1 */ + 2670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2671 "00000000" // /* MW 1 */ + 2672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2673 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 first + 2674 "00011000" // ADD.NC p7, r18, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2675 "00000010" // /* MW 3 */ + 2676 "01101001" // /* MW 2 */ + 2677 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 + 2678 "10011000" // LDA r19, [p7], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2679 "01110110" // /* MW 3 */ + 2680 "00111110" // /* MW 2 */ + 2681 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 23 + 2682 "10011000" // LDA r18, [p7], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2683 "01010110" // /* MW 3 */ + 2684 "11101110" // /* MW 2 */ + 2685 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 36 + 2686 "10011000" // LDA r20, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2687 "10010110" // /* MW 3 */ + 2688 "00011110" // /* MW 2 */ + 2689 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 60 + 2690 "10011000" // LDA r27, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2691 "01110110" // /* MW 3 */ + 2692 "00000111" // /* MW 2 */ + 2693 "00000111" // /* MW 1 */ + 2694 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2695 "00000000" // /* MW 1 */ + 2696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2697 "00000000" // /* MW 1 */ + 2698 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2699 "00000000" // /* MW 1 */ + 2700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2701 "00000000" // /* MW 1 */ + 2702 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2703 "00000000" // /* MW 1 */ + 2704 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2705 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 219 30 first +.src_ref 1 "io_buffer_compiler.h" 219 37 first + 2706 "00011000" // SEL.EQZ r19, r19, r20, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2707 "01000010" // /* MW 3 */ + 2708 "11100111" // /* MW 2 */ + 2709 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 218 23 first + 2710 "10011000" // ST r19, [p7, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2711 "01110001" // /* MW 3 */ + 2712 "11010110" // /* MW 2 */ + 2713 "00001111" // /* MW 1 */ + 2714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2715 "00000000" // /* MW 1 */ + 2716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2717 "00000000" // /* MW 1 */ + 2718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2719 "00000000" // /* MW 1 */ + 2720 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2721 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 first + 2722 "00011000" // ACQ.COND r18, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2723 "00001000" // /* MW 3 */ + 2724 "10010111" // /* MW 2 */ + 2725 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 100 7 first + 2726 "10011000" // LSHL r16, r0, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2727 "00011101" // /* MW 3 */ + 2728 "00100001" // /* MW 2 */ + 2729 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 100 7 + 2730 "11111000" // MOV dj0, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2731 "00100000" // /* MW 3 */ + 2732 "10001000" // /* MW 2 */ + 2733 "00011000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 100 7 + 2734 "01000100" // MOVXM p7, #508544 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2735 "00000000" // /* MW 5 */ + 2736 "11000101" // /* MW 4 */ + 2737 "11001110" // /* MW 3 */ + 2738 "00000111" // /* MW 2 */ + 2739 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 100 7 + 2740 "10011000" // LDA p1, [p7, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2741 "10011110" // /* MW 3 */ + 2742 "00000000" // /* MW 2 */ + 2743 "00000111" // /* MW 1 */ + 2744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2745 "00000000" // /* MW 1 */ + 2746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2747 "00000000" // /* MW 1 */ + 2748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2749 "00000000" // /* MW 1 */ + 2750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2751 "00000000" // /* MW 1 */ + 2752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2753 "00000000" // /* MW 1 */ + 2754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2755 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 100 4 +.no_stack_arguments + 2756 "00011000" // JL p1 /* MW 4 */ /* control_operation: words=4 call unconditional cycles_taken=1 indirect absolute delay_slots=5 */ + 2757 "01000000" // /* MW 3 */ + 2758 "00110000" // /* MW 2 */ + 2759 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 103 60 +.delay_slot + 2760 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2761 "11000000" // /* MW 3 */ + 2762 "01100000" // /* MW 2 */ + 2763 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2765 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2767 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2769 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2770 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 2771 "00011100" // /* MW 13 */ + 2772 "00000000" // /* MW 12 */ + 2773 "00000000" // /* MW 11 */ + 2774 "01010111" // /* MW 10 */ + 2775 "00011010" // /* MW 9 */ + 2776 "01000000" // /* MW 8 */ + 2777 "00000000" // /* MW 7 */ + 2778 "00000000" // /* MW 6 */ + 2779 "10110110" // /* MW 5 */ + 2780 "00000010" // /* MW 4 */ + 2781 "11110000" // /* MW 3 */ + 2782 "00101100" // /* MW 2 */ + 2783 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 103 60 first +.return_address + 2784 "10011000" // LDA r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2785 "00010110" // /* MW 3 */ + 2786 "00000110" // /* MW 2 */ + 2787 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 + 2788 "00011000" // LDA el0, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2789 "00101001" // /* MW 3 */ + 2790 "11100100" // /* MW 2 */ + 2791 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 108 + 2792 "00011000" // LDA lr, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2793 "00111001" // /* MW 3 */ + 2794 "11101000" // /* MW 2 */ + 2795 "00000111" // /* MW 1 */ + 2796 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2797 "00000000" // /* MW 1 */ + 2798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2799 "00000000" // /* MW 1 */ + 2800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2801 "00000000" // /* MW 1 */ + 2802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2803 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 12 first + 2804 "00011000" // ADD.NC p7, r16, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2805 "00001000" // /* MW 3 */ + 2806 "01101000" // /* MW 2 */ + 2807 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 12 + 2808 "10011000" // LDA r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2809 "00110110" // /* MW 3 */ + 2810 "00000110" // /* MW 2 */ + 2811 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_main.h" 464 8 +.src_ref 1 "io_buffer_main.h" 464 8 +.src_ref 1 "io_buffer_main.h" 464 8 + 2812 "11100100" // MOVX r16, #1; MOV r26, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2813 "01000001" // /* MW 5 */ + 2814 "00101111" // /* MW 4 */ + 2815 "10101101" // /* MW 3 */ + 2816 "00000000" // /* MW 2 */ + 2817 "00000100" // /* MW 1 */ + 2818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2819 "00000000" // /* MW 1 */ + 2820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2821 "00000000" // /* MW 1 */ + 2822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2823 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 2824 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2825 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 2826 "11111000" // MOV r26, el0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2827 "00011100" // /* MW 3 */ + 2828 "10100000" // /* MW 2 */ + 2829 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2830 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2831 "00001000" // /* MW 3 */ + 2832 "01010101" // /* MW 2 */ + 2833 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 2834 "11010100" // LDA r17, [p7, #-4]; MOV r27, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2835 "01000001" // /* MW 5 */ + 2836 "10101110" // /* MW 4 */ + 2837 "11011101" // /* MW 3 */ + 2838 "11000110" // /* MW 2 */ + 2839 "11111110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 106 60 first + 2840 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2841 "01010110" // /* MW 3 */ + 2842 "00000110" // /* MW 2 */ + 2843 "00000110" // /* MW 1 */ + 2844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2845 "00000000" // /* MW 1 */ + 2846 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2847 "00000000" // /* MW 1 */ + 2848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2849 "00000000" // /* MW 1 */ + 2850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2851 "00000000" // /* MW 1 */ + 2852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2853 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 2854 "10011000" // SUB r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2855 "00010001" // /* MW 3 */ + 2856 "00100111" // /* MW 2 */ + 2857 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 25 first +.src_ref 1 "io_buffer_compiler.h" 630 24 + 2858 "00100100" // SEL.EQZ r17, r17, r19, r27; ADD.NC p0, r18, #20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2859 "00010100" // /* MW 5 */ + 2860 "11010010" // /* MW 4 */ + 2861 "01000000" // /* MW 3 */ + 2862 "01100110" // /* MW 2 */ + 2863 "10001100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 25 +.src_ref 1 "io_buffer_compiler.h" 630 22 first + 2864 "00001100" // LDA r17, [p0]; ST r17, [p7, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2865 "01100011" // /* MW 5 */ + 2866 "11101100" // /* MW 4 */ + 2867 "11011111" // /* MW 3 */ + 2868 "11000110" // /* MW 2 */ + 2869 "00000000" // /* MW 1 */ + 2870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2871 "00000000" // /* MW 1 */ + 2872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2873 "00000000" // /* MW 1 */ + 2874 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2875 "00000000" // /* MW 1 */ + 2876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2877 "00000000" // /* MW 1 */ + 2878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2879 "00000000" // /* MW 1 */ + 2880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2881 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 first + 2882 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2883 "00001000" // /* MW 3 */ + 2884 "01010101" // /* MW 2 */ + 2885 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 2886 "10011000" // LDA r17, [p0, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2887 "00110110" // /* MW 3 */ + 2888 "11100110" // /* MW 2 */ + 2889 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 2890 "00011000" // LDA p6, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2891 "00011001" // /* MW 3 */ + 2892 "11101111" // /* MW 2 */ + 2893 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 2894 "00011000" // LDA r13, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2895 "10110001" // /* MW 3 */ + 2896 "11110101" // /* MW 2 */ + 2897 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 2898 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2899 "11010001" // /* MW 3 */ + 2900 "11111001" // /* MW 2 */ + 2901 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 2902 "00011000" // LDA p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2903 "10011001" // /* MW 3 */ + 2904 "11111111" // /* MW 2 */ + 2905 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 2906 "00011000" // LDA r15, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2907 "11110001" // /* MW 3 */ + 2908 "11110001" // /* MW 2 */ + 2909 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 108 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 2910 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 2911 "00000000" // /* MW 3 */ + 2912 "00101000" // /* MW 2 */ + 2913 "00010000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2914 "11111000" // MOV r27, r13 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2915 "10100000" // /* MW 3 */ + 2916 "11010110" // /* MW 2 */ + 2917 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 first +.delay_slot + 2918 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2919 "00010001" // /* MW 3 */ + 2920 "00100001" // /* MW 2 */ + 2921 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.delay_slot + 2922 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2923 "00000010" // /* MW 3 */ + 2924 "01100001" // /* MW 2 */ + 2925 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 108 first +.delay_slot + 2926 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2927 "00000001" // /* MW 5 */ + 2928 "00000000" // /* MW 4 */ + 2929 "00000000" // /* MW 3 */ + 2930 "11111000" // /* MW 2 */ + 2931 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 22 first +.delay_slot + 2932 "10011000" // ST r16, [p0, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2933 "00010001" // /* MW 3 */ + 2934 "11100110" // /* MW 2 */ +.label _Z13kernelWrapperPPvjjjj__end +.label __Z13kernelWrapperPPvjjjj___func_end0 + 2935 "00001000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv +.function shared_setup_backbone _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv +.src_ref 2 "elementwise_binary_shared.h" 164 first +.src_ref 2 "elementwise_binary_shared.h" 170 22 +.src_ref 2 "elementwise_binary_shared.h" 170 24 first +.function_start + 2944 "10111010" // LDA el0, [p1], #4; MOVXM p0, #508672 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2945 "00010000" // /* MW 9 */ + 2946 "10000000" // /* MW 8 */ + 2947 "00110001" // /* MW 7 */ + 2948 "11110000" // /* MW 6 */ + 2949 "00000001" // /* MW 5 */ + 2950 "00000000" // /* MW 4 */ + 2951 "11010000" // /* MW 3 */ + 2952 "10000101" // /* MW 2 */ + 2953 "00100011" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 36 +.src_ref 2 "elementwise_binary_shared.h" 175 36 +.src_ref 2 "elementwise_binary_shared.h" 175 48 + 2954 "10111010" // MOVA r0, #-128; MOVX r1, #256; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2955 "01011000" // /* MW 9 */ + 2956 "00000000" // /* MW 8 */ + 2957 "00001000" // /* MW 7 */ + 2958 "00001011" // /* MW 6 */ + 2959 "00010000" // /* MW 5 */ + 2960 "00001000" // /* MW 4 */ + 2961 "00000000" // /* MW 3 */ + 2962 "00000000" // /* MW 2 */ + 2963 "11110000" // /* MW 1 */ + 2964 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2965 "00000000" // /* MW 1 */ + 2966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2967 "00000000" // /* MW 1 */ + 2968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2969 "00000000" // /* MW 1 */ + 2970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2971 "00000000" // /* MW 1 */ + 2972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2973 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 170 22 first + 2974 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2975 "00101001" // /* MW 3 */ + 2976 "00011100" // /* MW 2 */ + 2977 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 171 24 first + 2978 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2979 "00101110" // /* MW 3 */ + 2980 "00011100" // /* MW 2 */ + 2981 "00000001" // /* MW 1 */ + 2982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2983 "00000000" // /* MW 1 */ + 2984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2985 "00000000" // /* MW 1 */ + 2986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2987 "00000000" // /* MW 1 */ + 2988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2989 "00000000" // /* MW 1 */ + 2990 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2991 "00000000" // /* MW 1 */ + 2992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2993 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 171 22 + 2994 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2995 "00101001" // /* MW 3 */ + 2996 "00011100" // /* MW 2 */ + 2997 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 172 24 first + 2998 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2999 "00101110" // /* MW 3 */ + 3000 "00000100" // /* MW 2 */ + 3001 "00000001" // /* MW 1 */ + 3002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3003 "00000000" // /* MW 1 */ + 3004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3005 "00000000" // /* MW 1 */ + 3006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3007 "00000000" // /* MW 1 */ + 3008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3009 "00000000" // /* MW 1 */ + 3010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3011 "00000000" // /* MW 1 */ + 3012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3013 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 172 22 + 3014 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3015 "00101001" // /* MW 3 */ + 3016 "00011100" // /* MW 2 */ + 3017 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 173 24 first + 3018 "10011000" // LDA r3, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3019 "01110110" // /* MW 3 */ + 3020 "00010100" // /* MW 2 */ + 3021 "00000001" // /* MW 1 */ + 3022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3023 "00000000" // /* MW 1 */ + 3024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3025 "00000000" // /* MW 1 */ + 3026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3027 "00000000" // /* MW 1 */ + 3028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3029 "00000000" // /* MW 1 */ + 3030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3031 "00000000" // /* MW 1 */ + 3032 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3033 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 173 22 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3034 "10011000" // ST r3, [p0], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3035 "01110001" // /* MW 3 */ + 3036 "01001100" // /* MW 2 */ + 3037 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 34 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3038 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3039 "00010111" // /* MW 3 */ + 3040 "00000100" // /* MW 2 */ + 3041 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 176 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3042 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3043 "00000000" // /* MW 3 */ + 3044 "00101000" // /* MW 2 */ + 3045 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3046 "01000100" // MOVXM r2, #65280 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3047 "00000000" // /* MW 5 */ + 3048 "00111110" // /* MW 4 */ + 3049 "11110001" // /* MW 3 */ + 3050 "00000000" // /* MW 2 */ + 3051 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 48 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3052 "10011000" // AND r2, r3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3053 "00100100" // /* MW 3 */ + 3054 "11000100" // /* MW 2 */ + 3055 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3056 "10011000" // EQ r27, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3057 "00100111" // /* MW 3 */ + 3058 "01110110" // /* MW 2 */ + 3059 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 36 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3060 "00011000" // SEL.EQZ r0, r0, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3061 "10000010" // /* MW 3 */ + 3062 "00000001" // /* MW 2 */ + 3063 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_end0 + 3065 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv +.function setup _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv +.src_ref 2 "elementwise_binary_shared.h" 178 +.src_ref 2 "elementwise_binary_shared.h" 178 first +.function_start + 3072 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3073 "00000001" // /* MW 5 */ + 3074 "00000000" // /* MW 4 */ + 3075 "00000000" // /* MW 3 */ + 3076 "00001000" // /* MW 2 */ + 3077 "00000000" // /* MW 1 */ + 3078 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3079 "00111101" // /* MW 3 */ + 3080 "11111100" // /* MW 2 */ + 3081 "00001111" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 179 8 first +.no_stack_arguments + 3082 "00000100" // JL #2944 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=2944 delay_slots=5 */ + 3083 "00000001" // /* MW 5 */ + 3084 "00000000" // /* MW 4 */ + 3085 "11000000" // /* MW 3 */ + 3086 "00000101" // /* MW 2 */ + 3087 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 179 8 +.delay_slot + 3088 "01000100" // MOVXM p0, #508672 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3089 "00000000" // /* MW 5 */ + 3090 "11000110" // /* MW 4 */ + 3091 "11000000" // /* MW 3 */ + 3092 "00000111" // /* MW 2 */ + 3093 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3095 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3097 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3099 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3100 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3101 "01100111" // /* MW 3 */ + 3102 "00000001" // /* MW 2 */ + 3103 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 181 4 +.src_ref 3 "mul_impl.h" 152 25 +.return_address + 3104 "10111010" // LDA lr, [sp, #-4]; MOVXM p1, #508672 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3105 "00010000" // /* MW 9 */ + 3106 "10000000" // /* MW 8 */ + 3107 "10110001" // /* MW 7 */ + 3108 "11110000" // /* MW 6 */ + 3109 "00000001" // /* MW 5 */ + 3110 "00000000" // /* MW 4 */ + 3111 "00100000" // /* MW 3 */ + 3112 "10000111" // /* MW 2 */ + 3113 "11111111" // /* MW 1 */ + 3114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3115 "00000000" // /* MW 1 */ + 3116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3117 "00000000" // /* MW 1 */ + 3118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3119 "00000000" // /* MW 1 */ + 3120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3121 "00000000" // /* MW 1 */ + 3122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3123 "00000000" // /* MW 1 */ + 3124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3125 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 181 4 first + 3126 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3127 "00000000" // /* MW 3 */ + 3128 "00101000" // /* MW 2 */ + 3129 "00010000" // /* MW 1 */ +.src_ref 3 "mul_impl.h" 152 25 +.delay_slot + 3130 "00011000" // MOVX r16, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3131 "00001001" // /* MW 3 */ + 3132 "00100000" // /* MW 2 */ + 3133 "00010000" // /* MW 1 */ +.src_ref 3 "mul_impl.h" 152 25 first +.delay_slot + 3134 "10011000" // ST r16, [p1, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3135 "00010001" // /* MW 3 */ + 3136 "01000110" // /* MW 2 */ + 3137 "00001001" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 181 4 first +.delay_slot + 3138 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3139 "00000001" // /* MW 5 */ + 3140 "00000000" // /* MW 4 */ + 3141 "00000000" // /* MW 3 */ + 3142 "11111000" // /* MW 2 */ + 3143 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3145 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + 3147 "00000000" // /* MW 1 */ +.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE +.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_begin0 +.function shared_run_backbone _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE +.src_ref 2 "elementwise_binary_shared.h" 66 first +.src_ref 2 "elementwise_binary_shared.h" 78 37 +.function_start + 3152 "11111000" // MOV r2, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3153 "11000000" // /* MW 3 */ + 3154 "10010110" // /* MW 2 */ + 3155 "00011000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 134 58 +.src_ref 2 "elementwise_binary_shared.h" 78 37 first + 3156 "00100100" // MOVX r0, #0; ADD.NC p5, r2, #14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3157 "00001110" // /* MW 5 */ + 3158 "11000010" // /* MW 4 */ + 3159 "00101010" // /* MW 3 */ + 3160 "00000000" // /* MW 2 */ + 3161 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 134 58 first +.src_ref 2 "elementwise_binary_shared.h" 81 22 first + 3162 "11010100" // LDA.s16 r0, [p5], #2; VBCST.16 x0, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3163 "11100101" // /* MW 5 */ + 3164 "00000010" // /* MW 4 */ + 3165 "01010000" // /* MW 3 */ + 3166 "10000010" // /* MW 2 */ + 3167 "10100011" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 83 15 first + 3168 "10011000" // LDA r2, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3169 "01010110" // /* MW 3 */ + 3170 "00000100" // /* MW 2 */ + 3171 "00000101" // /* MW 1 */ + 3172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3173 "00000000" // /* MW 1 */ + 3174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3175 "00000000" // /* MW 1 */ + 3176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3177 "00000000" // /* MW 1 */ + 3178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3179 "00000000" // /* MW 1 */ + 3180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3181 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 83 26 + 3182 "00011000" // MOVX r1, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3183 "00001001" // /* MW 3 */ + 3184 "00000010" // /* MW 2 */ + 3185 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 83 26 + 3186 "10011000" // LTU r1, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3187 "00101100" // /* MW 3 */ + 3188 "01000010" // /* MW 2 */ + 3189 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 83 8 + 3190 "10000100" // JNZ r1, #3328 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3328 delay_slots=5 */ + 3191 "00000001" // /* MW 5 */ + 3192 "01000000" // /* MW 4 */ + 3193 "10000000" // /* MW 3 */ + 3194 "00000110" // /* MW 2 */ + 3195 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 66 +.delay_slot + 3196 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3197 "00000001" // /* MW 5 */ + 3198 "00000000" // /* MW 4 */ + 3199 "00000000" // /* MW 3 */ + 3200 "00001000" // /* MW 2 */ + 3201 "00000000" // /* MW 1 */ +.delay_slot + 3202 "11111000" // MOV p4, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3203 "11100000" // /* MW 3 */ + 3204 "01100101" // /* MW 2 */ + 3205 "00011100" // /* MW 1 */ +.delay_slot + 3206 "00011000" // PADDB [p4], #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3207 "10010000" // /* MW 3 */ + 3208 "11111111" // /* MW 2 */ + 3209 "00111100" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 78 37 first +.delay_slot + 3210 "00011000" // VST x0, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3211 "00010011" // /* MW 3 */ + 3212 "00000100" // /* MW 2 */ + 3213 "00001100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3215 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 first +.src_ref 2 "elementwise_binary_shared.h" 85 34 +.src_ref 2 "elementwise_binary_shared.h" 90 19 + 3216 "11010100" // MOVA dj0, #12; VBCST.16 x0, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3217 "11100101" // /* MW 5 */ + 3218 "00000010" // /* MW 4 */ + 3219 "10000000" // /* MW 3 */ + 3220 "10000010" // /* MW 2 */ + 3221 "00000001" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 85 34 first +.src_ref 2 "elementwise_binary_shared.h" 90 19 first + 3222 "10011000" // LDA.u8 r0, [p3, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3223 "00001010" // /* MW 3 */ + 3224 "00000000" // /* MW 2 */ + 3225 "00000011" // /* MW 1 */ + 3226 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3227 "00000000" // /* MW 1 */ + 3228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3229 "00000000" // /* MW 1 */ + 3230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3231 "00000000" // /* MW 1 */ + 3232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3233 "00000000" // /* MW 1 */ + 3234 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3235 "00000000" // /* MW 1 */ + 3236 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3237 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 90 12 +.src_ref 2 "elementwise_binary_shared.h" 90 35 + 3238 "10000100" // JNZ r0, #3280 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3280 delay_slots=5 */ + 3239 "00000001" // /* MW 5 */ + 3240 "01000000" // /* MW 4 */ + 3241 "01101000" // /* MW 3 */ + 3242 "00000110" // /* MW 2 */ + 3243 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 2 "elementwise_binary_shared.h" 132 18 +.delay_slot + 3244 "10111000" // MOV m0, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3245 "00000000" // /* MW 3 */ + 3246 "00000000" // /* MW 2 */ + 3247 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary_shared.h" 130 16 +.delay_slot + 3248 "10111000" // MOV m2, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3249 "10000000" // /* MW 3 */ + 3250 "00000000" // /* MW 2 */ + 3251 "00011010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3252 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3253 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3255 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3256 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3257 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary_shared.h" 128 16 + 3258 "10111010" // MOVA m1, #0; J #3296 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=3296 delay_slots=5 */ + 3259 "00100000" // /* MW 9 */ + 3260 "00000000" // /* MW 8 */ + 3261 "00000000" // /* MW 7 */ + 3262 "10011100" // /* MW 6 */ + 3263 "00000001" // /* MW 5 */ + 3264 "00000000" // /* MW 4 */ + 3265 "10000000" // /* MW 3 */ + 3266 "00000100" // /* MW 2 */ + 3267 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3269 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3271 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3273 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3275 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.delay_slot + 3276 "00011000" // VST x0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3277 "00010011" // /* MW 3 */ + 3278 "00000100" // /* MW 2 */ + 3279 "00001000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_128 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary_shared.h" 128 16 + 3280 "10111000" // MOV m1, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3281 "10000000" // /* MW 3 */ + 3282 "00000000" // /* MW 2 */ + 3283 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 2 "elementwise_binary_shared.h" 130 16 + 3284 "11110110" // NOPA; NOPB; VST x0, [p1]; MOV m2, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3285 "01010000" // /* MW 11 */ + 3286 "00000000" // /* MW 10 */ + 3287 "00000000" // /* MW 9 */ + 3288 "00000001" // /* MW 8 */ + 3289 "00010011" // /* MW 7 */ + 3290 "00000100" // /* MW 6 */ + 3291 "00100001" // /* MW 5 */ + 3292 "00000000" // /* MW 4 */ + 3293 "11110000" // /* MW 3 */ + 3294 "00101100" // /* MW 2 */ + 3295 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_144 + 3296 "10000100" // J #3424 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3424 delay_slots=5 */ + 3297 "00000000" // /* MW 5 */ + 3298 "00000000" // /* MW 4 */ + 3299 "10110000" // /* MW 3 */ + 3300 "00000110" // /* MW 2 */ + 3301 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 2 "elementwise_binary_shared.h" 128 16 +.src_ref 2 "elementwise_binary_shared.h" 132 18 +.delay_slot + 3302 "00000010" // MOVS p0, p4; MOV p4, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3303 "01110000" // /* MW 7 */ + 3304 "01100000" // /* MW 6 */ + 3305 "00110000" // /* MW 5 */ + 3306 "00000010" // /* MW 4 */ + 3307 "01100000" // /* MW 3 */ + 3308 "00010001" // /* MW 2 */ + 3309 "00010010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3311 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3313 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3315 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3316 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3317 "10000001" // /* MW 11 */ + 3318 "10101101" // /* MW 10 */ + 3319 "00000000" // /* MW 9 */ + 3320 "00000000" // /* MW 8 */ + 3321 "00000000" // /* MW 7 */ + 3322 "00000000" // /* MW 6 */ + 3323 "00100000" // /* MW 5 */ + 3324 "00000000" // /* MW 4 */ + 3325 "11110000" // /* MW 3 */ + 3326 "00101100" // /* MW 2 */ + 3327 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_176 +.src_ref 2 "elementwise_binary_shared.h" 109 97 + 3328 "00011000" // MOVX r1, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3329 "00001101" // /* MW 3 */ + 3330 "00000010" // /* MW 2 */ + 3331 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 109 97 first + 3332 "10011000" // EQ r1, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3333 "00100111" // /* MW 3 */ + 3334 "01000010" // /* MW 2 */ + 3335 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 109 78 + 3336 "10000100" // JNZ r1, #3376 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3376 delay_slots=5 */ + 3337 "00000001" // /* MW 5 */ + 3338 "01000000" // /* MW 4 */ + 3339 "10011000" // /* MW 3 */ + 3340 "00000110" // /* MW 2 */ + 3341 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 109 78 +.delay_slot + 3342 "01000100" // MOVXM p3, #508464 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3343 "01100000" // /* MW 5 */ + 3344 "11000100" // /* MW 4 */ + 3345 "11000110" // /* MW 3 */ + 3346 "00000111" // /* MW 2 */ + 3347 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3349 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3351 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3353 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 109 78 +.delay_slot + 3354 "01000100" // MOVXM r0, #1065353216 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3355 "00000000" // /* MW 5 */ + 3356 "00100000" // /* MW 4 */ + 3357 "00000000" // /* MW 3 */ + 3358 "10000000" // /* MW 2 */ + 3359 "00111111" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 109 78 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3360 "11100001" // NOPA; NOPB; NOPS; MOVXM r0, #-1082130432; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3361 "00000000" // /* MW 15 */ + 3362 "00000000" // /* MW 14 */ + 3363 "00010000" // /* MW 13 */ + 3364 "00000000" // /* MW 12 */ + 3365 "00001000" // /* MW 11 */ + 3366 "00000000" // /* MW 10 */ + 3367 "11100000" // /* MW 9 */ + 3368 "00101111" // /* MW 8 */ + 3369 "01011011" // /* MW 7 */ + 3370 "00000001" // /* MW 6 */ + 3371 "00100000" // /* MW 5 */ + 3372 "00000000" // /* MW 4 */ + 3373 "11110000" // /* MW 3 */ + 3374 "00101100" // /* MW 2 */ + 3375 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_224 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 2 "elementwise_binary_shared.h" 109 78 +.src_ref 2 "elementwise_binary_shared.h" 132 18 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3376 "01010100" // LDA.s8 r0, [p3]; MOV m0, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3377 "00000001" // /* MW 5 */ + 3378 "00000001" // /* MW 4 */ + 3379 "01010000" // /* MW 3 */ + 3380 "10000000" // /* MW 2 */ + 3381 "01100000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary_shared.h" 128 16 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3382 "10111000" // MOV m1, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3383 "00000000" // /* MW 3 */ + 3384 "00000000" // /* MW 2 */ + 3385 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary_shared.h" 130 16 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3386 "10111000" // MOV m2, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3387 "10000000" // /* MW 3 */ + 3388 "00000000" // /* MW 2 */ + 3389 "00011010" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3391 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3393 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 109 78 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3394 "01111000" // VINSERT.32 x0, x0, #0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3395 "00010001" // /* MW 3 */ + 3396 "00000000" // /* MW 2 */ + 3397 "00011000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 109 78 + 3398 "11111000" // VMOV bmll1, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3399 "10010010" // /* MW 3 */ + 3400 "00000000" // /* MW 2 */ + 3401 "00011001" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 109 78 + 3402 "00011000" // MOVX crRnd, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3403 "10000000" // /* MW 3 */ + 3404 "00111010" // /* MW 2 */ + 3405 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 109 78 + 3406 "00011000" // VCONV.bf16.fp32 wl0, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3407 "10010110" // /* MW 3 */ + 3408 "01000000" // /* MW 2 */ + 3409 "00001000" // /* MW 1 */ + 3410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3411 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 first +.src_ref 2 "elementwise_binary_shared.h" 109 78 + 3412 "01011000" // VEXTBCST.16 x0, x0, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3413 "00000011" // /* MW 3 */ + 3414 "00000001" // /* MW 2 */ + 3415 "00011000" // /* MW 1 */ + 3416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3417 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first + 3418 "00001100" // NOPA; VST x0, [sp, #-64] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3419 "01100110" // /* MW 5 */ + 3420 "11111000" // /* MW 4 */ + 3421 "11111111" // /* MW 3 */ + 3422 "00101100" // /* MW 2 */ + 3423 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_272 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary_shared.h" 125 4 first +.src_ref 2 "elementwise_binary_shared.h" 125 31 first +.src_ref 2 "elementwise_binary_shared.h" 128 16 first + 3424 "10110110" // LDA r1, [p5, #-16]; VLDB x1, [p4], m1; MOVXM ls, #3536 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3425 "00010000" // /* MW 11 */ + 3426 "11101000" // /* MW 10 */ + 3427 "01111110" // /* MW 9 */ + 3428 "00000000" // /* MW 8 */ + 3429 "00000000" // /* MW 7 */ + 3430 "00000000" // /* MW 6 */ + 3431 "11101000" // /* MW 5 */ + 3432 "01010000" // /* MW 4 */ + 3433 "11011000" // /* MW 3 */ + 3434 "10000110" // /* MW 2 */ + 3435 "10111000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary_shared.h" 125 4 +.src_ref 2 "elementwise_binary_shared.h" 125 31 +.src_ref 2 "elementwise_binary_shared.h" 130 16 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 3436 "10110110" // MOVA r3, #-5; VLDB x0, [p1], m2; MOVXM le, #3584 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3437 "00010000" // /* MW 11 */ + 3438 "00000000" // /* MW 10 */ + 3439 "10111111" // /* MW 9 */ + 3440 "00000001" // /* MW 8 */ + 3441 "00000000" // /* MW 7 */ + 3442 "00000000" // /* MW 6 */ + 3443 "01101000" // /* MW 5 */ + 3444 "10010000" // /* MW 4 */ + 3445 "00000010" // /* MW 3 */ + 3446 "01100011" // /* MW 2 */ + 3447 "11111111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary_shared.h" 128 16 first +.src_ref 2 "elementwise_binary_shared.h" 132 18 first +.src_ref 2 "elementwise_binary_shared.h" 136 44 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 3448 "00010010" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;VLDB x1, [p4], m1; MOVX r0, #60 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3449 "11110001" // /* MW 7 */ + 3450 "00000000" // /* MW 6 */ + 3451 "11101000" // /* MW 5 */ + 3452 "01010000" // /* MW 4 */ + 3453 "01111000" // /* MW 3 */ + 3454 "00000101" // /* MW 2 */ + 3455 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary_shared.h" 125 31 +.src_ref 2 "elementwise_binary_shared.h" 130 16 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3456 "10111010" // VLDA x0, [p1], m2; MOVXM p3, #508464 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3457 "00010000" // /* MW 9 */ + 3458 "00011000" // /* MW 8 */ + 3459 "10110001" // /* MW 7 */ + 3460 "11110001" // /* MW 6 */ + 3461 "00000001" // /* MW 5 */ + 3462 "00000000" // /* MW 4 */ + 3463 "01110000" // /* MW 3 */ + 3464 "00000011" // /* MW 2 */ + 3465 "00101001" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 125 31 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3466 "10011000" // LDA.s8 r2, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3467 "01000010" // /* MW 3 */ + 3468 "00000100" // /* MW 2 */ + 3469 "00000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary_shared.h" 132 18 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3470 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3471 "00101011" // /* MW 3 */ + 3472 "00001000" // /* MW 2 */ + 3473 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3475 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 125 31 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3476 "10011000" // LSHL r1, r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3477 "00111101" // /* MW 3 */ + 3478 "01000010" // /* MW 2 */ + 3479 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 125 4 +.src_ref 2 "elementwise_binary_shared.h" 136 44 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3480 "01100010" // ADD.NC lc, r1, #-3; VMAC.f dm1, dm0, x1, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3481 "00000001" // /* MW 7 */ + 3482 "00000010" // /* MW 6 */ + 3483 "00000001" // /* MW 5 */ + 3484 "10000110" // /* MW 4 */ + 3485 "11111110" // /* MW 3 */ + 3486 "01110000" // /* MW 2 */ + 3487 "00000101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary_shared.h" 128 16 first +.src_ref 2 "elementwise_binary_shared.h" 130 16 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 3488 "00111100" // VLDA x0, [p1], m2; VLDB x1, [p4], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3489 "11101000" // /* MW 5 */ + 3490 "01010000" // /* MW 4 */ + 3491 "01111000" // /* MW 3 */ + 3492 "00000011" // /* MW 2 */ + 3493 "00101001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary_shared.h" 132 18 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3494 "10111010" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3495 "01111110" // /* MW 9 */ + 3496 "10100101" // /* MW 8 */ + 3497 "00000001" // /* MW 7 */ + 3498 "00000000" // /* MW 6 */ + 3499 "00010000" // /* MW 5 */ + 3500 "00000000" // /* MW 4 */ + 3501 "01110000" // /* MW 3 */ + 3502 "00000101" // /* MW 2 */ + 3503 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary_shared.h" 144 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3504 "11100001" // NOPA; NOPB; NOPS; MOVX crRnd, r2; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3505 "00000000" // /* MW 15 */ + 3506 "00000000" // /* MW 14 */ + 3507 "01111000" // /* MW 13 */ + 3508 "10100101" // /* MW 12 */ + 3509 "00000001" // /* MW 11 */ + 3510 "00000000" // /* MW 10 */ + 3511 "11010100" // /* MW 9 */ + 3512 "00000101" // /* MW 8 */ + 3513 "01011011" // /* MW 7 */ + 3514 "00000001" // /* MW 6 */ + 3515 "00100000" // /* MW 5 */ + 3516 "00000000" // /* MW 4 */ + 3517 "11110000" // /* MW 3 */ + 3518 "00101100" // /* MW 2 */ + 3519 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 136 44 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3520 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3521 "00010000" // /* MW 15 */ + 3522 "00001000" // /* MW 14 */ + 3523 "01111000" // /* MW 13 */ + 3524 "10100101" // /* MW 12 */ + 3525 "00000001" // /* MW 11 */ + 3526 "00000000" // /* MW 10 */ + 3527 "00000000" // /* MW 9 */ + 3528 "00000000" // /* MW 8 */ + 3529 "01011011" // /* MW 7 */ + 3530 "00000001" // /* MW 6 */ + 3531 "00100000" // /* MW 5 */ + 3532 "00000000" // /* MW 4 */ + 3533 "11110000" // /* MW 3 */ + 3534 "00101100" // /* MW 2 */ + 3535 "00000000" // /* MW 1 */ +.label ZLS_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_384 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary_shared.h" 128 16 first +.src_ref 2 "elementwise_binary_shared.h" 130 16 first +.begin_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 3536 "11100001" // VLDA x0, [p1], m2; VLDB x1, [p4], m1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3537 "00000000" // /* MW 15 */ + 3538 "00000000" // /* MW 14 */ + 3539 "01111000" // /* MW 13 */ + 3540 "10100101" // /* MW 12 */ + 3541 "00000001" // /* MW 11 */ + 3542 "00000000" // /* MW 10 */ + 3543 "00000000" // /* MW 9 */ + 3544 "00000000" // /* MW 8 */ + 3545 "01011011" // /* MW 7 */ + 3546 "00000001" // /* MW 6 */ + 3547 "11101000" // /* MW 5 */ + 3548 "01010000" // /* MW 4 */ + 3549 "01111000" // /* MW 3 */ + 3550 "00000011" // /* MW 2 */ + 3551 "00101001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary_shared.h" 132 18 first +.src_ref 2 "elementwise_binary_shared.h" 144 18 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3552 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3553 "00000000" // /* MW 15 */ + 3554 "00000000" // /* MW 14 */ + 3555 "01111000" // /* MW 13 */ + 3556 "10100101" // /* MW 12 */ + 3557 "00000001" // /* MW 11 */ + 3558 "00000000" // /* MW 10 */ + 3559 "00000000" // /* MW 9 */ + 3560 "00000000" // /* MW 8 */ + 3561 "10100011" // /* MW 7 */ + 3562 "00011100" // /* MW 6 */ + 3563 "00100010" // /* MW 5 */ + 3564 "00000000" // /* MW 4 */ + 3565 "01110000" // /* MW 3 */ + 3566 "00000101" // /* MW 2 */ + 3567 "00000001" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3568 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3569 "00000000" // /* MW 15 */ + 3570 "00000000" // /* MW 14 */ + 3571 "01111000" // /* MW 13 */ + 3572 "10100101" // /* MW 12 */ + 3573 "00000001" // /* MW 11 */ + 3574 "00000000" // /* MW 10 */ + 3575 "00000000" // /* MW 9 */ + 3576 "00000000" // /* MW 8 */ + 3577 "01011011" // /* MW 7 */ + 3578 "00000001" // /* MW 6 */ + 3579 "00100000" // /* MW 5 */ + 3580 "00000000" // /* MW 4 */ + 3581 "11110000" // /* MW 3 */ + 3582 "00101100" // /* MW 2 */ + 3583 "00000000" // /* MW 1 */ +.label ZLE_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_432 +.src_ref 2 "elementwise_binary_shared.h" 136 44 first +.end_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3584 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3585 "00010000" // /* MW 15 */ + 3586 "00001000" // /* MW 14 */ + 3587 "01111000" // /* MW 13 */ + 3588 "10100101" // /* MW 12 */ + 3589 "00000001" // /* MW 11 */ + 3590 "00000000" // /* MW 10 */ + 3591 "00000000" // /* MW 9 */ + 3592 "00000000" // /* MW 8 */ + 3593 "01011011" // /* MW 7 */ + 3594 "00000001" // /* MW 6 */ + 3595 "00100000" // /* MW 5 */ + 3596 "00000000" // /* MW 4 */ + 3597 "11110000" // /* MW 3 */ + 3598 "00101100" // /* MW 2 */ + 3599 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 3600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3601 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary_shared.h" 144 18 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3602 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3603 "10100011" // /* MW 3 */ + 3604 "00011100" // /* MW 2 */ + 3605 "00001010" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 3606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3607 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 136 44 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 3608 "01001000" // VMAC.f dm1, dm0, x1, x0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3609 "00000001" // /* MW 3 */ + 3610 "00000010" // /* MW 2 */ + 3611 "00000001" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3613 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 146 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3614 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3615 "00000000" // /* MW 3 */ + 3616 "00101000" // /* MW 2 */ + 3617 "00010000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary_shared.h" 144 18 first +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3618 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3619 "10100011" // /* MW 3 */ + 3620 "00011100" // /* MW 2 */ + 3621 "00001010" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 146 first +.delay_slot + 3622 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3623 "00000001" // /* MW 5 */ + 3624 "00000000" // /* MW 4 */ + 3625 "00000000" // /* MW 3 */ + 3626 "11111000" // /* MW 2 */ + 3627 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3628 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3629 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary_shared.h" 144 18 first +.delay_slot + 3630 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3631 "10100011" // /* MW 3 */ + 3632 "00011100" // /* MW 2 */ + 3633 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3634 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE__end +.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_end0 + 3635 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E +.function run _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E +.src_ref 2 "elementwise_binary_shared.h" 196 first +.src_ref 2 "elementwise_binary_shared.h" 203 19 +.function_start + 3648 "01000100" // MOVXM p2, #508684 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3649 "00011000" // /* MW 5 */ + 3650 "11000110" // /* MW 4 */ + 3651 "11000100" // /* MW 3 */ + 3652 "00000111" // /* MW 2 */ + 3653 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 203 19 first +.src_ref 2 "elementwise_binary_shared.h" 204 12 +.src_ref 2 "elementwise_binary_shared.h" 204 12 +.src_ref 2 "elementwise_binary_shared.h" 206 12 +.src_ref 2 "elementwise_binary_shared.h" 206 12 + 3654 "01110110" // LDA.u8 r0, [p2]; MOVS p2, p1; MOVXM p3, #508672 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3655 "00010000" // /* MW 11 */ + 3656 "10000000" // /* MW 10 */ + 3657 "10110001" // /* MW 9 */ + 3658 "11110001" // /* MW 8 */ + 3659 "00000001" // /* MW 7 */ + 3660 "00000000" // /* MW 6 */ + 3661 "10001011" // /* MW 5 */ + 3662 "10000100" // /* MW 4 */ + 3663 "01010010" // /* MW 3 */ + 3664 "10000001" // /* MW 2 */ + 3665 "01000000" // /* MW 1 */ + 3666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3667 "00000000" // /* MW 1 */ + 3668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3669 "00000000" // /* MW 1 */ + 3670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3671 "00000000" // /* MW 1 */ + 3672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3673 "00000000" // /* MW 1 */ + 3674 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3675 "00000000" // /* MW 1 */ + 3676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3677 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 203 12 +.src_ref 2 "elementwise_binary_shared.h" 203 35 + 3678 "10000100" // JZ r0, #3744 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3744 delay_slots=5 */ + 3679 "00000001" // /* MW 5 */ + 3680 "00000000" // /* MW 4 */ + 3681 "01010000" // /* MW 3 */ + 3682 "00000111" // /* MW 2 */ + 3683 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 209 4 +.delay_slot + 3684 "11111000" // MOV dc0, lr /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3685 "11100000" // /* MW 3 */ + 3686 "11000001" // /* MW 2 */ + 3687 "00011000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 196 +.delay_slot + 3688 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3689 "00000001" // /* MW 5 */ + 3690 "00000000" // /* MW 4 */ + 3691 "00000000" // /* MW 3 */ + 3692 "00001000" // /* MW 2 */ + 3693 "00000000" // /* MW 1 */ +.delay_slot + 3694 "11111000" // MOV r1, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3695 "11100000" // /* MW 3 */ + 3696 "01010101" // /* MW 2 */ + 3697 "00011000" // /* MW 1 */ +.delay_slot + 3698 "00011000" // ADD.NC p1, r1, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3699 "11100000" // /* MW 3 */ + 3700 "01100000" // /* MW 2 */ + 3701 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 538 13 first +.src_ref 4 "vector_native_types.hpp" 374 137 first +.delay_slot + 3702 "00011000" // VST sfh, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3703 "00101011" // /* MW 3 */ + 3704 "00000111" // /* MW 2 */ + 3705 "00001001" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 206 12 first +.no_stack_arguments + 3706 "00000100" // JL #3152 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3152 delay_slots=5 */ + 3707 "00000001" // /* MW 5 */ + 3708 "00000000" // /* MW 4 */ + 3709 "00101000" // /* MW 3 */ + 3710 "00000110" // /* MW 2 */ + 3711 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3713 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3715 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3717 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3719 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3720 "00100010" // NOPA; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3721 "00011100" // /* MW 7 */ + 3722 "00000000" // /* MW 6 */ + 3723 "00000000" // /* MW 5 */ + 3724 "00000100" // /* MW 4 */ + 3725 "11110000" // /* MW 3 */ + 3726 "00101100" // /* MW 2 */ + 3727 "00000000" // /* MW 1 */ +.return_address + 3728 "10000100" // J #3776 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3776 delay_slots=5 */ + 3729 "00000000" // /* MW 5 */ + 3730 "00000000" // /* MW 4 */ + 3731 "01100000" // /* MW 3 */ + 3732 "00000111" // /* MW 2 */ + 3733 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3734 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3735 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3736 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3737 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3738 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3739 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3741 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3743 "00000000" // /* MW 1 */ +.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_96 +.src_ref 2 "elementwise_binary_shared.h" 204 12 first +.no_stack_arguments + 3744 "00000100" // JL #3152 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3152 delay_slots=5 */ + 3745 "00000001" // /* MW 5 */ + 3746 "00000000" // /* MW 4 */ + 3747 "00101000" // /* MW 3 */ + 3748 "00000110" // /* MW 2 */ + 3749 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 204 12 +.src_ref 2 "elementwise_binary_shared.h" 204 12 +.delay_slot + 3750 "00000010" // MOVS p0, p1; MOV p1, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3751 "01110000" // /* MW 7 */ + 3752 "01100000" // /* MW 6 */ + 3753 "10110000" // /* MW 5 */ + 3754 "00000000" // /* MW 4 */ + 3755 "01100000" // /* MW 3 */ + 3756 "10010001" // /* MW 2 */ + 3757 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3758 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3759 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3760 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3761 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3762 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3763 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3764 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3765 "10000001" // /* MW 11 */ + 3766 "10101101" // /* MW 10 */ + 3767 "00000000" // /* MW 9 */ + 3768 "00000000" // /* MW 8 */ + 3769 "00000000" // /* MW 7 */ + 3770 "00000000" // /* MW 6 */ + 3771 "00100000" // /* MW 5 */ + 3772 "00000000" // /* MW 4 */ + 3773 "11110000" // /* MW 3 */ + 3774 "00101100" // /* MW 2 */ + 3775 "00000000" // /* MW 1 */ +.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_128 +.src_ref 2 "elementwise_binary_shared.h" 209 4 +.return_address + 3776 "11111000" // MOV lr, dc0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3777 "10000000" // /* MW 3 */ + 3778 "01110001" // /* MW 2 */ + 3779 "00011111" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 209 4 first + 3780 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3781 "00000000" // /* MW 3 */ + 3782 "00101000" // /* MW 2 */ + 3783 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 209 4 +.delay_slot + 3784 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3785 "00000001" // /* MW 5 */ + 3786 "00000000" // /* MW 4 */ + 3787 "00000000" // /* MW 3 */ + 3788 "11111000" // /* MW 2 */ + 3789 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3791 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3793 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3794 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3795 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3796 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_end0 + 3797 "00000000" // /* MW 1 */ +.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_mul1d_attribute_broadcasting _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 6 "superkernels.cpp" 181 first +.src_ref 6 "superkernels.cpp" 186 6 +.function_start + 3808 "01000100" // MOVXM p3, #508416 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3809 "00000000" // /* MW 5 */ + 3810 "11000100" // /* MW 4 */ + 3811 "11000110" // /* MW 3 */ + 3812 "00000111" // /* MW 2 */ + 3813 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 186 6 first + 3814 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3815 "11000001" // /* MW 5 */ + 3816 "10110101" // /* MW 4 */ + 3817 "11011000" // /* MW 3 */ + 3818 "11000010" // /* MW 2 */ + 3819 "01100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 181 + 3820 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3821 "00000001" // /* MW 5 */ + 3822 "00000000" // /* MW 4 */ + 3823 "00000000" // /* MW 3 */ + 3824 "00001000" // /* MW 2 */ + 3825 "00000000" // /* MW 1 */ + 3826 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3827 "01110000" // /* MW 7 */ + 3828 "11010000" // /* MW 6 */ + 3829 "00001011" // /* MW 5 */ + 3830 "00000000" // /* MW 4 */ + 3831 "10110000" // /* MW 3 */ + 3832 "01100011" // /* MW 2 */ + 3833 "11111111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 183 11 + 3834 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #508424 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3835 "00010001" // /* MW 9 */ + 3836 "00000100" // /* MW 8 */ + 3837 "00110001" // /* MW 7 */ + 3838 "11110011" // /* MW 6 */ + 3839 "00000001" // /* MW 5 */ + 3840 "00000000" // /* MW 4 */ + 3841 "10110000" // /* MW 3 */ + 3842 "10000010" // /* MW 2 */ + 3843 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 + 3844 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3845 "11000000" // /* MW 3 */ + 3846 "11010100" // /* MW 2 */ + 3847 "00011011" // /* MW 1 */ + 3848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3849 "00000000" // /* MW 1 */ + 3850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3851 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 186 6 +.src_ref 6 "superkernels.cpp" 186 16 + 3852 "10000100" // JNZ r16, #4016 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4016 delay_slots=5 */ + 3853 "00000001" // /* MW 5 */ + 3854 "01000000" // /* MW 4 */ + 3855 "11011000" // /* MW 3 */ + 3856 "00000111" // /* MW 2 */ + 3857 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 183 22 first +.delay_slot + 3858 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3859 "10010000" // /* MW 3 */ + 3860 "01100010" // /* MW 2 */ + 3861 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 183 30 +.delay_slot + 3862 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3863 "11111011" // /* MW 3 */ + 3864 "01100011" // /* MW 2 */ + 3865 "00010100" // /* MW 1 */ +.delay_slot + 3866 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3867 "00111101" // /* MW 3 */ + 3868 "11110100" // /* MW 2 */ + 3869 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 6 "superkernels.cpp" 183 11 +.delay_slot + 3870 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3871 "01110000" // /* MW 7 */ + 3872 "01100000" // /* MW 6 */ + 3873 "00110000" // /* MW 5 */ + 3874 "00000011" // /* MW 4 */ + 3875 "00110000" // /* MW 3 */ + 3876 "11000110" // /* MW 2 */ + 3877 "11000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 189 4 +.src_ref 6 "superkernels.cpp" 200 2 +.delay_slot + 3878 "01000100" // MOVXM p0, #508672 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3879 "00000000" // /* MW 5 */ + 3880 "11000110" // /* MW 4 */ + 3881 "11000000" // /* MW 3 */ + 3882 "00000111" // /* MW 2 */ + 3883 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3884 "01000100" // MOVXM p2, #508464 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3885 "01100000" // /* MW 5 */ + 3886 "11000100" // /* MW 4 */ + 3887 "11000100" // /* MW 3 */ + 3888 "00000111" // /* MW 2 */ + 3889 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3890 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #508460 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3891 "00010000" // /* MW 9 */ + 3892 "00010110" // /* MW 8 */ + 3893 "00110001" // /* MW 7 */ + 3894 "11110001" // /* MW 6 */ + 3895 "00000001" // /* MW 5 */ + 3896 "00000000" // /* MW 4 */ + 3897 "11100000" // /* MW 3 */ + 3898 "11000000" // /* MW 2 */ + 3899 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3901 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 189 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 3902 "00000100" // JL #3072 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3072 delay_slots=5 */ + 3903 "00000001" // /* MW 5 */ + 3904 "00000000" // /* MW 4 */ + 3905 "00000000" // /* MW 3 */ + 3906 "00000110" // /* MW 2 */ + 3907 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3909 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3911 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3912 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3913 "00110001" // /* MW 3 */ + 3914 "00100000" // /* MW 2 */ + 3915 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 3916 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3917 "00000101" // /* MW 3 */ + 3918 "00100000" // /* MW 2 */ + 3919 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 3920 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3921 "00000000" // /* MW 15 */ + 3922 "00000000" // /* MW 14 */ + 3923 "01111000" // /* MW 13 */ + 3924 "10100101" // /* MW 12 */ + 3925 "00000001" // /* MW 11 */ + 3926 "00000000" // /* MW 10 */ + 3927 "00000000" // /* MW 9 */ + 3928 "10000000" // /* MW 8 */ + 3929 "00010001" // /* MW 7 */ + 3930 "00000110" // /* MW 6 */ + 3931 "00100010" // /* MW 5 */ + 3932 "00000000" // /* MW 4 */ + 3933 "11110000" // /* MW 3 */ + 3934 "00101100" // /* MW 2 */ + 3935 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 193 18 +.return_address + 3936 "01000100" // MOVXM p2, #508424 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3937 "00010000" // /* MW 5 */ + 3938 "11000100" // /* MW 4 */ + 3939 "11000100" // /* MW 3 */ + 3940 "00000111" // /* MW 2 */ + 3941 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 193 18 first +.src_ref 6 "superkernels.cpp" 193 65 + 3942 "10111010" // LDA r16, [p2]; MOVXM p2, #508672 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3943 "00010000" // /* MW 9 */ + 3944 "10000000" // /* MW 8 */ + 3945 "00110001" // /* MW 7 */ + 3946 "11110001" // /* MW 6 */ + 3947 "00000001" // /* MW 5 */ + 3948 "00000000" // /* MW 4 */ + 3949 "11010000" // /* MW 3 */ + 3950 "11000010" // /* MW 2 */ + 3951 "01000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 191 51 +.src_ref 6 "superkernels.cpp" 193 65 +.src_ref 6 "superkernels.cpp" 200 2 + 3952 "10111010" // LDA r17, [p2]; MOVXM p2, #508672 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3953 "00010000" // /* MW 9 */ + 3954 "10000000" // /* MW 8 */ + 3955 "00110001" // /* MW 7 */ + 3956 "11110001" // /* MW 6 */ + 3957 "00000001" // /* MW 5 */ + 3958 "00000000" // /* MW 4 */ + 3959 "11010000" // /* MW 3 */ + 3960 "11000110" // /* MW 2 */ + 3961 "01000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 191 51 first +.src_ref 6 "superkernels.cpp" 193 16 +.src_ref 6 "superkernels.cpp" 198 47 + 3962 "10111010" // LDA.u16 r18, [p2, #10]; MOVXM p1, #508428 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3963 "00010000" // /* MW 9 */ + 3964 "00000110" // /* MW 8 */ + 3965 "10110001" // /* MW 7 */ + 3966 "11110000" // /* MW 6 */ + 3967 "00000001" // /* MW 5 */ + 3968 "00000000" // /* MW 4 */ + 3969 "01010000" // /* MW 3 */ + 3970 "11001011" // /* MW 2 */ + 3971 "01001010" // /* MW 1 */ + 3972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3973 "00000000" // /* MW 1 */ + 3974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3975 "00000000" // /* MW 1 */ + 3976 "10000100" // J #4032 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=4032 delay_slots=5 */ + 3977 "00000000" // /* MW 5 */ + 3978 "00000000" // /* MW 4 */ + 3979 "11100000" // /* MW 3 */ + 3980 "00000111" // /* MW 2 */ + 3981 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 191 13 +.delay_slot + 3982 "01000100" // MOVXM p0, #508456 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3983 "01010000" // /* MW 5 */ + 3984 "11000100" // /* MW 4 */ + 3985 "11000000" // /* MW 3 */ + 3986 "00000111" // /* MW 2 */ + 3987 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3989 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 193 27 first +.delay_slot + 3990 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3991 "00001111" // /* MW 3 */ + 3992 "01100001" // /* MW 2 */ + 3993 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 191 13 first +.delay_slot + 3994 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3995 "10100011" // /* MW 5 */ + 3996 "00001100" // /* MW 4 */ + 3997 "11110000" // /* MW 3 */ + 3998 "00101100" // /* MW 2 */ + 3999 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 193 16 first +.delay_slot + 4000 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4001 "00000000" // /* MW 15 */ + 4002 "00000000" // /* MW 14 */ + 4003 "01111000" // /* MW 13 */ + 4004 "10100101" // /* MW 12 */ + 4005 "00000001" // /* MW 11 */ + 4006 "00000000" // /* MW 10 */ + 4007 "00000000" // /* MW 9 */ + 4008 "10000000" // /* MW 8 */ + 4009 "00010001" // /* MW 7 */ + 4010 "00000110" // /* MW 6 */ + 4011 "00100001" // /* MW 5 */ + 4012 "00000000" // /* MW 4 */ + 4013 "11110000" // /* MW 3 */ + 4014 "00101100" // /* MW 2 */ + 4015 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 6 "superkernels.cpp" 198 47 +.src_ref 6 "superkernels.cpp" 200 2 + 4016 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #508428; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4017 "00000000" // /* MW 15 */ + 4018 "00000000" // /* MW 14 */ + 4019 "00010000" // /* MW 13 */ + 4020 "00000110" // /* MW 12 */ + 4021 "10110001" // /* MW 11 */ + 4022 "11110000" // /* MW 10 */ + 4023 "00000001" // /* MW 9 */ + 4024 "00000000" // /* MW 8 */ + 4025 "10001011" // /* MW 7 */ + 4026 "10000000" // /* MW 6 */ + 4027 "00100010" // /* MW 5 */ + 4028 "00000000" // /* MW 4 */ + 4029 "11110000" // /* MW 3 */ + 4030 "00101100" // /* MW 2 */ + 4031 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 +.src_ref 1 "io_buffer_main.h" 242 49 first + 4032 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4033 "00000000" // /* MW 7 */ + 4034 "11000011" // /* MW 6 */ + 4035 "10110011" // /* MW 5 */ + 4036 "00000011" // /* MW 4 */ + 4037 "01100000" // /* MW 3 */ + 4038 "10010001" // /* MW 2 */ + 4039 "01110011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 197 2 + 4040 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4041 "00010000" // /* MW 9 */ + 4042 "00000000" // /* MW 8 */ + 4043 "00110001" // /* MW 7 */ + 4044 "11110000" // /* MW 6 */ + 4045 "00000001" // /* MW 5 */ + 4046 "00000000" // /* MW 4 */ + 4047 "11010000" // /* MW 3 */ + 4048 "11101110" // /* MW 2 */ + 4049 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 4050 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4051 "00010110" // /* MW 3 */ + 4052 "11111110" // /* MW 2 */ + 4053 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 4054 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4055 "00110110" // /* MW 3 */ + 4056 "11111110" // /* MW 2 */ + 4057 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first + 4058 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4059 "01010110" // /* MW 3 */ + 4060 "01000110" // /* MW 2 */ + 4061 "00000111" // /* MW 1 */ + 4062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4063 "00000000" // /* MW 1 */ + 4064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4065 "00000000" // /* MW 1 */ + 4066 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4067 "00000000" // /* MW 1 */ + 4068 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4069 "00000000" // /* MW 1 */ + 4070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4071 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 4072 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4073 "00000010" // /* MW 3 */ + 4074 "01100001" // /* MW 2 */ + 4075 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 + 4076 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4077 "00010001" // /* MW 3 */ + 4078 "00000110" // /* MW 2 */ + 4079 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 + 4080 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4081 "11111101" // /* MW 3 */ + 4082 "11100000" // /* MW 2 */ + 4083 "00010111" // /* MW 1 */ + 4084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4085 "00000000" // /* MW 1 */ + 4086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4087 "00000000" // /* MW 1 */ + 4088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4089 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 4090 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4091 "00001000" // /* MW 3 */ + 4092 "10010011" // /* MW 2 */ + 4093 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 198 45 + 4094 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4095 "10000001" // /* MW 5 */ + 4096 "10101101" // /* MW 4 */ + 4097 "10100111" // /* MW 3 */ + 4098 "00000000" // /* MW 2 */ + 4099 "00000100" // /* MW 1 */ + 4100 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4101 "00000000" // /* MW 1 */ + 4102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4103 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 197 2 first + 4104 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4105 "00110110" // /* MW 3 */ + 4106 "00000110" // /* MW 2 */ + 4107 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 1 "io_buffer_main.h" 348 51 + 4108 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4109 "10000001" // /* MW 5 */ + 4110 "11011101" // /* MW 4 */ + 4111 "11011100" // /* MW 3 */ + 4112 "11001010" // /* MW 2 */ + 4113 "11000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 198 47 first + 4114 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4115 "01110110" // /* MW 3 */ + 4116 "00000110" // /* MW 2 */ + 4117 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 4118 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4119 "10011110" // /* MW 3 */ + 4120 "01011100" // /* MW 2 */ + 4121 "00000111" // /* MW 1 */ + 4122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4123 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 200 2 first +.no_stack_arguments + 4124 "00000100" // JL #3648 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3648 delay_slots=5 */ + 4125 "00000001" // /* MW 5 */ + 4126 "00000000" // /* MW 4 */ + 4127 "00100000" // /* MW 3 */ + 4128 "00000111" // /* MW 2 */ + 4129 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4131 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 197 2 first +.delay_slot + 4132 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4133 "00000111" // /* MW 3 */ + 4134 "01100010" // /* MW 2 */ + 4135 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 197 2 +.delay_slot + 4136 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4137 "00110001" // /* MW 3 */ + 4138 "00000110" // /* MW 2 */ + 4139 "00001000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 198 45 first +.delay_slot + 4140 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4141 "00001101" // /* MW 3 */ + 4142 "11100001" // /* MW 2 */ + 4143 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 198 45 +.delay_slot + 4144 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4145 "00000000" // /* MW 15 */ + 4146 "00000000" // /* MW 14 */ + 4147 "10101000" // /* MW 13 */ + 4148 "10100000" // /* MW 12 */ + 4149 "00110100" // /* MW 11 */ + 4150 "00000000" // /* MW 10 */ + 4151 "00000000" // /* MW 9 */ + 4152 "00000000" // /* MW 8 */ + 4153 "01011011" // /* MW 7 */ + 4154 "00000001" // /* MW 6 */ + 4155 "00100000" // /* MW 5 */ + 4156 "00000000" // /* MW 4 */ + 4157 "11110000" // /* MW 3 */ + 4158 "00101100" // /* MW 2 */ + 4159 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first +.src_ref 6 "superkernels.cpp" 202 6 +.src_ref 6 "superkernels.cpp" 203 14 +.return_address + 4160 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4161 "00010000" // /* MW 9 */ + 4162 "00000000" // /* MW 8 */ + 4163 "00110001" // /* MW 7 */ + 4164 "11110011" // /* MW 6 */ + 4165 "00000001" // /* MW 5 */ + 4166 "00000000" // /* MW 4 */ + 4167 "11010000" // /* MW 3 */ + 4168 "11000110" // /* MW 2 */ + 4169 "11001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 449 8 + 4170 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4171 "00000101" // /* MW 3 */ + 4172 "00100000" // /* MW 2 */ + 4173 "00010000" // /* MW 1 */ + 4174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4175 "00000000" // /* MW 1 */ + 4176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4177 "00000000" // /* MW 1 */ + 4178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4179 "00000000" // /* MW 1 */ + 4180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4181 "00000000" // /* MW 1 */ + 4182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4183 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 4184 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4185 "00001000" // /* MW 3 */ + 4186 "01010001" // /* MW 2 */ + 4187 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first +.src_ref 6 "superkernels.cpp" 202 19 + 4188 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #508456 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4189 "00010000" // /* MW 9 */ + 4190 "00010100" // /* MW 8 */ + 4191 "00110001" // /* MW 7 */ + 4192 "11110001" // /* MW 6 */ + 4193 "00000001" // /* MW 5 */ + 4194 "00000000" // /* MW 4 */ + 4195 "11010000" // /* MW 3 */ + 4196 "11001110" // /* MW 2 */ + 4197 "11111100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 202 6 first + 4198 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4199 "00110110" // /* MW 3 */ + 4200 "00000110" // /* MW 2 */ + 4201 "00000110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 202 19 + 4202 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4203 "01010110" // /* MW 3 */ + 4204 "00000110" // /* MW 2 */ + 4205 "00000010" // /* MW 1 */ + 4206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4207 "00000000" // /* MW 1 */ + 4208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4209 "00000000" // /* MW 1 */ + 4210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4211 "00000000" // /* MW 1 */ + 4212 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4213 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 first + 4214 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4215 "00110001" // /* MW 3 */ + 4216 "00100001" // /* MW 2 */ + 4217 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 4218 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4219 "00010001" // /* MW 3 */ + 4220 "11100110" // /* MW 2 */ + 4221 "00001111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 202 16 first + 4222 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4223 "00101000" // /* MW 3 */ + 4224 "01100001" // /* MW 2 */ + 4225 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 202 6 + 4226 "10000100" // JNZ r16, #4256 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4256 delay_slots=5 */ + 4227 "00000001" // /* MW 5 */ + 4228 "01000000" // /* MW 4 */ + 4229 "01010000" // /* MW 3 */ + 4230 "00001000" // /* MW 2 */ + 4231 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4233 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4234 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4235 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4236 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4237 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4238 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4239 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4240 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4241 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 203 14 + 4242 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4243 "00000001" // /* MW 3 */ + 4244 "00100000" // /* MW 2 */ + 4245 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 203 14 first + 4246 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4247 "00000000" // /* MW 9 */ + 4248 "00000000" // /* MW 8 */ + 4249 "00000000" // /* MW 7 */ + 4250 "10000000" // /* MW 6 */ + 4251 "00010001" // /* MW 5 */ + 4252 "00000110" // /* MW 4 */ + 4253 "11110110" // /* MW 3 */ + 4254 "00101100" // /* MW 2 */ + 4255 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 6 "superkernels.cpp" 205 + 4256 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4257 "00111001" // /* MW 3 */ + 4258 "11110100" // /* MW 2 */ + 4259 "00000111" // /* MW 1 */ + 4260 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4261 "00011001" // /* MW 3 */ + 4262 "11111011" // /* MW 2 */ + 4263 "00000111" // /* MW 1 */ + 4264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4265 "00000000" // /* MW 1 */ + 4266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4267 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 4268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4269 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4270 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4271 "11110001" // /* MW 3 */ + 4272 "11111101" // /* MW 2 */ + 4273 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4275 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 205 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4276 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 4277 "00000000" // /* MW 3 */ + 4278 "00101000" // /* MW 2 */ + 4279 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4280 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4281 "10100000" // /* MW 3 */ + 4282 "01100111" // /* MW 2 */ + 4283 "00011111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 205 +.delay_slot + 4284 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4285 "00000001" // /* MW 5 */ + 4286 "00000000" // /* MW 4 */ + 4287 "00000000" // /* MW 3 */ + 4288 "11111000" // /* MW 2 */ + 4289 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4291 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4293 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4294 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 4295 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv +.function shared_setup_backbone _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv +.src_ref 2 "elementwise_binary_shared.h" 164 first +.src_ref 2 "elementwise_binary_shared.h" 170 22 +.src_ref 2 "elementwise_binary_shared.h" 170 24 first +.function_start + 4304 "10111010" // LDA el0, [p1], #4; MOVXM p0, #508736 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4305 "00010000" // /* MW 9 */ + 4306 "10100000" // /* MW 8 */ + 4307 "00110001" // /* MW 7 */ + 4308 "11110000" // /* MW 6 */ + 4309 "00000001" // /* MW 5 */ + 4310 "00000000" // /* MW 4 */ + 4311 "11010000" // /* MW 3 */ + 4312 "10000101" // /* MW 2 */ + 4313 "00100011" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 36 +.src_ref 2 "elementwise_binary_shared.h" 175 36 +.src_ref 2 "elementwise_binary_shared.h" 175 48 + 4314 "10111010" // MOVA r0, #-128; MOVX r1, #256; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4315 "01011000" // /* MW 9 */ + 4316 "00000000" // /* MW 8 */ + 4317 "00001000" // /* MW 7 */ + 4318 "00001011" // /* MW 6 */ + 4319 "00010000" // /* MW 5 */ + 4320 "00001000" // /* MW 4 */ + 4321 "00000000" // /* MW 3 */ + 4322 "00000000" // /* MW 2 */ + 4323 "11110000" // /* MW 1 */ + 4324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4325 "00000000" // /* MW 1 */ + 4326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4327 "00000000" // /* MW 1 */ + 4328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4329 "00000000" // /* MW 1 */ + 4330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4331 "00000000" // /* MW 1 */ + 4332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4333 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 170 22 first + 4334 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4335 "00101001" // /* MW 3 */ + 4336 "00011100" // /* MW 2 */ + 4337 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 171 24 first + 4338 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4339 "00101110" // /* MW 3 */ + 4340 "00011100" // /* MW 2 */ + 4341 "00000001" // /* MW 1 */ + 4342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4343 "00000000" // /* MW 1 */ + 4344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4345 "00000000" // /* MW 1 */ + 4346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4347 "00000000" // /* MW 1 */ + 4348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4349 "00000000" // /* MW 1 */ + 4350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4351 "00000000" // /* MW 1 */ + 4352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4353 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 171 22 + 4354 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4355 "00101001" // /* MW 3 */ + 4356 "00011100" // /* MW 2 */ + 4357 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 172 24 first + 4358 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4359 "00101110" // /* MW 3 */ + 4360 "00000100" // /* MW 2 */ + 4361 "00000001" // /* MW 1 */ + 4362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4363 "00000000" // /* MW 1 */ + 4364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4365 "00000000" // /* MW 1 */ + 4366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4367 "00000000" // /* MW 1 */ + 4368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4369 "00000000" // /* MW 1 */ + 4370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4371 "00000000" // /* MW 1 */ + 4372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4373 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 172 22 + 4374 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4375 "00101001" // /* MW 3 */ + 4376 "00011100" // /* MW 2 */ + 4377 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 173 24 first + 4378 "10011000" // LDA r3, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4379 "01110110" // /* MW 3 */ + 4380 "00010100" // /* MW 2 */ + 4381 "00000001" // /* MW 1 */ + 4382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4383 "00000000" // /* MW 1 */ + 4384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4385 "00000000" // /* MW 1 */ + 4386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4387 "00000000" // /* MW 1 */ + 4388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4389 "00000000" // /* MW 1 */ + 4390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4391 "00000000" // /* MW 1 */ + 4392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4393 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 173 22 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4394 "10011000" // ST r3, [p0], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4395 "01110001" // /* MW 3 */ + 4396 "01001100" // /* MW 2 */ + 4397 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 34 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4398 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4399 "00010111" // /* MW 3 */ + 4400 "00000100" // /* MW 2 */ + 4401 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 176 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4402 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 4403 "00000000" // /* MW 3 */ + 4404 "00101000" // /* MW 2 */ + 4405 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4406 "01000100" // MOVXM r2, #65280 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4407 "00000000" // /* MW 5 */ + 4408 "00111110" // /* MW 4 */ + 4409 "11110001" // /* MW 3 */ + 4410 "00000000" // /* MW 2 */ + 4411 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 48 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4412 "10011000" // AND r2, r3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4413 "00100100" // /* MW 3 */ + 4414 "11000100" // /* MW 2 */ + 4415 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4416 "10011000" // EQ r27, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4417 "00100111" // /* MW 3 */ + 4418 "01110110" // /* MW 2 */ + 4419 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 36 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4420 "00011000" // SEL.EQZ r0, r0, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4421 "10000010" // /* MW 3 */ + 4422 "00000001" // /* MW 2 */ + 4423 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_end0 + 4425 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv +.function setup _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv +.src_ref 2 "elementwise_binary_shared.h" 178 +.src_ref 2 "elementwise_binary_shared.h" 178 first +.function_start + 4432 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4433 "00000001" // /* MW 5 */ + 4434 "00000000" // /* MW 4 */ + 4435 "00000000" // /* MW 3 */ + 4436 "00001000" // /* MW 2 */ + 4437 "00000000" // /* MW 1 */ + 4438 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4439 "00111101" // /* MW 3 */ + 4440 "11111000" // /* MW 2 */ + 4441 "00001111" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 179 8 first +.no_stack_arguments + 4442 "00000100" // JL #4304 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4304 delay_slots=5 */ + 4443 "00000001" // /* MW 5 */ + 4444 "00000000" // /* MW 4 */ + 4445 "01101000" // /* MW 3 */ + 4446 "00001000" // /* MW 2 */ + 4447 "00000000" // /* MW 1 */ +.delay_slot + 4448 "11111000" // MOV r0, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4449 "10100000" // /* MW 3 */ + 4450 "00010111" // /* MW 2 */ + 4451 "00011000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 179 8 +.delay_slot + 4452 "00111010" // ST r0, [sp, #-4]; MOVXM r15, #508736 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4453 "00010001" // /* MW 9 */ + 4454 "10100000" // /* MW 8 */ + 4455 "11101001" // /* MW 7 */ + 4456 "11110001" // /* MW 6 */ + 4457 "00000001" // /* MW 5 */ + 4458 "00000000" // /* MW 4 */ + 4459 "10110000" // /* MW 3 */ + 4460 "10000010" // /* MW 2 */ + 4461 "11111111" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 179 8 +.delay_slot + 4462 "11111000" // MOV p0, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4463 "10100000" // /* MW 3 */ + 4464 "01100111" // /* MW 2 */ + 4465 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4467 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4468 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4469 "10000001" // /* MW 11 */ + 4470 "10101101" // /* MW 10 */ + 4471 "00000000" // /* MW 9 */ + 4472 "00000000" // /* MW 8 */ + 4473 "00000000" // /* MW 7 */ + 4474 "00000000" // /* MW 6 */ + 4475 "00100000" // /* MW 5 */ + 4476 "00000000" // /* MW 4 */ + 4477 "11110000" // /* MW 3 */ + 4478 "00101100" // /* MW 2 */ + 4479 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 179 8 +.src_ref 2 "elementwise_binary_shared.h" 181 4 +.src_ref 3 "add_impl.h" 105 29 +.return_address + 4480 "10111010" // LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p1, r15, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4481 "00001000" // /* MW 9 */ + 4482 "11000100" // /* MW 8 */ + 4483 "10110011" // /* MW 7 */ + 4484 "01101000" // /* MW 6 */ + 4485 "00000000" // /* MW 5 */ + 4486 "00000001" // /* MW 4 */ + 4487 "00100000" // /* MW 3 */ + 4488 "00000111" // /* MW 2 */ + 4489 "11111111" // /* MW 1 */ +.src_ref 3 "add_impl.h" 105 29 +.src_ref 3 "add_impl.h" 106 37 +.src_ref 3 "add_impl.h" 106 39 + 4490 "10111010" // MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4491 "01011000" // /* MW 9 */ + 4492 "11111101" // /* MW 8 */ + 4493 "00000111" // /* MW 7 */ + 4494 "00001000" // /* MW 6 */ + 4495 "10000000" // /* MW 5 */ + 4496 "00000001" // /* MW 4 */ + 4497 "10000000" // /* MW 3 */ + 4498 "11100010" // /* MW 2 */ + 4499 "00000001" // /* MW 1 */ +.src_ref 3 "add_impl.h" 105 29 first +.src_ref 3 "add_impl.h" 106 39 + 4500 "01111010" // LDA r15, [sp, #-4]; ST r16, [p1], m0; MOVX r16, #-128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4501 "00000001" // /* MW 9 */ + 4502 "10100000" // /* MW 8 */ + 4503 "00000111" // /* MW 7 */ + 4504 "10000000" // /* MW 6 */ + 4505 "00010001" // /* MW 5 */ + 4506 "00001010" // /* MW 4 */ + 4507 "00100001" // /* MW 3 */ + 4508 "10111110" // /* MW 2 */ + 4509 "11111111" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 50 first + 4510 "10011000" // LDA.u8 r18, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4511 "01001010" // /* MW 3 */ + 4512 "00000110" // /* MW 2 */ + 4513 "00000001" // /* MW 1 */ + 4514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4515 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4517 "00000000" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 37 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4518 "00011000" // ST.s16 r16, [p1, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4519 "00010111" // /* MW 3 */ + 4520 "00000010" // /* MW 2 */ + 4521 "00000001" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 181 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4522 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 4523 "00000000" // /* MW 3 */ + 4524 "00101000" // /* MW 2 */ + 4525 "00010000" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 54 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4526 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4527 "00000101" // /* MW 3 */ + 4528 "00100010" // /* MW 2 */ + 4529 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 181 4 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4530 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4531 "00000001" // /* MW 5 */ + 4532 "00000000" // /* MW 4 */ + 4533 "00000000" // /* MW 3 */ + 4534 "11111000" // /* MW 2 */ + 4535 "11111111" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 54 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4536 "10011000" // EQ r27, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4537 "00100111" // /* MW 3 */ + 4538 "01110111" // /* MW 2 */ + 4539 "00010100" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 39 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4540 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4541 "10000010" // /* MW 3 */ + 4542 "00100001" // /* MW 2 */ + 4543 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + 4545 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.function run _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.src_ref 2 "elementwise_binary_shared.h" 186 first +.src_ref 2 "elementwise_binary_shared.h" 191 8 first +.tail_call +.function_start + 4560 "10000100" // J #3152 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=3152 delay_slots=5 */ + 4561 "00000000" // /* MW 5 */ + 4562 "00000000" // /* MW 4 */ + 4563 "00101000" // /* MW 3 */ + 4564 "00000110" // /* MW 2 */ + 4565 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 191 8 +.delay_slot + 4566 "01000100" // MOVXM p3, #508736 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4567 "10000000" // /* MW 5 */ + 4568 "11000110" // /* MW 4 */ + 4569 "11000110" // /* MW 3 */ + 4570 "00000111" // /* MW 2 */ + 4571 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4572 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4573 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4574 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4575 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4577 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0 + 4579 "00000000" // /* MW 1 */ +.label __Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0 +.label _Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.function superkernel_add1d _Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.src_ref 6 "superkernels.cpp" 240 first +.src_ref 6 "superkernels.cpp" 245 6 +.function_start + 4592 "01000100" // MOVXM p4, #508416 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4593 "00000000" // /* MW 5 */ + 4594 "11000100" // /* MW 4 */ + 4595 "11001000" // /* MW 3 */ + 4596 "00000111" // /* MW 2 */ + 4597 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 245 6 first + 4598 "11010100" // LDA r16, [p4]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4599 "11000001" // /* MW 5 */ + 4600 "10110101" // /* MW 4 */ + 4601 "11011000" // /* MW 3 */ + 4602 "11000010" // /* MW 2 */ + 4603 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 240 + 4604 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4605 "00000001" // /* MW 5 */ + 4606 "00000000" // /* MW 4 */ + 4607 "00000000" // /* MW 3 */ + 4608 "00001000" // /* MW 2 */ + 4609 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 242 22 first + 4610 "00111010" // ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4611 "01111001" // /* MW 9 */ + 4612 "01100000" // /* MW 8 */ + 4613 "11001010" // /* MW 7 */ + 4614 "10000001" // /* MW 6 */ + 4615 "00010100" // /* MW 5 */ + 4616 "00100011" // /* MW 4 */ + 4617 "10110000" // /* MW 3 */ + 4618 "00111010" // /* MW 2 */ + 4619 "11111111" // /* MW 1 */ + 4620 "00000010" // ST p0, [sp, #-20]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4621 "01110000" // /* MW 7 */ + 4622 "11010000" // /* MW 6 */ + 4623 "00001011" // /* MW 5 */ + 4624 "00000000" // /* MW 4 */ + 4625 "10110000" // /* MW 3 */ + 4626 "10000011" // /* MW 2 */ + 4627 "11111101" // /* MW 1 */ + 4628 "10011000" // ST r0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4629 "00010101" // /* MW 3 */ + 4630 "11111100" // /* MW 2 */ + 4631 "00001111" // /* MW 1 */ + 4632 "10011000" // ST lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4633 "00111101" // /* MW 3 */ + 4634 "11110000" // /* MW 2 */ + 4635 "00001111" // /* MW 1 */ + 4636 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4637 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 245 6 first +.src_ref 6 "superkernels.cpp" 245 16 first + 4638 "10000100" // JNZ r16, #4784 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4784 delay_slots=5 */ + 4639 "00000001" // /* MW 5 */ + 4640 "01000000" // /* MW 4 */ + 4641 "01011000" // /* MW 3 */ + 4642 "00001001" // /* MW 2 */ + 4643 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 242 30 first +.delay_slot + 4644 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4645 "11111011" // /* MW 3 */ + 4646 "01100011" // /* MW 2 */ + 4647 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 242 11 +.delay_slot + 4648 "01000100" // MOVXM p2, #508424 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4649 "00010000" // /* MW 5 */ + 4650 "11000100" // /* MW 4 */ + 4651 "11000100" // /* MW 3 */ + 4652 "00000111" // /* MW 2 */ + 4653 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 242 11 +.delay_slot + 4654 "00000010" // ST r17, [p2]; MOV p2, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4655 "01110000" // /* MW 7 */ + 4656 "01100000" // /* MW 6 */ + 4657 "00110111" // /* MW 5 */ + 4658 "00000001" // /* MW 4 */ + 4659 "00110000" // /* MW 3 */ + 4660 "11000110" // /* MW 2 */ + 4661 "01000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.delay_slot + 4662 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4663 "11000000" // /* MW 3 */ + 4664 "11010110" // /* MW 2 */ + 4665 "00011011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 248 4 +.src_ref 6 "superkernels.cpp" 250 28 +.src_ref 6 "superkernels.cpp" 252 42 +.src_ref 6 "superkernels.cpp" 264 2 +.delay_slot + 4666 "00111010" // ST p2, [sp, #-12]; MOVXM p7, #508736 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4667 "00010001" // /* MW 9 */ + 4668 "10100000" // /* MW 8 */ + 4669 "10110001" // /* MW 7 */ + 4670 "11110011" // /* MW 6 */ + 4671 "00000001" // /* MW 5 */ + 4672 "00000000" // /* MW 4 */ + 4673 "10110000" // /* MW 3 */ + 4674 "10100011" // /* MW 2 */ + 4675 "11111110" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.src_ref 6 "superkernels.cpp" 248 4 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4676 "00111010" // MOVS p0, p7; MOVXM p2, #508464 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4677 "00010001" // /* MW 9 */ + 4678 "00011000" // /* MW 8 */ + 4679 "00110001" // /* MW 7 */ + 4680 "11110001" // /* MW 6 */ + 4681 "00000001" // /* MW 5 */ + 4682 "00000000" // /* MW 4 */ + 4683 "01100000" // /* MW 3 */ + 4684 "10010001" // /* MW 2 */ + 4685 "00010011" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4686 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #508460 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4687 "00010000" // /* MW 9 */ + 4688 "00010110" // /* MW 8 */ + 4689 "00110001" // /* MW 7 */ + 4690 "11110001" // /* MW 6 */ + 4691 "00000001" // /* MW 5 */ + 4692 "00000000" // /* MW 4 */ + 4693 "11100000" // /* MW 3 */ + 4694 "11000000" // /* MW 2 */ + 4695 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4697 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 248 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 4698 "00000100" // JL #4432 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4432 delay_slots=5 */ + 4699 "00000001" // /* MW 5 */ + 4700 "00000000" // /* MW 4 */ + 4701 "10101000" // /* MW 3 */ + 4702 "00001000" // /* MW 2 */ + 4703 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4704 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4705 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4707 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4708 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4709 "00110001" // /* MW 3 */ + 4710 "00100000" // /* MW 2 */ + 4711 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 4712 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4713 "00000101" // /* MW 3 */ + 4714 "00100000" // /* MW 2 */ + 4715 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 4716 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4717 "00010001" // /* MW 3 */ + 4718 "00000110" // /* MW 2 */ + 4719 "00001010" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 252 18 +.src_ref 6 "superkernels.cpp" 252 42 first +.return_address + 4720 "10111010" // LDA r16, [p7]; MOVXM p1, #508424 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4721 "00010000" // /* MW 9 */ + 4722 "00000100" // /* MW 8 */ + 4723 "10110001" // /* MW 7 */ + 4724 "11110000" // /* MW 6 */ + 4725 "00000001" // /* MW 5 */ + 4726 "00000000" // /* MW 4 */ + 4727 "11010000" // /* MW 3 */ + 4728 "11000010" // /* MW 2 */ + 4729 "11100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 252 16 +.src_ref 6 "superkernels.cpp" 252 18 +.src_ref 6 "superkernels.cpp" 261 48 + 4730 "10111010" // LDA r17, [p1]; MOVXM p3, #508428 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4731 "00010000" // /* MW 9 */ + 4732 "00000110" // /* MW 8 */ + 4733 "10110001" // /* MW 7 */ + 4734 "11110001" // /* MW 6 */ + 4735 "00000001" // /* MW 5 */ + 4736 "00000000" // /* MW 4 */ + 4737 "11010000" // /* MW 3 */ + 4738 "11000110" // /* MW 2 */ + 4739 "00100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 250 28 first +.src_ref 6 "superkernels.cpp" 253 16 +.src_ref 6 "superkernels.cpp" 262 48 + 4740 "10111010" // LDA.u16 r18, [p7, #10]; MOVXM p1, #508432 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4741 "00010000" // /* MW 9 */ + 4742 "00001000" // /* MW 8 */ + 4743 "10110001" // /* MW 7 */ + 4744 "11110000" // /* MW 6 */ + 4745 "00000001" // /* MW 5 */ + 4746 "00000000" // /* MW 4 */ + 4747 "01010000" // /* MW 3 */ + 4748 "11001011" // /* MW 2 */ + 4749 "11101010" // /* MW 1 */ + 4750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4751 "00000000" // /* MW 1 */ + 4752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4753 "00000000" // /* MW 1 */ + 4754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4755 "00000000" // /* MW 1 */ + 4756 "10000100" // J #4800 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=4800 delay_slots=5 */ + 4757 "00000000" // /* MW 5 */ + 4758 "00000000" // /* MW 4 */ + 4759 "01100000" // /* MW 3 */ + 4760 "00001001" // /* MW 2 */ + 4761 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 250 13 +.delay_slot + 4762 "01000100" // MOVXM p2, #508456 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4763 "01010000" // /* MW 5 */ + 4764 "11000100" // /* MW 4 */ + 4765 "11000100" // /* MW 3 */ + 4766 "00000111" // /* MW 2 */ + 4767 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 252 27 first +.delay_slot + 4768 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4769 "00001111" // /* MW 3 */ + 4770 "01100001" // /* MW 2 */ + 4771 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 250 13 first +.delay_slot + 4772 "10011000" // ST r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4773 "01010001" // /* MW 3 */ + 4774 "00000110" // /* MW 2 */ + 4775 "00001010" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 252 16 first +.delay_slot + 4776 "10011000" // ST r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4777 "00010001" // /* MW 3 */ + 4778 "00000110" // /* MW 2 */ + 4779 "00001011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 253 16 first +.delay_slot + 4780 "10011000" // ST r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4781 "00010001" // /* MW 3 */ + 4782 "00000110" // /* MW 2 */ + 4783 "00001001" // /* MW 1 */ +.label TGT_F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 +.src_ref 6 "superkernels.cpp" 261 48 + 4784 "01000100" // MOVXM p3, #508428 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4785 "00011000" // /* MW 5 */ + 4786 "11000100" // /* MW 4 */ + 4787 "11000110" // /* MW 3 */ + 4788 "00000111" // /* MW 2 */ + 4789 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 262 48 + 4790 "10111010" // NOPA; MOVXM p1, #508432 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4791 "00010000" // /* MW 9 */ + 4792 "00001000" // /* MW 8 */ + 4793 "10110001" // /* MW 7 */ + 4794 "11110000" // /* MW 6 */ + 4795 "00000001" // /* MW 5 */ + 4796 "00000000" // /* MW 4 */ + 4797 "11110000" // /* MW 3 */ + 4798 "00101100" // /* MW 2 */ + 4799 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 +.src_ref 1 "io_buffer_main.h" 242 49 first + 4800 "00011000" // ADD.NC p0, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4801 "10000110" // /* MW 3 */ + 4802 "01100111" // /* MW 2 */ + 4803 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 256 2 + 4804 "10111010" // LDA r27, [p0], #-4; MOVXM p2, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4805 "00010000" // /* MW 9 */ + 4806 "00000000" // /* MW 8 */ + 4807 "00110001" // /* MW 7 */ + 4808 "11110001" // /* MW 6 */ + 4809 "00000001" // /* MW 5 */ + 4810 "00000000" // /* MW 4 */ + 4811 "11010000" // /* MW 3 */ + 4812 "11101110" // /* MW 2 */ + 4813 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 4814 "10011000" // LDA r16, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4815 "00010110" // /* MW 3 */ + 4816 "11111110" // /* MW 2 */ + 4817 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 4818 "10011000" // LDA r17, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4819 "00110110" // /* MW 3 */ + 4820 "11111110" // /* MW 2 */ + 4821 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 256 2 first + 4822 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4823 "01010110" // /* MW 3 */ + 4824 "00000110" // /* MW 2 */ + 4825 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first + 4826 "10011000" // LDA r19, [p0, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4827 "01110110" // /* MW 3 */ + 4828 "01000110" // /* MW 2 */ + 4829 "00000000" // /* MW 1 */ + 4830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4831 "00000000" // /* MW 1 */ + 4832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4833 "00000000" // /* MW 1 */ + 4834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4835 "00000000" // /* MW 1 */ + 4836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4837 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 4838 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4839 "00000010" // /* MW 3 */ + 4840 "01100001" // /* MW 2 */ + 4841 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 +.src_ref 6 "superkernels.cpp" 256 2 first + 4842 "01011100" // ST r16, [p0]; ADD r16, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4843 "00001110" // /* MW 5 */ + 4844 "01000000" // /* MW 4 */ + 4845 "00111001" // /* MW 3 */ + 4846 "11000010" // /* MW 2 */ + 4847 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 256 2 + 4848 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4849 "00010001" // /* MW 3 */ + 4850 "00000110" // /* MW 2 */ + 4851 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 +.src_ref 1 "io_buffer_main.h" 419 8 + 4852 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4853 "11111101" // /* MW 3 */ + 4854 "11100000" // /* MW 2 */ + 4855 "00010111" // /* MW 1 */ + 4856 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4857 "00000000" // /* MW 1 */ + 4858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4859 "00000000" // /* MW 1 */ + 4860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4861 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 4862 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4863 "00001000" // /* MW 3 */ + 4864 "11010011" // /* MW 2 */ + 4865 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 first + 4866 "00011000" // ADD.NC p2, r14, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4867 "00000110" // /* MW 3 */ + 4868 "01100111" // /* MW 2 */ + 4869 "00011010" // /* MW 1 */ + 4870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4871 "00000000" // /* MW 1 */ + 4872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4873 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 + 4874 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4875 "01110110" // /* MW 3 */ + 4876 "11111111" // /* MW 2 */ + 4877 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 4878 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4879 "00110110" // /* MW 3 */ + 4880 "11111110" // /* MW 2 */ + 4881 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 4882 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4883 "01010110" // /* MW 3 */ + 4884 "11111110" // /* MW 2 */ + 4885 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 47 first + 4886 "10011000" // LDA r19, [p2, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4887 "01110110" // /* MW 3 */ + 4888 "01010110" // /* MW 2 */ + 4889 "00000010" // /* MW 1 */ + 4890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4891 "00000000" // /* MW 1 */ + 4892 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4893 "00000000" // /* MW 1 */ + 4894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4895 "00000000" // /* MW 1 */ + 4896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4897 "00000000" // /* MW 1 */ + 4898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4899 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 4900 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4901 "00010010" // /* MW 3 */ + 4902 "10100011" // /* MW 2 */ + 4903 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 + 4904 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4905 "00110001" // /* MW 3 */ + 4906 "00000110" // /* MW 2 */ + 4907 "00001010" // /* MW 1 */ + 4908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4909 "00000000" // /* MW 1 */ + 4910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4911 "00000000" // /* MW 1 */ + 4912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4913 "00000000" // /* MW 1 */ + 4914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4915 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 4916 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4917 "00001000" // /* MW 3 */ + 4918 "11010011" // /* MW 2 */ + 4919 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 32 +.src_ref 6 "superkernels.cpp" 261 46 +.src_ref 6 "superkernels.cpp" 262 46 + 4920 "00111010" // MOVS p6, p2; MOVX r16, #1; MOV r14, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4921 "01111001" // /* MW 9 */ + 4922 "01100000" // /* MW 8 */ + 4923 "11001110" // /* MW 7 */ + 4924 "00101001" // /* MW 6 */ + 4925 "00000000" // /* MW 5 */ + 4926 "00000001" // /* MW 4 */ + 4927 "01100000" // /* MW 3 */ + 4928 "00010001" // /* MW 2 */ + 4929 "11010001" // /* MW 1 */ + 4930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4931 "00000000" // /* MW 1 */ + 4932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4933 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 + 4934 "00011000" // LDA p4, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4935 "00011001" // /* MW 3 */ + 4936 "11101110" // /* MW 2 */ + 4937 "00000111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 261 48 first + 4938 "00001100" // LDA r17, [p3]; ST p0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4939 "00111011" // /* MW 5 */ + 4940 "11011000" // /* MW 4 */ + 4941 "11011111" // /* MW 3 */ + 4942 "11000110" // /* MW 2 */ + 4943 "01100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 262 48 first +.src_ref 6 "superkernels.cpp" 264 2 + 4944 "11010100" // LDA r20, [p1]; MOV p3, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4945 "10000001" // /* MW 5 */ + 4946 "11011101" // /* MW 4 */ + 4947 "11010110" // /* MW 3 */ + 4948 "11010010" // /* MW 2 */ + 4949 "00100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 4950 "10011000" // LDA r18, [p2], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4951 "01010110" // /* MW 3 */ + 4952 "01001110" // /* MW 2 */ + 4953 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4954 "10011000" // LDA p2, [p0], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4955 "00011110" // /* MW 3 */ + 4956 "01011101" // /* MW 2 */ + 4957 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 40 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4958 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4959 "11000000" // /* MW 3 */ + 4960 "01100000" // /* MW 2 */ + 4961 "00011111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4962 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4963 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4964 "10011000" // LDA r19, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4965 "01110110" // /* MW 3 */ + 4966 "00000110" // /* MW 2 */ + 4967 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4969 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 264 2 first +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 4970 "00000100" // JL #4560 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4560 delay_slots=5 */ + 4971 "00000001" // /* MW 5 */ + 4972 "00000000" // /* MW 4 */ + 4973 "11101000" // /* MW 3 */ + 4974 "00001000" // /* MW 2 */ + 4975 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 40 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4976 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4977 "11000000" // /* MW 3 */ + 4978 "11010100" // /* MW 2 */ + 4979 "00011011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 261 46 first +.delay_slot + 4980 "10011000" // LSHL r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4981 "00001101" // /* MW 3 */ + 4982 "01100011" // /* MW 2 */ + 4983 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 262 46 first +.delay_slot + 4984 "10011000" // LSHL r16, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4985 "00001101" // /* MW 3 */ + 4986 "00100001" // /* MW 2 */ + 4987 "00010101" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 262 46 +.delay_slot + 4988 "01011000" // ADD.NC p1, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4989 "01000001" // /* MW 3 */ + 4990 "01101001" // /* MW 2 */ + 4991 "00011001" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 261 46 first +.delay_slot + 4992 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4993 "00000000" // /* MW 15 */ + 4994 "00000000" // /* MW 14 */ + 4995 "10101000" // /* MW 13 */ + 4996 "11100010" // /* MW 12 */ + 4997 "00110100" // /* MW 11 */ + 4998 "00000000" // /* MW 10 */ + 4999 "00000000" // /* MW 9 */ + 5000 "00000000" // /* MW 8 */ + 5001 "01011011" // /* MW 7 */ + 5002 "00000001" // /* MW 6 */ + 5003 "00100000" // /* MW 5 */ + 5004 "00000000" // /* MW 4 */ + 5005 "11110000" // /* MW 3 */ + 5006 "00101100" // /* MW 2 */ + 5007 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 32 first +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 351 40 +.src_ref 1 "io_buffer_main.h" 449 8 +.src_ref 1 "io_buffer_main.h" 449 8 +.return_address + 5008 "10111010" // LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5009 "01111000" // /* MW 9 */ + 5010 "11010000" // /* MW 8 */ + 5011 "10110011" // /* MW 7 */ + 5012 "00101000" // /* MW 6 */ + 5013 "00000000" // /* MW 5 */ + 5014 "00000001" // /* MW 4 */ + 5015 "11010000" // /* MW 3 */ + 5016 "11000110" // /* MW 2 */ + 5017 "11001000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 268 19 + 5018 "01000100" // MOVXM p6, #508456 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5019 "01010000" // /* MW 5 */ + 5020 "11000100" // /* MW 4 */ + 5021 "11001100" // /* MW 3 */ + 5022 "00000111" // /* MW 2 */ + 5023 "00000000" // /* MW 1 */ + 5024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5025 "00000000" // /* MW 1 */ + 5026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5027 "00000000" // /* MW 1 */ + 5028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5029 "00000000" // /* MW 1 */ + 5030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5031 "00000000" // /* MW 1 */ + 5032 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5033 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 5034 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5035 "00001000" // /* MW 3 */ + 5036 "01010001" // /* MW 2 */ + 5037 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first + 5038 "10011000" // LDA r17, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5039 "00110110" // /* MW 3 */ + 5040 "11110110" // /* MW 2 */ + 5041 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 + 5042 "00011000" // LDA p2, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5043 "00011001" // /* MW 3 */ + 5044 "11101101" // /* MW 2 */ + 5045 "00000111" // /* MW 1 */ + 5046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5047 "00000000" // /* MW 1 */ + 5048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5049 "00000000" // /* MW 1 */ + 5050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5051 "00000000" // /* MW 1 */ + 5052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5053 "00000000" // /* MW 1 */ + 5054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5055 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 + 5056 "10011000" // SUB r17, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5057 "00010001" // /* MW 3 */ + 5058 "00100011" // /* MW 2 */ + 5059 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first +.src_ref 1 "io_buffer_main.h" 351 28 + 5060 "00001100" // LDA r17, [p2, #20]; ST r17, [p1, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5061 "01100011" // /* MW 5 */ + 5062 "11101100" // /* MW 4 */ + 5063 "11010011" // /* MW 3 */ + 5064 "11000110" // /* MW 2 */ + 5065 "01001010" // /* MW 1 */ + 5066 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5067 "00000000" // /* MW 1 */ + 5068 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5069 "00000000" // /* MW 1 */ + 5070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5071 "00000000" // /* MW 1 */ + 5072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5073 "00000000" // /* MW 1 */ + 5074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5075 "00000000" // /* MW 1 */ + 5076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5077 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 5078 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5079 "00001000" // /* MW 3 */ + 5080 "01010001" // /* MW 2 */ + 5081 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first +.src_ref 6 "superkernels.cpp" 268 6 +.src_ref 6 "superkernels.cpp" 269 14 + 5082 "10111010" // LDA r19, [p7, #-8]; MOVXM p1, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5083 "00010000" // /* MW 9 */ + 5084 "00000000" // /* MW 8 */ + 5085 "10110001" // /* MW 7 */ + 5086 "11110000" // /* MW 6 */ + 5087 "00000001" // /* MW 5 */ + 5088 "00000000" // /* MW 4 */ + 5089 "11010000" // /* MW 3 */ + 5090 "11001110" // /* MW 2 */ + 5091 "11111100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 268 19 first + 5092 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5093 "01010110" // /* MW 3 */ + 5094 "00000110" // /* MW 2 */ + 5095 "00000110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 268 6 + 5096 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5097 "00110110" // /* MW 3 */ + 5098 "00000110" // /* MW 2 */ + 5099 "00000001" // /* MW 1 */ + 5100 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5101 "00000000" // /* MW 1 */ + 5102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5103 "00000000" // /* MW 1 */ + 5104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5105 "00000000" // /* MW 1 */ + 5106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5107 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 first + 5108 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5109 "00110001" // /* MW 3 */ + 5110 "00100001" // /* MW 2 */ + 5111 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 5112 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5113 "00010001" // /* MW 3 */ + 5114 "11100110" // /* MW 2 */ + 5115 "00001111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 268 16 first + 5116 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5117 "00101000" // /* MW 3 */ + 5118 "01100001" // /* MW 2 */ + 5119 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 268 6 + 5120 "10000100" // JNZ r16, #5152 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5152 delay_slots=5 */ + 5121 "00000001" // /* MW 5 */ + 5122 "01000000" // /* MW 4 */ + 5123 "00010000" // /* MW 3 */ + 5124 "00001010" // /* MW 2 */ + 5125 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5127 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5129 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5131 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5133 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5134 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5135 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 269 14 + 5136 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5137 "00000001" // /* MW 3 */ + 5138 "00100000" // /* MW 2 */ + 5139 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 269 14 first + 5140 "00110110" // NOPA; NOPB; ST r16, [p1]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5141 "11000001" // /* MW 11 */ + 5142 "00001000" // /* MW 10 */ + 5143 "10000011" // /* MW 9 */ + 5144 "00000000" // /* MW 8 */ + 5145 "00000000" // /* MW 7 */ + 5146 "00000000" // /* MW 6 */ + 5147 "00100000" // /* MW 5 */ + 5148 "00000000" // /* MW 4 */ + 5149 "11110000" // /* MW 3 */ + 5150 "00101100" // /* MW 2 */ + 5151 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 +.src_ref 6 "superkernels.cpp" 271 + 5152 "00011000" // LDA lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5153 "00111001" // /* MW 3 */ + 5154 "11110000" // /* MW 2 */ + 5155 "00000111" // /* MW 1 */ + 5156 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5157 "11110001" // /* MW 3 */ + 5158 "11111101" // /* MW 2 */ + 5159 "00000111" // /* MW 1 */ + 5160 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5161 "10011001" // /* MW 3 */ + 5162 "11110111" // /* MW 2 */ + 5163 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 5164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5165 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.noswbrkpt + 5166 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5167 "11010001" // /* MW 3 */ + 5168 "11111001" // /* MW 2 */ + 5169 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 5170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5171 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 5172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5173 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 271 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 5174 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 5175 "00000000" // /* MW 3 */ + 5176 "00101000" // /* MW 2 */ + 5177 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5178 "00011000" // MOVS p6, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5179 "00001011" // /* MW 3 */ + 5180 "10001110" // /* MW 2 */ + 5181 "00001110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 271 +.delay_slot + 5182 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5183 "00000001" // /* MW 5 */ + 5184 "00000000" // /* MW 4 */ + 5185 "00000000" // /* MW 3 */ + 5186 "11111000" // /* MW 2 */ + 5187 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5189 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5191 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end +.label __Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0 + 5193 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.src_ref 2 "elementwise_binary.h" 100 first +.src_ref 2 "elementwise_binary.h" 103 4 first +.function_start + 5200 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 5201 "00000000" // /* MW 3 */ + 5202 "00101000" // /* MW 2 */ + 5203 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 5204 "01000100" // MOVXM p0, #508832 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5205 "01000000" // /* MW 5 */ + 5206 "11000111" // /* MW 4 */ + 5207 "11000000" // /* MW 3 */ + 5208 "00000111" // /* MW 2 */ + 5209 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 5210 "10111000" // MOV m0, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5211 "10000000" // /* MW 3 */ + 5212 "00000000" // /* MW 2 */ + 5213 "00011000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 first +.delay_slot + 5214 "10011000" // ST m0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5215 "00000001" // /* MW 3 */ + 5216 "00000100" // /* MW 2 */ + 5217 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 5218 "10011000" // ST m0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5219 "00000001" // /* MW 3 */ + 5220 "00010100" // /* MW 2 */ + 5221 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5222 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_end0 + 5223 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.src_ref 2 "elementwise_binary.h" 89 first +.src_ref 2 "elementwise_binary.h" 92 22 +.src_ref 2 "elementwise_binary.h" 92 24 first +.function_start + 5232 "10111010" // LDA el0, [p1], #4; MOVXM p0, #508800 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5233 "00010000" // /* MW 9 */ + 5234 "11000000" // /* MW 8 */ + 5235 "00110001" // /* MW 7 */ + 5236 "11110000" // /* MW 6 */ + 5237 "00000001" // /* MW 5 */ + 5238 "00000000" // /* MW 4 */ + 5239 "11010000" // /* MW 3 */ + 5240 "10000101" // /* MW 2 */ + 5241 "00100011" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 89 + 5242 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5243 "00000001" // /* MW 5 */ + 5244 "00000000" // /* MW 4 */ + 5245 "00000000" // /* MW 3 */ + 5246 "00001000" // /* MW 2 */ + 5247 "00000000" // /* MW 1 */ + 5248 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5249 "00111101" // /* MW 3 */ + 5250 "11111100" // /* MW 2 */ + 5251 "00001111" // /* MW 1 */ + 5252 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5253 "00000000" // /* MW 1 */ + 5254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5255 "00000000" // /* MW 1 */ + 5256 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5257 "00000000" // /* MW 1 */ + 5258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5259 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 92 22 first + 5260 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5261 "00101001" // /* MW 3 */ + 5262 "00011100" // /* MW 2 */ + 5263 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 93 24 first + 5264 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5265 "00101110" // /* MW 3 */ + 5266 "00011100" // /* MW 2 */ + 5267 "00000001" // /* MW 1 */ + 5268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5269 "00000000" // /* MW 1 */ + 5270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5271 "00000000" // /* MW 1 */ + 5272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5273 "00000000" // /* MW 1 */ + 5274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5275 "00000000" // /* MW 1 */ + 5276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5277 "00000000" // /* MW 1 */ + 5278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5279 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 93 22 + 5280 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5281 "00101001" // /* MW 3 */ + 5282 "00011100" // /* MW 2 */ + 5283 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 94 24 first + 5284 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5285 "00101110" // /* MW 3 */ + 5286 "00000100" // /* MW 2 */ + 5287 "00000001" // /* MW 1 */ + 5288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5289 "00000000" // /* MW 1 */ + 5290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5291 "00000000" // /* MW 1 */ + 5292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5293 "00000000" // /* MW 1 */ + 5294 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5295 "00000000" // /* MW 1 */ + 5296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5297 "00000000" // /* MW 1 */ + 5298 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5299 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 94 22 + 5300 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5301 "00101001" // /* MW 3 */ + 5302 "00011100" // /* MW 2 */ + 5303 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 95 24 first + 5304 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5305 "00101110" // /* MW 3 */ + 5306 "00010100" // /* MW 2 */ + 5307 "00000001" // /* MW 1 */ + 5308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5309 "00000000" // /* MW 1 */ + 5310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5311 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 96 8 first +.no_stack_arguments + 5312 "00000100" // JL #5200 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=5200 delay_slots=5 */ + 5313 "00000001" // /* MW 5 */ + 5314 "00000000" // /* MW 4 */ + 5315 "00101000" // /* MW 3 */ + 5316 "00001010" // /* MW 2 */ + 5317 "00000000" // /* MW 1 */ +.delay_slot + 5318 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5319 "10011101" // /* MW 3 */ + 5320 "11111011" // /* MW 2 */ + 5321 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5323 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5325 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 95 22 first +.delay_slot + 5326 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5327 "00101001" // /* MW 3 */ + 5328 "11011100" // /* MW 2 */ + 5329 "00001000" // /* MW 1 */ +.src_ref 3 "mul_impl.h" 93 25 +.delay_slot + 5330 "00101110" // NOPA; NOPS; MOV p7, p0; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5331 "00011100" // /* MW 13 */ + 5332 "00000000" // /* MW 12 */ + 5333 "00000000" // /* MW 11 */ + 5334 "00000111" // /* MW 10 */ + 5335 "00000110" // /* MW 9 */ + 5336 "01111011" // /* MW 8 */ + 5337 "00000000" // /* MW 7 */ + 5338 "00000000" // /* MW 6 */ + 5339 "10110110" // /* MW 5 */ + 5340 "00000010" // /* MW 4 */ + 5341 "11110000" // /* MW 3 */ + 5342 "00101100" // /* MW 2 */ + 5343 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 98 4 +.return_address + 5344 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5345 "00111001" // /* MW 3 */ + 5346 "11111100" // /* MW 2 */ + 5347 "00000111" // /* MW 1 */ + 5348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5349 "00000000" // /* MW 1 */ + 5350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5351 "00000000" // /* MW 1 */ + 5352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5353 "00000000" // /* MW 1 */ + 5354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5355 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5357 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5358 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5359 "10011001" // /* MW 3 */ + 5360 "11111011" // /* MW 2 */ + 5361 "00000111" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 98 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5362 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 5363 "00000000" // /* MW 3 */ + 5364 "00101000" // /* MW 2 */ + 5365 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5367 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5369 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5371 "00000000" // /* MW 1 */ +.src_ref 3 "mul_impl.h" 93 25 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5372 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5373 "00000001" // /* MW 3 */ + 5374 "00100000" // /* MW 2 */ + 5375 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 98 4 +.src_ref 3 "mul_impl.h" 93 25 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5376 "00111010" // ST r16, [p7, #16]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5377 "01110001" // /* MW 9 */ + 5378 "00000000" // /* MW 8 */ + 5379 "00000000" // /* MW 7 */ + 5380 "00000000" // /* MW 6 */ + 5381 "11111110" // /* MW 5 */ + 5382 "00111111" // /* MW 4 */ + 5383 "00110000" // /* MW 3 */ + 5384 "11000010" // /* MW 2 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + 5385 "11101000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.function run _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.src_ref 2 "elementwise_binary.h" 108 first +.src_ref 2 "elementwise_binary.h" 115 37 +.src_ref 2 "elementwise_binary.h" 115 37 +.function_start + 5392 "10111010" // MOVA m0, #32; MOVXM p3, #508800 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5393 "00010000" // /* MW 9 */ + 5394 "11000000" // /* MW 8 */ + 5395 "10110001" // /* MW 7 */ + 5396 "11110001" // /* MW 6 */ + 5397 "00000001" // /* MW 5 */ + 5398 "00000000" // /* MW 4 */ + 5399 "10000000" // /* MW 3 */ + 5400 "00000000" // /* MW 2 */ + 5401 "00000100" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 115 37 first +.src_ref 2 "elementwise_binary.h" 115 78 + 5402 "10111010" // LDA r1, [p3], m0; MOVXM p4, #508464 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5403 "00010000" // /* MW 9 */ + 5404 "00011000" // /* MW 8 */ + 5405 "00110001" // /* MW 7 */ + 5406 "11110010" // /* MW 6 */ + 5407 "00000001" // /* MW 5 */ + 5408 "00000000" // /* MW 4 */ + 5409 "11010000" // /* MW 3 */ + 5410 "00000110" // /* MW 2 */ + 5411 "01100001" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 115 78 +.src_ref 2 "elementwise_binary.h" 115 78 + 5412 "10111010" // LDA m1, [p3]; MOVX r0, #828; MOV r3, #-6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5413 "01011000" // /* MW 9 */ + 5414 "11111010" // /* MW 8 */ + 5415 "01101111" // /* MW 7 */ + 5416 "10001000" // /* MW 6 */ + 5417 "00000111" // /* MW 5 */ + 5418 "00011000" // /* MW 4 */ + 5419 "11010000" // /* MW 3 */ + 5420 "10010000" // /* MW 2 */ + 5421 "01100000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 115 78 +.src_ref 2 "elementwise_binary.h" 127 8 first + 5422 "10111010" // LDA m0, [p3, #4]; MOVXM ls, #5584 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5423 "00010000" // /* MW 9 */ + 5424 "11101000" // /* MW 8 */ + 5425 "01111010" // /* MW 7 */ + 5426 "00000100" // /* MW 6 */ + 5427 "00000000" // /* MW 5 */ + 5428 "00000000" // /* MW 4 */ + 5429 "11010000" // /* MW 3 */ + 5430 "10000000" // /* MW 2 */ + 5431 "01100010" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 115 78 first +.src_ref 2 "elementwise_binary.h" 127 8 + 5432 "10111010" // LDA.s8 r2, [p4]; MOVXM le, #5600 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5433 "00010000" // /* MW 9 */ + 5434 "11110000" // /* MW 8 */ + 5435 "10111010" // /* MW 7 */ + 5436 "00000101" // /* MW 6 */ + 5437 "00000000" // /* MW 5 */ + 5438 "00000000" // /* MW 4 */ + 5439 "01010000" // /* MW 3 */ + 5440 "10001000" // /* MW 2 */ + 5441 "10000000" // /* MW 1 */ + 5442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5443 "00000000" // /* MW 1 */ + 5444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5445 "00000000" // /* MW 1 */ + 5446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5447 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 115 78 + 5448 "10011000" // LSHL r1, r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5449 "00111101" // /* MW 3 */ + 5450 "01000010" // /* MW 2 */ + 5451 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 127 8 first + 5452 "10011000" // ADD.NC lc, r1, #-7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5453 "11111100" // /* MW 3 */ + 5454 "01110000" // /* MW 2 */ + 5455 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first + 5456 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5457 "11101000" // /* MW 5 */ + 5458 "01010000" // /* MW 4 */ + 5459 "01110000" // /* MW 3 */ + 5460 "00010011" // /* MW 2 */ + 5461 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 154 20 +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.src_ref 2 "elementwise_binary.h" 177 20 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5462 "00010010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; MOVX crRnd, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5463 "10000000" // /* MW 7 */ + 5464 "10111010" // /* MW 6 */ + 5465 "01101000" // /* MW 5 */ + 5466 "01010000" // /* MW 4 */ + 5467 "01110000" // /* MW 3 */ + 5468 "00011011" // /* MW 2 */ + 5469 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5470 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5471 "11101000" // /* MW 5 */ + 5472 "01010000" // /* MW 4 */ + 5473 "01110000" // /* MW 3 */ + 5474 "00010011" // /* MW 2 */ + 5475 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5476 "00111100" // VLDA x3, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5477 "01101000" // /* MW 5 */ + 5478 "01010000" // /* MW 4 */ + 5479 "01110000" // /* MW 3 */ + 5480 "00011011" // /* MW 2 */ + 5481 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5482 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5483 "11101000" // /* MW 5 */ + 5484 "01010000" // /* MW 4 */ + 5485 "01110000" // /* MW 3 */ + 5486 "00010011" // /* MW 2 */ + 5487 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5488 "00111100" // VLDA x3, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5489 "01101000" // /* MW 5 */ + 5490 "01010000" // /* MW 4 */ + 5491 "01110000" // /* MW 3 */ + 5492 "00011011" // /* MW 2 */ + 5493 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5494 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5495 "11101000" // /* MW 5 */ + 5496 "01010000" // /* MW 4 */ + 5497 "01110000" // /* MW 3 */ + 5498 "00010011" // /* MW 2 */ + 5499 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5500 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5501 "01000001" // /* MW 9 */ + 5502 "11100010" // /* MW 8 */ + 5503 "00000000" // /* MW 7 */ + 5504 "00011101" // /* MW 6 */ + 5505 "00110100" // /* MW 5 */ + 5506 "00101000" // /* MW 4 */ + 5507 "01110000" // /* MW 3 */ + 5508 "00011011" // /* MW 2 */ + 5509 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5510 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5511 "01100001" // /* MW 9 */ + 5512 "11100000" // /* MW 8 */ + 5513 "00000001" // /* MW 7 */ + 5514 "00011101" // /* MW 6 */ + 5515 "01110100" // /* MW 5 */ + 5516 "00101000" // /* MW 4 */ + 5517 "01110000" // /* MW 3 */ + 5518 "00010011" // /* MW 2 */ + 5519 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5520 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5521 "01000001" // /* MW 9 */ + 5522 "11100010" // /* MW 8 */ + 5523 "00000000" // /* MW 7 */ + 5524 "00011101" // /* MW 6 */ + 5525 "00110100" // /* MW 5 */ + 5526 "00101000" // /* MW 4 */ + 5527 "01110000" // /* MW 3 */ + 5528 "00011011" // /* MW 2 */ + 5529 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5530 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5531 "01100001" // /* MW 9 */ + 5532 "11100000" // /* MW 8 */ + 5533 "00000001" // /* MW 7 */ + 5534 "00011101" // /* MW 6 */ + 5535 "01110100" // /* MW 5 */ + 5536 "00101000" // /* MW 4 */ + 5537 "01110000" // /* MW 3 */ + 5538 "00010011" // /* MW 2 */ + 5539 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5540 "01100110" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; NOPS; VMUL.f dm0, x1, x2, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5541 "01000001" // /* MW 11 */ + 5542 "11100010" // /* MW 10 */ + 5543 "00000000" // /* MW 9 */ + 5544 "10001110" // /* MW 8 */ + 5545 "10101101" // /* MW 7 */ + 5546 "00000000" // /* MW 6 */ + 5547 "01101000" // /* MW 5 */ + 5548 "01010000" // /* MW 4 */ + 5549 "01110000" // /* MW 3 */ + 5550 "00011011" // /* MW 2 */ + 5551 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5552 "00001011" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; NOPS; NOPX; NOPM; VMUL.f dm1, x0, x3, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5553 "00000011" // /* MW 15 */ + 5554 "00001111" // /* MW 14 */ + 5555 "01111000" // /* MW 13 */ + 5556 "10100101" // /* MW 12 */ + 5557 "00000001" // /* MW 11 */ + 5558 "00000000" // /* MW 10 */ + 5559 "00000000" // /* MW 9 */ + 5560 "00000000" // /* MW 8 */ + 5561 "01011011" // /* MW 7 */ + 5562 "00000001" // /* MW 6 */ + 5563 "11101000" // /* MW 5 */ + 5564 "01010000" // /* MW 4 */ + 5565 "01110000" // /* MW 3 */ + 5566 "00010011" // /* MW 2 */ + 5567 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5568 "00001011" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5569 "00010010" // /* MW 15 */ + 5570 "00000111" // /* MW 14 */ + 5571 "01111000" // /* MW 13 */ + 5572 "10100101" // /* MW 12 */ + 5573 "00000001" // /* MW 11 */ + 5574 "00000000" // /* MW 10 */ + 5575 "00000000" // /* MW 9 */ + 5576 "00000000" // /* MW 8 */ + 5577 "00100011" // /* MW 7 */ + 5578 "00011100" // /* MW 6 */ + 5579 "01101010" // /* MW 5 */ + 5580 "01010000" // /* MW 4 */ + 5581 "01110000" // /* MW 3 */ + 5582 "00011011" // /* MW 2 */ + 5583 "00100001" // /* MW 1 */ +.label ZLS_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_192 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.src_ref 2 "elementwise_binary.h" 154 20 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 5584 "00001011" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; VMUL.f dm1, x0, x3, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5585 "00000011" // /* MW 15 */ + 5586 "00001111" // /* MW 14 */ + 5587 "01111000" // /* MW 13 */ + 5588 "10100101" // /* MW 12 */ + 5589 "00000001" // /* MW 11 */ + 5590 "00000000" // /* MW 10 */ + 5591 "00000000" // /* MW 9 */ + 5592 "00000000" // /* MW 8 */ + 5593 "10100011" // /* MW 7 */ + 5594 "00011100" // /* MW 6 */ + 5595 "11101010" // /* MW 5 */ + 5596 "01010000" // /* MW 4 */ + 5597 "01110000" // /* MW 3 */ + 5598 "00010011" // /* MW 2 */ + 5599 "00100001" // /* MW 1 */ +.label ZLE_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_208 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5600 "00001011" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5601 "00010010" // /* MW 15 */ + 5602 "00000111" // /* MW 14 */ + 5603 "01111000" // /* MW 13 */ + 5604 "10100101" // /* MW 12 */ + 5605 "00000001" // /* MW 11 */ + 5606 "00000000" // /* MW 10 */ + 5607 "00000000" // /* MW 9 */ + 5608 "00000000" // /* MW 8 */ + 5609 "00100011" // /* MW 7 */ + 5610 "00011100" // /* MW 6 */ + 5611 "01101010" // /* MW 5 */ + 5612 "01010000" // /* MW 4 */ + 5613 "01110000" // /* MW 3 */ + 5614 "00011011" // /* MW 2 */ + 5615 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 5616 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5617 "01100001" // /* MW 7 */ + 5618 "11100000" // /* MW 6 */ + 5619 "00000001" // /* MW 5 */ + 5620 "00000010" // /* MW 4 */ + 5621 "01100000" // /* MW 3 */ + 5622 "10010100" // /* MW 2 */ + 5623 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5624 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5625 "01000001" // /* MW 7 */ + 5626 "11100010" // /* MW 6 */ + 5627 "00000000" // /* MW 5 */ + 5628 "00000010" // /* MW 4 */ + 5629 "01100000" // /* MW 3 */ + 5630 "10000100" // /* MW 2 */ + 5631 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5632 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5633 "01100001" // /* MW 7 */ + 5634 "11100000" // /* MW 6 */ + 5635 "00000001" // /* MW 5 */ + 5636 "00000010" // /* MW 4 */ + 5637 "01100000" // /* MW 3 */ + 5638 "10010100" // /* MW 2 */ + 5639 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5640 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5641 "01000001" // /* MW 7 */ + 5642 "11100010" // /* MW 6 */ + 5643 "00000000" // /* MW 5 */ + 5644 "00000010" // /* MW 4 */ + 5645 "01100000" // /* MW 3 */ + 5646 "10000100" // /* MW 2 */ + 5647 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5648 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5649 "01100001" // /* MW 7 */ + 5650 "11100000" // /* MW 6 */ + 5651 "00000001" // /* MW 5 */ + 5652 "00000010" // /* MW 4 */ + 5653 "01100000" // /* MW 3 */ + 5654 "10010100" // /* MW 2 */ + 5655 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5656 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5657 "01000001" // /* MW 7 */ + 5658 "11100010" // /* MW 6 */ + 5659 "00000000" // /* MW 5 */ + 5660 "00000010" // /* MW 4 */ + 5661 "01100000" // /* MW 3 */ + 5662 "10000100" // /* MW 2 */ + 5663 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5664 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5665 "01100001" // /* MW 7 */ + 5666 "11100000" // /* MW 6 */ + 5667 "00000001" // /* MW 5 */ + 5668 "00000010" // /* MW 4 */ + 5669 "01100000" // /* MW 3 */ + 5670 "10010100" // /* MW 2 */ + 5671 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5672 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5673 "00100011" // /* MW 3 */ + 5674 "00011100" // /* MW 2 */ + 5675 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 131 4 first +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5676 "01011100" // VST.CONV.bf16.fp32 cml1, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 5677 "00000000" // /* MW 5 */ + 5678 "01010000" // /* MW 4 */ + 5679 "01100000" // /* MW 3 */ + 5680 "10010100" // /* MW 2 */ + 5681 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5682 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5683 "00100011" // /* MW 3 */ + 5684 "00011100" // /* MW 2 */ + 5685 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 154 20 first +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5686 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5687 "10100011" // /* MW 3 */ + 5688 "00011100" // /* MW 2 */ + 5689 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.delay_slot + 5690 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5691 "00100011" // /* MW 3 */ + 5692 "00011100" // /* MW 2 */ + 5693 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 154 20 first +.delay_slot + 5694 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5695 "10100011" // /* MW 3 */ + 5696 "00011100" // /* MW 2 */ + 5697 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5698 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_end0 + 5699 "00000000" // /* MW 1 */ +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0 +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.function superkernel_mul1d _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.src_ref 6 "superkernels.cpp" 277 first +.src_ref 6 "superkernels.cpp" 282 6 +.function_start + 5712 "01000100" // MOVXM p4, #508416 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5713 "00000000" // /* MW 5 */ + 5714 "11000100" // /* MW 4 */ + 5715 "11001000" // /* MW 3 */ + 5716 "00000111" // /* MW 2 */ + 5717 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 282 6 first + 5718 "11010100" // LDA r16, [p4]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5719 "11000001" // /* MW 5 */ + 5720 "10110101" // /* MW 4 */ + 5721 "11011000" // /* MW 3 */ + 5722 "11000010" // /* MW 2 */ + 5723 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 277 + 5724 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5725 "00000001" // /* MW 5 */ + 5726 "00000000" // /* MW 4 */ + 5727 "00000000" // /* MW 3 */ + 5728 "00001000" // /* MW 2 */ + 5729 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 279 22 first + 5730 "00111010" // ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5731 "01111001" // /* MW 9 */ + 5732 "01100000" // /* MW 8 */ + 5733 "11001010" // /* MW 7 */ + 5734 "10000001" // /* MW 6 */ + 5735 "00010100" // /* MW 5 */ + 5736 "00100011" // /* MW 4 */ + 5737 "10110000" // /* MW 3 */ + 5738 "00111010" // /* MW 2 */ + 5739 "11111111" // /* MW 1 */ + 5740 "00000010" // ST p0, [sp, #-20]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5741 "01110000" // /* MW 7 */ + 5742 "11010000" // /* MW 6 */ + 5743 "00001011" // /* MW 5 */ + 5744 "00000000" // /* MW 4 */ + 5745 "10110000" // /* MW 3 */ + 5746 "10000011" // /* MW 2 */ + 5747 "11111101" // /* MW 1 */ + 5748 "10011000" // ST r0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5749 "00010101" // /* MW 3 */ + 5750 "11111100" // /* MW 2 */ + 5751 "00001111" // /* MW 1 */ + 5752 "10011000" // ST lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5753 "00111101" // /* MW 3 */ + 5754 "11110000" // /* MW 2 */ + 5755 "00001111" // /* MW 1 */ + 5756 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5757 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 282 6 first +.src_ref 6 "superkernels.cpp" 282 16 first + 5758 "10000100" // JNZ r16, #5904 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5904 delay_slots=5 */ + 5759 "00000001" // /* MW 5 */ + 5760 "01000000" // /* MW 4 */ + 5761 "10001000" // /* MW 3 */ + 5762 "00001011" // /* MW 2 */ + 5763 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 279 30 first +.delay_slot + 5764 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5765 "11111011" // /* MW 3 */ + 5766 "01100011" // /* MW 2 */ + 5767 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 279 11 +.delay_slot + 5768 "01000100" // MOVXM p2, #508424 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5769 "00010000" // /* MW 5 */ + 5770 "11000100" // /* MW 4 */ + 5771 "11000100" // /* MW 3 */ + 5772 "00000111" // /* MW 2 */ + 5773 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 279 11 +.delay_slot + 5774 "00000010" // ST r17, [p2]; MOV p2, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5775 "01110000" // /* MW 7 */ + 5776 "01100000" // /* MW 6 */ + 5777 "00110111" // /* MW 5 */ + 5778 "00000001" // /* MW 4 */ + 5779 "00110000" // /* MW 3 */ + 5780 "11000110" // /* MW 2 */ + 5781 "01000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.delay_slot + 5782 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5783 "11000000" // /* MW 3 */ + 5784 "11010110" // /* MW 2 */ + 5785 "00011011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 285 4 +.src_ref 6 "superkernels.cpp" 287 28 +.src_ref 6 "superkernels.cpp" 289 42 +.src_ref 6 "superkernels.cpp" 301 2 +.delay_slot + 5786 "00111010" // ST p2, [sp, #-12]; MOVXM p7, #508800 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5787 "00010001" // /* MW 9 */ + 5788 "11000000" // /* MW 8 */ + 5789 "10110001" // /* MW 7 */ + 5790 "11110011" // /* MW 6 */ + 5791 "00000001" // /* MW 5 */ + 5792 "00000000" // /* MW 4 */ + 5793 "10110000" // /* MW 3 */ + 5794 "10100011" // /* MW 2 */ + 5795 "11111110" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.src_ref 6 "superkernels.cpp" 285 4 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5796 "00111010" // MOVS p0, p7; MOVXM p2, #508464 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5797 "00010001" // /* MW 9 */ + 5798 "00011000" // /* MW 8 */ + 5799 "00110001" // /* MW 7 */ + 5800 "11110001" // /* MW 6 */ + 5801 "00000001" // /* MW 5 */ + 5802 "00000000" // /* MW 4 */ + 5803 "01100000" // /* MW 3 */ + 5804 "10010001" // /* MW 2 */ + 5805 "00010011" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5806 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #508460 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5807 "00010000" // /* MW 9 */ + 5808 "00010110" // /* MW 8 */ + 5809 "00110001" // /* MW 7 */ + 5810 "11110001" // /* MW 6 */ + 5811 "00000001" // /* MW 5 */ + 5812 "00000000" // /* MW 4 */ + 5813 "11100000" // /* MW 3 */ + 5814 "11000000" // /* MW 2 */ + 5815 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5817 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 285 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 5818 "00000100" // JL #5232 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=5232 delay_slots=5 */ + 5819 "00000001" // /* MW 5 */ + 5820 "00000000" // /* MW 4 */ + 5821 "00111000" // /* MW 3 */ + 5822 "00001010" // /* MW 2 */ + 5823 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5824 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5825 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5827 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5828 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5829 "00110001" // /* MW 3 */ + 5830 "00100000" // /* MW 2 */ + 5831 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 5832 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5833 "00000101" // /* MW 3 */ + 5834 "00100000" // /* MW 2 */ + 5835 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 5836 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5837 "00010001" // /* MW 3 */ + 5838 "00000110" // /* MW 2 */ + 5839 "00001010" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 289 18 +.src_ref 6 "superkernels.cpp" 289 42 first +.return_address + 5840 "10111010" // LDA r16, [p7]; MOVXM p1, #508424 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5841 "00010000" // /* MW 9 */ + 5842 "00000100" // /* MW 8 */ + 5843 "10110001" // /* MW 7 */ + 5844 "11110000" // /* MW 6 */ + 5845 "00000001" // /* MW 5 */ + 5846 "00000000" // /* MW 4 */ + 5847 "11010000" // /* MW 3 */ + 5848 "11000010" // /* MW 2 */ + 5849 "11100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 289 16 +.src_ref 6 "superkernels.cpp" 289 18 +.src_ref 6 "superkernels.cpp" 298 48 + 5850 "10111010" // LDA r17, [p1]; MOVXM p3, #508428 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5851 "00010000" // /* MW 9 */ + 5852 "00000110" // /* MW 8 */ + 5853 "10110001" // /* MW 7 */ + 5854 "11110001" // /* MW 6 */ + 5855 "00000001" // /* MW 5 */ + 5856 "00000000" // /* MW 4 */ + 5857 "11010000" // /* MW 3 */ + 5858 "11000110" // /* MW 2 */ + 5859 "00100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 287 28 first +.src_ref 6 "superkernels.cpp" 290 16 +.src_ref 6 "superkernels.cpp" 299 48 + 5860 "10111010" // LDA.u16 r18, [p7, #10]; MOVXM p1, #508432 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5861 "00010000" // /* MW 9 */ + 5862 "00001000" // /* MW 8 */ + 5863 "10110001" // /* MW 7 */ + 5864 "11110000" // /* MW 6 */ + 5865 "00000001" // /* MW 5 */ + 5866 "00000000" // /* MW 4 */ + 5867 "01010000" // /* MW 3 */ + 5868 "11001011" // /* MW 2 */ + 5869 "11101010" // /* MW 1 */ + 5870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5871 "00000000" // /* MW 1 */ + 5872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5873 "00000000" // /* MW 1 */ + 5874 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5875 "00000000" // /* MW 1 */ + 5876 "10000100" // J #5920 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=5920 delay_slots=5 */ + 5877 "00000000" // /* MW 5 */ + 5878 "00000000" // /* MW 4 */ + 5879 "10010000" // /* MW 3 */ + 5880 "00001011" // /* MW 2 */ + 5881 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 287 13 +.delay_slot + 5882 "01000100" // MOVXM p2, #508456 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5883 "01010000" // /* MW 5 */ + 5884 "11000100" // /* MW 4 */ + 5885 "11000100" // /* MW 3 */ + 5886 "00000111" // /* MW 2 */ + 5887 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 289 27 first +.delay_slot + 5888 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5889 "00001111" // /* MW 3 */ + 5890 "01100001" // /* MW 2 */ + 5891 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 287 13 first +.delay_slot + 5892 "10011000" // ST r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5893 "01010001" // /* MW 3 */ + 5894 "00000110" // /* MW 2 */ + 5895 "00001010" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 289 16 first +.delay_slot + 5896 "10011000" // ST r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5897 "00010001" // /* MW 3 */ + 5898 "00000110" // /* MW 2 */ + 5899 "00001011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 290 16 first +.delay_slot + 5900 "10011000" // ST r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5901 "00010001" // /* MW 3 */ + 5902 "00000110" // /* MW 2 */ + 5903 "00001001" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 +.src_ref 6 "superkernels.cpp" 298 48 + 5904 "01000100" // MOVXM p3, #508428 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5905 "00011000" // /* MW 5 */ + 5906 "11000100" // /* MW 4 */ + 5907 "11000110" // /* MW 3 */ + 5908 "00000111" // /* MW 2 */ + 5909 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 299 48 + 5910 "10111010" // NOPA; MOVXM p1, #508432 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5911 "00010000" // /* MW 9 */ + 5912 "00001000" // /* MW 8 */ + 5913 "10110001" // /* MW 7 */ + 5914 "11110000" // /* MW 6 */ + 5915 "00000001" // /* MW 5 */ + 5916 "00000000" // /* MW 4 */ + 5917 "11110000" // /* MW 3 */ + 5918 "00101100" // /* MW 2 */ + 5919 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 +.src_ref 1 "io_buffer_main.h" 242 49 first + 5920 "00011000" // ADD.NC p0, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5921 "10000110" // /* MW 3 */ + 5922 "01100111" // /* MW 2 */ + 5923 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 293 2 + 5924 "10111010" // LDA r27, [p0], #-4; MOVXM p2, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5925 "00010000" // /* MW 9 */ + 5926 "00000000" // /* MW 8 */ + 5927 "00110001" // /* MW 7 */ + 5928 "11110001" // /* MW 6 */ + 5929 "00000001" // /* MW 5 */ + 5930 "00000000" // /* MW 4 */ + 5931 "11010000" // /* MW 3 */ + 5932 "11101110" // /* MW 2 */ + 5933 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 5934 "10011000" // LDA r16, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5935 "00010110" // /* MW 3 */ + 5936 "11111110" // /* MW 2 */ + 5937 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 5938 "10011000" // LDA r17, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5939 "00110110" // /* MW 3 */ + 5940 "11111110" // /* MW 2 */ + 5941 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 293 2 first + 5942 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5943 "01010110" // /* MW 3 */ + 5944 "00000110" // /* MW 2 */ + 5945 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first + 5946 "10011000" // LDA r19, [p0, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5947 "01110110" // /* MW 3 */ + 5948 "01000110" // /* MW 2 */ + 5949 "00000000" // /* MW 1 */ + 5950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5951 "00000000" // /* MW 1 */ + 5952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5953 "00000000" // /* MW 1 */ + 5954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5955 "00000000" // /* MW 1 */ + 5956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5957 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 5958 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5959 "00000010" // /* MW 3 */ + 5960 "01100001" // /* MW 2 */ + 5961 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 +.src_ref 6 "superkernels.cpp" 293 2 first + 5962 "01011100" // ST r16, [p0]; ADD r16, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5963 "00001110" // /* MW 5 */ + 5964 "01000000" // /* MW 4 */ + 5965 "00111001" // /* MW 3 */ + 5966 "11000010" // /* MW 2 */ + 5967 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 293 2 + 5968 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5969 "00010001" // /* MW 3 */ + 5970 "00000110" // /* MW 2 */ + 5971 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 +.src_ref 1 "io_buffer_main.h" 419 8 + 5972 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5973 "11111101" // /* MW 3 */ + 5974 "11100000" // /* MW 2 */ + 5975 "00010111" // /* MW 1 */ + 5976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5977 "00000000" // /* MW 1 */ + 5978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5979 "00000000" // /* MW 1 */ + 5980 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5981 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 5982 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5983 "00001000" // /* MW 3 */ + 5984 "11010011" // /* MW 2 */ + 5985 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 first + 5986 "00011000" // ADD.NC p2, r14, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5987 "00000110" // /* MW 3 */ + 5988 "01100111" // /* MW 2 */ + 5989 "00011010" // /* MW 1 */ + 5990 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5991 "00000000" // /* MW 1 */ + 5992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5993 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 + 5994 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5995 "01110110" // /* MW 3 */ + 5996 "11111111" // /* MW 2 */ + 5997 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 5998 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5999 "00110110" // /* MW 3 */ + 6000 "11111110" // /* MW 2 */ + 6001 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 6002 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6003 "01010110" // /* MW 3 */ + 6004 "11111110" // /* MW 2 */ + 6005 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 47 first + 6006 "10011000" // LDA r19, [p2, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6007 "01110110" // /* MW 3 */ + 6008 "01010110" // /* MW 2 */ + 6009 "00000010" // /* MW 1 */ + 6010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6011 "00000000" // /* MW 1 */ + 6012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6013 "00000000" // /* MW 1 */ + 6014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6015 "00000000" // /* MW 1 */ + 6016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6017 "00000000" // /* MW 1 */ + 6018 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6019 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 6020 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6021 "00010010" // /* MW 3 */ + 6022 "10100011" // /* MW 2 */ + 6023 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 + 6024 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6025 "00110001" // /* MW 3 */ + 6026 "00000110" // /* MW 2 */ + 6027 "00001010" // /* MW 1 */ + 6028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6029 "00000000" // /* MW 1 */ + 6030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6031 "00000000" // /* MW 1 */ + 6032 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6033 "00000000" // /* MW 1 */ + 6034 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6035 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 6036 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6037 "00001000" // /* MW 3 */ + 6038 "11010011" // /* MW 2 */ + 6039 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 32 +.src_ref 6 "superkernels.cpp" 298 46 +.src_ref 6 "superkernels.cpp" 299 46 + 6040 "00111010" // MOVS p6, p2; MOVX r16, #1; MOV r14, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6041 "01111001" // /* MW 9 */ + 6042 "01100000" // /* MW 8 */ + 6043 "11001110" // /* MW 7 */ + 6044 "00101001" // /* MW 6 */ + 6045 "00000000" // /* MW 5 */ + 6046 "00000001" // /* MW 4 */ + 6047 "01100000" // /* MW 3 */ + 6048 "00010001" // /* MW 2 */ + 6049 "11010001" // /* MW 1 */ + 6050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6051 "00000000" // /* MW 1 */ + 6052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6053 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 + 6054 "00011000" // LDA p4, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6055 "00011001" // /* MW 3 */ + 6056 "11101110" // /* MW 2 */ + 6057 "00000111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 298 48 first + 6058 "00001100" // LDA r17, [p3]; ST p0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6059 "00111011" // /* MW 5 */ + 6060 "11011000" // /* MW 4 */ + 6061 "11011111" // /* MW 3 */ + 6062 "11000110" // /* MW 2 */ + 6063 "01100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 299 48 first +.src_ref 6 "superkernels.cpp" 301 2 + 6064 "11010100" // LDA r20, [p1]; MOV p3, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6065 "10000001" // /* MW 5 */ + 6066 "11011101" // /* MW 4 */ + 6067 "11010110" // /* MW 3 */ + 6068 "11010010" // /* MW 2 */ + 6069 "00100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 6070 "10011000" // LDA r18, [p2], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6071 "01010110" // /* MW 3 */ + 6072 "01001110" // /* MW 2 */ + 6073 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 6074 "10011000" // LDA p2, [p0], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6075 "00011110" // /* MW 3 */ + 6076 "01011101" // /* MW 2 */ + 6077 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 40 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6078 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6079 "11000000" // /* MW 3 */ + 6080 "01100000" // /* MW 2 */ + 6081 "00011111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6082 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6083 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6084 "10011000" // LDA r19, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6085 "01110110" // /* MW 3 */ + 6086 "00000110" // /* MW 2 */ + 6087 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6089 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 301 2 first +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 6090 "00000100" // JL #5392 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=5392 delay_slots=5 */ + 6091 "00000001" // /* MW 5 */ + 6092 "00000000" // /* MW 4 */ + 6093 "10001000" // /* MW 3 */ + 6094 "00001010" // /* MW 2 */ + 6095 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 40 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6096 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6097 "11000000" // /* MW 3 */ + 6098 "11010100" // /* MW 2 */ + 6099 "00011011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 298 46 first +.delay_slot + 6100 "10011000" // LSHL r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6101 "00001101" // /* MW 3 */ + 6102 "01100011" // /* MW 2 */ + 6103 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 299 46 first +.delay_slot + 6104 "10011000" // LSHL r16, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6105 "00001101" // /* MW 3 */ + 6106 "00100001" // /* MW 2 */ + 6107 "00010101" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 299 46 +.delay_slot + 6108 "01011000" // ADD.NC p1, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6109 "01000001" // /* MW 3 */ + 6110 "01101001" // /* MW 2 */ + 6111 "00011001" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 298 46 first +.delay_slot + 6112 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6113 "00000000" // /* MW 15 */ + 6114 "00000000" // /* MW 14 */ + 6115 "10101000" // /* MW 13 */ + 6116 "11100010" // /* MW 12 */ + 6117 "00110100" // /* MW 11 */ + 6118 "00000000" // /* MW 10 */ + 6119 "00000000" // /* MW 9 */ + 6120 "00000000" // /* MW 8 */ + 6121 "01011011" // /* MW 7 */ + 6122 "00000001" // /* MW 6 */ + 6123 "00100000" // /* MW 5 */ + 6124 "00000000" // /* MW 4 */ + 6125 "11110000" // /* MW 3 */ + 6126 "00101100" // /* MW 2 */ + 6127 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 32 first +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 351 40 +.src_ref 1 "io_buffer_main.h" 449 8 +.src_ref 1 "io_buffer_main.h" 449 8 +.return_address + 6128 "10111010" // LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6129 "01111000" // /* MW 9 */ + 6130 "11010000" // /* MW 8 */ + 6131 "10110011" // /* MW 7 */ + 6132 "00101000" // /* MW 6 */ + 6133 "00000000" // /* MW 5 */ + 6134 "00000001" // /* MW 4 */ + 6135 "11010000" // /* MW 3 */ + 6136 "11000110" // /* MW 2 */ + 6137 "11001000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 19 + 6138 "01000100" // MOVXM p6, #508456 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6139 "01010000" // /* MW 5 */ + 6140 "11000100" // /* MW 4 */ + 6141 "11001100" // /* MW 3 */ + 6142 "00000111" // /* MW 2 */ + 6143 "00000000" // /* MW 1 */ + 6144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6145 "00000000" // /* MW 1 */ + 6146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6147 "00000000" // /* MW 1 */ + 6148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6149 "00000000" // /* MW 1 */ + 6150 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6151 "00000000" // /* MW 1 */ + 6152 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6153 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 6154 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6155 "00001000" // /* MW 3 */ + 6156 "01010001" // /* MW 2 */ + 6157 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first + 6158 "10011000" // LDA r17, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6159 "00110110" // /* MW 3 */ + 6160 "11110110" // /* MW 2 */ + 6161 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 + 6162 "00011000" // LDA p2, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6163 "00011001" // /* MW 3 */ + 6164 "11101101" // /* MW 2 */ + 6165 "00000111" // /* MW 1 */ + 6166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6167 "00000000" // /* MW 1 */ + 6168 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6169 "00000000" // /* MW 1 */ + 6170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6171 "00000000" // /* MW 1 */ + 6172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6173 "00000000" // /* MW 1 */ + 6174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6175 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 + 6176 "10011000" // SUB r17, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6177 "00010001" // /* MW 3 */ + 6178 "00100011" // /* MW 2 */ + 6179 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first +.src_ref 1 "io_buffer_main.h" 351 28 + 6180 "00001100" // LDA r17, [p2, #20]; ST r17, [p1, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6181 "01100011" // /* MW 5 */ + 6182 "11101100" // /* MW 4 */ + 6183 "11010011" // /* MW 3 */ + 6184 "11000110" // /* MW 2 */ + 6185 "01001010" // /* MW 1 */ + 6186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6187 "00000000" // /* MW 1 */ + 6188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6189 "00000000" // /* MW 1 */ + 6190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6191 "00000000" // /* MW 1 */ + 6192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6193 "00000000" // /* MW 1 */ + 6194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6195 "00000000" // /* MW 1 */ + 6196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6197 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 6198 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6199 "00001000" // /* MW 3 */ + 6200 "01010001" // /* MW 2 */ + 6201 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first +.src_ref 6 "superkernels.cpp" 305 6 +.src_ref 6 "superkernels.cpp" 306 14 + 6202 "10111010" // LDA r19, [p7, #-8]; MOVXM p1, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6203 "00010000" // /* MW 9 */ + 6204 "00000000" // /* MW 8 */ + 6205 "10110001" // /* MW 7 */ + 6206 "11110000" // /* MW 6 */ + 6207 "00000001" // /* MW 5 */ + 6208 "00000000" // /* MW 4 */ + 6209 "11010000" // /* MW 3 */ + 6210 "11001110" // /* MW 2 */ + 6211 "11111100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 19 first + 6212 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6213 "01010110" // /* MW 3 */ + 6214 "00000110" // /* MW 2 */ + 6215 "00000110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 6 + 6216 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6217 "00110110" // /* MW 3 */ + 6218 "00000110" // /* MW 2 */ + 6219 "00000001" // /* MW 1 */ + 6220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6221 "00000000" // /* MW 1 */ + 6222 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6223 "00000000" // /* MW 1 */ + 6224 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6225 "00000000" // /* MW 1 */ + 6226 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6227 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 first + 6228 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6229 "00110001" // /* MW 3 */ + 6230 "00100001" // /* MW 2 */ + 6231 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 6232 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6233 "00010001" // /* MW 3 */ + 6234 "11100110" // /* MW 2 */ + 6235 "00001111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 16 first + 6236 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6237 "00101000" // /* MW 3 */ + 6238 "01100001" // /* MW 2 */ + 6239 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 6 + 6240 "10000100" // JNZ r16, #6272 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6272 delay_slots=5 */ + 6241 "00000001" // /* MW 5 */ + 6242 "01000000" // /* MW 4 */ + 6243 "01000000" // /* MW 3 */ + 6244 "00001100" // /* MW 2 */ + 6245 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6247 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6248 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6249 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6251 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6252 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6253 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6255 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 306 14 + 6256 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6257 "00000001" // /* MW 3 */ + 6258 "00100000" // /* MW 2 */ + 6259 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 306 14 first + 6260 "00110110" // NOPA; NOPB; ST r16, [p1]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6261 "11000001" // /* MW 11 */ + 6262 "00001000" // /* MW 10 */ + 6263 "10000011" // /* MW 9 */ + 6264 "00000000" // /* MW 8 */ + 6265 "00000000" // /* MW 7 */ + 6266 "00000000" // /* MW 6 */ + 6267 "00100000" // /* MW 5 */ + 6268 "00000000" // /* MW 4 */ + 6269 "11110000" // /* MW 3 */ + 6270 "00101100" // /* MW 2 */ + 6271 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 +.src_ref 6 "superkernels.cpp" 308 + 6272 "00011000" // LDA lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6273 "00111001" // /* MW 3 */ + 6274 "11110000" // /* MW 2 */ + 6275 "00000111" // /* MW 1 */ + 6276 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6277 "11110001" // /* MW 3 */ + 6278 "11111101" // /* MW 2 */ + 6279 "00000111" // /* MW 1 */ + 6280 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6281 "10011001" // /* MW 3 */ + 6282 "11110111" // /* MW 2 */ + 6283 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 6284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6285 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.noswbrkpt + 6286 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6287 "11010001" // /* MW 3 */ + 6288 "11111001" // /* MW 2 */ + 6289 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 6290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6291 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 6292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6293 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 308 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 6294 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 6295 "00000000" // /* MW 3 */ + 6296 "00101000" // /* MW 2 */ + 6297 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6298 "00011000" // MOVS p6, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6299 "00001011" // /* MW 3 */ + 6300 "10001110" // /* MW 2 */ + 6301 "00001110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 308 +.delay_slot + 6302 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6303 "00000001" // /* MW 5 */ + 6304 "00000000" // /* MW 4 */ + 6305 "00000000" // /* MW 3 */ + 6306 "11111000" // /* MW 2 */ + 6307 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6309 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6311 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0 + 6313 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv +.function shared_setup_backbone _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv +.src_ref 2 "elementwise_binary_shared.h" 164 first +.src_ref 2 "elementwise_binary_shared.h" 170 22 +.src_ref 2 "elementwise_binary_shared.h" 170 24 first +.function_start + 6320 "10111010" // LDA el0, [p1], #4; MOVXM p0, #508864 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6321 "00010000" // /* MW 9 */ + 6322 "11100000" // /* MW 8 */ + 6323 "00110001" // /* MW 7 */ + 6324 "11110000" // /* MW 6 */ + 6325 "00000001" // /* MW 5 */ + 6326 "00000000" // /* MW 4 */ + 6327 "11010000" // /* MW 3 */ + 6328 "10000101" // /* MW 2 */ + 6329 "00100011" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 36 +.src_ref 2 "elementwise_binary_shared.h" 175 36 +.src_ref 2 "elementwise_binary_shared.h" 175 48 + 6330 "10111010" // MOVA r0, #-128; MOVX r1, #256; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6331 "01011000" // /* MW 9 */ + 6332 "00000000" // /* MW 8 */ + 6333 "00001000" // /* MW 7 */ + 6334 "00001011" // /* MW 6 */ + 6335 "00010000" // /* MW 5 */ + 6336 "00001000" // /* MW 4 */ + 6337 "00000000" // /* MW 3 */ + 6338 "00000000" // /* MW 2 */ + 6339 "11110000" // /* MW 1 */ + 6340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6341 "00000000" // /* MW 1 */ + 6342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6343 "00000000" // /* MW 1 */ + 6344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6345 "00000000" // /* MW 1 */ + 6346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6347 "00000000" // /* MW 1 */ + 6348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6349 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 170 22 first + 6350 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6351 "00101001" // /* MW 3 */ + 6352 "00011100" // /* MW 2 */ + 6353 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 171 24 first + 6354 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6355 "00101110" // /* MW 3 */ + 6356 "00011100" // /* MW 2 */ + 6357 "00000001" // /* MW 1 */ + 6358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6359 "00000000" // /* MW 1 */ + 6360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6361 "00000000" // /* MW 1 */ + 6362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6363 "00000000" // /* MW 1 */ + 6364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6365 "00000000" // /* MW 1 */ + 6366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6367 "00000000" // /* MW 1 */ + 6368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6369 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 171 22 + 6370 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6371 "00101001" // /* MW 3 */ + 6372 "00011100" // /* MW 2 */ + 6373 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 172 24 first + 6374 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6375 "00101110" // /* MW 3 */ + 6376 "00000100" // /* MW 2 */ + 6377 "00000001" // /* MW 1 */ + 6378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6379 "00000000" // /* MW 1 */ + 6380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6381 "00000000" // /* MW 1 */ + 6382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6383 "00000000" // /* MW 1 */ + 6384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6385 "00000000" // /* MW 1 */ + 6386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6387 "00000000" // /* MW 1 */ + 6388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6389 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 172 22 + 6390 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6391 "00101001" // /* MW 3 */ + 6392 "00011100" // /* MW 2 */ + 6393 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 173 24 first + 6394 "10011000" // LDA r3, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6395 "01110110" // /* MW 3 */ + 6396 "00010100" // /* MW 2 */ + 6397 "00000001" // /* MW 1 */ + 6398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6399 "00000000" // /* MW 1 */ + 6400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6401 "00000000" // /* MW 1 */ + 6402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6403 "00000000" // /* MW 1 */ + 6404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6405 "00000000" // /* MW 1 */ + 6406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6407 "00000000" // /* MW 1 */ + 6408 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6409 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 173 22 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 6410 "10011000" // ST r3, [p0], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6411 "01110001" // /* MW 3 */ + 6412 "01001100" // /* MW 2 */ + 6413 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 34 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 6414 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6415 "00010111" // /* MW 3 */ + 6416 "00000100" // /* MW 2 */ + 6417 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 176 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6418 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 6419 "00000000" // /* MW 3 */ + 6420 "00101000" // /* MW 2 */ + 6421 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6422 "01000100" // MOVXM r2, #65280 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6423 "00000000" // /* MW 5 */ + 6424 "00111110" // /* MW 4 */ + 6425 "11110001" // /* MW 3 */ + 6426 "00000000" // /* MW 2 */ + 6427 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 48 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6428 "10011000" // AND r2, r3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6429 "00100100" // /* MW 3 */ + 6430 "11000100" // /* MW 2 */ + 6431 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6432 "10011000" // EQ r27, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6433 "00100111" // /* MW 3 */ + 6434 "01110110" // /* MW 2 */ + 6435 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 175 36 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6436 "00011000" // SEL.EQZ r0, r0, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6437 "10000010" // /* MW 3 */ + 6438 "00000001" // /* MW 2 */ + 6439 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_end0 + 6441 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv +.function setup _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv +.src_ref 2 "elementwise_binary_shared.h" 178 +.src_ref 2 "elementwise_binary_shared.h" 178 first +.function_start + 6448 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6449 "00000001" // /* MW 5 */ + 6450 "00000000" // /* MW 4 */ + 6451 "00000000" // /* MW 3 */ + 6452 "00001000" // /* MW 2 */ + 6453 "00000000" // /* MW 1 */ + 6454 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6455 "00111101" // /* MW 3 */ + 6456 "11111100" // /* MW 2 */ + 6457 "00001111" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 179 8 first +.no_stack_arguments + 6458 "00000100" // JL #6320 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=6320 delay_slots=5 */ + 6459 "00000001" // /* MW 5 */ + 6460 "00000000" // /* MW 4 */ + 6461 "01011000" // /* MW 3 */ + 6462 "00001100" // /* MW 2 */ + 6463 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 179 8 +.delay_slot + 6464 "01000100" // MOVXM p0, #508864 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6465 "10000000" // /* MW 5 */ + 6466 "11000111" // /* MW 4 */ + 6467 "11000000" // /* MW 3 */ + 6468 "00000111" // /* MW 2 */ + 6469 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6471 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6473 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6475 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6476 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6477 "01100111" // /* MW 3 */ + 6478 "00000001" // /* MW 2 */ + 6479 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 181 4 +.src_ref 3 "sub_impl.h" 88 27 +.return_address + 6480 "10111010" // LDA lr, [sp, #-4]; MOVXM p1, #508864 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6481 "00010000" // /* MW 9 */ + 6482 "11100000" // /* MW 8 */ + 6483 "10110001" // /* MW 7 */ + 6484 "11110000" // /* MW 6 */ + 6485 "00000001" // /* MW 5 */ + 6486 "00000000" // /* MW 4 */ + 6487 "00100000" // /* MW 3 */ + 6488 "10000111" // /* MW 2 */ + 6489 "11111111" // /* MW 1 */ + 6490 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6491 "00000000" // /* MW 1 */ + 6492 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6493 "00000000" // /* MW 1 */ + 6494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6495 "00000000" // /* MW 1 */ + 6496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6497 "00000000" // /* MW 1 */ + 6498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6499 "00000000" // /* MW 1 */ + 6500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6501 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 181 4 first + 6502 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 6503 "00000000" // /* MW 3 */ + 6504 "00101000" // /* MW 2 */ + 6505 "00010000" // /* MW 1 */ +.src_ref 3 "sub_impl.h" 88 27 +.delay_slot + 6506 "00011000" // MOVX r16, #6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6507 "00011001" // /* MW 3 */ + 6508 "00100000" // /* MW 2 */ + 6509 "00010000" // /* MW 1 */ +.src_ref 3 "sub_impl.h" 88 27 first +.delay_slot + 6510 "10011000" // ST r16, [p1, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6511 "00010001" // /* MW 3 */ + 6512 "01000110" // /* MW 2 */ + 6513 "00001001" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 181 4 first +.delay_slot + 6514 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6515 "00000001" // /* MW 5 */ + 6516 "00000000" // /* MW 4 */ + 6517 "00000000" // /* MW 3 */ + 6518 "11111000" // /* MW 2 */ + 6519 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6521 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + 6523 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS4_E +.function run _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS4_E +.src_ref 2 "elementwise_binary_shared.h" 186 first +.src_ref 2 "elementwise_binary_shared.h" 191 8 first +.tail_call +.function_start + 6528 "10000100" // J #3152 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=3152 delay_slots=5 */ + 6529 "00000000" // /* MW 5 */ + 6530 "00000000" // /* MW 4 */ + 6531 "00101000" // /* MW 3 */ + 6532 "00000110" // /* MW 2 */ + 6533 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_shared.h" 191 8 +.delay_slot + 6534 "01000100" // MOVXM p3, #508864 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6535 "10000000" // /* MW 5 */ + 6536 "11000111" // /* MW 4 */ + 6537 "11000110" // /* MW 3 */ + 6538 "00000111" // /* MW 2 */ + 6539 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6541 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6543 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6545 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS4_E__end +.label __ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS4_E___func_end0 + 6547 "00000000" // /* MW 1 */ +.label __Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0 +.label _Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.function superkernel_sub1d _Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.src_ref 6 "superkernels.cpp" 314 first +.src_ref 6 "superkernels.cpp" 319 6 +.function_start + 6560 "01000100" // MOVXM p4, #508416 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6561 "00000000" // /* MW 5 */ + 6562 "11000100" // /* MW 4 */ + 6563 "11001000" // /* MW 3 */ + 6564 "00000111" // /* MW 2 */ + 6565 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 319 6 first + 6566 "11010100" // LDA r16, [p4]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6567 "11000001" // /* MW 5 */ + 6568 "10110101" // /* MW 4 */ + 6569 "11011000" // /* MW 3 */ + 6570 "11000010" // /* MW 2 */ + 6571 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 314 + 6572 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6573 "00000001" // /* MW 5 */ + 6574 "00000000" // /* MW 4 */ + 6575 "00000000" // /* MW 3 */ + 6576 "00001000" // /* MW 2 */ + 6577 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 316 22 first + 6578 "00111010" // ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6579 "01111001" // /* MW 9 */ + 6580 "01100000" // /* MW 8 */ + 6581 "11001010" // /* MW 7 */ + 6582 "10000001" // /* MW 6 */ + 6583 "00010100" // /* MW 5 */ + 6584 "00100011" // /* MW 4 */ + 6585 "10110000" // /* MW 3 */ + 6586 "00111010" // /* MW 2 */ + 6587 "11111111" // /* MW 1 */ + 6588 "00000010" // ST p0, [sp, #-20]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6589 "01110000" // /* MW 7 */ + 6590 "11010000" // /* MW 6 */ + 6591 "00001011" // /* MW 5 */ + 6592 "00000000" // /* MW 4 */ + 6593 "10110000" // /* MW 3 */ + 6594 "10000011" // /* MW 2 */ + 6595 "11111101" // /* MW 1 */ + 6596 "10011000" // ST r0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6597 "00010101" // /* MW 3 */ + 6598 "11111100" // /* MW 2 */ + 6599 "00001111" // /* MW 1 */ + 6600 "10011000" // ST lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6601 "00111101" // /* MW 3 */ + 6602 "11110000" // /* MW 2 */ + 6603 "00001111" // /* MW 1 */ + 6604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6605 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 319 6 first +.src_ref 6 "superkernels.cpp" 319 16 first + 6606 "10000100" // JNZ r16, #6752 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6752 delay_slots=5 */ + 6607 "00000001" // /* MW 5 */ + 6608 "01000000" // /* MW 4 */ + 6609 "00110000" // /* MW 3 */ + 6610 "00001101" // /* MW 2 */ + 6611 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 316 30 first +.delay_slot + 6612 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6613 "11111011" // /* MW 3 */ + 6614 "01100011" // /* MW 2 */ + 6615 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 316 11 +.delay_slot + 6616 "01000100" // MOVXM p2, #508424 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6617 "00010000" // /* MW 5 */ + 6618 "11000100" // /* MW 4 */ + 6619 "11000100" // /* MW 3 */ + 6620 "00000111" // /* MW 2 */ + 6621 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 316 11 +.delay_slot + 6622 "00000010" // ST r17, [p2]; MOV p2, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6623 "01110000" // /* MW 7 */ + 6624 "01100000" // /* MW 6 */ + 6625 "00110111" // /* MW 5 */ + 6626 "00000001" // /* MW 4 */ + 6627 "00110000" // /* MW 3 */ + 6628 "11000110" // /* MW 2 */ + 6629 "01000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.delay_slot + 6630 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6631 "11000000" // /* MW 3 */ + 6632 "11010110" // /* MW 2 */ + 6633 "00011011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 322 4 +.src_ref 6 "superkernels.cpp" 324 28 +.src_ref 6 "superkernels.cpp" 326 42 +.src_ref 6 "superkernels.cpp" 338 2 +.delay_slot + 6634 "00111010" // ST p2, [sp, #-12]; MOVXM p7, #508864 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6635 "00010001" // /* MW 9 */ + 6636 "11100000" // /* MW 8 */ + 6637 "10110001" // /* MW 7 */ + 6638 "11110011" // /* MW 6 */ + 6639 "00000001" // /* MW 5 */ + 6640 "00000000" // /* MW 4 */ + 6641 "10110000" // /* MW 3 */ + 6642 "10100011" // /* MW 2 */ + 6643 "11111110" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.src_ref 6 "superkernels.cpp" 322 4 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 6644 "00111010" // MOVS p0, p7; MOVXM p2, #508464 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6645 "00010001" // /* MW 9 */ + 6646 "00011000" // /* MW 8 */ + 6647 "00110001" // /* MW 7 */ + 6648 "11110001" // /* MW 6 */ + 6649 "00000001" // /* MW 5 */ + 6650 "00000000" // /* MW 4 */ + 6651 "01100000" // /* MW 3 */ + 6652 "10010001" // /* MW 2 */ + 6653 "00010011" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 6654 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #508460 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6655 "00010000" // /* MW 9 */ + 6656 "00010110" // /* MW 8 */ + 6657 "00110001" // /* MW 7 */ + 6658 "11110001" // /* MW 6 */ + 6659 "00000001" // /* MW 5 */ + 6660 "00000000" // /* MW 4 */ + 6661 "11100000" // /* MW 3 */ + 6662 "11000000" // /* MW 2 */ + 6663 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6665 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 322 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 6666 "00000100" // JL #6448 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=6448 delay_slots=5 */ + 6667 "00000001" // /* MW 5 */ + 6668 "00000000" // /* MW 4 */ + 6669 "10011000" // /* MW 3 */ + 6670 "00001100" // /* MW 2 */ + 6671 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6673 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6674 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6675 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6676 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6677 "00110001" // /* MW 3 */ + 6678 "00100000" // /* MW 2 */ + 6679 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 6680 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6681 "00000101" // /* MW 3 */ + 6682 "00100000" // /* MW 2 */ + 6683 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 6684 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6685 "00010001" // /* MW 3 */ + 6686 "00000110" // /* MW 2 */ + 6687 "00001010" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 326 18 +.src_ref 6 "superkernels.cpp" 326 42 first +.return_address + 6688 "10111010" // LDA r16, [p7]; MOVXM p1, #508424 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6689 "00010000" // /* MW 9 */ + 6690 "00000100" // /* MW 8 */ + 6691 "10110001" // /* MW 7 */ + 6692 "11110000" // /* MW 6 */ + 6693 "00000001" // /* MW 5 */ + 6694 "00000000" // /* MW 4 */ + 6695 "11010000" // /* MW 3 */ + 6696 "11000010" // /* MW 2 */ + 6697 "11100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 326 16 +.src_ref 6 "superkernels.cpp" 326 18 +.src_ref 6 "superkernels.cpp" 335 48 + 6698 "10111010" // LDA r17, [p1]; MOVXM p3, #508428 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6699 "00010000" // /* MW 9 */ + 6700 "00000110" // /* MW 8 */ + 6701 "10110001" // /* MW 7 */ + 6702 "11110001" // /* MW 6 */ + 6703 "00000001" // /* MW 5 */ + 6704 "00000000" // /* MW 4 */ + 6705 "11010000" // /* MW 3 */ + 6706 "11000110" // /* MW 2 */ + 6707 "00100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 324 28 first +.src_ref 6 "superkernels.cpp" 327 16 +.src_ref 6 "superkernels.cpp" 336 48 + 6708 "10111010" // LDA.u16 r18, [p7, #10]; MOVXM p1, #508432 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6709 "00010000" // /* MW 9 */ + 6710 "00001000" // /* MW 8 */ + 6711 "10110001" // /* MW 7 */ + 6712 "11110000" // /* MW 6 */ + 6713 "00000001" // /* MW 5 */ + 6714 "00000000" // /* MW 4 */ + 6715 "01010000" // /* MW 3 */ + 6716 "11001011" // /* MW 2 */ + 6717 "11101010" // /* MW 1 */ + 6718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6719 "00000000" // /* MW 1 */ + 6720 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6721 "00000000" // /* MW 1 */ + 6722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6723 "00000000" // /* MW 1 */ + 6724 "10000100" // J #6768 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6768 delay_slots=5 */ + 6725 "00000000" // /* MW 5 */ + 6726 "00000000" // /* MW 4 */ + 6727 "00111000" // /* MW 3 */ + 6728 "00001101" // /* MW 2 */ + 6729 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 324 13 +.delay_slot + 6730 "01000100" // MOVXM p2, #508456 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6731 "01010000" // /* MW 5 */ + 6732 "11000100" // /* MW 4 */ + 6733 "11000100" // /* MW 3 */ + 6734 "00000111" // /* MW 2 */ + 6735 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 326 27 first +.delay_slot + 6736 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6737 "00001111" // /* MW 3 */ + 6738 "01100001" // /* MW 2 */ + 6739 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 324 13 first +.delay_slot + 6740 "10011000" // ST r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6741 "01010001" // /* MW 3 */ + 6742 "00000110" // /* MW 2 */ + 6743 "00001010" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 326 16 first +.delay_slot + 6744 "10011000" // ST r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6745 "00010001" // /* MW 3 */ + 6746 "00000110" // /* MW 2 */ + 6747 "00001011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 327 16 first +.delay_slot + 6748 "10011000" // ST r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6749 "00010001" // /* MW 3 */ + 6750 "00000110" // /* MW 2 */ + 6751 "00001001" // /* MW 1 */ +.label TGT_F_Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 +.src_ref 6 "superkernels.cpp" 335 48 + 6752 "01000100" // MOVXM p3, #508428 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6753 "00011000" // /* MW 5 */ + 6754 "11000100" // /* MW 4 */ + 6755 "11000110" // /* MW 3 */ + 6756 "00000111" // /* MW 2 */ + 6757 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 336 48 + 6758 "10111010" // NOPA; MOVXM p1, #508432 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6759 "00010000" // /* MW 9 */ + 6760 "00001000" // /* MW 8 */ + 6761 "10110001" // /* MW 7 */ + 6762 "11110000" // /* MW 6 */ + 6763 "00000001" // /* MW 5 */ + 6764 "00000000" // /* MW 4 */ + 6765 "11110000" // /* MW 3 */ + 6766 "00101100" // /* MW 2 */ + 6767 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 +.src_ref 1 "io_buffer_main.h" 242 49 first + 6768 "00011000" // ADD.NC p0, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6769 "10000110" // /* MW 3 */ + 6770 "01100111" // /* MW 2 */ + 6771 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 330 2 + 6772 "10111010" // LDA r27, [p0], #-4; MOVXM p2, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6773 "00010000" // /* MW 9 */ + 6774 "00000000" // /* MW 8 */ + 6775 "00110001" // /* MW 7 */ + 6776 "11110001" // /* MW 6 */ + 6777 "00000001" // /* MW 5 */ + 6778 "00000000" // /* MW 4 */ + 6779 "11010000" // /* MW 3 */ + 6780 "11101110" // /* MW 2 */ + 6781 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 6782 "10011000" // LDA r16, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6783 "00010110" // /* MW 3 */ + 6784 "11111110" // /* MW 2 */ + 6785 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 6786 "10011000" // LDA r17, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6787 "00110110" // /* MW 3 */ + 6788 "11111110" // /* MW 2 */ + 6789 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 330 2 first + 6790 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6791 "01010110" // /* MW 3 */ + 6792 "00000110" // /* MW 2 */ + 6793 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first + 6794 "10011000" // LDA r19, [p0, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6795 "01110110" // /* MW 3 */ + 6796 "01000110" // /* MW 2 */ + 6797 "00000000" // /* MW 1 */ + 6798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6799 "00000000" // /* MW 1 */ + 6800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6801 "00000000" // /* MW 1 */ + 6802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6803 "00000000" // /* MW 1 */ + 6804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6805 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 6806 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6807 "00000010" // /* MW 3 */ + 6808 "01100001" // /* MW 2 */ + 6809 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 +.src_ref 6 "superkernels.cpp" 330 2 first + 6810 "01011100" // ST r16, [p0]; ADD r16, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6811 "00001110" // /* MW 5 */ + 6812 "01000000" // /* MW 4 */ + 6813 "00111001" // /* MW 3 */ + 6814 "11000010" // /* MW 2 */ + 6815 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 330 2 + 6816 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6817 "00010001" // /* MW 3 */ + 6818 "00000110" // /* MW 2 */ + 6819 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 +.src_ref 1 "io_buffer_main.h" 419 8 + 6820 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6821 "11111101" // /* MW 3 */ + 6822 "11100000" // /* MW 2 */ + 6823 "00010111" // /* MW 1 */ + 6824 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6825 "00000000" // /* MW 1 */ + 6826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6827 "00000000" // /* MW 1 */ + 6828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6829 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 6830 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6831 "00001000" // /* MW 3 */ + 6832 "11010011" // /* MW 2 */ + 6833 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 first + 6834 "00011000" // ADD.NC p2, r14, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6835 "00000110" // /* MW 3 */ + 6836 "01100111" // /* MW 2 */ + 6837 "00011010" // /* MW 1 */ + 6838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6839 "00000000" // /* MW 1 */ + 6840 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6841 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 + 6842 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6843 "01110110" // /* MW 3 */ + 6844 "11111111" // /* MW 2 */ + 6845 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 6846 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6847 "00110110" // /* MW 3 */ + 6848 "11111110" // /* MW 2 */ + 6849 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 6850 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6851 "01010110" // /* MW 3 */ + 6852 "11111110" // /* MW 2 */ + 6853 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 47 first + 6854 "10011000" // LDA r19, [p2, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6855 "01110110" // /* MW 3 */ + 6856 "01010110" // /* MW 2 */ + 6857 "00000010" // /* MW 1 */ + 6858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6859 "00000000" // /* MW 1 */ + 6860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6861 "00000000" // /* MW 1 */ + 6862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6863 "00000000" // /* MW 1 */ + 6864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6865 "00000000" // /* MW 1 */ + 6866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6867 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 6868 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6869 "00010010" // /* MW 3 */ + 6870 "10100011" // /* MW 2 */ + 6871 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 + 6872 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6873 "00110001" // /* MW 3 */ + 6874 "00000110" // /* MW 2 */ + 6875 "00001010" // /* MW 1 */ + 6876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6877 "00000000" // /* MW 1 */ + 6878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6879 "00000000" // /* MW 1 */ + 6880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6881 "00000000" // /* MW 1 */ + 6882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6883 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 6884 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6885 "00001000" // /* MW 3 */ + 6886 "11010011" // /* MW 2 */ + 6887 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 32 +.src_ref 6 "superkernels.cpp" 335 46 +.src_ref 6 "superkernels.cpp" 336 46 + 6888 "00111010" // MOVS p6, p2; MOVX r16, #1; MOV r14, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6889 "01111001" // /* MW 9 */ + 6890 "01100000" // /* MW 8 */ + 6891 "11001110" // /* MW 7 */ + 6892 "00101001" // /* MW 6 */ + 6893 "00000000" // /* MW 5 */ + 6894 "00000001" // /* MW 4 */ + 6895 "01100000" // /* MW 3 */ + 6896 "00010001" // /* MW 2 */ + 6897 "11010001" // /* MW 1 */ + 6898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6899 "00000000" // /* MW 1 */ + 6900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6901 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 + 6902 "00011000" // LDA p4, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6903 "00011001" // /* MW 3 */ + 6904 "11101110" // /* MW 2 */ + 6905 "00000111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 335 48 first + 6906 "00001100" // LDA r17, [p3]; ST p0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6907 "00111011" // /* MW 5 */ + 6908 "11011000" // /* MW 4 */ + 6909 "11011111" // /* MW 3 */ + 6910 "11000110" // /* MW 2 */ + 6911 "01100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 336 48 first +.src_ref 6 "superkernels.cpp" 338 2 + 6912 "11010100" // LDA r20, [p1]; MOV p3, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6913 "10000001" // /* MW 5 */ + 6914 "11011101" // /* MW 4 */ + 6915 "11010110" // /* MW 3 */ + 6916 "11010010" // /* MW 2 */ + 6917 "00100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 6918 "10011000" // LDA r18, [p2], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6919 "01010110" // /* MW 3 */ + 6920 "01001110" // /* MW 2 */ + 6921 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 6922 "10011000" // LDA p2, [p0], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6923 "00011110" // /* MW 3 */ + 6924 "01011101" // /* MW 2 */ + 6925 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 40 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6926 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6927 "11000000" // /* MW 3 */ + 6928 "01100000" // /* MW 2 */ + 6929 "00011111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6931 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6932 "10011000" // LDA r19, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6933 "01110110" // /* MW 3 */ + 6934 "00000110" // /* MW 2 */ + 6935 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6937 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 338 2 first +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 6938 "00000100" // JL #6528 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=6528 delay_slots=5 */ + 6939 "00000001" // /* MW 5 */ + 6940 "00000000" // /* MW 4 */ + 6941 "11000000" // /* MW 3 */ + 6942 "00001100" // /* MW 2 */ + 6943 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 40 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6944 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6945 "11000000" // /* MW 3 */ + 6946 "11010100" // /* MW 2 */ + 6947 "00011011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 335 46 first +.delay_slot + 6948 "10011000" // LSHL r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6949 "00001101" // /* MW 3 */ + 6950 "01100011" // /* MW 2 */ + 6951 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 336 46 first +.delay_slot + 6952 "10011000" // LSHL r16, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6953 "00001101" // /* MW 3 */ + 6954 "00100001" // /* MW 2 */ + 6955 "00010101" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 336 46 +.delay_slot + 6956 "01011000" // ADD.NC p1, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6957 "01000001" // /* MW 3 */ + 6958 "01101001" // /* MW 2 */ + 6959 "00011001" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 335 46 first +.delay_slot + 6960 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6961 "00000000" // /* MW 15 */ + 6962 "00000000" // /* MW 14 */ + 6963 "10101000" // /* MW 13 */ + 6964 "11100010" // /* MW 12 */ + 6965 "00110100" // /* MW 11 */ + 6966 "00000000" // /* MW 10 */ + 6967 "00000000" // /* MW 9 */ + 6968 "00000000" // /* MW 8 */ + 6969 "01011011" // /* MW 7 */ + 6970 "00000001" // /* MW 6 */ + 6971 "00100000" // /* MW 5 */ + 6972 "00000000" // /* MW 4 */ + 6973 "11110000" // /* MW 3 */ + 6974 "00101100" // /* MW 2 */ + 6975 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 32 first +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 351 40 +.src_ref 1 "io_buffer_main.h" 449 8 +.src_ref 1 "io_buffer_main.h" 449 8 +.return_address + 6976 "10111010" // LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6977 "01111000" // /* MW 9 */ + 6978 "11010000" // /* MW 8 */ + 6979 "10110011" // /* MW 7 */ + 6980 "00101000" // /* MW 6 */ + 6981 "00000000" // /* MW 5 */ + 6982 "00000001" // /* MW 4 */ + 6983 "11010000" // /* MW 3 */ + 6984 "11000110" // /* MW 2 */ + 6985 "11001000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 342 19 + 6986 "01000100" // MOVXM p6, #508456 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6987 "01010000" // /* MW 5 */ + 6988 "11000100" // /* MW 4 */ + 6989 "11001100" // /* MW 3 */ + 6990 "00000111" // /* MW 2 */ + 6991 "00000000" // /* MW 1 */ + 6992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6993 "00000000" // /* MW 1 */ + 6994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6995 "00000000" // /* MW 1 */ + 6996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6997 "00000000" // /* MW 1 */ + 6998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6999 "00000000" // /* MW 1 */ + 7000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7001 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 7002 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7003 "00001000" // /* MW 3 */ + 7004 "01010001" // /* MW 2 */ + 7005 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first + 7006 "10011000" // LDA r17, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7007 "00110110" // /* MW 3 */ + 7008 "11110110" // /* MW 2 */ + 7009 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 + 7010 "00011000" // LDA p2, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7011 "00011001" // /* MW 3 */ + 7012 "11101101" // /* MW 2 */ + 7013 "00000111" // /* MW 1 */ + 7014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7015 "00000000" // /* MW 1 */ + 7016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7017 "00000000" // /* MW 1 */ + 7018 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7019 "00000000" // /* MW 1 */ + 7020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7021 "00000000" // /* MW 1 */ + 7022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7023 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 + 7024 "10011000" // SUB r17, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7025 "00010001" // /* MW 3 */ + 7026 "00100011" // /* MW 2 */ + 7027 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first +.src_ref 1 "io_buffer_main.h" 351 28 + 7028 "00001100" // LDA r17, [p2, #20]; ST r17, [p1, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7029 "01100011" // /* MW 5 */ + 7030 "11101100" // /* MW 4 */ + 7031 "11010011" // /* MW 3 */ + 7032 "11000110" // /* MW 2 */ + 7033 "01001010" // /* MW 1 */ + 7034 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7035 "00000000" // /* MW 1 */ + 7036 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7037 "00000000" // /* MW 1 */ + 7038 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7039 "00000000" // /* MW 1 */ + 7040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7041 "00000000" // /* MW 1 */ + 7042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7043 "00000000" // /* MW 1 */ + 7044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7045 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 7046 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7047 "00001000" // /* MW 3 */ + 7048 "01010001" // /* MW 2 */ + 7049 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first +.src_ref 6 "superkernels.cpp" 342 6 +.src_ref 6 "superkernels.cpp" 343 14 + 7050 "10111010" // LDA r19, [p7, #-8]; MOVXM p1, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7051 "00010000" // /* MW 9 */ + 7052 "00000000" // /* MW 8 */ + 7053 "10110001" // /* MW 7 */ + 7054 "11110000" // /* MW 6 */ + 7055 "00000001" // /* MW 5 */ + 7056 "00000000" // /* MW 4 */ + 7057 "11010000" // /* MW 3 */ + 7058 "11001110" // /* MW 2 */ + 7059 "11111100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 342 19 first + 7060 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7061 "01010110" // /* MW 3 */ + 7062 "00000110" // /* MW 2 */ + 7063 "00000110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 342 6 + 7064 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7065 "00110110" // /* MW 3 */ + 7066 "00000110" // /* MW 2 */ + 7067 "00000001" // /* MW 1 */ + 7068 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7069 "00000000" // /* MW 1 */ + 7070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7071 "00000000" // /* MW 1 */ + 7072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7073 "00000000" // /* MW 1 */ + 7074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7075 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 first + 7076 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7077 "00110001" // /* MW 3 */ + 7078 "00100001" // /* MW 2 */ + 7079 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 7080 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7081 "00010001" // /* MW 3 */ + 7082 "11100110" // /* MW 2 */ + 7083 "00001111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 342 16 first + 7084 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7085 "00101000" // /* MW 3 */ + 7086 "01100001" // /* MW 2 */ + 7087 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 342 6 + 7088 "10000100" // JNZ r16, #7120 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7120 delay_slots=5 */ + 7089 "00000001" // /* MW 5 */ + 7090 "01000000" // /* MW 4 */ + 7091 "11101000" // /* MW 3 */ + 7092 "00001101" // /* MW 2 */ + 7093 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7095 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7097 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7099 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7100 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7101 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7103 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 343 14 + 7104 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7105 "00000001" // /* MW 3 */ + 7106 "00100000" // /* MW 2 */ + 7107 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 343 14 first + 7108 "00110110" // NOPA; NOPB; ST r16, [p1]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7109 "11000001" // /* MW 11 */ + 7110 "00001000" // /* MW 10 */ + 7111 "10000011" // /* MW 9 */ + 7112 "00000000" // /* MW 8 */ + 7113 "00000000" // /* MW 7 */ + 7114 "00000000" // /* MW 6 */ + 7115 "00100000" // /* MW 5 */ + 7116 "00000000" // /* MW 4 */ + 7117 "11110000" // /* MW 3 */ + 7118 "00101100" // /* MW 2 */ + 7119 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 +.src_ref 6 "superkernels.cpp" 345 + 7120 "00011000" // LDA lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7121 "00111001" // /* MW 3 */ + 7122 "11110000" // /* MW 2 */ + 7123 "00000111" // /* MW 1 */ + 7124 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7125 "11110001" // /* MW 3 */ + 7126 "11111101" // /* MW 2 */ + 7127 "00000111" // /* MW 1 */ + 7128 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7129 "10011001" // /* MW 3 */ + 7130 "11110111" // /* MW 2 */ + 7131 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 7132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7133 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.noswbrkpt + 7134 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7135 "11010001" // /* MW 3 */ + 7136 "11111001" // /* MW 2 */ + 7137 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7139 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7141 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 345 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7142 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 7143 "00000000" // /* MW 3 */ + 7144 "00101000" // /* MW 2 */ + 7145 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7146 "00011000" // MOVS p6, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7147 "00001011" // /* MW 3 */ + 7148 "10001110" // /* MW 2 */ + 7149 "00001110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 345 +.delay_slot + 7150 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7151 "00000001" // /* MW 5 */ + 7152 "00000000" // /* MW 4 */ + 7153 "00000000" // /* MW 3 */ + 7154 "11111000" // /* MW 2 */ + 7155 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7157 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7159 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end +.label __Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0 + 7161 "00000000" // /* MW 1 */ +.label __ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv___func_begin0 +.label _ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv +.function setup _ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv +.src_ref 2 "reduce_base.h" 144 first +.src_ref 2 "reduce_base.h" 146 25 +.src_ref 2 "reduce_base.h" 146 27 first +.function_start + 7168 "10111010" // LDA r1, [p1], #4; MOVXM p0, #508480 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7169 "00010000" // /* MW 9 */ + 7170 "00100000" // /* MW 8 */ + 7171 "00110001" // /* MW 7 */ + 7172 "11110000" // /* MW 6 */ + 7173 "00000001" // /* MW 5 */ + 7174 "00000000" // /* MW 4 */ + 7175 "11010000" // /* MW 3 */ + 7176 "10000110" // /* MW 2 */ + 7177 "00100011" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 152 30 +.src_ref 2 "reduce_base.h" 154 31 +.src_ref 2 "reduce_base.h" 155 8 + 7178 "10111010" // MOVA r24, #0; MOVX r4, #4; MOV m0, #6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7179 "01011000" // /* MW 9 */ + 7180 "00000110" // /* MW 8 */ + 7181 "00000000" // /* MW 7 */ + 7182 "10001000" // /* MW 6 */ + 7183 "01000000" // /* MW 5 */ + 7184 "00000000" // /* MW 4 */ + 7185 "00000000" // /* MW 3 */ + 7186 "00011000" // /* MW 2 */ + 7187 "00000000" // /* MW 1 */ + 7188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7189 "00000000" // /* MW 1 */ + 7190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7191 "00000000" // /* MW 1 */ + 7192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7193 "00000000" // /* MW 1 */ + 7194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7195 "00000000" // /* MW 1 */ + 7196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7197 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 146 25 first + 7198 "10011000" // ST r1, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7199 "00110001" // /* MW 3 */ + 7200 "00011100" // /* MW 2 */ + 7201 "00001000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 147 28 first + 7202 "10011000" // LDA r6, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7203 "11010110" // /* MW 3 */ + 7204 "00011100" // /* MW 2 */ + 7205 "00000001" // /* MW 1 */ + 7206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7207 "00000000" // /* MW 1 */ + 7208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7209 "00000000" // /* MW 1 */ + 7210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7211 "00000000" // /* MW 1 */ + 7212 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7213 "00000000" // /* MW 1 */ + 7214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7215 "00000000" // /* MW 1 */ + 7216 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7217 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 147 26 +.src_ref 2 "reduce_base.h" 189 37 first + 7218 "01011100" // ST r6, [p0], #4; ADD r0, r6, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7219 "11111110" // /* MW 5 */ + 7220 "00000011" // /* MW 4 */ + 7221 "00110011" // /* MW 3 */ + 7222 "10011010" // /* MW 2 */ + 7223 "00000011" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 148 26 first + 7224 "10011000" // LDA r5, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7225 "10110110" // /* MW 3 */ + 7226 "00011100" // /* MW 2 */ + 7227 "00000001" // /* MW 1 */ + 7228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7229 "00000000" // /* MW 1 */ + 7230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7231 "00000000" // /* MW 1 */ + 7232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7233 "00000000" // /* MW 1 */ + 7234 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7235 "00000000" // /* MW 1 */ + 7236 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7237 "00000000" // /* MW 1 */ + 7238 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7239 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 148 24 +.src_ref 2 "reduce_base.h" 191 53 first + 7240 "01011100" // ST r5, [p0], #4; MUL r7, r5, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7241 "11011111" // /* MW 5 */ + 7242 "10011100" // /* MW 4 */ + 7243 "00110010" // /* MW 3 */ + 7244 "10010110" // /* MW 2 */ + 7245 "00000011" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 149 29 first + 7246 "10011000" // LDA r16, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7247 "00010110" // /* MW 3 */ + 7248 "00011110" // /* MW 2 */ + 7249 "00000001" // /* MW 1 */ + 7250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7251 "00000000" // /* MW 1 */ + 7252 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7253 "00000000" // /* MW 1 */ + 7254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7255 "00000000" // /* MW 1 */ + 7256 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7257 "00000000" // /* MW 1 */ + 7258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7259 "00000000" // /* MW 1 */ + 7260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7261 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 149 27 +.src_ref 2 "reduce_base.h" 155 8 first + 7262 "01011100" // ST r16, [p0], #4; EQ r4, r4, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7263 "00001111" // /* MW 5 */ + 7264 "00010010" // /* MW 4 */ + 7265 "00110010" // /* MW 3 */ + 7266 "11000010" // /* MW 2 */ + 7267 "00000011" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 150 33 first + 7268 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7269 "00101110" // /* MW 3 */ + 7270 "00011100" // /* MW 2 */ + 7271 "00000001" // /* MW 1 */ + 7272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7273 "00000000" // /* MW 1 */ + 7274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7275 "00000000" // /* MW 1 */ + 7276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7277 "00000000" // /* MW 1 */ + 7278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7279 "00000000" // /* MW 1 */ + 7280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7281 "00000000" // /* MW 1 */ + 7282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7283 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 150 31 +.src_ref 2 "reduce_base.h" 153 53 + 7284 "00000010" // ST el0, [p0], #4; MOV r18, el0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7285 "01110000" // /* MW 7 */ + 7286 "00001110" // /* MW 6 */ + 7287 "01010000" // /* MW 5 */ + 7288 "00000010" // /* MW 4 */ + 7289 "00110000" // /* MW 3 */ + 7290 "10000101" // /* MW 2 */ + 7291 "00000011" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 151 34 first + 7292 "10011000" // LDA r19, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7293 "01110110" // /* MW 3 */ + 7294 "00011110" // /* MW 2 */ + 7295 "00000001" // /* MW 1 */ + 7296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7297 "00000000" // /* MW 1 */ + 7298 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7299 "00000000" // /* MW 1 */ + 7300 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7301 "00000000" // /* MW 1 */ + 7302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7303 "00000000" // /* MW 1 */ + 7304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7305 "00000000" // /* MW 1 */ + 7306 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7307 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 151 32 +.src_ref 2 "reduce_base.h" 153 53 first + 7308 "01011100" // ST r19, [p0], #4; MUL r18, r19, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7309 "01011111" // /* MW 5 */ + 7310 "11001010" // /* MW 4 */ + 7311 "00111001" // /* MW 3 */ + 7312 "11001110" // /* MW 2 */ + 7313 "00000011" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 152 32 first + 7314 "10011000" // LDA eh0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7315 "00001110" // /* MW 3 */ + 7316 "00000100" // /* MW 2 */ + 7317 "00000001" // /* MW 1 */ + 7318 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7319 "00000000" // /* MW 1 */ + 7320 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7321 "00000000" // /* MW 1 */ + 7322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7323 "00000000" // /* MW 1 */ + 7324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7325 "00000000" // /* MW 1 */ + 7326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7327 "00000000" // /* MW 1 */ + 7328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7329 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 152 30 +.src_ref 2 "reduce_base.h" 153 79 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7330 "00000010" // ST eh0, [p0], m0; MOV r20, eh0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7331 "01110000" // /* MW 7 */ + 7332 "10001110" // /* MW 6 */ + 7333 "10010000" // /* MW 5 */ + 7334 "00000010" // /* MW 4 */ + 7335 "00110000" // /* MW 3 */ + 7336 "00000001" // /* MW 2 */ + 7337 "00000001" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 153 28 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7338 "00011000" // ST.s16 r18, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7339 "01010111" // /* MW 3 */ + 7340 "00101110" // /* MW 2 */ + 7341 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 153 79 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7342 "10011000" // MUL r18, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7343 "01001111" // /* MW 3 */ + 7344 "10100101" // /* MW 2 */ + 7345 "00010100" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 155 8 first + 7346 "10000100" // JNZ r4, #7440 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7440 delay_slots=5 */ + 7347 "00000001" // /* MW 5 */ + 7348 "01000000" // /* MW 4 */ + 7349 "10001000" // /* MW 3 */ + 7350 "00001110" // /* MW 2 */ + 7351 "00100000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 167 55 +.src_ref 2 "reduce_base.h" 172 89 +.src_ref 2 "reduce_base.h" 187 53 +.src_ref 2 "reduce_base.h" 193 89 +.src_ref 2 "reduce_base.h" 195 55 +.delay_slot + 7352 "00011000" // MOVX r2, #-5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7353 "11101101" // /* MW 3 */ + 7354 "11000100" // /* MW 2 */ + 7355 "00010111" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 187 53 first +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 7356 "10011000" // LSHL r3, r5, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7357 "00101101" // /* MW 3 */ + 7358 "01000110" // /* MW 2 */ + 7359 "00010001" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 193 70 first +.delay_slot +.aggressive_scheduled_block_id 2 +.noswbrkpt + 7360 "10011000" // MUL r3, r7, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7361 "00011111" // /* MW 3 */ + 7362 "11000110" // /* MW 2 */ + 7363 "00010001" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 187 37 first +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7364 "00011000" // ADD r17, r3, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7365 "11111111" // /* MW 3 */ + 7366 "11100011" // /* MW 2 */ + 7367 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 154 31 first +.src_ref 2 "reduce_base.h" 193 89 first +.delay_slot + 7368 "00101100" // ST.s16 r24, [p0]; LSHL r3, r3, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7369 "01011011" // /* MW 5 */ + 7370 "10001100" // /* MW 4 */ + 7371 "11100001" // /* MW 3 */ + 7372 "11100010" // /* MW 2 */ + 7373 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 155 8 + 7374 "00011000" // MOVX r18, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7375 "00001001" // /* MW 3 */ + 7376 "00100100" // /* MW 2 */ + 7377 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 155 8 first + 7378 "10011000" // EQ r18, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7379 "00000111" // /* MW 3 */ + 7380 "10100101" // /* MW 2 */ + 7381 "00010100" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 155 8 + 7382 "10000100" // JNZ r18, #7840 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7840 delay_slots=5 */ + 7383 "00000001" // /* MW 5 */ + 7384 "01000000" // /* MW 4 */ + 7385 "01010000" // /* MW 3 */ + 7386 "00001111" // /* MW 2 */ + 7387 "10010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7389 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7391 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7393 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7394 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7395 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 178 37 first +.delay_slot + 7396 "00011000" // ADD r4, r1, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7397 "11111111" // /* MW 3 */ + 7398 "01001001" // /* MW 2 */ + 7399 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 155 8 + 7400 "00011000" // MOVX r6, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7401 "00000101" // /* MW 3 */ + 7402 "00001100" // /* MW 2 */ + 7403 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 155 8 first + 7404 "10011000" // EQ r6, r6, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7405 "00000111" // /* MW 3 */ + 7406 "10001101" // /* MW 2 */ + 7407 "00010001" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 155 8 + 7408 "10000100" // JNZ r6, #7680 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7680 delay_slots=5 */ + 7409 "00000001" // /* MW 5 */ + 7410 "01000000" // /* MW 4 */ + 7411 "00000000" // /* MW 3 */ + 7412 "00001111" // /* MW 2 */ + 7413 "00110000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7415 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7417 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7419 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7421 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7423 "00000000" // /* MW 1 */ + 7424 "10000100" // J #7632 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=7632 delay_slots=5 */ + 7425 "00000000" // /* MW 5 */ + 7426 "00000000" // /* MW 4 */ + 7427 "11101000" // /* MW 3 */ + 7428 "00001110" // /* MW 2 */ + 7429 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7431 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7433 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7435 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7437 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7439 "00000000" // /* MW 1 */ +.label TGT_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv_272 +.src_ref 2 "reduce_base.h" 186 34 +.src_ref 2 "reduce_base.h" 186 34 +.src_ref 2 "reduce_base.h" 188 34 +.src_ref 2 "reduce_base.h" 190 36 + 7440 "10111010" // MOVA r1, #32; MOVXM p2, #508518 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7441 "00010000" // /* MW 9 */ + 7442 "00110011" // /* MW 8 */ + 7443 "00110001" // /* MW 7 */ + 7444 "11110001" // /* MW 6 */ + 7445 "00000001" // /* MW 5 */ + 7446 "00000000" // /* MW 4 */ + 7447 "00000000" // /* MW 3 */ + 7448 "00000001" // /* MW 2 */ + 7449 "00000100" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 192 34 +.src_ref 2 "reduce_base.h" 194 38 +.src_ref 2 "reduce_base.h" 195 55 first + 7450 "10111010" // MOVA r4, #32; LSHL r16, r7, r2; MOV r2, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7451 "01111000" // /* MW 9 */ + 7452 "00001110" // /* MW 8 */ + 7453 "01010000" // /* MW 7 */ + 7454 "01101100" // /* MW 6 */ + 7455 "00000001" // /* MW 5 */ + 7456 "00001111" // /* MW 4 */ + 7457 "00000000" // /* MW 3 */ + 7458 "00000100" // /* MW 2 */ + 7459 "00000100" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 194 36 +.src_ref 2 "reduce_base.h" 194 38 first + 7460 "01100100" // MSC r4, r4, r5, r6; MOV dj0, #26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7461 "01101001" // /* MW 5 */ + 7462 "00000000" // /* MW 4 */ + 7463 "11000001" // /* MW 3 */ + 7464 "00001101" // /* MW 2 */ + 7465 "00101001" // /* MW 1 */ + 7466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7467 "00000000" // /* MW 1 */ + 7468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7469 "00000000" // /* MW 1 */ + 7470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7471 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 186 34 first + 7472 "00011000" // ST.s16 r1, [p2], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7473 "00110111" // /* MW 3 */ + 7474 "00011100" // /* MW 2 */ + 7475 "00000010" // /* MW 1 */ + 7476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7477 "00000000" // /* MW 1 */ + 7478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7479 "00000000" // /* MW 1 */ + 7480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7481 "00000000" // /* MW 1 */ + 7482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7483 "00000000" // /* MW 1 */ + 7484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7485 "00000000" // /* MW 1 */ + 7486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7487 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 187 35 first + 7488 "00011000" // ST.s16 r17, [p2], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7489 "00110111" // /* MW 3 */ + 7490 "00011110" // /* MW 2 */ + 7491 "00000010" // /* MW 1 */ + 7492 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7493 "00000000" // /* MW 1 */ + 7494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7495 "00000000" // /* MW 1 */ + 7496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7497 "00000000" // /* MW 1 */ + 7498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7499 "00000000" // /* MW 1 */ + 7500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7501 "00000000" // /* MW 1 */ + 7502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7503 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 188 34 first + 7504 "00011000" // ST.s16 r1, [p2], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7505 "00110111" // /* MW 3 */ + 7506 "00011100" // /* MW 2 */ + 7507 "00000010" // /* MW 1 */ + 7508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7509 "00000000" // /* MW 1 */ + 7510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7511 "00000000" // /* MW 1 */ + 7512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7513 "00000000" // /* MW 1 */ + 7514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7515 "00000000" // /* MW 1 */ + 7516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7517 "00000000" // /* MW 1 */ + 7518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7519 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 189 35 first + 7520 "00011000" // ST.s16 r0, [p2], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7521 "00010111" // /* MW 3 */ + 7522 "00011100" // /* MW 2 */ + 7523 "00000010" // /* MW 1 */ + 7524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7525 "00000000" // /* MW 1 */ + 7526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7527 "00000000" // /* MW 1 */ + 7528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7529 "00000000" // /* MW 1 */ + 7530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7531 "00000000" // /* MW 1 */ + 7532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7533 "00000000" // /* MW 1 */ + 7534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7535 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 190 36 first + 7536 "00011000" // ST.s16 r1, [p2], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7537 "00110111" // /* MW 3 */ + 7538 "00011100" // /* MW 2 */ + 7539 "00000010" // /* MW 1 */ + 7540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7541 "00000000" // /* MW 1 */ + 7542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7543 "00000000" // /* MW 1 */ + 7544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7545 "00000000" // /* MW 1 */ + 7546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7547 "00000000" // /* MW 1 */ + 7548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7549 "00000000" // /* MW 1 */ + 7550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7551 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 191 35 first + 7552 "10011000" // ST r7, [p2], #-16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7553 "11110001" // /* MW 3 */ + 7554 "11001100" // /* MW 2 */ + 7555 "00001010" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 192 34 first + 7556 "00011000" // ST.s16 r2, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7557 "01010111" // /* MW 3 */ + 7558 "11101100" // /* MW 2 */ + 7559 "00000010" // /* MW 1 */ + 7560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7561 "00000000" // /* MW 1 */ + 7562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7563 "00000000" // /* MW 1 */ + 7564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7565 "00000000" // /* MW 1 */ + 7566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7567 "00000000" // /* MW 1 */ + 7568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7569 "00000000" // /* MW 1 */ + 7570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7571 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 194 36 first + 7572 "00011000" // ST.s16 r4, [p2, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7573 "10010111" // /* MW 3 */ + 7574 "00000000" // /* MW 2 */ + 7575 "00000010" // /* MW 1 */ + 7576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7577 "00000000" // /* MW 1 */ + 7578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7579 "00000000" // /* MW 1 */ + 7580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7581 "00000000" // /* MW 1 */ + 7582 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7583 "00000000" // /* MW 1 */ + 7584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7585 "00000000" // /* MW 1 */ + 7586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7587 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 193 32 first + 7588 "00110110" // ST.s16 r3, [p2]; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7589 "10000001" // /* MW 11 */ + 7590 "10101101" // /* MW 10 */ + 7591 "00000000" // /* MW 9 */ + 7592 "00000000" // /* MW 8 */ + 7593 "00000000" // /* MW 7 */ + 7594 "00000000" // /* MW 6 */ + 7595 "00100000" // /* MW 5 */ + 7596 "00000000" // /* MW 4 */ + 7597 "11100000" // /* MW 3 */ + 7598 "10001110" // /* MW 2 */ + 7599 "01000000" // /* MW 1 */ +.label __ll7__ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv + 7600 "00011000" // ADD r0, r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7601 "11111111" // /* MW 3 */ + 7602 "00000001" // /* MW 2 */ + 7603 "00010100" // /* MW 1 */ + 7604 "01000100" // MOVXM p2, #508532 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7605 "11101000" // /* MW 5 */ + 7606 "11000100" // /* MW 4 */ + 7607 "11000100" // /* MW 3 */ + 7608 "00000111" // /* MW 2 */ + 7609 "00000000" // /* MW 1 */ + 7610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7611 "00000000" // /* MW 1 */ + 7612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7613 "00000000" // /* MW 1 */ + 7614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7615 "00000000" // /* MW 1 */ + 7616 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7617 "00000000" // /* MW 1 */ + 7618 "00101110" // ST.s16 r0, [p2]; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 7619 "00011100" // /* MW 13 */ + 7620 "00000000" // /* MW 12 */ + 7621 "00000000" // /* MW 11 */ + 7622 "01010111" // /* MW 10 */ + 7623 "00011010" // /* MW 9 */ + 7624 "01000000" // /* MW 8 */ + 7625 "00000000" // /* MW 7 */ + 7626 "00000000" // /* MW 6 */ + 7627 "10110110" // /* MW 5 */ + 7628 "00000010" // /* MW 4 */ + 7629 "11100000" // /* MW 3 */ + 7630 "10000010" // /* MW 2 */ + 7631 "01000000" // /* MW 1 */ +.label TGT_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv_464 +.src_ref 3 "reducesum_impl.h" 95 38 +.src_ref 3 "reducesum_impl.h" 95 50 first + 7632 "01010100" // LDA r0, [p1, #4]; MOV dj0, #22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7633 "01011001" // /* MW 5 */ + 7634 "00000000" // /* MW 4 */ + 7635 "11010001" // /* MW 3 */ + 7636 "10000010" // /* MW 2 */ + 7637 "00100010" // /* MW 1 */ + 7638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7639 "00000000" // /* MW 1 */ + 7640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7641 "00000000" // /* MW 1 */ + 7642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7643 "00000000" // /* MW 1 */ + 7644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7645 "00000000" // /* MW 1 */ + 7646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7647 "00000000" // /* MW 1 */ +.src_ref 3 "reducesum_impl.h" 95 38 + 7648 "00011000" // ST.s8 r0, [p0, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7649 "00000111" // /* MW 3 */ + 7650 "00000000" // /* MW 2 */ + 7651 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 219 4 first + 7652 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 7653 "00000000" // /* MW 3 */ + 7654 "00101000" // /* MW 2 */ + 7655 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7657 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7659 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7661 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7663 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7664 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7665 "00000000" // /* MW 15 */ + 7666 "00000000" // /* MW 14 */ + 7667 "01111000" // /* MW 13 */ + 7668 "10100101" // /* MW 12 */ + 7669 "00000001" // /* MW 11 */ + 7670 "00000000" // /* MW 10 */ + 7671 "00000000" // /* MW 9 */ + 7672 "00000000" // /* MW 8 */ + 7673 "01011011" // /* MW 7 */ + 7674 "00000001" // /* MW 6 */ + 7675 "00100000" // /* MW 5 */ + 7676 "00000000" // /* MW 4 */ + 7677 "11110000" // /* MW 3 */ + 7678 "00101100" // /* MW 2 */ + 7679 "00000000" // /* MW 1 */ +.label TGT_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv_512 +.src_ref 2 "reduce_base.h" 158 34 +.src_ref 2 "reduce_base.h" 158 34 + 7680 "10111010" // MOVA r6, #32; MOVXM p2, #508518 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7681 "00010000" // /* MW 9 */ + 7682 "00110011" // /* MW 8 */ + 7683 "00110001" // /* MW 7 */ + 7684 "11110001" // /* MW 6 */ + 7685 "00000001" // /* MW 5 */ + 7686 "00000000" // /* MW 4 */ + 7687 "00000000" // /* MW 3 */ + 7688 "00000110" // /* MW 2 */ + 7689 "00000100" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 158 34 first +.src_ref 2 "reduce_base.h" 163 53 first + 7690 "00101100" // ST.s16 r6, [p2], #2; MUL r1, r5, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7691 "00111111" // /* MW 5 */ + 7692 "10000100" // /* MW 4 */ + 7693 "11100010" // /* MW 3 */ + 7694 "10011010" // /* MW 2 */ + 7695 "01000011" // /* MW 1 */ + 7696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7697 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 167 55 first + 7698 "10011000" // LSHL r16, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7699 "00101101" // /* MW 3 */ + 7700 "01100000" // /* MW 2 */ + 7701 "00010000" // /* MW 1 */ + 7702 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7703 "00000000" // /* MW 1 */ + 7704 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7705 "00000000" // /* MW 1 */ + 7706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7707 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 160 36 +.src_ref 2 "reduce_base.h" 160 36 first +.src_ref 2 "reduce_base.h" 160 63 +.src_ref 2 "reduce_base.h" 160 63 first +.src_ref 2 "reduce_base.h" 162 38 +.src_ref 2 "reduce_base.h" 162 90 +.src_ref 2 "reduce_base.h" 166 38 + 7708 "10111010" // MOVA r0, #32; MAC r6, r6, r5, r0; MOV r6, #32 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7709 "01011000" // /* MW 9 */ + 7710 "00100000" // /* MW 8 */ + 7711 "11001000" // /* MW 7 */ + 7712 "00110000" // /* MW 6 */ + 7713 "01100000" // /* MW 5 */ + 7714 "00001010" // /* MW 4 */ + 7715 "00000000" // /* MW 3 */ + 7716 "00000000" // /* MW 2 */ + 7717 "00000100" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 159 35 first +.src_ref 2 "reduce_base.h" 166 38 first + 7718 "00101100" // ST.s16 r17, [p2], #2; SUB r5, r0, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7719 "00100011" // /* MW 5 */ + 7720 "00010100" // /* MW 4 */ + 7721 "11100000" // /* MW 3 */ + 7722 "11000110" // /* MW 2 */ + 7723 "01000011" // /* MW 1 */ + 7724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7725 "00000000" // /* MW 1 */ + 7726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7727 "00000000" // /* MW 1 */ + 7728 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7729 "00000000" // /* MW 1 */ + 7730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7731 "00000000" // /* MW 1 */ + 7732 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7733 "00000000" // /* MW 1 */ + 7734 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7735 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 160 34 first + 7736 "00011000" // ST.s16 r6, [p2], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7737 "11010111" // /* MW 3 */ + 7738 "00011100" // /* MW 2 */ + 7739 "00000010" // /* MW 1 */ + 7740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7741 "00000000" // /* MW 1 */ + 7742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7743 "00000000" // /* MW 1 */ + 7744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7745 "00000000" // /* MW 1 */ + 7746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7747 "00000000" // /* MW 1 */ + 7748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7749 "00000000" // /* MW 1 */ + 7750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7751 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 161 35 first + 7752 "00011000" // ST.s16 r4, [p2], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7753 "10010111" // /* MW 3 */ + 7754 "00011100" // /* MW 2 */ + 7755 "00000010" // /* MW 1 */ + 7756 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7757 "00000000" // /* MW 1 */ + 7758 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7759 "00000000" // /* MW 1 */ + 7760 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7761 "00000000" // /* MW 1 */ + 7762 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7763 "00000000" // /* MW 1 */ + 7764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7765 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 7766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7767 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 162 36 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 7768 "00011000" // ST.s16 r0, [p2], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7769 "00010111" // /* MW 3 */ + 7770 "00011100" // /* MW 2 */ + 7771 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7773 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7775 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7776 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7777 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 162 38 +.src_ref 2 "reduce_base.h" 162 90 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7778 "00011000" // MSC r0, r0, r7, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7779 "01001110" // /* MW 3 */ + 7780 "11000000" // /* MW 2 */ + 7781 "00010001" // /* MW 1 */ + 7782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7783 "00000000" // /* MW 1 */ + 7784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7785 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 163 35 first + 7786 "10011000" // ST r1, [p2], #-16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7787 "00110001" // /* MW 3 */ + 7788 "11001100" // /* MW 2 */ + 7789 "00001010" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 164 34 first + 7790 "00011000" // ST.s16 r19, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7791 "01110111" // /* MW 3 */ + 7792 "11101110" // /* MW 2 */ + 7793 "00000010" // /* MW 1 */ + 7794 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7795 "00000000" // /* MW 1 */ + 7796 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7797 "00000000" // /* MW 1 */ + 7798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7799 "00000000" // /* MW 1 */ + 7800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7801 "00000000" // /* MW 1 */ + 7802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7803 "00000000" // /* MW 1 */ + 7804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7805 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 165 32 first + 7806 "00011000" // ST.s16 r3, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7807 "01110111" // /* MW 3 */ + 7808 "00000100" // /* MW 2 */ + 7809 "00000010" // /* MW 1 */ + 7810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7811 "00000000" // /* MW 1 */ + 7812 "10000100" // J #7600 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=7600 delay_slots=5 */ + 7813 "00000000" // /* MW 5 */ + 7814 "00000000" // /* MW 4 */ + 7815 "11011000" // /* MW 3 */ + 7816 "00001110" // /* MW 2 */ + 7817 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 166 36 +.delay_slot + 7818 "10111000" // MOV dj0, #26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7819 "00110100" // /* MW 3 */ + 7820 "10000000" // /* MW 2 */ + 7821 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7823 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7824 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7825 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7827 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 166 36 first +.delay_slot + 7828 "00110110" // ST.s16 r5, [p2, dj0]; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7829 "10000001" // /* MW 11 */ + 7830 "10101101" // /* MW 10 */ + 7831 "00000000" // /* MW 9 */ + 7832 "00000000" // /* MW 8 */ + 7833 "00000000" // /* MW 7 */ + 7834 "00000000" // /* MW 6 */ + 7835 "00100000" // /* MW 5 */ + 7836 "00000000" // /* MW 4 */ + 7837 "11100000" // /* MW 3 */ + 7838 "00010110" // /* MW 2 */ + 7839 "01000000" // /* MW 1 */ +.label TGT_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv_672 +.src_ref 2 "reduce_base.h" 172 32 +.src_ref 2 "reduce_base.h" 173 34 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 7840 "10111010" // MOVA m1, #16; MOVXM p2, #508508 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7841 "00010000" // /* MW 9 */ + 7842 "00101110" // /* MW 8 */ + 7843 "00110001" // /* MW 7 */ + 7844 "11110001" // /* MW 6 */ + 7845 "00000001" // /* MW 5 */ + 7846 "00000000" // /* MW 4 */ + 7847 "10000000" // /* MW 3 */ + 7848 "00000100" // /* MW 2 */ + 7849 "00000010" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 172 32 first +.src_ref 2 "reduce_base.h" 173 34 +.src_ref 2 "reduce_base.h" 180 38 +.aggressive_scheduled_block_id 4 +.noswbrkpt + 7850 "10111010" // ST.s16 r1, [p2], #4; MOVX r17, #5; MOV r7, eh0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7851 "01111000" // /* MW 9 */ + 7852 "10001110" // /* MW 8 */ + 7853 "11110000" // /* MW 7 */ + 7854 "10101000" // /* MW 6 */ + 7855 "00010000" // /* MW 5 */ + 7856 "00000001" // /* MW 4 */ + 7857 "11100000" // /* MW 3 */ + 7858 "10000110" // /* MW 2 */ + 7859 "01000101" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 174 35 +.src_ref 2 "reduce_base.h" 174 55 first +.src_ref 2 "reduce_base.h" 179 38 +.src_ref 2 "reduce_base.h" 179 110 +.src_ref 2 "reduce_base.h" 180 38 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 7860 "10111010" // MOVA r3, #32; MUL r16, r1, r6; MOV m0, #-10 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7861 "01011000" // /* MW 9 */ + 7862 "11110110" // /* MW 8 */ + 7863 "00000111" // /* MW 7 */ + 7864 "01111100" // /* MW 6 */ + 7865 "00000011" // /* MW 5 */ + 7866 "00000011" // /* MW 4 */ + 7867 "00000000" // /* MW 3 */ + 7868 "00000011" // /* MW 2 */ + 7869 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 7870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7871 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 172 70 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 7872 "10011000" // MUL r1, r5, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7873 "00001111" // /* MW 3 */ + 7874 "01000011" // /* MW 2 */ + 7875 "00010001" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 7876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7877 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 172 89 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7878 "10011000" // LSHL r1, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7879 "00101101" // /* MW 3 */ + 7880 "01000010" // /* MW 2 */ + 7881 "00010000" // /* MW 1 */ + 7882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7883 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 173 34 first + 7884 "00011000" // ST.s16 r7, [p2], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7885 "11110111" // /* MW 3 */ + 7886 "00101000" // /* MW 2 */ + 7887 "00000010" // /* MW 1 */ + 7888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7889 "00000000" // /* MW 1 */ + 7890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7891 "00000000" // /* MW 1 */ + 7892 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7893 "00000000" // /* MW 1 */ + 7894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7895 "00000000" // /* MW 1 */ + 7896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7897 "00000000" // /* MW 1 */ + 7898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7899 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 174 35 first + 7900 "10011000" // ST r16, [p2], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7901 "00010001" // /* MW 3 */ + 7902 "00001010" // /* MW 2 */ + 7903 "00001010" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 175 34 first + 7904 "00011000" // ST.s16 r5, [p2], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7905 "10110111" // /* MW 3 */ + 7906 "00011100" // /* MW 2 */ + 7907 "00000010" // /* MW 1 */ + 7908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7909 "00000000" // /* MW 1 */ + 7910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7911 "00000000" // /* MW 1 */ + 7912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7913 "00000000" // /* MW 1 */ + 7914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7915 "00000000" // /* MW 1 */ + 7916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7917 "00000000" // /* MW 1 */ + 7918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7919 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 176 35 first + 7920 "00011000" // ST.s16 r0, [p2], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7921 "00010111" // /* MW 3 */ + 7922 "00011100" // /* MW 2 */ + 7923 "00000010" // /* MW 1 */ + 7924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7925 "00000000" // /* MW 1 */ + 7926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7927 "00000000" // /* MW 1 */ + 7928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7929 "00000000" // /* MW 1 */ + 7930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7931 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 179 90 first + 7932 "00011000" // MAC r0, r0, r6, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7933 "01000110" // /* MW 3 */ + 7934 "10000000" // /* MW 2 */ + 7935 "00010001" // /* MW 1 */ + 7936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7937 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 175 34 first +.src_ref 2 "reduce_base.h" 177 34 first + 7938 "00011000" // ST.s16 r5, [p2], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7939 "10110111" // /* MW 3 */ + 7940 "00011100" // /* MW 2 */ + 7941 "00000010" // /* MW 1 */ + 7942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7943 "00000000" // /* MW 1 */ + 7944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7945 "00000000" // /* MW 1 */ + 7946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7947 "00000000" // /* MW 1 */ + 7948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7949 "00000000" // /* MW 1 */ + 7950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7951 "00000000" // /* MW 1 */ + 7952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7953 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 178 35 first + 7954 "00011000" // ST.s16 r4, [p2], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7955 "10010111" // /* MW 3 */ + 7956 "00011100" // /* MW 2 */ + 7957 "00000010" // /* MW 1 */ + 7958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7959 "00000000" // /* MW 1 */ + 7960 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7961 "00000000" // /* MW 1 */ + 7962 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7963 "00000000" // /* MW 1 */ + 7964 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7965 "00000000" // /* MW 1 */ + 7966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7967 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 7968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7969 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 179 36 first +.aggressive_scheduled_block_id 5 +.noswbrkpt + 7970 "00011000" // ST.s16 r3, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7971 "01110111" // /* MW 3 */ + 7972 "00000100" // /* MW 2 */ + 7973 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 7974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7975 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 7976 "10000100" // J #7600 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=7600 delay_slots=5 */ + 7977 "00000000" // /* MW 5 */ + 7978 "00000000" // /* MW 4 */ + 7979 "11011000" // /* MW 3 */ + 7980 "00001110" // /* MW 2 */ + 7981 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 180 38 first +.delay_slot +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 7982 "10011000" // LSHL r17, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7983 "00011101" // /* MW 3 */ + 7984 "00100011" // /* MW 2 */ + 7985 "00010100" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 179 38 first +.src_ref 2 "reduce_base.h" 179 110 first +.delay_slot +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 7986 "00011000" // MSC r3, r3, r5, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7987 "00001110" // /* MW 3 */ + 7988 "01000110" // /* MW 2 */ + 7989 "00010001" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 180 38 first +.delay_slot +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7990 "10011000" // SUB r1, r3, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7991 "00010001" // /* MW 3 */ + 7992 "11000011" // /* MW 2 */ + 7993 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7995 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 180 36 +.delay_slot + 7996 "00011000" // ST.s16 r1, [p2, #8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7997 "00110111" // /* MW 3 */ + 7998 "01000100" // /* MW 2 */ +.label _ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv__end +.label __ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv___func_end0 + 7999 "00000010" // /* MW 1 */ +.label __ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E___func_begin0 +.label _ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E +.function run _ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E +.src_ref 2 "reduce_base.h" 232 first +.src_ref 2 "reduce_base.h" 236 19 +.src_ref 2 "reduce_base.h" 236 19 +.function_start + 8000 "10111010" // MOVA m4, #-24; MOVXM p2, #508516 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8001 "00010000" // /* MW 9 */ + 8002 "00110010" // /* MW 8 */ + 8003 "00110001" // /* MW 7 */ + 8004 "11110001" // /* MW 6 */ + 8005 "00000001" // /* MW 5 */ + 8006 "00000000" // /* MW 4 */ + 8007 "10000000" // /* MW 3 */ + 8008 "00010000" // /* MW 2 */ + 8009 "11111101" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 236 19 first +.src_ref 2 "reduce_base.h" 240 69 + 8010 "00101100" // LDA.u16 r16, [p2], m4; MOVX r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8011 "00000010" // /* MW 5 */ + 8012 "01100000" // /* MW 4 */ + 8013 "01010000" // /* MW 3 */ + 8014 "01000011" // /* MW 2 */ + 8015 "01010001" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 240 69 first + 8016 "01111000" // VINSERT.32 x0, x0, #0, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8017 "00010001" // /* MW 3 */ + 8018 "00000011" // /* MW 2 */ + 8019 "00011000" // /* MW 1 */ + 8020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8021 "00000000" // /* MW 1 */ + 8022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8023 "00000000" // /* MW 1 */ + 8024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8025 "00000000" // /* MW 1 */ + 8026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8027 "00000000" // /* MW 1 */ + 8028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8029 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 236 12 first +.src_ref 2 "reduce_base.h" 236 27 first + 8030 "10000100" // JNZ r16, #8256 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8256 delay_slots=5 */ + 8031 "00000001" // /* MW 5 */ + 8032 "01000000" // /* MW 4 */ + 8033 "00100000" // /* MW 3 */ + 8034 "00010000" // /* MW 2 */ + 8035 "10000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 232 +.delay_slot + 8036 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8037 "00000001" // /* MW 5 */ + 8038 "00000000" // /* MW 4 */ + 8039 "00000000" // /* MW 3 */ + 8040 "00001000" // /* MW 2 */ + 8041 "00000000" // /* MW 1 */ +.delay_slot + 8042 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8043 "00111101" // /* MW 3 */ + 8044 "11111100" // /* MW 2 */ + 8045 "00001111" // /* MW 1 */ +.delay_slot + 8046 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8047 "10011101" // /* MW 3 */ + 8048 "11111011" // /* MW 2 */ + 8049 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8051 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8053 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 240 69 + 8054 "01000100" // MOVXM p7, #508464 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8055 "01100000" // /* MW 5 */ + 8056 "11000100" // /* MW 4 */ + 8057 "11001110" // /* MW 3 */ + 8058 "00000111" // /* MW 2 */ + 8059 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 240 69 first +.src_ref 2 "reduce_base.h" 241 39 + 8060 "10111010" // LDA.s8 r17, [p7]; MOVXM p7, #508528 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8061 "00010000" // /* MW 9 */ + 8062 "00111000" // /* MW 8 */ + 8063 "10110001" // /* MW 7 */ + 8064 "11110011" // /* MW 6 */ + 8065 "00000001" // /* MW 5 */ + 8066 "00000000" // /* MW 4 */ + 8067 "01010000" // /* MW 3 */ + 8068 "11000100" // /* MW 2 */ + 8069 "11100000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 241 39 first + 8070 "10011000" // LDA r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8071 "00010110" // /* MW 3 */ + 8072 "00000110" // /* MW 2 */ + 8073 "00000111" // /* MW 1 */ + 8074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8075 "00000000" // /* MW 1 */ + 8076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8077 "00000000" // /* MW 1 */ + 8078 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8079 "00000000" // /* MW 1 */ + 8080 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8081 "00000000" // /* MW 1 */ + 8082 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8083 "00000000" // /* MW 1 */ + 8084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8085 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 241 12 +.src_ref 2 "reduce_base.h" 241 30 + 8086 "10000100" // JZ r16, #8256 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8256 delay_slots=5 */ + 8087 "00000001" // /* MW 5 */ + 8088 "00000000" // /* MW 4 */ + 8089 "00100000" // /* MW 3 */ + 8090 "00010000" // /* MW 2 */ + 8091 "10000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 240 69 +.delay_slot + 8092 "11111000" // VMOV bmhl1, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8093 "10010010" // /* MW 3 */ + 8094 "10000000" // /* MW 2 */ + 8095 "00011001" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 240 69 +.delay_slot + 8096 "00011000" // MOVX crRnd, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8097 "10000000" // /* MW 3 */ + 8098 "01111010" // /* MW 2 */ + 8099 "00010100" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 240 69 first +.delay_slot + 8100 "00011000" // VCONV.bf16.fp32 wl2, bmhl1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8101 "11010110" // /* MW 3 */ + 8102 "01000000" // /* MW 2 */ + 8103 "00001001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8105 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 first +.src_ref 2 "reduce_base.h" 240 69 +.delay_slot + 8106 "01011000" // VEXTBCST.16 x1, x2, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8107 "00000011" // /* MW 3 */ + 8108 "10010001" // /* MW 2 */ + 8109 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 2 "reduce_base.h" 241 12 +.src_ref 2 "reduce_base.h" 241 12 first +.src_ref 2 "reduce_base.h" 243 29 + 8110 "01110110" // MOVA r17, #-5; MOVS p7, p1; MOVXM ls, #8240 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8111 "00010000" // /* MW 11 */ + 8112 "00011000" // /* MW 10 */ + 8113 "01111000" // /* MW 9 */ + 8114 "00001000" // /* MW 8 */ + 8115 "00000000" // /* MW 7 */ + 8116 "00000000" // /* MW 6 */ + 8117 "10001011" // /* MW 5 */ + 8118 "10000100" // /* MW 4 */ + 8119 "00000111" // /* MW 3 */ + 8120 "01110001" // /* MW 2 */ + 8121 "11111111" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 241 12 + 8122 "01000100" // MOVXM le, #8240 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8123 "01100000" // /* MW 5 */ + 8124 "11100000" // /* MW 4 */ + 8125 "00100110" // /* MW 3 */ + 8126 "00000000" // /* MW 2 */ + 8127 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 2 "reduce_base.h" 241 12 +.src_ref 2 "reduce_base.h" 243 29 + 8128 "11100100" // ADD r16, r16, #-1; VMOV bmhl1, x1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8129 "00100101" // /* MW 5 */ + 8130 "00000101" // /* MW 4 */ + 8131 "11100011" // /* MW 3 */ + 8132 "00111111" // /* MW 2 */ + 8133 "10000100" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 241 12 + 8134 "10011000" // LSHL r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8135 "00011101" // /* MW 3 */ + 8136 "00100001" // /* MW 2 */ + 8137 "00010100" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 241 12 + 8138 "00010100" // NOPA; ADD.NC lc, r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8139 "00000001" // /* MW 5 */ + 8140 "11110000" // /* MW 4 */ + 8141 "11111010" // /* MW 3 */ + 8142 "00101100" // /* MW 2 */ + 8143 "00000000" // /* MW 1 */ + 8144 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8145 "00000000" // /* MW 15 */ + 8146 "00000000" // /* MW 14 */ + 8147 "01111000" // /* MW 13 */ + 8148 "10100101" // /* MW 12 */ + 8149 "00000001" // /* MW 11 */ + 8150 "00000000" // /* MW 10 */ + 8151 "00000000" // /* MW 9 */ + 8152 "00000000" // /* MW 8 */ + 8153 "01011011" // /* MW 7 */ + 8154 "00000001" // /* MW 6 */ + 8155 "00100000" // /* MW 5 */ + 8156 "00000000" // /* MW 4 */ + 8157 "11110000" // /* MW 3 */ + 8158 "00101100" // /* MW 2 */ + 8159 "00000000" // /* MW 1 */ + 8160 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8161 "00000000" // /* MW 15 */ + 8162 "00000000" // /* MW 14 */ + 8163 "01111000" // /* MW 13 */ + 8164 "10100101" // /* MW 12 */ + 8165 "00000001" // /* MW 11 */ + 8166 "00000000" // /* MW 10 */ + 8167 "00000000" // /* MW 9 */ + 8168 "00000000" // /* MW 8 */ + 8169 "01011011" // /* MW 7 */ + 8170 "00000001" // /* MW 6 */ + 8171 "00100000" // /* MW 5 */ + 8172 "00000000" // /* MW 4 */ + 8173 "11110000" // /* MW 3 */ + 8174 "00101100" // /* MW 2 */ + 8175 "00000000" // /* MW 1 */ + 8176 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8177 "00000000" // /* MW 15 */ + 8178 "00000000" // /* MW 14 */ + 8179 "01111000" // /* MW 13 */ + 8180 "10100101" // /* MW 12 */ + 8181 "00000001" // /* MW 11 */ + 8182 "00000000" // /* MW 10 */ + 8183 "00000000" // /* MW 9 */ + 8184 "00000000" // /* MW 8 */ + 8185 "01011011" // /* MW 7 */ + 8186 "00000001" // /* MW 6 */ + 8187 "00100000" // /* MW 5 */ + 8188 "00000000" // /* MW 4 */ + 8189 "11110000" // /* MW 3 */ + 8190 "00101100" // /* MW 2 */ + 8191 "00000000" // /* MW 1 */ + 8192 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8193 "00000000" // /* MW 15 */ + 8194 "00000000" // /* MW 14 */ + 8195 "01111000" // /* MW 13 */ + 8196 "10100101" // /* MW 12 */ + 8197 "00000001" // /* MW 11 */ + 8198 "00000000" // /* MW 10 */ + 8199 "00000000" // /* MW 9 */ + 8200 "00000000" // /* MW 8 */ + 8201 "01011011" // /* MW 7 */ + 8202 "00000001" // /* MW 6 */ + 8203 "00100000" // /* MW 5 */ + 8204 "00000000" // /* MW 4 */ + 8205 "11110000" // /* MW 3 */ + 8206 "00101100" // /* MW 2 */ + 8207 "00000000" // /* MW 1 */ + 8208 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8209 "00000000" // /* MW 15 */ + 8210 "00000000" // /* MW 14 */ + 8211 "01111000" // /* MW 13 */ + 8212 "10100101" // /* MW 12 */ + 8213 "00000001" // /* MW 11 */ + 8214 "00000000" // /* MW 10 */ + 8215 "00000000" // /* MW 9 */ + 8216 "00000000" // /* MW 8 */ + 8217 "01011011" // /* MW 7 */ + 8218 "00000001" // /* MW 6 */ + 8219 "00100000" // /* MW 5 */ + 8220 "00000000" // /* MW 4 */ + 8221 "11110000" // /* MW 3 */ + 8222 "00101100" // /* MW 2 */ + 8223 "00000000" // /* MW 1 */ + 8224 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8225 "00000000" // /* MW 15 */ + 8226 "00000000" // /* MW 14 */ + 8227 "01111000" // /* MW 13 */ + 8228 "10100101" // /* MW 12 */ + 8229 "00000001" // /* MW 11 */ + 8230 "00000000" // /* MW 10 */ + 8231 "00000000" // /* MW 9 */ + 8232 "00000000" // /* MW 8 */ + 8233 "01011011" // /* MW 7 */ + 8234 "00000001" // /* MW 6 */ + 8235 "00100000" // /* MW 5 */ + 8236 "00000000" // /* MW 4 */ + 8237 "11110000" // /* MW 3 */ + 8238 "00101100" // /* MW 2 */ + 8239 "00000000" // /* MW 1 */ +.label ZLS_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_240 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 2 "reduce_base.h" 243 29 first +.begin_of_loop +.end_of_loop +.loop_nesting 1 + 8240 "11100001" // NOPA; NOPB; VST bmhl1, [p7], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8241 "00000000" // /* MW 15 */ + 8242 "00000000" // /* MW 14 */ + 8243 "01111000" // /* MW 13 */ + 8244 "10100101" // /* MW 12 */ + 8245 "00000001" // /* MW 11 */ + 8246 "00000000" // /* MW 10 */ + 8247 "00000000" // /* MW 9 */ + 8248 "10000000" // /* MW 8 */ + 8249 "11000110" // /* MW 7 */ + 8250 "00011100" // /* MW 6 */ + 8251 "00100111" // /* MW 5 */ + 8252 "00000000" // /* MW 4 */ + 8253 "11110000" // /* MW 3 */ + 8254 "00101100" // /* MW 2 */ + 8255 "00000000" // /* MW 1 */ +.label TGT_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_256 +.src_ref 7 "aie_core.h" 73 15 +.src_ref 7 "aie_core.h" 73 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 199 120 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "reduce_base.h" 267 19 first +.src_ref 2 "reduce_base.h" 267 31 +.loop_nesting 0 + 8256 "10111010" // LDA r16, [p2], #-12; MOVX r18, #2; MOV p3, p1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8257 "01111000" // /* MW 9 */ + 8258 "01100000" // /* MW 8 */ + 8259 "10110001" // /* MW 7 */ + 8260 "01001001" // /* MW 6 */ + 8261 "00100000" // /* MW 5 */ + 8262 "00000001" // /* MW 4 */ + 8263 "11010000" // /* MW 3 */ + 8264 "11000010" // /* MW 2 */ + 8265 "01011011" // /* MW 1 */ + 8266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8267 "00000000" // /* MW 1 */ + 8268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8269 "00000000" // /* MW 1 */ + 8270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8271 "00000000" // /* MW 1 */ + 8272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8273 "00000000" // /* MW 1 */ + 8274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8275 "00000000" // /* MW 1 */ + 8276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8277 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 267 31 + 8278 "10011000" // NE r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8279 "00001000" // /* MW 3 */ + 8280 "10100001" // /* MW 2 */ + 8281 "00010100" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 267 12 + 8282 "10000100" // JNZ r16, #8576 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8576 delay_slots=5 */ + 8283 "00000001" // /* MW 5 */ + 8284 "01000000" // /* MW 4 */ + 8285 "11000000" // /* MW 3 */ + 8286 "00010000" // /* MW 2 */ + 8287 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8289 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8291 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8293 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8294 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8295 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 405 45 +.delay_slot + 8296 "11111000" // MOV r17, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8297 "11000000" // /* MW 3 */ + 8298 "01010100" // /* MW 2 */ + 8299 "00011100" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 269 41 + 8300 "01000100" // MOVXM p7, #508528 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8301 "11100000" // /* MW 5 */ + 8302 "11000100" // /* MW 4 */ + 8303 "11001110" // /* MW 3 */ + 8304 "00000111" // /* MW 2 */ + 8305 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 269 41 first + 8306 "10011000" // LDA r18, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8307 "01010110" // /* MW 3 */ + 8308 "00000110" // /* MW 2 */ + 8309 "00000111" // /* MW 1 */ + 8310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8311 "00000000" // /* MW 1 */ + 8312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8313 "00000000" // /* MW 1 */ + 8314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8315 "00000000" // /* MW 1 */ + 8316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8317 "00000000" // /* MW 1 */ + 8318 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8319 "00000000" // /* MW 1 */ + 8320 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8321 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 274 12 first +.src_ref 2 "reduce_base.h" 274 30 first + 8322 "10000100" // JZ r18, #8576 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8576 delay_slots=5 */ + 8323 "00000001" // /* MW 5 */ + 8324 "00000000" // /* MW 4 */ + 8325 "11000000" // /* MW 3 */ + 8326 "00010000" // /* MW 2 */ + 8327 "10010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8329 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 269 31 +.delay_slot + 8330 "00011000" // MOVX r19, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8331 "00000101" // /* MW 3 */ + 8332 "00100110" // /* MW 2 */ + 8333 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 269 31 +.src_ref 2 "reduce_base.h" 269 31 first +.delay_slot + 8334 "11100100" // LSHL r19, r18, r19; MOV r20, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8335 "10000001" // /* MW 5 */ + 8336 "00100101" // /* MW 4 */ + 8337 "10111010" // /* MW 3 */ + 8338 "11100111" // /* MW 2 */ + 8339 "10010100" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 269 31 +.delay_slot + 8340 "01011000" // ADD.NC p3, r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8341 "11010001" // /* MW 3 */ + 8342 "01101001" // /* MW 2 */ + 8343 "00011011" // /* MW 1 */ +.delay_slot + 8344 "01000100" // MOVXM p7, #508464 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8345 "01100000" // /* MW 5 */ + 8346 "11000100" // /* MW 4 */ + 8347 "11001110" // /* MW 3 */ + 8348 "00000111" // /* MW 2 */ + 8349 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 2 "reduce_base.h" 280 16 first +.src_ref 2 "reduce_base.h" 289 45 + 8350 "01110110" // LDA.s8 r20, [p7]; MOVS p2, p3; MOVXM ls, #8512 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8351 "00010000" // /* MW 11 */ + 8352 "10100000" // /* MW 10 */ + 8353 "01111000" // /* MW 9 */ + 8354 "00001000" // /* MW 8 */ + 8355 "00000000" // /* MW 7 */ + 8356 "00000000" // /* MW 6 */ + 8357 "10001011" // /* MW 5 */ + 8358 "10001100" // /* MW 4 */ + 8359 "01010010" // /* MW 3 */ + 8360 "11010000" // /* MW 2 */ + 8361 "11100000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "reduce_base.h" 274 12 +.src_ref 2 "reduce_base.h" 277 29 +.src_ref 2 "reduce_base.h" 280 16 + 8362 "01110110" // MOVA r19, #-5; MOVS p4, p1; MOVXM le, #8528 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8363 "00010000" // /* MW 11 */ + 8364 "10101000" // /* MW 10 */ + 8365 "10111000" // /* MW 9 */ + 8366 "00001001" // /* MW 8 */ + 8367 "00000000" // /* MW 7 */ + 8368 "00000000" // /* MW 6 */ + 8369 "10001011" // /* MW 5 */ + 8370 "10000100" // /* MW 4 */ + 8371 "00000100" // /* MW 3 */ + 8372 "01110011" // /* MW 2 */ + 8373 "11111111" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 274 12 + 8374 "01000100" // MOVXM p7, #8416 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8375 "11000000" // /* MW 5 */ + 8376 "11000001" // /* MW 4 */ + 8377 "00101110" // /* MW 3 */ + 8378 "00000000" // /* MW 2 */ + 8379 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 + 8380 "00011000" // MOVX vaddSign0, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8381 "01000000" // /* MW 3 */ + 8382 "01011010" // /* MW 2 */ + 8383 "00010000" // /* MW 1 */ + 8384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8385 "00000000" // /* MW 1 */ + 8386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8387 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 282 91 + 8388 "11111000" // VMOV bmhl1, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8389 "10010010" // /* MW 3 */ + 8390 "10000000" // /* MW 2 */ + 8391 "00011001" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 274 12 first +.src_ref 2 "reduce_base.h" 282 91 + 8392 "00100100" // MOVX crRnd, r20; ADD.NC r21, r18, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8393 "11111111" // /* MW 5 */ + 8394 "10110010" // /* MW 4 */ + 8395 "00001010" // /* MW 3 */ + 8396 "01010000" // /* MW 2 */ + 8397 "10100111" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 274 12 +.src_ref 2 "reduce_base.h" 282 91 first + 8398 "01011100" // VCONV.bf16.fp32 wl0, bmhl1; LSHL r19, r21, r19 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8399 "01111011" // /* MW 5 */ + 8400 "11001110" // /* MW 4 */ + 8401 "11001010" // /* MW 3 */ + 8402 "00011010" // /* MW 2 */ + 8403 "00001000" // /* MW 1 */ + 8404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8405 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 first +.src_ref 2 "reduce_base.h" 282 91 + 8406 "10111010" // NOPA; NOPB; VEXTBCST.16 x0, x0, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8407 "10101110" // /* MW 9 */ + 8408 "10000001" // /* MW 8 */ + 8409 "00000000" // /* MW 7 */ + 8410 "00000000" // /* MW 6 */ + 8411 "00010000" // /* MW 5 */ + 8412 "00000000" // /* MW 4 */ + 8413 "11110000" // /* MW 3 */ + 8414 "00101100" // /* MW 2 */ + 8415 "00000000" // /* MW 1 */ +.label TGT_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_416 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "reduce_base.h" 277 29 first +.src_ref 2 "reduce_base.h" 280 16 first +.loop_nesting 1 + 8416 "00011100" // VLDB x1, [p4], #64; MOVX lc, #30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8417 "11110010" // /* MW 5 */ + 8418 "00000010" // /* MW 4 */ + 8419 "10000000" // /* MW 3 */ + 8420 "10001110" // /* MW 2 */ + 8421 "10000011" // /* MW 1 */ + 8422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8423 "00000000" // /* MW 1 */ + 8424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8425 "00000000" // /* MW 1 */ + 8426 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8427 "00000000" // /* MW 1 */ + 8428 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8429 "01100111" // /* MW 3 */ + 8430 "00000001" // /* MW 2 */ + 8431 "00000000" // /* MW 1 */ + 8432 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8433 "00000000" // /* MW 15 */ + 8434 "00000000" // /* MW 14 */ + 8435 "01111000" // /* MW 13 */ + 8436 "10100101" // /* MW 12 */ + 8437 "00000001" // /* MW 11 */ + 8438 "00000000" // /* MW 10 */ + 8439 "00000000" // /* MW 9 */ + 8440 "00000000" // /* MW 8 */ + 8441 "01011011" // /* MW 7 */ + 8442 "00000001" // /* MW 6 */ + 8443 "00100000" // /* MW 5 */ + 8444 "00000000" // /* MW 4 */ + 8445 "11110000" // /* MW 3 */ + 8446 "00101100" // /* MW 2 */ + 8447 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 + 8448 "11100001" // NOPA; NOPB; NOPS; MOVX r20, #0; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8449 "00000000" // /* MW 15 */ + 8450 "00000000" // /* MW 14 */ + 8451 "01111000" // /* MW 13 */ + 8452 "10100101" // /* MW 12 */ + 8453 "00000001" // /* MW 11 */ + 8454 "00001000" // /* MW 10 */ + 8455 "01000000" // /* MW 9 */ + 8456 "00000001" // /* MW 8 */ + 8457 "01011011" // /* MW 7 */ + 8458 "00000001" // /* MW 6 */ + 8459 "00100000" // /* MW 5 */ + 8460 "00000000" // /* MW 4 */ + 8461 "11110000" // /* MW 3 */ + 8462 "00101100" // /* MW 2 */ + 8463 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 2 "reduce_base.h" 280 67 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8464 "11100001" // NOPA; NOPB; NOPS; ADD r20, r20, #1; VEXTRACT.16 r21, x1, r20, vaddSign0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8465 "00000000" // /* MW 15 */ + 8466 "00000000" // /* MW 14 */ + 8467 "01101000" // /* MW 13 */ + 8468 "10101000" // /* MW 12 */ + 8469 "10100010" // /* MW 11 */ + 8470 "00111010" // /* MW 10 */ + 8471 "01000000" // /* MW 9 */ + 8472 "00101001" // /* MW 8 */ + 8473 "01011011" // /* MW 7 */ + 8474 "00000001" // /* MW 6 */ + 8475 "00100000" // /* MW 5 */ + 8476 "00000000" // /* MW 4 */ + 8477 "11110000" // /* MW 3 */ + 8478 "00101100" // /* MW 2 */ + 8479 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 +.src_ref 2 "reduce_base.h" 280 67 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8480 "11100001" // NOPA; NOPB; NOPS; ADD r20, r20, #1; VEXTRACT.16 r21, x1, r20, vaddSign0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8481 "00000000" // /* MW 15 */ + 8482 "00000000" // /* MW 14 */ + 8483 "01101000" // /* MW 13 */ + 8484 "10101000" // /* MW 12 */ + 8485 "10100010" // /* MW 11 */ + 8486 "00111010" // /* MW 10 */ + 8487 "01000000" // /* MW 9 */ + 8488 "00101001" // /* MW 8 */ + 8489 "01011011" // /* MW 7 */ + 8490 "00000001" // /* MW 6 */ + 8491 "00100000" // /* MW 5 */ + 8492 "00000000" // /* MW 4 */ + 8493 "11110000" // /* MW 3 */ + 8494 "00101100" // /* MW 2 */ + 8495 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 856 23 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8496 "11100001" // NOPA; NOPB; NOPS; NOPX; VINSERT.16 x2, x0, #0, r21; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8497 "00000000" // /* MW 15 */ + 8498 "00000000" // /* MW 14 */ + 8499 "10111000" // /* MW 13 */ + 8500 "01010100" // /* MW 12 */ + 8501 "10000001" // /* MW 11 */ + 8502 "00000000" // /* MW 10 */ + 8503 "00000000" // /* MW 9 */ + 8504 "00000000" // /* MW 8 */ + 8505 "01011011" // /* MW 7 */ + 8506 "00000001" // /* MW 6 */ + 8507 "00100000" // /* MW 5 */ + 8508 "00000000" // /* MW 4 */ + 8509 "11110000" // /* MW 3 */ + 8510 "00101100" // /* MW 2 */ + 8511 "00000000" // /* MW 1 */ +.label ZLS_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_512 +.src_ref 4 "vector.hpp" 915 23 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.noswbrkpt +.loop_nesting 2 + 8512 "11100001" // NOPA; NOPB; NOPS; NOPX; VEXTRACT.16 r21, x1, r20, vaddSign0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8513 "00000000" // /* MW 15 */ + 8514 "00000000" // /* MW 14 */ + 8515 "01101000" // /* MW 13 */ + 8516 "10101000" // /* MW 12 */ + 8517 "10100010" // /* MW 11 */ + 8518 "00000010" // /* MW 10 */ + 8519 "00000000" // /* MW 9 */ + 8520 "00000000" // /* MW 8 */ + 8521 "01011011" // /* MW 7 */ + 8522 "00000001" // /* MW 6 */ + 8523 "00100000" // /* MW 5 */ + 8524 "00000000" // /* MW 4 */ + 8525 "11110000" // /* MW 3 */ + 8526 "00101100" // /* MW 2 */ + 8527 "00000000" // /* MW 1 */ +.label ZLE_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_528 +.src_ref 4 "vector.hpp" 856 23 first +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 2 "reduce_base.h" 280 67 +.src_ref 2 "reduce_base.h" 289 45 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8528 "11100001" // NOPA; NOPB; VST x2, [p2], #64; ADD r20, r20, #1; VINSERT.16 x2, x0, #0, r21; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8529 "00000000" // /* MW 15 */ + 8530 "00000000" // /* MW 14 */ + 8531 "10111000" // /* MW 13 */ + 8532 "01010100" // /* MW 12 */ + 8533 "10000001" // /* MW 11 */ + 8534 "00111000" // /* MW 10 */ + 8535 "01000000" // /* MW 9 */ + 8536 "00101001" // /* MW 8 */ + 8537 "10010011" // /* MW 7 */ + 8538 "00011100" // /* MW 6 */ + 8539 "00100010" // /* MW 5 */ + 8540 "00000000" // /* MW 4 */ + 8541 "11110000" // /* MW 3 */ + 8542 "00101100" // /* MW 2 */ + 8543 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 274 12 first +.loop_nesting 1 + 8544 "00011000" // JNZD r19, r19, p7 /* MW 4 */ /* control_operation: words=4 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 8545 "11100000" // /* MW 3 */ + 8546 "11100111" // /* MW 2 */ + 8547 "00010100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 2 "reduce_base.h" 289 45 first +.delay_slot + 8548 "00011000" // VST x2, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8549 "10010011" // /* MW 3 */ + 8550 "00011100" // /* MW 2 */ + 8551 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8552 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8553 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 856 23 first +.delay_slot + 8554 "01111000" // VINSERT.16 x2, x0, #0, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8555 "10101001" // /* MW 3 */ + 8556 "00000010" // /* MW 2 */ + 8557 "00011001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8558 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8559 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 2 "reduce_base.h" 289 45 first +.delay_slot + 8560 "11100001" // NOPA; NOPB; VST x2, [p2], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8561 "00000000" // /* MW 15 */ + 8562 "00000000" // /* MW 14 */ + 8563 "01111000" // /* MW 13 */ + 8564 "10100101" // /* MW 12 */ + 8565 "00000001" // /* MW 11 */ + 8566 "00000000" // /* MW 10 */ + 8567 "00000000" // /* MW 9 */ + 8568 "00000000" // /* MW 8 */ + 8569 "10010011" // /* MW 7 */ + 8570 "00011100" // /* MW 6 */ + 8571 "00100010" // /* MW 5 */ + 8572 "00000000" // /* MW 4 */ + 8573 "11110000" // /* MW 3 */ + 8574 "00101100" // /* MW 2 */ + 8575 "00000000" // /* MW 1 */ +.label TGT_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_576 +.src_ref 7 "aie_core.h" 73 15 +.src_ref 7 "aie_core.h" 73 15 +.src_ref 7 "aie_core.h" 73 15 +.src_ref 7 "aie_core.h" 90 15 +.src_ref 4 "vector.hpp" 915 23 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 199 120 +.src_ref 5 "accum.hpp" 199 120 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "reduce_base.h" 405 45 first +.loop_nesting 0 + 8576 "10111010" // MOVA dc2, #0; MOVX vaddSign0, #1; ADD.NC p4, r17, #54 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8577 "10001000" // /* MW 9 */ + 8578 "01001101" // /* MW 8 */ + 8579 "00110100" // /* MW 7 */ + 8580 "00000010" // /* MW 6 */ + 8581 "11010010" // /* MW 5 */ + 8582 "00000010" // /* MW 4 */ + 8583 "10000000" // /* MW 3 */ + 8584 "00001011" // /* MW 2 */ + 8585 "00000000" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 199 120 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 2 "reduce_base.h" 294 28 +.src_ref 2 "reduce_base.h" 405 45 + 8586 "01110110" // LDA.s16 r19, [p4], #-2; MOVS dc3, dc2; MOVXM p2, #508464 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8587 "00010000" // /* MW 11 */ + 8588 "00011000" // /* MW 10 */ + 8589 "00110001" // /* MW 9 */ + 8590 "11110001" // /* MW 8 */ + 8591 "00000001" // /* MW 7 */ + 8592 "00000000" // /* MW 6 */ + 8593 "01001011" // /* MW 5 */ + 8594 "00001000" // /* MW 4 */ + 8595 "01010011" // /* MW 3 */ + 8596 "11001110" // /* MW 2 */ + 8597 "10011111" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 +.src_ref 7 "aie_core.h" 90 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 199 120 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 4 "add.hpp" 28 49 +.src_ref 4 "add_reduce.hpp" 332 18 +.src_ref 4 "add_reduce.hpp" 337 22 +.src_ref 4 "add_reduce.hpp" 337 22 +.src_ref 4 "add_reduce.hpp" 337 22 +.src_ref 4 "add_reduce.hpp" 337 22 +.src_ref 2 "reduce_base.h" 294 28 +.src_ref 2 "reduce_base.h" 406 38 first + 8598 "01110110" // LDA.u16 r26, [p4], #-6; MOVS dc4, dc2; MOVX r17, #60; ADD.NC p7, r17, #28 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8599 "00001000" // /* MW 11 */ + 8600 "01000111" // /* MW 10 */ + 8601 "10110100" // /* MW 9 */ + 8602 "10001011" // /* MW 8 */ + 8603 "00010111" // /* MW 7 */ + 8604 "00000001" // /* MW 6 */ + 8605 "01001011" // /* MW 5 */ + 8606 "00001000" // /* MW 4 */ + 8607 "01010100" // /* MW 3 */ + 8608 "11101011" // /* MW 2 */ + 8609 "10011011" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 +.src_ref 7 "aie_core.h" 73 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 199 120 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 2 "reduce_base.h" 409 43 first + 8610 "10111010" // LDA.s16 r20, [p4], #-6; MOVS dc1, dc4; MOV dj3, #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8611 "01010010" // /* MW 9 */ + 8612 "01000000" // /* MW 8 */ + 8613 "11000000" // /* MW 7 */ + 8614 "00000001" // /* MW 6 */ + 8615 "01001011" // /* MW 5 */ + 8616 "00010000" // /* MW 4 */ + 8617 "01010001" // /* MW 3 */ + 8618 "11010010" // /* MW 2 */ + 8619 "10011011" // /* MW 1 */ +.src_ref 7 "aie_core.h" 90 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 199 120 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 2 "reduce_base.h" 409 64 + 8620 "01010100" // LDA.u16 r28, [p4], #-2; MOV dc0, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8621 "00000001" // /* MW 5 */ + 8622 "10000000" // /* MW 4 */ + 8623 "01010001" // /* MW 3 */ + 8624 "11110011" // /* MW 2 */ + 8625 "10011111" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 305 8 first +.src_ref 2 "reduce_base.h" 410 56 first + 8626 "10111010" // LDA.s16 r21, [p4], #6; MOVXM ls, #8816 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8627 "00010000" // /* MW 9 */ + 8628 "00111000" // /* MW 8 */ + 8629 "01111001" // /* MW 7 */ + 8630 "00001000" // /* MW 6 */ + 8631 "00000000" // /* MW 5 */ + 8632 "00000000" // /* MW 4 */ + 8633 "01010000" // /* MW 3 */ + 8634 "11010110" // /* MW 2 */ + 8635 "10000111" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 305 8 +.src_ref 2 "reduce_base.h" 411 56 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 8636 "10111010" // LDA.s16 r22, [p4, #-2]; MOVXM le, #8832 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8637 "00010000" // /* MW 9 */ + 8638 "01000000" // /* MW 8 */ + 8639 "10111001" // /* MW 7 */ + 8640 "00001001" // /* MW 6 */ + 8641 "00000000" // /* MW 5 */ + 8642 "00000000" // /* MW 4 */ + 8643 "01010000" // /* MW 3 */ + 8644 "11011010" // /* MW 2 */ + 8645 "10011110" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 +.src_ref 7 "aie_core.h" 73 15 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "reduce_base.h" 410 75 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 8646 "10111010" // LDA.u16 r26, [p4]; MOVS p4, p3; MOV dj2, #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8647 "01010010" // /* MW 9 */ + 8648 "01000000" // /* MW 8 */ + 8649 "01000000" // /* MW 7 */ + 8650 "00000001" // /* MW 6 */ + 8651 "10001011" // /* MW 5 */ + 8652 "10001100" // /* MW 4 */ + 8653 "01010100" // /* MW 3 */ + 8654 "11101011" // /* MW 2 */ + 8655 "10000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 222 30 +.src_ref 2 "reduce_base.h" 294 28 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 8656 "11010100" // LDA.s8 r23, [p2]; MOV p2, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8657 "10000001" // /* MW 5 */ + 8658 "11011101" // /* MW 4 */ + 8659 "01010100" // /* MW 3 */ + 8660 "11011100" // /* MW 2 */ + 8661 "01000000" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 +.src_ref 7 "aie_core.h" 73 15 +.src_ref 7 "aie_core.h" 90 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 199 120 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 2 "reduce_base.h" 294 28 +.src_ref 2 "reduce_base.h" 313 60 +.src_ref 2 "reduce_base.h" 314 27 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 8662 "10111010" // LDA.u16 r19, [p7], #6; MOVX r18, #1; MOV dn3, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8663 "01111000" // /* MW 9 */ + 8664 "10010000" // /* MW 8 */ + 8665 "10100110" // /* MW 7 */ + 8666 "00101001" // /* MW 6 */ + 8667 "00100000" // /* MW 5 */ + 8668 "00000001" // /* MW 4 */ + 8669 "01010000" // /* MW 3 */ + 8670 "11001111" // /* MW 2 */ + 8671 "11100111" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 +.src_ref 7 "aie_core.h" 73 15 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1119 102 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8672 "11100100" // LSHL r27, r19, r18; MOV dn2, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8673 "01000001" // /* MW 5 */ + 8674 "10011010" // /* MW 4 */ + 8675 "10110100" // /* MW 3 */ + 8676 "11100101" // /* MW 2 */ + 8677 "10011110" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 +.src_ref 7 "aie_core.h" 73 15 +.src_ref 7 "aie_core.h" 90 15 +.src_ref 7 "aie_core.h" 90 15 first +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 199 120 +.src_ref 5 "accum.hpp" 199 120 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 1119 102 + 8678 "00111010" // MOVS dn0, r28; LSHL r20, r20, r18; MOV m3, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8679 "01111001" // /* MW 9 */ + 8680 "11010000" // /* MW 8 */ + 8681 "10000110" // /* MW 7 */ + 8682 "01101101" // /* MW 6 */ + 8683 "01001001" // /* MW 5 */ + 8684 "00101001" // /* MW 4 */ + 8685 "01100000" // /* MW 3 */ + 8686 "10000001" // /* MW 2 */ + 8687 "00001011" // /* MW 1 */ +.src_ref 7 "aie_core.h" 90 15 +.src_ref 7 "aie_core.h" 90 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 199 120 +.src_ref 5 "accum.hpp" 940 83 + 8688 "11100100" // LSHL r20, r21, r18; MOV m0, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8689 "01000001" // /* MW 5 */ + 8690 "00010100" // /* MW 4 */ + 8691 "10110000" // /* MW 3 */ + 8692 "00100101" // /* MW 2 */ + 8693 "10101101" // /* MW 1 */ +.src_ref 7 "aie_core.h" 90 15 +.src_ref 7 "aie_core.h" 90 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 199 120 +.src_ref 5 "accum.hpp" 940 83 + 8694 "11100100" // LSHL r20, r22, r18; MOV dj0, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8695 "01000001" // /* MW 5 */ + 8696 "00010100" // /* MW 4 */ + 8697 "10110001" // /* MW 3 */ + 8698 "00100101" // /* MW 2 */ + 8699 "10110101" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 first +.src_ref 7 "aie_core.h" 90 15 +.src_ref 7 "aie_core.h" 90 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 199 120 +.src_ref 5 "accum.hpp" 199 120 +.src_ref 5 "accum.hpp" 199 120 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 940 83 first + 8700 "10111010" // VLDA.2D.CONV.fp32.bf16 cml1, [p3], d3; MOVS dn4, r26; MOV dj4, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8701 "01110010" // /* MW 9 */ + 8702 "00010000" // /* MW 8 */ + 8703 "01000101" // /* MW 7 */ + 8704 "00000010" // /* MW 6 */ + 8705 "00001011" // /* MW 5 */ + 8706 "01011010" // /* MW 4 */ + 8707 "01110100" // /* MW 3 */ + 8708 "00010101" // /* MW 2 */ + 8709 "01101110" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 +.src_ref 7 "aie_core.h" 73 15 +.src_ref 7 "aie_core.h" 90 15 first +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 199 120 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 8710 "10111010" // VLDA.3D.CONV.fp32.bf16 cml2, [p0], d0; MOVX crRnd, r23; MOV m2, m3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8711 "01111000" // /* MW 9 */ + 8712 "00000000" // /* MW 8 */ + 8713 "00000011" // /* MW 7 */ + 8714 "00000001" // /* MW 6 */ + 8715 "11010100" // /* MW 5 */ + 8716 "00101111" // /* MW 4 */ + 8717 "01110000" // /* MW 3 */ + 8718 "00100101" // /* MW 2 */ + 8719 "00000011" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 199 120 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "reduce_base.h" 305 8 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 8720 "00010100" // VLDA.2D.CONV.fp32.bf16 cml1, [p3], d3; ADD.NC lc, r19, #-5 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8721 "11111011" // /* MW 5 */ + 8722 "11110011" // /* MW 4 */ + 8723 "01111010" // /* MW 3 */ + 8724 "00010101" // /* MW 2 */ + 8725 "01101110" // /* MW 1 */ +.src_ref 7 "aie_core.h" 90 15 first +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 199 120 +.src_ref 5 "accum.hpp" 940 83 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8726 "10011000" // VLDA.3D.CONV.fp32.bf16 cml2, [p0], d0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8727 "00101011" // /* MW 3 */ + 8728 "00011001" // /* MW 2 */ + 8729 "00000000" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 199 120 first +.src_ref 5 "accum.hpp" 940 83 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8730 "00111100" // VLDA.2D.CONV.fp32.bf16 cml1, [p3], d3;NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8731 "00100000" // /* MW 5 */ + 8732 "00000000" // /* MW 4 */ + 8733 "01110000" // /* MW 3 */ + 8734 "00010101" // /* MW 2 */ + 8735 "01101110" // /* MW 1 */ +.src_ref 7 "aie_core.h" 90 15 first +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 199 120 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 4 "add.hpp" 28 49 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8736 "11101011" // VLDA.3D.CONV.fp32.bf16 cml2, [p0], d0;NOPB; NOPS; NOPX; NOPM; VADD.f dm0, dm1, dm2, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8737 "01000001" // /* MW 15 */ + 8738 "01000001" // /* MW 14 */ + 8739 "01111100" // /* MW 13 */ + 8740 "10100101" // /* MW 12 */ + 8741 "00000001" // /* MW 11 */ + 8742 "00000000" // /* MW 10 */ + 8743 "00000000" // /* MW 9 */ + 8744 "00000000" // /* MW 8 */ + 8745 "01011011" // /* MW 7 */ + 8746 "00000001" // /* MW 6 */ + 8747 "00100000" // /* MW 5 */ + 8748 "00000000" // /* MW 4 */ + 8749 "01110000" // /* MW 3 */ + 8750 "00100101" // /* MW 2 */ + 8751 "00000011" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 199 120 first +.src_ref 5 "accum.hpp" 940 83 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8752 "11100001" // VLDA.2D.CONV.fp32.bf16 cml1, [p3], d3;NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8753 "00000000" // /* MW 15 */ + 8754 "00000000" // /* MW 14 */ + 8755 "01111000" // /* MW 13 */ + 8756 "10100101" // /* MW 12 */ + 8757 "00000001" // /* MW 11 */ + 8758 "00000000" // /* MW 10 */ + 8759 "00000000" // /* MW 9 */ + 8760 "00000000" // /* MW 8 */ + 8761 "01011011" // /* MW 7 */ + 8762 "00000001" // /* MW 6 */ + 8763 "00100000" // /* MW 5 */ + 8764 "00000000" // /* MW 4 */ + 8765 "01110000" // /* MW 3 */ + 8766 "00010101" // /* MW 2 */ + 8767 "01101110" // /* MW 1 */ +.src_ref 7 "aie_core.h" 90 15 first +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 199 120 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 4 "add.hpp" 28 49 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8768 "11101011" // VLDA.3D.CONV.fp32.bf16 cml2, [p0], d0;NOPB; NOPS; NOPX; NOPM; VADD.f dm0, dm1, dm2, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8769 "01000001" // /* MW 15 */ + 8770 "01000001" // /* MW 14 */ + 8771 "01111100" // /* MW 13 */ + 8772 "10100101" // /* MW 12 */ + 8773 "00000001" // /* MW 11 */ + 8774 "00000000" // /* MW 10 */ + 8775 "00000000" // /* MW 9 */ + 8776 "00000000" // /* MW 8 */ + 8777 "01011011" // /* MW 7 */ + 8778 "00000001" // /* MW 6 */ + 8779 "00100000" // /* MW 5 */ + 8780 "00000000" // /* MW 4 */ + 8781 "01110000" // /* MW 3 */ + 8782 "00100101" // /* MW 2 */ + 8783 "00000011" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 199 120 first +.src_ref 5 "accum.hpp" 940 83 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8784 "11100001" // VLDA.2D.CONV.fp32.bf16 cml1, [p3], d3;NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8785 "00000000" // /* MW 15 */ + 8786 "00000000" // /* MW 14 */ + 8787 "01111000" // /* MW 13 */ + 8788 "10100101" // /* MW 12 */ + 8789 "00000001" // /* MW 11 */ + 8790 "00000000" // /* MW 10 */ + 8791 "00000000" // /* MW 9 */ + 8792 "00000000" // /* MW 8 */ + 8793 "01011011" // /* MW 7 */ + 8794 "00000001" // /* MW 6 */ + 8795 "00100000" // /* MW 5 */ + 8796 "00000000" // /* MW 4 */ + 8797 "01110000" // /* MW 3 */ + 8798 "00010101" // /* MW 2 */ + 8799 "01101110" // /* MW 1 */ +.src_ref 7 "aie_core.h" 90 15 first +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 199 120 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 4 "add.hpp" 28 49 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8800 "11101011" // VLDA.3D.CONV.fp32.bf16 cml2, [p0], d0;NOPB; NOPS; NOPX; NOPM; VADD.f dm0, dm1, dm2, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8801 "01000001" // /* MW 15 */ + 8802 "01000001" // /* MW 14 */ + 8803 "01111100" // /* MW 13 */ + 8804 "10100101" // /* MW 12 */ + 8805 "00000001" // /* MW 11 */ + 8806 "00000000" // /* MW 10 */ + 8807 "00000000" // /* MW 9 */ + 8808 "00000000" // /* MW 8 */ + 8809 "01011011" // /* MW 7 */ + 8810 "00000001" // /* MW 6 */ + 8811 "00100000" // /* MW 5 */ + 8812 "00000000" // /* MW 4 */ + 8813 "01110000" // /* MW 3 */ + 8814 "00100101" // /* MW 2 */ + 8815 "00000011" // /* MW 1 */ +.label ZLS_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_816 +.src_ref 7 "aie_core.h" 73 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 199 120 first +.src_ref 5 "accum.hpp" 940 83 first +.begin_of_loop +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 8816 "11100001" // VLDA.2D.CONV.fp32.bf16 cml1, [p3], d3;NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8817 "00000000" // /* MW 15 */ + 8818 "00000000" // /* MW 14 */ + 8819 "01111000" // /* MW 13 */ + 8820 "10100101" // /* MW 12 */ + 8821 "00000001" // /* MW 11 */ + 8822 "00000000" // /* MW 10 */ + 8823 "00000000" // /* MW 9 */ + 8824 "00000000" // /* MW 8 */ + 8825 "01011011" // /* MW 7 */ + 8826 "00000001" // /* MW 6 */ + 8827 "00100000" // /* MW 5 */ + 8828 "00000000" // /* MW 4 */ + 8829 "01110000" // /* MW 3 */ + 8830 "00010101" // /* MW 2 */ + 8831 "01101110" // /* MW 1 */ +.label ZLE_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_832 +.src_ref 7 "aie_core.h" 73 15 +.src_ref 7 "aie_core.h" 90 15 first +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 199 120 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 4 "add.hpp" 28 49 first +.end_of_loop +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8832 "11101011" // VLDA.3D.CONV.fp32.bf16 cml2, [p0], d0;NOPB; VST.2D.CONV.bf16.fp32 cml0, [p4], d2;NOPX; NOPM; VADD.f dm0, dm1, dm2, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8833 "01000001" // /* MW 15 */ + 8834 "01000001" // /* MW 14 */ + 8835 "01111100" // /* MW 13 */ + 8836 "10100101" // /* MW 12 */ + 8837 "00000001" // /* MW 11 */ + 8838 "00000000" // /* MW 10 */ + 8839 "00000000" // /* MW 9 */ + 8840 "00000000" // /* MW 8 */ + 8841 "00100011" // /* MW 7 */ + 8842 "01010000" // /* MW 6 */ + 8843 "00100100" // /* MW 5 */ + 8844 "00000000" // /* MW 4 */ + 8845 "01110000" // /* MW 3 */ + 8846 "00100101" // /* MW 2 */ + 8847 "00000011" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 8848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8849 "00000000" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 4 "add.hpp" 28 49 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8850 "01100010" // VST.2D.CONV.bf16.fp32 cml0, [p4], d2; VADD.f dm0, dm1, dm2, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8851 "00111101" // /* MW 7 */ + 8852 "00101000" // /* MW 6 */ + 8853 "10001000" // /* MW 5 */ + 8854 "00000010" // /* MW 4 */ + 8855 "01100000" // /* MW 3 */ + 8856 "00000100" // /* MW 2 */ + 8857 "10001010" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8859 "00000000" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 4 "add.hpp" 28 49 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8860 "01100010" // VST.2D.CONV.bf16.fp32 cml0, [p4], d2; VADD.f dm0, dm1, dm2, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8861 "00111101" // /* MW 7 */ + 8862 "00101000" // /* MW 6 */ + 8863 "10001000" // /* MW 5 */ + 8864 "00000010" // /* MW 4 */ + 8865 "01100000" // /* MW 3 */ + 8866 "00000100" // /* MW 2 */ + 8867 "10001010" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 312 12 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8868 "10000100" // JNZ r16, #9424 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9424 delay_slots=5 */ + 8869 "00000001" // /* MW 5 */ + 8870 "01000000" // /* MW 4 */ + 8871 "01101000" // /* MW 3 */ + 8872 "00010010" // /* MW 2 */ + 8873 "10000000" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 first +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1119 102 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8874 "00011000" // VST.2D.CONV.bf16.fp32 cml0, [p4], d2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8875 "00100011" // /* MW 3 */ + 8876 "01010000" // /* MW 2 */ + 8877 "00001100" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8879 "00000000" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1119 102 +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8880 "00011000" // VST.2D.CONV.bf16.fp32 cml0, [p4], d2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8881 "00100011" // /* MW 3 */ + 8882 "01010000" // /* MW 2 */ + 8883 "00001100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8885 "00000000" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1119 102 +.delay_slot + 8886 "00011000" // VST.2D.CONV.bf16.fp32 cml0, [p4], d2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8887 "00100011" // /* MW 3 */ + 8888 "01010000" // /* MW 2 */ + 8889 "00001100" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 2 "reduce_base.h" 314 27 + 8890 "10111010" // MOVA dj1, #64; MOVXM p0, #508534 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8891 "00010000" // /* MW 9 */ + 8892 "00111011" // /* MW 8 */ + 8893 "00110001" // /* MW 7 */ + 8894 "11110000" // /* MW 6 */ + 8895 "00000001" // /* MW 5 */ + 8896 "00000000" // /* MW 4 */ + 8897 "10000000" // /* MW 3 */ + 8898 "00000110" // /* MW 2 */ + 8899 "00001000" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 +.src_ref 2 "reduce_base.h" 313 60 +.src_ref 2 "reduce_base.h" 314 27 first + 8900 "10111010" // LDA.s16 r7, [p0], #-2; MOVX r16, #32; MOV r23, p1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8901 "01111000" // /* MW 9 */ + 8902 "01100000" // /* MW 8 */ + 8903 "11101001" // /* MW 7 */ + 8904 "00001010" // /* MW 6 */ + 8905 "00000100" // /* MW 5 */ + 8906 "00000001" // /* MW 4 */ + 8907 "01010000" // /* MW 3 */ + 8908 "10011110" // /* MW 2 */ + 8909 "00011111" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 +.src_ref 4 "add_reduce.hpp" 335 47 +.src_ref 2 "reduce_base.h" 313 69 first + 8910 "10111010" // LDA r29, [p0, #-4]; MOVX r19, #16; MOV r20, #8 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8911 "01011000" // /* MW 9 */ + 8912 "00001000" // /* MW 8 */ + 8913 "10001000" // /* MW 7 */ + 8914 "00001010" // /* MW 6 */ + 8915 "00110010" // /* MW 5 */ + 8916 "00000001" // /* MW 4 */ + 8917 "11010000" // /* MW 3 */ + 8918 "11110110" // /* MW 2 */ + 8919 "00011110" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 314 27 first +.src_ref 2 "reduce_base.h" 319 12 first + 8920 "10111010" // LDA.u16 r27, [p0]; MOVXM ls, #9168 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8921 "00010000" // /* MW 9 */ + 8922 "11101000" // /* MW 8 */ + 8923 "01111001" // /* MW 7 */ + 8924 "00001000" // /* MW 6 */ + 8925 "00000000" // /* MW 5 */ + 8926 "00000000" // /* MW 4 */ + 8927 "01010000" // /* MW 3 */ + 8928 "11101111" // /* MW 2 */ + 8929 "00000000" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 +.src_ref 2 "reduce_base.h" 319 12 + 8930 "10111010" // MOVA r22, #4; MOVXM le, #9264 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8931 "00010000" // /* MW 9 */ + 8932 "00011000" // /* MW 8 */ + 8933 "10111010" // /* MW 7 */ + 8934 "00001001" // /* MW 6 */ + 8935 "00000000" // /* MW 5 */ + 8936 "00000000" // /* MW 4 */ + 8937 "00000000" // /* MW 3 */ + 8938 "10010110" // /* MW 2 */ + 8939 "00000000" // /* MW 1 */ + 8940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8941 "00000000" // /* MW 1 */ + 8942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8943 "00000000" // /* MW 1 */ + 8944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8945 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 314 27 + 8946 "10011000" // LSHL r7, r7, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8947 "00101101" // /* MW 3 */ + 8948 "11001111" // /* MW 2 */ + 8949 "00010001" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 2 "reduce_base.h" 313 60 first + 8950 "11100100" // LSHL r18, r29, r18; MOV m1, r7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8951 "01000001" // /* MW 5 */ + 8952 "00000111" // /* MW 4 */ + 8953 "10110010" // /* MW 3 */ + 8954 "10100101" // /* MW 2 */ + 8955 "11101100" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 2 "reduce_base.h" 319 12 first + 8956 "00000010" // MOVS dn1, r27; ADD.NC lc, r29, #-3 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8957 "01000000" // /* MW 7 */ + 8958 "01111111" // /* MW 6 */ + 8959 "10111111" // /* MW 5 */ + 8960 "00000010" // /* MW 4 */ + 8961 "01100000" // /* MW 3 */ + 8962 "01100001" // /* MW 2 */ + 8963 "00101011" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 313 60 first + 8964 "01011000" // ADD.NC p0, r23, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8965 "11001001" // /* MW 3 */ + 8966 "01101011" // /* MW 2 */ + 8967 "00011000" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first + 8968 "10011000" // VLDA.2D.CONV.fp32.bf16 cml0, [p0], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8969 "00101011" // /* MW 3 */ + 8970 "00110000" // /* MW 2 */ + 8971 "00000000" // /* MW 1 */ + 8972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8973 "00000000" // /* MW 1 */ + 8974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8975 "00000000" // /* MW 1 */ + 8976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8977 "00000000" // /* MW 1 */ + 8978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8979 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 8980 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8981 "00000000" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 332 18 first +.aggressive_scheduled_block_id 4 +.noswbrkpt + 8982 "01001000" // VADD.f dm0, dm0, dm3, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8983 "00111101" // /* MW 3 */ + 8984 "00001100" // /* MW 2 */ + 8985 "10001000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.src_ref 5 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8986 "11111000" // VMOV bmll3, bmlh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8987 "00010010" // /* MW 3 */ + 8988 "00000001" // /* MW 2 */ + 8989 "00011011" // /* MW 1 */ + 8990 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8991 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 8992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8993 "00000000" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.aggressive_scheduled_block_id 5 +.noswbrkpt + 8994 "10011000" // VLDA.2D.CONV.fp32.bf16 cml0, [p0], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8995 "00101011" // /* MW 3 */ + 8996 "00110000" // /* MW 2 */ + 8997 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 8998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8999 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 9000 "11111000" // VMOV x0, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9001 "00010010" // /* MW 3 */ + 9002 "00100000" // /* MW 2 */ + 9003 "00011000" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 first +.src_ref 4 "add_reduce.hpp" 337 22 first +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 9004 "01100010" // VSHIFT x1, x0, x0, r16; VADD.f dm1, dm0, dm3, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9005 "00111101" // /* MW 7 */ + 9006 "00001100" // /* MW 6 */ + 9007 "10001001" // /* MW 5 */ + 9008 "11000110" // /* MW 4 */ + 9009 "01000010" // /* MW 3 */ + 9010 "10000000" // /* MW 2 */ + 9011 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9012 "11111000" // VMOV bmll3, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9013 "10010010" // /* MW 3 */ + 9014 "00000010" // /* MW 2 */ + 9015 "00011011" // /* MW 1 */ + 9016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9017 "00000000" // /* MW 1 */ + 9018 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9019 "00000000" // /* MW 1 */ + 9020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9021 "00000000" // /* MW 1 */ + 9022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9023 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 9024 "11111000" // VMOV x2, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9025 "00010010" // /* MW 3 */ + 9026 "00100100" // /* MW 2 */ + 9027 "00011001" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 first +.src_ref 4 "add_reduce.hpp" 337 22 first +.aggressive_scheduled_block_id 6 +.noswbrkpt + 9028 "01100010" // VSHIFT x3, x2, x0, r19; VADD.f dm1, dm1, dm3, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9029 "00111101" // /* MW 7 */ + 9030 "00101100" // /* MW 6 */ + 9031 "10001001" // /* MW 5 */ + 9032 "11000110" // /* MW 4 */ + 9033 "01001110" // /* MW 3 */ + 9034 "10010000" // /* MW 2 */ + 9035 "00000001" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9036 "11111000" // VMOV bmll3, x3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9037 "10010010" // /* MW 3 */ + 9038 "00000110" // /* MW 2 */ + 9039 "00011011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.src_ref 5 "accum.hpp" 198 120 +.src_ref 4 "add_reduce.hpp" 332 18 first + 9040 "01100010" // VMOV bmll3, bmlh0; VADD.f dm0, dm0, dm3, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9041 "00111101" // /* MW 7 */ + 9042 "00001100" // /* MW 6 */ + 9043 "10001000" // /* MW 5 */ + 9044 "11100110" // /* MW 4 */ + 9045 "00010010" // /* MW 3 */ + 9046 "00000001" // /* MW 2 */ + 9047 "00000011" // /* MW 1 */ + 9048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9049 "00000000" // /* MW 1 */ + 9050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9051 "00000000" // /* MW 1 */ + 9052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9053 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 9054 "11111000" // VMOV x4, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9055 "00010010" // /* MW 3 */ + 9056 "00100100" // /* MW 2 */ + 9057 "00011010" // /* MW 1 */ +.src_ref 7 "aie_core.h" 73 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 4 "add_reduce.hpp" 335 47 first +.aggressive_scheduled_block_id 7 +.noswbrkpt + 9058 "10010100" // VLDA.2D.CONV.fp32.bf16 cml0, [p0], d1; VSHIFT x5, x4, x0, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9059 "10100101" // /* MW 5 */ + 9060 "01000000" // /* MW 4 */ + 9061 "01110101" // /* MW 3 */ + 9062 "00000101" // /* MW 2 */ + 9063 "00000110" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 9064 "11111000" // VMOV x0, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9065 "00010010" // /* MW 3 */ + 9066 "00100000" // /* MW 2 */ + 9067 "00011000" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 first +.src_ref 4 "add_reduce.hpp" 337 22 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 9068 "01100010" // VSHIFT x1, x0, x0, r16; VADD.f dm1, dm0, dm3, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9069 "00111101" // /* MW 7 */ + 9070 "00001100" // /* MW 6 */ + 9071 "10001001" // /* MW 5 */ + 9072 "11000110" // /* MW 4 */ + 9073 "01000010" // /* MW 3 */ + 9074 "10000000" // /* MW 2 */ + 9075 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 9076 "11111000" // VMOV bmll3, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9077 "10010010" // /* MW 3 */ + 9078 "00000010" // /* MW 2 */ + 9079 "00011011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 +.src_ref 4 "add_reduce.hpp" 337 22 first +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9080 "01100010" // VMOV bmll4, x5; VADD.f dm2, dm1, dm4, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9081 "00111101" // /* MW 7 */ + 9082 "00110000" // /* MW 6 */ + 9083 "10001010" // /* MW 5 */ + 9084 "11100110" // /* MW 4 */ + 9085 "10010010" // /* MW 3 */ + 9086 "00001010" // /* MW 2 */ + 9087 "00000100" // /* MW 1 */ + 9088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9089 "00000000" // /* MW 1 */ + 9090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9091 "00000000" // /* MW 1 */ + 9092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9093 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id first + 9094 "11111000" // VMOV x2, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9095 "00010010" // /* MW 3 */ + 9096 "00100100" // /* MW 2 */ + 9097 "00011001" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 first +.src_ref 4 "add_reduce.hpp" 337 22 first +.aggressive_scheduled_block_id 8 +.noswbrkpt + 9098 "01100010" // VSHIFT x3, x2, x0, r19; VADD.f dm1, dm1, dm3, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9099 "00111101" // /* MW 7 */ + 9100 "00101100" // /* MW 6 */ + 9101 "10001001" // /* MW 5 */ + 9102 "11000110" // /* MW 4 */ + 9103 "01001110" // /* MW 3 */ + 9104 "10010000" // /* MW 2 */ + 9105 "00000001" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9106 "11111000" // VMOV bmll3, x3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9107 "10010010" // /* MW 3 */ + 9108 "00000110" // /* MW 2 */ + 9109 "00011011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.src_ref 5 "accum.hpp" 198 120 +.src_ref 4 "add_reduce.hpp" 332 18 first + 9110 "01100010" // VMOV bmll3, bmlh0; VADD.f dm0, dm0, dm3, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9111 "00111101" // /* MW 7 */ + 9112 "00001100" // /* MW 6 */ + 9113 "10001000" // /* MW 5 */ + 9114 "11100110" // /* MW 4 */ + 9115 "00010010" // /* MW 3 */ + 9116 "00000001" // /* MW 2 */ + 9117 "00000011" // /* MW 1 */ + 9118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9119 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 + 9120 "11111000" // VMOV x6, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9121 "00010010" // /* MW 3 */ + 9122 "00101000" // /* MW 2 */ + 9123 "00011011" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 first + 9124 "11011000" // VSHIFT x7, x6, x0, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9125 "01011010" // /* MW 3 */ + 9126 "10110000" // /* MW 2 */ + 9127 "00011011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first + 9128 "11111000" // VMOV bmll4, x7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9129 "10010010" // /* MW 3 */ + 9130 "00001110" // /* MW 2 */ + 9131 "00011100" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.src_ref 4 "add_reduce.hpp" 337 22 first + 9132 "01100010" // VMOV x4, bmll1; VADD.f dm2, dm2, dm4, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9133 "00111101" // /* MW 7 */ + 9134 "01010000" // /* MW 6 */ + 9135 "10001010" // /* MW 5 */ + 9136 "11100110" // /* MW 4 */ + 9137 "00010010" // /* MW 3 */ + 9138 "00100100" // /* MW 2 */ + 9139 "00000010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 + 9140 "11111000" // VMOV x0, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9141 "00010010" // /* MW 3 */ + 9142 "00100000" // /* MW 2 */ + 9143 "00011000" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 first +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id first + 9144 "11011000" // VSHIFT x1, x0, x0, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9145 "01000010" // /* MW 3 */ + 9146 "10000000" // /* MW 2 */ + 9147 "00011000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first +.src_ref 4 "add_reduce.hpp" 337 22 first +.aggressive_scheduled_block_id 9 +.noswbrkpt + 9148 "01100010" // VMOV bmll3, x1; VADD.f dm1, dm0, dm3, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9149 "00111101" // /* MW 7 */ + 9150 "00001100" // /* MW 6 */ + 9151 "10001001" // /* MW 5 */ + 9152 "11100110" // /* MW 4 */ + 9153 "10010010" // /* MW 3 */ + 9154 "00000010" // /* MW 2 */ + 9155 "00000011" // /* MW 1 */ +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9156 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9157 "10000001" // /* MW 11 */ + 9158 "10101101" // /* MW 10 */ + 9159 "00000000" // /* MW 9 */ + 9160 "00000000" // /* MW 8 */ + 9161 "00000000" // /* MW 7 */ + 9162 "00000000" // /* MW 6 */ + 9163 "00100000" // /* MW 5 */ + 9164 "00000000" // /* MW 4 */ + 9165 "11110000" // /* MW 3 */ + 9166 "00101100" // /* MW 2 */ + 9167 "00000000" // /* MW 1 */ +.label ZLS_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_1168 +.src_ref 7 "aie_core.h" 73 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 4 "add_reduce.hpp" 335 47 first +.src_ref 4 "add_reduce.hpp" 337 22 +.begin_of_loop +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 9168 "01001010" // VLDA.2D.CONV.fp32.bf16 cml0, [p0], d1; VSHIFT x5, x4, x0, r20; VADD.f dm2, dm1, dm4, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9169 "00111101" // /* MW 9 */ + 9170 "00110000" // /* MW 8 */ + 9171 "10001010" // /* MW 7 */ + 9172 "11000010" // /* MW 6 */ + 9173 "01010010" // /* MW 5 */ + 9174 "10100000" // /* MW 4 */ + 9175 "01110010" // /* MW 3 */ + 9176 "00000101" // /* MW 2 */ + 9177 "00000110" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first +.src_ref 5 "accum.hpp" 1119 102 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9178 "00000010" // VCONV.bf16.fp32 x8, cml2; VMOV bmll4, x5 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9179 "01110000" // /* MW 7 */ + 9180 "01001001" // /* MW 6 */ + 9181 "00000101" // /* MW 5 */ + 9182 "00000010" // /* MW 4 */ + 9183 "11000000" // /* MW 3 */ + 9184 "00100010" // /* MW 2 */ + 9185 "10000010" // /* MW 1 */ +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9187 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9188 "10111000" // VEXTRACT.16 r21, x8, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9189 "00000001" // /* MW 3 */ + 9190 "01100001" // /* MW 2 */ + 9191 "00011101" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9192 "11111000" // VMOV x2, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9193 "00010010" // /* MW 3 */ + 9194 "00100100" // /* MW 2 */ + 9195 "00011001" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9196 "11011000" // VSHIFT x3, x2, x0, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9197 "01001110" // /* MW 3 */ + 9198 "10010000" // /* MW 2 */ + 9199 "00011001" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first +.src_ref 4 "add_reduce.hpp" 337 22 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9200 "01100010" // VMOV bmll3, x3; VADD.f dm1, dm1, dm3, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9201 "00111101" // /* MW 7 */ + 9202 "00101100" // /* MW 6 */ + 9203 "10001001" // /* MW 5 */ + 9204 "11100110" // /* MW 4 */ + 9205 "10010010" // /* MW 3 */ + 9206 "00000110" // /* MW 2 */ + 9207 "00000011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.src_ref 4 "add_reduce.hpp" 332 18 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9208 "01100010" // VMOV x6, bmll2; VADD.f dm0, dm0, dm3, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9209 "00111101" // /* MW 7 */ + 9210 "00001100" // /* MW 6 */ + 9211 "10001000" // /* MW 5 */ + 9212 "11100110" // /* MW 4 */ + 9213 "00010010" // /* MW 3 */ + 9214 "00101000" // /* MW 2 */ + 9215 "00000011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 +.src_ref 5 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9216 "11111000" // VMOV bmll3, bmlh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9217 "00010010" // /* MW 3 */ + 9218 "00000001" // /* MW 2 */ + 9219 "00011011" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9220 "11011000" // VSHIFT x7, x6, x0, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9221 "01011010" // /* MW 3 */ + 9222 "10110000" // /* MW 2 */ + 9223 "00011011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9224 "11111000" // VMOV bmll4, x7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9225 "10010010" // /* MW 3 */ + 9226 "00001110" // /* MW 2 */ + 9227 "00011100" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 337 22 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9228 "01001000" // VADD.f dm2, dm2, dm4, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9229 "00111101" // /* MW 3 */ + 9230 "01010000" // /* MW 2 */ + 9231 "10001010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9232 "11111000" // VMOV x4, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9233 "00010010" // /* MW 3 */ + 9234 "00100100" // /* MW 2 */ + 9235 "00011010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9236 "11110110" // NOPA; NOPB; NOPS; VMOV x0, bmll0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9237 "01110000" // /* MW 11 */ + 9238 "00001001" // /* MW 10 */ + 9239 "00010000" // /* MW 9 */ + 9240 "00000000" // /* MW 8 */ + 9241 "01011011" // /* MW 7 */ + 9242 "00000001" // /* MW 6 */ + 9243 "00100000" // /* MW 5 */ + 9244 "00000000" // /* MW 4 */ + 9245 "11110000" // /* MW 3 */ + 9246 "00101100" // /* MW 2 */ + 9247 "00000000" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 first +.src_ref 4 "add_reduce.hpp" 337 22 first +.src_ref 2 "reduce_base.h" 326 30 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9248 "11101011" // ST.s16 r21, [p1], #2; NOPB; NOPS; NOPX; VSHIFT x1, x0, x0, r16; VADD.f dm1, dm0, dm3, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9249 "01100001" // /* MW 15 */ + 9250 "01001000" // /* MW 14 */ + 9251 "01101100" // /* MW 13 */ + 9252 "00100001" // /* MW 12 */ + 9253 "01000000" // /* MW 11 */ + 9254 "00000000" // /* MW 10 */ + 9255 "00000000" // /* MW 9 */ + 9256 "00000000" // /* MW 8 */ + 9257 "01011011" // /* MW 7 */ + 9258 "00000001" // /* MW 6 */ + 9259 "00100000" // /* MW 5 */ + 9260 "00000000" // /* MW 4 */ + 9261 "11100000" // /* MW 3 */ + 9262 "11010110" // /* MW 2 */ + 9263 "00100011" // /* MW 1 */ +.label ZLE_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_1264 +.src_ref 5 "accum.hpp" 198 120 first +.end_of_loop +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 9264 "11100001" // NOPA; NOPB; NOPS; NOPX; VMOV bmll3, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9265 "00000000" // /* MW 15 */ + 9266 "00000000" // /* MW 14 */ + 9267 "01111000" // /* MW 13 */ + 9268 "01001001" // /* MW 12 */ + 9269 "10000001" // /* MW 11 */ + 9270 "00000001" // /* MW 10 */ + 9271 "00000000" // /* MW 9 */ + 9272 "00000000" // /* MW 8 */ + 9273 "01011011" // /* MW 7 */ + 9274 "00000001" // /* MW 6 */ + 9275 "00100000" // /* MW 5 */ + 9276 "00000000" // /* MW 4 */ + 9277 "11110000" // /* MW 3 */ + 9278 "00101100" // /* MW 2 */ + 9279 "00000000" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 first +.src_ref 4 "add_reduce.hpp" 337 22 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 9280 "01100010" // VSHIFT x5, x4, x0, r20; VADD.f dm2, dm1, dm4, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9281 "00111101" // /* MW 7 */ + 9282 "00110000" // /* MW 6 */ + 9283 "10001010" // /* MW 5 */ + 9284 "11000110" // /* MW 4 */ + 9285 "01010010" // /* MW 3 */ + 9286 "10100000" // /* MW 2 */ + 9287 "00000010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first +.src_ref 5 "accum.hpp" 1119 102 first +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9288 "00000010" // VCONV.bf16.fp32 x8, cml2; VMOV bmll4, x5 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9289 "01110000" // /* MW 7 */ + 9290 "01001001" // /* MW 6 */ + 9291 "00000101" // /* MW 5 */ + 9292 "00000010" // /* MW 4 */ + 9293 "11000000" // /* MW 3 */ + 9294 "00100010" // /* MW 2 */ + 9295 "10000010" // /* MW 1 */ + 9296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9297 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first + 9298 "10111000" // VEXTRACT.16 r21, x8, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9299 "00000001" // /* MW 3 */ + 9300 "01100001" // /* MW 2 */ + 9301 "00011101" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first + 9302 "11111000" // VMOV x2, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9303 "00010010" // /* MW 3 */ + 9304 "00100100" // /* MW 2 */ + 9305 "00011001" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 first +.src_ref 2 "reduce_base.h" 326 30 first + 9306 "10010100" // ST.s16 r21, [p1], #2; VSHIFT x3, x2, x0, r19 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9307 "10011101" // /* MW 5 */ + 9308 "00100000" // /* MW 4 */ + 9309 "11100011" // /* MW 3 */ + 9310 "11010110" // /* MW 2 */ + 9311 "00100011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id first + 9312 "11111000" // VMOV x6, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9313 "00010010" // /* MW 3 */ + 9314 "00101000" // /* MW 2 */ + 9315 "00011011" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 first +.src_ref 4 "add_reduce.hpp" 337 22 first +.aggressive_scheduled_block_id 10 +.noswbrkpt + 9316 "01100010" // VSHIFT x7, x6, x0, r22; VADD.f dm2, dm2, dm4, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9317 "00111101" // /* MW 7 */ + 9318 "01010000" // /* MW 6 */ + 9319 "10001010" // /* MW 5 */ + 9320 "11000110" // /* MW 4 */ + 9321 "01011010" // /* MW 3 */ + 9322 "10110000" // /* MW 2 */ + 9323 "00000011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9324 "11111000" // VMOV bmll4, x7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9325 "10010010" // /* MW 3 */ + 9326 "00001110" // /* MW 2 */ + 9327 "00011100" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 +.src_ref 4 "add_reduce.hpp" 337 22 first + 9328 "01100010" // VMOV bmll3, x3; VADD.f dm1, dm1, dm3, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9329 "00111101" // /* MW 7 */ + 9330 "00101100" // /* MW 6 */ + 9331 "10001001" // /* MW 5 */ + 9332 "11100110" // /* MW 4 */ + 9333 "10010010" // /* MW 3 */ + 9334 "00000110" // /* MW 2 */ + 9335 "00000011" // /* MW 1 */ + 9336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9337 "00000000" // /* MW 1 */ + 9338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9339 "00000000" // /* MW 1 */ + 9340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9341 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 first + 9342 "00011000" // VCONV.bf16.fp32 x8, cml2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9343 "00010110" // /* MW 3 */ + 9344 "00010001" // /* MW 2 */ + 9345 "00001100" // /* MW 1 */ +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id first + 9346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9347 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.src_ref 2 "reduce_base.h" 326 30 first +.aggressive_scheduled_block_id 11 +.noswbrkpt + 9348 "11010100" // ST.s16 r21, [p1], #2; VMOV x4, bmll1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9349 "00100101" // /* MW 5 */ + 9350 "01001000" // /* MW 4 */ + 9351 "11100100" // /* MW 3 */ + 9352 "11010110" // /* MW 2 */ + 9353 "00100011" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 first +.src_ref 4 "add_reduce.hpp" 337 22 first +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 9354 "01100010" // VSHIFT x5, x4, x0, r20; VADD.f dm2, dm1, dm4, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9355 "00111101" // /* MW 7 */ + 9356 "00110000" // /* MW 6 */ + 9357 "10001010" // /* MW 5 */ + 9358 "11000110" // /* MW 4 */ + 9359 "01010010" // /* MW 3 */ + 9360 "10100000" // /* MW 2 */ + 9361 "00000010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 9362 "11111000" // VMOV bmll4, x5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9363 "10010010" // /* MW 3 */ + 9364 "00001010" // /* MW 2 */ + 9365 "00011100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9366 "10111000" // VEXTRACT.16 r21, x8, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9367 "00000001" // /* MW 3 */ + 9368 "01100001" // /* MW 2 */ + 9369 "00011101" // /* MW 1 */ + 9370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9371 "00000000" // /* MW 1 */ + 9372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9373 "00000000" // /* MW 1 */ + 9374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9375 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.aggressive_scheduled_block_id 12 +.aggressive_scheduled_block_id first + 9376 "11111000" // VMOV x6, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9377 "00010010" // /* MW 3 */ + 9378 "00101000" // /* MW 2 */ + 9379 "00011011" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 335 47 first +.src_ref 4 "add_reduce.hpp" 337 22 first +.aggressive_scheduled_block_id 12 +.noswbrkpt + 9380 "01100010" // VSHIFT x7, x6, x0, r22; VADD.f dm2, dm2, dm4, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9381 "00111101" // /* MW 7 */ + 9382 "01010000" // /* MW 6 */ + 9383 "10001010" // /* MW 5 */ + 9384 "11000110" // /* MW 4 */ + 9385 "01011010" // /* MW 3 */ + 9386 "10110000" // /* MW 2 */ + 9387 "00000011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 12 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9388 "11111000" // VMOV bmll4, x7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9389 "10010010" // /* MW 3 */ + 9390 "00001110" // /* MW 2 */ + 9391 "00011100" // /* MW 1 */ + 9392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9393 "00000000" // /* MW 1 */ + 9394 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9395 "00000000" // /* MW 1 */ + 9396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9397 "00000000" // /* MW 1 */ + 9398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9399 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 first + 9400 "00011000" // VCONV.bf16.fp32 x8, cml2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9401 "00010110" // /* MW 3 */ + 9402 "00010001" // /* MW 2 */ + 9403 "00001100" // /* MW 1 */ + 9404 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9405 "01100111" // /* MW 3 */ + 9406 "00000001" // /* MW 2 */ + 9407 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 2 "reduce_base.h" 326 30 first + 9408 "11100001" // ST.s16 r21, [p1], #2; NOPB; NOPS; NOPX; VEXTRACT.16 r21, x8, #0, vaddSign0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9409 "00000000" // /* MW 15 */ + 9410 "00000000" // /* MW 14 */ + 9411 "11011000" // /* MW 13 */ + 9412 "10000000" // /* MW 12 */ + 9413 "10110000" // /* MW 11 */ + 9414 "00000010" // /* MW 10 */ + 9415 "00000000" // /* MW 9 */ + 9416 "00000000" // /* MW 8 */ + 9417 "01011011" // /* MW 7 */ + 9418 "00000001" // /* MW 6 */ + 9419 "00100000" // /* MW 5 */ + 9420 "00000000" // /* MW 4 */ + 9421 "11100000" // /* MW 3 */ + 9422 "11010110" // /* MW 2 */ + 9423 "00100011" // /* MW 1 */ +.label TGT_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_1424 +.src_ref 2 "reduce_base.h" 222 30 first + 9424 "10011000" // LDA.u16 r16, [p2, #6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9425 "00011010" // /* MW 3 */ + 9426 "00110110" // /* MW 2 */ + 9427 "00000010" // /* MW 1 */ + 9428 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9429 "00000000" // /* MW 1 */ + 9430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9431 "00000000" // /* MW 1 */ + 9432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9433 "00000000" // /* MW 1 */ + 9434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9435 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 13 +.aggressive_scheduled_block_id first + 9436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9437 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 222 30 +.aggressive_scheduled_block_id 13 +.noswbrkpt + 9438 "00011000" // ST.s16 r16, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9439 "00010111" // /* MW 3 */ + 9440 "11111110" // /* MW 2 */ + 9441 "00000111" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 223 57 first +.aggressive_scheduled_block_id 13 +.nohwbrkpt +.noswbrkpt + 9442 "10011000" // LDA.u16 r1, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9443 "00111010" // /* MW 3 */ + 9444 "00000100" // /* MW 2 */ + 9445 "00000111" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 223 57 +.aggressive_scheduled_block_id 13 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 9446 "00000100" // JL #13472 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=13472 delay_slots=5 */ + 9447 "00000001" // /* MW 5 */ + 9448 "00000000" // /* MW 4 */ + 9449 "01010000" // /* MW 3 */ + 9450 "00011010" // /* MW 2 */ + 9451 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 222 30 first +.delay_slot +.aggressive_scheduled_block_id 13 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9452 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9453 "00000111" // /* MW 3 */ + 9454 "00100000" // /* MW 2 */ + 9455 "00010100" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 222 30 +.delay_slot + 9456 "00011000" // EXTEND.u16 r0, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9457 "10110000" // /* MW 3 */ + 9458 "00000000" // /* MW 2 */ + 9459 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9461 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9463 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9464 "00100010" // NOPA; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9465 "00011100" // /* MW 7 */ + 9466 "00000000" // /* MW 6 */ + 9467 "00000000" // /* MW 5 */ + 9468 "00000100" // /* MW 4 */ + 9469 "11110000" // /* MW 3 */ + 9470 "00101100" // /* MW 2 */ + 9471 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 355 4 +.return_address + 9472 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9473 "00111001" // /* MW 3 */ + 9474 "11111100" // /* MW 2 */ + 9475 "00000111" // /* MW 1 */ + 9476 "00011000" // LDA p1, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9477 "10011001" // /* MW 3 */ + 9478 "11111000" // /* MW 2 */ + 9479 "00000111" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 223 23 first + 9480 "00011000" // ST.s16 r3, [p7, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9481 "01110111" // /* MW 3 */ + 9482 "00100100" // /* MW 2 */ + 9483 "00000111" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 355 4 first + 9484 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9485 "00000001" // /* MW 5 */ + 9486 "00000000" // /* MW 4 */ + 9487 "00000000" // /* MW 3 */ + 9488 "11111000" // /* MW 2 */ + 9489 "11111111" // /* MW 1 */ + 9490 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9491 "00000000" // /* MW 1 */ + 9492 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9493 "00000000" // /* MW 1 */ + 9494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9495 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base.h" 355 4 + 9496 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9497 "00000000" // /* MW 3 */ + 9498 "00101000" // /* MW 2 */ + 9499 "00010000" // /* MW 1 */ +.delay_slot + 9500 "11111000" // MOV p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9501 "11000000" // /* MW 3 */ + 9502 "01100010" // /* MW 2 */ + 9503 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9505 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9507 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9509 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E__end +.label __ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E___func_end0 + 9511 "00000000" // /* MW 1 */ +.label __Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_reducesum _Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 6 "superkernels.cpp" 405 first +.src_ref 6 "superkernels.cpp" 410 6 +.function_start + 9520 "01000100" // MOVXM p3, #508416 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9521 "00000000" // /* MW 5 */ + 9522 "11000100" // /* MW 4 */ + 9523 "11000110" // /* MW 3 */ + 9524 "00000111" // /* MW 2 */ + 9525 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 410 6 first + 9526 "11010100" // LDA r16, [p3]; MOV r3, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9527 "01000001" // /* MW 5 */ + 9528 "10101110" // /* MW 4 */ + 9529 "11010001" // /* MW 3 */ + 9530 "11000010" // /* MW 2 */ + 9531 "01100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 405 + 9532 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9533 "00000001" // /* MW 5 */ + 9534 "00000000" // /* MW 4 */ + 9535 "00000000" // /* MW 3 */ + 9536 "00001000" // /* MW 2 */ + 9537 "00000000" // /* MW 1 */ + 9538 "00000010" // ST p7, [sp, #-20]; MOV r1, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9539 "01110000" // /* MW 7 */ + 9540 "11010000" // /* MW 6 */ + 9541 "00101011" // /* MW 5 */ + 9542 "00000000" // /* MW 4 */ + 9543 "10110000" // /* MW 3 */ + 9544 "11110011" // /* MW 2 */ + 9545 "11111101" // /* MW 1 */ + 9546 "00000010" // ST p6, [sp, #-4]; MOV r17, CORE_ID /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9547 "01110000" // /* MW 7 */ + 9548 "01110000" // /* MW 6 */ + 9549 "00101101" // /* MW 5 */ + 9550 "00000010" // /* MW 4 */ + 9551 "10110000" // /* MW 3 */ + 9552 "11100011" // /* MW 2 */ + 9553 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 1 "io_buffer_main.h" 348 51 +.src_ref 6 "superkernels.cpp" 407 22 first + 9554 "00111010" // ST r13, [sp, #-8]; EXTEND.u8 r17, r17; MOV r15, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9555 "01111001" // /* MW 9 */ + 9556 "01100000" // /* MW 8 */ + 9557 "11101010" // /* MW 7 */ + 9558 "10000001" // /* MW 6 */ + 9559 "00010100" // /* MW 5 */ + 9560 "00100011" // /* MW 4 */ + 9561 "10110000" // /* MW 3 */ + 9562 "00110110" // /* MW 2 */ + 9563 "11111111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 517 + 9564 "00000010" // ST r1, [sp, #-16]; MOV r14, lr /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9565 "01110000" // /* MW 7 */ + 9566 "11110000" // /* MW 6 */ + 9567 "11001000" // /* MW 5 */ + 9568 "00000001" // /* MW 4 */ + 9569 "10110000" // /* MW 3 */ + 9570 "00000110" // /* MW 2 */ + 9571 "11111110" // /* MW 1 */ + 9572 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9573 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 410 6 first +.src_ref 6 "superkernels.cpp" 410 16 first + 9574 "10000100" // JNZ r16, #9776 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9776 delay_slots=5 */ + 9575 "00000001" // /* MW 5 */ + 9576 "01000000" // /* MW 4 */ + 9577 "00011000" // /* MW 3 */ + 9578 "00010011" // /* MW 2 */ + 9579 "10000000" // /* MW 1 */ +.delay_slot + 9580 "10011000" // ST r3, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9581 "01110101" // /* MW 3 */ + 9582 "11110100" // /* MW 2 */ + 9583 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 6 "superkernels.cpp" 413 4 +.delay_slot + 9584 "00111010" // MOVS p7, p0; MOVXM p0, #508480 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9585 "00010001" // /* MW 9 */ + 9586 "00100000" // /* MW 8 */ + 9587 "00110001" // /* MW 7 */ + 9588 "11110000" // /* MW 6 */ + 9589 "00000001" // /* MW 5 */ + 9590 "00000000" // /* MW 4 */ + 9591 "01100000" // /* MW 3 */ + 9592 "00010001" // /* MW 2 */ + 9593 "11110000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 407 11 +.delay_slot + 9594 "01000100" // MOVXM p6, #508424 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9595 "00010000" // /* MW 5 */ + 9596 "11000100" // /* MW 4 */ + 9597 "11001100" // /* MW 3 */ + 9598 "00000111" // /* MW 2 */ + 9599 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 407 30 first +.delay_slot + 9600 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9601 "11111011" // /* MW 3 */ + 9602 "01100011" // /* MW 2 */ + 9603 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 407 11 +.delay_slot + 9604 "10011000" // ST r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9605 "00110001" // /* MW 3 */ + 9606 "00000110" // /* MW 2 */ + 9607 "00001110" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9608 "01000100" // MOVXM p6, #508464 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9609 "01100000" // /* MW 5 */ + 9610 "11000100" // /* MW 4 */ + 9611 "11001100" // /* MW 3 */ + 9612 "00000111" // /* MW 2 */ + 9613 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9614 "10111010" // ST.s8 r16, [p6]; MOVXM p6, #508460 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9615 "00010000" // /* MW 9 */ + 9616 "00010110" // /* MW 8 */ + 9617 "00110001" // /* MW 7 */ + 9618 "11110011" // /* MW 6 */ + 9619 "00000001" // /* MW 5 */ + 9620 "00000000" // /* MW 4 */ + 9621 "11100000" // /* MW 3 */ + 9622 "11000000" // /* MW 2 */ + 9623 "11000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9625 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 413 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 9626 "00000100" // JL #7168 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7168 delay_slots=5 */ + 9627 "00000001" // /* MW 5 */ + 9628 "00000000" // /* MW 4 */ + 9629 "00000000" // /* MW 3 */ + 9630 "00001110" // /* MW 2 */ + 9631 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9633 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9634 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9635 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9636 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9637 "00110001" // /* MW 3 */ + 9638 "00100000" // /* MW 2 */ + 9639 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 9640 "00100010" // MOVX r16, #1; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9641 "00011100" // /* MW 7 */ + 9642 "00000000" // /* MW 6 */ + 9643 "00000000" // /* MW 5 */ + 9644 "00101001" // /* MW 4 */ + 9645 "00000000" // /* MW 3 */ + 9646 "00000001" // /* MW 2 */ + 9647 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.src_ref 6 "superkernels.cpp" 416 15 +.delay_slot + 9648 "11100001" // NOPA; NOPB; ST r16, [p6]; NOPX; MOV p6, p1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9649 "00000000" // /* MW 15 */ + 9650 "00000000" // /* MW 14 */ + 9651 "01111000" // /* MW 13 */ + 9652 "01100000" // /* MW 12 */ + 9653 "00110001" // /* MW 11 */ + 9654 "00000011" // /* MW 10 */ + 9655 "00000000" // /* MW 9 */ + 9656 "10000000" // /* MW 8 */ + 9657 "00010001" // /* MW 7 */ + 9658 "00000110" // /* MW 6 */ + 9659 "00100110" // /* MW 5 */ + 9660 "00000000" // /* MW 4 */ + 9661 "11110000" // /* MW 3 */ + 9662 "00101100" // /* MW 2 */ + 9663 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 414 46 +.return_address + 9664 "01000100" // MOVXM p3, #508480 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9665 "10000000" // /* MW 5 */ + 9666 "11000100" // /* MW 4 */ + 9667 "11000110" // /* MW 3 */ + 9668 "00000111" // /* MW 2 */ + 9669 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 414 18 +.src_ref 6 "superkernels.cpp" 414 46 first + 9670 "10111010" // LDA r18, [p3], #4; MOVXM p1, #508424 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9671 "00010000" // /* MW 9 */ + 9672 "00000100" // /* MW 8 */ + 9673 "10110001" // /* MW 7 */ + 9674 "11110000" // /* MW 6 */ + 9675 "00000001" // /* MW 5 */ + 9676 "00000000" // /* MW 4 */ + 9677 "11010000" // /* MW 3 */ + 9678 "11001010" // /* MW 2 */ + 9679 "01100011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 414 18 + 9680 "10011000" // LDA r20, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9681 "10010110" // /* MW 3 */ + 9682 "00000110" // /* MW 2 */ + 9683 "00000001" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 414 75 + 9684 "10011000" // LDA r17, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9685 "00110110" // /* MW 3 */ + 9686 "00011110" // /* MW 2 */ + 9687 "00000011" // /* MW 1 */ + 9688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9689 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 414 105 + 9690 "10011000" // LDA r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9691 "00010110" // /* MW 3 */ + 9692 "00000110" // /* MW 2 */ + 9693 "00000011" // /* MW 1 */ + 9694 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9695 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 415 35 first + 9696 "10011000" // LDA r19, [p3, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9697 "01110110" // /* MW 3 */ + 9698 "00010110" // /* MW 2 */ + 9699 "00000011" // /* MW 1 */ + 9700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9701 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 414 27 first + 9702 "10011000" // MUL r18, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9703 "01001111" // /* MW 3 */ + 9704 "10100101" // /* MW 2 */ + 9705 "00010100" // /* MW 1 */ + 9706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9707 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 414 56 + 9708 "10011000" // MUL r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9709 "00101111" // /* MW 3 */ + 9710 "01100011" // /* MW 2 */ + 9711 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 415 16 +.src_ref 6 "superkernels.cpp" 444 7 + 9712 "01000100" // MOVXM p1, #508436 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9713 "00101000" // /* MW 5 */ + 9714 "11000100" // /* MW 4 */ + 9715 "11000010" // /* MW 3 */ + 9716 "00000111" // /* MW 2 */ + 9717 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 414 86 + 9718 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9719 "00001111" // /* MW 3 */ + 9720 "01100001" // /* MW 2 */ + 9721 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 414 16 +.src_ref 6 "superkernels.cpp" 415 16 first + 9722 "00111010" // ST r19, [p1]; MOVXM p2, #508428 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9723 "00010001" // /* MW 9 */ + 9724 "00000110" // /* MW 8 */ + 9725 "00110001" // /* MW 7 */ + 9726 "11110001" // /* MW 6 */ + 9727 "00000001" // /* MW 5 */ + 9728 "00000000" // /* MW 4 */ + 9729 "00110000" // /* MW 3 */ + 9730 "11001110" // /* MW 2 */ + 9731 "00100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 414 16 first +.src_ref 6 "superkernels.cpp" 416 15 + 9732 "00000010" // ST r16, [p2]; MOV dj0, #40 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9733 "01010000" // /* MW 7 */ + 9734 "00101000" // /* MW 6 */ + 9735 "01000000" // /* MW 5 */ + 9736 "00000000" // /* MW 4 */ + 9737 "00110000" // /* MW 3 */ + 9738 "11000010" // /* MW 2 */ + 9739 "01000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 416 15 first + 9740 "10011000" // LDA el0, [p6, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9741 "00101110" // /* MW 3 */ + 9742 "00000000" // /* MW 2 */ + 9743 "00000110" // /* MW 1 */ + 9744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9745 "00000000" // /* MW 1 */ + 9746 "10000100" // J #9792 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9792 delay_slots=5 */ + 9747 "00000000" // /* MW 5 */ + 9748 "00000000" // /* MW 4 */ + 9749 "00100000" // /* MW 3 */ + 9750 "00010011" // /* MW 2 */ + 9751 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9753 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9755 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9756 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9757 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 416 13 +.delay_slot + 9758 "01000100" // MOVXM p0, #508456 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9759 "01010000" // /* MW 5 */ + 9760 "11000100" // /* MW 4 */ + 9761 "11000000" // /* MW 3 */ + 9762 "00000111" // /* MW 2 */ + 9763 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 416 13 +.delay_slot + 9764 "00110110" // NOPA; NOPB; ST el0, [p0]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9765 "11000001" // /* MW 11 */ + 9766 "00010100" // /* MW 10 */ + 9767 "00000010" // /* MW 9 */ + 9768 "00000000" // /* MW 8 */ + 9769 "00000000" // /* MW 7 */ + 9770 "00000000" // /* MW 6 */ + 9771 "00100000" // /* MW 5 */ + 9772 "00000000" // /* MW 4 */ + 9773 "11110000" // /* MW 3 */ + 9774 "00101100" // /* MW 2 */ + 9775 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_256 +.src_ref 6 "superkernels.cpp" 441 7 +.src_ref 6 "superkernels.cpp" 444 7 + 9776 "01000100" // MOVXM p1, #508436 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9777 "00101000" // /* MW 5 */ + 9778 "11000100" // /* MW 4 */ + 9779 "11000010" // /* MW 3 */ + 9780 "00000111" // /* MW 2 */ + 9781 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 441 7 first + 9782 "10111010" // LDA r19, [p1]; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9783 "01111110" // /* MW 9 */ + 9784 "10100101" // /* MW 8 */ + 9785 "00000001" // /* MW 7 */ + 9786 "00000000" // /* MW 6 */ + 9787 "00010000" // /* MW 5 */ + 9788 "00000000" // /* MW 4 */ + 9789 "11010000" // /* MW 3 */ + 9790 "11001110" // /* MW 2 */ + 9791 "00100000" // /* MW 1 */ +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_272 + 9792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9793 "00000000" // /* MW 1 */ + 9794 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9795 "00000000" // /* MW 1 */ + 9796 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9797 "00000000" // /* MW 1 */ + 9798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9799 "00000000" // /* MW 1 */ + 9800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9801 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 441 19 + 9802 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9803 "00000101" // /* MW 3 */ + 9804 "00100000" // /* MW 2 */ + 9805 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 441 19 + 9806 "10011000" // NE r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9807 "00001000" // /* MW 3 */ + 9808 "11100001" // /* MW 2 */ + 9809 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 441 25 + 9810 "10000100" // JNZ r16, #9984 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9984 delay_slots=5 */ + 9811 "00000001" // /* MW 5 */ + 9812 "01000000" // /* MW 4 */ + 9813 "10000000" // /* MW 3 */ + 9814 "00010011" // /* MW 2 */ + 9815 "10000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 first +.delay_slot + 9816 "00011000" // ADD.NC p6, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9817 "10000110" // /* MW 3 */ + 9818 "01100111" // /* MW 2 */ + 9819 "00011110" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9821 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9823 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9824 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9825 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9827 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 441 29 + 9828 "00111010" // ST r15, [sp, #-24]; MOVXM p1, #508420 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9829 "00010001" // /* MW 9 */ + 9830 "00000010" // /* MW 8 */ + 9831 "10110001" // /* MW 7 */ + 9832 "11110000" // /* MW 6 */ + 9833 "00000001" // /* MW 5 */ + 9834 "00000000" // /* MW 4 */ + 9835 "10110000" // /* MW 3 */ + 9836 "00111110" // /* MW 2 */ + 9837 "11111101" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 441 29 first +.src_ref 6 "superkernels.cpp" 441 60 + 9838 "10111010" // LDA r16, [p1]; MOVXM p1, #508512 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9839 "00010000" // /* MW 9 */ + 9840 "00110000" // /* MW 8 */ + 9841 "10110001" // /* MW 7 */ + 9842 "11110000" // /* MW 6 */ + 9843 "00000001" // /* MW 5 */ + 9844 "00000000" // /* MW 4 */ + 9845 "11010000" // /* MW 3 */ + 9846 "11000010" // /* MW 2 */ + 9847 "00100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 441 60 + 9848 "10011000" // LDA.u16 r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9849 "00111010" // /* MW 3 */ + 9850 "00000100" // /* MW 2 */ + 9851 "00000001" // /* MW 1 */ + 9852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9853 "00000000" // /* MW 1 */ + 9854 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9855 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 441 41 +.no_stack_arguments + 9856 "00000100" // JL #13472 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=13472 delay_slots=5 */ + 9857 "00000001" // /* MW 5 */ + 9858 "00000000" // /* MW 4 */ + 9859 "01010000" // /* MW 3 */ + 9860 "00011010" // /* MW 2 */ + 9861 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 441 41 +.delay_slot + 9862 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9863 "00000001" // /* MW 3 */ + 9864 "00011010" // /* MW 2 */ + 9865 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9867 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 441 41 +.delay_slot + 9868 "10011000" // LT r27, r16, r13 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9869 "11011010" // /* MW 3 */ + 9870 "00110110" // /* MW 2 */ + 9871 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 441 41 +.src_ref 6 "superkernels.cpp" 441 41 +.delay_slot + 9872 "11100100" // SUB r17, r13, r16; MOV r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9873 "01000001" // /* MW 5 */ + 9874 "10111011" // /* MW 4 */ + 9875 "00110111" // /* MW 3 */ + 9876 "01100000" // /* MW 2 */ + 9877 "01101100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 441 41 +.delay_slot + 9878 "01111010" // NOPA; NOPS; SEL.EQZ r0, r16, r17, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9879 "00010010" // /* MW 9 */ + 9880 "00000001" // /* MW 8 */ + 9881 "00000100" // /* MW 7 */ + 9882 "00000000" // /* MW 6 */ + 9883 "01011011" // /* MW 5 */ + 9884 "00000001" // /* MW 4 */ + 9885 "11110000" // /* MW 3 */ + 9886 "00101100" // /* MW 2 */ + 9887 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 441 41 +.src_ref 6 "superkernels.cpp" 441 41 +.return_address + 9888 "11100100" // SUB r16, r13, r3; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9889 "01000001" // /* MW 5 */ + 9890 "10101111" // /* MW 4 */ + 9891 "00111101" // /* MW 3 */ + 9892 "00000110" // /* MW 2 */ + 9893 "01101100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 441 41 + 9894 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9895 "00000010" // /* MW 3 */ + 9896 "11100001" // /* MW 2 */ + 9897 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 441 6 +.src_ref 6 "superkernels.cpp" 441 71 + 9898 "10000100" // JNZ r16, #9984 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9984 delay_slots=5 */ + 9899 "00000001" // /* MW 5 */ + 9900 "01000000" // /* MW 4 */ + 9901 "10000000" // /* MW 3 */ + 9902 "00010011" // /* MW 2 */ + 9903 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 444 7 +.delay_slot + 9904 "01000100" // MOVXM p1, #508436 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9905 "00101000" // /* MW 5 */ + 9906 "11000100" // /* MW 4 */ + 9907 "11000010" // /* MW 3 */ + 9908 "00000111" // /* MW 2 */ + 9909 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 1 "io_buffer_main.h" 348 51 +.delay_slot + 9910 "00011000" // LDA r15, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9911 "11110001" // /* MW 3 */ + 9912 "11101001" // /* MW 2 */ + 9913 "00000111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9915 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9917 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9919 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 1 "io_buffer_main.h" 419 8 + 9920 "11100100" // MOVX r16, #-1; MOV p2, p6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9921 "10000001" // /* MW 5 */ + 9922 "11011001" // /* MW 4 */ + 9923 "10100100" // /* MW 3 */ + 9924 "00011111" // /* MW 2 */ + 9925 "11111100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 first + 9926 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9927 "01110110" // /* MW 3 */ + 9928 "11111111" // /* MW 2 */ + 9929 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 9930 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9931 "00110110" // /* MW 3 */ + 9932 "11111110" // /* MW 2 */ + 9933 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 9934 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9935 "01010110" // /* MW 3 */ + 9936 "11111110" // /* MW 2 */ + 9937 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 9938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9939 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 9940 "10011000" // LDA r17, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9941 "00110110" // /* MW 3 */ + 9942 "01000110" // /* MW 2 */ + 9943 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9945 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9947 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9949 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9951 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9952 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9953 "00010010" // /* MW 3 */ + 9954 "10100011" // /* MW 2 */ + 9955 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9956 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9957 "00110001" // /* MW 3 */ + 9958 "00000110" // /* MW 2 */ + 9959 "00001010" // /* MW 1 */ + 9960 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9961 "00000000" // /* MW 1 */ + 9962 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9963 "00000000" // /* MW 1 */ + 9964 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9965 "00000000" // /* MW 1 */ + 9966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9967 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 9968 "11100001" // NOPA; NOPB; NOPS; ACQ r17, r16; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9969 "00000000" // /* MW 15 */ + 9970 "00000000" // /* MW 14 */ + 9971 "01111000" // /* MW 13 */ + 9972 "10100101" // /* MW 12 */ + 9973 "00000001" // /* MW 11 */ + 9974 "01000000" // /* MW 10 */ + 9975 "10011000" // /* MW 9 */ + 9976 "00100010" // /* MW 8 */ + 9977 "01011011" // /* MW 7 */ + 9978 "00000001" // /* MW 6 */ + 9979 "00100000" // /* MW 5 */ + 9980 "00000000" // /* MW 4 */ + 9981 "11110000" // /* MW 3 */ + 9982 "00101100" // /* MW 2 */ + 9983 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_464 +.src_ref 6 "superkernels.cpp" 444 19 + 9984 "00011000" // MOVX r17, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9985 "00001001" // /* MW 3 */ + 9986 "00100010" // /* MW 2 */ + 9987 "00010000" // /* MW 1 */ + 9988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9989 "00000000" // /* MW 1 */ + 9990 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9991 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 444 7 first + 9992 "10011000" // LDA r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9993 "00010110" // /* MW 3 */ + 9994 "00000110" // /* MW 2 */ + 9995 "00000001" // /* MW 1 */ + 9996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9997 "00000000" // /* MW 1 */ + 9998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9999 "00000000" // /* MW 1 */ + 10000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10001 "00000000" // /* MW 1 */ + 10002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10003 "00000000" // /* MW 1 */ + 10004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10005 "00000000" // /* MW 1 */ + 10006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10007 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 444 19 + 10008 "10011000" // NE r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10009 "00001000" // /* MW 3 */ + 10010 "01100011" // /* MW 2 */ + 10011 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 444 25 + 10012 "10000100" // JNZ r17, #10208 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10208 delay_slots=5 */ + 10013 "00000001" // /* MW 5 */ + 10014 "01000000" // /* MW 4 */ + 10015 "11110000" // /* MW 3 */ + 10016 "00010011" // /* MW 2 */ + 10017 "10001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10018 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10019 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10021 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10023 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10025 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10027 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 444 29 + 10028 "00111010" // ST r15, [sp, #-24]; MOVXM p1, #508440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10029 "00010001" // /* MW 9 */ + 10030 "00001100" // /* MW 8 */ + 10031 "10110001" // /* MW 7 */ + 10032 "11110000" // /* MW 6 */ + 10033 "00000001" // /* MW 5 */ + 10034 "00000000" // /* MW 4 */ + 10035 "10110000" // /* MW 3 */ + 10036 "00111110" // /* MW 2 */ + 10037 "11111101" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 444 29 +.src_ref 6 "superkernels.cpp" 444 60 + 10038 "10111010" // LDA r16, [p1]; MOVXM p1, #508512 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10039 "00010000" // /* MW 9 */ + 10040 "00110000" // /* MW 8 */ + 10041 "10110001" // /* MW 7 */ + 10042 "11110000" // /* MW 6 */ + 10043 "00000001" // /* MW 5 */ + 10044 "00000000" // /* MW 4 */ + 10045 "11010000" // /* MW 3 */ + 10046 "11000010" // /* MW 2 */ + 10047 "00100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 444 60 + 10048 "10011000" // LDA.u16 r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10049 "00111010" // /* MW 3 */ + 10050 "00000100" // /* MW 2 */ + 10051 "00000001" // /* MW 1 */ + 10052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10053 "00000000" // /* MW 1 */ + 10054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10055 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 444 41 +.no_stack_arguments + 10056 "00000100" // JL #13472 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=13472 delay_slots=5 */ + 10057 "00000001" // /* MW 5 */ + 10058 "00000000" // /* MW 4 */ + 10059 "01010000" // /* MW 3 */ + 10060 "00011010" // /* MW 2 */ + 10061 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 444 41 +.delay_slot + 10062 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10063 "00000001" // /* MW 3 */ + 10064 "00011010" // /* MW 2 */ + 10065 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10066 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10067 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 444 41 +.delay_slot + 10068 "00101100" // NOPA; LT r27, r16, r13 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10069 "10110101" // /* MW 5 */ + 10070 "01101101" // /* MW 4 */ + 10071 "11111000" // /* MW 3 */ + 10072 "00101100" // /* MW 2 */ + 10073 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 444 41 +.src_ref 6 "superkernels.cpp" 444 41 +.delay_slot + 10074 "11100100" // SUB r17, r13, r16; MOV r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10075 "01000001" // /* MW 5 */ + 10076 "10111011" // /* MW 4 */ + 10077 "00110111" // /* MW 3 */ + 10078 "01100000" // /* MW 2 */ + 10079 "01101100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 444 41 +.delay_slot + 10080 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r0, r16, r17, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10081 "00000000" // /* MW 15 */ + 10082 "00000000" // /* MW 14 */ + 10083 "01111000" // /* MW 13 */ + 10084 "10100101" // /* MW 12 */ + 10085 "00000001" // /* MW 11 */ + 10086 "10010000" // /* MW 10 */ + 10087 "00001000" // /* MW 9 */ + 10088 "00100000" // /* MW 8 */ + 10089 "01011011" // /* MW 7 */ + 10090 "00000001" // /* MW 6 */ + 10091 "00100000" // /* MW 5 */ + 10092 "00000000" // /* MW 4 */ + 10093 "11110000" // /* MW 3 */ + 10094 "00101100" // /* MW 2 */ + 10095 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 1 "io_buffer_main.h" 348 51 +.src_ref 6 "superkernels.cpp" 444 41 +.src_ref 6 "superkernels.cpp" 444 41 +.return_address + 10096 "10111010" // LDA r15, [sp, #-24]; SUB r16, r13, r3; MOV r27, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10097 "01111000" // /* MW 9 */ + 10098 "11010000" // /* MW 8 */ + 10099 "01101011" // /* MW 7 */ + 10100 "10001111" // /* MW 6 */ + 10101 "00000001" // /* MW 5 */ + 10102 "00011011" // /* MW 4 */ + 10103 "00100000" // /* MW 3 */ + 10104 "00111110" // /* MW 2 */ + 10105 "11111101" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 444 41 + 10106 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10107 "00000010" // /* MW 3 */ + 10108 "11100001" // /* MW 2 */ + 10109 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 444 6 +.src_ref 6 "superkernels.cpp" 444 71 + 10110 "10000100" // JNZ r16, #10192 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10192 delay_slots=5 */ + 10111 "00000001" // /* MW 5 */ + 10112 "01000000" // /* MW 4 */ + 10113 "11101000" // /* MW 3 */ + 10114 "00010011" // /* MW 2 */ + 10115 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 447 7 +.delay_slot + 10116 "01000100" // MOVXM p1, #508436 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10117 "00101000" // /* MW 5 */ + 10118 "11000100" // /* MW 4 */ + 10119 "11000010" // /* MW 3 */ + 10120 "00000111" // /* MW 2 */ + 10121 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10123 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10125 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10127 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10129 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 1 "io_buffer_main.h" 419 8 + 10130 "11100100" // MOVX r16, #-1; MOV p2, p6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10131 "10000001" // /* MW 5 */ + 10132 "11011001" // /* MW 4 */ + 10133 "10100100" // /* MW 3 */ + 10134 "00011111" // /* MW 2 */ + 10135 "11111100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 first + 10136 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10137 "01110110" // /* MW 3 */ + 10138 "11111111" // /* MW 2 */ + 10139 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 10140 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10141 "00110110" // /* MW 3 */ + 10142 "11111110" // /* MW 2 */ + 10143 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 10144 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10145 "01010110" // /* MW 3 */ + 10146 "11111110" // /* MW 2 */ + 10147 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 10148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10149 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 10150 "10011000" // LDA r17, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10151 "00110110" // /* MW 3 */ + 10152 "01000110" // /* MW 2 */ + 10153 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 10154 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10155 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 10156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10157 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 10158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10159 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 10160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10161 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 10162 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10163 "00010010" // /* MW 3 */ + 10164 "10100011" // /* MW 2 */ + 10165 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10166 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10167 "00110001" // /* MW 3 */ + 10168 "00000110" // /* MW 2 */ + 10169 "00001010" // /* MW 1 */ + 10170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10171 "00000000" // /* MW 1 */ + 10172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10173 "00000000" // /* MW 1 */ + 10174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10175 "00000000" // /* MW 1 */ + 10176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10177 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 10178 "01111110" // NOPA; NOPB; NOPS; ACQ r17, r16; NOPM /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 10179 "01100000" // /* MW 13 */ + 10180 "00101011" // /* MW 12 */ + 10181 "00000000" // /* MW 11 */ + 10182 "10101111" // /* MW 10 */ + 10183 "00110100" // /* MW 9 */ + 10184 "00000000" // /* MW 8 */ + 10185 "00001000" // /* MW 7 */ + 10186 "01010011" // /* MW 6 */ + 10187 "00100100" // /* MW 5 */ + 10188 "00000000" // /* MW 4 */ + 10189 "11110000" // /* MW 3 */ + 10190 "00101100" // /* MW 2 */ + 10191 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_672 + 10192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10193 "00000000" // /* MW 1 */ + 10194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10195 "00000000" // /* MW 1 */ + 10196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10197 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 447 7 first + 10198 "10111010" // LDA r16, [p1]; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10199 "01111110" // /* MW 9 */ + 10200 "10100101" // /* MW 8 */ + 10201 "00000001" // /* MW 7 */ + 10202 "00000000" // /* MW 6 */ + 10203 "00010000" // /* MW 5 */ + 10204 "00000000" // /* MW 4 */ + 10205 "11010000" // /* MW 3 */ + 10206 "11000010" // /* MW 2 */ + 10207 "00100000" // /* MW 1 */ +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_688 +.src_ref 6 "superkernels.cpp" 447 42 +.src_ref 6 "superkernels.cpp" 496 43 +.src_ref 6 "superkernels.cpp" 499 15 +.src_ref 6 "superkernels.cpp" 502 43 +.src_ref 6 "superkernels.cpp" 505 15 +.src_ref 6 "superkernels.cpp" 508 44 +.src_ref 6 "superkernels.cpp" 511 16 +.src_ref 6 "superkernels.cpp" 515 14 + 10208 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10209 "00000001" // /* MW 3 */ + 10210 "00011010" // /* MW 2 */ + 10211 "00010000" // /* MW 1 */ + 10212 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10213 "00000000" // /* MW 1 */ + 10214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10215 "00000000" // /* MW 1 */ + 10216 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10217 "00000000" // /* MW 1 */ + 10218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10219 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 447 19 + 10220 "00011000" // MOVX r17, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10221 "00010001" // /* MW 3 */ + 10222 "00100010" // /* MW 2 */ + 10223 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 447 19 + 10224 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10225 "00001000" // /* MW 3 */ + 10226 "01100001" // /* MW 2 */ + 10227 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 447 25 + 10228 "10000100" // JNZ r16, #10448 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10448 delay_slots=5 */ + 10229 "00000001" // /* MW 5 */ + 10230 "01000000" // /* MW 4 */ + 10231 "01101000" // /* MW 3 */ + 10232 "00010100" // /* MW 2 */ + 10233 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 447 29 +.delay_slot + 10234 "01000100" // MOVXM p1, #508444 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10235 "00111000" // /* MW 5 */ + 10236 "11000100" // /* MW 4 */ + 10237 "11000010" // /* MW 3 */ + 10238 "00000111" // /* MW 2 */ + 10239 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 447 61 +.delay_slot + 10240 "01000100" // MOVXM p2, #508512 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10241 "11000000" // /* MW 5 */ + 10242 "11000100" // /* MW 4 */ + 10243 "11000100" // /* MW 3 */ + 10244 "00000111" // /* MW 2 */ + 10245 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10247 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10248 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10249 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10251 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 447 29 + 10252 "10011000" // LDA r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10253 "00010110" // /* MW 3 */ + 10254 "00000110" // /* MW 2 */ + 10255 "00000001" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 447 61 + 10256 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10257 "00111010" // /* MW 3 */ + 10258 "00000100" // /* MW 2 */ + 10259 "00000010" // /* MW 1 */ + 10260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10261 "00000000" // /* MW 1 */ + 10262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10263 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 447 42 +.no_stack_arguments + 10264 "00000100" // JL #13472 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=13472 delay_slots=5 */ + 10265 "00000001" // /* MW 5 */ + 10266 "00000000" // /* MW 4 */ + 10267 "01010000" // /* MW 3 */ + 10268 "00011010" // /* MW 2 */ + 10269 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10271 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10273 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 447 42 +.delay_slot + 10274 "10011000" // LT r27, r16, r13 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10275 "11011010" // /* MW 3 */ + 10276 "00110110" // /* MW 2 */ + 10277 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 447 42 +.delay_slot + 10278 "01011100" // ST r27, [sp, #-24]; SUB r17, r13, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10279 "00000011" // /* MW 5 */ + 10280 "11000110" // /* MW 4 */ + 10281 "10110110" // /* MW 3 */ + 10282 "01101110" // /* MW 2 */ + 10283 "11111101" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 447 42 +.delay_slot + 10284 "00011000" // SEL.EQZ r0, r16, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10285 "00010010" // /* MW 3 */ + 10286 "00000001" // /* MW 2 */ + 10287 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 447 42 +.src_ref 6 "superkernels.cpp" 447 42 +.return_address + 10288 "00101100" // LDA r27, [sp, #-24]; SUB r16, r13, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10289 "01100011" // /* MW 5 */ + 10290 "11000000" // /* MW 4 */ + 10291 "00100110" // /* MW 3 */ + 10292 "01101110" // /* MW 2 */ + 10293 "11111101" // /* MW 1 */ + 10294 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10295 "00000000" // /* MW 1 */ + 10296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10297 "00000000" // /* MW 1 */ + 10298 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10299 "00000000" // /* MW 1 */ + 10300 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10301 "00000000" // /* MW 1 */ + 10302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10303 "00000000" // /* MW 1 */ + 10304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10305 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 447 42 + 10306 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10307 "00000010" // /* MW 3 */ + 10308 "11100001" // /* MW 2 */ + 10309 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 447 6 +.src_ref 6 "superkernels.cpp" 447 72 + 10310 "10000100" // JNZ r16, #10416 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10416 delay_slots=5 */ + 10311 "00000001" // /* MW 5 */ + 10312 "01000000" // /* MW 4 */ + 10313 "01011000" // /* MW 3 */ + 10314 "00010100" // /* MW 2 */ + 10315 "10000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.delay_slot + 10316 "11111000" // MOV p1, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10317 "11000000" // /* MW 3 */ + 10318 "01101110" // /* MW 2 */ + 10319 "00011001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10320 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10321 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10323 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10325 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10327 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 first +.src_ref 6 "superkernels.cpp" 494 2 + 10328 "10111010" // LDA r27, [p6], #-4; MOVXM p2, #508480 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10329 "00010000" // /* MW 9 */ + 10330 "00100000" // /* MW 8 */ + 10331 "00110001" // /* MW 7 */ + 10332 "11110001" // /* MW 6 */ + 10333 "00000001" // /* MW 5 */ + 10334 "00000000" // /* MW 4 */ + 10335 "11010000" // /* MW 3 */ + 10336 "11101110" // /* MW 2 */ + 10337 "11011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 +.src_ref 6 "superkernels.cpp" 496 7 + 10338 "10111010" // LDA r17, [p6], #-4; MOVXM p7, #508436 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10339 "00010000" // /* MW 9 */ + 10340 "00001010" // /* MW 8 */ + 10341 "10110001" // /* MW 7 */ + 10342 "11110011" // /* MW 6 */ + 10343 "00000001" // /* MW 5 */ + 10344 "00000000" // /* MW 4 */ + 10345 "11010000" // /* MW 3 */ + 10346 "11000110" // /* MW 2 */ + 10347 "11011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 1 "io_buffer_main.h" 242 80 +.src_ref 1 "io_buffer_main.h" 348 51 +.src_ref 6 "superkernels.cpp" 452 45 +.src_ref 6 "superkernels.cpp" 496 19 + 10348 "10111010" // LDA r18, [p6], #-4; MOVX r15, #1; MOV r1, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10349 "01111000" // /* MW 9 */ + 10350 "11010000" // /* MW 8 */ + 10351 "00101011" // /* MW 7 */ + 10352 "00101000" // /* MW 6 */ + 10353 "11110000" // /* MW 5 */ + 10354 "00000000" // /* MW 4 */ + 10355 "11010000" // /* MW 3 */ + 10356 "11001010" // /* MW 2 */ + 10357 "11011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 10358 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10359 "11111101" // /* MW 3 */ + 10360 "11100000" // /* MW 2 */ + 10361 "00010111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first +.aggressive_scheduled_block_id 4 +.noswbrkpt + 10362 "10011000" // LDA r17, [p6, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10363 "00110110" // /* MW 3 */ + 10364 "01000110" // /* MW 2 */ + 10365 "00000110" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10367 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10369 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10371 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10373 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 10374 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10375 "00010010" // /* MW 3 */ + 10376 "10100011" // /* MW 2 */ + 10377 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10378 "10011000" // ST r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10379 "00110001" // /* MW 3 */ + 10380 "00000110" // /* MW 2 */ + 10381 "00001110" // /* MW 1 */ + 10382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10383 "00000000" // /* MW 1 */ + 10384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10385 "00000000" // /* MW 1 */ + 10386 "10000100" // J #10480 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10480 delay_slots=5 */ + 10387 "00000000" // /* MW 5 */ + 10388 "00000000" // /* MW 4 */ + 10389 "01111000" // /* MW 3 */ + 10390 "00010100" // /* MW 2 */ + 10391 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 1 "io_buffer_main.h" 348 51 +.delay_slot + 10392 "11111000" // MOV p6, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10393 "10100000" // /* MW 3 */ + 10394 "01100000" // /* MW 2 */ + 10395 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first +.delay_slot + 10396 "00011000" // ACQ r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10397 "00001000" // /* MW 3 */ + 10398 "01010011" // /* MW 2 */ + 10399 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10401 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10403 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10404 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10405 "10000001" // /* MW 11 */ + 10406 "10101101" // /* MW 10 */ + 10407 "00000000" // /* MW 9 */ + 10408 "00000000" // /* MW 8 */ + 10409 "00000000" // /* MW 7 */ + 10410 "00000000" // /* MW 6 */ + 10411 "00100000" // /* MW 5 */ + 10412 "00000000" // /* MW 4 */ + 10413 "11110000" // /* MW 3 */ + 10414 "00101100" // /* MW 2 */ + 10415 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_896 +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 1 "io_buffer_main.h" 348 51 + 10416 "00111010" // MOVS p6, r15; J #10480 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=10480 delay_slots=5 */ + 10417 "00100001" // /* MW 9 */ + 10418 "00000000" // /* MW 8 */ + 10419 "00000000" // /* MW 7 */ + 10420 "00011110" // /* MW 6 */ + 10421 "00000101" // /* MW 5 */ + 10422 "00000000" // /* MW 4 */ + 10423 "01100000" // /* MW 3 */ + 10424 "11100001" // /* MW 2 */ + 10425 "11010001" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 496 7 +.delay_slot + 10426 "01000100" // MOVXM p7, #508436 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10427 "00101000" // /* MW 5 */ + 10428 "11000100" // /* MW 4 */ + 10429 "11001110" // /* MW 3 */ + 10430 "00000111" // /* MW 2 */ + 10431 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 494 2 +.delay_slot + 10432 "01000100" // MOVXM p2, #508480 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10433 "10000000" // /* MW 5 */ + 10434 "11000100" // /* MW 4 */ + 10435 "11000100" // /* MW 3 */ + 10436 "00000111" // /* MW 2 */ + 10437 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 452 45 +.src_ref 6 "superkernels.cpp" 496 19 +.delay_slot + 10438 "00011000" // MOVX r15, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10439 "00000101" // /* MW 3 */ + 10440 "00011110" // /* MW 2 */ + 10441 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10443 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10444 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10445 "01100111" // /* MW 3 */ + 10446 "00000001" // /* MW 2 */ + 10447 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_928 +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 6 "superkernels.cpp" 494 2 + 10448 "11100001" // NOPA; NOPB; MOVS p1, p7; MOVXM p2, #508480; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10449 "00000000" // /* MW 15 */ + 10450 "00000000" // /* MW 14 */ + 10451 "00010000" // /* MW 13 */ + 10452 "00100000" // /* MW 12 */ + 10453 "00110001" // /* MW 11 */ + 10454 "11110001" // /* MW 10 */ + 10455 "00000001" // /* MW 9 */ + 10456 "00000000" // /* MW 8 */ + 10457 "10001011" // /* MW 7 */ + 10458 "10011100" // /* MW 6 */ + 10459 "00100001" // /* MW 5 */ + 10460 "00000000" // /* MW 4 */ + 10461 "11110000" // /* MW 3 */ + 10462 "00101100" // /* MW 2 */ + 10463 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 1 "io_buffer_main.h" 348 51 +.src_ref 6 "superkernels.cpp" 452 45 +.src_ref 6 "superkernels.cpp" 496 7 +.src_ref 6 "superkernels.cpp" 496 19 + 10464 "11100001" // MOVA r15, #1; NOPB; MOVS p6, r15; MOVXM p7, #508436; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10465 "00000000" // /* MW 15 */ + 10466 "00000000" // /* MW 14 */ + 10467 "00010000" // /* MW 13 */ + 10468 "00001010" // /* MW 12 */ + 10469 "10110001" // /* MW 11 */ + 10470 "11110011" // /* MW 10 */ + 10471 "00000001" // /* MW 9 */ + 10472 "00000000" // /* MW 8 */ + 10473 "00001011" // /* MW 7 */ + 10474 "10001111" // /* MW 6 */ + 10475 "00100110" // /* MW 5 */ + 10476 "00000000" // /* MW 4 */ + 10477 "00000000" // /* MW 3 */ + 10478 "00101111" // /* MW 2 */ + 10479 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_960 +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 6 "superkernels.cpp" 450 2 + 10480 "01110110" // LDA r16, [p1]; ST p6, [sp, #-24]; MOVXM p3, #508416 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10481 "00010000" // /* MW 11 */ + 10482 "00000000" // /* MW 10 */ + 10483 "10110001" // /* MW 9 */ + 10484 "11110001" // /* MW 8 */ + 10485 "00000001" // /* MW 7 */ + 10486 "10000000" // /* MW 6 */ + 10487 "00011101" // /* MW 5 */ + 10488 "11101011" // /* MW 4 */ + 10489 "11010111" // /* MW 3 */ + 10490 "11000010" // /* MW 2 */ + 10491 "00100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 450 2 first +.src_ref 6 "superkernels.cpp" 452 47 + 10492 "10111010" // LDA r17, [p3]; MOVXM p1, #508428 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10493 "00010000" // /* MW 9 */ + 10494 "00000110" // /* MW 8 */ + 10495 "10110001" // /* MW 7 */ + 10496 "11110000" // /* MW 6 */ + 10497 "00000001" // /* MW 5 */ + 10498 "00000000" // /* MW 4 */ + 10499 "11010000" // /* MW 3 */ + 10500 "11000110" // /* MW 2 */ + 10501 "01100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 452 47 first + 10502 "10011000" // LDA r18, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10503 "01010110" // /* MW 3 */ + 10504 "00000110" // /* MW 2 */ + 10505 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 1 "io_buffer_main.h" 348 51 first + 10506 "10011000" // LDA p1, [p6], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10507 "10011110" // /* MW 3 */ + 10508 "01011100" // /* MW 2 */ + 10509 "00000110" // /* MW 1 */ + 10510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10511 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 494 2 first +.no_stack_arguments + 10512 "00000100" // JL #8000 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8000 delay_slots=5 */ + 10513 "00000001" // /* MW 5 */ + 10514 "00000000" // /* MW 4 */ + 10515 "10100000" // /* MW 3 */ + 10516 "00001111" // /* MW 2 */ + 10517 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10519 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10521 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 450 2 first +.delay_slot + 10522 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10523 "00000111" // /* MW 3 */ + 10524 "01100010" // /* MW 2 */ + 10525 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 450 2 +.src_ref 6 "superkernels.cpp" 452 45 first +.delay_slot + 10526 "01011100" // ST r17, [p3]; LSHL r18, r18, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10527 "11111011" // /* MW 5 */ + 10528 "01001001" // /* MW 4 */ + 10529 "00111001" // /* MW 3 */ + 10530 "11000110" // /* MW 2 */ + 10531 "01100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 452 45 +.delay_slot + 10532 "11110110" // NOPA; NOPB; NOPS; ADD.NC p0, r18, r16 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10533 "10100000" // /* MW 11 */ + 10534 "10100000" // /* MW 10 */ + 10535 "00110100" // /* MW 9 */ + 10536 "00000000" // /* MW 8 */ + 10537 "01011011" // /* MW 7 */ + 10538 "00000001" // /* MW 6 */ + 10539 "00100000" // /* MW 5 */ + 10540 "00000000" // /* MW 4 */ + 10541 "11110000" // /* MW 3 */ + 10542 "00101100" // /* MW 2 */ + 10543 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 496 7 first +.return_address + 10544 "10011000" // LDA r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10545 "00110110" // /* MW 3 */ + 10546 "00000110" // /* MW 2 */ + 10547 "00000111" // /* MW 1 */ + 10548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10549 "00000000" // /* MW 1 */ + 10550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10551 "00000000" // /* MW 1 */ + 10552 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10553 "00000000" // /* MW 1 */ + 10554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10555 "00000000" // /* MW 1 */ + 10556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10557 "00000000" // /* MW 1 */ + 10558 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10559 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 496 19 + 10560 "10011000" // NE r18, r15, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10561 "00011000" // /* MW 3 */ + 10562 "11100101" // /* MW 2 */ + 10563 "00010011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 496 25 + 10564 "10000100" // JNZ r18, #10768 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10768 delay_slots=5 */ + 10565 "00000001" // /* MW 5 */ + 10566 "01000000" // /* MW 4 */ + 10567 "00001000" // /* MW 3 */ + 10568 "00010101" // /* MW 2 */ + 10569 "10010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 502 19 +.delay_slot + 10570 "00011000" // MOVX r16, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10571 "00001001" // /* MW 3 */ + 10572 "00100000" // /* MW 2 */ + 10573 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10574 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10575 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10577 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10579 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10581 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 496 29 +.src_ref 6 "superkernels.cpp" 499 15 + 10582 "01000100" // MOVXM p7, #508420 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10583 "00001000" // /* MW 5 */ + 10584 "11000100" // /* MW 4 */ + 10585 "11001110" // /* MW 3 */ + 10586 "00000111" // /* MW 2 */ + 10587 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 496 29 +.src_ref 6 "superkernels.cpp" 496 62 + 10588 "10111010" // LDA r16, [p7]; MOVXM p1, #508512 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10589 "00010000" // /* MW 9 */ + 10590 "00110000" // /* MW 8 */ + 10591 "10110001" // /* MW 7 */ + 10592 "11110000" // /* MW 6 */ + 10593 "00000001" // /* MW 5 */ + 10594 "00000000" // /* MW 4 */ + 10595 "11010000" // /* MW 3 */ + 10596 "11000010" // /* MW 2 */ + 10597 "11100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 496 62 + 10598 "10011000" // LDA.u16 r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10599 "00111010" // /* MW 3 */ + 10600 "00000100" // /* MW 2 */ + 10601 "00000001" // /* MW 1 */ + 10602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10603 "00000000" // /* MW 1 */ + 10604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10605 "00000000" // /* MW 1 */ + 10606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10607 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 496 43 +.no_stack_arguments + 10608 "00000100" // JL #13472 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=13472 delay_slots=5 */ + 10609 "00000001" // /* MW 5 */ + 10610 "00000000" // /* MW 4 */ + 10611 "01010000" // /* MW 3 */ + 10612 "00011010" // /* MW 2 */ + 10613 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10615 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 496 29 +.delay_slot + 10616 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10617 "00000111" // /* MW 3 */ + 10618 "00100000" // /* MW 2 */ + 10619 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 496 29 +.src_ref 6 "superkernels.cpp" 496 43 +.delay_slot + 10620 "01011100" // ST r16, [p7]; LT r27, r16, r13 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10621 "10110101" // /* MW 5 */ + 10622 "01101101" // /* MW 4 */ + 10623 "00111000" // /* MW 3 */ + 10624 "11000010" // /* MW 2 */ + 10625 "11100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 496 43 +.src_ref 6 "superkernels.cpp" 496 43 +.delay_slot + 10626 "11100100" // SUB r17, r13, r16; MOV r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10627 "01000001" // /* MW 5 */ + 10628 "10111011" // /* MW 4 */ + 10629 "00110111" // /* MW 3 */ + 10630 "01100000" // /* MW 2 */ + 10631 "01101100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 496 43 +.delay_slot + 10632 "00100010" // SEL.EQZ r0, r16, r17, r27; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10633 "00011100" // /* MW 7 */ + 10634 "00000000" // /* MW 6 */ + 10635 "00000000" // /* MW 5 */ + 10636 "10010001" // /* MW 4 */ + 10637 "00001000" // /* MW 3 */ + 10638 "00100000" // /* MW 2 */ + 10639 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 +.src_ref 6 "superkernels.cpp" 496 43 +.src_ref 6 "superkernels.cpp" 496 43 +.return_address + 10640 "10111010" // LDA p1, [sp, #-24]; SUB r16, r13, r3; MOV r27, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10641 "01111000" // /* MW 9 */ + 10642 "11010000" // /* MW 8 */ + 10643 "01101011" // /* MW 7 */ + 10644 "10001111" // /* MW 6 */ + 10645 "00000001" // /* MW 5 */ + 10646 "00011011" // /* MW 4 */ + 10647 "00100000" // /* MW 3 */ + 10648 "00010011" // /* MW 2 */ + 10649 "11111101" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 449 8 +.src_ref 6 "superkernels.cpp" 496 43 + 10650 "01100100" // SEL.EQZ r18, r3, r16, r27; MOV r17, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10651 "00000101" // /* MW 5 */ + 10652 "10100000" // /* MW 4 */ + 10653 "01001000" // /* MW 3 */ + 10654 "10100000" // /* MW 2 */ + 10655 "00011100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 496 6 +.src_ref 6 "superkernels.cpp" 496 73 + 10656 "10000100" // JNZ r18, #10736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10736 delay_slots=5 */ + 10657 "00000001" // /* MW 5 */ + 10658 "01000000" // /* MW 4 */ + 10659 "11111000" // /* MW 3 */ + 10660 "00010100" // /* MW 2 */ + 10661 "10010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 502 19 +.delay_slot + 10662 "00011000" // MOVX r16, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10663 "00001001" // /* MW 3 */ + 10664 "00100000" // /* MW 2 */ + 10665 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10667 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10669 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10671 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10673 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first +.src_ref 6 "superkernels.cpp" 499 15 first + 10674 "00001100" // LDA r18, [p1, #20]; ST r13, [p7] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10675 "01100011" // /* MW 5 */ + 10676 "00001011" // /* MW 4 */ + 10677 "11011110" // /* MW 3 */ + 10678 "11001010" // /* MW 2 */ + 10679 "00101010" // /* MW 1 */ + 10680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10681 "00000000" // /* MW 1 */ + 10682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10683 "00000000" // /* MW 1 */ + 10684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10685 "00000000" // /* MW 1 */ + 10686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10687 "00000000" // /* MW 1 */ + 10688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10689 "00000000" // /* MW 1 */ + 10690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10691 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 10692 "00011000" // REL r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10693 "00011000" // /* MW 3 */ + 10694 "10010001" // /* MW 2 */ + 10695 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first +.src_ref 6 "superkernels.cpp" 502 7 + 10696 "10111010" // LDA r18, [p6, #-8]; MOVXM p7, #508436 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10697 "00010000" // /* MW 9 */ + 10698 "00001010" // /* MW 8 */ + 10699 "10110001" // /* MW 7 */ + 10700 "11110011" // /* MW 6 */ + 10701 "00000001" // /* MW 5 */ + 10702 "00000000" // /* MW 4 */ + 10703 "11010000" // /* MW 3 */ + 10704 "11001010" // /* MW 2 */ + 10705 "11011100" // /* MW 1 */ + 10706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10707 "00000000" // /* MW 1 */ + 10708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10709 "00000000" // /* MW 1 */ + 10710 "10000100" // J #10752 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10752 delay_slots=5 */ + 10711 "00000000" // /* MW 5 */ + 10712 "00000000" // /* MW 4 */ + 10713 "00000000" // /* MW 3 */ + 10714 "00010101" // /* MW 2 */ + 10715 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10717 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10719 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10720 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10721 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 +.delay_slot + 10722 "10011000" // SUB r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10723 "00100001" // /* MW 3 */ + 10724 "01100011" // /* MW 2 */ + 10725 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 +.delay_slot + 10726 "01111010" // NOPA; ST r17, [p6, #-8]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10727 "00000000" // /* MW 9 */ + 10728 "00000000" // /* MW 8 */ + 10729 "00000000" // /* MW 7 */ + 10730 "10000000" // /* MW 6 */ + 10731 "00110001" // /* MW 5 */ + 10732 "11100110" // /* MW 4 */ + 10733 "11110110" // /* MW 3 */ + 10734 "00101100" // /* MW 2 */ + 10735 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1216 +.src_ref 6 "superkernels.cpp" 502 7 + 10736 "11100001" // NOPA; NOPB; NOPS; MOVXM p7, #508436; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10737 "00000000" // /* MW 15 */ + 10738 "00000000" // /* MW 14 */ + 10739 "00010000" // /* MW 13 */ + 10740 "00001010" // /* MW 12 */ + 10741 "10110001" // /* MW 11 */ + 10742 "11110011" // /* MW 10 */ + 10743 "00000001" // /* MW 9 */ + 10744 "00000000" // /* MW 8 */ + 10745 "01011011" // /* MW 7 */ + 10746 "00000001" // /* MW 6 */ + 10747 "00100000" // /* MW 5 */ + 10748 "00000000" // /* MW 4 */ + 10749 "11110000" // /* MW 3 */ + 10750 "00101100" // /* MW 2 */ + 10751 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1232 +.src_ref 6 "superkernels.cpp" 502 7 first + 10752 "11100001" // LDA r17, [p7]; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10753 "00000000" // /* MW 15 */ + 10754 "00000000" // /* MW 14 */ + 10755 "01111000" // /* MW 13 */ + 10756 "10100101" // /* MW 12 */ + 10757 "00000001" // /* MW 11 */ + 10758 "00000000" // /* MW 10 */ + 10759 "00000000" // /* MW 9 */ + 10760 "00000000" // /* MW 8 */ + 10761 "01011011" // /* MW 7 */ + 10762 "00000001" // /* MW 6 */ + 10763 "00100000" // /* MW 5 */ + 10764 "00000000" // /* MW 4 */ + 10765 "11010000" // /* MW 3 */ + 10766 "11000110" // /* MW 2 */ + 10767 "11100000" // /* MW 1 */ +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1248 + 10768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10769 "00000000" // /* MW 1 */ + 10770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10771 "00000000" // /* MW 1 */ + 10772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10773 "00000000" // /* MW 1 */ + 10774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10775 "00000000" // /* MW 1 */ + 10776 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10777 "00000000" // /* MW 1 */ + 10778 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10779 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 502 19 + 10780 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10781 "00001000" // /* MW 3 */ + 10782 "01100001" // /* MW 2 */ + 10783 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 502 25 + 10784 "10000100" // JNZ r16, #10992 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10992 delay_slots=5 */ + 10785 "00000001" // /* MW 5 */ + 10786 "01000000" // /* MW 4 */ + 10787 "01111000" // /* MW 3 */ + 10788 "00010101" // /* MW 2 */ + 10789 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 502 62 +.delay_slot + 10790 "01000100" // MOVXM p1, #508512 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10791 "11000000" // /* MW 5 */ + 10792 "11000100" // /* MW 4 */ + 10793 "11000010" // /* MW 3 */ + 10794 "00000111" // /* MW 2 */ + 10795 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 502 29 +.src_ref 6 "superkernels.cpp" 505 15 +.delay_slot + 10796 "01000100" // MOVXM p7, #508440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10797 "00110000" // /* MW 5 */ + 10798 "11000100" // /* MW 4 */ + 10799 "11001110" // /* MW 3 */ + 10800 "00000111" // /* MW 2 */ + 10801 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10803 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10805 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10807 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 502 29 + 10808 "10011000" // LDA r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10809 "00010110" // /* MW 3 */ + 10810 "00000110" // /* MW 2 */ + 10811 "00000111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 502 62 + 10812 "10011000" // LDA.u16 r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10813 "00111010" // /* MW 3 */ + 10814 "00000100" // /* MW 2 */ + 10815 "00000001" // /* MW 1 */ + 10816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10817 "00000000" // /* MW 1 */ + 10818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10819 "00000000" // /* MW 1 */ + 10820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10821 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 502 43 +.no_stack_arguments + 10822 "00000100" // JL #13472 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=13472 delay_slots=5 */ + 10823 "00000001" // /* MW 5 */ + 10824 "00000000" // /* MW 4 */ + 10825 "01010000" // /* MW 3 */ + 10826 "00011010" // /* MW 2 */ + 10827 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10829 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 502 29 +.delay_slot + 10830 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10831 "00000111" // /* MW 3 */ + 10832 "00100000" // /* MW 2 */ + 10833 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 502 29 +.src_ref 6 "superkernels.cpp" 502 43 +.delay_slot + 10834 "11010010" // NOPB; ST r16, [p7]; LT r27, r16, r13 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10835 "11011010" // /* MW 7 */ + 10836 "00110110" // /* MW 6 */ + 10837 "00100100" // /* MW 5 */ + 10838 "00000000" // /* MW 4 */ + 10839 "00110000" // /* MW 3 */ + 10840 "11000010" // /* MW 2 */ + 10841 "11100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 502 43 +.src_ref 6 "superkernels.cpp" 502 43 +.delay_slot + 10842 "11100100" // SUB r17, r13, r16; MOV r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10843 "01000001" // /* MW 5 */ + 10844 "10111011" // /* MW 4 */ + 10845 "00110111" // /* MW 3 */ + 10846 "01100000" // /* MW 2 */ + 10847 "01101100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 502 43 +.delay_slot + 10848 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r0, r16, r17, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10849 "00000000" // /* MW 15 */ + 10850 "00000000" // /* MW 14 */ + 10851 "01111000" // /* MW 13 */ + 10852 "10100101" // /* MW 12 */ + 10853 "00000001" // /* MW 11 */ + 10854 "10010000" // /* MW 10 */ + 10855 "00001000" // /* MW 9 */ + 10856 "00100000" // /* MW 8 */ + 10857 "01011011" // /* MW 7 */ + 10858 "00000001" // /* MW 6 */ + 10859 "00100000" // /* MW 5 */ + 10860 "00000000" // /* MW 4 */ + 10861 "11110000" // /* MW 3 */ + 10862 "00101100" // /* MW 2 */ + 10863 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 +.src_ref 1 "io_buffer_main.h" 348 51 +.src_ref 6 "superkernels.cpp" 502 43 +.src_ref 6 "superkernels.cpp" 502 43 +.return_address + 10864 "10111010" // LDA p2, [sp, #-24]; SUB r16, r13, r3; MOV r27, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10865 "01111000" // /* MW 9 */ + 10866 "11010000" // /* MW 8 */ + 10867 "01101011" // /* MW 7 */ + 10868 "10001111" // /* MW 6 */ + 10869 "00000001" // /* MW 5 */ + 10870 "00011011" // /* MW 4 */ + 10871 "00100000" // /* MW 3 */ + 10872 "00100011" // /* MW 2 */ + 10873 "11111101" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 502 43 + 10874 "00011000" // SEL.EQZ r17, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10875 "00000010" // /* MW 3 */ + 10876 "11100011" // /* MW 2 */ + 10877 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 502 6 +.src_ref 6 "superkernels.cpp" 502 73 + 10878 "10000100" // JNZ r17, #10960 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10960 delay_slots=5 */ + 10879 "00000001" // /* MW 5 */ + 10880 "01000000" // /* MW 4 */ + 10881 "01101000" // /* MW 3 */ + 10882 "00010101" // /* MW 2 */ + 10883 "10001000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 508 7 +.delay_slot + 10884 "01000100" // MOVXM p1, #508436 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10885 "00101000" // /* MW 5 */ + 10886 "11000100" // /* MW 4 */ + 10887 "11000010" // /* MW 3 */ + 10888 "00000111" // /* MW 2 */ + 10889 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 449 8 +.delay_slot + 10890 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10891 "00000101" // /* MW 3 */ + 10892 "00100000" // /* MW 2 */ + 10893 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10895 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10897 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10899 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first +.src_ref 6 "superkernels.cpp" 505 15 first + 10900 "00001100" // LDA r17, [p2, #20]; ST r13, [p7] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10901 "01100011" // /* MW 5 */ + 10902 "00001011" // /* MW 4 */ + 10903 "11011110" // /* MW 3 */ + 10904 "11000110" // /* MW 2 */ + 10905 "01001010" // /* MW 1 */ + 10906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10907 "00000000" // /* MW 1 */ + 10908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10909 "00000000" // /* MW 1 */ + 10910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10911 "00000000" // /* MW 1 */ + 10912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10913 "00000000" // /* MW 1 */ + 10914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10915 "00000000" // /* MW 1 */ + 10916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10917 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 10918 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10919 "00001000" // /* MW 3 */ + 10920 "01010001" // /* MW 2 */ + 10921 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first + 10922 "10011000" // LDA r17, [p6, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10923 "00110110" // /* MW 3 */ + 10924 "11100110" // /* MW 2 */ + 10925 "00000110" // /* MW 1 */ + 10926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10927 "00000000" // /* MW 1 */ + 10928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10929 "00000000" // /* MW 1 */ + 10930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10931 "00000000" // /* MW 1 */ + 10932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10933 "00000000" // /* MW 1 */ + 10934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10935 "00000000" // /* MW 1 */ + 10936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10937 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 + 10938 "00101100" // NOPA; SUB r16, r16, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10939 "00100011" // /* MW 5 */ + 10940 "01000010" // /* MW 4 */ + 10941 "11111000" // /* MW 3 */ + 10942 "00101100" // /* MW 2 */ + 10943 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 10944 "11100001" // NOPA; NOPB; ST r16, [p6, #-8]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10945 "00000000" // /* MW 15 */ + 10946 "00000000" // /* MW 14 */ + 10947 "01111000" // /* MW 13 */ + 10948 "10100101" // /* MW 12 */ + 10949 "00000001" // /* MW 11 */ + 10950 "00000000" // /* MW 10 */ + 10951 "00000000" // /* MW 9 */ + 10952 "10000000" // /* MW 8 */ + 10953 "00010001" // /* MW 7 */ + 10954 "11100110" // /* MW 6 */ + 10955 "00100110" // /* MW 5 */ + 10956 "00000000" // /* MW 4 */ + 10957 "11110000" // /* MW 3 */ + 10958 "00101100" // /* MW 2 */ + 10959 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1440 + 10960 "10000100" // J #11008 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11008 delay_slots=5 */ + 10961 "00000000" // /* MW 5 */ + 10962 "00000000" // /* MW 4 */ + 10963 "10000000" // /* MW 3 */ + 10964 "00010101" // /* MW 2 */ + 10965 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 +.delay_slot + 10966 "11111000" // MOV p7, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10967 "11000000" // /* MW 3 */ + 10968 "01100100" // /* MW 2 */ + 10969 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10971 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10973 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10975 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10976 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10977 "00000000" // /* MW 15 */ + 10978 "00000000" // /* MW 14 */ + 10979 "01111000" // /* MW 13 */ + 10980 "10100101" // /* MW 12 */ + 10981 "00000001" // /* MW 11 */ + 10982 "00000000" // /* MW 10 */ + 10983 "00000000" // /* MW 9 */ + 10984 "00000000" // /* MW 8 */ + 10985 "01011011" // /* MW 7 */ + 10986 "00000001" // /* MW 6 */ + 10987 "00100000" // /* MW 5 */ + 10988 "00000000" // /* MW 4 */ + 10989 "11110000" // /* MW 3 */ + 10990 "00101100" // /* MW 2 */ + 10991 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1472 +.src_ref 1 "io_buffer_main.h" 348 51 +.src_ref 6 "superkernels.cpp" 508 7 + 10992 "11100001" // LDA p7, [sp, #-24]; NOPB; NOPS; MOVXM p1, #508436; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10993 "00000000" // /* MW 15 */ + 10994 "00000000" // /* MW 14 */ + 10995 "00010000" // /* MW 13 */ + 10996 "00001010" // /* MW 12 */ + 10997 "10110001" // /* MW 11 */ + 10998 "11110000" // /* MW 10 */ + 10999 "00000001" // /* MW 9 */ + 11000 "00000000" // /* MW 8 */ + 11001 "01011011" // /* MW 7 */ + 11002 "00000001" // /* MW 6 */ + 11003 "00100000" // /* MW 5 */ + 11004 "00000000" // /* MW 4 */ + 11005 "00100000" // /* MW 3 */ + 11006 "01110011" // /* MW 2 */ + 11007 "11111101" // /* MW 1 */ +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1488 +.src_ref 6 "superkernels.cpp" 508 7 first + 11008 "10011000" // LDA r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11009 "00010110" // /* MW 3 */ + 11010 "00000110" // /* MW 2 */ + 11011 "00000001" // /* MW 1 */ + 11012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11013 "00000000" // /* MW 1 */ + 11014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11015 "00000000" // /* MW 1 */ + 11016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11017 "00000000" // /* MW 1 */ + 11018 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11019 "00000000" // /* MW 1 */ + 11020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11021 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 508 19 + 11022 "00011000" // MOVX r17, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11023 "00010001" // /* MW 3 */ + 11024 "00100010" // /* MW 2 */ + 11025 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 508 19 + 11026 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11027 "00001000" // /* MW 3 */ + 11028 "01100001" // /* MW 2 */ + 11029 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 508 25 + 11030 "10000100" // JNZ r16, #11184 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11184 delay_slots=5 */ + 11031 "00000001" // /* MW 5 */ + 11032 "01000000" // /* MW 4 */ + 11033 "11011000" // /* MW 3 */ + 11034 "00010101" // /* MW 2 */ + 11035 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 508 29 +.delay_slot + 11036 "01000100" // MOVXM p1, #508444 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11037 "00111000" // /* MW 5 */ + 11038 "11000100" // /* MW 4 */ + 11039 "11000010" // /* MW 3 */ + 11040 "00000111" // /* MW 2 */ + 11041 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11043 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11045 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11047 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11049 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 508 29 +.src_ref 6 "superkernels.cpp" 508 63 + 11050 "10111010" // LDA r16, [p1]; MOVXM p2, #508512 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11051 "00010000" // /* MW 9 */ + 11052 "00110000" // /* MW 8 */ + 11053 "00110001" // /* MW 7 */ + 11054 "11110001" // /* MW 6 */ + 11055 "00000001" // /* MW 5 */ + 11056 "00000000" // /* MW 4 */ + 11057 "11010000" // /* MW 3 */ + 11058 "11000010" // /* MW 2 */ + 11059 "00100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 508 63 + 11060 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11061 "00111010" // /* MW 3 */ + 11062 "00000100" // /* MW 2 */ + 11063 "00000010" // /* MW 1 */ + 11064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11065 "00000000" // /* MW 1 */ + 11066 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11067 "00000000" // /* MW 1 */ + 11068 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11069 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 508 44 +.no_stack_arguments + 11070 "00000100" // JL #13472 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=13472 delay_slots=5 */ + 11071 "00000001" // /* MW 5 */ + 11072 "00000000" // /* MW 4 */ + 11073 "01010000" // /* MW 3 */ + 11074 "00011010" // /* MW 2 */ + 11075 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11077 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 508 29 +.delay_slot + 11078 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11079 "00000111" // /* MW 3 */ + 11080 "00100000" // /* MW 2 */ + 11081 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 508 29 +.src_ref 6 "superkernels.cpp" 508 44 +.delay_slot + 11082 "01011100" // ST r16, [p1]; LT r27, r16, r13 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11083 "10110101" // /* MW 5 */ + 11084 "01101101" // /* MW 4 */ + 11085 "00111000" // /* MW 3 */ + 11086 "11000010" // /* MW 2 */ + 11087 "00100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 508 44 +.src_ref 6 "superkernels.cpp" 508 44 +.delay_slot + 11088 "11100100" // SUB r17, r13, r16; MOV r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11089 "01000001" // /* MW 5 */ + 11090 "10111011" // /* MW 4 */ + 11091 "00110111" // /* MW 3 */ + 11092 "01100000" // /* MW 2 */ + 11093 "01101100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 508 44 +.delay_slot + 11094 "01111010" // NOPA; NOPS; SEL.EQZ r0, r16, r17, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11095 "00010010" // /* MW 9 */ + 11096 "00000001" // /* MW 8 */ + 11097 "00000100" // /* MW 7 */ + 11098 "00000000" // /* MW 6 */ + 11099 "01011011" // /* MW 5 */ + 11100 "00000001" // /* MW 4 */ + 11101 "11110000" // /* MW 3 */ + 11102 "00101100" // /* MW 2 */ + 11103 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 508 44 +.src_ref 6 "superkernels.cpp" 508 44 +.return_address + 11104 "11100100" // SUB r16, r13, r3; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11105 "01000001" // /* MW 5 */ + 11106 "10101111" // /* MW 4 */ + 11107 "00111101" // /* MW 3 */ + 11108 "00000110" // /* MW 2 */ + 11109 "01101100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 508 44 + 11110 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11111 "00000010" // /* MW 3 */ + 11112 "11100001" // /* MW 2 */ + 11113 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 508 6 +.src_ref 6 "superkernels.cpp" 508 74 + 11114 "10000100" // JNZ r16, #11184 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11184 delay_slots=5 */ + 11115 "00000001" // /* MW 5 */ + 11116 "01000000" // /* MW 4 */ + 11117 "11011000" // /* MW 3 */ + 11118 "00010101" // /* MW 2 */ + 11119 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 511 16 +.delay_slot + 11120 "01000100" // MOVXM p1, #508444 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11121 "00111000" // /* MW 5 */ + 11122 "11000100" // /* MW 4 */ + 11123 "11000010" // /* MW 3 */ + 11124 "00000111" // /* MW 2 */ + 11125 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11127 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11129 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11131 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11133 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 449 8 +.src_ref 6 "superkernels.cpp" 511 16 first + 11134 "01111010" // LDA r17, [p7, #20]; ST r13, [p1]; MOVX r16, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11135 "00000101" // /* MW 9 */ + 11136 "00100000" // /* MW 8 */ + 11137 "00000000" // /* MW 7 */ + 11138 "10000000" // /* MW 6 */ + 11139 "10110001" // /* MW 5 */ + 11140 "00000101" // /* MW 4 */ + 11141 "11010001" // /* MW 3 */ + 11142 "11000110" // /* MW 2 */ + 11143 "11101010" // /* MW 1 */ + 11144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11145 "00000000" // /* MW 1 */ + 11146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11147 "00000000" // /* MW 1 */ + 11148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11149 "00000000" // /* MW 1 */ + 11150 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11151 "00000000" // /* MW 1 */ + 11152 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11153 "00000000" // /* MW 1 */ + 11154 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11155 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 11156 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11157 "00001000" // /* MW 3 */ + 11158 "01010001" // /* MW 2 */ + 11159 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first + 11160 "10011000" // LDA r17, [p6, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11161 "00110110" // /* MW 3 */ + 11162 "11100110" // /* MW 2 */ + 11163 "00000110" // /* MW 1 */ + 11164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11165 "00000000" // /* MW 1 */ + 11166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11167 "00000000" // /* MW 1 */ + 11168 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11169 "00000000" // /* MW 1 */ + 11170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11171 "00000000" // /* MW 1 */ + 11172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11173 "00000000" // /* MW 1 */ + 11174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11175 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 + 11176 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11177 "00010001" // /* MW 3 */ + 11178 "00100001" // /* MW 2 */ + 11179 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 11180 "10011000" // ST r16, [p6, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11181 "00010001" // /* MW 3 */ + 11182 "11100110" // /* MW 2 */ + 11183 "00001110" // /* MW 1 */ +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1664 +.src_ref 6 "superkernels.cpp" 514 6 +.src_ref 6 "superkernels.cpp" 515 14 + 11184 "10111010" // LDA r1, [sp, #-12]; MOVXM p6, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11185 "00010000" // /* MW 9 */ + 11186 "00000000" // /* MW 8 */ + 11187 "00110001" // /* MW 7 */ + 11188 "11110011" // /* MW 6 */ + 11189 "00000001" // /* MW 5 */ + 11190 "00000000" // /* MW 4 */ + 11191 "00100000" // /* MW 3 */ + 11192 "10000110" // /* MW 2 */ + 11193 "11111110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 514 6 first +.src_ref 6 "superkernels.cpp" 514 19 + 11194 "10111010" // LDA r16, [p6]; MOVXM p1, #508456 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11195 "00010000" // /* MW 9 */ + 11196 "00010100" // /* MW 8 */ + 11197 "10110001" // /* MW 7 */ + 11198 "11110000" // /* MW 6 */ + 11199 "00000001" // /* MW 5 */ + 11200 "00000000" // /* MW 4 */ + 11201 "11010000" // /* MW 3 */ + 11202 "11000010" // /* MW 2 */ + 11203 "11000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 514 19 + 11204 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11205 "00110110" // /* MW 3 */ + 11206 "00000110" // /* MW 2 */ + 11207 "00000001" // /* MW 1 */ + 11208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11209 "00000000" // /* MW 1 */ + 11210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11211 "00000000" // /* MW 1 */ + 11212 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11213 "00000000" // /* MW 1 */ + 11214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11215 "00000000" // /* MW 1 */ + 11216 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11217 "00000000" // /* MW 1 */ + 11218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11219 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 514 16 + 11220 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11221 "00001000" // /* MW 3 */ + 11222 "01100001" // /* MW 2 */ + 11223 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 514 6 + 11224 "10000100" // JNZ r16, #11248 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11248 delay_slots=5 */ + 11225 "00000001" // /* MW 5 */ + 11226 "01000000" // /* MW 4 */ + 11227 "11111000" // /* MW 3 */ + 11228 "00010101" // /* MW 2 */ + 11229 "10000000" // /* MW 1 */ +.delay_slot + 11230 "00011000" // LDA p7, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11231 "10011001" // /* MW 3 */ + 11232 "11101111" // /* MW 2 */ + 11233 "00000111" // /* MW 1 */ +.delay_slot + 11234 "00011000" // LDA r15, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11235 "11110001" // /* MW 3 */ + 11236 "11110001" // /* MW 2 */ + 11237 "00000111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11238 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11239 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11240 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11241 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11243 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 515 14 first + 11244 "10011000" // ST r13, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11245 "10110001" // /* MW 3 */ + 11246 "00000101" // /* MW 2 */ + 11247 "00001110" // /* MW 1 */ +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1728 +.src_ref 6 "superkernels.cpp" 517 + 11248 "11010100" // LDA p6, [sp, #-4]; MOV lr, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11249 "01000001" // /* MW 5 */ + 11250 "11101110" // /* MW 4 */ + 11251 "00101110" // /* MW 3 */ + 11252 "11100011" // /* MW 2 */ + 11253 "11111111" // /* MW 1 */ + 11254 "00011000" // LDA r13, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11255 "10110001" // /* MW 3 */ + 11256 "11111001" // /* MW 2 */ + 11257 "00000111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 517 first + 11258 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11259 "00000000" // /* MW 3 */ + 11260 "00101000" // /* MW 2 */ + 11261 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 517 +.delay_slot + 11262 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11263 "00000001" // /* MW 5 */ + 11264 "00000000" // /* MW 4 */ + 11265 "00000000" // /* MW 3 */ + 11266 "11111000" // /* MW 2 */ + 11267 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11269 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11271 "00000000" // /* MW 1 */ +.delay_slot + 11272 "11111000" // MOV r14, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11273 "10100000" // /* MW 3 */ + 11274 "10010000" // /* MW 2 */ + 11275 "00011011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 11277 "00000000" // /* MW 1 */ +.label __Z15_b14285_wrapperPPv___func_begin0 +.label _Z15_b14285_wrapperPPv +.function _b14285_wrapper _Z15_b14285_wrapperPPv +.src_ref 0 "0_0_reloadable11.cc" 21 first +.src_ref 0 "0_0_reloadable11.cc" 23 79 +.function_start + 11280 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11281 "11000000" // /* MW 3 */ + 11282 "01100000" // /* MW 2 */ + 11283 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 23 79 first + 11284 "10011000" // LDA p0, [p2], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11285 "00011110" // /* MW 3 */ + 11286 "00111100" // /* MW 2 */ + 11287 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 24 47 first + 11288 "10011000" // LDA p1, [p2], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11289 "10011110" // /* MW 3 */ + 11290 "11101100" // /* MW 2 */ + 11291 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 26 81 first + 11292 "10011000" // LDA p3, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11293 "10011110" // /* MW 3 */ + 11294 "00010101" // /* MW 2 */ + 11295 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 25 80 first + 11296 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11297 "00011110" // /* MW 3 */ + 11298 "00000101" // /* MW 2 */ + 11299 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 22 4 first +.tail_call + 11300 "10000100" // J #6560 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=6560 delay_slots=5 */ + 11301 "00000000" // /* MW 5 */ + 11302 "00000000" // /* MW 4 */ + 11303 "11010000" // /* MW 3 */ + 11304 "00001100" // /* MW 2 */ + 11305 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11306 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11307 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11309 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11311 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11313 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z15_b14285_wrapperPPv__end +.label __Z15_b14285_wrapperPPv___func_end0 + 11315 "00000000" // /* MW 1 */ +.label __Z15_b14290_wrapperPPv___func_begin0 +.label _Z15_b14290_wrapperPPv +.function _b14290_wrapper _Z15_b14290_wrapperPPv +.src_ref 0 "0_0_reloadable11.cc" 30 first +.src_ref 0 "0_0_reloadable11.cc" 32 79 +.function_start + 11328 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11329 "11000000" // /* MW 3 */ + 11330 "01100000" // /* MW 2 */ + 11331 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 32 79 first + 11332 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11333 "00011110" // /* MW 3 */ + 11334 "00101100" // /* MW 2 */ + 11335 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 34 81 first + 11336 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11337 "00011110" // /* MW 3 */ + 11338 "11110101" // /* MW 2 */ + 11339 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 33 47 first + 11340 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11341 "10011110" // /* MW 3 */ + 11342 "00000100" // /* MW 2 */ + 11343 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 31 4 first +.tail_call + 11344 "10000100" // J #3808 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=3808 delay_slots=5 */ + 11345 "00000000" // /* MW 5 */ + 11346 "00000000" // /* MW 4 */ + 11347 "01110000" // /* MW 3 */ + 11348 "00000111" // /* MW 2 */ + 11349 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11351 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11353 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11355 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11357 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z15_b14290_wrapperPPv__end +.label __Z15_b14290_wrapperPPv___func_end0 + 11359 "00000000" // /* MW 1 */ +.label __Z15_b13811_wrapperPPv___func_begin0 +.label _Z15_b13811_wrapperPPv +.function _b13811_wrapper _Z15_b13811_wrapperPPv +.src_ref 0 "0_0_reloadable11.cc" 38 first +.src_ref 0 "0_0_reloadable11.cc" 40 79 +.function_start + 11360 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11361 "11000000" // /* MW 3 */ + 11362 "01100000" // /* MW 2 */ + 11363 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 40 79 first + 11364 "10011000" // LDA p0, [p2], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11365 "00011110" // /* MW 3 */ + 11366 "00111100" // /* MW 2 */ + 11367 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 41 47 first + 11368 "10011000" // LDA p1, [p2], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11369 "10011110" // /* MW 3 */ + 11370 "11101100" // /* MW 2 */ + 11371 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 43 81 first + 11372 "10011000" // LDA p3, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11373 "10011110" // /* MW 3 */ + 11374 "00010101" // /* MW 2 */ + 11375 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 42 80 first + 11376 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11377 "00011110" // /* MW 3 */ + 11378 "00000101" // /* MW 2 */ + 11379 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 39 4 first +.tail_call + 11380 "10000100" // J #4592 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=4592 delay_slots=5 */ + 11381 "00000000" // /* MW 5 */ + 11382 "00000000" // /* MW 4 */ + 11383 "11111000" // /* MW 3 */ + 11384 "00001000" // /* MW 2 */ + 11385 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11387 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11389 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11391 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11393 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11394 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z15_b13811_wrapperPPv__end +.label __Z15_b13811_wrapperPPv___func_end0 + 11395 "00000000" // /* MW 1 */ +.label __Z15_b13749_wrapperPPv___func_begin0 +.label _Z15_b13749_wrapperPPv +.function _b13749_wrapper _Z15_b13749_wrapperPPv +.src_ref 0 "0_0_reloadable11.cc" 47 first +.src_ref 0 "0_0_reloadable11.cc" 49 79 +.function_start + 11408 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11409 "11000000" // /* MW 3 */ + 11410 "01100000" // /* MW 2 */ + 11411 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 49 79 first + 11412 "10011000" // LDA p0, [p2], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11413 "00011110" // /* MW 3 */ + 11414 "00111100" // /* MW 2 */ + 11415 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 50 47 first + 11416 "10011000" // LDA p1, [p2], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11417 "10011110" // /* MW 3 */ + 11418 "11101100" // /* MW 2 */ + 11419 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 52 81 first + 11420 "10011000" // LDA p3, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11421 "10011110" // /* MW 3 */ + 11422 "00010101" // /* MW 2 */ + 11423 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 51 80 first + 11424 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11425 "00011110" // /* MW 3 */ + 11426 "00000101" // /* MW 2 */ + 11427 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 48 4 first +.tail_call + 11428 "10000100" // J #5712 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=5712 delay_slots=5 */ + 11429 "00000000" // /* MW 5 */ + 11430 "00000000" // /* MW 4 */ + 11431 "00101000" // /* MW 3 */ + 11432 "00001011" // /* MW 2 */ + 11433 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11435 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11437 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11439 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11441 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z15_b13749_wrapperPPv__end +.label __Z15_b13749_wrapperPPv___func_end0 + 11443 "00000000" // /* MW 1 */ +.label __Z15_b14811_wrapperPPv___func_begin0 +.label _Z15_b14811_wrapperPPv +.function _b14811_wrapper _Z15_b14811_wrapperPPv +.src_ref 0 "0_0_reloadable11.cc" 56 first +.src_ref 0 "0_0_reloadable11.cc" 58 79 +.function_start + 11456 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11457 "11000000" // /* MW 3 */ + 11458 "01100000" // /* MW 2 */ + 11459 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 58 79 first + 11460 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11461 "00011110" // /* MW 3 */ + 11462 "00101100" // /* MW 2 */ + 11463 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 60 81 first + 11464 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11465 "00011110" // /* MW 3 */ + 11466 "11110101" // /* MW 2 */ + 11467 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 59 47 first + 11468 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11469 "10011110" // /* MW 3 */ + 11470 "00000100" // /* MW 2 */ + 11471 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 57 4 first +.tail_call + 11472 "10000100" // J #9520 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=9520 delay_slots=5 */ + 11473 "00000000" // /* MW 5 */ + 11474 "00000000" // /* MW 4 */ + 11475 "10011000" // /* MW 3 */ + 11476 "00010010" // /* MW 2 */ + 11477 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11479 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11481 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11483 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11485 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z15_b14811_wrapperPPv__end +.label __Z15_b14811_wrapperPPv___func_end0 + 11487 "00000000" // /* MW 1 */ +.label __Z17softmax_row_majorILi1E8bfloat16S0_Lt1EEvPT0_PT1_33softmax_row_major_internal_params___func_begin0 +.label _Z17softmax_row_majorILi1E8bfloat16S0_Lt1EEvPT0_PT1_33softmax_row_major_internal_params +.function softmax_row_major<1, bfloat16, bfloat16, (unsigned short)1> _Z17softmax_row_majorILi1E8bfloat16S0_Lt1EEvPT0_PT1_33softmax_row_major_internal_params +.src_ref 3 "softmax_row_major.h" 214 +.src_ref 3 "softmax_row_major.h" 214 first +.function_start + 11488 "11111000" // MOV r0, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11489 "11000000" // /* MW 3 */ + 11490 "00010100" // /* MW 2 */ + 11491 "00011000" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 214 +.src_ref 3 "softmax_row_major.h" 219 32 + 11492 "00010100" // MOVA m0, #-6; ADD.NC p2, r0, #4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11493 "00000100" // /* MW 5 */ + 11494 "11000000" // /* MW 4 */ + 11495 "10000100" // /* MW 3 */ + 11496 "01000000" // /* MW 2 */ + 11497 "11111111" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 52 87 +.src_ref 3 "softmax_row_major.h" 217 25 first + 11498 "10111010" // LDA r0, [p2], #4; MOVXM p3, #508464 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11499 "00010000" // /* MW 9 */ + 11500 "00011000" // /* MW 8 */ + 11501 "10110001" // /* MW 7 */ + 11502 "11110001" // /* MW 6 */ + 11503 "00000001" // /* MW 5 */ + 11504 "00000000" // /* MW 4 */ + 11505 "11010000" // /* MW 3 */ + 11506 "10000010" // /* MW 2 */ + 11507 "01000011" // /* MW 1 */ +.src_ref 5 "accum_native_types.hpp" 213 147 +.src_ref 3 "softmax_row_major.h" 52 87 +.src_ref 3 "softmax_row_major.h" 219 32 first + 11508 "10111010" // LDA r18, [p2], m0; MOVXM r2, #1069088768 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11509 "00010000" // /* MW 9 */ + 11510 "00000000" // /* MW 8 */ + 11511 "01001000" // /* MW 7 */ + 11512 "01000000" // /* MW 6 */ + 11513 "11101110" // /* MW 5 */ + 11514 "00001111" // /* MW 4 */ + 11515 "11010000" // /* MW 3 */ + 11516 "01001010" // /* MW 2 */ + 11517 "01000001" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 +.src_ref 3 "softmax_row_major.h" 256 76 first + 11518 "10111010" // LDA.u16 r22, [p2]; MOVXM r29, #65408 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11519 "00010000" // /* MW 9 */ + 11520 "11000000" // /* MW 8 */ + 11521 "10101111" // /* MW 7 */ + 11522 "00111111" // /* MW 6 */ + 11523 "00000000" // /* MW 5 */ + 11524 "00000000" // /* MW 4 */ + 11525 "01010000" // /* MW 3 */ + 11526 "11011011" // /* MW 2 */ + 11527 "01000000" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 52 87 first +.src_ref 3 "softmax_row_major.h" 83 8 first + 11528 "10111010" // LDA.s8 r17, [p3]; MOVXM ls, #11680 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11529 "00010000" // /* MW 9 */ + 11530 "11010000" // /* MW 8 */ + 11531 "01111110" // /* MW 7 */ + 11532 "00001000" // /* MW 6 */ + 11533 "00000000" // /* MW 5 */ + 11534 "00000000" // /* MW 4 */ + 11535 "01010000" // /* MW 3 */ + 11536 "11000100" // /* MW 2 */ + 11537 "01100000" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 83 8 + 11538 "01000100" // MOVXM le, #11744 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11539 "11000000" // /* MW 5 */ + 11540 "11111011" // /* MW 4 */ + 11541 "00100110" // /* MW 3 */ + 11542 "00000000" // /* MW 2 */ + 11543 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 first +.src_ref 3 "softmax_row_major.h" 68 40 + 11544 "11100100" // MOVX r5, #-31; VBCST.16 x9, r29 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11545 "11100101" // /* MW 5 */ + 11546 "11101010" // /* MW 4 */ + 11547 "10101001" // /* MW 3 */ + 11548 "01010000" // /* MW 2 */ + 11549 "11111001" // /* MW 1 */ +.src_ref 5 "accum_native_types.hpp" 213 147 first +.src_ref 3 "softmax_row_major.h" 52 87 first +.src_ref 3 "softmax_row_major.h" 55 37 +.src_ref 3 "softmax_row_major.h" 68 40 +.src_ref 3 "softmax_row_major.h" 264 29 + 11550 "11100100" // MOVX r7, #-4; VINSERT.32 x0, x0, #0, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11551 "10100010" // /* MW 5 */ + 11552 "00000000" // /* MW 4 */ + 11553 "00100000" // /* MW 3 */ + 11554 "11011110" // /* MW 2 */ + 11555 "11111001" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 52 87 +.src_ref 3 "softmax_row_major.h" 55 37 first + 11556 "11100100" // LSHL r29, r0, r7; VMOV bmll0, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11557 "00100101" // /* MW 5 */ + 11558 "00000001" // /* MW 4 */ + 11559 "10110000" // /* MW 3 */ + 11560 "01001111" // /* MW 2 */ + 11561 "00000111" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 68 40 first + 11562 "10011000" // ASHL r5, r18, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11563 "01011110" // /* MW 3 */ + 11564 "10001010" // /* MW 2 */ + 11565 "00010100" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 256 64 +.src_ref 3 "softmax_row_major.h" 264 29 first +.src_ref 3 "softmax_row_major.h" 269 47 + 11566 "01100100" // ASHL r0, r0, r7; MOV r2, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11567 "00000101" // /* MW 5 */ + 11568 "00100000" // /* MW 4 */ + 11569 "11010001" // /* MW 3 */ + 11570 "00001111" // /* MW 2 */ + 11571 "00000000" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 52 87 +.src_ref 3 "softmax_row_major.h" 69 53 +.src_ref 3 "softmax_row_major.h" 256 64 first + 11572 "10111010" // MOVA r6, #15; LSHL r22, r22, r2; MOV crRnd, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11573 "01111000" // /* MW 9 */ + 11574 "01010000" // /* MW 8 */ + 11575 "11011100" // /* MW 7 */ + 11576 "01101111" // /* MW 6 */ + 11577 "01100001" // /* MW 5 */ + 11578 "00101101" // /* MW 4 */ + 11579 "00000000" // /* MW 3 */ + 11580 "11100110" // /* MW 2 */ + 11581 "00000001" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 68 40 +.src_ref 3 "softmax_row_major.h" 69 53 first +.src_ref 3 "softmax_row_major.h" 256 64 + 11582 "10111010" // MOVA r25, #-28; AND r6, r6, r18; MOV m0, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11583 "01111000" // /* MW 9 */ + 11584 "10010000" // /* MW 8 */ + 11585 "00000101" // /* MW 7 */ + 11586 "00100100" // /* MW 6 */ + 11587 "01101001" // /* MW 5 */ + 11588 "00001100" // /* MW 4 */ + 11589 "00000000" // /* MW 3 */ + 11590 "10011001" // /* MW 2 */ + 11591 "11111100" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 52 87 first +.src_ref 3 "softmax_row_major.h" 68 40 first +.src_ref 3 "softmax_row_major.h" 70 65 +.src_ref 3 "softmax_row_major.h" 85 63 +.src_ref 3 "softmax_row_major.h" 176 59 +.src_ref 3 "softmax_row_major.h" 256 64 first + 11592 "00110110" // PADDB [p0], m0; VCONV.bf16.fp32 wl0, bmll0; LSHL r5, r5, r25; MOV r21, #-1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 11593 "01011000" // /* MW 11 */ + 11594 "11111111" // /* MW 10 */ + 11595 "10101111" // /* MW 9 */ + 11596 "11101110" // /* MW 8 */ + 11597 "01011100" // /* MW 7 */ + 11598 "00001010" // /* MW 6 */ + 11599 "00100000" // /* MW 5 */ + 11600 "00010111" // /* MW 4 */ + 11601 "11000000" // /* MW 3 */ + 11602 "00000010" // /* MW 2 */ + 11603 "00001000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 212 115 +.src_ref 3 "softmax_row_major.h" 68 40 +.src_ref 3 "softmax_row_major.h" 70 65 first +.src_ref 3 "softmax_row_major.h" 99 35 + 11604 "00111010" // MOVS p3, p0; LSHL r6, r21, r6; ADD.NC r5, r5, r18 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11605 "10101001" // /* MW 9 */ + 11606 "01100100" // /* MW 8 */ + 11607 "10101001" // /* MW 7 */ + 11608 "01101100" // /* MW 6 */ + 11609 "01100011" // /* MW 5 */ + 11610 "00101010" // /* MW 4 */ + 11611 "01100000" // /* MW 3 */ + 11612 "00010001" // /* MW 2 */ + 11613 "01110000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 212 115 first +.src_ref 4 "vector_native_types.hpp" 373 137 first +.src_ref 4 "broadcast.hpp" 102 58 first +.src_ref 3 "softmax_row_major.h" 52 87 first +.src_ref 3 "softmax_row_major.h" 70 65 +.src_ref 3 "softmax_row_major.h" 99 35 first + 11614 "10110110" // NOPA; VLDB wl10, [p3], #32; XOR r6, r21, r6; VEXTBCST.16 x0, x0, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 11615 "10101000" // /* MW 11 */ + 11616 "10000001" // /* MW 10 */ + 11617 "00000000" // /* MW 9 */ + 11618 "00110100" // /* MW 8 */ + 11619 "01100011" // /* MW 7 */ + 11620 "00101010" // /* MW 6 */ + 11621 "01001000" // /* MW 5 */ + 11622 "00111101" // /* MW 4 */ + 11623 "11110110" // /* MW 3 */ + 11624 "00101100" // /* MW 2 */ + 11625 "00000000" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 68 40 first +.src_ref 3 "softmax_row_major.h" 85 63 +.src_ref 3 "softmax_row_major.h" 85 65 + 11626 "01100100" // ASHL r5, r5, r7; MOV r7, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11627 "00000001" // /* MW 5 */ + 11628 "10100000" // /* MW 4 */ + 11629 "11010011" // /* MW 3 */ + 11630 "01001111" // /* MW 2 */ + 11631 "00101001" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 83 8 first +.src_ref 3 "softmax_row_major.h" 85 63 +.src_ref 3 "softmax_row_major.h" 85 63 first +.src_ref 3 "softmax_row_major.h" 176 59 + 11632 "11100001" // MOVA r23, #0; NOPB; NOPS; LT r27, r7, r5; ADD.NC lc, r29, #-1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11633 "00000000" // /* MW 15 */ + 11634 "00000000" // /* MW 14 */ + 11635 "11001000" // /* MW 13 */ + 11636 "01111111" // /* MW 12 */ + 11637 "10111111" // /* MW 11 */ + 11638 "11010110" // /* MW 10 */ + 11639 "10110010" // /* MW 9 */ + 11640 "00001111" // /* MW 8 */ + 11641 "01011011" // /* MW 7 */ + 11642 "00000001" // /* MW 6 */ + 11643 "00100000" // /* MW 5 */ + 11644 "00000000" // /* MW 4 */ + 11645 "00000000" // /* MW 3 */ + 11646 "00010111" // /* MW 2 */ + 11647 "00000000" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 85 63 + 11648 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r30, r23, r21, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11649 "00000000" // /* MW 15 */ + 11650 "00000000" // /* MW 14 */ + 11651 "01111000" // /* MW 13 */ + 11652 "10100101" // /* MW 12 */ + 11653 "00000001" // /* MW 11 */ + 11654 "10010000" // /* MW 10 */ + 11655 "11101010" // /* MW 9 */ + 11656 "00101111" // /* MW 8 */ + 11657 "01011011" // /* MW 7 */ + 11658 "00000001" // /* MW 6 */ + 11659 "00100000" // /* MW 5 */ + 11660 "00000000" // /* MW 4 */ + 11661 "11110000" // /* MW 3 */ + 11662 "00101100" // /* MW 2 */ + 11663 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 +.src_ref 3 "softmax_row_major.h" 85 65 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11664 "11100001" // NOPA; NOPB; NOPS; EQ r27, r7, r5; VMOV x8, x9; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11665 "00000000" // /* MW 15 */ + 11666 "00000000" // /* MW 14 */ + 11667 "01111000" // /* MW 13 */ + 11668 "01001001" // /* MW 12 */ + 11669 "00011001" // /* MW 11 */ + 11670 "10111110" // /* MW 10 */ + 11671 "10110010" // /* MW 9 */ + 11672 "00001111" // /* MW 8 */ + 11673 "01011011" // /* MW 7 */ + 11674 "00000001" // /* MW 6 */ + 11675 "00100000" // /* MW 5 */ + 11676 "00000000" // /* MW 4 */ + 11677 "11110000" // /* MW 3 */ + 11678 "00101100" // /* MW 2 */ + 11679 "00000000" // /* MW 1 */ +.label ZLS_F_Z17softmax_row_majorILi1E8bfloat16S0_Lt1EEvPT0_PT1_33softmax_row_major_internal_params_192 +.src_ref 4 "vector.hpp" 212 115 first +.src_ref 3 "softmax_row_major.h" 85 63 +.src_ref 3 "softmax_row_major.h" 99 35 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.noswbrkpt +.loop_nesting 1 + 11680 "11100001" // NOPA; VLDB wl10, [p3], #32; NOPS; SEL.EQZ r31, r30, r6, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11681 "00000000" // /* MW 15 */ + 11682 "00000000" // /* MW 14 */ + 11683 "01111000" // /* MW 13 */ + 11684 "10100101" // /* MW 12 */ + 11685 "00000001" // /* MW 11 */ + 11686 "00010000" // /* MW 10 */ + 11687 "11110011" // /* MW 9 */ + 11688 "00111101" // /* MW 8 */ + 11689 "01011011" // /* MW 7 */ + 11690 "00000001" // /* MW 6 */ + 11691 "01001000" // /* MW 5 */ + 11692 "00111101" // /* MW 4 */ + 11693 "11110110" // /* MW 3 */ + 11694 "00101100" // /* MW 2 */ + 11695 "00000000" // /* MW 1 */ +.src_ref 8 "mask.hpp" 57 33 first +.src_ref 3 "softmax_row_major.h" 83 41 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11696 "11100001" // NOPA; NOPB; NOPS; EXTEND.u16 r19, r31; ADD.NC r7, r7, #1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11697 "00000000" // /* MW 15 */ + 11698 "00000000" // /* MW 14 */ + 11699 "01001000" // /* MW 13 */ + 11700 "11000000" // /* MW 12 */ + 11701 "11101001" // /* MW 11 */ + 11702 "10000000" // /* MW 10 */ + 11703 "00110101" // /* MW 9 */ + 11704 "00111111" // /* MW 8 */ + 11705 "01011011" // /* MW 7 */ + 11706 "00000001" // /* MW 6 */ + 11707 "00100000" // /* MW 5 */ + 11708 "00000000" // /* MW 4 */ + 11709 "11110000" // /* MW 3 */ + 11710 "00101100" // /* MW 2 */ + 11711 "00000000" // /* MW 1 */ +.src_ref 4 "blend.hpp" 163 48 first +.src_ref 3 "softmax_row_major.h" 85 63 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11712 "11100001" // NOPA; NOPB; NOPS; LT r27, r7, r5; VSEL.16 x5, x9, x10, r19; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11713 "00000000" // /* MW 15 */ + 11714 "00000000" // /* MW 14 */ + 11715 "00011000" // /* MW 13 */ + 11716 "10001101" // /* MW 12 */ + 11717 "01100110" // /* MW 11 */ + 11718 "11010101" // /* MW 10 */ + 11719 "10110010" // /* MW 9 */ + 11720 "00001111" // /* MW 8 */ + 11721 "01011011" // /* MW 7 */ + 11722 "00000001" // /* MW 6 */ + 11723 "00100000" // /* MW 5 */ + 11724 "00000000" // /* MW 4 */ + 11725 "11110000" // /* MW 3 */ + 11726 "00101100" // /* MW 2 */ + 11727 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 111 101 first +.src_ref 4 "vector.hpp" 1454 19 first +.src_ref 3 "softmax_row_major.h" 85 63 + 11728 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r30, r23, r21, r27; VMOV wl3, wl5; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11729 "00000000" // /* MW 15 */ + 11730 "00000000" // /* MW 14 */ + 11731 "01111000" // /* MW 13 */ + 11732 "10010001" // /* MW 12 */ + 11733 "11100101" // /* MW 11 */ + 11734 "10010000" // /* MW 10 */ + 11735 "11101010" // /* MW 9 */ + 11736 "00101111" // /* MW 8 */ + 11737 "01011011" // /* MW 7 */ + 11738 "00000001" // /* MW 6 */ + 11739 "00100000" // /* MW 5 */ + 11740 "00000000" // /* MW 4 */ + 11741 "11110000" // /* MW 3 */ + 11742 "00101100" // /* MW 2 */ + 11743 "00000000" // /* MW 1 */ +.label ZLE_F_Z17softmax_row_majorILi1E8bfloat16S0_Lt1EEvPT0_PT1_33softmax_row_major_internal_params_256 +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 3 "softmax_row_major.h" 85 65 first +.end_of_loop + 11744 "11100001" // NOPA; NOPB; NOPS; EQ r27, r7, r5; VMAX_LT.bf16 x8, r16, x8, x3; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11745 "00000000" // /* MW 15 */ + 11746 "00000000" // /* MW 14 */ + 11747 "01111000" // /* MW 13 */ + 11748 "11110110" // /* MW 12 */ + 11749 "00100000" // /* MW 11 */ + 11750 "10111110" // /* MW 10 */ + 11751 "10110010" // /* MW 9 */ + 11752 "00001111" // /* MW 8 */ + 11753 "01011011" // /* MW 7 */ + 11754 "00000001" // /* MW 6 */ + 11755 "00100000" // /* MW 5 */ + 11756 "00000000" // /* MW 4 */ + 11757 "11110000" // /* MW 3 */ + 11758 "00101100" // /* MW 2 */ + 11759 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 111 101 +.src_ref 4 "max_min_reduce.hpp" 93 30 +.src_ref 4 "max_min_reduce.hpp" 93 30 +.src_ref 4 "add_reduce.hpp" 322 47 +.src_ref 4 "add_reduce.hpp" 322 47 +.src_ref 3 "softmax_row_major.h" 85 63 +.src_ref 3 "softmax_row_major.h" 180 25 +.loop_nesting 0 + 11760 "11101001" // MOVA r4, #8; NOPB; MOVS p4, p1; SEL.EQZ r31, r30, r6, r27; MOV r1, #16; VCLR dm0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11761 "00000000" // /* MW 15 */ + 11762 "00000111" // /* MW 14 */ + 11763 "01011000" // /* MW 13 */ + 11764 "00010000" // /* MW 12 */ + 11765 "00101000" // /* MW 11 */ + 11766 "00010000" // /* MW 10 */ + 11767 "11110011" // /* MW 9 */ + 11768 "00111101" // /* MW 8 */ + 11769 "10001011" // /* MW 7 */ + 11770 "10000100" // /* MW 6 */ + 11771 "00100100" // /* MW 5 */ + 11772 "00000000" // /* MW 4 */ + 11773 "00000000" // /* MW 3 */ + 11774 "00000100" // /* MW 2 */ + 11775 "00000001" // /* MW 1 */ +.src_ref 4 "max_min_reduce.hpp" 93 30 +.src_ref 5 "accum.hpp" 198 120 first +.src_ref 5 "accum.hpp" 938 83 first +.src_ref 8 "mask.hpp" 57 33 first +.src_ref 4 "add_reduce.hpp" 322 47 +.src_ref 3 "softmax_row_major.h" 148 30 first + 11776 "10111010" // VLDA.CONV.fp32.bf16 bmll2, [p0], #32; EXTEND.u16 r19, r31; MOV r3, #4 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11777 "01011000" // /* MW 9 */ + 11778 "00000100" // /* MW 8 */ + 11779 "01101000" // /* MW 7 */ + 11780 "10000000" // /* MW 6 */ + 11781 "00110101" // /* MW 5 */ + 11782 "00111111" // /* MW 4 */ + 11783 "00110000" // /* MW 3 */ + 11784 "10100001" // /* MW 2 */ + 11785 "00000011" // /* MW 1 */ +.src_ref 4 "max_min_reduce.hpp" 93 30 +.src_ref 4 "add_reduce.hpp" 324 44 +.src_ref 4 "add_reduce.hpp" 324 44 +.src_ref 4 "add_reduce.hpp" 324 44 +.src_ref 4 "add_reduce.hpp" 324 44 +.src_ref 4 "blend.hpp" 163 48 first +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 5 "add_accum.hpp" 20 92 + 11786 "10111010" // MOVA r20, #60; MOVX r26, #2; VSEL.16 x5, x9, x10, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11787 "00011000" // /* MW 9 */ + 11788 "10001101" // /* MW 8 */ + 11789 "01100110" // /* MW 7 */ + 11790 "01001001" // /* MW 6 */ + 11791 "10100000" // /* MW 5 */ + 11792 "00000001" // /* MW 4 */ + 11793 "00000000" // /* MW 3 */ + 11794 "10010100" // /* MW 2 */ + 11795 "00000111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 111 101 first +.src_ref 4 "vector.hpp" 1454 19 first + 11796 "11100100" // MOVX r24, #0; VMOV wl3, wl5 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11797 "01000101" // /* MW 5 */ + 11798 "10010110" // /* MW 4 */ + 11799 "00100011" // /* MW 3 */ + 11800 "00000000" // /* MW 2 */ + 11801 "00000110" // /* MW 1 */ +.src_ref 4 "vector.hpp" 212 115 +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 5 "accum.hpp" 153 115 +.src_ref 5 "accum.hpp" 153 115 +.src_ref 5 "accum.hpp" 1117 103 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "softmax_row_major.h" 153 29 +.src_ref 3 "softmax_row_major.h" 269 +.src_ref 3 "softmax_row_major.h" 277 31 + 11802 "10111010" // MOVA r17, #828; MOVX crRnd, r17; VMAX_LT.bf16 x8, r16, x8, x3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11803 "01111000" // /* MW 9 */ + 11804 "11110110" // /* MW 8 */ + 11805 "00100000" // /* MW 7 */ + 11806 "00000010" // /* MW 6 */ + 11807 "11010100" // /* MW 5 */ + 11808 "00100011" // /* MW 4 */ + 11809 "00000000" // /* MW 3 */ + 11810 "10010001" // /* MW 2 */ + 11811 "01100111" // /* MW 1 */ +.src_ref 4 "max_min_reduce.hpp" 93 30 first + 11812 "11011000" // VSHIFT x1, x8, x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11813 "00000110" // /* MW 3 */ + 11814 "11000000" // /* MW 2 */ + 11815 "00011000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first + 11816 "11111000" // VMAX_LT.bf16 x1, r16, x8, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11817 "11101100" // /* MW 3 */ + 11818 "11000000" // /* MW 2 */ + 11819 "00011000" // /* MW 1 */ +.src_ref 4 "max_min_reduce.hpp" 93 30 first + 11820 "11011000" // VSHIFT x8, x1, x0, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11821 "00010010" // /* MW 3 */ + 11822 "00001000" // /* MW 2 */ + 11823 "00011100" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first + 11824 "11111000" // VMAX_LT.bf16 x1, r16, x1, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11825 "01101100" // /* MW 3 */ + 11826 "10001100" // /* MW 2 */ + 11827 "00011000" // /* MW 1 */ +.src_ref 4 "max_min_reduce.hpp" 93 30 first + 11828 "11011000" // VSHIFT x8, x1, x0, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11829 "00001110" // /* MW 3 */ + 11830 "00001000" // /* MW 2 */ + 11831 "00011100" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first + 11832 "11111000" // VMAX_LT.bf16 x1, r16, x1, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11833 "01101100" // /* MW 3 */ + 11834 "10001100" // /* MW 2 */ + 11835 "00011000" // /* MW 1 */ +.src_ref 4 "max_min_reduce.hpp" 93 30 first +.src_ref 3 "softmax_row_major.h" 176 59 +.src_ref 3 "softmax_row_major.h" 176 61 + 11836 "10100100" // MOVX r26, #0; VSHIFT x8, x1, x0, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11837 "11010101" // /* MW 5 */ + 11838 "00010000" // /* MW 4 */ + 11839 "00101000" // /* MW 3 */ + 11840 "10000000" // /* MW 2 */ + 11841 "00000110" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 3 "softmax_row_major.h" 176 59 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 11842 "11100100" // LT r27, r26, r5; VMAX_LT.bf16 x1, r16, x1, x8 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11843 "11011001" // /* MW 5 */ + 11844 "00011000" // /* MW 4 */ + 11845 "01010001" // /* MW 3 */ + 11846 "11001011" // /* MW 2 */ + 11847 "11010110" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.src_ref 5 "accum.hpp" 198 120 first +.src_ref 5 "accum.hpp" 938 83 first +.src_ref 4 "broadcast.hpp" 102 58 first +.src_ref 5 "add_accum.hpp" 20 92 +.src_ref 3 "softmax_row_major.h" 148 30 first +.src_ref 3 "softmax_row_major.h" 176 59 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 11848 "01000110" // VLDA.CONV.fp32.bf16 bmll2, [p0], #32; SEL.EQZ r28, r23, r21, r27; VEXTBCST.16 x8, x1, #0; VSUB.f dm2, dm2, dm0, r20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 11849 "00111111" // /* MW 11 */ + 11850 "01000000" // /* MW 10 */ + 11851 "10100010" // /* MW 9 */ + 11852 "00110101" // /* MW 8 */ + 11853 "10010000" // /* MW 7 */ + 11854 "01000000" // /* MW 6 */ + 11855 "01010010" // /* MW 5 */ + 11856 "11111001" // /* MW 4 */ + 11857 "00110101" // /* MW 3 */ + 11858 "10100001" // /* MW 2 */ + 11859 "00000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 111 101 first +.src_ref 3 "softmax_row_major.h" 176 61 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11860 "11100100" // EQ r27, r5, r26; VCONV.fp32.bf16 bmll0, wl8 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11861 "01100101" // /* MW 5 */ + 11862 "00100010" // /* MW 4 */ + 11863 "11110000" // /* MW 3 */ + 11864 "11110100" // /* MW 2 */ + 11865 "00101110" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 471 87 first +.src_ref 3 "softmax_row_major.h" 176 59 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11866 "11100100" // SEL.EQZ r25, r28, r6, r27; VMOV bmhl0, bmll0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11867 "00100101" // /* MW 5 */ + 11868 "00000000" // /* MW 4 */ + 11869 "01000001" // /* MW 3 */ + 11870 "01001100" // /* MW 2 */ + 11871 "11100110" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 112 37 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11872 "11100100" // ADD r26, r26, #1; VBCST.16 x1, r24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11873 "11100101" // /* MW 5 */ + 11874 "11000010" // /* MW 4 */ + 11875 "11100001" // /* MW 3 */ + 11876 "10000000" // /* MW 2 */ + 11877 "11010110" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first +.src_ref 5 "accum.hpp" 938 83 first +.src_ref 8 "mask.hpp" 57 33 first +.src_ref 5 "add_accum.hpp" 20 92 first +.src_ref 3 "softmax_row_major.h" 112 4 first +.src_ref 3 "softmax_row_major.h" 148 30 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11878 "01000110" // VLDA.CONV.fp32.bf16 bmll2, [p0], #32; EXTEND.u16 r22, r25; ADD.NC lc, r29, #-3; VSUB.f dm2, dm2, dm0, r20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 11879 "00111111" // /* MW 11 */ + 11880 "01000000" // /* MW 10 */ + 11881 "10100010" // /* MW 9 */ + 11882 "11101001" // /* MW 8 */ + 11883 "11101111" // /* MW 7 */ + 11884 "01010111" // /* MW 6 */ + 11885 "10110000" // /* MW 5 */ + 11886 "01101100" // /* MW 4 */ + 11887 "00110110" // /* MW 3 */ + 11888 "10100001" // /* MW 2 */ + 11889 "00000011" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 176 59 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11890 "10011000" // LT r27, r26, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11891 "01011010" // /* MW 3 */ + 11892 "10110110" // /* MW 2 */ + 11893 "00010110" // /* MW 1 */ +.src_ref 4 "vector.hpp" 212 115 first +.src_ref 5 "accum.hpp" 153 115 first +.src_ref 3 "softmax_row_major.h" 153 29 first +.src_ref 3 "softmax_row_major.h" 176 59 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11894 "01011100" // VCONV.bf16.fp32 wl6, bmll2; SEL.EQZ r28, r23, r21, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11895 "10100100" // /* MW 5 */ + 11896 "11110010" // /* MW 4 */ + 11897 "11001011" // /* MW 3 */ + 11898 "00100010" // /* MW 2 */ + 11899 "01101000" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 112 37 +.src_ref 3 "softmax_row_major.h" 176 61 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11900 "00100100" // EQ r27, r5, r26; ADD.NC r26, r26, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11901 "00000001" // /* MW 5 */ + 11902 "00111010" // /* MW 4 */ + 11903 "11111101" // /* MW 3 */ + 11904 "11110100" // /* MW 2 */ + 11905 "00101110" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "softmax_row_major.h" 176 59 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11906 "01100010" // SEL.EQZ r25, r28, r6, r27; VMUL.f dm3, x6, x0, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11907 "00000001" // /* MW 7 */ + 11908 "11101100" // /* MW 6 */ + 11909 "10001011" // /* MW 5 */ + 11910 "00010001" // /* MW 4 */ + 11911 "10010011" // /* MW 3 */ + 11912 "00111001" // /* MW 2 */ + 11913 "00000000" // /* MW 1 */ +.src_ref 5 "add_accum.hpp" 20 92 first +.src_ref 3 "softmax_row_major.h" 176 59 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11914 "01100010" // LT r27, r26, r5; VSUB.f dm2, dm2, dm0, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11915 "00111111" // /* MW 7 */ + 11916 "01000000" // /* MW 6 */ + 11917 "10100010" // /* MW 5 */ + 11918 "11010101" // /* MW 4 */ + 11919 "10110010" // /* MW 3 */ + 11920 "00110101" // /* MW 2 */ + 11921 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 212 115 first +.src_ref 5 "accum.hpp" 153 115 first +.src_ref 3 "softmax_row_major.h" 112 4 first +.src_ref 3 "softmax_row_major.h" 153 29 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11922 "00111010" // VCONV.bf16.fp32 wl6, bmll2; MOVXM ls, #11968 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11923 "00010001" // /* MW 9 */ + 11924 "01100000" // /* MW 8 */ + 11925 "01111111" // /* MW 7 */ + 11926 "00001000" // /* MW 6 */ + 11927 "00000000" // /* MW 5 */ + 11928 "00000000" // /* MW 4 */ + 11929 "11000000" // /* MW 3 */ + 11930 "00100010" // /* MW 2 */ + 11931 "01101000" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 112 4 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 11932 "01000100" // MOVXM le, #12048 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11933 "00100000" // /* MW 5 */ + 11934 "11111110" // /* MW 4 */ + 11935 "00100110" // /* MW 3 */ + 11936 "00000000" // /* MW 2 */ + 11937 "00000000" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "softmax_row_major.h" 176 59 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 11938 "01100010" // SEL.EQZ r28, r23, r21, r27; VMUL.f dm3, x6, x0, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11939 "00000001" // /* MW 7 */ + 11940 "11101100" // /* MW 6 */ + 11941 "10001011" // /* MW 5 */ + 11942 "10010001" // /* MW 4 */ + 11943 "11001010" // /* MW 3 */ + 11944 "00101111" // /* MW 2 */ + 11945 "00000000" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 176 61 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 11946 "10011000" // EQ r27, r5, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11947 "10100111" // /* MW 3 */ + 11948 "01110111" // /* MW 2 */ + 11949 "00010001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 212 115 first +.src_ref 5 "accum.hpp" 153 115 first +.src_ref 5 "elementary.hpp" 473 55 first +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11950 "11111000" // VEXP2 wl4, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11951 "11110010" // /* MW 3 */ + 11952 "01011000" // /* MW 2 */ + 11953 "00011010" // /* MW 1 */ +.src_ref 8 "mask.hpp" 57 33 first +.src_ref 4 "blend.hpp" 163 48 first + 11954 "01100100" // EXTEND.u16 r22, r25; VSEL.16 x7, x1, x4, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11955 "01100100" // /* MW 5 */ + 11956 "00010100" // /* MW 4 */ + 11957 "00000111" // /* MW 3 */ + 11958 "10010110" // /* MW 2 */ + 11959 "11001101" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 11960 "00100010" // NOPA; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11961 "00011100" // /* MW 7 */ + 11962 "00000000" // /* MW 6 */ + 11963 "00000000" // /* MW 5 */ + 11964 "00000100" // /* MW 4 */ + 11965 "11110000" // /* MW 3 */ + 11966 "00101100" // /* MW 2 */ + 11967 "00000000" // /* MW 1 */ +.label ZLS_F_Z17softmax_row_majorILi1E8bfloat16S0_Lt1EEvPT0_PT1_33softmax_row_major_internal_params_480 +.src_ref 4 "vector.hpp" 111 101 first +.src_ref 4 "vector.hpp" 111 101 first +.src_ref 5 "accum.hpp" 198 120 first +.src_ref 5 "accum.hpp" 198 120 first +.src_ref 5 "accum.hpp" 938 83 first +.src_ref 5 "accum.hpp" 938 83 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "softmax_row_major.h" 148 30 first +.src_ref 3 "softmax_row_major.h" 176 59 first +.src_ref 3 "softmax_row_major.h" 180 25 first +.begin_of_loop +.aggressive_scheduled_block_id 4 +.noswbrkpt +.loop_nesting 1 + 11968 "11101011" // VLDA.CONV.fp32.bf16 bmll2, [p0], #32;NOPB; VST wl7, [p4], #32; SEL.EQZ r25, r28, r6, r27; VCONV.fp32.bf16 bmll4, wl7; VADD.f dm4, dm1, dm4, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11969 "10000001" // /* MW 15 */ + 11970 "00100001" // /* MW 14 */ + 11971 "01111101" // /* MW 13 */ + 11972 "10011001" // /* MW 12 */ + 11973 "00000111" // /* MW 11 */ + 11974 "00010010" // /* MW 10 */ + 11975 "10010011" // /* MW 9 */ + 11976 "10111001" // /* MW 8 */ + 11977 "11101010" // /* MW 7 */ + 11978 "00011101" // /* MW 6 */ + 11979 "00100100" // /* MW 5 */ + 11980 "00000000" // /* MW 4 */ + 11981 "00110000" // /* MW 3 */ + 11982 "10100001" // /* MW 2 */ + 11983 "00000011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 +.src_ref 3 "softmax_row_major.h" 112 37 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 11984 "11100100" // ADD r26, r26, #1; VMOV bmll1, bmhl0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11985 "00100101" // /* MW 5 */ + 11986 "00000100" // /* MW 4 */ + 11987 "11100010" // /* MW 3 */ + 11988 "10000000" // /* MW 2 */ + 11989 "11010110" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 176 59 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 11990 "11111010" // NOPA; NOPS; LT r27, r26, r5 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11991 "01011010" // /* MW 9 */ + 11992 "10110110" // /* MW 8 */ + 11993 "00000110" // /* MW 7 */ + 11994 "00000000" // /* MW 6 */ + 11995 "01011011" // /* MW 5 */ + 11996 "00000001" // /* MW 4 */ + 11997 "11110000" // /* MW 3 */ + 11998 "00101100" // /* MW 2 */ + 11999 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 212 115 first +.src_ref 5 "accum.hpp" 153 115 first +.src_ref 3 "softmax_row_major.h" 153 29 first +.src_ref 3 "softmax_row_major.h" 176 59 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12000 "11100001" // NOPA; NOPB; VCONV.bf16.fp32 wl6, bmll2; SEL.EQZ r28, r23, r21, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12001 "00000000" // /* MW 15 */ + 12002 "00000000" // /* MW 14 */ + 12003 "01111000" // /* MW 13 */ + 12004 "10100101" // /* MW 12 */ + 12005 "00000001" // /* MW 11 */ + 12006 "10010000" // /* MW 10 */ + 12007 "11001010" // /* MW 9 */ + 12008 "00101111" // /* MW 8 */ + 12009 "00010110" // /* MW 7 */ + 12010 "01000001" // /* MW 6 */ + 12011 "00100011" // /* MW 5 */ + 12012 "00000000" // /* MW 4 */ + 12013 "11110000" // /* MW 3 */ + 12014 "00101100" // /* MW 2 */ + 12015 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 212 115 +.src_ref 5 "accum.hpp" 153 115 +.src_ref 5 "add_accum.hpp" 20 92 first +.src_ref 5 "elementary.hpp" 473 55 first +.src_ref 3 "softmax_row_major.h" 176 61 first + 12016 "11111011" // NOPA; NOPB; NOPS; EQ r27, r5, r26; VEXP2 wl4, bmll3; VSUB.f dm2, dm2, dm0, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12017 "00000001" // /* MW 15 */ + 12018 "00010010" // /* MW 14 */ + 12019 "01111101" // /* MW 13 */ + 12020 "01111001" // /* MW 12 */ + 12021 "00101100" // /* MW 11 */ + 12022 "00111101" // /* MW 10 */ + 12023 "10111101" // /* MW 9 */ + 12024 "00001011" // /* MW 8 */ + 12025 "01011011" // /* MW 7 */ + 12026 "00000001" // /* MW 6 */ + 12027 "00100000" // /* MW 5 */ + 12028 "00000000" // /* MW 4 */ + 12029 "11110000" // /* MW 3 */ + 12030 "00101100" // /* MW 2 */ + 12031 "00000000" // /* MW 1 */ +.src_ref 8 "mask.hpp" 57 33 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 4 "blend.hpp" 163 48 first + 12032 "00001011" // NOPA; NOPB; NOPS; EXTEND.u16 r22, r25; VSEL.16 x7, x1, x4, r22; VMUL.f dm3, x6, x0, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12033 "01100000" // /* MW 15 */ + 12034 "01011111" // /* MW 14 */ + 12035 "00011100" // /* MW 13 */ + 12036 "00011001" // /* MW 12 */ + 12037 "11000101" // /* MW 11 */ + 12038 "10000001" // /* MW 10 */ + 12039 "01100101" // /* MW 9 */ + 12040 "00110011" // /* MW 8 */ + 12041 "01011011" // /* MW 7 */ + 12042 "00000001" // /* MW 6 */ + 12043 "00100000" // /* MW 5 */ + 12044 "00000000" // /* MW 4 */ + 12045 "11110000" // /* MW 3 */ + 12046 "00101100" // /* MW 2 */ + 12047 "00000000" // /* MW 1 */ +.label ZLE_F_Z17softmax_row_majorILi1E8bfloat16S0_Lt1EEvPT0_PT1_33softmax_row_major_internal_params_560 +.src_ref 5 "accum.hpp" 153 115 first +.end_of_loop +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 12048 "11100001" // NOPA; NOPB; NOPS; NOPX; VMOV bmhl0, bmll4; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12049 "00000000" // /* MW 15 */ + 12050 "00000000" // /* MW 14 */ + 12051 "01111000" // /* MW 13 */ + 12052 "00001001" // /* MW 12 */ + 12053 "01001000" // /* MW 11 */ + 12054 "00000000" // /* MW 10 */ + 12055 "00000000" // /* MW 9 */ + 12056 "00000000" // /* MW 8 */ + 12057 "01011011" // /* MW 7 */ + 12058 "00000001" // /* MW 6 */ + 12059 "00100000" // /* MW 5 */ + 12060 "00000000" // /* MW 4 */ + 12061 "11110000" // /* MW 3 */ + 12062 "00101100" // /* MW 2 */ + 12063 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 111 101 first +.src_ref 4 "vector.hpp" 111 101 first +.src_ref 5 "accum.hpp" 198 120 first +.src_ref 5 "accum.hpp" 938 83 first +.src_ref 4 "add_reduce.hpp" 322 47 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "softmax_row_major.h" 176 59 first +.src_ref 3 "softmax_row_major.h" 180 25 first +.aggressive_scheduled_block_id 5 +.noswbrkpt +.loop_nesting 0 + 12064 "11101011" // MOVA r18, #32; NOPB; VST wl7, [p4], #32; SEL.EQZ r25, r28, r6, r27; VCONV.fp32.bf16 bmll4, wl7; VADD.f dm4, dm1, dm4, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12065 "10000001" // /* MW 15 */ + 12066 "00100001" // /* MW 14 */ + 12067 "01111101" // /* MW 13 */ + 12068 "10011001" // /* MW 12 */ + 12069 "00000111" // /* MW 11 */ + 12070 "00010010" // /* MW 10 */ + 12071 "10010011" // /* MW 9 */ + 12072 "10111001" // /* MW 8 */ + 12073 "11101010" // /* MW 7 */ + 12074 "00011101" // /* MW 6 */ + 12075 "00100100" // /* MW 5 */ + 12076 "00000000" // /* MW 4 */ + 12077 "00000000" // /* MW 3 */ + 12078 "00010010" // /* MW 2 */ + 12079 "00000100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 +.src_ref 5 "accum.hpp" 198 120 +.src_ref 3 "softmax_row_major.h" 269 36 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12080 "10111010" // LDA.u8 r7, [p2, #-2]; MOVX vaddSign0, #1; VMOV bmll1, bmhl0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12081 "01111000" // /* MW 9 */ + 12082 "00001001" // /* MW 8 */ + 12083 "10000001" // /* MW 7 */ + 12084 "00000000" // /* MW 6 */ + 12085 "11010010" // /* MW 5 */ + 12086 "00000010" // /* MW 4 */ + 12087 "01010000" // /* MW 3 */ + 12088 "10011101" // /* MW 2 */ + 12089 "01011100" // /* MW 1 */ + 12090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12091 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 212 115 first +.src_ref 5 "accum.hpp" 153 115 first +.src_ref 3 "softmax_row_major.h" 153 29 first + 12092 "00011000" // VCONV.bf16.fp32 wl6, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12093 "00010110" // /* MW 3 */ + 12094 "01000001" // /* MW 2 */ + 12095 "00001011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 212 115 +.src_ref 5 "accum.hpp" 153 115 +.src_ref 5 "elementary.hpp" 473 55 first + 12096 "11111000" // VEXP2 wl4, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12097 "11110010" // /* MW 3 */ + 12098 "01011000" // /* MW 2 */ + 12099 "00011010" // /* MW 1 */ +.src_ref 8 "mask.hpp" 57 33 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 4 "blend.hpp" 163 48 first + 12100 "01011010" // EXTEND.u16 r22, r25; VSEL.16 x7, x1, x4, r22; VMUL.f dm3, x6, x0, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12101 "00000001" // /* MW 9 */ + 12102 "11101100" // /* MW 8 */ + 12103 "10001011" // /* MW 7 */ + 12104 "00100011" // /* MW 6 */ + 12105 "10100011" // /* MW 5 */ + 12106 "00111000" // /* MW 4 */ + 12107 "10110000" // /* MW 3 */ + 12108 "01101100" // /* MW 2 */ + 12109 "00000110" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 12110 "11111000" // VMOV bmhl0, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12111 "00010010" // /* MW 3 */ + 12112 "10010000" // /* MW 2 */ + 12113 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 111 101 first +.src_ref 4 "vector.hpp" 111 101 first +.src_ref 5 "accum.hpp" 198 120 first +.src_ref 5 "accum.hpp" 938 83 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "softmax_row_major.h" 180 25 first +.aggressive_scheduled_block_id 6 +.noswbrkpt + 12114 "01001010" // VST wl7, [p4], #32; VCONV.fp32.bf16 bmll4, wl7; VADD.f dm4, dm1, dm4, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12115 "00111101" // /* MW 9 */ + 12116 "00110000" // /* MW 8 */ + 12117 "10100100" // /* MW 7 */ + 12118 "11100100" // /* MW 6 */ + 12119 "00110010" // /* MW 5 */ + 12120 "00001111" // /* MW 4 */ + 12121 "01010100" // /* MW 3 */ + 12122 "10111101" // /* MW 2 */ + 12123 "10000011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 +.src_ref 3 "softmax_row_major.h" 269 47 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12124 "11100100" // NE r7, r7, r2; VMOV bmll1, bmhl0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12125 "00100101" // /* MW 5 */ + 12126 "00000100" // /* MW 4 */ + 12127 "00010010" // /* MW 3 */ + 12128 "11000101" // /* MW 2 */ + 12129 "00111001" // /* MW 1 */ + 12130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12131 "00000000" // /* MW 1 */ + 12132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12133 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 212 115 first +.src_ref 5 "accum.hpp" 153 115 first +.src_ref 5 "elementary.hpp" 473 55 first + 12134 "11111000" // VEXP2 wl4, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12135 "11110010" // /* MW 3 */ + 12136 "01011000" // /* MW 2 */ + 12137 "00011010" // /* MW 1 */ +.src_ref 4 "blend.hpp" 163 48 first + 12138 "00111000" // VSEL.16 x7, x1, x4, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12139 "00110010" // /* MW 3 */ + 12140 "10001010" // /* MW 2 */ + 12141 "00011011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 12142 "11111000" // VMOV bmhl0, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12143 "00010010" // /* MW 3 */ + 12144 "10010000" // /* MW 2 */ + 12145 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 111 101 first +.src_ref 5 "accum.hpp" 198 120 first +.src_ref 5 "accum.hpp" 938 83 first +.src_ref 5 "add_accum.hpp" 19 92 first +.aggressive_scheduled_block_id 7 +.noswbrkpt + 12146 "01100010" // VCONV.fp32.bf16 bmll4, wl7; VADD.f dm4, dm1, dm4, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12147 "00111101" // /* MW 7 */ + 12148 "00110000" // /* MW 6 */ + 12149 "10100100" // /* MW 5 */ + 12150 "11100110" // /* MW 4 */ + 12151 "00110010" // /* MW 3 */ + 12152 "00001111" // /* MW 2 */ + 12153 "00000100" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12154 "11111000" // VMOV bmll1, bmhl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12155 "00010010" // /* MW 3 */ + 12156 "00000010" // /* MW 2 */ + 12157 "00011001" // /* MW 1 */ + 12158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12159 "00000000" // /* MW 1 */ + 12160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12161 "00000000" // /* MW 1 */ + 12162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12163 "00000000" // /* MW 1 */ + 12164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12165 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first + 12166 "11111000" // VMOV bmhl0, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12167 "00010010" // /* MW 3 */ + 12168 "10010000" // /* MW 2 */ + 12169 "00011000" // /* MW 1 */ + 12170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12171 "00000000" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 322 47 + 12172 "11111000" // VMOV x2, bmhl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12173 "00010010" // /* MW 3 */ + 12174 "00100010" // /* MW 2 */ + 12175 "00011001" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 322 47 first +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id first + 12176 "11011000" // VSHIFT x2, x2, x0, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12177 "01001010" // /* MW 3 */ + 12178 "00010000" // /* MW 2 */ + 12179 "00011001" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first +.src_ref 4 "add_reduce.hpp" 324 44 first +.aggressive_scheduled_block_id 8 +.noswbrkpt + 12180 "01100010" // VMOV bmll2, x2; VADD.f dm0, dm0, dm2, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12181 "00111101" // /* MW 7 */ + 12182 "00001000" // /* MW 6 */ + 12183 "10100000" // /* MW 5 */ + 12184 "11100110" // /* MW 4 */ + 12185 "10010010" // /* MW 3 */ + 12186 "00000100" // /* MW 2 */ + 12187 "00000010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12188 "11111000" // VMOV bmll0, bmhl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12189 "00010010" // /* MW 3 */ + 12190 "00000010" // /* MW 2 */ + 12191 "00011000" // /* MW 1 */ + 12192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12193 "00000000" // /* MW 1 */ + 12194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12195 "00000000" // /* MW 1 */ + 12196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12197 "00000000" // /* MW 1 */ + 12198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12199 "00000000" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 324 22 +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id first + 12200 "11111000" // VMOV x2, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12201 "00010010" // /* MW 3 */ + 12202 "00100000" // /* MW 2 */ + 12203 "00011001" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 322 47 first +.src_ref 4 "add_reduce.hpp" 324 44 +.aggressive_scheduled_block_id 9 +.noswbrkpt + 12204 "01100010" // VSHIFT x2, x2, x0, r1; VADD.f dm0, dm0, dm2, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12205 "00111101" // /* MW 7 */ + 12206 "00001000" // /* MW 6 */ + 12207 "10100000" // /* MW 5 */ + 12208 "11000110" // /* MW 4 */ + 12209 "00000110" // /* MW 3 */ + 12210 "00010000" // /* MW 2 */ + 12211 "00000001" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12212 "11111000" // VMOV bmll2, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12213 "10010010" // /* MW 3 */ + 12214 "00000100" // /* MW 2 */ + 12215 "00011010" // /* MW 1 */ + 12216 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12217 "00000000" // /* MW 1 */ + 12218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12219 "00000000" // /* MW 1 */ + 12220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12221 "00000000" // /* MW 1 */ + 12222 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12223 "00000000" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 324 22 first +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id first + 12224 "11111000" // VMOV x2, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12225 "00010010" // /* MW 3 */ + 12226 "00100000" // /* MW 2 */ + 12227 "00011001" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 322 47 first +.src_ref 4 "add_reduce.hpp" 324 44 +.aggressive_scheduled_block_id 10 +.noswbrkpt + 12228 "01100010" // VSHIFT x2, x2, x0, r4; VADD.f dm0, dm0, dm2, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12229 "00111101" // /* MW 7 */ + 12230 "00001000" // /* MW 6 */ + 12231 "10100000" // /* MW 5 */ + 12232 "11000110" // /* MW 4 */ + 12233 "00010010" // /* MW 3 */ + 12234 "00010000" // /* MW 2 */ + 12235 "00000001" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12236 "11111000" // VMOV bmll2, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12237 "10010010" // /* MW 3 */ + 12238 "00000100" // /* MW 2 */ + 12239 "00011010" // /* MW 1 */ + 12240 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12241 "00000000" // /* MW 1 */ + 12242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12243 "00000000" // /* MW 1 */ + 12244 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12245 "00000000" // /* MW 1 */ + 12246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12247 "00000000" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 324 22 first +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id first + 12248 "11111000" // VMOV x2, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12249 "00010010" // /* MW 3 */ + 12250 "00100000" // /* MW 2 */ + 12251 "00011001" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 322 47 first +.src_ref 4 "add_reduce.hpp" 324 44 +.aggressive_scheduled_block_id 11 +.noswbrkpt + 12252 "01100010" // VSHIFT x2, x2, x0, r3; VADD.f dm0, dm0, dm2, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12253 "00111101" // /* MW 7 */ + 12254 "00001000" // /* MW 6 */ + 12255 "10100000" // /* MW 5 */ + 12256 "11000110" // /* MW 4 */ + 12257 "00001110" // /* MW 3 */ + 12258 "00010000" // /* MW 2 */ + 12259 "00000001" // /* MW 1 */ +.src_ref 5 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12260 "11111000" // VMOV bmll2, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12261 "10010010" // /* MW 3 */ + 12262 "00000100" // /* MW 2 */ + 12263 "00011010" // /* MW 1 */ + 12264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12265 "00000000" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 269 26 first + 12266 "10000100" // JNZ r7, #12320 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12320 delay_slots=5 */ + 12267 "00000001" // /* MW 5 */ + 12268 "01000000" // /* MW 4 */ + 12269 "00010000" // /* MW 3 */ + 12270 "00011000" // /* MW 2 */ + 12271 "00111000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 111 101 first +.src_ref 3 "softmax_row_major.h" 180 25 first +.delay_slot + 12272 "10011000" // VST wl7, [p4], #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12273 "11101010" // /* MW 3 */ + 12274 "00011101" // /* MW 2 */ + 12275 "00001100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12277 "00000000" // /* MW 1 */ +.src_ref 4 "add_reduce.hpp" 324 22 first +.delay_slot + 12278 "11111000" // VMOV x2, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12279 "00010010" // /* MW 3 */ + 12280 "00100000" // /* MW 2 */ + 12281 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 915 23 first +.delay_slot + 12282 "10111000" // VEXTRACT.32 r2, x2, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12283 "00000001" // /* MW 3 */ + 12284 "10001010" // /* MW 2 */ + 12285 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12287 "00000000" // /* MW 1 */ + 12288 "10000100" // J #12336 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=12336 delay_slots=5 */ + 12289 "00000000" // /* MW 5 */ + 12290 "00000000" // /* MW 4 */ + 12291 "00011000" // /* MW 3 */ + 12292 "00011000" // /* MW 2 */ + 12293 "00000000" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 269 54 +.delay_slot + 12294 "01000100" // MOVXM r1, #1065353216 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12295 "00000000" // /* MW 5 */ + 12296 "10100000" // /* MW 4 */ + 12297 "00000000" // /* MW 3 */ + 12298 "10000000" // /* MW 2 */ + 12299 "00111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12300 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12301 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12303 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12305 "00000000" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 269 54 first +.delay_slot + 12306 "00101110" // NOPA; NOPS; VINSERT.32 x0, x0, #0, r1; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 12307 "00011100" // /* MW 13 */ + 12308 "00000000" // /* MW 12 */ + 12309 "00000000" // /* MW 11 */ + 12310 "10001011" // /* MW 10 */ + 12311 "00000001" // /* MW 9 */ + 12312 "01000000" // /* MW 8 */ + 12313 "00000000" // /* MW 7 */ + 12314 "00000000" // /* MW 6 */ + 12315 "10110110" // /* MW 5 */ + 12316 "00000010" // /* MW 4 */ + 12317 "11110000" // /* MW 3 */ + 12318 "00101100" // /* MW 2 */ + 12319 "00000000" // /* MW 1 */ +.label TGT_F_Z17softmax_row_majorILi1E8bfloat16S0_Lt1EEvPT0_PT1_33softmax_row_major_internal_params_832 +.src_ref 5 "elementary.hpp" 618 15 first + 12320 "00011000" // INV r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12321 "00000100" // /* MW 3 */ + 12322 "10000011" // /* MW 2 */ + 12323 "00010000" // /* MW 1 */ + 12324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12325 "00000000" // /* MW 1 */ + 12326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12327 "00000000" // /* MW 1 */ + 12328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12329 "00000000" // /* MW 1 */ +.src_ref 3 "softmax.h" 166 25 first + 12330 "11010100" // NOPA; VINSERT.32 x0, x0, #0, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12331 "01100010" // /* MW 5 */ + 12332 "00000000" // /* MW 4 */ + 12333 "11110000" // /* MW 3 */ + 12334 "00101100" // /* MW 2 */ + 12335 "00000000" // /* MW 1 */ +.label TGT_F_Z17softmax_row_majorILi1E8bfloat16S0_Lt1EEvPT0_PT1_33softmax_row_major_internal_params_848 +.src_ref 4 "vector.hpp" 212 115 first +.src_ref 5 "accum.hpp" 153 115 +.src_ref 5 "accum.hpp" 1117 103 +.src_ref 3 "softmax_row_major.h" 269 +.src_ref 3 "softmax_row_major.h" 275 25 first +.src_ref 3 "softmax_row_major.h" 277 31 +.aggressive_scheduled_block_id 12 +.aggressive_scheduled_block_id first + 12336 "10111010" // VLDB wl1, [p1], #32; MOVS p0, p1; VMOV bmll0, x0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12337 "01110110" // /* MW 9 */ + 12338 "01001001" // /* MW 8 */ + 12339 "00000000" // /* MW 7 */ + 12340 "00000000" // /* MW 6 */ + 12341 "01100100" // /* MW 5 */ + 12342 "00011100" // /* MW 4 */ + 12343 "01100001" // /* MW 3 */ + 12344 "10010001" // /* MW 2 */ + 12345 "00010000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 212 115 +.src_ref 3 "softmax_row_major.h" 273 12 first +.src_ref 3 "softmax_row_major.h" 275 25 +.aggressive_scheduled_block_id 12 +.noswbrkpt + 12346 "00111010" // VLDB wl1, [p1], #32; MOVXM ls, #12432 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12347 "00010000" // /* MW 9 */ + 12348 "01001000" // /* MW 8 */ + 12349 "01111000" // /* MW 7 */ + 12350 "00001100" // /* MW 6 */ + 12351 "00000000" // /* MW 5 */ + 12352 "00000000" // /* MW 4 */ + 12353 "11001000" // /* MW 3 */ + 12354 "00111000" // /* MW 2 */ + 12355 "00000010" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 269 first +.src_ref 3 "softmax_row_major.h" 273 12 +.aggressive_scheduled_block_id 12 +.nohwbrkpt +.noswbrkpt + 12356 "00111010" // VCONV.bf16.fp32 wl0, bmll0; MOVXM le, #12480 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12357 "00010001" // /* MW 9 */ + 12358 "01100000" // /* MW 8 */ + 12359 "10111000" // /* MW 7 */ + 12360 "00001101" // /* MW 6 */ + 12361 "00000000" // /* MW 5 */ + 12362 "00000000" // /* MW 4 */ + 12363 "11000000" // /* MW 3 */ + 12364 "00000010" // /* MW 2 */ + 12365 "00001000" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 273 12 first +.aggressive_scheduled_block_id 12 +.nohwbrkpt +.noswbrkpt + 12366 "10011000" // ADD.NC lc, r0, #-3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12367 "01111110" // /* MW 3 */ + 12368 "01110000" // /* MW 2 */ + 12369 "00011101" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 269 first +.aggressive_scheduled_block_id 12 +.nohwbrkpt +.noswbrkpt + 12370 "01011000" // VEXTBCST.16 x0, x0, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12371 "00000011" // /* MW 3 */ + 12372 "00000001" // /* MW 2 */ + 12373 "00011000" // /* MW 1 */ +.aggressive_scheduled_block_id 12 +.nohwbrkpt +.noswbrkpt + 12374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12375 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 12 +.nohwbrkpt +.noswbrkpt + 12376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12377 "00000000" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.aggressive_scheduled_block_id 12 +.nohwbrkpt +.noswbrkpt + 12378 "01001000" // VMUL.f dm0, x1, x0, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12379 "00000001" // /* MW 3 */ + 12380 "11100010" // /* MW 2 */ + 12381 "10001000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 212 115 first +.src_ref 3 "softmax_row_major.h" 275 25 first +.aggressive_scheduled_block_id 12 +.noswbrkpt + 12382 "00011000" // VLDB wl1, [p1], #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12383 "01100100" // /* MW 3 */ + 12384 "00011100" // /* MW 2 */ + 12385 "00111001" // /* MW 1 */ +.aggressive_scheduled_block_id 12 +.nohwbrkpt +.noswbrkpt + 12386 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 12387 "00011100" // /* MW 13 */ + 12388 "00000000" // /* MW 12 */ + 12389 "00000000" // /* MW 11 */ + 12390 "01010111" // /* MW 10 */ + 12391 "00011010" // /* MW 9 */ + 12392 "01000000" // /* MW 8 */ + 12393 "00000000" // /* MW 7 */ + 12394 "00000000" // /* MW 6 */ + 12395 "10110110" // /* MW 5 */ + 12396 "00000010" // /* MW 4 */ + 12397 "11110000" // /* MW 3 */ + 12398 "00101100" // /* MW 2 */ + 12399 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 12 +.nohwbrkpt +.noswbrkpt + 12400 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12401 "00000000" // /* MW 15 */ + 12402 "00000000" // /* MW 14 */ + 12403 "01111000" // /* MW 13 */ + 12404 "10100101" // /* MW 12 */ + 12405 "00000001" // /* MW 11 */ + 12406 "00000000" // /* MW 10 */ + 12407 "00000000" // /* MW 9 */ + 12408 "00000000" // /* MW 8 */ + 12409 "01011011" // /* MW 7 */ + 12410 "00000001" // /* MW 6 */ + 12411 "00100000" // /* MW 5 */ + 12412 "00000000" // /* MW 4 */ + 12413 "11110000" // /* MW 3 */ + 12414 "00101100" // /* MW 2 */ + 12415 "00000000" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.aggressive_scheduled_block_id 12 +.nohwbrkpt +.noswbrkpt + 12416 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMUL.f dm0, x1, x0, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12417 "00010000" // /* MW 15 */ + 12418 "01000111" // /* MW 14 */ + 12419 "01111100" // /* MW 13 */ + 12420 "10100101" // /* MW 12 */ + 12421 "00000001" // /* MW 11 */ + 12422 "00000000" // /* MW 10 */ + 12423 "00000000" // /* MW 9 */ + 12424 "00000000" // /* MW 8 */ + 12425 "01011011" // /* MW 7 */ + 12426 "00000001" // /* MW 6 */ + 12427 "00100000" // /* MW 5 */ + 12428 "00000000" // /* MW 4 */ + 12429 "11110000" // /* MW 3 */ + 12430 "00101100" // /* MW 2 */ + 12431 "00000000" // /* MW 1 */ +.label ZLS_F_Z17softmax_row_majorILi1E8bfloat16S0_Lt1EEvPT0_PT1_33softmax_row_major_internal_params_944 +.src_ref 4 "vector.hpp" 212 115 first +.src_ref 3 "softmax_row_major.h" 275 25 first +.begin_of_loop +.aggressive_scheduled_block_id 12 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 12432 "11100001" // NOPA; VLDB wl1, [p1], #32; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12433 "00000000" // /* MW 15 */ + 12434 "00000000" // /* MW 14 */ + 12435 "01111000" // /* MW 13 */ + 12436 "10100101" // /* MW 12 */ + 12437 "00000001" // /* MW 11 */ + 12438 "00000000" // /* MW 10 */ + 12439 "00000000" // /* MW 9 */ + 12440 "00000000" // /* MW 8 */ + 12441 "01011011" // /* MW 7 */ + 12442 "00000001" // /* MW 6 */ + 12443 "11001000" // /* MW 5 */ + 12444 "00111000" // /* MW 4 */ + 12445 "11110010" // /* MW 3 */ + 12446 "00101100" // /* MW 2 */ + 12447 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.src_ref 5 "accum.hpp" 1117 103 first +.src_ref 3 "softmax_row_major.h" 277 31 first +.aggressive_scheduled_block_id 12 +.nohwbrkpt +.noswbrkpt + 12448 "11100001" // NOPA; NOPB; VST.CONV.bf16.fp32 bmll0, [p0], #32;NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12449 "00000000" // /* MW 15 */ + 12450 "00000000" // /* MW 14 */ + 12451 "01111000" // /* MW 13 */ + 12452 "10100101" // /* MW 12 */ + 12453 "00000001" // /* MW 11 */ + 12454 "00000000" // /* MW 10 */ + 12455 "00000000" // /* MW 9 */ + 12456 "10000000" // /* MW 8 */ + 12457 "00010010" // /* MW 7 */ + 12458 "00011100" // /* MW 6 */ + 12459 "00100000" // /* MW 5 */ + 12460 "00000000" // /* MW 4 */ + 12461 "11110000" // /* MW 3 */ + 12462 "00101100" // /* MW 2 */ + 12463 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 12 +.nohwbrkpt +.noswbrkpt + 12464 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12465 "00000000" // /* MW 15 */ + 12466 "00000000" // /* MW 14 */ + 12467 "01111000" // /* MW 13 */ + 12468 "10100101" // /* MW 12 */ + 12469 "00000001" // /* MW 11 */ + 12470 "00000000" // /* MW 10 */ + 12471 "00000000" // /* MW 9 */ + 12472 "00000000" // /* MW 8 */ + 12473 "01011011" // /* MW 7 */ + 12474 "00000001" // /* MW 6 */ + 12475 "00100000" // /* MW 5 */ + 12476 "00000000" // /* MW 4 */ + 12477 "11110000" // /* MW 3 */ + 12478 "00101100" // /* MW 2 */ + 12479 "00000000" // /* MW 1 */ +.label ZLE_F_Z17softmax_row_majorILi1E8bfloat16S0_Lt1EEvPT0_PT1_33softmax_row_major_internal_params_992 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.end_of_loop +.aggressive_scheduled_block_id 12 +.nohwbrkpt +.noswbrkpt + 12480 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMUL.f dm0, x1, x0, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12481 "00010000" // /* MW 15 */ + 12482 "01000111" // /* MW 14 */ + 12483 "01111100" // /* MW 13 */ + 12484 "10100101" // /* MW 12 */ + 12485 "00000001" // /* MW 11 */ + 12486 "00000000" // /* MW 10 */ + 12487 "00000000" // /* MW 9 */ + 12488 "00000000" // /* MW 8 */ + 12489 "01011011" // /* MW 7 */ + 12490 "00000001" // /* MW 6 */ + 12491 "00100000" // /* MW 5 */ + 12492 "00000000" // /* MW 4 */ + 12493 "11110000" // /* MW 3 */ + 12494 "00101100" // /* MW 2 */ + 12495 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 12 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 12496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12497 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.src_ref 5 "accum.hpp" 1117 103 first +.src_ref 3 "softmax_row_major.h" 277 31 first +.aggressive_scheduled_block_id 12 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12498 "10011000" // VST.CONV.bf16.fp32 bmll0, [p0], #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12499 "00010010" // /* MW 3 */ + 12500 "00011100" // /* MW 2 */ + 12501 "00001000" // /* MW 1 */ +.aggressive_scheduled_block_id 13 +.aggressive_scheduled_block_id first + 12502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12503 "00000000" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.aggressive_scheduled_block_id 13 +.noswbrkpt + 12504 "01001000" // VMUL.f dm0, x1, x0, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12505 "00000001" // /* MW 3 */ + 12506 "11100010" // /* MW 2 */ + 12507 "10001000" // /* MW 1 */ +.aggressive_scheduled_block_id 13 +.nohwbrkpt +.noswbrkpt + 12508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12509 "00000000" // /* MW 1 */ +.src_ref 3 "softmax_row_major.h" 281 first +.aggressive_scheduled_block_id 13 +.nohwbrkpt +.noswbrkpt + 12510 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12511 "00000000" // /* MW 3 */ + 12512 "00101000" // /* MW 2 */ + 12513 "00010000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 first +.src_ref 5 "accum.hpp" 1117 103 first +.src_ref 3 "softmax_row_major.h" 277 31 first +.delay_slot +.aggressive_scheduled_block_id 13 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12514 "10011000" // VST.CONV.bf16.fp32 bmll0, [p0], #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12515 "00010010" // /* MW 3 */ + 12516 "00011100" // /* MW 2 */ + 12517 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12519 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12521 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 153 115 +.src_ref 5 "accum.hpp" 1117 103 +.src_ref 3 "softmax_row_major.h" 277 31 +.delay_slot + 12522 "10011000" // VST.CONV.bf16.fp32 bmll0, [p0], #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12523 "00010010" // /* MW 3 */ + 12524 "00011100" // /* MW 2 */ + 12525 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z17softmax_row_majorILi1E8bfloat16S0_Lt1EEvPT0_PT1_33softmax_row_major_internal_params__end +.label __Z17softmax_row_majorILi1E8bfloat16S0_Lt1EEvPT0_PT1_33softmax_row_major_internal_params___func_end0 + 12527 "00000000" // /* MW 1 */ +.label __ZN12mllib_graphs19softmax_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERA8_KjRNSD_ISE_NSF_3outET1_EE___func_begin0 +.label _ZN12mllib_graphs19softmax_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERA8_KjRNSD_ISE_NSF_3outET1_EE +.function softmax_adf_wrapper, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> > > _ZN12mllib_graphs19softmax_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERA8_KjRNSD_ISE_NSF_3outET1_EE +.src_ref 9 "softmax_adf_wrapper.cpp" 34 first +.src_ref 9 "softmax_adf_wrapper.cpp" 46 30 +.src_ref 9 "softmax_adf_wrapper.cpp" 57 56 +.function_start + 12528 "00000010" // MOVS p7, p1; MOV p3, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12529 "01110000" // /* MW 7 */ + 12530 "01100000" // /* MW 6 */ + 12531 "10110111" // /* MW 5 */ + 12532 "00000001" // /* MW 4 */ + 12533 "01100000" // /* MW 3 */ + 12534 "10010001" // /* MW 2 */ + 12535 "11110000" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 37 17 + 12536 "00111010" // MOVS p1, p6; MOVXM p6, #508596 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12537 "00010001" // /* MW 9 */ + 12538 "01011010" // /* MW 8 */ + 12539 "00110001" // /* MW 7 */ + 12540 "11110011" // /* MW 6 */ + 12541 "00000001" // /* MW 5 */ + 12542 "00000000" // /* MW 4 */ + 12543 "01100000" // /* MW 3 */ + 12544 "00010001" // /* MW 2 */ + 12545 "00110011" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 37 17 first + 12546 "10011000" // LDA r16, [p6], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12547 "00010110" // /* MW 3 */ + 12548 "11011110" // /* MW 2 */ + 12549 "00000110" // /* MW 1 */ + 12550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12551 "00000000" // /* MW 1 */ + 12552 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12553 "00000000" // /* MW 1 */ + 12554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12555 "00000000" // /* MW 1 */ + 12556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12557 "00000000" // /* MW 1 */ + 12558 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12559 "00000000" // /* MW 1 */ + 12560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12561 "00000000" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 37 7 +.src_ref 9 "softmax_adf_wrapper.cpp" 37 27 + 12562 "10000100" // JNZ r16, #12784 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12784 delay_slots=5 */ + 12563 "00000001" // /* MW 5 */ + 12564 "01000000" // /* MW 4 */ + 12565 "11111000" // /* MW 3 */ + 12566 "00011000" // /* MW 2 */ + 12567 "10000000" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 34 +.delay_slot + 12568 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12569 "00000001" // /* MW 5 */ + 12570 "00000000" // /* MW 4 */ + 12571 "00000000" // /* MW 3 */ + 12572 "00010000" // /* MW 2 */ + 12573 "00000000" // /* MW 1 */ +.delay_slot + 12574 "10011000" // ST p1, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12575 "10011101" // /* MW 3 */ + 12576 "11111100" // /* MW 2 */ + 12577 "00001111" // /* MW 1 */ +.delay_slot + 12578 "10011000" // ST p3, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12579 "10011101" // /* MW 3 */ + 12580 "11111001" // /* MW 2 */ + 12581 "00001111" // /* MW 1 */ +.delay_slot + 12582 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12583 "00111101" // /* MW 3 */ + 12584 "11110100" // /* MW 2 */ + 12585 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.delay_slot + 12586 "11111000" // MOV p1, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12587 "11000000" // /* MW 3 */ + 12588 "01100100" // /* MW 2 */ + 12589 "00011001" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 42 26 +.src_ref 9 "softmax_adf_wrapper.cpp" 42 26 +.src_ref 9 "softmax_adf_wrapper.cpp" 46 30 + 12590 "01110110" // MOVA m0, #5; MOVS p3, p7; MOVXM p2, #508600 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12591 "00010000" // /* MW 11 */ + 12592 "01011100" // /* MW 10 */ + 12593 "00110001" // /* MW 9 */ + 12594 "11110001" // /* MW 8 */ + 12595 "00000001" // /* MW 7 */ + 12596 "00000000" // /* MW 6 */ + 12597 "10001011" // /* MW 5 */ + 12598 "10011100" // /* MW 4 */ + 12599 "10000011" // /* MW 3 */ + 12600 "10100000" // /* MW 2 */ + 12601 "00000000" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 43 58 +.src_ref 9 "softmax_adf_wrapper.cpp" 44 27 + 12602 "10111010" // MOVA m2, #-24; MOVX r17, #-16; MOV r16, CORE_ID /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12603 "01111000" // /* MW 9 */ + 12604 "01110000" // /* MW 8 */ + 12605 "00001101" // /* MW 7 */ + 12606 "00001010" // /* MW 6 */ + 12607 "00010110" // /* MW 5 */ + 12608 "00111111" // /* MW 4 */ + 12609 "10000000" // /* MW 3 */ + 12610 "00001000" // /* MW 2 */ + 12611 "11111101" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 42 26 first +.src_ref 9 "softmax_adf_wrapper.cpp" 43 58 first +.src_ref 9 "softmax_adf_wrapper.cpp" 44 40 +.src_ref 9 "softmax_adf_wrapper.cpp" 47 30 + 12612 "01110110" // MOVA m0, #-6; ST r16, [p2], m0; LSHL r16, r16, r17; ADD.NC r18, r16, #-2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12613 "10001000" // /* MW 11 */ + 12614 "00111111" // /* MW 10 */ + 12615 "01001100" // /* MW 9 */ + 12616 "11101110" // /* MW 8 */ + 12617 "00001000" // /* MW 7 */ + 12618 "10100001" // /* MW 6 */ + 12619 "00010001" // /* MW 5 */ + 12620 "00001010" // /* MW 4 */ + 12621 "10000010" // /* MW 3 */ + 12622 "01000000" // /* MW 2 */ + 12623 "11111111" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 9 "softmax_adf_wrapper.cpp" 43 27 +.src_ref 9 "softmax_adf_wrapper.cpp" 49 29 + 12624 "10111010" // ST.s8 r16, [p2], #-1; MOVX r17, #1; MOV m1, #12 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12625 "01011000" // /* MW 9 */ + 12626 "00001100" // /* MW 8 */ + 12627 "10000000" // /* MW 7 */ + 12628 "00101000" // /* MW 6 */ + 12629 "00010000" // /* MW 5 */ + 12630 "00000001" // /* MW 4 */ + 12631 "11100000" // /* MW 3 */ + 12632 "11000000" // /* MW 2 */ + 12633 "01011111" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.src_ref 9 "softmax_adf_wrapper.cpp" 46 30 first + 12634 "10111010" // LDA r19, [p3], #4; MOVXM p4, #508464 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12635 "00010000" // /* MW 9 */ + 12636 "00011000" // /* MW 8 */ + 12637 "00110001" // /* MW 7 */ + 12638 "11110010" // /* MW 6 */ + 12639 "00000001" // /* MW 5 */ + 12640 "00000000" // /* MW 4 */ + 12641 "11010000" // /* MW 3 */ + 12642 "11001110" // /* MW 2 */ + 12643 "01100011" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 + 12644 "10111010" // MOVA r25, #0; MOVXM p5, #508460 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12645 "00010000" // /* MW 9 */ + 12646 "00010110" // /* MW 8 */ + 12647 "10110001" // /* MW 7 */ + 12648 "11110010" // /* MW 6 */ + 12649 "00000001" // /* MW 5 */ + 12650 "00000000" // /* MW 4 */ + 12651 "00000000" // /* MW 3 */ + 12652 "00011001" // /* MW 2 */ + 12653 "00000000" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 49 29 + 12654 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12655 "00000001" // /* MW 3 */ + 12656 "00110000" // /* MW 2 */ + 12657 "00010000" // /* MW 1 */ + 12658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12659 "00000000" // /* MW 1 */ + 12660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12661 "00000000" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 48 31 first + 12662 "00011000" // EXTEND.u8 r16, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12663 "10010000" // /* MW 3 */ + 12664 "10100000" // /* MW 2 */ + 12665 "00010100" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 44 27 first + 12666 "00011000" // ST.s8 r18, [p2], m2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12667 "01000111" // /* MW 3 */ + 12668 "01001010" // /* MW 2 */ + 12669 "00000010" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 48 50 first + 12670 "10011000" // MUL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12671 "00001111" // /* MW 3 */ + 12672 "11100001" // /* MW 2 */ + 12673 "00010100" // /* MW 1 */ + 12674 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12675 "00000000" // /* MW 1 */ + 12676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12677 "00000000" // /* MW 1 */ + 12678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12679 "00000000" // /* MW 1 */ + 12680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12681 "00000000" // /* MW 1 */ + 12682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12683 "00000000" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 46 23 first + 12684 "10011000" // ST r19, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12685 "01110001" // /* MW 3 */ + 12686 "00011110" // /* MW 2 */ + 12687 "00001010" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.src_ref 9 "softmax_adf_wrapper.cpp" 47 37 first + 12688 "00001100" // LDA el0, [p3], #8; ST r17, [p5] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12689 "01100011" // /* MW 5 */ + 12690 "00001100" // /* MW 4 */ + 12691 "11011010" // /* MW 3 */ + 12692 "10000101" // /* MW 2 */ + 12693 "01100101" // /* MW 1 */ + 12694 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12695 "00000000" // /* MW 1 */ + 12696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12697 "00000000" // /* MW 1 */ + 12698 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12699 "00000000" // /* MW 1 */ + 12700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12701 "00000000" // /* MW 1 */ + 12702 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12703 "00000000" // /* MW 1 */ + 12704 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12705 "00000000" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 47 30 + 12706 "10011000" // ST el0, [p2], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12707 "00101001" // /* MW 3 */ + 12708 "00001000" // /* MW 2 */ + 12709 "00001010" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 48 29 first + 12710 "00011000" // ST.s16 r16, [p2], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12711 "00010111" // /* MW 3 */ + 12712 "11111110" // /* MW 2 */ + 12713 "00000010" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 50 34 first + 12714 "10011000" // LDA el0, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12715 "00101110" // /* MW 3 */ + 12716 "00000100" // /* MW 2 */ + 12717 "00000011" // /* MW 1 */ + 12718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12719 "00000000" // /* MW 1 */ + 12720 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12721 "00000000" // /* MW 1 */ + 12722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12723 "00000000" // /* MW 1 */ + 12724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12725 "00000000" // /* MW 1 */ + 12726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12727 "00000000" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 49 29 first + 12728 "00011000" // ST.s8 r24, [p2], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12729 "00000111" // /* MW 3 */ + 12730 "00101011" // /* MW 2 */ + 12731 "00000010" // /* MW 1 */ + 12732 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12733 "00000000" // /* MW 1 */ + 12734 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12735 "00000000" // /* MW 1 */ + 12736 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12737 "00000000" // /* MW 1 */ + 12738 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12739 "00000000" // /* MW 1 */ + 12740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12741 "00000000" // /* MW 1 */ + 12742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12743 "00000000" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 50 27 first + 12744 "10011000" // ST el0, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12745 "00101001" // /* MW 3 */ + 12746 "00000100" // /* MW 2 */ + 12747 "00001010" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 first + 12748 "00011000" // ST.s8 r25, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12749 "00100111" // /* MW 3 */ + 12750 "00000111" // /* MW 2 */ + 12751 "00000100" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 51 35 first + 12752 "10011000" // LDA el0, [p3, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12753 "00101110" // /* MW 3 */ + 12754 "00010100" // /* MW 2 */ + 12755 "00000011" // /* MW 1 */ + 12756 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12757 "00000000" // /* MW 1 */ + 12758 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12759 "00000000" // /* MW 1 */ + 12760 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12761 "00000000" // /* MW 1 */ + 12762 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12763 "00000000" // /* MW 1 */ + 12764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12765 "00000000" // /* MW 1 */ + 12766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12767 "00000000" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 51 28 + 12768 "11100001" // NOPA; NOPB; ST el0, [p2, #4]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12769 "00000000" // /* MW 15 */ + 12770 "00000000" // /* MW 14 */ + 12771 "01111000" // /* MW 13 */ + 12772 "10100101" // /* MW 12 */ + 12773 "00000001" // /* MW 11 */ + 12774 "00000000" // /* MW 10 */ + 12775 "00000000" // /* MW 9 */ + 12776 "10000000" // /* MW 8 */ + 12777 "00101001" // /* MW 7 */ + 12778 "00010100" // /* MW 6 */ + 12779 "00100010" // /* MW 5 */ + 12780 "00000000" // /* MW 4 */ + 12781 "11110000" // /* MW 3 */ + 12782 "00101100" // /* MW 2 */ + 12783 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs19softmax_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERA8_KjRNSD_ISE_NSF_3outET1_EE_256 +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 first + 12784 "01010100" // LDA eh0, [p6], #4; MOV m0, #-60 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12785 "00010001" // /* MW 5 */ + 12786 "00011111" // /* MW 4 */ + 12787 "11010000" // /* MW 3 */ + 12788 "10000001" // /* MW 2 */ + 12789 "11000011" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 + 12790 "11010100" // LDA el0, [p6], #4; MOV p2, sp /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12791 "11000001" // /* MW 5 */ + 12792 "11001011" // /* MW 4 */ + 12793 "11010100" // /* MW 3 */ + 12794 "10000101" // /* MW 2 */ + 12795 "11000011" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 + 12796 "00111100" // LDA el3, [p6], #4; PADDB [p2], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12797 "00100000" // /* MW 5 */ + 12798 "11011111" // /* MW 4 */ + 12799 "11010101" // /* MW 3 */ + 12800 "10011101" // /* MW 2 */ + 12801 "11000011" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 + 12802 "10011000" // LDA el2, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12803 "10101110" // /* MW 3 */ + 12804 "00011100" // /* MW 2 */ + 12805 "00000110" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 + 12806 "10011000" // LDA el1, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12807 "01101110" // /* MW 3 */ + 12808 "00011100" // /* MW 2 */ + 12809 "00000110" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 + 12810 "10011000" // LDA eh1, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12811 "01001110" // /* MW 3 */ + 12812 "00011100" // /* MW 2 */ + 12813 "00000110" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 + 12814 "10011000" // LDA eh2, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12815 "10001110" // /* MW 3 */ + 12816 "00011100" // /* MW 2 */ + 12817 "00000110" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 + 12818 "00001100" // LDA eh0, [p6], #4; ST eh0, [sp, #-120] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12819 "00011011" // /* MW 5 */ + 12820 "00010000" // /* MW 4 */ + 12821 "11011111" // /* MW 3 */ + 12822 "10000001" // /* MW 2 */ + 12823 "11000011" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 + 12824 "00001100" // LDA el0, [p6], #4; ST el0, [sp, #-116] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12825 "01011011" // /* MW 5 */ + 12826 "00011000" // /* MW 4 */ + 12827 "11011111" // /* MW 3 */ + 12828 "10000101" // /* MW 2 */ + 12829 "11000011" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 + 12830 "00001100" // LDA el3, [p6], #4; ST el3, [sp, #-112] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12831 "11011011" // /* MW 5 */ + 12832 "00100001" // /* MW 4 */ + 12833 "11011111" // /* MW 3 */ + 12834 "10011101" // /* MW 2 */ + 12835 "11000011" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 + 12836 "00001100" // LDA el2, [p6], #4; ST el2, [sp, #-108] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12837 "01011011" // /* MW 5 */ + 12838 "00101001" // /* MW 4 */ + 12839 "11011111" // /* MW 3 */ + 12840 "10010101" // /* MW 2 */ + 12841 "11000011" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 + 12842 "00001100" // LDA el1, [p6], #4; ST el1, [sp, #-104] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12843 "11011011" // /* MW 5 */ + 12844 "00110000" // /* MW 4 */ + 12845 "11011111" // /* MW 3 */ + 12846 "10001101" // /* MW 2 */ + 12847 "11000011" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 + 12848 "00001100" // LDA eh1, [p6], #4; ST eh1, [sp, #-100] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12849 "10011011" // /* MW 5 */ + 12850 "00111000" // /* MW 4 */ + 12851 "11011111" // /* MW 3 */ + 12852 "10001001" // /* MW 2 */ + 12853 "11000011" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 + 12854 "00001100" // LDA eh2, [p6], m0; ST eh2, [sp, #-96] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12855 "00011011" // /* MW 5 */ + 12856 "01000001" // /* MW 4 */ + 12857 "11011111" // /* MW 3 */ + 12858 "00010001" // /* MW 2 */ + 12859 "11000001" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 + 12860 "00001100" // LDA eh0, [p6], #4; ST eh0, [sp, #-92] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12861 "00011011" // /* MW 5 */ + 12862 "01001000" // /* MW 4 */ + 12863 "11011111" // /* MW 3 */ + 12864 "10000001" // /* MW 2 */ + 12865 "11000011" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 + 12866 "00001100" // LDA el0, [p6]; ST el0, [sp, #-88] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12867 "01011011" // /* MW 5 */ + 12868 "01010000" // /* MW 4 */ + 12869 "11011111" // /* MW 3 */ + 12870 "10000101" // /* MW 2 */ + 12871 "11000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 + 12872 "00001100" // LDA p0, [p0]; ST el3, [sp, #-84] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12873 "11011011" // /* MW 5 */ + 12874 "01011001" // /* MW 4 */ + 12875 "11011111" // /* MW 3 */ + 12876 "10000011" // /* MW 2 */ + 12877 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 first + 12878 "00001100" // LDA p1, [p1]; ST el2, [sp, #-80] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12879 "01011011" // /* MW 5 */ + 12880 "01100001" // /* MW 4 */ + 12881 "11011111" // /* MW 3 */ + 12882 "10010011" // /* MW 2 */ + 12883 "00100000" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 54 4 +.no_stack_arguments + 12884 "00000100" // JL #11488 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11488 delay_slots=5 */ + 12885 "00000001" // /* MW 5 */ + 12886 "00000000" // /* MW 4 */ + 12887 "01110000" // /* MW 3 */ + 12888 "00010110" // /* MW 2 */ + 12889 "00000000" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 +.delay_slot + 12890 "10011000" // ST el1, [sp, #-76] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12891 "01101101" // /* MW 3 */ + 12892 "10110100" // /* MW 2 */ + 12893 "00001111" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 +.delay_slot + 12894 "10011000" // ST eh1, [sp, #-72] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12895 "01001101" // /* MW 3 */ + 12896 "10111000" // /* MW 2 */ + 12897 "00001111" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 +.delay_slot + 12898 "10011000" // ST eh2, [sp, #-68] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12899 "10001101" // /* MW 3 */ + 12900 "10111100" // /* MW 2 */ + 12901 "00001111" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 +.delay_slot + 12902 "10011000" // ST eh0, [sp, #-128] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12903 "00001101" // /* MW 3 */ + 12904 "10000000" // /* MW 2 */ + 12905 "00001111" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 54 95 +.delay_slot + 12906 "00001100" // NOPA; ST el0, [sp, #-124] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12907 "01011011" // /* MW 5 */ + 12908 "00001000" // /* MW 4 */ + 12909 "11111111" // /* MW 3 */ + 12910 "00101100" // /* MW 2 */ + 12911 "00000000" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 56 23 first +.src_ref 9 "softmax_adf_wrapper.cpp" 57 26 +.return_address + 12912 "00101100" // LDA r17, [p6, #16]; MOVX r16, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12913 "00000010" // /* MW 5 */ + 12914 "01000000" // /* MW 4 */ + 12915 "11010000" // /* MW 3 */ + 12916 "11000110" // /* MW 2 */ + 12917 "11001000" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 58 + 12918 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12919 "00111001" // /* MW 3 */ + 12920 "11110100" // /* MW 2 */ + 12921 "00000111" // /* MW 1 */ + 12922 "00011000" // LDA p2, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12923 "00011001" // /* MW 3 */ + 12924 "11111001" // /* MW 2 */ + 12925 "00000111" // /* MW 1 */ + 12926 "00011000" // LDA p0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12927 "00011001" // /* MW 3 */ + 12928 "11111100" // /* MW 2 */ + 12929 "00000111" // /* MW 1 */ + 12930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12931 "00000000" // /* MW 1 */ + 12932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12933 "00000000" // /* MW 1 */ + 12934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12935 "00000000" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 56 23 + 12936 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12937 "00000111" // /* MW 3 */ + 12938 "01100010" // /* MW 2 */ + 12939 "00010100" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 56 23 + 12940 "10011000" // ST r17, [p6, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12941 "00110001" // /* MW 3 */ + 12942 "01000110" // /* MW 2 */ + 12943 "00001110" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 57 56 first + 12944 "00001100" // LDA r18, [p7, #8]; MOVS p7, p2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12945 "00010110" // /* MW 5 */ + 12946 "00010001" // /* MW 4 */ + 12947 "11011111" // /* MW 3 */ + 12948 "11001010" // /* MW 2 */ + 12949 "11100100" // /* MW 1 */ + 12950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12951 "00000000" // /* MW 1 */ + 12952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12953 "00000000" // /* MW 1 */ + 12954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12955 "00000000" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 58 first + 12956 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12957 "00000000" // /* MW 3 */ + 12958 "00101000" // /* MW 2 */ + 12959 "00010000" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 58 +.delay_slot + 12960 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12961 "00000001" // /* MW 5 */ + 12962 "00000000" // /* MW 4 */ + 12963 "00000000" // /* MW 3 */ + 12964 "11110000" // /* MW 2 */ + 12965 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12967 "00000000" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 57 53 first +.delay_slot + 12968 "10011000" // EQ r27, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12969 "00100111" // /* MW 3 */ + 12970 "01110111" // /* MW 2 */ + 12971 "00010100" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 57 26 +.delay_slot + 12972 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12973 "00000010" // /* MW 3 */ + 12974 "01100001" // /* MW 2 */ + 12975 "00010100" // /* MW 1 */ +.src_ref 9 "softmax_adf_wrapper.cpp" 57 24 +.delay_slot + 12976 "00000010" // ST r16, [p6, #16]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12977 "01110000" // /* MW 7 */ + 12978 "01100000" // /* MW 6 */ + 12979 "00110000" // /* MW 5 */ + 12980 "00000011" // /* MW 4 */ + 12981 "00110000" // /* MW 3 */ + 12982 "11000010" // /* MW 2 */ +.label _ZN12mllib_graphs19softmax_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERA8_KjRNSD_ISE_NSF_3outET1_EE__end +.label __ZN12mllib_graphs19softmax_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERA8_KjRNSD_ISE_NSF_3outET1_EE___func_end0 + 12983 "11001000" // /* MW 1 */ +.label __Z14_b8134_wrapperPPv___func_begin0 +.label _Z14_b8134_wrapperPPv +.function _b8134_wrapper _Z14_b8134_wrapperPPv +.src_ref 0 "0_0_reloadable11.cc" 64 first +.src_ref 0 "0_0_reloadable11.cc" 66 79 +.function_start + 12992 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12993 "11000000" // /* MW 3 */ + 12994 "01100000" // /* MW 2 */ + 12995 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 66 79 first + 12996 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12997 "00011110" // /* MW 3 */ + 12998 "00101100" // /* MW 2 */ + 12999 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 68 80 first + 13000 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13001 "00011110" // /* MW 3 */ + 13002 "11110101" // /* MW 2 */ + 13003 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 67 46 first + 13004 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13005 "10011110" // /* MW 3 */ + 13006 "00000100" // /* MW 2 */ + 13007 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 65 4 first +.tail_call + 13008 "10000100" // J #12528 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=12528 delay_slots=5 */ + 13009 "00000000" // /* MW 5 */ + 13010 "00000000" // /* MW 4 */ + 13011 "01111000" // /* MW 3 */ + 13012 "00011000" // /* MW 2 */ + 13013 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13015 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13017 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13018 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13019 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13021 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z14_b8134_wrapperPPv__end +.label __Z14_b8134_wrapperPPv___func_end0 + 13023 "00000000" // /* MW 1 */ +.label __ZN12mllib_graphs14expand_wrapperI8bfloat16EEvRN3adf9io_bufferIT_NS2_9direction2inENS2_16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEEEERA3_KjRNS3_IS4_NS5_3outESG_EE___func_begin0 +.label _ZN12mllib_graphs14expand_wrapperI8bfloat16EEvRN3adf9io_bufferIT_NS2_9direction2inENS2_16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEEEERA3_KjRNS3_IS4_NS5_3outESG_EE +.function expand_wrapper _ZN12mllib_graphs14expand_wrapperI8bfloat16EEvRN3adf9io_bufferIT_NS2_9direction2inENS2_16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEEEERA3_KjRNS3_IS4_NS5_3outESG_EE +.src_ref 9 "expand_adf_wrapper.cpp" 25 first +.src_ref 9 "expand_adf_wrapper.cpp" 26 7 +.src_ref 9 "expand_adf_wrapper.cpp" 64 26 +.function_start + 13024 "01000100" // MOVXM p3, #508448 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13025 "01000000" // /* MW 5 */ + 13026 "11000100" // /* MW 4 */ + 13027 "11000110" // /* MW 3 */ + 13028 "00000111" // /* MW 2 */ + 13029 "00000000" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 26 7 first + 13030 "10011000" // LDA r0, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13031 "00010110" // /* MW 3 */ + 13032 "00000100" // /* MW 2 */ + 13033 "00000011" // /* MW 1 */ + 13034 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13035 "00000000" // /* MW 1 */ + 13036 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13037 "00000000" // /* MW 1 */ + 13038 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13039 "00000000" // /* MW 1 */ + 13040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13041 "00000000" // /* MW 1 */ + 13042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13043 "00000000" // /* MW 1 */ + 13044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13045 "00000000" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 26 7 +.src_ref 9 "expand_adf_wrapper.cpp" 26 30 + 13046 "10000100" // JNZ r0, #13088 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=13088 delay_slots=5 */ + 13047 "00000001" // /* MW 5 */ + 13048 "01000000" // /* MW 4 */ + 13049 "10010000" // /* MW 3 */ + 13050 "00011001" // /* MW 2 */ + 13051 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13053 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13055 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13057 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13059 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13061 "00000000" // /* MW 1 */ + 13062 "11111000" // MOV r1, CORE_ID /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13063 "11100000" // /* MW 3 */ + 13064 "01011010" // /* MW 2 */ + 13065 "00011000" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 29 41 first + 13066 "00011000" // EXTEND.u8 r1, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13067 "10010000" // /* MW 3 */ + 13068 "01000010" // /* MW 2 */ + 13069 "00010000" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 29 68 + 13070 "00011000" // ADD r1, r1, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13071 "11111011" // /* MW 3 */ + 13072 "01000011" // /* MW 2 */ + 13073 "00010000" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 29 28 + 13074 "01000100" // MOVXM p4, #508452 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13075 "01001000" // /* MW 5 */ + 13076 "11000100" // /* MW 4 */ + 13077 "11001000" // /* MW 3 */ + 13078 "00000111" // /* MW 2 */ + 13079 "00000000" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 29 28 + 13080 "00000010" // ST r1, [p4]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13081 "01110000" // /* MW 7 */ + 13082 "10100101" // /* MW 6 */ + 13083 "00000001" // /* MW 5 */ + 13084 "00000000" // /* MW 4 */ + 13085 "00110000" // /* MW 3 */ + 13086 "10000110" // /* MW 2 */ + 13087 "10000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs14expand_wrapperI8bfloat16EEvRN3adf9io_bufferIT_NS2_9direction2inENS2_16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEEEERA3_KjRNS3_IS4_NS5_3outESG_EE_64 +.src_ref 9 "expand_adf_wrapper.cpp" 36 19 first +.src_ref 9 "expand_adf_wrapper.cpp" 38 37 +.src_ref 9 "expand_adf_wrapper.cpp" 40 8 +.src_ref 9 "expand_adf_wrapper.cpp" 40 20 +.src_ref 9 "expand_adf_wrapper.cpp" 49 61 + 13088 "10111010" // LDA r2, [p1, #4]; MOVX r16, #-5; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13089 "01011000" // /* MW 9 */ + 13090 "00000000" // /* MW 8 */ + 13091 "00001000" // /* MW 7 */ + 13092 "01101011" // /* MW 6 */ + 13093 "00000111" // /* MW 5 */ + 13094 "00111111" // /* MW 4 */ + 13095 "11010000" // /* MW 3 */ + 13096 "10001010" // /* MW 2 */ + 13097 "00100010" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 37 19 first +.src_ref 9 "expand_adf_wrapper.cpp" 49 61 + 13098 "10111010" // LDA r1, [p1]; MOVXM p1, #508464 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13099 "00010000" // /* MW 9 */ + 13100 "00011000" // /* MW 8 */ + 13101 "10110001" // /* MW 7 */ + 13102 "11110000" // /* MW 6 */ + 13103 "00000001" // /* MW 5 */ + 13104 "00000000" // /* MW 4 */ + 13105 "11010000" // /* MW 3 */ + 13106 "10000110" // /* MW 2 */ + 13107 "00100000" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 46 33 +.src_ref 9 "expand_adf_wrapper.cpp" 49 61 first + 13108 "10111010" // LDA.s8 r7, [p1]; MOVXM p1, #508452 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13109 "00010000" // /* MW 9 */ + 13110 "00010010" // /* MW 8 */ + 13111 "10110001" // /* MW 7 */ + 13112 "11110000" // /* MW 6 */ + 13113 "00000001" // /* MW 5 */ + 13114 "00000000" // /* MW 4 */ + 13115 "01010000" // /* MW 3 */ + 13116 "10011100" // /* MW 2 */ + 13117 "00100000" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 39 36 +.src_ref 9 "expand_adf_wrapper.cpp" 46 33 first +.src_ref 9 "expand_adf_wrapper.cpp" 46 53 +.src_ref 9 "expand_adf_wrapper.cpp" 49 61 + 13118 "10111010" // LDA r4, [p1]; MOVX r3, #1; VINSERT.32 x0, x0, #0, r24 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13119 "10111000" // /* MW 9 */ + 13120 "10001000" // /* MW 8 */ + 13121 "00000001" // /* MW 7 */ + 13122 "00101000" // /* MW 6 */ + 13123 "00110000" // /* MW 5 */ + 13124 "00000000" // /* MW 4 */ + 13125 "11010000" // /* MW 3 */ + 13126 "10010010" // /* MW 2 */ + 13127 "00100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 9 "expand_adf_wrapper.cpp" 39 36 +.src_ref 9 "expand_adf_wrapper.cpp" 49 61 + 13128 "10111010" // LDA r6, [p0]; MOVX r5, #64; VMOV bmll0, x0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13129 "01111000" // /* MW 9 */ + 13130 "01001001" // /* MW 8 */ + 13131 "00000000" // /* MW 7 */ + 13132 "00001000" // /* MW 6 */ + 13133 "01010000" // /* MW 5 */ + 13134 "00000010" // /* MW 4 */ + 13135 "11010000" // /* MW 3 */ + 13136 "10011010" // /* MW 2 */ + 13137 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 9 "expand_adf_wrapper.cpp" 39 36 + 13138 "00101100" // LDA p2, [p2]; MOVX r17, #31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13139 "11111010" // /* MW 5 */ + 13140 "01000100" // /* MW 4 */ + 13141 "11010000" // /* MW 3 */ + 13142 "10100011" // /* MW 2 */ + 13143 "01000000" // /* MW 1 */ + 13144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13145 "00000000" // /* MW 1 */ + 13146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13147 "00000000" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 38 32 first +.src_ref 9 "expand_adf_wrapper.cpp" 39 36 first + 13148 "00100100" // AND r27, r17, r1; ADD.NC r1, r1, #31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13149 "00011111" // /* MW 5 */ + 13150 "10100001" // /* MW 4 */ + 13151 "10010000" // /* MW 3 */ + 13152 "11000010" // /* MW 2 */ + 13153 "10001110" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 51 4 first +.src_ref 9 "expand_adf_wrapper.cpp" 51 22 first + 13154 "10000100" // JZ r2, #13408 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=13408 delay_slots=5 */ + 13155 "00000001" // /* MW 5 */ + 13156 "00000000" // /* MW 4 */ + 13157 "00110000" // /* MW 3 */ + 13158 "00011010" // /* MW 2 */ + 13159 "00010000" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 46 53 first +.src_ref 9 "expand_adf_wrapper.cpp" 49 61 +.delay_slot + 13160 "11100100" // MUL r4, r2, r4; MOV crRnd, r7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13161 "01000001" // /* MW 5 */ + 13162 "01100111" // /* MW 4 */ + 13163 "11111111" // /* MW 3 */ + 13164 "00001001" // /* MW 2 */ + 13165 "00010001" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 39 36 first +.src_ref 9 "expand_adf_wrapper.cpp" 49 61 first +.delay_slot + 13166 "01011100" // VCONV.bf16.fp32 wl0, bmll0; LSHL r17, r27, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13167 "01111011" // /* MW 5 */ + 13168 "11000100" // /* MW 4 */ + 13169 "11001101" // /* MW 3 */ + 13170 "00000010" // /* MW 2 */ + 13171 "00001000" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 46 53 first +.delay_slot + 13172 "10011000" // LSHL r3, r4, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13173 "00111101" // /* MW 3 */ + 13174 "00000110" // /* MW 2 */ + 13175 "00010001" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 first +.src_ref 9 "expand_adf_wrapper.cpp" 38 37 first +.src_ref 9 "expand_adf_wrapper.cpp" 49 61 first +.delay_slot + 13176 "10100100" // LSHL r1, r1, r16; VEXTBCST.16 x0, x0, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13177 "00000110" // /* MW 5 */ + 13178 "00000010" // /* MW 4 */ + 13179 "10110000" // /* MW 3 */ + 13180 "01100001" // /* MW 2 */ + 13181 "00001000" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 39 36 first +.src_ref 9 "expand_adf_wrapper.cpp" 46 11 first +.delay_slot + 13182 "10100100" // SUB r5, r5, r17; ADD.NC p0, r3, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13183 "00110010" // /* MW 5 */ + 13184 "11000011" // /* MW 4 */ + 13185 "00110000" // /* MW 3 */ + 13186 "01100010" // /* MW 2 */ + 13187 "00101001" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 40 8 first +.src_ref 9 "expand_adf_wrapper.cpp" 40 20 first +.src_ref 9 "expand_adf_wrapper.cpp" 40 20 first + 13188 "00100100" // SEL.EQZ r4, r24, r5, r27; ADD.NC r3, r1, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13189 "11111111" // /* MW 5 */ + 13190 "10100001" // /* MW 4 */ + 13191 "01000001" // /* MW 3 */ + 13192 "00001010" // /* MW 2 */ + 13193 "11000001" // /* MW 1 */ + 13194 "00101100" // NOPA; ADD r2, r2, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13195 "11111110" // /* MW 5 */ + 13196 "00001011" // /* MW 4 */ + 13197 "11110001" // /* MW 3 */ + 13198 "00101100" // /* MW 2 */ + 13199 "00000000" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 51 4 + 13200 "11100001" // NOPA; NOPB; NOPS; MOVXM p1, #13216; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13201 "00000000" // /* MW 15 */ + 13202 "00000000" // /* MW 14 */ + 13203 "00010000" // /* MW 13 */ + 13204 "11010000" // /* MW 12 */ + 13205 "10110001" // /* MW 11 */ + 13206 "00001100" // /* MW 10 */ + 13207 "00000000" // /* MW 9 */ + 13208 "00000000" // /* MW 8 */ + 13209 "01011011" // /* MW 7 */ + 13210 "00000001" // /* MW 6 */ + 13211 "00100000" // /* MW 5 */ + 13212 "00000000" // /* MW 4 */ + 13213 "11110000" // /* MW 3 */ + 13214 "00101100" // /* MW 2 */ + 13215 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs14expand_wrapperI8bfloat16EEvRN3adf9io_bufferIT_NS2_9direction2inENS2_16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEEEERA3_KjRNS3_IS4_NS5_3outESG_EE_192 +.src_ref 9 "expand_adf_wrapper.cpp" 52 20 first +.loop_nesting 1 + 13216 "10011000" // LDA.s16 r6, [p0], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13217 "11010010" // /* MW 3 */ + 13218 "00011100" // /* MW 2 */ + 13219 "00000000" // /* MW 1 */ + 13220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13221 "00000000" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 55 8 first + 13222 "10000100" // JZ r3, #13376 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=13376 delay_slots=5 */ + 13223 "00000001" // /* MW 5 */ + 13224 "00000000" // /* MW 4 */ + 13225 "00100000" // /* MW 3 */ + 13226 "00011010" // /* MW 2 */ + 13227 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13229 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13231 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13233 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13234 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13235 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 first +.delay_slot + 13236 "11111000" // VBCST.16 x1, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13237 "01110010" // /* MW 3 */ + 13238 "10011001" // /* MW 2 */ + 13239 "00011000" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 55 8 first + 13240 "01000100" // MOVXM ls, #13360 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13241 "01100000" // /* MW 5 */ + 13242 "11101000" // /* MW 4 */ + 13243 "00110001" // /* MW 3 */ + 13244 "00000000" // /* MW 2 */ + 13245 "00000000" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 55 8 + 13246 "01000100" // MOVXM le, #13360 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13247 "01100000" // /* MW 5 */ + 13248 "11101000" // /* MW 4 */ + 13249 "00110110" // /* MW 3 */ + 13250 "00000000" // /* MW 2 */ + 13251 "00000000" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 55 8 + 13252 "11110110" // NOPA; NOPB; NOPS; ADD.NC lc, r1, #-1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 13253 "11000000" // /* MW 11 */ + 13254 "01111111" // /* MW 10 */ + 13255 "10111000" // /* MW 9 */ + 13256 "00000010" // /* MW 8 */ + 13257 "01011011" // /* MW 7 */ + 13258 "00000001" // /* MW 6 */ + 13259 "00100000" // /* MW 5 */ + 13260 "00000000" // /* MW 4 */ + 13261 "11110000" // /* MW 3 */ + 13262 "00101100" // /* MW 2 */ + 13263 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 9 "expand_adf_wrapper.cpp" 57 20 + 13264 "11100001" // NOPA; NOPB; NOPS; NOPX; VMOV bmll0, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13265 "00000000" // /* MW 15 */ + 13266 "00000000" // /* MW 14 */ + 13267 "01111000" // /* MW 13 */ + 13268 "01001001" // /* MW 12 */ + 13269 "00000001" // /* MW 11 */ + 13270 "00000000" // /* MW 10 */ + 13271 "00000000" // /* MW 9 */ + 13272 "00000000" // /* MW 8 */ + 13273 "01011011" // /* MW 7 */ + 13274 "00000001" // /* MW 6 */ + 13275 "00100000" // /* MW 5 */ + 13276 "00000000" // /* MW 4 */ + 13277 "11110000" // /* MW 3 */ + 13278 "00101100" // /* MW 2 */ + 13279 "00000000" // /* MW 1 */ + 13280 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13281 "00000000" // /* MW 15 */ + 13282 "00000000" // /* MW 14 */ + 13283 "01111000" // /* MW 13 */ + 13284 "10100101" // /* MW 12 */ + 13285 "00000001" // /* MW 11 */ + 13286 "00000000" // /* MW 10 */ + 13287 "00000000" // /* MW 9 */ + 13288 "00000000" // /* MW 8 */ + 13289 "01011011" // /* MW 7 */ + 13290 "00000001" // /* MW 6 */ + 13291 "00100000" // /* MW 5 */ + 13292 "00000000" // /* MW 4 */ + 13293 "11110000" // /* MW 3 */ + 13294 "00101100" // /* MW 2 */ + 13295 "00000000" // /* MW 1 */ + 13296 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13297 "00000000" // /* MW 15 */ + 13298 "00000000" // /* MW 14 */ + 13299 "01111000" // /* MW 13 */ + 13300 "10100101" // /* MW 12 */ + 13301 "00000001" // /* MW 11 */ + 13302 "00000000" // /* MW 10 */ + 13303 "00000000" // /* MW 9 */ + 13304 "00000000" // /* MW 8 */ + 13305 "01011011" // /* MW 7 */ + 13306 "00000001" // /* MW 6 */ + 13307 "00100000" // /* MW 5 */ + 13308 "00000000" // /* MW 4 */ + 13309 "11110000" // /* MW 3 */ + 13310 "00101100" // /* MW 2 */ + 13311 "00000000" // /* MW 1 */ + 13312 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13313 "00000000" // /* MW 15 */ + 13314 "00000000" // /* MW 14 */ + 13315 "01111000" // /* MW 13 */ + 13316 "10100101" // /* MW 12 */ + 13317 "00000001" // /* MW 11 */ + 13318 "00000000" // /* MW 10 */ + 13319 "00000000" // /* MW 9 */ + 13320 "00000000" // /* MW 8 */ + 13321 "01011011" // /* MW 7 */ + 13322 "00000001" // /* MW 6 */ + 13323 "00100000" // /* MW 5 */ + 13324 "00000000" // /* MW 4 */ + 13325 "11110000" // /* MW 3 */ + 13326 "00101100" // /* MW 2 */ + 13327 "00000000" // /* MW 1 */ + 13328 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13329 "00000000" // /* MW 15 */ + 13330 "00000000" // /* MW 14 */ + 13331 "01111000" // /* MW 13 */ + 13332 "10100101" // /* MW 12 */ + 13333 "00000001" // /* MW 11 */ + 13334 "00000000" // /* MW 10 */ + 13335 "00000000" // /* MW 9 */ + 13336 "00000000" // /* MW 8 */ + 13337 "01011011" // /* MW 7 */ + 13338 "00000001" // /* MW 6 */ + 13339 "00100000" // /* MW 5 */ + 13340 "00000000" // /* MW 4 */ + 13341 "11110000" // /* MW 3 */ + 13342 "00101100" // /* MW 2 */ + 13343 "00000000" // /* MW 1 */ + 13344 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13345 "00000000" // /* MW 15 */ + 13346 "00000000" // /* MW 14 */ + 13347 "01111000" // /* MW 13 */ + 13348 "10100101" // /* MW 12 */ + 13349 "00000001" // /* MW 11 */ + 13350 "00000000" // /* MW 10 */ + 13351 "00000000" // /* MW 9 */ + 13352 "00000000" // /* MW 8 */ + 13353 "01011011" // /* MW 7 */ + 13354 "00000001" // /* MW 6 */ + 13355 "00100000" // /* MW 5 */ + 13356 "00000000" // /* MW 4 */ + 13357 "11110000" // /* MW 3 */ + 13358 "00101100" // /* MW 2 */ + 13359 "00000000" // /* MW 1 */ +.label ZLS_F_ZN12mllib_graphs14expand_wrapperI8bfloat16EEvRN3adf9io_bufferIT_NS2_9direction2inENS2_16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEEEERA3_KjRNS3_IS4_NS5_3outESG_EE_336 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 9 "expand_adf_wrapper.cpp" 57 20 first +.begin_of_loop +.end_of_loop +.loop_nesting 2 + 13360 "11100001" // NOPA; NOPB; VST bmll0, [p2], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13361 "00000000" // /* MW 15 */ + 13362 "00000000" // /* MW 14 */ + 13363 "01111000" // /* MW 13 */ + 13364 "10100101" // /* MW 12 */ + 13365 "00000001" // /* MW 11 */ + 13366 "00000000" // /* MW 10 */ + 13367 "00000000" // /* MW 9 */ + 13368 "10000000" // /* MW 8 */ + 13369 "00000110" // /* MW 7 */ + 13370 "00011100" // /* MW 6 */ + 13371 "00100010" // /* MW 5 */ + 13372 "00000000" // /* MW 4 */ + 13373 "11110000" // /* MW 3 */ + 13374 "00101100" // /* MW 2 */ + 13375 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs14expand_wrapperI8bfloat16EEvRN3adf9io_bufferIT_NS2_9direction2inENS2_16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEEEERA3_KjRNS3_IS4_NS5_3outESG_EE_352 +.src_ref 9 "expand_adf_wrapper.cpp" 51 4 first +.loop_nesting 1 + 13376 "00011000" // JNZD r2, r2, p1 /* MW 4 */ /* control_operation: words=4 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 13377 "01100000" // /* MW 3 */ + 13378 "10000100" // /* MW 2 */ + 13379 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13381 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13383 "00000000" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first +.delay_slot + 13384 "11011000" // VSHIFT x1, x1, x0, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13385 "00010010" // /* MW 3 */ + 13386 "10001000" // /* MW 2 */ + 13387 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13388 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13389 "01100111" // /* MW 3 */ + 13390 "00000001" // /* MW 2 */ + 13391 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 9 "expand_adf_wrapper.cpp" 61 16 first +.delay_slot + 13392 "11100001" // NOPA; NOPB; VST x1, [p2], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13393 "00000000" // /* MW 15 */ + 13394 "00000000" // /* MW 14 */ + 13395 "01111000" // /* MW 13 */ + 13396 "10100101" // /* MW 12 */ + 13397 "00000001" // /* MW 11 */ + 13398 "00000000" // /* MW 10 */ + 13399 "00000000" // /* MW 9 */ + 13400 "00000000" // /* MW 8 */ + 13401 "01010011" // /* MW 7 */ + 13402 "00011100" // /* MW 6 */ + 13403 "00100010" // /* MW 5 */ + 13404 "00000000" // /* MW 4 */ + 13405 "11110000" // /* MW 3 */ + 13406 "00101100" // /* MW 2 */ + 13407 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs14expand_wrapperI8bfloat16EEvRN3adf9io_bufferIT_NS2_9direction2inENS2_16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEEEERA3_KjRNS3_IS4_NS5_3outESG_EE_384 +.src_ref 9 "expand_adf_wrapper.cpp" 65 first +.loop_nesting 0 + 13408 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 13409 "00000000" // /* MW 3 */ + 13410 "00101000" // /* MW 2 */ + 13411 "00010000" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 64 26 +.delay_slot + 13412 "00011000" // ADD r0, r0, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13413 "00000111" // /* MW 3 */ + 13414 "00000000" // /* MW 2 */ + 13415 "00010000" // /* MW 1 */ +.src_ref 9 "expand_adf_wrapper.cpp" 64 26 first +.delay_slot + 13416 "10011000" // ST r0, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13417 "00010001" // /* MW 3 */ + 13418 "00000100" // /* MW 2 */ + 13419 "00001011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13421 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13423 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN12mllib_graphs14expand_wrapperI8bfloat16EEvRN3adf9io_bufferIT_NS2_9direction2inENS2_16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEEEERA3_KjRNS3_IS4_NS5_3outESG_EE__end +.label __ZN12mllib_graphs14expand_wrapperI8bfloat16EEvRN3adf9io_bufferIT_NS2_9direction2inENS2_16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEEEERA3_KjRNS3_IS4_NS5_3outESG_EE___func_end0 + 13425 "00000000" // /* MW 1 */ +.label __Z14_b8096_wrapperPPv___func_begin0 +.label _Z14_b8096_wrapperPPv +.function _b8096_wrapper _Z14_b8096_wrapperPPv +.src_ref 0 "0_0_reloadable11.cc" 72 first +.src_ref 0 "0_0_reloadable11.cc" 74 79 +.function_start + 13440 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13441 "11000000" // /* MW 3 */ + 13442 "01100000" // /* MW 2 */ + 13443 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 74 79 first + 13444 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13445 "00011110" // /* MW 3 */ + 13446 "00101100" // /* MW 2 */ + 13447 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 76 80 first + 13448 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13449 "00011110" // /* MW 3 */ + 13450 "11110101" // /* MW 2 */ + 13451 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 75 46 first + 13452 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13453 "10011110" // /* MW 3 */ + 13454 "00000100" // /* MW 2 */ + 13455 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable11.cc" 73 4 first +.tail_call + 13456 "10000100" // J #13024 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=13024 delay_slots=5 */ + 13457 "00000000" // /* MW 5 */ + 13458 "00000000" // /* MW 4 */ + 13459 "01110000" // /* MW 3 */ + 13460 "00011001" // /* MW 2 */ + 13461 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13463 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13465 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13467 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13469 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z14_b8096_wrapperPPv__end +.label __Z14_b8096_wrapperPPv___func_end0 + 13471 "00000000" // /* MW 1 */ +.label _ZN12me_primitive10udiv_dstepEjjRjS0_ +.function udiv_dstep _ZN12me_primitive10udiv_dstepEjjRjS0_ +.src_ref 10 "me_div.c" 108 19 +.src_ref 10 "me_div.c" 108 19 +.src_ref 10 "me_div.c" 115 4 first +.function_start + 13472 "11100100" // MOVX r3, #0; MOV r31, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13473 "01000001" // /* MW 5 */ + 13474 "10100000" // /* MW 4 */ + 13475 "00101111" // /* MW 3 */ + 13476 "11000000" // /* MW 2 */ + 13477 "00000000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13478 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13479 "00011100" // /* MW 3 */ + 13480 "11000110" // /* MW 2 */ + 13481 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13482 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13483 "00011100" // /* MW 3 */ + 13484 "11000110" // /* MW 2 */ + 13485 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13486 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13487 "00011100" // /* MW 3 */ + 13488 "11000110" // /* MW 2 */ + 13489 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13490 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13491 "00011100" // /* MW 3 */ + 13492 "11000110" // /* MW 2 */ + 13493 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13494 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13495 "00011100" // /* MW 3 */ + 13496 "11000110" // /* MW 2 */ + 13497 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13498 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13499 "00011100" // /* MW 3 */ + 13500 "11000110" // /* MW 2 */ + 13501 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13502 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13503 "00011100" // /* MW 3 */ + 13504 "11000110" // /* MW 2 */ + 13505 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13506 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13507 "00011100" // /* MW 3 */ + 13508 "11000110" // /* MW 2 */ + 13509 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13510 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13511 "00011100" // /* MW 3 */ + 13512 "11000110" // /* MW 2 */ + 13513 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13514 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13515 "00011100" // /* MW 3 */ + 13516 "11000110" // /* MW 2 */ + 13517 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13518 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13519 "00011100" // /* MW 3 */ + 13520 "11000110" // /* MW 2 */ + 13521 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13522 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13523 "00011100" // /* MW 3 */ + 13524 "11000110" // /* MW 2 */ + 13525 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13526 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13527 "00011100" // /* MW 3 */ + 13528 "11000110" // /* MW 2 */ + 13529 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13530 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13531 "00011100" // /* MW 3 */ + 13532 "11000110" // /* MW 2 */ + 13533 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13534 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13535 "00011100" // /* MW 3 */ + 13536 "11000110" // /* MW 2 */ + 13537 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13538 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13539 "00011100" // /* MW 3 */ + 13540 "11000110" // /* MW 2 */ + 13541 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13542 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13543 "00011100" // /* MW 3 */ + 13544 "11000110" // /* MW 2 */ + 13545 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13546 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13547 "00011100" // /* MW 3 */ + 13548 "11000110" // /* MW 2 */ + 13549 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13550 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13551 "00011100" // /* MW 3 */ + 13552 "11000110" // /* MW 2 */ + 13553 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13554 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13555 "00011100" // /* MW 3 */ + 13556 "11000110" // /* MW 2 */ + 13557 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13558 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13559 "00011100" // /* MW 3 */ + 13560 "11000110" // /* MW 2 */ + 13561 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13562 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13563 "00011100" // /* MW 3 */ + 13564 "11000110" // /* MW 2 */ + 13565 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13566 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13567 "00011100" // /* MW 3 */ + 13568 "11000110" // /* MW 2 */ + 13569 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13570 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13571 "00011100" // /* MW 3 */ + 13572 "11000110" // /* MW 2 */ + 13573 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13574 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13575 "00011100" // /* MW 3 */ + 13576 "11000110" // /* MW 2 */ + 13577 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13578 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13579 "00011100" // /* MW 3 */ + 13580 "11000110" // /* MW 2 */ + 13581 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13582 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13583 "00011100" // /* MW 3 */ + 13584 "11000110" // /* MW 2 */ + 13585 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 13586 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13587 "00011100" // /* MW 3 */ + 13588 "11000110" // /* MW 2 */ + 13589 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 119 first + 13590 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 13591 "00000000" // /* MW 3 */ + 13592 "00101000" // /* MW 2 */ + 13593 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 first +.delay_slot + 13594 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13595 "00011100" // /* MW 3 */ + 13596 "11000110" // /* MW 2 */ + 13597 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 +.delay_slot + 13598 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13599 "00011100" // /* MW 3 */ + 13600 "11000110" // /* MW 2 */ + 13601 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 +.delay_slot + 13602 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13603 "00011100" // /* MW 3 */ + 13604 "11000110" // /* MW 2 */ + 13605 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 +.delay_slot + 13606 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13607 "00011100" // /* MW 3 */ + 13608 "11000110" // /* MW 2 */ + 13609 "00010000" // /* MW 1 */ +.delay_slot + 13610 "11111000" // MOV r2, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13611 "10100000" // /* MW 3 */ + 13612 "10011111" // /* MW 2 */ +.label _ZN12me_primitive10udiv_dstepEjjRjS0___end + 13613 "00011000" // /* MW 1 */ +.dir 0 "/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable11/src" +.dir 1 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer" +.dir 2 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common" +.dir 3 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc" +.dir 4 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2" +.dir 5 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p" +.dir 6 "/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend" +.dir 7 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib" +.dir 8 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail" +.dir 9 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf" +.dir 10 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21990/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib" diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable81/Release/0_0_reloadable81.txt b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable81/Release/0_0_reloadable81.txt new file mode 100644 index 0000000000000000000000000000000000000000..03c9c4fd1e8f7599547a677b213888da30ce58d7 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable81/Release/0_0_reloadable81.txt @@ -0,0 +1,3883 @@ +Contents of the .debug_line section: + +sigmoid_carf_templated_lut.h: +File name Line number Starting address View Stmt + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 164 0xb80 x +elementwise_binary_shared.h 170 0xb80 1 +elementwise_binary_shared.h 170 0xb80 2 x +elementwise_binary_shared.h 175 0xb8a +elementwise_binary_shared.h 175 0xb8a 1 +elementwise_binary_shared.h 175 0xb8a 2 +elementwise_binary_shared.h 170 0xb9e x +elementwise_binary_shared.h 171 0xba2 x +elementwise_binary_shared.h 171 0xbb2 +elementwise_binary_shared.h 172 0xbb6 x +elementwise_binary_shared.h 172 0xbc6 +elementwise_binary_shared.h 173 0xbca x +elementwise_binary_shared.h 173 0xbda +elementwise_binary_shared.h 175 0xbde x +elementwise_binary_shared.h 176 0xbe2 x +elementwise_binary_shared.h 175 0xbe6 +elementwise_binary_shared.h 175 0xbec x +elementwise_binary_shared.h 175 0xbf0 +elementwise_binary_shared.h 175 0xbf4 +elementwise_binary_shared.h 66 0xc50 x +elementwise_binary_shared.h 78 0xc50 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 134 0xc54 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 78 0xc54 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 134 0xc5a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 81 0xc5a 1 x +elementwise_binary_shared.h 83 0xc60 x +elementwise_binary_shared.h 83 0xc6e +elementwise_binary_shared.h 83 0xc72 +elementwise_binary_shared.h 83 0xc76 +elementwise_binary_shared.h 66 0xc7c +elementwise_binary_shared.h 78 0xc8a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 102 0xc90 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 85 0xc90 1 +elementwise_binary_shared.h 90 0xc90 2 +elementwise_binary_shared.h 85 0xc96 x +elementwise_binary_shared.h 90 0xc96 1 x +elementwise_binary_shared.h 90 0xca6 +elementwise_binary_shared.h 90 0xca6 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xcac + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xcac 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 132 0xcac 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xcb0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 130 0xcb0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xcba + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 128 0xcba 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0xccc x +vector.hpp 1139 0xcd0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 128 0xcd0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xcd4 +vector.hpp 1159 0xcd4 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 130 0xcd4 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xce6 +vector.hpp 1139 0xce6 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xce6 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 128 0xce6 3 +elementwise_binary_shared.h 132 0xce6 4 +elementwise_binary_shared.h 109 0xd00 +elementwise_binary_shared.h 109 0xd04 x +elementwise_binary_shared.h 109 0xd08 +elementwise_binary_shared.h 109 0xd0e +elementwise_binary_shared.h 109 0xd1a +elementwise_binary_shared.h 109 0xd20 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xd30 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xd30 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 109 0xd30 2 +elementwise_binary_shared.h 132 0xd30 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xd36 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 128 0xd36 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xd3a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 130 0xd3a 1 +elementwise_binary_shared.h 109 0xd42 +elementwise_binary_shared.h 109 0xd46 +elementwise_binary_shared.h 109 0xd4a +elementwise_binary_shared.h 109 0xd4e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 102 0xd54 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 109 0xd54 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0xd5a x +vector.hpp 1139 0xd60 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 125 0xd60 1 x +elementwise_binary_shared.h 125 0xd60 2 x +elementwise_binary_shared.h 128 0xd60 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xd6c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 125 0xd6c 1 +elementwise_binary_shared.h 125 0xd6c 2 +elementwise_binary_shared.h 130 0xd6c 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xd78 x +vector.hpp 1139 0xd78 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xd78 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 128 0xd78 3 x +elementwise_binary_shared.h 132 0xd78 4 x +elementwise_binary_shared.h 136 0xd78 5 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xd80 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 125 0xd80 1 +elementwise_binary_shared.h 130 0xd80 2 x +elementwise_binary_shared.h 125 0xd8a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xd8e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xd8e 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 132 0xd8e 2 x +elementwise_binary_shared.h 125 0xd94 x +elementwise_binary_shared.h 125 0xd98 +elementwise_binary_shared.h 136 0xd98 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xda0 x +vector.hpp 1139 0xda0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 128 0xda0 2 x +elementwise_binary_shared.h 130 0xda0 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xda6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xda6 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 132 0xda6 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0xdb0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0xdb0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 144 0xdb0 2 +elementwise_binary_shared.h 136 0xdc0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xdd0 x +vector.hpp 1139 0xdd0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 128 0xdd0 2 x +elementwise_binary_shared.h 130 0xdd0 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xde0 +vector.hpp 1159 0xde0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xde0 2 x +accum.hpp 1119 0xde0 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 132 0xde0 4 x +elementwise_binary_shared.h 144 0xde0 5 x +elementwise_binary_shared.h 136 0xe00 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0xe12 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0xe12 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 144 0xe12 2 x +elementwise_binary_shared.h 136 0xe18 x +elementwise_binary_shared.h 146 0xe1e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0xe22 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0xe22 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 144 0xe22 2 x +elementwise_binary_shared.h 146 0xe26 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0xe2e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0xe2e 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 144 0xe2e 2 x +elementwise_binary_shared.h 164 0x10d0 x +elementwise_binary_shared.h 170 0x10d0 1 +elementwise_binary_shared.h 170 0x10d0 2 x +elementwise_binary_shared.h 175 0x10da +elementwise_binary_shared.h 175 0x10da 1 +elementwise_binary_shared.h 175 0x10da 2 +elementwise_binary_shared.h 170 0x10ee x +elementwise_binary_shared.h 171 0x10f2 x +elementwise_binary_shared.h 171 0x1102 +elementwise_binary_shared.h 172 0x1106 x +elementwise_binary_shared.h 172 0x1116 +elementwise_binary_shared.h 173 0x111a x +elementwise_binary_shared.h 173 0x112a +elementwise_binary_shared.h 175 0x112e x +elementwise_binary_shared.h 176 0x1132 x +elementwise_binary_shared.h 175 0x1136 +elementwise_binary_shared.h 175 0x113c x +elementwise_binary_shared.h 175 0x1140 +elementwise_binary_shared.h 175 0x1144 +elementwise_binary_shared.h 164 0x18b0 x +elementwise_binary_shared.h 170 0x18b0 1 +elementwise_binary_shared.h 170 0x18b0 2 x +elementwise_binary_shared.h 175 0x18ba +elementwise_binary_shared.h 175 0x18ba 1 +elementwise_binary_shared.h 175 0x18ba 2 +elementwise_binary_shared.h 170 0x18ce x +elementwise_binary_shared.h 171 0x18d2 x +elementwise_binary_shared.h 171 0x18e2 +elementwise_binary_shared.h 172 0x18e6 x +elementwise_binary_shared.h 172 0x18f6 +elementwise_binary_shared.h 173 0x18fa x +elementwise_binary_shared.h 173 0x190a +elementwise_binary_shared.h 175 0x190e x +elementwise_binary_shared.h 176 0x1912 x +elementwise_binary_shared.h 175 0x1916 +elementwise_binary_shared.h 175 0x191c x +elementwise_binary_shared.h 175 0x1920 +elementwise_binary_shared.h 175 0x1924 +elementwise_binary_shared.h 178 0xc00 +elementwise_binary_shared.h 178 0xc00 1 x +elementwise_binary_shared.h 179 0xc0a x +elementwise_binary_shared.h 179 0xc10 +elementwise_binary_shared.h 181 0xc20 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h: +mul_impl.h 152 0xc20 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 181 0xc36 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h: +mul_impl.h 152 0xc3a +mul_impl.h 152 0xc3e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 181 0xc42 x +elementwise_binary_shared.h 196 0xe40 x +elementwise_binary_shared.h 203 0xe40 1 +elementwise_binary_shared.h 203 0xe46 x +elementwise_binary_shared.h 204 0xe46 1 +elementwise_binary_shared.h 204 0xe46 2 +elementwise_binary_shared.h 206 0xe46 3 +elementwise_binary_shared.h 206 0xe46 4 +elementwise_binary_shared.h 203 0xe5e +elementwise_binary_shared.h 203 0xe5e 1 +elementwise_binary_shared.h 209 0xe64 +elementwise_binary_shared.h 196 0xe68 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 538 0xe76 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector_native_types.hpp: +vector_native_types.hpp 374 0xe76 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 206 0xe7a x +elementwise_binary_shared.h 204 0xea0 x +elementwise_binary_shared.h 204 0xea6 +elementwise_binary_shared.h 204 0xea6 1 +elementwise_binary_shared.h 209 0xec0 +elementwise_binary_shared.h 209 0xec4 x +elementwise_binary_shared.h 209 0xec8 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 181 0xee0 x +superkernels.cpp 186 0xee0 1 +superkernels.cpp 186 0xee6 x +superkernels.cpp 181 0xeec +superkernels.cpp 183 0xefa + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0xf04 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 186 0xf0c +superkernels.cpp 186 0xf0c 1 +superkernels.cpp 183 0xf12 x +superkernels.cpp 183 0xf16 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0xf1e + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 183 0xf1e 1 +superkernels.cpp 189 0xf26 +superkernels.cpp 200 0xf26 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0xf2c +tile.hpp 74 0xf32 +tile.hpp 86 0xf32 1 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 189 0xf3e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0xf48 +tile.hpp 74 0xf4c +tile.hpp 74 0xf50 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 193 0xf60 +superkernels.cpp 193 0xf66 x +superkernels.cpp 193 0xf66 1 +superkernels.cpp 191 0xf70 +superkernels.cpp 193 0xf70 1 +superkernels.cpp 200 0xf70 2 +superkernels.cpp 191 0xf7a x +superkernels.cpp 193 0xf7a 1 +superkernels.cpp 198 0xf7a 2 +superkernels.cpp 191 0xf8e +superkernels.cpp 193 0xf96 x +superkernels.cpp 191 0xf9a x +superkernels.cpp 193 0xfa0 x +superkernels.cpp 198 0xfb0 +superkernels.cpp 200 0xfb0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0xfc0 x +io_buffer_main.h 242 0xfc8 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 197 0xfc8 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0xfd2 +io_buffer_main.h 242 0xfd6 +io_buffer_main.h 259 0xfda x +io_buffer_main.h 242 0xfe8 x +io_buffer_main.h 242 0xfe8 1 x +io_buffer_main.h 242 0xfec +io_buffer_main.h 419 0xff0 +io_buffer_main.h 419 0xffa x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 198 0xffe +superkernels.cpp 197 0x1008 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x100c x +io_buffer_main.h 348 0x100c 1 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 198 0x1012 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x1016 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 200 0x101c x +superkernels.cpp 197 0x1024 x +superkernels.cpp 197 0x1028 +superkernels.cpp 198 0x102c x +superkernels.cpp 198 0x1030 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 348 0x1040 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 202 0x1040 1 +superkernels.cpp 203 0x1040 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 351 0x104a +io_buffer_main.h 449 0x104a 1 +io_buffer_main.h 449 0x1058 x +io_buffer_main.h 351 0x105c x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 202 0x105c 1 +superkernels.cpp 202 0x1066 x +superkernels.cpp 202 0x106a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 351 0x1076 x +io_buffer_main.h 351 0x107a + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 202 0x107e x +superkernels.cpp 202 0x1082 +superkernels.cpp 203 0x1092 +superkernels.cpp 203 0x1096 x +superkernels.cpp 205 0x10a0 +superkernels.cpp 205 0x10b4 x +superkernels.cpp 205 0x10bc + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 178 0x1150 +elementwise_binary_shared.h 178 0x1150 1 x +elementwise_binary_shared.h 179 0x115a x +elementwise_binary_shared.h 179 0x1164 +elementwise_binary_shared.h 179 0x116e +elementwise_binary_shared.h 179 0x1180 +elementwise_binary_shared.h 181 0x1180 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h: +add_impl.h 105 0x1180 2 +add_impl.h 105 0x118a +add_impl.h 106 0x118a 1 +add_impl.h 106 0x118a 2 +add_impl.h 105 0x1194 x +add_impl.h 106 0x1194 1 +add_impl.h 106 0x119e x +add_impl.h 106 0x11a6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 181 0x11aa x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h: +add_impl.h 106 0x11ae + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 181 0x11b2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h: +add_impl.h 106 0x11b8 x +add_impl.h 106 0x11bc + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 186 0x11d0 x +elementwise_binary_shared.h 191 0x11d0 1 x +elementwise_binary_shared.h 191 0x11d6 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 240 0x11f0 x +superkernels.cpp 245 0x11f0 1 +superkernels.cpp 245 0x11f6 x +superkernels.cpp 240 0x11fc + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x1202 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 242 0x1202 1 x +superkernels.cpp 245 0x121e x +superkernels.cpp 245 0x121e 1 x +superkernels.cpp 242 0x1224 x +superkernels.cpp 242 0x1228 +superkernels.cpp 242 0x122e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x1236 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 248 0x123a +superkernels.cpp 250 0x123a 1 +superkernels.cpp 252 0x123a 2 +superkernels.cpp 264 0x123a 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x1244 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 248 0x1244 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 74 0x124e +tile.hpp 86 0x124e 1 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 248 0x125a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x1264 +tile.hpp 74 0x1268 +tile.hpp 74 0x126c x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 252 0x1270 +superkernels.cpp 252 0x1270 1 x +superkernels.cpp 252 0x127a +superkernels.cpp 252 0x127a 1 +superkernels.cpp 261 0x127a 2 +superkernels.cpp 250 0x1284 x +superkernels.cpp 253 0x1284 1 +superkernels.cpp 262 0x1284 2 +superkernels.cpp 250 0x129a +superkernels.cpp 252 0x12a0 x +superkernels.cpp 250 0x12a4 x +superkernels.cpp 252 0x12a8 x +superkernels.cpp 253 0x12ac x +superkernels.cpp 261 0x12b0 +superkernels.cpp 262 0x12b6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x12c0 x +io_buffer_main.h 242 0x12c4 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 256 0x12c4 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x12ce +io_buffer_main.h 242 0x12d2 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 256 0x12d6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 259 0x12da x +io_buffer_main.h 242 0x12e6 x +io_buffer_main.h 242 0x12e6 1 x +io_buffer_main.h 242 0x12ea + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 256 0x12ea 1 x +superkernels.cpp 256 0x12f0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 419 0x12f4 +io_buffer_main.h 419 0x12f4 1 +io_buffer_main.h 419 0x12fe x +io_buffer_main.h 242 0x1302 x +io_buffer_main.h 242 0x130a +io_buffer_main.h 242 0x130e +io_buffer_main.h 242 0x1312 +io_buffer_main.h 259 0x1316 x +io_buffer_main.h 242 0x1324 x +io_buffer_main.h 242 0x1324 1 x +io_buffer_main.h 242 0x1328 +io_buffer_main.h 419 0x1334 x +io_buffer_main.h 348 0x1338 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 261 0x1338 1 +superkernels.cpp 262 0x1338 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x1346 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 261 0x134a x +superkernels.cpp 262 0x1350 x +superkernels.cpp 264 0x1350 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x1356 x +io_buffer_main.h 149 0x135a +io_buffer_main.h 351 0x135e +io_buffer_main.h 351 0x135e 1 +io_buffer_main.h 149 0x1364 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 264 0x136a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 351 0x1370 +io_buffer_main.h 351 0x1370 1 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 261 0x1374 x +superkernels.cpp 262 0x1378 x +superkernels.cpp 262 0x137c +superkernels.cpp 261 0x1380 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 348 0x1390 x +io_buffer_main.h 351 0x1390 1 +io_buffer_main.h 351 0x1390 2 +io_buffer_main.h 351 0x1390 3 +io_buffer_main.h 351 0x1390 4 +io_buffer_main.h 449 0x1390 5 +io_buffer_main.h 449 0x1390 6 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 268 0x139a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 449 0x13aa x +io_buffer_main.h 351 0x13ae x +io_buffer_main.h 348 0x13b2 +io_buffer_main.h 351 0x13c0 +io_buffer_main.h 348 0x13c4 x +io_buffer_main.h 351 0x13c4 1 +io_buffer_main.h 449 0x13d6 x +io_buffer_main.h 351 0x13da x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 268 0x13da 1 +superkernels.cpp 269 0x13da 2 +superkernels.cpp 268 0x13e4 x +superkernels.cpp 268 0x13e8 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 351 0x13f4 x +io_buffer_main.h 351 0x13f8 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 268 0x13fc x +superkernels.cpp 268 0x1400 +superkernels.cpp 269 0x1410 +superkernels.cpp 269 0x1414 x +superkernels.cpp 271 0x1420 +superkernels.cpp 271 0x1436 x +superkernels.cpp 271 0x143e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 100 0x1450 x +elementwise_binary.h 103 0x1450 1 x +elementwise_binary.h 101 0x1454 +elementwise_binary.h 101 0x145a +elementwise_binary.h 101 0x145e x +elementwise_binary.h 101 0x1462 +elementwise_binary.h 89 0x1470 x +elementwise_binary.h 92 0x1470 1 +elementwise_binary.h 92 0x1470 2 x +elementwise_binary.h 89 0x147a +elementwise_binary.h 92 0x148c x +elementwise_binary.h 93 0x1490 x +elementwise_binary.h 93 0x14a0 +elementwise_binary.h 94 0x14a4 x +elementwise_binary.h 94 0x14b4 +elementwise_binary.h 95 0x14b8 x +elementwise_binary.h 96 0x14c0 x +elementwise_binary.h 95 0x14ce x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h: +mul_impl.h 93 0x14d2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 98 0x14e0 +elementwise_binary.h 98 0x14f2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h: +mul_impl.h 93 0x14fc + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 98 0x1500 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h: +mul_impl.h 93 0x1500 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 108 0x1510 x +elementwise_binary.h 115 0x1510 1 +elementwise_binary.h 115 0x1510 2 +elementwise_binary.h 115 0x151a x +elementwise_binary.h 115 0x151a 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x1524 +mul_acc32_fp.hpp 36 0x1524 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 115 0x1524 2 +elementwise_binary.h 115 0x1524 3 +elementwise_binary.h 115 0x152e +elementwise_binary.h 127 0x152e 1 x +elementwise_binary.h 115 0x1538 x +elementwise_binary.h 127 0x1538 1 +elementwise_binary.h 115 0x1548 +elementwise_binary.h 127 0x154c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1550 x +vector.hpp 1139 0x1550 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0x1550 2 x +elementwise_binary.h 148 0x1550 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1556 +vector.hpp 1139 0x1556 1 +vector.hpp 1159 0x1556 2 +vector.hpp 1159 0x1556 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1556 4 +accum.hpp 1119 0x1556 5 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 154 0x1556 6 +elementwise_binary.h 170 0x1556 7 x +elementwise_binary.h 172 0x1556 8 x +elementwise_binary.h 177 0x1556 9 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x155e x +vector.hpp 1139 0x155e 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0x155e 2 x +elementwise_binary.h 148 0x155e 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1564 +vector.hpp 1139 0x1564 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 170 0x1564 2 x +elementwise_binary.h 172 0x1564 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x156a x +vector.hpp 1139 0x156a 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0x156a 2 x +elementwise_binary.h 148 0x156a 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1570 +vector.hpp 1139 0x1570 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 170 0x1570 2 x +elementwise_binary.h 172 0x1570 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1576 x +vector.hpp 1139 0x1576 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0x1576 2 x +elementwise_binary.h 148 0x1576 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x157c +vector.hpp 1139 0x157c 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x157c 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 170 0x157c 3 x +elementwise_binary.h 172 0x157c 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1586 x +vector.hpp 1139 0x1586 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x1586 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0x1586 3 x +elementwise_binary.h 148 0x1586 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1590 +vector.hpp 1139 0x1590 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x1590 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 170 0x1590 3 x +elementwise_binary.h 172 0x1590 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x159a x +vector.hpp 1139 0x159a 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x159a 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0x159a 3 x +elementwise_binary.h 148 0x159a 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x15a4 +vector.hpp 1139 0x15a4 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x15a4 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 170 0x15a4 3 x +elementwise_binary.h 172 0x15a4 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x15b0 x +vector.hpp 1139 0x15b0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x15b0 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0x15b0 3 x +elementwise_binary.h 148 0x15b0 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x15c0 +vector.hpp 1139 0x15c0 1 +vector.hpp 1159 0x15c0 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x15c0 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x15c0 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 170 0x15c0 5 x +elementwise_binary.h 172 0x15c0 6 x +elementwise_binary.h 177 0x15c0 7 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x15d0 x +vector.hpp 1139 0x15d0 1 x +vector.hpp 1159 0x15d0 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x15d0 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x15d0 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0x15d0 5 x +elementwise_binary.h 148 0x15d0 6 x +elementwise_binary.h 154 0x15d0 7 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x15e0 +vector.hpp 1139 0x15e0 1 +vector.hpp 1159 0x15e0 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x15e0 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x15e0 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 170 0x15e0 5 x +elementwise_binary.h 172 0x15e0 6 x +elementwise_binary.h 177 0x15e0 7 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x15f0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x15f0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x15f0 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 154 0x15f0 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x15f8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x15f8 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x15f8 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 177 0x15f8 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1600 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1600 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x1600 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 154 0x1600 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1608 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1608 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x1608 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 177 0x1608 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1610 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1610 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x1610 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 154 0x1610 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1618 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1618 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x1618 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 177 0x1618 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1620 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1620 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x1620 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 154 0x1620 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1628 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1628 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 177 0x1628 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x162c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x162c 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 131 0x162c 2 x +elementwise_binary.h 154 0x162c 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1632 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1632 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 177 0x1632 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1636 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1636 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 154 0x1636 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x163a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x163a 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 177 0x163a 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x163e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x163e 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 154 0x163e 2 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 277 0x1650 x +superkernels.cpp 282 0x1650 1 +superkernels.cpp 282 0x1656 x +superkernels.cpp 277 0x165c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x1662 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 279 0x1662 1 x +superkernels.cpp 282 0x167e x +superkernels.cpp 282 0x167e 1 x +superkernels.cpp 279 0x1684 x +superkernels.cpp 279 0x1688 +superkernels.cpp 279 0x168e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x1696 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 285 0x169a +superkernels.cpp 287 0x169a 1 +superkernels.cpp 289 0x169a 2 +superkernels.cpp 301 0x169a 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x16a4 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 285 0x16a4 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 74 0x16ae +tile.hpp 86 0x16ae 1 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 285 0x16ba x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x16c4 +tile.hpp 74 0x16c8 +tile.hpp 74 0x16cc x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 289 0x16d0 +superkernels.cpp 289 0x16d0 1 x +superkernels.cpp 289 0x16da +superkernels.cpp 289 0x16da 1 +superkernels.cpp 298 0x16da 2 +superkernels.cpp 287 0x16e4 x +superkernels.cpp 290 0x16e4 1 +superkernels.cpp 299 0x16e4 2 +superkernels.cpp 287 0x16fa +superkernels.cpp 289 0x1700 x +superkernels.cpp 287 0x1704 x +superkernels.cpp 289 0x1708 x +superkernels.cpp 290 0x170c x +superkernels.cpp 298 0x1710 +superkernels.cpp 299 0x1716 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x1720 x +io_buffer_main.h 242 0x1724 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 293 0x1724 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x172e +io_buffer_main.h 242 0x1732 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 293 0x1736 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 259 0x173a x +io_buffer_main.h 242 0x1746 x +io_buffer_main.h 242 0x1746 1 x +io_buffer_main.h 242 0x174a + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 293 0x174a 1 x +superkernels.cpp 293 0x1750 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 419 0x1754 +io_buffer_main.h 419 0x1754 1 +io_buffer_main.h 419 0x175e x +io_buffer_main.h 242 0x1762 x +io_buffer_main.h 242 0x176a +io_buffer_main.h 242 0x176e +io_buffer_main.h 242 0x1772 +io_buffer_main.h 259 0x1776 x +io_buffer_main.h 242 0x1784 x +io_buffer_main.h 242 0x1784 1 x +io_buffer_main.h 242 0x1788 +io_buffer_main.h 419 0x1794 x +io_buffer_main.h 348 0x1798 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 298 0x1798 1 +superkernels.cpp 299 0x1798 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x17a6 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 298 0x17aa x +superkernels.cpp 299 0x17b0 x +superkernels.cpp 301 0x17b0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x17b6 x +io_buffer_main.h 149 0x17ba +io_buffer_main.h 351 0x17be +io_buffer_main.h 351 0x17be 1 +io_buffer_main.h 149 0x17c4 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 301 0x17ca x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 351 0x17d0 +io_buffer_main.h 351 0x17d0 1 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 298 0x17d4 x +superkernels.cpp 299 0x17d8 x +superkernels.cpp 299 0x17dc +superkernels.cpp 298 0x17e0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 348 0x17f0 x +io_buffer_main.h 351 0x17f0 1 +io_buffer_main.h 351 0x17f0 2 +io_buffer_main.h 351 0x17f0 3 +io_buffer_main.h 351 0x17f0 4 +io_buffer_main.h 449 0x17f0 5 +io_buffer_main.h 449 0x17f0 6 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 305 0x17fa + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 449 0x180a x +io_buffer_main.h 351 0x180e x +io_buffer_main.h 348 0x1812 +io_buffer_main.h 351 0x1820 +io_buffer_main.h 348 0x1824 x +io_buffer_main.h 351 0x1824 1 +io_buffer_main.h 449 0x1836 x +io_buffer_main.h 351 0x183a x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 305 0x183a 1 +superkernels.cpp 306 0x183a 2 +superkernels.cpp 305 0x1844 x +superkernels.cpp 305 0x1848 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 351 0x1854 x +io_buffer_main.h 351 0x1858 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 305 0x185c x +superkernels.cpp 305 0x1860 +superkernels.cpp 306 0x1870 +superkernels.cpp 306 0x1874 x +superkernels.cpp 308 0x1880 +superkernels.cpp 308 0x1896 x +superkernels.cpp 308 0x189e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 178 0x1930 +elementwise_binary_shared.h 178 0x1930 1 x +elementwise_binary_shared.h 179 0x193a x +elementwise_binary_shared.h 179 0x1940 +elementwise_binary_shared.h 181 0x1950 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/sub_impl.h: +sub_impl.h 88 0x1950 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 181 0x1966 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/sub_impl.h: +sub_impl.h 88 0x196a +sub_impl.h 88 0x196e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 181 0x1972 x +elementwise_binary_shared.h 186 0x1980 x +elementwise_binary_shared.h 191 0x1980 1 x +elementwise_binary_shared.h 191 0x1986 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 314 0x19a0 x +superkernels.cpp 319 0x19a0 1 +superkernels.cpp 319 0x19a6 x +superkernels.cpp 314 0x19ac + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x19b2 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 316 0x19b2 1 x +superkernels.cpp 319 0x19ce x +superkernels.cpp 319 0x19ce 1 x +superkernels.cpp 316 0x19d4 x +superkernels.cpp 316 0x19d8 +superkernels.cpp 316 0x19de + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x19e6 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 322 0x19ea +superkernels.cpp 324 0x19ea 1 +superkernels.cpp 326 0x19ea 2 +superkernels.cpp 338 0x19ea 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x19f4 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 322 0x19f4 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 74 0x19fe +tile.hpp 86 0x19fe 1 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 322 0x1a0a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x1a14 +tile.hpp 74 0x1a18 +tile.hpp 74 0x1a1c x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 326 0x1a20 +superkernels.cpp 326 0x1a20 1 x +superkernels.cpp 326 0x1a2a +superkernels.cpp 326 0x1a2a 1 +superkernels.cpp 335 0x1a2a 2 +superkernels.cpp 324 0x1a34 x +superkernels.cpp 327 0x1a34 1 +superkernels.cpp 336 0x1a34 2 +superkernels.cpp 324 0x1a4a +superkernels.cpp 326 0x1a50 x +superkernels.cpp 324 0x1a54 x +superkernels.cpp 326 0x1a58 x +superkernels.cpp 327 0x1a5c x +superkernels.cpp 335 0x1a60 +superkernels.cpp 336 0x1a66 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x1a70 x +io_buffer_main.h 242 0x1a74 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 330 0x1a74 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x1a7e +io_buffer_main.h 242 0x1a82 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 330 0x1a86 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 259 0x1a8a x +io_buffer_main.h 242 0x1a96 x +io_buffer_main.h 242 0x1a96 1 x +io_buffer_main.h 242 0x1a9a + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 330 0x1a9a 1 x +superkernels.cpp 330 0x1aa0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 419 0x1aa4 +io_buffer_main.h 419 0x1aa4 1 +io_buffer_main.h 419 0x1aae x +io_buffer_main.h 242 0x1ab2 x +io_buffer_main.h 242 0x1aba +io_buffer_main.h 242 0x1abe +io_buffer_main.h 242 0x1ac2 +io_buffer_main.h 259 0x1ac6 x +io_buffer_main.h 242 0x1ad4 x +io_buffer_main.h 242 0x1ad4 1 x +io_buffer_main.h 242 0x1ad8 +io_buffer_main.h 419 0x1ae4 x +io_buffer_main.h 348 0x1ae8 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 335 0x1ae8 1 +superkernels.cpp 336 0x1ae8 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x1af6 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 335 0x1afa x +superkernels.cpp 336 0x1b00 x +superkernels.cpp 338 0x1b00 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x1b06 x +io_buffer_main.h 149 0x1b0a +io_buffer_main.h 351 0x1b0e +io_buffer_main.h 351 0x1b0e 1 +io_buffer_main.h 149 0x1b14 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 338 0x1b1a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 351 0x1b20 +io_buffer_main.h 351 0x1b20 1 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 335 0x1b24 x +superkernels.cpp 336 0x1b28 x +superkernels.cpp 336 0x1b2c +superkernels.cpp 335 0x1b30 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 348 0x1b40 x +io_buffer_main.h 351 0x1b40 1 +io_buffer_main.h 351 0x1b40 2 +io_buffer_main.h 351 0x1b40 3 +io_buffer_main.h 351 0x1b40 4 +io_buffer_main.h 449 0x1b40 5 +io_buffer_main.h 449 0x1b40 6 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 342 0x1b4a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 449 0x1b5a x +io_buffer_main.h 351 0x1b5e x +io_buffer_main.h 348 0x1b62 +io_buffer_main.h 351 0x1b70 +io_buffer_main.h 348 0x1b74 x +io_buffer_main.h 351 0x1b74 1 +io_buffer_main.h 449 0x1b86 x +io_buffer_main.h 351 0x1b8a x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 342 0x1b8a 1 +superkernels.cpp 343 0x1b8a 2 +superkernels.cpp 342 0x1b94 x +superkernels.cpp 342 0x1b98 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 351 0x1ba4 x +io_buffer_main.h 351 0x1ba8 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 342 0x1bac x +superkernels.cpp 342 0x1bb0 +superkernels.cpp 343 0x1bc0 +superkernels.cpp 343 0x1bc4 x +superkernels.cpp 345 0x1bd0 +superkernels.cpp 345 0x1be6 x +superkernels.cpp 345 0x1bee + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h: +reduce_base.h 144 0x1c00 x +reduce_base.h 146 0x1c00 1 +reduce_base.h 146 0x1c00 2 x +reduce_base.h 152 0x1c0a +reduce_base.h 154 0x1c0a 1 +reduce_base.h 155 0x1c0a 2 +reduce_base.h 146 0x1c1e x +reduce_base.h 147 0x1c22 x +reduce_base.h 147 0x1c32 +reduce_base.h 189 0x1c32 1 x +reduce_base.h 148 0x1c38 x +reduce_base.h 148 0x1c48 +reduce_base.h 191 0x1c48 1 x +reduce_base.h 149 0x1c4e x +reduce_base.h 149 0x1c5e +reduce_base.h 155 0x1c5e 1 x +reduce_base.h 150 0x1c64 x +reduce_base.h 150 0x1c74 +reduce_base.h 153 0x1c74 1 +reduce_base.h 151 0x1c7c x +reduce_base.h 151 0x1c8c +reduce_base.h 153 0x1c8c 1 x +reduce_base.h 152 0x1c92 x +reduce_base.h 152 0x1ca2 +reduce_base.h 153 0x1ca2 1 +reduce_base.h 153 0x1caa x +reduce_base.h 153 0x1cae +reduce_base.h 155 0x1cb2 x +reduce_base.h 167 0x1cb8 +reduce_base.h 172 0x1cb8 1 +reduce_base.h 187 0x1cb8 2 +reduce_base.h 193 0x1cb8 3 +reduce_base.h 195 0x1cb8 4 +reduce_base.h 187 0x1cbc x +reduce_base.h 193 0x1cc0 x +reduce_base.h 187 0x1cc4 x +reduce_base.h 154 0x1cc8 x +reduce_base.h 193 0x1cc8 1 x +reduce_base.h 155 0x1cce +reduce_base.h 155 0x1cd2 x +reduce_base.h 155 0x1cd6 +reduce_base.h 178 0x1ce4 x +reduce_base.h 155 0x1ce8 +reduce_base.h 155 0x1cec x +reduce_base.h 155 0x1cf0 +reduce_base.h 186 0x1d10 +reduce_base.h 186 0x1d10 1 +reduce_base.h 188 0x1d10 2 +reduce_base.h 190 0x1d10 3 +reduce_base.h 192 0x1d1a +reduce_base.h 194 0x1d1a 1 +reduce_base.h 195 0x1d1a 2 x +reduce_base.h 194 0x1d24 +reduce_base.h 194 0x1d24 1 x +reduce_base.h 186 0x1d30 x +reduce_base.h 187 0x1d40 x +reduce_base.h 188 0x1d50 x +reduce_base.h 189 0x1d60 x +reduce_base.h 190 0x1d70 x +reduce_base.h 191 0x1d80 x +reduce_base.h 192 0x1d84 x +reduce_base.h 194 0x1d94 x +reduce_base.h 193 0x1da4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/reducesum_impl.h: +reducesum_impl.h 95 0x1dd0 +reducesum_impl.h 95 0x1dd0 1 x +reducesum_impl.h 95 0x1de0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h: +reduce_base.h 219 0x1de4 x +reduce_base.h 158 0x1e00 +reduce_base.h 158 0x1e00 1 +reduce_base.h 158 0x1e0a x +reduce_base.h 163 0x1e0a 1 x +reduce_base.h 167 0x1e12 x +reduce_base.h 160 0x1e1c +reduce_base.h 160 0x1e1c 1 x +reduce_base.h 160 0x1e1c 2 +reduce_base.h 160 0x1e1c 3 x +reduce_base.h 162 0x1e1c 4 +reduce_base.h 162 0x1e1c 5 +reduce_base.h 166 0x1e1c 6 +reduce_base.h 159 0x1e26 x +reduce_base.h 166 0x1e26 1 x +reduce_base.h 160 0x1e38 x +reduce_base.h 161 0x1e48 x +reduce_base.h 162 0x1e58 x +reduce_base.h 162 0x1e62 +reduce_base.h 162 0x1e62 1 +reduce_base.h 163 0x1e6a x +reduce_base.h 164 0x1e6e x +reduce_base.h 165 0x1e7e x +reduce_base.h 166 0x1e8a +reduce_base.h 166 0x1e94 x +reduce_base.h 172 0x1ea0 +reduce_base.h 173 0x1ea0 1 +reduce_base.h 172 0x1eaa x +reduce_base.h 173 0x1eaa 1 +reduce_base.h 180 0x1eaa 2 +reduce_base.h 174 0x1eb4 +reduce_base.h 174 0x1eb4 1 x +reduce_base.h 179 0x1eb4 2 +reduce_base.h 179 0x1eb4 3 +reduce_base.h 180 0x1eb4 4 +reduce_base.h 172 0x1ec0 x +reduce_base.h 172 0x1ec6 +reduce_base.h 173 0x1ecc x +reduce_base.h 174 0x1edc x +reduce_base.h 175 0x1ee0 x +reduce_base.h 176 0x1ef0 x +reduce_base.h 179 0x1efc x +reduce_base.h 175 0x1f02 x +reduce_base.h 177 0x1f02 1 x +reduce_base.h 178 0x1f12 x +reduce_base.h 179 0x1f22 x +reduce_base.h 180 0x1f2e x +reduce_base.h 179 0x1f32 x +reduce_base.h 179 0x1f32 1 x +reduce_base.h 180 0x1f36 x +reduce_base.h 180 0x1f3c +reduce_base.h 232 0x1f40 x +reduce_base.h 236 0x1f40 1 +reduce_base.h 236 0x1f40 2 +reduce_base.h 236 0x1f4a x +reduce_base.h 240 0x1f4a 1 +reduce_base.h 240 0x1f50 x +reduce_base.h 236 0x1f5e x +reduce_base.h 236 0x1f5e 1 x +reduce_base.h 232 0x1f64 +reduce_base.h 240 0x1f76 +reduce_base.h 240 0x1f7c x +reduce_base.h 241 0x1f7c 1 +reduce_base.h 241 0x1f86 x +reduce_base.h 241 0x1f96 +reduce_base.h 241 0x1f96 1 +reduce_base.h 240 0x1f9c +reduce_base.h 240 0x1fa0 +reduce_base.h 240 0x1fa4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 102 0x1faa x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h: +reduce_base.h 240 0x1faa 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1fae + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h: +reduce_base.h 241 0x1fae 1 +reduce_base.h 241 0x1fae 2 x +reduce_base.h 243 0x1fae 3 +reduce_base.h 241 0x1fba + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1fc0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h: +reduce_base.h 241 0x1fc0 1 +reduce_base.h 243 0x1fc0 2 +reduce_base.h 241 0x1fc6 +reduce_base.h 241 0x1fca + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2030 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h: +reduce_base.h 243 0x2030 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x2040 +aie_core.h 73 0x2040 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2040 2 +vector.hpp 1159 0x2040 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x2040 4 +accum.hpp 199 0x2040 5 +accum.hpp 940 0x2040 6 +accum.hpp 1119 0x2040 7 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h: +reduce_base.h 267 0x2040 8 x +reduce_base.h 267 0x2040 9 +reduce_base.h 267 0x2056 +reduce_base.h 267 0x205a +reduce_base.h 405 0x2068 +reduce_base.h 269 0x206c +reduce_base.h 269 0x2072 x +reduce_base.h 274 0x2082 x +reduce_base.h 274 0x2082 1 x +reduce_base.h 269 0x208a +reduce_base.h 269 0x208e +reduce_base.h 269 0x208e 1 x +reduce_base.h 269 0x2094 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x209e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h: +reduce_base.h 280 0x209e 1 x +reduce_base.h 289 0x209e 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x20aa + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h: +reduce_base.h 274 0x20aa 1 +reduce_base.h 277 0x20aa 2 +reduce_base.h 280 0x20aa 3 +reduce_base.h 274 0x20b6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x20bc + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h: +reduce_base.h 282 0x20c4 +reduce_base.h 274 0x20c8 x +reduce_base.h 282 0x20c8 1 +reduce_base.h 274 0x20ce +reduce_base.h 282 0x20ce 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 102 0x20d6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h: +reduce_base.h 282 0x20d6 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x20e0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h: +reduce_base.h 277 0x20e0 1 x +reduce_base.h 280 0x20e0 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x2100 +vector.hpp 915 0x2110 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h: +reduce_base.h 280 0x2110 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x2120 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h: +reduce_base.h 280 0x2120 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 856 0x2130 x +vector.hpp 915 0x2140 x +vector.hpp 856 0x2150 x +vector.hpp 1159 0x2150 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h: +reduce_base.h 280 0x2150 2 +reduce_base.h 289 0x2150 3 x +reduce_base.h 274 0x2160 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2164 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h: +reduce_base.h 289 0x2164 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 856 0x216a x +vector.hpp 1159 0x2170 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h: +reduce_base.h 289 0x2170 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x2180 +aie_core.h 73 0x2180 1 +aie_core.h 73 0x2180 2 +aie_core.h 90 0x2180 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x2180 4 +vector.hpp 1139 0x2180 5 +vector.hpp 1139 0x2180 6 +vector.hpp 1139 0x2180 7 +vector.hpp 1159 0x2180 8 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x2180 9 +accum.hpp 199 0x2180 10 +accum.hpp 199 0x2180 11 +accum.hpp 940 0x2180 12 +accum.hpp 940 0x2180 13 +accum.hpp 940 0x2180 14 +accum.hpp 1119 0x2180 15 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h: +reduce_base.h 405 0x2180 16 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x218a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x218a 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 199 0x218a 2 +accum.hpp 940 0x218a 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h: +reduce_base.h 294 0x218a 4 +reduce_base.h 405 0x218a 5 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x2196 +aie_core.h 90 0x2196 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2196 2 +vector.hpp 1139 0x2196 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 199 0x2196 4 +accum.hpp 940 0x2196 5 +accum.hpp 940 0x2196 6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add.hpp: +add.hpp 28 0x2196 7 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 332 0x2196 8 +add_reduce.hpp 337 0x2196 9 +add_reduce.hpp 337 0x2196 10 +add_reduce.hpp 337 0x2196 11 +add_reduce.hpp 337 0x2196 12 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h: +reduce_base.h 294 0x2196 13 +reduce_base.h 406 0x2196 14 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x21a2 +aie_core.h 73 0x21a2 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x21a2 2 +vector.hpp 1139 0x21a2 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 199 0x21a2 4 +accum.hpp 940 0x21a2 5 +accum.hpp 940 0x21a2 6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h: +reduce_base.h 409 0x21a2 7 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 90 0x21ac + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x21ac 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 199 0x21ac 2 +accum.hpp 940 0x21ac 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h: +reduce_base.h 409 0x21ac 4 +reduce_base.h 305 0x21b2 x +reduce_base.h 410 0x21b2 1 x +reduce_base.h 305 0x21bc +reduce_base.h 411 0x21bc 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x21c6 +aie_core.h 73 0x21c6 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x21c6 2 +vector.hpp 1159 0x21c6 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x21c6 4 +accum.hpp 149 0x21c6 5 +accum.hpp 1119 0x21c6 6 +accum.hpp 1119 0x21c6 7 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h: +reduce_base.h 410 0x21c6 8 x +reduce_base.h 222 0x21d0 +reduce_base.h 294 0x21d0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x21d6 +aie_core.h 73 0x21d6 1 +aie_core.h 90 0x21d6 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x21d6 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 199 0x21d6 4 +accum.hpp 940 0x21d6 5 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h: +reduce_base.h 294 0x21d6 6 +reduce_base.h 313 0x21d6 7 +reduce_base.h 314 0x21d6 8 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x21e0 +aie_core.h 73 0x21e0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x21e0 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x21e0 3 +accum.hpp 1119 0x21e0 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x21e6 +aie_core.h 73 0x21e6 1 +aie_core.h 90 0x21e6 2 +aie_core.h 90 0x21e6 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x21e6 4 +vector.hpp 1139 0x21e6 5 +vector.hpp 1159 0x21e6 6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x21e6 7 +accum.hpp 199 0x21e6 8 +accum.hpp 199 0x21e6 9 +accum.hpp 940 0x21e6 10 +accum.hpp 940 0x21e6 11 +accum.hpp 1119 0x21e6 12 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 90 0x21f0 +aie_core.h 90 0x21f0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x21f0 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 199 0x21f0 3 +accum.hpp 940 0x21f0 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 90 0x21f6 +aie_core.h 90 0x21f6 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x21f6 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 199 0x21f6 3 +accum.hpp 940 0x21f6 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x21fc x +aie_core.h 90 0x21fc 1 +aie_core.h 90 0x21fc 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x21fc 3 +vector.hpp 1139 0x21fc 4 +vector.hpp 1139 0x21fc 5 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 199 0x21fc 6 +accum.hpp 199 0x21fc 7 +accum.hpp 199 0x21fc 8 x +accum.hpp 940 0x21fc 9 +accum.hpp 940 0x21fc 10 +accum.hpp 940 0x21fc 11 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x2206 +aie_core.h 73 0x2206 1 +aie_core.h 90 0x2206 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2206 3 +vector.hpp 1159 0x2206 4 +vector.hpp 1159 0x2206 5 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x2206 6 +accum.hpp 149 0x2206 7 +accum.hpp 199 0x2206 8 +accum.hpp 940 0x2206 9 +accum.hpp 1119 0x2206 10 +accum.hpp 1119 0x2206 11 +accum.hpp 1119 0x2206 12 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x2210 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2210 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 199 0x2210 2 x +accum.hpp 940 0x2210 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h: +reduce_base.h 305 0x2210 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 90 0x2216 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2216 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 199 0x2216 2 +accum.hpp 940 0x2216 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x221a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x221a 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 199 0x221a 2 x +accum.hpp 940 0x221a 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 90 0x2220 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2220 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 199 0x2220 2 +accum.hpp 940 0x2220 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add.hpp: +add.hpp 28 0x2220 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x2230 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2230 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 199 0x2230 2 x +accum.hpp 940 0x2230 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 90 0x2240 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2240 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 199 0x2240 2 +accum.hpp 940 0x2240 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add.hpp: +add.hpp 28 0x2240 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x2250 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2250 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 199 0x2250 2 x +accum.hpp 940 0x2250 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 90 0x2260 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2260 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 199 0x2260 2 +accum.hpp 940 0x2260 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add.hpp: +add.hpp 28 0x2260 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x2270 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2270 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 199 0x2270 2 x +accum.hpp 940 0x2270 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x2280 +aie_core.h 90 0x2280 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2280 2 +vector.hpp 1159 0x2280 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x2280 4 x +accum.hpp 199 0x2280 5 +accum.hpp 940 0x2280 6 +accum.hpp 1119 0x2280 7 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add.hpp: +add.hpp 28 0x2280 8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x2292 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2292 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x2292 2 +accum.hpp 1119 0x2292 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add.hpp: +add.hpp 28 0x2292 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x229c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x229c 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x229c 2 x +accum.hpp 1119 0x229c 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add.hpp: +add.hpp 28 0x229c 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h: +reduce_base.h 312 0x22a4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x22aa x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x22aa 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x22aa 2 x +accum.hpp 1119 0x22aa 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x22b0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x22b0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x22b0 2 +accum.hpp 1119 0x22b0 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x22b6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x22b6 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x22b6 2 +accum.hpp 1119 0x22b6 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x22ba + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x22ba 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x22ba 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h: +reduce_base.h 314 0x22ba 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 335 0x22c4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h: +reduce_base.h 313 0x22c4 1 +reduce_base.h 314 0x22c4 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 335 0x22ce +add_reduce.hpp 335 0x22ce 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h: +reduce_base.h 313 0x22ce 2 x +reduce_base.h 314 0x22d8 x +reduce_base.h 319 0x22d8 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 335 0x22e2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h: +reduce_base.h 319 0x22e2 1 +reduce_base.h 314 0x22f2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x22f6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x22f6 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x22f6 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h: +reduce_base.h 313 0x22f6 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x22fc + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x22fc 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x22fc 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h: +reduce_base.h 319 0x22fc 3 x +reduce_base.h 313 0x2304 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x2308 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2308 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x2308 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 332 0x2316 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x231a x +accum.hpp 198 0x231a 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x2322 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2322 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x2322 2 x +accum.hpp 153 0x2328 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 335 0x232c x +add_reduce.hpp 337 0x232c 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x2334 x +accum.hpp 153 0x2340 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 335 0x2344 x +add_reduce.hpp 337 0x2344 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x234c x +accum.hpp 153 0x2350 x +accum.hpp 198 0x2350 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 332 0x2350 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x235e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x2362 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2362 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x2362 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 335 0x2362 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x2368 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 335 0x236c x +add_reduce.hpp 337 0x236c 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x2374 x +accum.hpp 198 0x2378 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 337 0x2378 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x2386 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 335 0x238a x +add_reduce.hpp 337 0x238a 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x2392 x +accum.hpp 153 0x2396 x +accum.hpp 198 0x2396 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 332 0x2396 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x23a0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 335 0x23a4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x23a8 x +accum.hpp 153 0x23ac x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 337 0x23ac 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x23b4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 335 0x23b8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x23bc x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 337 0x23bc 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x23d0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x23d0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x23d0 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 335 0x23d0 3 x +add_reduce.hpp 337 0x23d0 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x23da x +accum.hpp 1119 0x23da 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x23e4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x23e8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 335 0x23ec x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x23f0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 337 0x23f0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x23f8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 332 0x23f8 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x2400 +accum.hpp 198 0x2400 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 335 0x2404 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x2408 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 337 0x240c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x2410 x +accum.hpp 153 0x2414 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 335 0x2420 x +add_reduce.hpp 337 0x2420 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h: +reduce_base.h 326 0x2420 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x2430 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 335 0x2440 x +add_reduce.hpp 337 0x2440 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x2448 x +accum.hpp 1119 0x2448 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x2452 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x2456 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 335 0x245a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h: +reduce_base.h 326 0x245a 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x2460 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 335 0x2464 x +add_reduce.hpp 337 0x2464 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x246c x +accum.hpp 198 0x2470 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 337 0x2470 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x247e x +accum.hpp 153 0x2484 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h: +reduce_base.h 326 0x2484 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 335 0x248a x +add_reduce.hpp 337 0x248a 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x2492 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x2496 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x24a0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 335 0x24a4 x +add_reduce.hpp 337 0x24a4 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x24ac x +accum.hpp 1119 0x24b8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x24c0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h: +reduce_base.h 326 0x24c0 1 x +reduce_base.h 222 0x24d0 x +reduce_base.h 222 0x24de +reduce_base.h 223 0x24e2 x +reduce_base.h 223 0x24e6 +reduce_base.h 222 0x24ec x +reduce_base.h 222 0x24f0 +reduce_base.h 355 0x2500 +reduce_base.h 223 0x2508 x +reduce_base.h 355 0x250c x +reduce_base.h 355 0x2518 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 405 0x2530 x +superkernels.cpp 410 0x2530 1 +superkernels.cpp 410 0x2536 x +superkernels.cpp 405 0x253c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x2552 +io_buffer_main.h 242 0x2552 1 +io_buffer_main.h 348 0x2552 2 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 407 0x2552 3 x +superkernels.cpp 517 0x255c +superkernels.cpp 410 0x2566 x +superkernels.cpp 410 0x2566 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x2570 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 413 0x2570 1 +superkernels.cpp 407 0x257a +superkernels.cpp 407 0x2580 x +superkernels.cpp 407 0x2584 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x2588 +tile.hpp 74 0x258e +tile.hpp 86 0x258e 1 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 413 0x259a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x25a4 +tile.hpp 74 0x25a8 +tile.hpp 74 0x25b0 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 416 0x25b0 1 +superkernels.cpp 414 0x25c0 +superkernels.cpp 414 0x25c6 +superkernels.cpp 414 0x25c6 1 x +superkernels.cpp 414 0x25d0 +superkernels.cpp 414 0x25d4 +superkernels.cpp 414 0x25da +superkernels.cpp 415 0x25e0 x +superkernels.cpp 414 0x25e6 x +superkernels.cpp 414 0x25ec +superkernels.cpp 415 0x25f0 +superkernels.cpp 444 0x25f0 1 +superkernels.cpp 414 0x25f6 +superkernels.cpp 414 0x25fa +superkernels.cpp 415 0x25fa 1 x +superkernels.cpp 414 0x2604 x +superkernels.cpp 416 0x2604 1 +superkernels.cpp 416 0x260c x +superkernels.cpp 416 0x261e +superkernels.cpp 416 0x2624 +superkernels.cpp 441 0x2630 +superkernels.cpp 444 0x2630 1 +superkernels.cpp 441 0x2636 x +superkernels.cpp 441 0x264a +superkernels.cpp 441 0x264e +superkernels.cpp 441 0x2652 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x2658 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 441 0x2664 +superkernels.cpp 441 0x266e x +superkernels.cpp 441 0x266e 1 +superkernels.cpp 441 0x2678 +superkernels.cpp 441 0x2680 +superkernels.cpp 441 0x2686 +superkernels.cpp 441 0x268c +superkernels.cpp 441 0x2690 +superkernels.cpp 441 0x2690 1 +superkernels.cpp 441 0x2696 +superkernels.cpp 441 0x26a0 +superkernels.cpp 441 0x26a0 1 +superkernels.cpp 441 0x26a6 +superkernels.cpp 441 0x26aa +superkernels.cpp 441 0x26aa 1 +superkernels.cpp 444 0x26b0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x26b6 +io_buffer_main.h 348 0x26b6 1 +io_buffer_main.h 242 0x26c0 +io_buffer_main.h 419 0x26c0 1 +io_buffer_main.h 242 0x26c6 x +io_buffer_main.h 242 0x26ca +io_buffer_main.h 242 0x26ce +io_buffer_main.h 259 0x26d4 x +io_buffer_main.h 242 0x26e0 x +io_buffer_main.h 242 0x26e0 1 x +io_buffer_main.h 242 0x26e4 +io_buffer_main.h 419 0x26f0 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 444 0x2700 +superkernels.cpp 444 0x2708 x +superkernels.cpp 444 0x2718 +superkernels.cpp 444 0x271c +superkernels.cpp 444 0x272c +superkernels.cpp 444 0x2736 +superkernels.cpp 444 0x2736 1 +superkernels.cpp 444 0x2740 +superkernels.cpp 444 0x2748 +superkernels.cpp 444 0x274e +superkernels.cpp 444 0x2754 +superkernels.cpp 444 0x275a +superkernels.cpp 444 0x275a 1 +superkernels.cpp 444 0x2760 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x2770 +io_buffer_main.h 348 0x2770 1 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 444 0x2770 2 +superkernels.cpp 444 0x2770 3 +superkernels.cpp 444 0x277a +superkernels.cpp 444 0x277e +superkernels.cpp 444 0x277e 1 +superkernels.cpp 447 0x2784 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x2792 +io_buffer_main.h 419 0x2792 1 +io_buffer_main.h 242 0x2798 x +io_buffer_main.h 242 0x279c +io_buffer_main.h 242 0x27a0 +io_buffer_main.h 259 0x27a6 x +io_buffer_main.h 242 0x27b2 x +io_buffer_main.h 242 0x27b2 1 x +io_buffer_main.h 242 0x27b6 +io_buffer_main.h 419 0x27c2 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 447 0x27d6 x +superkernels.cpp 447 0x27e0 +superkernels.cpp 496 0x27e0 1 +superkernels.cpp 499 0x27e0 2 +superkernels.cpp 502 0x27e0 3 +superkernels.cpp 505 0x27e0 4 +superkernels.cpp 508 0x27e0 5 +superkernels.cpp 511 0x27e0 6 +superkernels.cpp 515 0x27e0 7 +superkernels.cpp 447 0x27ec +superkernels.cpp 447 0x27f0 +superkernels.cpp 447 0x27f4 +superkernels.cpp 447 0x27fa +superkernels.cpp 447 0x2800 +superkernels.cpp 447 0x280c +superkernels.cpp 447 0x2810 +superkernels.cpp 447 0x2818 +superkernels.cpp 447 0x2822 +superkernels.cpp 447 0x2826 +superkernels.cpp 447 0x282c +superkernels.cpp 447 0x2830 +superkernels.cpp 447 0x2830 1 +superkernels.cpp 447 0x2842 +superkernels.cpp 447 0x2846 +superkernels.cpp 447 0x2846 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x284c +io_buffer_main.h 242 0x2858 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 494 0x2858 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x2862 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 496 0x2862 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x286c +io_buffer_main.h 242 0x286c 1 +io_buffer_main.h 348 0x286c 2 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 452 0x286c 3 +superkernels.cpp 496 0x286c 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 419 0x2876 +io_buffer_main.h 259 0x287a x +io_buffer_main.h 242 0x2886 x +io_buffer_main.h 242 0x2886 1 x +io_buffer_main.h 242 0x288a +io_buffer_main.h 149 0x2898 +io_buffer_main.h 348 0x2898 1 +io_buffer_main.h 419 0x289c x +io_buffer_main.h 149 0x28b0 +io_buffer_main.h 348 0x28b0 1 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 496 0x28ba +superkernels.cpp 494 0x28c0 +superkernels.cpp 452 0x28c6 +superkernels.cpp 496 0x28c6 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x28d0 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 494 0x28d0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x28e0 +io_buffer_main.h 348 0x28e0 1 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 452 0x28e0 2 +superkernels.cpp 496 0x28e0 3 +superkernels.cpp 496 0x28e0 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x28f0 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 450 0x28f0 1 +superkernels.cpp 450 0x28fc x +superkernels.cpp 452 0x28fc 1 +superkernels.cpp 452 0x2906 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x290a x +io_buffer_main.h 348 0x290a 1 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 494 0x2910 x +superkernels.cpp 450 0x291a x +superkernels.cpp 450 0x291e +superkernels.cpp 452 0x291e 1 x +superkernels.cpp 452 0x2924 +superkernels.cpp 496 0x2930 x +superkernels.cpp 496 0x2940 +superkernels.cpp 496 0x2944 +superkernels.cpp 502 0x294a +superkernels.cpp 496 0x2956 +superkernels.cpp 499 0x2956 1 +superkernels.cpp 496 0x295c +superkernels.cpp 496 0x295c 1 +superkernels.cpp 496 0x2966 +superkernels.cpp 496 0x2970 +superkernels.cpp 496 0x2978 +superkernels.cpp 496 0x297c +superkernels.cpp 496 0x297c 1 +superkernels.cpp 496 0x2982 +superkernels.cpp 496 0x2982 1 +superkernels.cpp 496 0x2988 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 348 0x2990 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 496 0x2990 1 +superkernels.cpp 496 0x2990 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 351 0x299a +io_buffer_main.h 449 0x299a 1 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 496 0x299a 2 +superkernels.cpp 496 0x29a0 +superkernels.cpp 496 0x29a0 1 +superkernels.cpp 502 0x29a6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 348 0x29b2 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 499 0x29b2 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 449 0x29c4 x +io_buffer_main.h 351 0x29c8 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 502 0x29c8 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 351 0x29e2 +io_buffer_main.h 351 0x29e6 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 502 0x29f0 +superkernels.cpp 502 0x2a00 x +superkernels.cpp 502 0x2a1c +superkernels.cpp 502 0x2a20 +superkernels.cpp 502 0x2a26 +superkernels.cpp 502 0x2a2c +superkernels.cpp 505 0x2a2c 1 +superkernels.cpp 502 0x2a38 +superkernels.cpp 502 0x2a3c +superkernels.cpp 502 0x2a46 +superkernels.cpp 502 0x2a4e +superkernels.cpp 502 0x2a52 +superkernels.cpp 502 0x2a52 1 +superkernels.cpp 502 0x2a5a +superkernels.cpp 502 0x2a5a 1 +superkernels.cpp 502 0x2a60 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 348 0x2a70 +io_buffer_main.h 348 0x2a70 1 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 502 0x2a70 2 +superkernels.cpp 502 0x2a70 3 +superkernels.cpp 502 0x2a7a +superkernels.cpp 502 0x2a7e +superkernels.cpp 502 0x2a7e 1 +superkernels.cpp 508 0x2a84 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 351 0x2a8a +io_buffer_main.h 449 0x2a8a 1 +io_buffer_main.h 348 0x2a94 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 505 0x2a94 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 449 0x2aa6 x +io_buffer_main.h 351 0x2aaa x +io_buffer_main.h 351 0x2aba +io_buffer_main.h 351 0x2ac0 +io_buffer_main.h 348 0x2ad6 +io_buffer_main.h 348 0x2af0 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 508 0x2af0 1 +superkernels.cpp 508 0x2b00 x +superkernels.cpp 508 0x2b0e +superkernels.cpp 508 0x2b12 +superkernels.cpp 508 0x2b16 +superkernels.cpp 508 0x2b1c +superkernels.cpp 508 0x2b2a +superkernels.cpp 508 0x2b2a 1 +superkernels.cpp 508 0x2b34 +superkernels.cpp 508 0x2b3e +superkernels.cpp 508 0x2b46 +superkernels.cpp 508 0x2b4a +superkernels.cpp 508 0x2b4a 1 +superkernels.cpp 508 0x2b50 +superkernels.cpp 508 0x2b50 1 +superkernels.cpp 508 0x2b56 +superkernels.cpp 508 0x2b60 +superkernels.cpp 508 0x2b60 1 +superkernels.cpp 508 0x2b66 +superkernels.cpp 508 0x2b6a +superkernels.cpp 508 0x2b6a 1 +superkernels.cpp 511 0x2b70 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 348 0x2b7e x +io_buffer_main.h 351 0x2b7e 1 +io_buffer_main.h 449 0x2b7e 2 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 511 0x2b7e 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 449 0x2b94 x +io_buffer_main.h 351 0x2b98 x +io_buffer_main.h 351 0x2ba8 +io_buffer_main.h 351 0x2bac + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 514 0x2bb0 +superkernels.cpp 515 0x2bb0 1 +superkernels.cpp 514 0x2bba x +superkernels.cpp 514 0x2bba 1 +superkernels.cpp 514 0x2bc4 +superkernels.cpp 514 0x2bd4 +superkernels.cpp 514 0x2bd8 +superkernels.cpp 515 0x2bec x +superkernels.cpp 517 0x2bf0 +superkernels.cpp 517 0x2bfa x +superkernels.cpp 517 0x2bfe +superkernels.cpp - 0x2bff + + +accum_native_types.hpp: +File name Line number Starting address View Stmt + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/softmax_adf_wrapper.cpp: +softmax_adf_wrapper.cpp 34 0x30f0 x +softmax_adf_wrapper.cpp 46 0x30f0 1 +softmax_adf_wrapper.cpp 57 0x30f0 2 +softmax_adf_wrapper.cpp 37 0x30f8 +softmax_adf_wrapper.cpp 37 0x3102 x +softmax_adf_wrapper.cpp 37 0x3112 +softmax_adf_wrapper.cpp 37 0x3112 1 +softmax_adf_wrapper.cpp 34 0x3118 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x312a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/softmax_adf_wrapper.cpp: +softmax_adf_wrapper.cpp 42 0x312e +softmax_adf_wrapper.cpp 42 0x312e 1 +softmax_adf_wrapper.cpp 46 0x312e 2 +softmax_adf_wrapper.cpp 43 0x313a +softmax_adf_wrapper.cpp 44 0x313a 1 +softmax_adf_wrapper.cpp 42 0x3144 x +softmax_adf_wrapper.cpp 43 0x3144 1 x +softmax_adf_wrapper.cpp 44 0x3144 2 +softmax_adf_wrapper.cpp 47 0x3144 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 74 0x3150 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/softmax_adf_wrapper.cpp: +softmax_adf_wrapper.cpp 43 0x3150 1 +softmax_adf_wrapper.cpp 49 0x3150 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x315a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/softmax_adf_wrapper.cpp: +softmax_adf_wrapper.cpp 46 0x315a 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 74 0x3164 +tile.hpp 86 0x3164 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/softmax_adf_wrapper.cpp: +softmax_adf_wrapper.cpp 49 0x316e +softmax_adf_wrapper.cpp 48 0x3176 x +softmax_adf_wrapper.cpp 44 0x317a x +softmax_adf_wrapper.cpp 48 0x317e x +softmax_adf_wrapper.cpp 46 0x318c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 74 0x3190 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/softmax_adf_wrapper.cpp: +softmax_adf_wrapper.cpp 47 0x3190 1 x +softmax_adf_wrapper.cpp 47 0x31a2 +softmax_adf_wrapper.cpp 48 0x31a6 x +softmax_adf_wrapper.cpp 50 0x31aa x +softmax_adf_wrapper.cpp 49 0x31b8 x +softmax_adf_wrapper.cpp 50 0x31c8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x31cc x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/softmax_adf_wrapper.cpp: +softmax_adf_wrapper.cpp 51 0x31d0 x +softmax_adf_wrapper.cpp 51 0x31e0 +softmax_adf_wrapper.cpp 54 0x31f0 +softmax_adf_wrapper.cpp 54 0x31f0 1 x +softmax_adf_wrapper.cpp 54 0x31f6 +softmax_adf_wrapper.cpp 54 0x31fc +softmax_adf_wrapper.cpp 54 0x3202 +softmax_adf_wrapper.cpp 54 0x3206 +softmax_adf_wrapper.cpp 54 0x320a +softmax_adf_wrapper.cpp 54 0x320e +softmax_adf_wrapper.cpp 54 0x3212 +softmax_adf_wrapper.cpp 54 0x3212 1 +softmax_adf_wrapper.cpp 54 0x3218 +softmax_adf_wrapper.cpp 54 0x3218 1 +softmax_adf_wrapper.cpp 54 0x321e +softmax_adf_wrapper.cpp 54 0x321e 1 +softmax_adf_wrapper.cpp 54 0x3224 +softmax_adf_wrapper.cpp 54 0x3224 1 +softmax_adf_wrapper.cpp 54 0x322a +softmax_adf_wrapper.cpp 54 0x322a 1 +softmax_adf_wrapper.cpp 54 0x3230 +softmax_adf_wrapper.cpp 54 0x3230 1 +softmax_adf_wrapper.cpp 54 0x3236 +softmax_adf_wrapper.cpp 54 0x3236 1 +softmax_adf_wrapper.cpp 54 0x323c +softmax_adf_wrapper.cpp 54 0x323c 1 +softmax_adf_wrapper.cpp 54 0x3242 +softmax_adf_wrapper.cpp 54 0x3242 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x3248 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/softmax_adf_wrapper.cpp: +softmax_adf_wrapper.cpp 54 0x3248 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x324e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/softmax_adf_wrapper.cpp: +softmax_adf_wrapper.cpp 54 0x324e 1 x +softmax_adf_wrapper.cpp 54 0x3254 +softmax_adf_wrapper.cpp 54 0x325a +softmax_adf_wrapper.cpp 54 0x325e +softmax_adf_wrapper.cpp 54 0x3262 +softmax_adf_wrapper.cpp 54 0x3266 +softmax_adf_wrapper.cpp 54 0x326a +softmax_adf_wrapper.cpp 56 0x3270 x +softmax_adf_wrapper.cpp 57 0x3270 1 +softmax_adf_wrapper.cpp 58 0x3276 +softmax_adf_wrapper.cpp 56 0x3288 +softmax_adf_wrapper.cpp 56 0x328c +softmax_adf_wrapper.cpp 57 0x3290 x +softmax_adf_wrapper.cpp 58 0x329c x +softmax_adf_wrapper.cpp 58 0x32a0 +softmax_adf_wrapper.cpp 57 0x32a8 x +softmax_adf_wrapper.cpp 57 0x32ac +softmax_adf_wrapper.cpp 57 0x32b0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/expand_adf_wrapper.cpp: +expand_adf_wrapper.cpp 25 0x32e0 x +expand_adf_wrapper.cpp 26 0x32e0 1 +expand_adf_wrapper.cpp 64 0x32e0 2 +expand_adf_wrapper.cpp 26 0x32e6 x +expand_adf_wrapper.cpp 26 0x32f6 +expand_adf_wrapper.cpp 26 0x32f6 1 +expand_adf_wrapper.cpp 29 0x330a x +expand_adf_wrapper.cpp 29 0x330e +expand_adf_wrapper.cpp 29 0x3312 +expand_adf_wrapper.cpp 29 0x3318 +expand_adf_wrapper.cpp 36 0x3320 x +expand_adf_wrapper.cpp 38 0x3320 1 +expand_adf_wrapper.cpp 40 0x3320 2 +expand_adf_wrapper.cpp 40 0x3320 3 +expand_adf_wrapper.cpp 49 0x3320 4 +expand_adf_wrapper.cpp 37 0x332a x +expand_adf_wrapper.cpp 49 0x332a 1 +expand_adf_wrapper.cpp 46 0x3334 +expand_adf_wrapper.cpp 49 0x3334 1 x +expand_adf_wrapper.cpp 39 0x333e +expand_adf_wrapper.cpp 46 0x333e 1 x +expand_adf_wrapper.cpp 46 0x333e 2 +expand_adf_wrapper.cpp 49 0x333e 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x3348 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/expand_adf_wrapper.cpp: +expand_adf_wrapper.cpp 39 0x3348 1 +expand_adf_wrapper.cpp 49 0x3348 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x3352 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/expand_adf_wrapper.cpp: +expand_adf_wrapper.cpp 39 0x3352 1 +expand_adf_wrapper.cpp 38 0x335c x +expand_adf_wrapper.cpp 39 0x335c 1 x +expand_adf_wrapper.cpp 51 0x3362 x +expand_adf_wrapper.cpp 51 0x3362 1 x +expand_adf_wrapper.cpp 46 0x3368 x +expand_adf_wrapper.cpp 49 0x3368 1 +expand_adf_wrapper.cpp 39 0x336e x +expand_adf_wrapper.cpp 49 0x336e 1 x +expand_adf_wrapper.cpp 46 0x3374 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 102 0x3378 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/expand_adf_wrapper.cpp: +expand_adf_wrapper.cpp 38 0x3378 1 x +expand_adf_wrapper.cpp 49 0x3378 2 x +expand_adf_wrapper.cpp 39 0x337e x +expand_adf_wrapper.cpp 46 0x337e 1 x +expand_adf_wrapper.cpp 40 0x3384 x +expand_adf_wrapper.cpp 40 0x3384 1 x +expand_adf_wrapper.cpp 40 0x3384 2 x +expand_adf_wrapper.cpp 51 0x3390 +expand_adf_wrapper.cpp 52 0x33a0 x +expand_adf_wrapper.cpp 55 0x33a6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 102 0x33b4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/expand_adf_wrapper.cpp: +expand_adf_wrapper.cpp 55 0x33b8 x +expand_adf_wrapper.cpp 55 0x33be +expand_adf_wrapper.cpp 55 0x33c4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x33d0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/expand_adf_wrapper.cpp: +expand_adf_wrapper.cpp 57 0x33d0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x3430 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/expand_adf_wrapper.cpp: +expand_adf_wrapper.cpp 57 0x3430 1 x +expand_adf_wrapper.cpp 51 0x3440 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x3448 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x3450 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/expand_adf_wrapper.cpp: +expand_adf_wrapper.cpp 61 0x3450 1 x +expand_adf_wrapper.cpp 65 0x3460 x +expand_adf_wrapper.cpp 64 0x3464 +expand_adf_wrapper.cpp 64 0x3468 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable11/src/0_0_reloadable11.cc: +0_0_reloadable11.cc 21 0x2c10 x +0_0_reloadable11.cc 23 0x2c10 1 +0_0_reloadable11.cc 23 0x2c14 x +0_0_reloadable11.cc 24 0x2c18 x +0_0_reloadable11.cc 26 0x2c1c x +0_0_reloadable11.cc 25 0x2c20 x +0_0_reloadable11.cc 22 0x2c24 x +0_0_reloadable11.cc 30 0x2c40 x +0_0_reloadable11.cc 32 0x2c40 1 +0_0_reloadable11.cc 32 0x2c44 x +0_0_reloadable11.cc 34 0x2c48 x +0_0_reloadable11.cc 33 0x2c4c x +0_0_reloadable11.cc 31 0x2c50 x +0_0_reloadable11.cc 38 0x2c60 x +0_0_reloadable11.cc 40 0x2c60 1 +0_0_reloadable11.cc 40 0x2c64 x +0_0_reloadable11.cc 41 0x2c68 x +0_0_reloadable11.cc 43 0x2c6c x +0_0_reloadable11.cc 42 0x2c70 x +0_0_reloadable11.cc 39 0x2c74 x +0_0_reloadable11.cc 47 0x2c90 x +0_0_reloadable11.cc 49 0x2c90 1 +0_0_reloadable11.cc 49 0x2c94 x +0_0_reloadable11.cc 50 0x2c98 x +0_0_reloadable11.cc 52 0x2c9c x +0_0_reloadable11.cc 51 0x2ca0 x +0_0_reloadable11.cc 48 0x2ca4 x +0_0_reloadable11.cc 56 0x2cc0 x +0_0_reloadable11.cc 58 0x2cc0 1 +0_0_reloadable11.cc 58 0x2cc4 x +0_0_reloadable11.cc 60 0x2cc8 x +0_0_reloadable11.cc 59 0x2ccc x +0_0_reloadable11.cc 57 0x2cd0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 214 0x2ce0 +softmax_row_major.h 214 0x2ce0 1 x +softmax_row_major.h 214 0x2ce4 +softmax_row_major.h 219 0x2ce4 1 +softmax_row_major.h 52 0x2cea +softmax_row_major.h 217 0x2cea 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum_native_types.hpp: +accum_native_types.hpp 213 0x2cf4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 52 0x2cf4 1 +softmax_row_major.h 219 0x2cf4 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 102 0x2cfe + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 256 0x2cfe 1 x +softmax_row_major.h 52 0x2d08 x +softmax_row_major.h 83 0x2d08 1 x +softmax_row_major.h 83 0x2d12 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 102 0x2d18 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 68 0x2d18 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum_native_types.hpp: +accum_native_types.hpp 213 0x2d1e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 52 0x2d1e 1 x +softmax_row_major.h 55 0x2d1e 2 +softmax_row_major.h 68 0x2d1e 3 +softmax_row_major.h 264 0x2d1e 4 +softmax_row_major.h 52 0x2d24 +softmax_row_major.h 55 0x2d24 1 x +softmax_row_major.h 68 0x2d2a x +softmax_row_major.h 256 0x2d2e +softmax_row_major.h 264 0x2d2e 1 x +softmax_row_major.h 269 0x2d2e 2 +softmax_row_major.h 52 0x2d34 +softmax_row_major.h 69 0x2d34 1 +softmax_row_major.h 256 0x2d34 2 x +softmax_row_major.h 68 0x2d3e +softmax_row_major.h 69 0x2d3e 1 x +softmax_row_major.h 256 0x2d3e 2 +softmax_row_major.h 52 0x2d48 x +softmax_row_major.h 68 0x2d48 1 x +softmax_row_major.h 70 0x2d48 2 +softmax_row_major.h 85 0x2d48 3 +softmax_row_major.h 176 0x2d48 4 +softmax_row_major.h 256 0x2d48 5 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 212 0x2d54 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 68 0x2d54 1 +softmax_row_major.h 70 0x2d54 2 x +softmax_row_major.h 99 0x2d54 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 212 0x2d5e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector_native_types.hpp: +vector_native_types.hpp 373 0x2d5e 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 102 0x2d5e 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 52 0x2d5e 3 x +softmax_row_major.h 70 0x2d5e 4 +softmax_row_major.h 99 0x2d5e 5 x +softmax_row_major.h 68 0x2d6a x +softmax_row_major.h 85 0x2d6a 1 +softmax_row_major.h 85 0x2d6a 2 +softmax_row_major.h 83 0x2d70 x +softmax_row_major.h 85 0x2d70 1 +softmax_row_major.h 85 0x2d70 2 x +softmax_row_major.h 176 0x2d70 3 +softmax_row_major.h 85 0x2d80 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x2d90 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 85 0x2d90 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 212 0x2da0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 85 0x2da0 1 +softmax_row_major.h 99 0x2da0 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/../../detail/mask.hpp: +mask.hpp 57 0x2db0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 83 0x2db0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/blend.hpp: +blend.hpp 163 0x2dc0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 85 0x2dc0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 111 0x2dd0 x +vector.hpp 1454 0x2dd0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 85 0x2dd0 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x2de0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 85 0x2de0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 111 0x2df0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min_reduce.hpp: +max_min_reduce.hpp 93 0x2df0 1 +max_min_reduce.hpp 93 0x2df0 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 322 0x2df0 3 +add_reduce.hpp 322 0x2df0 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 85 0x2df0 5 +softmax_row_major.h 180 0x2df0 6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min_reduce.hpp: +max_min_reduce.hpp 93 0x2e00 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x2e00 1 x +accum.hpp 938 0x2e00 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/../../detail/mask.hpp: +mask.hpp 57 0x2e00 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 322 0x2e00 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 148 0x2e00 5 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min_reduce.hpp: +max_min_reduce.hpp 93 0x2e0a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 324 0x2e0a 1 +add_reduce.hpp 324 0x2e0a 2 +add_reduce.hpp 324 0x2e0a 3 +add_reduce.hpp 324 0x2e0a 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/blend.hpp: +blend.hpp 163 0x2e0a 5 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x2e0a 6 +add_accum.hpp 20 0x2e0a 7 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 111 0x2e14 x +vector.hpp 1454 0x2e14 1 x +vector.hpp 212 0x2e1a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x2e1a 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x2e1a 2 +accum.hpp 153 0x2e1a 3 +accum.hpp 1117 0x2e1a 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2e1a 5 +mul_acc32_fp.hpp 36 0x2e1a 6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 153 0x2e1a 7 +softmax_row_major.h 269 0x2e1a 8 +softmax_row_major.h 277 0x2e1a 9 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min_reduce.hpp: +max_min_reduce.hpp 93 0x2e24 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x2e28 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min_reduce.hpp: +max_min_reduce.hpp 93 0x2e2c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x2e30 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min_reduce.hpp: +max_min_reduce.hpp 93 0x2e34 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x2e38 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min_reduce.hpp: +max_min_reduce.hpp 93 0x2e3c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 176 0x2e3c 1 +softmax_row_major.h 176 0x2e3c 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x2e42 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 176 0x2e42 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x2e48 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x2e48 1 x +accum.hpp 938 0x2e48 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 102 0x2e48 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 20 0x2e48 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 148 0x2e48 5 x +softmax_row_major.h 176 0x2e48 6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 111 0x2e54 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 176 0x2e54 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 471 0x2e5a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 176 0x2e5a 1 +softmax_row_major.h 112 0x2e60 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x2e66 x +accum.hpp 938 0x2e66 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/../../detail/mask.hpp: +mask.hpp 57 0x2e66 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 20 0x2e66 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 112 0x2e66 4 x +softmax_row_major.h 148 0x2e66 5 x +softmax_row_major.h 176 0x2e72 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 212 0x2e76 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x2e76 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 153 0x2e76 2 x +softmax_row_major.h 176 0x2e76 3 +softmax_row_major.h 112 0x2e7c +softmax_row_major.h 176 0x2e7c 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2e82 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 176 0x2e82 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 20 0x2e8a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 176 0x2e8a 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 212 0x2e92 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x2e92 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 112 0x2e92 2 x +softmax_row_major.h 153 0x2e92 3 x +softmax_row_major.h 112 0x2e9c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2ea2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 176 0x2ea2 1 x +softmax_row_major.h 176 0x2eaa + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 212 0x2eae x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x2eae 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/elementary.hpp: +elementary.hpp 473 0x2eae 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/../../detail/mask.hpp: +mask.hpp 57 0x2eb2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/blend.hpp: +blend.hpp 163 0x2eb2 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 111 0x2ec0 x +vector.hpp 111 0x2ec0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x2ec0 2 x +accum.hpp 198 0x2ec0 3 x +accum.hpp 938 0x2ec0 4 x +accum.hpp 938 0x2ec0 5 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x2ec0 6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 148 0x2ec0 7 x +softmax_row_major.h 176 0x2ec0 8 x +softmax_row_major.h 180 0x2ec0 9 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x2ed0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 112 0x2ed0 1 +softmax_row_major.h 176 0x2ed6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 212 0x2ee0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x2ee0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 153 0x2ee0 2 x +softmax_row_major.h 176 0x2ee0 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 212 0x2ef0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x2ef0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 20 0x2ef0 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/elementary.hpp: +elementary.hpp 473 0x2ef0 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 176 0x2ef0 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/../../detail/mask.hpp: +mask.hpp 57 0x2f00 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2f00 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/blend.hpp: +blend.hpp 163 0x2f00 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x2f10 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 111 0x2f20 x +vector.hpp 111 0x2f20 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x2f20 2 x +accum.hpp 938 0x2f20 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 322 0x2f20 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x2f20 5 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 176 0x2f20 6 x +softmax_row_major.h 180 0x2f20 7 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x2f30 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x2f30 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 269 0x2f30 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 212 0x2f3c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x2f3c 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 153 0x2f3c 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 212 0x2f40 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x2f40 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/elementary.hpp: +elementary.hpp 473 0x2f40 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/../../detail/mask.hpp: +mask.hpp 57 0x2f44 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2f44 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/blend.hpp: +blend.hpp 163 0x2f44 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x2f4e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 111 0x2f52 x +vector.hpp 111 0x2f52 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x2f52 2 x +accum.hpp 938 0x2f52 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x2f52 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 180 0x2f52 5 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x2f5c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 269 0x2f5c 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 212 0x2f66 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x2f66 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/elementary.hpp: +elementary.hpp 473 0x2f66 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/blend.hpp: +blend.hpp 163 0x2f6a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x2f6e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 111 0x2f72 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x2f72 1 x +accum.hpp 938 0x2f72 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x2f72 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x2f7a +accum.hpp 153 0x2f86 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 322 0x2f8c +add_reduce.hpp 322 0x2f90 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x2f94 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 324 0x2f94 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x2f9c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 324 0x2fa8 +add_reduce.hpp 322 0x2fac x +add_reduce.hpp 324 0x2fac 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x2fb4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 324 0x2fc0 x +add_reduce.hpp 322 0x2fc4 x +add_reduce.hpp 324 0x2fc4 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x2fcc x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 324 0x2fd8 x +add_reduce.hpp 322 0x2fdc x +add_reduce.hpp 324 0x2fdc 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x2fe4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 269 0x2fea x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 111 0x2ff0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 180 0x2ff0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 324 0x2ff6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x2ffa x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 269 0x3006 +softmax_row_major.h 269 0x3012 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/elementary.hpp: +elementary.hpp 618 0x3020 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax.h: +softmax.h 166 0x302a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 212 0x3030 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x3030 1 +accum.hpp 1117 0x3030 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 269 0x3030 3 +softmax_row_major.h 275 0x3030 4 x +softmax_row_major.h 277 0x3030 5 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 212 0x303a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 273 0x303a 1 x +softmax_row_major.h 275 0x303a 2 +softmax_row_major.h 269 0x3044 x +softmax_row_major.h 273 0x3044 1 +softmax_row_major.h 273 0x304e x +softmax_row_major.h 269 0x3052 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x305a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 212 0x305e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 275 0x305e 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x3080 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 212 0x3090 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 275 0x3090 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x30a0 x +accum.hpp 1117 0x30a0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 277 0x30a0 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x30c0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x30d2 x +accum.hpp 1117 0x30d2 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 277 0x30d2 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x30d8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 281 0x30de x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x30e2 x +accum.hpp 1117 0x30e2 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 277 0x30e2 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x30ea +accum.hpp 1117 0x30ea 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/softmax_row_major.h: +softmax_row_major.h 277 0x30ea 2 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable11/src/0_0_reloadable11.cc: +0_0_reloadable11.cc 64 0x32c0 x +0_0_reloadable11.cc 66 0x32c0 1 +0_0_reloadable11.cc 66 0x32c4 x +0_0_reloadable11.cc 68 0x32c8 x +0_0_reloadable11.cc 67 0x32cc x +0_0_reloadable11.cc 65 0x32d0 x +0_0_reloadable11.cc 72 0x3480 x +0_0_reloadable11.cc 74 0x3480 1 +0_0_reloadable11.cc 74 0x3484 x +0_0_reloadable11.cc 76 0x3488 x +0_0_reloadable11.cc 75 0x348c x +0_0_reloadable11.cc 73 0x3490 x +0_0_reloadable11.cc 92 0x9e0 x +0_0_reloadable11.cc 94 0x9e0 1 +0_0_reloadable11.cc 94 0x9e0 2 x +0_0_reloadable11.cc 92 0x9ea + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 434 0x9f0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0x9f8 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable11/src/0_0_reloadable11.cc: +0_0_reloadable11.cc 97 0x9f8 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0xa00 +io_buffer_compiler.h 614 0xa10 x +io_buffer_compiler.h 614 0xa18 +io_buffer_compiler.h 614 0xa1c +io_buffer_compiler.h 614 0xa20 +io_buffer_compiler.h 614 0xa24 +io_buffer_compiler.h 219 0xa34 x +io_buffer_compiler.h 219 0xa34 1 x +io_buffer_compiler.h 218 0xa38 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 434 0xa3c +io_buffer_main.h 434 0xa3c 1 +io_buffer_main.h 434 0xa46 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable11/src/0_0_reloadable11.cc: +0_0_reloadable11.cc 97 0xa4a +0_0_reloadable11.cc 97 0xa4a 1 x +0_0_reloadable11.cc 100 0xa4a 2 +0_0_reloadable11.cc 97 0xa50 +0_0_reloadable11.cc 97 0xa50 1 +0_0_reloadable11.cc 97 0xa56 +0_0_reloadable11.cc 97 0xa56 1 +0_0_reloadable11.cc 97 0xa5c +0_0_reloadable11.cc 97 0xa60 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 614 0xa72 x +io_buffer_compiler.h 614 0xa76 +io_buffer_compiler.h 614 0xa7a +io_buffer_compiler.h 614 0xa7e +io_buffer_compiler.h 614 0xa82 +io_buffer_compiler.h 219 0xa92 x +io_buffer_compiler.h 219 0xa92 1 x +io_buffer_compiler.h 218 0xa96 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 434 0xaa2 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable11/src/0_0_reloadable11.cc: +0_0_reloadable11.cc 100 0xaa6 x +0_0_reloadable11.cc 100 0xaaa +0_0_reloadable11.cc 100 0xaae +0_0_reloadable11.cc 100 0xab4 +0_0_reloadable11.cc 100 0xac4 +0_0_reloadable11.cc 103 0xac8 +0_0_reloadable11.cc 103 0xae0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 464 0xae4 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable11/src/0_0_reloadable11.cc: +0_0_reloadable11.cc 108 0xae8 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 629 0xaf4 x +io_buffer_compiler.h 629 0xaf8 +io_buffer_compiler.h 630 0xafc +io_buffer_compiler.h 630 0xafc 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 464 0xafc 2 +io_buffer_main.h 464 0xafc 3 +io_buffer_main.h 464 0xafc 4 +io_buffer_main.h 464 0xb0a +io_buffer_main.h 464 0xb0e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0xb12 +io_buffer_compiler.h 630 0xb12 1 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable11/src/0_0_reloadable11.cc: +0_0_reloadable11.cc 106 0xb18 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0xb26 x +io_buffer_compiler.h 629 0xb2a x +io_buffer_compiler.h 630 0xb2a 1 +io_buffer_compiler.h 629 0xb30 +io_buffer_compiler.h 630 0xb30 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 464 0xb42 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0xb46 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable11/src/0_0_reloadable11.cc: +0_0_reloadable11.cc 108 0xb5e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0xb62 +io_buffer_compiler.h 630 0xb66 x +io_buffer_compiler.h 630 0xb6a + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable11/src/0_0_reloadable11.cc: +0_0_reloadable11.cc 108 0xb6e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0xb74 x +io_buffer_compiler.h - 0xb75 + + +CU: me_div.c: +File name Line number Starting address View Stmt + +./me_div.c:[++] +me_div.c 108 0x34a0 +me_div.c 108 0x34a0 1 +me_div.c 115 0x34a0 2 x +me_div.c 108 0x34a6 +me_div.c 108 0x34aa +me_div.c 108 0x34ae +me_div.c 108 0x34b2 +me_div.c 108 0x34b6 +me_div.c 108 0x34ba +me_div.c 108 0x34be +me_div.c 108 0x34c2 +me_div.c 108 0x34c6 +me_div.c 108 0x34ca +me_div.c 108 0x34ce +me_div.c 108 0x34d2 +me_div.c 108 0x34d6 +me_div.c 108 0x34da +me_div.c 108 0x34de +me_div.c 108 0x34e2 +me_div.c 108 0x34e6 +me_div.c 108 0x34ea +me_div.c 108 0x34ee +me_div.c 108 0x34f2 +me_div.c 108 0x34f6 +me_div.c 108 0x34fa +me_div.c 108 0x34fe +me_div.c 108 0x3502 +me_div.c 108 0x3506 +me_div.c 108 0x350a +me_div.c 108 0x350e +me_div.c 108 0x3512 +me_div.c 119 0x3516 x +me_div.c 108 0x351a x +me_div.c 108 0x351e +me_div.c 108 0x3522 +me_div.c 108 0x3526 +me_div.c - 0x3527 + + +CU: No directory table +CU: Empty file name table + - 0x1 + + diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable81/scripts/0_0_reloadable81.bcf b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable81/scripts/0_0_reloadable81.bcf new file mode 100644 index 0000000000000000000000000000000000000000..b5025c34b99f02de39e461699cdc760aa2cbe456 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable81/scripts/0_0_reloadable81.bcf @@ -0,0 +1,16 @@ +_reserved DMb 0x0 0x40000 + +_reserved PM 0x0 0x9e0 //reserved for main elf + +_entry_point _Z13kernelWrapperPPvjjjj +_symbol _Z13kernelWrapperPPvjjjj 0x9e0 + +_reserved DMb 0x7b540 0x800 //reserved for lcp ping-pong buffers +_reserved DMb 0x7bd40 0x40 //reserved for sync buffer +_stack DM_stack 0x7bd80 0x440 //stack for core +_reserved DMb 0x7c1c0 0x40 //reserved for main elf heap +//space for synopsys compiler at 0x7c200 0x800//heap +_reserved DMb 0x40000 0x3b540 + +_reserved DMb 0x7ca00 0x3600 +_reserved DMb 0x80000 0x80000 // And everything else the core can't see diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable81/scripts/0_0_reloadable81.prx b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable81/scripts/0_0_reloadable81.prx new file mode 100644 index 0000000000000000000000000000000000000000..23970b39f8f266890e3c04b926837730d6216785 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable81/scripts/0_0_reloadable81.prx @@ -0,0 +1,13 @@ + + + \ No newline at end of file diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable81/src/0_0_reloadable81.cc b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable81/src/0_0_reloadable81.cc new file mode 100644 index 0000000000000000000000000000000000000000..7d280691fae6ce4acdf5113568358cd244ac54c3 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable81/src/0_0_reloadable81.cc @@ -0,0 +1,108 @@ +// Automatically generated processor driver using AIEngine tool-chain + +#include +#include +#include + + +// Declare Kernel functions and initializers +void superkernel_sub1d(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict,adf::io_buffer, adf::locking::async>> &__restrict); +void superkernel_mul1d_attribute_broadcasting(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict); +void superkernel_add1d(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict,adf::io_buffer, adf::locking::async>> &__restrict); +void superkernel_mul1d(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict,adf::io_buffer, adf::locking::async>> &__restrict); +void superkernel_reducesum(adf::io_buffer>> &__restrict,const unsigned int (&)[18],adf::io_buffer, adf::locking::async>> &__restrict); +#include "softmax_adf_wrapper.cpp" +#include "expand_adf_wrapper.cpp" + +// Declare Kernel objects and external arrays + + +void _b14285_wrapper(void* args[]) +{ + superkernel_sub1d( + *reinterpret_cast*>(args[0]), + *reinterpret_cast(args[3]), + *reinterpret_cast*>(args[1]), + *reinterpret_cast*>(args[2])); +} + +void _b14290_wrapper(void* args[]) +{ + superkernel_mul1d_attribute_broadcasting( + *reinterpret_cast*>(args[0]), + *reinterpret_cast(args[2]), + *reinterpret_cast*>(args[1])); +} + +void _b13811_wrapper(void* args[]) +{ + superkernel_add1d( + *reinterpret_cast*>(args[0]), + *reinterpret_cast(args[3]), + *reinterpret_cast*>(args[1]), + *reinterpret_cast*>(args[2])); +} + +void _b13749_wrapper(void* args[]) +{ + superkernel_mul1d( + *reinterpret_cast*>(args[0]), + *reinterpret_cast(args[3]), + *reinterpret_cast*>(args[1]), + *reinterpret_cast*>(args[2])); +} + +void _b14811_wrapper(void* args[]) +{ + superkernel_reducesum( + *reinterpret_cast*>(args[0]), + *reinterpret_cast(args[2]), + *reinterpret_cast*>(args[1])); +} + +void _b8134_wrapper(void* args[]) +{ + mllib_graphs::softmax_adf_wrapper>, adf::io_buffer_config>>( + *reinterpret_cast*>(args[0]), + *reinterpret_cast(args[2]), + *reinterpret_cast*>(args[1])); +} + +void _b8096_wrapper(void* args[]) +{ + mllib_graphs::expand_wrapper( + *reinterpret_cast*>(args[0]), + *reinterpret_cast(args[2]), + *reinterpret_cast*>(args[1])); +} + +using UniformKernelFunc = void (*)(void **); + +static UniformKernelFunc g_uniformKernelFuncs[7] = { + _b14285_wrapper, + _b14290_wrapper, + _b13811_wrapper, + _b13749_wrapper, + _b14811_wrapper, + _b8134_wrapper, + _b8096_wrapper +}; + +__attribute__((always_inline)) void kernelWrapper(void* args[], uint32 kernelId, uint32 numSyncIn, uint32 numAsyncIn, uint32 numSyncOut) +{ + uint32 idx = 0; + reinterpret_cast(args[idx])->acquire(numSyncIn > 0); + idx += (numSyncIn > 0) ? 1 : 0; + idx += numAsyncIn; + reinterpret_cast(args[idx])->acquire(numSyncOut > 0); + idx += (numSyncOut > 0) ? 1 : 0; + + (*(g_uniformKernelFuncs[kernelId]))(args); + + idx = 0; + reinterpret_cast(args[idx])->release(numSyncIn > 0); + idx += (numSyncIn > 0) ? 1 : 0; + idx += numAsyncIn; + reinterpret_cast(args[idx])->release(numSyncOut > 0); + idx += (numSyncOut > 0) ? 1 : 0; +} diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/0_0_reloadable82.log b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/0_0_reloadable82.log new file mode 100644 index 0000000000000000000000000000000000000000..09ff3e10125937f83e30df1e09e984656f49f8d5 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/0_0_reloadable82.log @@ -0,0 +1,483 @@ +Configuration: Release_LLVM +Compiling "0_0_reloadable82.ll" +chess-clang --chess-proc-dir=/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -S -O2 -std=c++2a -fno-builtin-memcpy -mllvm -instcombine-code-sinking=false -mllvm -disable-lsr -mllvm -replexitval=never -mllvm -enable-load-pre=false -mllvm -chess-disable-add-to-or -mllvm -chess-combine-gep-indices=none -mllvm -chess-disable-fold-phi-of-loads -mllvm -chess-aainfo2chains-algo=4 -mllvm -chess-aggressive-aainfo=false -mllvm -chess-enable-indvarsimplify=0 -mllvm -chess-disable-cse-across-loopboundary -mllvm -chess-tbaa-detect-common-underlying-object=true -mllvm -chess-protect-llvm-global-reg-access=true -fno-jump-tables -fno-discard-value-names -g ../../ir/0_0_reloadable82.ll -o../Release/chesswork3593526/0_0_reloadable82.sfg --chess-proc-name=me +noodle -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/isg -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -iaie_core.h +Sinl +Olbb=200 +Opmsa +NOpld +Olzyinl +w../Release/chesswork3593526 ../Release/chesswork3593526/0_0_reloadable82.sfg +Q1=+Sinl,+Olbb=200,+Opmsa,+NOpld,+Olzyinl +Q2=+Sinl,+Olbb=200,+Opmsa,+NOpld,+Olzyinl +Q3=+Sinl,+Olbb=1000,+Opmsa,+NOpld,+Olzyinl +Qfast=+Sinl,+Olbb=1000,+Opmsa,+NOpld,+Olzyinl,+Opfp +Qs=+Sinl,+Olbb=200,+Opmsa,+NOpld,+Olzyinl +Qz=+Sinl,+Olbb=200,+Opmsa,+NOpld,+Olzyinl me +chess-backend 0_0_reloadable82-F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +chess-backend 0_0_reloadable82-F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +chess-backend 0_0_reloadable82-F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +chess-backend 0_0_reloadable82-F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +Warning in "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h", line 662, column 4: in "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h", line 662: (loop #3) + further loop software pipelining (to 3 cycles) is feasible with `chess_prepare_for_pipelining' + but requires a minimum loop count of 6 + ... consider annotating the loop with `chess_loop_range(6,)' if applicable, + ... or remove the current `chess_loop_range(4,)` annotation + +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outEN_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outEN_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outEN_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outEN_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outEN_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outEN_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outEN_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +chess-backend 0_0_reloadable82-F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +chess-backend 0_0_reloadable82-F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outEN_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outEN_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outEN_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outEN_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outEN_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outEN_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +Warning in "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h", line 125, column 4: in "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h", line 125: (loop #19) + further loop software pipelining (to 2 cycles) is feasible with `chess_prepare_for_pipelining' + but requires a minimum loop count of 7 + ... consider annotating the loop with `chess_loop_range(7,)' if applicable, + ... or remove the current `chess_loop_range(4,)` annotation + +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outEN_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +chess-backend 0_0_reloadable82-F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncES_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncES_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncES_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncES_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncES_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncES_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncES_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEER_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEER_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEER_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEER_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEER_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEER_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEER_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEER_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEER_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEER_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEER_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEER_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEER_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEER_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +chess-backend 0_0_reloadable82-F_Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEER_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEER_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEER_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +chess-backend 0_0_reloadable82-F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEER_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEER_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEER_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEER_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +chess-backend 0_0_reloadable82-F_Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +Warning in "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h", line 258, column 4: in "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h", line 258: (loop #3) + further loop software pipelining (to 4 cycles) is feasible with `chess_prepare_for_pipelining' + but requires a minimum loop count of 4 + ... consider annotating the loop with `chess_loop_range(4,)' if applicable, + ... or remove the current `chess_loop_range(2,)` annotation + +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE__ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE__ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE__ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE__ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE__ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE__ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asy_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asy_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asy_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE__ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_Z8init_accILt1EEvPaS0_iii_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z8init_accILt1EEvPaS0_iii_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asy_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asy_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z8init_accILt1EEvPaS0_iii_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asy_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_Z8init_accILt1EEvPaS0_iii_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z8init_accILt1EEvPaS0_iii_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z8init_accILt1EEvPaS0_iii_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asy_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +chess-backend 0_0_reloadable82-F_Z12post_processPai_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--mist1 -k64 --common 0_0_reloadable82-F_ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z12post_processPai_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z8init_accILt1EEvPaS0_iii_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +chess-backend 0_0_reloadable82-F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z12post_processPai_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_Z12post_processPai_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z12post_processPai_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z12post_processPai_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +chess-backend 0_0_reloadable82-F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5async_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5async_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5async_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5async_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z12post_processPai_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5async_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +chess-backend 0_0_reloadable82-F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5async_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5async_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncES_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncES_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncES_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_Z20transpose4d_adf_initv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z20transpose4d_adf_initv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z20transpose4d_adf_initv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_Z20transpose4d_adf_initv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z20transpose4d_adf_initv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncES_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z20transpose4d_adf_initv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z20transpose4d_adf_initv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncES_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_Z15_b13786_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z15_b13786_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z15_b13786_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_Z15_b13786_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z15_b13786_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z15_b13786_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncES_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z15_b13786_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NS_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NS_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NS_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NS_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NS_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NS_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncES_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +chess-backend 0_0_reloadable82-F_Z14_b8148_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z14_b8148_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NS_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z14_b8148_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_Z14_b8148_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z14_b8148_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z14_b8148_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z14_b8148_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +chess-backend 0_0_reloadable82-F_ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +chess-backend 0_0_reloadable82-F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +chess-backend 0_0_reloadable82-F_Z14_b8170_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z14_b8170_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z14_b8170_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +chess-backend 0_0_reloadable82-F_ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--mist1 -k64 --common 0_0_reloadable82-F_Z14_b8170_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z14_b8170_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z14_b8170_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z14_b8170_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +chess-backend 0_0_reloadable82-F_Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +chess-backend 0_0_reloadable82-F_ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0__ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0__ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0__ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0__ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0__ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0__ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0__ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_N_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--mist1 -k64 --common 0_0_reloadable82-F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_N_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_N_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_N_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_N_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_N_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_N_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_Z14_b7835_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z14_b7835_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z14_b7835_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_Z14_b7835_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z14_b7835_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z14_b7835_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z14_b7835_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +chess-backend 0_0_reloadable82-kernelWrapper_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-kernelWrapper_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +chess-backend --gvt me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-kernelWrapper_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-kernelWrapper_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-kernelWrapper_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-kernelWrapper_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-kernelWrapper_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +bridge -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/isg -i -g -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 0_0_reloadable82.objlist -o../0_0_reloadable82.o -pme +darts -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -d -h -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno ../Release/0_0_reloadable82.o me +Linking "../Release/0_0_reloadable82" +bridge -o../Release/0_0_reloadable82 ../Release/0_0_reloadable82.o -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/isg -g -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable82.bcf -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/softfloat/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork3593526 -pme +darts -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -d -h -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable82 me +Compilation finished successfully (0 errors, 3 warnings) diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/Release/0_0_reloadable82.# b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/Release/0_0_reloadable82.# new file mode 100644 index 0000000000000000000000000000000000000000..cdb2e50d668602e677d7c344ba6b93ed8dd1a7ee --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/Release/0_0_reloadable82.# @@ -0,0 +1,2 @@ +a62ac71a0afb12a23a7ea519771976bcbca7510f +1a735f496f1284ab86135ede88939a8aeeb375a3 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/Release/0_0_reloadable82.## b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/Release/0_0_reloadable82.## new file mode 100644 index 0000000000000000000000000000000000000000..edb3f529e38bf23bf0a79ae315a7164cfbfadb8c --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/Release/0_0_reloadable82.## @@ -0,0 +1,2 @@ +4c09d58b150fa6a21ceebc73be9be5180693b4c2 +92e103875c6a39bc8cdbe4a21c02c5a94ba3cc96 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/Release/0_0_reloadable82.calltree b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/Release/0_0_reloadable82.calltree new file mode 100644 index 0000000000000000000000000000000000000000..8437981960bfebeb5d5bff2aa185f7d8096f4888 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/Release/0_0_reloadable82.calltree @@ -0,0 +1,60 @@ + +// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri May 30 11:36:47 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// bridge -o../Release/0_0_reloadable82 ../Release/0_0_reloadable82.o -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/isg -g -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable82.bcf -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/softfloat/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork3593526 -pme + + +// Release: ipp V-2024.06-TGT-241219 + +_Z13kernelWrapperPPvjjjj + _Z15_b13786_wrapperPPv (referenced text) + _Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + _ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv + _Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params + _Z8init_accILt1EEvPaS0_iii + _Z12post_processPai + _Z14_b8148_wrapperPPv (referenced text) + _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj + memset + _Z14_b8170_wrapperPPv (referenced text) + _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj + _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj + _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj + _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params + _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params + _Z14_b7835_wrapperPPv (referenced text) + _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj + _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj + _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params + + +Call tree stack and functions sizes: + +stack stack stack call func func function name + desc level level desc +----- ----- ----- ----- ----- ----- -------------------------------------------------------------- + 64 256 0 0 546 7520 _Z13kernelWrapperPPvjjjj + 0 192 1 1 36 3650 _Z15_b13786_wrapperPPv + 64 192 1 2 546 3614 _Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + 0 0 2 3 550 550 _ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv + 64 128 2 3 1930 2518 _Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params + 64 64 3 4 294 294 _Z8init_accILt1EEvPaS0_iii + 0 0 3 4 294 294 _Z12post_processPai + 0 64 1 1 32 690 _Z14_b8148_wrapperPPv + 64 64 1 2 484 658 _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj + 0 0 2 3 174 174 memset + 0 128 1 1 32 988 _Z14_b8170_wrapperPPv + 128 128 1 2 178 956 _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj + 0 0 2 3 52 298 _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj + 0 0 3 4 162 162 _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj + 0 0 2 4 84 84 _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params + 0 0 2 3 480 480 _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params + 0 64 1 1 32 1646 _Z14_b7835_wrapperPPv + 64 64 1 2 202 1614 _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj + 0 0 2 3 262 262 _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj + 0 0 2 3 1150 1150 _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params + + +Maximum call level : 4 +Maximum stack level: 3 +Maximum stack size : 256 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/Release/0_0_reloadable82.cmic2 b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/Release/0_0_reloadable82.cmic2 new file mode 100644 index 0000000000000000000000000000000000000000..4e4279673be0cc4fde6427ea9a9279b7c1bb08c4 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/Release/0_0_reloadable82.cmic2 @@ -0,0 +1,10727 @@ + +// File generated by darts version V-2024.06#84922c0d9f#241219, Fri May 30 11:36:49 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// darts -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -d -h -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable82 me + +// Release: ipp V-2024.06-TGT-241219 +.label __Z13kernelWrapperPPvjjjj___func_begin0 +.label _Z13kernelWrapperPPvjjjj +.function kernelWrapper _Z13kernelWrapperPPvjjjj +.src_ref 0 "0_0_reloadable82.cc" 60 first +.src_ref 0 "0_0_reloadable82.cc" 62 60 +.src_ref 0 "0_0_reloadable82.cc" 62 110 first +.function_start + 2528 "00101100" // LDA r16, [p0]; NEZ r26, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2529 "11100000" // /* MW 5 */ + 2530 "11101001" // /* MW 4 */ + 2531 "11010000" // /* MW 3 */ + 2532 "11000010" // /* MW 2 */ + 2533 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 60 + 2534 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2535 "00000001" // /* MW 5 */ + 2536 "00000000" // /* MW 4 */ + 2537 "00000000" // /* MW 3 */ + 2538 "00001000" // /* MW 2 */ + 2539 "00000000" // /* MW 1 */ + 2540 "10011000" // ST p6, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2541 "00011101" // /* MW 3 */ + 2542 "11101111" // /* MW 2 */ + 2543 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 0 "0_0_reloadable82.cc" 67 112 + 2544 "00000010" // ST r14, [sp, #-16]; MOV r14, r3 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2545 "01110000" // /* MW 7 */ + 2546 "11010000" // /* MW 6 */ + 2547 "11001000" // /* MW 5 */ + 2548 "00000001" // /* MW 4 */ + 2549 "10110000" // /* MW 3 */ + 2550 "00111010" // /* MW 2 */ + 2551 "11111110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 0 "0_0_reloadable82.cc" 64 110 + 2552 "00000010" // ST r15, [sp, #-8]; MOV r15, r1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2553 "01110000" // /* MW 7 */ + 2554 "01010000" // /* MW 6 */ + 2555 "11101000" // /* MW 5 */ + 2556 "00000001" // /* MW 4 */ + 2557 "10110000" // /* MW 3 */ + 2558 "00111110" // /* MW 2 */ + 2559 "11111111" // /* MW 1 */ + 2560 "10011000" // ST p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2561 "10011101" // /* MW 3 */ + 2562 "11110111" // /* MW 2 */ + 2563 "00001111" // /* MW 1 */ + 2564 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2565 "00111101" // /* MW 3 */ + 2566 "11111100" // /* MW 2 */ + 2567 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 first + 2568 "00011000" // ADD.NC p6, r16, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2569 "00000010" // /* MW 3 */ + 2570 "01101000" // /* MW 2 */ + 2571 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 + 2572 "10011000" // LDA r16, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2573 "00010110" // /* MW 3 */ + 2574 "00011110" // /* MW 2 */ + 2575 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 36 + 2576 "10011000" // LDA r18, [p6], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2577 "01010110" // /* MW 3 */ + 2578 "00111110" // /* MW 2 */ + 2579 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 46 + 2580 "10011000" // LDA r17, [p6], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2581 "00110110" // /* MW 3 */ + 2582 "11101110" // /* MW 2 */ + 2583 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 60 + 2584 "10011000" // LDA r27, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2585 "01110110" // /* MW 3 */ + 2586 "00000111" // /* MW 2 */ + 2587 "00000110" // /* MW 1 */ + 2588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2589 "00000000" // /* MW 1 */ + 2590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2591 "00000000" // /* MW 1 */ + 2592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2593 "00000000" // /* MW 1 */ + 2594 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2595 "00000000" // /* MW 1 */ + 2596 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2597 "00000000" // /* MW 1 */ + 2598 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2599 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 219 30 first +.src_ref 1 "io_buffer_compiler.h" 219 37 first + 2600 "00011000" // SEL.EQZ r16, r16, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2601 "00100010" // /* MW 3 */ + 2602 "00100001" // /* MW 2 */ + 2603 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 218 23 first + 2604 "10011000" // ST r16, [p6, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2605 "00010001" // /* MW 3 */ + 2606 "11010110" // /* MW 2 */ + 2607 "00001110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 +.src_ref 1 "io_buffer_main.h" 434 8 +.src_ref 1 "io_buffer_main.h" 434 8 + 2608 "11100100" // MOVX r16, #-1; MOV el0, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2609 "00111001" // /* MW 5 */ + 2610 "00110101" // /* MW 4 */ + 2611 "10100000" // /* MW 3 */ + 2612 "00011111" // /* MW 2 */ + 2613 "11111100" // /* MW 1 */ + 2614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2615 "00000000" // /* MW 1 */ + 2616 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2617 "00000000" // /* MW 1 */ + 2618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2619 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 first + 2620 "00011000" // ACQ.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2621 "00001000" // /* MW 3 */ + 2622 "01010111" // /* MW 2 */ + 2623 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 64 60 +.src_ref 0 "0_0_reloadable82.cc" 64 110 +.src_ref 0 "0_0_reloadable82.cc" 67 60 +.src_ref 0 "0_0_reloadable82.cc" 70 7 + 2624 "01100100" // MOVX r17, #2; MOV r19, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2625 "00000101" // /* MW 5 */ + 2626 "10100000" // /* MW 4 */ + 2627 "00101001" // /* MW 3 */ + 2628 "01000001" // /* MW 2 */ + 2629 "00000100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 64 60 +.src_ref 0 "0_0_reloadable82.cc" 64 60 first + 2630 "11100100" // LSHL r20, r26, r17; MOV r18, p0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2631 "10000001" // /* MW 5 */ + 2632 "00100001" // /* MW 4 */ + 2633 "10111001" // /* MW 3 */ + 2634 "00100011" // /* MW 2 */ + 2635 "11010101" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 64 60 +.src_ref 0 "0_0_reloadable82.cc" 64 110 + 2636 "10100100" // LTU r18, r19, r15; ADD.NC p6, r18, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2637 "10100010" // /* MW 5 */ + 2638 "11010010" // /* MW 4 */ + 2639 "10011100" // /* MW 3 */ + 2640 "10011111" // /* MW 2 */ + 2641 "10011100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 64 60 +.src_ref 0 "0_0_reloadable82.cc" 67 60 + 2642 "10111010" // LDA r20, [p6]; ST r20, [sp, #-28]; MOV r19, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2643 "01110010" // /* MW 9 */ + 2644 "01100000" // /* MW 8 */ + 2645 "01101110" // /* MW 7 */ + 2646 "10000010" // /* MW 6 */ + 2647 "10010101" // /* MW 5 */ + 2648 "11100110" // /* MW 4 */ + 2649 "11010111" // /* MW 3 */ + 2650 "11010010" // /* MW 2 */ + 2651 "11000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 + 2652 "00000010" // ST r18, [sp, #-24]; MOV r26, r18 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2653 "01110000" // /* MW 7 */ + 2654 "10010000" // /* MW 6 */ + 2655 "01001100" // /* MW 5 */ + 2656 "00000011" // /* MW 4 */ + 2657 "10110000" // /* MW 3 */ + 2658 "01001010" // /* MW 2 */ + 2659 "11111101" // /* MW 1 */ + 2660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2661 "00000000" // /* MW 1 */ + 2662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2663 "00000000" // /* MW 1 */ + 2664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2665 "00000000" // /* MW 1 */ + 2666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2667 "00000000" // /* MW 1 */ + 2668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2669 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 first + 2670 "00011000" // ADD.NC p6, r20, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2671 "00000010" // /* MW 3 */ + 2672 "01101010" // /* MW 2 */ + 2673 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 + 2674 "10011000" // LDA r20, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2675 "10010110" // /* MW 3 */ + 2676 "00011110" // /* MW 2 */ + 2677 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 36 + 2678 "10011000" // LDA r22, [p6], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2679 "11010110" // /* MW 3 */ + 2680 "00111110" // /* MW 2 */ + 2681 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 46 + 2682 "10011000" // LDA r21, [p6], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2683 "10110110" // /* MW 3 */ + 2684 "11101110" // /* MW 2 */ + 2685 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 60 + 2686 "10011000" // LDA r27, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2687 "01110110" // /* MW 3 */ + 2688 "00000111" // /* MW 2 */ + 2689 "00000110" // /* MW 1 */ + 2690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2691 "00000000" // /* MW 1 */ + 2692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2693 "00000000" // /* MW 1 */ + 2694 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2695 "00000000" // /* MW 1 */ + 2696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2697 "00000000" // /* MW 1 */ + 2698 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2699 "00000000" // /* MW 1 */ + 2700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2701 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 219 30 first +.src_ref 1 "io_buffer_compiler.h" 219 37 first + 2702 "00011000" // SEL.EQZ r20, r20, r22, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2703 "01100010" // /* MW 3 */ + 2704 "00101001" // /* MW 2 */ + 2705 "00010101" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 218 23 first + 2706 "10011000" // ST r20, [p6, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2707 "10010001" // /* MW 3 */ + 2708 "11010110" // /* MW 2 */ + 2709 "00001110" // /* MW 1 */ + 2710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2711 "00000000" // /* MW 1 */ + 2712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2713 "00000000" // /* MW 1 */ + 2714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2715 "00000000" // /* MW 1 */ + 2716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2717 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 first + 2718 "00011000" // ACQ.COND r21, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2719 "00001000" // /* MW 3 */ + 2720 "01010111" // /* MW 2 */ + 2721 "00010101" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 67 60 first + 2722 "10011000" // LSHL r18, r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2723 "00011101" // /* MW 3 */ + 2724 "10100101" // /* MW 2 */ + 2725 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 67 60 +.src_ref 0 "0_0_reloadable82.cc" 67 60 + 2726 "10100100" // LSHL r18, r2, r17; ADD.NC r19, r19, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2727 "10010010" // /* MW 5 */ + 2728 "10110011" // /* MW 4 */ + 2729 "10111001" // /* MW 3 */ + 2730 "10100011" // /* MW 2 */ + 2731 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 67 60 +.src_ref 0 "0_0_reloadable82.cc" 67 112 + 2732 "10100100" // NEZ r26, r14; ADD.NC p6, r19, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2733 "10010010" // /* MW 5 */ + 2734 "11010011" // /* MW 4 */ + 2735 "00001100" // /* MW 3 */ + 2736 "10011110" // /* MW 2 */ + 2737 "01110110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 67 60 + 2738 "00001100" // LDA r18, [p6]; ST r26, [sp, #-32] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2739 "10101011" // /* MW 5 */ + 2740 "11000110" // /* MW 4 */ + 2741 "11011111" // /* MW 3 */ + 2742 "11001010" // /* MW 2 */ + 2743 "11000000" // /* MW 1 */ + 2744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2745 "00000000" // /* MW 1 */ + 2746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2747 "00000000" // /* MW 1 */ + 2748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2749 "00000000" // /* MW 1 */ + 2750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2751 "00000000" // /* MW 1 */ + 2752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2753 "00000000" // /* MW 1 */ + 2754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2755 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 first + 2756 "00011000" // ADD.NC p7, r18, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2757 "00000010" // /* MW 3 */ + 2758 "01101001" // /* MW 2 */ + 2759 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 + 2760 "10011000" // LDA r19, [p7], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2761 "01110110" // /* MW 3 */ + 2762 "00111110" // /* MW 2 */ + 2763 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 23 + 2764 "10011000" // LDA r18, [p7], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2765 "01010110" // /* MW 3 */ + 2766 "11101110" // /* MW 2 */ + 2767 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 36 + 2768 "10011000" // LDA r20, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2769 "10010110" // /* MW 3 */ + 2770 "00011110" // /* MW 2 */ + 2771 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 60 + 2772 "10011000" // LDA r27, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2773 "01110110" // /* MW 3 */ + 2774 "00000111" // /* MW 2 */ + 2775 "00000111" // /* MW 1 */ + 2776 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2777 "00000000" // /* MW 1 */ + 2778 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2779 "00000000" // /* MW 1 */ + 2780 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2781 "00000000" // /* MW 1 */ + 2782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2783 "00000000" // /* MW 1 */ + 2784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2785 "00000000" // /* MW 1 */ + 2786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2787 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 219 30 first +.src_ref 1 "io_buffer_compiler.h" 219 37 first + 2788 "00011000" // SEL.EQZ r19, r19, r20, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2789 "01000010" // /* MW 3 */ + 2790 "11100111" // /* MW 2 */ + 2791 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 218 23 first + 2792 "10011000" // ST r19, [p7, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2793 "01110001" // /* MW 3 */ + 2794 "11010110" // /* MW 2 */ + 2795 "00001111" // /* MW 1 */ + 2796 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2797 "00000000" // /* MW 1 */ + 2798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2799 "00000000" // /* MW 1 */ + 2800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2801 "00000000" // /* MW 1 */ + 2802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2803 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 first + 2804 "00011000" // ACQ.COND r18, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2805 "00001000" // /* MW 3 */ + 2806 "10010111" // /* MW 2 */ + 2807 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 70 7 first + 2808 "10011000" // LSHL r16, r0, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2809 "00011101" // /* MW 3 */ + 2810 "00100001" // /* MW 2 */ + 2811 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 70 7 + 2812 "11111000" // MOV dj0, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2813 "00100000" // /* MW 3 */ + 2814 "10001000" // /* MW 2 */ + 2815 "00011000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 70 7 + 2816 "01000100" // MOVXM p7, #508800 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2817 "00000000" // /* MW 5 */ + 2818 "11000111" // /* MW 4 */ + 2819 "11001110" // /* MW 3 */ + 2820 "00000111" // /* MW 2 */ + 2821 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 70 7 + 2822 "00001100" // LDA p1, [p7, dj0]; ST el0, [sp, #-36] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2823 "01011011" // /* MW 5 */ + 2824 "10111000" // /* MW 4 */ + 2825 "11011111" // /* MW 3 */ + 2826 "00010011" // /* MW 2 */ + 2827 "11100000" // /* MW 1 */ + 2828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2829 "00000000" // /* MW 1 */ + 2830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2831 "00000000" // /* MW 1 */ + 2832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2833 "00000000" // /* MW 1 */ + 2834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2835 "00000000" // /* MW 1 */ + 2836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2837 "00000000" // /* MW 1 */ + 2838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2839 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 70 4 +.no_stack_arguments + 2840 "00011000" // JL p1 /* MW 4 */ /* control_operation: words=4 call unconditional cycles_taken=1 indirect absolute delay_slots=5 */ + 2841 "01000000" // /* MW 3 */ + 2842 "00110000" // /* MW 2 */ + 2843 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 73 60 +.src_ref 0 "0_0_reloadable82.cc" 75 60 +.delay_slot + 2844 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2845 "11000000" // /* MW 3 */ + 2846 "01100000" // /* MW 2 */ + 2847 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2849 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2851 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2853 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2854 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2855 "01111110" // /* MW 9 */ + 2856 "10100101" // /* MW 8 */ + 2857 "00000001" // /* MW 7 */ + 2858 "00000000" // /* MW 6 */ + 2859 "00010000" // /* MW 5 */ + 2860 "00000000" // /* MW 4 */ + 2861 "11110000" // /* MW 3 */ + 2862 "00101100" // /* MW 2 */ + 2863 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_main.h" 464 8 +.src_ref 1 "io_buffer_main.h" 464 8 +.src_ref 1 "io_buffer_main.h" 464 8 +.src_ref 0 "0_0_reloadable82.cc" 73 60 first +.return_address + 2864 "00101100" // LDA r17, [p7]; MOVX r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2865 "00001010" // /* MW 5 */ + 2866 "01000000" // /* MW 4 */ + 2867 "11010000" // /* MW 3 */ + 2868 "11000110" // /* MW 2 */ + 2869 "11100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 + 2870 "00011000" // LDA r26, [sp, #-36] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2871 "01010001" // /* MW 3 */ + 2872 "11011111" // /* MW 2 */ + 2873 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 75 60 + 2874 "00011000" // LDA dj0, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2875 "01000001" // /* MW 3 */ + 2876 "11100100" // /* MW 2 */ + 2877 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_main.h" 464 8 + 2878 "00011000" // LDA el0, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2879 "00101001" // /* MW 3 */ + 2880 "11101000" // /* MW 2 */ + 2881 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 + 2882 "00011000" // LDA eh0, [sp, #-32] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2883 "00001001" // /* MW 3 */ + 2884 "11100000" // /* MW 2 */ + 2885 "00000111" // /* MW 1 */ + 2886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2887 "00000000" // /* MW 1 */ + 2888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2889 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 12 first + 2890 "00011000" // ADD.NC p0, r17, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2891 "10001000" // /* MW 3 */ + 2892 "01101000" // /* MW 2 */ + 2893 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 12 + 2894 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2895 "00110110" // /* MW 3 */ + 2896 "00000110" // /* MW 2 */ + 2897 "00000000" // /* MW 1 */ + 2898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2899 "00000000" // /* MW 1 */ + 2900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2901 "00000000" // /* MW 1 */ + 2902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2903 "00000000" // /* MW 1 */ + 2904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2905 "00000000" // /* MW 1 */ + 2906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2907 "00000000" // /* MW 1 */ + 2908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2909 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 first + 2910 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2911 "00001000" // /* MW 3 */ + 2912 "01010101" // /* MW 2 */ + 2913 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 2914 "11010100" // LDA r17, [p0, #-4]; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2915 "01000001" // /* MW 5 */ + 2916 "10101111" // /* MW 4 */ + 2917 "11011101" // /* MW 3 */ + 2918 "11000110" // /* MW 2 */ + 2919 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 +.src_ref 0 "0_0_reloadable82.cc" 75 60 first + 2920 "11010100" // LDA r18, [p7, dj0]; MOV r26, el0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2921 "00111001" // /* MW 5 */ + 2922 "01000000" // /* MW 4 */ + 2923 "11011101" // /* MW 3 */ + 2924 "01001010" // /* MW 2 */ + 2925 "11100000" // /* MW 1 */ + 2926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2927 "00000000" // /* MW 1 */ + 2928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2929 "00000000" // /* MW 1 */ + 2930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2931 "00000000" // /* MW 1 */ + 2932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2933 "00000000" // /* MW 1 */ + 2934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2935 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 2936 "10011000" // SUB r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2937 "00010001" // /* MW 3 */ + 2938 "00100111" // /* MW 2 */ + 2939 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 12 first +.src_ref 1 "io_buffer_compiler.h" 630 24 + 2940 "00100100" // SEL.EQZ r17, r17, r19, r27; ADD.NC p7, r18, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2941 "00010000" // /* MW 5 */ + 2942 "11010010" // /* MW 4 */ + 2943 "01001110" // /* MW 3 */ + 2944 "01100110" // /* MW 2 */ + 2945 "10001100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 12 +.src_ref 1 "io_buffer_compiler.h" 630 22 first + 2946 "00001100" // LDA r17, [p7]; ST r17, [p0, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2947 "01100011" // /* MW 5 */ + 2948 "11101100" // /* MW 4 */ + 2949 "11010001" // /* MW 3 */ + 2950 "11000110" // /* MW 2 */ + 2951 "11100000" // /* MW 1 */ + 2952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2953 "00000000" // /* MW 1 */ + 2954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2955 "00000000" // /* MW 1 */ + 2956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2957 "00000000" // /* MW 1 */ + 2958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2959 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 2960 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2961 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 2962 "11111000" // MOV r26, eh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2963 "00011100" // /* MW 3 */ + 2964 "10100001" // /* MW 2 */ + 2965 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2966 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2967 "00001000" // /* MW 3 */ + 2968 "01010101" // /* MW 2 */ + 2969 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 2970 "11010100" // LDA r17, [p7, #-4]; MOV r27, el0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2971 "00111001" // /* MW 5 */ + 2972 "11000000" // /* MW 4 */ + 2973 "11011101" // /* MW 3 */ + 2974 "11000110" // /* MW 2 */ + 2975 "11111110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 78 60 first + 2976 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2977 "01010110" // /* MW 3 */ + 2978 "00000110" // /* MW 2 */ + 2979 "00000110" // /* MW 1 */ + 2980 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2981 "00000000" // /* MW 1 */ + 2982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2983 "00000000" // /* MW 1 */ + 2984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2985 "00000000" // /* MW 1 */ + 2986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2987 "00000000" // /* MW 1 */ + 2988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2989 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 2990 "10011000" // SUB r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2991 "00010001" // /* MW 3 */ + 2992 "00100111" // /* MW 2 */ + 2993 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 25 first +.src_ref 1 "io_buffer_compiler.h" 630 24 + 2994 "00100100" // SEL.EQZ r17, r17, r19, r27; ADD.NC p0, r18, #20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2995 "00010100" // /* MW 5 */ + 2996 "11010010" // /* MW 4 */ + 2997 "01000000" // /* MW 3 */ + 2998 "01100110" // /* MW 2 */ + 2999 "10001100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 25 +.src_ref 1 "io_buffer_compiler.h" 630 22 first + 3000 "00001100" // LDA r17, [p0]; ST r17, [p7, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3001 "01100011" // /* MW 5 */ + 3002 "11101100" // /* MW 4 */ + 3003 "11011111" // /* MW 3 */ + 3004 "11000110" // /* MW 2 */ + 3005 "00000000" // /* MW 1 */ + 3006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3007 "00000000" // /* MW 1 */ + 3008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3009 "00000000" // /* MW 1 */ + 3010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3011 "00000000" // /* MW 1 */ + 3012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3013 "00000000" // /* MW 1 */ + 3014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3015 "00000000" // /* MW 1 */ + 3016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3017 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 first + 3018 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3019 "00001000" // /* MW 3 */ + 3020 "01010101" // /* MW 2 */ + 3021 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 80 + 3022 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3023 "00111001" // /* MW 3 */ + 3024 "11111100" // /* MW 2 */ + 3025 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 3026 "10011000" // LDA r17, [p0, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3027 "00110110" // /* MW 3 */ + 3028 "11100110" // /* MW 2 */ + 3029 "00000000" // /* MW 1 */ + 3030 "00011000" // LDA p6, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3031 "00011001" // /* MW 3 */ + 3032 "11101111" // /* MW 2 */ + 3033 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 3034 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3035 "10011001" // /* MW 3 */ + 3036 "11110111" // /* MW 2 */ + 3037 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 3038 "00011000" // LDA r14, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3039 "11010001" // /* MW 3 */ + 3040 "11110001" // /* MW 2 */ + 3041 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3042 "00011000" // LDA r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3043 "11110001" // /* MW 3 */ + 3044 "11111001" // /* MW 2 */ + 3045 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 80 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3046 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3047 "00000001" // /* MW 5 */ + 3048 "00000000" // /* MW 4 */ + 3049 "00000000" // /* MW 3 */ + 3050 "11111000" // /* MW 2 */ + 3051 "11111111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 80 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3052 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3053 "00000000" // /* MW 3 */ + 3054 "00101000" // /* MW 2 */ + 3055 "00010000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 first +.delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3056 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3057 "00010001" // /* MW 3 */ + 3058 "00100001" // /* MW 2 */ + 3059 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3061 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3062 "11111000" // MOV r27, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3063 "00100000" // /* MW 3 */ + 3064 "11010111" // /* MW 2 */ + 3065 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.delay_slot + 3066 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3067 "00000010" // /* MW 3 */ + 3068 "01100001" // /* MW 2 */ + 3069 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 22 +.delay_slot + 3070 "10011000" // ST r16, [p0, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3071 "00010001" // /* MW 3 */ + 3072 "11100110" // /* MW 2 */ +.label _Z13kernelWrapperPPvjjjj__end +.label __Z13kernelWrapperPPvjjjj___func_end0 + 3073 "00001000" // /* MW 1 */ +.label _ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv +.label __ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv___func_begin0 +.function setup_gemm_bfp16_params _ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv +.src_ref 2 "gemm_bfp16_params.h" 128 first +.src_ref 2 "gemm_bfp16_params.h" 130 24 +.src_ref 2 "gemm_bfp16_params.h" 130 26 first +.function_start + 3088 "10111010" // LDA r3, [p0], #4; MOVXM p1, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3089 "00010000" // /* MW 9 */ + 3090 "00000000" // /* MW 8 */ + 3091 "10110001" // /* MW 7 */ + 3092 "11110000" // /* MW 6 */ + 3093 "00000001" // /* MW 5 */ + 3094 "00000000" // /* MW 4 */ + 3095 "11010000" // /* MW 3 */ + 3096 "10001110" // /* MW 2 */ + 3097 "00000011" // /* MW 1 */ +.src_ref 2 "gemm_bfp16_params.h" 58 39 +.src_ref 2 "gemm_bfp16_params.h" 59 38 +.src_ref 2 "gemm_bfp16_params.h" 61 39 +.src_ref 2 "gemm_bfp16_params.h" 71 52 +.src_ref 2 "gemm_bfp16_params.h" 86 29 +.src_ref 2 "gemm_bfp16_params.h" 93 56 + 3098 "10111010" // MOVA r29, #-2; MOVX r6, #-3; MOV r5, #-4 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3099 "01011000" // /* MW 9 */ + 3100 "11111100" // /* MW 8 */ + 3101 "10101111" // /* MW 7 */ + 3102 "10101000" // /* MW 6 */ + 3103 "01100111" // /* MW 5 */ + 3104 "00111110" // /* MW 4 */ + 3105 "00000000" // /* MW 3 */ + 3106 "11011101" // /* MW 2 */ + 3107 "11111111" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 +.src_ref 3 "aie.hpp" 6982 6 +.src_ref 3 "aie.hpp" 6982 6 +.src_ref 3 "aie.hpp" 6982 6 +.src_ref 3 "aie.hpp" 6982 6 +.src_ref 3 "aie.hpp" 7054 44 +.src_ref 3 "aie.hpp" 7056 79 +.src_ref 3 "aie.hpp" 7057 21 +.src_ref 3 "aie.hpp" 7072 95 +.src_ref 2 "gemm_bfp16_params.h" 44 26 +.src_ref 2 "gemm_bfp16_params.h" 44 26 +.src_ref 2 "gemm_bfp16_params.h" 80 39 +.src_ref 2 "gemm_bfp16_params.h" 99 73 +.src_ref 2 "gemm_bfp16_params.h" 138 24 + 3108 "10111010" // MOVA r24, #0; MOVX r1, #1; MOV r0, #8 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3109 "01011000" // /* MW 9 */ + 3110 "00001000" // /* MW 8 */ + 3111 "00001000" // /* MW 7 */ + 3112 "00101000" // /* MW 6 */ + 3113 "00010000" // /* MW 5 */ + 3114 "00000000" // /* MW 4 */ + 3115 "00000000" // /* MW 3 */ + 3116 "00011000" // /* MW 2 */ + 3117 "00000000" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7054 44 +.src_ref 3 "aie.hpp" 7072 95 +.src_ref 3 "aie.hpp" 7073 95 +.src_ref 2 "gemm_bfp16_params.h" 44 26 +.src_ref 2 "gemm_bfp16_params.h" 88 55 + 3118 "10111010" // MOVA r4, #256; MOVXM r28, #16777214 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3119 "00010000" // /* MW 9 */ + 3120 "11111111" // /* MW 8 */ + 3121 "10001111" // /* MW 7 */ + 3122 "11111111" // /* MW 6 */ + 3123 "00111111" // /* MW 5 */ + 3124 "00000000" // /* MW 4 */ + 3125 "00000000" // /* MW 3 */ + 3126 "00000100" // /* MW 2 */ + 3127 "00100000" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7053 42 +.src_ref 3 "aie.hpp" 7053 42 +.src_ref 3 "aie.hpp" 7053 42 +.src_ref 3 "aie.hpp" 7057 21 +.src_ref 2 "gemm_bfp16_params.h" 85 38 +.src_ref 2 "gemm_bfp16_params.h" 88 66 + 3128 "10111010" // MOVA r16, #7; MOVX r19, #9; MOV r2, #512 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3129 "01011000" // /* MW 9 */ + 3130 "00000000" // /* MW 8 */ + 3131 "01001010" // /* MW 7 */ + 3132 "00101000" // /* MW 6 */ + 3133 "00110001" // /* MW 5 */ + 3134 "00000001" // /* MW 4 */ + 3135 "00000000" // /* MW 3 */ + 3136 "11110000" // /* MW 2 */ + 3137 "00000000" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 +.src_ref 4 "tuple" 562 47 +.src_ref 4 "tuple" 562 47 +.src_ref 4 "tuple" 562 47 +.src_ref 3 "aie.hpp" 6982 6 +.src_ref 3 "aie.hpp" 6982 6 +.src_ref 3 "aie.hpp" 7056 79 +.src_ref 3 "aie.hpp" 7056 79 +.src_ref 2 "gemm_bfp16_params.h" 138 24 + 3138 "01100100" // MOVX r7, #128; MOV m0, #52 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3139 "11010001" // /* MW 5 */ + 3140 "00000000" // /* MW 4 */ + 3141 "00100000" // /* MW 3 */ + 3142 "11000000" // /* MW 2 */ + 3143 "00010001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16_params.h" 138 24 + 3144 "11111000" // MOV dj0, m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3145 "00000000" // /* MW 3 */ + 3146 "10000000" // /* MW 2 */ + 3147 "00011000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16_params.h" 58 39 first +.src_ref 2 "gemm_bfp16_params.h" 130 24 first + 3148 "01011100" // ST r3, [p1], #4; LSHL r27, r3, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3149 "11011011" // /* MW 5 */ + 3150 "11101100" // /* MW 4 */ + 3151 "00110001" // /* MW 3 */ + 3152 "10001110" // /* MW 2 */ + 3153 "00100011" // /* MW 1 */ +.src_ref 2 "gemm_bfp16_params.h" 93 56 first +.src_ref 2 "gemm_bfp16_params.h" 131 26 first + 3154 "00101100" // LDA r3, [p0], #4; LSHL r17, r3, r5 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3155 "10111011" // /* MW 5 */ + 3156 "11000100" // /* MW 4 */ + 3157 "11010001" // /* MW 3 */ + 3158 "10001110" // /* MW 2 */ + 3159 "00000011" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7054 59 first +.src_ref 2 "gemm_bfp16_params.h" 80 39 first + 3160 "00100100" // LSHL r31, r27, r0; ADD.NC r17, r17, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3161 "11111111" // /* MW 5 */ + 3162 "10110001" // /* MW 4 */ + 3163 "10111000" // /* MW 3 */ + 3164 "11000001" // /* MW 2 */ + 3165 "11011111" // /* MW 1 */ +.src_ref 2 "gemm_bfp16_params.h" 82 45 +.src_ref 2 "gemm_bfp16_params.h" 85 38 first + 3166 "10100100" // LSHL r19, r27, r19; ADD.NC r18, r31, r4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3167 "00100010" // /* MW 5 */ + 3168 "00111111" // /* MW 4 */ + 3169 "10111001" // /* MW 3 */ + 3170 "11100111" // /* MW 2 */ + 3171 "11011100" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7053 42 first + 3172 "10011000" // LSHL r22, r27, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3173 "00001101" // /* MW 3 */ + 3174 "11101101" // /* MW 2 */ + 3175 "00010110" // /* MW 1 */ + 3176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3177 "00000000" // /* MW 1 */ + 3178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3179 "00000000" // /* MW 1 */ + 3180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3181 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16_params.h" 59 38 first +.src_ref 2 "gemm_bfp16_params.h" 131 24 first + 3182 "01011100" // ST r3, [p1], #4; LSHL r26, r3, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3183 "11011011" // /* MW 5 */ + 3184 "11101000" // /* MW 4 */ + 3185 "00110001" // /* MW 3 */ + 3186 "10001110" // /* MW 2 */ + 3187 "00100011" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7054 59 first +.src_ref 2 "gemm_bfp16_params.h" 132 26 first + 3188 "00101100" // LDA r21, [p0], #4; ADD r20, r26, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3189 "11111110" // /* MW 5 */ + 3190 "01010011" // /* MW 4 */ + 3191 "11011101" // /* MW 3 */ + 3192 "11010110" // /* MW 2 */ + 3193 "00000011" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7054 44 + 3194 "10011000" // MUL r23, r22, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3195 "01001111" // /* MW 3 */ + 3196 "10101111" // /* MW 2 */ + 3197 "00010101" // /* MW 1 */ + 3198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3199 "00000000" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7056 79 first + 3200 "10011000" // SUB r30, r7, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3201 "01110001" // /* MW 3 */ + 3202 "11111101" // /* MW 2 */ + 3203 "00010001" // /* MW 1 */ + 3204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3205 "00000000" // /* MW 1 */ + 3206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3207 "00000000" // /* MW 1 */ + 3208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3209 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16_params.h" 65 24 first +.src_ref 2 "gemm_bfp16_params.h" 132 24 first + 3210 "01011100" // ST r21, [p1], #4; MUL r3, r3, r21 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3211 "10111111" // /* MW 5 */ + 3212 "10001110" // /* MW 4 */ + 3213 "00110001" // /* MW 3 */ + 3214 "11010110" // /* MW 2 */ + 3215 "00100011" // /* MW 1 */ +.src_ref 2 "gemm_bfp16_params.h" 61 39 first +.src_ref 2 "gemm_bfp16_params.h" 133 26 first + 3216 "00101100" // LDA el0, [p0], #4; LSHL r6, r21, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3217 "11011011" // /* MW 5 */ + 3218 "10011000" // /* MW 4 */ + 3219 "11011010" // /* MW 3 */ + 3220 "10000101" // /* MW 2 */ + 3221 "00000011" // /* MW 1 */ +.src_ref 2 "gemm_bfp16_params.h" 71 36 first +.src_ref 2 "gemm_bfp16_params.h" 88 55 + 3222 "10100100" // MUL r25, r27, r6; ADD.NC r28, r6, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3223 "11100010" // /* MW 5 */ + 3224 "00100110" // /* MW 4 */ + 3225 "11111110" // /* MW 3 */ + 3226 "01001101" // /* MW 2 */ + 3227 "11011110" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7057 21 first +.src_ref 2 "gemm_bfp16_params.h" 86 29 first + 3228 "10100100" // LSHL r5, r21, r5; ADD.NC r21, r26, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3229 "10100010" // /* MW 5 */ + 3230 "10111010" // /* MW 4 */ + 3231 "10111010" // /* MW 3 */ + 3232 "01001011" // /* MW 2 */ + 3233 "10101001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7054 44 +.src_ref 3 "aie.hpp" 7056 79 +.src_ref 2 "gemm_bfp16_params.h" 71 52 first +.src_ref 2 "gemm_bfp16_params.h" 86 38 + 3234 "10111010" // MOVA r25, #128; LSHL r29, r25, r29; ADD.NC r5, r5, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3235 "11001000" // /* MW 9 */ + 3236 "01111111" // /* MW 8 */ + 3237 "10101001" // /* MW 7 */ + 3238 "11101100" // /* MW 6 */ + 3239 "11011110" // /* MW 5 */ + 3240 "00110011" // /* MW 4 */ + 3241 "00000000" // /* MW 3 */ + 3242 "00011001" // /* MW 2 */ + 3243 "00010000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16_params.h" 88 66 first + 3244 "00011000" // MSC r2, r2, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3245 "11001110" // /* MW 3 */ + 3246 "11000101" // /* MW 2 */ + 3247 "00010111" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7053 42 first + 3248 "10011000" // LSHL r6, r6, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3249 "00001101" // /* MW 3 */ + 3250 "10001101" // /* MW 2 */ + 3251 "00010001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7057 21 first + 3252 "10011000" // LSHL r21, r21, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3253 "00001101" // /* MW 3 */ + 3254 "01101011" // /* MW 2 */ + 3255 "00010101" // /* MW 1 */ +.src_ref 2 "gemm_bfp16_params.h" 99 73 first +.src_ref 2 "gemm_bfp16_params.h" 133 24 first + 3256 "01011100" // ST el0, [p1], #4; LSHL r28, r26, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3257 "00011011" // /* MW 5 */ + 3258 "01110000" // /* MW 4 */ + 3259 "00111101" // /* MW 3 */ + 3260 "10000101" // /* MW 2 */ + 3261 "00100011" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7053 42 first +.src_ref 2 "gemm_bfp16_params.h" 134 26 first + 3262 "00101100" // LDA el0, [p0]; LSHL r16, r26, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3263 "00011011" // /* MW 5 */ + 3264 "01000010" // /* MW 4 */ + 3265 "11011101" // /* MW 3 */ + 3266 "10000101" // /* MW 2 */ + 3267 "00000000" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7056 79 first + 3268 "10011000" // SUB r27, r28, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3269 "01010001" // /* MW 3 */ + 3270 "00110111" // /* MW 2 */ + 3271 "00010111" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7054 44 first + 3272 "10011000" // LSHL r0, r5, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3273 "00001101" // /* MW 3 */ + 3274 "01000000" // /* MW 2 */ + 3275 "00010001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7054 44 +.src_ref 3 "aie.hpp" 7057 21 first + 3276 "00011000" // MAC r0, r0, r6, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3277 "01000110" // /* MW 3 */ + 3278 "10000001" // /* MW 2 */ + 3279 "00010001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7054 44 first +.src_ref 3 "aie.hpp" 7056 79 first + 3280 "00011000" // MSC r25, r25, r6, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3281 "01001110" // /* MW 3 */ + 3282 "10110011" // /* MW 2 */ + 3283 "00010001" // /* MW 1 */ + 3284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3285 "00000000" // /* MW 1 */ + 3286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3287 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16_params.h" 134 24 first + 3288 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3289 "00101001" // /* MW 3 */ + 3290 "00011100" // /* MW 2 */ + 3291 "00001001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16_params.h" 135 26 first + 3292 "10011000" // LDA el0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3293 "00101110" // /* MW 3 */ + 3294 "00010100" // /* MW 2 */ + 3295 "00000000" // /* MW 1 */ + 3296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3297 "00000000" // /* MW 1 */ + 3298 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3299 "00000000" // /* MW 1 */ + 3300 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3301 "00000000" // /* MW 1 */ + 3302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3303 "00000000" // /* MW 1 */ + 3304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3305 "00000000" // /* MW 1 */ + 3306 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3307 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16_params.h" 135 24 + 3308 "10011000" // ST el0, [p1], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3309 "00101001" // /* MW 3 */ + 3310 "00111100" // /* MW 2 */ + 3311 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7056 79 +.src_ref 2 "gemm_bfp16_params.h" 44 26 first + 3312 "00000010" // ST r3, [p1], #4; ADD.NC r3, r6, #-128 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3313 "00000000" // /* MW 7 */ + 3314 "10100000" // /* MW 6 */ + 3315 "01101001" // /* MW 5 */ + 3316 "00000000" // /* MW 4 */ + 3317 "00110000" // /* MW 3 */ + 3318 "10001110" // /* MW 2 */ + 3319 "00100011" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7056 79 first +.src_ref 2 "gemm_bfp16_params.h" 44 26 + 3320 "01011100" // ST r29, [p1], #4; SUB r29, r7, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3321 "00000011" // /* MW 5 */ + 3322 "11110110" // /* MW 4 */ + 3323 "00110011" // /* MW 3 */ + 3324 "11110110" // /* MW 2 */ + 3325 "00100011" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7056 79 +.src_ref 2 "gemm_bfp16_params.h" 44 26 first + 3326 "00000010" // ST r26, [p1], #4; ADD.NC r26, r22, #-128 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3327 "00000000" // /* MW 7 */ + 3328 "10100000" // /* MW 6 */ + 3329 "01001101" // /* MW 5 */ + 3330 "00000011" // /* MW 4 */ + 3331 "00110000" // /* MW 3 */ + 3332 "11101010" // /* MW 2 */ + 3333 "00100011" // /* MW 1 */ +.src_ref 2 "gemm_bfp16_params.h" 44 26 + 3334 "10011000" // ST r1, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3335 "00110001" // /* MW 3 */ + 3336 "00011100" // /* MW 2 */ + 3337 "00001001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16_params.h" 44 26 + 3338 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3339 "00010001" // /* MW 3 */ + 3340 "00011111" // /* MW 2 */ + 3341 "00001001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16_params.h" 44 26 + 3342 "10011000" // ST r31, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3343 "11110001" // /* MW 3 */ + 3344 "00011111" // /* MW 2 */ + 3345 "00001001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16_params.h" 44 26 + 3346 "10011000" // ST r4, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3347 "10010001" // /* MW 3 */ + 3348 "00011100" // /* MW 2 */ + 3349 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7072 95 first +.src_ref 2 "gemm_bfp16_params.h" 44 26 + 3350 "01011100" // ST r18, [p1], #4; ADD r18, r27, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3351 "00000001" // /* MW 5 */ + 3352 "11001010" // /* MW 4 */ + 3353 "00111101" // /* MW 3 */ + 3354 "11001010" // /* MW 2 */ + 3355 "00100011" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7073 95 first +.src_ref 2 "gemm_bfp16_params.h" 44 26 first + 3356 "01011100" // ST r19, [p1], #4; SUB r19, r4, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3357 "11100011" // /* MW 5 */ + 3358 "01001110" // /* MW 4 */ + 3359 "00110010" // /* MW 3 */ + 3360 "11001110" // /* MW 2 */ + 3361 "00100011" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7054 44 first +.src_ref 3 "aie.hpp" 7072 95 first +.src_ref 2 "gemm_bfp16_params.h" 44 26 + 3362 "01011100" // ST r5, [p1], #4; MSC r4, r4, r6, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3363 "10011100" // /* MW 5 */ + 3364 "00010010" // /* MW 4 */ + 3365 "00110011" // /* MW 3 */ + 3366 "10010110" // /* MW 2 */ + 3367 "00100011" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7056 79 +.src_ref 3 "aie.hpp" 7057 21 +.src_ref 2 "gemm_bfp16_params.h" 44 26 first + 3368 "01011100" // ST r2, [p1], #16; MOVX r2, #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3369 "00000010" // /* MW 5 */ + 3370 "00001000" // /* MW 4 */ + 3371 "00111111" // /* MW 3 */ + 3372 "10001010" // /* MW 2 */ + 3373 "00101001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7056 79 first +.src_ref 2 "gemm_bfp16_params.h" 44 26 + 3374 "01011100" // ST r24, [p1], #4; XOR r31, r23, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3375 "01001101" // /* MW 5 */ + 3376 "11111100" // /* MW 4 */ + 3377 "00111011" // /* MW 3 */ + 3378 "11100010" // /* MW 2 */ + 3379 "00100011" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7072 95 first +.src_ref 2 "gemm_bfp16_params.h" 44 26 first + 3380 "01011100" // ST r24, [p1], #-12; SUB r23, r24, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3381 "11100011" // /* MW 5 */ + 3382 "01011110" // /* MW 4 */ + 3383 "00111100" // /* MW 3 */ + 3384 "11100010" // /* MW 2 */ + 3385 "00111011" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7057 21 first +.src_ref 2 "gemm_bfp16_params.h" 44 26 + 3386 "01011100" // ST r24, [p1], #4; XOR r2, r2, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3387 "00001101" // /* MW 5 */ + 3388 "00001000" // /* MW 4 */ + 3389 "00110001" // /* MW 3 */ + 3390 "11100010" // /* MW 2 */ + 3391 "00100011" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7057 21 +.src_ref 2 "gemm_bfp16_params.h" 44 26 first + 3392 "01011100" // ST r24, [p1], #-8; SUB r0, r24, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3393 "00000011" // /* MW 5 */ + 3394 "00000000" // /* MW 4 */ + 3395 "00111100" // /* MW 3 */ + 3396 "11100010" // /* MW 2 */ + 3397 "00111101" // /* MW 1 */ +.src_ref 2 "gemm_bfp16_params.h" 44 26 + 3398 "10011000" // ST r24, [p1], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3399 "00010001" // /* MW 3 */ + 3400 "01011111" // /* MW 2 */ + 3401 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 first + 3402 "10011000" // ST r17, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3403 "00110001" // /* MW 3 */ + 3404 "00011110" // /* MW 2 */ + 3405 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3406 "10011000" // ST r30, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3407 "11010001" // /* MW 3 */ + 3408 "00011111" // /* MW 2 */ + 3409 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3410 "10011000" // ST r5, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3411 "10110001" // /* MW 3 */ + 3412 "00011100" // /* MW 2 */ + 3413 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3414 "10011000" // ST r31, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3415 "11110001" // /* MW 3 */ + 3416 "00011111" // /* MW 2 */ + 3417 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3418 "10011000" // ST r20, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3419 "10010001" // /* MW 3 */ + 3420 "00011110" // /* MW 2 */ + 3421 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3422 "10011000" // ST r26, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3423 "01010001" // /* MW 3 */ + 3424 "00011111" // /* MW 2 */ + 3425 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3426 "10011000" // ST r1, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3427 "00110001" // /* MW 3 */ + 3428 "00011100" // /* MW 2 */ + 3429 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3430 "10011000" // ST r7, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3431 "11110001" // /* MW 3 */ + 3432 "00011100" // /* MW 2 */ + 3433 "00001001" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 first + 3434 "10011000" // ST r20, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3435 "10010001" // /* MW 3 */ + 3436 "00011110" // /* MW 2 */ + 3437 "00001001" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 + 3438 "10011000" // ST r22, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3439 "11010001" // /* MW 3 */ + 3440 "00011110" // /* MW 2 */ + 3441 "00001001" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 + 3442 "10011000" // ST r5, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3443 "10110001" // /* MW 3 */ + 3444 "00011100" // /* MW 2 */ + 3445 "00001001" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 + 3446 "10011000" // ST r23, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3447 "11110001" // /* MW 3 */ + 3448 "00011110" // /* MW 2 */ + 3449 "00001001" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 +.src_ref 3 "aie.hpp" 7054 44 first +.src_ref 3 "aie.hpp" 7057 21 first + 3450 "01011100" // ST r19, [p1], #4; MAC r21, r21, r5, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3451 "10001100" // /* MW 5 */ + 3452 "11010111" // /* MW 4 */ + 3453 "00110010" // /* MW 3 */ + 3454 "11001110" // /* MW 2 */ + 3455 "00100011" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 first + 3456 "10011000" // ST r7, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3457 "11110001" // /* MW 3 */ + 3458 "00011100" // /* MW 2 */ + 3459 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 first +.src_ref 3 "aie.hpp" 7056 79 first + 3460 "01011100" // ST r17, [p1], #4; SUB r28, r24, r21 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3461 "10100011" // /* MW 5 */ + 3462 "01110010" // /* MW 4 */ + 3463 "00111100" // /* MW 3 */ + 3464 "11000110" // /* MW 2 */ + 3465 "00100011" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 +.src_ref 3 "aie.hpp" 7073 95 first + 3466 "01011100" // ST r28, [p1], #4; SUB r21, r16, r21 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3467 "10100011" // /* MW 5 */ + 3468 "01010110" // /* MW 4 */ + 3469 "00111000" // /* MW 3 */ + 3470 "11110010" // /* MW 2 */ + 3471 "00100011" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 first + 3472 "10011000" // ST r5, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3473 "10110001" // /* MW 3 */ + 3474 "00011100" // /* MW 2 */ + 3475 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3476 "10011000" // ST r27, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3477 "01110001" // /* MW 3 */ + 3478 "00011111" // /* MW 2 */ + 3479 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3480 "10011000" // ST r20, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3481 "10010001" // /* MW 3 */ + 3482 "00011110" // /* MW 2 */ + 3483 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3484 "10011000" // ST r29, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3485 "10110001" // /* MW 3 */ + 3486 "00011111" // /* MW 2 */ + 3487 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3488 "10011000" // ST r1, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3489 "00110001" // /* MW 3 */ + 3490 "00011100" // /* MW 2 */ + 3491 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3492 "10011000" // ST r16, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3493 "00010001" // /* MW 3 */ + 3494 "00011110" // /* MW 2 */ + 3495 "00001001" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 first + 3496 "10011000" // ST r20, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3497 "10010001" // /* MW 3 */ + 3498 "00011110" // /* MW 2 */ + 3499 "00001001" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 + 3500 "10011000" // ST r7, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3501 "11110001" // /* MW 3 */ + 3502 "00011100" // /* MW 2 */ + 3503 "00001001" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 + 3504 "10011000" // ST r5, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3505 "10110001" // /* MW 3 */ + 3506 "00011100" // /* MW 2 */ + 3507 "00001001" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 + 3508 "10011000" // ST r18, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3509 "01010001" // /* MW 3 */ + 3510 "00011110" // /* MW 2 */ + 3511 "00001001" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 + 3512 "10011000" // ST r21, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3513 "10110001" // /* MW 3 */ + 3514 "00011110" // /* MW 2 */ + 3515 "00001001" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 + 3516 "10011000" // ST r16, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3517 "00010001" // /* MW 3 */ + 3518 "00011110" // /* MW 2 */ + 3519 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 first + 3520 "10011000" // ST r17, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3521 "00110001" // /* MW 3 */ + 3522 "00011110" // /* MW 2 */ + 3523 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3524 "10011000" // ST r2, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3525 "01010001" // /* MW 3 */ + 3526 "00011100" // /* MW 2 */ + 3527 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3528 "10011000" // ST r5, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3529 "10110001" // /* MW 3 */ + 3530 "00011100" // /* MW 2 */ + 3531 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3532 "10011000" // ST r25, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3533 "00110001" // /* MW 3 */ + 3534 "00011111" // /* MW 2 */ + 3535 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3536 "10011000" // ST r20, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3537 "10010001" // /* MW 3 */ + 3538 "00011110" // /* MW 2 */ + 3539 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3540 "10011000" // ST r3, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3541 "01110001" // /* MW 3 */ + 3542 "00011100" // /* MW 2 */ + 3543 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3544 "10011000" // ST r1, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3545 "00110001" // /* MW 3 */ + 3546 "00011100" // /* MW 2 */ + 3547 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3548 "10011000" // ST r7, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3549 "11110001" // /* MW 3 */ + 3550 "00011100" // /* MW 2 */ + 3551 "00001001" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 first + 3552 "10011000" // ST r20, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3553 "10010001" // /* MW 3 */ + 3554 "00011110" // /* MW 2 */ + 3555 "00001001" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 + 3556 "10011000" // ST r6, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3557 "11010001" // /* MW 3 */ + 3558 "00011100" // /* MW 2 */ + 3559 "00001001" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 + 3560 "10011000" // ST r5, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3561 "10110001" // /* MW 3 */ + 3562 "00011100" // /* MW 2 */ + 3563 "00001001" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 + 3564 "10011000" // ST r4, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3565 "10010001" // /* MW 3 */ + 3566 "00011100" // /* MW 2 */ + 3567 "00001001" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 + 3568 "10011000" // ST r0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3569 "00010001" // /* MW 3 */ + 3570 "00011100" // /* MW 2 */ + 3571 "00001001" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 + 3572 "10011000" // ST r7, [p1], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3573 "11110001" // /* MW 3 */ + 3574 "00001000" // /* MW 2 */ + 3575 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 first + 3576 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3577 "00010001" // /* MW 3 */ + 3578 "00011111" // /* MW 2 */ + 3579 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3580 "10011000" // ST r24, [p1], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3581 "00010001" // /* MW 3 */ + 3582 "11011111" // /* MW 2 */ + 3583 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3584 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3585 "00010001" // /* MW 3 */ + 3586 "00011111" // /* MW 2 */ + 3587 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3588 "10011000" // ST r24, [p1], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3589 "00010001" // /* MW 3 */ + 3590 "11011111" // /* MW 2 */ + 3591 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3592 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3593 "00010001" // /* MW 3 */ + 3594 "00011111" // /* MW 2 */ + 3595 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3596 "10011000" // ST r24, [p1], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3597 "00010001" // /* MW 3 */ + 3598 "11011111" // /* MW 2 */ + 3599 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3600 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3601 "00010001" // /* MW 3 */ + 3602 "00011111" // /* MW 2 */ + 3603 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3604 "10011000" // ST r24, [p1], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3605 "00010001" // /* MW 3 */ + 3606 "11011111" // /* MW 2 */ + 3607 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3608 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3609 "00010001" // /* MW 3 */ + 3610 "00011111" // /* MW 2 */ + 3611 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 +.src_ref 2 "gemm_bfp16_params.h" 139 first + 3612 "01011100" // ST r24, [p1], #-12; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 3613 "00000000" // /* MW 5 */ + 3614 "01010000" // /* MW 4 */ + 3615 "00110000" // /* MW 3 */ + 3616 "11100010" // /* MW 2 */ + 3617 "00111011" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 first +.delay_slot + 3618 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3619 "00010001" // /* MW 3 */ + 3620 "00011111" // /* MW 2 */ + 3621 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 +.delay_slot + 3622 "10011000" // ST r24, [p1], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3623 "00010001" // /* MW 3 */ + 3624 "11011111" // /* MW 2 */ + 3625 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 +.delay_slot + 3626 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3627 "00010001" // /* MW 3 */ + 3628 "00011111" // /* MW 2 */ + 3629 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 +.delay_slot + 3630 "10011000" // ST r24, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3631 "00010001" // /* MW 3 */ + 3632 "00000111" // /* MW 2 */ + 3633 "00001001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16_params.h" 138 24 first +.delay_slot + 3634 "10011000" // ST r24, [p1, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3635 "00010001" // /* MW 3 */ + 3636 "00000011" // /* MW 2 */ +.label _ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv__end +.label __ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv___func_end0 + 3637 "00001001" // /* MW 1 */ +.label __Z8init_accILt1EEvPaS0_iii___func_begin0 +.label _Z8init_accILt1EEvPaS0_iii +.function init_acc<(unsigned short)1> _Z8init_accILt1EEvPaS0_iii +.src_ref 2 "gemm_bfp16.h" 38 first +.src_ref 2 "gemm_bfp16.h" 41 47 +.function_start + 3648 "01000100" // MOVXM p2, #508788 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3649 "11101000" // /* MW 5 */ + 3650 "11000110" // /* MW 4 */ + 3651 "11000100" // /* MW 3 */ + 3652 "00000111" // /* MW 2 */ + 3653 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 38 + 3654 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3655 "00000001" // /* MW 5 */ + 3656 "00000000" // /* MW 4 */ + 3657 "00000000" // /* MW 3 */ + 3658 "00001000" // /* MW 2 */ + 3659 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 41 47 first + 3660 "10011000" // LDA.s8 r4, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3661 "10000010" // /* MW 3 */ + 3662 "00000100" // /* MW 2 */ + 3663 "00000010" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 68 6 first + 3664 "01000100" // MOVXM ls, #3824 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3665 "11100000" // /* MW 5 */ + 3666 "11111101" // /* MW 4 */ + 3667 "00000001" // /* MW 3 */ + 3668 "00000000" // /* MW 2 */ + 3669 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 41 47 +.src_ref 2 "gemm_bfp16.h" 68 6 + 3670 "10111010" // MOVA r26, #0; MOVXM le, #3888 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3671 "00010000" // /* MW 9 */ + 3672 "10011000" // /* MW 8 */ + 3673 "10111111" // /* MW 7 */ + 3674 "00000001" // /* MW 6 */ + 3675 "00000000" // /* MW 5 */ + 3676 "00000000" // /* MW 4 */ + 3677 "00000000" // /* MW 3 */ + 3678 "00011010" // /* MW 2 */ + 3679 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 53 4 +.src_ref 2 "gemm_bfp16.h" 53 29 + 3680 "10111010" // MOVA r5, #-4; MOVXM p3, #3776 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3681 "00010000" // /* MW 9 */ + 3682 "01100000" // /* MW 8 */ + 3683 "10110111" // /* MW 7 */ + 3684 "00000001" // /* MW 6 */ + 3685 "00000000" // /* MW 5 */ + 3686 "00000000" // /* MW 4 */ + 3687 "00000000" // /* MW 3 */ + 3688 "10000101" // /* MW 2 */ + 3689 "11111111" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 39 33 +.src_ref 2 "gemm_bfp16.h" 41 47 first +.src_ref 2 "gemm_bfp16.h" 53 29 first +.src_ref 2 "gemm_bfp16.h" 75 43 + 3690 "10111010" // MOVA r3, #5; LSHL r5, r1, r5; VINSERT.32 x1, x0, #0, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3691 "10111000" // /* MW 9 */ + 3692 "10101000" // /* MW 8 */ + 3693 "01000001" // /* MW 7 */ + 3694 "11101100" // /* MW 6 */ + 3695 "01010010" // /* MW 5 */ + 3696 "00000010" // /* MW 4 */ + 3697 "00000000" // /* MW 3 */ + 3698 "10100011" // /* MW 2 */ + 3699 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 39 33 first + 3700 "11100100" // LSHL r7, r0, r3; MOV p2, sp /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3701 "11000001" // /* MW 5 */ + 3702 "11001011" // /* MW 4 */ + 3703 "10110100" // /* MW 3 */ + 3704 "11000111" // /* MW 2 */ + 3705 "00000001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 41 47 +.src_ref 2 "gemm_bfp16.h" 75 43 first + 3706 "11100100" // LSHL r3, r2, r3; VMOV bmll0, x1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3707 "00100101" // /* MW 5 */ + 3708 "00000101" // /* MW 4 */ + 3709 "10110000" // /* MW 3 */ + 3710 "11000111" // /* MW 2 */ + 3711 "00010000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 41 47 +.src_ref 2 "gemm_bfp16.h" 42 54 + 3712 "11100100" // MOVX crRnd, r4; MOV r1, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3713 "10000001" // /* MW 5 */ + 3714 "10100101" // /* MW 4 */ + 3715 "00000000" // /* MW 3 */ + 3716 "01010000" // /* MW 2 */ + 3717 "00100111" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 41 47 first +.src_ref 2 "gemm_bfp16.h" 42 69 +.src_ref 2 "gemm_bfp16.h" 75 14 + 3718 "00110110" // PADDB [p2], #-64; VCONV.bf16.fp32 wl0, bmll0; MOVX r16, #1; MOV m1, r3 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3719 "01111000" // /* MW 11 */ + 3720 "11010000" // /* MW 10 */ + 3721 "10000000" // /* MW 9 */ + 3722 "00101000" // /* MW 8 */ + 3723 "00000000" // /* MW 7 */ + 3724 "00000001" // /* MW 6 */ + 3725 "00100000" // /* MW 5 */ + 3726 "11111111" // /* MW 4 */ + 3727 "11000101" // /* MW 3 */ + 3728 "00000010" // /* MW 2 */ + 3729 "00001000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 42 54 +.src_ref 2 "gemm_bfp16.h" 42 69 first +.src_ref 2 "gemm_bfp16.h" 75 43 + 3730 "10111010" // MOVA r6, #-3; EQ r27, r2, r16; MOV r3, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3731 "01111000" // /* MW 9 */ + 3732 "01100000" // /* MW 8 */ + 3733 "01101010" // /* MW 7 */ + 3734 "00111100" // /* MW 6 */ + 3735 "10111000" // /* MW 5 */ + 3736 "00000101" // /* MW 4 */ + 3737 "00000000" // /* MW 3 */ + 3738 "10100110" // /* MW 2 */ + 3739 "11111111" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 41 22 first +.src_ref 2 "gemm_bfp16.h" 41 47 first +.src_ref 2 "gemm_bfp16.h" 75 43 first + 3740 "10100100" // LSHL r0, r0, r6; VEXTBCST.16 x1, x0, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3741 "00000110" // /* MW 5 */ + 3742 "00000010" // /* MW 4 */ + 3743 "10110001" // /* MW 3 */ + 3744 "00001101" // /* MW 2 */ + 3745 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 42 54 first +.src_ref 2 "gemm_bfp16.h" 44 44 +.src_ref 2 "gemm_bfp16.h" 69 17 +.src_ref 2 "gemm_bfp16.h" 76 14 +.src_ref 2 "gemm_bfp16.h" 77 16 + 3746 "01111110" // NOPA; NOPB; MOVS p1, p0; SEL.EQZ r1, r3, r1, r27; MOV m0, r7 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 3747 "01100000" // /* MW 13 */ + 3748 "00010001" // /* MW 12 */ + 3749 "00110000" // /* MW 11 */ + 3750 "00001111" // /* MW 10 */ + 3751 "00111010" // /* MW 9 */ + 3752 "00000000" // /* MW 8 */ + 3753 "00010010" // /* MW 7 */ + 3754 "11000010" // /* MW 6 */ + 3755 "00100000" // /* MW 5 */ + 3756 "00000000" // /* MW 4 */ + 3757 "11110000" // /* MW 3 */ + 3758 "00101100" // /* MW 2 */ + 3759 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 41 22 first +.src_ref 2 "gemm_bfp16.h" 44 44 first +.src_ref 2 "gemm_bfp16.h" 54 24 +.src_ref 2 "gemm_bfp16.h" 75 14 + 3760 "11100001" // NOPA; PADDB [p0], m0; VST x1, [p2]; ADD r2, r5, #-1; MOV p2, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3761 "00000000" // /* MW 15 */ + 3762 "00000000" // /* MW 14 */ + 3763 "01111000" // /* MW 13 */ + 3764 "01010000" // /* MW 12 */ + 3765 "00110000" // /* MW 11 */ + 3766 "11111001" // /* MW 10 */ + 3767 "00101111" // /* MW 9 */ + 3768 "00001010" // /* MW 8 */ + 3769 "01010011" // /* MW 7 */ + 3770 "00000100" // /* MW 6 */ + 3771 "00100010" // /* MW 5 */ + 3772 "00010111" // /* MW 4 */ + 3773 "11110000" // /* MW 3 */ + 3774 "00101100" // /* MW 2 */ + 3775 "00000000" // /* MW 1 */ +.label TGT_F_Z8init_accILt1EEvPaS0_iii_128 +.src_ref 2 "gemm_bfp16.h" 54 24 first +.src_ref 2 "gemm_bfp16.h" 68 6 first +.loop_nesting 1 + 3776 "11110100" // VLDB wl0, [p2]; MOV lc, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3777 "01000001" // /* MW 5 */ + 3778 "11100000" // /* MW 4 */ + 3779 "10001010" // /* MW 3 */ + 3780 "10000100" // /* MW 2 */ + 3781 "01000000" // /* MW 1 */ + 3782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3783 "00000000" // /* MW 1 */ + 3784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3785 "00000000" // /* MW 1 */ + 3786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3787 "00000000" // /* MW 1 */ + 3788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3789 "00000000" // /* MW 1 */ + 3790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3791 "00000000" // /* MW 1 */ + 3792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3793 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 54 24 +.src_ref 2 "gemm_bfp16.h" 63 39 +.src_ref 2 "gemm_bfp16.h" 64 39 + 3794 "11111000" // VMOV wh0, wl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3795 "00100010" // /* MW 3 */ + 3796 "00000001" // /* MW 2 */ + 3797 "00011000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 54 24 +.src_ref 2 "gemm_bfp16.h" 63 39 first + 3798 "01011000" // VEXTBCST.128 x3, x0, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3799 "00000011" // /* MW 3 */ + 3800 "10000100" // /* MW 2 */ + 3801 "00011001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 54 24 first +.src_ref 2 "gemm_bfp16.h" 64 39 first + 3802 "01011000" // VEXTBCST.128 x1, x0, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3803 "00000111" // /* MW 3 */ + 3804 "10000100" // /* MW 2 */ + 3805 "00011000" // /* MW 1 */ + 3806 "11111000" // VCONV.fp32.bf16 cml0, x3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3807 "10001010" // /* MW 3 */ + 3808 "00000111" // /* MW 2 */ + 3809 "00011000" // /* MW 1 */ + 3810 "11111000" // VCONV.fp32.bf16 cmh0, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3811 "10001010" // /* MW 3 */ + 3812 "10000011" // /* MW 2 */ + 3813 "00011000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 69 17 first + 3814 "11111000" // VMOV bmll1, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3815 "00010010" // /* MW 3 */ + 3816 "00000000" // /* MW 2 */ + 3817 "00011001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 71 19 first + 3818 "11010100" // NOPA; VMOV bmlh1, bmhl0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3819 "00100101" // /* MW 5 */ + 3820 "10000100" // /* MW 4 */ + 3821 "11110010" // /* MW 3 */ + 3822 "00101100" // /* MW 2 */ + 3823 "00000000" // /* MW 1 */ +.label ZLS_F_Z8init_accILt1EEvPaS0_iii_176 +.src_ref 2 "gemm_bfp16.h" 69 17 first +.begin_of_loop +.loop_nesting 2 + 3824 "10011000" // VST bmlh0, [p1, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3825 "00100110" // /* MW 3 */ + 3826 "00010100" // /* MW 2 */ + 3827 "00001001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 69 17 + 3828 "10011000" // VST bmll1, [p1], #128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3829 "10000110" // /* MW 3 */ + 3830 "00101100" // /* MW 2 */ + 3831 "00001001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 70 17 first + 3832 "10011000" // VST bmlh0, [p1, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3833 "00100110" // /* MW 3 */ + 3834 "00010100" // /* MW 2 */ + 3835 "00001001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 70 17 + 3836 "10011000" // VST bmll1, [p1], #128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3837 "10000110" // /* MW 3 */ + 3838 "00101100" // /* MW 2 */ + 3839 "00001001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 71 19 first + 3840 "11100001" // NOPA; NOPB; VST bmhh0, [p0, #64]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3841 "00000000" // /* MW 15 */ + 3842 "00000000" // /* MW 14 */ + 3843 "01111000" // /* MW 13 */ + 3844 "10100101" // /* MW 12 */ + 3845 "00000001" // /* MW 11 */ + 3846 "00000000" // /* MW 10 */ + 3847 "00000000" // /* MW 9 */ + 3848 "10000000" // /* MW 8 */ + 3849 "01100110" // /* MW 7 */ + 3850 "00010100" // /* MW 6 */ + 3851 "00100000" // /* MW 5 */ + 3852 "00000000" // /* MW 4 */ + 3853 "11110000" // /* MW 3 */ + 3854 "00101100" // /* MW 2 */ + 3855 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 71 19 + 3856 "11100001" // NOPA; NOPB; VST bmlh1, [p0], #128; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3857 "00000000" // /* MW 15 */ + 3858 "00000000" // /* MW 14 */ + 3859 "01111000" // /* MW 13 */ + 3860 "10100101" // /* MW 12 */ + 3861 "00000001" // /* MW 11 */ + 3862 "00000000" // /* MW 10 */ + 3863 "00000000" // /* MW 9 */ + 3864 "10000000" // /* MW 8 */ + 3865 "10100110" // /* MW 7 */ + 3866 "00101100" // /* MW 6 */ + 3867 "00100000" // /* MW 5 */ + 3868 "00000000" // /* MW 4 */ + 3869 "11110000" // /* MW 3 */ + 3870 "00101100" // /* MW 2 */ + 3871 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 72 19 first + 3872 "11100001" // NOPA; NOPB; VST bmhh0, [p0, #64]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3873 "00000000" // /* MW 15 */ + 3874 "00000000" // /* MW 14 */ + 3875 "01111000" // /* MW 13 */ + 3876 "10100101" // /* MW 12 */ + 3877 "00000001" // /* MW 11 */ + 3878 "00000000" // /* MW 10 */ + 3879 "00000000" // /* MW 9 */ + 3880 "10000000" // /* MW 8 */ + 3881 "01100110" // /* MW 7 */ + 3882 "00010100" // /* MW 6 */ + 3883 "00100000" // /* MW 5 */ + 3884 "00000000" // /* MW 4 */ + 3885 "11110000" // /* MW 3 */ + 3886 "00101100" // /* MW 2 */ + 3887 "00000000" // /* MW 1 */ +.label ZLE_F_Z8init_accILt1EEvPaS0_iii_240 +.src_ref 2 "gemm_bfp16.h" 72 19 +.end_of_loop + 3888 "11100001" // NOPA; NOPB; VST bmlh1, [p0], #128; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3889 "00000000" // /* MW 15 */ + 3890 "00000000" // /* MW 14 */ + 3891 "01111000" // /* MW 13 */ + 3892 "10100101" // /* MW 12 */ + 3893 "00000001" // /* MW 11 */ + 3894 "00000000" // /* MW 10 */ + 3895 "00000000" // /* MW 9 */ + 3896 "10000000" // /* MW 8 */ + 3897 "10100110" // /* MW 7 */ + 3898 "00101100" // /* MW 6 */ + 3899 "00100000" // /* MW 5 */ + 3900 "00000000" // /* MW 4 */ + 3901 "11110000" // /* MW 3 */ + 3902 "00101100" // /* MW 2 */ + 3903 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 53 4 first +.src_ref 2 "gemm_bfp16.h" 75 14 first +.src_ref 2 "gemm_bfp16.h" 76 14 first +.loop_nesting 1 + 3904 "00010010" // PADDA [p1], m0; PADDB [p2], m1; JNZD r2, r2, p3 /* MW 8 */ /* control_operation: words=8 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 3905 "11100000" // /* MW 7 */ + 3906 "10000100" // /* MW 6 */ + 3907 "00100000" // /* MW 5 */ + 3908 "01010111" // /* MW 4 */ + 3909 "11110100" // /* MW 3 */ + 3910 "00001100" // /* MW 2 */ + 3911 "00100001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 77 16 first +.delay_slot + 3912 "00011000" // PADDB [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3913 "10010000" // /* MW 3 */ + 3914 "00001011" // /* MW 2 */ + 3915 "00111000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3917 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3919 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3920 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3921 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3923 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 80 first +.loop_nesting 0 + 3924 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3925 "00000000" // /* MW 3 */ + 3926 "00101000" // /* MW 2 */ + 3927 "00010000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 80 +.delay_slot + 3928 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3929 "00000001" // /* MW 5 */ + 3930 "00000000" // /* MW 4 */ + 3931 "00000000" // /* MW 3 */ + 3932 "11111000" // /* MW 2 */ + 3933 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3935 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3937 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3939 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z8init_accILt1EEvPaS0_iii__end +.label __Z8init_accILt1EEvPaS0_iii___func_end0 + 3941 "00000000" // /* MW 1 */ +.label __Z12post_processPai___func_begin0 +.label _Z12post_processPai +.function post_process _Z12post_processPai +.src_ref 2 "gemm_bfp16.h" 83 first +.src_ref 2 "gemm_bfp16.h" 92 26 +.src_ref 2 "gemm_bfp16.h" 93 12 +.src_ref 2 "gemm_bfp16.h" 97 21 +.src_ref 2 "gemm_bfp16.h" 97 23 +.function_start + 3952 "01110110" // MOVA m0, #512; MOVS p2, p0; MOVXM p1, #508788 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3953 "00010000" // /* MW 11 */ + 3954 "10111010" // /* MW 10 */ + 3955 "10110001" // /* MW 9 */ + 3956 "11110000" // /* MW 8 */ + 3957 "00000001" // /* MW 7 */ + 3958 "00000000" // /* MW 6 */ + 3959 "10001011" // /* MW 5 */ + 3960 "10000000" // /* MW 4 */ + 3961 "10000010" // /* MW 3 */ + 3962 "00000000" // /* MW 2 */ + 3963 "01000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 92 26 +.src_ref 2 "gemm_bfp16.h" 94 26 +.src_ref 2 "gemm_bfp16.h" 94 26 +.src_ref 2 "gemm_bfp16.h" 95 26 +.src_ref 2 "gemm_bfp16.h" 96 26 + 3964 "10111010" // MOVA r1, #-7; MOVX r2, #0; MOV r4, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3965 "01011000" // /* MW 9 */ + 3966 "00000001" // /* MW 8 */ + 3967 "10001000" // /* MW 7 */ + 3968 "00001000" // /* MW 6 */ + 3969 "00100000" // /* MW 5 */ + 3970 "00000000" // /* MW 4 */ + 3971 "00000000" // /* MW 3 */ + 3972 "00100001" // /* MW 2 */ + 3973 "11111111" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 92 26 first +.src_ref 2 "gemm_bfp16.h" 93 12 +.src_ref 2 "gemm_bfp16.h" 94 14 +.src_ref 2 "gemm_bfp16.h" 94 26 first +.src_ref 2 "gemm_bfp16.h" 95 14 +.src_ref 2 "gemm_bfp16.h" 96 14 + 3974 "01110110" // LDA.s8 r24, [p1]; MOVS p1, p0; OR r16, r2, r4; MOV r3, #7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3975 "01011000" // /* MW 11 */ + 3976 "00000111" // /* MW 10 */ + 3977 "01101000" // /* MW 9 */ + 3978 "00101100" // /* MW 8 */ + 3979 "00000010" // /* MW 7 */ + 3980 "00000101" // /* MW 6 */ + 3981 "10001011" // /* MW 5 */ + 3982 "10000000" // /* MW 4 */ + 3983 "01010001" // /* MW 3 */ + 3984 "11100000" // /* MW 2 */ + 3985 "00100000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 92 26 +.src_ref 2 "gemm_bfp16.h" 93 12 first +.src_ref 2 "gemm_bfp16.h" 95 26 + 3986 "10111010" // VLDA bmlh1, [p1, #64]; LSHL r1, r0, r1; MOV r5, #2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3987 "01011000" // /* MW 9 */ + 3988 "00000010" // /* MW 8 */ + 3989 "10101000" // /* MW 7 */ + 3990 "11101100" // /* MW 6 */ + 3991 "00010000" // /* MW 5 */ + 3992 "00000000" // /* MW 4 */ + 3993 "10110000" // /* MW 3 */ + 3994 "10010110" // /* MW 2 */ + 3995 "00100010" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 93 12 +.src_ref 2 "gemm_bfp16.h" 94 14 +.src_ref 2 "gemm_bfp16.h" 94 14 first +.src_ref 2 "gemm_bfp16.h" 95 14 +.src_ref 2 "gemm_bfp16.h" 96 14 + 3996 "10111010" // VLDA bmll1, [p1], m0; LSHL r18, r16, r3; MOV r0, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3997 "01111000" // /* MW 9 */ + 3998 "01100000" // /* MW 8 */ + 3999 "00001000" // /* MW 7 */ + 4000 "11101100" // /* MW 6 */ + 4001 "00100001" // /* MW 5 */ + 4002 "00100001" // /* MW 4 */ + 4003 "10110000" // /* MW 3 */ + 4004 "00010010" // /* MW 2 */ + 4005 "00100001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 94 14 +.src_ref 2 "gemm_bfp16.h" 95 26 first +.src_ref 2 "gemm_bfp16.h" 96 26 + 4006 "10111010" // MOVA r6, #3; OR r7, r5, r2; ADD.NC p3, r18, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4007 "10101000" // /* MW 9 */ + 4008 "10000000" // /* MW 8 */ + 4009 "10110100" // /* MW 7 */ + 4010 "00101101" // /* MW 6 */ + 4011 "01110001" // /* MW 5 */ + 4012 "00001010" // /* MW 4 */ + 4013 "00000000" // /* MW 3 */ + 4014 "01100110" // /* MW 2 */ + 4015 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 92 2 first +.src_ref 2 "gemm_bfp16.h" 94 12 first +.src_ref 2 "gemm_bfp16.h" 95 14 + 4016 "10111010" // VLDA bmhh0, [p3, #64]; LSHL r19, r7, r3; ADD.NC lc, r1, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4017 "11001000" // /* MW 9 */ + 4018 "01111111" // /* MW 8 */ + 4019 "10111000" // /* MW 7 */ + 4020 "11101110" // /* MW 6 */ + 4021 "00110001" // /* MW 5 */ + 4022 "00001111" // /* MW 4 */ + 4023 "10110000" // /* MW 3 */ + 4024 "10001110" // /* MW 2 */ + 4025 "01100010" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 94 12 +.src_ref 2 "gemm_bfp16.h" 95 14 first +.src_ref 2 "gemm_bfp16.h" 96 26 first + 4026 "10111010" // VLDA bmhl0, [p3]; OR r17, r6, r2; ADD.NC p4, r19, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4027 "10101000" // /* MW 9 */ + 4028 "11000000" // /* MW 8 */ + 4029 "00110100" // /* MW 7 */ + 4030 "00101110" // /* MW 6 */ + 4031 "00010001" // /* MW 5 */ + 4032 "00001101" // /* MW 4 */ + 4033 "10110000" // /* MW 3 */ + 4034 "10001010" // /* MW 2 */ + 4035 "01100000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 95 12 +.src_ref 2 "gemm_bfp16.h" 96 14 + 4036 "10111010" // VLDA bmlh0, [p4, #64]; LSHL r20, r17, r3; ADD.NC r2, r2, #4 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4037 "00001000" // /* MW 9 */ + 4038 "10000001" // /* MW 8 */ + 4039 "01001000" // /* MW 7 */ + 4040 "11101100" // /* MW 6 */ + 4041 "01000001" // /* MW 5 */ + 4042 "00100011" // /* MW 4 */ + 4043 "10110000" // /* MW 3 */ + 4044 "10000110" // /* MW 2 */ + 4045 "10000010" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 95 12 +.src_ref 2 "gemm_bfp16.h" 96 14 +.src_ref 2 "gemm_bfp16.h" 97 21 +.src_ref 2 "gemm_bfp16.h" 97 23 +.src_ref 2 "gemm_bfp16.h" 98 4 +.src_ref 2 "gemm_bfp16.h" 98 21 +.src_ref 2 "gemm_bfp16.h" 98 23 +.src_ref 2 "gemm_bfp16.h" 99 4 +.src_ref 2 "gemm_bfp16.h" 99 21 +.src_ref 2 "gemm_bfp16.h" 99 23 +.src_ref 2 "gemm_bfp16.h" 100 4 +.src_ref 2 "gemm_bfp16.h" 100 21 +.src_ref 2 "gemm_bfp16.h" 100 23 + 4046 "10111010" // VLDA bmll0, [p4]; MOVX crRnd, r24; ADD.NC p5, r20, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4047 "10101000" // /* MW 9 */ + 4048 "00000000" // /* MW 8 */ + 4049 "10110101" // /* MW 7 */ + 4050 "00000010" // /* MW 6 */ + 4051 "11010100" // /* MW 5 */ + 4052 "00110001" // /* MW 4 */ + 4053 "10110000" // /* MW 3 */ + 4054 "10000010" // /* MW 2 */ + 4055 "10000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 92 2 first +.src_ref 2 "gemm_bfp16.h" 96 12 + 4056 "10111010" // VLDA bmhh1, [p5, #64]; MOVXM ls, #4096 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4057 "00010000" // /* MW 9 */ + 4058 "00000000" // /* MW 8 */ + 4059 "01111000" // /* MW 7 */ + 4060 "00000100" // /* MW 6 */ + 4061 "00000000" // /* MW 5 */ + 4062 "00000000" // /* MW 4 */ + 4063 "10110000" // /* MW 3 */ + 4064 "10011110" // /* MW 2 */ + 4065 "10100010" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 92 2 +.src_ref 2 "gemm_bfp16.h" 96 12 first + 4066 "10111010" // VLDA bmhl1, [p5]; MOVXM le, #4192 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4067 "00010000" // /* MW 9 */ + 4068 "00110000" // /* MW 8 */ + 4069 "10111000" // /* MW 7 */ + 4070 "00000101" // /* MW 6 */ + 4071 "00000000" // /* MW 5 */ + 4072 "00000000" // /* MW 4 */ + 4073 "10110000" // /* MW 3 */ + 4074 "10011010" // /* MW 2 */ + 4075 "10100000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 98 4 +.src_ref 2 "gemm_bfp16.h" 99 4 +.src_ref 2 "gemm_bfp16.h" 100 4 + 4076 "00011000" // MOVX r1, #6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4077 "00011001" // /* MW 3 */ + 4078 "00000010" // /* MW 2 */ + 4079 "00010000" // /* MW 1 */ + 4080 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4081 "00000000" // /* MW 15 */ + 4082 "00000000" // /* MW 14 */ + 4083 "01111000" // /* MW 13 */ + 4084 "10100101" // /* MW 12 */ + 4085 "00000001" // /* MW 11 */ + 4086 "00000000" // /* MW 10 */ + 4087 "00000000" // /* MW 9 */ + 4088 "00000000" // /* MW 8 */ + 4089 "01011011" // /* MW 7 */ + 4090 "00000001" // /* MW 6 */ + 4091 "00100000" // /* MW 5 */ + 4092 "00000000" // /* MW 4 */ + 4093 "11110000" // /* MW 3 */ + 4094 "00101100" // /* MW 2 */ + 4095 "00000000" // /* MW 1 */ +.label ZLS_F_Z12post_processPai_144 +.src_ref 2 "gemm_bfp16.h" 97 21 first +.src_ref 2 "gemm_bfp16.h" 97 23 first +.src_ref 2 "gemm_bfp16.h" 98 4 first +.begin_of_loop +.loop_nesting 1 + 4096 "01011100" // VST.CONV.bf16.fp32 cml1, [p2], #256;LSHL r21, r16, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4097 "00111011" // /* MW 5 */ + 4098 "01010100" // /* MW 4 */ + 4099 "01101000" // /* MW 3 */ + 4100 "10010100" // /* MW 2 */ + 4101 "01001001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 98 4 +.src_ref 2 "gemm_bfp16.h" 98 21 +.src_ref 2 "gemm_bfp16.h" 98 23 +.src_ref 2 "gemm_bfp16.h" 99 4 first + 4102 "11100100" // LSHL r22, r7, r1; MOV dj2, r21 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4103 "01000001" // /* MW 5 */ + 4104 "00010101" // /* MW 4 */ + 4105 "10110101" // /* MW 3 */ + 4106 "10000011" // /* MW 2 */ + 4107 "00111101" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 98 4 first +.src_ref 2 "gemm_bfp16.h" 98 21 first +.src_ref 2 "gemm_bfp16.h" 98 23 first +.src_ref 2 "gemm_bfp16.h" 99 4 +.src_ref 2 "gemm_bfp16.h" 99 21 +.src_ref 2 "gemm_bfp16.h" 99 23 +.src_ref 2 "gemm_bfp16.h" 100 4 first + 4108 "00111010" // VST.CONV.bf16.fp32 cmh0, [p0, dj2];LSHL r23, r17, r1; MOV dj0, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4109 "01111001" // /* MW 9 */ + 4110 "10010000" // /* MW 8 */ + 4111 "01000101" // /* MW 7 */ + 4112 "11101100" // /* MW 6 */ + 4113 "01110000" // /* MW 5 */ + 4114 "00100011" // /* MW 4 */ + 4115 "01100000" // /* MW 3 */ + 4116 "00001100" // /* MW 2 */ + 4117 "00001000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 94 26 first +.src_ref 2 "gemm_bfp16.h" 99 4 first +.src_ref 2 "gemm_bfp16.h" 99 21 first +.src_ref 2 "gemm_bfp16.h" 99 23 first +.src_ref 2 "gemm_bfp16.h" 100 4 +.src_ref 2 "gemm_bfp16.h" 100 21 +.src_ref 2 "gemm_bfp16.h" 100 23 + 4118 "00111010" // VST.CONV.bf16.fp32 cml0, [p0, dj0];OR r16, r2, r4; MOV dj1, r23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4119 "01111001" // /* MW 9 */ + 4120 "11010000" // /* MW 8 */ + 4121 "11000101" // /* MW 7 */ + 4122 "00101100" // /* MW 6 */ + 4123 "00000010" // /* MW 5 */ + 4124 "00000101" // /* MW 4 */ + 4125 "01100000" // /* MW 3 */ + 4126 "00000100" // /* MW 2 */ + 4127 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 95 26 first + 4128 "10011000" // OR r7, r5, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4129 "00100101" // /* MW 3 */ + 4130 "01001110" // /* MW 2 */ + 4131 "00010001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 96 26 first +.src_ref 2 "gemm_bfp16.h" 100 4 first +.src_ref 2 "gemm_bfp16.h" 100 21 first +.src_ref 2 "gemm_bfp16.h" 100 23 first + 4132 "00111010" // VST.CONV.bf16.fp32 cmh1, [p0, dj1];OR r17, r6, r2; ADD.NC r2, r2, #4 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4133 "00001001" // /* MW 9 */ + 4134 "10000001" // /* MW 8 */ + 4135 "01001000" // /* MW 7 */ + 4136 "00101100" // /* MW 6 */ + 4137 "00010001" // /* MW 5 */ + 4138 "00001101" // /* MW 4 */ + 4139 "01100000" // /* MW 3 */ + 4140 "00011100" // /* MW 2 */ + 4141 "00000100" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 93 12 first +.src_ref 2 "gemm_bfp16.h" 94 14 first + 4142 "00101100" // VLDA bmlh1, [p1, #64]; LSHL r18, r16, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4143 "01111011" // /* MW 5 */ + 4144 "01001000" // /* MW 4 */ + 4145 "10111000" // /* MW 3 */ + 4146 "10010110" // /* MW 2 */ + 4147 "00100010" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 93 12 +.src_ref 2 "gemm_bfp16.h" 94 14 +.src_ref 2 "gemm_bfp16.h" 95 14 first + 4148 "10111010" // VLDA bmll1, [p1], m0; LSHL r19, r7, r3; ADD.NC p3, r18, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4149 "10101000" // /* MW 9 */ + 4150 "10000000" // /* MW 8 */ + 4151 "10110100" // /* MW 7 */ + 4152 "11101101" // /* MW 6 */ + 4153 "00110001" // /* MW 5 */ + 4154 "00001111" // /* MW 4 */ + 4155 "10110000" // /* MW 3 */ + 4156 "00010010" // /* MW 2 */ + 4157 "00100001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 94 12 first +.src_ref 2 "gemm_bfp16.h" 95 14 +.src_ref 2 "gemm_bfp16.h" 96 14 first + 4158 "10111010" // VLDA bmhh0, [p3, #64]; LSHL r20, r17, r3; ADD.NC p4, r19, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4159 "10101000" // /* MW 9 */ + 4160 "11000000" // /* MW 8 */ + 4161 "00110100" // /* MW 7 */ + 4162 "11101110" // /* MW 6 */ + 4163 "01000001" // /* MW 5 */ + 4164 "00100011" // /* MW 4 */ + 4165 "10110000" // /* MW 3 */ + 4166 "10001110" // /* MW 2 */ + 4167 "01100010" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 94 12 +.src_ref 2 "gemm_bfp16.h" 96 14 + 4168 "10010100" // VLDA bmhl0, [p3]; ADD.NC p5, r20, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4169 "00000010" // /* MW 5 */ + 4170 "11010100" // /* MW 4 */ + 4171 "10111010" // /* MW 3 */ + 4172 "10001010" // /* MW 2 */ + 4173 "01100000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 95 12 first + 4174 "10011000" // VLDA bmlh0, [p4, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4175 "00110101" // /* MW 3 */ + 4176 "00010100" // /* MW 2 */ + 4177 "00000100" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 95 12 + 4178 "10011000" // VLDA bmll0, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4179 "00010101" // /* MW 3 */ + 4180 "00000100" // /* MW 2 */ + 4181 "00000100" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 96 12 first + 4182 "10011000" // VLDA bmhh1, [p5, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4183 "11110101" // /* MW 3 */ + 4184 "00010100" // /* MW 2 */ + 4185 "00000101" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 96 12 + 4186 "00111100" // VLDA bmhl1, [p5]; NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4187 "00100000" // /* MW 5 */ + 4188 "00000000" // /* MW 4 */ + 4189 "10110000" // /* MW 3 */ + 4190 "10011010" // /* MW 2 */ + 4191 "10100000" // /* MW 1 */ +.label ZLE_F_Z12post_processPai_240 +.end_of_loop + 4192 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4193 "00000000" // /* MW 15 */ + 4194 "00000000" // /* MW 14 */ + 4195 "01111000" // /* MW 13 */ + 4196 "10100101" // /* MW 12 */ + 4197 "00000001" // /* MW 11 */ + 4198 "00000000" // /* MW 10 */ + 4199 "00000000" // /* MW 9 */ + 4200 "00000000" // /* MW 8 */ + 4201 "01011011" // /* MW 7 */ + 4202 "00000001" // /* MW 6 */ + 4203 "00100000" // /* MW 5 */ + 4204 "00000000" // /* MW 4 */ + 4205 "11110000" // /* MW 3 */ + 4206 "00101100" // /* MW 2 */ + 4207 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 97 21 first +.src_ref 2 "gemm_bfp16.h" 97 23 first +.src_ref 2 "gemm_bfp16.h" 98 4 first +.loop_nesting 0 + 4208 "01011100" // VST.CONV.bf16.fp32 cml1, [p2], #256;LSHL r21, r16, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4209 "00111011" // /* MW 5 */ + 4210 "01010100" // /* MW 4 */ + 4211 "01101000" // /* MW 3 */ + 4212 "10010100" // /* MW 2 */ + 4213 "01001001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 98 4 +.src_ref 2 "gemm_bfp16.h" 98 21 +.src_ref 2 "gemm_bfp16.h" 98 23 +.src_ref 2 "gemm_bfp16.h" 102 first + 4214 "11100100" // RET lr; MOV dj2, r21 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 4215 "01000001" // /* MW 5 */ + 4216 "00010101" // /* MW 4 */ + 4217 "00000101" // /* MW 3 */ + 4218 "00000000" // /* MW 2 */ + 4219 "00000101" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 98 4 first +.src_ref 2 "gemm_bfp16.h" 98 21 first +.src_ref 2 "gemm_bfp16.h" 98 23 first +.src_ref 2 "gemm_bfp16.h" 99 4 first +.delay_slot + 4220 "01011100" // VST.CONV.bf16.fp32 cmh0, [p0, dj2];LSHL r22, r7, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4221 "00111011" // /* MW 5 */ + 4222 "11011000" // /* MW 4 */ + 4223 "01100011" // /* MW 3 */ + 4224 "00001100" // /* MW 2 */ + 4225 "00001000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 99 4 +.src_ref 2 "gemm_bfp16.h" 99 21 +.src_ref 2 "gemm_bfp16.h" 99 23 +.src_ref 2 "gemm_bfp16.h" 100 4 first +.delay_slot + 4226 "11100100" // LSHL r23, r17, r1; MOV dj0, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4227 "01000001" // /* MW 5 */ + 4228 "00010110" // /* MW 4 */ + 4229 "10110001" // /* MW 3 */ + 4230 "11000011" // /* MW 2 */ + 4231 "10001101" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 99 4 first +.src_ref 2 "gemm_bfp16.h" 99 21 first +.src_ref 2 "gemm_bfp16.h" 99 23 first +.src_ref 2 "gemm_bfp16.h" 100 4 +.src_ref 2 "gemm_bfp16.h" 100 21 +.src_ref 2 "gemm_bfp16.h" 100 23 +.delay_slot + 4232 "00000010" // VST.CONV.bf16.fp32 cml0, [p0, dj0]; MOV dj1, r23 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4233 "01110000" // /* MW 7 */ + 4234 "11010000" // /* MW 6 */ + 4235 "11000101" // /* MW 5 */ + 4236 "00000000" // /* MW 4 */ + 4237 "01100000" // /* MW 3 */ + 4238 "00000100" // /* MW 2 */ + 4239 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 100 4 first +.src_ref 2 "gemm_bfp16.h" 100 21 first +.src_ref 2 "gemm_bfp16.h" 100 23 first +.delay_slot + 4240 "00011000" // VST.CONV.bf16.fp32 cmh1, [p0, dj1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4241 "11100011" // /* MW 3 */ + 4242 "00100000" // /* MW 2 */ + 4243 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4244 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z12post_processPai__end +.label __Z12post_processPai___func_end0 + 4245 "00000000" // /* MW 1 */ +.label __Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params___func_begin0 +.label _Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params +.function gemm_bfp16 _Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params +.src_ref 2 "gemm_bfp16.h" 225 first +.src_ref 2 "gemm_bfp16.h" 231 12 +.src_ref 2 "gemm_bfp16.h" 231 12 +.function_start + 4256 "01110110" // MOVA m4, #-300; MOVS p4, p7; MOVXM p7, #508736 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4257 "00010000" // /* MW 11 */ + 4258 "10100000" // /* MW 10 */ + 4259 "10110001" // /* MW 9 */ + 4260 "11110011" // /* MW 8 */ + 4261 "00000001" // /* MW 7 */ + 4262 "00000000" // /* MW 6 */ + 4263 "10001011" // /* MW 5 */ + 4264 "10011100" // /* MW 4 */ + 4265 "10000100" // /* MW 3 */ + 4266 "10010000" // /* MW 2 */ + 4267 "11011010" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 231 12 first + 4268 "10011000" // LDA r16, [p7], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4269 "00010110" // /* MW 3 */ + 4270 "10001010" // /* MW 2 */ + 4271 "00000111" // /* MW 1 */ + 4272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4273 "00000000" // /* MW 1 */ + 4274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4275 "00000000" // /* MW 1 */ + 4276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4277 "00000000" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 6 "array_helpers.hpp" 313 19 +.src_ref 7 "accum.hpp" 903 19 +.src_ref 7 "accum.hpp" 940 83 +.src_ref 7 "accum.hpp" 940 83 + 4278 "00000010" // MOVS p0, p6; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4279 "01110000" // /* MW 7 */ + 4280 "01100000" // /* MW 6 */ + 4281 "00110000" // /* MW 5 */ + 4282 "00000011" // /* MW 4 */ + 4283 "01100000" // /* MW 3 */ + 4284 "00010001" // /* MW 2 */ + 4285 "00010011" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 225 + 4286 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4287 "00000001" // /* MW 5 */ + 4288 "00000000" // /* MW 4 */ + 4289 "00000000" // /* MW 3 */ + 4290 "00001000" // /* MW 2 */ + 4291 "00000000" // /* MW 1 */ + 4292 "10011000" // ST p0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4293 "00011101" // /* MW 3 */ + 4294 "11111100" // /* MW 2 */ + 4295 "00001111" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 231 6 +.src_ref 2 "gemm_bfp16.h" 231 28 + 4296 "00111010" // ST p4, [sp, #-16]; JNZ r16, #4384 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4384 delay_slots=5 */ + 4297 "01100001" // /* MW 9 */ + 4298 "00000000" // /* MW 8 */ + 4299 "00010000" // /* MW 7 */ + 4300 "00100100" // /* MW 6 */ + 4301 "00000010" // /* MW 5 */ + 4302 "00100000" // /* MW 4 */ + 4303 "10110000" // /* MW 3 */ + 4304 "01000011" // /* MW 2 */ + 4305 "11111110" // /* MW 1 */ +.delay_slot + 4306 "10011000" // ST p2, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4307 "00011101" // /* MW 3 */ + 4308 "11110101" // /* MW 2 */ + 4309 "00001111" // /* MW 1 */ +.delay_slot + 4310 "10011000" // ST p1, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4311 "10011101" // /* MW 3 */ + 4312 "11101100" // /* MW 2 */ + 4313 "00001111" // /* MW 1 */ +.src_ref 8 "tile.hpp" 74 8 +.src_ref 8 "tile.hpp" 74 8 +.delay_slot + 4314 "01110110" // MOVA r18, #1; ST lr, [sp, #-8]; MOVXM p0, #508784 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4315 "00010000" // /* MW 11 */ + 4316 "10111000" // /* MW 10 */ + 4317 "00110001" // /* MW 9 */ + 4318 "11110000" // /* MW 8 */ + 4319 "00000001" // /* MW 7 */ + 4320 "10000000" // /* MW 6 */ + 4321 "00111101" // /* MW 5 */ + 4322 "11111000" // /* MW 4 */ + 4323 "00000111" // /* MW 3 */ + 4324 "00110010" // /* MW 2 */ + 4325 "00000000" // /* MW 1 */ +.src_ref 8 "tile.hpp" 74 8 first +.src_ref 8 "tile.hpp" 86 8 +.src_ref 8 "tile.hpp" 86 8 +.delay_slot + 4326 "01110110" // MOVA r17, #11; ST r18, [p0]; MOVXM p0, #508788 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4327 "00010000" // /* MW 11 */ + 4328 "10111010" // /* MW 10 */ + 4329 "00110001" // /* MW 9 */ + 4330 "11110000" // /* MW 8 */ + 4331 "00000001" // /* MW 7 */ + 4332 "10000000" // /* MW 6 */ + 4333 "01010001" // /* MW 5 */ + 4334 "00000110" // /* MW 4 */ + 4335 "00000000" // /* MW 3 */ + 4336 "01110001" // /* MW 2 */ + 4337 "00000001" // /* MW 1 */ +.src_ref 8 "tile.hpp" 86 8 first +.src_ref 2 "gemm_bfp16.h" 235 66 +.delay_slot + 4338 "10111010" // ST.s8 r17, [p0]; MOVXM p5, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4339 "00010000" // /* MW 9 */ + 4340 "00000000" // /* MW 8 */ + 4341 "10110001" // /* MW 7 */ + 4342 "11110010" // /* MW 6 */ + 4343 "00000001" // /* MW 5 */ + 4344 "00000000" // /* MW 4 */ + 4345 "11100000" // /* MW 3 */ + 4346 "11000100" // /* MW 2 */ + 4347 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 235 4 +.src_ref 2 "gemm_bfp16.h" 235 66 first + 4348 "11010100" // LDA r0, [p5], #8; MOV p0, p2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4349 "10000001" // /* MW 5 */ + 4350 "11001001" // /* MW 4 */ + 4351 "11010000" // /* MW 3 */ + 4352 "10000010" // /* MW 2 */ + 4353 "10100101" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 235 79 + 4354 "10011000" // LDA r1, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4355 "00110110" // /* MW 3 */ + 4356 "00000100" // /* MW 2 */ + 4357 "00000101" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 235 92 + 4358 "10011000" // LDA r2, [p5, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4359 "01010110" // /* MW 3 */ + 4360 "00010100" // /* MW 2 */ + 4361 "00000101" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 235 4 +.no_stack_arguments + 4362 "00000100" // JL #3648 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3648 delay_slots=5 */ + 4363 "00000001" // /* MW 5 */ + 4364 "00000000" // /* MW 4 */ + 4365 "00100000" // /* MW 3 */ + 4366 "00000111" // /* MW 2 */ + 4367 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 235 4 +.delay_slot + 4368 "11111000" // MOV p1, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4369 "11000000" // /* MW 3 */ + 4370 "01100110" // /* MW 2 */ + 4371 "00011001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4373 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4375 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4377 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4378 "00111100" // NOPA; NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4379 "00100000" // /* MW 5 */ + 4380 "00000000" // /* MW 4 */ + 4381 "11110000" // /* MW 3 */ + 4382 "00101100" // /* MW 2 */ + 4383 "00000000" // /* MW 1 */ +.label TGT_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_128 +.src_ref 4 "tuple" 562 47 +.src_ref 8 "tile.hpp" 86 8 +.src_ref 2 "gemm_bfp16.h" 252 79 +.src_ref 2 "gemm_bfp16.h" 252 85 +.return_address + 4384 "10111010" // MOVA r16, #184; MOVX r18, #-184; MOV m4, #220 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4385 "01011000" // /* MW 9 */ + 4386 "11011100" // /* MW 8 */ + 4387 "00000000" // /* MW 7 */ + 4388 "00001010" // /* MW 6 */ + 4389 "00100001" // /* MW 5 */ + 4390 "00111011" // /* MW 4 */ + 4391 "00000000" // /* MW 3 */ + 4392 "00010000" // /* MW 2 */ + 4393 "00010111" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 252 85 first + 4394 "10011000" // LDA r27, [p7], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4395 "01110110" // /* MW 3 */ + 4396 "10001011" // /* MW 2 */ + 4397 "00000111" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 +.src_ref 2 "gemm_bfp16.h" 252 79 + 4398 "11111000" // MOV r19, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4399 "11000000" // /* MW 3 */ + 4400 "11011110" // /* MW 2 */ + 4401 "00011100" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 126 19 + 4402 "00011000" // ADD.NC r20, r19, #-56 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4403 "11100100" // /* MW 3 */ + 4404 "00011001" // /* MW 2 */ + 4405 "00011101" // /* MW 1 */ +.src_ref 8 "tile.hpp" 86 8 + 4406 "01011000" // ADD.NC p7, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4407 "01010001" // /* MW 3 */ + 4408 "01101001" // /* MW 2 */ + 4409 "00011111" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 +.src_ref 2 "gemm_bfp16.h" 252 79 + 4410 "00011000" // MOVX r17, #240 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4411 "11000001" // /* MW 3 */ + 4412 "11100010" // /* MW 2 */ + 4413 "00010000" // /* MW 1 */ + 4414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4415 "00000000" // /* MW 1 */ + 4416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4417 "00000000" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 first +.src_ref 2 "gemm_bfp16.h" 252 79 + 4418 "00011000" // SEL.EQZ r18, r20, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4419 "00110010" // /* MW 3 */ + 4420 "00100101" // /* MW 2 */ + 4421 "00010101" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 first +.src_ref 4 "tuple" 562 47 first +.src_ref 2 "gemm_bfp16.h" 252 79 first + 4422 "00100100" // SEL.EQZ r16, r16, r17, r27; ADD.NC p3, r18, #4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4423 "00000100" // /* MW 5 */ + 4424 "11010010" // /* MW 4 */ + 4425 "01000110" // /* MW 3 */ + 4426 "00100010" // /* MW 2 */ + 4427 "10000100" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 +.src_ref 2 "gemm_bfp16.h" 134 10 first +.src_ref 2 "gemm_bfp16.h" 252 79 + 4428 "10111010" // LDA dj1, [p3], #4; JZ r27, #4688 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4688 delay_slots=5 */ + 4429 "01100000" // /* MW 9 */ + 4430 "00000000" // /* MW 8 */ + 4431 "00000000" // /* MW 7 */ + 4432 "01001010" // /* MW 6 */ + 4433 "00000010" // /* MW 5 */ + 4434 "00110110" // /* MW 4 */ + 4435 "11010000" // /* MW 3 */ + 4436 "10011000" // /* MW 2 */ + 4437 "01100011" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 +.src_ref 4 "tuple" 562 47 +.src_ref 4 "tuple" 562 47 first +.delay_slot + 4438 "11010100" // LDA dn5, [p3], #4; MOV dj3, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4439 "01000001" // /* MW 5 */ + 4440 "00010000" // /* MW 4 */ + 4441 "11010111" // /* MW 3 */ + 4442 "11010100" // /* MW 2 */ + 4443 "01100011" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 +.delay_slot + 4444 "10011000" // LDA dj5, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4445 "11000110" // /* MW 3 */ + 4446 "00011110" // /* MW 2 */ + 4447 "00000011" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 +.src_ref 4 "tuple" 562 47 +.delay_slot + 4448 "10011000" // LDA dn1, [p7, dj3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4449 "10100110" // /* MW 3 */ + 4450 "01100000" // /* MW 2 */ + 4451 "00000111" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 +.delay_slot + 4452 "10011000" // LDA r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4453 "00010110" // /* MW 3 */ + 4454 "00000110" // /* MW 2 */ + 4455 "00000011" // /* MW 1 */ +.src_ref 4 "tuple" 562 49 +.delay_slot + 4456 "10011000" // LDA m4, [p3, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4457 "00000110" // /* MW 3 */ + 4458 "00010110" // /* MW 2 */ + 4459 "00000011" // /* MW 1 */ +.src_ref 8 "vector.hpp" 109 101 +.src_ref 2 "gemm_bfp16.h" 113 16 +.src_ref 2 "gemm_bfp16.h" 135 60 + 4460 "10111010" // LDA p3, [sp, #-20]; MOVXM p2, #508448 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4461 "00010000" // /* MW 9 */ + 4462 "00010000" // /* MW 8 */ + 4463 "00110001" // /* MW 7 */ + 4464 "11110001" // /* MW 6 */ + 4465 "00000001" // /* MW 5 */ + 4466 "00000000" // /* MW 4 */ + 4467 "00100000" // /* MW 3 */ + 4468 "10110011" // /* MW 2 */ + 4469 "11111101" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 110 4 first +.src_ref 2 "gemm_bfp16.h" 135 60 first + 4470 "10111010" // LDA r19, [p2]; MOVXM ls, #4560 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4471 "00010000" // /* MW 9 */ + 4472 "11101000" // /* MW 8 */ + 4473 "01111000" // /* MW 7 */ + 4474 "00000100" // /* MW 6 */ + 4475 "00000000" // /* MW 5 */ + 4476 "00000000" // /* MW 4 */ + 4477 "11010000" // /* MW 3 */ + 4478 "11001110" // /* MW 2 */ + 4479 "01000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 110 4 +.src_ref 2 "gemm_bfp16.h" 135 68 + 4480 "10111010" // MOVA r20, #-6; MOVXM le, #4624 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4481 "00010000" // /* MW 9 */ + 4482 "00001000" // /* MW 8 */ + 4483 "10111001" // /* MW 7 */ + 4484 "00000101" // /* MW 6 */ + 4485 "00000000" // /* MW 5 */ + 4486 "00000000" // /* MW 4 */ + 4487 "00000000" // /* MW 3 */ + 4488 "01010100" // /* MW 2 */ + 4489 "11111111" // /* MW 1 */ +.src_ref 8 "vector.hpp" 109 101 +.src_ref 8 "vector.hpp" 109 101 +.src_ref 8 "vector.hpp" 1365 19 +.src_ref 8 "transpose.hpp" 224 15 +.src_ref 8 "transpose.hpp" 225 15 + 4490 "01100100" // MOVX r17, #52; MOV r18, #53 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4491 "11010101" // /* MW 5 */ + 4492 "00100000" // /* MW 4 */ + 4493 "00101001" // /* MW 3 */ + 4494 "01011010" // /* MW 2 */ + 4495 "00000100" // /* MW 1 */ + 4496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4497 "00000000" // /* MW 1 */ + 4498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4499 "00000000" // /* MW 1 */ + 4500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4501 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 109 101 + 4502 "11111000" // MOV p2, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4503 "11000000" // /* MW 3 */ + 4504 "01100110" // /* MW 2 */ + 4505 "00011010" // /* MW 1 */ +.src_ref 8 "vector.hpp" 109 101 first +.src_ref 2 "gemm_bfp16.h" 135 68 + 4506 "00101100" // VLDA lfh0, [p2, #64]; LSHL r19, r19, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4507 "10011011" // /* MW 5 */ + 4508 "11001110" // /* MW 4 */ + 4509 "11111001" // /* MW 3 */ + 4510 "10000000" // /* MW 2 */ + 4511 "01000010" // /* MW 1 */ +.src_ref 8 "vector.hpp" 109 101 +.src_ref 2 "gemm_bfp16.h" 110 4 first + 4512 "00010100" // VLDA lfl0, [p2], #128; ADD.NC lc, r19, #-2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4513 "11111110" // /* MW 5 */ + 4514 "11110011" // /* MW 4 */ + 4515 "11111010" // /* MW 3 */ + 4516 "10010000" // /* MW 2 */ + 4517 "01000101" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4519 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 109 101 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4520 "10011000" // VLDA lfh0, [p2, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4521 "00000111" // /* MW 3 */ + 4522 "00010100" // /* MW 2 */ + 4523 "00000010" // /* MW 1 */ +.src_ref 8 "vector.hpp" 109 101 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4524 "10011000" // VLDA lfl0, [p2], #128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4525 "10000111" // /* MW 3 */ + 4526 "00101100" // /* MW 2 */ + 4527 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4529 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4531 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 109 101 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4532 "11111000" // VMOV x8, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4533 "10010010" // /* MW 3 */ + 4534 "00100001" // /* MW 2 */ + 4535 "00011100" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4536 "00000010" // NOPS; VMOV x1, lfh0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4537 "01110000" // /* MW 7 */ + 4538 "11001001" // /* MW 6 */ + 4539 "01010000" // /* MW 5 */ + 4540 "00000000" // /* MW 4 */ + 4541 "01100000" // /* MW 3 */ + 4542 "00101011" // /* MW 2 */ + 4543 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4544 "11100001" // NOPA; NOPB; NOPS; NOPX; VMOV x0, lfl0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4545 "00000000" // /* MW 15 */ + 4546 "00000000" // /* MW 14 */ + 4547 "01111000" // /* MW 13 */ + 4548 "11001001" // /* MW 12 */ + 4549 "00010010" // /* MW 11 */ + 4550 "00000000" // /* MW 10 */ + 4551 "00000000" // /* MW 9 */ + 4552 "00000000" // /* MW 8 */ + 4553 "01011011" // /* MW 7 */ + 4554 "00000001" // /* MW 6 */ + 4555 "00100000" // /* MW 5 */ + 4556 "00000000" // /* MW 4 */ + 4557 "11110000" // /* MW 3 */ + 4558 "00101100" // /* MW 2 */ + 4559 "00000000" // /* MW 1 */ +.label ZLS_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_304 +.src_ref 8 "vector.hpp" 109 101 +.src_ref 8 "vector.hpp" 109 101 +.src_ref 8 "vector.hpp" 1365 19 first +.src_ref 8 "transpose.hpp" 225 15 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.noswbrkpt +.loop_nesting 1 + 4560 "11100001" // VLDA lfh0, [p2, #64]; NOPB; NOPS; NOPX; VSHUFFLE x3, x0, x8, r18; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4561 "00000000" // /* MW 15 */ + 4562 "00000000" // /* MW 14 */ + 4563 "00111000" // /* MW 13 */ + 4564 "00100100" // /* MW 12 */ + 4565 "11000010" // /* MW 11 */ + 4566 "00000000" // /* MW 10 */ + 4567 "00000000" // /* MW 9 */ + 4568 "00000000" // /* MW 8 */ + 4569 "01011011" // /* MW 7 */ + 4570 "00000001" // /* MW 6 */ + 4571 "00100000" // /* MW 5 */ + 4572 "00000000" // /* MW 4 */ + 4573 "11110000" // /* MW 3 */ + 4574 "10000000" // /* MW 2 */ + 4575 "01000010" // /* MW 1 */ +.src_ref 8 "vector.hpp" 109 101 first +.src_ref 8 "vector.hpp" 109 101 first +.src_ref 8 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4576 "11100001" // VLDA lfl0, [p2], #128; NOPB; NOPS; NOPX; VSHUFFLE x2, x0, x8, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4577 "00000000" // /* MW 15 */ + 4578 "00000000" // /* MW 14 */ + 4579 "00111000" // /* MW 13 */ + 4580 "00100010" // /* MW 12 */ + 4581 "10000010" // /* MW 11 */ + 4582 "00000000" // /* MW 10 */ + 4583 "00000000" // /* MW 9 */ + 4584 "00000000" // /* MW 8 */ + 4585 "01011011" // /* MW 7 */ + 4586 "00000001" // /* MW 6 */ + 4587 "00100000" // /* MW 5 */ + 4588 "00000000" // /* MW 4 */ + 4589 "11110000" // /* MW 3 */ + 4590 "10010000" // /* MW 2 */ + 4591 "01000101" // /* MW 1 */ +.src_ref 8 "vector.hpp" 109 101 +.src_ref 2 "gemm_bfp16.h" 113 16 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4592 "11100001" // NOPA; NOPB; VST x3, [p3, #64]; NOPX; VMOV x8, lfh0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4593 "00000000" // /* MW 15 */ + 4594 "00000000" // /* MW 14 */ + 4595 "01111000" // /* MW 13 */ + 4596 "11001001" // /* MW 12 */ + 4597 "00010000" // /* MW 11 */ + 4598 "00000010" // /* MW 10 */ + 4599 "00000000" // /* MW 9 */ + 4600 "00000000" // /* MW 8 */ + 4601 "11010011" // /* MW 7 */ + 4602 "00010100" // /* MW 6 */ + 4603 "00100011" // /* MW 5 */ + 4604 "00000000" // /* MW 4 */ + 4605 "11110000" // /* MW 3 */ + 4606 "00101100" // /* MW 2 */ + 4607 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 113 16 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4608 "11100001" // NOPA; NOPB; VST x2, [p3], #128; NOPX; VMOV x1, lfh0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4609 "00000000" // /* MW 15 */ + 4610 "00000000" // /* MW 14 */ + 4611 "01111000" // /* MW 13 */ + 4612 "11001001" // /* MW 12 */ + 4613 "01010000" // /* MW 11 */ + 4614 "00000000" // /* MW 10 */ + 4615 "00000000" // /* MW 9 */ + 4616 "00000000" // /* MW 8 */ + 4617 "10010011" // /* MW 7 */ + 4618 "00101100" // /* MW 6 */ + 4619 "00100011" // /* MW 5 */ + 4620 "00000000" // /* MW 4 */ + 4621 "11110000" // /* MW 3 */ + 4622 "00101100" // /* MW 2 */ + 4623 "00000000" // /* MW 1 */ +.label ZLE_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_368 +.end_of_loop +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4624 "11100001" // NOPA; NOPB; NOPS; NOPX; VMOV x0, lfl0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4625 "00000000" // /* MW 15 */ + 4626 "00000000" // /* MW 14 */ + 4627 "01111000" // /* MW 13 */ + 4628 "11001001" // /* MW 12 */ + 4629 "00010010" // /* MW 11 */ + 4630 "00000000" // /* MW 10 */ + 4631 "00000000" // /* MW 9 */ + 4632 "00000000" // /* MW 8 */ + 4633 "01011011" // /* MW 7 */ + 4634 "00000001" // /* MW 6 */ + 4635 "00100000" // /* MW 5 */ + 4636 "00000000" // /* MW 4 */ + 4637 "11110000" // /* MW 3 */ + 4638 "00101100" // /* MW 2 */ + 4639 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 109 101 first +.src_ref 8 "vector.hpp" 1365 19 first +.src_ref 8 "transpose.hpp" 225 15 first +.loop_nesting 0 + 4640 "01111000" // VSHUFFLE x3, x0, x8, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4641 "01001000" // /* MW 3 */ + 4642 "10000100" // /* MW 2 */ + 4643 "00011001" // /* MW 1 */ +.src_ref 8 "vector.hpp" 109 101 +.src_ref 8 "transpose.hpp" 224 15 first + 4644 "01111000" // VSHUFFLE x2, x0, x8, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4645 "01000100" // /* MW 3 */ + 4646 "00000100" // /* MW 2 */ + 4647 "00011001" // /* MW 1 */ +.src_ref 8 "vector.hpp" 109 101 first +.src_ref 2 "gemm_bfp16.h" 113 16 first + 4648 "00000010" // VST x3, [p3, #64]; VMOV x8, lfh0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4649 "01110000" // /* MW 7 */ + 4650 "11001001" // /* MW 6 */ + 4651 "00010000" // /* MW 5 */ + 4652 "00000010" // /* MW 4 */ + 4653 "01100000" // /* MW 3 */ + 4654 "10011010" // /* MW 2 */ + 4655 "01100010" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 113 16 + 4656 "00000010" // VST x2, [p3], #128; VMOV x0, lfl0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4657 "01110000" // /* MW 7 */ + 4658 "11001001" // /* MW 6 */ + 4659 "00010010" // /* MW 5 */ + 4660 "00000000" // /* MW 4 */ + 4661 "01100000" // /* MW 3 */ + 4662 "10010010" // /* MW 2 */ + 4663 "01100101" // /* MW 1 */ +.src_ref 8 "vector.hpp" 109 101 +.src_ref 8 "vector.hpp" 1365 19 first +.src_ref 8 "transpose.hpp" 225 15 first + 4664 "01111000" // VSHUFFLE x3, x0, x8, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4665 "01001000" // /* MW 3 */ + 4666 "10000100" // /* MW 2 */ + 4667 "00011001" // /* MW 1 */ +.src_ref 8 "vector.hpp" 109 101 first +.src_ref 8 "transpose.hpp" 224 15 first + 4668 "01111000" // VSHUFFLE x2, x0, x8, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4669 "01000100" // /* MW 3 */ + 4670 "00000100" // /* MW 2 */ + 4671 "00011001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 113 16 first + 4672 "00000010" // VST x3, [p3, #64]; VMOV x1, lfh0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4673 "01110000" // /* MW 7 */ + 4674 "11001001" // /* MW 6 */ + 4675 "01010000" // /* MW 5 */ + 4676 "00000000" // /* MW 4 */ + 4677 "01100000" // /* MW 3 */ + 4678 "10011010" // /* MW 2 */ + 4679 "01100010" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 113 16 + 4680 "00000010" // VST x2, [p3], #128; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4681 "01110000" // /* MW 7 */ + 4682 "10100101" // /* MW 6 */ + 4683 "00000001" // /* MW 5 */ + 4684 "00000000" // /* MW 4 */ + 4685 "01100000" // /* MW 3 */ + 4686 "10010010" // /* MW 2 */ + 4687 "01100101" // /* MW 1 */ +.label TGT_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_432 +.src_ref 2 "gemm_bfp16.h" 141 44 first + 4688 "00011000" // PADDB [p7], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4689 "10010000" // /* MW 3 */ + 4690 "00011111" // /* MW 2 */ + 4691 "00111111" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 148 2 first + 4692 "10011000" // LDA dj3, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4693 "11000110" // /* MW 3 */ + 4694 "00011101" // /* MW 2 */ + 4695 "00000111" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 148 2 +.src_ref 2 "gemm_bfp16.h" 148 2 + 4696 "01010100" // LDA dn3, [p7], #4; MOV m5, #-36 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4697 "01110001" // /* MW 5 */ + 4698 "00011111" // /* MW 4 */ + 4699 "11011010" // /* MW 3 */ + 4700 "10110100" // /* MW 2 */ + 4701 "11100011" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 148 2 + 4702 "10011000" // LDA r18, [p7], m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4703 "01010110" // /* MW 3 */ + 4704 "10101010" // /* MW 2 */ + 4705 "00000111" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 172 37 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 4706 "10111010" // LDA r20, [p7], #12; MOVXM p3, #508788 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4707 "00010000" // /* MW 9 */ + 4708 "10111010" // /* MW 8 */ + 4709 "10110001" // /* MW 7 */ + 4710 "11110001" // /* MW 6 */ + 4711 "00000001" // /* MW 5 */ + 4712 "00000000" // /* MW 4 */ + 4713 "11010000" // /* MW 3 */ + 4714 "11010010" // /* MW 2 */ + 4715 "11100111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4716 "10111010" // LDA.s8 r20, [p3]; MOVXM r23, #16256 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4717 "00010000" // /* MW 9 */ + 4718 "11000000" // /* MW 8 */ + 4719 "11101111" // /* MW 7 */ + 4720 "00001110" // /* MW 6 */ + 4721 "00000000" // /* MW 5 */ + 4722 "00000000" // /* MW 4 */ + 4723 "01010000" // /* MW 3 */ + 4724 "11010000" // /* MW 2 */ + 4725 "01100000" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 5 "aie_core.h" 81 15 +.src_ref 2 "gemm_bfp16.h" 174 6 +.src_ref 2 "gemm_bfp16.h" 175 6 +.src_ref 2 "gemm_bfp16.h" 176 6 +.src_ref 2 "gemm_bfp16.h" 177 6 +.src_ref 2 "gemm_bfp16.h" 202 6 +.src_ref 2 "gemm_bfp16.h" 203 6 +.src_ref 2 "gemm_bfp16.h" 204 6 +.src_ref 2 "gemm_bfp16.h" 205 6 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4726 "11010100" // LDA p3, [sp, #-12]; VBCST.16 x5, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4727 "11100101" // /* MW 5 */ + 4728 "10111010" // /* MW 4 */ + 4729 "00100101" // /* MW 3 */ + 4730 "10110011" // /* MW 2 */ + 4731 "11111110" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 7 "accum.hpp" 940 83 first +.src_ref 2 "gemm_bfp16.h" 172 37 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4732 "01010100" // VLDA.CONV.fp32.bf16 cmh4, [p6, #64]; MOV m6, #84 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4733 "01010001" // /* MW 5 */ + 4734 "00000001" // /* MW 4 */ + 4735 "01111100" // /* MW 3 */ + 4736 "11001101" // /* MW 2 */ + 4737 "11000010" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 172 37 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4738 "11010100" // LDA m7, [p7], #4; VBCST.16 x4, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4739 "11100101" // /* MW 5 */ + 4740 "10111010" // /* MW 4 */ + 4741 "11010100" // /* MW 3 */ + 4742 "11110000" // /* MW 2 */ + 4743 "11100011" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 172 37 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4744 "11010100" // LDA m3, [p7], #4; VMOV x10, x4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4745 "00100101" // /* MW 5 */ + 4746 "01010001" // /* MW 4 */ + 4747 "11011010" // /* MW 3 */ + 4748 "10110000" // /* MW 2 */ + 4749 "11100011" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 172 37 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4750 "11010100" // LDA m1, [p7], #4; VMOV x11, x5 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4751 "00100101" // /* MW 5 */ + 4752 "01010101" // /* MW 4 */ + 4753 "11011011" // /* MW 3 */ + 4754 "10010000" // /* MW 2 */ + 4755 "11100011" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 172 37 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4756 "00101100" // LDA m6, [p7], m6; ADD r23, r20, #-2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4757 "11110110" // /* MW 5 */ + 4758 "01011111" // /* MW 4 */ + 4759 "11011010" // /* MW 3 */ + 4760 "01100000" // /* MW 2 */ + 4761 "11111001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 172 37 +.src_ref 2 "gemm_bfp16.h" 172 37 + 4762 "01010100" // LDA m0, [p7], #-16; MOV m2, #280 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4763 "01100001" // /* MW 5 */ + 4764 "00000100" // /* MW 4 */ + 4765 "11010100" // /* MW 3 */ + 4766 "10000000" // /* MW 2 */ + 4767 "11111001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 172 37 +.src_ref 2 "gemm_bfp16.h" 172 37 + 4768 "01010100" // LDA dn0, [p7], #4; MOV m5, #-108 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4769 "01010001" // /* MW 5 */ + 4770 "00011110" // /* MW 4 */ + 4771 "11011010" // /* MW 3 */ + 4772 "10000100" // /* MW 2 */ + 4773 "11100011" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 172 37 + 4774 "10011000" // LDA dj0, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4775 "01000110" // /* MW 3 */ + 4776 "00011100" // /* MW 2 */ + 4777 "00000111" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 172 37 + 4778 "10011000" // LDA dn4, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4779 "00100110" // /* MW 3 */ + 4780 "00011110" // /* MW 2 */ + 4781 "00000111" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 172 37 + 4782 "10011000" // LDA dj4, [p7], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4783 "01000110" // /* MW 3 */ + 4784 "00101110" // /* MW 2 */ + 4785 "00000111" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 5 "aie_core.h" 81 15 +.src_ref 5 "aie_core.h" 100 15 +.src_ref 5 "aie_core.h" 100 15 +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 7 "accum.hpp" 903 19 +.src_ref 7 "accum.hpp" 940 83 +.src_ref 2 "gemm_bfp16.h" 172 37 + 4786 "01010100" // LDA m5, [p7], m5; MOV dc4, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4787 "00000001" // /* MW 5 */ + 4788 "10000000" // /* MW 4 */ + 4789 "11011001" // /* MW 3 */ + 4790 "01010000" // /* MW 2 */ + 4791 "11110101" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 172 37 +.src_ref 2 "gemm_bfp16.h" 175 6 +.src_ref 2 "gemm_bfp16.h" 268 12 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 4792 "10111010" // LDA r26, [p7], m2; MOVS p0, p3; MOV r25, p7 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4793 "01110010" // /* MW 9 */ + 4794 "01100000" // /* MW 8 */ + 4795 "00101111" // /* MW 7 */ + 4796 "00000011" // /* MW 6 */ + 4797 "10001011" // /* MW 5 */ + 4798 "10001100" // /* MW 4 */ + 4799 "11010000" // /* MW 3 */ + 4800 "01101010" // /* MW 2 */ + 4801 "11101001" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 5 "aie_core.h" 100 15 +.src_ref 5 "aie_core.h" 100 15 +.src_ref 5 "aie_core.h" 100 15 +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 6 "array_helpers.hpp" 313 19 +.src_ref 7 "accum.hpp" 903 19 +.src_ref 7 "accum.hpp" 940 83 +.src_ref 2 "gemm_bfp16.h" 172 2 +.src_ref 2 "gemm_bfp16.h" 175 6 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 4802 "01111110" // LDA p7, [sp, #-20]; PADDB [p0], m3; MOVS dc0, dc4; MOVXM p2, #4912 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 4803 "01100000" // /* MW 13 */ + 4804 "00001001" // /* MW 12 */ + 4805 "00000010" // /* MW 11 */ + 4806 "00000010" // /* MW 10 */ + 4807 "00110011" // /* MW 9 */ + 4808 "10100110" // /* MW 8 */ + 4809 "00000000" // /* MW 7 */ + 4810 "00000000" // /* MW 6 */ + 4811 "00100000" // /* MW 5 */ + 4812 "11010111" // /* MW 4 */ + 4813 "00100000" // /* MW 3 */ + 4814 "11110011" // /* MW 2 */ + 4815 "11111101" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 5 "aie_core.h" 100 15 +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 2 "gemm_bfp16.h" 175 6 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4816 "10111010" // VLDA bmlh2, [p0, #64]; MOVS dc2, dc4; MOV dc5, dc4 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4817 "01110010" // /* MW 9 */ + 4818 "11000000" // /* MW 8 */ + 4819 "11100100" // /* MW 7 */ + 4820 "00000010" // /* MW 6 */ + 4821 "01001011" // /* MW 5 */ + 4822 "00010000" // /* MW 4 */ + 4823 "10110010" // /* MW 3 */ + 4824 "10100110" // /* MW 2 */ + 4825 "00000010" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 5 "aie_core.h" 81 15 +.src_ref 2 "gemm_bfp16.h" 174 6 +.src_ref 2 "gemm_bfp16.h" 175 6 +.src_ref 2 "gemm_bfp16.h" 176 6 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4826 "10111010" // VLDA bmhl2, [p0, #128]; MOVS p4, p3; MOV dj2, dj3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4827 "01110010" // /* MW 9 */ + 4828 "10000000" // /* MW 8 */ + 4829 "01000011" // /* MW 7 */ + 4830 "00000001" // /* MW 6 */ + 4831 "10001011" // /* MW 5 */ + 4832 "10001100" // /* MW 4 */ + 4833 "10110100" // /* MW 3 */ + 4834 "10101010" // /* MW 2 */ + 4835 "00000100" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 +.src_ref 2 "gemm_bfp16.h" 175 6 +.src_ref 2 "gemm_bfp16.h" 176 6 +.src_ref 2 "gemm_bfp16.h" 177 6 first +.src_ref 2 "gemm_bfp16.h" 202 6 +.src_ref 2 "gemm_bfp16.h" 203 6 +.src_ref 2 "gemm_bfp16.h" 204 6 +.src_ref 2 "gemm_bfp16.h" 204 6 +.src_ref 2 "gemm_bfp16.h" 205 6 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4836 "01111110" // VLDA bmhh2, [p0, #192]; PADDB [p3], m6; MOVS p1, p3; MOVX r17, #780; MOV r24, m1 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 4837 "01100000" // /* MW 13 */ + 4838 "10010001" // /* MW 12 */ + 4839 "00110001" // /* MW 11 */ + 4840 "00001111" // /* MW 10 */ + 4841 "00100000" // /* MW 9 */ + 4842 "01100001" // /* MW 8 */ + 4843 "00110001" // /* MW 7 */ + 4844 "00100010" // /* MW 6 */ + 4845 "00100011" // /* MW 5 */ + 4846 "10010111" // /* MW 4 */ + 4847 "10110111" // /* MW 3 */ + 4848 "10101110" // /* MW 2 */ + 4849 "00000110" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 5 "aie_core.h" 100 15 +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 7 "mmul_bf16_bf16.hpp" 113 29 +.src_ref 7 "mmul_bf16_bf16.hpp" 113 29 +.src_ref 7 "mmul_bf16_bf16.hpp" 114 29 +.src_ref 7 "mmul_bf16_bf16.hpp" 114 29 +.src_ref 2 "gemm_bfp16.h" 177 6 +.src_ref 2 "gemm_bfp16.h" 268 37 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4850 "01111110" // VLDA bmlh0, [p3, #64]; NOPB; MOVS dc3, dc0; MOVX crRnd, r20; MOV r20, p7 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 4851 "01100000" // /* MW 13 */ + 4852 "00001001" // /* MW 12 */ + 4853 "01100000" // /* MW 11 */ + 4854 "00001111" // /* MW 10 */ + 4855 "11101100" // /* MW 9 */ + 4856 "01010001" // /* MW 8 */ + 4857 "10000000" // /* MW 7 */ + 4858 "00111010" // /* MW 6 */ + 4859 "00100101" // /* MW 5 */ + 4860 "00000000" // /* MW 4 */ + 4861 "10110000" // /* MW 3 */ + 4862 "10000110" // /* MW 2 */ + 4863 "01100010" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 8 "transpose.hpp" 224 15 +.src_ref 8 "transpose.hpp" 224 15 +.src_ref 2 "gemm_bfp16.h" 175 6 +.src_ref 2 "gemm_bfp16.h" 177 6 +.src_ref 2 "gemm_bfp16.h" 203 6 + 4864 "11100001" // VLDA bmhl0, [p3, #128]; NOPB; MOVS dn2, dn3; MOVX r19, #52; MOV m2, m3; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4865 "00000000" // /* MW 15 */ + 4866 "00000000" // /* MW 14 */ + 4867 "01111000" // /* MW 13 */ + 4868 "00000000" // /* MW 12 */ + 4869 "00000011" // /* MW 11 */ + 4870 "10001001" // /* MW 10 */ + 4871 "00110110" // /* MW 9 */ + 4872 "00000001" // /* MW 8 */ + 4873 "01001011" // /* MW 7 */ + 4874 "01001110" // /* MW 6 */ + 4875 "00100010" // /* MW 5 */ + 4876 "00000000" // /* MW 4 */ + 4877 "10110000" // /* MW 3 */ + 4878 "10001010" // /* MW 2 */ + 4879 "01100100" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 5 "aie_core.h" 100 15 +.src_ref 8 "vector.hpp" 1365 19 +.src_ref 8 "vector.hpp" 1365 19 +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 8 "transpose.hpp" 225 15 +.src_ref 8 "transpose.hpp" 225 15 +.src_ref 2 "gemm_bfp16.h" 177 6 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 4880 "11100001" // VLDA bmhh0, [p3, #192]; NOPB; MOVS dc1, dc3; MOVX r21, #53; MOV m3, r18; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4881 "00000000" // /* MW 15 */ + 4882 "00000000" // /* MW 14 */ + 4883 "01111000" // /* MW 13 */ + 4884 "10010000" // /* MW 12 */ + 4885 "10000100" // /* MW 11 */ + 4886 "10101001" // /* MW 10 */ + 4887 "01010110" // /* MW 9 */ + 4888 "00000001" // /* MW 8 */ + 4889 "01001011" // /* MW 7 */ + 4890 "00001100" // /* MW 6 */ + 4891 "00100001" // /* MW 5 */ + 4892 "00000000" // /* MW 4 */ + 4893 "10110000" // /* MW 3 */ + 4894 "10001110" // /* MW 2 */ + 4895 "01100110" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 7 "mmul_bf16_bf16.hpp" 111 36 +.src_ref 7 "mmul_bf16_bf16.hpp" 111 36 +.src_ref 2 "gemm_bfp16.h" 174 6 +.src_ref 2 "gemm_bfp16.h" 176 6 first +.src_ref 2 "gemm_bfp16.h" 177 6 +.aggressive_scheduled_block_id 4 +.noswbrkpt + 4896 "11100001" // VLDA bmll0, [p3]; VLDB x4, [p7, #64]; PADDS [p4], m1; MOVX r22, #60; MOV p5, p4; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4897 "00000000" // /* MW 15 */ + 4898 "00000000" // /* MW 14 */ + 4899 "01111000" // /* MW 13 */ + 4900 "01100000" // /* MW 12 */ + 4901 "10110100" // /* MW 11 */ + 4902 "10001010" // /* MW 10 */ + 4903 "01100111" // /* MW 9 */ + 4904 "00000001" // /* MW 8 */ + 4905 "01011011" // /* MW 7 */ + 4906 "00101000" // /* MW 6 */ + 4907 "01101100" // /* MW 5 */ + 4908 "00101010" // /* MW 4 */ + 4909 "10111110" // /* MW 3 */ + 4910 "10000010" // /* MW 2 */ + 4911 "01100000" // /* MW 1 */ +.label TGT_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_656 +.src_ref 5 "aie_core.h" 81 15 +.src_ref 5 "aie_core.h" 100 15 +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 2 "gemm_bfp16.h" 174 6 first +.src_ref 2 "gemm_bfp16.h" 176 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 4912 "11110110" // VLDA bmlh1, [p4, #64]; PADDB [p5], m7; MOVS p3, p5; MOV m1, r16 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4913 "01110000" // /* MW 11 */ + 4914 "00010000" // /* MW 10 */ + 4915 "10000100" // /* MW 9 */ + 4916 "00000000" // /* MW 8 */ + 4917 "10001011" // /* MW 7 */ + 4918 "10010100" // /* MW 6 */ + 4919 "00100011" // /* MW 5 */ + 4920 "11010111" // /* MW 4 */ + 4921 "10111011" // /* MW 3 */ + 4922 "10010110" // /* MW 2 */ + 4923 "10000010" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 first +.src_ref 2 "gemm_bfp16.h" 176 6 first +.src_ref 2 "gemm_bfp16.h" 182 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 4924 "10111010" // VLDA bmhl1, [p4, #128]; PADDB.2D [p3], d3; ADD.NC lc, r26, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4925 "01001110" // /* MW 9 */ + 4926 "10111111" // /* MW 8 */ + 4927 "10111110" // /* MW 7 */ + 4928 "00000010" // /* MW 6 */ + 4929 "10010000" // /* MW 5 */ + 4930 "01110011" // /* MW 4 */ + 4931 "10110011" // /* MW 3 */ + 4932 "10011010" // /* MW 2 */ + 4933 "10000100" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 176 6 +.src_ref 2 "gemm_bfp16.h" 182 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 4934 "10111010" // VLDA bmhh1, [p4, #192]; MOVXM ls, #5120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4935 "00010000" // /* MW 9 */ + 4936 "00000000" // /* MW 8 */ + 4937 "01111010" // /* MW 7 */ + 4938 "00000100" // /* MW 6 */ + 4939 "00000000" // /* MW 5 */ + 4940 "00000000" // /* MW 4 */ + 4941 "10110000" // /* MW 3 */ + 4942 "10011110" // /* MW 2 */ + 4943 "10000110" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 313 19 +.src_ref 2 "gemm_bfp16.h" 176 6 +.src_ref 2 "gemm_bfp16.h" 182 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 4944 "01111110" // VLDA bmll1, [p4]; VLDB.3D x7, [p7], d1; MOVS p4, p7; MOVXM le, #5200 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 4945 "01100000" // /* MW 13 */ + 4946 "10010001" // /* MW 12 */ + 4947 "10010011" // /* MW 11 */ + 4948 "00000010" // /* MW 10 */ + 4949 "01000101" // /* MW 9 */ + 4950 "10110111" // /* MW 8 */ + 4951 "00000000" // /* MW 7 */ + 4952 "00000000" // /* MW 6 */ + 4953 "11101000" // /* MW 5 */ + 4954 "01110011" // /* MW 4 */ + 4955 "10111110" // /* MW 3 */ + 4956 "10010010" // /* MW 2 */ + 4957 "10000000" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 313 19 first +.src_ref 2 "gemm_bfp16.h" 174 6 first +.src_ref 2 "gemm_bfp16.h" 175 6 +.src_ref 2 "gemm_bfp16.h" 203 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 4958 "10111010" // VLDA bmlh3, [p5, #64]; PADDB [p4], m4; MOV m3, m2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4959 "01111110" // /* MW 9 */ + 4960 "00000000" // /* MW 8 */ + 4961 "10000010" // /* MW 7 */ + 4962 "00000001" // /* MW 6 */ + 4963 "10010000" // /* MW 5 */ + 4964 "10001011" // /* MW 4 */ + 4965 "10110100" // /* MW 3 */ + 4966 "10110110" // /* MW 2 */ + 4967 "10100010" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 313 19 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 4968 "00110010" // VLDA x9, [p4]; VLDB x5, [p4, #64]; MOVS p4, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4969 "10001011" // /* MW 7 */ + 4970 "10011100" // /* MW 6 */ + 4971 "11101100" // /* MW 5 */ + 4972 "00101010" // /* MW 4 */ + 4973 "01111000" // /* MW 3 */ + 4974 "11001011" // /* MW 2 */ + 4975 "10000000" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 6 "array_helpers.hpp" 313 19 first +.src_ref 2 "gemm_bfp16.h" 174 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 4976 "00110010" // VLDA bmhl3, [p5, #128]; VLDB x4, [p7, #64]; PADDS [p4], m4 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4977 "01011011" // /* MW 7 */ + 4978 "10001000" // /* MW 6 */ + 4979 "01101100" // /* MW 5 */ + 4980 "00101010" // /* MW 4 */ + 4981 "10111110" // /* MW 3 */ + 4982 "10111010" // /* MW 2 */ + 4983 "10100100" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 2 "gemm_bfp16.h" 174 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 4984 "00111100" // VLDA bmhh3, [p5, #192]; VLDB.3D x7, [p7], d1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4985 "11101000" // /* MW 5 */ + 4986 "01110011" // /* MW 4 */ + 4987 "10111110" // /* MW 3 */ + 4988 "10111110" // /* MW 2 */ + 4989 "10100110" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 6 "array_helpers.hpp" 313 19 +.src_ref 2 "gemm_bfp16.h" 174 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 4990 "10111010" // VLDA bmll3, [p5]; VLDB x5, [p4, #64]; MOV p5, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4991 "01111110" // /* MW 9 */ + 4992 "01100000" // /* MW 8 */ + 4993 "10110110" // /* MW 7 */ + 4994 "00000010" // /* MW 6 */ + 4995 "01110100" // /* MW 5 */ + 4996 "00010101" // /* MW 4 */ + 4997 "10110100" // /* MW 3 */ + 4998 "10110010" // /* MW 2 */ + 4999 "10100000" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 313 19 first +.src_ref 7 "accum.hpp" 903 19 first +.src_ref 7 "accum.hpp" 940 83 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5000 "00111100" // VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;PADDB [p5], m5 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5001 "00100000" // /* MW 5 */ + 5002 "01010111" // /* MW 4 */ + 5003 "01111011" // /* MW 3 */ + 5004 "01000101" // /* MW 2 */ + 5005 "11000011" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 8 "transpose.hpp" 224 15 first +.src_ref 2 "gemm_bfp16.h" 175 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5006 "10111010" // VLDA bmll2, [p0]; VLDB x9, [p4]; VSHUFFLE x6, x7, x4, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5007 "00111110" // /* MW 9 */ + 5008 "00100110" // /* MW 8 */ + 5009 "10011101" // /* MW 7 */ + 5010 "00000001" // /* MW 6 */ + 5011 "01110100" // /* MW 5 */ + 5012 "00000110" // /* MW 4 */ + 5013 "10110100" // /* MW 3 */ + 5014 "10100010" // /* MW 2 */ + 5015 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1365 19 first +.src_ref 8 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5016 "01111000" // VSHUFFLE x7, x7, x4, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5017 "01010100" // /* MW 3 */ + 5018 "10111010" // /* MW 2 */ + 5019 "00011011" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 313 19 +.src_ref 8 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5020 "10111010" // VLDB x4, [p7, #64]; MOVS p4, p7; VSHUFFLE x8, x9, x5, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5021 "00110110" // /* MW 9 */ + 5022 "01100110" // /* MW 8 */ + 5023 "00100101" // /* MW 7 */ + 5024 "00000010" // /* MW 6 */ + 5025 "00110100" // /* MW 5 */ + 5026 "00010101" // /* MW 4 */ + 5027 "01100111" // /* MW 3 */ + 5028 "10010001" // /* MW 2 */ + 5029 "10010011" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1365 19 first +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 6 "array_helpers.hpp" 313 19 first +.src_ref 7 "accum.hpp" 940 83 first +.src_ref 7 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 8 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5030 "01100110" // VLDA.CONV.fp32.bf16 cml4, [p5];PADDB [p4], m4; VSHUFFLE x9, x9, x5, r21; VMUL.f dm4, y3, y5, r22 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5031 "01010001" // /* MW 11 */ + 5032 "11101101" // /* MW 10 */ + 5033 "10110100" // /* MW 9 */ + 5034 "01100010" // /* MW 8 */ + 5035 "11010100" // /* MW 7 */ + 5036 "11001010" // /* MW 6 */ + 5037 "00100100" // /* MW 5 */ + 5038 "00010111" // /* MW 4 */ + 5039 "01111001" // /* MW 3 */ + 5040 "11000101" // /* MW 2 */ + 5041 "10100000" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 313 19 +.src_ref 7 "accum.hpp" 903 19 first +.src_ref 7 "accum.hpp" 940 83 +.src_ref 8 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5042 "11110110" // VLDA.CONV.fp32.bf16 cmh4, [p5, #64];VLDB x5, [p4, #64]; MOVS p5, p6; VSHUFFLE x6, x7, x4, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5043 "00110000" // /* MW 11 */ + 5044 "00100110" // /* MW 10 */ + 5045 "10011101" // /* MW 9 */ + 5046 "00000001" // /* MW 8 */ + 5047 "10001011" // /* MW 7 */ + 5048 "10011000" // /* MW 6 */ + 5049 "11101101" // /* MW 5 */ + 5050 "00101010" // /* MW 4 */ + 5051 "01111000" // /* MW 3 */ + 5052 "11001101" // /* MW 2 */ + 5053 "10100010" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1365 19 first +.src_ref 6 "array_helpers.hpp" 313 19 first +.src_ref 8 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5054 "11010100" // PADDA [p5], m5; VSHUFFLE x7, x7, x4, r21 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5055 "10101000" // /* MW 5 */ + 5056 "01110100" // /* MW 4 */ + 5057 "11110111" // /* MW 3 */ + 5058 "00001100" // /* MW 2 */ + 5059 "10110101" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 7 "accum.hpp" 940 83 first +.src_ref 7 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 7 "mmul_bf16_bf16.hpp" 113 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5060 "01001010" // VLDA.CONV.fp32.bf16 cmh4, [p6, #64]; VCONV.bfp16ebs8.fp32 ex0, dm4; VMUL.f dm4, y4, y5, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5061 "01010001" // /* MW 9 */ + 5062 "11110001" // /* MW 8 */ + 5063 "10110100" // /* MW 7 */ + 5064 "00001001" // /* MW 6 */ + 5065 "00110110" // /* MW 5 */ + 5066 "00001010" // /* MW 4 */ + 5067 "01110000" // /* MW 3 */ + 5068 "11001101" // /* MW 2 */ + 5069 "11000010" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 7 "accum.hpp" 903 19 first +.src_ref 7 "accum.hpp" 940 83 +.src_ref 8 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5070 "10111010" // VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;VLDB.3D x7, [p7], d1; VSHUFFLE x8, x9, x5, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5071 "00111110" // /* MW 9 */ + 5072 "01100110" // /* MW 8 */ + 5073 "00100101" // /* MW 7 */ + 5074 "00000010" // /* MW 6 */ + 5075 "11110100" // /* MW 5 */ + 5076 "00111001" // /* MW 4 */ + 5077 "01110111" // /* MW 3 */ + 5078 "01000101" // /* MW 2 */ + 5079 "11000011" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1365 19 first +.src_ref 8 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5080 "01111000" // VSHUFFLE x9, x9, x5, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5081 "11010100" // /* MW 3 */ + 5082 "11001010" // /* MW 2 */ + 5083 "00011100" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 114 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5084 "00011000" // VCONV.bfp16ebs8.fp32 ex1, dm4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5085 "00110110" // /* MW 3 */ + 5086 "10001010" // /* MW 2 */ + 5087 "00001000" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 7 "accum.hpp" 940 83 first +.src_ref 7 "mmul_bf16_bf16.hpp" 111 36 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5088 "01001010" // VLDA.CONV.fp32.bf16 cml4, [p5];VLDB x9, [p4]; VMUL.f dm4, y3, y5, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5089 "01010001" // /* MW 9 */ + 5090 "11101101" // /* MW 8 */ + 5091 "10110100" // /* MW 7 */ + 5092 "00011101" // /* MW 6 */ + 5093 "01110100" // /* MW 5 */ + 5094 "00000110" // /* MW 4 */ + 5095 "01110100" // /* MW 3 */ + 5096 "11000101" // /* MW 2 */ + 5097 "10100000" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 7 "accum.hpp" 903 19 first +.src_ref 7 "accum.hpp" 940 83 +.src_ref 7 "mmul_bf16_bf16.hpp" 113 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5098 "00001100" // VLDA.CONV.fp32.bf16 cmh4, [p5, #64]; VCONV.bfp16ebs8.fp32 ex2, dm4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5099 "01101100" // /* MW 5 */ + 5100 "00010100" // /* MW 4 */ + 5101 "01110010" // /* MW 3 */ + 5102 "11001101" // /* MW 2 */ + 5103 "10100010" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 114 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5104 "11100001" // NOPA; NOPB; VCONV.bfp16ebs8.fp32 ex3, dm4;NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5105 "00000000" // /* MW 15 */ + 5106 "00000000" // /* MW 14 */ + 5107 "01111000" // /* MW 13 */ + 5108 "10100101" // /* MW 12 */ + 5109 "00000001" // /* MW 11 */ + 5110 "00000000" // /* MW 10 */ + 5111 "00000000" // /* MW 9 */ + 5112 "00000000" // /* MW 8 */ + 5113 "00110110" // /* MW 7 */ + 5114 "10001010" // /* MW 6 */ + 5115 "00100001" // /* MW 5 */ + 5116 "00000000" // /* MW 4 */ + 5117 "11110000" // /* MW 3 */ + 5118 "00101100" // /* MW 2 */ + 5119 "00000000" // /* MW 1 */ +.label ZLS_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_864 +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 313 19 +.src_ref 7 "mmul_bf16_bf16.hpp" 111 36 first +.begin_of_loop +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt +.loop_nesting 2 + 5120 "01001010" // VLDB x4, [p7, #64]; MOV p5, p6; VMUL.f dm4, y4, y5, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5121 "01010001" // /* MW 9 */ + 5122 "11110001" // /* MW 8 */ + 5123 "10110100" // /* MW 7 */ + 5124 "11100110" // /* MW 6 */ + 5125 "11000000" // /* MW 5 */ + 5126 "01101100" // /* MW 4 */ + 5127 "01101101" // /* MW 3 */ + 5128 "00101010" // /* MW 2 */ + 5129 "00001110" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 6 "array_helpers.hpp" 313 19 +.src_ref 7 "accum.hpp" 940 83 first +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 8 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5130 "01001011" // VLDA.CONV.fp32.bf16 cmh4, [p6, #64];VLDB.3D x7, [p7], d1; MOVS p4, p7; NOPX; VSHUFFLE x6, x7, x4, r19; VMAC.f dm3, dm3, ex0, ex1, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5131 "00000001" // /* MW 15 */ + 5132 "01011011" // /* MW 14 */ + 5133 "00111100" // /* MW 13 */ + 5134 "00100110" // /* MW 12 */ + 5135 "10011101" // /* MW 11 */ + 5136 "00000001" // /* MW 10 */ + 5137 "00000000" // /* MW 9 */ + 5138 "00000000" // /* MW 8 */ + 5139 "10001011" // /* MW 7 */ + 5140 "10011100" // /* MW 6 */ + 5141 "11101100" // /* MW 5 */ + 5142 "01110011" // /* MW 4 */ + 5143 "01111110" // /* MW 3 */ + 5144 "11001101" // /* MW 2 */ + 5145 "11000010" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 +.src_ref 8 "vector.hpp" 1365 19 first +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 313 19 first +.src_ref 7 "accum.hpp" 903 19 first +.src_ref 7 "accum.hpp" 940 83 +.src_ref 7 "mmul_bf16_bf16.hpp" 113 29 first +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 +.src_ref 8 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5146 "01001011" // VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;PADDB [p4], m4; VCONV.bfp16ebs8.fp32 ex0, dm4;NOPX; VSHUFFLE x7, x7, x4, r21; VMAC.f dm1, dm1, ex2, ex1, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5147 "00100001" // /* MW 15 */ + 5148 "01001001" // /* MW 14 */ + 5149 "00111100" // /* MW 13 */ + 5150 "00101010" // /* MW 12 */ + 5151 "11011101" // /* MW 11 */ + 5152 "00000001" // /* MW 10 */ + 5153 "00000000" // /* MW 9 */ + 5154 "00000000" // /* MW 8 */ + 5155 "00110110" // /* MW 7 */ + 5156 "00001010" // /* MW 6 */ + 5157 "00100000" // /* MW 5 */ + 5158 "00010111" // /* MW 4 */ + 5159 "01111001" // /* MW 3 */ + 5160 "01000101" // /* MW 2 */ + 5161 "11000011" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 6 "array_helpers.hpp" 313 19 +.src_ref 7 "mmul_bf16_bf16.hpp" 114 29 first +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5162 "01100110" // PADDA [p5], m5; VLDB x5, [p4, #64]; VCONV.bfp16ebs8.fp32 ex1, dm4; VMAC.f dm2, dm2, ex0, ex3, r17 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5163 "01101001" // /* MW 11 */ + 5164 "01000000" // /* MW 10 */ + 5165 "10001010" // /* MW 9 */ + 5166 "00001110" // /* MW 8 */ + 5167 "00011011" // /* MW 7 */ + 5168 "01000101" // /* MW 6 */ + 5169 "11101000" // /* MW 5 */ + 5170 "00101010" // /* MW 4 */ + 5171 "11111000" // /* MW 3 */ + 5172 "00001100" // /* MW 2 */ + 5173 "10110101" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 7 "accum.hpp" 940 83 first +.src_ref 7 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 8 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5174 "01100110" // VLDA.CONV.fp32.bf16 cml4, [p5];VLDB x9, [p4]; VSHUFFLE x8, x9, x5, r19; VMUL.f dm4, y3, y5, r22 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5175 "01010001" // /* MW 11 */ + 5176 "11101101" // /* MW 10 */ + 5177 "10110100" // /* MW 9 */ + 5178 "01100010" // /* MW 8 */ + 5179 "11001100" // /* MW 7 */ + 5180 "01001010" // /* MW 6 */ + 5181 "11101100" // /* MW 5 */ + 5182 "00001100" // /* MW 4 */ + 5183 "01111000" // /* MW 3 */ + 5184 "11000101" // /* MW 2 */ + 5185 "10100000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1365 19 first +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 7 "accum.hpp" 903 19 first +.src_ref 7 "accum.hpp" 940 83 +.src_ref 7 "mmul_bf16_bf16.hpp" 113 29 first +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 8 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5186 "01101110" // VLDA.CONV.fp32.bf16 cmh4, [p5, #64]; VCONV.bfp16ebs8.fp32 ex2, dm4; VSHUFFLE x9, x9, x5, r21; VMAC.f dm0, dm0, ex2, ex3, r17 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5187 "01101001" // /* MW 13 */ + 5188 "00000100" // /* MW 12 */ + 5189 "10001000" // /* MW 11 */ + 5190 "10100011" // /* MW 10 */ + 5191 "01010110" // /* MW 9 */ + 5192 "01100110" // /* MW 8 */ + 5193 "00000000" // /* MW 7 */ + 5194 "00000000" // /* MW 6 */ + 5195 "01101100" // /* MW 5 */ + 5196 "00010100" // /* MW 4 */ + 5197 "01110010" // /* MW 3 */ + 5198 "11001101" // /* MW 2 */ + 5199 "10100010" // /* MW 1 */ +.label ZLE_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_944 +.src_ref 7 "mmul_bf16_bf16.hpp" 114 29 first +.end_of_loop +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5200 "11100001" // NOPA; NOPB; VCONV.bfp16ebs8.fp32 ex3, dm4;NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5201 "00000000" // /* MW 15 */ + 5202 "00000000" // /* MW 14 */ + 5203 "01111000" // /* MW 13 */ + 5204 "10100101" // /* MW 12 */ + 5205 "00000001" // /* MW 11 */ + 5206 "00000000" // /* MW 10 */ + 5207 "00000000" // /* MW 9 */ + 5208 "00000000" // /* MW 8 */ + 5209 "00110110" // /* MW 7 */ + 5210 "10001010" // /* MW 6 */ + 5211 "00100001" // /* MW 5 */ + 5212 "00000000" // /* MW 4 */ + 5213 "11110000" // /* MW 3 */ + 5214 "00101100" // /* MW 2 */ + 5215 "00000000" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 313 19 +.src_ref 7 "accum.hpp" 940 83 first +.src_ref 7 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 2 "gemm_bfp16.h" 204 6 +.src_ref 2 "gemm_bfp16.h" 205 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 5216 "10001011" // VLDA.CONV.fp32.bf16 cmh4, [p6, #64];VLDB x4, [p7, #64]; MOVS p4, p1; NOPX; MOV p5, p6; VMUL.f dm4, y4, y5, r22 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5217 "10001010" // /* MW 15 */ + 5218 "10100111" // /* MW 14 */ + 5219 "01111101" // /* MW 13 */ + 5220 "01100000" // /* MW 12 */ + 5221 "10110110" // /* MW 11 */ + 5222 "00000010" // /* MW 10 */ + 5223 "00000000" // /* MW 9 */ + 5224 "00000000" // /* MW 8 */ + 5225 "10001011" // /* MW 7 */ + 5226 "10000100" // /* MW 6 */ + 5227 "01101100" // /* MW 5 */ + 5228 "00101010" // /* MW 4 */ + 5229 "01111110" // /* MW 3 */ + 5230 "11001101" // /* MW 2 */ + 5231 "11000010" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 6 "array_helpers.hpp" 313 19 first +.src_ref 7 "accum.hpp" 903 19 first +.src_ref 7 "accum.hpp" 940 83 +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 8 "transpose.hpp" 224 15 first +.src_ref 2 "gemm_bfp16.h" 203 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5232 "01001011" // VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;PADDB [p5], m5; MOVS p0, p1; NOPX; VSHUFFLE x6, x7, x4, r19; VMAC.f dm3, dm3, ex0, ex1, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5233 "00000001" // /* MW 15 */ + 5234 "01011011" // /* MW 14 */ + 5235 "00111100" // /* MW 13 */ + 5236 "00100110" // /* MW 12 */ + 5237 "10011101" // /* MW 11 */ + 5238 "00000001" // /* MW 10 */ + 5239 "00000000" // /* MW 9 */ + 5240 "00000000" // /* MW 8 */ + 5241 "10001011" // /* MW 7 */ + 5242 "10000100" // /* MW 6 */ + 5243 "00100000" // /* MW 5 */ + 5244 "01010111" // /* MW 4 */ + 5245 "01111011" // /* MW 3 */ + 5246 "01000101" // /* MW 2 */ + 5247 "11000011" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1365 19 first +.src_ref 7 "mmul_bf16_bf16.hpp" 113 29 first +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 +.src_ref 8 "transpose.hpp" 225 15 first +.src_ref 2 "gemm_bfp16.h" 202 6 first +.src_ref 2 "gemm_bfp16.h" 203 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5248 "01001011" // PADDA [p0], m3; PADDB [p1], m7; VCONV.bfp16ebs8.fp32 ex0, dm4;NOPX; VSHUFFLE x7, x7, x4, r21; VMAC.f dm1, dm1, ex2, ex1, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5249 "00100001" // /* MW 15 */ + 5250 "01001001" // /* MW 14 */ + 5251 "00111100" // /* MW 13 */ + 5252 "00101010" // /* MW 12 */ + 5253 "11011101" // /* MW 11 */ + 5254 "00000001" // /* MW 10 */ + 5255 "00000000" // /* MW 9 */ + 5256 "00000000" // /* MW 8 */ + 5257 "00110110" // /* MW 7 */ + 5258 "00001010" // /* MW 6 */ + 5259 "00100000" // /* MW 5 */ + 5260 "11010111" // /* MW 4 */ + 5261 "11110011" // /* MW 3 */ + 5262 "00001100" // /* MW 2 */ + 5263 "00001101" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 114 29 first +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 2 "gemm_bfp16.h" 176 6 +.src_ref 2 "gemm_bfp16.h" 204 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5264 "01001010" // VCONV.bfp16ebs8.fp32 ex1, dm4; MOV m1, r24; VMAC.f dm2, dm2, ex0, ex3, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5265 "01101001" // /* MW 9 */ + 5266 "01000000" // /* MW 8 */ + 5267 "10001010" // /* MW 7 */ + 5268 "11100100" // /* MW 6 */ + 5269 "00100000" // /* MW 5 */ + 5270 "00001100" // /* MW 4 */ + 5271 "11000001" // /* MW 3 */ + 5272 "01000110" // /* MW 2 */ + 5273 "00010001" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 7 "accum.hpp" 940 83 first +.src_ref 7 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 8 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5274 "01001010" // VLDA.CONV.fp32.bf16 cml4, [p5]; VSHUFFLE x8, x9, x5, r19; VMUL.f dm4, y3, y5, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5275 "01010001" // /* MW 9 */ + 5276 "11101101" // /* MW 8 */ + 5277 "10110100" // /* MW 7 */ + 5278 "01100010" // /* MW 6 */ + 5279 "11001100" // /* MW 5 */ + 5280 "01001010" // /* MW 4 */ + 5281 "01110100" // /* MW 3 */ + 5282 "11000101" // /* MW 2 */ + 5283 "10100000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1365 19 first +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 7 "accum.hpp" 903 19 first +.src_ref 7 "accum.hpp" 940 83 +.src_ref 7 "mmul_bf16_bf16.hpp" 113 29 first +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 8 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5284 "01101110" // VLDA.CONV.fp32.bf16 cmh4, [p5, #64]; VCONV.bfp16ebs8.fp32 ex2, dm4; VSHUFFLE x9, x9, x5, r21; VMAC.f dm0, dm0, ex2, ex3, r17 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5285 "01101001" // /* MW 13 */ + 5286 "00000100" // /* MW 12 */ + 5287 "10001000" // /* MW 11 */ + 5288 "10100011" // /* MW 10 */ + 5289 "01010110" // /* MW 9 */ + 5290 "01100110" // /* MW 8 */ + 5291 "00000000" // /* MW 7 */ + 5292 "00000000" // /* MW 6 */ + 5293 "01101100" // /* MW 5 */ + 5294 "00010100" // /* MW 4 */ + 5295 "01110010" // /* MW 3 */ + 5296 "11001101" // /* MW 2 */ + 5297 "10100010" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 7 "mmul_bf16_bf16.hpp" 114 29 first +.src_ref 2 "gemm_bfp16.h" 204 6 first +.src_ref 2 "gemm_bfp16.h" 205 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5298 "10111010" // PADDB [p4], m1; VCONV.bfp16ebs8.fp32 ex3, dm4; MOV p5, p4 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5299 "01110110" // /* MW 9 */ + 5300 "01100000" // /* MW 8 */ + 5301 "10110100" // /* MW 7 */ + 5302 "00000010" // /* MW 6 */ + 5303 "10010000" // /* MW 5 */ + 5304 "00101011" // /* MW 4 */ + 5305 "11000100" // /* MW 3 */ + 5306 "01000110" // /* MW 2 */ + 5307 "00110001" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5308 "01100010" // MOV m2, r18; VMAC.f dm3, dm3, ex0, ex1, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5309 "00101001" // /* MW 7 */ + 5310 "01100000" // /* MW 6 */ + 5311 "10001011" // /* MW 5 */ + 5312 "11100110" // /* MW 4 */ + 5313 "00100000" // /* MW 3 */ + 5314 "00001001" // /* MW 2 */ + 5315 "00000010" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 7 "accum.hpp" 940 83 first +.src_ref 7 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 7 "mmul_bf16_bf16.hpp" 113 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5316 "01001010" // VLDA.CONV.fp32.bf16 cmh4, [p6, #64]; VCONV.bfp16ebs8.fp32 ex0, dm4; VMUL.f dm4, y4, y5, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5317 "01010001" // /* MW 9 */ + 5318 "11110001" // /* MW 8 */ + 5319 "10110100" // /* MW 7 */ + 5320 "00001001" // /* MW 6 */ + 5321 "00110110" // /* MW 5 */ + 5322 "00001010" // /* MW 4 */ + 5323 "01110000" // /* MW 3 */ + 5324 "11001101" // /* MW 2 */ + 5325 "11000010" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5326 "01001000" // VMAC.f dm1, dm1, ex2, ex1, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5327 "00101001" // /* MW 3 */ + 5328 "00100100" // /* MW 2 */ + 5329 "10001001" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 114 29 first +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5330 "01100010" // VCONV.bfp16ebs8.fp32 ex1, dm4; VMAC.f dm2, dm2, ex0, ex3, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5331 "01101001" // /* MW 7 */ + 5332 "01000000" // /* MW 6 */ + 5333 "10001010" // /* MW 5 */ + 5334 "00000010" // /* MW 4 */ + 5335 "11000000" // /* MW 3 */ + 5336 "01000110" // /* MW 2 */ + 5337 "00010001" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5338 "01001000" // VMAC.f dm0, dm0, ex2, ex3, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5339 "01101001" // /* MW 3 */ + 5340 "00000100" // /* MW 2 */ + 5341 "10001000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5343 "00000000" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 113 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5344 "00011000" // VCONV.bfp16ebs8.fp32 ex2, dm4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5345 "00110110" // /* MW 3 */ + 5346 "00001010" // /* MW 2 */ + 5347 "00001001" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 114 29 first +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5348 "01100010" // VCONV.bfp16ebs8.fp32 ex3, dm4; VMAC.f dm3, dm3, ex0, ex1, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5349 "00101001" // /* MW 7 */ + 5350 "01100000" // /* MW 6 */ + 5351 "10001011" // /* MW 5 */ + 5352 "00000010" // /* MW 4 */ + 5353 "11000000" // /* MW 3 */ + 5354 "01000110" // /* MW 2 */ + 5355 "00110001" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5357 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5359 "00000000" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5360 "01001000" // VMAC.f dm1, dm1, ex2, ex1, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5361 "00101001" // /* MW 3 */ + 5362 "00100100" // /* MW 2 */ + 5363 "10001001" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5364 "01001000" // VMAC.f dm2, dm2, ex0, ex3, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5365 "01101001" // /* MW 3 */ + 5366 "01000000" // /* MW 2 */ + 5367 "10001010" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5368 "01001000" // VMAC.f dm0, dm0, ex2, ex3, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5369 "01101001" // /* MW 3 */ + 5370 "00000100" // /* MW 2 */ + 5371 "10001000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 202 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5372 "10011000" // VST bmlh3, [p1, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5373 "10100110" // /* MW 3 */ + 5374 "00010101" // /* MW 2 */ + 5375 "00001001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 202 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5376 "10011000" // VST bmhl3, [p1, #128] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5377 "11000110" // /* MW 3 */ + 5378 "00100101" // /* MW 2 */ + 5379 "00001001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 202 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5380 "10011000" // VST bmhh3, [p1, #192] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5381 "11100110" // /* MW 3 */ + 5382 "00110101" // /* MW 2 */ + 5383 "00001001" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 2 "gemm_bfp16.h" 202 6 +.src_ref 2 "gemm_bfp16.h" 205 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5384 "10111010" // PADDB [p5], m6; VST bmll3, [p1]; MOV p1, p5 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5385 "01110110" // /* MW 9 */ + 5386 "01100000" // /* MW 8 */ + 5387 "10110101" // /* MW 7 */ + 5388 "00000000" // /* MW 6 */ + 5389 "10010000" // /* MW 5 */ + 5390 "11001011" // /* MW 4 */ + 5391 "11010101" // /* MW 3 */ + 5392 "10110000" // /* MW 2 */ + 5393 "00100000" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 first +.src_ref 2 "gemm_bfp16.h" 175 6 +.src_ref 2 "gemm_bfp16.h" 203 6 +.src_ref 2 "gemm_bfp16.h" 203 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5394 "10111010" // PADDB.2D [p1], d2; VST bmlh2, [p0, #64]; MOV m2, m3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5395 "01110110" // /* MW 9 */ + 5396 "00000000" // /* MW 8 */ + 5397 "00000011" // /* MW 7 */ + 5398 "00000001" // /* MW 6 */ + 5399 "10010000" // /* MW 5 */ + 5400 "01010011" // /* MW 4 */ + 5401 "11010001" // /* MW 3 */ + 5402 "10100100" // /* MW 2 */ + 5403 "00000010" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 203 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5404 "10011000" // VST bmhl2, [p0, #128] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5405 "01000110" // /* MW 3 */ + 5406 "00100101" // /* MW 2 */ + 5407 "00001000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 203 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5408 "10011000" // VST bmhh2, [p0, #192] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5409 "01100110" // /* MW 3 */ + 5410 "00110101" // /* MW 2 */ + 5411 "00001000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 175 6 +.src_ref 2 "gemm_bfp16.h" 203 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5412 "00000010" // VST bmll2, [p0]; MOV p0, p3 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5413 "01110000" // /* MW 7 */ + 5414 "01100000" // /* MW 6 */ + 5415 "00110011" // /* MW 5 */ + 5416 "00000000" // /* MW 4 */ + 5417 "11010000" // /* MW 3 */ + 5418 "10100000" // /* MW 2 */ + 5419 "00000000" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 2 "gemm_bfp16.h" 175 6 first +.src_ref 2 "gemm_bfp16.h" 204 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5420 "10111010" // PADDB [p0], m3; VST bmlh1, [p4, #64]; MOV m3, r18 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5421 "01110110" // /* MW 9 */ + 5422 "10010000" // /* MW 8 */ + 5423 "10000100" // /* MW 7 */ + 5424 "00000001" // /* MW 6 */ + 5425 "10010000" // /* MW 5 */ + 5426 "01101011" // /* MW 4 */ + 5427 "11010000" // /* MW 3 */ + 5428 "10010100" // /* MW 2 */ + 5429 "10000010" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 175 6 +.src_ref 2 "gemm_bfp16.h" 204 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5430 "00001100" // VLDA bmlh2, [p0, #64]; VST bmhl1, [p4, #128] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5431 "10001101" // /* MW 5 */ + 5432 "01001001" // /* MW 4 */ + 5433 "10111000" // /* MW 3 */ + 5434 "10100110" // /* MW 2 */ + 5435 "00000010" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 172 2 first +.src_ref 2 "gemm_bfp16.h" 175 6 +.src_ref 2 "gemm_bfp16.h" 204 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5436 "01111010" // VLDA bmhl2, [p0, #128]; VST bmhh1, [p4, #192]; JNZD r23, r23, p2 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 5437 "10100000" // /* MW 9 */ + 5438 "11101110" // /* MW 8 */ + 5439 "00000101" // /* MW 7 */ + 5440 "10000000" // /* MW 6 */ + 5441 "11100110" // /* MW 5 */ + 5442 "00110100" // /* MW 4 */ + 5443 "10110100" // /* MW 3 */ + 5444 "10101010" // /* MW 2 */ + 5445 "00000100" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 2 "gemm_bfp16.h" 174 6 +.src_ref 2 "gemm_bfp16.h" 175 6 first +.src_ref 2 "gemm_bfp16.h" 176 6 +.src_ref 2 "gemm_bfp16.h" 177 6 first +.src_ref 2 "gemm_bfp16.h" 204 6 first +.delay_slot +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5446 "11110110" // VLDA bmhh2, [p0, #192]; PADDB [p3], m6; VST bmll1, [p4]; MOV p4, p3 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5447 "01110000" // /* MW 11 */ + 5448 "01100000" // /* MW 10 */ + 5449 "00110011" // /* MW 9 */ + 5450 "10000010" // /* MW 8 */ + 5451 "10000110" // /* MW 7 */ + 5452 "00000100" // /* MW 6 */ + 5453 "00100100" // /* MW 5 */ + 5454 "10010111" // /* MW 4 */ + 5455 "10110111" // /* MW 3 */ + 5456 "10101110" // /* MW 2 */ + 5457 "00000110" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 177 6 +.src_ref 2 "gemm_bfp16.h" 205 6 first +.delay_slot +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5458 "00001100" // VLDA bmlh0, [p3, #64]; VST bmlh0, [p5, #64] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5459 "01001101" // /* MW 5 */ + 5460 "00101000" // /* MW 4 */ + 5461 "10111010" // /* MW 3 */ + 5462 "10000110" // /* MW 2 */ + 5463 "01100010" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 177 6 first +.src_ref 2 "gemm_bfp16.h" 205 6 +.delay_slot +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5464 "00001100" // VLDA bmhl0, [p3, #128]; VST bmhl0, [p5, #128] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5465 "10001101" // /* MW 5 */ + 5466 "01001000" // /* MW 4 */ + 5467 "10111010" // /* MW 3 */ + 5468 "10001010" // /* MW 2 */ + 5469 "01100100" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 177 6 +.src_ref 2 "gemm_bfp16.h" 205 6 first +.delay_slot +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5470 "00001100" // VLDA bmhh0, [p3, #192]; VST bmhh0, [p5, #192] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5471 "11001101" // /* MW 5 */ + 5472 "01101000" // /* MW 4 */ + 5473 "10111010" // /* MW 3 */ + 5474 "10001110" // /* MW 2 */ + 5475 "01100110" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 2 "gemm_bfp16.h" 174 6 +.src_ref 2 "gemm_bfp16.h" 176 6 first +.src_ref 2 "gemm_bfp16.h" 177 6 first +.src_ref 2 "gemm_bfp16.h" 205 6 +.delay_slot +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5476 "11110110" // VLDA bmll0, [p3]; PADDB [p4], m1; VST bmll0, [p5]; MOV p5, p4 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5477 "01110000" // /* MW 11 */ + 5478 "01100000" // /* MW 10 */ + 5479 "10110100" // /* MW 9 */ + 5480 "10000010" // /* MW 8 */ + 5481 "00000110" // /* MW 7 */ + 5482 "00000100" // /* MW 6 */ + 5483 "00100101" // /* MW 5 */ + 5484 "01010111" // /* MW 4 */ + 5485 "10111000" // /* MW 3 */ + 5486 "10000010" // /* MW 2 */ + 5487 "01100000" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 5 "aie_core.h" 100 15 +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 2 "gemm_bfp16.h" 174 6 first +.src_ref 2 "gemm_bfp16.h" 176 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 5488 "11110110" // VLDA bmlh1, [p4, #64]; PADDB [p5], m7; MOVS p3, p5; MOV m1, r16 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5489 "01110000" // /* MW 11 */ + 5490 "00010000" // /* MW 10 */ + 5491 "10000100" // /* MW 9 */ + 5492 "00000000" // /* MW 8 */ + 5493 "10001011" // /* MW 7 */ + 5494 "10010100" // /* MW 6 */ + 5495 "00100011" // /* MW 5 */ + 5496 "11010111" // /* MW 4 */ + 5497 "10111011" // /* MW 3 */ + 5498 "10010110" // /* MW 2 */ + 5499 "10000010" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 first +.src_ref 2 "gemm_bfp16.h" 176 6 first +.src_ref 2 "gemm_bfp16.h" 182 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5500 "10111010" // VLDA bmhl1, [p4, #128]; PADDB.2D [p3], d3; ADD.NC lc, r26, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5501 "01001110" // /* MW 9 */ + 5502 "10111111" // /* MW 8 */ + 5503 "10111110" // /* MW 7 */ + 5504 "00000010" // /* MW 6 */ + 5505 "10010000" // /* MW 5 */ + 5506 "01110011" // /* MW 4 */ + 5507 "10110011" // /* MW 3 */ + 5508 "10011010" // /* MW 2 */ + 5509 "10000100" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 176 6 +.src_ref 2 "gemm_bfp16.h" 182 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5510 "10111010" // VLDA bmhh1, [p4, #192]; MOVXM ls, #5696 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5511 "00010000" // /* MW 9 */ + 5512 "00100000" // /* MW 8 */ + 5513 "01111011" // /* MW 7 */ + 5514 "00000100" // /* MW 6 */ + 5515 "00000000" // /* MW 5 */ + 5516 "00000000" // /* MW 4 */ + 5517 "10110000" // /* MW 3 */ + 5518 "10011110" // /* MW 2 */ + 5519 "10000110" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 313 19 +.src_ref 2 "gemm_bfp16.h" 176 6 +.src_ref 2 "gemm_bfp16.h" 182 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5520 "01111110" // VLDA bmll1, [p4]; VLDB.3D x7, [p7], d1; MOVS p4, p7; MOVXM le, #5776 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5521 "01100000" // /* MW 13 */ + 5522 "10010001" // /* MW 12 */ + 5523 "10010011" // /* MW 11 */ + 5524 "00000010" // /* MW 10 */ + 5525 "01101001" // /* MW 9 */ + 5526 "10110111" // /* MW 8 */ + 5527 "00000000" // /* MW 7 */ + 5528 "00000000" // /* MW 6 */ + 5529 "11101000" // /* MW 5 */ + 5530 "01110011" // /* MW 4 */ + 5531 "10111110" // /* MW 3 */ + 5532 "10010010" // /* MW 2 */ + 5533 "10000000" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 313 19 first +.src_ref 2 "gemm_bfp16.h" 174 6 first +.src_ref 2 "gemm_bfp16.h" 175 6 +.src_ref 2 "gemm_bfp16.h" 203 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5534 "10111010" // VLDA bmlh3, [p5, #64]; PADDB [p4], m4; MOV m3, m2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5535 "01111110" // /* MW 9 */ + 5536 "00000000" // /* MW 8 */ + 5537 "10000010" // /* MW 7 */ + 5538 "00000001" // /* MW 6 */ + 5539 "10010000" // /* MW 5 */ + 5540 "10001011" // /* MW 4 */ + 5541 "10110100" // /* MW 3 */ + 5542 "10110110" // /* MW 2 */ + 5543 "10100010" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 313 19 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5544 "00110010" // VLDA x9, [p4]; VLDB x5, [p4, #64]; MOVS p4, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5545 "10001011" // /* MW 7 */ + 5546 "10011100" // /* MW 6 */ + 5547 "11101100" // /* MW 5 */ + 5548 "00101010" // /* MW 4 */ + 5549 "01111000" // /* MW 3 */ + 5550 "11001011" // /* MW 2 */ + 5551 "10000000" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 6 "array_helpers.hpp" 313 19 first +.src_ref 2 "gemm_bfp16.h" 174 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5552 "00110010" // VLDA bmhl3, [p5, #128]; VLDB x4, [p7, #64]; PADDS [p4], m4 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5553 "01011011" // /* MW 7 */ + 5554 "10001000" // /* MW 6 */ + 5555 "01101100" // /* MW 5 */ + 5556 "00101010" // /* MW 4 */ + 5557 "10111110" // /* MW 3 */ + 5558 "10111010" // /* MW 2 */ + 5559 "10100100" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 2 "gemm_bfp16.h" 174 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5560 "00111100" // VLDA bmhh3, [p5, #192]; VLDB.3D x7, [p7], d1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5561 "11101000" // /* MW 5 */ + 5562 "01110011" // /* MW 4 */ + 5563 "10111110" // /* MW 3 */ + 5564 "10111110" // /* MW 2 */ + 5565 "10100110" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 6 "array_helpers.hpp" 313 19 +.src_ref 2 "gemm_bfp16.h" 174 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5566 "10111010" // VLDA bmll3, [p5]; VLDB x5, [p4, #64]; MOV p5, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5567 "01111110" // /* MW 9 */ + 5568 "01100000" // /* MW 8 */ + 5569 "10110110" // /* MW 7 */ + 5570 "00000010" // /* MW 6 */ + 5571 "01110100" // /* MW 5 */ + 5572 "00010101" // /* MW 4 */ + 5573 "10110100" // /* MW 3 */ + 5574 "10110010" // /* MW 2 */ + 5575 "10100000" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 313 19 first +.src_ref 7 "accum.hpp" 903 19 first +.src_ref 7 "accum.hpp" 940 83 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5576 "00111100" // VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;PADDB [p5], m5 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5577 "00100000" // /* MW 5 */ + 5578 "01010111" // /* MW 4 */ + 5579 "01111011" // /* MW 3 */ + 5580 "01000101" // /* MW 2 */ + 5581 "11000011" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 8 "transpose.hpp" 224 15 first +.src_ref 2 "gemm_bfp16.h" 175 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5582 "10111010" // VLDA bmll2, [p0]; VLDB x9, [p4]; VSHUFFLE x6, x7, x4, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5583 "00111110" // /* MW 9 */ + 5584 "00100110" // /* MW 8 */ + 5585 "10011101" // /* MW 7 */ + 5586 "00000001" // /* MW 6 */ + 5587 "01110100" // /* MW 5 */ + 5588 "00000110" // /* MW 4 */ + 5589 "10110100" // /* MW 3 */ + 5590 "10100010" // /* MW 2 */ + 5591 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1365 19 first +.src_ref 8 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5592 "01111000" // VSHUFFLE x7, x7, x4, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5593 "01010100" // /* MW 3 */ + 5594 "10111010" // /* MW 2 */ + 5595 "00011011" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 313 19 +.src_ref 8 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5596 "10111010" // VLDB x4, [p7, #64]; MOVS p4, p7; VSHUFFLE x8, x9, x5, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5597 "00110110" // /* MW 9 */ + 5598 "01100110" // /* MW 8 */ + 5599 "00100101" // /* MW 7 */ + 5600 "00000010" // /* MW 6 */ + 5601 "00110100" // /* MW 5 */ + 5602 "00010101" // /* MW 4 */ + 5603 "01100111" // /* MW 3 */ + 5604 "10010001" // /* MW 2 */ + 5605 "10010011" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1365 19 first +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 6 "array_helpers.hpp" 313 19 first +.src_ref 7 "accum.hpp" 940 83 first +.src_ref 7 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 8 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5606 "01100110" // VLDA.CONV.fp32.bf16 cml4, [p5];PADDB [p4], m4; VSHUFFLE x9, x9, x5, r21; VMUL.f dm4, y3, y5, r22 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5607 "01010001" // /* MW 11 */ + 5608 "11101101" // /* MW 10 */ + 5609 "10110100" // /* MW 9 */ + 5610 "01100010" // /* MW 8 */ + 5611 "11010100" // /* MW 7 */ + 5612 "11001010" // /* MW 6 */ + 5613 "00100100" // /* MW 5 */ + 5614 "00010111" // /* MW 4 */ + 5615 "01111001" // /* MW 3 */ + 5616 "11000101" // /* MW 2 */ + 5617 "10100000" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 313 19 +.src_ref 7 "accum.hpp" 903 19 first +.src_ref 7 "accum.hpp" 940 83 +.src_ref 8 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5618 "11110110" // VLDA.CONV.fp32.bf16 cmh4, [p5, #64];VLDB x5, [p4, #64]; MOVS p5, p6; VSHUFFLE x6, x7, x4, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5619 "00110000" // /* MW 11 */ + 5620 "00100110" // /* MW 10 */ + 5621 "10011101" // /* MW 9 */ + 5622 "00000001" // /* MW 8 */ + 5623 "10001011" // /* MW 7 */ + 5624 "10011000" // /* MW 6 */ + 5625 "11101101" // /* MW 5 */ + 5626 "00101010" // /* MW 4 */ + 5627 "01111000" // /* MW 3 */ + 5628 "11001101" // /* MW 2 */ + 5629 "10100010" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1365 19 first +.src_ref 6 "array_helpers.hpp" 313 19 first +.src_ref 8 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5630 "11010100" // PADDA [p5], m5; VSHUFFLE x7, x7, x4, r21 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5631 "10101000" // /* MW 5 */ + 5632 "01110100" // /* MW 4 */ + 5633 "11110111" // /* MW 3 */ + 5634 "00001100" // /* MW 2 */ + 5635 "10110101" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 7 "accum.hpp" 940 83 first +.src_ref 7 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 7 "mmul_bf16_bf16.hpp" 113 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5636 "01001010" // VLDA.CONV.fp32.bf16 cmh4, [p6, #64]; VCONV.bfp16ebs8.fp32 ex0, dm4; VMUL.f dm4, y4, y5, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5637 "01010001" // /* MW 9 */ + 5638 "11110001" // /* MW 8 */ + 5639 "10110100" // /* MW 7 */ + 5640 "00001001" // /* MW 6 */ + 5641 "00110110" // /* MW 5 */ + 5642 "00001010" // /* MW 4 */ + 5643 "01110000" // /* MW 3 */ + 5644 "11001101" // /* MW 2 */ + 5645 "11000010" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 7 "accum.hpp" 903 19 first +.src_ref 7 "accum.hpp" 940 83 +.src_ref 8 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5646 "10111010" // VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;VLDB.3D x7, [p7], d1; VSHUFFLE x8, x9, x5, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5647 "00111110" // /* MW 9 */ + 5648 "01100110" // /* MW 8 */ + 5649 "00100101" // /* MW 7 */ + 5650 "00000010" // /* MW 6 */ + 5651 "11110100" // /* MW 5 */ + 5652 "00111001" // /* MW 4 */ + 5653 "01110111" // /* MW 3 */ + 5654 "01000101" // /* MW 2 */ + 5655 "11000011" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1365 19 first +.src_ref 8 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5656 "01111000" // VSHUFFLE x9, x9, x5, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5657 "11010100" // /* MW 3 */ + 5658 "11001010" // /* MW 2 */ + 5659 "00011100" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 114 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5660 "00011000" // VCONV.bfp16ebs8.fp32 ex1, dm4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5661 "00110110" // /* MW 3 */ + 5662 "10001010" // /* MW 2 */ + 5663 "00001000" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 7 "accum.hpp" 940 83 first +.src_ref 7 "mmul_bf16_bf16.hpp" 111 36 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5664 "01001010" // VLDA.CONV.fp32.bf16 cml4, [p5];VLDB x9, [p4]; VMUL.f dm4, y3, y5, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5665 "01010001" // /* MW 9 */ + 5666 "11101101" // /* MW 8 */ + 5667 "10110100" // /* MW 7 */ + 5668 "00011101" // /* MW 6 */ + 5669 "01110100" // /* MW 5 */ + 5670 "00000110" // /* MW 4 */ + 5671 "01110100" // /* MW 3 */ + 5672 "11000101" // /* MW 2 */ + 5673 "10100000" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 7 "accum.hpp" 903 19 first +.src_ref 7 "accum.hpp" 940 83 +.src_ref 7 "mmul_bf16_bf16.hpp" 113 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5674 "00001100" // VLDA.CONV.fp32.bf16 cmh4, [p5, #64]; VCONV.bfp16ebs8.fp32 ex2, dm4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5675 "01101100" // /* MW 5 */ + 5676 "00010100" // /* MW 4 */ + 5677 "01110010" // /* MW 3 */ + 5678 "11001101" // /* MW 2 */ + 5679 "10100010" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 114 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5680 "11100001" // NOPA; NOPB; VCONV.bfp16ebs8.fp32 ex3, dm4;NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5681 "00000000" // /* MW 15 */ + 5682 "00000000" // /* MW 14 */ + 5683 "01111000" // /* MW 13 */ + 5684 "10100101" // /* MW 12 */ + 5685 "00000001" // /* MW 11 */ + 5686 "00000000" // /* MW 10 */ + 5687 "00000000" // /* MW 9 */ + 5688 "00000000" // /* MW 8 */ + 5689 "00110110" // /* MW 7 */ + 5690 "10001010" // /* MW 6 */ + 5691 "00100001" // /* MW 5 */ + 5692 "00000000" // /* MW 4 */ + 5693 "11110000" // /* MW 3 */ + 5694 "00101100" // /* MW 2 */ + 5695 "00000000" // /* MW 1 */ +.label ZLS_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_1440 +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 313 19 +.src_ref 7 "mmul_bf16_bf16.hpp" 111 36 first +.begin_of_loop +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 5696 "01001010" // VLDB x4, [p7, #64]; MOV p5, p6; VMUL.f dm4, y4, y5, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5697 "01010001" // /* MW 9 */ + 5698 "11110001" // /* MW 8 */ + 5699 "10110100" // /* MW 7 */ + 5700 "11100110" // /* MW 6 */ + 5701 "11000000" // /* MW 5 */ + 5702 "01101100" // /* MW 4 */ + 5703 "01101101" // /* MW 3 */ + 5704 "00101010" // /* MW 2 */ + 5705 "00001110" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 6 "array_helpers.hpp" 313 19 +.src_ref 7 "accum.hpp" 940 83 first +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 8 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5706 "01001011" // VLDA.CONV.fp32.bf16 cmh4, [p6, #64];VLDB.3D x7, [p7], d1; MOVS p4, p7; NOPX; VSHUFFLE x6, x7, x4, r19; VMAC.f dm3, dm3, ex0, ex1, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5707 "00000001" // /* MW 15 */ + 5708 "01011011" // /* MW 14 */ + 5709 "00111100" // /* MW 13 */ + 5710 "00100110" // /* MW 12 */ + 5711 "10011101" // /* MW 11 */ + 5712 "00000001" // /* MW 10 */ + 5713 "00000000" // /* MW 9 */ + 5714 "00000000" // /* MW 8 */ + 5715 "10001011" // /* MW 7 */ + 5716 "10011100" // /* MW 6 */ + 5717 "11101100" // /* MW 5 */ + 5718 "01110011" // /* MW 4 */ + 5719 "01111110" // /* MW 3 */ + 5720 "11001101" // /* MW 2 */ + 5721 "11000010" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 +.src_ref 8 "vector.hpp" 1365 19 first +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 313 19 first +.src_ref 7 "accum.hpp" 903 19 first +.src_ref 7 "accum.hpp" 940 83 +.src_ref 7 "mmul_bf16_bf16.hpp" 113 29 first +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 +.src_ref 8 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5722 "01001011" // VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;PADDB [p4], m4; VCONV.bfp16ebs8.fp32 ex0, dm4;NOPX; VSHUFFLE x7, x7, x4, r21; VMAC.f dm1, dm1, ex2, ex1, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5723 "00100001" // /* MW 15 */ + 5724 "01001001" // /* MW 14 */ + 5725 "00111100" // /* MW 13 */ + 5726 "00101010" // /* MW 12 */ + 5727 "11011101" // /* MW 11 */ + 5728 "00000001" // /* MW 10 */ + 5729 "00000000" // /* MW 9 */ + 5730 "00000000" // /* MW 8 */ + 5731 "00110110" // /* MW 7 */ + 5732 "00001010" // /* MW 6 */ + 5733 "00100000" // /* MW 5 */ + 5734 "00010111" // /* MW 4 */ + 5735 "01111001" // /* MW 3 */ + 5736 "01000101" // /* MW 2 */ + 5737 "11000011" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 6 "array_helpers.hpp" 313 19 +.src_ref 7 "mmul_bf16_bf16.hpp" 114 29 first +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5738 "01100110" // PADDA [p5], m5; VLDB x5, [p4, #64]; VCONV.bfp16ebs8.fp32 ex1, dm4; VMAC.f dm2, dm2, ex0, ex3, r17 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5739 "01101001" // /* MW 11 */ + 5740 "01000000" // /* MW 10 */ + 5741 "10001010" // /* MW 9 */ + 5742 "00001110" // /* MW 8 */ + 5743 "00011011" // /* MW 7 */ + 5744 "01000101" // /* MW 6 */ + 5745 "11101000" // /* MW 5 */ + 5746 "00101010" // /* MW 4 */ + 5747 "11111000" // /* MW 3 */ + 5748 "00001100" // /* MW 2 */ + 5749 "10110101" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 7 "accum.hpp" 940 83 first +.src_ref 7 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 8 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5750 "01100110" // VLDA.CONV.fp32.bf16 cml4, [p5];VLDB x9, [p4]; VSHUFFLE x8, x9, x5, r19; VMUL.f dm4, y3, y5, r22 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5751 "01010001" // /* MW 11 */ + 5752 "11101101" // /* MW 10 */ + 5753 "10110100" // /* MW 9 */ + 5754 "01100010" // /* MW 8 */ + 5755 "11001100" // /* MW 7 */ + 5756 "01001010" // /* MW 6 */ + 5757 "11101100" // /* MW 5 */ + 5758 "00001100" // /* MW 4 */ + 5759 "01111000" // /* MW 3 */ + 5760 "11000101" // /* MW 2 */ + 5761 "10100000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1365 19 first +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 7 "accum.hpp" 903 19 first +.src_ref 7 "accum.hpp" 940 83 +.src_ref 7 "mmul_bf16_bf16.hpp" 113 29 first +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 8 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5762 "01101110" // VLDA.CONV.fp32.bf16 cmh4, [p5, #64]; VCONV.bfp16ebs8.fp32 ex2, dm4; VSHUFFLE x9, x9, x5, r21; VMAC.f dm0, dm0, ex2, ex3, r17 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5763 "01101001" // /* MW 13 */ + 5764 "00000100" // /* MW 12 */ + 5765 "10001000" // /* MW 11 */ + 5766 "10100011" // /* MW 10 */ + 5767 "01010110" // /* MW 9 */ + 5768 "01100110" // /* MW 8 */ + 5769 "00000000" // /* MW 7 */ + 5770 "00000000" // /* MW 6 */ + 5771 "01101100" // /* MW 5 */ + 5772 "00010100" // /* MW 4 */ + 5773 "01110010" // /* MW 3 */ + 5774 "11001101" // /* MW 2 */ + 5775 "10100010" // /* MW 1 */ +.label ZLE_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_1520 +.src_ref 7 "mmul_bf16_bf16.hpp" 114 29 first +.end_of_loop +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5776 "11100001" // NOPA; NOPB; VCONV.bfp16ebs8.fp32 ex3, dm4;NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5777 "00000000" // /* MW 15 */ + 5778 "00000000" // /* MW 14 */ + 5779 "01111000" // /* MW 13 */ + 5780 "10100101" // /* MW 12 */ + 5781 "00000001" // /* MW 11 */ + 5782 "00000000" // /* MW 10 */ + 5783 "00000000" // /* MW 9 */ + 5784 "00000000" // /* MW 8 */ + 5785 "00110110" // /* MW 7 */ + 5786 "10001010" // /* MW 6 */ + 5787 "00100001" // /* MW 5 */ + 5788 "00000000" // /* MW 4 */ + 5789 "11110000" // /* MW 3 */ + 5790 "00101100" // /* MW 2 */ + 5791 "00000000" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 313 19 +.src_ref 7 "accum.hpp" 940 83 first +.src_ref 7 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 2 "gemm_bfp16.h" 204 6 +.src_ref 2 "gemm_bfp16.h" 205 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 5792 "01101110" // VLDA.CONV.fp32.bf16 cmh4, [p6, #64]; MOVS p4, p1; MOV p5, p6; VMUL.f dm4, y4, y5, r22 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5793 "01010001" // /* MW 13 */ + 5794 "11110001" // /* MW 12 */ + 5795 "10110100" // /* MW 11 */ + 5796 "00000111" // /* MW 10 */ + 5797 "01100110" // /* MW 9 */ + 5798 "01101011" // /* MW 8 */ + 5799 "00000000" // /* MW 7 */ + 5800 "00000000" // /* MW 6 */ + 5801 "00010110" // /* MW 5 */ + 5802 "00001001" // /* MW 4 */ + 5803 "01111001" // /* MW 3 */ + 5804 "11001101" // /* MW 2 */ + 5805 "11000010" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 6 "array_helpers.hpp" 313 19 first +.src_ref 7 "accum.hpp" 903 19 first +.src_ref 7 "accum.hpp" 940 83 +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 8 "transpose.hpp" 224 15 first +.src_ref 2 "gemm_bfp16.h" 203 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5806 "01001011" // VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;PADDB [p5], m5; MOVS p0, p1; NOPX; VSHUFFLE x6, x7, x4, r19; VMAC.f dm3, dm3, ex0, ex1, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5807 "00000001" // /* MW 15 */ + 5808 "01011011" // /* MW 14 */ + 5809 "00111100" // /* MW 13 */ + 5810 "00100110" // /* MW 12 */ + 5811 "10011101" // /* MW 11 */ + 5812 "00000001" // /* MW 10 */ + 5813 "00000000" // /* MW 9 */ + 5814 "00000000" // /* MW 8 */ + 5815 "10001011" // /* MW 7 */ + 5816 "10000100" // /* MW 6 */ + 5817 "00100000" // /* MW 5 */ + 5818 "01010111" // /* MW 4 */ + 5819 "01111011" // /* MW 3 */ + 5820 "01000101" // /* MW 2 */ + 5821 "11000011" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1365 19 first +.src_ref 7 "mmul_bf16_bf16.hpp" 113 29 first +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 +.src_ref 8 "transpose.hpp" 225 15 first +.src_ref 2 "gemm_bfp16.h" 202 6 first +.src_ref 2 "gemm_bfp16.h" 268 37 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5822 "01001011" // MOVA dj1, #-304; PADDB [p1], m7; VCONV.bfp16ebs8.fp32 ex0, dm4;NOPX; VSHUFFLE x7, x7, x4, r21; VMAC.f dm1, dm1, ex2, ex1, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5823 "00100001" // /* MW 15 */ + 5824 "01001001" // /* MW 14 */ + 5825 "00111100" // /* MW 13 */ + 5826 "00101010" // /* MW 12 */ + 5827 "11011101" // /* MW 11 */ + 5828 "00000001" // /* MW 10 */ + 5829 "00000000" // /* MW 9 */ + 5830 "00000000" // /* MW 8 */ + 5831 "00110110" // /* MW 7 */ + 5832 "00001010" // /* MW 6 */ + 5833 "00100000" // /* MW 5 */ + 5834 "11010111" // /* MW 4 */ + 5835 "10000011" // /* MW 3 */ + 5836 "00000110" // /* MW 2 */ + 5837 "11011010" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 114 29 first +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 2 "gemm_bfp16.h" 203 6 first +.src_ref 2 "gemm_bfp16.h" 268 37 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5838 "01101110" // PADDA [p0], m3; VCONV.bfp16ebs8.fp32 ex1, dm4; MOV p7, r20; VMAC.f dm2, dm2, ex0, ex3, r17 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5839 "01101001" // /* MW 13 */ + 5840 "01000000" // /* MW 12 */ + 5841 "10001010" // /* MW 11 */ + 5842 "00000111" // /* MW 10 */ + 5843 "01010001" // /* MW 9 */ + 5844 "01111011" // /* MW 8 */ + 5845 "00000000" // /* MW 7 */ + 5846 "00000000" // /* MW 6 */ + 5847 "01101100" // /* MW 5 */ + 5848 "00010100" // /* MW 4 */ + 5849 "11110001" // /* MW 3 */ + 5850 "00001100" // /* MW 2 */ + 5851 "00001101" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 7 "accum.hpp" 940 83 first +.src_ref 7 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 8 "transpose.hpp" 224 15 first +.src_ref 2 "gemm_bfp16.h" 268 12 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5852 "01101110" // VLDA.CONV.fp32.bf16 cml4, [p5]; MOVS p6, r25; VSHUFFLE x8, x9, x5, r19; VMUL.f dm4, y3, y5, r22 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5853 "01010001" // /* MW 13 */ + 5854 "11101101" // /* MW 12 */ + 5855 "10110100" // /* MW 11 */ + 5856 "01100011" // /* MW 10 */ + 5857 "01010110" // /* MW 9 */ + 5858 "01100010" // /* MW 8 */ + 5859 "00000000" // /* MW 7 */ + 5860 "00000000" // /* MW 6 */ + 5861 "00010110" // /* MW 5 */ + 5862 "00110010" // /* MW 4 */ + 5863 "01111101" // /* MW 3 */ + 5864 "11000101" // /* MW 2 */ + 5865 "10100000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1365 19 first +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 7 "accum.hpp" 903 19 first +.src_ref 7 "accum.hpp" 940 83 +.src_ref 7 "mmul_bf16_bf16.hpp" 113 29 first +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 8 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5866 "01101110" // VLDA.CONV.fp32.bf16 cmh4, [p5, #64]; VCONV.bfp16ebs8.fp32 ex2, dm4; VSHUFFLE x9, x9, x5, r21; VMAC.f dm0, dm0, ex2, ex3, r17 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5867 "01101001" // /* MW 13 */ + 5868 "00000100" // /* MW 12 */ + 5869 "10001000" // /* MW 11 */ + 5870 "10100011" // /* MW 10 */ + 5871 "01010110" // /* MW 9 */ + 5872 "01100110" // /* MW 8 */ + 5873 "00000000" // /* MW 7 */ + 5874 "00000000" // /* MW 6 */ + 5875 "01101100" // /* MW 5 */ + 5876 "00010100" // /* MW 4 */ + 5877 "01110010" // /* MW 3 */ + 5878 "11001101" // /* MW 2 */ + 5879 "10100010" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 114 29 first +.src_ref 2 "gemm_bfp16.h" 176 6 +.src_ref 2 "gemm_bfp16.h" 204 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5880 "00000010" // VCONV.bfp16ebs8.fp32 ex3, dm4; MOV m1, r24 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5881 "01110000" // /* MW 7 */ + 5882 "00010000" // /* MW 6 */ + 5883 "10000110" // /* MW 5 */ + 5884 "00000000" // /* MW 4 */ + 5885 "11000000" // /* MW 3 */ + 5886 "01000110" // /* MW 2 */ + 5887 "00110001" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 2 "gemm_bfp16.h" 204 6 first +.src_ref 2 "gemm_bfp16.h" 205 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5888 "01001010" // PADDB [p4], m1; MOV p5, p4; VMAC.f dm3, dm3, ex0, ex1, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5889 "00101001" // /* MW 9 */ + 5890 "01100000" // /* MW 8 */ + 5891 "10001011" // /* MW 7 */ + 5892 "11100110" // /* MW 6 */ + 5893 "11000000" // /* MW 5 */ + 5894 "01101000" // /* MW 4 */ + 5895 "00100101" // /* MW 3 */ + 5896 "01010111" // /* MW 2 */ + 5897 "00001000" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 7 "mmul_bf16_bf16.hpp" 113 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5898 "01100010" // VCONV.bfp16ebs8.fp32 ex0, dm4; VMUL.f dm4, y4, y5, r22 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5899 "01010001" // /* MW 7 */ + 5900 "11110001" // /* MW 6 */ + 5901 "10110100" // /* MW 5 */ + 5902 "00000010" // /* MW 4 */ + 5903 "11000000" // /* MW 3 */ + 5904 "01000110" // /* MW 2 */ + 5905 "00000001" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5906 "01001000" // VMAC.f dm1, dm1, ex2, ex1, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5907 "00101001" // /* MW 3 */ + 5908 "00100100" // /* MW 2 */ + 5909 "10001001" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 114 29 first +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5910 "01100010" // VCONV.bfp16ebs8.fp32 ex1, dm4; VMAC.f dm2, dm2, ex0, ex3, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5911 "01101001" // /* MW 7 */ + 5912 "01000000" // /* MW 6 */ + 5913 "10001010" // /* MW 5 */ + 5914 "00000010" // /* MW 4 */ + 5915 "11000000" // /* MW 3 */ + 5916 "01000110" // /* MW 2 */ + 5917 "00010001" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5918 "01001000" // VMAC.f dm0, dm0, ex2, ex3, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5919 "01101001" // /* MW 3 */ + 5920 "00000100" // /* MW 2 */ + 5921 "10001000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5923 "00000000" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 113 29 first +.src_ref 2 "gemm_bfp16.h" 268 12 +.src_ref 2 "gemm_bfp16.h" 268 37 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5924 "10111010" // LDA r17, [p7, dj1]; VCONV.bfp16ebs8.fp32 ex2, dm4; MOV dj1, #280 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5925 "01010010" // /* MW 9 */ + 5926 "00011000" // /* MW 8 */ + 5927 "11000001" // /* MW 7 */ + 5928 "00000000" // /* MW 6 */ + 5929 "00110110" // /* MW 5 */ + 5930 "00001010" // /* MW 4 */ + 5931 "11010001" // /* MW 3 */ + 5932 "01000110" // /* MW 2 */ + 5933 "11100100" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 114 29 first +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 2 "gemm_bfp16.h" 268 12 +.src_ref 2 "gemm_bfp16.h" 269 34 +.src_ref 2 "gemm_bfp16.h" 269 48 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5934 "01001011" // LDA r16, [p6, dj1]; NOPB; VCONV.bfp16ebs8.fp32 ex3, dm4;MOVXM p7, #508416; VMAC.f dm3, dm3, ex0, ex1, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5935 "00000001" // /* MW 15 */ + 5936 "01011011" // /* MW 14 */ + 5937 "00010100" // /* MW 13 */ + 5938 "00000000" // /* MW 12 */ + 5939 "10110001" // /* MW 11 */ + 5940 "11110011" // /* MW 10 */ + 5941 "00000001" // /* MW 9 */ + 5942 "00000000" // /* MW 8 */ + 5943 "00110110" // /* MW 7 */ + 5944 "10001010" // /* MW 6 */ + 5945 "00100001" // /* MW 5 */ + 5946 "00000000" // /* MW 4 */ + 5947 "11010000" // /* MW 3 */ + 5948 "01000010" // /* MW 2 */ + 5949 "11000100" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5951 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5953 "00000000" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5954 "01001000" // VMAC.f dm1, dm1, ex2, ex1, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5955 "00101001" // /* MW 3 */ + 5956 "00100100" // /* MW 2 */ + 5957 "10001001" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5958 "01001000" // VMAC.f dm2, dm2, ex0, ex3, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5959 "01101001" // /* MW 3 */ + 5960 "01000000" // /* MW 2 */ + 5961 "10001010" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5962 "01001000" // VMAC.f dm0, dm0, ex2, ex3, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5963 "01101001" // /* MW 3 */ + 5964 "00000100" // /* MW 2 */ + 5965 "10001000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 202 6 first +.src_ref 2 "gemm_bfp16.h" 268 45 first + 5966 "01011100" // VST bmlh3, [p1, #64]; ADD r17, r17, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5967 "11111110" // /* MW 5 */ + 5968 "11000111" // /* MW 4 */ + 5969 "11011000" // /* MW 3 */ + 5970 "10110100" // /* MW 2 */ + 5971 "00100010" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 202 6 +.src_ref 2 "gemm_bfp16.h" 268 28 + 5972 "01011100" // VST bmhl3, [p1, #128]; NE r17, r17, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5973 "00010001" // /* MW 5 */ + 5974 "11000110" // /* MW 4 */ + 5975 "11011000" // /* MW 3 */ + 5976 "10111000" // /* MW 2 */ + 5977 "00100100" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 202 6 + 5978 "10011000" // VST bmhh3, [p1, #192] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5979 "11100110" // /* MW 3 */ + 5980 "00110101" // /* MW 2 */ + 5981 "00001001" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 2 "gemm_bfp16.h" 202 6 +.src_ref 2 "gemm_bfp16.h" 205 6 first + 5982 "10111010" // PADDB [p5], m6; VST bmll3, [p1]; MOV p1, p5 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5983 "01110110" // /* MW 9 */ + 5984 "01100000" // /* MW 8 */ + 5985 "10110101" // /* MW 7 */ + 5986 "00000000" // /* MW 6 */ + 5987 "10010000" // /* MW 5 */ + 5988 "11001011" // /* MW 4 */ + 5989 "11010101" // /* MW 3 */ + 5990 "10110000" // /* MW 2 */ + 5991 "00100000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 203 6 first + 5992 "10011000" // VST bmlh2, [p0, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5993 "00100110" // /* MW 3 */ + 5994 "00010101" // /* MW 2 */ + 5995 "00001000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 203 6 + 5996 "10011000" // VST bmhl2, [p0, #128] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5997 "01000110" // /* MW 3 */ + 5998 "00100101" // /* MW 2 */ + 5999 "00001000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 203 6 + 6000 "10011000" // VST bmhh2, [p0, #192] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6001 "01100110" // /* MW 3 */ + 6002 "00110101" // /* MW 2 */ + 6003 "00001000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 203 6 + 6004 "10011000" // VST bmll2, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6005 "00000110" // /* MW 3 */ + 6006 "00000101" // /* MW 2 */ + 6007 "00001000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 204 6 first + 6008 "10011000" // VST bmlh1, [p4, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6009 "10100110" // /* MW 3 */ + 6010 "00010100" // /* MW 2 */ + 6011 "00001100" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 204 6 + 6012 "10011000" // VST bmhl1, [p4, #128] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6013 "11000110" // /* MW 3 */ + 6014 "00100100" // /* MW 2 */ + 6015 "00001100" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 204 6 +.src_ref 2 "gemm_bfp16.h" 268 6 first + 6016 "00111010" // VST bmhh1, [p4, #192]; JNZ r17, #6128 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6128 delay_slots=5 */ + 6017 "01100001" // /* MW 9 */ + 6018 "00000000" // /* MW 8 */ + 6019 "00010000" // /* MW 7 */ + 6020 "11111110" // /* MW 6 */ + 6021 "00000010" // /* MW 5 */ + 6022 "00100010" // /* MW 4 */ + 6023 "11010000" // /* MW 3 */ + 6024 "10011100" // /* MW 2 */ + 6025 "10000110" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 204 6 first +.delay_slot + 6026 "10011000" // VST bmll1, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6027 "10000110" // /* MW 3 */ + 6028 "00000100" // /* MW 2 */ + 6029 "00001100" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 205 6 first +.delay_slot + 6030 "10011000" // VST bmlh0, [p5, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6031 "00100110" // /* MW 3 */ + 6032 "00010100" // /* MW 2 */ + 6033 "00001101" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 205 6 +.delay_slot + 6034 "10011000" // VST bmhl0, [p5, #128] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6035 "01000110" // /* MW 3 */ + 6036 "00100100" // /* MW 2 */ + 6037 "00001101" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 2 "gemm_bfp16.h" 205 6 +.delay_slot + 6038 "00000010" // VST bmhh0, [p5, #192]; MOV m2, r18 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6039 "01110000" // /* MW 7 */ + 6040 "10010000" // /* MW 6 */ + 6041 "00000100" // /* MW 5 */ + 6042 "00000001" // /* MW 4 */ + 6043 "11010000" // /* MW 3 */ + 6044 "10001100" // /* MW 2 */ + 6045 "10100110" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 first +.src_ref 2 "gemm_bfp16.h" 205 6 +.delay_slot + 6046 "01001100" // PADDB.2D [p1], d2; VST bmll0, [p5] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6047 "00001101" // /* MW 5 */ + 6048 "00001000" // /* MW 4 */ + 6049 "00001010" // /* MW 3 */ + 6050 "01110010" // /* MW 2 */ + 6051 "00101010" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 269 34 first + 6052 "10011000" // LDA r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6053 "00010110" // /* MW 3 */ + 6054 "00000110" // /* MW 2 */ + 6055 "00000111" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 269 48 + 6056 "10011000" // LDA r17, [p7, #8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6057 "00110110" // /* MW 3 */ + 6058 "00100110" // /* MW 2 */ + 6059 "00000111" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 269 4 + 6060 "00011000" // LDA p0, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6061 "00011001" // /* MW 3 */ + 6062 "11110100" // /* MW 2 */ + 6063 "00000111" // /* MW 1 */ + 6064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6065 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 269 4 +.no_stack_arguments + 6066 "00000100" // JL #3952 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3952 delay_slots=5 */ + 6067 "00000001" // /* MW 5 */ + 6068 "00000000" // /* MW 4 */ + 6069 "10111000" // /* MW 3 */ + 6070 "00000111" // /* MW 2 */ + 6071 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6073 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6075 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6077 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 269 40 +.delay_slot + 6078 "10011000" // MUL r0, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6079 "00001111" // /* MW 3 */ + 6080 "01000001" // /* MW 2 */ + 6081 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6082 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 6083 "00011100" // /* MW 13 */ + 6084 "00000000" // /* MW 12 */ + 6085 "00000000" // /* MW 11 */ + 6086 "01010111" // /* MW 10 */ + 6087 "00011010" // /* MW 9 */ + 6088 "01000000" // /* MW 8 */ + 6089 "00000000" // /* MW 7 */ + 6090 "00000000" // /* MW 6 */ + 6091 "10110110" // /* MW 5 */ + 6092 "00000010" // /* MW 4 */ + 6093 "11110000" // /* MW 3 */ + 6094 "00101100" // /* MW 2 */ + 6095 "00000000" // /* MW 1 */ +.return_address + 6096 "10000100" // J #6144 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6144 delay_slots=5 */ + 6097 "00000000" // /* MW 5 */ + 6098 "00000000" // /* MW 4 */ + 6099 "00000000" // /* MW 3 */ + 6100 "00001100" // /* MW 2 */ + 6101 "00000000" // /* MW 1 */ +.delay_slot + 6102 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6103 "00000001" // /* MW 3 */ + 6104 "00100000" // /* MW 2 */ + 6105 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6107 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6108 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6109 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6111 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6112 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6113 "00000000" // /* MW 15 */ + 6114 "00000000" // /* MW 14 */ + 6115 "01111000" // /* MW 13 */ + 6116 "10100101" // /* MW 12 */ + 6117 "00000001" // /* MW 11 */ + 6118 "00000000" // /* MW 10 */ + 6119 "00000000" // /* MW 9 */ + 6120 "00000000" // /* MW 8 */ + 6121 "01011011" // /* MW 7 */ + 6122 "00000001" // /* MW 6 */ + 6123 "00100000" // /* MW 5 */ + 6124 "00000000" // /* MW 4 */ + 6125 "11110000" // /* MW 3 */ + 6126 "00101100" // /* MW 2 */ + 6127 "00000000" // /* MW 1 */ +.label TGT_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_1872 +.src_ref 2 "gemm_bfp16.h" 272 25 first + 6128 "11100001" // NOPA; NOPB; NOPS; ADD r16, r16, #1; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6129 "00000000" // /* MW 15 */ + 6130 "00000000" // /* MW 14 */ + 6131 "01111000" // /* MW 13 */ + 6132 "10100101" // /* MW 12 */ + 6133 "00000001" // /* MW 11 */ + 6134 "00111000" // /* MW 10 */ + 6135 "00000000" // /* MW 9 */ + 6136 "00100001" // /* MW 8 */ + 6137 "01011011" // /* MW 7 */ + 6138 "00000001" // /* MW 6 */ + 6139 "00100000" // /* MW 5 */ + 6140 "00000000" // /* MW 4 */ + 6141 "11110000" // /* MW 3 */ + 6142 "00101100" // /* MW 2 */ + 6143 "00000000" // /* MW 1 */ +.label TGT_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_1888 +.src_ref 2 "gemm_bfp16.h" 274 + 6144 "00011000" // LDA lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6145 "00111001" // /* MW 3 */ + 6146 "11111000" // /* MW 2 */ + 6147 "00000111" // /* MW 1 */ + 6148 "00011000" // LDA p7, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6149 "10011001" // /* MW 3 */ + 6150 "11110011" // /* MW 2 */ + 6151 "00000111" // /* MW 1 */ + 6152 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6153 "00000000" // /* MW 1 */ + 6154 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6155 "00000000" // /* MW 1 */ + 6156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6157 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 6158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6159 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.noswbrkpt + 6160 "00011000" // LDA p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6161 "00011001" // /* MW 3 */ + 6162 "11111111" // /* MW 2 */ + 6163 "00000111" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 274 first +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 6164 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 6165 "00000000" // /* MW 3 */ + 6166 "00101000" // /* MW 2 */ + 6167 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 6168 "10111000" // MOV dj1, #280 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6169 "00110000" // /* MW 3 */ + 6170 "10000010" // /* MW 2 */ + 6171 "00011001" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6172 "10011000" // ST r16, [p6, dj1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6173 "00010001" // /* MW 3 */ + 6174 "00100010" // /* MW 2 */ + 6175 "00001110" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 274 first +.delay_slot + 6176 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6177 "00000001" // /* MW 5 */ + 6178 "00000000" // /* MW 4 */ + 6179 "00000000" // /* MW 3 */ + 6180 "11111000" // /* MW 2 */ + 6181 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6183 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params__end +.label __Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params___func_end0 + 6185 "00000000" // /* MW 1 */ +.label __Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_GemmBfp16 _Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 9 "superkernels.cpp" 381 first +.src_ref 9 "superkernels.cpp" 382 6 +.src_ref 9 "superkernels.cpp" 388 11 +.function_start + 6192 "00111010" // MOVS p4, p1; MOVXM p5, #508768 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6193 "00010001" // /* MW 9 */ + 6194 "10110000" // /* MW 8 */ + 6195 "10110001" // /* MW 7 */ + 6196 "11110010" // /* MW 6 */ + 6197 "00000001" // /* MW 5 */ + 6198 "00000000" // /* MW 4 */ + 6199 "01100000" // /* MW 3 */ + 6200 "10010001" // /* MW 2 */ + 6201 "10010000" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 382 6 first + 6202 "10011000" // LDA r16, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6203 "00010110" // /* MW 3 */ + 6204 "00000110" // /* MW 2 */ + 6205 "00000101" // /* MW 1 */ + 6206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6207 "00000000" // /* MW 1 */ + 6208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6209 "00000000" // /* MW 1 */ + 6210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6211 "00000000" // /* MW 1 */ + 6212 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6213 "00000000" // /* MW 1 */ + 6214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6215 "00000000" // /* MW 1 */ + 6216 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6217 "00000000" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 382 6 +.src_ref 9 "superkernels.cpp" 382 16 + 6218 "10000100" // JNZ r16, #6336 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6336 delay_slots=5 */ + 6219 "00000001" // /* MW 5 */ + 6220 "01000000" // /* MW 4 */ + 6221 "01100000" // /* MW 3 */ + 6222 "00001100" // /* MW 2 */ + 6223 "10000000" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 381 +.delay_slot + 6224 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6225 "00000001" // /* MW 5 */ + 6226 "00000000" // /* MW 4 */ + 6227 "00000000" // /* MW 3 */ + 6228 "00001000" // /* MW 2 */ + 6229 "00000000" // /* MW 1 */ +.delay_slot + 6230 "10011000" // ST p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6231 "00011101" // /* MW 3 */ + 6232 "11111111" // /* MW 2 */ + 6233 "00001111" // /* MW 1 */ +.delay_slot + 6234 "10011000" // ST p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6235 "10011101" // /* MW 3 */ + 6236 "11110111" // /* MW 2 */ + 6237 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.delay_slot + 6238 "00000010" // ST lr, [sp, #-8]; MOV p7, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6239 "01110000" // /* MW 7 */ + 6240 "01100000" // /* MW 6 */ + 6241 "10110000" // /* MW 5 */ + 6242 "00000011" // /* MW 4 */ + 6243 "10110000" // /* MW 3 */ + 6244 "00000111" // /* MW 2 */ + 6245 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 1 "io_buffer_main.h" 348 51 +.delay_slot + 6246 "11111000" // MOV p6, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6247 "11000000" // /* MW 3 */ + 6248 "01100110" // /* MW 2 */ + 6249 "00011110" // /* MW 1 */ +.src_ref 8 "tile.hpp" 86 8 +.src_ref 9 "superkernels.cpp" 384 6 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 6250 "00111010" // MOVS p0, p2; MOVXM p3, #508788 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6251 "00010001" // /* MW 9 */ + 6252 "10111010" // /* MW 8 */ + 6253 "10110001" // /* MW 7 */ + 6254 "11110001" // /* MW 6 */ + 6255 "00000001" // /* MW 5 */ + 6256 "00000000" // /* MW 4 */ + 6257 "01100000" // /* MW 3 */ + 6258 "00010001" // /* MW 2 */ + 6259 "00010001" // /* MW 1 */ +.src_ref 8 "tile.hpp" 74 8 +.src_ref 8 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 6260 "10111010" // ST.s8 r16, [p3]; MOVXM p3, #508784 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6261 "00010000" // /* MW 9 */ + 6262 "10111000" // /* MW 8 */ + 6263 "10110001" // /* MW 7 */ + 6264 "11110001" // /* MW 6 */ + 6265 "00000001" // /* MW 5 */ + 6266 "00000000" // /* MW 4 */ + 6267 "11100000" // /* MW 3 */ + 6268 "11000000" // /* MW 2 */ + 6269 "01100000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6271 "00000000" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 384 6 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 6272 "00000100" // JL #3088 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3088 delay_slots=5 */ + 6273 "00000001" // /* MW 5 */ + 6274 "00000000" // /* MW 4 */ + 6275 "00001000" // /* MW 3 */ + 6276 "00000110" // /* MW 2 */ + 6277 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6279 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6281 "00000000" // /* MW 1 */ +.src_ref 8 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6282 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6283 "00110001" // /* MW 3 */ + 6284 "00100000" // /* MW 2 */ + 6285 "00010000" // /* MW 1 */ +.src_ref 8 "tile.hpp" 74 8 +.delay_slot + 6286 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6287 "00000101" // /* MW 3 */ + 6288 "00100000" // /* MW 2 */ + 6289 "00010000" // /* MW 1 */ +.src_ref 8 "tile.hpp" 74 8 first +.delay_slot + 6290 "00101110" // NOPA; ST r16, [p3]; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 6291 "00011100" // /* MW 13 */ + 6292 "00000000" // /* MW 12 */ + 6293 "00000000" // /* MW 11 */ + 6294 "01010111" // /* MW 10 */ + 6295 "00011010" // /* MW 9 */ + 6296 "01000000" // /* MW 8 */ + 6297 "00000000" // /* MW 7 */ + 6298 "00000000" // /* MW 6 */ + 6299 "00100011" // /* MW 5 */ + 6300 "00001100" // /* MW 4 */ + 6301 "11110110" // /* MW 3 */ + 6302 "00101100" // /* MW 2 */ + 6303 "00000000" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 385 18 +.src_ref 9 "superkernels.cpp" 385 20 first +.return_address + 6304 "10111010" // LDA el0, [p2, #24]; MOVXM p2, #508776 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6305 "00010000" // /* MW 9 */ + 6306 "10110100" // /* MW 8 */ + 6307 "00110001" // /* MW 7 */ + 6308 "11110001" // /* MW 6 */ + 6309 "00000001" // /* MW 5 */ + 6310 "00000000" // /* MW 4 */ + 6311 "11010000" // /* MW 3 */ + 6312 "10000101" // /* MW 2 */ + 6313 "01001100" // /* MW 1 */ + 6314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6315 "00000000" // /* MW 1 */ + 6316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6317 "00000000" // /* MW 1 */ + 6318 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6319 "00000000" // /* MW 1 */ + 6320 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6321 "00000000" // /* MW 1 */ + 6322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6323 "00000000" // /* MW 1 */ + 6324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6325 "00000000" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 385 18 + 6326 "01111010" // NOPA; ST el0, [p2]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6327 "00000000" // /* MW 9 */ + 6328 "00000000" // /* MW 8 */ + 6329 "00000000" // /* MW 7 */ + 6330 "10000000" // /* MW 6 */ + 6331 "00101001" // /* MW 5 */ + 6332 "00000100" // /* MW 4 */ + 6333 "11110010" // /* MW 3 */ + 6334 "00101100" // /* MW 2 */ + 6335 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_144 +.src_ref 9 "superkernels.cpp" 387 12 +.src_ref 9 "superkernels.cpp" 388 11 first + 6336 "10111010" // LDA r16, [p5]; MOVXM p2, #508772 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6337 "00010000" // /* MW 9 */ + 6338 "10110010" // /* MW 8 */ + 6339 "00110001" // /* MW 7 */ + 6340 "11110001" // /* MW 6 */ + 6341 "00000001" // /* MW 5 */ + 6342 "00000000" // /* MW 4 */ + 6343 "11010000" // /* MW 3 */ + 6344 "11000010" // /* MW 2 */ + 6345 "10100000" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 387 12 first + 6346 "10011000" // LDA r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6347 "00110110" // /* MW 3 */ + 6348 "00000110" // /* MW 2 */ + 6349 "00000010" // /* MW 1 */ + 6350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6351 "00000000" // /* MW 1 */ + 6352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6353 "00000000" // /* MW 1 */ + 6354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6355 "00000000" // /* MW 1 */ + 6356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6357 "00000000" // /* MW 1 */ + 6358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6359 "00000000" // /* MW 1 */ + 6360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6361 "00000000" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 389 6 first +.src_ref 9 "superkernels.cpp" 389 17 first + 6362 "10000100" // JNZ r17, #6448 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6448 delay_slots=5 */ + 6363 "00000001" // /* MW 5 */ + 6364 "01000000" // /* MW 4 */ + 6365 "10011000" // /* MW 3 */ + 6366 "00001100" // /* MW 2 */ + 6367 "10001000" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 388 11 first +.delay_slot + 6368 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6369 "00000111" // /* MW 3 */ + 6370 "00100000" // /* MW 2 */ + 6371 "00010100" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 388 11 +.delay_slot + 6372 "10011000" // ST r16, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6373 "00010001" // /* MW 3 */ + 6374 "00000110" // /* MW 2 */ + 6375 "00001101" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 387 12 first +.delay_slot + 6376 "00011000" // ADD r16, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6377 "00000111" // /* MW 3 */ + 6378 "01100000" // /* MW 2 */ + 6379 "00010100" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 387 12 +.delay_slot + 6380 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6381 "00010001" // /* MW 3 */ + 6382 "00000110" // /* MW 2 */ + 6383 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6385 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 + 6386 "11111000" // MOV r16, p6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6387 "11000000" // /* MW 3 */ + 6388 "00011100" // /* MW 2 */ + 6389 "00011100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 first + 6390 "00011000" // ADD.NC p2, r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6391 "00000110" // /* MW 3 */ + 6392 "01101000" // /* MW 2 */ + 6393 "00011010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 + 6394 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6395 "01110110" // /* MW 3 */ + 6396 "11111111" // /* MW 2 */ + 6397 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 6398 "10011000" // LDA r16, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6399 "00010110" // /* MW 3 */ + 6400 "11111110" // /* MW 2 */ + 6401 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 6402 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6403 "00110110" // /* MW 3 */ + 6404 "11111110" // /* MW 2 */ + 6405 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 6406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6407 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 6408 "10011000" // LDA r16, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6409 "00010110" // /* MW 3 */ + 6410 "01000110" // /* MW 2 */ + 6411 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6413 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6415 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6417 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6419 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6420 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6421 "00000010" // /* MW 3 */ + 6422 "01100001" // /* MW 2 */ + 6423 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6424 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6425 "00010001" // /* MW 3 */ + 6426 "00000110" // /* MW 2 */ + 6427 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 + 6428 "00011000" // MOVX r17, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6429 "11111101" // /* MW 3 */ + 6430 "11100010" // /* MW 2 */ + 6431 "00010111" // /* MW 1 */ + 6432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6433 "00000000" // /* MW 1 */ + 6434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6435 "00000000" // /* MW 1 */ + 6436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6437 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 6438 "01111010" // NOPA; NOPS; ACQ r16, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6439 "00011000" // /* MW 9 */ + 6440 "00010011" // /* MW 8 */ + 6441 "00000100" // /* MW 7 */ + 6442 "00000000" // /* MW 6 */ + 6443 "01011011" // /* MW 5 */ + 6444 "00000001" // /* MW 4 */ + 6445 "11110000" // /* MW 3 */ + 6446 "00101100" // /* MW 2 */ + 6447 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_256 +.src_ref 2 "gemm_bfp16.h" 285 80 +.src_ref 2 "gemm_bfp16.h" 285 80 + 6448 "10111010" // MOVA r24, #0; MOVXM r16, #2147483616 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6449 "00010000" // /* MW 9 */ + 6450 "11110000" // /* MW 8 */ + 6451 "00001111" // /* MW 7 */ + 6452 "11111110" // /* MW 6 */ + 6453 "11111111" // /* MW 5 */ + 6454 "00011111" // /* MW 4 */ + 6455 "00000000" // /* MW 3 */ + 6456 "00011000" // /* MW 2 */ + 6457 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 285 74 + 6458 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6459 "00000101" // /* MW 3 */ + 6460 "00100010" // /* MW 2 */ + 6461 "00010000" // /* MW 1 */ + 6462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6463 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 2 "gemm_bfp16.h" 285 86 + 6464 "10111010" // LDA p3, [p4]; MOVXM p4, #508428 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6465 "00010000" // /* MW 9 */ + 6466 "00000110" // /* MW 8 */ + 6467 "00110001" // /* MW 7 */ + 6468 "11110010" // /* MW 6 */ + 6469 "00000001" // /* MW 5 */ + 6470 "00000000" // /* MW 4 */ + 6471 "11010000" // /* MW 3 */ + 6472 "10110011" // /* MW 2 */ + 6473 "10000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 285 86 first + 6474 "10011000" // LDA r27, [p4], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6475 "01110110" // /* MW 3 */ + 6476 "11111111" // /* MW 2 */ + 6477 "00000100" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 285 80 + 6478 "10011000" // LDA r18, [p4], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6479 "01010110" // /* MW 3 */ + 6480 "11101110" // /* MW 2 */ + 6481 "00000100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 9 "superkernels.cpp" 393 34 + 6482 "11010100" // LDA p0, [p7]; MOV p7, p4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6483 "10000001" // /* MW 5 */ + 6484 "11010001" // /* MW 4 */ + 6485 "11011110" // /* MW 3 */ + 6486 "10000011" // /* MW 2 */ + 6487 "11100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 + 6488 "10011000" // LDA p2, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6489 "00011110" // /* MW 3 */ + 6490 "00000101" // /* MW 2 */ + 6491 "00000110" // /* MW 1 */ + 6492 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6493 "00000000" // /* MW 1 */ + 6494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6495 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 285 74 + 6496 "11111000" // MOV r19, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6497 "11000000" // /* MW 3 */ + 6498 "11010110" // /* MW 2 */ + 6499 "00011100" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 285 4 first +.no_stack_arguments + 6500 "00000100" // JL #4256 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4256 delay_slots=5 */ + 6501 "00000001" // /* MW 5 */ + 6502 "00000000" // /* MW 4 */ + 6503 "01010000" // /* MW 3 */ + 6504 "00001000" // /* MW 2 */ + 6505 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 285 80 +.delay_slot + 6506 "00011000" // ADD r18, r18, #31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6507 "01111111" // /* MW 3 */ + 6508 "10100100" // /* MW 2 */ + 6509 "00010100" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 285 80 +.delay_slot + 6510 "10011000" // AND r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6511 "00000100" // /* MW 3 */ + 6512 "10100001" // /* MW 2 */ + 6513 "00010100" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 285 80 +.delay_slot + 6514 "00011000" // SEL.EQZ r16, r24, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6515 "00000010" // /* MW 3 */ + 6516 "00100001" // /* MW 2 */ + 6517 "00010110" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 285 74 +.delay_slot + 6518 "10011000" // LSHL r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6519 "00011101" // /* MW 3 */ + 6520 "00100001" // /* MW 2 */ + 6521 "00010100" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 285 74 +.delay_slot + 6522 "10010100" // NOPA; ADD.NC p1, r19, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6523 "10000010" // /* MW 5 */ + 6524 "11010011" // /* MW 4 */ + 6525 "11110010" // /* MW 3 */ + 6526 "00101100" // /* MW 2 */ + 6527 "00000000" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 393 6 +.src_ref 9 "superkernels.cpp" 393 34 first +.src_ref 9 "superkernels.cpp" 394 17 +.return_address + 6528 "10111010" // LDA r16, [p7, #16]; MOVXM p2, #508772 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6529 "00010000" // /* MW 9 */ + 6530 "10110010" // /* MW 8 */ + 6531 "00110001" // /* MW 7 */ + 6532 "11110001" // /* MW 6 */ + 6533 "00000001" // /* MW 5 */ + 6534 "00000000" // /* MW 4 */ + 6535 "11010000" // /* MW 3 */ + 6536 "11000010" // /* MW 2 */ + 6537 "11101000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 +.src_ref 9 "superkernels.cpp" 393 6 + 6538 "11010100" // LDA r18, [p2]; MOV r17, p6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6539 "10000001" // /* MW 5 */ + 6540 "10111001" // /* MW 4 */ + 6541 "11011000" // /* MW 3 */ + 6542 "11001010" // /* MW 2 */ + 6543 "01000000" // /* MW 1 */ + 6544 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6545 "10011001" // /* MW 3 */ + 6546 "11110111" // /* MW 2 */ + 6547 "00000111" // /* MW 1 */ + 6548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6549 "00000000" // /* MW 1 */ + 6550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6551 "00000000" // /* MW 1 */ + 6552 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6553 "00000000" // /* MW 1 */ + 6554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6555 "00000000" // /* MW 1 */ + 6556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6557 "00000000" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 393 17 + 6558 "10011000" // NE r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6559 "00001000" // /* MW 3 */ + 6560 "10100001" // /* MW 2 */ + 6561 "00010100" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 393 6 + 6562 "10000100" // JNZ r16, #6640 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6640 delay_slots=5 */ + 6563 "00000001" // /* MW 5 */ + 6564 "01000000" // /* MW 4 */ + 6565 "11111000" // /* MW 3 */ + 6566 "00001100" // /* MW 2 */ + 6567 "10000000" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 394 17 +.src_ref 9 "superkernels.cpp" 398 16 +.delay_slot + 6568 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6569 "00000001" // /* MW 3 */ + 6570 "00110000" // /* MW 2 */ + 6571 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6572 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6573 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6574 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6575 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6577 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6579 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 449 8 +.src_ref 9 "superkernels.cpp" 394 17 first + 6580 "00111010" // ST r24, [p2]; MOVX r16, #1; ADD.NC p6, r17, #20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6581 "00001001" // /* MW 9 */ + 6582 "01000101" // /* MW 8 */ + 6583 "00110100" // /* MW 7 */ + 6584 "00101011" // /* MW 6 */ + 6585 "00000000" // /* MW 5 */ + 6586 "00000001" // /* MW 4 */ + 6587 "00110000" // /* MW 3 */ + 6588 "11100010" // /* MW 2 */ + 6589 "01000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 + 6590 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6591 "00110110" // /* MW 3 */ + 6592 "00000110" // /* MW 2 */ + 6593 "00000110" // /* MW 1 */ + 6594 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6595 "00000000" // /* MW 1 */ + 6596 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6597 "00000000" // /* MW 1 */ + 6598 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6599 "00000000" // /* MW 1 */ + 6600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6601 "00000000" // /* MW 1 */ + 6602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6603 "00000000" // /* MW 1 */ + 6604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6605 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 6606 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6607 "00001000" // /* MW 3 */ + 6608 "01010001" // /* MW 2 */ + 6609 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first + 6610 "10011000" // LDA r17, [p6, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6611 "00110110" // /* MW 3 */ + 6612 "11100110" // /* MW 2 */ + 6613 "00000110" // /* MW 1 */ + 6614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6615 "00000000" // /* MW 1 */ + 6616 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6617 "00000000" // /* MW 1 */ + 6618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6619 "00000000" // /* MW 1 */ + 6620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6621 "00000000" // /* MW 1 */ + 6622 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6623 "00000000" // /* MW 1 */ + 6624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6625 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 + 6626 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6627 "00010001" // /* MW 3 */ + 6628 "00100001" // /* MW 2 */ + 6629 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 6630 "01111010" // NOPA; ST r16, [p6, #-8]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6631 "00000000" // /* MW 9 */ + 6632 "00000000" // /* MW 8 */ + 6633 "00000000" // /* MW 7 */ + 6634 "10000000" // /* MW 6 */ + 6635 "00010001" // /* MW 5 */ + 6636 "11100110" // /* MW 4 */ + 6637 "11110110" // /* MW 3 */ + 6638 "00101100" // /* MW 2 */ + 6639 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 9 "superkernels.cpp" 397 6 +.src_ref 9 "superkernels.cpp" 398 16 + 6640 "01000100" // MOVXM p2, #508768 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6641 "11000000" // /* MW 5 */ + 6642 "11000110" // /* MW 4 */ + 6643 "11000100" // /* MW 3 */ + 6644 "00000111" // /* MW 2 */ + 6645 "00000000" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 397 6 first +.src_ref 9 "superkernels.cpp" 397 19 + 6646 "10111010" // LDA r16, [p2]; MOVXM p3, #508776 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6647 "00010000" // /* MW 9 */ + 6648 "10110100" // /* MW 8 */ + 6649 "10110001" // /* MW 7 */ + 6650 "11110001" // /* MW 6 */ + 6651 "00000001" // /* MW 5 */ + 6652 "00000000" // /* MW 4 */ + 6653 "11010000" // /* MW 3 */ + 6654 "11000010" // /* MW 2 */ + 6655 "01000000" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 397 19 + 6656 "10011000" // LDA r17, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6657 "00110110" // /* MW 3 */ + 6658 "00000110" // /* MW 2 */ + 6659 "00000011" // /* MW 1 */ + 6660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6661 "00000000" // /* MW 1 */ + 6662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6663 "00000000" // /* MW 1 */ + 6664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6665 "00000000" // /* MW 1 */ + 6666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6667 "00000000" // /* MW 1 */ + 6668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6669 "00000000" // /* MW 1 */ + 6670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6671 "00000000" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 397 16 + 6672 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6673 "00001000" // /* MW 3 */ + 6674 "01100001" // /* MW 2 */ + 6675 "00010100" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 397 6 + 6676 "10000100" // JNZ r16, #6704 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6704 delay_slots=5 */ + 6677 "00000001" // /* MW 5 */ + 6678 "01000000" // /* MW 4 */ + 6679 "00011000" // /* MW 3 */ + 6680 "00001101" // /* MW 2 */ + 6681 "10000000" // /* MW 1 */ +.delay_slot + 6682 "00011000" // LDA p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6683 "00011001" // /* MW 3 */ + 6684 "11111111" // /* MW 2 */ + 6685 "00000111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6687 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6689 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6691 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6693 "00000000" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 398 16 first + 6694 "01111010" // NOPA; ST r24, [p2]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6695 "00000000" // /* MW 9 */ + 6696 "00000000" // /* MW 8 */ + 6697 "00000000" // /* MW 7 */ + 6698 "10000000" // /* MW 6 */ + 6699 "00010001" // /* MW 5 */ + 6700 "00000111" // /* MW 4 */ + 6701 "11110010" // /* MW 3 */ + 6702 "00101100" // /* MW 2 */ + 6703 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_512 +.src_ref 9 "superkernels.cpp" 400 + 6704 "00011000" // LDA lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6705 "00111001" // /* MW 3 */ + 6706 "11111000" // /* MW 2 */ + 6707 "00000111" // /* MW 1 */ + 6708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6709 "00000000" // /* MW 1 */ + 6710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6711 "00000000" // /* MW 1 */ + 6712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6713 "00000000" // /* MW 1 */ + 6714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6715 "00000000" // /* MW 1 */ + 6716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6717 "00000000" // /* MW 1 */ + 6718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6719 "00000000" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 400 first + 6720 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 6721 "00000000" // /* MW 3 */ + 6722 "00101000" // /* MW 2 */ + 6723 "00010000" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 400 +.delay_slot + 6724 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6725 "00000001" // /* MW 5 */ + 6726 "00000000" // /* MW 4 */ + 6727 "00000000" // /* MW 3 */ + 6728 "11111000" // /* MW 2 */ + 6729 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6731 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6732 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6733 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6734 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6735 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6736 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 6737 "00000000" // /* MW 1 */ +.label __Z15_b13786_wrapperPPv___func_begin0 +.label _Z15_b13786_wrapperPPv +.function _b13786_wrapper _Z15_b13786_wrapperPPv +.src_ref 0 "0_0_reloadable82.cc" 18 first +.src_ref 0 "0_0_reloadable82.cc" 20 79 +.function_start + 6752 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6753 "11000000" // /* MW 3 */ + 6754 "01100000" // /* MW 2 */ + 6755 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 20 79 first + 6756 "10011000" // LDA p0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6757 "00011110" // /* MW 3 */ + 6758 "00011100" // /* MW 2 */ + 6759 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 21 79 first + 6760 "10011000" // LDA p1, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6761 "10011110" // /* MW 3 */ + 6762 "00101100" // /* MW 2 */ + 6763 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 23 81 first + 6764 "10011000" // LDA p3, [p2, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6765 "10011110" // /* MW 3 */ + 6766 "11110101" // /* MW 2 */ + 6767 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 22 46 first + 6768 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6769 "00011110" // /* MW 3 */ + 6770 "00000101" // /* MW 2 */ + 6771 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 19 4 first +.tail_call + 6772 "10000100" // J #6192 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=6192 delay_slots=5 */ + 6773 "00000000" // /* MW 5 */ + 6774 "00000000" // /* MW 4 */ + 6775 "00011000" // /* MW 3 */ + 6776 "00001100" // /* MW 2 */ + 6777 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6778 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6779 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6780 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6781 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6783 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6785 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z15_b13786_wrapperPPv__end +.label __Z15_b13786_wrapperPPv___func_end0 + 6787 "00000000" // /* MW 1 */ +.label __ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj___func_begin0 +.label _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj +.function buffer_pad_adf_wrapper, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> > > _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 24 first +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 26 25 +.function_start + 6800 "11010100" // LDA el0, [p1]; MOV r17, p2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6801 "10000001" // /* MW 5 */ + 6802 "10101001" // /* MW 4 */ + 6803 "11011000" // /* MW 3 */ + 6804 "10000101" // /* MW 2 */ + 6805 "00100000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 26 25 first + 6806 "00011000" // ADD.NC p1, r17, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6807 "10000010" // /* MW 3 */ + 6808 "01101000" // /* MW 2 */ + 6809 "00011001" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 26 25 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 6810 "10011000" // LDA r18, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6811 "01010110" // /* MW 3 */ + 6812 "00011110" // /* MW 2 */ + 6813 "00000001" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 27 33 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 6814 "10011000" // LDA r15, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6815 "11110110" // /* MW 3 */ + 6816 "00000101" // /* MW 2 */ + 6817 "00000001" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6819 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6821 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6823 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6824 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6825 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6826 "11111000" // MOV r0, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6827 "10100000" // /* MW 3 */ + 6828 "00010111" // /* MW 2 */ + 6829 "00011000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 24 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 6830 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6831 "00000001" // /* MW 5 */ + 6832 "00000000" // /* MW 4 */ + 6833 "00000000" // /* MW 3 */ + 6834 "00001000" // /* MW 2 */ + 6835 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 43 first +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 72 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 6836 "01100100" // MUL r18, r15, r18; MOV r16, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6837 "11111101" // /* MW 5 */ + 6838 "00111111" // /* MW 4 */ + 6839 "11111000" // /* MW 3 */ + 6840 "10100101" // /* MW 2 */ + 6841 "01111100" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 72 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6842 "00111010" // ST r18, [sp, #-20]; MOVXM r17, #1073741823 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6843 "10010001" // /* MW 9 */ + 6844 "11111111" // /* MW 8 */ + 6845 "00101111" // /* MW 7 */ + 6846 "11111110" // /* MW 6 */ + 6847 "11111111" // /* MW 5 */ + 6848 "00001111" // /* MW 4 */ + 6849 "10110000" // /* MW 3 */ + 6850 "11001010" // /* MW 2 */ + 6851 "11111101" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 72 + 6852 "10011000" // LSHL r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6853 "00001101" // /* MW 3 */ + 6854 "10100001" // /* MW 2 */ + 6855 "00010100" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 72 + 6856 "10011000" // AND r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6857 "00000100" // /* MW 3 */ + 6858 "01100001" // /* MW 2 */ + 6859 "00010100" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 4 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 22 + 6860 "10000100" // JZ r16, #6944 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6944 delay_slots=5 */ + 6861 "00000001" // /* MW 5 */ + 6862 "00000000" // /* MW 4 */ + 6863 "10010000" // /* MW 3 */ + 6864 "00001101" // /* MW 2 */ + 6865 "10000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.delay_slot + 6866 "11010100" // LDA p7, [p0]; MOV p0, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6867 "10000001" // /* MW 5 */ + 6868 "11011101" // /* MW 4 */ + 6869 "11010000" // /* MW 3 */ + 6870 "11110011" // /* MW 2 */ + 6871 "00000000" // /* MW 1 */ +.delay_slot + 6872 "10011000" // ST p0, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6873 "00011101" // /* MW 3 */ + 6874 "11111000" // /* MW 2 */ + 6875 "00001111" // /* MW 1 */ +.delay_slot + 6876 "10011000" // ST r14, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6877 "11010101" // /* MW 3 */ + 6878 "11110101" // /* MW 2 */ + 6879 "00001111" // /* MW 1 */ +.delay_slot + 6880 "10011000" // ST lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6881 "00111101" // /* MW 3 */ + 6882 "11110000" // /* MW 2 */ + 6883 "00001111" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 25 24 first +.delay_slot + 6884 "00001100" // LDA r14, [p1, #-8]; ST r0, [sp, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6885 "00101011" // /* MW 5 */ + 6886 "11111000" // /* MW 4 */ + 6887 "11011111" // /* MW 3 */ + 6888 "10111010" // /* MW 2 */ + 6889 "00111100" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 37 31 + 6890 "01011100" // ST el0, [sp, #-24]; MOVX r0, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6891 "00000010" // /* MW 5 */ + 6892 "00000000" // /* MW 4 */ + 6893 "10110000" // /* MW 3 */ + 6894 "00000101" // /* MW 2 */ + 6895 "11111101" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 37 31 + 6896 "00011000" // LDA p1, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6897 "10011001" // /* MW 3 */ + 6898 "11101000" // /* MW 2 */ + 6899 "00000111" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 37 31 first +.no_stack_arguments + 6900 "00000100" // JL #10032 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10032 delay_slots=5 */ + 6901 "00000001" // /* MW 5 */ + 6902 "00000000" // /* MW 4 */ + 6903 "10011000" // /* MW 3 */ + 6904 "00010011" // /* MW 2 */ + 6905 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 4 +.delay_slot + 6906 "00011000" // MOVX r17, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6907 "00001001" // /* MW 3 */ + 6908 "00100010" // /* MW 2 */ + 6909 "00010000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 4 first +.delay_slot + 6910 "10011000" // LSHL r1, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6911 "00011101" // /* MW 3 */ + 6912 "00000011" // /* MW 2 */ + 6913 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6915 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6917 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6918 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6919 "01111110" // /* MW 9 */ + 6920 "10100101" // /* MW 8 */ + 6921 "00000001" // /* MW 7 */ + 6922 "00000000" // /* MW 6 */ + 6923 "00010000" // /* MW 5 */ + 6924 "00000000" // /* MW 4 */ + 6925 "11110000" // /* MW 3 */ + 6926 "00101100" // /* MW 2 */ + 6927 "00000000" // /* MW 1 */ +.return_address + 6928 "10000100" // J #6960 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6960 delay_slots=5 */ + 6929 "00000000" // /* MW 5 */ + 6930 "00000000" // /* MW 4 */ + 6931 "10011000" // /* MW 3 */ + 6932 "00001101" // /* MW 2 */ + 6933 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6935 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6937 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6939 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6941 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6943 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_144 + 6944 "11100001" // NOPA; NOPB; ST el0, [sp, #-24]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6945 "00000000" // /* MW 15 */ + 6946 "00000000" // /* MW 14 */ + 6947 "01111000" // /* MW 13 */ + 6948 "10100101" // /* MW 12 */ + 6949 "00000001" // /* MW 11 */ + 6950 "00000000" // /* MW 10 */ + 6951 "00000000" // /* MW 9 */ + 6952 "10000000" // /* MW 8 */ + 6953 "00101101" // /* MW 7 */ + 6954 "11101000" // /* MW 6 */ + 6955 "00100111" // /* MW 5 */ + 6956 "00000000" // /* MW 4 */ + 6957 "11110000" // /* MW 3 */ + 6958 "00101100" // /* MW 2 */ + 6959 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_160 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 40 4 first +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 40 22 first + 6960 "10000100" // JZ r15, #7232 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7232 delay_slots=5 */ + 6961 "00000001" // /* MW 5 */ + 6962 "00000000" // /* MW 4 */ + 6963 "00100000" // /* MW 3 */ + 6964 "00001110" // /* MW 2 */ + 6965 "01111000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6967 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6969 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6971 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6973 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6975 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 41 8 first + 6976 "10111010" // LDA r17, [sp, #-20]; MOVXM ls, #7072 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6977 "00010000" // /* MW 9 */ + 6978 "11010000" // /* MW 8 */ + 6979 "01111101" // /* MW 7 */ + 6980 "00000100" // /* MW 6 */ + 6981 "00000000" // /* MW 5 */ + 6982 "00000000" // /* MW 4 */ + 6983 "00100000" // /* MW 3 */ + 6984 "11000110" // /* MW 2 */ + 6985 "11111101" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 41 8 + 6986 "10111010" // MOVA r19, #1; MOVXM le, #7168 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6987 "00010000" // /* MW 9 */ + 6988 "00000000" // /* MW 8 */ + 6989 "10111110" // /* MW 7 */ + 6990 "00000101" // /* MW 6 */ + 6991 "00000000" // /* MW 5 */ + 6992 "00000000" // /* MW 4 */ + 6993 "00000000" // /* MW 3 */ + 6994 "00110011" // /* MW 2 */ + 6995 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 42 36 + 6996 "10111010" // LDA r18, [sp, #-24]; LSHL r20, r14, r19; ADD.NC r16, r15, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6997 "11001000" // /* MW 9 */ + 6998 "11111111" // /* MW 8 */ + 6999 "00001011" // /* MW 7 */ + 7000 "11101110" // /* MW 6 */ + 7001 "01001001" // /* MW 5 */ + 7002 "00011101" // /* MW 4 */ + 7003 "00100000" // /* MW 3 */ + 7004 "01001010" // /* MW 2 */ + 7005 "11111101" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 40 4 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 45 + 7006 "10111010" // LDA lr, [sp, #-16]; MOVXM p0, #7040 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7007 "00010000" // /* MW 9 */ + 7008 "11000000" // /* MW 8 */ + 7009 "00110101" // /* MW 7 */ + 7010 "00000100" // /* MW 6 */ + 7011 "00000000" // /* MW 5 */ + 7012 "00000000" // /* MW 4 */ + 7013 "00100000" // /* MW 3 */ + 7014 "00000111" // /* MW 2 */ + 7015 "11111110" // /* MW 1 */ + 7016 "11111000" // MOV m0, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7017 "00100000" // /* MW 3 */ + 7018 "00001010" // /* MW 2 */ + 7019 "00011000" // /* MW 1 */ + 7020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7021 "00000000" // /* MW 1 */ + 7022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7023 "00000000" // /* MW 1 */ + 7024 "11100001" // NOPA; NOPB; NOPS; LSHL r17, r17, r19; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7025 "00000000" // /* MW 15 */ + 7026 "00000000" // /* MW 14 */ + 7027 "01111000" // /* MW 13 */ + 7028 "10100101" // /* MW 12 */ + 7029 "00000001" // /* MW 11 */ + 7030 "11101100" // /* MW 10 */ + 7031 "00011001" // /* MW 9 */ + 7032 "00100011" // /* MW 8 */ + 7033 "01011011" // /* MW 7 */ + 7034 "00000001" // /* MW 6 */ + 7035 "00100000" // /* MW 5 */ + 7036 "00000000" // /* MW 4 */ + 7037 "11110000" // /* MW 3 */ + 7038 "00101100" // /* MW 2 */ + 7039 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_240 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 41 8 +.loop_nesting 1 + 7040 "10000100" // JZ r14, #7184 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7184 delay_slots=5 */ + 7041 "00000001" // /* MW 5 */ + 7042 "00000000" // /* MW 4 */ + 7043 "00001000" // /* MW 3 */ + 7044 "00001110" // /* MW 2 */ + 7045 "01110000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7047 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7049 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7051 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7053 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7055 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 41 8 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 42 36 + 7056 "00000010" // MOVS p2, p7; MOV lc, r14 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7057 "01110000" // /* MW 7 */ + 7058 "10010000" // /* MW 6 */ + 7059 "10111011" // /* MW 5 */ + 7060 "00000010" // /* MW 4 */ + 7061 "01100000" // /* MW 3 */ + 7062 "10010001" // /* MW 2 */ + 7063 "01010011" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 42 36 + 7064 "00000010" // NOPS; MOV p1, r18 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7065 "01110000" // /* MW 7 */ + 7066 "10010000" // /* MW 6 */ + 7067 "10110100" // /* MW 5 */ + 7068 "00000000" // /* MW 4 */ + 7069 "01100000" // /* MW 3 */ + 7070 "00101011" // /* MW 2 */ + 7071 "00000000" // /* MW 1 */ +.label ZLS_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_272 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 42 36 first +.begin_of_loop +.loop_nesting 2 + 7072 "11100001" // LDA.s16 r19, [p2], #2; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7073 "00000000" // /* MW 15 */ + 7074 "00000000" // /* MW 14 */ + 7075 "01111000" // /* MW 13 */ + 7076 "10100101" // /* MW 12 */ + 7077 "00000001" // /* MW 11 */ + 7078 "00000000" // /* MW 10 */ + 7079 "00000000" // /* MW 9 */ + 7080 "00000000" // /* MW 8 */ + 7081 "01011011" // /* MW 7 */ + 7082 "00000001" // /* MW 6 */ + 7083 "00100000" // /* MW 5 */ + 7084 "00000000" // /* MW 4 */ + 7085 "01010000" // /* MW 3 */ + 7086 "11001110" // /* MW 2 */ + 7087 "01000011" // /* MW 1 */ + 7088 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7089 "00000000" // /* MW 15 */ + 7090 "00000000" // /* MW 14 */ + 7091 "01111000" // /* MW 13 */ + 7092 "10100101" // /* MW 12 */ + 7093 "00000001" // /* MW 11 */ + 7094 "00000000" // /* MW 10 */ + 7095 "00000000" // /* MW 9 */ + 7096 "00000000" // /* MW 8 */ + 7097 "01011011" // /* MW 7 */ + 7098 "00000001" // /* MW 6 */ + 7099 "00100000" // /* MW 5 */ + 7100 "00000000" // /* MW 4 */ + 7101 "11110000" // /* MW 3 */ + 7102 "00101100" // /* MW 2 */ + 7103 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 42 36 + 7104 "11100001" // ST.s16 r19, [p1], #2; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7105 "00000000" // /* MW 15 */ + 7106 "00000000" // /* MW 14 */ + 7107 "01111000" // /* MW 13 */ + 7108 "10100101" // /* MW 12 */ + 7109 "00000001" // /* MW 11 */ + 7110 "00000000" // /* MW 10 */ + 7111 "00000000" // /* MW 9 */ + 7112 "00000000" // /* MW 8 */ + 7113 "01011011" // /* MW 7 */ + 7114 "00000001" // /* MW 6 */ + 7115 "00100000" // /* MW 5 */ + 7116 "00000000" // /* MW 4 */ + 7117 "11100000" // /* MW 3 */ + 7118 "11001110" // /* MW 2 */ + 7119 "00100011" // /* MW 1 */ + 7120 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7121 "00000000" // /* MW 15 */ + 7122 "00000000" // /* MW 14 */ + 7123 "01111000" // /* MW 13 */ + 7124 "10100101" // /* MW 12 */ + 7125 "00000001" // /* MW 11 */ + 7126 "00000000" // /* MW 10 */ + 7127 "00000000" // /* MW 9 */ + 7128 "00000000" // /* MW 8 */ + 7129 "01011011" // /* MW 7 */ + 7130 "00000001" // /* MW 6 */ + 7131 "00100000" // /* MW 5 */ + 7132 "00000000" // /* MW 4 */ + 7133 "11110000" // /* MW 3 */ + 7134 "00101100" // /* MW 2 */ + 7135 "00000000" // /* MW 1 */ + 7136 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7137 "00000000" // /* MW 15 */ + 7138 "00000000" // /* MW 14 */ + 7139 "01111000" // /* MW 13 */ + 7140 "10100101" // /* MW 12 */ + 7141 "00000001" // /* MW 11 */ + 7142 "00000000" // /* MW 10 */ + 7143 "00000000" // /* MW 9 */ + 7144 "00000000" // /* MW 8 */ + 7145 "01011011" // /* MW 7 */ + 7146 "00000001" // /* MW 6 */ + 7147 "00100000" // /* MW 5 */ + 7148 "00000000" // /* MW 4 */ + 7149 "11110000" // /* MW 3 */ + 7150 "00101100" // /* MW 2 */ + 7151 "00000000" // /* MW 1 */ + 7152 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7153 "00000000" // /* MW 15 */ + 7154 "00000000" // /* MW 14 */ + 7155 "01111000" // /* MW 13 */ + 7156 "10100101" // /* MW 12 */ + 7157 "00000001" // /* MW 11 */ + 7158 "00000000" // /* MW 10 */ + 7159 "00000000" // /* MW 9 */ + 7160 "00000000" // /* MW 8 */ + 7161 "01011011" // /* MW 7 */ + 7162 "00000001" // /* MW 6 */ + 7163 "00100000" // /* MW 5 */ + 7164 "00000000" // /* MW 4 */ + 7165 "11110000" // /* MW 3 */ + 7166 "00101100" // /* MW 2 */ + 7167 "00000000" // /* MW 1 */ +.label ZLE_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_368 +.end_of_loop + 7168 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7169 "00000000" // /* MW 15 */ + 7170 "00000000" // /* MW 14 */ + 7171 "01111000" // /* MW 13 */ + 7172 "10100101" // /* MW 12 */ + 7173 "00000001" // /* MW 11 */ + 7174 "00000000" // /* MW 10 */ + 7175 "00000000" // /* MW 9 */ + 7176 "00000000" // /* MW 8 */ + 7177 "01011011" // /* MW 7 */ + 7178 "00000001" // /* MW 6 */ + 7179 "00100000" // /* MW 5 */ + 7180 "00000000" // /* MW 4 */ + 7181 "11110000" // /* MW 3 */ + 7182 "00101100" // /* MW 2 */ + 7183 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_384 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 40 4 first +.loop_nesting 1 + 7184 "00011100" // PADDB [p7], m0; JNZD r16, r16, p0 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 7185 "01000000" // /* MW 5 */ + 7186 "01000000" // /* MW 4 */ + 7187 "00001000" // /* MW 3 */ + 7188 "01110010" // /* MW 2 */ + 7189 "11100001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7191 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7193 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7195 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7197 "00000000" // /* MW 1 */ +.delay_slot + 7198 "01011000" // ADD.NC r18, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7199 "11001001" // /* MW 3 */ + 7200 "10011000" // /* MW 2 */ + 7201 "00011100" // /* MW 1 */ +.loop_nesting 0 + 7202 "10000100" // J #7248 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=7248 delay_slots=5 */ + 7203 "00000000" // /* MW 5 */ + 7204 "00000000" // /* MW 4 */ + 7205 "00101000" // /* MW 3 */ + 7206 "00001110" // /* MW 2 */ + 7207 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7209 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7211 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7212 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7213 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7215 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7216 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7217 "00000000" // /* MW 15 */ + 7218 "00000000" // /* MW 14 */ + 7219 "01111000" // /* MW 13 */ + 7220 "10100101" // /* MW 12 */ + 7221 "00000001" // /* MW 11 */ + 7222 "00000000" // /* MW 10 */ + 7223 "00000000" // /* MW 9 */ + 7224 "00000000" // /* MW 8 */ + 7225 "01011011" // /* MW 7 */ + 7226 "00000001" // /* MW 6 */ + 7227 "00100000" // /* MW 5 */ + 7228 "00000000" // /* MW 4 */ + 7229 "11110000" // /* MW 3 */ + 7230 "00101100" // /* MW 2 */ + 7231 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_432 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 45 + 7232 "11100001" // LDA lr, [sp, #-16]; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7233 "00000000" // /* MW 15 */ + 7234 "00000000" // /* MW 14 */ + 7235 "01111000" // /* MW 13 */ + 7236 "10100101" // /* MW 12 */ + 7237 "00000001" // /* MW 11 */ + 7238 "00000000" // /* MW 10 */ + 7239 "00000000" // /* MW 9 */ + 7240 "00000000" // /* MW 8 */ + 7241 "01011011" // /* MW 7 */ + 7242 "00000001" // /* MW 6 */ + 7243 "00100000" // /* MW 5 */ + 7244 "00000000" // /* MW 4 */ + 7245 "00100000" // /* MW 3 */ + 7246 "00000111" // /* MW 2 */ + 7247 "11111110" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_448 + 7248 "00011000" // LDA r14, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7249 "11010001" // /* MW 3 */ + 7250 "11110101" // /* MW 2 */ + 7251 "00000111" // /* MW 1 */ + 7252 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7253 "10011001" // /* MW 3 */ + 7254 "11111011" // /* MW 2 */ + 7255 "00000111" // /* MW 1 */ + 7256 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7257 "11110001" // /* MW 3 */ + 7258 "11111101" // /* MW 2 */ + 7259 "00000111" // /* MW 1 */ + 7260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7261 "00000000" // /* MW 1 */ + 7262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7263 "00000000" // /* MW 1 */ + 7264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7265 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 45 first + 7266 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 7267 "00000000" // /* MW 3 */ + 7268 "00101000" // /* MW 2 */ + 7269 "00010000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 45 +.delay_slot + 7270 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7271 "00000001" // /* MW 5 */ + 7272 "00000000" // /* MW 4 */ + 7273 "00000000" // /* MW 3 */ + 7274 "11111000" // /* MW 2 */ + 7275 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7277 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7279 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7281 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj__end +.label __ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj___func_end0 + 7283 "00000000" // /* MW 1 */ +.label __Z14_b8148_wrapperPPv___func_begin0 +.label _Z14_b8148_wrapperPPv +.function _b8148_wrapper _Z14_b8148_wrapperPPv +.src_ref 0 "0_0_reloadable82.cc" 27 first +.src_ref 0 "0_0_reloadable82.cc" 29 79 +.function_start + 7296 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7297 "11000000" // /* MW 3 */ + 7298 "01100000" // /* MW 2 */ + 7299 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 29 79 first + 7300 "10011000" // LDA p0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7301 "00011110" // /* MW 3 */ + 7302 "00011100" // /* MW 2 */ + 7303 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 31 46 first + 7304 "10011000" // LDA p2, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7305 "00011110" // /* MW 3 */ + 7306 "00010101" // /* MW 2 */ + 7307 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 30 80 first + 7308 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7309 "10011110" // /* MW 3 */ + 7310 "00000100" // /* MW 2 */ + 7311 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 28 4 first +.tail_call + 7312 "10000100" // J #6800 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=6800 delay_slots=5 */ + 7313 "00000000" // /* MW 5 */ + 7314 "00000000" // /* MW 4 */ + 7315 "01001000" // /* MW 3 */ + 7316 "00001101" // /* MW 2 */ + 7317 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7318 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7319 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7320 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7321 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7323 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7325 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z14_b8148_wrapperPPv__end +.label __Z14_b8148_wrapperPPv___func_end0 + 7327 "00000000" // /* MW 1 */ +.label _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj +.label __ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj___func_begin0 +.function load_slice_generic_innermost_rtp _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj +.src_ref 11 "slice_generic_innermost_params.h" 40 first +.src_ref 11 "slice_generic_innermost_params.h" 41 19 first +.function_start + 7328 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7329 "00101110" // /* MW 3 */ + 7330 "00011100" // /* MW 2 */ + 7331 "00000001" // /* MW 1 */ + 7332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7333 "00000000" // /* MW 1 */ + 7334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7335 "00000000" // /* MW 1 */ + 7336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7337 "00000000" // /* MW 1 */ + 7338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7339 "00000000" // /* MW 1 */ + 7340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7341 "00000000" // /* MW 1 */ + 7342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7343 "00000000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 41 17 first + 7344 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7345 "00101001" // /* MW 3 */ + 7346 "00011100" // /* MW 2 */ + 7347 "00001000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 42 19 first + 7348 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7349 "00101110" // /* MW 3 */ + 7350 "00011100" // /* MW 2 */ + 7351 "00000001" // /* MW 1 */ + 7352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7353 "00000000" // /* MW 1 */ + 7354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7355 "00000000" // /* MW 1 */ + 7356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7357 "00000000" // /* MW 1 */ + 7358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7359 "00000000" // /* MW 1 */ + 7360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7361 "00000000" // /* MW 1 */ + 7362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7363 "00000000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 42 17 + 7364 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7365 "00101001" // /* MW 3 */ + 7366 "00011100" // /* MW 2 */ + 7367 "00001000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 43 19 first + 7368 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7369 "00101110" // /* MW 3 */ + 7370 "00011100" // /* MW 2 */ + 7371 "00000001" // /* MW 1 */ + 7372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7373 "00000000" // /* MW 1 */ + 7374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7375 "00000000" // /* MW 1 */ + 7376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7377 "00000000" // /* MW 1 */ + 7378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7379 "00000000" // /* MW 1 */ + 7380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7381 "00000000" // /* MW 1 */ + 7382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7383 "00000000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 43 17 + 7384 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7385 "00101001" // /* MW 3 */ + 7386 "00011100" // /* MW 2 */ + 7387 "00001000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 44 19 first + 7388 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7389 "00101110" // /* MW 3 */ + 7390 "00011100" // /* MW 2 */ + 7391 "00000001" // /* MW 1 */ + 7392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7393 "00000000" // /* MW 1 */ + 7394 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7395 "00000000" // /* MW 1 */ + 7396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7397 "00000000" // /* MW 1 */ + 7398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7399 "00000000" // /* MW 1 */ + 7400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7401 "00000000" // /* MW 1 */ + 7402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7403 "00000000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 44 17 + 7404 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7405 "00101001" // /* MW 3 */ + 7406 "00011100" // /* MW 2 */ + 7407 "00001000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 45 19 first + 7408 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7409 "00101110" // /* MW 3 */ + 7410 "00011100" // /* MW 2 */ + 7411 "00000001" // /* MW 1 */ + 7412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7413 "00000000" // /* MW 1 */ + 7414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7415 "00000000" // /* MW 1 */ + 7416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7417 "00000000" // /* MW 1 */ + 7418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7419 "00000000" // /* MW 1 */ + 7420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7421 "00000000" // /* MW 1 */ + 7422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7423 "00000000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 45 17 + 7424 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7425 "00101001" // /* MW 3 */ + 7426 "00011100" // /* MW 2 */ + 7427 "00001000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 46 17 first + 7428 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7429 "00101110" // /* MW 3 */ + 7430 "00011100" // /* MW 2 */ + 7431 "00000001" // /* MW 1 */ + 7432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7433 "00000000" // /* MW 1 */ + 7434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7435 "00000000" // /* MW 1 */ + 7436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7437 "00000000" // /* MW 1 */ + 7438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7439 "00000000" // /* MW 1 */ + 7440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7441 "00000000" // /* MW 1 */ + 7442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7443 "00000000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 46 15 + 7444 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7445 "00101001" // /* MW 3 */ + 7446 "00011100" // /* MW 2 */ + 7447 "00001000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 47 18 first + 7448 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7449 "00101110" // /* MW 3 */ + 7450 "00000100" // /* MW 2 */ + 7451 "00000001" // /* MW 1 */ + 7452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7453 "00000000" // /* MW 1 */ + 7454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7455 "00000000" // /* MW 1 */ + 7456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7457 "00000000" // /* MW 1 */ + 7458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7459 "00000000" // /* MW 1 */ + 7460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7461 "00000000" // /* MW 1 */ + 7462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7463 "00000000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 47 16 + 7464 "10011000" // ST el0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7465 "00101001" // /* MW 3 */ + 7466 "00000100" // /* MW 2 */ + 7467 "00001000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 48 18 first + 7468 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7469 "00101110" // /* MW 3 */ + 7470 "00010100" // /* MW 2 */ + 7471 "00000001" // /* MW 1 */ + 7472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7473 "00000000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 49 first + 7474 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 7475 "00000000" // /* MW 3 */ + 7476 "00101000" // /* MW 2 */ + 7477 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7479 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7481 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7483 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7485 "00000000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 48 16 first +.delay_slot + 7486 "10011000" // ST el0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7487 "00101001" // /* MW 3 */ + 7488 "00010100" // /* MW 2 */ +.label _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj__end +.label __ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj___func_end0 + 7489 "00001000" // /* MW 1 */ +.label _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params +.label __ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params___func_begin0 +.function setup_slice_generic_innermost _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params +.src_ref 11 "slice_generic_innermost_params.h" 52 first +.src_ref 11 "slice_generic_innermost_params.h" 53 25 first +.src_ref 11 "slice_generic_innermost_params.h" 55 42 +.src_ref 11 "slice_generic_innermost_params.h" 58 40 +.function_start + 7504 "10111010" // LDA r1, [p0], #4; MOVX r0, #1; MOV m1, #32 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7505 "01011000" // /* MW 9 */ + 7506 "00100000" // /* MW 8 */ + 7507 "10000000" // /* MW 7 */ + 7508 "00101000" // /* MW 6 */ + 7509 "00000000" // /* MW 5 */ + 7510 "00000000" // /* MW 4 */ + 7511 "11010000" // /* MW 3 */ + 7512 "10000110" // /* MW 2 */ + 7513 "00000011" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 53 38 first +.src_ref 11 "slice_generic_innermost_params.h" 58 30 +.src_ref 11 "slice_generic_innermost_params.h" 59 31 + 7514 "10111010" // LDA r5, [p0], #4; MOVX r3, #2; MOV r2, #-6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7515 "01011000" // /* MW 9 */ + 7516 "11111010" // /* MW 8 */ + 7517 "01001111" // /* MW 7 */ + 7518 "01001000" // /* MW 6 */ + 7519 "00110000" // /* MW 5 */ + 7520 "00000000" // /* MW 4 */ + 7521 "11010000" // /* MW 3 */ + 7522 "10010110" // /* MW 2 */ + 7523 "00000011" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 53 51 +.src_ref 11 "slice_generic_innermost_params.h" 60 27 +.src_ref 11 "slice_generic_innermost_params.h" 62 27 + 7524 "01010100" // LDA r4, [p0], #8; MOV m0, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7525 "00000001" // /* MW 5 */ + 7526 "00000001" // /* MW 4 */ + 7527 "11010000" // /* MW 3 */ + 7528 "10010010" // /* MW 2 */ + 7529 "00000101" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 55 42 first +.src_ref 11 "slice_generic_innermost_params.h" 60 27 + 7530 "01010100" // LDA r6, [p0], m1; MOV dj0, #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7531 "00000001" // /* MW 5 */ + 7532 "00000010" // /* MW 4 */ + 7533 "11010001" // /* MW 3 */ + 7534 "00011010" // /* MW 2 */ + 7535 "00000101" // /* MW 1 */ + 7536 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7537 "00000000" // /* MW 1 */ + 7538 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7539 "00000000" // /* MW 1 */ + 7540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7541 "00000000" // /* MW 1 */ + 7542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7543 "00000000" // /* MW 1 */ + 7544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7545 "00000000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 53 30 first + 7546 "10011000" // MUL r1, r5, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7547 "00011111" // /* MW 3 */ + 7548 "01000010" // /* MW 2 */ + 7549 "00010001" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 58 40 first + 7550 "10011000" // AND r0, r6, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7551 "00000100" // /* MW 3 */ + 7552 "10000000" // /* MW 2 */ + 7553 "00010001" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 58 30 + 7554 "10011000" // OR r0, r3, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7555 "00000101" // /* MW 3 */ + 7556 "11000000" // /* MW 2 */ + 7557 "00010000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 53 43 first +.src_ref 11 "slice_generic_innermost_params.h" 58 28 + 7558 "01011100" // ST r0, [p0], #-16; MUL r1, r1, r4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7559 "10011111" // /* MW 5 */ + 7560 "10000100" // /* MW 4 */ + 7561 "00110000" // /* MW 3 */ + 7562 "10000010" // /* MW 2 */ + 7563 "00011001" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 75 first + 7564 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 7565 "00000000" // /* MW 3 */ + 7566 "00101000" // /* MW 2 */ + 7567 "00010000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 59 31 first +.delay_slot + 7568 "10011000" // LSHL r0, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7569 "00101101" // /* MW 3 */ + 7570 "01000000" // /* MW 2 */ + 7571 "00010000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 59 25 +.delay_slot + 7572 "10011000" // ST r0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7573 "00010001" // /* MW 3 */ + 7574 "00011100" // /* MW 2 */ + 7575 "00001000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 60 27 first +.delay_slot + 7576 "10011000" // ST m0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7577 "00000001" // /* MW 3 */ + 7578 "00011100" // /* MW 2 */ + 7579 "00001000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 60 27 +.delay_slot + 7580 "10011000" // ST dj0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7581 "01000001" // /* MW 3 */ + 7582 "00000100" // /* MW 2 */ + 7583 "00001000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 62 27 first +.delay_slot + 7584 "10011000" // ST m0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7585 "00000001" // /* MW 3 */ + 7586 "00010100" // /* MW 2 */ +.label _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params__end +.label __ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params___func_end0 + 7587 "00001000" // /* MW 1 */ +.label _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj +.label __ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj___func_begin0 +.function setup_slice_generic_innermost_params _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj +.src_ref 11 "slice_generic_innermost_params.h" 79 first +.src_ref 11 "slice_generic_innermost_params.h" 80 4 first +.function_start +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.noswbrkpt + 7600 "00000100" // JL #7328 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7328 delay_slots=5 */ + 7601 "00000001" // /* MW 5 */ + 7602 "00000000" // /* MW 4 */ + 7603 "01010000" // /* MW 3 */ + 7604 "00001110" // /* MW 2 */ + 7605 "00000000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 81 4 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7606 "11111000" // MOV dc0, lr /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7607 "11100000" // /* MW 3 */ + 7608 "11000001" // /* MW 2 */ + 7609 "00011000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 81 4 +.delay_slot + 7610 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7611 "11000000" // /* MW 3 */ + 7612 "01100000" // /* MW 2 */ + 7613 "00011010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7615 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7616 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7617 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7618 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 7619 "00011100" // /* MW 13 */ + 7620 "00000000" // /* MW 12 */ + 7621 "00000000" // /* MW 11 */ + 7622 "01010111" // /* MW 10 */ + 7623 "00011010" // /* MW 9 */ + 7624 "01000000" // /* MW 8 */ + 7625 "00000000" // /* MW 7 */ + 7626 "00000000" // /* MW 6 */ + 7627 "10110110" // /* MW 5 */ + 7628 "00000010" // /* MW 4 */ + 7629 "11110000" // /* MW 3 */ + 7630 "00101100" // /* MW 2 */ + 7631 "00000000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 81 4 first +.tail_call +.return_address + 7632 "10000100" // J #7504 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=7504 delay_slots=5 */ + 7633 "00000000" // /* MW 5 */ + 7634 "00000000" // /* MW 4 */ + 7635 "10101000" // /* MW 3 */ + 7636 "00001110" // /* MW 2 */ + 7637 "00000000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 81 4 +.delay_slot + 7638 "11111000" // MOV lr, dc0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7639 "10000000" // /* MW 3 */ + 7640 "01110001" // /* MW 2 */ + 7641 "00011111" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 81 4 +.delay_slot + 7642 "11111000" // MOV p0, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7643 "11000000" // /* MW 3 */ + 7644 "01100100" // /* MW 2 */ + 7645 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7647 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7649 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj__end +.label __ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj___func_end0 + 7651 "00000000" // /* MW 1 */ +.label __Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params___func_begin0 +.label _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params +.function slice_generic_innermost _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params +.src_ref 8 "vector.hpp" 1159 33 +.src_ref 11 "slice_generic_innermost.h" 25 first +.src_ref 11 "slice_generic_innermost.h" 35 60 +.src_ref 11 "slice_generic_innermost.h" 54 19 +.function_start + 7664 "00000010" // MOVS p5, p1; MOV r0, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7665 "01110000" // /* MW 7 */ + 7666 "01100000" // /* MW 6 */ + 7667 "00001010" // /* MW 5 */ + 7668 "00000000" // /* MW 4 */ + 7669 "01100000" // /* MW 3 */ + 7670 "10010001" // /* MW 2 */ + 7671 "10110000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost.h" 35 60 first + 7672 "00011000" // ADD.NC p3, r0, #36 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7673 "00010010" // /* MW 3 */ + 7674 "01100000" // /* MW 2 */ + 7675 "00011011" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost.h" 35 60 + 7676 "11010100" // LDA m2, [p3], #4; MOV r0, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7677 "10000001" // /* MW 5 */ + 7678 "00111101" // /* MW 4 */ + 7679 "11010000" // /* MW 3 */ + 7680 "10100000" // /* MW 2 */ + 7681 "01100011" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost.h" 36 61 first + 7682 "10011000" // LDA m0, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7683 "00000110" // /* MW 3 */ + 7684 "00011100" // /* MW 2 */ + 7685 "00000011" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost.h" 40 35 first + 7686 "10011000" // LDA r2, [p3, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7687 "01010110" // /* MW 3 */ + 7688 "11010100" // /* MW 2 */ + 7689 "00000011" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost.h" 38 59 first + 7690 "10011000" // LDA m1, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7691 "10000110" // /* MW 3 */ + 7692 "00000100" // /* MW 2 */ + 7693 "00000011" // /* MW 1 */ + 7694 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7695 "00000000" // /* MW 1 */ + 7696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7697 "00000000" // /* MW 1 */ + 7698 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7699 "00000000" // /* MW 1 */ + 7700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7701 "00000000" // /* MW 1 */ + 7702 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7703 "00000000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost.h" 40 8 first +.src_ref 11 "slice_generic_innermost.h" 40 26 first + 7704 "10000100" // JZ r2, #8128 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8128 delay_slots=5 */ + 7705 "00000001" // /* MW 5 */ + 7706 "00000000" // /* MW 4 */ + 7707 "11100000" // /* MW 3 */ + 7708 "00001111" // /* MW 2 */ + 7709 "00010000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 +.src_ref 11 "slice_generic_innermost.h" 36 39 +.src_ref 11 "slice_generic_innermost.h" 50 19 +.delay_slot + 7710 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7711 "11000000" // /* MW 3 */ + 7712 "01100000" // /* MW 2 */ + 7713 "00011111" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 +.src_ref 11 "slice_generic_innermost.h" 36 39 first +.src_ref 11 "slice_generic_innermost.h" 50 19 +.delay_slot + 7714 "11110100" // PADDB [p7], m0; MOV p3, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7715 "10000001" // /* MW 5 */ + 7716 "11011101" // /* MW 4 */ + 7717 "00000110" // /* MW 3 */ + 7718 "01110010" // /* MW 2 */ + 7719 "11100001" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost.h" 35 38 first +.delay_slot + 7720 "00011000" // PADDB [p0], m2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7721 "10010000" // /* MW 3 */ + 7722 "01001011" // /* MW 2 */ + 7723 "00111000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 +.src_ref 11 "slice_generic_innermost.h" 37 39 first +.src_ref 11 "slice_generic_innermost.h" 52 20 +.delay_slot + 7724 "11110100" // PADDB [p0], m0; MOV p4, p0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7725 "10000001" // /* MW 5 */ + 7726 "11000001" // /* MW 4 */ + 7727 "00001000" // /* MW 3 */ + 7728 "01110010" // /* MW 2 */ + 7729 "00000001" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost.h" 38 37 first +.delay_slot + 7730 "00011000" // PADDB [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7731 "10010000" // /* MW 3 */ + 7732 "00101011" // /* MW 2 */ + 7733 "00111001" // /* MW 1 */ + 7734 "00011000" // MOVX r1, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7735 "00010001" // /* MW 3 */ + 7736 "00000010" // /* MW 2 */ + 7737 "00010000" // /* MW 1 */ + 7738 "10011000" // LTU r3, r2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7739 "00011100" // /* MW 3 */ + 7740 "10000110" // /* MW 2 */ + 7741 "00010000" // /* MW 1 */ + 7742 "10000100" // JNZ r3, #7984 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7984 delay_slots=5 */ + 7743 "00000001" // /* MW 5 */ + 7744 "01000000" // /* MW 4 */ + 7745 "10011000" // /* MW 3 */ + 7746 "00001111" // /* MW 2 */ + 7747 "00011000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost.h" 40 8 +.delay_slot + 7748 "10111000" // MOV dj0, #48 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7749 "01100000" // /* MW 3 */ + 7750 "10000000" // /* MW 2 */ + 7751 "00011000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost.h" 40 8 first +.delay_slot + 7752 "10011000" // LDA r1, [p2, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7753 "00110110" // /* MW 3 */ + 7754 "00000000" // /* MW 2 */ + 7755 "00000010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7756 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7757 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7758 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7759 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7760 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7761 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 first +.src_ref 8 "vector.hpp" 1139 17 first +.src_ref 11 "slice_generic_innermost.h" 40 8 +.src_ref 11 "slice_generic_innermost.h" 50 19 first +.src_ref 11 "slice_generic_innermost.h" 52 20 first + 7762 "10110110" // VLDA x2, [p4], m0; VLDB x1, [p3], m0; MOVXM ls, #7856 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7763 "00010000" // /* MW 11 */ + 7764 "01011000" // /* MW 10 */ + 7765 "01111111" // /* MW 9 */ + 7766 "00000100" // /* MW 8 */ + 7767 "00000000" // /* MW 7 */ + 7768 "00000000" // /* MW 6 */ + 7769 "11101000" // /* MW 5 */ + 7770 "00010000" // /* MW 4 */ + 7771 "01110110" // /* MW 3 */ + 7772 "00010011" // /* MW 2 */ + 7773 "10000001" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 +.src_ref 11 "slice_generic_innermost.h" 40 8 first +.src_ref 11 "slice_generic_innermost.h" 51 19 first +.src_ref 11 "slice_generic_innermost.h" 53 20 first +.src_ref 11 "slice_generic_innermost.h" 56 21 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7774 "01111110" // PADDA [p4], m0; VLDB x0, [p7], m0; PADDS [p3], m0; MOVXM le, #7904 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 7775 "01100000" // /* MW 13 */ + 7776 "00001011" // /* MW 12 */ + 7777 "01100001" // /* MW 11 */ + 7778 "00000010" // /* MW 10 */ + 7779 "11101110" // /* MW 9 */ + 7780 "10110111" // /* MW 8 */ + 7781 "00000000" // /* MW 7 */ + 7782 "00000000" // /* MW 6 */ + 7783 "01101000" // /* MW 5 */ + 7784 "00010000" // /* MW 4 */ + 7785 "11111110" // /* MW 3 */ + 7786 "00001100" // /* MW 2 */ + 7787 "10000001" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 first +.src_ref 8 "vector.hpp" 1139 17 first +.src_ref 11 "slice_generic_innermost.h" 40 8 +.src_ref 11 "slice_generic_innermost.h" 50 19 first +.src_ref 11 "slice_generic_innermost.h" 57 21 first +.src_ref 11 "slice_generic_innermost.h" 58 21 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7788 "11110110" // VLDA x1, [p3], m0; VLDB x3, [p0], m0; PADDS [p7], m0; ADD.NC lc, r2, #-3 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7789 "01000000" // /* MW 11 */ + 7790 "10111111" // /* MW 10 */ + 7791 "10111000" // /* MW 9 */ + 7792 "00000010" // /* MW 8 */ + 7793 "01011011" // /* MW 7 */ + 7794 "00001000" // /* MW 6 */ + 7795 "11101111" // /* MW 5 */ + 7796 "00010001" // /* MW 4 */ + 7797 "01110000" // /* MW 3 */ + 7798 "00001011" // /* MW 2 */ + 7799 "01100001" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 +.src_ref 11 "slice_generic_innermost.h" 51 19 first +.src_ref 11 "slice_generic_innermost.h" 52 20 first +.src_ref 11 "slice_generic_innermost.h" 59 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7800 "00110010" // PADDA [p0], m0; VLDB x2, [p4], m0; PADDS [p3], m0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7801 "01011011" // /* MW 7 */ + 7802 "00001000" // /* MW 6 */ + 7803 "01101011" // /* MW 5 */ + 7804 "00010001" // /* MW 4 */ + 7805 "11111000" // /* MW 3 */ + 7806 "00001100" // /* MW 2 */ + 7807 "00000001" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 first +.src_ref 11 "slice_generic_innermost.h" 53 20 first +.src_ref 11 "slice_generic_innermost.h" 56 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7808 "00111100" // PADDA [p4], m0; VLDB x0, [p7], m0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7809 "01101000" // /* MW 5 */ + 7810 "00010000" // /* MW 4 */ + 7811 "11111110" // /* MW 3 */ + 7812 "00001100" // /* MW 2 */ + 7813 "10000001" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 +.src_ref 11 "slice_generic_innermost.h" 57 21 first +.src_ref 11 "slice_generic_innermost.h" 58 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7814 "01001100" // VLDB x3, [p0], m0; PADDS [p7], m0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7815 "10110110" // /* MW 5 */ + 7816 "00010000" // /* MW 4 */ + 7817 "10001110" // /* MW 3 */ + 7818 "00011110" // /* MW 2 */ + 7819 "00000001" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 first +.src_ref 11 "slice_generic_innermost.h" 50 19 first +.src_ref 11 "slice_generic_innermost.h" 59 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7820 "00111100" // PADDA [p0], m0; VLDB x1, [p3], m0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7821 "11101000" // /* MW 5 */ + 7822 "00010000" // /* MW 4 */ + 7823 "11110110" // /* MW 3 */ + 7824 "00001100" // /* MW 2 */ + 7825 "00000001" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 +.src_ref 11 "slice_generic_innermost.h" 46 17 first +.src_ref 11 "slice_generic_innermost.h" 52 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7826 "10110100" // VLDB x2, [p4], m0; VSHUFFLE bmll0, x1, x2, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7827 "00001011" // /* MW 5 */ + 7828 "00010010" // /* MW 4 */ + 7829 "10000000" // /* MW 3 */ + 7830 "00010110" // /* MW 2 */ + 7831 "10000001" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 first +.src_ref 11 "slice_generic_innermost.h" 51 19 first +.src_ref 11 "slice_generic_innermost.h" 56 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7832 "00110010" // NOPA; VLDB x0, [p7], m0; PADDS [p3], m0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7833 "01011011" // /* MW 7 */ + 7834 "00001000" // /* MW 6 */ + 7835 "01101011" // /* MW 5 */ + 7836 "00010000" // /* MW 4 */ + 7837 "11111110" // /* MW 3 */ + 7838 "00101100" // /* MW 2 */ + 7839 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 +.src_ref 8 "vector.hpp" 1159 33 first +.src_ref 11 "slice_generic_innermost.h" 47 18 first +.src_ref 11 "slice_generic_innermost.h" 54 19 first +.src_ref 11 "slice_generic_innermost.h" 58 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7840 "11100001" // NOPA; VLDB x3, [p0], m0; VST bmll0, [p5], m1; NOPX; VSHUFFLE bmlh0, x0, x3, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7841 "00000000" // /* MW 15 */ + 7842 "00000000" // /* MW 14 */ + 7843 "11101000" // /* MW 13 */ + 7844 "11000010" // /* MW 12 */ + 7845 "01000000" // /* MW 11 */ + 7846 "00000000" // /* MW 10 */ + 7847 "00000000" // /* MW 9 */ + 7848 "10000000" // /* MW 8 */ + 7849 "00000110" // /* MW 7 */ + 7850 "00101000" // /* MW 6 */ + 7851 "11101101" // /* MW 5 */ + 7852 "00010001" // /* MW 4 */ + 7853 "11110000" // /* MW 3 */ + 7854 "00101100" // /* MW 2 */ + 7855 "00000000" // /* MW 1 */ +.label ZLS_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_192 +.src_ref 8 "vector.hpp" 1139 17 first +.src_ref 11 "slice_generic_innermost.h" 46 17 first +.src_ref 11 "slice_generic_innermost.h" 50 19 first +.src_ref 11 "slice_generic_innermost.h" 53 20 first +.src_ref 11 "slice_generic_innermost.h" 57 21 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 7856 "11100001" // PADDA [p4], m0; VLDB x1, [p3], m0; PADDS [p7], m0; NOPX; VSHUFFLE bmll0, x1, x2, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7857 "00000000" // /* MW 15 */ + 7858 "00000000" // /* MW 14 */ + 7859 "11101000" // /* MW 13 */ + 7860 "10000010" // /* MW 12 */ + 7861 "00000100" // /* MW 11 */ + 7862 "00000000" // /* MW 10 */ + 7863 "00000000" // /* MW 9 */ + 7864 "00000000" // /* MW 8 */ + 7865 "01011011" // /* MW 7 */ + 7866 "00001000" // /* MW 6 */ + 7867 "11101111" // /* MW 5 */ + 7868 "00010000" // /* MW 4 */ + 7869 "11110110" // /* MW 3 */ + 7870 "00001100" // /* MW 2 */ + 7871 "10000001" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 +.src_ref 8 "vector.hpp" 1159 33 first +.src_ref 11 "slice_generic_innermost.h" 52 20 first +.src_ref 11 "slice_generic_innermost.h" 59 21 first +.src_ref 11 "slice_generic_innermost.h" 60 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7872 "11100001" // PADDA [p0], m0; VLDB x2, [p4], m0; VST bmlh0, [p1], m1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7873 "00000000" // /* MW 15 */ + 7874 "00000000" // /* MW 14 */ + 7875 "01111000" // /* MW 13 */ + 7876 "10100101" // /* MW 12 */ + 7877 "00000001" // /* MW 11 */ + 7878 "00000000" // /* MW 10 */ + 7879 "00000000" // /* MW 9 */ + 7880 "10000000" // /* MW 8 */ + 7881 "00100110" // /* MW 7 */ + 7882 "00101000" // /* MW 6 */ + 7883 "01101001" // /* MW 5 */ + 7884 "00010001" // /* MW 4 */ + 7885 "11111000" // /* MW 3 */ + 7886 "00001100" // /* MW 2 */ + 7887 "00000001" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 first +.src_ref 11 "slice_generic_innermost.h" 47 18 first +.src_ref 11 "slice_generic_innermost.h" 51 19 first +.src_ref 11 "slice_generic_innermost.h" 55 19 first +.src_ref 11 "slice_generic_innermost.h" 56 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7888 "11100001" // PADDA [p5], m1; VLDB x0, [p7], m0; PADDS [p3], m0; NOPX; VSHUFFLE bmlh0, x0, x3, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7889 "00000000" // /* MW 15 */ + 7890 "00000000" // /* MW 14 */ + 7891 "11101000" // /* MW 13 */ + 7892 "11000010" // /* MW 12 */ + 7893 "01000000" // /* MW 11 */ + 7894 "00000000" // /* MW 10 */ + 7895 "00000000" // /* MW 9 */ + 7896 "00000000" // /* MW 8 */ + 7897 "01011011" // /* MW 7 */ + 7898 "00001000" // /* MW 6 */ + 7899 "01101011" // /* MW 5 */ + 7900 "00010000" // /* MW 4 */ + 7901 "11111110" // /* MW 3 */ + 7902 "00001100" // /* MW 2 */ + 7903 "10100101" // /* MW 1 */ +.label ZLE_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_240 +.src_ref 8 "vector.hpp" 1139 17 +.src_ref 8 "vector.hpp" 1159 33 first +.src_ref 11 "slice_generic_innermost.h" 54 19 first +.src_ref 11 "slice_generic_innermost.h" 58 21 first +.src_ref 11 "slice_generic_innermost.h" 61 19 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7904 "11100001" // PADDA [p1], m1; VLDB x3, [p0], m0; VST bmll0, [p5], m1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7905 "00000000" // /* MW 15 */ + 7906 "00000000" // /* MW 14 */ + 7907 "01111000" // /* MW 13 */ + 7908 "10100101" // /* MW 12 */ + 7909 "00000001" // /* MW 11 */ + 7910 "00000000" // /* MW 10 */ + 7911 "00000000" // /* MW 9 */ + 7912 "10000000" // /* MW 8 */ + 7913 "00000110" // /* MW 7 */ + 7914 "00101000" // /* MW 6 */ + 7915 "11101101" // /* MW 5 */ + 7916 "00010001" // /* MW 4 */ + 7917 "11110000" // /* MW 3 */ + 7918 "00001100" // /* MW 2 */ + 7919 "00100101" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost.h" 46 17 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 7920 "11011000" // VSHUFFLE bmll0, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7921 "00000101" // /* MW 3 */ + 7922 "00001001" // /* MW 2 */ + 7923 "00011000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1159 33 first +.src_ref 11 "slice_generic_innermost.h" 60 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7924 "10011000" // VST bmlh0, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7925 "00100110" // /* MW 3 */ + 7926 "00101000" // /* MW 2 */ + 7927 "00001001" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost.h" 47 18 first +.src_ref 11 "slice_generic_innermost.h" 61 19 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7928 "10010100" // PADDA [p1], m1; VSHUFFLE bmlh0, x0, x3, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7929 "00001011" // /* MW 5 */ + 7930 "00000011" // /* MW 4 */ + 7931 "11110001" // /* MW 3 */ + 7932 "00001100" // /* MW 2 */ + 7933 "00100101" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 7934 "10000100" // J #8128 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8128 delay_slots=5 */ + 7935 "00000000" // /* MW 5 */ + 7936 "00000000" // /* MW 4 */ + 7937 "11100000" // /* MW 3 */ + 7938 "00001111" // /* MW 2 */ + 7939 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1159 33 first +.src_ref 11 "slice_generic_innermost.h" 46 17 first +.src_ref 11 "slice_generic_innermost.h" 55 19 first +.src_ref 11 "slice_generic_innermost.h" 60 19 first +.delay_slot +.aggressive_scheduled_block_id 2 +.noswbrkpt + 7940 "10111010" // PADDA [p5], m1; VST bmlh0, [p1], m1; VSHUFFLE bmll0, x1, x2, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7941 "11100010" // /* MW 9 */ + 7942 "10000010" // /* MW 8 */ + 7943 "00000100" // /* MW 7 */ + 7944 "10000000" // /* MW 6 */ + 7945 "00100110" // /* MW 5 */ + 7946 "00101000" // /* MW 4 */ + 7947 "11110001" // /* MW 3 */ + 7948 "00001100" // /* MW 2 */ + 7949 "10100101" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1159 33 +.src_ref 11 "slice_generic_innermost.h" 54 19 first +.src_ref 11 "slice_generic_innermost.h" 61 19 first +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7950 "00001100" // PADDA [p1], m1; VST bmll0, [p5], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7951 "00001101" // /* MW 5 */ + 7952 "01010000" // /* MW 4 */ + 7953 "11111010" // /* MW 3 */ + 7954 "00001100" // /* MW 2 */ + 7955 "00100101" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost.h" 47 18 first +.src_ref 11 "slice_generic_innermost.h" 55 19 first +.delay_slot + 7956 "10010100" // PADDA [p5], m1; VSHUFFLE bmlh0, x0, x3, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7957 "00001011" // /* MW 5 */ + 7958 "00000011" // /* MW 4 */ + 7959 "11110001" // /* MW 3 */ + 7960 "00001100" // /* MW 2 */ + 7961 "10100101" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1159 33 first +.src_ref 11 "slice_generic_innermost.h" 54 19 first +.delay_slot + 7962 "00001100" // NOPA; VST bmll0, [p5], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7963 "00001101" // /* MW 5 */ + 7964 "01010000" // /* MW 4 */ + 7965 "11111010" // /* MW 3 */ + 7966 "00101100" // /* MW 2 */ + 7967 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1159 33 +.src_ref 11 "slice_generic_innermost.h" 60 19 first +.delay_slot + 7968 "11100001" // NOPA; NOPB; VST bmlh0, [p1], m1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7969 "00000000" // /* MW 15 */ + 7970 "00000000" // /* MW 14 */ + 7971 "01111000" // /* MW 13 */ + 7972 "10100101" // /* MW 12 */ + 7973 "00000001" // /* MW 11 */ + 7974 "00000000" // /* MW 10 */ + 7975 "00000000" // /* MW 9 */ + 7976 "10000000" // /* MW 8 */ + 7977 "00100110" // /* MW 7 */ + 7978 "00101000" // /* MW 6 */ + 7979 "00100001" // /* MW 5 */ + 7980 "00000000" // /* MW 4 */ + 7981 "11110000" // /* MW 3 */ + 7982 "00101100" // /* MW 2 */ + 7983 "00000000" // /* MW 1 */ +.label TGT_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_320 +.src_ref 11 "slice_generic_innermost.h" 40 8 first + 7984 "11111000" // MOV lc, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7985 "00100000" // /* MW 3 */ + 7986 "01110001" // /* MW 2 */ + 7987 "00011101" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost.h" 40 8 + 7988 "01000100" // MOVXM ls, #8000 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7989 "10000000" // /* MW 5 */ + 7990 "11111110" // /* MW 4 */ + 7991 "00010001" // /* MW 3 */ + 7992 "00000000" // /* MW 2 */ + 7993 "00000000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost.h" 40 8 + 7994 "01000100" // MOVXM le, #8112 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7995 "01100000" // /* MW 5 */ + 7996 "11111111" // /* MW 4 */ + 7997 "00010110" // /* MW 3 */ + 7998 "00000000" // /* MW 2 */ + 7999 "00000000" // /* MW 1 */ +.label ZLS_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_336 +.src_ref 8 "vector.hpp" 1139 17 first +.src_ref 8 "vector.hpp" 1139 17 first +.src_ref 11 "slice_generic_innermost.h" 50 19 first +.src_ref 11 "slice_generic_innermost.h" 52 20 first +.begin_of_loop +.loop_nesting 1 + 8000 "00111100" // VLDA x1, [p4], m0; VLDB x2, [p3], m0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8001 "01101000" // /* MW 5 */ + 8002 "00010001" // /* MW 4 */ + 8003 "01110110" // /* MW 3 */ + 8004 "00001011" // /* MW 2 */ + 8005 "10000001" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 +.src_ref 11 "slice_generic_innermost.h" 51 19 first +.src_ref 11 "slice_generic_innermost.h" 53 20 first +.src_ref 11 "slice_generic_innermost.h" 56 21 first + 8006 "00110010" // PADDA [p3], m0; VLDB x0, [p7], m0; PADDS [p4], m0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8007 "01011011" // /* MW 7 */ + 8008 "00001000" // /* MW 6 */ + 8009 "01101100" // /* MW 5 */ + 8010 "00010000" // /* MW 4 */ + 8011 "11111110" // /* MW 3 */ + 8012 "00001100" // /* MW 2 */ + 8013 "01100001" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 first +.src_ref 11 "slice_generic_innermost.h" 57 21 first +.src_ref 11 "slice_generic_innermost.h" 58 21 first + 8014 "00111100" // PADDA [p7], m0; VLDB x3, [p0], m0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8015 "11101000" // /* MW 5 */ + 8016 "00010001" // /* MW 4 */ + 8017 "11110000" // /* MW 3 */ + 8018 "00001100" // /* MW 2 */ + 8019 "11100001" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost.h" 59 21 first + 8020 "00011000" // PADDB [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8021 "10010000" // /* MW 3 */ + 8022 "00001011" // /* MW 2 */ + 8023 "00111000" // /* MW 1 */ + 8024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8025 "00000000" // /* MW 1 */ + 8026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8027 "00000000" // /* MW 1 */ + 8028 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8029 "01100111" // /* MW 3 */ + 8030 "00000001" // /* MW 2 */ + 8031 "00000000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost.h" 46 17 first + 8032 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x2, x1, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8033 "00000000" // /* MW 15 */ + 8034 "00000000" // /* MW 14 */ + 8035 "11101000" // /* MW 13 */ + 8036 "01000010" // /* MW 12 */ + 8037 "00001000" // /* MW 11 */ + 8038 "00000000" // /* MW 10 */ + 8039 "00000000" // /* MW 9 */ + 8040 "00000000" // /* MW 8 */ + 8041 "01011011" // /* MW 7 */ + 8042 "00000001" // /* MW 6 */ + 8043 "00100000" // /* MW 5 */ + 8044 "00000000" // /* MW 4 */ + 8045 "11110000" // /* MW 3 */ + 8046 "00101100" // /* MW 2 */ + 8047 "00000000" // /* MW 1 */ + 8048 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8049 "00000000" // /* MW 15 */ + 8050 "00000000" // /* MW 14 */ + 8051 "01111000" // /* MW 13 */ + 8052 "10100101" // /* MW 12 */ + 8053 "00000001" // /* MW 11 */ + 8054 "00000000" // /* MW 10 */ + 8055 "00000000" // /* MW 9 */ + 8056 "00000000" // /* MW 8 */ + 8057 "01011011" // /* MW 7 */ + 8058 "00000001" // /* MW 6 */ + 8059 "00100000" // /* MW 5 */ + 8060 "00000000" // /* MW 4 */ + 8061 "11110000" // /* MW 3 */ + 8062 "00101100" // /* MW 2 */ + 8063 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1159 33 first +.src_ref 11 "slice_generic_innermost.h" 47 18 first +.src_ref 11 "slice_generic_innermost.h" 54 19 first + 8064 "11100001" // NOPA; NOPB; VST bmll0, [p5], m1; NOPX; VSHUFFLE bmlh0, x0, x3, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8065 "00000000" // /* MW 15 */ + 8066 "00000000" // /* MW 14 */ + 8067 "11101000" // /* MW 13 */ + 8068 "11000010" // /* MW 12 */ + 8069 "01000000" // /* MW 11 */ + 8070 "00000000" // /* MW 10 */ + 8071 "00000000" // /* MW 9 */ + 8072 "10000000" // /* MW 8 */ + 8073 "00000110" // /* MW 7 */ + 8074 "00101000" // /* MW 6 */ + 8075 "00100101" // /* MW 5 */ + 8076 "00000000" // /* MW 4 */ + 8077 "11110000" // /* MW 3 */ + 8078 "00101100" // /* MW 2 */ + 8079 "00000000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost.h" 55 19 first + 8080 "11100001" // NOPA; PADDB [p5], m1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8081 "00000000" // /* MW 15 */ + 8082 "00000000" // /* MW 14 */ + 8083 "01111000" // /* MW 13 */ + 8084 "10100101" // /* MW 12 */ + 8085 "00000001" // /* MW 11 */ + 8086 "00000000" // /* MW 10 */ + 8087 "00000000" // /* MW 9 */ + 8088 "00000000" // /* MW 8 */ + 8089 "01011011" // /* MW 7 */ + 8090 "00000001" // /* MW 6 */ + 8091 "00100000" // /* MW 5 */ + 8092 "01010111" // /* MW 4 */ + 8093 "11111010" // /* MW 3 */ + 8094 "00101100" // /* MW 2 */ + 8095 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1159 33 first +.src_ref 11 "slice_generic_innermost.h" 60 19 first + 8096 "11100001" // NOPA; NOPB; VST bmlh0, [p1], m1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8097 "00000000" // /* MW 15 */ + 8098 "00000000" // /* MW 14 */ + 8099 "01111000" // /* MW 13 */ + 8100 "10100101" // /* MW 12 */ + 8101 "00000001" // /* MW 11 */ + 8102 "00000000" // /* MW 10 */ + 8103 "00000000" // /* MW 9 */ + 8104 "10000000" // /* MW 8 */ + 8105 "00100110" // /* MW 7 */ + 8106 "00101000" // /* MW 6 */ + 8107 "00100001" // /* MW 5 */ + 8108 "00000000" // /* MW 4 */ + 8109 "11110000" // /* MW 3 */ + 8110 "00101100" // /* MW 2 */ + 8111 "00000000" // /* MW 1 */ +.label ZLE_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_448 +.src_ref 11 "slice_generic_innermost.h" 61 19 first +.end_of_loop + 8112 "11100001" // NOPA; PADDB [p1], m1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8113 "00000000" // /* MW 15 */ + 8114 "00000000" // /* MW 14 */ + 8115 "01111000" // /* MW 13 */ + 8116 "10100101" // /* MW 12 */ + 8117 "00000001" // /* MW 11 */ + 8118 "00000000" // /* MW 10 */ + 8119 "00000000" // /* MW 9 */ + 8120 "00000000" // /* MW 8 */ + 8121 "01011011" // /* MW 7 */ + 8122 "00000001" // /* MW 6 */ + 8123 "00100000" // /* MW 5 */ + 8124 "01010111" // /* MW 4 */ + 8125 "11110010" // /* MW 3 */ + 8126 "00101100" // /* MW 2 */ + 8127 "00000000" // /* MW 1 */ +.label TGT_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_464 +.src_ref 11 "slice_generic_innermost.h" 76 first +.loop_nesting 0 + 8128 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 8129 "00000000" // /* MW 3 */ + 8130 "00101000" // /* MW 2 */ + 8131 "00010000" // /* MW 1 */ +.delay_slot + 8132 "11111000" // MOV p7, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8133 "00100000" // /* MW 3 */ + 8134 "01100000" // /* MW 2 */ + 8135 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8137 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8139 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8141 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params__end +.label __Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params___func_end0 + 8143 "00000000" // /* MW 1 */ +.label __ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj___func_begin0 +.label _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj +.function slice_generic_innermost_adf_wrapper, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> > > _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 29 +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 29 first +.function_start + 8144 "00111010" // MOVS p5, p0; PADDXM [sp], #128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8145 "01110001" // /* MW 9 */ + 8146 "00000000" // /* MW 8 */ + 8147 "00000000" // /* MW 7 */ + 8148 "00000000" // /* MW 6 */ + 8149 "00000100" // /* MW 5 */ + 8150 "00000000" // /* MW 4 */ + 8151 "01100000" // /* MW 3 */ + 8152 "00010001" // /* MW 2 */ + 8153 "10110000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 + 8154 "00000010" // ST lr, [sp, #-4]; MOV p3, p1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8155 "01110000" // /* MW 7 */ + 8156 "01100000" // /* MW 6 */ + 8157 "10110001" // /* MW 5 */ + 8158 "00000001" // /* MW 4 */ + 8159 "10110000" // /* MW 3 */ + 8160 "10000111" // /* MW 2 */ + 8161 "11111111" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 33 4 +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 33 4 first +.no_stack_arguments + 8162 "00111010" // MOVS p1, p2; JL #7600 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=7600 delay_slots=5 */ + 8163 "01000001" // /* MW 9 */ + 8164 "00000000" // /* MW 8 */ + 8165 "00000000" // /* MW 7 */ + 8166 "10110110" // /* MW 6 */ + 8167 "00000011" // /* MW 5 */ + 8168 "00000000" // /* MW 4 */ + 8169 "01100000" // /* MW 3 */ + 8170 "00010001" // /* MW 2 */ + 8171 "00110001" // /* MW 1 */ +.delay_slot + 8172 "11111000" // MOV p0, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8173 "11100000" // /* MW 3 */ + 8174 "01100101" // /* MW 2 */ + 8175 "00011000" // /* MW 1 */ +.delay_slot + 8176 "00011000" // PADDB [p0], #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8177 "10010000" // /* MW 3 */ + 8178 "11101111" // /* MW 2 */ + 8179 "00111000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 4 +.delay_slot + 8180 "11111000" // MOV p4, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8181 "11000000" // /* MW 3 */ + 8182 "01100000" // /* MW 2 */ + 8183 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8185 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8186 "00111100" // NOPA; NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8187 "00100000" // /* MW 5 */ + 8188 "00000000" // /* MW 4 */ + 8189 "11110000" // /* MW 3 */ + 8190 "00101100" // /* MW 2 */ + 8191 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 31 first +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 4 +.return_address + 8192 "10111010" // LDA r18, [sp, #-128]; MOVS p2, p4; MOV r17, CORE_ID /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8193 "01110010" // /* MW 9 */ + 8194 "01110000" // /* MW 8 */ + 8195 "00101101" // /* MW 7 */ + 8196 "00000010" // /* MW 6 */ + 8197 "10001011" // /* MW 5 */ + 8198 "10010000" // /* MW 4 */ + 8199 "00100010" // /* MW 3 */ + 8200 "01001010" // /* MW 2 */ + 8201 "11110000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 34 35 first +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 44 + 8202 "00101100" // LDA r22, [sp, #-124]; EXTEND.u8 r17, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8203 "00100000" // /* MW 5 */ + 8204 "11000101" // /* MW 4 */ + 8205 "00101000" // /* MW 3 */ + 8206 "11011010" // /* MW 2 */ + 8207 "11110000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 34 35 +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 57 first + 8208 "10111010" // LDA r20, [sp, #-120]; MOVXM r19, #65534 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8209 "00010000" // /* MW 9 */ + 8210 "11111111" // /* MW 8 */ + 8211 "01101111" // /* MW 7 */ + 8212 "00111110" // /* MW 6 */ + 8213 "00000000" // /* MW 5 */ + 8214 "00000000" // /* MW 4 */ + 8215 "00100000" // /* MW 3 */ + 8216 "01010010" // /* MW 2 */ + 8217 "11110001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 34 35 first + 8218 "00101100" // LDA p1, [p3]; ADD r17, r19, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8219 "00100001" // /* MW 5 */ + 8220 "11000110" // /* MW 4 */ + 8221 "11011001" // /* MW 3 */ + 8222 "10010011" // /* MW 2 */ + 8223 "01100000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 70 first +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 59 first + 8224 "00101100" // LDA r19, [sp, #-116]; EXTEND.u16 r21, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8225 "01100000" // /* MW 5 */ + 8226 "11010101" // /* MW 4 */ + 8227 "00101000" // /* MW 3 */ + 8228 "11001110" // /* MW 2 */ + 8229 "11110001" // /* MW 1 */ + 8230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8231 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 8232 "10011000" // LDA r17, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8233 "00110110" // /* MW 3 */ + 8234 "00000110" // /* MW 2 */ + 8235 "00000101" // /* MW 1 */ + 8236 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8237 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 36 first + 8238 "10011000" // MUL r18, r22, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8239 "00101111" // /* MW 3 */ + 8240 "10100101" // /* MW 2 */ + 8241 "00010101" // /* MW 1 */ + 8242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8243 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 49 + 8244 "10011000" // MUL r18, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8245 "01001111" // /* MW 3 */ + 8246 "10100101" // /* MW 2 */ + 8247 "00010100" // /* MW 1 */ + 8248 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8249 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 62 + 8250 "10011000" // MUL r18, r21, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8251 "00101111" // /* MW 3 */ + 8252 "01100101" // /* MW 2 */ + 8253 "00010101" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 4 first +.no_stack_arguments + 8254 "00000100" // JL #7664 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7664 delay_slots=5 */ + 8255 "00000001" // /* MW 5 */ + 8256 "00000000" // /* MW 4 */ + 8257 "11111000" // /* MW 3 */ + 8258 "00001110" // /* MW 2 */ + 8259 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 57 +.delay_slot + 8260 "10011000" // MUL r18, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8261 "00101111" // /* MW 3 */ + 8262 "11100101" // /* MW 2 */ + 8263 "00010100" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 45 +.delay_slot + 8264 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8265 "00000101" // /* MW 3 */ + 8266 "00100000" // /* MW 2 */ + 8267 "00010000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 45 +.delay_slot + 8268 "10011000" // LSHL r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8269 "00001101" // /* MW 3 */ + 8270 "10100001" // /* MW 2 */ + 8271 "00010100" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 45 +.delay_slot + 8272 "01011000" // ADD.NC p0, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8273 "11000001" // /* MW 3 */ + 8274 "01101000" // /* MW 2 */ + 8275 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8276 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8277 "10000001" // /* MW 11 */ + 8278 "10101101" // /* MW 10 */ + 8279 "00000000" // /* MW 9 */ + 8280 "00000000" // /* MW 8 */ + 8281 "00000000" // /* MW 7 */ + 8282 "00000000" // /* MW 6 */ + 8283 "00100000" // /* MW 5 */ + 8284 "00000000" // /* MW 4 */ + 8285 "11110000" // /* MW 3 */ + 8286 "00101100" // /* MW 2 */ + 8287 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 39 +.return_address + 8288 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8289 "00111001" // /* MW 3 */ + 8290 "11111100" // /* MW 2 */ + 8291 "00000111" // /* MW 1 */ + 8292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8293 "00000000" // /* MW 1 */ + 8294 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8295 "00000000" // /* MW 1 */ + 8296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8297 "00000000" // /* MW 1 */ + 8298 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8299 "00000000" // /* MW 1 */ + 8300 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8301 "00000000" // /* MW 1 */ + 8302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8303 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 39 first + 8304 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 8305 "00000000" // /* MW 3 */ + 8306 "00101000" // /* MW 2 */ + 8307 "00010000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 39 +.delay_slot + 8308 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8309 "00000001" // /* MW 5 */ + 8310 "00000000" // /* MW 4 */ + 8311 "00000000" // /* MW 3 */ + 8312 "11110000" // /* MW 2 */ + 8313 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8315 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8317 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8318 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8319 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8320 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj__end +.label __ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj___func_end0 + 8321 "00000000" // /* MW 1 */ +.label __Z14_b8170_wrapperPPv___func_begin0 +.label _Z14_b8170_wrapperPPv +.function _b8170_wrapper _Z14_b8170_wrapperPPv +.src_ref 0 "0_0_reloadable82.cc" 35 first +.src_ref 0 "0_0_reloadable82.cc" 37 79 +.function_start + 8336 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8337 "11000000" // /* MW 3 */ + 8338 "01100000" // /* MW 2 */ + 8339 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 37 79 first + 8340 "10011000" // LDA p0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8341 "00011110" // /* MW 3 */ + 8342 "00011100" // /* MW 2 */ + 8343 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 39 47 first + 8344 "10011000" // LDA p2, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8345 "00011110" // /* MW 3 */ + 8346 "00010101" // /* MW 2 */ + 8347 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 38 80 first + 8348 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8349 "10011110" // /* MW 3 */ + 8350 "00000100" // /* MW 2 */ + 8351 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 36 4 first +.tail_call + 8352 "10000100" // J #8144 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=8144 delay_slots=5 */ + 8353 "00000000" // /* MW 5 */ + 8354 "00000000" // /* MW 4 */ + 8355 "11101000" // /* MW 3 */ + 8356 "00001111" // /* MW 2 */ + 8357 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8359 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8361 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8363 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8365 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z14_b8170_wrapperPPv__end +.label __Z14_b8170_wrapperPPv___func_end0 + 8367 "00000000" // /* MW 1 */ +.label __Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj___func_begin0 +.label _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj +.function setup_transposeshuffle_params _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj +.src_ref 11 "transposeshuffle_params.h" 71 first +.src_ref 11 "transposeshuffle_params.h" 76 16 +.src_ref 11 "transposeshuffle_params.h" 76 18 first +.function_start + 8368 "10111010" // LDA el0, [p1], #4; MOVXM r0, #508864 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8369 "00010000" // /* MW 9 */ + 8370 "11100000" // /* MW 8 */ + 8371 "00001001" // /* MW 7 */ + 8372 "11110000" // /* MW 6 */ + 8373 "00000001" // /* MW 5 */ + 8374 "00000000" // /* MW 4 */ + 8375 "11010000" // /* MW 3 */ + 8376 "10000101" // /* MW 2 */ + 8377 "00100011" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 76 9 +.src_ref 11 "transposeshuffle_params.h" 76 16 +.src_ref 11 "transposeshuffle_params.h" 80 28 +.src_ref 11 "transposeshuffle_params.h" 80 36 +.src_ref 11 "transposeshuffle_params.h" 81 28 +.src_ref 11 "transposeshuffle_params.h" 81 36 + 8378 "01110110" // MOVA m0, #64; MOVS p0, r0; MOVX r2, #-3; ADD.NC p2, r0, #4 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8379 "00001000" // /* MW 11 */ + 8380 "00000001" // /* MW 10 */ + 8381 "00110000" // /* MW 9 */ + 8382 "10101001" // /* MW 8 */ + 8383 "00100111" // /* MW 7 */ + 8384 "00111110" // /* MW 6 */ + 8385 "00001011" // /* MW 5 */ + 8386 "10000000" // /* MW 4 */ + 8387 "10000000" // /* MW 3 */ + 8388 "00000000" // /* MW 2 */ + 8389 "00001000" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 85 22 +.src_ref 11 "transposeshuffle_params.h" 86 17 +.src_ref 11 "transposeshuffle_params.h" 89 43 +.src_ref 11 "transposeshuffle_params.h" 91 18 +.src_ref 11 "transposeshuffle_params.h" 93 4 +.src_ref 11 "transposeshuffle_params.h" 94 4 + 8390 "01100100" // MOVX r1, #4; MOV r0, #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8391 "00000001" // /* MW 5 */ + 8392 "00100010" // /* MW 4 */ + 8393 "00100000" // /* MW 3 */ + 8394 "01000010" // /* MW 2 */ + 8395 "00000000" // /* MW 1 */ + 8396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8397 "00000000" // /* MW 1 */ + 8398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8399 "00000000" // /* MW 1 */ + 8400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8401 "00000000" // /* MW 1 */ + 8402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8403 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 76 16 first + 8404 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8405 "00101001" // /* MW 3 */ + 8406 "00011100" // /* MW 2 */ + 8407 "00001000" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 76 18 + 8408 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8409 "00101110" // /* MW 3 */ + 8410 "00011100" // /* MW 2 */ + 8411 "00000001" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 76 18 + 8412 "10011000" // LDA eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8413 "00001110" // /* MW 3 */ + 8414 "00011100" // /* MW 2 */ + 8415 "00000001" // /* MW 1 */ + 8416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8417 "00000000" // /* MW 1 */ + 8418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8419 "00000000" // /* MW 1 */ + 8420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8421 "00000000" // /* MW 1 */ + 8422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8423 "00000000" // /* MW 1 */ + 8424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8425 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 76 16 + 8426 "10011000" // ST el0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8427 "00101001" // /* MW 3 */ + 8428 "00011100" // /* MW 2 */ + 8429 "00001010" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 76 16 + 8430 "10011000" // ST eh0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8431 "00001001" // /* MW 3 */ + 8432 "00011100" // /* MW 2 */ + 8433 "00001010" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 76 18 + 8434 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8435 "00101110" // /* MW 3 */ + 8436 "00011100" // /* MW 2 */ + 8437 "00000001" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 76 18 + 8438 "10011000" // LDA eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8439 "00001110" // /* MW 3 */ + 8440 "00011100" // /* MW 2 */ + 8441 "00000001" // /* MW 1 */ + 8442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8443 "00000000" // /* MW 1 */ + 8444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8445 "00000000" // /* MW 1 */ + 8446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8447 "00000000" // /* MW 1 */ + 8448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8449 "00000000" // /* MW 1 */ + 8450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8451 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 76 16 + 8452 "10011000" // ST el0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8453 "00101001" // /* MW 3 */ + 8454 "00011100" // /* MW 2 */ + 8455 "00001010" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 76 16 + 8456 "10011000" // ST eh0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8457 "00001001" // /* MW 3 */ + 8458 "00011100" // /* MW 2 */ + 8459 "00001010" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 76 18 + 8460 "10011000" // LDA eh0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8461 "00001110" // /* MW 3 */ + 8462 "00000100" // /* MW 2 */ + 8463 "00000001" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 76 18 + 8464 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8465 "00101110" // /* MW 3 */ + 8466 "00010100" // /* MW 2 */ + 8467 "00000001" // /* MW 1 */ + 8468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8469 "00000000" // /* MW 1 */ + 8470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8471 "00000000" // /* MW 1 */ + 8472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8473 "00000000" // /* MW 1 */ + 8474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8475 "00000000" // /* MW 1 */ + 8476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8477 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 76 16 + 8478 "10011000" // ST eh0, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8479 "00001001" // /* MW 3 */ + 8480 "00000100" // /* MW 2 */ + 8481 "00001010" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 76 16 + 8482 "10011000" // ST el0, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8483 "00101001" // /* MW 3 */ + 8484 "00010100" // /* MW 2 */ + 8485 "00001010" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 80 28 first + 8486 "10011000" // LDA r3, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8487 "01110110" // /* MW 3 */ + 8488 "00001000" // /* MW 2 */ + 8489 "00000000" // /* MW 1 */ + 8490 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8491 "00000000" // /* MW 1 */ + 8492 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8493 "00000000" // /* MW 1 */ + 8494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8495 "00000000" // /* MW 1 */ + 8496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8497 "00000000" // /* MW 1 */ + 8498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8499 "00000000" // /* MW 1 */ + 8500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8501 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 80 36 + 8502 "10011000" // LSHL r4, r3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8503 "00101101" // /* MW 3 */ + 8504 "11001000" // /* MW 2 */ + 8505 "00010000" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 80 42 +.src_ref 11 "transposeshuffle_params.h" 89 43 first + 8506 "00100100" // LSHL r3, r3, r1; ADD.NC r1, r4, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8507 "11111111" // /* MW 5 */ + 8508 "10100100" // /* MW 4 */ + 8509 "10110000" // /* MW 3 */ + 8510 "11000011" // /* MW 2 */ + 8511 "00011000" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 80 19 +.src_ref 11 "transposeshuffle_params.h" 80 19 first + 8512 "00000010" // ST r1, [p0]; MOV r4, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8513 "01110000" // /* MW 7 */ + 8514 "01100000" // /* MW 6 */ + 8515 "10001000" // /* MW 5 */ + 8516 "00000000" // /* MW 4 */ + 8517 "00110000" // /* MW 3 */ + 8518 "10000110" // /* MW 2 */ + 8519 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 80 19 + 8520 "00011000" // ADD.NC p1, r4, #-60 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8521 "01100010" // /* MW 3 */ + 8522 "01100010" // /* MW 2 */ + 8523 "00011001" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 81 28 first + 8524 "10011000" // LDA r4, [p1], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8525 "10010110" // /* MW 3 */ + 8526 "00001000" // /* MW 2 */ + 8527 "00000001" // /* MW 1 */ + 8528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8529 "00000000" // /* MW 1 */ + 8530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8531 "00000000" // /* MW 1 */ + 8532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8533 "00000000" // /* MW 1 */ + 8534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8535 "00000000" // /* MW 1 */ + 8536 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8537 "00000000" // /* MW 1 */ + 8538 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8539 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 81 36 +.src_ref 11 "transposeshuffle_params.h" 90 77 + 8540 "01100100" // LSHL r2, r4, r2; MOV r4, #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8541 "00000001" // /* MW 5 */ + 8542 "00100010" // /* MW 4 */ + 8543 "10110010" // /* MW 3 */ + 8544 "10000101" // /* MW 2 */ + 8545 "00100000" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 81 42 + 8546 "00011000" // ADD r2, r2, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8547 "11111111" // /* MW 3 */ + 8548 "10000101" // /* MW 2 */ + 8549 "00010000" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 81 19 +.src_ref 11 "transposeshuffle_params.h" 90 77 first + 8550 "01011100" // ST r2, [p1], #4; MSC r4, r4, r3, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8551 "01011100" // /* MW 5 */ + 8552 "10010000" // /* MW 4 */ + 8553 "00110001" // /* MW 3 */ + 8554 "10001010" // /* MW 2 */ + 8555 "00100011" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 85 22 first + 8556 "10011000" // ST r0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8557 "00010001" // /* MW 3 */ + 8558 "00011100" // /* MW 2 */ + 8559 "00001001" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 85 22 + 8560 "10011000" // ST r0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8561 "00010001" // /* MW 3 */ + 8562 "00011100" // /* MW 2 */ + 8563 "00001001" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 86 17 first + 8564 "10011000" // ST r0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8565 "00010001" // /* MW 3 */ + 8566 "00101100" // /* MW 2 */ + 8567 "00001001" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 89 23 first + 8568 "10011000" // ST r3, [p1], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8569 "01110001" // /* MW 3 */ + 8570 "11111100" // /* MW 2 */ + 8571 "00001001" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 90 23 first + 8572 "10011000" // ST r4, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8573 "10010001" // /* MW 3 */ + 8574 "00101100" // /* MW 2 */ + 8575 "00001001" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 91 18 +.src_ref 11 "transposeshuffle_params.h" 91 18 first + 8576 "00000010" // ST r0, [p1]; MOV r5, p1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8577 "01110000" // /* MW 7 */ + 8578 "01100000" // /* MW 6 */ + 8579 "10101001" // /* MW 5 */ + 8580 "00000000" // /* MW 4 */ + 8581 "00110000" // /* MW 3 */ + 8582 "10000010" // /* MW 2 */ + 8583 "00100000" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 91 18 + 8584 "00011000" // ADD.NC p1, r5, #-68 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8585 "11011110" // /* MW 3 */ + 8586 "01100010" // /* MW 2 */ + 8587 "00011001" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 93 4 first + 8588 "10011000" // ST r0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8589 "00010001" // /* MW 3 */ + 8590 "00011100" // /* MW 2 */ + 8591 "00001001" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 93 4 + 8592 "10011000" // ST r0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8593 "00010001" // /* MW 3 */ + 8594 "00011100" // /* MW 2 */ + 8595 "00001001" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 93 4 + 8596 "10011000" // ST r2, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8597 "01010001" // /* MW 3 */ + 8598 "00011100" // /* MW 2 */ + 8599 "00001001" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 93 4 + 8600 "10011000" // ST r1, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8601 "00110001" // /* MW 3 */ + 8602 "00011100" // /* MW 2 */ + 8603 "00001001" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 93 4 +.src_ref 11 "transposeshuffle_params.h" 95 first + 8604 "01011100" // ST r0, [p1], #4; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 8605 "00000000" // /* MW 5 */ + 8606 "01010000" // /* MW 4 */ + 8607 "00110000" // /* MW 3 */ + 8608 "10000010" // /* MW 2 */ + 8609 "00100011" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 94 4 first +.delay_slot + 8610 "10011000" // ST r3, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8611 "01110001" // /* MW 3 */ + 8612 "00101100" // /* MW 2 */ + 8613 "00001001" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 94 4 +.delay_slot + 8614 "10011000" // ST r2, [p1], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8615 "01010001" // /* MW 3 */ + 8616 "11111100" // /* MW 2 */ + 8617 "00001001" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 94 4 +.delay_slot + 8618 "10011000" // ST r4, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8619 "10010001" // /* MW 3 */ + 8620 "00101100" // /* MW 2 */ + 8621 "00001001" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 94 4 +.delay_slot + 8622 "10011000" // ST r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8623 "00110001" // /* MW 3 */ + 8624 "00000100" // /* MW 2 */ + 8625 "00001001" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 94 4 +.delay_slot + 8626 "10011000" // ST r0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8627 "00010001" // /* MW 3 */ + 8628 "00010100" // /* MW 2 */ +.label _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj__end +.label __Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj___func_end0 + 8629 "00001001" // /* MW 1 */ +.label __Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params___func_begin0 +.label _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params +.function transposeshuffle _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params +.src_ref 11 "transposeshuffle.h" 38 first +.src_ref 11 "transposeshuffle.h" 72 14 +.src_ref 11 "transposeshuffle.h" 79 23 +.function_start + 8640 "10111010" // MOVA r1, #2; MOVXM p2, #508876 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8641 "00010000" // /* MW 9 */ + 8642 "11100110" // /* MW 8 */ + 8643 "00110001" // /* MW 7 */ + 8644 "11110001" // /* MW 6 */ + 8645 "00000001" // /* MW 5 */ + 8646 "00000000" // /* MW 4 */ + 8647 "00000000" // /* MW 3 */ + 8648 "01000001" // /* MW 2 */ + 8649 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 72 8 +.src_ref 11 "transposeshuffle.h" 72 14 first +.src_ref 11 "transposeshuffle.h" 72 23 + 8650 "00101100" // LDA r27, [p2]; MOVX r0, #22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8651 "10110010" // /* MW 5 */ + 8652 "00000000" // /* MW 4 */ + 8653 "11010000" // /* MW 3 */ + 8654 "11101110" // /* MW 2 */ + 8655 "01000000" // /* MW 1 */ + 8656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8657 "00000000" // /* MW 1 */ + 8658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8659 "00000000" // /* MW 1 */ + 8660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8661 "00000000" // /* MW 1 */ + 8662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8663 "00000000" // /* MW 1 */ + 8664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8665 "00000000" // /* MW 1 */ + 8666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8667 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 79 23 first + 8668 "10011000" // EQ r1, r27, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8669 "00010111" // /* MW 3 */ + 8670 "11000010" // /* MW 2 */ + 8671 "00010110" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 79 8 + 8672 "10000100" // JNZ r1, #9136 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9136 delay_slots=5 */ + 8673 "00000001" // /* MW 5 */ + 8674 "01000000" // /* MW 4 */ + 8675 "11011000" // /* MW 3 */ + 8676 "00010001" // /* MW 2 */ + 8677 "00001000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 72 8 +.src_ref 11 "transposeshuffle.h" 72 23 +.delay_slot + 8678 "00011000" // MOVX r2, #29 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8679 "01110101" // /* MW 3 */ + 8680 "00000100" // /* MW 2 */ + 8681 "00010000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 72 8 first +.src_ref 11 "transposeshuffle.h" 72 23 first +.delay_slot + 8682 "00011000" // SEL.EQZ r0, r0, r2, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8683 "00100010" // /* MW 3 */ + 8684 "00000000" // /* MW 2 */ + 8685 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8687 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8689 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8691 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 116 34 + 8692 "01000100" // MOVXM p2, #508880 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8693 "10100000" // /* MW 5 */ + 8694 "11000111" // /* MW 4 */ + 8695 "11000100" // /* MW 3 */ + 8696 "00000111" // /* MW 2 */ + 8697 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 116 34 first + 8698 "10011000" // LDA r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8699 "00110110" // /* MW 3 */ + 8700 "00000100" // /* MW 2 */ + 8701 "00000010" // /* MW 1 */ + 8702 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8703 "00000000" // /* MW 1 */ + 8704 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8705 "00000000" // /* MW 1 */ + 8706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8707 "00000000" // /* MW 1 */ + 8708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8709 "00000000" // /* MW 1 */ + 8710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8711 "00000000" // /* MW 1 */ + 8712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8713 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 116 8 +.src_ref 11 "transposeshuffle.h" 116 26 + 8714 "10000100" // JZ r1, #9776 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9776 delay_slots=5 */ + 8715 "00000001" // /* MW 5 */ + 8716 "00000000" // /* MW 4 */ + 8717 "00011000" // /* MW 3 */ + 8718 "00010011" // /* MW 2 */ + 8719 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8720 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8721 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8723 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8725 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8727 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8728 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8729 "00000000" // /* MW 1 */ + 8730 "00011000" // MOVX r2, #10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8731 "00101001" // /* MW 3 */ + 8732 "00000100" // /* MW 2 */ + 8733 "00010000" // /* MW 1 */ + 8734 "10011000" // LTU r2, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8735 "00101100" // /* MW 3 */ + 8736 "01000100" // /* MW 2 */ + 8737 "00010000" // /* MW 1 */ + 8738 "10000100" // JNZ r2, #8976 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8976 delay_slots=5 */ + 8739 "00000001" // /* MW 5 */ + 8740 "01000000" // /* MW 4 */ + 8741 "10001000" // /* MW 3 */ + 8742 "00010001" // /* MW 2 */ + 8743 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8745 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8747 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8749 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8751 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8753 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 first +.src_ref 11 "transposeshuffle.h" 116 8 +.src_ref 11 "transposeshuffle.h" 119 21 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8754 "00111010" // VLDB x0, [p0], #64; MOVXM ls, #8880 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8755 "00010000" // /* MW 9 */ + 8756 "01011000" // /* MW 8 */ + 8757 "01111001" // /* MW 7 */ + 8758 "00001000" // /* MW 6 */ + 8759 "00000000" // /* MW 5 */ + 8760 "00000000" // /* MW 4 */ + 8761 "01101000" // /* MW 3 */ + 8762 "00111000" // /* MW 2 */ + 8763 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 +.src_ref 11 "transposeshuffle.h" 116 8 first +.src_ref 11 "transposeshuffle.h" 119 21 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8764 "00111010" // VLDB x0, [p0], #64; MOVXM le, #8880 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8765 "00010000" // /* MW 9 */ + 8766 "01011000" // /* MW 8 */ + 8767 "10111001" // /* MW 7 */ + 8768 "00001001" // /* MW 6 */ + 8769 "00000000" // /* MW 5 */ + 8770 "00000000" // /* MW 4 */ + 8771 "01101000" // /* MW 3 */ + 8772 "00111000" // /* MW 2 */ + 8773 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 first +.src_ref 11 "transposeshuffle.h" 116 8 +.src_ref 11 "transposeshuffle.h" 119 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8774 "10111010" // NOPA; VLDB x0, [p0], #64; ADD.NC lc, r1, #-9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8775 "11001110" // /* MW 9 */ + 8776 "01111101" // /* MW 8 */ + 8777 "10111000" // /* MW 7 */ + 8778 "00000010" // /* MW 6 */ + 8779 "00110100" // /* MW 5 */ + 8780 "00011100" // /* MW 4 */ + 8781 "11110000" // /* MW 3 */ + 8782 "00101100" // /* MW 2 */ + 8783 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 +.src_ref 11 "transposeshuffle.h" 119 21 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8784 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8785 "00000000" // /* MW 15 */ + 8786 "00000000" // /* MW 14 */ + 8787 "01111000" // /* MW 13 */ + 8788 "10100101" // /* MW 12 */ + 8789 "00000001" // /* MW 11 */ + 8790 "00000000" // /* MW 10 */ + 8791 "00000000" // /* MW 9 */ + 8792 "00000000" // /* MW 8 */ + 8793 "01011011" // /* MW 7 */ + 8794 "00000001" // /* MW 6 */ + 8795 "01101000" // /* MW 5 */ + 8796 "00111000" // /* MW 4 */ + 8797 "11110000" // /* MW 3 */ + 8798 "00101100" // /* MW 2 */ + 8799 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 +.src_ref 11 "transposeshuffle.h" 119 21 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8800 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8801 "00000000" // /* MW 15 */ + 8802 "00000000" // /* MW 14 */ + 8803 "01111000" // /* MW 13 */ + 8804 "10100101" // /* MW 12 */ + 8805 "00000001" // /* MW 11 */ + 8806 "00000000" // /* MW 10 */ + 8807 "00000000" // /* MW 9 */ + 8808 "00000000" // /* MW 8 */ + 8809 "01011011" // /* MW 7 */ + 8810 "00000001" // /* MW 6 */ + 8811 "01101000" // /* MW 5 */ + 8812 "00111000" // /* MW 4 */ + 8813 "11110000" // /* MW 3 */ + 8814 "00101100" // /* MW 2 */ + 8815 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 +.src_ref 11 "transposeshuffle.h" 119 21 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8816 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8817 "00000000" // /* MW 15 */ + 8818 "00000000" // /* MW 14 */ + 8819 "01111000" // /* MW 13 */ + 8820 "10100101" // /* MW 12 */ + 8821 "00000001" // /* MW 11 */ + 8822 "00000000" // /* MW 10 */ + 8823 "00000000" // /* MW 9 */ + 8824 "00000000" // /* MW 8 */ + 8825 "01011011" // /* MW 7 */ + 8826 "00000001" // /* MW 6 */ + 8827 "01101000" // /* MW 5 */ + 8828 "00111000" // /* MW 4 */ + 8829 "11110000" // /* MW 3 */ + 8830 "00101100" // /* MW 2 */ + 8831 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 +.src_ref 11 "transposeshuffle.h" 119 21 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8832 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8833 "00000000" // /* MW 15 */ + 8834 "00000000" // /* MW 14 */ + 8835 "01111000" // /* MW 13 */ + 8836 "10100101" // /* MW 12 */ + 8837 "00000001" // /* MW 11 */ + 8838 "00000000" // /* MW 10 */ + 8839 "00000000" // /* MW 9 */ + 8840 "00000000" // /* MW 8 */ + 8841 "01011011" // /* MW 7 */ + 8842 "00000001" // /* MW 6 */ + 8843 "01101000" // /* MW 5 */ + 8844 "00111000" // /* MW 4 */ + 8845 "11110000" // /* MW 3 */ + 8846 "00101100" // /* MW 2 */ + 8847 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 +.src_ref 11 "transposeshuffle.h" 119 21 +.src_ref 11 "transposeshuffle.h" 120 17 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8848 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8849 "00000000" // /* MW 15 */ + 8850 "00000000" // /* MW 14 */ + 8851 "11101000" // /* MW 13 */ + 8852 "00000000" // /* MW 12 */ + 8853 "00000000" // /* MW 11 */ + 8854 "00000000" // /* MW 10 */ + 8855 "00000000" // /* MW 9 */ + 8856 "00000000" // /* MW 8 */ + 8857 "01011011" // /* MW 7 */ + 8858 "00000001" // /* MW 6 */ + 8859 "01101000" // /* MW 5 */ + 8860 "00111000" // /* MW 4 */ + 8861 "11110000" // /* MW 3 */ + 8862 "00101100" // /* MW 2 */ + 8863 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 first +.src_ref 11 "transposeshuffle.h" 119 21 first +.src_ref 11 "transposeshuffle.h" 120 17 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8864 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8865 "00000000" // /* MW 15 */ + 8866 "00000000" // /* MW 14 */ + 8867 "11101000" // /* MW 13 */ + 8868 "00000000" // /* MW 12 */ + 8869 "00000000" // /* MW 11 */ + 8870 "00000000" // /* MW 10 */ + 8871 "00000000" // /* MW 9 */ + 8872 "00000000" // /* MW 8 */ + 8873 "01011011" // /* MW 7 */ + 8874 "00000001" // /* MW 6 */ + 8875 "01101000" // /* MW 5 */ + 8876 "00111000" // /* MW 4 */ + 8877 "11110000" // /* MW 3 */ + 8878 "00101100" // /* MW 2 */ + 8879 "00000000" // /* MW 1 */ +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_240 +.src_ref 8 "vector.hpp" 1139 17 +.src_ref 8 "vector.hpp" 1159 33 first +.src_ref 11 "transposeshuffle.h" 119 21 +.src_ref 11 "transposeshuffle.h" 120 17 first +.src_ref 11 "transposeshuffle.h" 122 22 first +.begin_of_loop +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 8880 "11100001" // NOPA; VLDB x0, [p0], #64; VST bmll0, [p1], #64; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8881 "00000000" // /* MW 15 */ + 8882 "00000000" // /* MW 14 */ + 8883 "11101000" // /* MW 13 */ + 8884 "00000000" // /* MW 12 */ + 8885 "00000000" // /* MW 11 */ + 8886 "00000000" // /* MW 10 */ + 8887 "00000000" // /* MW 9 */ + 8888 "10000000" // /* MW 8 */ + 8889 "00000110" // /* MW 7 */ + 8890 "00011100" // /* MW 6 */ + 8891 "01101001" // /* MW 5 */ + 8892 "00111000" // /* MW 4 */ + 8893 "11110000" // /* MW 3 */ + 8894 "00101100" // /* MW 2 */ + 8895 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1159 33 +.src_ref 11 "transposeshuffle.h" 120 17 +.src_ref 11 "transposeshuffle.h" 122 22 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 8896 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8897 "11100000" // /* MW 7 */ + 8898 "00000000" // /* MW 6 */ + 8899 "00000000" // /* MW 5 */ + 8900 "00000000" // /* MW 4 */ + 8901 "11010000" // /* MW 3 */ + 8902 "10000000" // /* MW 2 */ + 8903 "00100011" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1159 33 +.src_ref 11 "transposeshuffle.h" 120 17 +.src_ref 11 "transposeshuffle.h" 122 22 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8904 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8905 "11100000" // /* MW 7 */ + 8906 "00000000" // /* MW 6 */ + 8907 "00000000" // /* MW 5 */ + 8908 "00000000" // /* MW 4 */ + 8909 "11010000" // /* MW 3 */ + 8910 "10000000" // /* MW 2 */ + 8911 "00100011" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1159 33 +.src_ref 11 "transposeshuffle.h" 120 17 +.src_ref 11 "transposeshuffle.h" 122 22 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8912 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8913 "11100000" // /* MW 7 */ + 8914 "00000000" // /* MW 6 */ + 8915 "00000000" // /* MW 5 */ + 8916 "00000000" // /* MW 4 */ + 8917 "11010000" // /* MW 3 */ + 8918 "10000000" // /* MW 2 */ + 8919 "00100011" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1159 33 +.src_ref 11 "transposeshuffle.h" 120 17 +.src_ref 11 "transposeshuffle.h" 122 22 +.src_ref 11 "transposeshuffle.h" 126 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8920 "00111010" // VST bmll0, [p1], #64; RET lr; VSHUFFLE bmll0, x0, x0, r0 /* MW 10 */ /* control_operation: words=10 rts unconditional cycles_taken=1 delay_slots=5 */ + 8921 "11101001" // /* MW 9 */ + 8922 "00000000" // /* MW 8 */ + 8923 "00000000" // /* MW 7 */ + 8924 "00000000" // /* MW 6 */ + 8925 "01000000" // /* MW 5 */ + 8926 "00000001" // /* MW 4 */ + 8927 "11010000" // /* MW 3 */ + 8928 "10000000" // /* MW 2 */ + 8929 "00100011" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1159 33 first +.src_ref 11 "transposeshuffle.h" 120 17 first +.src_ref 11 "transposeshuffle.h" 122 22 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8930 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8931 "11100000" // /* MW 7 */ + 8932 "00000000" // /* MW 6 */ + 8933 "00000000" // /* MW 5 */ + 8934 "00000000" // /* MW 4 */ + 8935 "11010000" // /* MW 3 */ + 8936 "10000000" // /* MW 2 */ + 8937 "00100011" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1159 33 +.src_ref 11 "transposeshuffle.h" 120 17 +.src_ref 11 "transposeshuffle.h" 122 22 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8938 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8939 "11100000" // /* MW 7 */ + 8940 "00000000" // /* MW 6 */ + 8941 "00000000" // /* MW 5 */ + 8942 "00000000" // /* MW 4 */ + 8943 "11010000" // /* MW 3 */ + 8944 "10000000" // /* MW 2 */ + 8945 "00100011" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1159 33 +.src_ref 11 "transposeshuffle.h" 120 17 +.src_ref 11 "transposeshuffle.h" 122 22 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8946 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8947 "11100000" // /* MW 7 */ + 8948 "00000000" // /* MW 6 */ + 8949 "00000000" // /* MW 5 */ + 8950 "00000000" // /* MW 4 */ + 8951 "11010000" // /* MW 3 */ + 8952 "10000000" // /* MW 2 */ + 8953 "00100011" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1159 33 +.src_ref 11 "transposeshuffle.h" 122 22 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8954 "00001100" // NOPA; VST bmll0, [p1], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8955 "00001101" // /* MW 5 */ + 8956 "00111000" // /* MW 4 */ + 8957 "11110010" // /* MW 3 */ + 8958 "00101100" // /* MW 2 */ + 8959 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1159 33 +.src_ref 11 "transposeshuffle.h" 122 22 +.delay_slot + 8960 "11100001" // NOPA; NOPB; VST bmll0, [p1], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8961 "00000000" // /* MW 15 */ + 8962 "00000000" // /* MW 14 */ + 8963 "01111000" // /* MW 13 */ + 8964 "10100101" // /* MW 12 */ + 8965 "00000001" // /* MW 11 */ + 8966 "00000000" // /* MW 10 */ + 8967 "00000000" // /* MW 9 */ + 8968 "10000000" // /* MW 8 */ + 8969 "00000110" // /* MW 7 */ + 8970 "00011100" // /* MW 6 */ + 8971 "00100001" // /* MW 5 */ + 8972 "00000000" // /* MW 4 */ + 8973 "11110000" // /* MW 3 */ + 8974 "00101100" // /* MW 2 */ + 8975 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_336 +.src_ref 11 "transposeshuffle.h" 116 8 first + 8976 "11111000" // MOV lc, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8977 "10100000" // /* MW 3 */ + 8978 "01110000" // /* MW 2 */ + 8979 "00011101" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 116 8 + 8980 "01000100" // MOVXM ls, #8992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8981 "01000000" // /* MW 5 */ + 8982 "11100110" // /* MW 4 */ + 8983 "00100001" // /* MW 3 */ + 8984 "00000000" // /* MW 2 */ + 8985 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 116 8 + 8986 "01000100" // MOVXM le, #9104 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8987 "00100000" // /* MW 5 */ + 8988 "11100111" // /* MW 4 */ + 8989 "00100110" // /* MW 3 */ + 8990 "00000000" // /* MW 2 */ + 8991 "00000000" // /* MW 1 */ +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_352 +.src_ref 8 "vector.hpp" 1139 17 first +.src_ref 11 "transposeshuffle.h" 119 21 first +.begin_of_loop +.loop_nesting 1 + 8992 "00011000" // VLDB x0, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8993 "00110100" // /* MW 3 */ + 8994 "00011100" // /* MW 2 */ + 8995 "00111000" // /* MW 1 */ + 8996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8997 "00000000" // /* MW 1 */ + 8998 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8999 "01111110" // /* MW 9 */ + 9000 "10100101" // /* MW 8 */ + 9001 "00000001" // /* MW 7 */ + 9002 "00000000" // /* MW 6 */ + 9003 "00010000" // /* MW 5 */ + 9004 "00000000" // /* MW 4 */ + 9005 "11110000" // /* MW 3 */ + 9006 "00101100" // /* MW 2 */ + 9007 "00000000" // /* MW 1 */ + 9008 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9009 "00000000" // /* MW 15 */ + 9010 "00000000" // /* MW 14 */ + 9011 "01111000" // /* MW 13 */ + 9012 "10100101" // /* MW 12 */ + 9013 "00000001" // /* MW 11 */ + 9014 "00000000" // /* MW 10 */ + 9015 "00000000" // /* MW 9 */ + 9016 "00000000" // /* MW 8 */ + 9017 "01011011" // /* MW 7 */ + 9018 "00000001" // /* MW 6 */ + 9019 "00100000" // /* MW 5 */ + 9020 "00000000" // /* MW 4 */ + 9021 "11110000" // /* MW 3 */ + 9022 "00101100" // /* MW 2 */ + 9023 "00000000" // /* MW 1 */ + 9024 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9025 "00000000" // /* MW 15 */ + 9026 "00000000" // /* MW 14 */ + 9027 "01111000" // /* MW 13 */ + 9028 "10100101" // /* MW 12 */ + 9029 "00000001" // /* MW 11 */ + 9030 "00000000" // /* MW 10 */ + 9031 "00000000" // /* MW 9 */ + 9032 "00000000" // /* MW 8 */ + 9033 "01011011" // /* MW 7 */ + 9034 "00000001" // /* MW 6 */ + 9035 "00100000" // /* MW 5 */ + 9036 "00000000" // /* MW 4 */ + 9037 "11110000" // /* MW 3 */ + 9038 "00101100" // /* MW 2 */ + 9039 "00000000" // /* MW 1 */ + 9040 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9041 "00000000" // /* MW 15 */ + 9042 "00000000" // /* MW 14 */ + 9043 "01111000" // /* MW 13 */ + 9044 "10100101" // /* MW 12 */ + 9045 "00000001" // /* MW 11 */ + 9046 "00000000" // /* MW 10 */ + 9047 "00000000" // /* MW 9 */ + 9048 "00000000" // /* MW 8 */ + 9049 "01011011" // /* MW 7 */ + 9050 "00000001" // /* MW 6 */ + 9051 "00100000" // /* MW 5 */ + 9052 "00000000" // /* MW 4 */ + 9053 "11110000" // /* MW 3 */ + 9054 "00101100" // /* MW 2 */ + 9055 "00000000" // /* MW 1 */ + 9056 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9057 "00000000" // /* MW 15 */ + 9058 "00000000" // /* MW 14 */ + 9059 "01111000" // /* MW 13 */ + 9060 "10100101" // /* MW 12 */ + 9061 "00000001" // /* MW 11 */ + 9062 "00000000" // /* MW 10 */ + 9063 "00000000" // /* MW 9 */ + 9064 "00000000" // /* MW 8 */ + 9065 "01011011" // /* MW 7 */ + 9066 "00000001" // /* MW 6 */ + 9067 "00100000" // /* MW 5 */ + 9068 "00000000" // /* MW 4 */ + 9069 "11110000" // /* MW 3 */ + 9070 "00101100" // /* MW 2 */ + 9071 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 120 17 first + 9072 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9073 "00000000" // /* MW 15 */ + 9074 "00000000" // /* MW 14 */ + 9075 "11101000" // /* MW 13 */ + 9076 "00000000" // /* MW 12 */ + 9077 "00000000" // /* MW 11 */ + 9078 "00000000" // /* MW 10 */ + 9079 "00000000" // /* MW 9 */ + 9080 "00000000" // /* MW 8 */ + 9081 "01011011" // /* MW 7 */ + 9082 "00000001" // /* MW 6 */ + 9083 "00100000" // /* MW 5 */ + 9084 "00000000" // /* MW 4 */ + 9085 "11110000" // /* MW 3 */ + 9086 "00101100" // /* MW 2 */ + 9087 "00000000" // /* MW 1 */ + 9088 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9089 "00000000" // /* MW 15 */ + 9090 "00000000" // /* MW 14 */ + 9091 "01111000" // /* MW 13 */ + 9092 "10100101" // /* MW 12 */ + 9093 "00000001" // /* MW 11 */ + 9094 "00000000" // /* MW 10 */ + 9095 "00000000" // /* MW 9 */ + 9096 "00000000" // /* MW 8 */ + 9097 "01011011" // /* MW 7 */ + 9098 "00000001" // /* MW 6 */ + 9099 "00100000" // /* MW 5 */ + 9100 "00000000" // /* MW 4 */ + 9101 "11110000" // /* MW 3 */ + 9102 "00101100" // /* MW 2 */ + 9103 "00000000" // /* MW 1 */ +.label ZLE_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_464 +.src_ref 8 "vector.hpp" 1159 33 first +.src_ref 11 "transposeshuffle.h" 122 22 first +.end_of_loop + 9104 "11100001" // NOPA; NOPB; VST bmll0, [p1], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9105 "00000000" // /* MW 15 */ + 9106 "00000000" // /* MW 14 */ + 9107 "01111000" // /* MW 13 */ + 9108 "10100101" // /* MW 12 */ + 9109 "00000001" // /* MW 11 */ + 9110 "00000000" // /* MW 10 */ + 9111 "00000000" // /* MW 9 */ + 9112 "10000000" // /* MW 8 */ + 9113 "00000110" // /* MW 7 */ + 9114 "00011100" // /* MW 6 */ + 9115 "00100001" // /* MW 5 */ + 9116 "00000000" // /* MW 4 */ + 9117 "11110000" // /* MW 3 */ + 9118 "00101100" // /* MW 2 */ + 9119 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 126 first +.loop_nesting 0 + 9120 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9121 "00000000" // /* MW 3 */ + 9122 "00101000" // /* MW 2 */ + 9123 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9125 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9127 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9129 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9131 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9132 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9133 "01100111" // /* MW 3 */ + 9134 "00000001" // /* MW 2 */ + 9135 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_496 +.src_ref 11 "transposeshuffle.h" 86 34 + 9136 "01000100" // MOVXM p2, #508864 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9137 "10000000" // /* MW 5 */ + 9138 "11000111" // /* MW 4 */ + 9139 "11000100" // /* MW 3 */ + 9140 "00000111" // /* MW 2 */ + 9141 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 86 34 first + 9142 "10011000" // LDA r0, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9143 "00010110" // /* MW 3 */ + 9144 "00000100" // /* MW 2 */ + 9145 "00000010" // /* MW 1 */ + 9146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9147 "00000000" // /* MW 1 */ + 9148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9149 "00000000" // /* MW 1 */ + 9150 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9151 "00000000" // /* MW 1 */ + 9152 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9153 "00000000" // /* MW 1 */ + 9154 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9155 "00000000" // /* MW 1 */ + 9156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9157 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 86 8 +.src_ref 11 "transposeshuffle.h" 86 26 + 9158 "10000100" // JZ r0, #9776 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9776 delay_slots=5 */ + 9159 "00000001" // /* MW 5 */ + 9160 "00000000" // /* MW 4 */ + 9161 "00011000" // /* MW 3 */ + 9162 "00010011" // /* MW 2 */ + 9163 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9165 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9167 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9168 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9169 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9171 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9173 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 86 8 +.src_ref 11 "transposeshuffle.h" 86 8 + 9174 "10111010" // MOVA m5, #36; MOVXM p4, #508868 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9175 "00010000" // /* MW 9 */ + 9176 "11100010" // /* MW 8 */ + 9177 "00110001" // /* MW 7 */ + 9178 "11110010" // /* MW 6 */ + 9179 "00000001" // /* MW 5 */ + 9180 "00000000" // /* MW 4 */ + 9181 "10000000" // /* MW 3 */ + 9182 "10010100" // /* MW 2 */ + 9183 "00000100" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 86 8 +.src_ref 11 "transposeshuffle.h" 86 8 +.src_ref 11 "transposeshuffle.h" 87 35 + 9184 "10111010" // LDA r1, [p4], #4; MOVX r5, #8; MOV r2, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9185 "01011000" // /* MW 9 */ + 9186 "11111101" // /* MW 8 */ + 9187 "01001111" // /* MW 7 */ + 9188 "00001000" // /* MW 6 */ + 9189 "01010001" // /* MW 5 */ + 9190 "00000000" // /* MW 4 */ + 9191 "11010000" // /* MW 3 */ + 9192 "10000110" // /* MW 2 */ + 9193 "10000011" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 +.src_ref 5 "aie_core.h" 100 15 +.src_ref 8 "vector.hpp" 1132 26 +.src_ref 8 "vector.hpp" 1152 43 +.src_ref 11 "transposeshuffle.h" 86 8 + 9194 "10111010" // LDA r4, [p4], m5; MOVX r3, #1; MOV dc4, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9195 "01011000" // /* MW 9 */ + 9196 "00000000" // /* MW 8 */ + 9197 "01100000" // /* MW 7 */ + 9198 "00101010" // /* MW 6 */ + 9199 "00110000" // /* MW 5 */ + 9200 "00000000" // /* MW 4 */ + 9201 "11010000" // /* MW 3 */ + 9202 "00010010" // /* MW 2 */ + 9203 "10010101" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 +.src_ref 8 "vector.hpp" 1152 43 +.src_ref 11 "transposeshuffle.h" 86 8 +.src_ref 11 "transposeshuffle.h" 86 8 + 9204 "01110110" // LDA m1, [p4], #-8; MOVS dc0, dc4; MOVX r6, #4; MOV m4, #32 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9205 "01011000" // /* MW 11 */ + 9206 "00100000" // /* MW 10 */ + 9207 "00000000" // /* MW 9 */ + 9208 "10001010" // /* MW 8 */ + 9209 "01100000" // /* MW 7 */ + 9210 "00000000" // /* MW 6 */ + 9211 "01001011" // /* MW 5 */ + 9212 "00010000" // /* MW 4 */ + 9213 "11010000" // /* MW 3 */ + 9214 "10010000" // /* MW 2 */ + 9215 "10011101" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 +.src_ref 8 "vector.hpp" 1132 26 +.src_ref 8 "transpose.hpp" 224 15 +.src_ref 11 "transposeshuffle.h" 86 8 + 9216 "01110110" // LDA dn1, [p4], #-8; MOVS dc1, dc4; ADD r0, r0, #-1; MOV r7, #52 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9217 "01011000" // /* MW 11 */ + 9218 "00110100" // /* MW 10 */ + 9219 "11101000" // /* MW 9 */ + 9220 "11111000" // /* MW 8 */ + 9221 "00001111" // /* MW 7 */ + 9222 "00000000" // /* MW 6 */ + 9223 "01001011" // /* MW 5 */ + 9224 "00010000" // /* MW 4 */ + 9225 "11010001" // /* MW 3 */ + 9226 "10010100" // /* MW 2 */ + 9227 "10011101" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 +.src_ref 8 "vector.hpp" 1132 26 +.src_ref 11 "transposeshuffle.h" 86 8 +.src_ref 11 "transposeshuffle.h" 86 8 + 9228 "01110110" // LDA dj1, [p4], #12; MOVS dc5, dc4; MOVXM p2, #9312 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9229 "00010000" // /* MW 11 */ + 9230 "00110000" // /* MW 10 */ + 9231 "00110010" // /* MW 9 */ + 9232 "00001001" // /* MW 8 */ + 9233 "00000000" // /* MW 7 */ + 9234 "00000000" // /* MW 6 */ + 9235 "01001011" // /* MW 5 */ + 9236 "00010000" // /* MW 4 */ + 9237 "11010101" // /* MW 3 */ + 9238 "10011000" // /* MW 2 */ + 9239 "10000111" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 86 8 +.src_ref 11 "transposeshuffle.h" 87 12 + 9240 "10111010" // LDA dn5, [p4], #-8; MOVXM p3, #9344 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9241 "00010000" // /* MW 9 */ + 9242 "01000000" // /* MW 8 */ + 9243 "10110010" // /* MW 7 */ + 9244 "00001001" // /* MW 6 */ + 9245 "00000000" // /* MW 5 */ + 9246 "00000000" // /* MW 4 */ + 9247 "11010000" // /* MW 3 */ + 9248 "11010100" // /* MW 2 */ + 9249 "10011101" // /* MW 1 */ +.src_ref 8 "transpose.hpp" 225 15 +.src_ref 11 "transposeshuffle.h" 86 8 + 9250 "00101100" // LDA dj5, [p4], m4; MOVX r16, #53 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9251 "10101010" // /* MW 5 */ + 9252 "01000001" // /* MW 4 */ + 9253 "11010000" // /* MW 3 */ + 9254 "01011000" // /* MW 2 */ + 9255 "10010001" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 86 8 +.src_ref 11 "transposeshuffle.h" 87 35 first + 9256 "10111010" // LDA m0, [p4], #-8; LTU r27, r5, r1; ADD.NC r5, r1, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9257 "11001000" // /* MW 9 */ + 9258 "01111111" // /* MW 8 */ + 9259 "10101000" // /* MW 7 */ + 9260 "11100100" // /* MW 6 */ + 9261 "10110000" // /* MW 5 */ + 9262 "00001011" // /* MW 4 */ + 9263 "11010000" // /* MW 3 */ + 9264 "10000000" // /* MW 2 */ + 9265 "10011101" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 86 8 first +.src_ref 11 "transposeshuffle.h" 86 8 first + 9266 "10111010" // LDA dn0, [p4], #-8; LSHL r17, r5, r2; ADD.NC r5, r4, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9267 "11001000" // /* MW 9 */ + 9268 "00111111" // /* MW 8 */ + 9269 "10101001" // /* MW 7 */ + 9270 "01101100" // /* MW 6 */ + 9271 "00010001" // /* MW 5 */ + 9272 "00001011" // /* MW 4 */ + 9273 "11010000" // /* MW 3 */ + 9274 "10000100" // /* MW 2 */ + 9275 "10011101" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 86 8 +.src_ref 11 "transposeshuffle.h" 86 8 + 9276 "10111010" // LDA dj0, [p4], #12; LSHL r2, r5, r2; ADD.NC r5, r17, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9277 "01001000" // /* MW 9 */ + 9278 "01000000" // /* MW 8 */ + 9279 "10101100" // /* MW 7 */ + 9280 "01101100" // /* MW 6 */ + 9281 "00100001" // /* MW 5 */ + 9282 "00001010" // /* MW 4 */ + 9283 "11010000" // /* MW 3 */ + 9284 "10001000" // /* MW 2 */ + 9285 "10000111" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 86 8 + 9286 "10111010" // LDA dn4, [p4]; SEL.EQZ r5, r3, r5, r27; ADD.NC r3, r2, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9287 "01001000" // /* MW 9 */ + 9288 "10000000" // /* MW 8 */ + 9289 "01101000" // /* MW 7 */ + 9290 "10010000" // /* MW 6 */ + 9291 "01010010" // /* MW 5 */ + 9292 "00000110" // /* MW 4 */ + 9293 "11010000" // /* MW 3 */ + 9294 "11000100" // /* MW 2 */ + 9295 "10000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 86 8 + 9296 "11100001" // LDA dj4, [p4, #-8]; NOPB; NOPS; ADD r5, r5, #-1; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9297 "00000000" // /* MW 15 */ + 9298 "00000000" // /* MW 14 */ + 9299 "01111000" // /* MW 13 */ + 9300 "10100101" // /* MW 12 */ + 9301 "00000001" // /* MW 11 */ + 9302 "11111000" // /* MW 10 */ + 9303 "01011111" // /* MW 9 */ + 9304 "00001010" // /* MW 8 */ + 9305 "01011011" // /* MW 7 */ + 9306 "00000001" // /* MW 6 */ + 9307 "00100000" // /* MW 5 */ + 9308 "00000000" // /* MW 4 */ + 9309 "11010000" // /* MW 3 */ + 9310 "11001000" // /* MW 2 */ + 9311 "10011100" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_672 +.src_ref 11 "transposeshuffle.h" 87 12 first +.loop_nesting 1 + 9312 "10000100" // JZ r1, #9760 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9760 delay_slots=5 */ + 9313 "00000001" // /* MW 5 */ + 9314 "00000000" // /* MW 4 */ + 9315 "00010000" // /* MW 3 */ + 9316 "00010011" // /* MW 2 */ + 9317 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9318 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9319 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9320 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9321 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9323 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9325 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9327 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 87 12 + 9328 "11100001" // NOPA; NOPB; NOPS; NOPX; MOV r17, r5; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9329 "00000000" // /* MW 15 */ + 9330 "00000000" // /* MW 14 */ + 9331 "01111000" // /* MW 13 */ + 9332 "01010000" // /* MW 12 */ + 9333 "00101001" // /* MW 11 */ + 9334 "00000010" // /* MW 10 */ + 9335 "00000000" // /* MW 9 */ + 9336 "00000000" // /* MW 8 */ + 9337 "01011011" // /* MW 7 */ + 9338 "00000001" // /* MW 6 */ + 9339 "00100000" // /* MW 5 */ + 9340 "00000000" // /* MW 4 */ + 9341 "11110000" // /* MW 3 */ + 9342 "00101100" // /* MW 2 */ + 9343 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_704 +.src_ref 11 "transposeshuffle.h" 88 16 first +.loop_nesting 2 + 9344 "10000100" // JZ r4, #9744 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9744 delay_slots=5 */ + 9345 "00000001" // /* MW 5 */ + 9346 "00000000" // /* MW 4 */ + 9347 "00001000" // /* MW 3 */ + 9348 "00010011" // /* MW 2 */ + 9349 "00100000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9351 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9353 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9355 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9357 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9359 "00000000" // /* MW 1 */ + 9360 "10011000" // LTU r18, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9361 "01101100" // /* MW 3 */ + 9362 "11100100" // /* MW 2 */ + 9363 "00010000" // /* MW 1 */ + 9364 "10000100" // JNZ r18, #9600 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9600 delay_slots=5 */ + 9365 "00000001" // /* MW 5 */ + 9366 "01000000" // /* MW 4 */ + 9367 "11000000" // /* MW 3 */ + 9368 "00010010" // /* MW 2 */ + 9369 "10010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9371 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9373 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9375 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9377 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9379 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1132 26 first +.src_ref 11 "transposeshuffle.h" 88 16 + 9380 "00111010" // VLDB x0, [p0, #64]; MOVXM ls, #9488 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9381 "00010000" // /* MW 9 */ + 9382 "10001000" // /* MW 8 */ + 9383 "01111010" // /* MW 7 */ + 9384 "00001000" // /* MW 6 */ + 9385 "00000000" // /* MW 5 */ + 9386 "00000000" // /* MW 4 */ + 9387 "01101000" // /* MW 3 */ + 9388 "00101000" // /* MW 2 */ + 9389 "00000000" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 8 "vector.hpp" 1132 26 +.src_ref 11 "transposeshuffle.h" 88 16 first + 9390 "00111010" // VLDB.3D x1, [p0], d1; MOVXM le, #9520 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9391 "00010000" // /* MW 9 */ + 9392 "10011000" // /* MW 8 */ + 9393 "10111010" // /* MW 7 */ + 9394 "00001001" // /* MW 6 */ + 9395 "00000000" // /* MW 5 */ + 9396 "00000000" // /* MW 4 */ + 9397 "11101000" // /* MW 3 */ + 9398 "01110000" // /* MW 2 */ + 9399 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 88 16 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 9400 "10011000" // ADD.NC lc, r3, #-3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9401 "11111110" // /* MW 3 */ + 9402 "01110001" // /* MW 2 */ + 9403 "00011101" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1132 26 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 9404 "00011000" // VLDB x0, [p0, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9405 "00110100" // /* MW 3 */ + 9406 "00010100" // /* MW 2 */ + 9407 "00111000" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 8 "vector.hpp" 1132 26 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9408 "11100001" // NOPA; VLDB.3D x1, [p0], d1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9409 "00000000" // /* MW 15 */ + 9410 "00000000" // /* MW 14 */ + 9411 "01111000" // /* MW 13 */ + 9412 "10100101" // /* MW 12 */ + 9413 "00000001" // /* MW 11 */ + 9414 "00000000" // /* MW 10 */ + 9415 "00000000" // /* MW 9 */ + 9416 "00000000" // /* MW 8 */ + 9417 "01011011" // /* MW 7 */ + 9418 "00000001" // /* MW 6 */ + 9419 "11101000" // /* MW 5 */ + 9420 "01110000" // /* MW 4 */ + 9421 "11110000" // /* MW 3 */ + 9422 "00101100" // /* MW 2 */ + 9423 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9424 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9425 "00000000" // /* MW 15 */ + 9426 "00000000" // /* MW 14 */ + 9427 "01111000" // /* MW 13 */ + 9428 "10100101" // /* MW 12 */ + 9429 "00000001" // /* MW 11 */ + 9430 "00000000" // /* MW 10 */ + 9431 "00000000" // /* MW 9 */ + 9432 "00000000" // /* MW 8 */ + 9433 "01011011" // /* MW 7 */ + 9434 "00000001" // /* MW 6 */ + 9435 "00100000" // /* MW 5 */ + 9436 "00000000" // /* MW 4 */ + 9437 "11110000" // /* MW 3 */ + 9438 "00101100" // /* MW 2 */ + 9439 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1132 26 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9440 "11100001" // NOPA; VLDB x0, [p0, #64]; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9441 "00000000" // /* MW 15 */ + 9442 "00000000" // /* MW 14 */ + 9443 "01111000" // /* MW 13 */ + 9444 "10100101" // /* MW 12 */ + 9445 "00000001" // /* MW 11 */ + 9446 "00000000" // /* MW 10 */ + 9447 "00000000" // /* MW 9 */ + 9448 "00000000" // /* MW 8 */ + 9449 "01011011" // /* MW 7 */ + 9450 "00000001" // /* MW 6 */ + 9451 "01101000" // /* MW 5 */ + 9452 "00101000" // /* MW 4 */ + 9453 "11110000" // /* MW 3 */ + 9454 "00101100" // /* MW 2 */ + 9455 "00000000" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 8 "vector.hpp" 1132 26 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9456 "11100001" // NOPA; VLDB.3D x1, [p0], d1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9457 "00000000" // /* MW 15 */ + 9458 "00000000" // /* MW 14 */ + 9459 "01111000" // /* MW 13 */ + 9460 "10100101" // /* MW 12 */ + 9461 "00000001" // /* MW 11 */ + 9462 "00000000" // /* MW 10 */ + 9463 "00000000" // /* MW 9 */ + 9464 "00000000" // /* MW 8 */ + 9465 "01011011" // /* MW 7 */ + 9466 "00000001" // /* MW 6 */ + 9467 "11101000" // /* MW 5 */ + 9468 "01110000" // /* MW 4 */ + 9469 "11110000" // /* MW 3 */ + 9470 "00101100" // /* MW 2 */ + 9471 "00000000" // /* MW 1 */ +.src_ref 8 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9472 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmlh0, x1, x0, r7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9473 "00000000" // /* MW 15 */ + 9474 "00000000" // /* MW 14 */ + 9475 "11101000" // /* MW 13 */ + 9476 "00001110" // /* MW 12 */ + 9477 "01000100" // /* MW 11 */ + 9478 "00000000" // /* MW 10 */ + 9479 "00000000" // /* MW 9 */ + 9480 "00000000" // /* MW 8 */ + 9481 "01011011" // /* MW 7 */ + 9482 "00000001" // /* MW 6 */ + 9483 "00100000" // /* MW 5 */ + 9484 "00000000" // /* MW 4 */ + 9485 "11110000" // /* MW 3 */ + 9486 "00101100" // /* MW 2 */ + 9487 "00000000" // /* MW 1 */ +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_848 +.src_ref 8 "vector.hpp" 1132 26 first +.src_ref 8 "vector.hpp" 1152 43 +.src_ref 8 "transpose.hpp" 225 15 first +.begin_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 3 + 9488 "11100001" // NOPA; VLDB x0, [p0, #64]; MOVS p4, p1; NOPX; VSHUFFLE bmll0, x1, x0, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9489 "00000000" // /* MW 15 */ + 9490 "00000000" // /* MW 14 */ + 9491 "11101000" // /* MW 13 */ + 9492 "00100000" // /* MW 12 */ + 9493 "00000100" // /* MW 11 */ + 9494 "00000000" // /* MW 10 */ + 9495 "00000000" // /* MW 9 */ + 9496 "00000000" // /* MW 8 */ + 9497 "10001011" // /* MW 7 */ + 9498 "10000100" // /* MW 6 */ + 9499 "01101100" // /* MW 5 */ + 9500 "00101000" // /* MW 4 */ + 9501 "11110000" // /* MW 3 */ + 9502 "00101100" // /* MW 2 */ + 9503 "00000000" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 8 "vector.hpp" 1132 26 +.src_ref 8 "vector.hpp" 1152 43 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9504 "11100001" // NOPA; VLDB.3D x1, [p0], d1; VST.3D bmlh0, [p1], d0; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9505 "00000000" // /* MW 15 */ + 9506 "00000000" // /* MW 14 */ + 9507 "01111000" // /* MW 13 */ + 9508 "10100101" // /* MW 12 */ + 9509 "00000001" // /* MW 11 */ + 9510 "00000000" // /* MW 10 */ + 9511 "00000000" // /* MW 9 */ + 9512 "10000000" // /* MW 8 */ + 9513 "00100110" // /* MW 7 */ + 9514 "00011000" // /* MW 6 */ + 9515 "11101001" // /* MW 5 */ + 9516 "01110000" // /* MW 4 */ + 9517 "11110000" // /* MW 3 */ + 9518 "00101100" // /* MW 2 */ + 9519 "00000000" // /* MW 1 */ +.label ZLE_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_880 +.src_ref 8 "vector.hpp" 1152 43 +.src_ref 8 "transpose.hpp" 224 15 first +.end_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9520 "11100001" // NOPA; NOPB; VST bmll0, [p4, #64]; NOPX; VSHUFFLE bmlh0, x1, x0, r7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9521 "00000000" // /* MW 15 */ + 9522 "00000000" // /* MW 14 */ + 9523 "11101000" // /* MW 13 */ + 9524 "00001110" // /* MW 12 */ + 9525 "01000100" // /* MW 11 */ + 9526 "00000000" // /* MW 10 */ + 9527 "00000000" // /* MW 9 */ + 9528 "10000000" // /* MW 8 */ + 9529 "00000110" // /* MW 7 */ + 9530 "00010100" // /* MW 6 */ + 9531 "00100100" // /* MW 5 */ + 9532 "00000000" // /* MW 4 */ + 9533 "11110000" // /* MW 3 */ + 9534 "00101100" // /* MW 2 */ + 9535 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1152 43 +.src_ref 8 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 2 + 9536 "00000010" // MOVS p4, p1; VSHUFFLE bmll0, x1, x0, r16 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9537 "11100000" // /* MW 7 */ + 9538 "00100000" // /* MW 6 */ + 9539 "00000100" // /* MW 5 */ + 9540 "00000000" // /* MW 4 */ + 9541 "01100000" // /* MW 3 */ + 9542 "10010001" // /* MW 2 */ + 9543 "10010000" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 8 "vector.hpp" 1152 43 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9544 "10011000" // VST.3D bmlh0, [p1], d0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9545 "00100110" // /* MW 3 */ + 9546 "00011000" // /* MW 2 */ + 9547 "00001001" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1152 43 +.src_ref 8 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9548 "00000010" // VST bmll0, [p4, #64]; VSHUFFLE bmlh0, x1, x0, r7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9549 "11100000" // /* MW 7 */ + 9550 "00001110" // /* MW 6 */ + 9551 "01000100" // /* MW 5 */ + 9552 "00000000" // /* MW 4 */ + 9553 "11010000" // /* MW 3 */ + 9554 "10000000" // /* MW 2 */ + 9555 "10000010" // /* MW 1 */ +.src_ref 8 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9556 "11011000" // VSHUFFLE bmll0, x1, x0, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9557 "01000001" // /* MW 3 */ + 9558 "00001000" // /* MW 2 */ + 9559 "00011000" // /* MW 1 */ + 9560 "10000100" // J #9744 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9744 delay_slots=5 */ + 9561 "00000000" // /* MW 5 */ + 9562 "00000000" // /* MW 4 */ + 9563 "00001000" // /* MW 3 */ + 9564 "00010011" // /* MW 2 */ + 9565 "00000000" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 8 "vector.hpp" 1152 43 +.src_ref 8 "vector.hpp" 1152 43 first +.delay_slot + 9566 "00000010" // VST.3D bmlh0, [p1], d0; MOV p4, p1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9567 "01110000" // /* MW 7 */ + 9568 "01100000" // /* MW 6 */ + 9569 "00110001" // /* MW 5 */ + 9570 "00000010" // /* MW 4 */ + 9571 "11010000" // /* MW 3 */ + 9572 "00000100" // /* MW 2 */ + 9573 "00100011" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1152 43 +.src_ref 8 "transpose.hpp" 224 15 first +.delay_slot + 9574 "00000010" // VST bmll0, [p4, #64]; VSHUFFLE bmlh0, x1, x0, r7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9575 "11100000" // /* MW 7 */ + 9576 "00001110" // /* MW 6 */ + 9577 "01000100" // /* MW 5 */ + 9578 "00000000" // /* MW 4 */ + 9579 "11010000" // /* MW 3 */ + 9580 "10000000" // /* MW 2 */ + 9581 "10000010" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1152 43 +.src_ref 8 "transpose.hpp" 225 15 first +.delay_slot + 9582 "00000010" // MOVS p4, p1; VSHUFFLE bmll0, x1, x0, r16 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9583 "11100000" // /* MW 7 */ + 9584 "00100000" // /* MW 6 */ + 9585 "00000100" // /* MW 5 */ + 9586 "00000000" // /* MW 4 */ + 9587 "01100000" // /* MW 3 */ + 9588 "10010001" // /* MW 2 */ + 9589 "10010000" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 8 "vector.hpp" 1152 43 first +.delay_slot + 9590 "10011000" // VST.3D bmlh0, [p1], d0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9591 "00100110" // /* MW 3 */ + 9592 "00011000" // /* MW 2 */ + 9593 "00001001" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1152 43 +.delay_slot + 9594 "00001100" // NOPA; VST bmll0, [p4, #64] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9595 "00001101" // /* MW 5 */ + 9596 "00101000" // /* MW 4 */ + 9597 "11111000" // /* MW 3 */ + 9598 "00101100" // /* MW 2 */ + 9599 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_960 +.src_ref 11 "transposeshuffle.h" 88 16 first + 9600 "01000100" // MOVXM ls, #9616 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9601 "00100000" // /* MW 5 */ + 9602 "11101011" // /* MW 4 */ + 9603 "00100001" // /* MW 3 */ + 9604 "00000000" // /* MW 2 */ + 9605 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 88 16 + 9606 "01000100" // MOVXM le, #9728 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9607 "00000000" // /* MW 5 */ + 9608 "11101100" // /* MW 4 */ + 9609 "00100110" // /* MW 3 */ + 9610 "00000000" // /* MW 2 */ + 9611 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 88 16 + 9612 "10011000" // ADD.NC lc, r2, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9613 "00000000" // /* MW 3 */ + 9614 "01110001" // /* MW 2 */ + 9615 "00011101" // /* MW 1 */ +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_976 +.src_ref 8 "vector.hpp" 1132 26 first +.src_ref 8 "vector.hpp" 1152 43 +.begin_of_loop +.loop_nesting 3 + 9616 "11110100" // VLDB x0, [p0, #64]; MOV p4, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9617 "10000001" // /* MW 5 */ + 9618 "11000101" // /* MW 4 */ + 9619 "10001000" // /* MW 3 */ + 9620 "10000110" // /* MW 2 */ + 9621 "00000010" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 8 "vector.hpp" 1132 26 + 9622 "00011000" // VLDB.3D x1, [p0], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9623 "01110100" // /* MW 3 */ + 9624 "00111000" // /* MW 2 */ + 9625 "00111000" // /* MW 1 */ + 9626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9627 "00000000" // /* MW 1 */ + 9628 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9629 "00000000" // /* MW 1 */ + 9630 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9631 "00000000" // /* MW 1 */ + 9632 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9633 "00000000" // /* MW 15 */ + 9634 "00000000" // /* MW 14 */ + 9635 "01111000" // /* MW 13 */ + 9636 "10100101" // /* MW 12 */ + 9637 "00000001" // /* MW 11 */ + 9638 "00000000" // /* MW 10 */ + 9639 "00000000" // /* MW 9 */ + 9640 "00000000" // /* MW 8 */ + 9641 "01011011" // /* MW 7 */ + 9642 "00000001" // /* MW 6 */ + 9643 "00100000" // /* MW 5 */ + 9644 "00000000" // /* MW 4 */ + 9645 "11110000" // /* MW 3 */ + 9646 "00101100" // /* MW 2 */ + 9647 "00000000" // /* MW 1 */ + 9648 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9649 "00000000" // /* MW 15 */ + 9650 "00000000" // /* MW 14 */ + 9651 "01111000" // /* MW 13 */ + 9652 "10100101" // /* MW 12 */ + 9653 "00000001" // /* MW 11 */ + 9654 "00000000" // /* MW 10 */ + 9655 "00000000" // /* MW 9 */ + 9656 "00000000" // /* MW 8 */ + 9657 "01011011" // /* MW 7 */ + 9658 "00000001" // /* MW 6 */ + 9659 "00100000" // /* MW 5 */ + 9660 "00000000" // /* MW 4 */ + 9661 "11110000" // /* MW 3 */ + 9662 "00101100" // /* MW 2 */ + 9663 "00000000" // /* MW 1 */ + 9664 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9665 "00000000" // /* MW 15 */ + 9666 "00000000" // /* MW 14 */ + 9667 "01111000" // /* MW 13 */ + 9668 "10100101" // /* MW 12 */ + 9669 "00000001" // /* MW 11 */ + 9670 "00000000" // /* MW 10 */ + 9671 "00000000" // /* MW 9 */ + 9672 "00000000" // /* MW 8 */ + 9673 "01011011" // /* MW 7 */ + 9674 "00000001" // /* MW 6 */ + 9675 "00100000" // /* MW 5 */ + 9676 "00000000" // /* MW 4 */ + 9677 "11110000" // /* MW 3 */ + 9678 "00101100" // /* MW 2 */ + 9679 "00000000" // /* MW 1 */ +.src_ref 8 "transpose.hpp" 224 15 first + 9680 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmlh0, x1, x0, r7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9681 "00000000" // /* MW 15 */ + 9682 "00000000" // /* MW 14 */ + 9683 "11101000" // /* MW 13 */ + 9684 "00001110" // /* MW 12 */ + 9685 "01000100" // /* MW 11 */ + 9686 "00000000" // /* MW 10 */ + 9687 "00000000" // /* MW 9 */ + 9688 "00000000" // /* MW 8 */ + 9689 "01011011" // /* MW 7 */ + 9690 "00000001" // /* MW 6 */ + 9691 "00100000" // /* MW 5 */ + 9692 "00000000" // /* MW 4 */ + 9693 "11110000" // /* MW 3 */ + 9694 "00101100" // /* MW 2 */ + 9695 "00000000" // /* MW 1 */ +.src_ref 8 "transpose.hpp" 225 15 first + 9696 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x1, x0, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9697 "00000000" // /* MW 15 */ + 9698 "00000000" // /* MW 14 */ + 9699 "11101000" // /* MW 13 */ + 9700 "00100000" // /* MW 12 */ + 9701 "00000100" // /* MW 11 */ + 9702 "00000000" // /* MW 10 */ + 9703 "00000000" // /* MW 9 */ + 9704 "00000000" // /* MW 8 */ + 9705 "01011011" // /* MW 7 */ + 9706 "00000001" // /* MW 6 */ + 9707 "00100000" // /* MW 5 */ + 9708 "00000000" // /* MW 4 */ + 9709 "11110000" // /* MW 3 */ + 9710 "00101100" // /* MW 2 */ + 9711 "00000000" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 8 "vector.hpp" 1152 43 first + 9712 "11100001" // NOPA; NOPB; VST.3D bmlh0, [p1], d0; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9713 "00000000" // /* MW 15 */ + 9714 "00000000" // /* MW 14 */ + 9715 "01111000" // /* MW 13 */ + 9716 "10100101" // /* MW 12 */ + 9717 "00000001" // /* MW 11 */ + 9718 "00000000" // /* MW 10 */ + 9719 "00000000" // /* MW 9 */ + 9720 "10000000" // /* MW 8 */ + 9721 "00100110" // /* MW 7 */ + 9722 "00011000" // /* MW 6 */ + 9723 "00100001" // /* MW 5 */ + 9724 "00000000" // /* MW 4 */ + 9725 "11110000" // /* MW 3 */ + 9726 "00101100" // /* MW 2 */ + 9727 "00000000" // /* MW 1 */ +.label ZLE_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1088 +.src_ref 8 "vector.hpp" 1152 43 +.end_of_loop + 9728 "11100001" // NOPA; NOPB; VST bmll0, [p4, #64]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9729 "00000000" // /* MW 15 */ + 9730 "00000000" // /* MW 14 */ + 9731 "01111000" // /* MW 13 */ + 9732 "10100101" // /* MW 12 */ + 9733 "00000001" // /* MW 11 */ + 9734 "00000000" // /* MW 10 */ + 9735 "00000000" // /* MW 9 */ + 9736 "10000000" // /* MW 8 */ + 9737 "00000110" // /* MW 7 */ + 9738 "00010100" // /* MW 6 */ + 9739 "00100100" // /* MW 5 */ + 9740 "00000000" // /* MW 4 */ + 9741 "11110000" // /* MW 3 */ + 9742 "00101100" // /* MW 2 */ + 9743 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1104 +.src_ref 11 "transposeshuffle.h" 87 12 first +.loop_nesting 2 + 9744 "00011000" // JNZD r17, r17, p3 /* MW 4 */ /* control_operation: words=4 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 9745 "11100000" // /* MW 3 */ + 9746 "01100010" // /* MW 2 */ + 9747 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9749 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9751 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9753 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9755 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9756 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9757 "01100111" // /* MW 3 */ + 9758 "00000001" // /* MW 2 */ + 9759 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1120 +.src_ref 11 "transposeshuffle.h" 86 8 first +.loop_nesting 1 + 9760 "00011000" // JNZD r0, r0, p2 /* MW 4 */ /* control_operation: words=4 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 9761 "10100000" // /* MW 3 */ + 9762 "00000000" // /* MW 2 */ + 9763 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9765 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9767 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9769 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9771 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9772 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9773 "01100111" // /* MW 3 */ + 9774 "00000001" // /* MW 2 */ + 9775 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1136 +.src_ref 11 "transposeshuffle.h" 126 first +.loop_nesting 0 + 9776 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9777 "00000000" // /* MW 3 */ + 9778 "00101000" // /* MW 2 */ + 9779 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9780 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9781 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9783 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9785 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9787 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params__end +.label __Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params___func_end0 + 9789 "00000000" // /* MW 1 */ +.label __ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj___func_begin0 +.label _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj +.function transpose4d_adf_wrapper, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> > > _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj +.src_ref 10 "transpose4d_adf_wrapper.cpp" 33 first +.function_start + 9792 "11111000" // MOV p3, p6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9793 "11000000" // /* MW 3 */ + 9794 "01101100" // /* MW 2 */ + 9795 "00011011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 10 "transpose4d_adf_wrapper.cpp" 37 8 + 9796 "00111010" // MOVS p6, p1; MOVXM p1, #508780 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9797 "00010001" // /* MW 9 */ + 9798 "10110110" // /* MW 8 */ + 9799 "10110001" // /* MW 7 */ + 9800 "11110000" // /* MW 6 */ + 9801 "00000001" // /* MW 5 */ + 9802 "00000000" // /* MW 4 */ + 9803 "01100000" // /* MW 3 */ + 9804 "10010001" // /* MW 2 */ + 9805 "11010000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 37 8 first + 9806 "10011000" // LDA r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9807 "00010110" // /* MW 3 */ + 9808 "00000110" // /* MW 2 */ + 9809 "00000001" // /* MW 1 */ + 9810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9811 "00000000" // /* MW 1 */ + 9812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9813 "00000000" // /* MW 1 */ + 9814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9815 "00000000" // /* MW 1 */ + 9816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9817 "00000000" // /* MW 1 */ + 9818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9819 "00000000" // /* MW 1 */ + 9820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9821 "00000000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 37 8 +.src_ref 10 "transpose4d_adf_wrapper.cpp" 37 15 + 9822 "10000100" // JNZ r16, #9888 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9888 delay_slots=5 */ + 9823 "00000001" // /* MW 5 */ + 9824 "01000000" // /* MW 4 */ + 9825 "01010000" // /* MW 3 */ + 9826 "00010011" // /* MW 2 */ + 9827 "10000000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 33 +.delay_slot + 9828 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9829 "00000001" // /* MW 5 */ + 9830 "00000000" // /* MW 4 */ + 9831 "00000000" // /* MW 3 */ + 9832 "00001000" // /* MW 2 */ + 9833 "00000000" // /* MW 1 */ +.delay_slot + 9834 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9835 "00111101" // /* MW 3 */ + 9836 "11110100" // /* MW 2 */ + 9837 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.delay_slot + 9838 "00000010" // MOVS p7, p0; MOV p1, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9839 "01110000" // /* MW 7 */ + 9840 "01100000" // /* MW 6 */ + 9841 "10110111" // /* MW 5 */ + 9842 "00000000" // /* MW 4 */ + 9843 "01100000" // /* MW 3 */ + 9844 "00010001" // /* MW 2 */ + 9845 "11110000" // /* MW 1 */ +.delay_slot + 9846 "10011000" // ST p3, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9847 "10011101" // /* MW 3 */ + 9848 "11111001" // /* MW 2 */ + 9849 "00001111" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 38 8 +.delay_slot + 9850 "00111010" // ST p1, [sp, #-4]; MOVXM p0, #508864 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9851 "00010001" // /* MW 9 */ + 9852 "11100000" // /* MW 8 */ + 9853 "00110001" // /* MW 7 */ + 9854 "11110000" // /* MW 6 */ + 9855 "00000001" // /* MW 5 */ + 9856 "00000000" // /* MW 4 */ + 9857 "10110000" // /* MW 3 */ + 9858 "10010011" // /* MW 2 */ + 9859 "11111111" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 38 8 first +.no_stack_arguments + 9860 "00000100" // JL #8368 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8368 delay_slots=5 */ + 9861 "00000001" // /* MW 5 */ + 9862 "00000000" // /* MW 4 */ + 9863 "01011000" // /* MW 3 */ + 9864 "00010000" // /* MW 2 */ + 9865 "00000000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 38 8 +.delay_slot + 9866 "11111000" // MOV p1, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9867 "11000000" // /* MW 3 */ + 9868 "01100100" // /* MW 2 */ + 9869 "00011001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9871 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9873 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9874 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9875 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9876 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9877 "10000001" // /* MW 11 */ + 9878 "10101101" // /* MW 10 */ + 9879 "00000000" // /* MW 9 */ + 9880 "00000000" // /* MW 8 */ + 9881 "00000000" // /* MW 7 */ + 9882 "00000000" // /* MW 6 */ + 9883 "00100000" // /* MW 5 */ + 9884 "00000000" // /* MW 4 */ + 9885 "11110000" // /* MW 3 */ + 9886 "00101100" // /* MW 2 */ + 9887 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj_96 +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 11 "transposeshuffle.h" 137 72 +.return_address + 9888 "10111010" // LDA r16, [p7]; MOVXM p7, #508884 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9889 "00010000" // /* MW 9 */ + 9890 "11101010" // /* MW 8 */ + 9891 "10110001" // /* MW 7 */ + 9892 "11110011" // /* MW 6 */ + 9893 "00000001" // /* MW 5 */ + 9894 "00000000" // /* MW 4 */ + 9895 "11010000" // /* MW 3 */ + 9896 "11000010" // /* MW 2 */ + 9897 "11100000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 137 72 first + 9898 "10011000" // LDA r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9899 "00110110" // /* MW 3 */ + 9900 "00000110" // /* MW 2 */ + 9901 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 9902 "10011000" // LDA p1, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9903 "10011110" // /* MW 3 */ + 9904 "00000100" // /* MW 2 */ + 9905 "00000110" // /* MW 1 */ + 9906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9907 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 137 11 first +.no_stack_arguments + 9908 "00000100" // JL #8640 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8640 delay_slots=5 */ + 9909 "00000001" // /* MW 5 */ + 9910 "00000000" // /* MW 4 */ + 9911 "11100000" // /* MW 3 */ + 9912 "00010000" // /* MW 2 */ + 9913 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 137 64 +.delay_slot + 9914 "00011000" // MOVX r18, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9915 "00000101" // /* MW 3 */ + 9916 "00100100" // /* MW 2 */ + 9917 "00010000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 137 11 +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 27 +.delay_slot + 9918 "01000100" // MOVXM p2, #508864 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9919 "10000000" // /* MW 5 */ + 9920 "11000111" // /* MW 4 */ + 9921 "11000100" // /* MW 3 */ + 9922 "00000111" // /* MW 2 */ + 9923 "00000000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 27 +.delay_slot + 9924 "11111000" // MOV p6, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9925 "11000000" // /* MW 3 */ + 9926 "01100100" // /* MW 2 */ + 9927 "00011110" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 137 64 +.delay_slot + 9928 "10011000" // LSHL r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9929 "00101101" // /* MW 3 */ + 9930 "01100011" // /* MW 2 */ + 9931 "00010100" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 137 64 +.delay_slot + 9932 "01011000" // ADD.NC p0, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9933 "11000001" // /* MW 3 */ + 9934 "01101000" // /* MW 2 */ + 9935 "00011000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 8 +.src_ref 10 "transpose4d_adf_wrapper.cpp" 46 +.return_address + 9936 "10111010" // LDA lr, [sp, #-12]; MOVXM p2, #508780 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9937 "00010000" // /* MW 9 */ + 9938 "10110110" // /* MW 8 */ + 9939 "00110001" // /* MW 7 */ + 9940 "11110001" // /* MW 6 */ + 9941 "00000001" // /* MW 5 */ + 9942 "00000000" // /* MW 4 */ + 9943 "00100000" // /* MW 3 */ + 9944 "10000111" // /* MW 2 */ + 9945 "11111110" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 8 +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 8 first + 9946 "00101100" // LDA r16, [p2]; MOVX r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9947 "00000010" // /* MW 5 */ + 9948 "01100000" // /* MW 4 */ + 9949 "11010000" // /* MW 3 */ + 9950 "11000010" // /* MW 2 */ + 9951 "01000000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 27 + 9952 "10011000" // LDA r17, [p6, #24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9953 "00110110" // /* MW 3 */ + 9954 "01100110" // /* MW 2 */ + 9955 "00000110" // /* MW 1 */ + 9956 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9957 "00011001" // /* MW 3 */ + 9958 "11111011" // /* MW 2 */ + 9959 "00000111" // /* MW 1 */ + 9960 "00011000" // LDA p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9961 "10011001" // /* MW 3 */ + 9962 "11111111" // /* MW 2 */ + 9963 "00000111" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 46 first + 9964 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9965 "00000001" // /* MW 5 */ + 9966 "00000000" // /* MW 4 */ + 9967 "00000000" // /* MW 3 */ + 9968 "11111000" // /* MW 2 */ + 9969 "11111111" // /* MW 1 */ + 9970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9971 "00000000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 46 + 9972 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9973 "00000000" // /* MW 3 */ + 9974 "00101000" // /* MW 2 */ + 9975 "00010000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 8 first +.delay_slot + 9976 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9977 "00000111" // /* MW 3 */ + 9978 "00100000" // /* MW 2 */ + 9979 "00010100" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 17 +.delay_slot + 9980 "10011000" // EQ r27, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9981 "00000111" // /* MW 3 */ + 9982 "01110111" // /* MW 2 */ + 9983 "00010100" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 8 +.delay_slot + 9984 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9985 "10000010" // /* MW 3 */ + 9986 "00100001" // /* MW 2 */ + 9987 "00010100" // /* MW 1 */ +.delay_slot + 9988 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9989 "00010001" // /* MW 3 */ + 9990 "00000110" // /* MW 2 */ + 9991 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj__end +.label __ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj___func_end0 + 9993 "00000000" // /* MW 1 */ +.label __Z14_b7835_wrapperPPv___func_begin0 +.label _Z14_b7835_wrapperPPv +.function _b7835_wrapper _Z14_b7835_wrapperPPv +.src_ref 0 "0_0_reloadable82.cc" 43 first +.src_ref 0 "0_0_reloadable82.cc" 45 79 +.function_start + 10000 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10001 "11000000" // /* MW 3 */ + 10002 "01100000" // /* MW 2 */ + 10003 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 45 79 first + 10004 "10011000" // LDA p0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10005 "00011110" // /* MW 3 */ + 10006 "00011100" // /* MW 2 */ + 10007 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 47 46 first + 10008 "10011000" // LDA p2, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10009 "00011110" // /* MW 3 */ + 10010 "00010101" // /* MW 2 */ + 10011 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 46 80 first + 10012 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10013 "10011110" // /* MW 3 */ + 10014 "00000100" // /* MW 2 */ + 10015 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 44 4 first +.tail_call + 10016 "10000100" // J #9792 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=9792 delay_slots=5 */ + 10017 "00000000" // /* MW 5 */ + 10018 "00000000" // /* MW 4 */ + 10019 "00100000" // /* MW 3 */ + 10020 "00010011" // /* MW 2 */ + 10021 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10023 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10025 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10027 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10029 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z14_b7835_wrapperPPv__end +.label __Z14_b7835_wrapperPPv___func_end0 + 10031 "00000000" // /* MW 1 */ +.label memset +.function memset memset +.src_ref 12 "string.c" 325 first +.src_ref 12 "string.c" 328 4 first +.function_start + 10032 "10000100" // JZ r1, #10192 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10192 delay_slots=5 */ + 10033 "00000001" // /* MW 5 */ + 10034 "00000000" // /* MW 4 */ + 10035 "11101000" // /* MW 3 */ + 10036 "00010011" // /* MW 2 */ + 10037 "00001000" // /* MW 1 */ +.src_ref 12 "string.c" 329 3 +.delay_slot + 10038 "11111000" // MOV p0, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10039 "11000000" // /* MW 3 */ + 10040 "01100010" // /* MW 2 */ + 10041 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10043 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10045 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10047 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10049 "00000000" // /* MW 1 */ +.src_ref 12 "string.c" 328 4 first +.src_ref 12 "string.c" 329 3 + 10050 "00000010" // MOVS p1, p0; MOV lc, r1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10051 "01110000" // /* MW 7 */ + 10052 "01010000" // /* MW 6 */ + 10053 "10111000" // /* MW 5 */ + 10054 "00000010" // /* MW 4 */ + 10055 "01100000" // /* MW 3 */ + 10056 "00010001" // /* MW 2 */ + 10057 "00110000" // /* MW 1 */ +.src_ref 12 "string.c" 328 4 + 10058 "01000100" // MOVXM ls, #10080 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10059 "11000000" // /* MW 5 */ + 10060 "11101110" // /* MW 4 */ + 10061 "00100001" // /* MW 3 */ + 10062 "00000000" // /* MW 2 */ + 10063 "00000000" // /* MW 1 */ +.src_ref 12 "string.c" 328 4 + 10064 "11100001" // NOPA; NOPB; NOPS; MOVXM le, #10176; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10065 "00000000" // /* MW 15 */ + 10066 "00000000" // /* MW 14 */ + 10067 "00010000" // /* MW 13 */ + 10068 "11100000" // /* MW 12 */ + 10069 "10111011" // /* MW 11 */ + 10070 "00001001" // /* MW 10 */ + 10071 "00000000" // /* MW 9 */ + 10072 "00000000" // /* MW 8 */ + 10073 "01011011" // /* MW 7 */ + 10074 "00000001" // /* MW 6 */ + 10075 "00100000" // /* MW 5 */ + 10076 "00000000" // /* MW 4 */ + 10077 "11110000" // /* MW 3 */ + 10078 "00101100" // /* MW 2 */ + 10079 "00000000" // /* MW 1 */ +.label ZLS_Fmemset_48 +.src_ref 12 "string.c" 329 3 first +.begin_of_loop +.loop_nesting 1 + 10080 "11100001" // ST.s8 r0, [p1], #1; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10081 "00000000" // /* MW 15 */ + 10082 "00000000" // /* MW 14 */ + 10083 "01111000" // /* MW 13 */ + 10084 "10100101" // /* MW 12 */ + 10085 "00000001" // /* MW 11 */ + 10086 "00000000" // /* MW 10 */ + 10087 "00000000" // /* MW 9 */ + 10088 "00000000" // /* MW 8 */ + 10089 "01011011" // /* MW 7 */ + 10090 "00000001" // /* MW 6 */ + 10091 "00100000" // /* MW 5 */ + 10092 "00000000" // /* MW 4 */ + 10093 "11100000" // /* MW 3 */ + 10094 "10000000" // /* MW 2 */ + 10095 "00100011" // /* MW 1 */ + 10096 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10097 "00000000" // /* MW 15 */ + 10098 "00000000" // /* MW 14 */ + 10099 "01111000" // /* MW 13 */ + 10100 "10100101" // /* MW 12 */ + 10101 "00000001" // /* MW 11 */ + 10102 "00000000" // /* MW 10 */ + 10103 "00000000" // /* MW 9 */ + 10104 "00000000" // /* MW 8 */ + 10105 "01011011" // /* MW 7 */ + 10106 "00000001" // /* MW 6 */ + 10107 "00100000" // /* MW 5 */ + 10108 "00000000" // /* MW 4 */ + 10109 "11110000" // /* MW 3 */ + 10110 "00101100" // /* MW 2 */ + 10111 "00000000" // /* MW 1 */ + 10112 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10113 "00000000" // /* MW 15 */ + 10114 "00000000" // /* MW 14 */ + 10115 "01111000" // /* MW 13 */ + 10116 "10100101" // /* MW 12 */ + 10117 "00000001" // /* MW 11 */ + 10118 "00000000" // /* MW 10 */ + 10119 "00000000" // /* MW 9 */ + 10120 "00000000" // /* MW 8 */ + 10121 "01011011" // /* MW 7 */ + 10122 "00000001" // /* MW 6 */ + 10123 "00100000" // /* MW 5 */ + 10124 "00000000" // /* MW 4 */ + 10125 "11110000" // /* MW 3 */ + 10126 "00101100" // /* MW 2 */ + 10127 "00000000" // /* MW 1 */ + 10128 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10129 "00000000" // /* MW 15 */ + 10130 "00000000" // /* MW 14 */ + 10131 "01111000" // /* MW 13 */ + 10132 "10100101" // /* MW 12 */ + 10133 "00000001" // /* MW 11 */ + 10134 "00000000" // /* MW 10 */ + 10135 "00000000" // /* MW 9 */ + 10136 "00000000" // /* MW 8 */ + 10137 "01011011" // /* MW 7 */ + 10138 "00000001" // /* MW 6 */ + 10139 "00100000" // /* MW 5 */ + 10140 "00000000" // /* MW 4 */ + 10141 "11110000" // /* MW 3 */ + 10142 "00101100" // /* MW 2 */ + 10143 "00000000" // /* MW 1 */ + 10144 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10145 "00000000" // /* MW 15 */ + 10146 "00000000" // /* MW 14 */ + 10147 "01111000" // /* MW 13 */ + 10148 "10100101" // /* MW 12 */ + 10149 "00000001" // /* MW 11 */ + 10150 "00000000" // /* MW 10 */ + 10151 "00000000" // /* MW 9 */ + 10152 "00000000" // /* MW 8 */ + 10153 "01011011" // /* MW 7 */ + 10154 "00000001" // /* MW 6 */ + 10155 "00100000" // /* MW 5 */ + 10156 "00000000" // /* MW 4 */ + 10157 "11110000" // /* MW 3 */ + 10158 "00101100" // /* MW 2 */ + 10159 "00000000" // /* MW 1 */ + 10160 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10161 "00000000" // /* MW 15 */ + 10162 "00000000" // /* MW 14 */ + 10163 "01111000" // /* MW 13 */ + 10164 "10100101" // /* MW 12 */ + 10165 "00000001" // /* MW 11 */ + 10166 "00000000" // /* MW 10 */ + 10167 "00000000" // /* MW 9 */ + 10168 "00000000" // /* MW 8 */ + 10169 "01011011" // /* MW 7 */ + 10170 "00000001" // /* MW 6 */ + 10171 "00100000" // /* MW 5 */ + 10172 "00000000" // /* MW 4 */ + 10173 "11110000" // /* MW 3 */ + 10174 "00101100" // /* MW 2 */ + 10175 "00000000" // /* MW 1 */ +.label ZLE_Fmemset_144 +.end_of_loop + 10176 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10177 "00000000" // /* MW 15 */ + 10178 "00000000" // /* MW 14 */ + 10179 "01111000" // /* MW 13 */ + 10180 "10100101" // /* MW 12 */ + 10181 "00000001" // /* MW 11 */ + 10182 "00000000" // /* MW 10 */ + 10183 "00000000" // /* MW 9 */ + 10184 "00000000" // /* MW 8 */ + 10185 "01011011" // /* MW 7 */ + 10186 "00000001" // /* MW 6 */ + 10187 "00100000" // /* MW 5 */ + 10188 "00000000" // /* MW 4 */ + 10189 "11110000" // /* MW 3 */ + 10190 "00101100" // /* MW 2 */ + 10191 "00000000" // /* MW 1 */ +.label TGT_Fmemset_160 +.src_ref 12 "string.c" 330 4 first +.loop_nesting 0 + 10192 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10193 "00000000" // /* MW 3 */ + 10194 "00101000" // /* MW 2 */ + 10195 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10197 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10199 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10201 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10203 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label memset__end + 10205 "00000000" // /* MW 1 */ +.dir 0 "/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/src" +.dir 1 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer" +.dir 2 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/gemm" +.dir 3 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api" +.dir 4 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include" +.dir 5 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib" +.dir 6 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail" +.dir 7 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p" +.dir 8 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2" +.dir 9 "/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend" +.dir 10 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf" +.dir 11 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc" +.dir 12 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21990/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib/runtime/src" diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/Release/0_0_reloadable82.cmico b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/Release/0_0_reloadable82.cmico new file mode 100644 index 0000000000000000000000000000000000000000..f377058758269f564988080a1597f499edc1b997 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/Release/0_0_reloadable82.cmico @@ -0,0 +1 @@ ++Mdec diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/Release/0_0_reloadable82.lst b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/Release/0_0_reloadable82.lst new file mode 100644 index 0000000000000000000000000000000000000000..0cf8f3e7e4cf573d9c01bc28dc7089ecb51d82f3 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/Release/0_0_reloadable82.lst @@ -0,0 +1,2933 @@ + +// File generated by darts version V-2024.06#84922c0d9f#241219, Fri May 30 11:36:48 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// darts -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -d -h -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable82 me + +// Release: ipp V-2024.06-TGT-241219 + +.text_segment PM 2528 +.entry_point +.label __Z13kernelWrapperPPvjjjj___func_begin0 +.label _Z13kernelWrapperPPvjjjj +.function_start + 2528 0x00 0xc2 0xd0 0xe9 0xe0 0x2c LDA r16, [p0]; NEZ r26, r1 + 2534 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 2540 0x0f 0xef 0x1d 0x98 ST p6, [sp, #-20] + 2544 0xfe 0x3a 0xb0 0x01 0xc8 0xd0 0x70 0x02 ST r14, [sp, #-16]; MOV r14, r3 + 2552 0xff 0x3e 0xb0 0x01 0xe8 0x50 0x70 0x02 ST r15, [sp, #-8]; MOV r15, r1 + 2560 0x0f 0xf7 0x9d 0x98 ST p7, [sp, #-12] + 2564 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4] + 2568 0x1e 0x68 0x02 0x18 ADD.NC p6, r16, #4 + 2572 0x06 0x1e 0x16 0x98 LDA r16, [p6], #4 + 2576 0x06 0x3e 0x56 0x98 LDA r18, [p6], #12 + 2580 0x06 0xee 0x36 0x98 LDA r17, [p6], #-8 + 2584 0x06 0x07 0x76 0x98 LDA r27, [p6] + 2588 0x00 0x00 NOPX + 2590 0x00 0x00 NOPX + 2592 0x00 0x00 NOPX + 2594 0x00 0x00 NOPX + 2596 0x00 0x00 NOPX + 2598 0x00 0x00 NOPX + 2600 0x14 0x21 0x22 0x18 SEL.EQZ r16, r16, r18, r27 + 2604 0x0e 0xd6 0x11 0x98 ST r16, [p6, #-12] + 2608 0xfc 0x1f 0xa0 0x35 0x39 0xe4 MOVX r16, #-1; MOV el0, r26 + 2614 0x00 0x00 NOPX + 2616 0x00 0x00 NOPX + 2618 0x00 0x00 NOPX + 2620 0x14 0x57 0x08 0x18 ACQ.COND r17, r16, r26 + 2624 0x04 0x41 0x29 0xa0 0x05 0x64 MOVX r17, #2; MOV r19, #1 + 2630 0xd5 0x23 0xb9 0x21 0x81 0xe4 LSHL r20, r26, r17; MOV r18, p0 + 2636 0x9c 0x9f 0x9c 0xd2 0xa2 0xa4 LTU r18, r19, r15; ADD.NC p6, r18, r20 + 2642 0xc0 0xd2 0xd7 0xe6 0x95 0x82 0x6e 0x60 0x72 0xba LDA r20, [p6]; ST r20, [sp, #-28]; MOV r19, p6 + 2652 0xfd 0x4a 0xb0 0x03 0x4c 0x90 0x70 0x02 ST r18, [sp, #-24]; MOV r26, r18 + 2660 0x00 0x00 NOPX + 2662 0x00 0x00 NOPX + 2664 0x00 0x00 NOPX + 2666 0x00 0x00 NOPX + 2668 0x00 0x00 NOPX + 2670 0x1e 0x6a 0x02 0x18 ADD.NC p6, r20, #4 + 2674 0x06 0x1e 0x96 0x98 LDA r20, [p6], #4 + 2678 0x06 0x3e 0xd6 0x98 LDA r22, [p6], #12 + 2682 0x06 0xee 0xb6 0x98 LDA r21, [p6], #-8 + 2686 0x06 0x07 0x76 0x98 LDA r27, [p6] + 2690 0x00 0x00 NOPX + 2692 0x00 0x00 NOPX + 2694 0x00 0x00 NOPX + 2696 0x00 0x00 NOPX + 2698 0x00 0x00 NOPX + 2700 0x00 0x00 NOPX + 2702 0x15 0x29 0x62 0x18 SEL.EQZ r20, r20, r22, r27 + 2706 0x0e 0xd6 0x91 0x98 ST r20, [p6, #-12] + 2710 0x00 0x00 NOPX + 2712 0x00 0x00 NOPX + 2714 0x00 0x00 NOPX + 2716 0x00 0x00 NOPX + 2718 0x15 0x57 0x08 0x18 ACQ.COND r21, r16, r26 + 2722 0x14 0xa5 0x1d 0x98 LSHL r18, r18, r17 + 2726 0x14 0xa3 0xb9 0xb3 0x92 0xa4 LSHL r18, r2, r17; ADD.NC r19, r19, r18 + 2732 0x76 0x9e 0x0c 0xd3 0x92 0xa4 NEZ r26, r14; ADD.NC p6, r19, r18 + 2738 0xc0 0xca 0xdf 0xc6 0xab 0x0c LDA r18, [p6]; ST r26, [sp, #-32] + 2744 0x00 0x00 NOPX + 2746 0x00 0x00 NOPX + 2748 0x00 0x00 NOPX + 2750 0x00 0x00 NOPX + 2752 0x00 0x00 NOPX + 2754 0x00 0x00 NOPX + 2756 0x1f 0x69 0x02 0x18 ADD.NC p7, r18, #4 + 2760 0x07 0x3e 0x76 0x98 LDA r19, [p7], #12 + 2764 0x07 0xee 0x56 0x98 LDA r18, [p7], #-8 + 2768 0x07 0x1e 0x96 0x98 LDA r20, [p7], #4 + 2772 0x07 0x07 0x76 0x98 LDA r27, [p7] + 2776 0x00 0x00 NOPX + 2778 0x00 0x00 NOPX + 2780 0x00 0x00 NOPX + 2782 0x00 0x00 NOPX + 2784 0x00 0x00 NOPX + 2786 0x00 0x00 NOPX + 2788 0x14 0xe7 0x42 0x18 SEL.EQZ r19, r19, r20, r27 + 2792 0x0f 0xd6 0x71 0x98 ST r19, [p7, #-12] + 2796 0x00 0x00 NOPX + 2798 0x00 0x00 NOPX + 2800 0x00 0x00 NOPX + 2802 0x00 0x00 NOPX + 2804 0x14 0x97 0x08 0x18 ACQ.COND r18, r16, r26 + 2808 0x10 0x21 0x1d 0x98 LSHL r16, r0, r17 + 2812 0x18 0x88 0x20 0xf8 MOV dj0, r16 + 2816 0x00 0x07 0xce 0xc7 0x00 0x44 MOVXM p7, #508800 + 2822 0xe0 0x13 0xdf 0xb8 0x5b 0x0c LDA p1, [p7, dj0]; ST el0, [sp, #-36] + 2828 0x00 0x00 NOPX + 2830 0x00 0x00 NOPX + 2832 0x00 0x00 NOPX + 2834 0x00 0x00 NOPX + 2836 0x00 0x00 NOPX + 2838 0x00 0x00 NOPX +.no_stack_arguments + 2840 0x10 0x30 0x40 0x18 JL p1 +.delay_slot + 2844 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.delay_slot +.swstall delay_slot + 2848 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2850 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2852 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2854 0x00 0x2c 0xf0 0x00 0x10 0x00 0x01 0xa5 0x7e 0xba NOPA; NOPB; NOPM +.return_address + 2864 0xe0 0xc6 0xd0 0x40 0x0a 0x2c LDA r17, [p7]; MOVX r16, #1 + 2870 0x07 0xdf 0x51 0x18 LDA r26, [sp, #-36] + 2874 0x07 0xe4 0x41 0x18 LDA dj0, [sp, #-28] + 2878 0x07 0xe8 0x29 0x18 LDA el0, [sp, #-24] + 2882 0x07 0xe0 0x09 0x18 LDA eh0, [sp, #-32] + 2886 0x00 0x00 NOPX + 2888 0x00 0x00 NOPX + 2890 0x18 0x68 0x88 0x18 ADD.NC p0, r17, #16 + 2894 0x00 0x06 0x36 0x98 LDA r17, [p0] + 2898 0x00 0x00 NOPX + 2900 0x00 0x00 NOPX + 2902 0x00 0x00 NOPX + 2904 0x00 0x00 NOPX + 2906 0x00 0x00 NOPX + 2908 0x00 0x00 NOPX + 2910 0x14 0x55 0x08 0x18 REL.COND r17, r16, r26 + 2914 0x1e 0xc6 0xdd 0xaf 0x41 0xd4 LDA r17, [p0, #-4]; MOV r27, r15 + 2920 0xe0 0x4a 0xdd 0x40 0x39 0xd4 LDA r18, [p7, dj0]; MOV r26, el0 + 2926 0x00 0x00 NOPX + 2928 0x00 0x00 NOPX + 2930 0x00 0x00 NOPX + 2932 0x00 0x00 NOPX + 2934 0x00 0x00 NOPX + 2936 0x14 0x27 0x11 0x98 SUB r19, r16, r17 + 2940 0x8c 0x66 0x4e 0xd2 0x10 0x24 SEL.EQZ r17, r17, r19, r27; ADD.NC p7, r18, #16 + 2946 0xe0 0xc6 0xd1 0xec 0x63 0x0c LDA r17, [p7]; ST r17, [p0, #-4] + 2952 0x00 0x00 NOPX + 2954 0x00 0x00 NOPX + 2956 0x00 0x00 NOPX + 2958 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 2960 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 2962 0x1e 0xa1 0x1c 0xf8 MOV r26, eh0 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2966 0x14 0x55 0x08 0x18 REL.COND r17, r16, r26 + 2970 0xfe 0xc6 0xdd 0xc0 0x39 0xd4 LDA r17, [p7, #-4]; MOV r27, el0 + 2976 0x06 0x06 0x56 0x98 LDA r18, [p6] + 2980 0x00 0x00 NOPX + 2982 0x00 0x00 NOPX + 2984 0x00 0x00 NOPX + 2986 0x00 0x00 NOPX + 2988 0x00 0x00 NOPX + 2990 0x14 0x27 0x11 0x98 SUB r19, r16, r17 + 2994 0x8c 0x66 0x40 0xd2 0x14 0x24 SEL.EQZ r17, r17, r19, r27; ADD.NC p0, r18, #20 + 3000 0x00 0xc6 0xdf 0xec 0x63 0x0c LDA r17, [p0]; ST r17, [p7, #-4] + 3006 0x00 0x00 NOPX + 3008 0x00 0x00 NOPX + 3010 0x00 0x00 NOPX + 3012 0x00 0x00 NOPX + 3014 0x00 0x00 NOPX + 3016 0x00 0x00 NOPX + 3018 0x14 0x55 0x08 0x18 REL.COND r17, r16, r26 + 3022 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] + 3026 0x00 0xe6 0x36 0x98 LDA r17, [p0, #-8] + 3030 0x07 0xef 0x19 0x18 LDA p6, [sp, #-20] +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 3034 0x07 0xf7 0x99 0x18 LDA p7, [sp, #-12] +.aggressive_scheduled_block_id 2 +.noswbrkpt + 3038 0x07 0xf1 0xd1 0x18 LDA r14, [sp, #-16] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3042 0x07 0xf9 0xf1 0x18 LDA r15, [sp, #-8] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3046 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3052 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3056 0x14 0x21 0x11 0x98 SUB r16, r16, r17 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3060 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3062 0x1e 0xd7 0x20 0xf8 MOV r27, r14 +.delay_slot + 3066 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 +.delay_slot + 3070 0x08 0xe6 0x11 0x98 ST r16, [p0, #-8] +.label _Z13kernelWrapperPPvjjjj__end +.label __Z13kernelWrapperPPvjjjj___func_end0 + +.text_segment PM 3088 +.label _ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv +.label __ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv___func_begin0 +.function_start + 3088 0x03 0x8e 0xd0 0x00 0x01 0xf0 0xb1 0x00 0x10 0xba LDA r3, [p0], #4; MOVXM p1, #508416 + 3098 0xff 0xdd 0x00 0x3e 0x67 0xa8 0xaf 0xfc 0x58 0xba MOVA r29, #-2; MOVX r6, #-3; MOV r5, #-4 + 3108 0x00 0x18 0x00 0x00 0x10 0x28 0x08 0x08 0x58 0xba MOVA r24, #0; MOVX r1, #1; MOV r0, #8 + 3118 0x20 0x04 0x00 0x00 0x3f 0xff 0x8f 0xff 0x10 0xba MOVA r4, #256; MOVXM r28, #16777214 + 3128 0x00 0xf0 0x00 0x01 0x31 0x28 0x4a 0x00 0x58 0xba MOVA r16, #7; MOVX r19, #9; MOV r2, #512 + 3138 0x11 0xc0 0x20 0x00 0xd1 0x64 MOVX r7, #128; MOV m0, #52 + 3144 0x18 0x80 0x00 0xf8 MOV dj0, m0 + 3148 0x23 0x8e 0x31 0xec 0xdb 0x5c ST r3, [p1], #4; LSHL r27, r3, r6 + 3154 0x03 0x8e 0xd1 0xc4 0xbb 0x2c LDA r3, [p0], #4; LSHL r17, r3, r5 + 3160 0xdf 0xc1 0xb8 0xb1 0xff 0x24 LSHL r31, r27, r0; ADD.NC r17, r17, #-1 + 3166 0xdc 0xe7 0xb9 0x3f 0x22 0xa4 LSHL r19, r27, r19; ADD.NC r18, r31, r4 + 3172 0x16 0xed 0x0d 0x98 LSHL r22, r27, r16 + 3176 0x00 0x00 NOPX + 3178 0x00 0x00 NOPX + 3180 0x00 0x00 NOPX + 3182 0x23 0x8e 0x31 0xe8 0xdb 0x5c ST r3, [p1], #4; LSHL r26, r3, r6 + 3188 0x03 0xd6 0xdd 0x53 0xfe 0x2c LDA r21, [p0], #4; ADD r20, r26, #-1 + 3194 0x15 0xaf 0x4f 0x98 MUL r23, r22, r20 + 3198 0x00 0x00 NOPX + 3200 0x11 0xfd 0x71 0x98 SUB r30, r7, r23 + 3204 0x00 0x00 NOPX + 3206 0x00 0x00 NOPX + 3208 0x00 0x00 NOPX + 3210 0x23 0xd6 0x31 0x8e 0xbf 0x5c ST r21, [p1], #4; MUL r3, r3, r21 + 3216 0x03 0x85 0xda 0x98 0xdb 0x2c LDA el0, [p0], #4; LSHL r6, r21, r6 + 3222 0xde 0x4d 0xfe 0x26 0xe2 0xa4 MUL r25, r27, r6; ADD.NC r28, r6, r28 + 3228 0xa9 0x4b 0xba 0xba 0xa2 0xa4 LSHL r5, r21, r5; ADD.NC r21, r26, r20 + 3234 0x10 0x19 0x00 0x33 0xde 0xec 0xa9 0x7f 0xc8 0xba MOVA r25, #128; LSHL r29, r25, r29; ADD.NC r5, r5, #-1 + 3244 0x17 0xc5 0xce 0x18 MSC r2, r2, r31, r28 + 3248 0x11 0x8d 0x0d 0x98 LSHL r6, r6, r16 + 3252 0x15 0x6b 0x0d 0x98 LSHL r21, r21, r16 + 3256 0x23 0x85 0x3d 0x70 0x1b 0x5c ST el0, [p1], #4; LSHL r28, r26, r0 + 3262 0x00 0x85 0xdd 0x42 0x1b 0x2c LDA el0, [p0]; LSHL r16, r26, r16 + 3268 0x17 0x37 0x51 0x98 SUB r27, r28, r21 + 3272 0x11 0x40 0x0d 0x98 LSHL r0, r5, r0 + 3276 0x11 0x81 0x46 0x18 MAC r0, r0, r6, r20 + 3280 0x11 0xb3 0x4e 0x18 MSC r25, r25, r6, r20 + 3284 0x00 0x00 NOPX + 3286 0x00 0x00 NOPX + 3288 0x09 0x1c 0x29 0x98 ST el0, [p1], #4 + 3292 0x00 0x14 0x2e 0x98 LDA el0, [p0, #4] + 3296 0x00 0x00 NOPX + 3298 0x00 0x00 NOPX + 3300 0x00 0x00 NOPX + 3302 0x00 0x00 NOPX + 3304 0x00 0x00 NOPX + 3306 0x00 0x00 NOPX + 3308 0x09 0x3c 0x29 0x98 ST el0, [p1], #12 + 3312 0x23 0x8e 0x30 0x00 0x69 0xa0 0x00 0x02 ST r3, [p1], #4; ADD.NC r3, r6, #-128 + 3320 0x23 0xf6 0x33 0xf6 0x03 0x5c ST r29, [p1], #4; SUB r29, r7, r16 + 3326 0x23 0xea 0x30 0x03 0x4d 0xa0 0x00 0x02 ST r26, [p1], #4; ADD.NC r26, r22, #-128 + 3334 0x09 0x1c 0x31 0x98 ST r1, [p1], #4 + 3338 0x09 0x1f 0x11 0x98 ST r24, [p1], #4 + 3342 0x09 0x1f 0xf1 0x98 ST r31, [p1], #4 + 3346 0x09 0x1c 0x91 0x98 ST r4, [p1], #4 + 3350 0x23 0xca 0x3d 0xca 0x01 0x5c ST r18, [p1], #4; ADD r18, r27, r16 + 3356 0x23 0xce 0x32 0x4e 0xe3 0x5c ST r19, [p1], #4; SUB r19, r4, r23 + 3362 0x23 0x96 0x33 0x12 0x9c 0x5c ST r5, [p1], #4; MSC r4, r4, r6, r20 + 3368 0x29 0x8a 0x3f 0x08 0x02 0x5c ST r2, [p1], #16; MOVX r2, #-128 + 3374 0x23 0xe2 0x3b 0xfc 0x4d 0x5c ST r24, [p1], #4; XOR r31, r23, r2 + 3380 0x3b 0xe2 0x3c 0x5e 0xe3 0x5c ST r24, [p1], #-12; SUB r23, r24, r23 + 3386 0x23 0xe2 0x31 0x08 0x0d 0x5c ST r24, [p1], #4; XOR r2, r2, r0 + 3392 0x3d 0xe2 0x3c 0x00 0x03 0x5c ST r24, [p1], #-8; SUB r0, r24, r0 + 3398 0x09 0x5f 0x11 0x98 ST r24, [p1], #20 + 3402 0x09 0x1e 0x31 0x98 ST r17, [p1], #4 + 3406 0x09 0x1f 0xd1 0x98 ST r30, [p1], #4 + 3410 0x09 0x1c 0xb1 0x98 ST r5, [p1], #4 + 3414 0x09 0x1f 0xf1 0x98 ST r31, [p1], #4 + 3418 0x09 0x1e 0x91 0x98 ST r20, [p1], #4 + 3422 0x09 0x1f 0x51 0x98 ST r26, [p1], #4 + 3426 0x09 0x1c 0x31 0x98 ST r1, [p1], #4 + 3430 0x09 0x1c 0xf1 0x98 ST r7, [p1], #4 + 3434 0x09 0x1e 0x91 0x98 ST r20, [p1], #4 + 3438 0x09 0x1e 0xd1 0x98 ST r22, [p1], #4 + 3442 0x09 0x1c 0xb1 0x98 ST r5, [p1], #4 + 3446 0x09 0x1e 0xf1 0x98 ST r23, [p1], #4 + 3450 0x23 0xce 0x32 0xd7 0x8c 0x5c ST r19, [p1], #4; MAC r21, r21, r5, r28 + 3456 0x09 0x1c 0xf1 0x98 ST r7, [p1], #4 + 3460 0x23 0xc6 0x3c 0x72 0xa3 0x5c ST r17, [p1], #4; SUB r28, r24, r21 + 3466 0x23 0xf2 0x38 0x56 0xa3 0x5c ST r28, [p1], #4; SUB r21, r16, r21 + 3472 0x09 0x1c 0xb1 0x98 ST r5, [p1], #4 + 3476 0x09 0x1f 0x71 0x98 ST r27, [p1], #4 + 3480 0x09 0x1e 0x91 0x98 ST r20, [p1], #4 + 3484 0x09 0x1f 0xb1 0x98 ST r29, [p1], #4 + 3488 0x09 0x1c 0x31 0x98 ST r1, [p1], #4 + 3492 0x09 0x1e 0x11 0x98 ST r16, [p1], #4 + 3496 0x09 0x1e 0x91 0x98 ST r20, [p1], #4 + 3500 0x09 0x1c 0xf1 0x98 ST r7, [p1], #4 + 3504 0x09 0x1c 0xb1 0x98 ST r5, [p1], #4 + 3508 0x09 0x1e 0x51 0x98 ST r18, [p1], #4 + 3512 0x09 0x1e 0xb1 0x98 ST r21, [p1], #4 + 3516 0x09 0x1e 0x11 0x98 ST r16, [p1], #4 + 3520 0x09 0x1e 0x31 0x98 ST r17, [p1], #4 + 3524 0x09 0x1c 0x51 0x98 ST r2, [p1], #4 + 3528 0x09 0x1c 0xb1 0x98 ST r5, [p1], #4 + 3532 0x09 0x1f 0x31 0x98 ST r25, [p1], #4 + 3536 0x09 0x1e 0x91 0x98 ST r20, [p1], #4 + 3540 0x09 0x1c 0x71 0x98 ST r3, [p1], #4 + 3544 0x09 0x1c 0x31 0x98 ST r1, [p1], #4 + 3548 0x09 0x1c 0xf1 0x98 ST r7, [p1], #4 + 3552 0x09 0x1e 0x91 0x98 ST r20, [p1], #4 + 3556 0x09 0x1c 0xd1 0x98 ST r6, [p1], #4 + 3560 0x09 0x1c 0xb1 0x98 ST r5, [p1], #4 + 3564 0x09 0x1c 0x91 0x98 ST r4, [p1], #4 + 3568 0x09 0x1c 0x11 0x98 ST r0, [p1], #4 + 3572 0x09 0x08 0xf1 0x98 ST r7, [p1], m0 + 3576 0x09 0x1f 0x11 0x98 ST r24, [p1], #4 + 3580 0x09 0xdf 0x11 0x98 ST r24, [p1], #-12 + 3584 0x09 0x1f 0x11 0x98 ST r24, [p1], #4 + 3588 0x09 0xdf 0x11 0x98 ST r24, [p1], #-12 + 3592 0x09 0x1f 0x11 0x98 ST r24, [p1], #4 + 3596 0x09 0xdf 0x11 0x98 ST r24, [p1], #-12 + 3600 0x09 0x1f 0x11 0x98 ST r24, [p1], #4 + 3604 0x09 0xdf 0x11 0x98 ST r24, [p1], #-12 + 3608 0x09 0x1f 0x11 0x98 ST r24, [p1], #4 + 3612 0x3b 0xe2 0x30 0x50 0x00 0x5c ST r24, [p1], #-12; RET lr +.delay_slot + 3618 0x09 0x1f 0x11 0x98 ST r24, [p1], #4 +.delay_slot + 3622 0x09 0xdf 0x11 0x98 ST r24, [p1], #-12 +.delay_slot + 3626 0x09 0x1f 0x11 0x98 ST r24, [p1], #4 +.delay_slot + 3630 0x09 0x07 0x11 0x98 ST r24, [p1] +.delay_slot + 3634 0x09 0x03 0x11 0x98 ST r24, [p1, dj0] +.label _ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv__end +.label __ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv___func_end0 + +.text_segment PM 3648 +.label __Z8init_accILt1EEvPaS0_iii___func_begin0 +.label _Z8init_accILt1EEvPaS0_iii +.function_start + 3648 0x00 0x07 0xc4 0xc6 0xe8 0x44 MOVXM p2, #508788 + 3654 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 3660 0x02 0x04 0x82 0x98 LDA.s8 r4, [p2] + 3664 0x00 0x00 0x01 0xfd 0xe0 0x44 MOVXM ls, #3824 + 3670 0x00 0x1a 0x00 0x00 0x00 0x01 0xbf 0x98 0x10 0xba MOVA r26, #0; MOVXM le, #3888 + 3680 0xff 0x85 0x00 0x00 0x00 0x01 0xb7 0x60 0x10 0xba MOVA r5, #-4; MOVXM p3, #3776 + 3690 0x00 0xa3 0x00 0x02 0x52 0xec 0x41 0xa8 0xb8 0xba MOVA r3, #5; LSHL r5, r1, r5; VINSERT.32 x1, x0, #0, r26 + 3700 0x01 0xc7 0xb4 0xcb 0xc1 0xe4 LSHL r7, r0, r3; MOV p2, sp + 3706 0x10 0xc7 0xb0 0x05 0x25 0xe4 LSHL r3, r2, r3; VMOV bmll0, x1 + 3712 0x27 0x50 0x00 0xa5 0x81 0xe4 MOVX crRnd, r4; MOV r1, p1 + 3718 0x08 0x02 0xc5 0xff 0x20 0x01 0x00 0x28 0x80 0xd0 0x78 0x36 PADDB [p2], #-64; VCONV.bf16.fp32 wl0, bmll0; MOVX r16, #1; MOV m1, r3 + 3730 0xff 0xa6 0x00 0x05 0xb8 0x3c 0x6a 0x60 0x78 0xba MOVA r6, #-3; EQ r27, r2, r16; MOV r3, p2 + 3740 0x00 0x0d 0xb1 0x02 0x06 0xa4 LSHL r0, r0, r6; VEXTBCST.16 x1, x0, #0 + 3746 0x00 0x2c 0xf0 0x00 0x20 0xc2 0x12 0x00 0x3a 0x0f 0x30 0x11 0x60 0x7e NOPA; NOPB; MOVS p1, p0; SEL.EQZ r1, r3, r1, r27; MOV m0, r7 + 3760 0x00 0x2c 0xf0 0x17 0x22 0x04 0x53 0x0a 0x2f 0xf9 0x30 0x50 0x78 0x00 0x00 0xe1 NOPA; PADDB [p0], m0; VST x1, [p2]; ADD r2, r5, #-1; MOV p2, r1; NOPV +.label TGT_F_Z8init_accILt1EEvPaS0_iii_128 +.loop_nesting 1 + 3776 0x40 0x84 0x8a 0xe0 0x41 0xf4 VLDB wl0, [p2]; MOV lc, r0 + 3782 0x00 0x00 NOPX + 3784 0x00 0x00 NOPX + 3786 0x00 0x00 NOPX + 3788 0x00 0x00 NOPX + 3790 0x00 0x00 NOPX + 3792 0x00 0x00 NOPX + 3794 0x18 0x01 0x22 0xf8 VMOV wh0, wl0 + 3798 0x19 0x84 0x03 0x58 VEXTBCST.128 x3, x0, #0 + 3802 0x18 0x84 0x07 0x58 VEXTBCST.128 x1, x0, #1 + 3806 0x18 0x07 0x8a 0xf8 VCONV.fp32.bf16 cml0, x3 + 3810 0x18 0x83 0x8a 0xf8 VCONV.fp32.bf16 cmh0, x1 + 3814 0x19 0x00 0x12 0xf8 VMOV bmll1, bmll0 + 3818 0x00 0x2c 0xf2 0x84 0x25 0xd4 NOPA; VMOV bmlh1, bmhl0 +.label ZLS_F_Z8init_accILt1EEvPaS0_iii_176 +.loop_nesting 2 +.begin_of_loop + 3824 0x09 0x14 0x26 0x98 VST bmlh0, [p1, #64] + 3828 0x09 0x2c 0x86 0x98 VST bmll1, [p1], #128 + 3832 0x09 0x14 0x26 0x98 VST bmlh0, [p1, #64] + 3836 0x09 0x2c 0x86 0x98 VST bmll1, [p1], #128 + 3840 0x00 0x2c 0xf0 0x00 0x20 0x14 0x66 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmhh0, [p0, #64]; NOPX; NOPM; NOPV + 3856 0x00 0x2c 0xf0 0x00 0x20 0x2c 0xa6 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmlh1, [p0], #128; NOPX; NOPM; NOPV + 3872 0x00 0x2c 0xf0 0x00 0x20 0x14 0x66 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmhh0, [p0, #64]; NOPX; NOPM; NOPV +.label ZLE_F_Z8init_accILt1EEvPaS0_iii_240 +.end_of_loop + 3888 0x00 0x2c 0xf0 0x00 0x20 0x2c 0xa6 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmlh1, [p0], #128; NOPX; NOPM; NOPV +.loop_nesting 1 + 3904 0x21 0x0c 0xf4 0x57 0x20 0x84 0xe0 0x12 PADDA [p1], m0; PADDB [p2], m1; JNZD r2, r2, p3 +.delay_slot + 3912 0x38 0x0b 0x90 0x18 PADDB [p0], m0 +.delay_slot +.swstall delay_slot + 3916 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3918 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3920 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3922 0x00 0x00 NOPX +.loop_nesting 0 + 3924 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 3928 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 3934 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3936 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3938 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3940 0x00 0x00 NOPX +.label _Z8init_accILt1EEvPaS0_iii__end +.label __Z8init_accILt1EEvPaS0_iii___func_end0 + +.text_segment PM 3952 +.label __Z12post_processPai___func_begin0 +.label _Z12post_processPai +.function_start + 3952 0x40 0x00 0x82 0x80 0x8b 0x00 0x01 0xf0 0xb1 0xba 0x10 0x76 MOVA m0, #512; MOVS p2, p0; MOVXM p1, #508788 + 3964 0xff 0x21 0x00 0x00 0x20 0x08 0x88 0x01 0x58 0xba MOVA r1, #-7; MOVX r2, #0; MOV r4, #1 + 3974 0x20 0xe0 0x51 0x80 0x8b 0x05 0x02 0x2c 0x68 0x07 0x58 0x76 LDA.s8 r24, [p1]; MOVS p1, p0; OR r16, r2, r4; MOV r3, #7 + 3986 0x22 0x96 0xb0 0x00 0x10 0xec 0xa8 0x02 0x58 0xba VLDA bmlh1, [p1, #64]; LSHL r1, r0, r1; MOV r5, #2 + 3996 0x21 0x12 0xb0 0x21 0x21 0xec 0x08 0x60 0x78 0xba VLDA bmll1, [p1], m0; LSHL r18, r16, r3; MOV r0, p0 + 4006 0x00 0x66 0x00 0x0a 0x71 0x2d 0xb4 0x80 0xa8 0xba MOVA r6, #3; OR r7, r5, r2; ADD.NC p3, r18, r0 + 4016 0x62 0x8e 0xb0 0x0f 0x31 0xee 0xb8 0x7f 0xc8 0xba VLDA bmhh0, [p3, #64]; LSHL r19, r7, r3; ADD.NC lc, r1, #-1 + 4026 0x60 0x8a 0xb0 0x0d 0x11 0x2e 0x34 0xc0 0xa8 0xba VLDA bmhl0, [p3]; OR r17, r6, r2; ADD.NC p4, r19, r0 + 4036 0x82 0x86 0xb0 0x23 0x41 0xec 0x48 0x81 0x08 0xba VLDA bmlh0, [p4, #64]; LSHL r20, r17, r3; ADD.NC r2, r2, #4 + 4046 0x80 0x82 0xb0 0x31 0xd4 0x02 0xb5 0x00 0xa8 0xba VLDA bmll0, [p4]; MOVX crRnd, r24; ADD.NC p5, r20, r0 + 4056 0xa2 0x9e 0xb0 0x00 0x00 0x04 0x78 0x00 0x10 0xba VLDA bmhh1, [p5, #64]; MOVXM ls, #4096 + 4066 0xa0 0x9a 0xb0 0x00 0x00 0x05 0xb8 0x30 0x10 0xba VLDA bmhl1, [p5]; MOVXM le, #4192 + 4076 0x10 0x02 0x19 0x18 MOVX r1, #6 + 4080 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLS_F_Z12post_processPai_144 +.loop_nesting 1 +.begin_of_loop + 4096 0x49 0x94 0x68 0x54 0x3b 0x5c VST.CONV.bf16.fp32 cml1, [p2], #256;LSHL r21, r16, r1 + 4102 0x3d 0x83 0xb5 0x15 0x41 0xe4 LSHL r22, r7, r1; MOV dj2, r21 + 4108 0x08 0x0c 0x60 0x23 0x70 0xec 0x45 0x90 0x79 0x3a VST.CONV.bf16.fp32 cmh0, [p0, dj2];LSHL r23, r17, r1; MOV dj0, r22 + 4118 0x00 0x04 0x60 0x05 0x02 0x2c 0xc5 0xd0 0x79 0x3a VST.CONV.bf16.fp32 cml0, [p0, dj0];OR r16, r2, r4; MOV dj1, r23 + 4128 0x11 0x4e 0x25 0x98 OR r7, r5, r2 + 4132 0x04 0x1c 0x60 0x0d 0x11 0x2c 0x48 0x81 0x09 0x3a VST.CONV.bf16.fp32 cmh1, [p0, dj1];OR r17, r6, r2; ADD.NC r2, r2, #4 + 4142 0x22 0x96 0xb8 0x48 0x7b 0x2c VLDA bmlh1, [p1, #64]; LSHL r18, r16, r3 + 4148 0x21 0x12 0xb0 0x0f 0x31 0xed 0xb4 0x80 0xa8 0xba VLDA bmll1, [p1], m0; LSHL r19, r7, r3; ADD.NC p3, r18, r0 + 4158 0x62 0x8e 0xb0 0x23 0x41 0xee 0x34 0xc0 0xa8 0xba VLDA bmhh0, [p3, #64]; LSHL r20, r17, r3; ADD.NC p4, r19, r0 + 4168 0x60 0x8a 0xba 0xd4 0x02 0x94 VLDA bmhl0, [p3]; ADD.NC p5, r20, r0 + 4174 0x04 0x14 0x35 0x98 VLDA bmlh0, [p4, #64] + 4178 0x04 0x04 0x15 0x98 VLDA bmll0, [p4] + 4182 0x05 0x14 0xf5 0x98 VLDA bmhh1, [p5, #64] + 4186 0xa0 0x9a 0xb0 0x00 0x20 0x3c VLDA bmhl1, [p5]; NOPB +.label ZLE_F_Z12post_processPai_240 +.end_of_loop + 4192 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.loop_nesting 0 + 4208 0x49 0x94 0x68 0x54 0x3b 0x5c VST.CONV.bf16.fp32 cml1, [p2], #256;LSHL r21, r16, r1 + 4214 0x05 0x00 0x05 0x15 0x41 0xe4 RET lr; MOV dj2, r21 +.delay_slot + 4220 0x08 0x0c 0x63 0xd8 0x3b 0x5c VST.CONV.bf16.fp32 cmh0, [p0, dj2];LSHL r22, r7, r1 +.delay_slot + 4226 0x8d 0xc3 0xb1 0x16 0x41 0xe4 LSHL r23, r17, r1; MOV dj0, r22 +.delay_slot + 4232 0x00 0x04 0x60 0x00 0xc5 0xd0 0x70 0x02 VST.CONV.bf16.fp32 cml0, [p0, dj0]; MOV dj1, r23 +.delay_slot + 4240 0x08 0x20 0xe3 0x18 VST.CONV.bf16.fp32 cmh1, [p0, dj1] +.delay_slot +.swstall delay_slot + 4244 0x00 0x00 NOPX +.label _Z12post_processPai__end +.label __Z12post_processPai___func_end0 + +.text_segment PM 4256 +.label __Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params___func_begin0 +.label _Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params +.function_start + 4256 0xda 0x90 0x84 0x9c 0x8b 0x00 0x01 0xf3 0xb1 0xa0 0x10 0x76 MOVA m4, #-300; MOVS p4, p7; MOVXM p7, #508736 + 4268 0x07 0x8a 0x16 0x98 LDA r16, [p7], m4 + 4272 0x00 0x00 NOPX + 4274 0x00 0x00 NOPX + 4276 0x00 0x00 NOPX + 4278 0x13 0x11 0x60 0x03 0x30 0x60 0x70 0x02 MOVS p0, p6; MOV p6, p0 + 4286 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 4292 0x0f 0xfc 0x1d 0x98 ST p0, [sp, #-4] + 4296 0xfe 0x43 0xb0 0x20 0x02 0x24 0x10 0x00 0x61 0x3a ST p4, [sp, #-16]; JNZ r16, #4384 +.delay_slot + 4306 0x0f 0xf5 0x1d 0x98 ST p2, [sp, #-12] +.delay_slot + 4310 0x0f 0xec 0x9d 0x98 ST p1, [sp, #-20] +.delay_slot + 4314 0x00 0x32 0x07 0xf8 0x3d 0x80 0x01 0xf0 0x31 0xb8 0x10 0x76 MOVA r18, #1; ST lr, [sp, #-8]; MOVXM p0, #508784 +.delay_slot + 4326 0x01 0x71 0x00 0x06 0x51 0x80 0x01 0xf0 0x31 0xba 0x10 0x76 MOVA r17, #11; ST r18, [p0]; MOVXM p0, #508788 +.delay_slot + 4338 0x00 0xc4 0xe0 0x00 0x01 0xf2 0xb1 0x00 0x10 0xba ST.s8 r17, [p0]; MOVXM p5, #508416 + 4348 0xa5 0x82 0xd0 0xc9 0x81 0xd4 LDA r0, [p5], #8; MOV p0, p2 + 4354 0x05 0x04 0x36 0x98 LDA r1, [p5] + 4358 0x05 0x14 0x56 0x98 LDA r2, [p5, #4] +.no_stack_arguments + 4362 0x00 0x07 0x20 0x00 0x01 0x04 JL #3648 +.delay_slot + 4368 0x19 0x66 0xc0 0xf8 MOV p1, p3 +.delay_slot +.swstall delay_slot + 4372 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4374 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4376 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4378 0x00 0x2c 0xf0 0x00 0x20 0x3c NOPA; NOPB +.label TGT_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_128 +.return_address + 4384 0x17 0x10 0x00 0x3b 0x21 0x0a 0x00 0xdc 0x58 0xba MOVA r16, #184; MOVX r18, #-184; MOV m4, #220 + 4394 0x07 0x8b 0x76 0x98 LDA r27, [p7], m4 + 4398 0x1c 0xde 0xc0 0xf8 MOV r19, p7 + 4402 0x1d 0x19 0xe4 0x18 ADD.NC r20, r19, #-56 + 4406 0x1f 0x69 0x51 0x58 ADD.NC p7, r18, r20 + 4410 0x10 0xe2 0xc1 0x18 MOVX r17, #240 + 4414 0x00 0x00 NOPX + 4416 0x00 0x00 NOPX + 4418 0x15 0x25 0x32 0x18 SEL.EQZ r18, r20, r19, r27 + 4422 0x84 0x22 0x46 0xd2 0x04 0x24 SEL.EQZ r16, r16, r17, r27; ADD.NC p3, r18, #4 + 4428 0x63 0x98 0xd0 0x36 0x02 0x4a 0x00 0x00 0x60 0xba LDA dj1, [p3], #4; JZ r27, #4688 +.delay_slot + 4438 0x63 0xd4 0xd7 0x10 0x41 0xd4 LDA dn5, [p3], #4; MOV dj3, r16 +.delay_slot + 4444 0x03 0x1e 0xc6 0x98 LDA dj5, [p3], #4 +.delay_slot + 4448 0x07 0x60 0xa6 0x98 LDA dn1, [p7, dj3] +.delay_slot + 4452 0x03 0x06 0x16 0x98 LDA r16, [p3] +.delay_slot + 4456 0x03 0x16 0x06 0x98 LDA m4, [p3, #4] + 4460 0xfd 0xb3 0x20 0x00 0x01 0xf1 0x31 0x10 0x10 0xba LDA p3, [sp, #-20]; MOVXM p2, #508448 + 4470 0x40 0xce 0xd0 0x00 0x00 0x04 0x78 0xe8 0x10 0xba LDA r19, [p2]; MOVXM ls, #4560 + 4480 0xff 0x54 0x00 0x00 0x00 0x05 0xb9 0x08 0x10 0xba MOVA r20, #-6; MOVXM le, #4624 + 4490 0x04 0x5a 0x29 0x20 0xd5 0x64 MOVX r17, #52; MOV r18, #53 + 4496 0x00 0x00 NOPX + 4498 0x00 0x00 NOPX + 4500 0x00 0x00 NOPX + 4502 0x1a 0x66 0xc0 0xf8 MOV p2, p3 + 4506 0x42 0x80 0xf9 0xce 0x9b 0x2c VLDA lfh0, [p2, #64]; LSHL r19, r19, r20 + 4512 0x45 0x90 0xfa 0xf3 0xfe 0x14 VLDA lfl0, [p2], #128; ADD.NC lc, r19, #-2 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4518 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4520 0x02 0x14 0x07 0x98 VLDA lfh0, [p2, #64] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4524 0x02 0x2c 0x87 0x98 VLDA lfl0, [p2], #128 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4528 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4530 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4532 0x1c 0x21 0x92 0xf8 VMOV x8, lfh0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4536 0x00 0x2b 0x60 0x00 0x50 0xc9 0x70 0x02 NOPS; VMOV x1, lfh0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4544 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x12 0xc9 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VMOV x0, lfl0; NOPV +.label ZLS_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_304 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4560 0x42 0x80 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0xc2 0x24 0x38 0x00 0x00 0xe1 VLDA lfh0, [p2, #64]; NOPB; NOPS; NOPX; VSHUFFLE x3, x0, x8, r18; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4576 0x45 0x90 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x82 0x22 0x38 0x00 0x00 0xe1 VLDA lfl0, [p2], #128; NOPB; NOPS; NOPX; VSHUFFLE x2, x0, x8, r17; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4592 0x00 0x2c 0xf0 0x00 0x23 0x14 0xd3 0x00 0x00 0x02 0x10 0xc9 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST x3, [p3, #64]; NOPX; VMOV x8, lfh0; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4608 0x00 0x2c 0xf0 0x00 0x23 0x2c 0x93 0x00 0x00 0x00 0x50 0xc9 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST x2, [p3], #128; NOPX; VMOV x1, lfh0; NOPV +.label ZLE_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_368 +.end_of_loop +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4624 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x12 0xc9 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VMOV x0, lfl0; NOPV +.loop_nesting 0 + 4640 0x19 0x84 0x48 0x78 VSHUFFLE x3, x0, x8, r18 + 4644 0x19 0x04 0x44 0x78 VSHUFFLE x2, x0, x8, r17 + 4648 0x62 0x9a 0x60 0x02 0x10 0xc9 0x70 0x02 VST x3, [p3, #64]; VMOV x8, lfh0 + 4656 0x65 0x92 0x60 0x00 0x12 0xc9 0x70 0x02 VST x2, [p3], #128; VMOV x0, lfl0 + 4664 0x19 0x84 0x48 0x78 VSHUFFLE x3, x0, x8, r18 + 4668 0x19 0x04 0x44 0x78 VSHUFFLE x2, x0, x8, r17 + 4672 0x62 0x9a 0x60 0x00 0x50 0xc9 0x70 0x02 VST x3, [p3, #64]; VMOV x1, lfh0 + 4680 0x65 0x92 0x60 0x00 0x01 0xa5 0x70 0x02 VST x2, [p3], #128; NOPM +.label TGT_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_432 + 4688 0x3f 0x1f 0x90 0x18 PADDB [p7], #64 + 4692 0x07 0x1d 0xc6 0x98 LDA dj3, [p7], #4 + 4696 0xe3 0xb4 0xda 0x1f 0x71 0x54 LDA dn3, [p7], #4; MOV m5, #-36 + 4702 0x07 0xaa 0x56 0x98 LDA r18, [p7], m5 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 4706 0xe7 0xd2 0xd0 0x00 0x01 0xf1 0xb1 0xba 0x10 0xba LDA r20, [p7], #12; MOVXM p3, #508788 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4716 0x60 0xd0 0x50 0x00 0x00 0x0e 0xef 0xc0 0x10 0xba LDA.s8 r20, [p3]; MOVXM r23, #16256 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4726 0xfe 0xb3 0x25 0xba 0xe5 0xd4 LDA p3, [sp, #-12]; VBCST.16 x5, r23 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4732 0xc2 0xcd 0x7c 0x01 0x51 0x54 VLDA.CONV.fp32.bf16 cmh4, [p6, #64]; MOV m6, #84 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4738 0xe3 0xf0 0xd4 0xba 0xe5 0xd4 LDA m7, [p7], #4; VBCST.16 x4, r23 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4744 0xe3 0xb0 0xda 0x51 0x25 0xd4 LDA m3, [p7], #4; VMOV x10, x4 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4750 0xe3 0x90 0xdb 0x55 0x25 0xd4 LDA m1, [p7], #4; VMOV x11, x5 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4756 0xf9 0x60 0xda 0x5f 0xf6 0x2c LDA m6, [p7], m6; ADD r23, r20, #-2 + 4762 0xf9 0x80 0xd4 0x04 0x61 0x54 LDA m0, [p7], #-16; MOV m2, #280 + 4768 0xe3 0x84 0xda 0x1e 0x51 0x54 LDA dn0, [p7], #4; MOV m5, #-108 + 4774 0x07 0x1c 0x46 0x98 LDA dj0, [p7], #4 + 4778 0x07 0x1e 0x26 0x98 LDA dn4, [p7], #4 + 4782 0x07 0x2e 0x46 0x98 LDA dj4, [p7], #8 + 4786 0xf5 0x50 0xd9 0x80 0x01 0x54 LDA m5, [p7], m5; MOV dc4, #0 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 4792 0xe9 0x6a 0xd0 0x8c 0x8b 0x03 0x2f 0x60 0x72 0xba LDA r26, [p7], m2; MOVS p0, p3; MOV r25, p7 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 4802 0xfd 0xf3 0x20 0xd7 0x20 0x00 0x00 0xa6 0x33 0x02 0x02 0x09 0x60 0x7e LDA p7, [sp, #-20]; PADDB [p0], m3; MOVS dc0, dc4; MOVXM p2, #4912 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4816 0x02 0xa6 0xb2 0x10 0x4b 0x02 0xe4 0xc0 0x72 0xba VLDA bmlh2, [p0, #64]; MOVS dc2, dc4; MOV dc5, dc4 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4826 0x04 0xaa 0xb4 0x8c 0x8b 0x01 0x43 0x80 0x72 0xba VLDA bmhl2, [p0, #128]; MOVS p4, p3; MOV dj2, dj3 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4836 0x06 0xae 0xb7 0x97 0x23 0x22 0x31 0x61 0x20 0x0f 0x31 0x91 0x60 0x7e VLDA bmhh2, [p0, #192]; PADDB [p3], m6; MOVS p1, p3; MOVX r17, #780; MOV r24, m1 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4850 0x62 0x86 0xb0 0x00 0x25 0x3a 0x80 0x51 0xec 0x0f 0x60 0x09 0x60 0x7e VLDA bmlh0, [p3, #64]; NOPB; MOVS dc3, dc0; MOVX crRnd, r20; MOV r20, p7 + 4864 0x64 0x8a 0xb0 0x00 0x22 0x4e 0x4b 0x01 0x36 0x89 0x03 0x00 0x78 0x00 0x00 0xe1 VLDA bmhl0, [p3, #128]; NOPB; MOVS dn2, dn3; MOVX r19, #52; MOV m2, m3; NOPV +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 4880 0x66 0x8e 0xb0 0x00 0x21 0x0c 0x4b 0x01 0x56 0xa9 0x84 0x90 0x78 0x00 0x00 0xe1 VLDA bmhh0, [p3, #192]; NOPB; MOVS dc1, dc3; MOVX r21, #53; MOV m3, r18; NOPV +.aggressive_scheduled_block_id 4 +.noswbrkpt + 4896 0x60 0x82 0xbe 0x2a 0x6c 0x28 0x5b 0x01 0x67 0x8a 0xb4 0x60 0x78 0x00 0x00 0xe1 VLDA bmll0, [p3]; VLDB x4, [p7, #64]; PADDS [p4], m1; MOVX r22, #60; MOV p5, p4; NOPV +.label TGT_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_656 +.loop_nesting 1 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 4912 0x82 0x96 0xbb 0xd7 0x23 0x94 0x8b 0x00 0x84 0x10 0x70 0xf6 VLDA bmlh1, [p4, #64]; PADDB [p5], m7; MOVS p3, p5; MOV m1, r16 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 4924 0x84 0x9a 0xb3 0x73 0x90 0x02 0xbe 0xbf 0x4e 0xba VLDA bmhl1, [p4, #128]; PADDB.2D [p3], d3; ADD.NC lc, r26, #-3 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 4934 0x86 0x9e 0xb0 0x00 0x00 0x04 0x7a 0x00 0x10 0xba VLDA bmhh1, [p4, #192]; MOVXM ls, #5120 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 4944 0x80 0x92 0xbe 0x73 0xe8 0x00 0x00 0xb7 0x45 0x02 0x93 0x91 0x60 0x7e VLDA bmll1, [p4]; VLDB.3D x7, [p7], d1; MOVS p4, p7; MOVXM le, #5200 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 4958 0xa2 0xb6 0xb4 0x8b 0x90 0x01 0x82 0x00 0x7e 0xba VLDA bmlh3, [p5, #64]; PADDB [p4], m4; MOV m3, m2 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 4968 0x80 0xcb 0x78 0x2a 0xec 0x9c 0x8b 0x32 VLDA x9, [p4]; VLDB x5, [p4, #64]; MOVS p4, p7 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 4976 0xa4 0xba 0xbe 0x2a 0x6c 0x88 0x5b 0x32 VLDA bmhl3, [p5, #128]; VLDB x4, [p7, #64]; PADDS [p4], m4 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 4984 0xa6 0xbe 0xbe 0x73 0xe8 0x3c VLDA bmhh3, [p5, #192]; VLDB.3D x7, [p7], d1 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 4990 0xa0 0xb2 0xb4 0x15 0x74 0x02 0xb6 0x60 0x7e 0xba VLDA bmll3, [p5]; VLDB x5, [p4, #64]; MOV p5, p6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5000 0xc3 0x45 0x7b 0x57 0x20 0x3c VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;PADDB [p5], m5 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5006 0x00 0xa2 0xb4 0x06 0x74 0x01 0x9d 0x26 0x3e 0xba VLDA bmll2, [p0]; VLDB x9, [p4]; VSHUFFLE x6, x7, x4, r19 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5016 0x1b 0xba 0x54 0x78 VSHUFFLE x7, x7, x4, r21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5020 0x93 0x91 0x67 0x15 0x34 0x02 0x25 0x66 0x36 0xba VLDB x4, [p7, #64]; MOVS p4, p7; VSHUFFLE x8, x9, x5, r19 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5030 0xa0 0xc5 0x79 0x17 0x24 0xca 0xd4 0x62 0xb4 0xed 0x51 0x66 VLDA.CONV.fp32.bf16 cml4, [p5];PADDB [p4], m4; VSHUFFLE x9, x9, x5, r21; VMUL.f dm4, y3, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5042 0xa2 0xcd 0x78 0x2a 0xed 0x98 0x8b 0x01 0x9d 0x26 0x30 0xf6 VLDA.CONV.fp32.bf16 cmh4, [p5, #64];VLDB x5, [p4, #64]; MOVS p5, p6; VSHUFFLE x6, x7, x4, r19 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5054 0xb5 0x0c 0xf7 0x74 0xa8 0xd4 PADDA [p5], m5; VSHUFFLE x7, x7, x4, r21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5060 0xc2 0xcd 0x70 0x0a 0x36 0x09 0xb4 0xf1 0x51 0x4a VLDA.CONV.fp32.bf16 cmh4, [p6, #64]; VCONV.bfp16ebs8.fp32 ex0, dm4; VMUL.f dm4, y4, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5070 0xc3 0x45 0x77 0x39 0xf4 0x02 0x25 0x66 0x3e 0xba VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;VLDB.3D x7, [p7], d1; VSHUFFLE x8, x9, x5, r19 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5080 0x1c 0xca 0xd4 0x78 VSHUFFLE x9, x9, x5, r21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5084 0x08 0x8a 0x36 0x18 VCONV.bfp16ebs8.fp32 ex1, dm4 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5088 0xa0 0xc5 0x74 0x06 0x74 0x1d 0xb4 0xed 0x51 0x4a VLDA.CONV.fp32.bf16 cml4, [p5];VLDB x9, [p4]; VMUL.f dm4, y3, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5098 0xa2 0xcd 0x72 0x14 0x6c 0x0c VLDA.CONV.fp32.bf16 cmh4, [p5, #64]; VCONV.bfp16ebs8.fp32 ex2, dm4 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5104 0x00 0x2c 0xf0 0x00 0x21 0x8a 0x36 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VCONV.bfp16ebs8.fp32 ex3, dm4;NOPX; NOPM; NOPV +.label ZLS_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_864 +.loop_nesting 2 +.begin_of_loop +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5120 0x0e 0x2a 0x6d 0x6c 0xc0 0xe6 0xb4 0xf1 0x51 0x4a VLDB x4, [p7, #64]; MOV p5, p6; VMUL.f dm4, y4, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5130 0xc2 0xcd 0x7e 0x73 0xec 0x9c 0x8b 0x00 0x00 0x01 0x9d 0x26 0x3c 0x5b 0x01 0x4b VLDA.CONV.fp32.bf16 cmh4, [p6, #64];VLDB.3D x7, [p7], d1; MOVS p4, p7; NOPX; VSHUFFLE x6, x7, x4, r19; VMAC.f dm3, dm3, ex0, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5146 0xc3 0x45 0x79 0x17 0x20 0x0a 0x36 0x00 0x00 0x01 0xdd 0x2a 0x3c 0x49 0x21 0x4b VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;PADDB [p4], m4; VCONV.bfp16ebs8.fp32 ex0, dm4;NOPX; VSHUFFLE x7, x7, x4, r21; VMAC.f dm1, dm1, ex2, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5162 0xb5 0x0c 0xf8 0x2a 0xe8 0x45 0x1b 0x0e 0x8a 0x40 0x69 0x66 PADDA [p5], m5; VLDB x5, [p4, #64]; VCONV.bfp16ebs8.fp32 ex1, dm4; VMAC.f dm2, dm2, ex0, ex3, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5174 0xa0 0xc5 0x78 0x0c 0xec 0x4a 0xcc 0x62 0xb4 0xed 0x51 0x66 VLDA.CONV.fp32.bf16 cml4, [p5];VLDB x9, [p4]; VSHUFFLE x8, x9, x5, r19; VMUL.f dm4, y3, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5186 0xa2 0xcd 0x72 0x14 0x6c 0x00 0x00 0x66 0x56 0xa3 0x88 0x04 0x69 0x6e VLDA.CONV.fp32.bf16 cmh4, [p5, #64]; VCONV.bfp16ebs8.fp32 ex2, dm4; VSHUFFLE x9, x9, x5, r21; VMAC.f dm0, dm0, ex2, ex3, r17 +.label ZLE_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_944 +.end_of_loop +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5200 0x00 0x2c 0xf0 0x00 0x21 0x8a 0x36 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VCONV.bfp16ebs8.fp32 ex3, dm4;NOPX; NOPM; NOPV +.loop_nesting 1 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5216 0xc2 0xcd 0x7e 0x2a 0x6c 0x84 0x8b 0x00 0x00 0x02 0xb6 0x60 0x7d 0xa7 0x8a 0x8b VLDA.CONV.fp32.bf16 cmh4, [p6, #64];VLDB x4, [p7, #64]; MOVS p4, p1; NOPX; MOV p5, p6; VMUL.f dm4, y4, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5232 0xc3 0x45 0x7b 0x57 0x20 0x84 0x8b 0x00 0x00 0x01 0x9d 0x26 0x3c 0x5b 0x01 0x4b VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;PADDB [p5], m5; MOVS p0, p1; NOPX; VSHUFFLE x6, x7, x4, r19; VMAC.f dm3, dm3, ex0, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5248 0x0d 0x0c 0xf3 0xd7 0x20 0x0a 0x36 0x00 0x00 0x01 0xdd 0x2a 0x3c 0x49 0x21 0x4b PADDA [p0], m3; PADDB [p1], m7; VCONV.bfp16ebs8.fp32 ex0, dm4;NOPX; VSHUFFLE x7, x7, x4, r21; VMAC.f dm1, dm1, ex2, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5264 0x11 0x46 0xc1 0x0c 0x20 0xe4 0x8a 0x40 0x69 0x4a VCONV.bfp16ebs8.fp32 ex1, dm4; MOV m1, r24; VMAC.f dm2, dm2, ex0, ex3, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5274 0xa0 0xc5 0x74 0x4a 0xcc 0x62 0xb4 0xed 0x51 0x4a VLDA.CONV.fp32.bf16 cml4, [p5]; VSHUFFLE x8, x9, x5, r19; VMUL.f dm4, y3, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5284 0xa2 0xcd 0x72 0x14 0x6c 0x00 0x00 0x66 0x56 0xa3 0x88 0x04 0x69 0x6e VLDA.CONV.fp32.bf16 cmh4, [p5, #64]; VCONV.bfp16ebs8.fp32 ex2, dm4; VSHUFFLE x9, x9, x5, r21; VMAC.f dm0, dm0, ex2, ex3, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5298 0x31 0x46 0xc4 0x2b 0x90 0x02 0xb4 0x60 0x76 0xba PADDB [p4], m1; VCONV.bfp16ebs8.fp32 ex3, dm4; MOV p5, p4 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5308 0x02 0x09 0x20 0xe6 0x8b 0x60 0x29 0x62 MOV m2, r18; VMAC.f dm3, dm3, ex0, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5316 0xc2 0xcd 0x70 0x0a 0x36 0x09 0xb4 0xf1 0x51 0x4a VLDA.CONV.fp32.bf16 cmh4, [p6, #64]; VCONV.bfp16ebs8.fp32 ex0, dm4; VMUL.f dm4, y4, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5326 0x89 0x24 0x29 0x48 VMAC.f dm1, dm1, ex2, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5330 0x11 0x46 0xc0 0x02 0x8a 0x40 0x69 0x62 VCONV.bfp16ebs8.fp32 ex1, dm4; VMAC.f dm2, dm2, ex0, ex3, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5338 0x88 0x04 0x69 0x48 VMAC.f dm0, dm0, ex2, ex3, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5342 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5344 0x09 0x0a 0x36 0x18 VCONV.bfp16ebs8.fp32 ex2, dm4 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5348 0x31 0x46 0xc0 0x02 0x8b 0x60 0x29 0x62 VCONV.bfp16ebs8.fp32 ex3, dm4; VMAC.f dm3, dm3, ex0, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5356 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5358 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5360 0x89 0x24 0x29 0x48 VMAC.f dm1, dm1, ex2, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5364 0x8a 0x40 0x69 0x48 VMAC.f dm2, dm2, ex0, ex3, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5368 0x88 0x04 0x69 0x48 VMAC.f dm0, dm0, ex2, ex3, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5372 0x09 0x15 0xa6 0x98 VST bmlh3, [p1, #64] +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5376 0x09 0x25 0xc6 0x98 VST bmhl3, [p1, #128] +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5380 0x09 0x35 0xe6 0x98 VST bmhh3, [p1, #192] +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5384 0x20 0xb0 0xd5 0xcb 0x90 0x00 0xb5 0x60 0x76 0xba PADDB [p5], m6; VST bmll3, [p1]; MOV p1, p5 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5394 0x02 0xa4 0xd1 0x53 0x90 0x01 0x03 0x00 0x76 0xba PADDB.2D [p1], d2; VST bmlh2, [p0, #64]; MOV m2, m3 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5404 0x08 0x25 0x46 0x98 VST bmhl2, [p0, #128] +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5408 0x08 0x35 0x66 0x98 VST bmhh2, [p0, #192] +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5412 0x00 0xa0 0xd0 0x00 0x33 0x60 0x70 0x02 VST bmll2, [p0]; MOV p0, p3 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5420 0x82 0x94 0xd0 0x6b 0x90 0x01 0x84 0x90 0x76 0xba PADDB [p0], m3; VST bmlh1, [p4, #64]; MOV m3, r18 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5430 0x02 0xa6 0xb8 0x49 0x8d 0x0c VLDA bmlh2, [p0, #64]; VST bmhl1, [p4, #128] +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5436 0x04 0xaa 0xb4 0x34 0xe6 0x80 0x05 0xee 0xa0 0x7a VLDA bmhl2, [p0, #128]; VST bmhh1, [p4, #192]; JNZD r23, r23, p2 +.delay_slot +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5446 0x06 0xae 0xb7 0x97 0x24 0x04 0x86 0x82 0x33 0x60 0x70 0xf6 VLDA bmhh2, [p0, #192]; PADDB [p3], m6; VST bmll1, [p4]; MOV p4, p3 +.delay_slot +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5458 0x62 0x86 0xba 0x28 0x4d 0x0c VLDA bmlh0, [p3, #64]; VST bmlh0, [p5, #64] +.delay_slot +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5464 0x64 0x8a 0xba 0x48 0x8d 0x0c VLDA bmhl0, [p3, #128]; VST bmhl0, [p5, #128] +.delay_slot +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5470 0x66 0x8e 0xba 0x68 0xcd 0x0c VLDA bmhh0, [p3, #192]; VST bmhh0, [p5, #192] +.delay_slot +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5476 0x60 0x82 0xb8 0x57 0x25 0x04 0x06 0x82 0xb4 0x60 0x70 0xf6 VLDA bmll0, [p3]; PADDB [p4], m1; VST bmll0, [p5]; MOV p5, p4 +.loop_nesting 0 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5488 0x82 0x96 0xbb 0xd7 0x23 0x94 0x8b 0x00 0x84 0x10 0x70 0xf6 VLDA bmlh1, [p4, #64]; PADDB [p5], m7; MOVS p3, p5; MOV m1, r16 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5500 0x84 0x9a 0xb3 0x73 0x90 0x02 0xbe 0xbf 0x4e 0xba VLDA bmhl1, [p4, #128]; PADDB.2D [p3], d3; ADD.NC lc, r26, #-3 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5510 0x86 0x9e 0xb0 0x00 0x00 0x04 0x7b 0x20 0x10 0xba VLDA bmhh1, [p4, #192]; MOVXM ls, #5696 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5520 0x80 0x92 0xbe 0x73 0xe8 0x00 0x00 0xb7 0x69 0x02 0x93 0x91 0x60 0x7e VLDA bmll1, [p4]; VLDB.3D x7, [p7], d1; MOVS p4, p7; MOVXM le, #5776 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5534 0xa2 0xb6 0xb4 0x8b 0x90 0x01 0x82 0x00 0x7e 0xba VLDA bmlh3, [p5, #64]; PADDB [p4], m4; MOV m3, m2 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5544 0x80 0xcb 0x78 0x2a 0xec 0x9c 0x8b 0x32 VLDA x9, [p4]; VLDB x5, [p4, #64]; MOVS p4, p7 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5552 0xa4 0xba 0xbe 0x2a 0x6c 0x88 0x5b 0x32 VLDA bmhl3, [p5, #128]; VLDB x4, [p7, #64]; PADDS [p4], m4 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5560 0xa6 0xbe 0xbe 0x73 0xe8 0x3c VLDA bmhh3, [p5, #192]; VLDB.3D x7, [p7], d1 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5566 0xa0 0xb2 0xb4 0x15 0x74 0x02 0xb6 0x60 0x7e 0xba VLDA bmll3, [p5]; VLDB x5, [p4, #64]; MOV p5, p6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5576 0xc3 0x45 0x7b 0x57 0x20 0x3c VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;PADDB [p5], m5 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5582 0x00 0xa2 0xb4 0x06 0x74 0x01 0x9d 0x26 0x3e 0xba VLDA bmll2, [p0]; VLDB x9, [p4]; VSHUFFLE x6, x7, x4, r19 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5592 0x1b 0xba 0x54 0x78 VSHUFFLE x7, x7, x4, r21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5596 0x93 0x91 0x67 0x15 0x34 0x02 0x25 0x66 0x36 0xba VLDB x4, [p7, #64]; MOVS p4, p7; VSHUFFLE x8, x9, x5, r19 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5606 0xa0 0xc5 0x79 0x17 0x24 0xca 0xd4 0x62 0xb4 0xed 0x51 0x66 VLDA.CONV.fp32.bf16 cml4, [p5];PADDB [p4], m4; VSHUFFLE x9, x9, x5, r21; VMUL.f dm4, y3, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5618 0xa2 0xcd 0x78 0x2a 0xed 0x98 0x8b 0x01 0x9d 0x26 0x30 0xf6 VLDA.CONV.fp32.bf16 cmh4, [p5, #64];VLDB x5, [p4, #64]; MOVS p5, p6; VSHUFFLE x6, x7, x4, r19 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5630 0xb5 0x0c 0xf7 0x74 0xa8 0xd4 PADDA [p5], m5; VSHUFFLE x7, x7, x4, r21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5636 0xc2 0xcd 0x70 0x0a 0x36 0x09 0xb4 0xf1 0x51 0x4a VLDA.CONV.fp32.bf16 cmh4, [p6, #64]; VCONV.bfp16ebs8.fp32 ex0, dm4; VMUL.f dm4, y4, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5646 0xc3 0x45 0x77 0x39 0xf4 0x02 0x25 0x66 0x3e 0xba VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;VLDB.3D x7, [p7], d1; VSHUFFLE x8, x9, x5, r19 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5656 0x1c 0xca 0xd4 0x78 VSHUFFLE x9, x9, x5, r21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5660 0x08 0x8a 0x36 0x18 VCONV.bfp16ebs8.fp32 ex1, dm4 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5664 0xa0 0xc5 0x74 0x06 0x74 0x1d 0xb4 0xed 0x51 0x4a VLDA.CONV.fp32.bf16 cml4, [p5];VLDB x9, [p4]; VMUL.f dm4, y3, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5674 0xa2 0xcd 0x72 0x14 0x6c 0x0c VLDA.CONV.fp32.bf16 cmh4, [p5, #64]; VCONV.bfp16ebs8.fp32 ex2, dm4 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5680 0x00 0x2c 0xf0 0x00 0x21 0x8a 0x36 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VCONV.bfp16ebs8.fp32 ex3, dm4;NOPX; NOPM; NOPV +.label ZLS_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_1440 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5696 0x0e 0x2a 0x6d 0x6c 0xc0 0xe6 0xb4 0xf1 0x51 0x4a VLDB x4, [p7, #64]; MOV p5, p6; VMUL.f dm4, y4, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5706 0xc2 0xcd 0x7e 0x73 0xec 0x9c 0x8b 0x00 0x00 0x01 0x9d 0x26 0x3c 0x5b 0x01 0x4b VLDA.CONV.fp32.bf16 cmh4, [p6, #64];VLDB.3D x7, [p7], d1; MOVS p4, p7; NOPX; VSHUFFLE x6, x7, x4, r19; VMAC.f dm3, dm3, ex0, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5722 0xc3 0x45 0x79 0x17 0x20 0x0a 0x36 0x00 0x00 0x01 0xdd 0x2a 0x3c 0x49 0x21 0x4b VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;PADDB [p4], m4; VCONV.bfp16ebs8.fp32 ex0, dm4;NOPX; VSHUFFLE x7, x7, x4, r21; VMAC.f dm1, dm1, ex2, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5738 0xb5 0x0c 0xf8 0x2a 0xe8 0x45 0x1b 0x0e 0x8a 0x40 0x69 0x66 PADDA [p5], m5; VLDB x5, [p4, #64]; VCONV.bfp16ebs8.fp32 ex1, dm4; VMAC.f dm2, dm2, ex0, ex3, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5750 0xa0 0xc5 0x78 0x0c 0xec 0x4a 0xcc 0x62 0xb4 0xed 0x51 0x66 VLDA.CONV.fp32.bf16 cml4, [p5];VLDB x9, [p4]; VSHUFFLE x8, x9, x5, r19; VMUL.f dm4, y3, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5762 0xa2 0xcd 0x72 0x14 0x6c 0x00 0x00 0x66 0x56 0xa3 0x88 0x04 0x69 0x6e VLDA.CONV.fp32.bf16 cmh4, [p5, #64]; VCONV.bfp16ebs8.fp32 ex2, dm4; VSHUFFLE x9, x9, x5, r21; VMAC.f dm0, dm0, ex2, ex3, r17 +.label ZLE_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_1520 +.end_of_loop +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5776 0x00 0x2c 0xf0 0x00 0x21 0x8a 0x36 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VCONV.bfp16ebs8.fp32 ex3, dm4;NOPX; NOPM; NOPV +.loop_nesting 0 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5792 0xc2 0xcd 0x79 0x09 0x16 0x00 0x00 0x6b 0x66 0x07 0xb4 0xf1 0x51 0x6e VLDA.CONV.fp32.bf16 cmh4, [p6, #64]; MOVS p4, p1; MOV p5, p6; VMUL.f dm4, y4, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5806 0xc3 0x45 0x7b 0x57 0x20 0x84 0x8b 0x00 0x00 0x01 0x9d 0x26 0x3c 0x5b 0x01 0x4b VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;PADDB [p5], m5; MOVS p0, p1; NOPX; VSHUFFLE x6, x7, x4, r19; VMAC.f dm3, dm3, ex0, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5822 0xda 0x06 0x83 0xd7 0x20 0x0a 0x36 0x00 0x00 0x01 0xdd 0x2a 0x3c 0x49 0x21 0x4b MOVA dj1, #-304; PADDB [p1], m7; VCONV.bfp16ebs8.fp32 ex0, dm4;NOPX; VSHUFFLE x7, x7, x4, r21; VMAC.f dm1, dm1, ex2, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5838 0x0d 0x0c 0xf1 0x14 0x6c 0x00 0x00 0x7b 0x51 0x07 0x8a 0x40 0x69 0x6e PADDA [p0], m3; VCONV.bfp16ebs8.fp32 ex1, dm4; MOV p7, r20; VMAC.f dm2, dm2, ex0, ex3, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5852 0xa0 0xc5 0x7d 0x32 0x16 0x00 0x00 0x62 0x56 0x63 0xb4 0xed 0x51 0x6e VLDA.CONV.fp32.bf16 cml4, [p5]; MOVS p6, r25; VSHUFFLE x8, x9, x5, r19; VMUL.f dm4, y3, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5866 0xa2 0xcd 0x72 0x14 0x6c 0x00 0x00 0x66 0x56 0xa3 0x88 0x04 0x69 0x6e VLDA.CONV.fp32.bf16 cmh4, [p5, #64]; VCONV.bfp16ebs8.fp32 ex2, dm4; VSHUFFLE x9, x9, x5, r21; VMAC.f dm0, dm0, ex2, ex3, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5880 0x31 0x46 0xc0 0x00 0x86 0x10 0x70 0x02 VCONV.bfp16ebs8.fp32 ex3, dm4; MOV m1, r24 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5888 0x08 0x57 0x25 0x68 0xc0 0xe6 0x8b 0x60 0x29 0x4a PADDB [p4], m1; MOV p5, p4; VMAC.f dm3, dm3, ex0, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5898 0x01 0x46 0xc0 0x02 0xb4 0xf1 0x51 0x62 VCONV.bfp16ebs8.fp32 ex0, dm4; VMUL.f dm4, y4, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5906 0x89 0x24 0x29 0x48 VMAC.f dm1, dm1, ex2, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5910 0x11 0x46 0xc0 0x02 0x8a 0x40 0x69 0x62 VCONV.bfp16ebs8.fp32 ex1, dm4; VMAC.f dm2, dm2, ex0, ex3, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5918 0x88 0x04 0x69 0x48 VMAC.f dm0, dm0, ex2, ex3, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5922 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5924 0xe4 0x46 0xd1 0x0a 0x36 0x00 0xc1 0x18 0x52 0xba LDA r17, [p7, dj1]; VCONV.bfp16ebs8.fp32 ex2, dm4; MOV dj1, #280 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5934 0xc4 0x42 0xd0 0x00 0x21 0x8a 0x36 0x00 0x01 0xf3 0xb1 0x00 0x14 0x5b 0x01 0x4b LDA r16, [p6, dj1]; NOPB; VCONV.bfp16ebs8.fp32 ex3, dm4;MOVXM p7, #508416; VMAC.f dm3, dm3, ex0, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5950 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5952 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5954 0x89 0x24 0x29 0x48 VMAC.f dm1, dm1, ex2, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5958 0x8a 0x40 0x69 0x48 VMAC.f dm2, dm2, ex0, ex3, r17 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5962 0x88 0x04 0x69 0x48 VMAC.f dm0, dm0, ex2, ex3, r17 + 5966 0x22 0xb4 0xd8 0xc7 0xfe 0x5c VST bmlh3, [p1, #64]; ADD r17, r17, #-1 + 5972 0x24 0xb8 0xd8 0xc6 0x11 0x5c VST bmhl3, [p1, #128]; NE r17, r17, r16 + 5978 0x09 0x35 0xe6 0x98 VST bmhh3, [p1, #192] + 5982 0x20 0xb0 0xd5 0xcb 0x90 0x00 0xb5 0x60 0x76 0xba PADDB [p5], m6; VST bmll3, [p1]; MOV p1, p5 + 5992 0x08 0x15 0x26 0x98 VST bmlh2, [p0, #64] + 5996 0x08 0x25 0x46 0x98 VST bmhl2, [p0, #128] + 6000 0x08 0x35 0x66 0x98 VST bmhh2, [p0, #192] + 6004 0x08 0x05 0x06 0x98 VST bmll2, [p0] + 6008 0x0c 0x14 0xa6 0x98 VST bmlh1, [p4, #64] + 6012 0x0c 0x24 0xc6 0x98 VST bmhl1, [p4, #128] + 6016 0x86 0x9c 0xd0 0x22 0x02 0xfe 0x10 0x00 0x61 0x3a VST bmhh1, [p4, #192]; JNZ r17, #6128 +.delay_slot + 6026 0x0c 0x04 0x86 0x98 VST bmll1, [p4] +.delay_slot + 6030 0x0d 0x14 0x26 0x98 VST bmlh0, [p5, #64] +.delay_slot + 6034 0x0d 0x24 0x46 0x98 VST bmhl0, [p5, #128] +.delay_slot + 6038 0xa6 0x8c 0xd0 0x01 0x04 0x90 0x70 0x02 VST bmhh0, [p5, #192]; MOV m2, r18 +.delay_slot + 6046 0x2a 0x72 0x0a 0x08 0x0d 0x4c PADDB.2D [p1], d2; VST bmll0, [p5] + 6052 0x07 0x06 0x16 0x98 LDA r16, [p7] + 6056 0x07 0x26 0x36 0x98 LDA r17, [p7, #8] + 6060 0x07 0xf4 0x19 0x18 LDA p0, [sp, #-12] + 6064 0x00 0x00 NOPX +.no_stack_arguments + 6066 0x00 0x07 0xb8 0x00 0x01 0x04 JL #3952 +.delay_slot +.swstall delay_slot + 6072 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6074 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6076 0x00 0x00 NOPX +.delay_slot + 6078 0x14 0x41 0x0f 0x98 MUL r0, r17, r16 +.delay_slot +.swstall delay_slot + 6082 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV +.return_address + 6096 0x00 0x0c 0x00 0x00 0x00 0x84 J #6144 +.delay_slot + 6102 0x10 0x20 0x01 0x18 MOVX r16, #0 +.delay_slot +.swstall delay_slot + 6106 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6108 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6110 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6112 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_1872 + 6128 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x21 0x00 0x38 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; ADD r16, r16, #1; NOPM; NOPV +.label TGT_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_1888 + 6144 0x07 0xf8 0x39 0x18 LDA lr, [sp, #-8] + 6148 0x07 0xf3 0x99 0x18 LDA p7, [sp, #-16] + 6152 0x00 0x00 NOPX + 6154 0x00 0x00 NOPX + 6156 0x00 0x00 NOPX +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 6158 0x00 0x00 NOPX +.aggressive_scheduled_block_id 5 +.noswbrkpt + 6160 0x07 0xff 0x19 0x18 LDA p6, [sp, #-4] +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 6164 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 6168 0x19 0x82 0x30 0xb8 MOV dj1, #280 +.delay_slot +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6172 0x0e 0x22 0x11 0x98 ST r16, [p6, dj1] +.delay_slot + 6176 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 6182 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6184 0x00 0x00 NOPX +.label _Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params__end +.label __Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params___func_end0 + +.text_segment PM 6192 +.label __Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function_start + 6192 0x90 0x91 0x60 0x00 0x01 0xf2 0xb1 0xb0 0x11 0x3a MOVS p4, p1; MOVXM p5, #508768 + 6202 0x05 0x06 0x16 0x98 LDA r16, [p5] + 6206 0x00 0x00 NOPX + 6208 0x00 0x00 NOPX + 6210 0x00 0x00 NOPX + 6212 0x00 0x00 NOPX + 6214 0x00 0x00 NOPX + 6216 0x00 0x00 NOPX + 6218 0x80 0x0c 0x60 0x40 0x01 0x84 JNZ r16, #6336 +.delay_slot + 6224 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 +.delay_slot + 6230 0x0f 0xff 0x1d 0x98 ST p6, [sp, #-4] +.delay_slot + 6234 0x0f 0xf7 0x9d 0x98 ST p7, [sp, #-12] +.delay_slot + 6238 0xff 0x07 0xb0 0x03 0xb0 0x60 0x70 0x02 ST lr, [sp, #-8]; MOV p7, p0 +.delay_slot + 6246 0x1e 0x66 0xc0 0xf8 MOV p6, p3 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 6250 0x11 0x11 0x60 0x00 0x01 0xf1 0xb1 0xba 0x11 0x3a MOVS p0, p2; MOVXM p3, #508788 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 6260 0x60 0xc0 0xe0 0x00 0x01 0xf1 0xb1 0xb8 0x10 0xba ST.s8 r16, [p3]; MOVXM p3, #508784 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6270 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 6272 0x00 0x06 0x08 0x00 0x01 0x04 JL #3088 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6278 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6280 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6282 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 6286 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 6290 0x00 0x2c 0xf6 0x0c 0x23 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; ST r16, [p3]; NOPM; NOPV +.return_address + 6304 0x4c 0x85 0xd0 0x00 0x01 0xf1 0x31 0xb4 0x10 0xba LDA el0, [p2, #24]; MOVXM p2, #508776 + 6314 0x00 0x00 NOPX + 6316 0x00 0x00 NOPX + 6318 0x00 0x00 NOPX + 6320 0x00 0x00 NOPX + 6322 0x00 0x00 NOPX + 6324 0x00 0x00 NOPX + 6326 0x00 0x2c 0xf2 0x04 0x29 0x80 0x00 0x00 0x00 0x7a NOPA; ST el0, [p2]; NOPX +.label TGT_F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_144 + 6336 0xa0 0xc2 0xd0 0x00 0x01 0xf1 0x31 0xb2 0x10 0xba LDA r16, [p5]; MOVXM p2, #508772 + 6346 0x02 0x06 0x36 0x98 LDA r17, [p2] + 6350 0x00 0x00 NOPX + 6352 0x00 0x00 NOPX + 6354 0x00 0x00 NOPX + 6356 0x00 0x00 NOPX + 6358 0x00 0x00 NOPX + 6360 0x00 0x00 NOPX + 6362 0x88 0x0c 0x98 0x40 0x01 0x84 JNZ r17, #6448 +.delay_slot + 6368 0x14 0x20 0x07 0x18 ADD r16, r16, #1 +.delay_slot + 6372 0x0d 0x06 0x11 0x98 ST r16, [p5] +.delay_slot + 6376 0x14 0x60 0x07 0x18 ADD r16, r17, #1 +.delay_slot + 6380 0x0a 0x06 0x11 0x98 ST r16, [p2] +.delay_slot +.swstall delay_slot + 6384 0x00 0x00 NOPX + 6386 0x1c 0x1c 0xc0 0xf8 MOV r16, p6 + 6390 0x1a 0x68 0x06 0x18 ADD.NC p2, r16, #12 + 6394 0x02 0xff 0x76 0x98 LDA r27, [p2], #-4 + 6398 0x02 0xfe 0x16 0x98 LDA r16, [p2], #-4 + 6402 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 6406 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.noswbrkpt + 6408 0x02 0x46 0x16 0x98 LDA r16, [p2, #16] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6412 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6414 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6416 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6418 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6420 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6424 0x0a 0x06 0x11 0x98 ST r16, [p2] + 6428 0x17 0xe2 0xfd 0x18 MOVX r17, #-1 + 6432 0x00 0x00 NOPX + 6434 0x00 0x00 NOPX + 6436 0x00 0x00 NOPX + 6438 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x04 0x13 0x18 0x7a NOPA; NOPS; ACQ r16, r17 +.label TGT_F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_256 + 6448 0x00 0x18 0x00 0x1f 0xff 0xfe 0x0f 0xf0 0x10 0xba MOVA r24, #0; MOVXM r16, #2147483616 + 6458 0x10 0x22 0x05 0x18 MOVX r17, #1 + 6462 0x00 0x00 NOPX + 6464 0x80 0xb3 0xd0 0x00 0x01 0xf2 0x31 0x06 0x10 0xba LDA p3, [p4]; MOVXM p4, #508428 + 6474 0x04 0xff 0x76 0x98 LDA r27, [p4], #-4 + 6478 0x04 0xee 0x56 0x98 LDA r18, [p4], #-8 + 6482 0xe0 0x83 0xde 0xd1 0x81 0xd4 LDA p0, [p7]; MOV p7, p4 + 6488 0x06 0x05 0x1e 0x98 LDA p2, [p6] + 6492 0x00 0x00 NOPX + 6494 0x00 0x00 NOPX + 6496 0x1c 0xd6 0xc0 0xf8 MOV r19, p3 +.no_stack_arguments + 6500 0x00 0x08 0x50 0x00 0x01 0x04 JL #4256 +.delay_slot + 6506 0x14 0xa4 0x7f 0x18 ADD r18, r18, #31 +.delay_slot + 6510 0x14 0xa1 0x04 0x98 AND r16, r18, r16 +.delay_slot + 6514 0x16 0x21 0x02 0x18 SEL.EQZ r16, r24, r16, r27 +.delay_slot + 6518 0x14 0x21 0x1d 0x98 LSHL r16, r16, r17 +.delay_slot + 6522 0x00 0x2c 0xf2 0xd3 0x82 0x94 NOPA; ADD.NC p1, r19, r16 +.return_address + 6528 0xe8 0xc2 0xd0 0x00 0x01 0xf1 0x31 0xb2 0x10 0xba LDA r16, [p7, #16]; MOVXM p2, #508772 + 6538 0x40 0xca 0xd8 0xb9 0x81 0xd4 LDA r18, [p2]; MOV r17, p6 + 6544 0x07 0xf7 0x99 0x18 LDA p7, [sp, #-12] + 6548 0x00 0x00 NOPX + 6550 0x00 0x00 NOPX + 6552 0x00 0x00 NOPX + 6554 0x00 0x00 NOPX + 6556 0x00 0x00 NOPX + 6558 0x14 0xa1 0x08 0x98 NE r16, r18, r16 + 6562 0x80 0x0c 0xf8 0x40 0x01 0x84 JNZ r16, #6640 +.delay_slot + 6568 0x10 0x30 0x01 0x18 MOVX r24, #0 +.delay_slot +.swstall delay_slot + 6572 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6574 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6576 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6578 0x00 0x00 NOPX + 6580 0x40 0xe2 0x30 0x01 0x00 0x2b 0x34 0x45 0x09 0x3a ST r24, [p2]; MOVX r16, #1; ADD.NC p6, r17, #20 + 6590 0x06 0x06 0x36 0x98 LDA r17, [p6] + 6594 0x00 0x00 NOPX + 6596 0x00 0x00 NOPX + 6598 0x00 0x00 NOPX + 6600 0x00 0x00 NOPX + 6602 0x00 0x00 NOPX + 6604 0x00 0x00 NOPX + 6606 0x14 0x51 0x08 0x18 REL r17, r16 + 6610 0x06 0xe6 0x36 0x98 LDA r17, [p6, #-8] + 6614 0x00 0x00 NOPX + 6616 0x00 0x00 NOPX + 6618 0x00 0x00 NOPX + 6620 0x00 0x00 NOPX + 6622 0x00 0x00 NOPX + 6624 0x00 0x00 NOPX + 6626 0x14 0x21 0x11 0x98 SUB r16, r16, r17 + 6630 0x00 0x2c 0xf6 0xe6 0x11 0x80 0x00 0x00 0x00 0x7a NOPA; ST r16, [p6, #-8]; NOPX +.label TGT_F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 + 6640 0x00 0x07 0xc4 0xc6 0xc0 0x44 MOVXM p2, #508768 + 6646 0x40 0xc2 0xd0 0x00 0x01 0xf1 0xb1 0xb4 0x10 0xba LDA r16, [p2]; MOVXM p3, #508776 + 6656 0x03 0x06 0x36 0x98 LDA r17, [p3] + 6660 0x00 0x00 NOPX + 6662 0x00 0x00 NOPX + 6664 0x00 0x00 NOPX + 6666 0x00 0x00 NOPX + 6668 0x00 0x00 NOPX + 6670 0x00 0x00 NOPX + 6672 0x14 0x61 0x08 0x98 NE r16, r17, r16 + 6676 0x80 0x0d 0x18 0x40 0x01 0x84 JNZ r16, #6704 +.delay_slot + 6682 0x07 0xff 0x19 0x18 LDA p6, [sp, #-4] +.delay_slot +.swstall delay_slot + 6686 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6688 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6690 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6692 0x00 0x00 NOPX + 6694 0x00 0x2c 0xf2 0x07 0x11 0x80 0x00 0x00 0x00 0x7a NOPA; ST r24, [p2]; NOPX +.label TGT_F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_512 + 6704 0x07 0xf8 0x39 0x18 LDA lr, [sp, #-8] + 6708 0x00 0x00 NOPX + 6710 0x00 0x00 NOPX + 6712 0x00 0x00 NOPX + 6714 0x00 0x00 NOPX + 6716 0x00 0x00 NOPX + 6718 0x00 0x00 NOPX + 6720 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 6724 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 6730 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6732 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6734 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6736 0x00 0x00 NOPX +.label _Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + +.text_segment PM 6752 +.label __Z15_b13786_wrapperPPv___func_begin0 +.label _Z15_b13786_wrapperPPv +.function_start + 6752 0x1a 0x60 0xc0 0xf8 MOV p2, p0 + 6756 0x02 0x1c 0x1e 0x98 LDA p0, [p2], #4 + 6760 0x02 0x2c 0x9e 0x98 LDA p1, [p2], #8 + 6764 0x02 0xf5 0x9e 0x98 LDA p3, [p2, #-4] + 6768 0x02 0x05 0x1e 0x98 LDA p2, [p2] +.tail_call + 6772 0x00 0x0c 0x18 0x00 0x00 0x84 J #6192 +.delay_slot +.swstall delay_slot + 6778 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6780 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6782 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6784 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6786 0x00 0x00 NOPX +.label _Z15_b13786_wrapperPPv__end +.label __Z15_b13786_wrapperPPv___func_end0 + +.text_segment PM 6800 +.label __ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj___func_begin0 +.label _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj +.function_start + 6800 0x20 0x85 0xd8 0xa9 0x81 0xd4 LDA el0, [p1]; MOV r17, p2 + 6806 0x19 0x68 0x82 0x18 ADD.NC p1, r17, #4 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 6810 0x01 0x1e 0x56 0x98 LDA r18, [p1], #4 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 6814 0x01 0x05 0xf6 0x98 LDA r15, [p1] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6818 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6820 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6822 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6824 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6826 0x18 0x17 0xa0 0xf8 MOV r0, r15 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 6830 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 6836 0x7c 0xa5 0xf8 0x3f 0xfd 0x64 MUL r18, r15, r18; MOV r16, #-1 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6842 0xfd 0xca 0xb0 0x0f 0xff 0xfe 0x2f 0xff 0x91 0x3a ST r18, [sp, #-20]; MOVXM r17, #1073741823 + 6852 0x14 0xa1 0x0d 0x98 LSHL r16, r18, r16 + 6856 0x14 0x61 0x04 0x98 AND r16, r17, r16 + 6860 0x80 0x0d 0x90 0x00 0x01 0x84 JZ r16, #6944 +.delay_slot + 6866 0x00 0xf3 0xd0 0xdd 0x81 0xd4 LDA p7, [p0]; MOV p0, p7 +.delay_slot + 6872 0x0f 0xf8 0x1d 0x98 ST p0, [sp, #-8] +.delay_slot + 6876 0x0f 0xf5 0xd5 0x98 ST r14, [sp, #-12] +.delay_slot + 6880 0x0f 0xf0 0x3d 0x98 ST lr, [sp, #-16] +.delay_slot + 6884 0x3c 0xba 0xdf 0xf8 0x2b 0x0c LDA r14, [p1, #-8]; ST r0, [sp, #-4] + 6890 0xfd 0x05 0xb0 0x00 0x02 0x5c ST el0, [sp, #-24]; MOVX r0, #0 + 6896 0x07 0xe8 0x99 0x18 LDA p1, [sp, #-24] +.no_stack_arguments + 6900 0x00 0x13 0x98 0x00 0x01 0x04 JL #10032 +.delay_slot + 6906 0x10 0x22 0x09 0x18 MOVX r17, #2 +.delay_slot + 6910 0x14 0x03 0x1d 0x98 LSHL r1, r16, r17 +.delay_slot +.swstall delay_slot + 6914 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6916 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6918 0x00 0x2c 0xf0 0x00 0x10 0x00 0x01 0xa5 0x7e 0xba NOPA; NOPB; NOPM +.return_address + 6928 0x00 0x0d 0x98 0x00 0x00 0x84 J #6960 +.delay_slot +.swstall delay_slot + 6934 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6936 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6938 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6940 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6942 0x00 0x00 NOPX +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_144 + 6944 0x00 0x2c 0xf0 0x00 0x27 0xe8 0x2d 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST el0, [sp, #-24]; NOPX; NOPM; NOPV +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_160 + 6960 0x78 0x0e 0x20 0x00 0x01 0x84 JZ r15, #7232 +.delay_slot +.swstall delay_slot + 6966 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6968 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6970 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6972 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6974 0x00 0x00 NOPX + 6976 0xfd 0xc6 0x20 0x00 0x00 0x04 0x7d 0xd0 0x10 0xba LDA r17, [sp, #-20]; MOVXM ls, #7072 + 6986 0x00 0x33 0x00 0x00 0x00 0x05 0xbe 0x00 0x10 0xba MOVA r19, #1; MOVXM le, #7168 + 6996 0xfd 0x4a 0x20 0x1d 0x49 0xee 0x0b 0xff 0xc8 0xba LDA r18, [sp, #-24]; LSHL r20, r14, r19; ADD.NC r16, r15, #-1 + 7006 0xfe 0x07 0x20 0x00 0x00 0x04 0x35 0xc0 0x10 0xba LDA lr, [sp, #-16]; MOVXM p0, #7040 + 7016 0x18 0x0a 0x20 0xf8 MOV m0, r20 + 7020 0x00 0x00 NOPX + 7022 0x00 0x00 NOPX + 7024 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x23 0x19 0xec 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; LSHL r17, r17, r19; NOPM; NOPV +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_240 +.loop_nesting 1 + 7040 0x70 0x0e 0x08 0x00 0x01 0x84 JZ r14, #7184 +.delay_slot +.swstall delay_slot + 7046 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7048 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7050 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7052 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7054 0x00 0x00 NOPX + 7056 0x53 0x91 0x60 0x02 0xbb 0x90 0x70 0x02 MOVS p2, p7; MOV lc, r14 + 7064 0x00 0x2b 0x60 0x00 0xb4 0x90 0x70 0x02 NOPS; MOV p1, r18 +.label ZLS_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_272 +.loop_nesting 2 +.begin_of_loop + 7072 0x43 0xce 0x50 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 LDA.s16 r19, [p2], #2; NOPB; NOPS; NOPX; NOPM; NOPV + 7088 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 7104 0x23 0xce 0xe0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 ST.s16 r19, [p1], #2; NOPB; NOPS; NOPX; NOPM; NOPV + 7120 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 7136 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 7152 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLE_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_368 +.end_of_loop + 7168 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_384 +.loop_nesting 1 + 7184 0xe1 0x72 0x08 0x40 0x40 0x1c PADDB [p7], m0; JNZD r16, r16, p0 +.delay_slot +.swstall delay_slot + 7190 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7192 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7194 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7196 0x00 0x00 NOPX +.delay_slot + 7198 0x1c 0x98 0xc9 0x58 ADD.NC r18, r17, r18 +.loop_nesting 0 + 7202 0x00 0x0e 0x28 0x00 0x00 0x84 J #7248 +.delay_slot +.swstall delay_slot + 7208 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7210 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7212 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7214 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7216 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_432 + 7232 0xfe 0x07 0x20 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 LDA lr, [sp, #-16]; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_448 + 7248 0x07 0xf5 0xd1 0x18 LDA r14, [sp, #-12] + 7252 0x07 0xfb 0x99 0x18 LDA p7, [sp, #-8] + 7256 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] + 7260 0x00 0x00 NOPX + 7262 0x00 0x00 NOPX + 7264 0x00 0x00 NOPX + 7266 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 7270 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 7276 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7278 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7280 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7282 0x00 0x00 NOPX +.label _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj__end +.label __ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj___func_end0 + +.text_segment PM 7296 +.label __Z14_b8148_wrapperPPv___func_begin0 +.label _Z14_b8148_wrapperPPv +.function_start + 7296 0x19 0x60 0xc0 0xf8 MOV p1, p0 + 7300 0x01 0x1c 0x1e 0x98 LDA p0, [p1], #4 + 7304 0x01 0x15 0x1e 0x98 LDA p2, [p1, #4] + 7308 0x01 0x04 0x9e 0x98 LDA p1, [p1] +.tail_call + 7312 0x00 0x0d 0x48 0x00 0x00 0x84 J #6800 +.delay_slot +.swstall delay_slot + 7318 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7320 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7322 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7324 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7326 0x00 0x00 NOPX +.label _Z14_b8148_wrapperPPv__end +.label __Z14_b8148_wrapperPPv___func_end0 +.label _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj +.label __ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj___func_begin0 +.function_start + 7328 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 7332 0x00 0x00 NOPX + 7334 0x00 0x00 NOPX + 7336 0x00 0x00 NOPX + 7338 0x00 0x00 NOPX + 7340 0x00 0x00 NOPX + 7342 0x00 0x00 NOPX + 7344 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 7348 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 7352 0x00 0x00 NOPX + 7354 0x00 0x00 NOPX + 7356 0x00 0x00 NOPX + 7358 0x00 0x00 NOPX + 7360 0x00 0x00 NOPX + 7362 0x00 0x00 NOPX + 7364 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 7368 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 7372 0x00 0x00 NOPX + 7374 0x00 0x00 NOPX + 7376 0x00 0x00 NOPX + 7378 0x00 0x00 NOPX + 7380 0x00 0x00 NOPX + 7382 0x00 0x00 NOPX + 7384 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 7388 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 7392 0x00 0x00 NOPX + 7394 0x00 0x00 NOPX + 7396 0x00 0x00 NOPX + 7398 0x00 0x00 NOPX + 7400 0x00 0x00 NOPX + 7402 0x00 0x00 NOPX + 7404 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 7408 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 7412 0x00 0x00 NOPX + 7414 0x00 0x00 NOPX + 7416 0x00 0x00 NOPX + 7418 0x00 0x00 NOPX + 7420 0x00 0x00 NOPX + 7422 0x00 0x00 NOPX + 7424 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 7428 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 7432 0x00 0x00 NOPX + 7434 0x00 0x00 NOPX + 7436 0x00 0x00 NOPX + 7438 0x00 0x00 NOPX + 7440 0x00 0x00 NOPX + 7442 0x00 0x00 NOPX + 7444 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 7448 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 7452 0x00 0x00 NOPX + 7454 0x00 0x00 NOPX + 7456 0x00 0x00 NOPX + 7458 0x00 0x00 NOPX + 7460 0x00 0x00 NOPX + 7462 0x00 0x00 NOPX + 7464 0x08 0x04 0x29 0x98 ST el0, [p0] + 7468 0x01 0x14 0x2e 0x98 LDA el0, [p1, #4] + 7472 0x00 0x00 NOPX + 7474 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot + 7478 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7480 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7482 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7484 0x00 0x00 NOPX +.delay_slot + 7486 0x08 0x14 0x29 0x98 ST el0, [p0, #4] +.label _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj__end +.label __ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj___func_end0 + +.text_segment PM 7504 +.label _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params +.label __ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params___func_begin0 +.function_start + 7504 0x03 0x86 0xd0 0x00 0x00 0x28 0x80 0x20 0x58 0xba LDA r1, [p0], #4; MOVX r0, #1; MOV m1, #32 + 7514 0x03 0x96 0xd0 0x00 0x30 0x48 0x4f 0xfa 0x58 0xba LDA r5, [p0], #4; MOVX r3, #2; MOV r2, #-6 + 7524 0x05 0x92 0xd0 0x01 0x01 0x54 LDA r4, [p0], #8; MOV m0, #64 + 7530 0x05 0x1a 0xd1 0x02 0x01 0x54 LDA r6, [p0], m1; MOV dj0, #128 + 7536 0x00 0x00 NOPX + 7538 0x00 0x00 NOPX + 7540 0x00 0x00 NOPX + 7542 0x00 0x00 NOPX + 7544 0x00 0x00 NOPX + 7546 0x11 0x42 0x1f 0x98 MUL r1, r5, r1 + 7550 0x11 0x80 0x04 0x98 AND r0, r6, r0 + 7554 0x10 0xc0 0x05 0x98 OR r0, r3, r0 + 7558 0x19 0x82 0x30 0x84 0x9f 0x5c ST r0, [p0], #-16; MUL r1, r1, r4 + 7564 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 7568 0x10 0x40 0x2d 0x98 LSHL r0, r1, r2 +.delay_slot + 7572 0x08 0x1c 0x11 0x98 ST r0, [p0], #4 +.delay_slot + 7576 0x08 0x1c 0x01 0x98 ST m0, [p0], #4 +.delay_slot + 7580 0x08 0x04 0x41 0x98 ST dj0, [p0] +.delay_slot + 7584 0x08 0x14 0x01 0x98 ST m0, [p0, #4] +.label _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params__end +.label __ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params___func_end0 + +.text_segment PM 7600 +.label _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj +.label __ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj___func_begin0 +.function_start +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.noswbrkpt + 7600 0x00 0x0e 0x50 0x00 0x01 0x04 JL #7328 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7606 0x18 0xc1 0xe0 0xf8 MOV dc0, lr +.delay_slot + 7610 0x1a 0x60 0xc0 0xf8 MOV p2, p0 +.delay_slot +.swstall delay_slot + 7614 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7616 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7618 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV +.tail_call +.return_address + 7632 0x00 0x0e 0xa8 0x00 0x00 0x84 J #7504 +.delay_slot + 7638 0x1f 0x71 0x80 0xf8 MOV lr, dc0 +.delay_slot + 7642 0x18 0x64 0xc0 0xf8 MOV p0, p2 +.delay_slot +.swstall delay_slot + 7646 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7648 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7650 0x00 0x00 NOPX +.label _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj__end +.label __ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj___func_end0 + +.text_segment PM 7664 +.label __Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params___func_begin0 +.label _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params +.function_start + 7664 0xb0 0x91 0x60 0x00 0x0a 0x60 0x70 0x02 MOVS p5, p1; MOV r0, p2 + 7672 0x1b 0x60 0x12 0x18 ADD.NC p3, r0, #36 + 7676 0x63 0xa0 0xd0 0x3d 0x81 0xd4 LDA m2, [p3], #4; MOV r0, p7 + 7682 0x03 0x1c 0x06 0x98 LDA m0, [p3], #4 + 7686 0x03 0xd4 0x56 0x98 LDA r2, [p3, #-12] + 7690 0x03 0x04 0x86 0x98 LDA m1, [p3] + 7694 0x00 0x00 NOPX + 7696 0x00 0x00 NOPX + 7698 0x00 0x00 NOPX + 7700 0x00 0x00 NOPX + 7702 0x00 0x00 NOPX + 7704 0x10 0x0f 0xe0 0x00 0x01 0x84 JZ r2, #8128 +.delay_slot + 7710 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.delay_slot + 7714 0xe1 0x72 0x06 0xdd 0x81 0xf4 PADDB [p7], m0; MOV p3, p7 +.delay_slot + 7720 0x38 0x4b 0x90 0x18 PADDB [p0], m2 +.delay_slot + 7724 0x01 0x72 0x08 0xc1 0x81 0xf4 PADDB [p0], m0; MOV p4, p0 +.delay_slot + 7730 0x39 0x2b 0x90 0x18 PADDB [p1], m1 + 7734 0x10 0x02 0x11 0x18 MOVX r1, #4 + 7738 0x10 0x86 0x1c 0x98 LTU r3, r2, r1 + 7742 0x18 0x0f 0x98 0x40 0x01 0x84 JNZ r3, #7984 +.delay_slot + 7748 0x18 0x80 0x60 0xb8 MOV dj0, #48 +.delay_slot + 7752 0x02 0x00 0x36 0x98 LDA r1, [p2, dj0] +.delay_slot +.swstall delay_slot + 7756 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7758 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7760 0x00 0x00 NOPX + 7762 0x81 0x13 0x76 0x10 0xe8 0x00 0x00 0x04 0x7f 0x58 0x10 0xb6 VLDA x2, [p4], m0; VLDB x1, [p3], m0; MOVXM ls, #7856 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7774 0x81 0x0c 0xfe 0x10 0x68 0x00 0x00 0xb7 0xee 0x02 0x61 0x0b 0x60 0x7e PADDA [p4], m0; VLDB x0, [p7], m0; PADDS [p3], m0; MOVXM le, #7904 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7788 0x61 0x0b 0x70 0x11 0xef 0x08 0x5b 0x02 0xb8 0xbf 0x40 0xf6 VLDA x1, [p3], m0; VLDB x3, [p0], m0; PADDS [p7], m0; ADD.NC lc, r2, #-3 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7800 0x01 0x0c 0xf8 0x11 0x6b 0x08 0x5b 0x32 PADDA [p0], m0; VLDB x2, [p4], m0; PADDS [p3], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7808 0x81 0x0c 0xfe 0x10 0x68 0x3c PADDA [p4], m0; VLDB x0, [p7], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7814 0x01 0x1e 0x8e 0x10 0xb6 0x4c VLDB x3, [p0], m0; PADDS [p7], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7820 0x01 0x0c 0xf6 0x10 0xe8 0x3c PADDA [p0], m0; VLDB x1, [p3], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7826 0x81 0x16 0x80 0x12 0x0b 0xb4 VLDB x2, [p4], m0; VSHUFFLE bmll0, x1, x2, r1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7832 0x00 0x2c 0xfe 0x10 0x6b 0x08 0x5b 0x32 NOPA; VLDB x0, [p7], m0; PADDS [p3], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7840 0x00 0x2c 0xf0 0x11 0xed 0x28 0x06 0x80 0x00 0x00 0x40 0xc2 0xe8 0x00 0x00 0xe1 NOPA; VLDB x3, [p0], m0; VST bmll0, [p5], m1; NOPX; VSHUFFLE bmlh0, x0, x3, r1; NOPV +.label ZLS_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_192 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7856 0x81 0x0c 0xf6 0x10 0xef 0x08 0x5b 0x00 0x00 0x00 0x04 0x82 0xe8 0x00 0x00 0xe1 PADDA [p4], m0; VLDB x1, [p3], m0; PADDS [p7], m0; NOPX; VSHUFFLE bmll0, x1, x2, r1; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7872 0x01 0x0c 0xf8 0x11 0x69 0x28 0x26 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 PADDA [p0], m0; VLDB x2, [p4], m0; VST bmlh0, [p1], m1; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7888 0xa5 0x0c 0xfe 0x10 0x6b 0x08 0x5b 0x00 0x00 0x00 0x40 0xc2 0xe8 0x00 0x00 0xe1 PADDA [p5], m1; VLDB x0, [p7], m0; PADDS [p3], m0; NOPX; VSHUFFLE bmlh0, x0, x3, r1; NOPV +.label ZLE_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_240 +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7904 0x25 0x0c 0xf0 0x11 0xed 0x28 0x06 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 PADDA [p1], m1; VLDB x3, [p0], m0; VST bmll0, [p5], m1; NOPX; NOPM; NOPV +.loop_nesting 0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7920 0x18 0x09 0x05 0xd8 VSHUFFLE bmll0, x1, x2, r1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7924 0x09 0x28 0x26 0x98 VST bmlh0, [p1], m1 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7928 0x25 0x0c 0xf1 0x03 0x0b 0x94 PADDA [p1], m1; VSHUFFLE bmlh0, x0, x3, r1 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 7934 0x00 0x0f 0xe0 0x00 0x00 0x84 J #8128 +.delay_slot +.aggressive_scheduled_block_id 2 +.noswbrkpt + 7940 0xa5 0x0c 0xf1 0x28 0x26 0x80 0x04 0x82 0xe2 0xba PADDA [p5], m1; VST bmlh0, [p1], m1; VSHUFFLE bmll0, x1, x2, r1 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7950 0x25 0x0c 0xfa 0x50 0x0d 0x0c PADDA [p1], m1; VST bmll0, [p5], m1 +.delay_slot + 7956 0xa5 0x0c 0xf1 0x03 0x0b 0x94 PADDA [p5], m1; VSHUFFLE bmlh0, x0, x3, r1 +.delay_slot + 7962 0x00 0x2c 0xfa 0x50 0x0d 0x0c NOPA; VST bmll0, [p5], m1 +.delay_slot + 7968 0x00 0x2c 0xf0 0x00 0x21 0x28 0x26 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmlh0, [p1], m1; NOPX; NOPM; NOPV +.label TGT_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_320 + 7984 0x1d 0x71 0x20 0xf8 MOV lc, r2 + 7988 0x00 0x00 0x11 0xfe 0x80 0x44 MOVXM ls, #8000 + 7994 0x00 0x00 0x16 0xff 0x60 0x44 MOVXM le, #8112 +.label ZLS_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_336 +.loop_nesting 1 +.begin_of_loop + 8000 0x81 0x0b 0x76 0x11 0x68 0x3c VLDA x1, [p4], m0; VLDB x2, [p3], m0 + 8006 0x61 0x0c 0xfe 0x10 0x6c 0x08 0x5b 0x32 PADDA [p3], m0; VLDB x0, [p7], m0; PADDS [p4], m0 + 8014 0xe1 0x0c 0xf0 0x11 0xe8 0x3c PADDA [p7], m0; VLDB x3, [p0], m0 + 8020 0x38 0x0b 0x90 0x18 PADDB [p0], m0 + 8024 0x00 0x00 NOPX + 8026 0x00 0x00 NOPX + 8028 0x00 0x01 0x67 0x98 NOPA + 8032 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x08 0x42 0xe8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x2, x1, r1; NOPV + 8048 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 8064 0x00 0x2c 0xf0 0x00 0x25 0x28 0x06 0x80 0x00 0x00 0x40 0xc2 0xe8 0x00 0x00 0xe1 NOPA; NOPB; VST bmll0, [p5], m1; NOPX; VSHUFFLE bmlh0, x0, x3, r1; NOPV + 8080 0x00 0x2c 0xfa 0x57 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; PADDB [p5], m1; NOPS; NOPX; NOPM; NOPV + 8096 0x00 0x2c 0xf0 0x00 0x21 0x28 0x26 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmlh0, [p1], m1; NOPX; NOPM; NOPV +.label ZLE_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_448 +.end_of_loop + 8112 0x00 0x2c 0xf2 0x57 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; PADDB [p1], m1; NOPS; NOPX; NOPM; NOPV +.label TGT_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_464 +.loop_nesting 0 + 8128 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 8132 0x1f 0x60 0x20 0xf8 MOV p7, r0 +.delay_slot +.swstall delay_slot + 8136 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8138 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8140 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8142 0x00 0x00 NOPX +.label _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params__end +.label __Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params___func_end0 +.label __ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj___func_begin0 +.label _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj +.function_start + 8144 0xb0 0x11 0x60 0x00 0x04 0x00 0x00 0x00 0x71 0x3a MOVS p5, p0; PADDXM [sp], #128 + 8154 0xff 0x87 0xb0 0x01 0xb1 0x60 0x70 0x02 ST lr, [sp, #-4]; MOV p3, p1 +.no_stack_arguments + 8162 0x31 0x11 0x60 0x00 0x03 0xb6 0x00 0x00 0x41 0x3a MOVS p1, p2; JL #7600 +.delay_slot + 8172 0x18 0x65 0xe0 0xf8 MOV p0, sp +.delay_slot + 8176 0x38 0xef 0x90 0x18 PADDB [p0], #-128 +.delay_slot + 8180 0x1c 0x60 0xc0 0xf8 MOV p4, p0 +.delay_slot +.swstall delay_slot + 8184 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8186 0x00 0x2c 0xf0 0x00 0x20 0x3c NOPA; NOPB +.return_address + 8192 0xf0 0x4a 0x22 0x90 0x8b 0x02 0x2d 0x70 0x72 0xba LDA r18, [sp, #-128]; MOVS p2, p4; MOV r17, CORE_ID + 8202 0xf0 0xda 0x28 0xc5 0x20 0x2c LDA r22, [sp, #-124]; EXTEND.u8 r17, r17 + 8208 0xf1 0x52 0x20 0x00 0x00 0x3e 0x6f 0xff 0x10 0xba LDA r20, [sp, #-120]; MOVXM r19, #65534 + 8218 0x60 0x93 0xd9 0xc6 0x21 0x2c LDA p1, [p3]; ADD r17, r19, r17 + 8224 0xf1 0xce 0x28 0xd5 0x60 0x2c LDA r19, [sp, #-116]; EXTEND.u16 r21, r17 + 8230 0x00 0x00 NOPX + 8232 0x05 0x06 0x36 0x98 LDA r17, [p5] + 8236 0x00 0x00 NOPX + 8238 0x15 0xa5 0x2f 0x98 MUL r18, r22, r18 + 8242 0x00 0x00 NOPX + 8244 0x14 0xa5 0x4f 0x98 MUL r18, r18, r20 + 8248 0x00 0x00 NOPX + 8250 0x15 0x65 0x2f 0x98 MUL r18, r21, r18 +.no_stack_arguments + 8254 0x00 0x0e 0xf8 0x00 0x01 0x04 JL #7664 +.delay_slot + 8260 0x14 0xe5 0x2f 0x98 MUL r18, r19, r18 +.delay_slot + 8264 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 8268 0x14 0xa1 0x0d 0x98 LSHL r16, r18, r16 +.delay_slot + 8272 0x18 0x68 0xc1 0x58 ADD.NC p0, r17, r16 +.delay_slot +.swstall delay_slot + 8276 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.return_address + 8288 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] + 8292 0x00 0x00 NOPX + 8294 0x00 0x00 NOPX + 8296 0x00 0x00 NOPX + 8298 0x00 0x00 NOPX + 8300 0x00 0x00 NOPX + 8302 0x00 0x00 NOPX + 8304 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 8308 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128 +.delay_slot +.swstall delay_slot + 8314 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8316 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8318 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8320 0x00 0x00 NOPX +.label _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj__end +.label __ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj___func_end0 + +.text_segment PM 8336 +.label __Z14_b8170_wrapperPPv___func_begin0 +.label _Z14_b8170_wrapperPPv +.function_start + 8336 0x19 0x60 0xc0 0xf8 MOV p1, p0 + 8340 0x01 0x1c 0x1e 0x98 LDA p0, [p1], #4 + 8344 0x01 0x15 0x1e 0x98 LDA p2, [p1, #4] + 8348 0x01 0x04 0x9e 0x98 LDA p1, [p1] +.tail_call + 8352 0x00 0x0f 0xe8 0x00 0x00 0x84 J #8144 +.delay_slot +.swstall delay_slot + 8358 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8360 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8362 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8364 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8366 0x00 0x00 NOPX +.label _Z14_b8170_wrapperPPv__end +.label __Z14_b8170_wrapperPPv___func_end0 +.label __Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj___func_begin0 +.label _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj +.function_start + 8368 0x23 0x85 0xd0 0x00 0x01 0xf0 0x09 0xe0 0x10 0xba LDA el0, [p1], #4; MOVXM r0, #508864 + 8378 0x08 0x00 0x80 0x80 0x0b 0x3e 0x27 0xa9 0x30 0x01 0x08 0x76 MOVA m0, #64; MOVS p0, r0; MOVX r2, #-3; ADD.NC p2, r0, #4 + 8390 0x00 0x42 0x20 0x22 0x01 0x64 MOVX r1, #4; MOV r0, #128 + 8396 0x00 0x00 NOPX + 8398 0x00 0x00 NOPX + 8400 0x00 0x00 NOPX + 8402 0x00 0x00 NOPX + 8404 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 8408 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 8412 0x01 0x1c 0x0e 0x98 LDA eh0, [p1], #4 + 8416 0x00 0x00 NOPX + 8418 0x00 0x00 NOPX + 8420 0x00 0x00 NOPX + 8422 0x00 0x00 NOPX + 8424 0x00 0x00 NOPX + 8426 0x0a 0x1c 0x29 0x98 ST el0, [p2], #4 + 8430 0x0a 0x1c 0x09 0x98 ST eh0, [p2], #4 + 8434 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 8438 0x01 0x1c 0x0e 0x98 LDA eh0, [p1], #4 + 8442 0x00 0x00 NOPX + 8444 0x00 0x00 NOPX + 8446 0x00 0x00 NOPX + 8448 0x00 0x00 NOPX + 8450 0x00 0x00 NOPX + 8452 0x0a 0x1c 0x29 0x98 ST el0, [p2], #4 + 8456 0x0a 0x1c 0x09 0x98 ST eh0, [p2], #4 + 8460 0x01 0x04 0x0e 0x98 LDA eh0, [p1] + 8464 0x01 0x14 0x2e 0x98 LDA el0, [p1, #4] + 8468 0x00 0x00 NOPX + 8470 0x00 0x00 NOPX + 8472 0x00 0x00 NOPX + 8474 0x00 0x00 NOPX + 8476 0x00 0x00 NOPX + 8478 0x0a 0x04 0x09 0x98 ST eh0, [p2] + 8482 0x0a 0x14 0x29 0x98 ST el0, [p2, #4] + 8486 0x00 0x08 0x76 0x98 LDA r3, [p0], m0 + 8490 0x00 0x00 NOPX + 8492 0x00 0x00 NOPX + 8494 0x00 0x00 NOPX + 8496 0x00 0x00 NOPX + 8498 0x00 0x00 NOPX + 8500 0x00 0x00 NOPX + 8502 0x10 0xc8 0x2d 0x98 LSHL r4, r3, r2 + 8506 0x18 0xc3 0xb0 0xa4 0xff 0x24 LSHL r3, r3, r1; ADD.NC r1, r4, #-1 + 8512 0x00 0x86 0x30 0x00 0x88 0x60 0x70 0x02 ST r1, [p0]; MOV r4, p0 + 8520 0x19 0x62 0x62 0x18 ADD.NC p1, r4, #-60 + 8524 0x01 0x08 0x96 0x98 LDA r4, [p1], m0 + 8528 0x00 0x00 NOPX + 8530 0x00 0x00 NOPX + 8532 0x00 0x00 NOPX + 8534 0x00 0x00 NOPX + 8536 0x00 0x00 NOPX + 8538 0x00 0x00 NOPX + 8540 0x20 0x85 0xb2 0x22 0x01 0x64 LSHL r2, r4, r2; MOV r4, #128 + 8546 0x10 0x85 0xff 0x18 ADD r2, r2, #-1 + 8550 0x23 0x8a 0x31 0x90 0x5c 0x5c ST r2, [p1], #4; MSC r4, r4, r3, r2 + 8556 0x09 0x1c 0x11 0x98 ST r0, [p1], #4 + 8560 0x09 0x1c 0x11 0x98 ST r0, [p1], #4 + 8564 0x09 0x2c 0x11 0x98 ST r0, [p1], #8 + 8568 0x09 0xfc 0x71 0x98 ST r3, [p1], #-4 + 8572 0x09 0x2c 0x91 0x98 ST r4, [p1], #8 + 8576 0x20 0x82 0x30 0x00 0xa9 0x60 0x70 0x02 ST r0, [p1]; MOV r5, p1 + 8584 0x19 0x62 0xde 0x18 ADD.NC p1, r5, #-68 + 8588 0x09 0x1c 0x11 0x98 ST r0, [p1], #4 + 8592 0x09 0x1c 0x11 0x98 ST r0, [p1], #4 + 8596 0x09 0x1c 0x51 0x98 ST r2, [p1], #4 + 8600 0x09 0x1c 0x31 0x98 ST r1, [p1], #4 + 8604 0x23 0x82 0x30 0x50 0x00 0x5c ST r0, [p1], #4; RET lr +.delay_slot + 8610 0x09 0x2c 0x71 0x98 ST r3, [p1], #8 +.delay_slot + 8614 0x09 0xfc 0x51 0x98 ST r2, [p1], #-4 +.delay_slot + 8618 0x09 0x2c 0x91 0x98 ST r4, [p1], #8 +.delay_slot + 8622 0x09 0x04 0x31 0x98 ST r1, [p1] +.delay_slot + 8626 0x09 0x14 0x11 0x98 ST r0, [p1, #4] +.label _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj__end +.label __Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj___func_end0 + +.text_segment PM 8640 +.label __Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params___func_begin0 +.label _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params +.function_start + 8640 0x00 0x41 0x00 0x00 0x01 0xf1 0x31 0xe6 0x10 0xba MOVA r1, #2; MOVXM p2, #508876 + 8650 0x40 0xee 0xd0 0x00 0xb2 0x2c LDA r27, [p2]; MOVX r0, #22 + 8656 0x00 0x00 NOPX + 8658 0x00 0x00 NOPX + 8660 0x00 0x00 NOPX + 8662 0x00 0x00 NOPX + 8664 0x00 0x00 NOPX + 8666 0x00 0x00 NOPX + 8668 0x16 0xc2 0x17 0x98 EQ r1, r27, r1 + 8672 0x08 0x11 0xd8 0x40 0x01 0x84 JNZ r1, #9136 +.delay_slot + 8678 0x10 0x04 0x75 0x18 MOVX r2, #29 +.delay_slot + 8682 0x10 0x00 0x22 0x18 SEL.EQZ r0, r0, r2, r27 +.delay_slot +.swstall delay_slot + 8686 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8688 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8690 0x00 0x00 NOPX + 8692 0x00 0x07 0xc4 0xc7 0xa0 0x44 MOVXM p2, #508880 + 8698 0x02 0x04 0x36 0x98 LDA r1, [p2] + 8702 0x00 0x00 NOPX + 8704 0x00 0x00 NOPX + 8706 0x00 0x00 NOPX + 8708 0x00 0x00 NOPX + 8710 0x00 0x00 NOPX + 8712 0x00 0x00 NOPX + 8714 0x08 0x13 0x18 0x00 0x01 0x84 JZ r1, #9776 +.delay_slot +.swstall delay_slot + 8720 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8722 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8724 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8726 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8728 0x00 0x00 NOPX + 8730 0x10 0x04 0x29 0x18 MOVX r2, #10 + 8734 0x10 0x44 0x2c 0x98 LTU r2, r1, r2 + 8738 0x10 0x11 0x88 0x40 0x01 0x84 JNZ r2, #8976 +.delay_slot +.swstall delay_slot + 8744 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8746 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8748 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8750 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8752 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8754 0x00 0x38 0x68 0x00 0x00 0x08 0x79 0x58 0x10 0x3a VLDB x0, [p0], #64; MOVXM ls, #8880 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8764 0x00 0x38 0x68 0x00 0x00 0x09 0xb9 0x58 0x10 0x3a VLDB x0, [p0], #64; MOVXM le, #8880 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8774 0x00 0x2c 0xf0 0x1c 0x34 0x02 0xb8 0x7d 0xce 0xba NOPA; VLDB x0, [p0], #64; ADD.NC lc, r1, #-9 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8784 0x00 0x2c 0xf0 0x38 0x68 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8800 0x00 0x2c 0xf0 0x38 0x68 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8816 0x00 0x2c 0xf0 0x38 0x68 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8832 0x00 0x2c 0xf0 0x38 0x68 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8848 0x00 0x2c 0xf0 0x38 0x68 0x01 0x5b 0x00 0x00 0x00 0x00 0x00 0xe8 0x00 0x00 0xe1 NOPA; VLDB x0, [p0], #64; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8864 0x00 0x2c 0xf0 0x38 0x68 0x01 0x5b 0x00 0x00 0x00 0x00 0x00 0xe8 0x00 0x00 0xe1 NOPA; VLDB x0, [p0], #64; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_240 +.loop_nesting 1 +.begin_of_loop +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8880 0x00 0x2c 0xf0 0x38 0x69 0x1c 0x06 0x80 0x00 0x00 0x00 0x00 0xe8 0x00 0x00 0xe1 NOPA; VLDB x0, [p0], #64; VST bmll0, [p1], #64; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV +.loop_nesting 0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8896 0x23 0x80 0xd0 0x00 0x00 0x00 0xe0 0x02 VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8904 0x23 0x80 0xd0 0x00 0x00 0x00 0xe0 0x02 VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8912 0x23 0x80 0xd0 0x00 0x00 0x00 0xe0 0x02 VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8920 0x23 0x80 0xd0 0x01 0x40 0x00 0x00 0x00 0xe9 0x3a VST bmll0, [p1], #64; RET lr; VSHUFFLE bmll0, x0, x0, r0 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8930 0x23 0x80 0xd0 0x00 0x00 0x00 0xe0 0x02 VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8938 0x23 0x80 0xd0 0x00 0x00 0x00 0xe0 0x02 VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8946 0x23 0x80 0xd0 0x00 0x00 0x00 0xe0 0x02 VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8954 0x00 0x2c 0xf2 0x38 0x0d 0x0c NOPA; VST bmll0, [p1], #64 +.delay_slot + 8960 0x00 0x2c 0xf0 0x00 0x21 0x1c 0x06 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmll0, [p1], #64; NOPX; NOPM; NOPV +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_336 + 8976 0x1d 0x70 0xa0 0xf8 MOV lc, r1 + 8980 0x00 0x00 0x21 0xe6 0x40 0x44 MOVXM ls, #8992 + 8986 0x00 0x00 0x26 0xe7 0x20 0x44 MOVXM le, #9104 +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_352 +.loop_nesting 1 +.begin_of_loop + 8992 0x38 0x1c 0x34 0x18 VLDB x0, [p0], #64 + 8996 0x00 0x00 NOPX + 8998 0x00 0x2c 0xf0 0x00 0x10 0x00 0x01 0xa5 0x7e 0xba NOPA; NOPB; NOPM + 9008 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 9024 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 9040 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 9056 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 9072 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x00 0x00 0xe8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV + 9088 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLE_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_464 +.end_of_loop + 9104 0x00 0x2c 0xf0 0x00 0x21 0x1c 0x06 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmll0, [p1], #64; NOPX; NOPM; NOPV +.loop_nesting 0 + 9120 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot + 9124 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9126 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9128 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9130 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9132 0x00 0x01 0x67 0x98 NOPA +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_496 + 9136 0x00 0x07 0xc4 0xc7 0x80 0x44 MOVXM p2, #508864 + 9142 0x02 0x04 0x16 0x98 LDA r0, [p2] + 9146 0x00 0x00 NOPX + 9148 0x00 0x00 NOPX + 9150 0x00 0x00 NOPX + 9152 0x00 0x00 NOPX + 9154 0x00 0x00 NOPX + 9156 0x00 0x00 NOPX + 9158 0x00 0x13 0x18 0x00 0x01 0x84 JZ r0, #9776 +.delay_slot +.swstall delay_slot + 9164 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9166 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9168 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9170 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9172 0x00 0x00 NOPX + 9174 0x04 0x94 0x80 0x00 0x01 0xf2 0x31 0xe2 0x10 0xba MOVA m5, #36; MOVXM p4, #508868 + 9184 0x83 0x86 0xd0 0x00 0x51 0x08 0x4f 0xfd 0x58 0xba LDA r1, [p4], #4; MOVX r5, #8; MOV r2, #-3 + 9194 0x95 0x12 0xd0 0x00 0x30 0x2a 0x60 0x00 0x58 0xba LDA r4, [p4], m5; MOVX r3, #1; MOV dc4, #0 + 9204 0x9d 0x90 0xd0 0x10 0x4b 0x00 0x60 0x8a 0x00 0x20 0x58 0x76 LDA m1, [p4], #-8; MOVS dc0, dc4; MOVX r6, #4; MOV m4, #32 + 9216 0x9d 0x94 0xd1 0x10 0x4b 0x00 0x0f 0xf8 0xe8 0x34 0x58 0x76 LDA dn1, [p4], #-8; MOVS dc1, dc4; ADD r0, r0, #-1; MOV r7, #52 + 9228 0x87 0x98 0xd5 0x10 0x4b 0x00 0x00 0x09 0x32 0x30 0x10 0x76 LDA dj1, [p4], #12; MOVS dc5, dc4; MOVXM p2, #9312 + 9240 0x9d 0xd4 0xd0 0x00 0x00 0x09 0xb2 0x40 0x10 0xba LDA dn5, [p4], #-8; MOVXM p3, #9344 + 9250 0x91 0x58 0xd0 0x41 0xaa 0x2c LDA dj5, [p4], m4; MOVX r16, #53 + 9256 0x9d 0x80 0xd0 0x0b 0xb0 0xe4 0xa8 0x7f 0xc8 0xba LDA m0, [p4], #-8; LTU r27, r5, r1; ADD.NC r5, r1, #-1 + 9266 0x9d 0x84 0xd0 0x0b 0x11 0x6c 0xa9 0x3f 0xc8 0xba LDA dn0, [p4], #-8; LSHL r17, r5, r2; ADD.NC r5, r4, #-1 + 9276 0x87 0x88 0xd0 0x0a 0x21 0x6c 0xac 0x40 0x48 0xba LDA dj0, [p4], #12; LSHL r2, r5, r2; ADD.NC r5, r17, #1 + 9286 0x80 0xc4 0xd0 0x06 0x52 0x90 0x68 0x80 0x48 0xba LDA dn4, [p4]; SEL.EQZ r5, r3, r5, r27; ADD.NC r3, r2, #1 + 9296 0x9c 0xc8 0xd0 0x00 0x20 0x01 0x5b 0x0a 0x5f 0xf8 0x01 0xa5 0x78 0x00 0x00 0xe1 LDA dj4, [p4, #-8]; NOPB; NOPS; ADD r5, r5, #-1; NOPM; NOPV +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_672 +.loop_nesting 1 + 9312 0x08 0x13 0x10 0x00 0x01 0x84 JZ r1, #9760 +.delay_slot +.swstall delay_slot + 9318 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9320 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9322 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9324 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9326 0x00 0x00 NOPX + 9328 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x02 0x29 0x50 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; MOV r17, r5; NOPV +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_704 +.loop_nesting 2 + 9344 0x20 0x13 0x08 0x00 0x01 0x84 JZ r4, #9744 +.delay_slot +.swstall delay_slot + 9350 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9352 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9354 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9356 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9358 0x00 0x00 NOPX + 9360 0x10 0xe4 0x6c 0x98 LTU r18, r3, r6 + 9364 0x90 0x12 0xc0 0x40 0x01 0x84 JNZ r18, #9600 +.delay_slot +.swstall delay_slot + 9370 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9372 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9374 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9376 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9378 0x00 0x00 NOPX + 9380 0x00 0x28 0x68 0x00 0x00 0x08 0x7a 0x88 0x10 0x3a VLDB x0, [p0, #64]; MOVXM ls, #9488 + 9390 0x00 0x70 0xe8 0x00 0x00 0x09 0xba 0x98 0x10 0x3a VLDB.3D x1, [p0], d1; MOVXM le, #9520 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 9400 0x1d 0x71 0xfe 0x98 ADD.NC lc, r3, #-3 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 9404 0x38 0x14 0x34 0x18 VLDB x0, [p0, #64] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9408 0x00 0x2c 0xf0 0x70 0xe8 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB.3D x1, [p0], d1; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9424 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9440 0x00 0x2c 0xf0 0x28 0x68 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB x0, [p0, #64]; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9456 0x00 0x2c 0xf0 0x70 0xe8 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB.3D x1, [p0], d1; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9472 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x44 0x0e 0xe8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmlh0, x1, x0, r7; NOPV +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_848 +.loop_nesting 3 +.begin_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9488 0x00 0x2c 0xf0 0x28 0x6c 0x84 0x8b 0x00 0x00 0x00 0x04 0x20 0xe8 0x00 0x00 0xe1 NOPA; VLDB x0, [p0, #64]; MOVS p4, p1; NOPX; VSHUFFLE bmll0, x1, x0, r16; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9504 0x00 0x2c 0xf0 0x70 0xe9 0x18 0x26 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB.3D x1, [p0], d1; VST.3D bmlh0, [p1], d0; NOPX; NOPM; NOPV +.label ZLE_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_880 +.end_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9520 0x00 0x2c 0xf0 0x00 0x24 0x14 0x06 0x80 0x00 0x00 0x44 0x0e 0xe8 0x00 0x00 0xe1 NOPA; NOPB; VST bmll0, [p4, #64]; NOPX; VSHUFFLE bmlh0, x1, x0, r7; NOPV +.loop_nesting 2 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9536 0x90 0x91 0x60 0x00 0x04 0x20 0xe0 0x02 MOVS p4, p1; VSHUFFLE bmll0, x1, x0, r16 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9544 0x09 0x18 0x26 0x98 VST.3D bmlh0, [p1], d0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9548 0x82 0x80 0xd0 0x00 0x44 0x0e 0xe0 0x02 VST bmll0, [p4, #64]; VSHUFFLE bmlh0, x1, x0, r7 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9556 0x18 0x08 0x41 0xd8 VSHUFFLE bmll0, x1, x0, r16 + 9560 0x00 0x13 0x08 0x00 0x00 0x84 J #9744 +.delay_slot + 9566 0x23 0x04 0xd0 0x02 0x31 0x60 0x70 0x02 VST.3D bmlh0, [p1], d0; MOV p4, p1 +.delay_slot + 9574 0x82 0x80 0xd0 0x00 0x44 0x0e 0xe0 0x02 VST bmll0, [p4, #64]; VSHUFFLE bmlh0, x1, x0, r7 +.delay_slot + 9582 0x90 0x91 0x60 0x00 0x04 0x20 0xe0 0x02 MOVS p4, p1; VSHUFFLE bmll0, x1, x0, r16 +.delay_slot + 9590 0x09 0x18 0x26 0x98 VST.3D bmlh0, [p1], d0 +.delay_slot + 9594 0x00 0x2c 0xf8 0x28 0x0d 0x0c NOPA; VST bmll0, [p4, #64] +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_960 + 9600 0x00 0x00 0x21 0xeb 0x20 0x44 MOVXM ls, #9616 + 9606 0x00 0x00 0x26 0xec 0x00 0x44 MOVXM le, #9728 + 9612 0x1d 0x71 0x00 0x98 ADD.NC lc, r2, #1 +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_976 +.loop_nesting 3 +.begin_of_loop + 9616 0x02 0x86 0x88 0xc5 0x81 0xf4 VLDB x0, [p0, #64]; MOV p4, p1 + 9622 0x38 0x38 0x74 0x18 VLDB.3D x1, [p0], d1 + 9626 0x00 0x00 NOPX + 9628 0x00 0x00 NOPX + 9630 0x00 0x00 NOPX + 9632 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 9648 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 9664 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 9680 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x44 0x0e 0xe8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmlh0, x1, x0, r7; NOPV + 9696 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x04 0x20 0xe8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x1, x0, r16; NOPV + 9712 0x00 0x2c 0xf0 0x00 0x21 0x18 0x26 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST.3D bmlh0, [p1], d0; NOPX; NOPM; NOPV +.label ZLE_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1088 +.end_of_loop + 9728 0x00 0x2c 0xf0 0x00 0x24 0x14 0x06 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmll0, [p4, #64]; NOPX; NOPM; NOPV +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1104 +.loop_nesting 2 + 9744 0x14 0x62 0xe0 0x18 JNZD r17, r17, p3 +.delay_slot +.swstall delay_slot + 9748 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9750 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9752 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9754 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9756 0x00 0x01 0x67 0x98 NOPA +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1120 +.loop_nesting 1 + 9760 0x10 0x00 0xa0 0x18 JNZD r0, r0, p2 +.delay_slot +.swstall delay_slot + 9764 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9766 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9768 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9770 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9772 0x00 0x01 0x67 0x98 NOPA +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1136 +.loop_nesting 0 + 9776 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot + 9780 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9782 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9784 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9786 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9788 0x00 0x00 NOPX +.label _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params__end +.label __Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params___func_end0 + +.text_segment PM 9792 +.label __ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj___func_begin0 +.label _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj +.function_start + 9792 0x1b 0x6c 0xc0 0xf8 MOV p3, p6 + 9796 0xd0 0x91 0x60 0x00 0x01 0xf0 0xb1 0xb6 0x11 0x3a MOVS p6, p1; MOVXM p1, #508780 + 9806 0x01 0x06 0x16 0x98 LDA r16, [p1] + 9810 0x00 0x00 NOPX + 9812 0x00 0x00 NOPX + 9814 0x00 0x00 NOPX + 9816 0x00 0x00 NOPX + 9818 0x00 0x00 NOPX + 9820 0x00 0x00 NOPX + 9822 0x80 0x13 0x50 0x40 0x01 0x84 JNZ r16, #9888 +.delay_slot + 9828 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 +.delay_slot + 9834 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12] +.delay_slot + 9838 0xf0 0x11 0x60 0x00 0xb7 0x60 0x70 0x02 MOVS p7, p0; MOV p1, p7 +.delay_slot + 9846 0x0f 0xf9 0x9d 0x98 ST p3, [sp, #-8] +.delay_slot + 9850 0xff 0x93 0xb0 0x00 0x01 0xf0 0x31 0xe0 0x11 0x3a ST p1, [sp, #-4]; MOVXM p0, #508864 +.no_stack_arguments + 9860 0x00 0x10 0x58 0x00 0x01 0x04 JL #8368 +.delay_slot + 9866 0x19 0x64 0xc0 0xf8 MOV p1, p2 +.delay_slot +.swstall delay_slot + 9870 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9872 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9874 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9876 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.label TGT_F_ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj_96 +.return_address + 9888 0xe0 0xc2 0xd0 0x00 0x01 0xf3 0xb1 0xea 0x10 0xba LDA r16, [p7]; MOVXM p7, #508884 + 9898 0x07 0x06 0x36 0x98 LDA r17, [p7] + 9902 0x06 0x04 0x9e 0x98 LDA p1, [p6] + 9906 0x00 0x00 NOPX +.no_stack_arguments + 9908 0x00 0x10 0xe0 0x00 0x01 0x04 JL #8640 +.delay_slot + 9914 0x10 0x24 0x05 0x18 MOVX r18, #1 +.delay_slot + 9918 0x00 0x07 0xc4 0xc7 0x80 0x44 MOVXM p2, #508864 +.delay_slot + 9924 0x1e 0x64 0xc0 0xf8 MOV p6, p2 +.delay_slot + 9928 0x14 0x63 0x2d 0x98 LSHL r17, r17, r18 +.delay_slot + 9932 0x18 0x68 0xc1 0x58 ADD.NC p0, r17, r16 +.return_address + 9936 0xfe 0x87 0x20 0x00 0x01 0xf1 0x31 0xb6 0x10 0xba LDA lr, [sp, #-12]; MOVXM p2, #508780 + 9946 0x40 0xc2 0xd0 0x60 0x02 0x2c LDA r16, [p2]; MOVX r24, #0 + 9952 0x06 0x66 0x36 0x98 LDA r17, [p6, #24] + 9956 0x07 0xfb 0x19 0x18 LDA p6, [sp, #-8] + 9960 0x07 0xff 0x99 0x18 LDA p7, [sp, #-4] + 9964 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 + 9970 0x00 0x00 NOPX + 9972 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 9976 0x14 0x20 0x07 0x18 ADD r16, r16, #1 +.delay_slot + 9980 0x14 0x77 0x07 0x98 EQ r27, r17, r16 +.delay_slot + 9984 0x14 0x21 0x82 0x18 SEL.EQZ r16, r16, r24, r27 +.delay_slot + 9988 0x0a 0x06 0x11 0x98 ST r16, [p2] +.delay_slot +.swstall delay_slot + 9992 0x00 0x00 NOPX +.label _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj__end +.label __ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj___func_end0 + +.text_segment PM 10000 +.label __Z14_b7835_wrapperPPv___func_begin0 +.label _Z14_b7835_wrapperPPv +.function_start + 10000 0x19 0x60 0xc0 0xf8 MOV p1, p0 + 10004 0x01 0x1c 0x1e 0x98 LDA p0, [p1], #4 + 10008 0x01 0x15 0x1e 0x98 LDA p2, [p1, #4] + 10012 0x01 0x04 0x9e 0x98 LDA p1, [p1] +.tail_call + 10016 0x00 0x13 0x20 0x00 0x00 0x84 J #9792 +.delay_slot +.swstall delay_slot + 10022 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10024 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10026 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10028 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10030 0x00 0x00 NOPX +.label _Z14_b7835_wrapperPPv__end +.label __Z14_b7835_wrapperPPv___func_end0 +.label memset +.function_start + 10032 0x08 0x13 0xe8 0x00 0x01 0x84 JZ r1, #10192 +.delay_slot + 10038 0x18 0x62 0xc0 0xf8 MOV p0, p1 +.delay_slot +.swstall delay_slot + 10042 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10044 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10046 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10048 0x00 0x00 NOPX + 10050 0x30 0x11 0x60 0x02 0xb8 0x50 0x70 0x02 MOVS p1, p0; MOV lc, r1 + 10058 0x00 0x00 0x21 0xee 0xc0 0x44 MOVXM ls, #10080 + 10064 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x09 0xbb 0xe0 0x10 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVXM le, #10176; NOPV +.label ZLS_Fmemset_48 +.loop_nesting 1 +.begin_of_loop + 10080 0x23 0x80 0xe0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 ST.s8 r0, [p1], #1; NOPB; NOPS; NOPX; NOPM; NOPV + 10096 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 10112 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 10128 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 10144 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 10160 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLE_Fmemset_144 +.end_of_loop + 10176 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_Fmemset_160 +.loop_nesting 0 + 10192 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot + 10196 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10198 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10200 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10202 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10204 0x00 0x00 NOPX +.label memset__end + +.bss_segment DMb 508416 352 + +.bss_segment DMb 508768 20 + +.bss_segment DMb 508788 1 + +.rodata_segment DMb 508800 +.label _ZL20g_uniformKernelFuncs + 0x60 + 0x1a + 0x0 + 0x0 + 0x80 + 0x1c + 0x0 + 0x0 + 0x90 + 0x20 + 0x0 + 0x0 + 0x10 + 0x27 + 0x0 + 0x0 + +.bss_segment DMb 508864 128 + +.stack DM_stack 507264 508352 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/Release/0_0_reloadable82.map b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/Release/0_0_reloadable82.map new file mode 100644 index 0000000000000000000000000000000000000000..82a06d8b5045b9d0adc391286d4767bf78b9f311 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/Release/0_0_reloadable82.map @@ -0,0 +1,189 @@ + +// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri May 30 11:36:48 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// bridge -o../Release/0_0_reloadable82 ../Release/0_0_reloadable82.o -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/isg -g -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable82.bcf -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/softfloat/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork3593526 -pme + +// Release: ipp V-2024.06-TGT-241219 + +Memory map for memory 'DM_stack': + + Size = 1048576 + Width = 8 bits + Offset = 0 + Used = 1088 + + 0x0007bd80..0x0007c1bf ( 1088 items) : Stack + +Memory map for memory 'DMb': + + Size = 1048576 + Width = 8 bits + Offset = 0 + Used = 1605 + + 0x00000000..0x0007bd7f ( 507264 items) : Reserved + 0x0007bd80..0x0007c1bf ( 1088 items) : Stack + 0x0007c1c0..0x0007c1ff ( 64 items) : Reserved + 0x0007c200..0x0007c35f ( 352 items) : ../Release/0_0_reloadable82.o::gem_bfp_param (Data, Global, .bss.DMb.32) + 0x0007c360..0x0007c363 ( 4 items) : ../Release/0_0_reloadable82.o::_ZL9curr_iter (Data, Local, .bss.DMb.4) + 0x0007c364..0x0007c367 ( 4 items) : ../Release/0_0_reloadable82.o::_ZL10depth_iter (Data, Local, .bss.DMb.4) + 0x0007c368..0x0007c36b ( 4 items) : ../Release/0_0_reloadable82.o::_ZL11total_iters (Data, Local, .bss.DMb.4) + 0x0007c36c..0x0007c36f ( 4 items) : ../Release/0_0_reloadable82.o::_ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6kn_rep (Data, Weak, .bss.DMb.4) + 0x0007c370..0x0007c373 ( 4 items) : me_defs.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive11control_satE (Data, Global, .bss.DMb.4) + 0x0007c374..0x0007c374 ( 1 items) : me_defs.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive11control_rndE (Data, Global, .bss.DMb.1) + 0x0007c380..0x0007c38f ( 16 items) : ../Release/0_0_reloadable82.o::_ZL20g_uniformKernelFuncs (Data, Local, .rodata.DMb.64) + + Called functions : _Z15_b13786_wrapperPPv + _Z14_b8148_wrapperPPv + _Z14_b8170_wrapperPPv + _Z14_b7835_wrapperPPv + + 0x0007c3c0..0x0007c43f ( 128 items) : ../Release/0_0_reloadable82.o::_ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6params (Data, Weak, .bss.DMb.64) + 0x0007ca00..0x000fffff ( 538112 items) : Reserved + +Memory map for memory 'PM': + + Size = 1048576 + Width = 8 bits + Offset = 0 + Used = 7520 + + 0x00000000..0x000009df ( 2528 items) : Reserved + 0x000009e0..0x00000c01 ( 546 items) : ../Release/0_0_reloadable82.o::_Z13kernelWrapperPPvjjjj (Function, Global, .text) (stack frame size = 64) + + Referenced symbols: _ZL20g_uniformKernelFuncs + + 0x00000c10..0x00000e35 ( 550 items) : ../Release/0_0_reloadable82.o::_ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv (Function, Local, .text) (stack frame size = 0) + + Referenced symbols: gem_bfp_param + + 0x00000e40..0x00000f65 ( 294 items) : ../Release/0_0_reloadable82.o::_Z8init_accILt1EEvPaS0_iii (Function, Weak, .text) (stack frame size = 64) + + Referenced symbols: _ZN12me_primitive11control_rndE + + 0x00000f70..0x00001095 ( 294 items) : ../Release/0_0_reloadable82.o::_Z12post_processPai (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: _ZN12me_primitive11control_rndE + + 0x000010a0..0x00001829 ( 1930 items) : ../Release/0_0_reloadable82.o::_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params (Function, Weak, .text) (stack frame size = 64) + + Called functions : _Z8init_accILt1EEvPaS0_iii + _Z12post_processPai + + Referenced symbols: gem_bfp_param + _ZN12me_primitive11control_satE + _ZN12me_primitive11control_rndE + + 0x00001830..0x00001a51 ( 546 items) : ../Release/0_0_reloadable82.o::_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 64) + + Called functions : _ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv + _Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params + + Referenced symbols: _ZL9curr_iter + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL11total_iters + _ZL10depth_iter + gem_bfp_param + + 0x00001a60..0x00001a83 ( 36 items) : ../Release/0_0_reloadable82.o::_Z15_b13786_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + + 0x00001a90..0x00001c73 ( 484 items) : ../Release/0_0_reloadable82.o::_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj (Function, Weak, .text) (stack frame size = 64) + + Called functions : memset + + 0x00001c80..0x00001c9f ( 32 items) : ../Release/0_0_reloadable82.o::_Z14_b8148_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj + + 0x00001ca0..0x00001d41 ( 162 items) : ../Release/0_0_reloadable82.o::_ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj (Function, Local, .text) (stack frame size = 0) + 0x00001d50..0x00001da3 ( 84 items) : ../Release/0_0_reloadable82.o::_ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params (Function, Local, .text) (stack frame size = 0) + 0x00001db0..0x00001de3 ( 52 items) : ../Release/0_0_reloadable82.o::_ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj (Function, Local, .text) (stack frame size = 0) + + Called functions : _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj + _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params + + 0x00001df0..0x00001fcf ( 480 items) : ../Release/0_0_reloadable82.o::_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params (Function, Weak, .text) (stack frame size = 0) + 0x00001fd0..0x00002081 ( 178 items) : ../Release/0_0_reloadable82.o::_ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj (Function, Weak, .text) (stack frame size = 128) + + Called functions : _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj + _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params + + 0x00002090..0x000020af ( 32 items) : ../Release/0_0_reloadable82.o::_Z14_b8170_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj + + 0x000020b0..0x000021b5 ( 262 items) : ../Release/0_0_reloadable82.o::_Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: _ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6params + + 0x000021c0..0x0000263d ( 1150 items) : ../Release/0_0_reloadable82.o::_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: _ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6params + + 0x00002640..0x00002709 ( 202 items) : ../Release/0_0_reloadable82.o::_ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj (Function, Weak, .text) (stack frame size = 64) + + Called functions : _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj + _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params + + Referenced symbols: _ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6kn_rep + _ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6params + + 0x00002710..0x0000272f ( 32 items) : ../Release/0_0_reloadable82.o::_Z14_b7835_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj + + 0x00002730..0x000027dd ( 174 items) : string.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/lib/Release/libc.a)::memset (Function, Global, .text) (stack frame size = 0) + +External symbols: + + __dso_handle = 0x0 + _ctors_end = 0x0 + _ctors_start = 0x0 + _dtors_end = 0x0 + _dtors_start = 0x0 + _pc_end = 0x27de + _pc_start = 0x9e0 + _sp_end_DM_stack = 0x7c1c0 + _sp_start_DM_stack = 0x7bd80 + +Section summary for memory 'DM_stack': + + .stack File + ---------- ---------- + 1088 + ---------- ---------- + 1088 Total + +Section summary for memory 'DMb': + + .bss .rodata File + ---------- ---------- ---------- + 496 16 ../Release/0_0_reloadable82.o + 5 0 me_defs.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release/libme.a) + ---------- ---------- ---------- + 501 16 Total + +Section summary for memory 'PM': + + .text File + ---------- ---------- + 7346 ../Release/0_0_reloadable82.o + 174 string.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/lib/Release/libc.a) + ---------- ---------- + 7520 Total + +File summary: + +../Release/0_0_reloadable82.o + DMb 512 + PM 7346 + +me_defs.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release/libme.a) + DMb 5 + +string.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/lib/Release/libc.a) + PM 174 + diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/Release/0_0_reloadable82.o.lst b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/Release/0_0_reloadable82.o.lst new file mode 100644 index 0000000000000000000000000000000000000000..85071d4968a6b69f6bfb51d3274dc2ea74dc53d4 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/Release/0_0_reloadable82.o.lst @@ -0,0 +1,37217 @@ + +// File generated by darts version V-2024.06#84922c0d9f#241219, Fri May 30 11:36:44 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// darts -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -d -h -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno ../Release/0_0_reloadable82.o me + +// Release: ipp V-2024.06-TGT-241219 +.data_segment_name +.bss local 4 _ZL9curr_iter DMb 4 + +.data_segment_name +.bss local 4 _ZL14num_depth_iter DMb 4 + +.data_segment_name +.data local 4 _ZL8num_iter DMb + 0x1 + 0x0 + 0x0 + 0x0 + +.data_segment_name +.bss local 4 _ZL10depth_iter DMb 4 + +.data_segment_name +.bss local 4 _ZL11total_iters DMb 4 + +.data_segment_name +.bss local 4 _ZL8core_row DMb 4 + +.data_segment_name +.bss local 4 _ZL11ifm1_offset DMb 4 + +.data_segment_name +.bss local 4 _ZL11ifm2_offset DMb 4 + +.data_segment_name +.bss local 4 _ZL10ifmsv_size DMb 4 + +.data_segment_name +.bss local 4 _ZL11reduce_axis DMb 4 + +.data_segment_name +.bss local 4 _ZL10width_iter DMb 4 + +.data_segment_name +.bss local 4 _ZL11height_iter DMb 4 + +.data_segment_name +.rodata local 64 _ZL20g_uniformKernelFuncs DMb +.rela 62 _Z15_b13786_wrapperPPv 0 + 0x0 + 0x0 + 0x0 + 0x0 +.rela 62 _Z14_b8148_wrapperPPv 0 + 0x0 + 0x0 + 0x0 + 0x0 +.rela 62 _Z14_b8170_wrapperPPv 0 + 0x0 + 0x0 + 0x0 + 0x0 +.rela 62 _Z14_b7835_wrapperPPv 0 + 0x0 + 0x0 + 0x0 + 0x0 + +.undef local data _ZL9curr_iter + +.undef local data _ZL14num_depth_iter + +.undef local data _ZL8num_iter + +.undef local data _ZL10depth_iter + +.undef local data _ZL11total_iters + +.undef local data _ZL8core_row + +.undef local data _ZL9curr_iter + +.undef local data _ZL8num_iter + +.undef local data _ZL11ifm1_offset + +.text_segment_name +.text local 16 _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE +.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_begin0 +.function_start + 0 0x18 0x96 0xc0 0xf8 MOV r2, p3 + 4 0x00 0x00 0x2a 0xc2 0x0e 0x24 MOVX r0, #0; ADD.NC p5, r2, #14 + 10 0xa3 0x82 0x50 0x02 0xe5 0xd4 LDA.s16 r0, [p5], #2; VBCST.16 x0, r0 + 16 0x05 0x04 0x56 0x98 LDA r2, [p5] + 20 0x00 0x00 NOPX + 22 0x00 0x00 NOPX + 24 0x00 0x00 NOPX + 26 0x00 0x00 NOPX + 28 0x00 0x00 NOPX + 30 0x10 0x02 0x09 0x18 MOVX r1, #2 + 34 0x10 0x42 0x2c 0x98 LTU r1, r1, r2 + 38 0x08 0x00 0x58 0x40 0x01 0x84 JNZ r1, #TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_176 +.delay_slot + 44 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 +.delay_slot + 50 0x1c 0x65 0xe0 0xf8 MOV p4, sp +.delay_slot + 54 0x3c 0xff 0x90 0x18 PADDB [p4], #-64 +.delay_slot + 58 0x0c 0x04 0x13 0x18 VST x0, [p4] +.delay_slot +.swstall delay_slot + 62 0x00 0x00 NOPX + 64 0x01 0x82 0x80 0x02 0xe5 0xd4 MOVA dj0, #12; VBCST.16 x0, r0 + 70 0x03 0x00 0x0a 0x98 LDA.u8 r0, [p3, dj0] + 74 0x00 0x00 NOPX + 76 0x00 0x00 NOPX + 78 0x00 0x00 NOPX + 80 0x00 0x00 NOPX + 82 0x00 0x00 NOPX + 84 0x00 0x00 NOPX + 86 0x00 0x00 0x40 0x40 0x01 0x84 JNZ r0, #TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_128 +.delay_slot + 92 0x18 0x00 0x00 0xb8 MOV m0, #0 +.delay_slot + 96 0x1a 0x00 0x80 0xb8 MOV m2, #64 +.delay_slot +.swstall delay_slot + 100 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 102 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 104 0x00 0x00 NOPX + 106 0x00 0x04 0x80 0x00 0x00 0x12 0x00 0x00 0x20 0xba MOVA m1, #0; J #TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_144 +.delay_slot +.swstall delay_slot + 116 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 118 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 120 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 122 0x00 0x00 NOPX +.delay_slot + 124 0x08 0x04 0x13 0x18 VST x0, [p0] +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_128 + 128 0x19 0x00 0x80 0xb8 MOV m1, #64 + 132 0x00 0x2c 0xf0 0x00 0x21 0x04 0x13 0x01 0x00 0x00 0x50 0xf6 NOPA; NOPB; VST x0, [p1]; MOV m2, #0 +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_144 + 144 0x00 0x00 0x88 0x00 0x00 0x84 J #TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_272 +.delay_slot + 150 0x12 0x11 0x60 0x02 0x30 0x60 0x70 0x02 MOVS p0, p4; MOV p4, p0 +.delay_slot +.swstall delay_slot + 158 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 160 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 162 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 164 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_176 + 176 0x10 0x02 0x0d 0x18 MOVX r1, #3 + 180 0x10 0x42 0x27 0x98 EQ r1, r1, r2 + 184 0x08 0x00 0x70 0x40 0x01 0x84 JNZ r1, #TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_224 +.delay_slot + 190 0x00 0x00 0x06 0xc0 0x00 0x44 MOVXM p3, #_ZN12me_primitive11control_rndE +.delay_slot +.swstall delay_slot + 196 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 198 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 200 0x00 0x00 NOPX +.delay_slot + 202 0x3f 0x80 0x00 0x20 0x00 0x44 MOVXM r0, #1065353216 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 208 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x2f 0xe0 0x00 0x08 0x00 0x10 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVXM r0, #-1082130432; NOPV +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_224 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 224 0x60 0x80 0x50 0x01 0x01 0x54 LDA.s8 r0, [p3]; MOV m0, #64 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 230 0x19 0x00 0x00 0xb8 MOV m1, #0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 234 0x1a 0x00 0x80 0xb8 MOV m2, #64 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 238 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 240 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 242 0x18 0x00 0x11 0x78 VINSERT.32 x0, x0, #0, r0 + 246 0x19 0x00 0x92 0xf8 VMOV bmll1, x0 + 250 0x10 0x3a 0x80 0x18 MOVX crRnd, r0 + 254 0x08 0x40 0x96 0x18 VCONV.bf16.fp32 wl0, bmll1 + 258 0x00 0x00 NOPX + 260 0x18 0x01 0x03 0x58 VEXTBCST.16 x0, x0, #0 + 264 0x00 0x00 NOPX + 266 0x00 0x2c 0xff 0xf8 0x66 0x0c NOPA; VST x0, [sp, #-64] +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_272 + 272 0xb8 0x86 0xd8 0x50 0xe8 0x00 0x00 0x00 0x78 0xc0 0x10 0xb6 LDA r1, [p5, #-16]; VLDB x1, [p4], m1; MOVXM ls, #(ZLS_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_384 + 0) +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 284 0xff 0x63 0x02 0x90 0x68 0x00 0x00 0x01 0xb8 0xd8 0x10 0xb6 MOVA r3, #-5; VLDB x0, [p1], m2; MOVXM le, #(ZLE_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_432 + 0) +.aggressive_scheduled_block_id 2 +.noswbrkpt + 296 0x01 0x05 0x78 0x50 0xe8 0x00 0xf1 0x12 VLDA.CONV.fp32.bf16 cml0, [p0], m0;VLDB x1, [p4], m1; MOVX r0, #60 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 304 0x29 0x03 0x70 0x00 0x00 0x01 0xb0 0x00 0x10 0xba VLDA x0, [p1], m2; MOVXM p3, #_ZN12me_primitive11control_rndE +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 314 0x03 0x04 0x42 0x98 LDA.s8 r2, [p3] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 318 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 322 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 324 0x10 0x42 0x3d 0x98 LSHL r1, r1, r3 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 328 0x05 0x70 0xfe 0x86 0x01 0x02 0x01 0x62 ADD.NC lc, r1, #-3; VMAC.f dm1, dm0, x1, x0, r0 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 336 0x29 0x03 0x78 0x50 0xe8 0x3c VLDA x0, [p1], m2; VLDB x1, [p4], m1 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 342 0x01 0x05 0x70 0x00 0x10 0x00 0x01 0xa5 0x7e 0xba VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPM +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 352 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x05 0xd4 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVX crRnd, r2; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 368 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x08 0x10 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 +.label ZLS_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_384 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 384 0x29 0x03 0x78 0x50 0xe8 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA x0, [p1], m2; VLDB x1, [p4], m1; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 400 0x01 0x05 0x70 0x00 0x22 0x1c 0xa3 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 416 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLE_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_432 +.end_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 432 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x08 0x10 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 +.loop_nesting 0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 448 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 450 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 454 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.noswbrkpt + 456 0x01 0x02 0x01 0x48 VMAC.f dm1, dm0, x1, x0, r0 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 460 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 462 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 466 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64 +.delay_slot + 470 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 476 0x00 0x00 NOPX +.delay_slot + 478 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64 +.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE__end last +.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_end0 last +.delay_slot +.swstall delay_slot + 482 0x00 0x00 NOPX + +.undef local text _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE + +.undef local data _ZL8core_row + +.undef local data _ZL9curr_iter + +.undef local data _ZL8num_iter + +.undef local data _ZL11ifm1_offset + +.undef local data _ZL8core_row + +.undef local data _ZL9curr_iter + +.undef local data _ZL8num_iter + +.undef local data _ZL11ifm1_offset + +.undef local text _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE + +.undef local data _ZL8core_row + +.undef local data _ZL9curr_iter + +.undef local data _ZL8num_iter + +.undef local data _ZL11ifm1_offset + +.undef local data _ZL11ifm2_offset + +.undef local data _ZL8core_row + +.undef local data _ZL9curr_iter + +.undef local data _ZL8num_iter + +.undef local data _ZL11ifm1_offset + +.undef local data _ZL11ifm2_offset + +.undef local text _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE + +.undef local data _ZL8core_row + +.undef local data _ZL9curr_iter + +.undef local data _ZL8num_iter + +.undef local data _ZL11ifm1_offset + +.undef local data _ZL11ifm2_offset + +.text_segment_name +.text local 10 _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh +.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_begin0 +.function_start + 0 0x03 0x85 0xd0 0x00 0x00 0x00 0xb0 0x00 0x10 0xba LDA el0, [p0], #4; MOVXM p1, #conv2d_dw_params + 10 0x03 0x81 0xd0 0x01 0x00 0x4b 0x08 0x00 0x58 0xba LDA eh0, [p0], #4; MOVX r16, #2; MOV r24, #0 + 20 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 26 0xfe 0x73 0xb0 0x00 0x00 0x03 0xb0 0x00 0x11 0x3a ST p7, [sp, #-16]; MOVXM p7, #conv2d_dw_params + 36 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4] + 40 0x0f 0xf5 0xd5 0x98 ST r14, [sp, #-12] + 44 0x0f 0xf9 0xf5 0x98 ST r15, [sp, #-8] + 48 0x09 0x1c 0x29 0x98 ST el0, [p1], #4 + 52 0x09 0x1c 0x09 0x98 ST eh0, [p1], #4 + 56 0x00 0x04 0x2e 0x98 LDA el0, [p0] + 60 0x00 0x14 0x0e 0x98 LDA eh0, [p0, #4] + 64 0x00 0x00 NOPX + 66 0x00 0x00 NOPX + 68 0x00 0x00 NOPX + 70 0x00 0x00 NOPX + 72 0x00 0x00 NOPX + 74 0x09 0x04 0x29 0x98 ST el0, [p1] + 78 0x09 0x14 0x09 0x98 ST eh0, [p1, #4] + 82 0x07 0x5e 0x2a 0x98 LDA.u8 r17, [p7], #5 + 86 0x07 0xee 0x4a 0x98 LDA.u8 r18, [p7], #-2 + 90 0x07 0xec 0x2a 0x98 LDA.u8 r1, [p7], #-2 + 94 0x00 0x00 NOPX + 96 0x00 0x00 NOPX + 98 0x00 0x00 NOPX + 100 0x00 0x00 NOPX +.no_stack_arguments + 102 0x00 0x00 0x00 0x00 0x01 0x04 JL #_ZN12me_primitive10udiv_dstepEjjRjS0_ +.delay_slot + 108 0xfd 0xca 0xb8 0xba 0x43 0x5c ST r18, [sp, #-20]; SUB r14, r17, r18 +.delay_slot + 114 0xfc 0x86 0xb0 0x03 0x08 0x45 0xe8 0x50 0x79 0x3a ST r1, [sp, #-28]; NE r16, r1, r16; MOV r15, r1 +.delay_slot + 124 0xfd 0x42 0xb7 0x6f 0x15 0x5c ST r16, [sp, #-24]; LT r27, r14, r24 +.delay_slot + 130 0x16 0x22 0xe1 0x98 SUB r17, r24, r14 +.delay_slot + 134 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x03 0x81 0x12 0x7a NOPA; NOPS; SEL.EQZ r0, r14, r17, r27 +.return_address + 144 0xe7 0xc5 0x50 0x1f 0x47 0x36 0x08 0x00 0x58 0xba LDA.u8 r17, [p7], #3; XOR r20, r15, r14; MOV r16, #0 + 154 0xfd 0xc9 0x58 0x4c 0x43 0x2c LDA.u8 r18, [p7], #-2; SUB r19, r16, r2 + 160 0xfc 0x86 0x2a 0x6e 0x15 0x2c LDA r1, [sp, #-28]; LT r27, r20, r16 + 166 0x10 0xa7 0x32 0x18 SEL.EQZ r19, r2, r19, r27 + 170 0x00 0x00 NOPX + 172 0x00 0x00 NOPX +.no_stack_arguments + 174 0x00 0x00 0x00 0x00 0x01 0x04 JL #_ZN12me_primitive10udiv_dstepEjjRjS0_ +.delay_slot + 180 0x14 0xe6 0x70 0x18 EXTEND.s16 r19, r19 +.delay_slot + 184 0xfc 0x4a 0xb0 0x22 0xe9 0x0d 0xec 0xc0 0x49 0x3a ST r18, [sp, #-32]; SUB r14, r17, r18; ADD.NC r15, r19, #1 +.delay_slot + 194 0x13 0xb7 0x0a 0x98 LT r27, r14, r16 +.delay_slot + 198 0x14 0x22 0xe1 0x98 SUB r17, r16, r14 +.delay_slot + 202 0x00 0x2c 0xf7 0x02 0x24 0x2c NOPA; SEL.EQZ r0, r14, r17, r27 +.return_address + 208 0xfc 0x86 0x20 0x01 0x30 0x48 0x00 0x42 0x58 0xba LDA r1, [sp, #-28]; MOVX r19, #2; MOV m0, #66 + 218 0xe1 0x51 0x50 0x01 0x80 0x0a 0x48 0x08 0x58 0xba LDA.u8 r20, [p7], m0; MOVX r24, #0; MOV r18, #8 + 228 0xfc 0x72 0x20 0x3f 0x07 0x4b 0xe8 0x17 0x58 0xba LDA r28, [sp, #-32]; MOVX r16, #-6; MOV r31, #23 + 238 0xfd 0xda 0x20 0x3f 0xa7 0xca 0xa8 0x06 0x58 0xba LDA r22, [sp, #-20]; MOVX r26, #-2; MOV r21, #6 + 248 0xfd 0x0e 0x20 0x0f 0xd7 0x89 0x00 0x20 0x58 0xba LDA r3, [sp, #-24]; MOVX r29, #508; MOV m2, #32 + 258 0xe9 0xc0 0x80 0x01 0x70 0x28 0x08 0x80 0x58 0xba MOVA m0, #-178; MOVX r23, #1; MOV r0, #128 + 268 0x17 0x44 0x80 0x31 0x11 0x0c 0x9d 0xb0 0x78 0xba MOVA m1, #186; SUB r17, r24, r2; MOV vaddSign0, crMCDEn + 278 0x10 0x7c 0xe6 0x98 XOR r30, r1, r14 + 282 0x17 0xb7 0x8a 0x98 LT r27, r30, r24 + 286 0x14 0x62 0x43 0xbc 0xff 0x24 SEL.EQZ r17, r2, r17, r27; ADD.NC r7, r28, #-1 + 292 0x8f 0x8e 0x0b 0x36 0x02 0x24 EXTEND.s16 r30, r17; ADD.NC r22, r22, #2 + 298 0x7f 0xa9 0xf7 0x3e 0x01 0x24 MUL r30, r15, r20; ADD.NC r14, r30, #1 + 304 0x08 0x9d 0xf8 0xb6 0x01 0x24 MUL r2, r1, r14; ADD.NC r17, r22, #1 + 310 0x14 0xf6 0x17 0x98 EQ r27, r19, r1 + 314 0x17 0x84 0x2f 0x98 MUL r2, r30, r2 + 318 0xff 0xe4 0x49 0x3f 0xf5 0x64 SEL.EQZ r31, r31, r18, r27; MOV r18, #-3 + 324 0x10 0xa1 0x0d 0x98 LSHL r16, r2, r16 + 328 0x14 0x45 0xad 0x98 LSHL r2, r17, r26 + 332 0x10 0xb9 0xf2 0x22 0xff 0x24 MUL r2, r2, r28; ADD.NC r4, r2, #-1 + 338 0x10 0xc7 0x5d 0x98 LSHL r3, r3, r21 + 342 0xff 0x8a 0x37 0x94 0x3f 0x5c ST r2, [p7], #-4; MUL r5, r15, r1 + 348 0xe9 0x42 0x30 0x3b 0x6b 0x26 0x08 0x04 0x59 0x3a ST r16, [p7], m2; AND r22, r29, r22; MOV r16, #4 + 358 0xf9 0xfe 0x32 0xda 0xc1 0x5c ST r31, [p7], #-16; ADD r22, r5, r22 + 364 0xed 0x8e 0x3b 0x7e 0x9f 0x5c ST r3, [p7], #24; MUL r31, r22, r20 + 370 0xe3 0x92 0x3b 0x5a 0x1b 0x5c ST r4, [p7], #4; LSHL r22, r22, r16 + 376 0x17 0xc7 0x7d 0x98 LSHL r3, r31, r23 + 380 0x11 0x09 0x5d 0x98 LSHL r4, r4, r21 + 384 0xb6 0x46 0x32 0x24 0x02 0xa4 SUB r25, r22, r3; ADD.NC r4, r4, r0 + 390 0xf8 0x00 0x00 0x06 0x62 0x0f 0x2e 0x40 0xa8 0xba MOVA r0, #-64; SUB r6, r3, r4; ADD.NC r25, r25, r0 + 400 0xe3 0x82 0x3f 0x84 0x3f 0x5c ST r0, [p7], #4; MUL r1, r31, r1 + 406 0xe3 0x9e 0x3f 0xfc 0xff 0x5c ST r7, [p7], #4; MUL r31, r31, r7 + 412 0xe3 0x9a 0x32 0x96 0x5b 0x5c ST r6, [p7], #4; LSHL r5, r5, r18 + 418 0xf9 0xaf 0xbf 0xa5 0xff 0x24 LSHL r6, r31, r23; ADD.NC r31, r5, #-1 + 424 0x00 0xe4 0x00 0x28 0x59 0x6e 0x49 0x88 0xa8 0xba MOVA r4, #7; LSHL r5, r20, r18; ADD.NC r18, r6, r4 + 434 0x16 0x25 0x21 0x98 SUB r18, r24, r18 + 438 0xe3 0xca 0x30 0x02 0x1b 0xee 0x49 0x7f 0xc9 0x3a ST r18, [p7], #4; LSHL r1, r1, r23; ADD.NC r18, r5, #-1 + 448 0xe3 0xca 0x30 0x9a 0xc1 0x5c ST r18, [p7], #4; ADD r6, r1, r22 + 454 0xe3 0xda 0x3f 0x84 0x9b 0x5c ST r22, [p7], #4; LSHL r1, r31, r4 + 460 0xe3 0xfe 0x30 0x07 0x60 0x84 0x2f 0xff 0x59 0x3a ST r31, [p7], #4; ADD r22, r3, r1; MOV r1, #-1 + 470 0xe3 0xe6 0x30 0x0c 0x3b 0x0e 0xc8 0x40 0x59 0x3a ST r25, [p7], #4; SUB r3, r6, r22; MOV r22, #64 + 480 0xe3 0x8e 0x37 0xc2 0x1b 0x5c ST r3, [p7], #4; LSHL r16, r15, r16 + 486 0xe3 0xca 0x30 0x03 0x04 0x5c ST r18, [p7], #4; SEL.EQZ r0, r0, r24, r27 + 492 0xf0 0xef 0xb0 0x30 0x02 0xa4 LSHL r3, r30, r23; ADD.NC r0, r16, r0 + 498 0xe3 0x82 0x38 0x40 0x63 0x5c ST r0, [p7], #4; SUB r16, r16, r3 + 504 0xe3 0xfe 0x30 0x0a 0x11 0x33 0xec 0x10 0x09 0x3a ST r31, [p7], #4; MAC r1, r1, r5, r2; ADD.NC r31, r16, #64 + 514 0xe3 0xfe 0x39 0x7e 0xbb 0x5c ST r31, [p7], #4; LSHL r31, r18, r21 + 520 0xe3 0xda 0x30 0x8a 0xbb 0x5c ST r22, [p7], #4; LSHL r2, r1, r21 + 526 0xe3 0x86 0x3c 0x04 0x43 0x5c ST r1, [p7], #4; SUB r1, r24, r2 + 532 0xe3 0xda 0x3c 0x0b 0xe3 0x5c ST r22, [p7], #4; SUB r2, r24, r31 + 538 0x0f 0x1c 0x31 0x98 ST r1, [p7], #4 + 542 0x0f 0x1e 0x51 0x98 ST r18, [p7], #4 + 546 0x0f 0x1e 0xd1 0x98 ST r22, [p7], #4 + 550 0x0f 0x08 0x51 0x98 ST r2, [p7], m0 + 554 0x07 0x28 0x2a 0x98 LDA.u8 r1, [p7], m1 + 558 0x00 0x00 NOPX + 560 0x00 0x00 NOPX + 562 0x00 0x00 NOPX + 564 0x00 0x00 NOPX + 566 0x00 0x00 NOPX + 568 0x00 0x00 NOPX + 570 0x08 0x01 0x30 0x00 0x01 0x84 JZ r1, #TGT_F_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh_608 +.delay_slot + 576 0x10 0x20 0x0d 0x18 MOVX r16, #3 +.delay_slot + 580 0x13 0xe1 0x0d 0x98 LSHL r16, r15, r16 +.delay_slot + 584 0xff 0x7f 0x0f 0xa0 0x00 0x44 MOVXM r31, #-8454144 +.delay_slot +.swstall delay_slot + 590 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 592 0x00 0x00 NOPX + 594 0x00 0x2c 0xf0 0x00 0x20 0x3e 0x01 0x00 0x34 0xaf 0x00 0x2b 0x60 0x7e NOPA; NOPB; NOPS; MOVX r31, #0; NOPM +.label TGT_F_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh_608 + 608 0xe7 0x60 0x80 0x00 0x00 0x00 0x30 0x00 0x10 0xba MOVA m0, #-197; MOVXM p0, #_ZN12me_primitive11control_rndE + 618 0x00 0xc4 0x50 0x3b 0xd8 0xa4 0x01 0xf8 0xb8 0xba LDA.s8 r17, [p0]; AND r29, r29, r17; VINSERT.32 x0, x0, #0, r31 + 628 0xff 0x06 0x20 0x01 0xf0 0xa8 0x00 0x49 0x78 0xba LDA r1, [sp, #-8]; MOVX r31, #5; VMOV bmll0, x0 + 638 0xff 0x87 0x20 0x1f 0xff 0xec 0x80 0xc9 0x58 0xba LDA lr, [sp, #-4]; LSHL r31, r15, r31; MOV m1, #201 + 648 0xfe 0x03 0x20 0x64 0x02 0x2c LDA p0, [sp, #-16]; MOVX r25, #0 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 654 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 656 0x07 0xf5 0xd1 0x18 LDA r14, [sp, #-12] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 660 0x07 0x2c 0x37 0x18 ST.s16 r1, [p7], #4 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 664 0xef 0x39 0xff 0x71 0x41 0xe4 MUL r28, r29, r28; MOV crRnd, r17 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 670 0x08 0x02 0xc0 0x1f 0x1d 0x6d 0xe8 0x50 0x79 0x3a VCONV.bf16.fp32 wl0, bmll0; LSHL r17, r15, r26; MOV r15, r1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 680 0xe5 0x29 0xf8 0xb1 0xff 0x24 MUL r20, r28, r20; ADD.NC r17, r17, #-1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 686 0xa7 0x67 0xb0 0x82 0x03 0x64 LSHL r29, r20, r19; VEXTRACT.16 r1, x0, #0, vaddSign0 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 692 0x17 0xb8 0xef 0x98 MUL r28, r30, r14 + 696 0x14 0x6b 0x5d 0x98 LSHL r21, r17, r21 + 700 0xe3 0xd2 0x3e 0x5e 0xfb 0x5c ST r20, [p7], #4; LSHL r23, r28, r23 + 706 0xe3 0xf6 0x3f 0xea 0xa3 0x5c ST r29, [p7], #4; SUB r26, r31, r21 + 712 0xe1 0x72 0x3f 0xd6 0x4c 0x5c ST r28, [p7], m0; MAC r21, r21, r31, r18 + 718 0x07 0x2a 0x8a 0x98 LDA.u8 r20, [p7], m1 + 722 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 + 728 0x00 0x00 NOPX + 730 0x00 0x00 NOPX + 732 0x00 0x00 NOPX + 734 0x17 0xbd 0x3d 0x98 LSHL r30, r30, r19 + 738 0x17 0xab 0x51 0x98 SUB r21, r30, r21 + 742 0x14 0xf7 0x47 0x98 EQ r27, r19, r20 + 746 0x16 0x27 0x72 0x18 SEL.EQZ r19, r24, r23, r27 + 750 0x0f 0x1e 0x71 0x98 ST r19, [p7], #4 + 754 0x0f 0x1e 0x11 0x98 ST r16, [p7], #4 + 758 0x0f 0x1e 0x31 0x98 ST r17, [p7], #4 + 762 0xe3 0xda 0x30 0x50 0x00 0x5c ST r22, [p7], #4; RET lr +.delay_slot + 768 0x0f 0x1e 0x51 0x98 ST r18, [p7], #4 +.delay_slot + 772 0x0f 0x1f 0x51 0x98 ST r26, [p7], #4 +.delay_slot + 776 0x0f 0x1e 0xb1 0x98 ST r21, [p7], #4 +.delay_slot + 780 0x0f 0x07 0x31 0x98 ST r25, [p7] +.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh__end last +.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_end0 last +.delay_slot + 784 0xe2 0xe6 0x30 0x03 0xb0 0x60 0x70 0x02 ST r25, [p7, #4]; MOV p7, p0 + +.undef local data _ZL8core_row + +.undef local data _ZL9curr_iter + +.undef local data _ZL10ifmsv_size + +.undef local data _ZL8num_iter + +.undef local text _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh + +.text_segment_name +.text local 10 _ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv +.label __ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv___func_begin0 +.function_start + 0 0x03 0x8e 0xd0 0x00 0x00 0x00 0xb0 0x00 0x10 0xba LDA r3, [p0], #4; MOVXM p1, #gem_bfp_param + 10 0xff 0xdd 0x00 0x3e 0x67 0xa8 0xaf 0xfc 0x58 0xba MOVA r29, #-2; MOVX r6, #-3; MOV r5, #-4 + 20 0x00 0x18 0x00 0x00 0x10 0x28 0x08 0x08 0x58 0xba MOVA r24, #0; MOVX r1, #1; MOV r0, #8 + 30 0x20 0x04 0x00 0x00 0x3f 0xff 0x8f 0xff 0x10 0xba MOVA r4, #256; MOVXM r28, #16777214 + 40 0x00 0xf0 0x00 0x01 0x31 0x28 0x4a 0x00 0x58 0xba MOVA r16, #7; MOVX r19, #9; MOV r2, #512 + 50 0x11 0xc0 0x20 0x00 0xd1 0x64 MOVX r7, #128; MOV m0, #52 + 56 0x18 0x80 0x00 0xf8 MOV dj0, m0 + 60 0x23 0x8e 0x31 0xec 0xdb 0x5c ST r3, [p1], #4; LSHL r27, r3, r6 + 66 0x03 0x8e 0xd1 0xc4 0xbb 0x2c LDA r3, [p0], #4; LSHL r17, r3, r5 + 72 0xdf 0xc1 0xb8 0xb1 0xff 0x24 LSHL r31, r27, r0; ADD.NC r17, r17, #-1 + 78 0xdc 0xe7 0xb9 0x3f 0x22 0xa4 LSHL r19, r27, r19; ADD.NC r18, r31, r4 + 84 0x16 0xed 0x0d 0x98 LSHL r22, r27, r16 + 88 0x00 0x00 NOPX + 90 0x00 0x00 NOPX + 92 0x00 0x00 NOPX + 94 0x23 0x8e 0x31 0xe8 0xdb 0x5c ST r3, [p1], #4; LSHL r26, r3, r6 + 100 0x03 0xd6 0xdd 0x53 0xfe 0x2c LDA r21, [p0], #4; ADD r20, r26, #-1 + 106 0x15 0xaf 0x4f 0x98 MUL r23, r22, r20 + 110 0x00 0x00 NOPX + 112 0x11 0xfd 0x71 0x98 SUB r30, r7, r23 + 116 0x00 0x00 NOPX + 118 0x00 0x00 NOPX + 120 0x00 0x00 NOPX + 122 0x23 0xd6 0x31 0x8e 0xbf 0x5c ST r21, [p1], #4; MUL r3, r3, r21 + 128 0x03 0x85 0xda 0x98 0xdb 0x2c LDA el0, [p0], #4; LSHL r6, r21, r6 + 134 0xde 0x4d 0xfe 0x26 0xe2 0xa4 MUL r25, r27, r6; ADD.NC r28, r6, r28 + 140 0xa9 0x4b 0xba 0xba 0xa2 0xa4 LSHL r5, r21, r5; ADD.NC r21, r26, r20 + 146 0x10 0x19 0x00 0x33 0xde 0xec 0xa9 0x7f 0xc8 0xba MOVA r25, #128; LSHL r29, r25, r29; ADD.NC r5, r5, #-1 + 156 0x17 0xc5 0xce 0x18 MSC r2, r2, r31, r28 + 160 0x11 0x8d 0x0d 0x98 LSHL r6, r6, r16 + 164 0x15 0x6b 0x0d 0x98 LSHL r21, r21, r16 + 168 0x23 0x85 0x3d 0x70 0x1b 0x5c ST el0, [p1], #4; LSHL r28, r26, r0 + 174 0x00 0x85 0xdd 0x42 0x1b 0x2c LDA el0, [p0]; LSHL r16, r26, r16 + 180 0x17 0x37 0x51 0x98 SUB r27, r28, r21 + 184 0x11 0x40 0x0d 0x98 LSHL r0, r5, r0 + 188 0x11 0x81 0x46 0x18 MAC r0, r0, r6, r20 + 192 0x11 0xb3 0x4e 0x18 MSC r25, r25, r6, r20 + 196 0x00 0x00 NOPX + 198 0x00 0x00 NOPX + 200 0x09 0x1c 0x29 0x98 ST el0, [p1], #4 + 204 0x00 0x14 0x2e 0x98 LDA el0, [p0, #4] + 208 0x00 0x00 NOPX + 210 0x00 0x00 NOPX + 212 0x00 0x00 NOPX + 214 0x00 0x00 NOPX + 216 0x00 0x00 NOPX + 218 0x00 0x00 NOPX + 220 0x09 0x3c 0x29 0x98 ST el0, [p1], #12 + 224 0x23 0x8e 0x30 0x00 0x69 0xa0 0x00 0x02 ST r3, [p1], #4; ADD.NC r3, r6, #-128 + 232 0x23 0xf6 0x33 0xf6 0x03 0x5c ST r29, [p1], #4; SUB r29, r7, r16 + 238 0x23 0xea 0x30 0x03 0x4d 0xa0 0x00 0x02 ST r26, [p1], #4; ADD.NC r26, r22, #-128 + 246 0x09 0x1c 0x31 0x98 ST r1, [p1], #4 + 250 0x09 0x1f 0x11 0x98 ST r24, [p1], #4 + 254 0x09 0x1f 0xf1 0x98 ST r31, [p1], #4 + 258 0x09 0x1c 0x91 0x98 ST r4, [p1], #4 + 262 0x23 0xca 0x3d 0xca 0x01 0x5c ST r18, [p1], #4; ADD r18, r27, r16 + 268 0x23 0xce 0x32 0x4e 0xe3 0x5c ST r19, [p1], #4; SUB r19, r4, r23 + 274 0x23 0x96 0x33 0x12 0x9c 0x5c ST r5, [p1], #4; MSC r4, r4, r6, r20 + 280 0x29 0x8a 0x3f 0x08 0x02 0x5c ST r2, [p1], #16; MOVX r2, #-128 + 286 0x23 0xe2 0x3b 0xfc 0x4d 0x5c ST r24, [p1], #4; XOR r31, r23, r2 + 292 0x3b 0xe2 0x3c 0x5e 0xe3 0x5c ST r24, [p1], #-12; SUB r23, r24, r23 + 298 0x23 0xe2 0x31 0x08 0x0d 0x5c ST r24, [p1], #4; XOR r2, r2, r0 + 304 0x3d 0xe2 0x3c 0x00 0x03 0x5c ST r24, [p1], #-8; SUB r0, r24, r0 + 310 0x09 0x5f 0x11 0x98 ST r24, [p1], #20 + 314 0x09 0x1e 0x31 0x98 ST r17, [p1], #4 + 318 0x09 0x1f 0xd1 0x98 ST r30, [p1], #4 + 322 0x09 0x1c 0xb1 0x98 ST r5, [p1], #4 + 326 0x09 0x1f 0xf1 0x98 ST r31, [p1], #4 + 330 0x09 0x1e 0x91 0x98 ST r20, [p1], #4 + 334 0x09 0x1f 0x51 0x98 ST r26, [p1], #4 + 338 0x09 0x1c 0x31 0x98 ST r1, [p1], #4 + 342 0x09 0x1c 0xf1 0x98 ST r7, [p1], #4 + 346 0x09 0x1e 0x91 0x98 ST r20, [p1], #4 + 350 0x09 0x1e 0xd1 0x98 ST r22, [p1], #4 + 354 0x09 0x1c 0xb1 0x98 ST r5, [p1], #4 + 358 0x09 0x1e 0xf1 0x98 ST r23, [p1], #4 + 362 0x23 0xce 0x32 0xd7 0x8c 0x5c ST r19, [p1], #4; MAC r21, r21, r5, r28 + 368 0x09 0x1c 0xf1 0x98 ST r7, [p1], #4 + 372 0x23 0xc6 0x3c 0x72 0xa3 0x5c ST r17, [p1], #4; SUB r28, r24, r21 + 378 0x23 0xf2 0x38 0x56 0xa3 0x5c ST r28, [p1], #4; SUB r21, r16, r21 + 384 0x09 0x1c 0xb1 0x98 ST r5, [p1], #4 + 388 0x09 0x1f 0x71 0x98 ST r27, [p1], #4 + 392 0x09 0x1e 0x91 0x98 ST r20, [p1], #4 + 396 0x09 0x1f 0xb1 0x98 ST r29, [p1], #4 + 400 0x09 0x1c 0x31 0x98 ST r1, [p1], #4 + 404 0x09 0x1e 0x11 0x98 ST r16, [p1], #4 + 408 0x09 0x1e 0x91 0x98 ST r20, [p1], #4 + 412 0x09 0x1c 0xf1 0x98 ST r7, [p1], #4 + 416 0x09 0x1c 0xb1 0x98 ST r5, [p1], #4 + 420 0x09 0x1e 0x51 0x98 ST r18, [p1], #4 + 424 0x09 0x1e 0xb1 0x98 ST r21, [p1], #4 + 428 0x09 0x1e 0x11 0x98 ST r16, [p1], #4 + 432 0x09 0x1e 0x31 0x98 ST r17, [p1], #4 + 436 0x09 0x1c 0x51 0x98 ST r2, [p1], #4 + 440 0x09 0x1c 0xb1 0x98 ST r5, [p1], #4 + 444 0x09 0x1f 0x31 0x98 ST r25, [p1], #4 + 448 0x09 0x1e 0x91 0x98 ST r20, [p1], #4 + 452 0x09 0x1c 0x71 0x98 ST r3, [p1], #4 + 456 0x09 0x1c 0x31 0x98 ST r1, [p1], #4 + 460 0x09 0x1c 0xf1 0x98 ST r7, [p1], #4 + 464 0x09 0x1e 0x91 0x98 ST r20, [p1], #4 + 468 0x09 0x1c 0xd1 0x98 ST r6, [p1], #4 + 472 0x09 0x1c 0xb1 0x98 ST r5, [p1], #4 + 476 0x09 0x1c 0x91 0x98 ST r4, [p1], #4 + 480 0x09 0x1c 0x11 0x98 ST r0, [p1], #4 + 484 0x09 0x08 0xf1 0x98 ST r7, [p1], m0 + 488 0x09 0x1f 0x11 0x98 ST r24, [p1], #4 + 492 0x09 0xdf 0x11 0x98 ST r24, [p1], #-12 + 496 0x09 0x1f 0x11 0x98 ST r24, [p1], #4 + 500 0x09 0xdf 0x11 0x98 ST r24, [p1], #-12 + 504 0x09 0x1f 0x11 0x98 ST r24, [p1], #4 + 508 0x09 0xdf 0x11 0x98 ST r24, [p1], #-12 + 512 0x09 0x1f 0x11 0x98 ST r24, [p1], #4 + 516 0x09 0xdf 0x11 0x98 ST r24, [p1], #-12 + 520 0x09 0x1f 0x11 0x98 ST r24, [p1], #4 + 524 0x3b 0xe2 0x30 0x50 0x00 0x5c ST r24, [p1], #-12; RET lr +.delay_slot + 530 0x09 0x1f 0x11 0x98 ST r24, [p1], #4 +.delay_slot + 534 0x09 0xdf 0x11 0x98 ST r24, [p1], #-12 +.delay_slot + 538 0x09 0x1f 0x11 0x98 ST r24, [p1], #4 +.delay_slot + 542 0x09 0x07 0x11 0x98 ST r24, [p1] +.label _ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv__end last +.label __ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv___func_end0 last +.delay_slot + 546 0x09 0x03 0x11 0x98 ST r24, [p1, dj0] + +.undef local data _ZL9curr_iter + +.undef local data _ZL11total_iters + +.undef local data _ZL10depth_iter + +.undef local text _ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv + +.undef local data _ZL8core_row + +.undef local data _ZL9curr_iter + +.undef local data _ZL11reduce_axis + +.undef local data _ZL11ifm1_offset + +.undef local data _ZL8num_iter + +.undef local data _ZL10depth_iter + +.undef local data _ZL10width_iter + +.undef local data _ZL11height_iter + +.text_segment_name +.text local 10 _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj +.label __ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj___func_begin0 +.function_start + 0 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 4 0x00 0x00 NOPX + 6 0x00 0x00 NOPX + 8 0x00 0x00 NOPX + 10 0x00 0x00 NOPX + 12 0x00 0x00 NOPX + 14 0x00 0x00 NOPX + 16 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 20 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 24 0x00 0x00 NOPX + 26 0x00 0x00 NOPX + 28 0x00 0x00 NOPX + 30 0x00 0x00 NOPX + 32 0x00 0x00 NOPX + 34 0x00 0x00 NOPX + 36 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 40 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 44 0x00 0x00 NOPX + 46 0x00 0x00 NOPX + 48 0x00 0x00 NOPX + 50 0x00 0x00 NOPX + 52 0x00 0x00 NOPX + 54 0x00 0x00 NOPX + 56 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 60 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 64 0x00 0x00 NOPX + 66 0x00 0x00 NOPX + 68 0x00 0x00 NOPX + 70 0x00 0x00 NOPX + 72 0x00 0x00 NOPX + 74 0x00 0x00 NOPX + 76 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 80 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 84 0x00 0x00 NOPX + 86 0x00 0x00 NOPX + 88 0x00 0x00 NOPX + 90 0x00 0x00 NOPX + 92 0x00 0x00 NOPX + 94 0x00 0x00 NOPX + 96 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 100 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 104 0x00 0x00 NOPX + 106 0x00 0x00 NOPX + 108 0x00 0x00 NOPX + 110 0x00 0x00 NOPX + 112 0x00 0x00 NOPX + 114 0x00 0x00 NOPX + 116 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 120 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 124 0x00 0x00 NOPX + 126 0x00 0x00 NOPX + 128 0x00 0x00 NOPX + 130 0x00 0x00 NOPX + 132 0x00 0x00 NOPX + 134 0x00 0x00 NOPX + 136 0x08 0x04 0x29 0x98 ST el0, [p0] + 140 0x01 0x14 0x2e 0x98 LDA el0, [p1, #4] + 144 0x00 0x00 NOPX + 146 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot + 150 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 152 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 154 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 156 0x00 0x00 NOPX +.label _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj__end last +.label __ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj___func_end0 last +.delay_slot + 158 0x08 0x14 0x29 0x98 ST el0, [p0, #4] + +.text_segment_name +.text local 10 _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params +.label __ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params___func_begin0 +.function_start + 0 0x03 0x86 0xd0 0x00 0x00 0x28 0x80 0x20 0x58 0xba LDA r1, [p0], #4; MOVX r0, #1; MOV m1, #32 + 10 0x03 0x96 0xd0 0x00 0x30 0x48 0x4f 0xfa 0x58 0xba LDA r5, [p0], #4; MOVX r3, #2; MOV r2, #-6 + 20 0x05 0x92 0xd0 0x01 0x01 0x54 LDA r4, [p0], #8; MOV m0, #64 + 26 0x05 0x1a 0xd1 0x02 0x01 0x54 LDA r6, [p0], m1; MOV dj0, #128 + 32 0x00 0x00 NOPX + 34 0x00 0x00 NOPX + 36 0x00 0x00 NOPX + 38 0x00 0x00 NOPX + 40 0x00 0x00 NOPX + 42 0x11 0x42 0x1f 0x98 MUL r1, r5, r1 + 46 0x11 0x80 0x04 0x98 AND r0, r6, r0 + 50 0x10 0xc0 0x05 0x98 OR r0, r3, r0 + 54 0x19 0x82 0x30 0x84 0x9f 0x5c ST r0, [p0], #-16; MUL r1, r1, r4 + 60 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 64 0x10 0x40 0x2d 0x98 LSHL r0, r1, r2 +.delay_slot + 68 0x08 0x1c 0x11 0x98 ST r0, [p0], #4 +.delay_slot + 72 0x08 0x1c 0x01 0x98 ST m0, [p0], #4 +.delay_slot + 76 0x08 0x04 0x41 0x98 ST dj0, [p0] +.label _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params__end last +.label __ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params___func_end0 last +.delay_slot + 80 0x08 0x14 0x01 0x98 ST m0, [p0, #4] + +.text_segment_name +.text local 10 _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj +.label __ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj___func_begin0 +.function_start +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.noswbrkpt + 0 0x00 0x00 0x00 0x00 0x01 0x04 JL #_ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6 0x18 0xc1 0xe0 0xf8 MOV dc0, lr +.delay_slot + 10 0x1a 0x60 0xc0 0xf8 MOV p2, p0 +.delay_slot +.swstall delay_slot + 14 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 16 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 18 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV +.tail_call +.return_address + 32 0x00 0x00 0x00 0x00 0x00 0x84 J #_ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params +.delay_slot + 38 0x1f 0x71 0x80 0xf8 MOV lr, dc0 +.delay_slot + 42 0x18 0x64 0xc0 0xf8 MOV p0, p2 +.delay_slot +.swstall delay_slot + 46 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 48 0x00 0x00 NOPX +.label _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj__end last +.label __ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj___func_end0 last +.delay_slot +.swstall delay_slot + 50 0x00 0x00 NOPX + +.undef local text _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj + +.undef local text _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params + +.undef local text _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj + +.undef local data _ZL20g_uniformKernelFuncs + +.data_segment_name +.data weak 40 _ZN11sigmoid_lutILj0ELj256EE7data_abE DMb + 0x46 + 0x3b + 0xb0 + 0x39 + 0x5d + 0x3b + 0xc7 + 0x39 + 0x77 + 0x3b + 0xe2 + 0x39 + 0x8a + 0x3b + 0x0 + 0x3a + 0x9a + 0x3b + 0x11 + 0x3a + 0xac + 0x3b + 0x24 + 0x3a + 0xc0 + 0x3b + 0x3a + 0x3a + 0xd6 + 0x3b + 0x53 + 0x3a + 0x46 + 0x3b + 0xb0 + 0x39 + 0x5d + 0x3b + 0xc7 + 0x39 + 0x77 + 0x3b + 0xe2 + 0x39 + 0x8a + 0x3b + 0x0 + 0x3a + 0x9a + 0x3b + 0x11 + 0x3a + 0xac + 0x3b + 0x24 + 0x3a + 0xc0 + 0x3b + 0x3a + 0x3a + 0xd6 + 0x3b + 0x53 + 0x3a + 0xef + 0x3b + 0x6f + 0x3a + 0x5 + 0x3c + 0x87 + 0x3a + 0x14 + 0x3c + 0x99 + 0x3a + 0x25 + 0x3c + 0xad + 0x3a + 0x38 + 0x3c + 0xc4 + 0x3a + 0x4e + 0x3c + 0xdf + 0x3a + 0x64 + 0x3c + 0xfc + 0x3a + 0x7f + 0x3c + 0xf + 0x3b + 0xef + 0x3b + 0x6f + 0x3a + 0x5 + 0x3c + 0x87 + 0x3a + 0x14 + 0x3c + 0x99 + 0x3a + 0x25 + 0x3c + 0xad + 0x3a + 0x38 + 0x3c + 0xc4 + 0x3a + 0x4e + 0x3c + 0xdf + 0x3a + 0x64 + 0x3c + 0xfc + 0x3a + 0x7f + 0x3c + 0xf + 0x3b + 0x8e + 0x3c + 0x22 + 0x3b + 0x9d + 0x3c + 0x37 + 0x3b + 0xaf + 0x3c + 0x4f + 0x3b + 0xc3 + 0x3c + 0x6b + 0x3b + 0xd8 + 0x3c + 0x85 + 0x3b + 0xef + 0x3c + 0x96 + 0x3b + 0x5 + 0x3d + 0xaa + 0x3b + 0x14 + 0x3d + 0xc1 + 0x3b + 0x8e + 0x3c + 0x22 + 0x3b + 0x9d + 0x3c + 0x37 + 0x3b + 0xaf + 0x3c + 0x4f + 0x3b + 0xc3 + 0x3c + 0x6b + 0x3b + 0xd8 + 0x3c + 0x85 + 0x3b + 0xef + 0x3c + 0x96 + 0x3b + 0x5 + 0x3d + 0xaa + 0x3b + 0x14 + 0x3d + 0xc1 + 0x3b + 0x24 + 0x3d + 0xda + 0x3b + 0x35 + 0x3d + 0xf6 + 0x3b + 0x48 + 0x3d + 0xb + 0x3c + 0x5e + 0x3d + 0x1e + 0x3c + 0x75 + 0x3d + 0x32 + 0x3c + 0x87 + 0x3d + 0x49 + 0x3c + 0x95 + 0x3d + 0x63 + 0x3c + 0xa5 + 0x3d + 0x80 + 0x3c + 0x24 + 0x3d + 0xda + 0x3b + 0x35 + 0x3d + 0xf6 + 0x3b + 0x48 + 0x3d + 0xb + 0x3c + 0x5e + 0x3d + 0x1e + 0x3c + 0x75 + 0x3d + 0x32 + 0x3c + 0x87 + 0x3d + 0x49 + 0x3c + 0x95 + 0x3d + 0x63 + 0x3c + 0xa5 + 0x3d + 0x80 + 0x3c + 0xb6 + 0x3d + 0x91 + 0x3c + 0xc8 + 0x3d + 0xa3 + 0x3c + 0xdc + 0x3d + 0xb8 + 0x3c + 0xf1 + 0x3d + 0xcf + 0x3c + 0x4 + 0x3e + 0xe9 + 0x3c + 0x10 + 0x3e + 0x3 + 0x3d + 0x1e + 0x3e + 0x13 + 0x3d + 0x2c + 0x3e + 0x25 + 0x3d + 0xb6 + 0x3d + 0x91 + 0x3c + 0xc8 + 0x3d + 0xa3 + 0x3c + 0xdc + 0x3d + 0xb8 + 0x3c + 0xf1 + 0x3d + 0xcf + 0x3c + 0x4 + 0x3e + 0xe9 + 0x3c + 0x10 + 0x3e + 0x3 + 0x3d + 0x1e + 0x3e + 0x13 + 0x3d + 0x2c + 0x3e + 0x25 + 0x3d + 0x3b + 0x3e + 0x39 + 0x3d + 0x4b + 0x3e + 0x4f + 0x3d + 0x5c + 0x3e + 0x67 + 0x3d + 0x6e + 0x3e + 0x81 + 0x3d + 0x81 + 0x3e + 0x90 + 0x3d + 0x8a + 0x3e + 0x9f + 0x3d + 0x94 + 0x3e + 0xb1 + 0x3d + 0x9e + 0x3e + 0xc3 + 0x3d + 0x3b + 0x3e + 0x39 + 0x3d + 0x4b + 0x3e + 0x4f + 0x3d + 0x5c + 0x3e + 0x67 + 0x3d + 0x6e + 0x3e + 0x81 + 0x3d + 0x81 + 0x3e + 0x90 + 0x3d + 0x8a + 0x3e + 0x9f + 0x3d + 0x94 + 0x3e + 0xb1 + 0x3d + 0x9e + 0x3e + 0xc3 + 0x3d + 0xa9 + 0x3e + 0xd7 + 0x3d + 0xb3 + 0x3e + 0xec + 0x3d + 0xbd + 0x3e + 0x1 + 0x3e + 0xc7 + 0x3e + 0xd + 0x3e + 0xd0 + 0x3e + 0x19 + 0x3e + 0xd9 + 0x3e + 0x25 + 0x3e + 0xe1 + 0x3e + 0x31 + 0x3e + 0xe8 + 0x3e + 0x3d + 0x3e + 0xa9 + 0x3e + 0xd7 + 0x3d + 0xb3 + 0x3e + 0xec + 0x3d + 0xbd + 0x3e + 0x1 + 0x3e + 0xc7 + 0x3e + 0xd + 0x3e + 0xd0 + 0x3e + 0x19 + 0x3e + 0xd9 + 0x3e + 0x25 + 0x3e + 0xe1 + 0x3e + 0x31 + 0x3e + 0xe8 + 0x3e + 0x3d + 0x3e + 0xee + 0x3e + 0x49 + 0x3e + 0xf4 + 0x3e + 0x55 + 0x3e + 0xf8 + 0x3e + 0x5f + 0x3e + 0xfb + 0x3e + 0x69 + 0x3e + 0xfe + 0x3e + 0x71 + 0x3e + 0xff + 0x3e + 0x77 + 0x3e + 0x0 + 0x3f + 0x7c + 0x3e + 0x0 + 0x3f + 0x7f + 0x3e + 0xee + 0x3e + 0x49 + 0x3e + 0xf4 + 0x3e + 0x55 + 0x3e + 0xf8 + 0x3e + 0x5f + 0x3e + 0xfb + 0x3e + 0x69 + 0x3e + 0xfe + 0x3e + 0x71 + 0x3e + 0xff + 0x3e + 0x77 + 0x3e + 0x0 + 0x3f + 0x7c + 0x3e + 0x0 + 0x3f + 0x7f + 0x3e + 0x0 + 0x3f + 0x80 + 0x3e + 0x0 + 0x3f + 0x7f + 0x3e + 0x0 + 0x3f + 0x7c + 0x3e + 0x1 + 0x3f + 0x77 + 0x3e + 0x1 + 0x3f + 0x71 + 0x3e + 0x2 + 0x3f + 0x69 + 0x3e + 0x4 + 0x3f + 0x5f + 0x3e + 0x6 + 0x3f + 0x55 + 0x3e + 0x0 + 0x3f + 0x80 + 0x3e + 0x0 + 0x3f + 0x7f + 0x3e + 0x0 + 0x3f + 0x7c + 0x3e + 0x1 + 0x3f + 0x77 + 0x3e + 0x1 + 0x3f + 0x71 + 0x3e + 0x2 + 0x3f + 0x69 + 0x3e + 0x4 + 0x3f + 0x5f + 0x3e + 0x6 + 0x3f + 0x55 + 0x3e + 0x9 + 0x3f + 0x49 + 0x3e + 0xc + 0x3f + 0x3d + 0x3e + 0x10 + 0x3f + 0x31 + 0x3e + 0x14 + 0x3f + 0x25 + 0x3e + 0x18 + 0x3f + 0x19 + 0x3e + 0x1d + 0x3f + 0xd + 0x3e + 0x22 + 0x3f + 0x1 + 0x3e + 0x27 + 0x3f + 0xec + 0x3d + 0x9 + 0x3f + 0x49 + 0x3e + 0xc + 0x3f + 0x3d + 0x3e + 0x10 + 0x3f + 0x31 + 0x3e + 0x14 + 0x3f + 0x25 + 0x3e + 0x18 + 0x3f + 0x19 + 0x3e + 0x1d + 0x3f + 0xd + 0x3e + 0x22 + 0x3f + 0x1 + 0x3e + 0x27 + 0x3f + 0xec + 0x3d + 0x2c + 0x3f + 0xd7 + 0x3d + 0x31 + 0x3f + 0xc3 + 0x3d + 0x36 + 0x3f + 0xb1 + 0x3d + 0x3b + 0x3f + 0x9f + 0x3d + 0x40 + 0x3f + 0x90 + 0x3d + 0x44 + 0x3f + 0x81 + 0x3d + 0x49 + 0x3f + 0x67 + 0x3d + 0x4d + 0x3f + 0x4f + 0x3d + 0x2c + 0x3f + 0xd7 + 0x3d + 0x31 + 0x3f + 0xc3 + 0x3d + 0x36 + 0x3f + 0xb1 + 0x3d + 0x3b + 0x3f + 0x9f + 0x3d + 0x40 + 0x3f + 0x90 + 0x3d + 0x44 + 0x3f + 0x81 + 0x3d + 0x49 + 0x3f + 0x67 + 0x3d + 0x4d + 0x3f + 0x4f + 0x3d + 0x51 + 0x3f + 0x39 + 0x3d + 0x55 + 0x3f + 0x25 + 0x3d + 0x59 + 0x3f + 0x13 + 0x3d + 0x5c + 0x3f + 0x3 + 0x3d + 0x5f + 0x3f + 0xe9 + 0x3c + 0x62 + 0x3f + 0xcf + 0x3c + 0x65 + 0x3f + 0xb8 + 0x3c + 0x67 + 0x3f + 0xa3 + 0x3c + 0x51 + 0x3f + 0x39 + 0x3d + 0x55 + 0x3f + 0x25 + 0x3d + 0x59 + 0x3f + 0x13 + 0x3d + 0x5c + 0x3f + 0x3 + 0x3d + 0x5f + 0x3f + 0xe9 + 0x3c + 0x62 + 0x3f + 0xcf + 0x3c + 0x65 + 0x3f + 0xb8 + 0x3c + 0x67 + 0x3f + 0xa3 + 0x3c + 0x69 + 0x3f + 0x91 + 0x3c + 0x6b + 0x3f + 0x80 + 0x3c + 0x6d + 0x3f + 0x63 + 0x3c + 0x6f + 0x3f + 0x49 + 0x3c + 0x71 + 0x3f + 0x32 + 0x3c + 0x72 + 0x3f + 0x1e + 0x3c + 0x73 + 0x3f + 0xb + 0x3c + 0x75 + 0x3f + 0xf6 + 0x3b + 0x69 + 0x3f + 0x91 + 0x3c + 0x6b + 0x3f + 0x80 + 0x3c + 0x6d + 0x3f + 0x63 + 0x3c + 0x6f + 0x3f + 0x49 + 0x3c + 0x71 + 0x3f + 0x32 + 0x3c + 0x72 + 0x3f + 0x1e + 0x3c + 0x73 + 0x3f + 0xb + 0x3c + 0x75 + 0x3f + 0xf6 + 0x3b + 0x76 + 0x3f + 0xda + 0x3b + 0x77 + 0x3f + 0xc1 + 0x3b + 0x78 + 0x3f + 0xaa + 0x3b + 0x79 + 0x3f + 0x96 + 0x3b + 0x79 + 0x3f + 0x85 + 0x3b + 0x7a + 0x3f + 0x6b + 0x3b + 0x7b + 0x3f + 0x4f + 0x3b + 0x7b + 0x3f + 0x37 + 0x3b + 0x76 + 0x3f + 0xda + 0x3b + 0x77 + 0x3f + 0xc1 + 0x3b + 0x78 + 0x3f + 0xaa + 0x3b + 0x79 + 0x3f + 0x96 + 0x3b + 0x79 + 0x3f + 0x85 + 0x3b + 0x7a + 0x3f + 0x6b + 0x3b + 0x7b + 0x3f + 0x4f + 0x3b + 0x7b + 0x3f + 0x37 + 0x3b + 0x7c + 0x3f + 0x22 + 0x3b + 0x7c + 0x3f + 0xf + 0x3b + 0x7c + 0x3f + 0xfc + 0x3a + 0x7d + 0x3f + 0xdf + 0x3a + 0x7d + 0x3f + 0xc4 + 0x3a + 0x7d + 0x3f + 0xad + 0x3a + 0x7e + 0x3f + 0x99 + 0x3a + 0x7e + 0x3f + 0x87 + 0x3a + 0x7c + 0x3f + 0x22 + 0x3b + 0x7c + 0x3f + 0xf + 0x3b + 0x7c + 0x3f + 0xfc + 0x3a + 0x7d + 0x3f + 0xdf + 0x3a + 0x7d + 0x3f + 0xc4 + 0x3a + 0x7d + 0x3f + 0xad + 0x3a + 0x7e + 0x3f + 0x99 + 0x3a + 0x7e + 0x3f + 0x87 + 0x3a + 0x7e + 0x3f + 0x6f + 0x3a + 0x7e + 0x3f + 0x53 + 0x3a + 0x7f + 0x3f + 0x3a + 0x3a + 0x7f + 0x3f + 0x24 + 0x3a + 0x7f + 0x3f + 0x11 + 0x3a + 0x7f + 0x3f + 0x0 + 0x3a + 0x7f + 0x3f + 0xe2 + 0x39 + 0x7f + 0x3f + 0xc7 + 0x39 + 0x7e + 0x3f + 0x6f + 0x3a + 0x7e + 0x3f + 0x53 + 0x3a + 0x7f + 0x3f + 0x3a + 0x3a + 0x7f + 0x3f + 0x24 + 0x3a + 0x7f + 0x3f + 0x11 + 0x3a + 0x7f + 0x3f + 0x0 + 0x3a + 0x7f + 0x3f + 0xe2 + 0x39 + 0x7f + 0x3f + 0xc7 + 0x39 + +.data_segment_name +.data weak 64 _ZN11sigmoid_lutILj0ELj256EE7data_cdE DMb + 0x46 + 0x3b + 0xb0 + 0x39 + 0x5d + 0x3b + 0xc7 + 0x39 + 0x77 + 0x3b + 0xe2 + 0x39 + 0x8a + 0x3b + 0x0 + 0x3a + 0x9a + 0x3b + 0x11 + 0x3a + 0xac + 0x3b + 0x24 + 0x3a + 0xc0 + 0x3b + 0x3a + 0x3a + 0xd6 + 0x3b + 0x53 + 0x3a + 0x46 + 0x3b + 0xb0 + 0x39 + 0x5d + 0x3b + 0xc7 + 0x39 + 0x77 + 0x3b + 0xe2 + 0x39 + 0x8a + 0x3b + 0x0 + 0x3a + 0x9a + 0x3b + 0x11 + 0x3a + 0xac + 0x3b + 0x24 + 0x3a + 0xc0 + 0x3b + 0x3a + 0x3a + 0xd6 + 0x3b + 0x53 + 0x3a + 0xef + 0x3b + 0x6f + 0x3a + 0x5 + 0x3c + 0x87 + 0x3a + 0x14 + 0x3c + 0x99 + 0x3a + 0x25 + 0x3c + 0xad + 0x3a + 0x38 + 0x3c + 0xc4 + 0x3a + 0x4e + 0x3c + 0xdf + 0x3a + 0x64 + 0x3c + 0xfc + 0x3a + 0x7f + 0x3c + 0xf + 0x3b + 0xef + 0x3b + 0x6f + 0x3a + 0x5 + 0x3c + 0x87 + 0x3a + 0x14 + 0x3c + 0x99 + 0x3a + 0x25 + 0x3c + 0xad + 0x3a + 0x38 + 0x3c + 0xc4 + 0x3a + 0x4e + 0x3c + 0xdf + 0x3a + 0x64 + 0x3c + 0xfc + 0x3a + 0x7f + 0x3c + 0xf + 0x3b + 0x8e + 0x3c + 0x22 + 0x3b + 0x9d + 0x3c + 0x37 + 0x3b + 0xaf + 0x3c + 0x4f + 0x3b + 0xc3 + 0x3c + 0x6b + 0x3b + 0xd8 + 0x3c + 0x85 + 0x3b + 0xef + 0x3c + 0x96 + 0x3b + 0x5 + 0x3d + 0xaa + 0x3b + 0x14 + 0x3d + 0xc1 + 0x3b + 0x8e + 0x3c + 0x22 + 0x3b + 0x9d + 0x3c + 0x37 + 0x3b + 0xaf + 0x3c + 0x4f + 0x3b + 0xc3 + 0x3c + 0x6b + 0x3b + 0xd8 + 0x3c + 0x85 + 0x3b + 0xef + 0x3c + 0x96 + 0x3b + 0x5 + 0x3d + 0xaa + 0x3b + 0x14 + 0x3d + 0xc1 + 0x3b + 0x24 + 0x3d + 0xda + 0x3b + 0x35 + 0x3d + 0xf6 + 0x3b + 0x48 + 0x3d + 0xb + 0x3c + 0x5e + 0x3d + 0x1e + 0x3c + 0x75 + 0x3d + 0x32 + 0x3c + 0x87 + 0x3d + 0x49 + 0x3c + 0x95 + 0x3d + 0x63 + 0x3c + 0xa5 + 0x3d + 0x80 + 0x3c + 0x24 + 0x3d + 0xda + 0x3b + 0x35 + 0x3d + 0xf6 + 0x3b + 0x48 + 0x3d + 0xb + 0x3c + 0x5e + 0x3d + 0x1e + 0x3c + 0x75 + 0x3d + 0x32 + 0x3c + 0x87 + 0x3d + 0x49 + 0x3c + 0x95 + 0x3d + 0x63 + 0x3c + 0xa5 + 0x3d + 0x80 + 0x3c + 0xb6 + 0x3d + 0x91 + 0x3c + 0xc8 + 0x3d + 0xa3 + 0x3c + 0xdc + 0x3d + 0xb8 + 0x3c + 0xf1 + 0x3d + 0xcf + 0x3c + 0x4 + 0x3e + 0xe9 + 0x3c + 0x10 + 0x3e + 0x3 + 0x3d + 0x1e + 0x3e + 0x13 + 0x3d + 0x2c + 0x3e + 0x25 + 0x3d + 0xb6 + 0x3d + 0x91 + 0x3c + 0xc8 + 0x3d + 0xa3 + 0x3c + 0xdc + 0x3d + 0xb8 + 0x3c + 0xf1 + 0x3d + 0xcf + 0x3c + 0x4 + 0x3e + 0xe9 + 0x3c + 0x10 + 0x3e + 0x3 + 0x3d + 0x1e + 0x3e + 0x13 + 0x3d + 0x2c + 0x3e + 0x25 + 0x3d + 0x3b + 0x3e + 0x39 + 0x3d + 0x4b + 0x3e + 0x4f + 0x3d + 0x5c + 0x3e + 0x67 + 0x3d + 0x6e + 0x3e + 0x81 + 0x3d + 0x81 + 0x3e + 0x90 + 0x3d + 0x8a + 0x3e + 0x9f + 0x3d + 0x94 + 0x3e + 0xb1 + 0x3d + 0x9e + 0x3e + 0xc3 + 0x3d + 0x3b + 0x3e + 0x39 + 0x3d + 0x4b + 0x3e + 0x4f + 0x3d + 0x5c + 0x3e + 0x67 + 0x3d + 0x6e + 0x3e + 0x81 + 0x3d + 0x81 + 0x3e + 0x90 + 0x3d + 0x8a + 0x3e + 0x9f + 0x3d + 0x94 + 0x3e + 0xb1 + 0x3d + 0x9e + 0x3e + 0xc3 + 0x3d + 0xa9 + 0x3e + 0xd7 + 0x3d + 0xb3 + 0x3e + 0xec + 0x3d + 0xbd + 0x3e + 0x1 + 0x3e + 0xc7 + 0x3e + 0xd + 0x3e + 0xd0 + 0x3e + 0x19 + 0x3e + 0xd9 + 0x3e + 0x25 + 0x3e + 0xe1 + 0x3e + 0x31 + 0x3e + 0xe8 + 0x3e + 0x3d + 0x3e + 0xa9 + 0x3e + 0xd7 + 0x3d + 0xb3 + 0x3e + 0xec + 0x3d + 0xbd + 0x3e + 0x1 + 0x3e + 0xc7 + 0x3e + 0xd + 0x3e + 0xd0 + 0x3e + 0x19 + 0x3e + 0xd9 + 0x3e + 0x25 + 0x3e + 0xe1 + 0x3e + 0x31 + 0x3e + 0xe8 + 0x3e + 0x3d + 0x3e + 0xee + 0x3e + 0x49 + 0x3e + 0xf4 + 0x3e + 0x55 + 0x3e + 0xf8 + 0x3e + 0x5f + 0x3e + 0xfb + 0x3e + 0x69 + 0x3e + 0xfe + 0x3e + 0x71 + 0x3e + 0xff + 0x3e + 0x77 + 0x3e + 0x0 + 0x3f + 0x7c + 0x3e + 0x0 + 0x3f + 0x7f + 0x3e + 0xee + 0x3e + 0x49 + 0x3e + 0xf4 + 0x3e + 0x55 + 0x3e + 0xf8 + 0x3e + 0x5f + 0x3e + 0xfb + 0x3e + 0x69 + 0x3e + 0xfe + 0x3e + 0x71 + 0x3e + 0xff + 0x3e + 0x77 + 0x3e + 0x0 + 0x3f + 0x7c + 0x3e + 0x0 + 0x3f + 0x7f + 0x3e + 0x0 + 0x3f + 0x80 + 0x3e + 0x0 + 0x3f + 0x7f + 0x3e + 0x0 + 0x3f + 0x7c + 0x3e + 0x1 + 0x3f + 0x77 + 0x3e + 0x1 + 0x3f + 0x71 + 0x3e + 0x2 + 0x3f + 0x69 + 0x3e + 0x4 + 0x3f + 0x5f + 0x3e + 0x6 + 0x3f + 0x55 + 0x3e + 0x0 + 0x3f + 0x80 + 0x3e + 0x0 + 0x3f + 0x7f + 0x3e + 0x0 + 0x3f + 0x7c + 0x3e + 0x1 + 0x3f + 0x77 + 0x3e + 0x1 + 0x3f + 0x71 + 0x3e + 0x2 + 0x3f + 0x69 + 0x3e + 0x4 + 0x3f + 0x5f + 0x3e + 0x6 + 0x3f + 0x55 + 0x3e + 0x9 + 0x3f + 0x49 + 0x3e + 0xc + 0x3f + 0x3d + 0x3e + 0x10 + 0x3f + 0x31 + 0x3e + 0x14 + 0x3f + 0x25 + 0x3e + 0x18 + 0x3f + 0x19 + 0x3e + 0x1d + 0x3f + 0xd + 0x3e + 0x22 + 0x3f + 0x1 + 0x3e + 0x27 + 0x3f + 0xec + 0x3d + 0x9 + 0x3f + 0x49 + 0x3e + 0xc + 0x3f + 0x3d + 0x3e + 0x10 + 0x3f + 0x31 + 0x3e + 0x14 + 0x3f + 0x25 + 0x3e + 0x18 + 0x3f + 0x19 + 0x3e + 0x1d + 0x3f + 0xd + 0x3e + 0x22 + 0x3f + 0x1 + 0x3e + 0x27 + 0x3f + 0xec + 0x3d + 0x2c + 0x3f + 0xd7 + 0x3d + 0x31 + 0x3f + 0xc3 + 0x3d + 0x36 + 0x3f + 0xb1 + 0x3d + 0x3b + 0x3f + 0x9f + 0x3d + 0x40 + 0x3f + 0x90 + 0x3d + 0x44 + 0x3f + 0x81 + 0x3d + 0x49 + 0x3f + 0x67 + 0x3d + 0x4d + 0x3f + 0x4f + 0x3d + 0x2c + 0x3f + 0xd7 + 0x3d + 0x31 + 0x3f + 0xc3 + 0x3d + 0x36 + 0x3f + 0xb1 + 0x3d + 0x3b + 0x3f + 0x9f + 0x3d + 0x40 + 0x3f + 0x90 + 0x3d + 0x44 + 0x3f + 0x81 + 0x3d + 0x49 + 0x3f + 0x67 + 0x3d + 0x4d + 0x3f + 0x4f + 0x3d + 0x51 + 0x3f + 0x39 + 0x3d + 0x55 + 0x3f + 0x25 + 0x3d + 0x59 + 0x3f + 0x13 + 0x3d + 0x5c + 0x3f + 0x3 + 0x3d + 0x5f + 0x3f + 0xe9 + 0x3c + 0x62 + 0x3f + 0xcf + 0x3c + 0x65 + 0x3f + 0xb8 + 0x3c + 0x67 + 0x3f + 0xa3 + 0x3c + 0x51 + 0x3f + 0x39 + 0x3d + 0x55 + 0x3f + 0x25 + 0x3d + 0x59 + 0x3f + 0x13 + 0x3d + 0x5c + 0x3f + 0x3 + 0x3d + 0x5f + 0x3f + 0xe9 + 0x3c + 0x62 + 0x3f + 0xcf + 0x3c + 0x65 + 0x3f + 0xb8 + 0x3c + 0x67 + 0x3f + 0xa3 + 0x3c + 0x69 + 0x3f + 0x91 + 0x3c + 0x6b + 0x3f + 0x80 + 0x3c + 0x6d + 0x3f + 0x63 + 0x3c + 0x6f + 0x3f + 0x49 + 0x3c + 0x71 + 0x3f + 0x32 + 0x3c + 0x72 + 0x3f + 0x1e + 0x3c + 0x73 + 0x3f + 0xb + 0x3c + 0x75 + 0x3f + 0xf6 + 0x3b + 0x69 + 0x3f + 0x91 + 0x3c + 0x6b + 0x3f + 0x80 + 0x3c + 0x6d + 0x3f + 0x63 + 0x3c + 0x6f + 0x3f + 0x49 + 0x3c + 0x71 + 0x3f + 0x32 + 0x3c + 0x72 + 0x3f + 0x1e + 0x3c + 0x73 + 0x3f + 0xb + 0x3c + 0x75 + 0x3f + 0xf6 + 0x3b + 0x76 + 0x3f + 0xda + 0x3b + 0x77 + 0x3f + 0xc1 + 0x3b + 0x78 + 0x3f + 0xaa + 0x3b + 0x79 + 0x3f + 0x96 + 0x3b + 0x79 + 0x3f + 0x85 + 0x3b + 0x7a + 0x3f + 0x6b + 0x3b + 0x7b + 0x3f + 0x4f + 0x3b + 0x7b + 0x3f + 0x37 + 0x3b + 0x76 + 0x3f + 0xda + 0x3b + 0x77 + 0x3f + 0xc1 + 0x3b + 0x78 + 0x3f + 0xaa + 0x3b + 0x79 + 0x3f + 0x96 + 0x3b + 0x79 + 0x3f + 0x85 + 0x3b + 0x7a + 0x3f + 0x6b + 0x3b + 0x7b + 0x3f + 0x4f + 0x3b + 0x7b + 0x3f + 0x37 + 0x3b + 0x7c + 0x3f + 0x22 + 0x3b + 0x7c + 0x3f + 0xf + 0x3b + 0x7c + 0x3f + 0xfc + 0x3a + 0x7d + 0x3f + 0xdf + 0x3a + 0x7d + 0x3f + 0xc4 + 0x3a + 0x7d + 0x3f + 0xad + 0x3a + 0x7e + 0x3f + 0x99 + 0x3a + 0x7e + 0x3f + 0x87 + 0x3a + 0x7c + 0x3f + 0x22 + 0x3b + 0x7c + 0x3f + 0xf + 0x3b + 0x7c + 0x3f + 0xfc + 0x3a + 0x7d + 0x3f + 0xdf + 0x3a + 0x7d + 0x3f + 0xc4 + 0x3a + 0x7d + 0x3f + 0xad + 0x3a + 0x7e + 0x3f + 0x99 + 0x3a + 0x7e + 0x3f + 0x87 + 0x3a + 0x7e + 0x3f + 0x6f + 0x3a + 0x7e + 0x3f + 0x53 + 0x3a + 0x7f + 0x3f + 0x3a + 0x3a + 0x7f + 0x3f + 0x24 + 0x3a + 0x7f + 0x3f + 0x11 + 0x3a + 0x7f + 0x3f + 0x0 + 0x3a + 0x7f + 0x3f + 0xe2 + 0x39 + 0x7f + 0x3f + 0xc7 + 0x39 + 0x7e + 0x3f + 0x6f + 0x3a + 0x7e + 0x3f + 0x53 + 0x3a + 0x7f + 0x3f + 0x3a + 0x3a + 0x7f + 0x3f + 0x24 + 0x3a + 0x7f + 0x3f + 0x11 + 0x3a + 0x7f + 0x3f + 0x0 + 0x3a + 0x7f + 0x3f + 0xe2 + 0x39 + 0x7f + 0x3f + 0xc7 + 0x39 + +.data_segment_name +.data weak 64 _ZN16sigmoid_lut_fp16ILj0ELj256EE7data_abE DMb + 0x7f + 0xd + 0x0 + 0x0 + 0x3a + 0xe + 0x0 + 0x0 + 0xd + 0xf + 0x0 + 0x0 + 0x0 + 0x10 + 0x0 + 0x0 + 0x87 + 0x10 + 0x0 + 0x0 + 0x21 + 0x11 + 0x0 + 0x0 + 0xd0 + 0x11 + 0x0 + 0x0 + 0x96 + 0x12 + 0x0 + 0x0 + 0x7f + 0xd + 0x0 + 0x0 + 0x3a + 0xe + 0x0 + 0x0 + 0xd + 0xf + 0x0 + 0x0 + 0x0 + 0x10 + 0x0 + 0x0 + 0x87 + 0x10 + 0x0 + 0x0 + 0x21 + 0x11 + 0x0 + 0x0 + 0xd0 + 0x11 + 0x0 + 0x0 + 0x96 + 0x12 + 0x0 + 0x0 + 0x76 + 0x13 + 0x0 + 0x0 + 0x3a + 0x14 + 0x0 + 0x0 + 0xca + 0x14 + 0x0 + 0x0 + 0x6e + 0x15 + 0x0 + 0x0 + 0x26 + 0x16 + 0x0 + 0x0 + 0xf7 + 0x16 + 0x0 + 0x0 + 0xe4 + 0x17 + 0x0 + 0x0 + 0x78 + 0x18 + 0x0 + 0x0 + 0x76 + 0x13 + 0x0 + 0x0 + 0x3a + 0x14 + 0x0 + 0x0 + 0xca + 0x14 + 0x0 + 0x0 + 0x6e + 0x15 + 0x0 + 0x0 + 0x26 + 0x16 + 0x0 + 0x0 + 0xf7 + 0x16 + 0x0 + 0x0 + 0xe4 + 0x17 + 0x0 + 0x0 + 0x78 + 0x18 + 0x0 + 0x0 + 0x10 + 0x19 + 0x0 + 0x0 + 0xbd + 0x19 + 0x0 + 0x0 + 0x7f + 0x1a + 0x0 + 0x0 + 0x5c + 0x1b + 0x0 + 0x0 + 0xc1 + 0x26 + 0x27 + 0x1c + 0x7d + 0x27 + 0xb2 + 0x1c + 0x28 + 0x28 + 0x50 + 0x1d + 0x9d + 0x28 + 0x5 + 0x1e + 0x10 + 0x19 + 0x0 + 0x0 + 0xbd + 0x19 + 0x0 + 0x0 + 0x7f + 0x1a + 0x0 + 0x0 + 0x5c + 0x1b + 0x0 + 0x0 + 0xc1 + 0x26 + 0x27 + 0x1c + 0x7d + 0x27 + 0xb2 + 0x1c + 0x28 + 0x28 + 0x50 + 0x1d + 0x9d + 0x28 + 0x5 + 0x1e + 0x1c + 0x29 + 0xcf + 0x1e + 0xa9 + 0x29 + 0xb3 + 0x1f + 0x45 + 0x2a + 0x5b + 0x20 + 0xef + 0x2a + 0xec + 0x20 + 0xab + 0x2b + 0x91 + 0x21 + 0x3c + 0x2c + 0x49 + 0x22 + 0xab + 0x2c + 0x18 + 0x23 + 0x26 + 0x2d + 0x2 + 0x24 + 0x1c + 0x29 + 0xcf + 0x1e + 0xa9 + 0x29 + 0xb3 + 0x1f + 0x45 + 0x2a + 0x5b + 0x20 + 0xef + 0x2a + 0xec + 0x20 + 0xab + 0x2b + 0x91 + 0x21 + 0x3c + 0x2c + 0x49 + 0x22 + 0xab + 0x2c + 0x18 + 0x23 + 0x26 + 0x2d + 0x2 + 0x24 + 0xad + 0x2d + 0x86 + 0x24 + 0x3d + 0x2e + 0x19 + 0x25 + 0xdb + 0x2e + 0xbf + 0x25 + 0x86 + 0x2f + 0x78 + 0x26 + 0x1f + 0x30 + 0x47 + 0x27 + 0x84 + 0x30 + 0x19 + 0x28 + 0xee + 0x30 + 0x99 + 0x28 + 0x60 + 0x31 + 0x28 + 0x29 + 0xad + 0x2d + 0x86 + 0x24 + 0x3d + 0x2e + 0x19 + 0x25 + 0xdb + 0x2e + 0xbf + 0x25 + 0x86 + 0x2f + 0x78 + 0x26 + 0x1f + 0x30 + 0x47 + 0x27 + 0x84 + 0x30 + 0x19 + 0x28 + 0xee + 0x30 + 0x99 + 0x28 + 0x60 + 0x31 + 0x28 + 0x29 + 0xdc + 0x31 + 0xc9 + 0x29 + 0x5d + 0x32 + 0x79 + 0x2a + 0xe4 + 0x32 + 0x3a + 0x2b + 0x74 + 0x33 + 0x8 + 0x2c + 0x5 + 0x34 + 0x7d + 0x2c + 0x52 + 0x34 + 0xfc + 0x2c + 0xa2 + 0x34 + 0x85 + 0x2d + 0xf2 + 0x34 + 0x19 + 0x2e + 0xdc + 0x31 + 0xc9 + 0x29 + 0x5d + 0x32 + 0x79 + 0x2a + 0xe4 + 0x32 + 0x3a + 0x2b + 0x74 + 0x33 + 0x8 + 0x2c + 0x5 + 0x34 + 0x7d + 0x2c + 0x52 + 0x34 + 0xfc + 0x2c + 0xa2 + 0x34 + 0x85 + 0x2d + 0xf2 + 0x34 + 0x19 + 0x2e + 0x45 + 0x35 + 0xb9 + 0x2e + 0x96 + 0x35 + 0x62 + 0x2f + 0xe6 + 0x35 + 0x9 + 0x30 + 0x35 + 0x36 + 0x66 + 0x30 + 0x80 + 0x36 + 0xc6 + 0x30 + 0xc7 + 0x36 + 0x29 + 0x31 + 0x5 + 0x37 + 0x88 + 0x31 + 0x42 + 0x37 + 0xee + 0x31 + 0x45 + 0x35 + 0xb9 + 0x2e + 0x96 + 0x35 + 0x62 + 0x2f + 0xe6 + 0x35 + 0x9 + 0x30 + 0x35 + 0x36 + 0x66 + 0x30 + 0x80 + 0x36 + 0xc6 + 0x30 + 0xc7 + 0x36 + 0x29 + 0x31 + 0x5 + 0x37 + 0x88 + 0x31 + 0x42 + 0x37 + 0xee + 0x31 + 0x72 + 0x37 + 0x4b + 0x32 + 0x9e + 0x37 + 0xa6 + 0x32 + 0xc0 + 0x37 + 0xf9 + 0x32 + 0xdb + 0x37 + 0x46 + 0x33 + 0xec + 0x37 + 0x85 + 0x33 + 0xf7 + 0x37 + 0xba + 0x33 + 0xfc + 0x37 + 0xde + 0x33 + 0xff + 0x37 + 0xf7 + 0x33 + 0x72 + 0x37 + 0x4b + 0x32 + 0x9e + 0x37 + 0xa6 + 0x32 + 0xc0 + 0x37 + 0xf9 + 0x32 + 0xdb + 0x37 + 0x46 + 0x33 + 0xec + 0x37 + 0x85 + 0x33 + 0xf7 + 0x37 + 0xba + 0x33 + 0xfc + 0x37 + 0xde + 0x33 + 0xff + 0x37 + 0xf7 + 0x33 + 0x0 + 0x38 + 0x0 + 0x34 + 0x0 + 0x38 + 0xf7 + 0x33 + 0x1 + 0x38 + 0xdf + 0x33 + 0x4 + 0x38 + 0xba + 0x33 + 0xa + 0x38 + 0x86 + 0x33 + 0x14 + 0x38 + 0x44 + 0x33 + 0x20 + 0x38 + 0xf9 + 0x32 + 0x31 + 0x38 + 0xa4 + 0x32 + 0x0 + 0x38 + 0x0 + 0x34 + 0x0 + 0x38 + 0xf7 + 0x33 + 0x1 + 0x38 + 0xdf + 0x33 + 0x4 + 0x38 + 0xba + 0x33 + 0xa + 0x38 + 0x86 + 0x33 + 0x14 + 0x38 + 0x44 + 0x33 + 0x20 + 0x38 + 0xf9 + 0x32 + 0x31 + 0x38 + 0xa4 + 0x32 + 0x46 + 0x38 + 0x4a + 0x32 + 0x60 + 0x38 + 0xed + 0x31 + 0x7c + 0x38 + 0x8a + 0x31 + 0x9e + 0x38 + 0x27 + 0x31 + 0xc0 + 0x38 + 0xc7 + 0x30 + 0xe6 + 0x38 + 0x65 + 0x30 + 0xd + 0x39 + 0x9 + 0x30 + 0x35 + 0x39 + 0x61 + 0x2f + 0x46 + 0x38 + 0x4a + 0x32 + 0x60 + 0x38 + 0xed + 0x31 + 0x7c + 0x38 + 0x8a + 0x31 + 0x9e + 0x38 + 0x27 + 0x31 + 0xc0 + 0x38 + 0xc7 + 0x30 + 0xe6 + 0x38 + 0x65 + 0x30 + 0xd + 0x39 + 0x9 + 0x30 + 0x35 + 0x39 + 0x61 + 0x2f + 0x5d + 0x39 + 0xb7 + 0x2e + 0x87 + 0x39 + 0x1a + 0x2e + 0xaf + 0x39 + 0x86 + 0x2d + 0xd7 + 0x39 + 0xfc + 0x2c + 0xfe + 0x39 + 0x7d + 0x2c + 0x23 + 0x3a + 0x9 + 0x2c + 0x48 + 0x3a + 0x3c + 0x2b + 0x68 + 0x3a + 0x78 + 0x2a + 0x5d + 0x39 + 0xb7 + 0x2e + 0x87 + 0x39 + 0x1a + 0x2e + 0xaf + 0x39 + 0x86 + 0x2d + 0xd7 + 0x39 + 0xfc + 0x2c + 0xfe + 0x39 + 0x7d + 0x2c + 0x23 + 0x3a + 0x9 + 0x2c + 0x48 + 0x3a + 0x3c + 0x2b + 0x68 + 0x3a + 0x78 + 0x2a + 0x8a + 0x3a + 0xc7 + 0x29 + 0xa8 + 0x3a + 0x29 + 0x29 + 0xc4 + 0x3a + 0x99 + 0x28 + 0xdf + 0x3a + 0x18 + 0x28 + 0xf8 + 0x3a + 0x48 + 0x27 + 0xf + 0x3b + 0x79 + 0x26 + 0x25 + 0x3b + 0xbf + 0x25 + 0x39 + 0x3b + 0x1a + 0x25 + 0x8a + 0x3a + 0xc7 + 0x29 + 0xa8 + 0x3a + 0x29 + 0x29 + 0xc4 + 0x3a + 0x99 + 0x28 + 0xdf + 0x3a + 0x18 + 0x28 + 0xf8 + 0x3a + 0x48 + 0x27 + 0xf + 0x3b + 0x79 + 0x26 + 0x25 + 0x3b + 0xbf + 0x25 + 0x39 + 0x3b + 0x1a + 0x25 + 0x4a + 0x3b + 0x85 + 0x24 + 0x5b + 0x3b + 0x1 + 0x24 + 0x69 + 0x3b + 0x19 + 0x23 + 0x78 + 0x3b + 0x49 + 0x22 + 0x86 + 0x3b + 0x91 + 0x21 + 0x91 + 0x3b + 0xec + 0x20 + 0x9b + 0x3b + 0x5a + 0x20 + 0xa5 + 0x3b + 0xb2 + 0x1f + 0x4a + 0x3b + 0x85 + 0x24 + 0x5b + 0x3b + 0x1 + 0x24 + 0x69 + 0x3b + 0x19 + 0x23 + 0x78 + 0x3b + 0x49 + 0x22 + 0x86 + 0x3b + 0x91 + 0x21 + 0x91 + 0x3b + 0xec + 0x20 + 0x9b + 0x3b + 0x5a + 0x20 + 0xa5 + 0x3b + 0xb2 + 0x1f + 0xae + 0x3b + 0xce + 0x1e + 0xb6 + 0x3b + 0x5 + 0x1e + 0xbe + 0x3b + 0x53 + 0x1d + 0xc4 + 0x3b + 0xb2 + 0x1c + 0xca + 0x3b + 0x27 + 0x1c + 0xcf + 0x3b + 0x54 + 0x1b + 0xd5 + 0x3b + 0x7b + 0x1a + 0xd8 + 0x3b + 0xb8 + 0x19 + 0xae + 0x3b + 0xce + 0x1e + 0xb6 + 0x3b + 0x5 + 0x1e + 0xbe + 0x3b + 0x53 + 0x1d + 0xc4 + 0x3b + 0xb2 + 0x1c + 0xca + 0x3b + 0x27 + 0x1c + 0xcf + 0x3b + 0x54 + 0x1b + 0xd5 + 0x3b + 0x7b + 0x1a + 0xd8 + 0x3b + 0xb8 + 0x19 + 0xdc + 0x3b + 0xc + 0x19 + 0xe1 + 0x3b + 0x77 + 0x18 + 0xe3 + 0x3b + 0xe0 + 0x17 + 0xe6 + 0x3b + 0xf3 + 0x16 + 0xe8 + 0x3b + 0x22 + 0x16 + 0xec + 0x3b + 0x6c + 0x15 + 0xee + 0x3b + 0xca + 0x14 + 0xef + 0x3b + 0x39 + 0x14 + 0xdc + 0x3b + 0xc + 0x19 + 0xe1 + 0x3b + 0x77 + 0x18 + 0xe3 + 0x3b + 0xe0 + 0x17 + 0xe6 + 0x3b + 0xf3 + 0x16 + 0xe8 + 0x3b + 0x22 + 0x16 + 0xec + 0x3b + 0x6c + 0x15 + 0xee + 0x3b + 0xca + 0x14 + 0xef + 0x3b + 0x39 + 0x14 + 0xf1 + 0x3b + 0x74 + 0x13 + 0xf2 + 0x3b + 0x95 + 0x12 + 0xf3 + 0x3b + 0xce + 0x11 + 0xf5 + 0x3b + 0x1f + 0x11 + 0xf6 + 0x3b + 0x86 + 0x10 + 0xf8 + 0x3b + 0xff + 0xf + 0xf9 + 0x3b + 0xf + 0xf + 0xfa + 0x3b + 0x3a + 0xe + 0xf1 + 0x3b + 0x74 + 0x13 + 0xf2 + 0x3b + 0x95 + 0x12 + 0xf3 + 0x3b + 0xce + 0x11 + 0xf5 + 0x3b + 0x1f + 0x11 + 0xf6 + 0x3b + 0x86 + 0x10 + 0xf8 + 0x3b + 0xff + 0xf + 0xf9 + 0x3b + 0xf + 0xf + 0xfa + 0x3b + 0x3a + 0xe + +.data_segment_name +.data weak 64 _ZN16sigmoid_lut_fp16ILj0ELj256EE7data_cdE DMb + 0x7f + 0xd + 0x0 + 0x0 + 0x3a + 0xe + 0x0 + 0x0 + 0xd + 0xf + 0x0 + 0x0 + 0x0 + 0x10 + 0x0 + 0x0 + 0x87 + 0x10 + 0x0 + 0x0 + 0x21 + 0x11 + 0x0 + 0x0 + 0xd0 + 0x11 + 0x0 + 0x0 + 0x96 + 0x12 + 0x0 + 0x0 + 0x7f + 0xd + 0x0 + 0x0 + 0x3a + 0xe + 0x0 + 0x0 + 0xd + 0xf + 0x0 + 0x0 + 0x0 + 0x10 + 0x0 + 0x0 + 0x87 + 0x10 + 0x0 + 0x0 + 0x21 + 0x11 + 0x0 + 0x0 + 0xd0 + 0x11 + 0x0 + 0x0 + 0x96 + 0x12 + 0x0 + 0x0 + 0x76 + 0x13 + 0x0 + 0x0 + 0x3a + 0x14 + 0x0 + 0x0 + 0xca + 0x14 + 0x0 + 0x0 + 0x6e + 0x15 + 0x0 + 0x0 + 0x26 + 0x16 + 0x0 + 0x0 + 0xf7 + 0x16 + 0x0 + 0x0 + 0xe4 + 0x17 + 0x0 + 0x0 + 0x78 + 0x18 + 0x0 + 0x0 + 0x76 + 0x13 + 0x0 + 0x0 + 0x3a + 0x14 + 0x0 + 0x0 + 0xca + 0x14 + 0x0 + 0x0 + 0x6e + 0x15 + 0x0 + 0x0 + 0x26 + 0x16 + 0x0 + 0x0 + 0xf7 + 0x16 + 0x0 + 0x0 + 0xe4 + 0x17 + 0x0 + 0x0 + 0x78 + 0x18 + 0x0 + 0x0 + 0x10 + 0x19 + 0x0 + 0x0 + 0xbd + 0x19 + 0x0 + 0x0 + 0x7f + 0x1a + 0x0 + 0x0 + 0x5c + 0x1b + 0x0 + 0x0 + 0xc1 + 0x26 + 0x27 + 0x1c + 0x7d + 0x27 + 0xb2 + 0x1c + 0x28 + 0x28 + 0x50 + 0x1d + 0x9d + 0x28 + 0x5 + 0x1e + 0x10 + 0x19 + 0x0 + 0x0 + 0xbd + 0x19 + 0x0 + 0x0 + 0x7f + 0x1a + 0x0 + 0x0 + 0x5c + 0x1b + 0x0 + 0x0 + 0xc1 + 0x26 + 0x27 + 0x1c + 0x7d + 0x27 + 0xb2 + 0x1c + 0x28 + 0x28 + 0x50 + 0x1d + 0x9d + 0x28 + 0x5 + 0x1e + 0x1c + 0x29 + 0xcf + 0x1e + 0xa9 + 0x29 + 0xb3 + 0x1f + 0x45 + 0x2a + 0x5b + 0x20 + 0xef + 0x2a + 0xec + 0x20 + 0xab + 0x2b + 0x91 + 0x21 + 0x3c + 0x2c + 0x49 + 0x22 + 0xab + 0x2c + 0x18 + 0x23 + 0x26 + 0x2d + 0x2 + 0x24 + 0x1c + 0x29 + 0xcf + 0x1e + 0xa9 + 0x29 + 0xb3 + 0x1f + 0x45 + 0x2a + 0x5b + 0x20 + 0xef + 0x2a + 0xec + 0x20 + 0xab + 0x2b + 0x91 + 0x21 + 0x3c + 0x2c + 0x49 + 0x22 + 0xab + 0x2c + 0x18 + 0x23 + 0x26 + 0x2d + 0x2 + 0x24 + 0xad + 0x2d + 0x86 + 0x24 + 0x3d + 0x2e + 0x19 + 0x25 + 0xdb + 0x2e + 0xbf + 0x25 + 0x86 + 0x2f + 0x78 + 0x26 + 0x1f + 0x30 + 0x47 + 0x27 + 0x84 + 0x30 + 0x19 + 0x28 + 0xee + 0x30 + 0x99 + 0x28 + 0x60 + 0x31 + 0x28 + 0x29 + 0xad + 0x2d + 0x86 + 0x24 + 0x3d + 0x2e + 0x19 + 0x25 + 0xdb + 0x2e + 0xbf + 0x25 + 0x86 + 0x2f + 0x78 + 0x26 + 0x1f + 0x30 + 0x47 + 0x27 + 0x84 + 0x30 + 0x19 + 0x28 + 0xee + 0x30 + 0x99 + 0x28 + 0x60 + 0x31 + 0x28 + 0x29 + 0xdc + 0x31 + 0xc9 + 0x29 + 0x5d + 0x32 + 0x79 + 0x2a + 0xe4 + 0x32 + 0x3a + 0x2b + 0x74 + 0x33 + 0x8 + 0x2c + 0x5 + 0x34 + 0x7d + 0x2c + 0x52 + 0x34 + 0xfc + 0x2c + 0xa2 + 0x34 + 0x85 + 0x2d + 0xf2 + 0x34 + 0x19 + 0x2e + 0xdc + 0x31 + 0xc9 + 0x29 + 0x5d + 0x32 + 0x79 + 0x2a + 0xe4 + 0x32 + 0x3a + 0x2b + 0x74 + 0x33 + 0x8 + 0x2c + 0x5 + 0x34 + 0x7d + 0x2c + 0x52 + 0x34 + 0xfc + 0x2c + 0xa2 + 0x34 + 0x85 + 0x2d + 0xf2 + 0x34 + 0x19 + 0x2e + 0x45 + 0x35 + 0xb9 + 0x2e + 0x96 + 0x35 + 0x62 + 0x2f + 0xe6 + 0x35 + 0x9 + 0x30 + 0x35 + 0x36 + 0x66 + 0x30 + 0x80 + 0x36 + 0xc6 + 0x30 + 0xc7 + 0x36 + 0x29 + 0x31 + 0x5 + 0x37 + 0x88 + 0x31 + 0x42 + 0x37 + 0xee + 0x31 + 0x45 + 0x35 + 0xb9 + 0x2e + 0x96 + 0x35 + 0x62 + 0x2f + 0xe6 + 0x35 + 0x9 + 0x30 + 0x35 + 0x36 + 0x66 + 0x30 + 0x80 + 0x36 + 0xc6 + 0x30 + 0xc7 + 0x36 + 0x29 + 0x31 + 0x5 + 0x37 + 0x88 + 0x31 + 0x42 + 0x37 + 0xee + 0x31 + 0x72 + 0x37 + 0x4b + 0x32 + 0x9e + 0x37 + 0xa6 + 0x32 + 0xc0 + 0x37 + 0xf9 + 0x32 + 0xdb + 0x37 + 0x46 + 0x33 + 0xec + 0x37 + 0x85 + 0x33 + 0xf7 + 0x37 + 0xba + 0x33 + 0xfc + 0x37 + 0xde + 0x33 + 0xff + 0x37 + 0xf7 + 0x33 + 0x72 + 0x37 + 0x4b + 0x32 + 0x9e + 0x37 + 0xa6 + 0x32 + 0xc0 + 0x37 + 0xf9 + 0x32 + 0xdb + 0x37 + 0x46 + 0x33 + 0xec + 0x37 + 0x85 + 0x33 + 0xf7 + 0x37 + 0xba + 0x33 + 0xfc + 0x37 + 0xde + 0x33 + 0xff + 0x37 + 0xf7 + 0x33 + 0x0 + 0x38 + 0x0 + 0x34 + 0x0 + 0x38 + 0xf7 + 0x33 + 0x1 + 0x38 + 0xdf + 0x33 + 0x4 + 0x38 + 0xba + 0x33 + 0xa + 0x38 + 0x86 + 0x33 + 0x14 + 0x38 + 0x44 + 0x33 + 0x20 + 0x38 + 0xf9 + 0x32 + 0x31 + 0x38 + 0xa4 + 0x32 + 0x0 + 0x38 + 0x0 + 0x34 + 0x0 + 0x38 + 0xf7 + 0x33 + 0x1 + 0x38 + 0xdf + 0x33 + 0x4 + 0x38 + 0xba + 0x33 + 0xa + 0x38 + 0x86 + 0x33 + 0x14 + 0x38 + 0x44 + 0x33 + 0x20 + 0x38 + 0xf9 + 0x32 + 0x31 + 0x38 + 0xa4 + 0x32 + 0x46 + 0x38 + 0x4a + 0x32 + 0x60 + 0x38 + 0xed + 0x31 + 0x7c + 0x38 + 0x8a + 0x31 + 0x9e + 0x38 + 0x27 + 0x31 + 0xc0 + 0x38 + 0xc7 + 0x30 + 0xe6 + 0x38 + 0x65 + 0x30 + 0xd + 0x39 + 0x9 + 0x30 + 0x35 + 0x39 + 0x61 + 0x2f + 0x46 + 0x38 + 0x4a + 0x32 + 0x60 + 0x38 + 0xed + 0x31 + 0x7c + 0x38 + 0x8a + 0x31 + 0x9e + 0x38 + 0x27 + 0x31 + 0xc0 + 0x38 + 0xc7 + 0x30 + 0xe6 + 0x38 + 0x65 + 0x30 + 0xd + 0x39 + 0x9 + 0x30 + 0x35 + 0x39 + 0x61 + 0x2f + 0x5d + 0x39 + 0xb7 + 0x2e + 0x87 + 0x39 + 0x1a + 0x2e + 0xaf + 0x39 + 0x86 + 0x2d + 0xd7 + 0x39 + 0xfc + 0x2c + 0xfe + 0x39 + 0x7d + 0x2c + 0x23 + 0x3a + 0x9 + 0x2c + 0x48 + 0x3a + 0x3c + 0x2b + 0x68 + 0x3a + 0x78 + 0x2a + 0x5d + 0x39 + 0xb7 + 0x2e + 0x87 + 0x39 + 0x1a + 0x2e + 0xaf + 0x39 + 0x86 + 0x2d + 0xd7 + 0x39 + 0xfc + 0x2c + 0xfe + 0x39 + 0x7d + 0x2c + 0x23 + 0x3a + 0x9 + 0x2c + 0x48 + 0x3a + 0x3c + 0x2b + 0x68 + 0x3a + 0x78 + 0x2a + 0x8a + 0x3a + 0xc7 + 0x29 + 0xa8 + 0x3a + 0x29 + 0x29 + 0xc4 + 0x3a + 0x99 + 0x28 + 0xdf + 0x3a + 0x18 + 0x28 + 0xf8 + 0x3a + 0x48 + 0x27 + 0xf + 0x3b + 0x79 + 0x26 + 0x25 + 0x3b + 0xbf + 0x25 + 0x39 + 0x3b + 0x1a + 0x25 + 0x8a + 0x3a + 0xc7 + 0x29 + 0xa8 + 0x3a + 0x29 + 0x29 + 0xc4 + 0x3a + 0x99 + 0x28 + 0xdf + 0x3a + 0x18 + 0x28 + 0xf8 + 0x3a + 0x48 + 0x27 + 0xf + 0x3b + 0x79 + 0x26 + 0x25 + 0x3b + 0xbf + 0x25 + 0x39 + 0x3b + 0x1a + 0x25 + 0x4a + 0x3b + 0x85 + 0x24 + 0x5b + 0x3b + 0x1 + 0x24 + 0x69 + 0x3b + 0x19 + 0x23 + 0x78 + 0x3b + 0x49 + 0x22 + 0x86 + 0x3b + 0x91 + 0x21 + 0x91 + 0x3b + 0xec + 0x20 + 0x9b + 0x3b + 0x5a + 0x20 + 0xa5 + 0x3b + 0xb2 + 0x1f + 0x4a + 0x3b + 0x85 + 0x24 + 0x5b + 0x3b + 0x1 + 0x24 + 0x69 + 0x3b + 0x19 + 0x23 + 0x78 + 0x3b + 0x49 + 0x22 + 0x86 + 0x3b + 0x91 + 0x21 + 0x91 + 0x3b + 0xec + 0x20 + 0x9b + 0x3b + 0x5a + 0x20 + 0xa5 + 0x3b + 0xb2 + 0x1f + 0xae + 0x3b + 0xce + 0x1e + 0xb6 + 0x3b + 0x5 + 0x1e + 0xbe + 0x3b + 0x53 + 0x1d + 0xc4 + 0x3b + 0xb2 + 0x1c + 0xca + 0x3b + 0x27 + 0x1c + 0xcf + 0x3b + 0x54 + 0x1b + 0xd5 + 0x3b + 0x7b + 0x1a + 0xd8 + 0x3b + 0xb8 + 0x19 + 0xae + 0x3b + 0xce + 0x1e + 0xb6 + 0x3b + 0x5 + 0x1e + 0xbe + 0x3b + 0x53 + 0x1d + 0xc4 + 0x3b + 0xb2 + 0x1c + 0xca + 0x3b + 0x27 + 0x1c + 0xcf + 0x3b + 0x54 + 0x1b + 0xd5 + 0x3b + 0x7b + 0x1a + 0xd8 + 0x3b + 0xb8 + 0x19 + 0xdc + 0x3b + 0xc + 0x19 + 0xe1 + 0x3b + 0x77 + 0x18 + 0xe3 + 0x3b + 0xe0 + 0x17 + 0xe6 + 0x3b + 0xf3 + 0x16 + 0xe8 + 0x3b + 0x22 + 0x16 + 0xec + 0x3b + 0x6c + 0x15 + 0xee + 0x3b + 0xca + 0x14 + 0xef + 0x3b + 0x39 + 0x14 + 0xdc + 0x3b + 0xc + 0x19 + 0xe1 + 0x3b + 0x77 + 0x18 + 0xe3 + 0x3b + 0xe0 + 0x17 + 0xe6 + 0x3b + 0xf3 + 0x16 + 0xe8 + 0x3b + 0x22 + 0x16 + 0xec + 0x3b + 0x6c + 0x15 + 0xee + 0x3b + 0xca + 0x14 + 0xef + 0x3b + 0x39 + 0x14 + 0xf1 + 0x3b + 0x74 + 0x13 + 0xf2 + 0x3b + 0x95 + 0x12 + 0xf3 + 0x3b + 0xce + 0x11 + 0xf5 + 0x3b + 0x1f + 0x11 + 0xf6 + 0x3b + 0x86 + 0x10 + 0xf8 + 0x3b + 0xff + 0xf + 0xf9 + 0x3b + 0xf + 0xf + 0xfa + 0x3b + 0x3a + 0xe + 0xf1 + 0x3b + 0x74 + 0x13 + 0xf2 + 0x3b + 0x95 + 0x12 + 0xf3 + 0x3b + 0xce + 0x11 + 0xf5 + 0x3b + 0x1f + 0x11 + 0xf6 + 0x3b + 0x86 + 0x10 + 0xf8 + 0x3b + 0xff + 0xf + 0xf9 + 0x3b + 0xf + 0xf + 0xfa + 0x3b + 0x3a + 0xe + +.data_segment_name +.data weak 64 _ZN11gelu_lut_32ILj0ELj512EE7data_abE DMb + 0x1c + 0xb8 + 0xf0 + 0xb6 + 0x3a + 0xb8 + 0x10 + 0xb7 + 0x5f + 0xb8 + 0x2e + 0xb7 + 0x85 + 0xb8 + 0x51 + 0xb7 + 0x9e + 0xb8 + 0x7a + 0xb7 + 0xbc + 0xb8 + 0x96 + 0xb7 + 0xdf + 0xb8 + 0xb3 + 0xb7 + 0x4 + 0xb9 + 0xd5 + 0xb7 + 0x1c + 0xb8 + 0xf0 + 0xb6 + 0x3a + 0xb8 + 0x10 + 0xb7 + 0x5f + 0xb8 + 0x2e + 0xb7 + 0x85 + 0xb8 + 0x51 + 0xb7 + 0x9e + 0xb8 + 0x7a + 0xb7 + 0xbc + 0xb8 + 0x96 + 0xb7 + 0xdf + 0xb8 + 0xb3 + 0xb7 + 0x4 + 0xb9 + 0xd5 + 0xb7 + 0x1c + 0xb9 + 0xfe + 0xb7 + 0x38 + 0xb9 + 0x17 + 0xb8 + 0x58 + 0xb9 + 0x33 + 0xb8 + 0x7f + 0xb9 + 0x55 + 0xb8 + 0x96 + 0xb9 + 0x7c + 0xb8 + 0xb0 + 0xb9 + 0x95 + 0xb8 + 0xcd + 0xb9 + 0xaf + 0xb8 + 0xef + 0xb9 + 0xce + 0xb8 + 0x1c + 0xb9 + 0xfe + 0xb7 + 0x38 + 0xb9 + 0x17 + 0xb8 + 0x58 + 0xb9 + 0x33 + 0xb8 + 0x7f + 0xb9 + 0x55 + 0xb8 + 0x96 + 0xb9 + 0x7c + 0xb8 + 0xb0 + 0xb9 + 0x95 + 0xb8 + 0xcd + 0xb9 + 0xaf + 0xb8 + 0xef + 0xb9 + 0xce + 0xb8 + 0xc + 0xba + 0xf3 + 0xb8 + 0x22 + 0xba + 0xe + 0xb9 + 0x3d + 0xba + 0x27 + 0xb9 + 0x5b + 0xba + 0x43 + 0xb9 + 0x7e + 0xba + 0x64 + 0xb9 + 0x93 + 0xba + 0x85 + 0xb9 + 0xaa + 0xba + 0x9b + 0xb9 + 0xc4 + 0xba + 0xb4 + 0xb9 + 0xc + 0xba + 0xf3 + 0xb8 + 0x22 + 0xba + 0xe + 0xb9 + 0x3d + 0xba + 0x27 + 0xb9 + 0x5b + 0xba + 0x43 + 0xb9 + 0x7e + 0xba + 0x64 + 0xb9 + 0x93 + 0xba + 0x85 + 0xb9 + 0xaa + 0xba + 0x9b + 0xb9 + 0xc4 + 0xba + 0xb4 + 0xb9 + 0xe1 + 0xba + 0xd1 + 0xb9 + 0x1 + 0xbb + 0xf2 + 0xb9 + 0x14 + 0xbb + 0xc + 0xba + 0x2a + 0xbb + 0x22 + 0xba + 0x42 + 0xbb + 0x3a + 0xba + 0x5d + 0xbb + 0x56 + 0xba + 0x7c + 0xbb + 0x76 + 0xba + 0x8f + 0xbb + 0x8d + 0xba + 0xe1 + 0xba + 0xd1 + 0xb9 + 0x1 + 0xbb + 0xf2 + 0xb9 + 0x14 + 0xbb + 0xc + 0xba + 0x2a + 0xbb + 0x22 + 0xba + 0x42 + 0xbb + 0x3a + 0xba + 0x5d + 0xbb + 0x56 + 0xba + 0x7c + 0xbb + 0x76 + 0xba + 0x8f + 0xbb + 0x8d + 0xba + 0xa3 + 0xbb + 0xa2 + 0xba + 0xb8 + 0xbb + 0xb9 + 0xba + 0xd0 + 0xbb + 0xd3 + 0xba + 0xeb + 0xbb + 0xf0 + 0xba + 0x5 + 0xbc + 0x9 + 0xbb + 0x15 + 0xbc + 0x1b + 0xbb + 0x27 + 0xbc + 0x30 + 0xbb + 0x3b + 0xbc + 0x47 + 0xbb + 0xa3 + 0xbb + 0xa2 + 0xba + 0xb8 + 0xbb + 0xb9 + 0xba + 0xd0 + 0xbb + 0xd3 + 0xba + 0xeb + 0xbb + 0xf0 + 0xba + 0x5 + 0xbc + 0x9 + 0xbb + 0x15 + 0xbc + 0x1b + 0xbb + 0x27 + 0xbc + 0x30 + 0xbb + 0x3b + 0xbc + 0x47 + 0xbb + 0x52 + 0xbc + 0x61 + 0xbb + 0x6b + 0xbc + 0x7e + 0xbb + 0x83 + 0xbc + 0x8f + 0xbb + 0x91 + 0xbc + 0xa0 + 0xbb + 0xa2 + 0xbc + 0xb4 + 0xbb + 0xb3 + 0xbc + 0xca + 0xbb + 0xc6 + 0xbc + 0xe1 + 0xbb + 0xdb + 0xbc + 0xfc + 0xbb + 0x52 + 0xbc + 0x61 + 0xbb + 0x6b + 0xbc + 0x7e + 0xbb + 0x83 + 0xbc + 0x8f + 0xbb + 0x91 + 0xbc + 0xa0 + 0xbb + 0xa2 + 0xbc + 0xb4 + 0xbb + 0xb3 + 0xbc + 0xca + 0xbb + 0xc6 + 0xbc + 0xe1 + 0xbb + 0xdb + 0xbc + 0xfc + 0xbb + 0xf2 + 0xbc + 0xc + 0xbc + 0x5 + 0xbd + 0x1c + 0xbc + 0x12 + 0xbd + 0x2d + 0xbc + 0x21 + 0xbd + 0x40 + 0xbc + 0x30 + 0xbd + 0x54 + 0xbc + 0x40 + 0xbd + 0x6a + 0xbc + 0x51 + 0xbd + 0x81 + 0xbc + 0x64 + 0xbd + 0x8e + 0xbc + 0xf2 + 0xbc + 0xc + 0xbc + 0x5 + 0xbd + 0x1c + 0xbc + 0x12 + 0xbd + 0x2d + 0xbc + 0x21 + 0xbd + 0x40 + 0xbc + 0x30 + 0xbd + 0x54 + 0xbc + 0x40 + 0xbd + 0x6a + 0xbc + 0x51 + 0xbd + 0x81 + 0xbc + 0x64 + 0xbd + 0x8e + 0xbc + 0x78 + 0xbd + 0x9c + 0xbc + 0x86 + 0xbd + 0xab + 0xbc + 0x91 + 0xbd + 0xbb + 0xbc + 0x9d + 0xbd + 0xcc + 0xbc + 0xaa + 0xbd + 0xdf + 0xbc + 0xb6 + 0xbd + 0xf2 + 0xbc + 0xc3 + 0xbd + 0x3 + 0xbd + 0xd1 + 0xbd + 0xe + 0xbd + 0x78 + 0xbd + 0x9c + 0xbc + 0x86 + 0xbd + 0xab + 0xbc + 0x91 + 0xbd + 0xbb + 0xbc + 0x9d + 0xbd + 0xcc + 0xbc + 0xaa + 0xbd + 0xdf + 0xbc + 0xb6 + 0xbd + 0xf2 + 0xbc + 0xc3 + 0xbd + 0x3 + 0xbd + 0xd1 + 0xbd + 0xe + 0xbd + 0xe0 + 0xbd + 0x1a + 0xbd + 0xef + 0xbd + 0x26 + 0xbd + 0xff + 0xbd + 0x33 + 0xbd + 0x8 + 0xbe + 0x41 + 0xbd + 0x10 + 0xbe + 0x4f + 0xbd + 0x18 + 0xbe + 0x5d + 0xbd + 0x21 + 0xbe + 0x6c + 0xbd + 0x2a + 0xbe + 0x7c + 0xbd + 0xe0 + 0xbd + 0x1a + 0xbd + 0xef + 0xbd + 0x26 + 0xbd + 0xff + 0xbd + 0x33 + 0xbd + 0x8 + 0xbe + 0x41 + 0xbd + 0x10 + 0xbe + 0x4f + 0xbd + 0x18 + 0xbe + 0x5d + 0xbd + 0x21 + 0xbe + 0x6c + 0xbd + 0x2a + 0xbe + 0x7c + 0xbd + 0x33 + 0xbe + 0x86 + 0xbd + 0x3b + 0xbe + 0x8e + 0xbd + 0x45 + 0xbe + 0x97 + 0xbd + 0x4d + 0xbe + 0x9f + 0xbd + 0x57 + 0xbe + 0xa8 + 0xbd + 0x5f + 0xbe + 0xb0 + 0xbd + 0x67 + 0xbe + 0xb9 + 0xbd + 0x6f + 0xbe + 0xc1 + 0xbd + 0x33 + 0xbe + 0x86 + 0xbd + 0x3b + 0xbe + 0x8e + 0xbd + 0x45 + 0xbe + 0x97 + 0xbd + 0x4d + 0xbe + 0x9f + 0xbd + 0x57 + 0xbe + 0xa8 + 0xbd + 0x5f + 0xbe + 0xb0 + 0xbd + 0x67 + 0xbe + 0xb9 + 0xbd + 0x6f + 0xbe + 0xc1 + 0xbd + 0x78 + 0xbe + 0xca + 0xbd + 0x7f + 0xbe + 0xd2 + 0xbd + 0x83 + 0xbe + 0xda + 0xbd + 0x87 + 0xbe + 0xe2 + 0xbd + 0x8a + 0xbe + 0xe9 + 0xbd + 0x8d + 0xbe + 0xf0 + 0xbd + 0x8f + 0xbe + 0xf6 + 0xbd + 0x91 + 0xbe + 0xfb + 0xbd + 0x78 + 0xbe + 0xca + 0xbd + 0x7f + 0xbe + 0xd2 + 0xbd + 0x83 + 0xbe + 0xda + 0xbd + 0x87 + 0xbe + 0xe2 + 0xbd + 0x8a + 0xbe + 0xe9 + 0xbd + 0x8d + 0xbe + 0xf0 + 0xbd + 0x8f + 0xbe + 0xf6 + 0xbd + 0x91 + 0xbe + 0xfb + 0xbd + 0x93 + 0xbe + 0x0 + 0xbe + 0x95 + 0xbe + 0x2 + 0xbe + 0x96 + 0xbe + 0x3 + 0xbe + 0x96 + 0xbe + 0x4 + 0xbe + 0x96 + 0xbe + 0x4 + 0xbe + 0x96 + 0xbe + 0x4 + 0xbe + 0x95 + 0xbe + 0x2 + 0xbe + 0x94 + 0xbe + 0x0 + 0xbe + 0x93 + 0xbe + 0x0 + 0xbe + 0x95 + 0xbe + 0x2 + 0xbe + 0x96 + 0xbe + 0x3 + 0xbe + 0x96 + 0xbe + 0x4 + 0xbe + 0x96 + 0xbe + 0x4 + 0xbe + 0x96 + 0xbe + 0x4 + 0xbe + 0x95 + 0xbe + 0x2 + 0xbe + 0x94 + 0xbe + 0x0 + 0xbe + 0x92 + 0xbe + 0xfb + 0xbd + 0x90 + 0xbe + 0xf4 + 0xbd + 0x8d + 0xbe + 0xeb + 0xbd + 0x8a + 0xbe + 0xe0 + 0xbd + 0x86 + 0xbe + 0xd3 + 0xbd + 0x82 + 0xbe + 0xc4 + 0xbd + 0x7b + 0xbe + 0xb2 + 0xbd + 0x72 + 0xbe + 0x9f + 0xbd + 0x92 + 0xbe + 0xfb + 0xbd + 0x90 + 0xbe + 0xf4 + 0xbd + 0x8d + 0xbe + 0xeb + 0xbd + 0x8a + 0xbe + 0xe0 + 0xbd + 0x86 + 0xbe + 0xd3 + 0xbd + 0x82 + 0xbe + 0xc4 + 0xbd + 0x7b + 0xbe + 0xb2 + 0xbd + 0x72 + 0xbe + 0x9f + 0xbd + 0x67 + 0xbe + 0x89 + 0xbd + 0x5c + 0xbe + 0x61 + 0xbd + 0x51 + 0xbe + 0x2c + 0xbd + 0x44 + 0xbe + 0xe3 + 0xbc + 0x38 + 0xbe + 0x4b + 0xbc + 0x2b + 0xbe + 0x89 + 0x3b + 0x1e + 0xbe + 0xb4 + 0x3c + 0x11 + 0xbe + 0x27 + 0x3d + 0x67 + 0xbe + 0x89 + 0xbd + 0x5c + 0xbe + 0x61 + 0xbd + 0x51 + 0xbe + 0x2c + 0xbd + 0x44 + 0xbe + 0xe3 + 0xbc + 0x38 + 0xbe + 0x4b + 0xbc + 0x2b + 0xbe + 0x89 + 0x3b + 0x1e + 0xbe + 0xb4 + 0x3c + 0x11 + 0xbe + 0x27 + 0x3d + 0x3 + 0xbe + 0x79 + 0x3d + 0xec + 0xbd + 0xa8 + 0x3d + 0xd2 + 0xbd + 0xd6 + 0x3d + 0xb9 + 0xbd + 0x3 + 0x3e + 0xa1 + 0xbd + 0x1c + 0x3e + 0x89 + 0xbd + 0x36 + 0x3e + 0x66 + 0xbd + 0x51 + 0x3e + 0x3e + 0xbd + 0x6c + 0x3e + 0x3 + 0xbe + 0x79 + 0x3d + 0xec + 0xbd + 0xa8 + 0x3d + 0xd2 + 0xbd + 0xd6 + 0x3d + 0xb9 + 0xbd + 0x3 + 0x3e + 0xa1 + 0xbd + 0x1c + 0x3e + 0x89 + 0xbd + 0x36 + 0x3e + 0x66 + 0xbd + 0x51 + 0x3e + 0x3e + 0xbd + 0x6c + 0x3e + 0x19 + 0xbd + 0x84 + 0x3e + 0xec + 0xbc + 0x93 + 0x3e + 0xaf + 0xbc + 0xa2 + 0x3e + 0x76 + 0xbc + 0xb1 + 0x3e + 0x1c + 0xbc + 0xc1 + 0x3e + 0xb5 + 0xbb + 0xd0 + 0x3e + 0x21 + 0xbb + 0xe0 + 0x3e + 0x20 + 0xba + 0xf0 + 0x3e + 0x19 + 0xbd + 0x84 + 0x3e + 0xec + 0xbc + 0x93 + 0x3e + 0xaf + 0xbc + 0xa2 + 0x3e + 0x76 + 0xbc + 0xb1 + 0x3e + 0x1c + 0xbc + 0xc1 + 0x3e + 0xb5 + 0xbb + 0xd0 + 0x3e + 0x21 + 0xbb + 0xe0 + 0x3e + 0x20 + 0xba + 0xf0 + 0x3e + 0x0 + 0x0 + 0x0 + 0x3f + 0x20 + 0xba + 0x8 + 0x3f + 0x21 + 0xbb + 0x10 + 0x3f + 0xb5 + 0xbb + 0x18 + 0x3f + 0x21 + 0xbc + 0x20 + 0x3f + 0x70 + 0xbc + 0x27 + 0x3f + 0xaf + 0xbc + 0x2f + 0x3f + 0xe7 + 0xbc + 0x36 + 0x3f + 0x0 + 0x0 + 0x0 + 0x3f + 0x20 + 0xba + 0x8 + 0x3f + 0x21 + 0xbb + 0x10 + 0x3f + 0xb5 + 0xbb + 0x18 + 0x3f + 0x21 + 0xbc + 0x20 + 0x3f + 0x70 + 0xbc + 0x27 + 0x3f + 0xaf + 0xbc + 0x2f + 0x3f + 0xe7 + 0xbc + 0x36 + 0x3f + 0x19 + 0xbd + 0x3e + 0x3f + 0x3e + 0xbd + 0x45 + 0x3f + 0x68 + 0xbd + 0x4c + 0x3f + 0x8b + 0xbd + 0x53 + 0x3f + 0xa1 + 0xbd + 0x59 + 0x3f + 0xb8 + 0xbd + 0x5f + 0x3f + 0xd1 + 0xbd + 0x65 + 0x3f + 0xec + 0xbd + 0x6b + 0x3f + 0x19 + 0xbd + 0x3e + 0x3f + 0x3e + 0xbd + 0x45 + 0x3f + 0x68 + 0xbd + 0x4c + 0x3f + 0x8b + 0xbd + 0x53 + 0x3f + 0xa1 + 0xbd + 0x59 + 0x3f + 0xb8 + 0xbd + 0x5f + 0x3f + 0xd1 + 0xbd + 0x65 + 0x3f + 0xec + 0xbd + 0x6b + 0x3f + 0x2 + 0xbe + 0x70 + 0x3f + 0x12 + 0xbe + 0x76 + 0x3f + 0x1d + 0xbe + 0x7a + 0x3f + 0x2b + 0xbe + 0x7f + 0x3f + 0x3a + 0xbe + 0x82 + 0x3f + 0x47 + 0xbe + 0x84 + 0x3f + 0x4e + 0xbe + 0x85 + 0x3f + 0x5c + 0xbe + 0x87 + 0x3f + 0x2 + 0xbe + 0x70 + 0x3f + 0x12 + 0xbe + 0x76 + 0x3f + 0x1d + 0xbe + 0x7a + 0x3f + 0x2b + 0xbe + 0x7f + 0x3f + 0x3a + 0xbe + 0x82 + 0x3f + 0x47 + 0xbe + 0x84 + 0x3f + 0x4e + 0xbe + 0x85 + 0x3f + 0x5c + 0xbe + 0x87 + 0x3f + 0x6b + 0xbe + 0x89 + 0x3f + 0x73 + 0xbe + 0x8a + 0x3f + 0x7a + 0xbe + 0x8b + 0x3f + 0x81 + 0xbe + 0x8c + 0x3f + 0x86 + 0xbe + 0x8d + 0x3f + 0x8a + 0xbe + 0x8e + 0x3f + 0x8f + 0xbe + 0x8f + 0x3f + 0x8f + 0xbe + 0x8f + 0x3f + 0x6b + 0xbe + 0x89 + 0x3f + 0x73 + 0xbe + 0x8a + 0x3f + 0x7a + 0xbe + 0x8b + 0x3f + 0x81 + 0xbe + 0x8c + 0x3f + 0x86 + 0xbe + 0x8d + 0x3f + 0x8a + 0xbe + 0x8e + 0x3f + 0x8f + 0xbe + 0x8f + 0x3f + 0x8f + 0xbe + 0x8f + 0x3f + 0x94 + 0xbe + 0x90 + 0x3f + 0x94 + 0xbe + 0x90 + 0x3f + 0x94 + 0xbe + 0x90 + 0x3f + 0x94 + 0xbe + 0x90 + 0x3f + 0x93 + 0xbe + 0x90 + 0x3f + 0x93 + 0xbe + 0x90 + 0x3f + 0x93 + 0xbe + 0x90 + 0x3f + 0x93 + 0xbe + 0x90 + 0x3f + 0x94 + 0xbe + 0x90 + 0x3f + 0x94 + 0xbe + 0x90 + 0x3f + 0x94 + 0xbe + 0x90 + 0x3f + 0x94 + 0xbe + 0x90 + 0x3f + 0x93 + 0xbe + 0x90 + 0x3f + 0x93 + 0xbe + 0x90 + 0x3f + 0x93 + 0xbe + 0x90 + 0x3f + 0x93 + 0xbe + 0x90 + 0x3f + 0x93 + 0xbe + 0x90 + 0x3f + 0x93 + 0xbe + 0x90 + 0x3f + 0x8d + 0xbe + 0x8f + 0x3f + 0x8d + 0xbe + 0x8f + 0x3f + 0x8d + 0xbe + 0x8f + 0x3f + 0x86 + 0xbe + 0x8e + 0x3f + 0x86 + 0xbe + 0x8e + 0x3f + 0x7d + 0xbe + 0x8d + 0x3f + 0x93 + 0xbe + 0x90 + 0x3f + 0x93 + 0xbe + 0x90 + 0x3f + 0x8d + 0xbe + 0x8f + 0x3f + 0x8d + 0xbe + 0x8f + 0x3f + 0x8d + 0xbe + 0x8f + 0x3f + 0x86 + 0xbe + 0x8e + 0x3f + 0x86 + 0xbe + 0x8e + 0x3f + 0x7d + 0xbe + 0x8d + 0x3f + 0x7d + 0xbe + 0x8d + 0x3f + 0x6e + 0xbe + 0x8c + 0x3f + 0x6e + 0xbe + 0x8c + 0x3f + 0x5f + 0xbe + 0x8b + 0x3f + 0x4e + 0xbe + 0x8a + 0x3f + 0x4e + 0xbe + 0x8a + 0x3f + 0x3e + 0xbe + 0x89 + 0x3f + 0x3e + 0xbe + 0x89 + 0x3f + 0x7d + 0xbe + 0x8d + 0x3f + 0x6e + 0xbe + 0x8c + 0x3f + 0x6e + 0xbe + 0x8c + 0x3f + 0x5f + 0xbe + 0x8b + 0x3f + 0x4e + 0xbe + 0x8a + 0x3f + 0x4e + 0xbe + 0x8a + 0x3f + 0x3e + 0xbe + 0x89 + 0x3f + 0x3e + 0xbe + 0x89 + 0x3f + 0x2c + 0xbe + 0x88 + 0x3f + 0x2c + 0xbe + 0x88 + 0x3f + 0x1a + 0xbe + 0x87 + 0x3f + 0x1a + 0xbe + 0x87 + 0x3f + 0x7 + 0xbe + 0x86 + 0x3f + 0x7 + 0xbe + 0x86 + 0x3f + 0x7 + 0xbe + 0x86 + 0x3f + 0xe8 + 0xbd + 0x85 + 0x3f + 0x2c + 0xbe + 0x88 + 0x3f + 0x2c + 0xbe + 0x88 + 0x3f + 0x1a + 0xbe + 0x87 + 0x3f + 0x1a + 0xbe + 0x87 + 0x3f + 0x7 + 0xbe + 0x86 + 0x3f + 0x7 + 0xbe + 0x86 + 0x3f + 0x7 + 0xbe + 0x86 + 0x3f + 0xe8 + 0xbd + 0x85 + 0x3f + 0xe8 + 0xbd + 0x85 + 0x3f + 0xbf + 0xbd + 0x84 + 0x3f + 0xbf + 0xbd + 0x84 + 0x3f + 0xbf + 0xbd + 0x84 + 0x3f + 0x95 + 0xbd + 0x83 + 0x3f + 0x95 + 0xbd + 0x83 + 0x3f + 0x95 + 0xbd + 0x83 + 0x3f + 0x95 + 0xbd + 0x83 + 0x3f + 0xe8 + 0xbd + 0x85 + 0x3f + 0xbf + 0xbd + 0x84 + 0x3f + 0xbf + 0xbd + 0x84 + 0x3f + 0xbf + 0xbd + 0x84 + 0x3f + 0x95 + 0xbd + 0x83 + 0x3f + 0x95 + 0xbd + 0x83 + 0x3f + 0x95 + 0xbd + 0x83 + 0x3f + 0x95 + 0xbd + 0x83 + 0x3f + 0x50 + 0xbd + 0x82 + 0x3f + 0x50 + 0xbd + 0x82 + 0x3f + 0x50 + 0xbd + 0x82 + 0x3f + 0x50 + 0xbd + 0x82 + 0x3f + 0x50 + 0xbd + 0x82 + 0x3f + 0xe1 + 0xbc + 0x81 + 0x3f + 0xe0 + 0xbc + 0x81 + 0x3f + 0xdf + 0xbc + 0x81 + 0x3f + 0x50 + 0xbd + 0x82 + 0x3f + 0x50 + 0xbd + 0x82 + 0x3f + 0x50 + 0xbd + 0x82 + 0x3f + 0x50 + 0xbd + 0x82 + 0x3f + 0x50 + 0xbd + 0x82 + 0x3f + 0xe1 + 0xbc + 0x81 + 0x3f + 0xe0 + 0xbc + 0x81 + 0x3f + 0xdf + 0xbc + 0x81 + 0x3f + 0xdf + 0xbc + 0x81 + 0x3f + 0xdf + 0xbc + 0x81 + 0x3f + 0xdf + 0xbc + 0x81 + 0x3f + 0xdf + 0xbc + 0x81 + 0x3f + 0xe0 + 0xbc + 0x81 + 0x3f + 0xe1 + 0xbc + 0x81 + 0x3f + 0xe2 + 0xbc + 0x81 + 0x3f + 0x97 + 0xba + 0x80 + 0x3f + 0xdf + 0xbc + 0x81 + 0x3f + 0xdf + 0xbc + 0x81 + 0x3f + 0xdf + 0xbc + 0x81 + 0x3f + 0xdf + 0xbc + 0x81 + 0x3f + 0xe0 + 0xbc + 0x81 + 0x3f + 0xe1 + 0xbc + 0x81 + 0x3f + 0xe2 + 0xbc + 0x81 + 0x3f + 0x97 + 0xba + 0x80 + 0x3f + 0x84 + 0xba + 0x80 + 0x3f + 0x67 + 0xba + 0x80 + 0x3f + 0x4a + 0xba + 0x80 + 0x3f + 0x30 + 0xba + 0x80 + 0x3f + 0x1a + 0xba + 0x80 + 0x3f + 0x5 + 0xba + 0x80 + 0x3f + 0xe8 + 0xb9 + 0x80 + 0x3f + 0xc9 + 0xb9 + 0x80 + 0x3f + 0x84 + 0xba + 0x80 + 0x3f + 0x67 + 0xba + 0x80 + 0x3f + 0x4a + 0xba + 0x80 + 0x3f + 0x30 + 0xba + 0x80 + 0x3f + 0x1a + 0xba + 0x80 + 0x3f + 0x5 + 0xba + 0x80 + 0x3f + 0xe8 + 0xb9 + 0x80 + 0x3f + 0xc9 + 0xb9 + 0x80 + 0x3f + 0xae + 0xb9 + 0x80 + 0x3f + 0x96 + 0xb9 + 0x80 + 0x3f + 0x82 + 0xb9 + 0x80 + 0x3f + 0x5f + 0xb9 + 0x80 + 0x3f + 0x40 + 0xb9 + 0x80 + 0x3f + 0x25 + 0xb9 + 0x80 + 0x3f + 0xd + 0xb9 + 0x80 + 0x3f + 0xf2 + 0xb8 + 0x80 + 0x3f + 0xae + 0xb9 + 0x80 + 0x3f + 0x96 + 0xb9 + 0x80 + 0x3f + 0x82 + 0xb9 + 0x80 + 0x3f + 0x5f + 0xb9 + 0x80 + 0x3f + 0x40 + 0xb9 + 0x80 + 0x3f + 0x25 + 0xb9 + 0x80 + 0x3f + 0xd + 0xb9 + 0x80 + 0x3f + 0xf2 + 0xb8 + 0x80 + 0x3f + 0xcf + 0xb8 + 0x80 + 0x3f + 0xb0 + 0xb8 + 0x80 + 0x3f + 0x96 + 0xb8 + 0x80 + 0x3f + 0x80 + 0xb8 + 0x80 + 0x3f + 0x59 + 0xb8 + 0x80 + 0x3f + 0x38 + 0xb8 + 0x80 + 0x3f + 0x1c + 0xb8 + 0x80 + 0x3f + 0x4 + 0xb8 + 0x80 + 0x3f + 0xcf + 0xb8 + 0x80 + 0x3f + 0xb0 + 0xb8 + 0x80 + 0x3f + 0x96 + 0xb8 + 0x80 + 0x3f + 0x80 + 0xb8 + 0x80 + 0x3f + 0x59 + 0xb8 + 0x80 + 0x3f + 0x38 + 0xb8 + 0x80 + 0x3f + 0x1c + 0xb8 + 0x80 + 0x3f + 0x4 + 0xb8 + 0x80 + 0x3f + 0xdf + 0xb7 + 0x80 + 0x3f + 0xbc + 0xb7 + 0x80 + 0x3f + 0x9e + 0xb7 + 0x80 + 0x3f + 0x85 + 0xb7 + 0x80 + 0x3f + 0x5f + 0xb7 + 0x80 + 0x3f + 0x3b + 0xb7 + 0x80 + 0x3f + 0x1c + 0xb7 + 0x80 + 0x3f + 0x2 + 0xb7 + 0x80 + 0x3f + 0xdf + 0xb7 + 0x80 + 0x3f + 0xbc + 0xb7 + 0x80 + 0x3f + 0x9e + 0xb7 + 0x80 + 0x3f + 0x85 + 0xb7 + 0x80 + 0x3f + 0x5f + 0xb7 + 0x80 + 0x3f + 0x3b + 0xb7 + 0x80 + 0x3f + 0x1c + 0xb7 + 0x80 + 0x3f + 0x2 + 0xb7 + 0x80 + 0x3f + 0xd9 + 0xb6 + 0x80 + 0x3f + 0xb5 + 0xb6 + 0x80 + 0x3f + 0x97 + 0xb6 + 0x80 + 0x3f + 0x7a + 0xb6 + 0x80 + 0x3f + 0x4f + 0xb6 + 0x80 + 0x3f + 0x2b + 0xb6 + 0x80 + 0x3f + 0xe + 0xb6 + 0x80 + 0x3f + 0xea + 0xb5 + 0x80 + 0x3f + 0xd9 + 0xb6 + 0x80 + 0x3f + 0xb5 + 0xb6 + 0x80 + 0x3f + 0x97 + 0xb6 + 0x80 + 0x3f + 0x7a + 0xb6 + 0x80 + 0x3f + 0x4f + 0xb6 + 0x80 + 0x3f + 0x2b + 0xb6 + 0x80 + 0x3f + 0xe + 0xb6 + 0x80 + 0x3f + 0xea + 0xb5 + 0x80 + 0x3f + +.data_segment_name +.data weak 64 _ZN11gelu_lut_32ILj0ELj512EE7data_cdE DMb + 0x1c + 0xb8 + 0xf0 + 0xb6 + 0x3a + 0xb8 + 0x10 + 0xb7 + 0x5f + 0xb8 + 0x2e + 0xb7 + 0x85 + 0xb8 + 0x51 + 0xb7 + 0x9e + 0xb8 + 0x7a + 0xb7 + 0xbc + 0xb8 + 0x96 + 0xb7 + 0xdf + 0xb8 + 0xb3 + 0xb7 + 0x4 + 0xb9 + 0xd5 + 0xb7 + 0x1c + 0xb8 + 0xf0 + 0xb6 + 0x3a + 0xb8 + 0x10 + 0xb7 + 0x5f + 0xb8 + 0x2e + 0xb7 + 0x85 + 0xb8 + 0x51 + 0xb7 + 0x9e + 0xb8 + 0x7a + 0xb7 + 0xbc + 0xb8 + 0x96 + 0xb7 + 0xdf + 0xb8 + 0xb3 + 0xb7 + 0x4 + 0xb9 + 0xd5 + 0xb7 + 0x1c + 0xb9 + 0xfe + 0xb7 + 0x38 + 0xb9 + 0x17 + 0xb8 + 0x58 + 0xb9 + 0x33 + 0xb8 + 0x7f + 0xb9 + 0x55 + 0xb8 + 0x96 + 0xb9 + 0x7c + 0xb8 + 0xb0 + 0xb9 + 0x95 + 0xb8 + 0xcd + 0xb9 + 0xaf + 0xb8 + 0xef + 0xb9 + 0xce + 0xb8 + 0x1c + 0xb9 + 0xfe + 0xb7 + 0x38 + 0xb9 + 0x17 + 0xb8 + 0x58 + 0xb9 + 0x33 + 0xb8 + 0x7f + 0xb9 + 0x55 + 0xb8 + 0x96 + 0xb9 + 0x7c + 0xb8 + 0xb0 + 0xb9 + 0x95 + 0xb8 + 0xcd + 0xb9 + 0xaf + 0xb8 + 0xef + 0xb9 + 0xce + 0xb8 + 0xc + 0xba + 0xf3 + 0xb8 + 0x22 + 0xba + 0xe + 0xb9 + 0x3d + 0xba + 0x27 + 0xb9 + 0x5b + 0xba + 0x43 + 0xb9 + 0x7e + 0xba + 0x64 + 0xb9 + 0x93 + 0xba + 0x85 + 0xb9 + 0xaa + 0xba + 0x9b + 0xb9 + 0xc4 + 0xba + 0xb4 + 0xb9 + 0xc + 0xba + 0xf3 + 0xb8 + 0x22 + 0xba + 0xe + 0xb9 + 0x3d + 0xba + 0x27 + 0xb9 + 0x5b + 0xba + 0x43 + 0xb9 + 0x7e + 0xba + 0x64 + 0xb9 + 0x93 + 0xba + 0x85 + 0xb9 + 0xaa + 0xba + 0x9b + 0xb9 + 0xc4 + 0xba + 0xb4 + 0xb9 + 0xe1 + 0xba + 0xd1 + 0xb9 + 0x1 + 0xbb + 0xf2 + 0xb9 + 0x14 + 0xbb + 0xc + 0xba + 0x2a + 0xbb + 0x22 + 0xba + 0x42 + 0xbb + 0x3a + 0xba + 0x5d + 0xbb + 0x56 + 0xba + 0x7c + 0xbb + 0x76 + 0xba + 0x8f + 0xbb + 0x8d + 0xba + 0xe1 + 0xba + 0xd1 + 0xb9 + 0x1 + 0xbb + 0xf2 + 0xb9 + 0x14 + 0xbb + 0xc + 0xba + 0x2a + 0xbb + 0x22 + 0xba + 0x42 + 0xbb + 0x3a + 0xba + 0x5d + 0xbb + 0x56 + 0xba + 0x7c + 0xbb + 0x76 + 0xba + 0x8f + 0xbb + 0x8d + 0xba + 0xa3 + 0xbb + 0xa2 + 0xba + 0xb8 + 0xbb + 0xb9 + 0xba + 0xd0 + 0xbb + 0xd3 + 0xba + 0xeb + 0xbb + 0xf0 + 0xba + 0x5 + 0xbc + 0x9 + 0xbb + 0x15 + 0xbc + 0x1b + 0xbb + 0x27 + 0xbc + 0x30 + 0xbb + 0x3b + 0xbc + 0x47 + 0xbb + 0xa3 + 0xbb + 0xa2 + 0xba + 0xb8 + 0xbb + 0xb9 + 0xba + 0xd0 + 0xbb + 0xd3 + 0xba + 0xeb + 0xbb + 0xf0 + 0xba + 0x5 + 0xbc + 0x9 + 0xbb + 0x15 + 0xbc + 0x1b + 0xbb + 0x27 + 0xbc + 0x30 + 0xbb + 0x3b + 0xbc + 0x47 + 0xbb + 0x52 + 0xbc + 0x61 + 0xbb + 0x6b + 0xbc + 0x7e + 0xbb + 0x83 + 0xbc + 0x8f + 0xbb + 0x91 + 0xbc + 0xa0 + 0xbb + 0xa2 + 0xbc + 0xb4 + 0xbb + 0xb3 + 0xbc + 0xca + 0xbb + 0xc6 + 0xbc + 0xe1 + 0xbb + 0xdb + 0xbc + 0xfc + 0xbb + 0x52 + 0xbc + 0x61 + 0xbb + 0x6b + 0xbc + 0x7e + 0xbb + 0x83 + 0xbc + 0x8f + 0xbb + 0x91 + 0xbc + 0xa0 + 0xbb + 0xa2 + 0xbc + 0xb4 + 0xbb + 0xb3 + 0xbc + 0xca + 0xbb + 0xc6 + 0xbc + 0xe1 + 0xbb + 0xdb + 0xbc + 0xfc + 0xbb + 0xf2 + 0xbc + 0xc + 0xbc + 0x5 + 0xbd + 0x1c + 0xbc + 0x12 + 0xbd + 0x2d + 0xbc + 0x21 + 0xbd + 0x40 + 0xbc + 0x30 + 0xbd + 0x54 + 0xbc + 0x40 + 0xbd + 0x6a + 0xbc + 0x51 + 0xbd + 0x81 + 0xbc + 0x64 + 0xbd + 0x8e + 0xbc + 0xf2 + 0xbc + 0xc + 0xbc + 0x5 + 0xbd + 0x1c + 0xbc + 0x12 + 0xbd + 0x2d + 0xbc + 0x21 + 0xbd + 0x40 + 0xbc + 0x30 + 0xbd + 0x54 + 0xbc + 0x40 + 0xbd + 0x6a + 0xbc + 0x51 + 0xbd + 0x81 + 0xbc + 0x64 + 0xbd + 0x8e + 0xbc + 0x78 + 0xbd + 0x9c + 0xbc + 0x86 + 0xbd + 0xab + 0xbc + 0x91 + 0xbd + 0xbb + 0xbc + 0x9d + 0xbd + 0xcc + 0xbc + 0xaa + 0xbd + 0xdf + 0xbc + 0xb6 + 0xbd + 0xf2 + 0xbc + 0xc3 + 0xbd + 0x3 + 0xbd + 0xd1 + 0xbd + 0xe + 0xbd + 0x78 + 0xbd + 0x9c + 0xbc + 0x86 + 0xbd + 0xab + 0xbc + 0x91 + 0xbd + 0xbb + 0xbc + 0x9d + 0xbd + 0xcc + 0xbc + 0xaa + 0xbd + 0xdf + 0xbc + 0xb6 + 0xbd + 0xf2 + 0xbc + 0xc3 + 0xbd + 0x3 + 0xbd + 0xd1 + 0xbd + 0xe + 0xbd + 0xe0 + 0xbd + 0x1a + 0xbd + 0xef + 0xbd + 0x26 + 0xbd + 0xff + 0xbd + 0x33 + 0xbd + 0x8 + 0xbe + 0x41 + 0xbd + 0x10 + 0xbe + 0x4f + 0xbd + 0x18 + 0xbe + 0x5d + 0xbd + 0x21 + 0xbe + 0x6c + 0xbd + 0x2a + 0xbe + 0x7c + 0xbd + 0xe0 + 0xbd + 0x1a + 0xbd + 0xef + 0xbd + 0x26 + 0xbd + 0xff + 0xbd + 0x33 + 0xbd + 0x8 + 0xbe + 0x41 + 0xbd + 0x10 + 0xbe + 0x4f + 0xbd + 0x18 + 0xbe + 0x5d + 0xbd + 0x21 + 0xbe + 0x6c + 0xbd + 0x2a + 0xbe + 0x7c + 0xbd + 0x33 + 0xbe + 0x86 + 0xbd + 0x3b + 0xbe + 0x8e + 0xbd + 0x45 + 0xbe + 0x97 + 0xbd + 0x4d + 0xbe + 0x9f + 0xbd + 0x57 + 0xbe + 0xa8 + 0xbd + 0x5f + 0xbe + 0xb0 + 0xbd + 0x67 + 0xbe + 0xb9 + 0xbd + 0x6f + 0xbe + 0xc1 + 0xbd + 0x33 + 0xbe + 0x86 + 0xbd + 0x3b + 0xbe + 0x8e + 0xbd + 0x45 + 0xbe + 0x97 + 0xbd + 0x4d + 0xbe + 0x9f + 0xbd + 0x57 + 0xbe + 0xa8 + 0xbd + 0x5f + 0xbe + 0xb0 + 0xbd + 0x67 + 0xbe + 0xb9 + 0xbd + 0x6f + 0xbe + 0xc1 + 0xbd + 0x78 + 0xbe + 0xca + 0xbd + 0x7f + 0xbe + 0xd2 + 0xbd + 0x83 + 0xbe + 0xda + 0xbd + 0x87 + 0xbe + 0xe2 + 0xbd + 0x8a + 0xbe + 0xe9 + 0xbd + 0x8d + 0xbe + 0xf0 + 0xbd + 0x8f + 0xbe + 0xf6 + 0xbd + 0x91 + 0xbe + 0xfb + 0xbd + 0x78 + 0xbe + 0xca + 0xbd + 0x7f + 0xbe + 0xd2 + 0xbd + 0x83 + 0xbe + 0xda + 0xbd + 0x87 + 0xbe + 0xe2 + 0xbd + 0x8a + 0xbe + 0xe9 + 0xbd + 0x8d + 0xbe + 0xf0 + 0xbd + 0x8f + 0xbe + 0xf6 + 0xbd + 0x91 + 0xbe + 0xfb + 0xbd + 0x93 + 0xbe + 0x0 + 0xbe + 0x95 + 0xbe + 0x2 + 0xbe + 0x96 + 0xbe + 0x3 + 0xbe + 0x96 + 0xbe + 0x4 + 0xbe + 0x96 + 0xbe + 0x4 + 0xbe + 0x96 + 0xbe + 0x4 + 0xbe + 0x95 + 0xbe + 0x2 + 0xbe + 0x94 + 0xbe + 0x0 + 0xbe + 0x93 + 0xbe + 0x0 + 0xbe + 0x95 + 0xbe + 0x2 + 0xbe + 0x96 + 0xbe + 0x3 + 0xbe + 0x96 + 0xbe + 0x4 + 0xbe + 0x96 + 0xbe + 0x4 + 0xbe + 0x96 + 0xbe + 0x4 + 0xbe + 0x95 + 0xbe + 0x2 + 0xbe + 0x94 + 0xbe + 0x0 + 0xbe + 0x92 + 0xbe + 0xfb + 0xbd + 0x90 + 0xbe + 0xf4 + 0xbd + 0x8d + 0xbe + 0xeb + 0xbd + 0x8a + 0xbe + 0xe0 + 0xbd + 0x86 + 0xbe + 0xd3 + 0xbd + 0x82 + 0xbe + 0xc4 + 0xbd + 0x7b + 0xbe + 0xb2 + 0xbd + 0x72 + 0xbe + 0x9f + 0xbd + 0x92 + 0xbe + 0xfb + 0xbd + 0x90 + 0xbe + 0xf4 + 0xbd + 0x8d + 0xbe + 0xeb + 0xbd + 0x8a + 0xbe + 0xe0 + 0xbd + 0x86 + 0xbe + 0xd3 + 0xbd + 0x82 + 0xbe + 0xc4 + 0xbd + 0x7b + 0xbe + 0xb2 + 0xbd + 0x72 + 0xbe + 0x9f + 0xbd + 0x67 + 0xbe + 0x89 + 0xbd + 0x5c + 0xbe + 0x61 + 0xbd + 0x51 + 0xbe + 0x2c + 0xbd + 0x44 + 0xbe + 0xe3 + 0xbc + 0x38 + 0xbe + 0x4b + 0xbc + 0x2b + 0xbe + 0x89 + 0x3b + 0x1e + 0xbe + 0xb4 + 0x3c + 0x11 + 0xbe + 0x27 + 0x3d + 0x67 + 0xbe + 0x89 + 0xbd + 0x5c + 0xbe + 0x61 + 0xbd + 0x51 + 0xbe + 0x2c + 0xbd + 0x44 + 0xbe + 0xe3 + 0xbc + 0x38 + 0xbe + 0x4b + 0xbc + 0x2b + 0xbe + 0x89 + 0x3b + 0x1e + 0xbe + 0xb4 + 0x3c + 0x11 + 0xbe + 0x27 + 0x3d + 0x3 + 0xbe + 0x79 + 0x3d + 0xec + 0xbd + 0xa8 + 0x3d + 0xd2 + 0xbd + 0xd6 + 0x3d + 0xb9 + 0xbd + 0x3 + 0x3e + 0xa1 + 0xbd + 0x1c + 0x3e + 0x89 + 0xbd + 0x36 + 0x3e + 0x66 + 0xbd + 0x51 + 0x3e + 0x3e + 0xbd + 0x6c + 0x3e + 0x3 + 0xbe + 0x79 + 0x3d + 0xec + 0xbd + 0xa8 + 0x3d + 0xd2 + 0xbd + 0xd6 + 0x3d + 0xb9 + 0xbd + 0x3 + 0x3e + 0xa1 + 0xbd + 0x1c + 0x3e + 0x89 + 0xbd + 0x36 + 0x3e + 0x66 + 0xbd + 0x51 + 0x3e + 0x3e + 0xbd + 0x6c + 0x3e + 0x19 + 0xbd + 0x84 + 0x3e + 0xec + 0xbc + 0x93 + 0x3e + 0xaf + 0xbc + 0xa2 + 0x3e + 0x76 + 0xbc + 0xb1 + 0x3e + 0x1c + 0xbc + 0xc1 + 0x3e + 0xb5 + 0xbb + 0xd0 + 0x3e + 0x21 + 0xbb + 0xe0 + 0x3e + 0x20 + 0xba + 0xf0 + 0x3e + 0x19 + 0xbd + 0x84 + 0x3e + 0xec + 0xbc + 0x93 + 0x3e + 0xaf + 0xbc + 0xa2 + 0x3e + 0x76 + 0xbc + 0xb1 + 0x3e + 0x1c + 0xbc + 0xc1 + 0x3e + 0xb5 + 0xbb + 0xd0 + 0x3e + 0x21 + 0xbb + 0xe0 + 0x3e + 0x20 + 0xba + 0xf0 + 0x3e + 0x0 + 0x0 + 0x0 + 0x3f + 0x20 + 0xba + 0x8 + 0x3f + 0x21 + 0xbb + 0x10 + 0x3f + 0xb5 + 0xbb + 0x18 + 0x3f + 0x21 + 0xbc + 0x20 + 0x3f + 0x70 + 0xbc + 0x27 + 0x3f + 0xaf + 0xbc + 0x2f + 0x3f + 0xe7 + 0xbc + 0x36 + 0x3f + 0x0 + 0x0 + 0x0 + 0x3f + 0x20 + 0xba + 0x8 + 0x3f + 0x21 + 0xbb + 0x10 + 0x3f + 0xb5 + 0xbb + 0x18 + 0x3f + 0x21 + 0xbc + 0x20 + 0x3f + 0x70 + 0xbc + 0x27 + 0x3f + 0xaf + 0xbc + 0x2f + 0x3f + 0xe7 + 0xbc + 0x36 + 0x3f + 0x19 + 0xbd + 0x3e + 0x3f + 0x3e + 0xbd + 0x45 + 0x3f + 0x68 + 0xbd + 0x4c + 0x3f + 0x8b + 0xbd + 0x53 + 0x3f + 0xa1 + 0xbd + 0x59 + 0x3f + 0xb8 + 0xbd + 0x5f + 0x3f + 0xd1 + 0xbd + 0x65 + 0x3f + 0xec + 0xbd + 0x6b + 0x3f + 0x19 + 0xbd + 0x3e + 0x3f + 0x3e + 0xbd + 0x45 + 0x3f + 0x68 + 0xbd + 0x4c + 0x3f + 0x8b + 0xbd + 0x53 + 0x3f + 0xa1 + 0xbd + 0x59 + 0x3f + 0xb8 + 0xbd + 0x5f + 0x3f + 0xd1 + 0xbd + 0x65 + 0x3f + 0xec + 0xbd + 0x6b + 0x3f + 0x2 + 0xbe + 0x70 + 0x3f + 0x12 + 0xbe + 0x76 + 0x3f + 0x1d + 0xbe + 0x7a + 0x3f + 0x2b + 0xbe + 0x7f + 0x3f + 0x3a + 0xbe + 0x82 + 0x3f + 0x47 + 0xbe + 0x84 + 0x3f + 0x4e + 0xbe + 0x85 + 0x3f + 0x5c + 0xbe + 0x87 + 0x3f + 0x2 + 0xbe + 0x70 + 0x3f + 0x12 + 0xbe + 0x76 + 0x3f + 0x1d + 0xbe + 0x7a + 0x3f + 0x2b + 0xbe + 0x7f + 0x3f + 0x3a + 0xbe + 0x82 + 0x3f + 0x47 + 0xbe + 0x84 + 0x3f + 0x4e + 0xbe + 0x85 + 0x3f + 0x5c + 0xbe + 0x87 + 0x3f + 0x6b + 0xbe + 0x89 + 0x3f + 0x73 + 0xbe + 0x8a + 0x3f + 0x7a + 0xbe + 0x8b + 0x3f + 0x81 + 0xbe + 0x8c + 0x3f + 0x86 + 0xbe + 0x8d + 0x3f + 0x8a + 0xbe + 0x8e + 0x3f + 0x8f + 0xbe + 0x8f + 0x3f + 0x8f + 0xbe + 0x8f + 0x3f + 0x6b + 0xbe + 0x89 + 0x3f + 0x73 + 0xbe + 0x8a + 0x3f + 0x7a + 0xbe + 0x8b + 0x3f + 0x81 + 0xbe + 0x8c + 0x3f + 0x86 + 0xbe + 0x8d + 0x3f + 0x8a + 0xbe + 0x8e + 0x3f + 0x8f + 0xbe + 0x8f + 0x3f + 0x8f + 0xbe + 0x8f + 0x3f + 0x94 + 0xbe + 0x90 + 0x3f + 0x94 + 0xbe + 0x90 + 0x3f + 0x94 + 0xbe + 0x90 + 0x3f + 0x94 + 0xbe + 0x90 + 0x3f + 0x93 + 0xbe + 0x90 + 0x3f + 0x93 + 0xbe + 0x90 + 0x3f + 0x93 + 0xbe + 0x90 + 0x3f + 0x93 + 0xbe + 0x90 + 0x3f + 0x94 + 0xbe + 0x90 + 0x3f + 0x94 + 0xbe + 0x90 + 0x3f + 0x94 + 0xbe + 0x90 + 0x3f + 0x94 + 0xbe + 0x90 + 0x3f + 0x93 + 0xbe + 0x90 + 0x3f + 0x93 + 0xbe + 0x90 + 0x3f + 0x93 + 0xbe + 0x90 + 0x3f + 0x93 + 0xbe + 0x90 + 0x3f + 0x93 + 0xbe + 0x90 + 0x3f + 0x93 + 0xbe + 0x90 + 0x3f + 0x8d + 0xbe + 0x8f + 0x3f + 0x8d + 0xbe + 0x8f + 0x3f + 0x8d + 0xbe + 0x8f + 0x3f + 0x86 + 0xbe + 0x8e + 0x3f + 0x86 + 0xbe + 0x8e + 0x3f + 0x7d + 0xbe + 0x8d + 0x3f + 0x93 + 0xbe + 0x90 + 0x3f + 0x93 + 0xbe + 0x90 + 0x3f + 0x8d + 0xbe + 0x8f + 0x3f + 0x8d + 0xbe + 0x8f + 0x3f + 0x8d + 0xbe + 0x8f + 0x3f + 0x86 + 0xbe + 0x8e + 0x3f + 0x86 + 0xbe + 0x8e + 0x3f + 0x7d + 0xbe + 0x8d + 0x3f + 0x7d + 0xbe + 0x8d + 0x3f + 0x6e + 0xbe + 0x8c + 0x3f + 0x6e + 0xbe + 0x8c + 0x3f + 0x5f + 0xbe + 0x8b + 0x3f + 0x4e + 0xbe + 0x8a + 0x3f + 0x4e + 0xbe + 0x8a + 0x3f + 0x3e + 0xbe + 0x89 + 0x3f + 0x3e + 0xbe + 0x89 + 0x3f + 0x7d + 0xbe + 0x8d + 0x3f + 0x6e + 0xbe + 0x8c + 0x3f + 0x6e + 0xbe + 0x8c + 0x3f + 0x5f + 0xbe + 0x8b + 0x3f + 0x4e + 0xbe + 0x8a + 0x3f + 0x4e + 0xbe + 0x8a + 0x3f + 0x3e + 0xbe + 0x89 + 0x3f + 0x3e + 0xbe + 0x89 + 0x3f + 0x2c + 0xbe + 0x88 + 0x3f + 0x2c + 0xbe + 0x88 + 0x3f + 0x1a + 0xbe + 0x87 + 0x3f + 0x1a + 0xbe + 0x87 + 0x3f + 0x7 + 0xbe + 0x86 + 0x3f + 0x7 + 0xbe + 0x86 + 0x3f + 0x7 + 0xbe + 0x86 + 0x3f + 0xe8 + 0xbd + 0x85 + 0x3f + 0x2c + 0xbe + 0x88 + 0x3f + 0x2c + 0xbe + 0x88 + 0x3f + 0x1a + 0xbe + 0x87 + 0x3f + 0x1a + 0xbe + 0x87 + 0x3f + 0x7 + 0xbe + 0x86 + 0x3f + 0x7 + 0xbe + 0x86 + 0x3f + 0x7 + 0xbe + 0x86 + 0x3f + 0xe8 + 0xbd + 0x85 + 0x3f + 0xe8 + 0xbd + 0x85 + 0x3f + 0xbf + 0xbd + 0x84 + 0x3f + 0xbf + 0xbd + 0x84 + 0x3f + 0xbf + 0xbd + 0x84 + 0x3f + 0x95 + 0xbd + 0x83 + 0x3f + 0x95 + 0xbd + 0x83 + 0x3f + 0x95 + 0xbd + 0x83 + 0x3f + 0x95 + 0xbd + 0x83 + 0x3f + 0xe8 + 0xbd + 0x85 + 0x3f + 0xbf + 0xbd + 0x84 + 0x3f + 0xbf + 0xbd + 0x84 + 0x3f + 0xbf + 0xbd + 0x84 + 0x3f + 0x95 + 0xbd + 0x83 + 0x3f + 0x95 + 0xbd + 0x83 + 0x3f + 0x95 + 0xbd + 0x83 + 0x3f + 0x95 + 0xbd + 0x83 + 0x3f + 0x50 + 0xbd + 0x82 + 0x3f + 0x50 + 0xbd + 0x82 + 0x3f + 0x50 + 0xbd + 0x82 + 0x3f + 0x50 + 0xbd + 0x82 + 0x3f + 0x50 + 0xbd + 0x82 + 0x3f + 0xe1 + 0xbc + 0x81 + 0x3f + 0xe0 + 0xbc + 0x81 + 0x3f + 0xdf + 0xbc + 0x81 + 0x3f + 0x50 + 0xbd + 0x82 + 0x3f + 0x50 + 0xbd + 0x82 + 0x3f + 0x50 + 0xbd + 0x82 + 0x3f + 0x50 + 0xbd + 0x82 + 0x3f + 0x50 + 0xbd + 0x82 + 0x3f + 0xe1 + 0xbc + 0x81 + 0x3f + 0xe0 + 0xbc + 0x81 + 0x3f + 0xdf + 0xbc + 0x81 + 0x3f + 0xdf + 0xbc + 0x81 + 0x3f + 0xdf + 0xbc + 0x81 + 0x3f + 0xdf + 0xbc + 0x81 + 0x3f + 0xdf + 0xbc + 0x81 + 0x3f + 0xe0 + 0xbc + 0x81 + 0x3f + 0xe1 + 0xbc + 0x81 + 0x3f + 0xe2 + 0xbc + 0x81 + 0x3f + 0x97 + 0xba + 0x80 + 0x3f + 0xdf + 0xbc + 0x81 + 0x3f + 0xdf + 0xbc + 0x81 + 0x3f + 0xdf + 0xbc + 0x81 + 0x3f + 0xdf + 0xbc + 0x81 + 0x3f + 0xe0 + 0xbc + 0x81 + 0x3f + 0xe1 + 0xbc + 0x81 + 0x3f + 0xe2 + 0xbc + 0x81 + 0x3f + 0x97 + 0xba + 0x80 + 0x3f + 0x84 + 0xba + 0x80 + 0x3f + 0x67 + 0xba + 0x80 + 0x3f + 0x4a + 0xba + 0x80 + 0x3f + 0x30 + 0xba + 0x80 + 0x3f + 0x1a + 0xba + 0x80 + 0x3f + 0x5 + 0xba + 0x80 + 0x3f + 0xe8 + 0xb9 + 0x80 + 0x3f + 0xc9 + 0xb9 + 0x80 + 0x3f + 0x84 + 0xba + 0x80 + 0x3f + 0x67 + 0xba + 0x80 + 0x3f + 0x4a + 0xba + 0x80 + 0x3f + 0x30 + 0xba + 0x80 + 0x3f + 0x1a + 0xba + 0x80 + 0x3f + 0x5 + 0xba + 0x80 + 0x3f + 0xe8 + 0xb9 + 0x80 + 0x3f + 0xc9 + 0xb9 + 0x80 + 0x3f + 0xae + 0xb9 + 0x80 + 0x3f + 0x96 + 0xb9 + 0x80 + 0x3f + 0x82 + 0xb9 + 0x80 + 0x3f + 0x5f + 0xb9 + 0x80 + 0x3f + 0x40 + 0xb9 + 0x80 + 0x3f + 0x25 + 0xb9 + 0x80 + 0x3f + 0xd + 0xb9 + 0x80 + 0x3f + 0xf2 + 0xb8 + 0x80 + 0x3f + 0xae + 0xb9 + 0x80 + 0x3f + 0x96 + 0xb9 + 0x80 + 0x3f + 0x82 + 0xb9 + 0x80 + 0x3f + 0x5f + 0xb9 + 0x80 + 0x3f + 0x40 + 0xb9 + 0x80 + 0x3f + 0x25 + 0xb9 + 0x80 + 0x3f + 0xd + 0xb9 + 0x80 + 0x3f + 0xf2 + 0xb8 + 0x80 + 0x3f + 0xcf + 0xb8 + 0x80 + 0x3f + 0xb0 + 0xb8 + 0x80 + 0x3f + 0x96 + 0xb8 + 0x80 + 0x3f + 0x80 + 0xb8 + 0x80 + 0x3f + 0x59 + 0xb8 + 0x80 + 0x3f + 0x38 + 0xb8 + 0x80 + 0x3f + 0x1c + 0xb8 + 0x80 + 0x3f + 0x4 + 0xb8 + 0x80 + 0x3f + 0xcf + 0xb8 + 0x80 + 0x3f + 0xb0 + 0xb8 + 0x80 + 0x3f + 0x96 + 0xb8 + 0x80 + 0x3f + 0x80 + 0xb8 + 0x80 + 0x3f + 0x59 + 0xb8 + 0x80 + 0x3f + 0x38 + 0xb8 + 0x80 + 0x3f + 0x1c + 0xb8 + 0x80 + 0x3f + 0x4 + 0xb8 + 0x80 + 0x3f + 0xdf + 0xb7 + 0x80 + 0x3f + 0xbc + 0xb7 + 0x80 + 0x3f + 0x9e + 0xb7 + 0x80 + 0x3f + 0x85 + 0xb7 + 0x80 + 0x3f + 0x5f + 0xb7 + 0x80 + 0x3f + 0x3b + 0xb7 + 0x80 + 0x3f + 0x1c + 0xb7 + 0x80 + 0x3f + 0x2 + 0xb7 + 0x80 + 0x3f + 0xdf + 0xb7 + 0x80 + 0x3f + 0xbc + 0xb7 + 0x80 + 0x3f + 0x9e + 0xb7 + 0x80 + 0x3f + 0x85 + 0xb7 + 0x80 + 0x3f + 0x5f + 0xb7 + 0x80 + 0x3f + 0x3b + 0xb7 + 0x80 + 0x3f + 0x1c + 0xb7 + 0x80 + 0x3f + 0x2 + 0xb7 + 0x80 + 0x3f + 0xd9 + 0xb6 + 0x80 + 0x3f + 0xb5 + 0xb6 + 0x80 + 0x3f + 0x97 + 0xb6 + 0x80 + 0x3f + 0x7a + 0xb6 + 0x80 + 0x3f + 0x4f + 0xb6 + 0x80 + 0x3f + 0x2b + 0xb6 + 0x80 + 0x3f + 0xe + 0xb6 + 0x80 + 0x3f + 0xea + 0xb5 + 0x80 + 0x3f + 0xd9 + 0xb6 + 0x80 + 0x3f + 0xb5 + 0xb6 + 0x80 + 0x3f + 0x97 + 0xb6 + 0x80 + 0x3f + 0x7a + 0xb6 + 0x80 + 0x3f + 0x4f + 0xb6 + 0x80 + 0x3f + 0x2b + 0xb6 + 0x80 + 0x3f + 0xe + 0xb6 + 0x80 + 0x3f + 0xea + 0xb5 + 0x80 + 0x3f + +.data_segment_name +.data weak 64 _ZN8tanh_lutILj0ELj512EE7data_abE DMb + 0x80 + 0xbf + 0x3e + 0x39 + 0x80 + 0xbf + 0x4e + 0x39 + 0x80 + 0xbf + 0x5f + 0x39 + 0x80 + 0xbf + 0x71 + 0x39 + 0x80 + 0xbf + 0x82 + 0x39 + 0x80 + 0xbf + 0x8d + 0x39 + 0x80 + 0xbf + 0x98 + 0x39 + 0x80 + 0xbf + 0xa4 + 0x39 + 0x80 + 0xbf + 0x3e + 0x39 + 0x80 + 0xbf + 0x4e + 0x39 + 0x80 + 0xbf + 0x5f + 0x39 + 0x80 + 0xbf + 0x71 + 0x39 + 0x80 + 0xbf + 0x82 + 0x39 + 0x80 + 0xbf + 0x8d + 0x39 + 0x80 + 0xbf + 0x98 + 0x39 + 0x80 + 0xbf + 0xa4 + 0x39 + 0x80 + 0xbf + 0xb2 + 0x39 + 0x80 + 0xbf + 0xc0 + 0x39 + 0x7f + 0xbf + 0xd0 + 0x39 + 0x7f + 0xbf + 0xe1 + 0x39 + 0x7f + 0xbf + 0xf3 + 0x39 + 0x7f + 0xbf + 0x3 + 0x3a + 0x7f + 0xbf + 0xe + 0x3a + 0x7f + 0xbf + 0x1a + 0x3a + 0x80 + 0xbf + 0xb2 + 0x39 + 0x80 + 0xbf + 0xc0 + 0x39 + 0x7f + 0xbf + 0xd0 + 0x39 + 0x7f + 0xbf + 0xe1 + 0x39 + 0x7f + 0xbf + 0xf3 + 0x39 + 0x7f + 0xbf + 0x3 + 0x3a + 0x7f + 0xbf + 0xe + 0x3a + 0x7f + 0xbf + 0x1a + 0x3a + 0x7f + 0xbf + 0x26 + 0x3a + 0x7f + 0xbf + 0x34 + 0x3a + 0x7f + 0xbf + 0x42 + 0x3a + 0x7f + 0xbf + 0x52 + 0x3a + 0x7f + 0xbf + 0x63 + 0x3a + 0x7f + 0xbf + 0x75 + 0x3a + 0x7f + 0xbf + 0x85 + 0x3a + 0x7f + 0xbf + 0x8f + 0x3a + 0x7f + 0xbf + 0x26 + 0x3a + 0x7f + 0xbf + 0x34 + 0x3a + 0x7f + 0xbf + 0x42 + 0x3a + 0x7f + 0xbf + 0x52 + 0x3a + 0x7f + 0xbf + 0x63 + 0x3a + 0x7f + 0xbf + 0x75 + 0x3a + 0x7f + 0xbf + 0x85 + 0x3a + 0x7f + 0xbf + 0x8f + 0x3a + 0x7f + 0xbf + 0x9b + 0x3a + 0x7f + 0xbf + 0xa8 + 0x3a + 0x7e + 0xbf + 0xb5 + 0x3a + 0x7e + 0xbf + 0xc4 + 0x3a + 0x7e + 0xbf + 0xd4 + 0x3a + 0x7e + 0xbf + 0xe5 + 0x3a + 0x7e + 0xbf + 0xf8 + 0x3a + 0x7e + 0xbf + 0x6 + 0x3b + 0x7f + 0xbf + 0x9b + 0x3a + 0x7f + 0xbf + 0xa8 + 0x3a + 0x7e + 0xbf + 0xb5 + 0x3a + 0x7e + 0xbf + 0xc4 + 0x3a + 0x7e + 0xbf + 0xd4 + 0x3a + 0x7e + 0xbf + 0xe5 + 0x3a + 0x7e + 0xbf + 0xf8 + 0x3a + 0x7e + 0xbf + 0x6 + 0x3b + 0x7e + 0xbf + 0x11 + 0x3b + 0x7d + 0xbf + 0x1d + 0x3b + 0x7d + 0xbf + 0x29 + 0x3b + 0x7d + 0xbf + 0x37 + 0x3b + 0x7d + 0xbf + 0x46 + 0x3b + 0x7d + 0xbf + 0x56 + 0x3b + 0x7c + 0xbf + 0x67 + 0x3b + 0x7c + 0xbf + 0x7a + 0x3b + 0x7e + 0xbf + 0x11 + 0x3b + 0x7d + 0xbf + 0x1d + 0x3b + 0x7d + 0xbf + 0x29 + 0x3b + 0x7d + 0xbf + 0x37 + 0x3b + 0x7d + 0xbf + 0x46 + 0x3b + 0x7d + 0xbf + 0x56 + 0x3b + 0x7c + 0xbf + 0x67 + 0x3b + 0x7c + 0xbf + 0x7a + 0x3b + 0x7c + 0xbf + 0x87 + 0x3b + 0x7c + 0xbf + 0x92 + 0x3b + 0x7b + 0xbf + 0x9e + 0x3b + 0x7b + 0xbf + 0xab + 0x3b + 0x7b + 0xbf + 0xb9 + 0x3b + 0x7a + 0xbf + 0xc8 + 0x3b + 0x7a + 0xbf + 0xd8 + 0x3b + 0x79 + 0xbf + 0xe9 + 0x3b + 0x7c + 0xbf + 0x87 + 0x3b + 0x7c + 0xbf + 0x92 + 0x3b + 0x7b + 0xbf + 0x9e + 0x3b + 0x7b + 0xbf + 0xab + 0x3b + 0x7b + 0xbf + 0xb9 + 0x3b + 0x7a + 0xbf + 0xc8 + 0x3b + 0x7a + 0xbf + 0xd8 + 0x3b + 0x79 + 0xbf + 0xe9 + 0x3b + 0x79 + 0xbf + 0xfc + 0x3b + 0x78 + 0xbf + 0x8 + 0x3c + 0x78 + 0xbf + 0x13 + 0x3c + 0x77 + 0xbf + 0x1f + 0x3c + 0x77 + 0xbf + 0x2c + 0x3c + 0x76 + 0xbf + 0x3a + 0x3c + 0x75 + 0xbf + 0x49 + 0x3c + 0x75 + 0xbf + 0x59 + 0x3c + 0x79 + 0xbf + 0xfc + 0x3b + 0x78 + 0xbf + 0x8 + 0x3c + 0x78 + 0xbf + 0x13 + 0x3c + 0x77 + 0xbf + 0x1f + 0x3c + 0x77 + 0xbf + 0x2c + 0x3c + 0x76 + 0xbf + 0x3a + 0x3c + 0x75 + 0xbf + 0x49 + 0x3c + 0x75 + 0xbf + 0x59 + 0x3c + 0x74 + 0xbf + 0x6b + 0x3c + 0x73 + 0xbf + 0x7e + 0x3c + 0x72 + 0xbf + 0x89 + 0x3c + 0x71 + 0xbf + 0x94 + 0x3c + 0x70 + 0xbf + 0xa0 + 0x3c + 0x6f + 0xbf + 0xad + 0x3c + 0x6e + 0xbf + 0xbb + 0x3c + 0x6d + 0xbf + 0xca + 0x3c + 0x74 + 0xbf + 0x6b + 0x3c + 0x73 + 0xbf + 0x7e + 0x3c + 0x72 + 0xbf + 0x89 + 0x3c + 0x71 + 0xbf + 0x94 + 0x3c + 0x70 + 0xbf + 0xa0 + 0x3c + 0x6f + 0xbf + 0xad + 0x3c + 0x6e + 0xbf + 0xbb + 0x3c + 0x6d + 0xbf + 0xca + 0x3c + 0x6c + 0xbf + 0xda + 0x3c + 0x6a + 0xbf + 0xeb + 0x3c + 0x69 + 0xbf + 0xfe + 0x3c + 0x67 + 0xbf + 0x9 + 0x3d + 0x66 + 0xbf + 0x14 + 0x3d + 0x64 + 0xbf + 0x20 + 0x3d + 0x62 + 0xbf + 0x2d + 0x3d + 0x60 + 0xbf + 0x3a + 0x3d + 0x6c + 0xbf + 0xda + 0x3c + 0x6a + 0xbf + 0xeb + 0x3c + 0x69 + 0xbf + 0xfe + 0x3c + 0x67 + 0xbf + 0x9 + 0x3d + 0x66 + 0xbf + 0x14 + 0x3d + 0x64 + 0xbf + 0x20 + 0x3d + 0x62 + 0xbf + 0x2d + 0x3d + 0x60 + 0xbf + 0x3a + 0x3d + 0x5e + 0xbf + 0x49 + 0x3d + 0x5c + 0xbf + 0x59 + 0x3d + 0x5a + 0xbf + 0x6a + 0x3d + 0x57 + 0xbf + 0x7d + 0x3d + 0x55 + 0xbf + 0x88 + 0x3d + 0x52 + 0xbf + 0x93 + 0x3d + 0x4f + 0xbf + 0x9e + 0x3d + 0x4c + 0xbf + 0xab + 0x3d + 0x5e + 0xbf + 0x49 + 0x3d + 0x5c + 0xbf + 0x59 + 0x3d + 0x5a + 0xbf + 0x6a + 0x3d + 0x57 + 0xbf + 0x7d + 0x3d + 0x55 + 0xbf + 0x88 + 0x3d + 0x52 + 0xbf + 0x93 + 0x3d + 0x4f + 0xbf + 0x9e + 0x3d + 0x4c + 0xbf + 0xab + 0x3d + 0x49 + 0xbf + 0xb8 + 0x3d + 0x46 + 0xbf + 0xc6 + 0x3d + 0x42 + 0xbf + 0xd5 + 0x3d + 0x3f + 0xbf + 0xe6 + 0x3d + 0x3b + 0xbf + 0xf7 + 0x3d + 0x37 + 0xbf + 0x5 + 0x3e + 0x33 + 0xbf + 0xf + 0x3e + 0x2e + 0xbf + 0x1a + 0x3e + 0x49 + 0xbf + 0xb8 + 0x3d + 0x46 + 0xbf + 0xc6 + 0x3d + 0x42 + 0xbf + 0xd5 + 0x3d + 0x3f + 0xbf + 0xe6 + 0x3d + 0x3b + 0xbf + 0xf7 + 0x3d + 0x37 + 0xbf + 0x5 + 0x3e + 0x33 + 0xbf + 0xf + 0x3e + 0x2e + 0xbf + 0x1a + 0x3e + 0x2a + 0xbf + 0x25 + 0x3e + 0x25 + 0xbf + 0x31 + 0x3e + 0x20 + 0xbf + 0x3e + 0x3e + 0x1b + 0xbf + 0x4c + 0x3e + 0x16 + 0xbf + 0x5b + 0x3e + 0x10 + 0xbf + 0x6b + 0x3e + 0xb + 0xbf + 0x7b + 0x3e + 0x6 + 0xbf + 0x86 + 0x3e + 0x2a + 0xbf + 0x25 + 0x3e + 0x25 + 0xbf + 0x31 + 0x3e + 0x20 + 0xbf + 0x3e + 0x3e + 0x1b + 0xbf + 0x4c + 0x3e + 0x16 + 0xbf + 0x5b + 0x3e + 0x10 + 0xbf + 0x6b + 0x3e + 0xb + 0xbf + 0x7b + 0x3e + 0x6 + 0xbf + 0x86 + 0x3e + 0xfe + 0xbe + 0x90 + 0x3e + 0xf3 + 0xbe + 0x99 + 0x3e + 0xe6 + 0xbe + 0xa4 + 0x3e + 0xdb + 0xbe + 0xae + 0x3e + 0xcd + 0xbe + 0xba + 0x3e + 0xc0 + 0xbe + 0xc6 + 0x3e + 0xb4 + 0xbe + 0xd2 + 0x3e + 0xa7 + 0xbe + 0xdf + 0x3e + 0xfe + 0xbe + 0x90 + 0x3e + 0xf3 + 0xbe + 0x99 + 0x3e + 0xe6 + 0xbe + 0xa4 + 0x3e + 0xdb + 0xbe + 0xae + 0x3e + 0xcd + 0xbe + 0xba + 0x3e + 0xc0 + 0xbe + 0xc6 + 0x3e + 0xb4 + 0xbe + 0xd2 + 0x3e + 0xa7 + 0xbe + 0xdf + 0x3e + 0x9b + 0xbe + 0xec + 0x3e + 0x8e + 0xbe + 0xfa + 0x3e + 0x81 + 0xbe + 0x4 + 0x3f + 0x6b + 0xbe + 0xb + 0x3f + 0x52 + 0xbe + 0x13 + 0x3f + 0x3c + 0xbe + 0x1a + 0x3f + 0x25 + 0xbe + 0x22 + 0x3f + 0xf + 0xbe + 0x2a + 0x3f + 0x9b + 0xbe + 0xec + 0x3e + 0x8e + 0xbe + 0xfa + 0x3e + 0x81 + 0xbe + 0x4 + 0x3f + 0x6b + 0xbe + 0xb + 0x3f + 0x52 + 0xbe + 0x13 + 0x3f + 0x3c + 0xbe + 0x1a + 0x3f + 0x25 + 0xbe + 0x22 + 0x3f + 0xf + 0xbe + 0x2a + 0x3f + 0xfb + 0xbd + 0x31 + 0x3f + 0xd4 + 0xbd + 0x39 + 0x3f + 0xb4 + 0xbd + 0x40 + 0x3f + 0x92 + 0xbd + 0x48 + 0x3f + 0x6e + 0xbd + 0x4f + 0x3f + 0x3c + 0xbd + 0x56 + 0x3f + 0xe + 0xbd + 0x5d + 0x3f + 0xd5 + 0xbc + 0x63 + 0x3f + 0xfb + 0xbd + 0x31 + 0x3f + 0xd4 + 0xbd + 0x39 + 0x3f + 0xb4 + 0xbd + 0x40 + 0x3f + 0x92 + 0xbd + 0x48 + 0x3f + 0x6e + 0xbd + 0x4f + 0x3f + 0x3c + 0xbd + 0x56 + 0x3f + 0xe + 0xbd + 0x5d + 0x3f + 0xd5 + 0xbc + 0x63 + 0x3f + 0x96 + 0xbc + 0x69 + 0x3f + 0x4f + 0xbc + 0x6e + 0x3f + 0xd + 0xbc + 0x72 + 0x3f + 0xaa + 0xbb + 0x76 + 0x3f + 0x1d + 0xbb + 0x7a + 0x3f + 0x5c + 0xba + 0x7d + 0x3f + 0xed + 0xb9 + 0x7e + 0x3f + 0xa7 + 0x37 + 0x80 + 0x3f + 0x96 + 0xbc + 0x69 + 0x3f + 0x4f + 0xbc + 0x6e + 0x3f + 0xd + 0xbc + 0x72 + 0x3f + 0xaa + 0xbb + 0x76 + 0x3f + 0x1d + 0xbb + 0x7a + 0x3f + 0x5c + 0xba + 0x7d + 0x3f + 0xed + 0xb9 + 0x7e + 0x3f + 0xa7 + 0x37 + 0x80 + 0x3f + 0x0 + 0x0 + 0x80 + 0x3f + 0xa7 + 0xb7 + 0x80 + 0x3f + 0xed + 0x39 + 0x7e + 0x3f + 0x5c + 0x3a + 0x7d + 0x3f + 0x1d + 0x3b + 0x7a + 0x3f + 0xaa + 0x3b + 0x76 + 0x3f + 0xd + 0x3c + 0x72 + 0x3f + 0x4f + 0x3c + 0x6e + 0x3f + 0x0 + 0x0 + 0x80 + 0x3f + 0xa7 + 0xb7 + 0x80 + 0x3f + 0xed + 0x39 + 0x7e + 0x3f + 0x5c + 0x3a + 0x7d + 0x3f + 0x1d + 0x3b + 0x7a + 0x3f + 0xaa + 0x3b + 0x76 + 0x3f + 0xd + 0x3c + 0x72 + 0x3f + 0x4f + 0x3c + 0x6e + 0x3f + 0x96 + 0x3c + 0x69 + 0x3f + 0xd5 + 0x3c + 0x63 + 0x3f + 0xe + 0x3d + 0x5d + 0x3f + 0x3c + 0x3d + 0x56 + 0x3f + 0x6e + 0x3d + 0x4f + 0x3f + 0x92 + 0x3d + 0x48 + 0x3f + 0xb4 + 0x3d + 0x40 + 0x3f + 0xd4 + 0x3d + 0x39 + 0x3f + 0x96 + 0x3c + 0x69 + 0x3f + 0xd5 + 0x3c + 0x63 + 0x3f + 0xe + 0x3d + 0x5d + 0x3f + 0x3c + 0x3d + 0x56 + 0x3f + 0x6e + 0x3d + 0x4f + 0x3f + 0x92 + 0x3d + 0x48 + 0x3f + 0xb4 + 0x3d + 0x40 + 0x3f + 0xd4 + 0x3d + 0x39 + 0x3f + 0xfb + 0x3d + 0x31 + 0x3f + 0xf + 0x3e + 0x2a + 0x3f + 0x25 + 0x3e + 0x22 + 0x3f + 0x3c + 0x3e + 0x1a + 0x3f + 0x52 + 0x3e + 0x13 + 0x3f + 0x6b + 0x3e + 0xb + 0x3f + 0x81 + 0x3e + 0x4 + 0x3f + 0x8e + 0x3e + 0xfa + 0x3e + 0xfb + 0x3d + 0x31 + 0x3f + 0xf + 0x3e + 0x2a + 0x3f + 0x25 + 0x3e + 0x22 + 0x3f + 0x3c + 0x3e + 0x1a + 0x3f + 0x52 + 0x3e + 0x13 + 0x3f + 0x6b + 0x3e + 0xb + 0x3f + 0x81 + 0x3e + 0x4 + 0x3f + 0x8e + 0x3e + 0xfa + 0x3e + 0x9b + 0x3e + 0xec + 0x3e + 0xa7 + 0x3e + 0xdf + 0x3e + 0xb4 + 0x3e + 0xd2 + 0x3e + 0xc0 + 0x3e + 0xc6 + 0x3e + 0xcd + 0x3e + 0xba + 0x3e + 0xdb + 0x3e + 0xae + 0x3e + 0xe6 + 0x3e + 0xa4 + 0x3e + 0xf3 + 0x3e + 0x99 + 0x3e + 0x9b + 0x3e + 0xec + 0x3e + 0xa7 + 0x3e + 0xdf + 0x3e + 0xb4 + 0x3e + 0xd2 + 0x3e + 0xc0 + 0x3e + 0xc6 + 0x3e + 0xcd + 0x3e + 0xba + 0x3e + 0xdb + 0x3e + 0xae + 0x3e + 0xe6 + 0x3e + 0xa4 + 0x3e + 0xf3 + 0x3e + 0x99 + 0x3e + 0xfe + 0x3e + 0x90 + 0x3e + 0x6 + 0x3f + 0x86 + 0x3e + 0xb + 0x3f + 0x7b + 0x3e + 0x10 + 0x3f + 0x6b + 0x3e + 0x16 + 0x3f + 0x5b + 0x3e + 0x1b + 0x3f + 0x4c + 0x3e + 0x20 + 0x3f + 0x3e + 0x3e + 0x25 + 0x3f + 0x31 + 0x3e + 0xfe + 0x3e + 0x90 + 0x3e + 0x6 + 0x3f + 0x86 + 0x3e + 0xb + 0x3f + 0x7b + 0x3e + 0x10 + 0x3f + 0x6b + 0x3e + 0x16 + 0x3f + 0x5b + 0x3e + 0x1b + 0x3f + 0x4c + 0x3e + 0x20 + 0x3f + 0x3e + 0x3e + 0x25 + 0x3f + 0x31 + 0x3e + 0x2a + 0x3f + 0x25 + 0x3e + 0x2e + 0x3f + 0x1a + 0x3e + 0x33 + 0x3f + 0xf + 0x3e + 0x37 + 0x3f + 0x5 + 0x3e + 0x3b + 0x3f + 0xf7 + 0x3d + 0x3f + 0x3f + 0xe6 + 0x3d + 0x42 + 0x3f + 0xd5 + 0x3d + 0x46 + 0x3f + 0xc6 + 0x3d + 0x2a + 0x3f + 0x25 + 0x3e + 0x2e + 0x3f + 0x1a + 0x3e + 0x33 + 0x3f + 0xf + 0x3e + 0x37 + 0x3f + 0x5 + 0x3e + 0x3b + 0x3f + 0xf7 + 0x3d + 0x3f + 0x3f + 0xe6 + 0x3d + 0x42 + 0x3f + 0xd5 + 0x3d + 0x46 + 0x3f + 0xc6 + 0x3d + 0x49 + 0x3f + 0xb8 + 0x3d + 0x4c + 0x3f + 0xab + 0x3d + 0x4f + 0x3f + 0x9e + 0x3d + 0x52 + 0x3f + 0x93 + 0x3d + 0x55 + 0x3f + 0x88 + 0x3d + 0x57 + 0x3f + 0x7d + 0x3d + 0x5a + 0x3f + 0x6a + 0x3d + 0x5c + 0x3f + 0x59 + 0x3d + 0x49 + 0x3f + 0xb8 + 0x3d + 0x4c + 0x3f + 0xab + 0x3d + 0x4f + 0x3f + 0x9e + 0x3d + 0x52 + 0x3f + 0x93 + 0x3d + 0x55 + 0x3f + 0x88 + 0x3d + 0x57 + 0x3f + 0x7d + 0x3d + 0x5a + 0x3f + 0x6a + 0x3d + 0x5c + 0x3f + 0x59 + 0x3d + 0x5e + 0x3f + 0x49 + 0x3d + 0x60 + 0x3f + 0x3a + 0x3d + 0x62 + 0x3f + 0x2d + 0x3d + 0x64 + 0x3f + 0x20 + 0x3d + 0x66 + 0x3f + 0x14 + 0x3d + 0x67 + 0x3f + 0x9 + 0x3d + 0x69 + 0x3f + 0xfe + 0x3c + 0x6a + 0x3f + 0xeb + 0x3c + 0x5e + 0x3f + 0x49 + 0x3d + 0x60 + 0x3f + 0x3a + 0x3d + 0x62 + 0x3f + 0x2d + 0x3d + 0x64 + 0x3f + 0x20 + 0x3d + 0x66 + 0x3f + 0x14 + 0x3d + 0x67 + 0x3f + 0x9 + 0x3d + 0x69 + 0x3f + 0xfe + 0x3c + 0x6a + 0x3f + 0xeb + 0x3c + 0x6c + 0x3f + 0xda + 0x3c + 0x6d + 0x3f + 0xca + 0x3c + 0x6e + 0x3f + 0xbb + 0x3c + 0x6f + 0x3f + 0xad + 0x3c + 0x70 + 0x3f + 0xa0 + 0x3c + 0x71 + 0x3f + 0x94 + 0x3c + 0x72 + 0x3f + 0x89 + 0x3c + 0x73 + 0x3f + 0x7e + 0x3c + 0x6c + 0x3f + 0xda + 0x3c + 0x6d + 0x3f + 0xca + 0x3c + 0x6e + 0x3f + 0xbb + 0x3c + 0x6f + 0x3f + 0xad + 0x3c + 0x70 + 0x3f + 0xa0 + 0x3c + 0x71 + 0x3f + 0x94 + 0x3c + 0x72 + 0x3f + 0x89 + 0x3c + 0x73 + 0x3f + 0x7e + 0x3c + 0x74 + 0x3f + 0x6b + 0x3c + 0x75 + 0x3f + 0x59 + 0x3c + 0x75 + 0x3f + 0x49 + 0x3c + 0x76 + 0x3f + 0x3a + 0x3c + 0x77 + 0x3f + 0x2c + 0x3c + 0x77 + 0x3f + 0x1f + 0x3c + 0x78 + 0x3f + 0x13 + 0x3c + 0x78 + 0x3f + 0x8 + 0x3c + 0x74 + 0x3f + 0x6b + 0x3c + 0x75 + 0x3f + 0x59 + 0x3c + 0x75 + 0x3f + 0x49 + 0x3c + 0x76 + 0x3f + 0x3a + 0x3c + 0x77 + 0x3f + 0x2c + 0x3c + 0x77 + 0x3f + 0x1f + 0x3c + 0x78 + 0x3f + 0x13 + 0x3c + 0x78 + 0x3f + 0x8 + 0x3c + 0x79 + 0x3f + 0xfc + 0x3b + 0x79 + 0x3f + 0xe9 + 0x3b + 0x7a + 0x3f + 0xd8 + 0x3b + 0x7a + 0x3f + 0xc8 + 0x3b + 0x7b + 0x3f + 0xb9 + 0x3b + 0x7b + 0x3f + 0xab + 0x3b + 0x7b + 0x3f + 0x9e + 0x3b + 0x7c + 0x3f + 0x92 + 0x3b + 0x79 + 0x3f + 0xfc + 0x3b + 0x79 + 0x3f + 0xe9 + 0x3b + 0x7a + 0x3f + 0xd8 + 0x3b + 0x7a + 0x3f + 0xc8 + 0x3b + 0x7b + 0x3f + 0xb9 + 0x3b + 0x7b + 0x3f + 0xab + 0x3b + 0x7b + 0x3f + 0x9e + 0x3b + 0x7c + 0x3f + 0x92 + 0x3b + 0x7c + 0x3f + 0x87 + 0x3b + 0x7c + 0x3f + 0x7a + 0x3b + 0x7c + 0x3f + 0x67 + 0x3b + 0x7d + 0x3f + 0x56 + 0x3b + 0x7d + 0x3f + 0x46 + 0x3b + 0x7d + 0x3f + 0x37 + 0x3b + 0x7d + 0x3f + 0x29 + 0x3b + 0x7d + 0x3f + 0x1d + 0x3b + 0x7c + 0x3f + 0x87 + 0x3b + 0x7c + 0x3f + 0x7a + 0x3b + 0x7c + 0x3f + 0x67 + 0x3b + 0x7d + 0x3f + 0x56 + 0x3b + 0x7d + 0x3f + 0x46 + 0x3b + 0x7d + 0x3f + 0x37 + 0x3b + 0x7d + 0x3f + 0x29 + 0x3b + 0x7d + 0x3f + 0x1d + 0x3b + 0x7e + 0x3f + 0x11 + 0x3b + 0x7e + 0x3f + 0x6 + 0x3b + 0x7e + 0x3f + 0xf8 + 0x3a + 0x7e + 0x3f + 0xe5 + 0x3a + 0x7e + 0x3f + 0xd4 + 0x3a + 0x7e + 0x3f + 0xc4 + 0x3a + 0x7e + 0x3f + 0xb5 + 0x3a + 0x7f + 0x3f + 0xa8 + 0x3a + 0x7e + 0x3f + 0x11 + 0x3b + 0x7e + 0x3f + 0x6 + 0x3b + 0x7e + 0x3f + 0xf8 + 0x3a + 0x7e + 0x3f + 0xe5 + 0x3a + 0x7e + 0x3f + 0xd4 + 0x3a + 0x7e + 0x3f + 0xc4 + 0x3a + 0x7e + 0x3f + 0xb5 + 0x3a + 0x7f + 0x3f + 0xa8 + 0x3a + 0x7f + 0x3f + 0x9b + 0x3a + 0x7f + 0x3f + 0x8f + 0x3a + 0x7f + 0x3f + 0x85 + 0x3a + 0x7f + 0x3f + 0x75 + 0x3a + 0x7f + 0x3f + 0x63 + 0x3a + 0x7f + 0x3f + 0x52 + 0x3a + 0x7f + 0x3f + 0x42 + 0x3a + 0x7f + 0x3f + 0x34 + 0x3a + 0x7f + 0x3f + 0x9b + 0x3a + 0x7f + 0x3f + 0x8f + 0x3a + 0x7f + 0x3f + 0x85 + 0x3a + 0x7f + 0x3f + 0x75 + 0x3a + 0x7f + 0x3f + 0x63 + 0x3a + 0x7f + 0x3f + 0x52 + 0x3a + 0x7f + 0x3f + 0x42 + 0x3a + 0x7f + 0x3f + 0x34 + 0x3a + 0x7f + 0x3f + 0x26 + 0x3a + 0x7f + 0x3f + 0x1a + 0x3a + 0x7f + 0x3f + 0xe + 0x3a + 0x7f + 0x3f + 0x3 + 0x3a + 0x7f + 0x3f + 0xf3 + 0x39 + 0x7f + 0x3f + 0xe1 + 0x39 + 0x7f + 0x3f + 0xd0 + 0x39 + 0x80 + 0x3f + 0xc0 + 0x39 + 0x7f + 0x3f + 0x26 + 0x3a + 0x7f + 0x3f + 0x1a + 0x3a + 0x7f + 0x3f + 0xe + 0x3a + 0x7f + 0x3f + 0x3 + 0x3a + 0x7f + 0x3f + 0xf3 + 0x39 + 0x7f + 0x3f + 0xe1 + 0x39 + 0x7f + 0x3f + 0xd0 + 0x39 + 0x80 + 0x3f + 0xc0 + 0x39 + 0x80 + 0x3f + 0xb2 + 0x39 + 0x80 + 0x3f + 0xa4 + 0x39 + 0x80 + 0x3f + 0x98 + 0x39 + 0x80 + 0x3f + 0x8d + 0x39 + 0x80 + 0x3f + 0x82 + 0x39 + 0x80 + 0x3f + 0x71 + 0x39 + 0x80 + 0x3f + 0x5f + 0x39 + 0x80 + 0x3f + 0x4e + 0x39 + 0x80 + 0x3f + 0xb2 + 0x39 + 0x80 + 0x3f + 0xa4 + 0x39 + 0x80 + 0x3f + 0x98 + 0x39 + 0x80 + 0x3f + 0x8d + 0x39 + 0x80 + 0x3f + 0x82 + 0x39 + 0x80 + 0x3f + 0x71 + 0x39 + 0x80 + 0x3f + 0x5f + 0x39 + 0x80 + 0x3f + 0x4e + 0x39 + +.data_segment_name +.data weak 64 _ZN8tanh_lutILj0ELj512EE7data_cdE DMb + 0x80 + 0xbf + 0x3e + 0x39 + 0x80 + 0xbf + 0x4e + 0x39 + 0x80 + 0xbf + 0x5f + 0x39 + 0x80 + 0xbf + 0x71 + 0x39 + 0x80 + 0xbf + 0x82 + 0x39 + 0x80 + 0xbf + 0x8d + 0x39 + 0x80 + 0xbf + 0x98 + 0x39 + 0x80 + 0xbf + 0xa4 + 0x39 + 0x80 + 0xbf + 0x3e + 0x39 + 0x80 + 0xbf + 0x4e + 0x39 + 0x80 + 0xbf + 0x5f + 0x39 + 0x80 + 0xbf + 0x71 + 0x39 + 0x80 + 0xbf + 0x82 + 0x39 + 0x80 + 0xbf + 0x8d + 0x39 + 0x80 + 0xbf + 0x98 + 0x39 + 0x80 + 0xbf + 0xa4 + 0x39 + 0x80 + 0xbf + 0xb2 + 0x39 + 0x80 + 0xbf + 0xc0 + 0x39 + 0x7f + 0xbf + 0xd0 + 0x39 + 0x7f + 0xbf + 0xe1 + 0x39 + 0x7f + 0xbf + 0xf3 + 0x39 + 0x7f + 0xbf + 0x3 + 0x3a + 0x7f + 0xbf + 0xe + 0x3a + 0x7f + 0xbf + 0x1a + 0x3a + 0x80 + 0xbf + 0xb2 + 0x39 + 0x80 + 0xbf + 0xc0 + 0x39 + 0x7f + 0xbf + 0xd0 + 0x39 + 0x7f + 0xbf + 0xe1 + 0x39 + 0x7f + 0xbf + 0xf3 + 0x39 + 0x7f + 0xbf + 0x3 + 0x3a + 0x7f + 0xbf + 0xe + 0x3a + 0x7f + 0xbf + 0x1a + 0x3a + 0x7f + 0xbf + 0x26 + 0x3a + 0x7f + 0xbf + 0x34 + 0x3a + 0x7f + 0xbf + 0x42 + 0x3a + 0x7f + 0xbf + 0x52 + 0x3a + 0x7f + 0xbf + 0x63 + 0x3a + 0x7f + 0xbf + 0x75 + 0x3a + 0x7f + 0xbf + 0x85 + 0x3a + 0x7f + 0xbf + 0x8f + 0x3a + 0x7f + 0xbf + 0x26 + 0x3a + 0x7f + 0xbf + 0x34 + 0x3a + 0x7f + 0xbf + 0x42 + 0x3a + 0x7f + 0xbf + 0x52 + 0x3a + 0x7f + 0xbf + 0x63 + 0x3a + 0x7f + 0xbf + 0x75 + 0x3a + 0x7f + 0xbf + 0x85 + 0x3a + 0x7f + 0xbf + 0x8f + 0x3a + 0x7f + 0xbf + 0x9b + 0x3a + 0x7f + 0xbf + 0xa8 + 0x3a + 0x7e + 0xbf + 0xb5 + 0x3a + 0x7e + 0xbf + 0xc4 + 0x3a + 0x7e + 0xbf + 0xd4 + 0x3a + 0x7e + 0xbf + 0xe5 + 0x3a + 0x7e + 0xbf + 0xf8 + 0x3a + 0x7e + 0xbf + 0x6 + 0x3b + 0x7f + 0xbf + 0x9b + 0x3a + 0x7f + 0xbf + 0xa8 + 0x3a + 0x7e + 0xbf + 0xb5 + 0x3a + 0x7e + 0xbf + 0xc4 + 0x3a + 0x7e + 0xbf + 0xd4 + 0x3a + 0x7e + 0xbf + 0xe5 + 0x3a + 0x7e + 0xbf + 0xf8 + 0x3a + 0x7e + 0xbf + 0x6 + 0x3b + 0x7e + 0xbf + 0x11 + 0x3b + 0x7d + 0xbf + 0x1d + 0x3b + 0x7d + 0xbf + 0x29 + 0x3b + 0x7d + 0xbf + 0x37 + 0x3b + 0x7d + 0xbf + 0x46 + 0x3b + 0x7d + 0xbf + 0x56 + 0x3b + 0x7c + 0xbf + 0x67 + 0x3b + 0x7c + 0xbf + 0x7a + 0x3b + 0x7e + 0xbf + 0x11 + 0x3b + 0x7d + 0xbf + 0x1d + 0x3b + 0x7d + 0xbf + 0x29 + 0x3b + 0x7d + 0xbf + 0x37 + 0x3b + 0x7d + 0xbf + 0x46 + 0x3b + 0x7d + 0xbf + 0x56 + 0x3b + 0x7c + 0xbf + 0x67 + 0x3b + 0x7c + 0xbf + 0x7a + 0x3b + 0x7c + 0xbf + 0x87 + 0x3b + 0x7c + 0xbf + 0x92 + 0x3b + 0x7b + 0xbf + 0x9e + 0x3b + 0x7b + 0xbf + 0xab + 0x3b + 0x7b + 0xbf + 0xb9 + 0x3b + 0x7a + 0xbf + 0xc8 + 0x3b + 0x7a + 0xbf + 0xd8 + 0x3b + 0x79 + 0xbf + 0xe9 + 0x3b + 0x7c + 0xbf + 0x87 + 0x3b + 0x7c + 0xbf + 0x92 + 0x3b + 0x7b + 0xbf + 0x9e + 0x3b + 0x7b + 0xbf + 0xab + 0x3b + 0x7b + 0xbf + 0xb9 + 0x3b + 0x7a + 0xbf + 0xc8 + 0x3b + 0x7a + 0xbf + 0xd8 + 0x3b + 0x79 + 0xbf + 0xe9 + 0x3b + 0x79 + 0xbf + 0xfc + 0x3b + 0x78 + 0xbf + 0x8 + 0x3c + 0x78 + 0xbf + 0x13 + 0x3c + 0x77 + 0xbf + 0x1f + 0x3c + 0x77 + 0xbf + 0x2c + 0x3c + 0x76 + 0xbf + 0x3a + 0x3c + 0x75 + 0xbf + 0x49 + 0x3c + 0x75 + 0xbf + 0x59 + 0x3c + 0x79 + 0xbf + 0xfc + 0x3b + 0x78 + 0xbf + 0x8 + 0x3c + 0x78 + 0xbf + 0x13 + 0x3c + 0x77 + 0xbf + 0x1f + 0x3c + 0x77 + 0xbf + 0x2c + 0x3c + 0x76 + 0xbf + 0x3a + 0x3c + 0x75 + 0xbf + 0x49 + 0x3c + 0x75 + 0xbf + 0x59 + 0x3c + 0x74 + 0xbf + 0x6b + 0x3c + 0x73 + 0xbf + 0x7e + 0x3c + 0x72 + 0xbf + 0x89 + 0x3c + 0x71 + 0xbf + 0x94 + 0x3c + 0x70 + 0xbf + 0xa0 + 0x3c + 0x6f + 0xbf + 0xad + 0x3c + 0x6e + 0xbf + 0xbb + 0x3c + 0x6d + 0xbf + 0xca + 0x3c + 0x74 + 0xbf + 0x6b + 0x3c + 0x73 + 0xbf + 0x7e + 0x3c + 0x72 + 0xbf + 0x89 + 0x3c + 0x71 + 0xbf + 0x94 + 0x3c + 0x70 + 0xbf + 0xa0 + 0x3c + 0x6f + 0xbf + 0xad + 0x3c + 0x6e + 0xbf + 0xbb + 0x3c + 0x6d + 0xbf + 0xca + 0x3c + 0x6c + 0xbf + 0xda + 0x3c + 0x6a + 0xbf + 0xeb + 0x3c + 0x69 + 0xbf + 0xfe + 0x3c + 0x67 + 0xbf + 0x9 + 0x3d + 0x66 + 0xbf + 0x14 + 0x3d + 0x64 + 0xbf + 0x20 + 0x3d + 0x62 + 0xbf + 0x2d + 0x3d + 0x60 + 0xbf + 0x3a + 0x3d + 0x6c + 0xbf + 0xda + 0x3c + 0x6a + 0xbf + 0xeb + 0x3c + 0x69 + 0xbf + 0xfe + 0x3c + 0x67 + 0xbf + 0x9 + 0x3d + 0x66 + 0xbf + 0x14 + 0x3d + 0x64 + 0xbf + 0x20 + 0x3d + 0x62 + 0xbf + 0x2d + 0x3d + 0x60 + 0xbf + 0x3a + 0x3d + 0x5e + 0xbf + 0x49 + 0x3d + 0x5c + 0xbf + 0x59 + 0x3d + 0x5a + 0xbf + 0x6a + 0x3d + 0x57 + 0xbf + 0x7d + 0x3d + 0x55 + 0xbf + 0x88 + 0x3d + 0x52 + 0xbf + 0x93 + 0x3d + 0x4f + 0xbf + 0x9e + 0x3d + 0x4c + 0xbf + 0xab + 0x3d + 0x5e + 0xbf + 0x49 + 0x3d + 0x5c + 0xbf + 0x59 + 0x3d + 0x5a + 0xbf + 0x6a + 0x3d + 0x57 + 0xbf + 0x7d + 0x3d + 0x55 + 0xbf + 0x88 + 0x3d + 0x52 + 0xbf + 0x93 + 0x3d + 0x4f + 0xbf + 0x9e + 0x3d + 0x4c + 0xbf + 0xab + 0x3d + 0x49 + 0xbf + 0xb8 + 0x3d + 0x46 + 0xbf + 0xc6 + 0x3d + 0x42 + 0xbf + 0xd5 + 0x3d + 0x3f + 0xbf + 0xe6 + 0x3d + 0x3b + 0xbf + 0xf7 + 0x3d + 0x37 + 0xbf + 0x5 + 0x3e + 0x33 + 0xbf + 0xf + 0x3e + 0x2e + 0xbf + 0x1a + 0x3e + 0x49 + 0xbf + 0xb8 + 0x3d + 0x46 + 0xbf + 0xc6 + 0x3d + 0x42 + 0xbf + 0xd5 + 0x3d + 0x3f + 0xbf + 0xe6 + 0x3d + 0x3b + 0xbf + 0xf7 + 0x3d + 0x37 + 0xbf + 0x5 + 0x3e + 0x33 + 0xbf + 0xf + 0x3e + 0x2e + 0xbf + 0x1a + 0x3e + 0x2a + 0xbf + 0x25 + 0x3e + 0x25 + 0xbf + 0x31 + 0x3e + 0x20 + 0xbf + 0x3e + 0x3e + 0x1b + 0xbf + 0x4c + 0x3e + 0x16 + 0xbf + 0x5b + 0x3e + 0x10 + 0xbf + 0x6b + 0x3e + 0xb + 0xbf + 0x7b + 0x3e + 0x6 + 0xbf + 0x86 + 0x3e + 0x2a + 0xbf + 0x25 + 0x3e + 0x25 + 0xbf + 0x31 + 0x3e + 0x20 + 0xbf + 0x3e + 0x3e + 0x1b + 0xbf + 0x4c + 0x3e + 0x16 + 0xbf + 0x5b + 0x3e + 0x10 + 0xbf + 0x6b + 0x3e + 0xb + 0xbf + 0x7b + 0x3e + 0x6 + 0xbf + 0x86 + 0x3e + 0xfe + 0xbe + 0x90 + 0x3e + 0xf3 + 0xbe + 0x99 + 0x3e + 0xe6 + 0xbe + 0xa4 + 0x3e + 0xdb + 0xbe + 0xae + 0x3e + 0xcd + 0xbe + 0xba + 0x3e + 0xc0 + 0xbe + 0xc6 + 0x3e + 0xb4 + 0xbe + 0xd2 + 0x3e + 0xa7 + 0xbe + 0xdf + 0x3e + 0xfe + 0xbe + 0x90 + 0x3e + 0xf3 + 0xbe + 0x99 + 0x3e + 0xe6 + 0xbe + 0xa4 + 0x3e + 0xdb + 0xbe + 0xae + 0x3e + 0xcd + 0xbe + 0xba + 0x3e + 0xc0 + 0xbe + 0xc6 + 0x3e + 0xb4 + 0xbe + 0xd2 + 0x3e + 0xa7 + 0xbe + 0xdf + 0x3e + 0x9b + 0xbe + 0xec + 0x3e + 0x8e + 0xbe + 0xfa + 0x3e + 0x81 + 0xbe + 0x4 + 0x3f + 0x6b + 0xbe + 0xb + 0x3f + 0x52 + 0xbe + 0x13 + 0x3f + 0x3c + 0xbe + 0x1a + 0x3f + 0x25 + 0xbe + 0x22 + 0x3f + 0xf + 0xbe + 0x2a + 0x3f + 0x9b + 0xbe + 0xec + 0x3e + 0x8e + 0xbe + 0xfa + 0x3e + 0x81 + 0xbe + 0x4 + 0x3f + 0x6b + 0xbe + 0xb + 0x3f + 0x52 + 0xbe + 0x13 + 0x3f + 0x3c + 0xbe + 0x1a + 0x3f + 0x25 + 0xbe + 0x22 + 0x3f + 0xf + 0xbe + 0x2a + 0x3f + 0xfb + 0xbd + 0x31 + 0x3f + 0xd4 + 0xbd + 0x39 + 0x3f + 0xb4 + 0xbd + 0x40 + 0x3f + 0x92 + 0xbd + 0x48 + 0x3f + 0x6e + 0xbd + 0x4f + 0x3f + 0x3c + 0xbd + 0x56 + 0x3f + 0xe + 0xbd + 0x5d + 0x3f + 0xd5 + 0xbc + 0x63 + 0x3f + 0xfb + 0xbd + 0x31 + 0x3f + 0xd4 + 0xbd + 0x39 + 0x3f + 0xb4 + 0xbd + 0x40 + 0x3f + 0x92 + 0xbd + 0x48 + 0x3f + 0x6e + 0xbd + 0x4f + 0x3f + 0x3c + 0xbd + 0x56 + 0x3f + 0xe + 0xbd + 0x5d + 0x3f + 0xd5 + 0xbc + 0x63 + 0x3f + 0x96 + 0xbc + 0x69 + 0x3f + 0x4f + 0xbc + 0x6e + 0x3f + 0xd + 0xbc + 0x72 + 0x3f + 0xaa + 0xbb + 0x76 + 0x3f + 0x1d + 0xbb + 0x7a + 0x3f + 0x5c + 0xba + 0x7d + 0x3f + 0xed + 0xb9 + 0x7e + 0x3f + 0xa7 + 0x37 + 0x80 + 0x3f + 0x96 + 0xbc + 0x69 + 0x3f + 0x4f + 0xbc + 0x6e + 0x3f + 0xd + 0xbc + 0x72 + 0x3f + 0xaa + 0xbb + 0x76 + 0x3f + 0x1d + 0xbb + 0x7a + 0x3f + 0x5c + 0xba + 0x7d + 0x3f + 0xed + 0xb9 + 0x7e + 0x3f + 0xa7 + 0x37 + 0x80 + 0x3f + 0x0 + 0x0 + 0x80 + 0x3f + 0xa7 + 0xb7 + 0x80 + 0x3f + 0xed + 0x39 + 0x7e + 0x3f + 0x5c + 0x3a + 0x7d + 0x3f + 0x1d + 0x3b + 0x7a + 0x3f + 0xaa + 0x3b + 0x76 + 0x3f + 0xd + 0x3c + 0x72 + 0x3f + 0x4f + 0x3c + 0x6e + 0x3f + 0x0 + 0x0 + 0x80 + 0x3f + 0xa7 + 0xb7 + 0x80 + 0x3f + 0xed + 0x39 + 0x7e + 0x3f + 0x5c + 0x3a + 0x7d + 0x3f + 0x1d + 0x3b + 0x7a + 0x3f + 0xaa + 0x3b + 0x76 + 0x3f + 0xd + 0x3c + 0x72 + 0x3f + 0x4f + 0x3c + 0x6e + 0x3f + 0x96 + 0x3c + 0x69 + 0x3f + 0xd5 + 0x3c + 0x63 + 0x3f + 0xe + 0x3d + 0x5d + 0x3f + 0x3c + 0x3d + 0x56 + 0x3f + 0x6e + 0x3d + 0x4f + 0x3f + 0x92 + 0x3d + 0x48 + 0x3f + 0xb4 + 0x3d + 0x40 + 0x3f + 0xd4 + 0x3d + 0x39 + 0x3f + 0x96 + 0x3c + 0x69 + 0x3f + 0xd5 + 0x3c + 0x63 + 0x3f + 0xe + 0x3d + 0x5d + 0x3f + 0x3c + 0x3d + 0x56 + 0x3f + 0x6e + 0x3d + 0x4f + 0x3f + 0x92 + 0x3d + 0x48 + 0x3f + 0xb4 + 0x3d + 0x40 + 0x3f + 0xd4 + 0x3d + 0x39 + 0x3f + 0xfb + 0x3d + 0x31 + 0x3f + 0xf + 0x3e + 0x2a + 0x3f + 0x25 + 0x3e + 0x22 + 0x3f + 0x3c + 0x3e + 0x1a + 0x3f + 0x52 + 0x3e + 0x13 + 0x3f + 0x6b + 0x3e + 0xb + 0x3f + 0x81 + 0x3e + 0x4 + 0x3f + 0x8e + 0x3e + 0xfa + 0x3e + 0xfb + 0x3d + 0x31 + 0x3f + 0xf + 0x3e + 0x2a + 0x3f + 0x25 + 0x3e + 0x22 + 0x3f + 0x3c + 0x3e + 0x1a + 0x3f + 0x52 + 0x3e + 0x13 + 0x3f + 0x6b + 0x3e + 0xb + 0x3f + 0x81 + 0x3e + 0x4 + 0x3f + 0x8e + 0x3e + 0xfa + 0x3e + 0x9b + 0x3e + 0xec + 0x3e + 0xa7 + 0x3e + 0xdf + 0x3e + 0xb4 + 0x3e + 0xd2 + 0x3e + 0xc0 + 0x3e + 0xc6 + 0x3e + 0xcd + 0x3e + 0xba + 0x3e + 0xdb + 0x3e + 0xae + 0x3e + 0xe6 + 0x3e + 0xa4 + 0x3e + 0xf3 + 0x3e + 0x99 + 0x3e + 0x9b + 0x3e + 0xec + 0x3e + 0xa7 + 0x3e + 0xdf + 0x3e + 0xb4 + 0x3e + 0xd2 + 0x3e + 0xc0 + 0x3e + 0xc6 + 0x3e + 0xcd + 0x3e + 0xba + 0x3e + 0xdb + 0x3e + 0xae + 0x3e + 0xe6 + 0x3e + 0xa4 + 0x3e + 0xf3 + 0x3e + 0x99 + 0x3e + 0xfe + 0x3e + 0x90 + 0x3e + 0x6 + 0x3f + 0x86 + 0x3e + 0xb + 0x3f + 0x7b + 0x3e + 0x10 + 0x3f + 0x6b + 0x3e + 0x16 + 0x3f + 0x5b + 0x3e + 0x1b + 0x3f + 0x4c + 0x3e + 0x20 + 0x3f + 0x3e + 0x3e + 0x25 + 0x3f + 0x31 + 0x3e + 0xfe + 0x3e + 0x90 + 0x3e + 0x6 + 0x3f + 0x86 + 0x3e + 0xb + 0x3f + 0x7b + 0x3e + 0x10 + 0x3f + 0x6b + 0x3e + 0x16 + 0x3f + 0x5b + 0x3e + 0x1b + 0x3f + 0x4c + 0x3e + 0x20 + 0x3f + 0x3e + 0x3e + 0x25 + 0x3f + 0x31 + 0x3e + 0x2a + 0x3f + 0x25 + 0x3e + 0x2e + 0x3f + 0x1a + 0x3e + 0x33 + 0x3f + 0xf + 0x3e + 0x37 + 0x3f + 0x5 + 0x3e + 0x3b + 0x3f + 0xf7 + 0x3d + 0x3f + 0x3f + 0xe6 + 0x3d + 0x42 + 0x3f + 0xd5 + 0x3d + 0x46 + 0x3f + 0xc6 + 0x3d + 0x2a + 0x3f + 0x25 + 0x3e + 0x2e + 0x3f + 0x1a + 0x3e + 0x33 + 0x3f + 0xf + 0x3e + 0x37 + 0x3f + 0x5 + 0x3e + 0x3b + 0x3f + 0xf7 + 0x3d + 0x3f + 0x3f + 0xe6 + 0x3d + 0x42 + 0x3f + 0xd5 + 0x3d + 0x46 + 0x3f + 0xc6 + 0x3d + 0x49 + 0x3f + 0xb8 + 0x3d + 0x4c + 0x3f + 0xab + 0x3d + 0x4f + 0x3f + 0x9e + 0x3d + 0x52 + 0x3f + 0x93 + 0x3d + 0x55 + 0x3f + 0x88 + 0x3d + 0x57 + 0x3f + 0x7d + 0x3d + 0x5a + 0x3f + 0x6a + 0x3d + 0x5c + 0x3f + 0x59 + 0x3d + 0x49 + 0x3f + 0xb8 + 0x3d + 0x4c + 0x3f + 0xab + 0x3d + 0x4f + 0x3f + 0x9e + 0x3d + 0x52 + 0x3f + 0x93 + 0x3d + 0x55 + 0x3f + 0x88 + 0x3d + 0x57 + 0x3f + 0x7d + 0x3d + 0x5a + 0x3f + 0x6a + 0x3d + 0x5c + 0x3f + 0x59 + 0x3d + 0x5e + 0x3f + 0x49 + 0x3d + 0x60 + 0x3f + 0x3a + 0x3d + 0x62 + 0x3f + 0x2d + 0x3d + 0x64 + 0x3f + 0x20 + 0x3d + 0x66 + 0x3f + 0x14 + 0x3d + 0x67 + 0x3f + 0x9 + 0x3d + 0x69 + 0x3f + 0xfe + 0x3c + 0x6a + 0x3f + 0xeb + 0x3c + 0x5e + 0x3f + 0x49 + 0x3d + 0x60 + 0x3f + 0x3a + 0x3d + 0x62 + 0x3f + 0x2d + 0x3d + 0x64 + 0x3f + 0x20 + 0x3d + 0x66 + 0x3f + 0x14 + 0x3d + 0x67 + 0x3f + 0x9 + 0x3d + 0x69 + 0x3f + 0xfe + 0x3c + 0x6a + 0x3f + 0xeb + 0x3c + 0x6c + 0x3f + 0xda + 0x3c + 0x6d + 0x3f + 0xca + 0x3c + 0x6e + 0x3f + 0xbb + 0x3c + 0x6f + 0x3f + 0xad + 0x3c + 0x70 + 0x3f + 0xa0 + 0x3c + 0x71 + 0x3f + 0x94 + 0x3c + 0x72 + 0x3f + 0x89 + 0x3c + 0x73 + 0x3f + 0x7e + 0x3c + 0x6c + 0x3f + 0xda + 0x3c + 0x6d + 0x3f + 0xca + 0x3c + 0x6e + 0x3f + 0xbb + 0x3c + 0x6f + 0x3f + 0xad + 0x3c + 0x70 + 0x3f + 0xa0 + 0x3c + 0x71 + 0x3f + 0x94 + 0x3c + 0x72 + 0x3f + 0x89 + 0x3c + 0x73 + 0x3f + 0x7e + 0x3c + 0x74 + 0x3f + 0x6b + 0x3c + 0x75 + 0x3f + 0x59 + 0x3c + 0x75 + 0x3f + 0x49 + 0x3c + 0x76 + 0x3f + 0x3a + 0x3c + 0x77 + 0x3f + 0x2c + 0x3c + 0x77 + 0x3f + 0x1f + 0x3c + 0x78 + 0x3f + 0x13 + 0x3c + 0x78 + 0x3f + 0x8 + 0x3c + 0x74 + 0x3f + 0x6b + 0x3c + 0x75 + 0x3f + 0x59 + 0x3c + 0x75 + 0x3f + 0x49 + 0x3c + 0x76 + 0x3f + 0x3a + 0x3c + 0x77 + 0x3f + 0x2c + 0x3c + 0x77 + 0x3f + 0x1f + 0x3c + 0x78 + 0x3f + 0x13 + 0x3c + 0x78 + 0x3f + 0x8 + 0x3c + 0x79 + 0x3f + 0xfc + 0x3b + 0x79 + 0x3f + 0xe9 + 0x3b + 0x7a + 0x3f + 0xd8 + 0x3b + 0x7a + 0x3f + 0xc8 + 0x3b + 0x7b + 0x3f + 0xb9 + 0x3b + 0x7b + 0x3f + 0xab + 0x3b + 0x7b + 0x3f + 0x9e + 0x3b + 0x7c + 0x3f + 0x92 + 0x3b + 0x79 + 0x3f + 0xfc + 0x3b + 0x79 + 0x3f + 0xe9 + 0x3b + 0x7a + 0x3f + 0xd8 + 0x3b + 0x7a + 0x3f + 0xc8 + 0x3b + 0x7b + 0x3f + 0xb9 + 0x3b + 0x7b + 0x3f + 0xab + 0x3b + 0x7b + 0x3f + 0x9e + 0x3b + 0x7c + 0x3f + 0x92 + 0x3b + 0x7c + 0x3f + 0x87 + 0x3b + 0x7c + 0x3f + 0x7a + 0x3b + 0x7c + 0x3f + 0x67 + 0x3b + 0x7d + 0x3f + 0x56 + 0x3b + 0x7d + 0x3f + 0x46 + 0x3b + 0x7d + 0x3f + 0x37 + 0x3b + 0x7d + 0x3f + 0x29 + 0x3b + 0x7d + 0x3f + 0x1d + 0x3b + 0x7c + 0x3f + 0x87 + 0x3b + 0x7c + 0x3f + 0x7a + 0x3b + 0x7c + 0x3f + 0x67 + 0x3b + 0x7d + 0x3f + 0x56 + 0x3b + 0x7d + 0x3f + 0x46 + 0x3b + 0x7d + 0x3f + 0x37 + 0x3b + 0x7d + 0x3f + 0x29 + 0x3b + 0x7d + 0x3f + 0x1d + 0x3b + 0x7e + 0x3f + 0x11 + 0x3b + 0x7e + 0x3f + 0x6 + 0x3b + 0x7e + 0x3f + 0xf8 + 0x3a + 0x7e + 0x3f + 0xe5 + 0x3a + 0x7e + 0x3f + 0xd4 + 0x3a + 0x7e + 0x3f + 0xc4 + 0x3a + 0x7e + 0x3f + 0xb5 + 0x3a + 0x7f + 0x3f + 0xa8 + 0x3a + 0x7e + 0x3f + 0x11 + 0x3b + 0x7e + 0x3f + 0x6 + 0x3b + 0x7e + 0x3f + 0xf8 + 0x3a + 0x7e + 0x3f + 0xe5 + 0x3a + 0x7e + 0x3f + 0xd4 + 0x3a + 0x7e + 0x3f + 0xc4 + 0x3a + 0x7e + 0x3f + 0xb5 + 0x3a + 0x7f + 0x3f + 0xa8 + 0x3a + 0x7f + 0x3f + 0x9b + 0x3a + 0x7f + 0x3f + 0x8f + 0x3a + 0x7f + 0x3f + 0x85 + 0x3a + 0x7f + 0x3f + 0x75 + 0x3a + 0x7f + 0x3f + 0x63 + 0x3a + 0x7f + 0x3f + 0x52 + 0x3a + 0x7f + 0x3f + 0x42 + 0x3a + 0x7f + 0x3f + 0x34 + 0x3a + 0x7f + 0x3f + 0x9b + 0x3a + 0x7f + 0x3f + 0x8f + 0x3a + 0x7f + 0x3f + 0x85 + 0x3a + 0x7f + 0x3f + 0x75 + 0x3a + 0x7f + 0x3f + 0x63 + 0x3a + 0x7f + 0x3f + 0x52 + 0x3a + 0x7f + 0x3f + 0x42 + 0x3a + 0x7f + 0x3f + 0x34 + 0x3a + 0x7f + 0x3f + 0x26 + 0x3a + 0x7f + 0x3f + 0x1a + 0x3a + 0x7f + 0x3f + 0xe + 0x3a + 0x7f + 0x3f + 0x3 + 0x3a + 0x7f + 0x3f + 0xf3 + 0x39 + 0x7f + 0x3f + 0xe1 + 0x39 + 0x7f + 0x3f + 0xd0 + 0x39 + 0x80 + 0x3f + 0xc0 + 0x39 + 0x7f + 0x3f + 0x26 + 0x3a + 0x7f + 0x3f + 0x1a + 0x3a + 0x7f + 0x3f + 0xe + 0x3a + 0x7f + 0x3f + 0x3 + 0x3a + 0x7f + 0x3f + 0xf3 + 0x39 + 0x7f + 0x3f + 0xe1 + 0x39 + 0x7f + 0x3f + 0xd0 + 0x39 + 0x80 + 0x3f + 0xc0 + 0x39 + 0x80 + 0x3f + 0xb2 + 0x39 + 0x80 + 0x3f + 0xa4 + 0x39 + 0x80 + 0x3f + 0x98 + 0x39 + 0x80 + 0x3f + 0x8d + 0x39 + 0x80 + 0x3f + 0x82 + 0x39 + 0x80 + 0x3f + 0x71 + 0x39 + 0x80 + 0x3f + 0x5f + 0x39 + 0x80 + 0x3f + 0x4e + 0x39 + 0x80 + 0x3f + 0xb2 + 0x39 + 0x80 + 0x3f + 0xa4 + 0x39 + 0x80 + 0x3f + 0x98 + 0x39 + 0x80 + 0x3f + 0x8d + 0x39 + 0x80 + 0x3f + 0x82 + 0x39 + 0x80 + 0x3f + 0x71 + 0x39 + 0x80 + 0x3f + 0x5f + 0x39 + 0x80 + 0x3f + 0x4e + 0x39 + +.data_segment_name +.data weak 64 _ZN11log_f32_lutILj256EE21fraction_table_ab_f32E DMb + 0x0 + 0x0 + 0x0 + 0x0 + 0x53 + 0x1 + 0xff + 0x3b + 0x46 + 0x5 + 0x7e + 0x3c + 0xd8 + 0xc8 + 0xbd + 0x3c + 0xd8 + 0x14 + 0xfc + 0x3c + 0x3e + 0xf4 + 0x1c + 0x3d + 0xc8 + 0xa2 + 0x3b + 0x3d + 0xec + 0x16 + 0x5a + 0x3d + 0x0 + 0x0 + 0x0 + 0x0 + 0x53 + 0x1 + 0xff + 0x3b + 0x46 + 0x5 + 0x7e + 0x3c + 0xd8 + 0xc8 + 0xbd + 0x3c + 0xd8 + 0x14 + 0xfc + 0x3c + 0x3e + 0xf4 + 0x1c + 0x3d + 0xc8 + 0xa2 + 0x3b + 0x3d + 0xec + 0x16 + 0x5a + 0x3d + 0x86 + 0x51 + 0x78 + 0x3d + 0xb7 + 0x29 + 0x8b + 0x3d + 0xbd + 0xe + 0x9a + 0x3d + 0x3a + 0xd8 + 0xa8 + 0x3d + 0x94 + 0x86 + 0xb7 + 0x3d + 0x2f + 0x1a + 0xc6 + 0x3d + 0x6a + 0x93 + 0xd4 + 0x3d + 0xa4 + 0xf2 + 0xe2 + 0x3d + 0x86 + 0x51 + 0x78 + 0x3d + 0xb7 + 0x29 + 0x8b + 0x3d + 0xbd + 0xe + 0x9a + 0x3d + 0x3a + 0xd8 + 0xa8 + 0x3d + 0x94 + 0x86 + 0xb7 + 0x3d + 0x2f + 0x1a + 0xc6 + 0x3d + 0x6a + 0x93 + 0xd4 + 0x3d + 0xa4 + 0xf2 + 0xe2 + 0x3d + 0x3b + 0x38 + 0xf1 + 0x3d + 0x8a + 0x64 + 0xff + 0x3d + 0xf4 + 0xbb + 0x6 + 0x3e + 0x57 + 0xb9 + 0xd + 0x3e + 0x98 + 0xaa + 0x14 + 0x3e + 0xe1 + 0x8f + 0x1b + 0x3e + 0x5b + 0x69 + 0x22 + 0x3e + 0x2f + 0x37 + 0x29 + 0x3e + 0x3b + 0x38 + 0xf1 + 0x3d + 0x8a + 0x64 + 0xff + 0x3d + 0xf4 + 0xbb + 0x6 + 0x3e + 0x57 + 0xb9 + 0xd + 0x3e + 0x98 + 0xaa + 0x14 + 0x3e + 0xe1 + 0x8f + 0x1b + 0x3e + 0x5b + 0x69 + 0x22 + 0x3e + 0x2f + 0x37 + 0x29 + 0x3e + 0x84 + 0xf9 + 0x2f + 0x3e + 0x7f + 0xb0 + 0x36 + 0x3e + 0x48 + 0x5c + 0x3d + 0x3e + 0x3 + 0xfd + 0x43 + 0x3e + 0xd5 + 0x92 + 0x4a + 0x3e + 0xe1 + 0x1d + 0x51 + 0x3e + 0x4a + 0x9e + 0x57 + 0x3e + 0x34 + 0x14 + 0x5e + 0x3e + 0x84 + 0xf9 + 0x2f + 0x3e + 0x7f + 0xb0 + 0x36 + 0x3e + 0x48 + 0x5c + 0x3d + 0x3e + 0x3 + 0xfd + 0x43 + 0x3e + 0xd5 + 0x92 + 0x4a + 0x3e + 0xe1 + 0x1d + 0x51 + 0x3e + 0x4a + 0x9e + 0x57 + 0x3e + 0x34 + 0x14 + 0x5e + 0x3e + 0xbe + 0x7f + 0x64 + 0x3e + 0xb + 0xe1 + 0x6a + 0x3e + 0x3b + 0x38 + 0x71 + 0x3e + 0x6e + 0x85 + 0x77 + 0x3e + 0xc3 + 0xc8 + 0x7d + 0x3e + 0x2d + 0x1 + 0x82 + 0x3e + 0x27 + 0x19 + 0x85 + 0x3e + 0x60 + 0x2c + 0x88 + 0x3e + 0xbe + 0x7f + 0x64 + 0x3e + 0xb + 0xe1 + 0x6a + 0x3e + 0x3b + 0x38 + 0x71 + 0x3e + 0x6e + 0x85 + 0x77 + 0x3e + 0xc3 + 0xc8 + 0x7d + 0x3e + 0x2d + 0x1 + 0x82 + 0x3e + 0x27 + 0x19 + 0x85 + 0x3e + 0x60 + 0x2c + 0x88 + 0x3e + 0xe5 + 0x3a + 0x8b + 0x3e + 0xc6 + 0x44 + 0x8e + 0x3e + 0x10 + 0x4a + 0x91 + 0x3e + 0xd1 + 0x4a + 0x94 + 0x3e + 0x16 + 0x47 + 0x97 + 0x3e + 0xed + 0x3e + 0x9a + 0x3e + 0x63 + 0x32 + 0x9d + 0x3e + 0x84 + 0x21 + 0xa0 + 0x3e + 0xe5 + 0x3a + 0x8b + 0x3e + 0xc6 + 0x44 + 0x8e + 0x3e + 0x10 + 0x4a + 0x91 + 0x3e + 0xd1 + 0x4a + 0x94 + 0x3e + 0x16 + 0x47 + 0x97 + 0x3e + 0xed + 0x3e + 0x9a + 0x3e + 0x63 + 0x32 + 0x9d + 0x3e + 0x84 + 0x21 + 0xa0 + 0x3e + 0x5e + 0xc + 0xa3 + 0x3e + 0xfd + 0xf2 + 0xa5 + 0x3e + 0x6c + 0xd5 + 0xa8 + 0x3e + 0xb9 + 0xb3 + 0xab + 0x3e + 0xee + 0x8d + 0xae + 0x3e + 0x18 + 0x64 + 0xb1 + 0x3e + 0x41 + 0x36 + 0xb4 + 0x3e + 0x75 + 0x4 + 0xb7 + 0x3e + 0x5e + 0xc + 0xa3 + 0x3e + 0xfd + 0xf2 + 0xa5 + 0x3e + 0x6c + 0xd5 + 0xa8 + 0x3e + 0xb9 + 0xb3 + 0xab + 0x3e + 0xee + 0x8d + 0xae + 0x3e + 0x18 + 0x64 + 0xb1 + 0x3e + 0x41 + 0x36 + 0xb4 + 0x3e + 0x75 + 0x4 + 0xb7 + 0x3e + 0xc0 + 0xce + 0xb9 + 0x3e + 0x2b + 0x95 + 0xbc + 0x3e + 0xc2 + 0x57 + 0xbf + 0x3e + 0x8f + 0x16 + 0xc2 + 0x3e + 0x9c + 0xd1 + 0xc4 + 0x3e + 0xf4 + 0x88 + 0xc7 + 0x3e + 0xa1 + 0x3c + 0xca + 0x3e + 0xac + 0xec + 0xcc + 0x3e + 0xc0 + 0xce + 0xb9 + 0x3e + 0x2b + 0x95 + 0xbc + 0x3e + 0xc2 + 0x57 + 0xbf + 0x3e + 0x8f + 0x16 + 0xc2 + 0x3e + 0x9c + 0xd1 + 0xc4 + 0x3e + 0xf4 + 0x88 + 0xc7 + 0x3e + 0xa1 + 0x3c + 0xca + 0x3e + 0xac + 0xec + 0xcc + 0x3e + 0x1f + 0x99 + 0xcf + 0x3e + 0x5 + 0x42 + 0xd2 + 0x3e + 0x65 + 0xe7 + 0xd4 + 0x3e + 0x4a + 0x89 + 0xd7 + 0x3e + 0xbc + 0x27 + 0xda + 0x3e + 0xc5 + 0xc2 + 0xdc + 0x3e + 0x6d + 0x5a + 0xdf + 0x3e + 0xbd + 0xee + 0xe1 + 0x3e + 0x1f + 0x99 + 0xcf + 0x3e + 0x5 + 0x42 + 0xd2 + 0x3e + 0x65 + 0xe7 + 0xd4 + 0x3e + 0x4a + 0x89 + 0xd7 + 0x3e + 0xbc + 0x27 + 0xda + 0x3e + 0xc5 + 0xc2 + 0xdc + 0x3e + 0x6d + 0x5a + 0xdf + 0x3e + 0xbd + 0xee + 0xe1 + 0x3e + 0xbe + 0x7f + 0xe4 + 0x3e + 0x78 + 0xd + 0xe7 + 0x3e + 0xf4 + 0x97 + 0xe9 + 0x3e + 0x39 + 0x1f + 0xec + 0x3e + 0x50 + 0xa3 + 0xee + 0x3e + 0x41 + 0x24 + 0xf1 + 0x3e + 0x13 + 0xa2 + 0xf3 + 0x3e + 0xcf + 0x1c + 0xf6 + 0x3e + 0xbe + 0x7f + 0xe4 + 0x3e + 0x78 + 0xd + 0xe7 + 0x3e + 0xf4 + 0x97 + 0xe9 + 0x3e + 0x39 + 0x1f + 0xec + 0x3e + 0x50 + 0xa3 + 0xee + 0x3e + 0x41 + 0x24 + 0xf1 + 0x3e + 0x13 + 0xa2 + 0xf3 + 0x3e + 0xcf + 0x1c + 0xf6 + 0x3e + 0x7b + 0x94 + 0xf8 + 0x3e + 0x20 + 0x9 + 0xfb + 0x3e + 0xc4 + 0x7a + 0xfd + 0x3e + 0x70 + 0xe9 + 0xff + 0x3e + 0x95 + 0x2a + 0x1 + 0x3f + 0xfd + 0x5e + 0x2 + 0x3f + 0xf3 + 0x91 + 0x3 + 0x3f + 0x7a + 0xc3 + 0x4 + 0x3f + 0x7b + 0x94 + 0xf8 + 0x3e + 0x20 + 0x9 + 0xfb + 0x3e + 0xc4 + 0x7a + 0xfd + 0x3e + 0x70 + 0xe9 + 0xff + 0x3e + 0x95 + 0x2a + 0x1 + 0x3f + 0xfd + 0x5e + 0x2 + 0x3f + 0xf3 + 0x91 + 0x3 + 0x3f + 0x7a + 0xc3 + 0x4 + 0x3f + 0x97 + 0xf3 + 0x5 + 0x3f + 0x4c + 0x22 + 0x7 + 0x3f + 0x9d + 0x4f + 0x8 + 0x3f + 0x8d + 0x7b + 0x9 + 0x3f + 0x1f + 0xa6 + 0xa + 0x3f + 0x56 + 0xcf + 0xb + 0x3f + 0x36 + 0xf7 + 0xc + 0x3f + 0xc1 + 0x1d + 0xe + 0x3f + 0x97 + 0xf3 + 0x5 + 0x3f + 0x4c + 0x22 + 0x7 + 0x3f + 0x9d + 0x4f + 0x8 + 0x3f + 0x8d + 0x7b + 0x9 + 0x3f + 0x1f + 0xa6 + 0xa + 0x3f + 0x56 + 0xcf + 0xb + 0x3f + 0x36 + 0xf7 + 0xc + 0x3f + 0xc1 + 0x1d + 0xe + 0x3f + 0xfb + 0x42 + 0xf + 0x3f + 0xe7 + 0x66 + 0x10 + 0x3f + 0x87 + 0x89 + 0x11 + 0x3f + 0xde + 0xaa + 0x12 + 0x3f + 0xf1 + 0xca + 0x13 + 0x3f + 0xc0 + 0xe9 + 0x14 + 0x3f + 0x4f + 0x7 + 0x16 + 0x3f + 0xa2 + 0x23 + 0x17 + 0x3f + 0xfb + 0x42 + 0xf + 0x3f + 0xe7 + 0x66 + 0x10 + 0x3f + 0x87 + 0x89 + 0x11 + 0x3f + 0xde + 0xaa + 0x12 + 0x3f + 0xf1 + 0xca + 0x13 + 0x3f + 0xc0 + 0xe9 + 0x14 + 0x3f + 0x4f + 0x7 + 0x16 + 0x3f + 0xa2 + 0x23 + 0x17 + 0x3f + 0xba + 0x3e + 0x18 + 0x3f + 0x9a + 0x58 + 0x19 + 0x3f + 0x45 + 0x71 + 0x1a + 0x3f + 0xbe + 0x88 + 0x1b + 0x3f + 0x7 + 0x9f + 0x1c + 0x3f + 0x22 + 0xb4 + 0x1d + 0x3f + 0x13 + 0xc8 + 0x1e + 0x3f + 0xdc + 0xda + 0x1f + 0x3f + 0xba + 0x3e + 0x18 + 0x3f + 0x9a + 0x58 + 0x19 + 0x3f + 0x45 + 0x71 + 0x1a + 0x3f + 0xbe + 0x88 + 0x1b + 0x3f + 0x7 + 0x9f + 0x1c + 0x3f + 0x22 + 0xb4 + 0x1d + 0x3f + 0x13 + 0xc8 + 0x1e + 0x3f + 0xdc + 0xda + 0x1f + 0x3f + 0x7f + 0xec + 0x20 + 0x3f + 0xff + 0xfc + 0x21 + 0x3f + 0x5e + 0xc + 0x23 + 0x3f + 0x9f + 0x1a + 0x24 + 0x3f + 0xc3 + 0x27 + 0x25 + 0x3f + 0xcd + 0x33 + 0x26 + 0x3f + 0xc1 + 0x3e + 0x27 + 0x3f + 0x9e + 0x48 + 0x28 + 0x3f + 0x7f + 0xec + 0x20 + 0x3f + 0xff + 0xfc + 0x21 + 0x3f + 0x5e + 0xc + 0x23 + 0x3f + 0x9f + 0x1a + 0x24 + 0x3f + 0xc3 + 0x27 + 0x25 + 0x3f + 0xcd + 0x33 + 0x26 + 0x3f + 0xc1 + 0x3e + 0x27 + 0x3f + 0x9e + 0x48 + 0x28 + 0x3f + 0x69 + 0x51 + 0x29 + 0x3f + 0x23 + 0x59 + 0x2a + 0x3f + 0xcf + 0x5f + 0x2b + 0x3f + 0x6e + 0x65 + 0x2c + 0x3f + 0x2 + 0x6a + 0x2d + 0x3f + 0x8f + 0x6d + 0x2e + 0x3f + 0x15 + 0x70 + 0x2f + 0x3f + 0x98 + 0x71 + 0x30 + 0x3f + 0x69 + 0x51 + 0x29 + 0x3f + 0x23 + 0x59 + 0x2a + 0x3f + 0xcf + 0x5f + 0x2b + 0x3f + 0x6e + 0x65 + 0x2c + 0x3f + 0x2 + 0x6a + 0x2d + 0x3f + 0x8f + 0x6d + 0x2e + 0x3f + 0x15 + 0x70 + 0x2f + 0x3f + 0x98 + 0x71 + 0x30 + 0x3f + +.data_segment_name +.data weak 64 _ZN11log_f32_lutILj256EE21fraction_table_cd_f32E DMb + 0x0 + 0x0 + 0x0 + 0x0 + 0x53 + 0x1 + 0xff + 0x3b + 0x46 + 0x5 + 0x7e + 0x3c + 0xd8 + 0xc8 + 0xbd + 0x3c + 0xd8 + 0x14 + 0xfc + 0x3c + 0x3e + 0xf4 + 0x1c + 0x3d + 0xc8 + 0xa2 + 0x3b + 0x3d + 0xec + 0x16 + 0x5a + 0x3d + 0x0 + 0x0 + 0x0 + 0x0 + 0x53 + 0x1 + 0xff + 0x3b + 0x46 + 0x5 + 0x7e + 0x3c + 0xd8 + 0xc8 + 0xbd + 0x3c + 0xd8 + 0x14 + 0xfc + 0x3c + 0x3e + 0xf4 + 0x1c + 0x3d + 0xc8 + 0xa2 + 0x3b + 0x3d + 0xec + 0x16 + 0x5a + 0x3d + 0x86 + 0x51 + 0x78 + 0x3d + 0xb7 + 0x29 + 0x8b + 0x3d + 0xbd + 0xe + 0x9a + 0x3d + 0x3a + 0xd8 + 0xa8 + 0x3d + 0x94 + 0x86 + 0xb7 + 0x3d + 0x2f + 0x1a + 0xc6 + 0x3d + 0x6a + 0x93 + 0xd4 + 0x3d + 0xa4 + 0xf2 + 0xe2 + 0x3d + 0x86 + 0x51 + 0x78 + 0x3d + 0xb7 + 0x29 + 0x8b + 0x3d + 0xbd + 0xe + 0x9a + 0x3d + 0x3a + 0xd8 + 0xa8 + 0x3d + 0x94 + 0x86 + 0xb7 + 0x3d + 0x2f + 0x1a + 0xc6 + 0x3d + 0x6a + 0x93 + 0xd4 + 0x3d + 0xa4 + 0xf2 + 0xe2 + 0x3d + 0x3b + 0x38 + 0xf1 + 0x3d + 0x8a + 0x64 + 0xff + 0x3d + 0xf4 + 0xbb + 0x6 + 0x3e + 0x57 + 0xb9 + 0xd + 0x3e + 0x98 + 0xaa + 0x14 + 0x3e + 0xe1 + 0x8f + 0x1b + 0x3e + 0x5b + 0x69 + 0x22 + 0x3e + 0x2f + 0x37 + 0x29 + 0x3e + 0x3b + 0x38 + 0xf1 + 0x3d + 0x8a + 0x64 + 0xff + 0x3d + 0xf4 + 0xbb + 0x6 + 0x3e + 0x57 + 0xb9 + 0xd + 0x3e + 0x98 + 0xaa + 0x14 + 0x3e + 0xe1 + 0x8f + 0x1b + 0x3e + 0x5b + 0x69 + 0x22 + 0x3e + 0x2f + 0x37 + 0x29 + 0x3e + 0x84 + 0xf9 + 0x2f + 0x3e + 0x7f + 0xb0 + 0x36 + 0x3e + 0x48 + 0x5c + 0x3d + 0x3e + 0x3 + 0xfd + 0x43 + 0x3e + 0xd5 + 0x92 + 0x4a + 0x3e + 0xe1 + 0x1d + 0x51 + 0x3e + 0x4a + 0x9e + 0x57 + 0x3e + 0x34 + 0x14 + 0x5e + 0x3e + 0x84 + 0xf9 + 0x2f + 0x3e + 0x7f + 0xb0 + 0x36 + 0x3e + 0x48 + 0x5c + 0x3d + 0x3e + 0x3 + 0xfd + 0x43 + 0x3e + 0xd5 + 0x92 + 0x4a + 0x3e + 0xe1 + 0x1d + 0x51 + 0x3e + 0x4a + 0x9e + 0x57 + 0x3e + 0x34 + 0x14 + 0x5e + 0x3e + 0xbe + 0x7f + 0x64 + 0x3e + 0xb + 0xe1 + 0x6a + 0x3e + 0x3b + 0x38 + 0x71 + 0x3e + 0x6e + 0x85 + 0x77 + 0x3e + 0xc3 + 0xc8 + 0x7d + 0x3e + 0x2d + 0x1 + 0x82 + 0x3e + 0x27 + 0x19 + 0x85 + 0x3e + 0x60 + 0x2c + 0x88 + 0x3e + 0xbe + 0x7f + 0x64 + 0x3e + 0xb + 0xe1 + 0x6a + 0x3e + 0x3b + 0x38 + 0x71 + 0x3e + 0x6e + 0x85 + 0x77 + 0x3e + 0xc3 + 0xc8 + 0x7d + 0x3e + 0x2d + 0x1 + 0x82 + 0x3e + 0x27 + 0x19 + 0x85 + 0x3e + 0x60 + 0x2c + 0x88 + 0x3e + 0xe5 + 0x3a + 0x8b + 0x3e + 0xc6 + 0x44 + 0x8e + 0x3e + 0x10 + 0x4a + 0x91 + 0x3e + 0xd1 + 0x4a + 0x94 + 0x3e + 0x16 + 0x47 + 0x97 + 0x3e + 0xed + 0x3e + 0x9a + 0x3e + 0x63 + 0x32 + 0x9d + 0x3e + 0x84 + 0x21 + 0xa0 + 0x3e + 0xe5 + 0x3a + 0x8b + 0x3e + 0xc6 + 0x44 + 0x8e + 0x3e + 0x10 + 0x4a + 0x91 + 0x3e + 0xd1 + 0x4a + 0x94 + 0x3e + 0x16 + 0x47 + 0x97 + 0x3e + 0xed + 0x3e + 0x9a + 0x3e + 0x63 + 0x32 + 0x9d + 0x3e + 0x84 + 0x21 + 0xa0 + 0x3e + 0x5e + 0xc + 0xa3 + 0x3e + 0xfd + 0xf2 + 0xa5 + 0x3e + 0x6c + 0xd5 + 0xa8 + 0x3e + 0xb9 + 0xb3 + 0xab + 0x3e + 0xee + 0x8d + 0xae + 0x3e + 0x18 + 0x64 + 0xb1 + 0x3e + 0x41 + 0x36 + 0xb4 + 0x3e + 0x75 + 0x4 + 0xb7 + 0x3e + 0x5e + 0xc + 0xa3 + 0x3e + 0xfd + 0xf2 + 0xa5 + 0x3e + 0x6c + 0xd5 + 0xa8 + 0x3e + 0xb9 + 0xb3 + 0xab + 0x3e + 0xee + 0x8d + 0xae + 0x3e + 0x18 + 0x64 + 0xb1 + 0x3e + 0x41 + 0x36 + 0xb4 + 0x3e + 0x75 + 0x4 + 0xb7 + 0x3e + 0xc0 + 0xce + 0xb9 + 0x3e + 0x2b + 0x95 + 0xbc + 0x3e + 0xc2 + 0x57 + 0xbf + 0x3e + 0x8f + 0x16 + 0xc2 + 0x3e + 0x9c + 0xd1 + 0xc4 + 0x3e + 0xf4 + 0x88 + 0xc7 + 0x3e + 0xa1 + 0x3c + 0xca + 0x3e + 0xac + 0xec + 0xcc + 0x3e + 0xc0 + 0xce + 0xb9 + 0x3e + 0x2b + 0x95 + 0xbc + 0x3e + 0xc2 + 0x57 + 0xbf + 0x3e + 0x8f + 0x16 + 0xc2 + 0x3e + 0x9c + 0xd1 + 0xc4 + 0x3e + 0xf4 + 0x88 + 0xc7 + 0x3e + 0xa1 + 0x3c + 0xca + 0x3e + 0xac + 0xec + 0xcc + 0x3e + 0x1f + 0x99 + 0xcf + 0x3e + 0x5 + 0x42 + 0xd2 + 0x3e + 0x65 + 0xe7 + 0xd4 + 0x3e + 0x4a + 0x89 + 0xd7 + 0x3e + 0xbc + 0x27 + 0xda + 0x3e + 0xc5 + 0xc2 + 0xdc + 0x3e + 0x6d + 0x5a + 0xdf + 0x3e + 0xbd + 0xee + 0xe1 + 0x3e + 0x1f + 0x99 + 0xcf + 0x3e + 0x5 + 0x42 + 0xd2 + 0x3e + 0x65 + 0xe7 + 0xd4 + 0x3e + 0x4a + 0x89 + 0xd7 + 0x3e + 0xbc + 0x27 + 0xda + 0x3e + 0xc5 + 0xc2 + 0xdc + 0x3e + 0x6d + 0x5a + 0xdf + 0x3e + 0xbd + 0xee + 0xe1 + 0x3e + 0xbe + 0x7f + 0xe4 + 0x3e + 0x78 + 0xd + 0xe7 + 0x3e + 0xf4 + 0x97 + 0xe9 + 0x3e + 0x39 + 0x1f + 0xec + 0x3e + 0x50 + 0xa3 + 0xee + 0x3e + 0x41 + 0x24 + 0xf1 + 0x3e + 0x13 + 0xa2 + 0xf3 + 0x3e + 0xcf + 0x1c + 0xf6 + 0x3e + 0xbe + 0x7f + 0xe4 + 0x3e + 0x78 + 0xd + 0xe7 + 0x3e + 0xf4 + 0x97 + 0xe9 + 0x3e + 0x39 + 0x1f + 0xec + 0x3e + 0x50 + 0xa3 + 0xee + 0x3e + 0x41 + 0x24 + 0xf1 + 0x3e + 0x13 + 0xa2 + 0xf3 + 0x3e + 0xcf + 0x1c + 0xf6 + 0x3e + 0x7b + 0x94 + 0xf8 + 0x3e + 0x20 + 0x9 + 0xfb + 0x3e + 0xc4 + 0x7a + 0xfd + 0x3e + 0x70 + 0xe9 + 0xff + 0x3e + 0x95 + 0x2a + 0x1 + 0x3f + 0xfd + 0x5e + 0x2 + 0x3f + 0xf3 + 0x91 + 0x3 + 0x3f + 0x7a + 0xc3 + 0x4 + 0x3f + 0x7b + 0x94 + 0xf8 + 0x3e + 0x20 + 0x9 + 0xfb + 0x3e + 0xc4 + 0x7a + 0xfd + 0x3e + 0x70 + 0xe9 + 0xff + 0x3e + 0x95 + 0x2a + 0x1 + 0x3f + 0xfd + 0x5e + 0x2 + 0x3f + 0xf3 + 0x91 + 0x3 + 0x3f + 0x7a + 0xc3 + 0x4 + 0x3f + 0x97 + 0xf3 + 0x5 + 0x3f + 0x4c + 0x22 + 0x7 + 0x3f + 0x9d + 0x4f + 0x8 + 0x3f + 0x8d + 0x7b + 0x9 + 0x3f + 0x1f + 0xa6 + 0xa + 0x3f + 0x56 + 0xcf + 0xb + 0x3f + 0x36 + 0xf7 + 0xc + 0x3f + 0xc1 + 0x1d + 0xe + 0x3f + 0x97 + 0xf3 + 0x5 + 0x3f + 0x4c + 0x22 + 0x7 + 0x3f + 0x9d + 0x4f + 0x8 + 0x3f + 0x8d + 0x7b + 0x9 + 0x3f + 0x1f + 0xa6 + 0xa + 0x3f + 0x56 + 0xcf + 0xb + 0x3f + 0x36 + 0xf7 + 0xc + 0x3f + 0xc1 + 0x1d + 0xe + 0x3f + 0xfb + 0x42 + 0xf + 0x3f + 0xe7 + 0x66 + 0x10 + 0x3f + 0x87 + 0x89 + 0x11 + 0x3f + 0xde + 0xaa + 0x12 + 0x3f + 0xf1 + 0xca + 0x13 + 0x3f + 0xc0 + 0xe9 + 0x14 + 0x3f + 0x4f + 0x7 + 0x16 + 0x3f + 0xa2 + 0x23 + 0x17 + 0x3f + 0xfb + 0x42 + 0xf + 0x3f + 0xe7 + 0x66 + 0x10 + 0x3f + 0x87 + 0x89 + 0x11 + 0x3f + 0xde + 0xaa + 0x12 + 0x3f + 0xf1 + 0xca + 0x13 + 0x3f + 0xc0 + 0xe9 + 0x14 + 0x3f + 0x4f + 0x7 + 0x16 + 0x3f + 0xa2 + 0x23 + 0x17 + 0x3f + 0xba + 0x3e + 0x18 + 0x3f + 0x9a + 0x58 + 0x19 + 0x3f + 0x45 + 0x71 + 0x1a + 0x3f + 0xbe + 0x88 + 0x1b + 0x3f + 0x7 + 0x9f + 0x1c + 0x3f + 0x22 + 0xb4 + 0x1d + 0x3f + 0x13 + 0xc8 + 0x1e + 0x3f + 0xdc + 0xda + 0x1f + 0x3f + 0xba + 0x3e + 0x18 + 0x3f + 0x9a + 0x58 + 0x19 + 0x3f + 0x45 + 0x71 + 0x1a + 0x3f + 0xbe + 0x88 + 0x1b + 0x3f + 0x7 + 0x9f + 0x1c + 0x3f + 0x22 + 0xb4 + 0x1d + 0x3f + 0x13 + 0xc8 + 0x1e + 0x3f + 0xdc + 0xda + 0x1f + 0x3f + 0x7f + 0xec + 0x20 + 0x3f + 0xff + 0xfc + 0x21 + 0x3f + 0x5e + 0xc + 0x23 + 0x3f + 0x9f + 0x1a + 0x24 + 0x3f + 0xc3 + 0x27 + 0x25 + 0x3f + 0xcd + 0x33 + 0x26 + 0x3f + 0xc1 + 0x3e + 0x27 + 0x3f + 0x9e + 0x48 + 0x28 + 0x3f + 0x7f + 0xec + 0x20 + 0x3f + 0xff + 0xfc + 0x21 + 0x3f + 0x5e + 0xc + 0x23 + 0x3f + 0x9f + 0x1a + 0x24 + 0x3f + 0xc3 + 0x27 + 0x25 + 0x3f + 0xcd + 0x33 + 0x26 + 0x3f + 0xc1 + 0x3e + 0x27 + 0x3f + 0x9e + 0x48 + 0x28 + 0x3f + 0x69 + 0x51 + 0x29 + 0x3f + 0x23 + 0x59 + 0x2a + 0x3f + 0xcf + 0x5f + 0x2b + 0x3f + 0x6e + 0x65 + 0x2c + 0x3f + 0x2 + 0x6a + 0x2d + 0x3f + 0x8f + 0x6d + 0x2e + 0x3f + 0x15 + 0x70 + 0x2f + 0x3f + 0x98 + 0x71 + 0x30 + 0x3f + 0x69 + 0x51 + 0x29 + 0x3f + 0x23 + 0x59 + 0x2a + 0x3f + 0xcf + 0x5f + 0x2b + 0x3f + 0x6e + 0x65 + 0x2c + 0x3f + 0x2 + 0x6a + 0x2d + 0x3f + 0x8f + 0x6d + 0x2e + 0x3f + 0x15 + 0x70 + 0x2f + 0x3f + 0x98 + 0x71 + 0x30 + 0x3f + +.data_segment_name +.data weak 64 _ZN8exp2_lutILj512EE13exp2_table_abE DMb + 0xc + 0xc + 0xc + 0xc + 0xc + 0xc + 0xc + 0xc + 0xc + 0xc + 0xd + 0xd + 0xd + 0xd + 0xd + 0xd + 0xd + 0xd + 0xd + 0xd + 0xe + 0xd + 0xe + 0xe + 0xe + 0xe + 0xe + 0xe + 0xe + 0xe + 0xf + 0xe + 0xc + 0xc + 0xc + 0xc + 0xc + 0xc + 0xc + 0xc + 0xc + 0xc + 0xd + 0xd + 0xd + 0xd + 0xd + 0xd + 0xd + 0xd + 0xd + 0xd + 0xe + 0xd + 0xe + 0xe + 0xe + 0xe + 0xe + 0xe + 0xe + 0xe + 0xf + 0xe + 0xf + 0xf + 0xf + 0xf + 0xf + 0xf + 0xf + 0xf + 0x10 + 0xf + 0x10 + 0x10 + 0x10 + 0x10 + 0x10 + 0x10 + 0x10 + 0x10 + 0x10 + 0x10 + 0x11 + 0x11 + 0x11 + 0x11 + 0x11 + 0x11 + 0x11 + 0x11 + 0x11 + 0x11 + 0x12 + 0x12 + 0xf + 0xf + 0xf + 0xf + 0xf + 0xf + 0xf + 0xf + 0x10 + 0xf + 0x10 + 0x10 + 0x10 + 0x10 + 0x10 + 0x10 + 0x10 + 0x10 + 0x10 + 0x10 + 0x11 + 0x11 + 0x11 + 0x11 + 0x11 + 0x11 + 0x11 + 0x11 + 0x11 + 0x11 + 0x12 + 0x12 + 0x12 + 0x12 + 0x12 + 0x12 + 0x12 + 0x12 + 0x12 + 0x12 + 0x13 + 0x13 + 0x13 + 0x13 + 0x13 + 0x13 + 0x13 + 0x13 + 0x13 + 0x13 + 0x14 + 0x14 + 0x14 + 0x14 + 0x14 + 0x14 + 0x14 + 0x14 + 0x14 + 0x14 + 0x15 + 0x15 + 0x15 + 0x15 + 0x12 + 0x12 + 0x12 + 0x12 + 0x12 + 0x12 + 0x12 + 0x12 + 0x13 + 0x13 + 0x13 + 0x13 + 0x13 + 0x13 + 0x13 + 0x13 + 0x13 + 0x13 + 0x14 + 0x14 + 0x14 + 0x14 + 0x14 + 0x14 + 0x14 + 0x14 + 0x14 + 0x14 + 0x15 + 0x15 + 0x15 + 0x15 + 0x15 + 0x15 + 0x15 + 0x15 + 0x15 + 0x15 + 0x16 + 0x16 + 0x16 + 0x16 + 0x16 + 0x16 + 0x16 + 0x16 + 0x16 + 0x16 + 0x17 + 0x17 + 0x17 + 0x17 + 0x17 + 0x17 + 0x17 + 0x17 + 0x17 + 0x17 + 0x18 + 0x18 + 0x18 + 0x18 + 0x18 + 0x18 + 0x15 + 0x15 + 0x15 + 0x15 + 0x15 + 0x15 + 0x16 + 0x16 + 0x16 + 0x16 + 0x16 + 0x16 + 0x16 + 0x16 + 0x16 + 0x16 + 0x17 + 0x17 + 0x17 + 0x17 + 0x17 + 0x17 + 0x17 + 0x17 + 0x17 + 0x17 + 0x18 + 0x18 + 0x18 + 0x18 + 0x18 + 0x18 + 0x18 + 0x18 + 0x19 + 0x19 + 0x19 + 0x19 + 0x1a + 0x19 + 0x1a + 0x1a + 0x1b + 0x1a + 0x1b + 0x1b + 0x1b + 0x1b + 0x1c + 0x1c + 0x1c + 0x1c + 0x1d + 0x1c + 0x1d + 0x1d + 0x1d + 0x1d + 0x1e + 0x1e + 0x1e + 0x1e + 0x1f + 0x1f + 0x18 + 0x18 + 0x19 + 0x19 + 0x19 + 0x19 + 0x1a + 0x19 + 0x1a + 0x1a + 0x1b + 0x1a + 0x1b + 0x1b + 0x1b + 0x1b + 0x1c + 0x1c + 0x1c + 0x1c + 0x1d + 0x1c + 0x1d + 0x1d + 0x1d + 0x1d + 0x1e + 0x1e + 0x1e + 0x1e + 0x1f + 0x1f + 0x1f + 0x1f + 0x20 + 0x1f + 0x20 + 0x20 + 0x20 + 0x20 + 0x21 + 0x21 + 0x21 + 0x21 + 0x22 + 0x22 + 0x22 + 0x22 + 0x23 + 0x22 + 0x23 + 0x23 + 0x24 + 0x23 + 0x24 + 0x24 + 0x24 + 0x24 + 0x25 + 0x25 + 0x25 + 0x25 + 0x26 + 0x26 + 0x1f + 0x1f + 0x20 + 0x1f + 0x20 + 0x20 + 0x20 + 0x20 + 0x21 + 0x21 + 0x21 + 0x21 + 0x22 + 0x22 + 0x22 + 0x22 + 0x23 + 0x22 + 0x23 + 0x23 + 0x24 + 0x23 + 0x24 + 0x24 + 0x24 + 0x24 + 0x25 + 0x25 + 0x25 + 0x25 + 0x26 + 0x26 + 0x26 + 0x26 + 0x27 + 0x26 + 0x27 + 0x27 + 0x28 + 0x27 + 0x28 + 0x28 + 0x28 + 0x28 + 0x29 + 0x29 + 0x29 + 0x29 + 0x2a + 0x2a + 0x2a + 0x2a + 0x2b + 0x2b + 0x2b + 0x2b + 0x2c + 0x2b + 0x2c + 0x2c + 0x2d + 0x2c + 0x2d + 0x2d + 0x26 + 0x26 + 0x27 + 0x26 + 0x27 + 0x27 + 0x28 + 0x27 + 0x28 + 0x28 + 0x28 + 0x28 + 0x29 + 0x29 + 0x29 + 0x29 + 0x2a + 0x2a + 0x2a + 0x2a + 0x2b + 0x2b + 0x2b + 0x2b + 0x2c + 0x2b + 0x2c + 0x2c + 0x2d + 0x2c + 0x2d + 0x2d + 0x2e + 0x2d + 0x2e + 0x2e + 0x2f + 0x2e + 0x2f + 0x2f + 0x2f + 0x2f + 0x30 + 0x30 + 0x30 + 0x30 + 0x31 + 0x31 + 0x31 + 0x31 + 0x32 + 0x32 + 0x32 + 0x32 + 0x33 + 0x33 + 0x33 + 0x33 + 0x34 + 0x34 + 0x34 + 0x34 + 0x35 + 0x35 + 0x2e + 0x2d + 0x2e + 0x2e + 0x2f + 0x2e + 0x2f + 0x2f + 0x2f + 0x2f + 0x30 + 0x30 + 0x30 + 0x30 + 0x31 + 0x31 + 0x31 + 0x31 + 0x32 + 0x32 + 0x32 + 0x32 + 0x33 + 0x33 + 0x33 + 0x33 + 0x34 + 0x34 + 0x34 + 0x34 + 0x35 + 0x35 + 0x36 + 0x35 + 0x36 + 0x36 + 0x37 + 0x37 + 0x38 + 0x38 + 0x39 + 0x39 + 0x3a + 0x3a + 0x3c + 0x3b + 0x3d + 0x3c + 0x3e + 0x3d + 0x3f + 0x3e + 0x40 + 0x3f + 0x41 + 0x40 + 0x42 + 0x41 + 0x43 + 0x42 + 0x44 + 0x43 + 0x45 + 0x44 + 0x36 + 0x35 + 0x36 + 0x36 + 0x37 + 0x37 + 0x38 + 0x38 + 0x39 + 0x39 + 0x3a + 0x3a + 0x3c + 0x3b + 0x3d + 0x3c + 0x3e + 0x3d + 0x3f + 0x3e + 0x40 + 0x3f + 0x41 + 0x40 + 0x42 + 0x41 + 0x43 + 0x42 + 0x44 + 0x43 + 0x45 + 0x44 + 0x46 + 0x45 + 0x47 + 0x46 + 0x48 + 0x48 + 0x49 + 0x49 + 0x4a + 0x4a + 0x4b + 0x4b + 0x4c + 0x4c + 0x4e + 0x4d + 0x4f + 0x4e + 0x50 + 0x4f + 0x51 + 0x50 + 0x52 + 0x52 + 0x53 + 0x53 + 0x54 + 0x54 + 0x56 + 0x55 + 0x57 + 0x56 + 0x46 + 0x45 + 0x47 + 0x46 + 0x48 + 0x48 + 0x49 + 0x49 + 0x4a + 0x4a + 0x4b + 0x4b + 0x4c + 0x4c + 0x4e + 0x4d + 0x4f + 0x4e + 0x50 + 0x4f + 0x51 + 0x50 + 0x52 + 0x52 + 0x53 + 0x53 + 0x54 + 0x54 + 0x56 + 0x55 + 0x57 + 0x56 + 0x58 + 0x57 + 0x59 + 0x58 + 0x5a + 0x5a + 0x5b + 0x5b + 0x5d + 0x5c + 0x5e + 0x5d + 0x5f + 0x5e + 0x60 + 0x60 + 0x61 + 0x61 + 0x63 + 0x62 + 0x64 + 0x63 + 0x65 + 0x64 + 0x66 + 0x66 + 0x68 + 0x67 + 0x69 + 0x68 + 0x6a + 0x69 + 0x58 + 0x57 + 0x59 + 0x58 + 0x5a + 0x5a + 0x5b + 0x5b + 0x5d + 0x5c + 0x5e + 0x5d + 0x5f + 0x5e + 0x60 + 0x60 + 0x61 + 0x61 + 0x63 + 0x62 + 0x64 + 0x63 + 0x65 + 0x64 + 0x66 + 0x66 + 0x68 + 0x67 + 0x69 + 0x68 + 0x6a + 0x69 + 0x6b + 0x6b + 0x6d + 0x6c + 0x6e + 0x6d + 0x6f + 0x6f + 0x71 + 0x70 + 0x72 + 0x71 + 0x73 + 0x73 + 0x74 + 0x74 + 0x76 + 0x75 + 0x77 + 0x76 + 0x78 + 0x78 + 0x7a + 0x79 + 0x7b + 0x7b + 0x7d + 0x7c + 0x7e + 0x7d + 0x7f + 0x7f + 0x6b + 0x6b + 0x6d + 0x6c + 0x6e + 0x6d + 0x6f + 0x6f + 0x71 + 0x70 + 0x72 + 0x71 + 0x73 + 0x73 + 0x74 + 0x74 + 0x76 + 0x75 + 0x77 + 0x76 + 0x78 + 0x78 + 0x7a + 0x79 + 0x7b + 0x7b + 0x7d + 0x7c + 0x7e + 0x7d + 0x7f + 0x7f + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x7 + 0x0 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x7 + 0x0 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x8 + 0x7 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x9 + 0x9 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x8 + 0x7 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xb + 0xa + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xc + 0xb + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xb + 0xa + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xc + 0xb + +.data_segment_name +.data weak 64 _ZN8exp2_lutILj512EE13exp2_table_cdE DMb + 0xc + 0xc + 0xc + 0xc + 0xc + 0xc + 0xc + 0xc + 0xc + 0xc + 0xd + 0xd + 0xd + 0xd + 0xd + 0xd + 0xd + 0xd + 0xd + 0xd + 0xe + 0xd + 0xe + 0xe + 0xe + 0xe + 0xe + 0xe + 0xe + 0xe + 0xf + 0xe + 0xc + 0xc + 0xc + 0xc + 0xc + 0xc + 0xc + 0xc + 0xc + 0xc + 0xd + 0xd + 0xd + 0xd + 0xd + 0xd + 0xd + 0xd + 0xd + 0xd + 0xe + 0xd + 0xe + 0xe + 0xe + 0xe + 0xe + 0xe + 0xe + 0xe + 0xf + 0xe + 0xf + 0xf + 0xf + 0xf + 0xf + 0xf + 0xf + 0xf + 0x10 + 0xf + 0x10 + 0x10 + 0x10 + 0x10 + 0x10 + 0x10 + 0x10 + 0x10 + 0x10 + 0x10 + 0x11 + 0x11 + 0x11 + 0x11 + 0x11 + 0x11 + 0x11 + 0x11 + 0x11 + 0x11 + 0x12 + 0x12 + 0xf + 0xf + 0xf + 0xf + 0xf + 0xf + 0xf + 0xf + 0x10 + 0xf + 0x10 + 0x10 + 0x10 + 0x10 + 0x10 + 0x10 + 0x10 + 0x10 + 0x10 + 0x10 + 0x11 + 0x11 + 0x11 + 0x11 + 0x11 + 0x11 + 0x11 + 0x11 + 0x11 + 0x11 + 0x12 + 0x12 + 0x12 + 0x12 + 0x12 + 0x12 + 0x12 + 0x12 + 0x12 + 0x12 + 0x13 + 0x13 + 0x13 + 0x13 + 0x13 + 0x13 + 0x13 + 0x13 + 0x13 + 0x13 + 0x14 + 0x14 + 0x14 + 0x14 + 0x14 + 0x14 + 0x14 + 0x14 + 0x14 + 0x14 + 0x15 + 0x15 + 0x15 + 0x15 + 0x12 + 0x12 + 0x12 + 0x12 + 0x12 + 0x12 + 0x12 + 0x12 + 0x13 + 0x13 + 0x13 + 0x13 + 0x13 + 0x13 + 0x13 + 0x13 + 0x13 + 0x13 + 0x14 + 0x14 + 0x14 + 0x14 + 0x14 + 0x14 + 0x14 + 0x14 + 0x14 + 0x14 + 0x15 + 0x15 + 0x15 + 0x15 + 0x15 + 0x15 + 0x15 + 0x15 + 0x15 + 0x15 + 0x16 + 0x16 + 0x16 + 0x16 + 0x16 + 0x16 + 0x16 + 0x16 + 0x16 + 0x16 + 0x17 + 0x17 + 0x17 + 0x17 + 0x17 + 0x17 + 0x17 + 0x17 + 0x17 + 0x17 + 0x18 + 0x18 + 0x18 + 0x18 + 0x18 + 0x18 + 0x15 + 0x15 + 0x15 + 0x15 + 0x15 + 0x15 + 0x16 + 0x16 + 0x16 + 0x16 + 0x16 + 0x16 + 0x16 + 0x16 + 0x16 + 0x16 + 0x17 + 0x17 + 0x17 + 0x17 + 0x17 + 0x17 + 0x17 + 0x17 + 0x17 + 0x17 + 0x18 + 0x18 + 0x18 + 0x18 + 0x18 + 0x18 + 0x18 + 0x18 + 0x19 + 0x19 + 0x19 + 0x19 + 0x1a + 0x19 + 0x1a + 0x1a + 0x1b + 0x1a + 0x1b + 0x1b + 0x1b + 0x1b + 0x1c + 0x1c + 0x1c + 0x1c + 0x1d + 0x1c + 0x1d + 0x1d + 0x1d + 0x1d + 0x1e + 0x1e + 0x1e + 0x1e + 0x1f + 0x1f + 0x18 + 0x18 + 0x19 + 0x19 + 0x19 + 0x19 + 0x1a + 0x19 + 0x1a + 0x1a + 0x1b + 0x1a + 0x1b + 0x1b + 0x1b + 0x1b + 0x1c + 0x1c + 0x1c + 0x1c + 0x1d + 0x1c + 0x1d + 0x1d + 0x1d + 0x1d + 0x1e + 0x1e + 0x1e + 0x1e + 0x1f + 0x1f + 0x1f + 0x1f + 0x20 + 0x1f + 0x20 + 0x20 + 0x20 + 0x20 + 0x21 + 0x21 + 0x21 + 0x21 + 0x22 + 0x22 + 0x22 + 0x22 + 0x23 + 0x22 + 0x23 + 0x23 + 0x24 + 0x23 + 0x24 + 0x24 + 0x24 + 0x24 + 0x25 + 0x25 + 0x25 + 0x25 + 0x26 + 0x26 + 0x1f + 0x1f + 0x20 + 0x1f + 0x20 + 0x20 + 0x20 + 0x20 + 0x21 + 0x21 + 0x21 + 0x21 + 0x22 + 0x22 + 0x22 + 0x22 + 0x23 + 0x22 + 0x23 + 0x23 + 0x24 + 0x23 + 0x24 + 0x24 + 0x24 + 0x24 + 0x25 + 0x25 + 0x25 + 0x25 + 0x26 + 0x26 + 0x26 + 0x26 + 0x27 + 0x26 + 0x27 + 0x27 + 0x28 + 0x27 + 0x28 + 0x28 + 0x28 + 0x28 + 0x29 + 0x29 + 0x29 + 0x29 + 0x2a + 0x2a + 0x2a + 0x2a + 0x2b + 0x2b + 0x2b + 0x2b + 0x2c + 0x2b + 0x2c + 0x2c + 0x2d + 0x2c + 0x2d + 0x2d + 0x26 + 0x26 + 0x27 + 0x26 + 0x27 + 0x27 + 0x28 + 0x27 + 0x28 + 0x28 + 0x28 + 0x28 + 0x29 + 0x29 + 0x29 + 0x29 + 0x2a + 0x2a + 0x2a + 0x2a + 0x2b + 0x2b + 0x2b + 0x2b + 0x2c + 0x2b + 0x2c + 0x2c + 0x2d + 0x2c + 0x2d + 0x2d + 0x2e + 0x2d + 0x2e + 0x2e + 0x2f + 0x2e + 0x2f + 0x2f + 0x2f + 0x2f + 0x30 + 0x30 + 0x30 + 0x30 + 0x31 + 0x31 + 0x31 + 0x31 + 0x32 + 0x32 + 0x32 + 0x32 + 0x33 + 0x33 + 0x33 + 0x33 + 0x34 + 0x34 + 0x34 + 0x34 + 0x35 + 0x35 + 0x2e + 0x2d + 0x2e + 0x2e + 0x2f + 0x2e + 0x2f + 0x2f + 0x2f + 0x2f + 0x30 + 0x30 + 0x30 + 0x30 + 0x31 + 0x31 + 0x31 + 0x31 + 0x32 + 0x32 + 0x32 + 0x32 + 0x33 + 0x33 + 0x33 + 0x33 + 0x34 + 0x34 + 0x34 + 0x34 + 0x35 + 0x35 + 0x36 + 0x35 + 0x36 + 0x36 + 0x37 + 0x37 + 0x38 + 0x38 + 0x39 + 0x39 + 0x3a + 0x3a + 0x3c + 0x3b + 0x3d + 0x3c + 0x3e + 0x3d + 0x3f + 0x3e + 0x40 + 0x3f + 0x41 + 0x40 + 0x42 + 0x41 + 0x43 + 0x42 + 0x44 + 0x43 + 0x45 + 0x44 + 0x36 + 0x35 + 0x36 + 0x36 + 0x37 + 0x37 + 0x38 + 0x38 + 0x39 + 0x39 + 0x3a + 0x3a + 0x3c + 0x3b + 0x3d + 0x3c + 0x3e + 0x3d + 0x3f + 0x3e + 0x40 + 0x3f + 0x41 + 0x40 + 0x42 + 0x41 + 0x43 + 0x42 + 0x44 + 0x43 + 0x45 + 0x44 + 0x46 + 0x45 + 0x47 + 0x46 + 0x48 + 0x48 + 0x49 + 0x49 + 0x4a + 0x4a + 0x4b + 0x4b + 0x4c + 0x4c + 0x4e + 0x4d + 0x4f + 0x4e + 0x50 + 0x4f + 0x51 + 0x50 + 0x52 + 0x52 + 0x53 + 0x53 + 0x54 + 0x54 + 0x56 + 0x55 + 0x57 + 0x56 + 0x46 + 0x45 + 0x47 + 0x46 + 0x48 + 0x48 + 0x49 + 0x49 + 0x4a + 0x4a + 0x4b + 0x4b + 0x4c + 0x4c + 0x4e + 0x4d + 0x4f + 0x4e + 0x50 + 0x4f + 0x51 + 0x50 + 0x52 + 0x52 + 0x53 + 0x53 + 0x54 + 0x54 + 0x56 + 0x55 + 0x57 + 0x56 + 0x58 + 0x57 + 0x59 + 0x58 + 0x5a + 0x5a + 0x5b + 0x5b + 0x5d + 0x5c + 0x5e + 0x5d + 0x5f + 0x5e + 0x60 + 0x60 + 0x61 + 0x61 + 0x63 + 0x62 + 0x64 + 0x63 + 0x65 + 0x64 + 0x66 + 0x66 + 0x68 + 0x67 + 0x69 + 0x68 + 0x6a + 0x69 + 0x58 + 0x57 + 0x59 + 0x58 + 0x5a + 0x5a + 0x5b + 0x5b + 0x5d + 0x5c + 0x5e + 0x5d + 0x5f + 0x5e + 0x60 + 0x60 + 0x61 + 0x61 + 0x63 + 0x62 + 0x64 + 0x63 + 0x65 + 0x64 + 0x66 + 0x66 + 0x68 + 0x67 + 0x69 + 0x68 + 0x6a + 0x69 + 0x6b + 0x6b + 0x6d + 0x6c + 0x6e + 0x6d + 0x6f + 0x6f + 0x71 + 0x70 + 0x72 + 0x71 + 0x73 + 0x73 + 0x74 + 0x74 + 0x76 + 0x75 + 0x77 + 0x76 + 0x78 + 0x78 + 0x7a + 0x79 + 0x7b + 0x7b + 0x7d + 0x7c + 0x7e + 0x7d + 0x7f + 0x7f + 0x6b + 0x6b + 0x6d + 0x6c + 0x6e + 0x6d + 0x6f + 0x6f + 0x71 + 0x70 + 0x72 + 0x71 + 0x73 + 0x73 + 0x74 + 0x74 + 0x76 + 0x75 + 0x77 + 0x76 + 0x78 + 0x78 + 0x7a + 0x79 + 0x7b + 0x7b + 0x7d + 0x7c + 0x7e + 0x7d + 0x7f + 0x7f + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x7 + 0x0 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x7 + 0x0 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x8 + 0x7 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x9 + 0x9 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x7 + 0x8 + 0x7 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x8 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0x9 + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xb + 0xa + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xc + 0xb + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xa + 0xb + 0xa + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xb + 0xc + 0xb + +.data_segment_name +.data weak 64 _ZN8mish_lutILj0ELj2048EE7data_abE DMb + 0x1 + 0x80 + 0x1 + 0x80 + 0x2 + 0x80 + 0x3 + 0x80 + 0x6 + 0x80 + 0x9 + 0x80 + 0xf + 0x80 + 0x19 + 0x80 + 0x29 + 0x80 + 0x44 + 0x80 + 0x6f + 0x80 + 0xb6 + 0x80 + 0x15 + 0x81 + 0x75 + 0x81 + 0xc9 + 0x81 + 0x25 + 0x82 + 0x1 + 0x80 + 0x1 + 0x80 + 0x2 + 0x80 + 0x3 + 0x80 + 0x6 + 0x80 + 0x9 + 0x80 + 0xf + 0x80 + 0x19 + 0x80 + 0x29 + 0x80 + 0x44 + 0x80 + 0x6f + 0x80 + 0xb6 + 0x80 + 0x15 + 0x81 + 0x75 + 0x81 + 0xc9 + 0x81 + 0x25 + 0x82 + 0x87 + 0x82 + 0xdd + 0x82 + 0x35 + 0x83 + 0x95 + 0x83 + 0xf4 + 0x83 + 0x48 + 0x84 + 0xa4 + 0x84 + 0x6 + 0x85 + 0x5c + 0x85 + 0xb4 + 0x85 + 0x14 + 0x86 + 0x72 + 0x86 + 0xc6 + 0x86 + 0x22 + 0x87 + 0x85 + 0x87 + 0xda + 0x87 + 0x87 + 0x82 + 0xdd + 0x82 + 0x35 + 0x83 + 0x95 + 0x83 + 0xf4 + 0x83 + 0x48 + 0x84 + 0xa4 + 0x84 + 0x6 + 0x85 + 0x5c + 0x85 + 0xb4 + 0x85 + 0x14 + 0x86 + 0x72 + 0x86 + 0xc6 + 0x86 + 0x22 + 0x87 + 0x85 + 0x87 + 0xda + 0x87 + 0x33 + 0x88 + 0x92 + 0x88 + 0xf0 + 0x88 + 0x45 + 0x89 + 0xa1 + 0x89 + 0x4 + 0x8a + 0x58 + 0x8a + 0xb1 + 0x8a + 0x11 + 0x8b + 0x6d + 0x8b + 0xc2 + 0x8b + 0x1f + 0x8c + 0x82 + 0x8c + 0xd6 + 0x8c + 0x2f + 0x8d + 0x8f + 0x8d + 0x33 + 0x88 + 0x92 + 0x88 + 0xf0 + 0x88 + 0x45 + 0x89 + 0xa1 + 0x89 + 0x4 + 0x8a + 0x58 + 0x8a + 0xb1 + 0x8a + 0x11 + 0x8b + 0x6d + 0x8b + 0xc2 + 0x8b + 0x1f + 0x8c + 0x82 + 0x8c + 0xd6 + 0x8c + 0x2f + 0x8d + 0x8f + 0x8d + 0xea + 0x8d + 0x40 + 0x8e + 0x9d + 0x8e + 0x1 + 0x8f + 0x53 + 0x8f + 0xac + 0x8f + 0xd + 0x90 + 0x67 + 0x90 + 0xbd + 0x90 + 0x1b + 0x91 + 0x7d + 0x91 + 0xcf + 0x91 + 0x2a + 0x92 + 0x8b + 0x92 + 0xe3 + 0x92 + 0x3a + 0x93 + 0xea + 0x8d + 0x40 + 0x8e + 0x9d + 0x8e + 0x1 + 0x8f + 0x53 + 0x8f + 0xac + 0x8f + 0xd + 0x90 + 0x67 + 0x90 + 0xbd + 0x90 + 0x1b + 0x91 + 0x7d + 0x91 + 0xcf + 0x91 + 0x2a + 0x92 + 0x8b + 0x92 + 0xe3 + 0x92 + 0x3a + 0x93 + 0x98 + 0x93 + 0xf9 + 0x93 + 0x4b + 0x94 + 0x82 + 0x94 + 0xa6 + 0x94 + 0xd5 + 0x94 + 0x8 + 0x95 + 0x2e + 0x95 + 0x5e + 0x95 + 0x8e + 0x95 + 0xb6 + 0x95 + 0xe9 + 0x95 + 0x15 + 0x96 + 0x3e + 0x96 + 0x73 + 0x96 + 0x9c + 0x96 + 0x98 + 0x93 + 0xf9 + 0x93 + 0x4b + 0x94 + 0x82 + 0x94 + 0xa6 + 0x94 + 0xd5 + 0x94 + 0x8 + 0x95 + 0x2e + 0x95 + 0x5e + 0x95 + 0x8e + 0x95 + 0xb6 + 0x95 + 0xe9 + 0x95 + 0x15 + 0x96 + 0x3e + 0x96 + 0x73 + 0x96 + 0x9c + 0x96 + 0xc7 + 0x96 + 0xfe + 0x96 + 0x23 + 0x97 + 0x50 + 0x97 + 0x85 + 0x97 + 0xaa + 0x97 + 0xd9 + 0x97 + 0xb + 0x98 + 0x32 + 0x98 + 0x63 + 0x98 + 0x91 + 0x98 + 0xba + 0x98 + 0xed + 0x98 + 0x18 + 0x99 + 0x42 + 0x99 + 0x78 + 0x99 + 0xc7 + 0x96 + 0xfe + 0x96 + 0x23 + 0x97 + 0x50 + 0x97 + 0x85 + 0x97 + 0xaa + 0x97 + 0xd9 + 0x97 + 0xb + 0x98 + 0x32 + 0x98 + 0x63 + 0x98 + 0x91 + 0x98 + 0xba + 0x98 + 0xed + 0x98 + 0x18 + 0x99 + 0x42 + 0x99 + 0x78 + 0x99 + 0x9e + 0x99 + 0xcb + 0x99 + 0x1 + 0x9a + 0x25 + 0x9a + 0x54 + 0x9a + 0x87 + 0x9a + 0xad + 0x9a + 0xdd + 0x9a + 0xd + 0x9b + 0x34 + 0x9b + 0x67 + 0x9b + 0x93 + 0x9b + 0xbc + 0x9b + 0xf1 + 0x9b + 0x1a + 0x9c + 0x45 + 0x9c + 0x9e + 0x99 + 0xcb + 0x99 + 0x1 + 0x9a + 0x25 + 0x9a + 0x54 + 0x9a + 0x87 + 0x9a + 0xad + 0x9a + 0xdd + 0x9a + 0xd + 0x9b + 0x34 + 0x9b + 0x67 + 0x9b + 0x93 + 0x9b + 0xbc + 0x9b + 0xf1 + 0x9b + 0x1a + 0x9c + 0x45 + 0x9c + 0x7b + 0x9c + 0xa1 + 0x9c + 0xcd + 0x9c + 0x3 + 0x9d + 0x27 + 0x9d + 0x56 + 0x9d + 0x89 + 0x9d + 0xaf + 0x9d + 0xdf + 0x9d + 0xf + 0x9e + 0x36 + 0x9e + 0x69 + 0x9e + 0x95 + 0x9e + 0xbe + 0x9e + 0xf3 + 0x9e + 0x1b + 0x9f + 0x7b + 0x9c + 0xa1 + 0x9c + 0xcd + 0x9c + 0x3 + 0x9d + 0x27 + 0x9d + 0x56 + 0x9d + 0x89 + 0x9d + 0xaf + 0x9d + 0xdf + 0x9d + 0xf + 0x9e + 0x36 + 0x9e + 0x69 + 0x9e + 0x95 + 0x9e + 0xbe + 0x9e + 0xf3 + 0x9e + 0x1b + 0x9f + 0x46 + 0x9f + 0x7d + 0x9f + 0xa2 + 0x9f + 0xce + 0x9f + 0x4 + 0xa0 + 0x28 + 0xa0 + 0x57 + 0xa0 + 0x89 + 0xa0 + 0xaf + 0xa0 + 0xe0 + 0xa0 + 0xf + 0xa1 + 0x37 + 0xa1 + 0x69 + 0xa1 + 0x95 + 0xa1 + 0xbe + 0xa1 + 0xf3 + 0xa1 + 0x46 + 0x9f + 0x7d + 0x9f + 0xa2 + 0x9f + 0xce + 0x9f + 0x4 + 0xa0 + 0x28 + 0xa0 + 0x57 + 0xa0 + 0x89 + 0xa0 + 0xaf + 0xa0 + 0xe0 + 0xa0 + 0xf + 0xa1 + 0x37 + 0xa1 + 0x69 + 0xa1 + 0x95 + 0xa1 + 0xbe + 0xa1 + 0xf3 + 0xa1 + 0x1b + 0xa2 + 0x46 + 0xa2 + 0x7d + 0xa2 + 0xa1 + 0xa2 + 0xce + 0xa2 + 0x3 + 0xa3 + 0x28 + 0xa3 + 0x56 + 0xa3 + 0x89 + 0xa3 + 0xae + 0xa3 + 0xdf + 0xa3 + 0xe + 0xa4 + 0x35 + 0xa4 + 0x68 + 0xa4 + 0x94 + 0xa4 + 0xbd + 0xa4 + 0x1b + 0xa2 + 0x46 + 0xa2 + 0x7d + 0xa2 + 0xa1 + 0xa2 + 0xce + 0xa2 + 0x3 + 0xa3 + 0x28 + 0xa3 + 0x56 + 0xa3 + 0x89 + 0xa3 + 0xae + 0xa3 + 0xdf + 0xa3 + 0xe + 0xa4 + 0x35 + 0xa4 + 0x68 + 0xa4 + 0x94 + 0xa4 + 0xbd + 0xa4 + 0xf1 + 0xa4 + 0x1a + 0xa5 + 0x44 + 0xa5 + 0x7a + 0xa5 + 0x9f + 0xa5 + 0xcb + 0xa5 + 0x2 + 0xa6 + 0x26 + 0xa6 + 0x53 + 0xa6 + 0x87 + 0xa6 + 0xac + 0xa6 + 0xdb + 0xa6 + 0xc + 0xa7 + 0x32 + 0xa7 + 0x64 + 0xa7 + 0x91 + 0xa7 + 0xf1 + 0xa4 + 0x1a + 0xa5 + 0x44 + 0xa5 + 0x7a + 0xa5 + 0x9f + 0xa5 + 0xcb + 0xa5 + 0x2 + 0xa6 + 0x26 + 0xa6 + 0x53 + 0xa6 + 0x87 + 0xa6 + 0xac + 0xa6 + 0xdb + 0xa6 + 0xc + 0xa7 + 0x32 + 0xa7 + 0x64 + 0xa7 + 0x91 + 0xa7 + 0xb9 + 0xa7 + 0xec + 0xa7 + 0x16 + 0xa8 + 0x40 + 0xa8 + 0x75 + 0xa8 + 0x9c + 0xa8 + 0xc7 + 0xa8 + 0xfd + 0xa8 + 0x22 + 0xa9 + 0x4e + 0xa9 + 0x83 + 0xa9 + 0xa7 + 0xa9 + 0xd5 + 0xa9 + 0x8 + 0xaa + 0x2d + 0xaa + 0x5d + 0xaa + 0xb9 + 0xa7 + 0xec + 0xa7 + 0x16 + 0xa8 + 0x40 + 0xa8 + 0x75 + 0xa8 + 0x9c + 0xa8 + 0xc7 + 0xa8 + 0xfd + 0xa8 + 0x22 + 0xa9 + 0x4e + 0xa9 + 0x83 + 0xa9 + 0xa7 + 0xa9 + 0xd5 + 0xa9 + 0x8 + 0xaa + 0x2d + 0xaa + 0x5d + 0xaa + 0x8d + 0xaa + 0xb3 + 0xaa + 0xe4 + 0xaa + 0x1 + 0xab + 0x11 + 0xab + 0x24 + 0xab + 0x39 + 0xab + 0x51 + 0xab + 0x6c + 0xab + 0x85 + 0xab + 0x96 + 0xab + 0xaa + 0xab + 0xbf + 0xab + 0xd8 + 0xab + 0xf4 + 0xab + 0x9 + 0xac + 0x8d + 0xaa + 0xb3 + 0xaa + 0xe4 + 0xaa + 0x1 + 0xab + 0x11 + 0xab + 0x24 + 0xab + 0x39 + 0xab + 0x51 + 0xab + 0x6c + 0xab + 0x85 + 0xab + 0x96 + 0xab + 0xaa + 0xab + 0xbf + 0xab + 0xd8 + 0xab + 0xf4 + 0xab + 0x9 + 0xac + 0x1b + 0xac + 0x2f + 0xac + 0x46 + 0xac + 0x5f + 0xac + 0x7c + 0xac + 0x8e + 0xac + 0xa0 + 0xac + 0xb5 + 0xac + 0xcc + 0xac + 0xe6 + 0xac + 0x2 + 0xad + 0x12 + 0xad + 0x25 + 0xad + 0x3a + 0xad + 0x52 + 0xad + 0x6d + 0xad + 0x1b + 0xac + 0x2f + 0xac + 0x46 + 0xac + 0x5f + 0xac + 0x7c + 0xac + 0x8e + 0xac + 0xa0 + 0xac + 0xb5 + 0xac + 0xcc + 0xac + 0xe6 + 0xac + 0x2 + 0xad + 0x12 + 0xad + 0x25 + 0xad + 0x3a + 0xad + 0x52 + 0xad + 0x6d + 0xad + 0x86 + 0xad + 0x97 + 0xad + 0xaa + 0xad + 0xc0 + 0xad + 0xd9 + 0xad + 0xf4 + 0xad + 0xa + 0xae + 0x1c + 0xae + 0x2f + 0xae + 0x46 + 0xae + 0x5f + 0xae + 0x7c + 0xae + 0x8e + 0xae + 0xa0 + 0xae + 0xb5 + 0xae + 0xcc + 0xae + 0x86 + 0xad + 0x97 + 0xad + 0xaa + 0xad + 0xc0 + 0xad + 0xd9 + 0xad + 0xf4 + 0xad + 0xa + 0xae + 0x1c + 0xae + 0x2f + 0xae + 0x46 + 0xae + 0x5f + 0xae + 0x7c + 0xae + 0x8e + 0xae + 0xa0 + 0xae + 0xb5 + 0xae + 0xcc + 0xae + 0xe6 + 0xae + 0x2 + 0xaf + 0x12 + 0xaf + 0x25 + 0xaf + 0x3a + 0xaf + 0x51 + 0xaf + 0x6c + 0xaf + 0x85 + 0xaf + 0x96 + 0xaf + 0xa9 + 0xaf + 0xbf + 0xaf + 0xd7 + 0xaf + 0xf3 + 0xaf + 0x9 + 0xb0 + 0x1a + 0xb0 + 0x2e + 0xb0 + 0xe6 + 0xae + 0x2 + 0xaf + 0x12 + 0xaf + 0x25 + 0xaf + 0x3a + 0xaf + 0x51 + 0xaf + 0x6c + 0xaf + 0x85 + 0xaf + 0x96 + 0xaf + 0xa9 + 0xaf + 0xbf + 0xaf + 0xd7 + 0xaf + 0xf3 + 0xaf + 0x9 + 0xb0 + 0x1a + 0xb0 + 0x2e + 0xb0 + 0x44 + 0xb0 + 0x5d + 0xb0 + 0x79 + 0xb0 + 0x8c + 0xb0 + 0x9e + 0xb0 + 0xb2 + 0xb0 + 0xc9 + 0xb0 + 0xe3 + 0xb0 + 0xff + 0xb0 + 0x10 + 0xb1 + 0x22 + 0xb1 + 0x37 + 0xb1 + 0x4e + 0xb1 + 0x68 + 0xb1 + 0x83 + 0xb1 + 0x93 + 0xb1 + 0x44 + 0xb0 + 0x5d + 0xb0 + 0x79 + 0xb0 + 0x8c + 0xb0 + 0x9e + 0xb0 + 0xb2 + 0xb0 + 0xc9 + 0xb0 + 0xe3 + 0xb0 + 0xff + 0xb0 + 0x10 + 0xb1 + 0x22 + 0xb1 + 0x37 + 0xb1 + 0x4e + 0xb1 + 0x68 + 0xb1 + 0x83 + 0xb1 + 0x93 + 0xb1 + 0xa6 + 0xb1 + 0xbb + 0xb1 + 0xd3 + 0xb1 + 0xee + 0xb1 + 0x6 + 0xb2 + 0x17 + 0xb2 + 0x2a + 0xb2 + 0x3f + 0xb2 + 0x58 + 0xb2 + 0x73 + 0xb2 + 0x89 + 0xb2 + 0x9a + 0xb2 + 0xae + 0xb2 + 0xc3 + 0xb2 + 0xdc + 0xb2 + 0xf8 + 0xb2 + 0xa6 + 0xb1 + 0xbb + 0xb1 + 0xd3 + 0xb1 + 0xee + 0xb1 + 0x6 + 0xb2 + 0x17 + 0xb2 + 0x2a + 0xb2 + 0x3f + 0xb2 + 0x58 + 0xb2 + 0x73 + 0xb2 + 0x89 + 0xb2 + 0x9a + 0xb2 + 0xae + 0xb2 + 0xc3 + 0xb2 + 0xdc + 0xb2 + 0xf8 + 0xb2 + 0xc + 0xb3 + 0x1d + 0xb3 + 0x31 + 0xb3 + 0x47 + 0xb3 + 0x60 + 0xb3 + 0x7d + 0xb3 + 0x8e + 0xb3 + 0xa0 + 0xb3 + 0xb4 + 0xb3 + 0xcb + 0xb3 + 0xe5 + 0xb3 + 0x1 + 0xb4 + 0x11 + 0xb4 + 0x23 + 0xb4 + 0x37 + 0xb4 + 0x4f + 0xb4 + 0xc + 0xb3 + 0x1d + 0xb3 + 0x31 + 0xb3 + 0x47 + 0xb3 + 0x60 + 0xb3 + 0x7d + 0xb3 + 0x8e + 0xb3 + 0xa0 + 0xb3 + 0xb4 + 0xb3 + 0xcb + 0xb3 + 0xe5 + 0xb3 + 0x1 + 0xb4 + 0x11 + 0xb4 + 0x23 + 0xb4 + 0x37 + 0xb4 + 0x4f + 0xb4 + 0x68 + 0xb4 + 0x83 + 0xb4 + 0x93 + 0xb4 + 0xa6 + 0xb4 + 0xba + 0xb4 + 0xd2 + 0xb4 + 0xec + 0xb4 + 0x5 + 0xb5 + 0x15 + 0xb5 + 0x28 + 0xb5 + 0x3d + 0xb5 + 0x55 + 0xb5 + 0x6f + 0xb5 + 0x86 + 0xb5 + 0x97 + 0xb5 + 0xaa + 0xb5 + 0x68 + 0xb4 + 0x83 + 0xb4 + 0x93 + 0xb4 + 0xa6 + 0xb4 + 0xba + 0xb4 + 0xd2 + 0xb4 + 0xec + 0xb4 + 0x5 + 0xb5 + 0x15 + 0xb5 + 0x28 + 0xb5 + 0x3d + 0xb5 + 0x55 + 0xb5 + 0x6f + 0xb5 + 0x86 + 0xb5 + 0x97 + 0xb5 + 0xaa + 0xb5 + 0xbf + 0xb5 + 0xd7 + 0xb5 + 0xf2 + 0xb5 + 0x0 + 0xb6 + 0x8 + 0xb6 + 0x10 + 0xb6 + 0x19 + 0xb6 + 0x22 + 0xb6 + 0x2c + 0xb6 + 0x36 + 0xb6 + 0x41 + 0xb6 + 0x4d + 0xb6 + 0x59 + 0xb6 + 0x66 + 0xb6 + 0x74 + 0xb6 + 0x81 + 0xb6 + 0xbf + 0xb5 + 0xd7 + 0xb5 + 0xf2 + 0xb5 + 0x0 + 0xb6 + 0x8 + 0xb6 + 0x10 + 0xb6 + 0x19 + 0xb6 + 0x22 + 0xb6 + 0x2c + 0xb6 + 0x36 + 0xb6 + 0x41 + 0xb6 + 0x4d + 0xb6 + 0x59 + 0xb6 + 0x66 + 0xb6 + 0x74 + 0xb6 + 0x81 + 0xb6 + 0x89 + 0xb6 + 0x91 + 0xb6 + 0x9a + 0xb6 + 0xa3 + 0xb6 + 0xad + 0xb6 + 0xb7 + 0xb6 + 0xc2 + 0xb6 + 0xce + 0xb6 + 0xda + 0xb6 + 0xe8 + 0xb6 + 0xf5 + 0xb6 + 0x2 + 0xb7 + 0xa + 0xb7 + 0x12 + 0xb7 + 0x1b + 0xb7 + 0x24 + 0xb7 + 0x89 + 0xb6 + 0x91 + 0xb6 + 0x9a + 0xb6 + 0xa3 + 0xb6 + 0xad + 0xb6 + 0xb7 + 0xb6 + 0xc2 + 0xb6 + 0xce + 0xb6 + 0xda + 0xb6 + 0xe8 + 0xb6 + 0xf5 + 0xb6 + 0x2 + 0xb7 + 0xa + 0xb7 + 0x12 + 0xb7 + 0x1b + 0xb7 + 0x24 + 0xb7 + 0x2e + 0xb7 + 0x38 + 0xb7 + 0x43 + 0xb7 + 0x4f + 0xb7 + 0x5b + 0xb7 + 0x68 + 0xb7 + 0x76 + 0xb7 + 0x82 + 0xb7 + 0x8a + 0xb7 + 0x93 + 0xb7 + 0x9b + 0xb7 + 0xa5 + 0xb7 + 0xae + 0xb7 + 0xb9 + 0xb7 + 0xc4 + 0xb7 + 0xcf + 0xb7 + 0x2e + 0xb7 + 0x38 + 0xb7 + 0x43 + 0xb7 + 0x4f + 0xb7 + 0x5b + 0xb7 + 0x68 + 0xb7 + 0x76 + 0xb7 + 0x82 + 0xb7 + 0x8a + 0xb7 + 0x93 + 0xb7 + 0x9b + 0xb7 + 0xa5 + 0xb7 + 0xae + 0xb7 + 0xb9 + 0xb7 + 0xc4 + 0xb7 + 0xcf + 0xb7 + 0xdc + 0xb7 + 0xe9 + 0xb7 + 0xf6 + 0xb7 + 0x3 + 0xb8 + 0xa + 0xb8 + 0x13 + 0xb8 + 0x1b + 0xb8 + 0x24 + 0xb8 + 0x2e + 0xb8 + 0x38 + 0xb8 + 0x43 + 0xb8 + 0x4f + 0xb8 + 0x5b + 0xb8 + 0x68 + 0xb8 + 0x76 + 0xb8 + 0x82 + 0xb8 + 0xdc + 0xb7 + 0xe9 + 0xb7 + 0xf6 + 0xb7 + 0x3 + 0xb8 + 0xa + 0xb8 + 0x13 + 0xb8 + 0x1b + 0xb8 + 0x24 + 0xb8 + 0x2e + 0xb8 + 0x38 + 0xb8 + 0x43 + 0xb8 + 0x4f + 0xb8 + 0x5b + 0xb8 + 0x68 + 0xb8 + 0x76 + 0xb8 + 0x82 + 0xb8 + 0x8a + 0xb8 + 0x92 + 0xb8 + 0x9b + 0xb8 + 0xa4 + 0xb8 + 0xad + 0xb8 + 0xb8 + 0xb8 + 0xc2 + 0xb8 + 0xce + 0xb8 + 0xda + 0xb8 + 0xe7 + 0xb8 + 0xf4 + 0xb8 + 0x1 + 0xb9 + 0x9 + 0xb9 + 0x11 + 0xb9 + 0x19 + 0xb9 + 0x22 + 0xb9 + 0x8a + 0xb8 + 0x92 + 0xb8 + 0x9b + 0xb8 + 0xa4 + 0xb8 + 0xad + 0xb8 + 0xb8 + 0xb8 + 0xc2 + 0xb8 + 0xce + 0xb8 + 0xda + 0xb8 + 0xe7 + 0xb8 + 0xf4 + 0xb8 + 0x1 + 0xb9 + 0x9 + 0xb9 + 0x11 + 0xb9 + 0x19 + 0xb9 + 0x22 + 0xb9 + 0x2c + 0xb9 + 0x36 + 0xb9 + 0x41 + 0xb9 + 0x4c + 0xb9 + 0x58 + 0xb9 + 0x64 + 0xb9 + 0x72 + 0xb9 + 0x80 + 0xb9 + 0x87 + 0xb9 + 0x8f + 0xb9 + 0x98 + 0xb9 + 0xa0 + 0xb9 + 0xaa + 0xb9 + 0xb4 + 0xb9 + 0xbe + 0xb9 + 0xc9 + 0xb9 + 0x2c + 0xb9 + 0x36 + 0xb9 + 0x41 + 0xb9 + 0x4c + 0xb9 + 0x58 + 0xb9 + 0x64 + 0xb9 + 0x72 + 0xb9 + 0x80 + 0xb9 + 0x87 + 0xb9 + 0x8f + 0xb9 + 0x98 + 0xb9 + 0xa0 + 0xb9 + 0xaa + 0xb9 + 0xb4 + 0xb9 + 0xbe + 0xb9 + 0xc9 + 0xb9 + 0xd5 + 0xb9 + 0xe1 + 0xb9 + 0xee + 0xb9 + 0xfc + 0xb9 + 0x5 + 0xba + 0xd + 0xba + 0x15 + 0xba + 0x1e + 0xba + 0x27 + 0xba + 0x30 + 0xba + 0x3a + 0xba + 0x45 + 0xba + 0x50 + 0xba + 0x5c + 0xba + 0x69 + 0xba + 0x76 + 0xba + 0xd5 + 0xb9 + 0xe1 + 0xb9 + 0xee + 0xb9 + 0xfc + 0xb9 + 0x5 + 0xba + 0xd + 0xba + 0x15 + 0xba + 0x1e + 0xba + 0x27 + 0xba + 0x30 + 0xba + 0x3a + 0xba + 0x45 + 0xba + 0x50 + 0xba + 0x5c + 0xba + 0x69 + 0xba + 0x76 + 0xba + 0x82 + 0xba + 0x8a + 0xba + 0x92 + 0xba + 0x9a + 0xba + 0xa3 + 0xba + 0xac + 0xba + 0xb6 + 0xba + 0xc0 + 0xba + 0xcb + 0xba + 0xd6 + 0xba + 0xe3 + 0xba + 0xf0 + 0xba + 0xfd + 0xba + 0x6 + 0xbb + 0xd + 0xbb + 0x15 + 0xbb + 0x82 + 0xba + 0x8a + 0xba + 0x92 + 0xba + 0x9a + 0xba + 0xa3 + 0xba + 0xac + 0xba + 0xb6 + 0xba + 0xc0 + 0xba + 0xcb + 0xba + 0xd6 + 0xba + 0xe3 + 0xba + 0xf0 + 0xba + 0xfd + 0xba + 0x6 + 0xbb + 0xd + 0xbb + 0x15 + 0xbb + 0x1e + 0xbb + 0x26 + 0xbb + 0x30 + 0xbb + 0x35 + 0xbb + 0x3a + 0xbb + 0x3f + 0xbb + 0x44 + 0xbb + 0x4a + 0xbb + 0x4f + 0xbb + 0x55 + 0xbb + 0x5b + 0xbb + 0x61 + 0xbb + 0x67 + 0xbb + 0x6d + 0xbb + 0x74 + 0xbb + 0x7b + 0xbb + 0x1e + 0xbb + 0x26 + 0xbb + 0x30 + 0xbb + 0x35 + 0xbb + 0x3a + 0xbb + 0x3f + 0xbb + 0x44 + 0xbb + 0x4a + 0xbb + 0x4f + 0xbb + 0x55 + 0xbb + 0x5b + 0xbb + 0x61 + 0xbb + 0x67 + 0xbb + 0x6d + 0xbb + 0x74 + 0xbb + 0x7b + 0xbb + 0x81 + 0xbb + 0x84 + 0xbb + 0x88 + 0xbb + 0x8c + 0xbb + 0x8f + 0xbb + 0x93 + 0xbb + 0x97 + 0xbb + 0x9c + 0xbb + 0xa0 + 0xbb + 0xa4 + 0xbb + 0xa9 + 0xbb + 0xad + 0xbb + 0xb2 + 0xbb + 0xb7 + 0xbb + 0xbc + 0xbb + 0xc1 + 0xbb + 0x81 + 0xbb + 0x84 + 0xbb + 0x88 + 0xbb + 0x8c + 0xbb + 0x8f + 0xbb + 0x93 + 0xbb + 0x97 + 0xbb + 0x9c + 0xbb + 0xa0 + 0xbb + 0xa4 + 0xbb + 0xa9 + 0xbb + 0xad + 0xbb + 0xb2 + 0xbb + 0xb7 + 0xbb + 0xbc + 0xbb + 0xc1 + 0xbb + 0xc6 + 0xbb + 0xcc + 0xbb + 0xd1 + 0xbb + 0xd7 + 0xbb + 0xdd + 0xbb + 0xe3 + 0xbb + 0xe9 + 0xbb + 0xef + 0xbb + 0xf5 + 0xbb + 0xfc + 0xbb + 0x1 + 0xbc + 0x5 + 0xbc + 0x8 + 0xbc + 0xc + 0xbc + 0x10 + 0xbc + 0x14 + 0xbc + 0xc6 + 0xbb + 0xcc + 0xbb + 0xd1 + 0xbb + 0xd7 + 0xbb + 0xdd + 0xbb + 0xe3 + 0xbb + 0xe9 + 0xbb + 0xef + 0xbb + 0xf5 + 0xbb + 0xfc + 0xbb + 0x1 + 0xbc + 0x5 + 0xbc + 0x8 + 0xbc + 0xc + 0xbc + 0x10 + 0xbc + 0x14 + 0xbc + 0x18 + 0xbc + 0x1c + 0xbc + 0x20 + 0xbc + 0x24 + 0xbc + 0x29 + 0xbc + 0x2d + 0xbc + 0x32 + 0xbc + 0x37 + 0xbc + 0x3b + 0xbc + 0x40 + 0xbc + 0x45 + 0xbc + 0x4b + 0xbc + 0x50 + 0xbc + 0x56 + 0xbc + 0x5b + 0xbc + 0x61 + 0xbc + 0x18 + 0xbc + 0x1c + 0xbc + 0x20 + 0xbc + 0x24 + 0xbc + 0x29 + 0xbc + 0x2d + 0xbc + 0x32 + 0xbc + 0x37 + 0xbc + 0x3b + 0xbc + 0x40 + 0xbc + 0x45 + 0xbc + 0x4b + 0xbc + 0x50 + 0xbc + 0x56 + 0xbc + 0x5b + 0xbc + 0x61 + 0xbc + 0x67 + 0xbc + 0x6d + 0xbc + 0x73 + 0xbc + 0x7a + 0xbc + 0x80 + 0xbc + 0x84 + 0xbc + 0x87 + 0xbc + 0x8b + 0xbc + 0x8e + 0xbc + 0x92 + 0xbc + 0x96 + 0xbc + 0x9a + 0xbc + 0x9e + 0xbc + 0xa2 + 0xbc + 0xa6 + 0xbc + 0xaa + 0xbc + 0x67 + 0xbc + 0x6d + 0xbc + 0x73 + 0xbc + 0x7a + 0xbc + 0x80 + 0xbc + 0x84 + 0xbc + 0x87 + 0xbc + 0x8b + 0xbc + 0x8e + 0xbc + 0x92 + 0xbc + 0x96 + 0xbc + 0x9a + 0xbc + 0x9e + 0xbc + 0xa2 + 0xbc + 0xa6 + 0xbc + 0xaa + 0xbc + 0xaf + 0xbc + 0xb3 + 0xbc + 0xb8 + 0xbc + 0xbd + 0xbc + 0xc1 + 0xbc + 0xc6 + 0xbc + 0xcb + 0xbc + 0xd1 + 0xbc + 0xd6 + 0xbc + 0xdb + 0xbc + 0xe1 + 0xbc + 0xe7 + 0xbc + 0xed + 0xbc + 0xf3 + 0xbc + 0xf9 + 0xbc + 0xff + 0xbc + 0xaf + 0xbc + 0xb3 + 0xbc + 0xb8 + 0xbc + 0xbd + 0xbc + 0xc1 + 0xbc + 0xc6 + 0xbc + 0xcb + 0xbc + 0xd1 + 0xbc + 0xd6 + 0xbc + 0xdb + 0xbc + 0xe1 + 0xbc + 0xe7 + 0xbc + 0xed + 0xbc + 0xf3 + 0xbc + 0xf9 + 0xbc + 0xff + 0xbc + 0x3 + 0xbd + 0x6 + 0xbd + 0xa + 0xbd + 0xd + 0xbd + 0x11 + 0xbd + 0x14 + 0xbd + 0x18 + 0xbd + 0x1c + 0xbd + 0x20 + 0xbd + 0x24 + 0xbd + 0x28 + 0xbd + 0x2c + 0xbd + 0x30 + 0xbd + 0x34 + 0xbd + 0x39 + 0xbd + 0x3d + 0xbd + 0x3 + 0xbd + 0x6 + 0xbd + 0xa + 0xbd + 0xd + 0xbd + 0x11 + 0xbd + 0x14 + 0xbd + 0x18 + 0xbd + 0x1c + 0xbd + 0x20 + 0xbd + 0x24 + 0xbd + 0x28 + 0xbd + 0x2c + 0xbd + 0x30 + 0xbd + 0x34 + 0xbd + 0x39 + 0xbd + 0x3d + 0xbd + 0x42 + 0xbd + 0x47 + 0xbd + 0x4c + 0xbd + 0x51 + 0xbd + 0x56 + 0xbd + 0x5b + 0xbd + 0x60 + 0xbd + 0x66 + 0xbd + 0x6b + 0xbd + 0x71 + 0xbd + 0x77 + 0xbd + 0x7c + 0xbd + 0x81 + 0xbd + 0x84 + 0xbd + 0x87 + 0xbd + 0x8b + 0xbd + 0x42 + 0xbd + 0x47 + 0xbd + 0x4c + 0xbd + 0x51 + 0xbd + 0x56 + 0xbd + 0x5b + 0xbd + 0x60 + 0xbd + 0x66 + 0xbd + 0x6b + 0xbd + 0x71 + 0xbd + 0x77 + 0xbd + 0x7c + 0xbd + 0x81 + 0xbd + 0x84 + 0xbd + 0x87 + 0xbd + 0x8b + 0xbd + 0x8e + 0xbd + 0x91 + 0xbd + 0x95 + 0xbd + 0x96 + 0xbd + 0x98 + 0xbd + 0x9a + 0xbd + 0x9c + 0xbd + 0x9d + 0xbd + 0x9f + 0xbd + 0xa1 + 0xbd + 0xa3 + 0xbd + 0xa5 + 0xbd + 0xa7 + 0xbd + 0xa9 + 0xbd + 0xab + 0xbd + 0xad + 0xbd + 0x8e + 0xbd + 0x91 + 0xbd + 0x95 + 0xbd + 0x96 + 0xbd + 0x98 + 0xbd + 0x9a + 0xbd + 0x9c + 0xbd + 0x9d + 0xbd + 0x9f + 0xbd + 0xa1 + 0xbd + 0xa3 + 0xbd + 0xa5 + 0xbd + 0xa7 + 0xbd + 0xa9 + 0xbd + 0xab + 0xbd + 0xad + 0xbd + 0xaf + 0xbd + 0xb0 + 0xbd + 0xb2 + 0xbd + 0xb5 + 0xbd + 0xb7 + 0xbd + 0xb9 + 0xbd + 0xbb + 0xbd + 0xbd + 0xbd + 0xbf + 0xbd + 0xc1 + 0xbd + 0xc3 + 0xbd + 0xc5 + 0xbd + 0xc8 + 0xbd + 0xca + 0xbd + 0xcc + 0xbd + 0xce + 0xbd + 0xaf + 0xbd + 0xb0 + 0xbd + 0xb2 + 0xbd + 0xb5 + 0xbd + 0xb7 + 0xbd + 0xb9 + 0xbd + 0xbb + 0xbd + 0xbd + 0xbd + 0xbf + 0xbd + 0xc1 + 0xbd + 0xc3 + 0xbd + 0xc5 + 0xbd + 0xc8 + 0xbd + 0xca + 0xbd + 0xcc + 0xbd + 0xce + 0xbd + 0xd1 + 0xbd + 0xd3 + 0xbd + 0xd5 + 0xbd + 0xd8 + 0xbd + 0xda + 0xbd + 0xdc + 0xbd + 0xdf + 0xbd + 0xe1 + 0xbd + 0xe4 + 0xbd + 0xe6 + 0xbd + 0xe8 + 0xbd + 0xeb + 0xbd + 0xed + 0xbd + 0xf0 + 0xbd + 0xf3 + 0xbd + 0xf5 + 0xbd + 0xd1 + 0xbd + 0xd3 + 0xbd + 0xd5 + 0xbd + 0xd8 + 0xbd + 0xda + 0xbd + 0xdc + 0xbd + 0xdf + 0xbd + 0xe1 + 0xbd + 0xe4 + 0xbd + 0xe6 + 0xbd + 0xe8 + 0xbd + 0xeb + 0xbd + 0xed + 0xbd + 0xf0 + 0xbd + 0xf3 + 0xbd + 0xf5 + 0xbd + 0xf8 + 0xbd + 0xfa + 0xbd + 0xfd + 0xbd + 0x0 + 0xbe + 0x1 + 0xbe + 0x3 + 0xbe + 0x4 + 0xbe + 0x5 + 0xbe + 0x7 + 0xbe + 0x8 + 0xbe + 0xa + 0xbe + 0xb + 0xbe + 0xc + 0xbe + 0xe + 0xbe + 0xf + 0xbe + 0x11 + 0xbe + 0xf8 + 0xbd + 0xfa + 0xbd + 0xfd + 0xbd + 0x0 + 0xbe + 0x1 + 0xbe + 0x3 + 0xbe + 0x4 + 0xbe + 0x5 + 0xbe + 0x7 + 0xbe + 0x8 + 0xbe + 0xa + 0xbe + 0xb + 0xbe + 0xc + 0xbe + 0xe + 0xbe + 0xf + 0xbe + 0x11 + 0xbe + 0x12 + 0xbe + 0x14 + 0xbe + 0x15 + 0xbe + 0x17 + 0xbe + 0x18 + 0xbe + 0x1a + 0xbe + 0x1b + 0xbe + 0x1d + 0xbe + 0x1e + 0xbe + 0x20 + 0xbe + 0x21 + 0xbe + 0x23 + 0xbe + 0x25 + 0xbe + 0x26 + 0xbe + 0x28 + 0xbe + 0x29 + 0xbe + 0x12 + 0xbe + 0x14 + 0xbe + 0x15 + 0xbe + 0x17 + 0xbe + 0x18 + 0xbe + 0x1a + 0xbe + 0x1b + 0xbe + 0x1d + 0xbe + 0x1e + 0xbe + 0x20 + 0xbe + 0x21 + 0xbe + 0x23 + 0xbe + 0x25 + 0xbe + 0x26 + 0xbe + 0x28 + 0xbe + 0x29 + 0xbe + 0x2b + 0xbe + 0x2d + 0xbe + 0x2e + 0xbe + 0x30 + 0xbe + 0x32 + 0xbe + 0x33 + 0xbe + 0x35 + 0xbe + 0x37 + 0xbe + 0x38 + 0xbe + 0x3a + 0xbe + 0x3c + 0xbe + 0x3d + 0xbe + 0x3f + 0xbe + 0x41 + 0xbe + 0x43 + 0xbe + 0x44 + 0xbe + 0x2b + 0xbe + 0x2d + 0xbe + 0x2e + 0xbe + 0x30 + 0xbe + 0x32 + 0xbe + 0x33 + 0xbe + 0x35 + 0xbe + 0x37 + 0xbe + 0x38 + 0xbe + 0x3a + 0xbe + 0x3c + 0xbe + 0x3d + 0xbe + 0x3f + 0xbe + 0x41 + 0xbe + 0x43 + 0xbe + 0x44 + 0xbe + 0x46 + 0xbe + 0x48 + 0xbe + 0x4a + 0xbe + 0x4b + 0xbe + 0x4d + 0xbe + 0x4f + 0xbe + 0x51 + 0xbe + 0x52 + 0xbe + 0x54 + 0xbe + 0x56 + 0xbe + 0x58 + 0xbe + 0x5a + 0xbe + 0x5b + 0xbe + 0x5d + 0xbe + 0x5f + 0xbe + 0x61 + 0xbe + 0x46 + 0xbe + 0x48 + 0xbe + 0x4a + 0xbe + 0x4b + 0xbe + 0x4d + 0xbe + 0x4f + 0xbe + 0x51 + 0xbe + 0x52 + 0xbe + 0x54 + 0xbe + 0x56 + 0xbe + 0x58 + 0xbe + 0x5a + 0xbe + 0x5b + 0xbe + 0x5d + 0xbe + 0x5f + 0xbe + 0x61 + 0xbe + 0x63 + 0xbe + 0x64 + 0xbe + 0x66 + 0xbe + 0x68 + 0xbe + 0x6a + 0xbe + 0x6c + 0xbe + 0x6d + 0xbe + 0x6f + 0xbe + 0x71 + 0xbe + 0x73 + 0xbe + 0x74 + 0xbe + 0x76 + 0xbe + 0x78 + 0xbe + 0x7a + 0xbe + 0x7c + 0xbe + 0x7d + 0xbe + 0x63 + 0xbe + 0x64 + 0xbe + 0x66 + 0xbe + 0x68 + 0xbe + 0x6a + 0xbe + 0x6c + 0xbe + 0x6d + 0xbe + 0x6f + 0xbe + 0x71 + 0xbe + 0x73 + 0xbe + 0x74 + 0xbe + 0x76 + 0xbe + 0x78 + 0xbe + 0x7a + 0xbe + 0x7c + 0xbe + 0x7d + 0xbe + 0x7f + 0xbe + 0x80 + 0xbe + 0x81 + 0xbe + 0x82 + 0xbe + 0x82 + 0xbe + 0x83 + 0xbe + 0x83 + 0xbe + 0x83 + 0xbe + 0x84 + 0xbe + 0x84 + 0xbe + 0x85 + 0xbe + 0x85 + 0xbe + 0x86 + 0xbe + 0x86 + 0xbe + 0x86 + 0xbe + 0x87 + 0xbe + 0x7f + 0xbe + 0x80 + 0xbe + 0x81 + 0xbe + 0x82 + 0xbe + 0x82 + 0xbe + 0x83 + 0xbe + 0x83 + 0xbe + 0x83 + 0xbe + 0x84 + 0xbe + 0x84 + 0xbe + 0x85 + 0xbe + 0x85 + 0xbe + 0x86 + 0xbe + 0x86 + 0xbe + 0x86 + 0xbe + 0x87 + 0xbe + 0x87 + 0xbe + 0x88 + 0xbe + 0x88 + 0xbe + 0x88 + 0xbe + 0x89 + 0xbe + 0x89 + 0xbe + 0x8a + 0xbe + 0x8a + 0xbe + 0x8a + 0xbe + 0x8b + 0xbe + 0x8b + 0xbe + 0x8c + 0xbe + 0x8c + 0xbe + 0x8c + 0xbe + 0x8d + 0xbe + 0x8d + 0xbe + 0x87 + 0xbe + 0x88 + 0xbe + 0x88 + 0xbe + 0x88 + 0xbe + 0x89 + 0xbe + 0x89 + 0xbe + 0x8a + 0xbe + 0x8a + 0xbe + 0x8a + 0xbe + 0x8b + 0xbe + 0x8b + 0xbe + 0x8c + 0xbe + 0x8c + 0xbe + 0x8c + 0xbe + 0x8d + 0xbe + 0x8d + 0xbe + 0x8e + 0xbe + 0x8e + 0xbe + 0x8e + 0xbe + 0x8f + 0xbe + 0x8f + 0xbe + 0x8f + 0xbe + 0x90 + 0xbe + 0x90 + 0xbe + 0x91 + 0xbe + 0x91 + 0xbe + 0x91 + 0xbe + 0x92 + 0xbe + 0x92 + 0xbe + 0x92 + 0xbe + 0x93 + 0xbe + 0x93 + 0xbe + 0x8e + 0xbe + 0x8e + 0xbe + 0x8e + 0xbe + 0x8f + 0xbe + 0x8f + 0xbe + 0x8f + 0xbe + 0x90 + 0xbe + 0x90 + 0xbe + 0x91 + 0xbe + 0x91 + 0xbe + 0x91 + 0xbe + 0x92 + 0xbe + 0x92 + 0xbe + 0x92 + 0xbe + 0x93 + 0xbe + 0x93 + 0xbe + 0x93 + 0xbe + 0x94 + 0xbe + 0x94 + 0xbe + 0x94 + 0xbe + 0x95 + 0xbe + 0x95 + 0xbe + 0x95 + 0xbe + 0x96 + 0xbe + 0x96 + 0xbe + 0x96 + 0xbe + 0x96 + 0xbe + 0x97 + 0xbe + 0x97 + 0xbe + 0x97 + 0xbe + 0x98 + 0xbe + 0x98 + 0xbe + 0x93 + 0xbe + 0x94 + 0xbe + 0x94 + 0xbe + 0x94 + 0xbe + 0x95 + 0xbe + 0x95 + 0xbe + 0x95 + 0xbe + 0x96 + 0xbe + 0x96 + 0xbe + 0x96 + 0xbe + 0x96 + 0xbe + 0x97 + 0xbe + 0x97 + 0xbe + 0x97 + 0xbe + 0x98 + 0xbe + 0x98 + 0xbe + 0x98 + 0xbe + 0x98 + 0xbe + 0x99 + 0xbe + 0x99 + 0xbe + 0x99 + 0xbe + 0x99 + 0xbe + 0x9a + 0xbe + 0x9a + 0xbe + 0x9a + 0xbe + 0x9a + 0xbe + 0x9b + 0xbe + 0x9b + 0xbe + 0x9b + 0xbe + 0x9b + 0xbe + 0x9b + 0xbe + 0x9c + 0xbe + 0x98 + 0xbe + 0x98 + 0xbe + 0x99 + 0xbe + 0x99 + 0xbe + 0x99 + 0xbe + 0x99 + 0xbe + 0x9a + 0xbe + 0x9a + 0xbe + 0x9a + 0xbe + 0x9a + 0xbe + 0x9b + 0xbe + 0x9b + 0xbe + 0x9b + 0xbe + 0x9b + 0xbe + 0x9b + 0xbe + 0x9c + 0xbe + 0x9c + 0xbe + 0x9c + 0xbe + 0x9c + 0xbe + 0x9c + 0xbe + 0x9c + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9c + 0xbe + 0x9c + 0xbe + 0x9c + 0xbe + 0x9c + 0xbe + 0x9c + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9c + 0xbe + 0x9c + 0xbe + 0x9c + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9c + 0xbe + 0x9c + 0xbe + 0x9c + 0xbe + 0x9c + 0xbe + 0x9c + 0xbe + 0x9b + 0xbe + 0x9b + 0xbe + 0x9b + 0xbe + 0x9b + 0xbe + 0x9b + 0xbe + 0x9b + 0xbe + 0x9b + 0xbe + 0x9a + 0xbe + 0x9a + 0xbe + 0x9a + 0xbe + 0x9a + 0xbe + 0x9a + 0xbe + 0x9a + 0xbe + 0x9a + 0xbe + 0x9c + 0xbe + 0x9c + 0xbe + 0x9b + 0xbe + 0x9b + 0xbe + 0x9b + 0xbe + 0x9b + 0xbe + 0x9b + 0xbe + 0x9b + 0xbe + 0x9b + 0xbe + 0x9a + 0xbe + 0x9a + 0xbe + 0x9a + 0xbe + 0x9a + 0xbe + 0x9a + 0xbe + 0x9a + 0xbe + 0x9a + 0xbe + 0x99 + 0xbe + 0x99 + 0xbe + 0x99 + 0xbe + 0x99 + 0xbe + 0x99 + 0xbe + 0x99 + 0xbe + 0x98 + 0xbe + 0x98 + 0xbe + 0x98 + 0xbe + 0x98 + 0xbe + 0x98 + 0xbe + 0x97 + 0xbe + 0x97 + 0xbe + 0x97 + 0xbe + 0x97 + 0xbe + 0x97 + 0xbe + 0x99 + 0xbe + 0x99 + 0xbe + 0x99 + 0xbe + 0x99 + 0xbe + 0x99 + 0xbe + 0x99 + 0xbe + 0x98 + 0xbe + 0x98 + 0xbe + 0x98 + 0xbe + 0x98 + 0xbe + 0x98 + 0xbe + 0x97 + 0xbe + 0x97 + 0xbe + 0x97 + 0xbe + 0x97 + 0xbe + 0x97 + 0xbe + 0x96 + 0xbe + 0x96 + 0xbe + 0x96 + 0xbe + 0x96 + 0xbe + 0x96 + 0xbe + 0x95 + 0xbe + 0x95 + 0xbe + 0x95 + 0xbe + 0x95 + 0xbe + 0x95 + 0xbe + 0x94 + 0xbe + 0x94 + 0xbe + 0x94 + 0xbe + 0x94 + 0xbe + 0x93 + 0xbe + 0x93 + 0xbe + 0x96 + 0xbe + 0x96 + 0xbe + 0x96 + 0xbe + 0x96 + 0xbe + 0x96 + 0xbe + 0x95 + 0xbe + 0x95 + 0xbe + 0x95 + 0xbe + 0x95 + 0xbe + 0x95 + 0xbe + 0x94 + 0xbe + 0x94 + 0xbe + 0x94 + 0xbe + 0x94 + 0xbe + 0x93 + 0xbe + 0x93 + 0xbe + 0x93 + 0xbe + 0x93 + 0xbe + 0x92 + 0xbe + 0x92 + 0xbe + 0x92 + 0xbe + 0x91 + 0xbe + 0x91 + 0xbe + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x93 + 0xbe + 0x93 + 0xbe + 0x92 + 0xbe + 0x92 + 0xbe + 0x92 + 0xbe + 0x91 + 0xbe + 0x91 + 0xbe + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + +.data_segment_name +.data weak 64 _ZN8mish_lutILj0ELj2048EE7data_cdE DMb + 0x1 + 0x80 + 0x1 + 0x80 + 0x2 + 0x80 + 0x3 + 0x80 + 0x6 + 0x80 + 0x9 + 0x80 + 0xf + 0x80 + 0x19 + 0x80 + 0x29 + 0x80 + 0x44 + 0x80 + 0x6f + 0x80 + 0xb6 + 0x80 + 0x15 + 0x81 + 0x75 + 0x81 + 0xc9 + 0x81 + 0x25 + 0x82 + 0x1 + 0x80 + 0x1 + 0x80 + 0x2 + 0x80 + 0x3 + 0x80 + 0x6 + 0x80 + 0x9 + 0x80 + 0xf + 0x80 + 0x19 + 0x80 + 0x29 + 0x80 + 0x44 + 0x80 + 0x6f + 0x80 + 0xb6 + 0x80 + 0x15 + 0x81 + 0x75 + 0x81 + 0xc9 + 0x81 + 0x25 + 0x82 + 0x87 + 0x82 + 0xdd + 0x82 + 0x35 + 0x83 + 0x95 + 0x83 + 0xf4 + 0x83 + 0x48 + 0x84 + 0xa4 + 0x84 + 0x6 + 0x85 + 0x5c + 0x85 + 0xb4 + 0x85 + 0x14 + 0x86 + 0x72 + 0x86 + 0xc6 + 0x86 + 0x22 + 0x87 + 0x85 + 0x87 + 0xda + 0x87 + 0x87 + 0x82 + 0xdd + 0x82 + 0x35 + 0x83 + 0x95 + 0x83 + 0xf4 + 0x83 + 0x48 + 0x84 + 0xa4 + 0x84 + 0x6 + 0x85 + 0x5c + 0x85 + 0xb4 + 0x85 + 0x14 + 0x86 + 0x72 + 0x86 + 0xc6 + 0x86 + 0x22 + 0x87 + 0x85 + 0x87 + 0xda + 0x87 + 0x33 + 0x88 + 0x92 + 0x88 + 0xf0 + 0x88 + 0x45 + 0x89 + 0xa1 + 0x89 + 0x4 + 0x8a + 0x58 + 0x8a + 0xb1 + 0x8a + 0x11 + 0x8b + 0x6d + 0x8b + 0xc2 + 0x8b + 0x1f + 0x8c + 0x82 + 0x8c + 0xd6 + 0x8c + 0x2f + 0x8d + 0x8f + 0x8d + 0x33 + 0x88 + 0x92 + 0x88 + 0xf0 + 0x88 + 0x45 + 0x89 + 0xa1 + 0x89 + 0x4 + 0x8a + 0x58 + 0x8a + 0xb1 + 0x8a + 0x11 + 0x8b + 0x6d + 0x8b + 0xc2 + 0x8b + 0x1f + 0x8c + 0x82 + 0x8c + 0xd6 + 0x8c + 0x2f + 0x8d + 0x8f + 0x8d + 0xea + 0x8d + 0x40 + 0x8e + 0x9d + 0x8e + 0x1 + 0x8f + 0x53 + 0x8f + 0xac + 0x8f + 0xd + 0x90 + 0x67 + 0x90 + 0xbd + 0x90 + 0x1b + 0x91 + 0x7d + 0x91 + 0xcf + 0x91 + 0x2a + 0x92 + 0x8b + 0x92 + 0xe3 + 0x92 + 0x3a + 0x93 + 0xea + 0x8d + 0x40 + 0x8e + 0x9d + 0x8e + 0x1 + 0x8f + 0x53 + 0x8f + 0xac + 0x8f + 0xd + 0x90 + 0x67 + 0x90 + 0xbd + 0x90 + 0x1b + 0x91 + 0x7d + 0x91 + 0xcf + 0x91 + 0x2a + 0x92 + 0x8b + 0x92 + 0xe3 + 0x92 + 0x3a + 0x93 + 0x98 + 0x93 + 0xf9 + 0x93 + 0x4b + 0x94 + 0x82 + 0x94 + 0xa6 + 0x94 + 0xd5 + 0x94 + 0x8 + 0x95 + 0x2e + 0x95 + 0x5e + 0x95 + 0x8e + 0x95 + 0xb6 + 0x95 + 0xe9 + 0x95 + 0x15 + 0x96 + 0x3e + 0x96 + 0x73 + 0x96 + 0x9c + 0x96 + 0x98 + 0x93 + 0xf9 + 0x93 + 0x4b + 0x94 + 0x82 + 0x94 + 0xa6 + 0x94 + 0xd5 + 0x94 + 0x8 + 0x95 + 0x2e + 0x95 + 0x5e + 0x95 + 0x8e + 0x95 + 0xb6 + 0x95 + 0xe9 + 0x95 + 0x15 + 0x96 + 0x3e + 0x96 + 0x73 + 0x96 + 0x9c + 0x96 + 0xc7 + 0x96 + 0xfe + 0x96 + 0x23 + 0x97 + 0x50 + 0x97 + 0x85 + 0x97 + 0xaa + 0x97 + 0xd9 + 0x97 + 0xb + 0x98 + 0x32 + 0x98 + 0x63 + 0x98 + 0x91 + 0x98 + 0xba + 0x98 + 0xed + 0x98 + 0x18 + 0x99 + 0x42 + 0x99 + 0x78 + 0x99 + 0xc7 + 0x96 + 0xfe + 0x96 + 0x23 + 0x97 + 0x50 + 0x97 + 0x85 + 0x97 + 0xaa + 0x97 + 0xd9 + 0x97 + 0xb + 0x98 + 0x32 + 0x98 + 0x63 + 0x98 + 0x91 + 0x98 + 0xba + 0x98 + 0xed + 0x98 + 0x18 + 0x99 + 0x42 + 0x99 + 0x78 + 0x99 + 0x9e + 0x99 + 0xcb + 0x99 + 0x1 + 0x9a + 0x25 + 0x9a + 0x54 + 0x9a + 0x87 + 0x9a + 0xad + 0x9a + 0xdd + 0x9a + 0xd + 0x9b + 0x34 + 0x9b + 0x67 + 0x9b + 0x93 + 0x9b + 0xbc + 0x9b + 0xf1 + 0x9b + 0x1a + 0x9c + 0x45 + 0x9c + 0x9e + 0x99 + 0xcb + 0x99 + 0x1 + 0x9a + 0x25 + 0x9a + 0x54 + 0x9a + 0x87 + 0x9a + 0xad + 0x9a + 0xdd + 0x9a + 0xd + 0x9b + 0x34 + 0x9b + 0x67 + 0x9b + 0x93 + 0x9b + 0xbc + 0x9b + 0xf1 + 0x9b + 0x1a + 0x9c + 0x45 + 0x9c + 0x7b + 0x9c + 0xa1 + 0x9c + 0xcd + 0x9c + 0x3 + 0x9d + 0x27 + 0x9d + 0x56 + 0x9d + 0x89 + 0x9d + 0xaf + 0x9d + 0xdf + 0x9d + 0xf + 0x9e + 0x36 + 0x9e + 0x69 + 0x9e + 0x95 + 0x9e + 0xbe + 0x9e + 0xf3 + 0x9e + 0x1b + 0x9f + 0x7b + 0x9c + 0xa1 + 0x9c + 0xcd + 0x9c + 0x3 + 0x9d + 0x27 + 0x9d + 0x56 + 0x9d + 0x89 + 0x9d + 0xaf + 0x9d + 0xdf + 0x9d + 0xf + 0x9e + 0x36 + 0x9e + 0x69 + 0x9e + 0x95 + 0x9e + 0xbe + 0x9e + 0xf3 + 0x9e + 0x1b + 0x9f + 0x46 + 0x9f + 0x7d + 0x9f + 0xa2 + 0x9f + 0xce + 0x9f + 0x4 + 0xa0 + 0x28 + 0xa0 + 0x57 + 0xa0 + 0x89 + 0xa0 + 0xaf + 0xa0 + 0xe0 + 0xa0 + 0xf + 0xa1 + 0x37 + 0xa1 + 0x69 + 0xa1 + 0x95 + 0xa1 + 0xbe + 0xa1 + 0xf3 + 0xa1 + 0x46 + 0x9f + 0x7d + 0x9f + 0xa2 + 0x9f + 0xce + 0x9f + 0x4 + 0xa0 + 0x28 + 0xa0 + 0x57 + 0xa0 + 0x89 + 0xa0 + 0xaf + 0xa0 + 0xe0 + 0xa0 + 0xf + 0xa1 + 0x37 + 0xa1 + 0x69 + 0xa1 + 0x95 + 0xa1 + 0xbe + 0xa1 + 0xf3 + 0xa1 + 0x1b + 0xa2 + 0x46 + 0xa2 + 0x7d + 0xa2 + 0xa1 + 0xa2 + 0xce + 0xa2 + 0x3 + 0xa3 + 0x28 + 0xa3 + 0x56 + 0xa3 + 0x89 + 0xa3 + 0xae + 0xa3 + 0xdf + 0xa3 + 0xe + 0xa4 + 0x35 + 0xa4 + 0x68 + 0xa4 + 0x94 + 0xa4 + 0xbd + 0xa4 + 0x1b + 0xa2 + 0x46 + 0xa2 + 0x7d + 0xa2 + 0xa1 + 0xa2 + 0xce + 0xa2 + 0x3 + 0xa3 + 0x28 + 0xa3 + 0x56 + 0xa3 + 0x89 + 0xa3 + 0xae + 0xa3 + 0xdf + 0xa3 + 0xe + 0xa4 + 0x35 + 0xa4 + 0x68 + 0xa4 + 0x94 + 0xa4 + 0xbd + 0xa4 + 0xf1 + 0xa4 + 0x1a + 0xa5 + 0x44 + 0xa5 + 0x7a + 0xa5 + 0x9f + 0xa5 + 0xcb + 0xa5 + 0x2 + 0xa6 + 0x26 + 0xa6 + 0x53 + 0xa6 + 0x87 + 0xa6 + 0xac + 0xa6 + 0xdb + 0xa6 + 0xc + 0xa7 + 0x32 + 0xa7 + 0x64 + 0xa7 + 0x91 + 0xa7 + 0xf1 + 0xa4 + 0x1a + 0xa5 + 0x44 + 0xa5 + 0x7a + 0xa5 + 0x9f + 0xa5 + 0xcb + 0xa5 + 0x2 + 0xa6 + 0x26 + 0xa6 + 0x53 + 0xa6 + 0x87 + 0xa6 + 0xac + 0xa6 + 0xdb + 0xa6 + 0xc + 0xa7 + 0x32 + 0xa7 + 0x64 + 0xa7 + 0x91 + 0xa7 + 0xb9 + 0xa7 + 0xec + 0xa7 + 0x16 + 0xa8 + 0x40 + 0xa8 + 0x75 + 0xa8 + 0x9c + 0xa8 + 0xc7 + 0xa8 + 0xfd + 0xa8 + 0x22 + 0xa9 + 0x4e + 0xa9 + 0x83 + 0xa9 + 0xa7 + 0xa9 + 0xd5 + 0xa9 + 0x8 + 0xaa + 0x2d + 0xaa + 0x5d + 0xaa + 0xb9 + 0xa7 + 0xec + 0xa7 + 0x16 + 0xa8 + 0x40 + 0xa8 + 0x75 + 0xa8 + 0x9c + 0xa8 + 0xc7 + 0xa8 + 0xfd + 0xa8 + 0x22 + 0xa9 + 0x4e + 0xa9 + 0x83 + 0xa9 + 0xa7 + 0xa9 + 0xd5 + 0xa9 + 0x8 + 0xaa + 0x2d + 0xaa + 0x5d + 0xaa + 0x8d + 0xaa + 0xb3 + 0xaa + 0xe4 + 0xaa + 0x1 + 0xab + 0x11 + 0xab + 0x24 + 0xab + 0x39 + 0xab + 0x51 + 0xab + 0x6c + 0xab + 0x85 + 0xab + 0x96 + 0xab + 0xaa + 0xab + 0xbf + 0xab + 0xd8 + 0xab + 0xf4 + 0xab + 0x9 + 0xac + 0x8d + 0xaa + 0xb3 + 0xaa + 0xe4 + 0xaa + 0x1 + 0xab + 0x11 + 0xab + 0x24 + 0xab + 0x39 + 0xab + 0x51 + 0xab + 0x6c + 0xab + 0x85 + 0xab + 0x96 + 0xab + 0xaa + 0xab + 0xbf + 0xab + 0xd8 + 0xab + 0xf4 + 0xab + 0x9 + 0xac + 0x1b + 0xac + 0x2f + 0xac + 0x46 + 0xac + 0x5f + 0xac + 0x7c + 0xac + 0x8e + 0xac + 0xa0 + 0xac + 0xb5 + 0xac + 0xcc + 0xac + 0xe6 + 0xac + 0x2 + 0xad + 0x12 + 0xad + 0x25 + 0xad + 0x3a + 0xad + 0x52 + 0xad + 0x6d + 0xad + 0x1b + 0xac + 0x2f + 0xac + 0x46 + 0xac + 0x5f + 0xac + 0x7c + 0xac + 0x8e + 0xac + 0xa0 + 0xac + 0xb5 + 0xac + 0xcc + 0xac + 0xe6 + 0xac + 0x2 + 0xad + 0x12 + 0xad + 0x25 + 0xad + 0x3a + 0xad + 0x52 + 0xad + 0x6d + 0xad + 0x86 + 0xad + 0x97 + 0xad + 0xaa + 0xad + 0xc0 + 0xad + 0xd9 + 0xad + 0xf4 + 0xad + 0xa + 0xae + 0x1c + 0xae + 0x2f + 0xae + 0x46 + 0xae + 0x5f + 0xae + 0x7c + 0xae + 0x8e + 0xae + 0xa0 + 0xae + 0xb5 + 0xae + 0xcc + 0xae + 0x86 + 0xad + 0x97 + 0xad + 0xaa + 0xad + 0xc0 + 0xad + 0xd9 + 0xad + 0xf4 + 0xad + 0xa + 0xae + 0x1c + 0xae + 0x2f + 0xae + 0x46 + 0xae + 0x5f + 0xae + 0x7c + 0xae + 0x8e + 0xae + 0xa0 + 0xae + 0xb5 + 0xae + 0xcc + 0xae + 0xe6 + 0xae + 0x2 + 0xaf + 0x12 + 0xaf + 0x25 + 0xaf + 0x3a + 0xaf + 0x51 + 0xaf + 0x6c + 0xaf + 0x85 + 0xaf + 0x96 + 0xaf + 0xa9 + 0xaf + 0xbf + 0xaf + 0xd7 + 0xaf + 0xf3 + 0xaf + 0x9 + 0xb0 + 0x1a + 0xb0 + 0x2e + 0xb0 + 0xe6 + 0xae + 0x2 + 0xaf + 0x12 + 0xaf + 0x25 + 0xaf + 0x3a + 0xaf + 0x51 + 0xaf + 0x6c + 0xaf + 0x85 + 0xaf + 0x96 + 0xaf + 0xa9 + 0xaf + 0xbf + 0xaf + 0xd7 + 0xaf + 0xf3 + 0xaf + 0x9 + 0xb0 + 0x1a + 0xb0 + 0x2e + 0xb0 + 0x44 + 0xb0 + 0x5d + 0xb0 + 0x79 + 0xb0 + 0x8c + 0xb0 + 0x9e + 0xb0 + 0xb2 + 0xb0 + 0xc9 + 0xb0 + 0xe3 + 0xb0 + 0xff + 0xb0 + 0x10 + 0xb1 + 0x22 + 0xb1 + 0x37 + 0xb1 + 0x4e + 0xb1 + 0x68 + 0xb1 + 0x83 + 0xb1 + 0x93 + 0xb1 + 0x44 + 0xb0 + 0x5d + 0xb0 + 0x79 + 0xb0 + 0x8c + 0xb0 + 0x9e + 0xb0 + 0xb2 + 0xb0 + 0xc9 + 0xb0 + 0xe3 + 0xb0 + 0xff + 0xb0 + 0x10 + 0xb1 + 0x22 + 0xb1 + 0x37 + 0xb1 + 0x4e + 0xb1 + 0x68 + 0xb1 + 0x83 + 0xb1 + 0x93 + 0xb1 + 0xa6 + 0xb1 + 0xbb + 0xb1 + 0xd3 + 0xb1 + 0xee + 0xb1 + 0x6 + 0xb2 + 0x17 + 0xb2 + 0x2a + 0xb2 + 0x3f + 0xb2 + 0x58 + 0xb2 + 0x73 + 0xb2 + 0x89 + 0xb2 + 0x9a + 0xb2 + 0xae + 0xb2 + 0xc3 + 0xb2 + 0xdc + 0xb2 + 0xf8 + 0xb2 + 0xa6 + 0xb1 + 0xbb + 0xb1 + 0xd3 + 0xb1 + 0xee + 0xb1 + 0x6 + 0xb2 + 0x17 + 0xb2 + 0x2a + 0xb2 + 0x3f + 0xb2 + 0x58 + 0xb2 + 0x73 + 0xb2 + 0x89 + 0xb2 + 0x9a + 0xb2 + 0xae + 0xb2 + 0xc3 + 0xb2 + 0xdc + 0xb2 + 0xf8 + 0xb2 + 0xc + 0xb3 + 0x1d + 0xb3 + 0x31 + 0xb3 + 0x47 + 0xb3 + 0x60 + 0xb3 + 0x7d + 0xb3 + 0x8e + 0xb3 + 0xa0 + 0xb3 + 0xb4 + 0xb3 + 0xcb + 0xb3 + 0xe5 + 0xb3 + 0x1 + 0xb4 + 0x11 + 0xb4 + 0x23 + 0xb4 + 0x37 + 0xb4 + 0x4f + 0xb4 + 0xc + 0xb3 + 0x1d + 0xb3 + 0x31 + 0xb3 + 0x47 + 0xb3 + 0x60 + 0xb3 + 0x7d + 0xb3 + 0x8e + 0xb3 + 0xa0 + 0xb3 + 0xb4 + 0xb3 + 0xcb + 0xb3 + 0xe5 + 0xb3 + 0x1 + 0xb4 + 0x11 + 0xb4 + 0x23 + 0xb4 + 0x37 + 0xb4 + 0x4f + 0xb4 + 0x68 + 0xb4 + 0x83 + 0xb4 + 0x93 + 0xb4 + 0xa6 + 0xb4 + 0xba + 0xb4 + 0xd2 + 0xb4 + 0xec + 0xb4 + 0x5 + 0xb5 + 0x15 + 0xb5 + 0x28 + 0xb5 + 0x3d + 0xb5 + 0x55 + 0xb5 + 0x6f + 0xb5 + 0x86 + 0xb5 + 0x97 + 0xb5 + 0xaa + 0xb5 + 0x68 + 0xb4 + 0x83 + 0xb4 + 0x93 + 0xb4 + 0xa6 + 0xb4 + 0xba + 0xb4 + 0xd2 + 0xb4 + 0xec + 0xb4 + 0x5 + 0xb5 + 0x15 + 0xb5 + 0x28 + 0xb5 + 0x3d + 0xb5 + 0x55 + 0xb5 + 0x6f + 0xb5 + 0x86 + 0xb5 + 0x97 + 0xb5 + 0xaa + 0xb5 + 0xbf + 0xb5 + 0xd7 + 0xb5 + 0xf2 + 0xb5 + 0x0 + 0xb6 + 0x8 + 0xb6 + 0x10 + 0xb6 + 0x19 + 0xb6 + 0x22 + 0xb6 + 0x2c + 0xb6 + 0x36 + 0xb6 + 0x41 + 0xb6 + 0x4d + 0xb6 + 0x59 + 0xb6 + 0x66 + 0xb6 + 0x74 + 0xb6 + 0x81 + 0xb6 + 0xbf + 0xb5 + 0xd7 + 0xb5 + 0xf2 + 0xb5 + 0x0 + 0xb6 + 0x8 + 0xb6 + 0x10 + 0xb6 + 0x19 + 0xb6 + 0x22 + 0xb6 + 0x2c + 0xb6 + 0x36 + 0xb6 + 0x41 + 0xb6 + 0x4d + 0xb6 + 0x59 + 0xb6 + 0x66 + 0xb6 + 0x74 + 0xb6 + 0x81 + 0xb6 + 0x89 + 0xb6 + 0x91 + 0xb6 + 0x9a + 0xb6 + 0xa3 + 0xb6 + 0xad + 0xb6 + 0xb7 + 0xb6 + 0xc2 + 0xb6 + 0xce + 0xb6 + 0xda + 0xb6 + 0xe8 + 0xb6 + 0xf5 + 0xb6 + 0x2 + 0xb7 + 0xa + 0xb7 + 0x12 + 0xb7 + 0x1b + 0xb7 + 0x24 + 0xb7 + 0x89 + 0xb6 + 0x91 + 0xb6 + 0x9a + 0xb6 + 0xa3 + 0xb6 + 0xad + 0xb6 + 0xb7 + 0xb6 + 0xc2 + 0xb6 + 0xce + 0xb6 + 0xda + 0xb6 + 0xe8 + 0xb6 + 0xf5 + 0xb6 + 0x2 + 0xb7 + 0xa + 0xb7 + 0x12 + 0xb7 + 0x1b + 0xb7 + 0x24 + 0xb7 + 0x2e + 0xb7 + 0x38 + 0xb7 + 0x43 + 0xb7 + 0x4f + 0xb7 + 0x5b + 0xb7 + 0x68 + 0xb7 + 0x76 + 0xb7 + 0x82 + 0xb7 + 0x8a + 0xb7 + 0x93 + 0xb7 + 0x9b + 0xb7 + 0xa5 + 0xb7 + 0xae + 0xb7 + 0xb9 + 0xb7 + 0xc4 + 0xb7 + 0xcf + 0xb7 + 0x2e + 0xb7 + 0x38 + 0xb7 + 0x43 + 0xb7 + 0x4f + 0xb7 + 0x5b + 0xb7 + 0x68 + 0xb7 + 0x76 + 0xb7 + 0x82 + 0xb7 + 0x8a + 0xb7 + 0x93 + 0xb7 + 0x9b + 0xb7 + 0xa5 + 0xb7 + 0xae + 0xb7 + 0xb9 + 0xb7 + 0xc4 + 0xb7 + 0xcf + 0xb7 + 0xdc + 0xb7 + 0xe9 + 0xb7 + 0xf6 + 0xb7 + 0x3 + 0xb8 + 0xa + 0xb8 + 0x13 + 0xb8 + 0x1b + 0xb8 + 0x24 + 0xb8 + 0x2e + 0xb8 + 0x38 + 0xb8 + 0x43 + 0xb8 + 0x4f + 0xb8 + 0x5b + 0xb8 + 0x68 + 0xb8 + 0x76 + 0xb8 + 0x82 + 0xb8 + 0xdc + 0xb7 + 0xe9 + 0xb7 + 0xf6 + 0xb7 + 0x3 + 0xb8 + 0xa + 0xb8 + 0x13 + 0xb8 + 0x1b + 0xb8 + 0x24 + 0xb8 + 0x2e + 0xb8 + 0x38 + 0xb8 + 0x43 + 0xb8 + 0x4f + 0xb8 + 0x5b + 0xb8 + 0x68 + 0xb8 + 0x76 + 0xb8 + 0x82 + 0xb8 + 0x8a + 0xb8 + 0x92 + 0xb8 + 0x9b + 0xb8 + 0xa4 + 0xb8 + 0xad + 0xb8 + 0xb8 + 0xb8 + 0xc2 + 0xb8 + 0xce + 0xb8 + 0xda + 0xb8 + 0xe7 + 0xb8 + 0xf4 + 0xb8 + 0x1 + 0xb9 + 0x9 + 0xb9 + 0x11 + 0xb9 + 0x19 + 0xb9 + 0x22 + 0xb9 + 0x8a + 0xb8 + 0x92 + 0xb8 + 0x9b + 0xb8 + 0xa4 + 0xb8 + 0xad + 0xb8 + 0xb8 + 0xb8 + 0xc2 + 0xb8 + 0xce + 0xb8 + 0xda + 0xb8 + 0xe7 + 0xb8 + 0xf4 + 0xb8 + 0x1 + 0xb9 + 0x9 + 0xb9 + 0x11 + 0xb9 + 0x19 + 0xb9 + 0x22 + 0xb9 + 0x2c + 0xb9 + 0x36 + 0xb9 + 0x41 + 0xb9 + 0x4c + 0xb9 + 0x58 + 0xb9 + 0x64 + 0xb9 + 0x72 + 0xb9 + 0x80 + 0xb9 + 0x87 + 0xb9 + 0x8f + 0xb9 + 0x98 + 0xb9 + 0xa0 + 0xb9 + 0xaa + 0xb9 + 0xb4 + 0xb9 + 0xbe + 0xb9 + 0xc9 + 0xb9 + 0x2c + 0xb9 + 0x36 + 0xb9 + 0x41 + 0xb9 + 0x4c + 0xb9 + 0x58 + 0xb9 + 0x64 + 0xb9 + 0x72 + 0xb9 + 0x80 + 0xb9 + 0x87 + 0xb9 + 0x8f + 0xb9 + 0x98 + 0xb9 + 0xa0 + 0xb9 + 0xaa + 0xb9 + 0xb4 + 0xb9 + 0xbe + 0xb9 + 0xc9 + 0xb9 + 0xd5 + 0xb9 + 0xe1 + 0xb9 + 0xee + 0xb9 + 0xfc + 0xb9 + 0x5 + 0xba + 0xd + 0xba + 0x15 + 0xba + 0x1e + 0xba + 0x27 + 0xba + 0x30 + 0xba + 0x3a + 0xba + 0x45 + 0xba + 0x50 + 0xba + 0x5c + 0xba + 0x69 + 0xba + 0x76 + 0xba + 0xd5 + 0xb9 + 0xe1 + 0xb9 + 0xee + 0xb9 + 0xfc + 0xb9 + 0x5 + 0xba + 0xd + 0xba + 0x15 + 0xba + 0x1e + 0xba + 0x27 + 0xba + 0x30 + 0xba + 0x3a + 0xba + 0x45 + 0xba + 0x50 + 0xba + 0x5c + 0xba + 0x69 + 0xba + 0x76 + 0xba + 0x82 + 0xba + 0x8a + 0xba + 0x92 + 0xba + 0x9a + 0xba + 0xa3 + 0xba + 0xac + 0xba + 0xb6 + 0xba + 0xc0 + 0xba + 0xcb + 0xba + 0xd6 + 0xba + 0xe3 + 0xba + 0xf0 + 0xba + 0xfd + 0xba + 0x6 + 0xbb + 0xd + 0xbb + 0x15 + 0xbb + 0x82 + 0xba + 0x8a + 0xba + 0x92 + 0xba + 0x9a + 0xba + 0xa3 + 0xba + 0xac + 0xba + 0xb6 + 0xba + 0xc0 + 0xba + 0xcb + 0xba + 0xd6 + 0xba + 0xe3 + 0xba + 0xf0 + 0xba + 0xfd + 0xba + 0x6 + 0xbb + 0xd + 0xbb + 0x15 + 0xbb + 0x1e + 0xbb + 0x26 + 0xbb + 0x30 + 0xbb + 0x35 + 0xbb + 0x3a + 0xbb + 0x3f + 0xbb + 0x44 + 0xbb + 0x4a + 0xbb + 0x4f + 0xbb + 0x55 + 0xbb + 0x5b + 0xbb + 0x61 + 0xbb + 0x67 + 0xbb + 0x6d + 0xbb + 0x74 + 0xbb + 0x7b + 0xbb + 0x1e + 0xbb + 0x26 + 0xbb + 0x30 + 0xbb + 0x35 + 0xbb + 0x3a + 0xbb + 0x3f + 0xbb + 0x44 + 0xbb + 0x4a + 0xbb + 0x4f + 0xbb + 0x55 + 0xbb + 0x5b + 0xbb + 0x61 + 0xbb + 0x67 + 0xbb + 0x6d + 0xbb + 0x74 + 0xbb + 0x7b + 0xbb + 0x81 + 0xbb + 0x84 + 0xbb + 0x88 + 0xbb + 0x8c + 0xbb + 0x8f + 0xbb + 0x93 + 0xbb + 0x97 + 0xbb + 0x9c + 0xbb + 0xa0 + 0xbb + 0xa4 + 0xbb + 0xa9 + 0xbb + 0xad + 0xbb + 0xb2 + 0xbb + 0xb7 + 0xbb + 0xbc + 0xbb + 0xc1 + 0xbb + 0x81 + 0xbb + 0x84 + 0xbb + 0x88 + 0xbb + 0x8c + 0xbb + 0x8f + 0xbb + 0x93 + 0xbb + 0x97 + 0xbb + 0x9c + 0xbb + 0xa0 + 0xbb + 0xa4 + 0xbb + 0xa9 + 0xbb + 0xad + 0xbb + 0xb2 + 0xbb + 0xb7 + 0xbb + 0xbc + 0xbb + 0xc1 + 0xbb + 0xc6 + 0xbb + 0xcc + 0xbb + 0xd1 + 0xbb + 0xd7 + 0xbb + 0xdd + 0xbb + 0xe3 + 0xbb + 0xe9 + 0xbb + 0xef + 0xbb + 0xf5 + 0xbb + 0xfc + 0xbb + 0x1 + 0xbc + 0x5 + 0xbc + 0x8 + 0xbc + 0xc + 0xbc + 0x10 + 0xbc + 0x14 + 0xbc + 0xc6 + 0xbb + 0xcc + 0xbb + 0xd1 + 0xbb + 0xd7 + 0xbb + 0xdd + 0xbb + 0xe3 + 0xbb + 0xe9 + 0xbb + 0xef + 0xbb + 0xf5 + 0xbb + 0xfc + 0xbb + 0x1 + 0xbc + 0x5 + 0xbc + 0x8 + 0xbc + 0xc + 0xbc + 0x10 + 0xbc + 0x14 + 0xbc + 0x18 + 0xbc + 0x1c + 0xbc + 0x20 + 0xbc + 0x24 + 0xbc + 0x29 + 0xbc + 0x2d + 0xbc + 0x32 + 0xbc + 0x37 + 0xbc + 0x3b + 0xbc + 0x40 + 0xbc + 0x45 + 0xbc + 0x4b + 0xbc + 0x50 + 0xbc + 0x56 + 0xbc + 0x5b + 0xbc + 0x61 + 0xbc + 0x18 + 0xbc + 0x1c + 0xbc + 0x20 + 0xbc + 0x24 + 0xbc + 0x29 + 0xbc + 0x2d + 0xbc + 0x32 + 0xbc + 0x37 + 0xbc + 0x3b + 0xbc + 0x40 + 0xbc + 0x45 + 0xbc + 0x4b + 0xbc + 0x50 + 0xbc + 0x56 + 0xbc + 0x5b + 0xbc + 0x61 + 0xbc + 0x67 + 0xbc + 0x6d + 0xbc + 0x73 + 0xbc + 0x7a + 0xbc + 0x80 + 0xbc + 0x84 + 0xbc + 0x87 + 0xbc + 0x8b + 0xbc + 0x8e + 0xbc + 0x92 + 0xbc + 0x96 + 0xbc + 0x9a + 0xbc + 0x9e + 0xbc + 0xa2 + 0xbc + 0xa6 + 0xbc + 0xaa + 0xbc + 0x67 + 0xbc + 0x6d + 0xbc + 0x73 + 0xbc + 0x7a + 0xbc + 0x80 + 0xbc + 0x84 + 0xbc + 0x87 + 0xbc + 0x8b + 0xbc + 0x8e + 0xbc + 0x92 + 0xbc + 0x96 + 0xbc + 0x9a + 0xbc + 0x9e + 0xbc + 0xa2 + 0xbc + 0xa6 + 0xbc + 0xaa + 0xbc + 0xaf + 0xbc + 0xb3 + 0xbc + 0xb8 + 0xbc + 0xbd + 0xbc + 0xc1 + 0xbc + 0xc6 + 0xbc + 0xcb + 0xbc + 0xd1 + 0xbc + 0xd6 + 0xbc + 0xdb + 0xbc + 0xe1 + 0xbc + 0xe7 + 0xbc + 0xed + 0xbc + 0xf3 + 0xbc + 0xf9 + 0xbc + 0xff + 0xbc + 0xaf + 0xbc + 0xb3 + 0xbc + 0xb8 + 0xbc + 0xbd + 0xbc + 0xc1 + 0xbc + 0xc6 + 0xbc + 0xcb + 0xbc + 0xd1 + 0xbc + 0xd6 + 0xbc + 0xdb + 0xbc + 0xe1 + 0xbc + 0xe7 + 0xbc + 0xed + 0xbc + 0xf3 + 0xbc + 0xf9 + 0xbc + 0xff + 0xbc + 0x3 + 0xbd + 0x6 + 0xbd + 0xa + 0xbd + 0xd + 0xbd + 0x11 + 0xbd + 0x14 + 0xbd + 0x18 + 0xbd + 0x1c + 0xbd + 0x20 + 0xbd + 0x24 + 0xbd + 0x28 + 0xbd + 0x2c + 0xbd + 0x30 + 0xbd + 0x34 + 0xbd + 0x39 + 0xbd + 0x3d + 0xbd + 0x3 + 0xbd + 0x6 + 0xbd + 0xa + 0xbd + 0xd + 0xbd + 0x11 + 0xbd + 0x14 + 0xbd + 0x18 + 0xbd + 0x1c + 0xbd + 0x20 + 0xbd + 0x24 + 0xbd + 0x28 + 0xbd + 0x2c + 0xbd + 0x30 + 0xbd + 0x34 + 0xbd + 0x39 + 0xbd + 0x3d + 0xbd + 0x42 + 0xbd + 0x47 + 0xbd + 0x4c + 0xbd + 0x51 + 0xbd + 0x56 + 0xbd + 0x5b + 0xbd + 0x60 + 0xbd + 0x66 + 0xbd + 0x6b + 0xbd + 0x71 + 0xbd + 0x77 + 0xbd + 0x7c + 0xbd + 0x81 + 0xbd + 0x84 + 0xbd + 0x87 + 0xbd + 0x8b + 0xbd + 0x42 + 0xbd + 0x47 + 0xbd + 0x4c + 0xbd + 0x51 + 0xbd + 0x56 + 0xbd + 0x5b + 0xbd + 0x60 + 0xbd + 0x66 + 0xbd + 0x6b + 0xbd + 0x71 + 0xbd + 0x77 + 0xbd + 0x7c + 0xbd + 0x81 + 0xbd + 0x84 + 0xbd + 0x87 + 0xbd + 0x8b + 0xbd + 0x8e + 0xbd + 0x91 + 0xbd + 0x95 + 0xbd + 0x96 + 0xbd + 0x98 + 0xbd + 0x9a + 0xbd + 0x9c + 0xbd + 0x9d + 0xbd + 0x9f + 0xbd + 0xa1 + 0xbd + 0xa3 + 0xbd + 0xa5 + 0xbd + 0xa7 + 0xbd + 0xa9 + 0xbd + 0xab + 0xbd + 0xad + 0xbd + 0x8e + 0xbd + 0x91 + 0xbd + 0x95 + 0xbd + 0x96 + 0xbd + 0x98 + 0xbd + 0x9a + 0xbd + 0x9c + 0xbd + 0x9d + 0xbd + 0x9f + 0xbd + 0xa1 + 0xbd + 0xa3 + 0xbd + 0xa5 + 0xbd + 0xa7 + 0xbd + 0xa9 + 0xbd + 0xab + 0xbd + 0xad + 0xbd + 0xaf + 0xbd + 0xb0 + 0xbd + 0xb2 + 0xbd + 0xb5 + 0xbd + 0xb7 + 0xbd + 0xb9 + 0xbd + 0xbb + 0xbd + 0xbd + 0xbd + 0xbf + 0xbd + 0xc1 + 0xbd + 0xc3 + 0xbd + 0xc5 + 0xbd + 0xc8 + 0xbd + 0xca + 0xbd + 0xcc + 0xbd + 0xce + 0xbd + 0xaf + 0xbd + 0xb0 + 0xbd + 0xb2 + 0xbd + 0xb5 + 0xbd + 0xb7 + 0xbd + 0xb9 + 0xbd + 0xbb + 0xbd + 0xbd + 0xbd + 0xbf + 0xbd + 0xc1 + 0xbd + 0xc3 + 0xbd + 0xc5 + 0xbd + 0xc8 + 0xbd + 0xca + 0xbd + 0xcc + 0xbd + 0xce + 0xbd + 0xd1 + 0xbd + 0xd3 + 0xbd + 0xd5 + 0xbd + 0xd8 + 0xbd + 0xda + 0xbd + 0xdc + 0xbd + 0xdf + 0xbd + 0xe1 + 0xbd + 0xe4 + 0xbd + 0xe6 + 0xbd + 0xe8 + 0xbd + 0xeb + 0xbd + 0xed + 0xbd + 0xf0 + 0xbd + 0xf3 + 0xbd + 0xf5 + 0xbd + 0xd1 + 0xbd + 0xd3 + 0xbd + 0xd5 + 0xbd + 0xd8 + 0xbd + 0xda + 0xbd + 0xdc + 0xbd + 0xdf + 0xbd + 0xe1 + 0xbd + 0xe4 + 0xbd + 0xe6 + 0xbd + 0xe8 + 0xbd + 0xeb + 0xbd + 0xed + 0xbd + 0xf0 + 0xbd + 0xf3 + 0xbd + 0xf5 + 0xbd + 0xf8 + 0xbd + 0xfa + 0xbd + 0xfd + 0xbd + 0x0 + 0xbe + 0x1 + 0xbe + 0x3 + 0xbe + 0x4 + 0xbe + 0x5 + 0xbe + 0x7 + 0xbe + 0x8 + 0xbe + 0xa + 0xbe + 0xb + 0xbe + 0xc + 0xbe + 0xe + 0xbe + 0xf + 0xbe + 0x11 + 0xbe + 0xf8 + 0xbd + 0xfa + 0xbd + 0xfd + 0xbd + 0x0 + 0xbe + 0x1 + 0xbe + 0x3 + 0xbe + 0x4 + 0xbe + 0x5 + 0xbe + 0x7 + 0xbe + 0x8 + 0xbe + 0xa + 0xbe + 0xb + 0xbe + 0xc + 0xbe + 0xe + 0xbe + 0xf + 0xbe + 0x11 + 0xbe + 0x12 + 0xbe + 0x14 + 0xbe + 0x15 + 0xbe + 0x17 + 0xbe + 0x18 + 0xbe + 0x1a + 0xbe + 0x1b + 0xbe + 0x1d + 0xbe + 0x1e + 0xbe + 0x20 + 0xbe + 0x21 + 0xbe + 0x23 + 0xbe + 0x25 + 0xbe + 0x26 + 0xbe + 0x28 + 0xbe + 0x29 + 0xbe + 0x12 + 0xbe + 0x14 + 0xbe + 0x15 + 0xbe + 0x17 + 0xbe + 0x18 + 0xbe + 0x1a + 0xbe + 0x1b + 0xbe + 0x1d + 0xbe + 0x1e + 0xbe + 0x20 + 0xbe + 0x21 + 0xbe + 0x23 + 0xbe + 0x25 + 0xbe + 0x26 + 0xbe + 0x28 + 0xbe + 0x29 + 0xbe + 0x2b + 0xbe + 0x2d + 0xbe + 0x2e + 0xbe + 0x30 + 0xbe + 0x32 + 0xbe + 0x33 + 0xbe + 0x35 + 0xbe + 0x37 + 0xbe + 0x38 + 0xbe + 0x3a + 0xbe + 0x3c + 0xbe + 0x3d + 0xbe + 0x3f + 0xbe + 0x41 + 0xbe + 0x43 + 0xbe + 0x44 + 0xbe + 0x2b + 0xbe + 0x2d + 0xbe + 0x2e + 0xbe + 0x30 + 0xbe + 0x32 + 0xbe + 0x33 + 0xbe + 0x35 + 0xbe + 0x37 + 0xbe + 0x38 + 0xbe + 0x3a + 0xbe + 0x3c + 0xbe + 0x3d + 0xbe + 0x3f + 0xbe + 0x41 + 0xbe + 0x43 + 0xbe + 0x44 + 0xbe + 0x46 + 0xbe + 0x48 + 0xbe + 0x4a + 0xbe + 0x4b + 0xbe + 0x4d + 0xbe + 0x4f + 0xbe + 0x51 + 0xbe + 0x52 + 0xbe + 0x54 + 0xbe + 0x56 + 0xbe + 0x58 + 0xbe + 0x5a + 0xbe + 0x5b + 0xbe + 0x5d + 0xbe + 0x5f + 0xbe + 0x61 + 0xbe + 0x46 + 0xbe + 0x48 + 0xbe + 0x4a + 0xbe + 0x4b + 0xbe + 0x4d + 0xbe + 0x4f + 0xbe + 0x51 + 0xbe + 0x52 + 0xbe + 0x54 + 0xbe + 0x56 + 0xbe + 0x58 + 0xbe + 0x5a + 0xbe + 0x5b + 0xbe + 0x5d + 0xbe + 0x5f + 0xbe + 0x61 + 0xbe + 0x63 + 0xbe + 0x64 + 0xbe + 0x66 + 0xbe + 0x68 + 0xbe + 0x6a + 0xbe + 0x6c + 0xbe + 0x6d + 0xbe + 0x6f + 0xbe + 0x71 + 0xbe + 0x73 + 0xbe + 0x74 + 0xbe + 0x76 + 0xbe + 0x78 + 0xbe + 0x7a + 0xbe + 0x7c + 0xbe + 0x7d + 0xbe + 0x63 + 0xbe + 0x64 + 0xbe + 0x66 + 0xbe + 0x68 + 0xbe + 0x6a + 0xbe + 0x6c + 0xbe + 0x6d + 0xbe + 0x6f + 0xbe + 0x71 + 0xbe + 0x73 + 0xbe + 0x74 + 0xbe + 0x76 + 0xbe + 0x78 + 0xbe + 0x7a + 0xbe + 0x7c + 0xbe + 0x7d + 0xbe + 0x7f + 0xbe + 0x80 + 0xbe + 0x81 + 0xbe + 0x82 + 0xbe + 0x82 + 0xbe + 0x83 + 0xbe + 0x83 + 0xbe + 0x83 + 0xbe + 0x84 + 0xbe + 0x84 + 0xbe + 0x85 + 0xbe + 0x85 + 0xbe + 0x86 + 0xbe + 0x86 + 0xbe + 0x86 + 0xbe + 0x87 + 0xbe + 0x7f + 0xbe + 0x80 + 0xbe + 0x81 + 0xbe + 0x82 + 0xbe + 0x82 + 0xbe + 0x83 + 0xbe + 0x83 + 0xbe + 0x83 + 0xbe + 0x84 + 0xbe + 0x84 + 0xbe + 0x85 + 0xbe + 0x85 + 0xbe + 0x86 + 0xbe + 0x86 + 0xbe + 0x86 + 0xbe + 0x87 + 0xbe + 0x87 + 0xbe + 0x88 + 0xbe + 0x88 + 0xbe + 0x88 + 0xbe + 0x89 + 0xbe + 0x89 + 0xbe + 0x8a + 0xbe + 0x8a + 0xbe + 0x8a + 0xbe + 0x8b + 0xbe + 0x8b + 0xbe + 0x8c + 0xbe + 0x8c + 0xbe + 0x8c + 0xbe + 0x8d + 0xbe + 0x8d + 0xbe + 0x87 + 0xbe + 0x88 + 0xbe + 0x88 + 0xbe + 0x88 + 0xbe + 0x89 + 0xbe + 0x89 + 0xbe + 0x8a + 0xbe + 0x8a + 0xbe + 0x8a + 0xbe + 0x8b + 0xbe + 0x8b + 0xbe + 0x8c + 0xbe + 0x8c + 0xbe + 0x8c + 0xbe + 0x8d + 0xbe + 0x8d + 0xbe + 0x8e + 0xbe + 0x8e + 0xbe + 0x8e + 0xbe + 0x8f + 0xbe + 0x8f + 0xbe + 0x8f + 0xbe + 0x90 + 0xbe + 0x90 + 0xbe + 0x91 + 0xbe + 0x91 + 0xbe + 0x91 + 0xbe + 0x92 + 0xbe + 0x92 + 0xbe + 0x92 + 0xbe + 0x93 + 0xbe + 0x93 + 0xbe + 0x8e + 0xbe + 0x8e + 0xbe + 0x8e + 0xbe + 0x8f + 0xbe + 0x8f + 0xbe + 0x8f + 0xbe + 0x90 + 0xbe + 0x90 + 0xbe + 0x91 + 0xbe + 0x91 + 0xbe + 0x91 + 0xbe + 0x92 + 0xbe + 0x92 + 0xbe + 0x92 + 0xbe + 0x93 + 0xbe + 0x93 + 0xbe + 0x93 + 0xbe + 0x94 + 0xbe + 0x94 + 0xbe + 0x94 + 0xbe + 0x95 + 0xbe + 0x95 + 0xbe + 0x95 + 0xbe + 0x96 + 0xbe + 0x96 + 0xbe + 0x96 + 0xbe + 0x96 + 0xbe + 0x97 + 0xbe + 0x97 + 0xbe + 0x97 + 0xbe + 0x98 + 0xbe + 0x98 + 0xbe + 0x93 + 0xbe + 0x94 + 0xbe + 0x94 + 0xbe + 0x94 + 0xbe + 0x95 + 0xbe + 0x95 + 0xbe + 0x95 + 0xbe + 0x96 + 0xbe + 0x96 + 0xbe + 0x96 + 0xbe + 0x96 + 0xbe + 0x97 + 0xbe + 0x97 + 0xbe + 0x97 + 0xbe + 0x98 + 0xbe + 0x98 + 0xbe + 0x98 + 0xbe + 0x98 + 0xbe + 0x99 + 0xbe + 0x99 + 0xbe + 0x99 + 0xbe + 0x99 + 0xbe + 0x9a + 0xbe + 0x9a + 0xbe + 0x9a + 0xbe + 0x9a + 0xbe + 0x9b + 0xbe + 0x9b + 0xbe + 0x9b + 0xbe + 0x9b + 0xbe + 0x9b + 0xbe + 0x9c + 0xbe + 0x98 + 0xbe + 0x98 + 0xbe + 0x99 + 0xbe + 0x99 + 0xbe + 0x99 + 0xbe + 0x99 + 0xbe + 0x9a + 0xbe + 0x9a + 0xbe + 0x9a + 0xbe + 0x9a + 0xbe + 0x9b + 0xbe + 0x9b + 0xbe + 0x9b + 0xbe + 0x9b + 0xbe + 0x9b + 0xbe + 0x9c + 0xbe + 0x9c + 0xbe + 0x9c + 0xbe + 0x9c + 0xbe + 0x9c + 0xbe + 0x9c + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9c + 0xbe + 0x9c + 0xbe + 0x9c + 0xbe + 0x9c + 0xbe + 0x9c + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9c + 0xbe + 0x9c + 0xbe + 0x9c + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9e + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9d + 0xbe + 0x9c + 0xbe + 0x9c + 0xbe + 0x9c + 0xbe + 0x9c + 0xbe + 0x9c + 0xbe + 0x9b + 0xbe + 0x9b + 0xbe + 0x9b + 0xbe + 0x9b + 0xbe + 0x9b + 0xbe + 0x9b + 0xbe + 0x9b + 0xbe + 0x9a + 0xbe + 0x9a + 0xbe + 0x9a + 0xbe + 0x9a + 0xbe + 0x9a + 0xbe + 0x9a + 0xbe + 0x9a + 0xbe + 0x9c + 0xbe + 0x9c + 0xbe + 0x9b + 0xbe + 0x9b + 0xbe + 0x9b + 0xbe + 0x9b + 0xbe + 0x9b + 0xbe + 0x9b + 0xbe + 0x9b + 0xbe + 0x9a + 0xbe + 0x9a + 0xbe + 0x9a + 0xbe + 0x9a + 0xbe + 0x9a + 0xbe + 0x9a + 0xbe + 0x9a + 0xbe + 0x99 + 0xbe + 0x99 + 0xbe + 0x99 + 0xbe + 0x99 + 0xbe + 0x99 + 0xbe + 0x99 + 0xbe + 0x98 + 0xbe + 0x98 + 0xbe + 0x98 + 0xbe + 0x98 + 0xbe + 0x98 + 0xbe + 0x97 + 0xbe + 0x97 + 0xbe + 0x97 + 0xbe + 0x97 + 0xbe + 0x97 + 0xbe + 0x99 + 0xbe + 0x99 + 0xbe + 0x99 + 0xbe + 0x99 + 0xbe + 0x99 + 0xbe + 0x99 + 0xbe + 0x98 + 0xbe + 0x98 + 0xbe + 0x98 + 0xbe + 0x98 + 0xbe + 0x98 + 0xbe + 0x97 + 0xbe + 0x97 + 0xbe + 0x97 + 0xbe + 0x97 + 0xbe + 0x97 + 0xbe + 0x96 + 0xbe + 0x96 + 0xbe + 0x96 + 0xbe + 0x96 + 0xbe + 0x96 + 0xbe + 0x95 + 0xbe + 0x95 + 0xbe + 0x95 + 0xbe + 0x95 + 0xbe + 0x95 + 0xbe + 0x94 + 0xbe + 0x94 + 0xbe + 0x94 + 0xbe + 0x94 + 0xbe + 0x93 + 0xbe + 0x93 + 0xbe + 0x96 + 0xbe + 0x96 + 0xbe + 0x96 + 0xbe + 0x96 + 0xbe + 0x96 + 0xbe + 0x95 + 0xbe + 0x95 + 0xbe + 0x95 + 0xbe + 0x95 + 0xbe + 0x95 + 0xbe + 0x94 + 0xbe + 0x94 + 0xbe + 0x94 + 0xbe + 0x94 + 0xbe + 0x93 + 0xbe + 0x93 + 0xbe + 0x93 + 0xbe + 0x93 + 0xbe + 0x92 + 0xbe + 0x92 + 0xbe + 0x92 + 0xbe + 0x91 + 0xbe + 0x91 + 0xbe + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x93 + 0xbe + 0x93 + 0xbe + 0x92 + 0xbe + 0x92 + 0xbe + 0x92 + 0xbe + 0x91 + 0xbe + 0x91 + 0xbe + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + +.data_segment_name +.data weak 64 _ZN8mish_lutILj1ELj512EE7data_abE DMb + 0x42 + 0x3f + 0x43 + 0x3f + 0x45 + 0x3f + 0x46 + 0x3f + 0x47 + 0x3f + 0x48 + 0x3f + 0x49 + 0x3f + 0x4a + 0x3f + 0x4b + 0x3f + 0x4c + 0x3f + 0x4d + 0x3f + 0x4e + 0x3f + 0x4f + 0x3f + 0x50 + 0x3f + 0x51 + 0x3f + 0x52 + 0x3f + 0x42 + 0x3f + 0x43 + 0x3f + 0x45 + 0x3f + 0x46 + 0x3f + 0x47 + 0x3f + 0x48 + 0x3f + 0x49 + 0x3f + 0x4a + 0x3f + 0x4b + 0x3f + 0x4c + 0x3f + 0x4d + 0x3f + 0x4e + 0x3f + 0x4f + 0x3f + 0x50 + 0x3f + 0x51 + 0x3f + 0x52 + 0x3f + 0x53 + 0x3f + 0x54 + 0x3f + 0x55 + 0x3f + 0x56 + 0x3f + 0x57 + 0x3f + 0x58 + 0x3f + 0x59 + 0x3f + 0x5a + 0x3f + 0x5b + 0x3f + 0x5c + 0x3f + 0x5d + 0x3f + 0x60 + 0x3f + 0x62 + 0x3f + 0x64 + 0x3f + 0x66 + 0x3f + 0x68 + 0x3f + 0x53 + 0x3f + 0x54 + 0x3f + 0x55 + 0x3f + 0x56 + 0x3f + 0x57 + 0x3f + 0x58 + 0x3f + 0x59 + 0x3f + 0x5a + 0x3f + 0x5b + 0x3f + 0x5c + 0x3f + 0x5d + 0x3f + 0x60 + 0x3f + 0x62 + 0x3f + 0x64 + 0x3f + 0x66 + 0x3f + 0x68 + 0x3f + 0x6a + 0x3f + 0x6c + 0x3f + 0x6e + 0x3f + 0x70 + 0x3f + 0x73 + 0x3f + 0x75 + 0x3f + 0x77 + 0x3f + 0x79 + 0x3f + 0x7b + 0x3f + 0x7d + 0x3f + 0x7f + 0x3f + 0x81 + 0x3f + 0x82 + 0x3f + 0x83 + 0x3f + 0x84 + 0x3f + 0x85 + 0x3f + 0x6a + 0x3f + 0x6c + 0x3f + 0x6e + 0x3f + 0x70 + 0x3f + 0x73 + 0x3f + 0x75 + 0x3f + 0x77 + 0x3f + 0x79 + 0x3f + 0x7b + 0x3f + 0x7d + 0x3f + 0x7f + 0x3f + 0x81 + 0x3f + 0x82 + 0x3f + 0x83 + 0x3f + 0x84 + 0x3f + 0x85 + 0x3f + 0x86 + 0x3f + 0x87 + 0x3f + 0x88 + 0x3f + 0x89 + 0x3f + 0x8a + 0x3f + 0x8b + 0x3f + 0x8d + 0x3f + 0x8e + 0x3f + 0x8f + 0x3f + 0x90 + 0x3f + 0x91 + 0x3f + 0x92 + 0x3f + 0x93 + 0x3f + 0x94 + 0x3f + 0x95 + 0x3f + 0x96 + 0x3f + 0x86 + 0x3f + 0x87 + 0x3f + 0x88 + 0x3f + 0x89 + 0x3f + 0x8a + 0x3f + 0x8b + 0x3f + 0x8d + 0x3f + 0x8e + 0x3f + 0x8f + 0x3f + 0x90 + 0x3f + 0x91 + 0x3f + 0x92 + 0x3f + 0x93 + 0x3f + 0x94 + 0x3f + 0x95 + 0x3f + 0x96 + 0x3f + 0x97 + 0x3f + 0x98 + 0x3f + 0x9a + 0x3f + 0x9b + 0x3f + 0x9c + 0x3f + 0x9d + 0x3f + 0x9e + 0x3f + 0x9f + 0x3f + 0xa0 + 0x3f + 0xa1 + 0x3f + 0xa2 + 0x3f + 0xa3 + 0x3f + 0xa4 + 0x3f + 0xa5 + 0x3f + 0xa7 + 0x3f + 0xa8 + 0x3f + 0x97 + 0x3f + 0x98 + 0x3f + 0x9a + 0x3f + 0x9b + 0x3f + 0x9c + 0x3f + 0x9d + 0x3f + 0x9e + 0x3f + 0x9f + 0x3f + 0xa0 + 0x3f + 0xa1 + 0x3f + 0xa2 + 0x3f + 0xa3 + 0x3f + 0xa4 + 0x3f + 0xa5 + 0x3f + 0xa7 + 0x3f + 0xa8 + 0x3f + 0xa9 + 0x3f + 0xaa + 0x3f + 0xab + 0x3f + 0xac + 0x3f + 0xad + 0x3f + 0xae + 0x3f + 0xaf + 0x3f + 0xb0 + 0x3f + 0xb1 + 0x3f + 0xb3 + 0x3f + 0xb4 + 0x3f + 0xb5 + 0x3f + 0xb6 + 0x3f + 0xb7 + 0x3f + 0xb8 + 0x3f + 0xb9 + 0x3f + 0xa9 + 0x3f + 0xaa + 0x3f + 0xab + 0x3f + 0xac + 0x3f + 0xad + 0x3f + 0xae + 0x3f + 0xaf + 0x3f + 0xb0 + 0x3f + 0xb1 + 0x3f + 0xb3 + 0x3f + 0xb4 + 0x3f + 0xb5 + 0x3f + 0xb6 + 0x3f + 0xb7 + 0x3f + 0xb8 + 0x3f + 0xb9 + 0x3f + 0xba + 0x3f + 0xbb + 0x3f + 0xbc + 0x3f + 0xbd + 0x3f + 0xbf + 0x3f + 0xc0 + 0x3f + 0xc1 + 0x3f + 0xc2 + 0x3f + 0xc3 + 0x3f + 0xc4 + 0x3f + 0xc5 + 0x3f + 0xc6 + 0x3f + 0xc7 + 0x3f + 0xc8 + 0x3f + 0xc9 + 0x3f + 0xca + 0x3f + 0xba + 0x3f + 0xbb + 0x3f + 0xbc + 0x3f + 0xbd + 0x3f + 0xbf + 0x3f + 0xc0 + 0x3f + 0xc1 + 0x3f + 0xc2 + 0x3f + 0xc3 + 0x3f + 0xc4 + 0x3f + 0xc5 + 0x3f + 0xc6 + 0x3f + 0xc7 + 0x3f + 0xc8 + 0x3f + 0xc9 + 0x3f + 0xca + 0x3f + 0xcc + 0x3f + 0xcd + 0x3f + 0xce + 0x3f + 0xcf + 0x3f + 0xd0 + 0x3f + 0xd1 + 0x3f + 0xd2 + 0x3f + 0xd3 + 0x3f + 0xd4 + 0x3f + 0xd5 + 0x3f + 0xd6 + 0x3f + 0xd7 + 0x3f + 0xd9 + 0x3f + 0xda + 0x3f + 0xdb + 0x3f + 0xdc + 0x3f + 0xcc + 0x3f + 0xcd + 0x3f + 0xce + 0x3f + 0xcf + 0x3f + 0xd0 + 0x3f + 0xd1 + 0x3f + 0xd2 + 0x3f + 0xd3 + 0x3f + 0xd4 + 0x3f + 0xd5 + 0x3f + 0xd6 + 0x3f + 0xd7 + 0x3f + 0xd9 + 0x3f + 0xda + 0x3f + 0xdb + 0x3f + 0xdc + 0x3f + 0xdd + 0x3f + 0xde + 0x3f + 0xdf + 0x3f + 0xe0 + 0x3f + 0xe1 + 0x3f + 0xe2 + 0x3f + 0xe3 + 0x3f + 0xe4 + 0x3f + 0xe6 + 0x3f + 0xe7 + 0x3f + 0xe8 + 0x3f + 0xe9 + 0x3f + 0xea + 0x3f + 0xeb + 0x3f + 0xec + 0x3f + 0xed + 0x3f + 0xdd + 0x3f + 0xde + 0x3f + 0xdf + 0x3f + 0xe0 + 0x3f + 0xe1 + 0x3f + 0xe2 + 0x3f + 0xe3 + 0x3f + 0xe4 + 0x3f + 0xe6 + 0x3f + 0xe7 + 0x3f + 0xe8 + 0x3f + 0xe9 + 0x3f + 0xea + 0x3f + 0xeb + 0x3f + 0xec + 0x3f + 0xed + 0x3f + 0xee + 0x3f + 0xef + 0x3f + 0xf0 + 0x3f + 0xf1 + 0x3f + 0xf2 + 0x3f + 0xf3 + 0x3f + 0xf5 + 0x3f + 0xf6 + 0x3f + 0xf7 + 0x3f + 0xf8 + 0x3f + 0xf9 + 0x3f + 0xfb + 0x3f + 0xfd + 0x3f + 0xff + 0x3f + 0x1 + 0x40 + 0x2 + 0x40 + 0xee + 0x3f + 0xef + 0x3f + 0xf0 + 0x3f + 0xf1 + 0x3f + 0xf2 + 0x3f + 0xf3 + 0x3f + 0xf5 + 0x3f + 0xf6 + 0x3f + 0xf7 + 0x3f + 0xf8 + 0x3f + 0xf9 + 0x3f + 0xfb + 0x3f + 0xfd + 0x3f + 0xff + 0x3f + 0x1 + 0x40 + 0x2 + 0x40 + 0x3 + 0x40 + 0x4 + 0x40 + 0x5 + 0x40 + 0x6 + 0x40 + 0x7 + 0x40 + 0x8 + 0x40 + 0x9 + 0x40 + 0xa + 0x40 + 0xb + 0x40 + 0xc + 0x40 + 0xd + 0x40 + 0xe + 0x40 + 0x10 + 0x40 + 0x11 + 0x40 + 0x12 + 0x40 + 0x13 + 0x40 + 0x3 + 0x40 + 0x4 + 0x40 + 0x5 + 0x40 + 0x6 + 0x40 + 0x7 + 0x40 + 0x8 + 0x40 + 0x9 + 0x40 + 0xa + 0x40 + 0xb + 0x40 + 0xc + 0x40 + 0xd + 0x40 + 0xe + 0x40 + 0x10 + 0x40 + 0x11 + 0x40 + 0x12 + 0x40 + 0x13 + 0x40 + 0x14 + 0x40 + 0x15 + 0x40 + 0x16 + 0x40 + 0x17 + 0x40 + 0x18 + 0x40 + 0x19 + 0x40 + 0x1a + 0x40 + 0x1b + 0x40 + 0x1c + 0x40 + 0x1d + 0x40 + 0x1e + 0x40 + 0x1f + 0x40 + 0x20 + 0x40 + 0x21 + 0x40 + 0x22 + 0x40 + 0x23 + 0x40 + 0x14 + 0x40 + 0x15 + 0x40 + 0x16 + 0x40 + 0x17 + 0x40 + 0x18 + 0x40 + 0x19 + 0x40 + 0x1a + 0x40 + 0x1b + 0x40 + 0x1c + 0x40 + 0x1d + 0x40 + 0x1e + 0x40 + 0x1f + 0x40 + 0x20 + 0x40 + 0x21 + 0x40 + 0x22 + 0x40 + 0x23 + 0x40 + 0x24 + 0x40 + 0x25 + 0x40 + 0x26 + 0x40 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x24 + 0x40 + 0x25 + 0x40 + 0x26 + 0x40 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + +.data_segment_name +.data weak 64 _ZN8mish_lutILj1ELj512EE7data_cdE DMb + 0x42 + 0x3f + 0x43 + 0x3f + 0x45 + 0x3f + 0x46 + 0x3f + 0x47 + 0x3f + 0x48 + 0x3f + 0x49 + 0x3f + 0x4a + 0x3f + 0x4b + 0x3f + 0x4c + 0x3f + 0x4d + 0x3f + 0x4e + 0x3f + 0x4f + 0x3f + 0x50 + 0x3f + 0x51 + 0x3f + 0x52 + 0x3f + 0x42 + 0x3f + 0x43 + 0x3f + 0x45 + 0x3f + 0x46 + 0x3f + 0x47 + 0x3f + 0x48 + 0x3f + 0x49 + 0x3f + 0x4a + 0x3f + 0x4b + 0x3f + 0x4c + 0x3f + 0x4d + 0x3f + 0x4e + 0x3f + 0x4f + 0x3f + 0x50 + 0x3f + 0x51 + 0x3f + 0x52 + 0x3f + 0x53 + 0x3f + 0x54 + 0x3f + 0x55 + 0x3f + 0x56 + 0x3f + 0x57 + 0x3f + 0x58 + 0x3f + 0x59 + 0x3f + 0x5a + 0x3f + 0x5b + 0x3f + 0x5c + 0x3f + 0x5d + 0x3f + 0x60 + 0x3f + 0x62 + 0x3f + 0x64 + 0x3f + 0x66 + 0x3f + 0x68 + 0x3f + 0x53 + 0x3f + 0x54 + 0x3f + 0x55 + 0x3f + 0x56 + 0x3f + 0x57 + 0x3f + 0x58 + 0x3f + 0x59 + 0x3f + 0x5a + 0x3f + 0x5b + 0x3f + 0x5c + 0x3f + 0x5d + 0x3f + 0x60 + 0x3f + 0x62 + 0x3f + 0x64 + 0x3f + 0x66 + 0x3f + 0x68 + 0x3f + 0x6a + 0x3f + 0x6c + 0x3f + 0x6e + 0x3f + 0x70 + 0x3f + 0x73 + 0x3f + 0x75 + 0x3f + 0x77 + 0x3f + 0x79 + 0x3f + 0x7b + 0x3f + 0x7d + 0x3f + 0x7f + 0x3f + 0x81 + 0x3f + 0x82 + 0x3f + 0x83 + 0x3f + 0x84 + 0x3f + 0x85 + 0x3f + 0x6a + 0x3f + 0x6c + 0x3f + 0x6e + 0x3f + 0x70 + 0x3f + 0x73 + 0x3f + 0x75 + 0x3f + 0x77 + 0x3f + 0x79 + 0x3f + 0x7b + 0x3f + 0x7d + 0x3f + 0x7f + 0x3f + 0x81 + 0x3f + 0x82 + 0x3f + 0x83 + 0x3f + 0x84 + 0x3f + 0x85 + 0x3f + 0x86 + 0x3f + 0x87 + 0x3f + 0x88 + 0x3f + 0x89 + 0x3f + 0x8a + 0x3f + 0x8b + 0x3f + 0x8d + 0x3f + 0x8e + 0x3f + 0x8f + 0x3f + 0x90 + 0x3f + 0x91 + 0x3f + 0x92 + 0x3f + 0x93 + 0x3f + 0x94 + 0x3f + 0x95 + 0x3f + 0x96 + 0x3f + 0x86 + 0x3f + 0x87 + 0x3f + 0x88 + 0x3f + 0x89 + 0x3f + 0x8a + 0x3f + 0x8b + 0x3f + 0x8d + 0x3f + 0x8e + 0x3f + 0x8f + 0x3f + 0x90 + 0x3f + 0x91 + 0x3f + 0x92 + 0x3f + 0x93 + 0x3f + 0x94 + 0x3f + 0x95 + 0x3f + 0x96 + 0x3f + 0x97 + 0x3f + 0x98 + 0x3f + 0x9a + 0x3f + 0x9b + 0x3f + 0x9c + 0x3f + 0x9d + 0x3f + 0x9e + 0x3f + 0x9f + 0x3f + 0xa0 + 0x3f + 0xa1 + 0x3f + 0xa2 + 0x3f + 0xa3 + 0x3f + 0xa4 + 0x3f + 0xa5 + 0x3f + 0xa7 + 0x3f + 0xa8 + 0x3f + 0x97 + 0x3f + 0x98 + 0x3f + 0x9a + 0x3f + 0x9b + 0x3f + 0x9c + 0x3f + 0x9d + 0x3f + 0x9e + 0x3f + 0x9f + 0x3f + 0xa0 + 0x3f + 0xa1 + 0x3f + 0xa2 + 0x3f + 0xa3 + 0x3f + 0xa4 + 0x3f + 0xa5 + 0x3f + 0xa7 + 0x3f + 0xa8 + 0x3f + 0xa9 + 0x3f + 0xaa + 0x3f + 0xab + 0x3f + 0xac + 0x3f + 0xad + 0x3f + 0xae + 0x3f + 0xaf + 0x3f + 0xb0 + 0x3f + 0xb1 + 0x3f + 0xb3 + 0x3f + 0xb4 + 0x3f + 0xb5 + 0x3f + 0xb6 + 0x3f + 0xb7 + 0x3f + 0xb8 + 0x3f + 0xb9 + 0x3f + 0xa9 + 0x3f + 0xaa + 0x3f + 0xab + 0x3f + 0xac + 0x3f + 0xad + 0x3f + 0xae + 0x3f + 0xaf + 0x3f + 0xb0 + 0x3f + 0xb1 + 0x3f + 0xb3 + 0x3f + 0xb4 + 0x3f + 0xb5 + 0x3f + 0xb6 + 0x3f + 0xb7 + 0x3f + 0xb8 + 0x3f + 0xb9 + 0x3f + 0xba + 0x3f + 0xbb + 0x3f + 0xbc + 0x3f + 0xbd + 0x3f + 0xbf + 0x3f + 0xc0 + 0x3f + 0xc1 + 0x3f + 0xc2 + 0x3f + 0xc3 + 0x3f + 0xc4 + 0x3f + 0xc5 + 0x3f + 0xc6 + 0x3f + 0xc7 + 0x3f + 0xc8 + 0x3f + 0xc9 + 0x3f + 0xca + 0x3f + 0xba + 0x3f + 0xbb + 0x3f + 0xbc + 0x3f + 0xbd + 0x3f + 0xbf + 0x3f + 0xc0 + 0x3f + 0xc1 + 0x3f + 0xc2 + 0x3f + 0xc3 + 0x3f + 0xc4 + 0x3f + 0xc5 + 0x3f + 0xc6 + 0x3f + 0xc7 + 0x3f + 0xc8 + 0x3f + 0xc9 + 0x3f + 0xca + 0x3f + 0xcc + 0x3f + 0xcd + 0x3f + 0xce + 0x3f + 0xcf + 0x3f + 0xd0 + 0x3f + 0xd1 + 0x3f + 0xd2 + 0x3f + 0xd3 + 0x3f + 0xd4 + 0x3f + 0xd5 + 0x3f + 0xd6 + 0x3f + 0xd7 + 0x3f + 0xd9 + 0x3f + 0xda + 0x3f + 0xdb + 0x3f + 0xdc + 0x3f + 0xcc + 0x3f + 0xcd + 0x3f + 0xce + 0x3f + 0xcf + 0x3f + 0xd0 + 0x3f + 0xd1 + 0x3f + 0xd2 + 0x3f + 0xd3 + 0x3f + 0xd4 + 0x3f + 0xd5 + 0x3f + 0xd6 + 0x3f + 0xd7 + 0x3f + 0xd9 + 0x3f + 0xda + 0x3f + 0xdb + 0x3f + 0xdc + 0x3f + 0xdd + 0x3f + 0xde + 0x3f + 0xdf + 0x3f + 0xe0 + 0x3f + 0xe1 + 0x3f + 0xe2 + 0x3f + 0xe3 + 0x3f + 0xe4 + 0x3f + 0xe6 + 0x3f + 0xe7 + 0x3f + 0xe8 + 0x3f + 0xe9 + 0x3f + 0xea + 0x3f + 0xeb + 0x3f + 0xec + 0x3f + 0xed + 0x3f + 0xdd + 0x3f + 0xde + 0x3f + 0xdf + 0x3f + 0xe0 + 0x3f + 0xe1 + 0x3f + 0xe2 + 0x3f + 0xe3 + 0x3f + 0xe4 + 0x3f + 0xe6 + 0x3f + 0xe7 + 0x3f + 0xe8 + 0x3f + 0xe9 + 0x3f + 0xea + 0x3f + 0xeb + 0x3f + 0xec + 0x3f + 0xed + 0x3f + 0xee + 0x3f + 0xef + 0x3f + 0xf0 + 0x3f + 0xf1 + 0x3f + 0xf2 + 0x3f + 0xf3 + 0x3f + 0xf5 + 0x3f + 0xf6 + 0x3f + 0xf7 + 0x3f + 0xf8 + 0x3f + 0xf9 + 0x3f + 0xfb + 0x3f + 0xfd + 0x3f + 0xff + 0x3f + 0x1 + 0x40 + 0x2 + 0x40 + 0xee + 0x3f + 0xef + 0x3f + 0xf0 + 0x3f + 0xf1 + 0x3f + 0xf2 + 0x3f + 0xf3 + 0x3f + 0xf5 + 0x3f + 0xf6 + 0x3f + 0xf7 + 0x3f + 0xf8 + 0x3f + 0xf9 + 0x3f + 0xfb + 0x3f + 0xfd + 0x3f + 0xff + 0x3f + 0x1 + 0x40 + 0x2 + 0x40 + 0x3 + 0x40 + 0x4 + 0x40 + 0x5 + 0x40 + 0x6 + 0x40 + 0x7 + 0x40 + 0x8 + 0x40 + 0x9 + 0x40 + 0xa + 0x40 + 0xb + 0x40 + 0xc + 0x40 + 0xd + 0x40 + 0xe + 0x40 + 0x10 + 0x40 + 0x11 + 0x40 + 0x12 + 0x40 + 0x13 + 0x40 + 0x3 + 0x40 + 0x4 + 0x40 + 0x5 + 0x40 + 0x6 + 0x40 + 0x7 + 0x40 + 0x8 + 0x40 + 0x9 + 0x40 + 0xa + 0x40 + 0xb + 0x40 + 0xc + 0x40 + 0xd + 0x40 + 0xe + 0x40 + 0x10 + 0x40 + 0x11 + 0x40 + 0x12 + 0x40 + 0x13 + 0x40 + 0x14 + 0x40 + 0x15 + 0x40 + 0x16 + 0x40 + 0x17 + 0x40 + 0x18 + 0x40 + 0x19 + 0x40 + 0x1a + 0x40 + 0x1b + 0x40 + 0x1c + 0x40 + 0x1d + 0x40 + 0x1e + 0x40 + 0x1f + 0x40 + 0x20 + 0x40 + 0x21 + 0x40 + 0x22 + 0x40 + 0x23 + 0x40 + 0x14 + 0x40 + 0x15 + 0x40 + 0x16 + 0x40 + 0x17 + 0x40 + 0x18 + 0x40 + 0x19 + 0x40 + 0x1a + 0x40 + 0x1b + 0x40 + 0x1c + 0x40 + 0x1d + 0x40 + 0x1e + 0x40 + 0x1f + 0x40 + 0x20 + 0x40 + 0x21 + 0x40 + 0x22 + 0x40 + 0x23 + 0x40 + 0x24 + 0x40 + 0x25 + 0x40 + 0x26 + 0x40 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x24 + 0x40 + 0x25 + 0x40 + 0x26 + 0x40 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + +.data_segment_name +.data weak 2 _ZN19reduce_prod_c8_implI8bfloat16E20zero_iter_init_valueE DMb + 0x80 + 0x3f + +.data_segment_name +.bss global 32 gem_bfp_param DMb 352 + +.data_segment_name +.bss weak 64 _ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6params DMb 128 + +.data_segment_name +.bss weak 4 _ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6kn_rep DMb 4 + +.data_segment_name +.bss global 64 add1d_attribute_broadcasting_params DMb 64 + +.data_segment_name +.bss global 64 mul1d_attribute_broadcasting_params DMb 64 + +.data_segment_name +.bss global 64 add1d_params DMb 64 + +.data_segment_name +.bss global 64 mul1d_params DMb 64 + +.data_segment_name +.bss global 64 sub1d_params DMb 64 + +.data_segment_name +.bss global 64 sigmoid1d_params DMb 64 + +.data_segment_name +.data global 64 reducesum_params DMb + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x1 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + +.data_segment_name +.bss global 64 conv2d_params DMb 448 + +.data_segment_name +.bss global 64 maxpool2d_params DMb 64 + +.data_segment_name +.bss global 64 pad2d_params DMb 64 + +.data_segment_name +.bss global 64 conv2d_dw_params DMb 256 + +.data_segment_name +.bss global 64 pad_3d_params DMb 64 + +.undef global data _ZN12me_primitive11control_satE + +.undef global data _ZN12me_primitive11control_rndE + +.undef global text _Z15_b13786_wrapperPPv + +.undef global text _Z14_b8148_wrapperPPv + +.undef global text _Z14_b8170_wrapperPPv + +.undef global text _Z14_b7835_wrapperPPv + +.text_segment_name +.text weak 16 _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh +.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_begin0 +.function_start + 0 0x03 0x85 0xd0 0x00 0x00 0x00 0x08 0x00 0x10 0xba LDA el0, [p0], #4; MOVXM r0, #conv2d_params + 10 0x03 0x81 0xd2 0x80 0x0b 0x3f 0x27 0x88 0xb0 0x02 0x48 0x76 LDA eh0, [p0], #4; MOVS p2, r0; MOVX r18, #-4; ADD.NC p1, r0, #9 + 22 0xff 0xe3 0x00 0x01 0x30 0x28 0x48 0x04 0x58 0xba MOVA r3, #-1; MOVX r19, #1; MOV r2, #4 + 32 0x01 0xe5 0x00 0x01 0x01 0x88 0x00 0x10 0x58 0xba MOVA r5, #15; MOVX r16, #12; MOV m0, #16 + 42 0xff 0xa4 0x00 0x00 0x02 0x00 0x00 0x00 0x70 0xba MOVA r4, #-3; PADDXM [sp], #64 + 52 0x00 0x01 0xac 0x20 0x01 0x64 MOVX r0, #3; MOV r24, #0 + 58 0x00 0x00 NOPX + 60 0x0a 0x1c 0x29 0x98 ST el0, [p2], #4 + 64 0x0a 0x1c 0x09 0x98 ST eh0, [p2], #4 + 68 0x00 0x1c 0x2e 0x98 LDA el0, [p0], #4 + 72 0x00 0x1c 0x0e 0x98 LDA eh0, [p0], #4 + 76 0x00 0x00 NOPX + 78 0x00 0x00 NOPX + 80 0x00 0x00 NOPX + 82 0x00 0x00 NOPX + 84 0x00 0x00 NOPX + 86 0x0a 0x1c 0x29 0x98 ST el0, [p2], #4 + 90 0x0a 0x1c 0x09 0x98 ST eh0, [p2], #4 + 94 0x00 0x1c 0x2e 0x98 LDA el0, [p0], #4 + 98 0x00 0x1c 0x0e 0x98 LDA eh0, [p0], #4 + 102 0x00 0x00 NOPX + 104 0x00 0x00 NOPX + 106 0x00 0x00 NOPX + 108 0x00 0x00 NOPX + 110 0x00 0x00 NOPX + 112 0x0a 0x1c 0x29 0x98 ST el0, [p2], #4 + 116 0x0a 0x1c 0x09 0x98 ST eh0, [p2], #4 + 120 0x00 0x04 0x2e 0x98 LDA el0, [p0] + 124 0x00 0x14 0x0e 0x98 LDA eh0, [p0, #4] + 128 0x00 0x00 NOPX + 130 0x00 0x00 NOPX + 132 0x00 0x00 NOPX + 134 0x00 0x00 NOPX + 136 0x00 0x00 NOPX + 138 0x0a 0x04 0x29 0x98 ST el0, [p2] + 142 0x0a 0x14 0x09 0x98 ST eh0, [p2, #4] + 146 0x01 0xdc 0xea 0x98 LDA.u8 r7, [p1], #-3 + 150 0x01 0x1e 0x8a 0x98 LDA.u8 r20, [p1], #1 + 154 0x01 0xbc 0x2a 0x98 LDA.u8 r1, [p1], #-5 + 158 0x01 0xfc 0xda 0x98 LDA.u16 r6, [p1], #-2 + 162 0x01 0x0a 0xaa 0x98 LDA.u8 r21, [p1], m0 + 166 0x01 0x1e 0x2a 0x98 LDA.u8 r17, [p1], #1 + 170 0x00 0x00 NOPX + 172 0x11 0xfd 0x2d 0x98 LSHL r30, r7, r18 + 176 0x9c 0xbc 0xf9 0xb4 0x01 0x24 EQ r18, r19, r30; ADD.NC r19, r20, #1 + 182 0x14 0xe6 0x3d 0x98 LSHL r19, r19, r3 + 186 0x11 0xb6 0x27 0x98 EQ r27, r6, r2 + 190 0xc0 0xc6 0x40 0x37 0x39 0xe4 SEL.EQZ r3, r24, r3, r27; MOV el0, r27 + 196 0x15 0x29 0x32 0x18 SEL.EQZ r20, r20, r19, r27 + 200 0x14 0x61 0x07 0x98 EQ r16, r17, r16 + 204 0x14 0xb7 0x04 0x98 AND r27, r18, r16 + 208 0xa8 0xc7 0xbc 0xbb 0x41 0xe4 LSHL r3, r21, r3; MOV r25, r27 + 214 0xfd 0x8e 0xb8 0xc3 0x04 0x5c ST r3, [sp, #-20]; SEL.EQZ r16, r17, r24, r27 + 220 0xc8 0x00 0x98 0x40 0x01 0x84 JNZ r25, #__ll6__Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh +.delay_slot + 226 0x11 0xf0 0x54 0x98 AND r24, r7, r5 +.delay_slot + 230 0x10 0xb7 0x07 0x98 EQ r27, r2, r16 +.delay_slot + 234 0x11 0x8e 0x4d 0x98 LSHL r7, r6, r4 +.delay_slot + 238 0x16 0xca 0x2d 0x98 LSHL r5, r27, r2 +.delay_slot + 242 0x14 0x0c 0x02 0x18 SEL.EQZ r6, r16, r0, r27 + 246 0xd8 0x00 0x98 0x40 0x01 0x84 JNZ r27, #__ll6__Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh +.delay_slot +.swstall delay_slot + 252 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 254 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 256 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 258 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 260 0x00 0x00 NOPX + 262 0x02 0x1a 0x00 0x00 0x00 0x2c 0x00 0x00 0x20 0xba MOVA r26, #16; J #TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_352 +.delay_slot + 272 0xff 0xa2 0x00 0x3e 0x47 0x8a 0x08 0x0f 0x58 0xba MOVA r2, #-3; MOVX r4, #-4; MOV r16, #15 +.delay_slot + 282 0x01 0x9c 0x00 0x3f 0x17 0xe8 0x08 0x01 0x58 0xba MOVA r28, #12; MOVX r17, #-1; MOV r0, #1 +.delay_slot + 292 0x10 0x26 0x01 0x18 MOVX r19, #0 +.delay_slot + 296 0x10 0x2e 0x0d 0x18 MOVX r23, #3 +.delay_slot + 300 0x10 0x24 0x11 0x18 MOVX r18, #4 +.label __ll6__Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh + 304 0x01 0x9c 0x00 0x00 0x00 0x00 0x30 0x08 0x10 0xba MOVA r28, #12; MOVXM p0, #(conv2d_params + 16) + 314 0x00 0x98 0xe0 0x00 0x00 0x2a 0xe8 0x03 0x58 0xba ST.s8 r6, [p0]; MOVX r0, #1; MOV r23, #3 + 324 0x00 0x13 0x00 0x3e 0x27 0xaa 0x08 0x0f 0x58 0xba MOVA r19, #0; MOVX r2, #-3; MOV r16, #15 + 334 0x02 0x1a 0x00 0x3e 0x47 0x8a 0x48 0x04 0x58 0xba MOVA r26, #16; MOVX r4, #-4; MOV r18, #4 + 344 0x00 0x3f 0x17 0xe9 0x00 0x00 0x1c 0x22 MOVX r17, #-1; NOPV +.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_352 + 352 0x01 0x1d 0x00 0x2f 0xb3 0x3c 0x00 0x37 0x58 0xba MOVA r29, #8; EQ r27, r23, r6; MOV m0, #55 + 362 0x21 0x55 0x50 0x3d 0xf9 0x90 0x07 0xbc 0x58 0xba LDA.u8 r21, [p1], m0; SEL.EQZ r31, r30, r19, r27; MOV m0, #-68 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 372 0x03 0x19 0x00 0x29 0x49 0x12 0x00 0x70 0x58 0xba MOVA r25, #24; SEL.EQZ r20, r20, r18, r27; MOV m4, #112 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 382 0x21 0x54 0xe0 0x27 0xef 0x8f 0x87 0x97 0x58 0xba ST.s8 r21, [p1], m0; SUB r30, r19, r31; MOV m7, #-105 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 392 0x31 0x0d 0x50 0x06 0x3a 0x0e 0x80 0x31 0x58 0xba LDA.u8 r3, [p1], m4; SUB r3, r3, r20; MOV m5, #49 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 402 0x18 0xfd 0xd6 0x1f 0x05 0x64 ASHL r3, r3, r30; MOV m3, #-63 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 408 0x07 0x84 0x80 0x01 0x6c 0x6f 0xc8 0xc0 0x48 0xba MOVA m1, #60; LSHL r22, r0, r24; ADD.NC r30, r3, #1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 418 0xb5 0x92 0x04 0x1e 0xc9 0x64 EXTEND.u8 r22, r22; MOV m2, #-78 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 424 0xad 0x60 0x9c 0x03 0xd9 0x64 AND r21, r21, r16; MOV m6, #246 + 430 0xa6 0xfb 0xb0 0x1d 0x41 0x64 LSHL r27, r20, r29; MOV m0, #-176 + 436 0x23 0xce 0x30 0x3d 0x9c 0xee 0x00 0xa8 0x59 0x3a ST r19, [p1], #4; LSHL r25, r30, r25; MOV m4, #168 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 446 0x3d 0x4e 0x39 0xe3 0x03 0x5c ST r19, [p1], m7; SUB r24, r19, r24 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 452 0x35 0x75 0x57 0xfe 0xd5 0x80 0x05 0xad 0xad 0xfa LDA.u8 r29, [p1], m5; ST r22, [sp, #-4]; LSHL r22, r22, r26 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 462 0x16 0xed 0x65 0x98 OR r22, r27, r22 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 466 0x11 0xb7 0xc7 0x98 EQ r27, r6, r28 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 470 0x15 0xf9 0x22 0x18 SEL.EQZ r28, r23, r18, r27 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 474 0x17 0xae 0x90 0x18 EXTEND.u8 r23, r30 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 478 0xe9 0x4c 0xff 0x37 0x2a 0xa4 EQ r5, r29, r6; ADD.NC r30, r23, r5 + 484 0x10 0x3f 0xfd 0x98 LSHL r31, r0, r31 + 488 0x10 0x3b 0xd1 0x98 SUB r29, r0, r29 + 492 0x2d 0x76 0x3f 0xfd 0x20 0x5c ST r29, [p1], m3; EXTEND.u8 r31, r31 + 498 0x01 0x7f 0xaa 0x98 LDA.u8 r29, [p1], #7 + 502 0x00 0x00 NOPX + 504 0x00 0x00 NOPX + 506 0x00 0x00 NOPX + 508 0x00 0x00 NOPX + 510 0x00 0x00 NOPX + 512 0x00 0x00 NOPX + 514 0x17 0x7a 0x11 0x98 SUB r29, r29, r1 + 518 0xef 0x71 0xd0 0xb7 0x39 0xe4 ASHL r29, r29, r24; MOV eh0, r27 + 524 0xee 0xb5 0xbd 0xb9 0x41 0xe4 LSHL r26, r29, r26; MOV r27, r25 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 530 0x00 0x01 0x0e 0xa0 0x00 0x44 MOVXM r29, #65536 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 536 0x25 0x6d 0x50 0x03 0x9b 0x2f 0xaf 0x74 0xa8 0xba LDA.u8 r27, [p1], m1; OR r25, r1, r22; ADD.NC r29, r29, r26 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 546 0x00 0x5a 0x00 0x00 0x3f 0xc3 0x08 0x00 0x10 0xba MOVA r26, #2; MOVXM r24, #16711680 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 556 0x17 0x7b 0x84 0x98 AND r29, r29, r24 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 560 0x11 0xb1 0xa7 0x98 EQ r24, r6, r26 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 564 0xdf 0x7a 0xbd 0xb8 0x41 0xe4 OR r29, r27, r29; MOV r27, r24 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 570 0x15 0xf7 0x32 0x18 SEL.EQZ r27, r23, r19, r27 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 574 0x16 0xf9 0xcd 0x98 LSHL r28, r27, r28 + 578 0x10 0x37 0xb1 0x98 SUB r27, r0, r27 + 582 0x16 0xf9 0xcf 0x98 MUL r28, r27, r28 + 586 0x17 0xef 0x7f 0x98 MUL r23, r31, r23 + 590 0x2b 0xf2 0x3e 0xf0 0x0b 0x5c ST r28, [p1], #20; OR r28, r29, r0 + 596 0x23 0xf2 0x32 0xf3 0x0b 0x5c ST r28, [p1], #4; OR r28, r5, r24 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 602 0x29 0x66 0x30 0x07 0xb0 0x6c 0x4e 0x4e 0x79 0x3a ST r25, [p1], m2; LSHL r27, r3, r0; MOV el1, r28 +.aggressive_scheduled_block_id 4 +.noswbrkpt + 612 0x01 0xcb 0x8a 0x98 LDA.u8 r28, [p1], m6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 616 0x01 0x0b 0x77 0x18 ST.s16 r27, [p1], m0 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 620 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 622 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 624 0xc0 0x01 0x68 0x40 0x01 0x84 JNZ r24, #TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_720 +.delay_slot +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 630 0x17 0xf8 0xe1 0x18 MOVX r28, #-8 +.delay_slot +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 634 0x15 0xad 0xcd 0x98 LSHL r22, r22, r28 +.delay_slot + 638 0xad 0x78 0x1e 0x3f 0xc1 0x64 ADD r21, r21, r28; MOV r28, #-16 +.delay_slot + 644 0xed 0x79 0xbc 0xb5 0xf2 0xa4 LSHL r21, r29, r28; ADD.NC r25, r21, r30 +.delay_slot + 650 0x31 0x6e 0x30 0x33 0xd9 0x6e 0x48 0x01 0x59 0x3a ST r27, [p1], m4; LSHL r29, r25, r18; MOV r18, #1 + 660 0x00 0x72 0x00 0x3f 0xef 0x7f 0x70 0x8e 0x78 0xba MOVA r18, #3; MUL r30, r31, r30; MOV r27, eh0 + 670 0x10 0x32 0x11 0x18 MOVX r25, #4 + 674 0x00 0xb9 0x00 0x33 0x29 0x13 0x68 0x40 0x58 0xba MOVA r25, #5; SEL.EQZ r18, r25, r18, r27; MOV r27, #64 + 684 0x16 0xf6 0x67 0x98 EQ r27, r27, r6 + 688 0x94 0xb2 0x4d 0xa5 0x41 0xe4 SEL.EQZ r18, r18, r25, r27; MOV r27, r5 + 694 0x14 0xe5 0x21 0x98 SUB r18, r19, r18 + 698 0x17 0xa5 0x2d 0x98 LSHL r18, r30, r18 + 702 0x14 0xa4 0x90 0x18 EXTEND.u8 r18, r18 + 706 0x00 0x2c 0xf0 0x00 0x24 0xa4 0x02 0x00 0x34 0xaf 0x00 0x2b 0x60 0x7e NOPA; NOPB; NOPS; SEL.EQZ r18, r18, r0, r27; NOPM +.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_720 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 720 0x20 0x19 0x00 0x2b 0x54 0x83 0x70 0x8e 0x78 0xba MOVA r25, #256; EXTEND.u8 r21, r21; MOV r27, eh0 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 730 0xed 0x90 0x80 0x2f 0x4a 0x07 0x71 0x0e 0x78 0xba MOVA m4, #-148; ADD r20, r23, r20; MOV r27, el1 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 740 0x09 0x17 0x00 0x27 0x18 0x91 0x80 0x58 0x58 0xba MOVA r23, #72; SEL.EQZ r17, r19, r17, r27; MOV m3, #88 + 750 0x01 0x34 0x07 0xfa 0x35 0xad 0x14 0x83 0xcd 0x3f 0xc8 0x76 MOVA r20, #9; ST r17, [sp, #-8]; EXTEND.u8 r17, r22; ADD.NC r30, r20, #-1 + 762 0x05 0x00 0x80 0x29 0x6b 0x92 0x6c 0xbf 0xc8 0xba MOVA m0, #40; SEL.EQZ r22, r20, r23, r27; ADD.NC r19, r18, #-1 + 772 0x03 0xf6 0x00 0x3d 0x0b 0x33 0xc8 0x40 0x58 0xba MOVA r22, #31; MAC r16, r16, r30, r22; MOV r30, #64 +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 782 0xf9 0x08 0x81 0x80 0x01 0x54 MOVA m2, #-56; MOV dc0, #0 +.aggressive_scheduled_block_id 6 +.noswbrkpt + 788 0xe7 0x20 0x9d 0xc0 0x39 0xe4 AND r28, r28, r16; MOV r27, el0 +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 794 0x17 0x21 0x72 0x18 SEL.EQZ r16, r28, r23, r27 + 798 0x11 0xce 0x02 0x18 SEL.EQZ r7, r7, r0, r27 + 802 0xf6 0xcd 0x1f 0x20 0x1d 0x64 NE r27, r30, r6; MOV r30, #7 + 808 0x11 0x08 0x40 0x37 0x39 0xe4 SEL.EQZ r4, r2, r4, r27; MOV el0, r27 + 814 0x3d 0xac 0x93 0xa2 0x01 0x64 AND r22, r7, r22; MOV r7, #128 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 820 0xfe 0x85 0xbf 0xef 0x51 0x5c ST el0, [sp, #-12]; NE r27, r31, r26 +.aggressive_scheduled_block_id 7 +.noswbrkpt + 826 0xfe 0x6e 0xb0 0x37 0xaf 0x6f 0x70 0x0e 0x79 0x3a ST r27, [sp, #-16]; LSHL r26, r27, r30; MOV r27, el0 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 836 0x11 0xff 0x92 0x18 SEL.EQZ r31, r7, r25, r27 + 840 0x10 0xc6 0x4d 0x98 LSHL r3, r3, r4 + 844 0x17 0x49 0xa1 0x98 SUB r4, r29, r26 + 848 0x17 0xff 0xa1 0x98 SUB r31, r31, r26 + 852 0xf8 0x04 0x00 0x3f 0xf2 0x13 0x69 0x50 0x78 0xba MOVA r4, #-64; SEL.EQZ r31, r31, r4, r27; MOV r27, r5 + 862 0xf9 0x48 0x4d 0xb8 0x41 0xe4 SEL.EQZ r5, r31, r4, r27; MOV r27, r24 + 868 0x8f 0xed 0xf8 0xb1 0xff 0x24 MUL r31, r17, r22; ADD.NC r17, r17, #-1 +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id first + 874 0x29 0x08 0x42 0xa3 0xff 0x24 SEL.EQZ r4, r5, r4, r27; ADD.NC r5, r3, #-1 +.aggressive_scheduled_block_id 8 +.noswbrkpt + 880 0xfd 0x9e 0x21 0x1c 0xb1 0x87 0x29 0x7f 0x71 0x0e 0x78 0x76 LDA r7, [sp, #-20]; ST r5, [p1], #4; MUL r18, r3, r18; MOV r27, el1 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 892 0x31 0x4e 0x3f 0xfc 0x3f 0x5c ST r19, [p1], m4; MUL r31, r31, r1 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 898 0xac 0xa5 0xfa 0xb5 0xff 0x24 MUL r18, r21, r18; ADD.NC r21, r21, #-1 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 904 0x23 0xfe 0x33 0xf7 0xa3 0x5c ST r31, [p1], #4; SUB r29, r7, r29 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 910 0xfe 0x6e 0x21 0x6a 0x51 0xac 0x6e 0x7f 0x8e 0xe0 0x58 0x76 LDA r27, [sp, #-16]; ST r18, [p1], m3; MUL r6, r22, r28; MOV r28, #-288 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 922 0x3d 0xea 0x30 0x00 0x00 0x03 0x2b 0x03 0x11 0x3a ST r26, [p1], #-8; MOVXM r25, #1542 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 932 0x27 0x9e 0x30 0x03 0x70 0x0e 0x70 0x02 ST r7, [p1], #12; MOV r27, el0 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 940 0xee 0x04 0x01 0xec 0x91 0x80 0x01 0xce 0x02 0x7a MOVA r4, #-144; ST r4, [p1], #-8; SEL.EQZ r7, r7, r0, r27 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 950 0x02 0xdd 0x01 0x0b 0xb1 0xb9 0xc2 0x13 0x6f 0xf7 0x58 0x76 MOVA r29, #22; ST r29, [p1], m0; SEL.EQZ r28, r28, r4, r27; MOV r27, #-9 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 962 0xff 0x46 0x2e 0x52 0x8b 0x2c LDA r17, [sp, #-8]; OR r20, r28, r20 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 968 0x16 0xf9 0x16 0x18 MAC r28, r28, r27, r17 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 972 0x16 0x7b 0xd2 0x18 SEL.EQZ r29, r25, r29, r27 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 976 0x22 0xfd 0x01 0x1f 0xb1 0x80 0x00 0x3e 0x4f 0xf8 0x10 0x76 MOVA r29, #279; ST r29, [p1], #4; MOVXM r18, #65520 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 988 0x24 0x19 0x00 0x33 0xbe 0x93 0xa8 0x7f 0xc8 0xba MOVA r25, #288; SEL.EQZ r27, r25, r29, r27; ADD.NC r29, r1, #-1 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 998 0x2f 0xee 0x30 0x0d 0xa9 0x27 0x70 0x0e 0x79 0x3a ST r27, [p1], #28; AND r26, r6, r18; MOV r27, el0 +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 1008 0x23 0xc6 0x3d 0x63 0x81 0x5c ST r17, [p1], #4; ADD r24, r26, r28 + 1014 0x23 0xd2 0x30 0x39 0xc8 0x06 0x88 0x90 0x59 0x3a ST r20, [p1], #4; ADD r28, r28, r16; MOV r20, #144 + 1024 0x00 0x7d 0x01 0x1f 0xb1 0xbb 0xcd 0x73 0x49 0xc1 0xc8 0x76 MOVA r29, #3; ST r29, [p1], #4; MSC r28, r28, r29, r26; ADD.NC r26, r7, #7 + 1036 0x23 0xe2 0x3c 0xe2 0x84 0x5c ST r24, [p1], #4; SEL.EQZ r24, r25, r20, r27 +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id first + 1042 0x23 0xf2 0x3c 0x46 0x3b 0x5c ST r28, [p1], #4; LSHL r17, r24, r17 +.aggressive_scheduled_block_id 9 +.noswbrkpt + 1048 0xff 0xda 0x21 0x1c 0x61 0xad 0xc8 0x73 0x8c 0x50 0x78 0x76 LDA r22, [sp, #-4]; ST dc0, [p1], #4; MSC r28, r28, r22, r16; MOV r28, r17 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 1060 0xfe 0xee 0x21 0x1c 0x61 0x80 0x06 0x84 0x2d 0xfa LDA r27, [sp, #-12]; ST dc0, [p1], #4; LSHL r2, r26, r2 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 1070 0x23 0xce 0x39 0xc6 0x3f 0x5c ST r19, [p1], #4; MUL r17, r19, r17 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 1076 0x23 0xf2 0x30 0x2d 0x18 0x33 0x88 0x06 0x59 0x3a ST r28, [p1], #4; MAC r17, r17, r22, r16; MOV r28, #6 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 1086 0x23 0x96 0x30 0x2d 0xa1 0x7f 0x71 0x0e 0x79 0x3a ST r5, [p1], #4; MUL r26, r22, r2; MOV r27, el1 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 1096 0x15 0xe9 0x42 0x18 SEL.EQZ r20, r23, r20, r27 +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 1100 0x17 0x79 0xc2 0x18 SEL.EQZ r28, r29, r28, r27 + 1104 0x15 0x8c 0x6f 0x98 MUL r6, r22, r6 + 1108 0x3f 0x39 0xb3 0xa0 0x01 0x64 LSHL r28, r7, r28; MOV r7, #0 + 1114 0x17 0x00 0x0d 0x98 LSHL r0, r28, r0 + 1118 0x31 0xa4 0x99 0x3e 0xe1 0x64 AND r6, r6, r18; MOV r18, #-72 + 1124 0x11 0xf9 0x11 0x98 SUB r28, r7, r17 + 1128 0x23 0xf2 0x30 0x0d 0x18 0x8c 0xc8 0xbf 0xc9 0x3a ST r28, [p1], #4; SUB r17, r6, r17; ADD.NC r6, r2, #-1 + 1138 0x23 0xc6 0x39 0x10 0x84 0x5c ST r17, [p1], #4; SEL.EQZ r4, r18, r4, r27 + 1144 0x23 0x8c 0x3f 0x90 0x9f 0x5c ST dc0, [p1], #4; MUL r4, r31, r4 + 1150 0x29 0x0c 0x3f 0xc6 0x9f 0x5c ST dc0, [p1], m2; MUL r17, r31, r20 + 1156 0x25 0x1e 0x31 0x5f 0xdb 0x5c ST r7, [p1], m1; LSHL r23, r2, r30 + 1162 0x23 0xce 0x39 0x40 0x4c 0x5c ST r19, [p1], #4; MAC r16, r16, r18, r2 + 1168 0x23 0x92 0x38 0x88 0x81 0x5c ST r4, [p1], #4; ADD r2, r17, r4 + 1174 0x23 0x96 0x32 0x92 0x3c 0x5c ST r5, [p1], #4; MSC r4, r4, r5, r17 + 1180 0x23 0x8a 0x3a 0x86 0xcc 0x5c ST r2, [p1], #4; MAC r1, r1, r21, r22 + 1186 0x23 0x92 0x30 0x02 0xe3 0x5c ST r4, [p1], #4; SUB r0, r0, r23 + 1192 0x23 0x8c 0x30 0x87 0x5f 0x5c ST dc0, [p1], #4; MUL r1, r1, r26 + 1198 0x23 0x8c 0x31 0x8f 0x7b 0x5c ST dc0, [p1], #4; LSHL r3, r3, r27 + 1204 0x23 0x86 0x3f 0x88 0x7f 0x5c ST r1, [p1], #4; MUL r2, r31, r3 + 1210 0x23 0x9a 0x30 0x3f 0xfe 0x00 0x00 0x00 0x71 0x3a ST r6, [p1], #4; PADDXM [sp], #-64 + 1220 0x23 0x82 0x30 0x50 0x00 0x5c ST r0, [p1], #4; RET lr +.delay_slot + 1226 0x09 0x5e 0x11 0x98 ST r16, [p1], #20 +.delay_slot + 1230 0x09 0x1c 0x51 0x98 ST r2, [p1], #4 +.delay_slot + 1234 0x09 0x1c 0xf1 0x98 ST r7, [p1], #4 +.delay_slot + 1238 0x09 0x04 0xf1 0x98 ST r7, [p1] +.label _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh__end last +.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_end0 last +.delay_slot + 1242 0x09 0x14 0xf1 0x98 ST r7, [p1, #4] + +.undef global data conv2d_params + +.text_segment_name +.text weak 10 _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams +.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_begin0 +.function_start + 0 0x00 0x03 0x82 0x84 0x8b 0x01 0x80 0x08 0x0a 0x60 0x78 0x76 MOVA dc0, #0; MOVS p2, p1; MOVX r24, #0; MOV r0, p2 + 12 0x00 0x06 0x88 0x28 0x28 0x34 0x01 0x36 0x00 0x21 0x20 0x09 0x60 0x7e MOVA dj1, #0; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc1, dc0; MOVX r26, #0; ADD.NC p3, r0, #4 + 26 0x63 0x94 0xd0 0x00 0x00 0x02 0x30 0x00 0x10 0xba LDA dn1, [p3], #4; MOVXM p4, #_ZN12me_primitive11control_rndE + 36 0x63 0x90 0xd0 0x00 0x00 0x00 0x78 0x48 0x10 0xba LDA m1, [p3], #4; MOVXM ls, #(ZLS_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_144 + 0) + 46 0x60 0x80 0xd0 0x00 0x00 0x01 0xb8 0x60 0x10 0xba LDA m0, [p3]; MOVXM le, #(ZLE_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_192 + 0) + 56 0x7a 0x82 0xd1 0x00 0x01 0x54 LDA r0, [p3, #-12]; MOV dj0, #0 + 62 0x04 0x04 0x22 0x98 LDA.s8 r1, [p4] + 66 0x00 0x00 NOPX + 68 0x00 0x00 NOPX + 70 0x00 0x0a 0x80 0x85 0x01 0xf4 VLDB.POP.512 x1, [p0, lf0, r24]; MOV dn0, dn1 + 76 0x3e 0x30 0x14 0x18 VLDB.POP.512.2D x0, [p0, lf0, r24, d1] +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 80 0x3c 0x14 0x14 0x18 VLDB.FILL.512 [p0, lf0, r24] +.aggressive_scheduled_block_id 1 +.noswbrkpt + 84 0x00 0x0a 0x8a 0xe0 0xfd 0x34 VLDB.POP.512 x1, [p0, lf0, r24]; ADD.NC lc, r0, #-3 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 90 0xc6 0x02 0x80 0xf5 0x00 0x1c VLDB.POP.512.2D x0, [p0, lf0, r24, d1]; MOVX crRnd, r1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 96 0x3c 0x14 0x14 0x18 VLDB.FILL.512 [p0, lf0, r24] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 100 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 102 0x00 0x2c 0xf0 0x00 0x54 0x00 0x01 0xa5 0x7e 0xba NOPA; VLDB.POP.512 x1, [p0, lf0, r24]; NOPM +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 112 0x00 0x2c 0xfc 0x60 0x28 0x01 0x5b 0x00 0x00 0x00 0x01 0xc5 0x78 0x00 0x00 0xe1 NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];NOPS; NOPX; VCONV.fp32.bf16 cml0, x1; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 128 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x40 0xc5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV +.label ZLS_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_144 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 144 0x00 0x2c 0xf8 0x28 0x28 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB.FILL.512 [p0, lf0, r24]; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 160 0x00 0x2c 0xf0 0x00 0xad 0x80 0x03 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB.POP.512 x1, [p0, lf0, r24];VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 176 0x00 0x2c 0xfc 0x60 0x29 0x00 0x03 0x00 0x00 0x00 0x01 0xc5 0x78 0x00 0x00 0xe1 NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];VST.FLUSH.512.CONV [p2, sf, r26];NOPX; VCONV.fp32.bf16 cml0, x1; NOPV +.label ZLE_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_192 +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 192 0x00 0x2c 0xf0 0x00 0x23 0x00 0x03 0x00 0x00 0x00 0x40 0xc5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST.FLUSH.512.CONV.2D [p2, sf, r26, d0];NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV +.loop_nesting 0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 208 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 210 0x0d 0x80 0x03 0x18 VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 214 0x20 0x00 0x60 0x00 0x01 0xc5 0x70 0x02 VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 222 0x18 0x81 0x8a 0xf8 VCONV.fp32.bf16 cmh0, x0 + 226 0x0b 0x00 0x03 0x18 VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] + 230 0xb0 0x00 0x60 0x00 0x01 0xc5 0x70 0x02 VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1 + 238 0x20 0x00 0x60 0x00 0x40 0xc5 0x70 0x02 VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cmh0, x0 + 246 0x0b 0x00 0x03 0x18 VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] + 250 0xb0 0x00 0x60 0x50 0x00 0x5c VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];RET lr +.delay_slot + 256 0x09 0x00 0x03 0x18 VST.FLUSH.512.CONV [p2, sf, r26] +.delay_slot + 260 0x0b 0x00 0x03 0x18 VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] +.delay_slot +.swstall delay_slot + 264 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 266 0x00 0x00 NOPX +.label _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams__end last +.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_end0 last +.delay_slot +.swstall delay_slot + 268 0x00 0x00 NOPX + +.undef global data _ZN12me_primitive11control_rndE + +.text_segment_name +.text weak 10 _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params +.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_begin0 +.function_start + 0 0xf5 0xe0 0x83 0x88 0x8b 0x00 0x00 0x01 0x30 0x2e 0x10 0x76 MOVA m0, #-81; MOVS p3, p2; MOVXM p2, #(conv2d_params + 92) + 12 0x40 0xca 0xd4 0x88 0x8b 0x00 0x40 0x02 0xa8 0x00 0x10 0x76 LDA r18, [p2]; MOVS p4, p2; MOVXM r21, #16777216 + 24 0xfd 0x1c 0x00 0x00 0x02 0x00 0x00 0x00 0x70 0xba MOVA r28, #-24; PADDXM [sp], #64 + 34 0x00 0x31 0x07 0xf5 0x15 0x80 0x7f 0xff 0xaf 0xff 0x90 0x76 MOVA r17, #1; ST r8, [sp, #-12]; MOVXM r29, #33554431 + 46 0x0b 0x18 0x87 0xe1 0x55 0x81 0x80 0x0a 0x68 0x03 0x58 0x76 MOVA m6, #88; ST r10, [sp, #-32]; MOVX r24, #0; MOV r19, #3 + 58 0x00 0xb4 0x07 0xfd 0x35 0x81 0x71 0x0a 0x07 0xec 0x58 0x76 MOVA r20, #5; ST r9, [sp, #-4]; MOVX r23, #8; MOV m4, #-20 + 70 0x01 0x96 0x07 0xed 0xf5 0x87 0xa7 0xca 0x87 0xc4 0x58 0x76 MOVA r22, #12; ST r15, [sp, #-20]; MOVX r26, #254; MOV m5, #-60 + 82 0xfe 0x2e 0xb0 0x03 0x80 0x40 0x50 0x02 ST r11, [sp, #-16]; MOV m7, #64 + 90 0xff 0x73 0xba 0xfe 0x41 0x5c ST p7, [sp, #-8]; ADD r31, r21, r18 + 96 0x41 0x7e 0x3f 0xf3 0x9b 0x5c ST r31, [p2], m0; LSHL r28, r31, r28 + 102 0x5b 0xf9 0x57 0xe4 0x3d 0x80 0x04 0x6b 0xc7 0xfa LDA.u8 r30, [p2], #-3; ST lr, [sp, #-28]; EQ r21, r17, r28 + 112 0x02 0xc9 0x0a 0x98 LDA.u8 r8, [p2], m6 + 116 0x00 0x00 NOPX + 118 0x00 0x00 NOPX + 120 0x00 0x00 NOPX + 122 0x00 0x00 NOPX + 124 0x00 0x00 NOPX + 126 0x17 0x37 0xec 0x98 LTU r27, r28, r30 + 130 0x16 0x15 0x32 0x18 SEL.EQZ r10, r24, r19, r27 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 134 0x17 0x77 0xfc 0x98 LTU r27, r29, r31 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 138 0x51 0x54 0xef 0x27 0x8f 0x2c ST.s8 r21, [p2], m4; EQ r9, r30, r28 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 144 0x12 0x7b 0x1d 0x98 LSHL r29, r9, r17 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 148 0x16 0x3e 0xa2 0x18 SEL.EQZ r31, r24, r10, r27 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 152 0x17 0x6b 0x55 0x98 OR r21, r29, r21 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 156 0x8e 0xfd 0x9a 0xbf 0xaa 0xa4 LTU r27, r17, r30; ADD.NC r21, r31, r21 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 162 0x16 0x2b 0x52 0x18 SEL.EQZ r21, r24, r21, r27 + 166 0x14 0x7b 0x5c 0x98 LTU r29, r17, r21 + 170 0x55 0x76 0x3f 0x73 0x91 0x5c ST r29, [p2], m5; NE r28, r30, r28 + 176 0x5d 0x75 0x58 0xf9 0x11 0x2c LDA.u8 r29, [p2], m7; NE r30, r17, r8 + 182 0x00 0x00 NOPX + 184 0x00 0x00 NOPX + 186 0x00 0x00 NOPX + 188 0x00 0x00 NOPX + 190 0x00 0x00 NOPX + 192 0x00 0x00 NOPX + 194 0xbd 0xfb 0x1f 0xbd 0xfc 0x24 NE r23, r23, r29; ADD.NC r31, r29, #-4 + 200 0xf0 0x00 0x88 0x40 0x01 0x84 JNZ r30, #__ll6__Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params +.delay_slot + 206 0x17 0x51 0x48 0x98 NE r8, r29, r20 +.delay_slot + 210 0x17 0xfe 0x90 0x18 EXTEND.u8 r31, r31 +.delay_slot + 214 0x15 0xee 0x84 0x98 AND r23, r23, r8 +.delay_slot + 218 0x17 0xf5 0xac 0x98 LTU r26, r31, r26 +.delay_slot + 222 0x15 0xe1 0xa4 0x98 AND r16, r23, r26 + 226 0xe0 0x00 0x88 0x40 0x01 0x84 JNZ r28, #__ll6__Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params +.delay_slot + 232 0x0f 0xeb 0x1d 0x98 ST p6, [sp, #-24] +.delay_slot +.swstall delay_slot + 236 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 238 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 240 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 242 0x00 0x00 NOPX + 244 0x00 0x3b 0x00 0x00 0x00 0x28 0x00 0x00 0x20 0xba MOVA r27, #1; J #TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_320 +.delay_slot + 254 0x10 0x26 0x05 0x18 MOVX r19, #1 +.delay_slot +.swstall delay_slot + 258 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 260 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 262 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 264 0x00 0x2c 0xf0 0x04 0x00 0x00 0x1c 0x22 NOPA; NOPV +.label __ll6__Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params + 272 0xfd 0x63 0xb0 0x3b 0xbb 0x3e 0xc8 0x04 0x59 0x3a ST p6, [sp, #-24]; EQ r27, r29, r22; MOV r22, #4 + 282 0x17 0x6d 0x6d 0x98 LSHL r22, r29, r22 + 286 0x15 0xad 0x82 0x18 SEL.EQZ r22, r22, r24, r27 + 290 0x14 0xf7 0xd7 0x98 EQ r27, r19, r29 + 294 0xb4 0xf0 0x4d 0xb0 0x41 0xe4 SEL.EQZ r19, r22, r24, r27; MOV r27, r16 + 300 0x16 0x27 0x32 0x18 SEL.EQZ r19, r24, r19, r27 + 304 0x15 0x69 0x44 0x98 AND r20, r21, r20 + 308 0x15 0x36 0xf0 0x18 NEZ r27, r20 + 312 0x00 0x27 0x3a 0xad 0x00 0x00 0x1c 0x22 OR r19, r19, r21; NOPV +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_320 + 320 0x01 0x90 0x82 0x6f 0x71 0xb8 0x00 0x2e 0x10 0x00 0x60 0x76 MOVA m4, #12; ST r27, [p2], #24; JNZ r28, #TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_368 +.delay_slot + 332 0x02 0x8a 0x67 0x18 ST.s8 r19, [p2], m4 +.delay_slot +.swstall delay_slot + 336 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 338 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 340 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 342 0x00 0x00 NOPX + 344 0x00 0xff 0xfa 0x3f 0xfe 0x44 MOVXM r20, #16777215 + 350 0x14 0xa5 0x44 0x98 AND r18, r18, r20 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 354 0x00 0x2c 0xf8 0x0c 0xa3 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; ST r18, [p4]; NOPM; NOPV +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_368 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 368 0x51 0xd2 0xd0 0x27 0x44 0x82 0xef 0xfd 0x58 0xba LDA r20, [p2], #-32; EXTEND.u8 r20, r19; MOV r23, #-3 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 378 0x00 0x52 0x00 0x29 0x6f 0xfa 0x00 0x24 0x58 0xba MOVA r18, #2; ADD r22, r20, #-1; MOV m4, #36 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 388 0x51 0x5e 0xd0 0x2f 0x7b 0x26 0x07 0xcc 0x58 0xba LDA r23, [p2], m4; AND r23, r23, r22; MOV m4, #-52 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 398 0x51 0x6a 0xd7 0xde 0xf5 0xbf 0x37 0xea 0x00 0xc4 0x58 0x76 LDA r26, [p2], m4; ST r23, [sp, #-36]; MOVX r19, #-1; MOV m4, #196 + 410 0x5f 0xf2 0xd0 0x64 0x02 0x2c LDA r28, [p2], #-4; MOVX r25, #0 + 416 0x51 0x7a 0xdf 0xae 0x6b 0x0c LDA r30, [p2], m4; ST r25, [sp, #-44] + 422 0x01 0x06 0xd6 0x98 LDA r22, [p1] + 426 0x00 0xd2 0xda 0x7e 0x5b 0x2c LDA r20, [p0]; LSHL r31, r20, r18 + 432 0x03 0x07 0xb6 0x98 LDA r29, [p3] + 436 0x15 0xef 0x2d 0x98 LSHL r23, r23, r18 + 440 0x00 0x00 NOPX + 442 0x17 0x27 0x3e 0x98 ASHL r19, r28, r19 + 446 0x14 0x63 0xe8 0x98 NE r17, r17, r30 + 450 0x88 0x01 0x10 0x40 0x01 0x84 JNZ r17, #TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_544 +.delay_slot + 456 0xd5 0xe5 0xbb 0x37 0xb2 0xa4 LSHL r23, r26, r18; ADD.NC r22, r23, r22 +.delay_slot + 462 0x9d 0xa5 0xb0 0x97 0xb2 0xa4 LSHL r22, r19, r18; ADD.NC dn0, r23, r22 +.delay_slot + 468 0xfb 0x04 0xb0 0x01 0x6f 0xe8 0xa0 0x02 ST dn0, [sp, #-40]; ADD.NC r11, r31, r20 +.delay_slot + 476 0x1b 0xd0 0x80 0xf8 MOV r15, dn0 +.delay_slot + 480 0x1e 0x6e 0xd9 0x58 ADD.NC p6, r29, r22 + 484 0x00 0x00 0x0e 0xc0 0x00 0x44 MOVXM p7, #_ZN12me_primitive11control_rndE + 490 0xe0 0xc4 0x50 0xb4 0x80 0x2c LDA.s8 r17, [p7]; MOVX vaddSign0, #1 + 496 0x00 0x00 NOPX + 498 0x00 0x00 NOPX + 500 0x00 0x00 NOPX + 502 0xff 0x7f 0x0a 0x20 0x00 0x44 MOVXM r20, #-8454144 + 508 0x18 0x02 0x91 0x78 VINSERT.32 x0, x0, #0, r20 + 512 0x18 0x00 0x92 0xf8 VMOV bmll0, x0 + 516 0x14 0x7a 0x80 0x18 MOVX crRnd, r17 + 520 0x08 0x40 0x16 0x18 VCONV.bf16.fp32 wl0, bmll0 + 524 0x00 0x00 NOPX + 526 0x1c 0x41 0x01 0xb8 VEXTRACT.16 r17, x0, #0, vaddSign0 + 530 0x00 0x00 NOPX + 532 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x03 0xeb 0x1a 0xc1 0x36 NOPA; NOPB; ST r17, [sp, #-44]; NOPX +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_544 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 544 0x0b 0x90 0x81 0x8b 0x0b 0x00 0x00 0x01 0xb0 0x00 0x10 0x76 MOVA m4, #92; MOVS p1, r11; MOVXM p3, #_ZN12me_primitive11control_rndE +.aggressive_scheduled_block_id 3 +.noswbrkpt + 556 0x51 0x47 0x50 0x84 0x8b 0x31 0x19 0x92 0x68 0x0b 0x58 0x76 LDA.u16 r17, [p2], m4; MOVS p0, p1; SEL.EQZ r17, r24, r19, r27; MOV r19, #11 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 568 0x61 0x95 0x00 0x2b 0xb9 0x65 0x4a 0x60 0x78 0xba MOVA r21, #780; LTU r27, r21, r18; MOV r10, p2 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 578 0x03 0x06 0x67 0x18 ST.s8 r19, [p3] +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 582 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 584 0x00 0x00 0x00 0x00 0x01 0x04 JL #_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 590 0x14 0x6d 0x2d 0x98 LSHL r22, r17, r18 +.delay_slot + 594 0x1f 0x6e 0xd9 0x58 ADD.NC p7, r29, r22 +.delay_slot + 598 0xc4 0x62 0x3a 0x2c 0x35 0x64 SUB r17, r24, r17; MOV r20, #781 +.delay_slot + 604 0x14 0x63 0x2d 0x98 LSHL r17, r17, r18 +.delay_slot + 608 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x2a 0x8a 0x11 0x2b 0xe2 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; SEL.EQZ r8, r21, r20, r27; ADD.NC r9, r15, r17; NOPV +.return_address + 624 0x07 0xd8 0x99 0x18 LDA p1, [sp, #-40] +.no_stack_arguments + 628 0x00 0x00 0x00 0x00 0x01 0x04 JL #_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams +.delay_slot +.swstall delay_slot + 634 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 636 0x00 0x00 NOPX +.delay_slot + 638 0x1a 0x95 0x10 0x18 ADD.NC r10, r10, #32 +.delay_slot + 642 0x1a 0x65 0x20 0xf8 MOV p2, r10 +.delay_slot + 646 0x00 0x2c 0xf0 0x8f 0x0b 0x00 0x00 0x00 0x00 0x7a NOPA; MOVS p0, r15; NOPX +.return_address + 656 0xd6 0x9a 0x80 0x01 0x37 0xea 0x32 0xa3 0x08 0xba MOVA dj6, #-332; MOVX r19, #63; ADD.NC p4, r10, #-116 + 666 0x83 0x84 0xd0 0x00 0x00 0x01 0x30 0x00 0x10 0xba LDA dn0, [p4], #4; MOVXM p2, #_ZN12me_primitive11control_rndE + 676 0x83 0x88 0xd0 0x74 0x62 0x2c LDA dj0, [p4], #4; MOVX r29, #12 + 682 0x04 0x1e 0x26 0x98 LDA dn4, [p4], #4 + 686 0x04 0x1e 0x46 0x98 LDA dj4, [p4], #4 + 690 0x04 0x1c 0x06 0x98 LDA m0, [p4], #4 + 694 0x04 0x1c 0x66 0x98 LDA dc0, [p4], #4 + 698 0x04 0x1e 0x66 0x98 LDA dc4, [p4], #4 + 702 0x04 0x1e 0xb6 0x98 LDA r21, [p4], #4 + 706 0x04 0x1e 0x36 0x98 LDA r17, [p4], #4 + 710 0x04 0x1e 0xf6 0x98 LDA r23, [p4], #4 + 714 0x04 0x1e 0x96 0x98 LDA r20, [p4], #4 + 718 0x04 0x1e 0xd6 0x98 LDA r22, [p4], #4 + 722 0x04 0x1d 0x9e 0x98 LDA p3, [p4], #4 + 726 0x04 0x1d 0x26 0x98 LDA dn2, [p4], #4 + 730 0x04 0x1c 0xa6 0x98 LDA dn1, [p4], #4 + 734 0x04 0x1c 0xc6 0x98 LDA dj1, [p4], #4 + 738 0x04 0x1e 0xa6 0x98 LDA dn5, [p4], #4 + 742 0x04 0x1f 0x56 0x98 LDA r26, [p4], #4 + 746 0x04 0x1f 0x96 0x98 LDA r28, [p4], #4 + 750 0x04 0x1c 0xe6 0x98 LDA dc1, [p4], #4 + 754 0x04 0xc2 0x4a 0x98 LDA.u8 r18, [p4, dj6] + 758 0x02 0x07 0xa7 0x18 ST.s8 r29, [p2] + 762 0x07 0xd7 0xd1 0x18 LDA r30, [sp, #-44] + 766 0x04 0x04 0x56 0x98 LDA r2, [p4] + 770 0x00 0x00 NOPX + 772 0x00 0x00 NOPX + 774 0x00 0x00 NOPX + 776 0x14 0xe7 0x2c 0x98 LTU r19, r19, r18 + 780 0x98 0x03 0x48 0x40 0x01 0x84 JNZ r19, #TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1680 +.delay_slot + 786 0x1c 0xf9 0x72 0xf8 VBCST.16 x9, r30 +.delay_slot +.swstall delay_slot + 790 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 792 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 794 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 796 0x00 0x00 NOPX + 798 0x07 0x90 0x80 0x00 0x00 0x02 0x30 0x62 0x10 0xba MOVA m4, #60; MOVXM p4, #(conv2d_params + 196) + 808 0x91 0x7e 0xd0 0x01 0x80 0x0a 0x87 0xc0 0x58 0xba LDA r31, [p4], m4; MOVX r24, #0; MOV m5, #-64 + 818 0x95 0x64 0xd0 0x3b 0xd4 0x03 0x07 0x7c 0x58 0xba LDA dn6, [p4], m5; MOVX crRnd, r29; MOV m6, #-132 + 828 0x85 0xaa 0xd0 0x00 0x00 0x01 0x31 0xd8 0x10 0xba LDA r10, [p4], #8; MOVXM p2, #(TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_944 + 0) + 838 0x83 0xd0 0xd0 0x01 0x37 0x8b 0x28 0x00 0x58 0xba LDA m5, [p4], #4; MOVX r19, #60; MOV r25, #0 + 848 0x04 0x8a 0xc6 0x98 LDA dj5, [p4], m4 + 852 0x93 0xc0 0xde 0xee 0x4f 0x2c LDA m4, [p4], #-28; EQ r27, r29, r18 + 858 0xfb 0x8e 0x2f 0x48 0x02 0x2c LDA r3, [sp, #-36]; MOVX r18, #-128 + 864 0x83 0xbe 0xd0 0x30 0x59 0x12 0x48 0x00 0x58 0xba LDA r15, [p4], #4; SEL.EQZ r5, r24, r18, r27; MOV r18, #0 + 874 0x99 0x7e 0xdd 0x1f 0x2a 0x94 LDA r31, [p4], m6; ADD.NC dj6, r31, r5 + 880 0x82 0x82 0xdf 0x92 0x41 0xd4 LDA r0, [p4, #4]; MOV dc7, r18 + 886 0xfb 0x12 0x23 0x5c 0x4b 0x01 0x01 0x54 0xa2 0xba LDA r4, [sp, #-40]; MOVS dn3, dc7; ADD.NC m2, r5, r10 + 896 0xfc 0x87 0x22 0x1c 0x4b 0x00 0xcd 0x00 0x72 0xba LDA lr, [sp, #-28]; MOVS dc2, dc7; MOV r6, m5 + 906 0xfc 0x2a 0x26 0x1c 0x4b 0x01 0x41 0x4c 0xa2 0xba LDA r10, [sp, #-32]; MOVS dc6, dc7; ADD.NC dj2, r5, r6 + 916 0x80 0x86 0xd3 0xb6 0x01 0xd4 LDA r1, [p4]; MOV r7, dj5 + 922 0x67 0x86 0x2c 0x06 0xc0 0x24 MOVX r30, #780; ADD.NC m6, r6, #-64 + 928 0x07 0x43 0xae 0x07 0xc0 0x24 MOVX r29, #7; ADD.NC m7, r7, #-64 + 934 0x00 0x2c 0xf0 0x00 0x0f 0xf8 0x82 0x80 0x78 0xba NOPA; ADD r0, r0, #-1; MOV m1, dj2 +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_944 +.loop_nesting 1 + 944 0xc3 0x85 0x71 0x84 0x0b 0x04 0xbe 0xec 0x32 0xd0 0x78 0x76 VLDA.CONV.fp32.bf16 cml0, [p6], #64; MOVS p1, r4; LSHL r11, r2, r29; MOV p0, r11 + 956 0x22 0x81 0x78 0x28 0x2b 0x0e 0x4b 0x02 0x32 0xd2 0xa0 0xf6 VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc3, dn3; ADD.NC p4, r11, r9 + 968 0xa0 0x39 0x78 0x28 0x2f 0x5a 0x4b 0x03 0xc6 0x80 0x70 0xf6 VLDA.POP.576 ex7, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];MOVS dn7, dn6; MOV dj7, dj6 + 980 0xd9 0x0d 0x74 0x03 0x2b 0x52 0x0b 0x01 0x82 0x00 0x70 0xf6 VLDA.CONV.fp32.bf16 cmh0, [p6], m6;VLDB.POP.576 ex6, [p0, lf0, r24];MOVS dn3, r18; MOV m3, m2 + 992 0x71 0x41 0x74 0x12 0xd4 0x01 0xc0 0x00 0x5e 0xba VLDA.POP.576 ex8, [p1, lf1, r25, m4];VLDB.POP.576.3D ex11, [p0, lf0, r24, d0]; MOV dj3, #0 + 1002 0xc3 0x95 0x78 0x28 0x28 0x00 0x00 0x01 0xba 0x78 0x10 0xb6 VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.FILL.512 [p0, lf0, r24]; MOVXM le, #(ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1264 + 0) + 1014 0xdd 0x1d 0x78 0x28 0x28 0x00 0x00 0x00 0x7a 0x60 0x10 0xb6 VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.FILL.512 [p0, lf0, r24]; MOVXM ls, #(ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1216 + 0) + 1026 0x80 0xb5 0x74 0x01 0x28 0x3c VLDA.CONV.fp32.bf16 cml3, [p4];VLDB.POP.576 ex2, [p0, lf0, r24] + 1032 0xc3 0xa5 0x78 0x22 0x28 0x3c VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.POP.576.3D ex4, [p0, lf0, r24, d0] + 1038 0xd9 0x2d 0x78 0x28 0x28 0x3c VLDA.CONV.fp32.bf16 cmh2, [p6], m6;VLDB.FILL.512 [p0, lf0, r24] +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 1044 0x22 0x81 0x78 0x28 0x28 0x3c VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24] +.aggressive_scheduled_block_id 4 +.noswbrkpt + 1050 0x83 0xbd 0x74 0x01 0x28 0x3c VLDA.CONV.fp32.bf16 cmh3, [p4], #64;VLDB.POP.576 ex2, [p0, lf0, r24] +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1056 0x80 0xcd 0x74 0x11 0x14 0x02 0x9a 0xdf 0xee 0xba VLDA.CONV.fp32.bf16 cmh4, [p4];VLDB.POP.576.3D ex4, [p0, lf0, r24, d0]; VSHUFFLE ex10, ex6, ex11, r15 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1066 0x80 0xc5 0x78 0x28 0x2c 0x98 0x8b 0x01 0x9a 0xff 0xe0 0xf6 VLDA.CONV.fp32.bf16 cml4, [p4];VLDB.FILL.512 [p0, lf0, r24];MOVS p4, p6; VSHUFFLE ex6, ex6, ex11, r31 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1078 0x02 0x81 0x73 0x00 0x54 0x1d 0x40 0x14 0xe9 0x4a VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex1, [p1, lf1, r25]; VMAC.f dm0, dm0, ex10, ex7, r8 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1088 0xcf 0x35 0x76 0x94 0x96 0x00 0x00 0x5c 0x58 0x07 0x41 0x2c 0xe9 0x6e VLDA.3D.CONV.fp32.bf16 cml3, [p6], d3; MOVS dn3, dn2; MOV dj3, dj5; VMAC.f dm1, dm1, ex6, ex7, r8 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1102 0x82 0xbd 0x7a 0x38 0x96 0x00 0x00 0x4c 0x91 0xfe 0x42 0x55 0x09 0x6e VLDA.CONV.fp32.bf16 cmh3, [p4, #64]; MOVS dc5, dc7; VSHUFFLE ex3, ex2, ex4, r15; VMAC.f dm2, dm2, ex10, ex8, r8 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1116 0x71 0x01 0x74 0x98 0x96 0x00 0x00 0x54 0x93 0xfe 0x98 0x60 0x3d 0x6e VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dn2, dc3; VSHUFFLE ex5, ex2, ex4, r31; VADD.f dm0, dm3, dm0, r19 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1130 0x22 0x81 0x70 0x04 0x99 0x64 0x3d 0x62 VLDA.FILL.512 [p1, lf1, r25]; VADD.f dm1, dm3, dm1, r19 +.aggressive_scheduled_block_id 4 +.noswbrkpt + 1138 0xa0 0x09 0x70 0x04 0x9a 0x88 0x3d 0x62 VLDA.POP.576 ex1, [p1, lf1, r25]; VADD.f dm2, dm4, dm2, r19 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1146 0x71 0x01 0x70 0x04 0x43 0x6d 0x09 0x62 VLDA.POP.576 ex0, [p1, lf1, r25, m4]; VMAC.f dm3, dm3, ex6, ex8, r8 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1154 0x22 0x81 0x74 0x01 0x28 0x3c VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24] +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1160 0x3c 0x11 0x14 0x18 VLDB.POP.576.3D ex4, [p0, lf0, r24, d0] +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1164 0xa0 0x09 0x78 0x28 0x2d 0x70 0xfd 0x82 0x9b 0x8c 0x3d 0x66 VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24]; ADD.NC lc, r1, #-5; VADD.f dm3, dm4, dm3, r19 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1176 0x71 0x01 0x74 0x14 0x14 0x1d 0xf0 0x06 0x29 0x4a VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24]; VMAC.f dm0, dm0, ex3, ex1, r30 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1186 0x22 0x81 0x74 0x01 0x28 0x00 0x00 0x58 0xa2 0x0f 0xf2 0x46 0x09 0x4e VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24]; NOPX; MOV dj5, r20; VMAC.f dm2, dm2, ex3, ex0, r30 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1200 0x00 0x2c 0xf8 0x22 0x28 0x01 0x5b 0x00 0x00 0x00 0xc9 0x1f 0xef 0x89 0x51 0x4b NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r15; VMAC.f dm1, dm1, ex5, ex1, r30 +.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1216 +.loop_nesting 2 +.begin_of_loop +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1216 0xa0 0x09 0x78 0x28 0x28 0x01 0x5b 0x00 0x00 0x01 0x49 0x3f 0xef 0x9b 0x50 0x4b VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; VSHUFFLE ex5, ex2, ex4, r31; VMAC.f dm3, dm3, ex5, ex0, r30 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1232 0x71 0x01 0x78 0x28 0x28 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x7f 0x80 0x31 0x4b VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm0, dm0, ex3, ex1, r30 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1248 0x22 0x81 0x74 0x01 0x28 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x7f 0x92 0x30 0x4b VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm2, dm2, ex3, ex0, r30 +.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1264 +.end_of_loop +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1264 0x00 0x2c 0xf8 0x22 0x28 0x01 0x5b 0x00 0x00 0x00 0xc9 0x1f 0xef 0x89 0x51 0x4b NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r15; VMAC.f dm1, dm1, ex5, ex1, r30 +.loop_nesting 1 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1280 0xa0 0x09 0x7c 0xbc 0x96 0x00 0x00 0x54 0x93 0xfe 0xf3 0x6a 0x09 0x6e VLDA.POP.576 ex1, [p1, lf1, r25]; MOVS dn6, dn7; VSHUFFLE ex5, ex2, ex4, r31; VMAC.f dm3, dm3, ex5, ex0, r30 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1294 0x71 0x01 0x7e 0x1c 0x96 0x00 0x00 0x48 0xd8 0x07 0xf0 0x06 0x29 0x6e VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dc7, dn3; MOV r4, dj5; VMAC.f dm0, dm0, ex3, ex1, r30 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1308 0x61 0x91 0x67 0x87 0x00 0xe4 0xf2 0x46 0x09 0x4a MOVS dc3, p3; MOV dj7, dj3; VMAC.f dm2, dm2, ex3, ex0, r30 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1318 0x6a 0xa1 0x61 0x92 0x3f 0xc4 0xf1 0x2a 0x29 0x4a MOVS dn3, r21; VSHUFFLE ex3, ex2, ex4, r15; VMAC.f dm1, dm1, ex5, ex1, r30 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1328 0xea 0xe1 0x62 0x92 0x7f 0xc4 0xf3 0x6a 0x09 0x4a MOVS dn7, r23; VSHUFFLE ex5, ex2, ex4, r31; VMAC.f dm3, dm3, ex5, ex0, r30 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1338 0xb3 0x91 0x6f 0x57 0x25 0x85 0x00 0xe6 0xf0 0x06 0x29 0x66 PADDB [p7], m5; MOVS p5, p7; MOV dj5, dj2; VMAC.f dm0, dm0, ex3, ex1, r30 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1350 0x93 0x91 0x62 0x06 0x00 0xe4 0xf2 0x46 0x09 0x4a MOVS p4, p7; MOV m2, m3; VMAC.f dm2, dm2, ex3, ex0, r30 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1360 0x02 0x92 0x7f 0xc6 0xf1 0x2a 0x29 0x62 VSHUFFLE ex5, ex2, ex4, r31; VMAC.f dm1, dm1, ex5, ex1, r30 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 1368 0x01 0x92 0x3f 0xc6 0xf3 0x6a 0x09 0x62 VSHUFFLE ex3, ex2, ex4, r15; VMAC.f dm3, dm3, ex5, ex0, r30 + 1376 0x1a 0x8f 0x00 0xf8 MOV dj2, dj7 + 1380 0x03 0x0b 0x20 0xe6 0xf1 0x2a 0x29 0x62 MOV m3, r22; VMAC.f dm1, dm1, ex5, ex1, r30 + 1388 0x03 0x88 0xa0 0xe6 0xf0 0x06 0x29 0x62 MOV dj3, r17; VMAC.f dm0, dm0, ex3, ex1, r30 + 1396 0x07 0x82 0x20 0xe6 0xf3 0x6a 0x09 0x62 MOV dj7, r4; VMAC.f dm3, dm3, ex5, ex0, r30 + 1404 0x00 0xf7 0x23 0x05 0x00 0xe6 0xf2 0x46 0x09 0x4a PADDB.3D [p0], d3; MOV m3, dj2; VMAC.f dm2, dm2, ex3, ex0, r30 + 1414 0x71 0x89 0x67 0x6b 0x90 0x03 0xc5 0x80 0x76 0xba PADDB [p7], m3; MOVS p3, dc3; MOV dj7, dj5 + 1424 0x62 0x89 0x60 0x01 0x81 0x00 0x70 0x02 MOVS dc3, dc5; MOV m3, m1 + 1432 0xa0 0x41 0x60 0x02 0xc6 0x90 0x70 0x02 MOVS dc5, r2; MOV dj5, r26 + 1440 0xa2 0x02 0xc0 0x00 0x87 0x10 0x70 0x02 VCONV.bf16.fp32 x10, cml0; MOV m1, r28 + 1448 0x13 0x91 0x61 0x3b 0x90 0x01 0x68 0x60 0x76 0xba PADDB.3D [p1], d1; MOVS p0, p7; MOV r11, p0 + 1458 0x62 0x0a 0xc0 0x00 0x83 0x00 0x70 0x02 VCONV.bf16.fp32 x6, cmh0; MOV m1, m3 + 1466 0x52 0x2a 0xc0 0x2b 0x90 0x00 0x89 0x60 0x76 0xba PADDB [p0], m1; VCONV.bf16.fp32 x5, cmh2; MOV r4, p1 + 1476 0xb2 0x12 0xc0 0x06 0x00 0xc8 0x00 0x00 0x61 0x3a VCONV.bf16.fp32 x11, cml1; JZ r3, #TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1600 +.delay_slot + 1486 0x72 0x1a 0xc0 0x03 0xa7 0xc0 0x70 0x02 VCONV.bf16.fp32 x7, cmh1; MOV dn7, dc7 +.delay_slot + 1494 0x82 0x32 0xc5 0x2b 0x90 0x00 0xb5 0x60 0x76 0xba PADDB [p5], m1; VCONV.bf16.fp32 x8, cml3; MOV p1, p5 +.delay_slot + 1504 0x12 0x3a 0xc0 0x02 0xc2 0x80 0x70 0x02 VCONV.bf16.fp32 x1, cmh3; MOV dj5, dj2 +.delay_slot + 1512 0x22 0x22 0xc0 0x00 0x4d 0xc0 0x70 0x02 VCONV.bf16.fp32 x2, cml2; MOV r2, dc5 +.delay_slot + 1520 0x1f 0xc7 0x80 0xf8 MOV dc7, dc3 + 1524 0x1d 0xdc 0xec 0xf8 VMAX_LT.bf16 x11, r16, x11, x9 + 1528 0x1b 0xbc 0xec 0xf8 VMAX_LT.bf16 x7, r16, x7, x9 + 1532 0x3c 0x5a 0x60 0x02 0xaa 0x76 0x70 0x02 VST x11, [p1, dj7]; VMAX_LT.bf16 x10, r16, x10, x9 + 1540 0xa2 0xba 0x60 0x01 0xda 0x76 0x70 0x02 VST x7, [p5, #64]; VMAX_LT.bf16 x7, r16, x6, x9 + 1548 0x20 0xd2 0x60 0x00 0x00 0xcc 0x00 0x00 0x21 0x3a VST x10, [p1]; J #TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1632 +.delay_slot + 1558 0x22 0xba 0x60 0x02 0xa2 0x76 0x70 0x02 VST x7, [p1, #64]; VMAX_LT.bf16 x10, r16, x8, x9 +.delay_slot + 1566 0x1b 0x8c 0xec 0xf8 VMAX_LT.bf16 x7, r16, x1, x9 +.delay_slot + 1570 0x00 0xd2 0x60 0x02 0x8a 0x76 0x70 0x02 VST x10, [p0]; VMAX_LT.bf16 x10, r16, x2, x9 +.delay_slot + 1578 0x02 0xba 0x60 0x01 0x56 0x76 0x70 0x02 VST x7, [p0, #64]; VMAX_LT.bf16 x5, r16, x5, x9 +.delay_slot + 1586 0x00 0x2c 0xf9 0x45 0x26 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; VST x10, [p4, dj5]; NOPM; NOPV +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1600 + 1600 0x09 0xe0 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p1, dj7] + 1604 0x0d 0x14 0xe3 0x18 VST.CONV.bf16.fp32 cmh1, [p5, #64] + 1608 0x09 0x04 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p1] + 1612 0x09 0x14 0x63 0x18 VST.CONV.bf16.fp32 cmh0, [p1, #64] + 1616 0x08 0x06 0x13 0x18 VST x8, [p0] + 1620 0x08 0x15 0xe3 0x18 VST.CONV.bf16.fp32 cmh3, [p0, #64] + 1624 0x94 0x24 0x60 0x00 0x01 0xa5 0x70 0x02 VST.CONV.bf16.fp32 cml2, [p4, dj5]; NOPM +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1632 + 1632 0x00 0x14 0x05 0x00 0x01 0x64 JNZD r0, r0, p2; MOV dj2, #0 +.delay_slot + 1638 0xf5 0x72 0x0e 0x2a 0xa6 0x4c PADDB [p7], m5; VST x5, [p7, #64] +.delay_slot + 1644 0x1b 0x44 0x80 0xf8 MOV dn3, dn2 +.delay_slot + 1648 0x1a 0x49 0x20 0xf8 MOV dn2, r18 +.delay_slot + 1652 0xeb 0x72 0x05 0x1e 0x01 0xf4 PADDB.3D [p7], d2; MOV dj2, dj7 +.delay_slot + 1658 0x1a 0x4e 0x80 0xf8 MOV dn2, dn7 +.loop_nesting 0 + 1662 0x00 0x04 0x90 0x00 0x00 0x84 J #TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2336 +.delay_slot +.swstall delay_slot + 1668 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1670 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1672 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1674 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1676 0x00 0x01 0x67 0x98 NOPA +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1680 + 1680 0x07 0x90 0x80 0x8b 0x0b 0x00 0x00 0x02 0x30 0x64 0x10 0x76 MOVA m4, #60; MOVS p0, r11; MOVXM p4, #(conv2d_params + 200) + 1692 0x83 0xb8 0xd5 0x02 0x0b 0x3b 0xd4 0x03 0x07 0x7c 0x58 0x76 LDA dj3, [p4], #4; MOVS dc5, r2; MOVX crRnd, r29; MOV m6, #-132 + 1704 0x91 0x40 0xd0 0x00 0x00 0x01 0x33 0x90 0x10 0xba LDA m4, [p4], m4; MOVXM p2, #(TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1824 + 0) +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 1714 0x93 0xd0 0xd0 0x01 0x30 0xcb 0x08 0x00 0x58 0xba LDA m5, [p4], #-28; MOVX r19, #6; MOV r24, #0 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 1724 0x83 0xd6 0xd4 0x16 0x41 0xd4 LDA r21, [p4], #4; MOV m2, r22 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 1730 0x99 0x5a 0xd0 0x01 0x90 0x08 0x87 0x10 0x78 0xba LDA r22, [p4], m6; MOVX r25, #0; MOV m1, r28 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 1740 0x82 0xf2 0xdb 0x1a 0x41 0xd4 LDA r28, [p4, #4]; MOV dj5, r26 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 1746 0x07 0xd8 0x99 0x18 LDA p1, [sp, #-40] +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 1750 0x80 0xde 0xdd 0x14 0x41 0xd4 LDA r23, [p4]; MOV dj6, r20 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 1756 0xfb 0xea 0x26 0x0a 0x4b 0x03 0xac 0x00 0x72 0xba LDA r26, [sp, #-36]; MOVS dc6, dn2; MOV r29, m4 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 1766 0xfc 0x2a 0x22 0x55 0x0b 0x03 0x03 0x80 0x72 0xba LDA r10, [sp, #-32]; MOVS dn2, r21; MOV m6, dj3 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 1776 0xfc 0x87 0x26 0x57 0x0b 0x03 0xce 0x00 0x72 0xba LDA lr, [sp, #-28]; MOVS dn6, r23; MOV r30, m6 + 1786 0x04 0x9e 0x26 0x1e 0xc0 0x24 MOVX r18, #60; ADD.NC m3, r30, #-64 + 1792 0x61 0x94 0x00 0x00 0x20 0x01 0x5b 0x39 0xcf 0xfb 0x87 0x70 0x08 0x00 0x00 0xe1 MOVA r20, #780; NOPB; NOPS; ADD r28, r28, #-1; ADD.NC m7, r29, #-64; NOPV + 1808 0x00 0x2c 0xf0 0x00 0x22 0x0c 0x8b 0x00 0x00 0x03 0xc7 0x50 0x78 0x00 0x00 0xe1 NOPA; NOPB; MOVS dc2, p3; NOPX; MOV dj7, r29; NOPV +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1824 +.loop_nesting 1 + 1824 0xc3 0x85 0x78 0x28 0x28 0x00 0x00 0x0f 0x7d 0x02 0x71 0x21 0x60 0x7e VLDA.CONV.fp32.bf16 cml0, [p6], #64;VLDB.FILL.512 [p0, lf0, r24];MOVS p3, r9; MOVXM ls, #(ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2000 + 0) + 1838 0xcd 0x0d 0x78 0x28 0x28 0x00 0x00 0x01 0xbc 0x00 0x10 0xb6 VLDA.CONV.fp32.bf16 cmh0, [p6], m3;VLDB.FILL.512 [p0, lf0, r24]; MOVXM le, #(ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2048 + 0) + 1850 0x22 0x81 0x74 0x05 0x28 0x05 0xd9 0xee 0xbd 0xff 0x48 0xb6 VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex10, [p0, lf0, r24]; LSHL r29, r2, r19; ADD.NC lc, r23, #-3 + 1862 0x75 0x59 0x72 0x01 0x14 0x01 0x47 0x50 0x7e 0xba VLDA.POP.576 ex11, [p1, lf1, r25, m5];VLDB.POP.576 ex4, [p0, lf0, r24]; MOV dj2, r29 + 1872 0xc3 0x95 0x74 0x01 0x28 0x3c VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.POP.576 ex2, [p0, lf0, r24] + 1878 0xdd 0x1d 0x78 0x21 0xa8 0x3c VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.POP.576.3D ex3, [p0, lf0, r24, d0] + 1884 0xc3 0xa5 0x78 0x28 0x28 0x3c VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.FILL.512 [p0, lf0, r24] + 1890 0xcd 0x2d 0x78 0x28 0x28 0x3c VLDA.CONV.fp32.bf16 cmh2, [p6], m3;VLDB.FILL.512 [p0, lf0, r24] + 1896 0xc3 0xb5 0x74 0x00 0xa8 0x3c VLDA.CONV.fp32.bf16 cml3, [p6], #64;VLDB.POP.576 ex1, [p0, lf0, r24] + 1902 0xdd 0x3d 0x74 0x03 0x28 0x3c VLDA.CONV.fp32.bf16 cmh3, [p6], m7;VLDB.POP.576 ex6, [p0, lf0, r24] + 1908 0x68 0x45 0x74 0x03 0xa8 0x3c VLDA.CONV.fp32.bf16 cml4, [p3, dj2];VLDB.POP.576 ex7, [p0, lf0, r24] + 1914 0x68 0x4d 0x74 0x12 0x14 0x01 0x69 0x2b 0xee 0xba VLDA.CONV.fp32.bf16 cmh4, [p3, dj2];VLDB.POP.576.3D ex8, [p0, lf0, r24, d0]; VSHUFFLE ex5, ex10, ex4, r21 + 1924 0x22 0x81 0x74 0x14 0x14 0x02 0xa9 0x2d 0xee 0xba VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24]; VSHUFFLE ex10, ex10, ex4, r22 + 1934 0x75 0x01 0x78 0x28 0x2a 0x11 0xd7 0xc2 0x40 0x0b 0x69 0x66 VLDA.POP.576 ex0, [p1, lf1, r25, m5];VLDB.FILL.512 [p0, lf0, r24]; VSHUFFLE ex4, ex2, ex3, r21; VMAC.f dm0, dm0, ex5, ex11, r8 + 1946 0x22 0x81 0x75 0x11 0xdb 0xc2 0x41 0x35 0x69 0x4a VLDA.FILL.512 [p1, lf1, r25]; VSHUFFLE ex10, ex2, ex3, r22; VMAC.f dm1, dm1, ex10, ex11, r8 + 1956 0x42 0x49 0x69 0x48 VMAC.f dm2, dm2, ex4, ex11, r8 +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 1960 0x43 0x75 0x69 0x48 VMAC.f dm3, dm3, ex10, ex11, r8 +.aggressive_scheduled_block_id 6 +.noswbrkpt + 1964 0x04 0x00 0xaa 0x8b 0x5b 0xc6 0x91 0x84 0x3d 0x4a VLDB.POP.576 ex1, [p0, lf0, r24]; VSHUFFLE ex5, ex1, ex6, r22; VADD.f dm1, dm4, dm1, r18 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 1974 0x02 0x01 0x94 0x00 0x90 0x80 0x3d 0x62 VLDB.POP.576 ex6, [p0, lf0, r24]; VADD.f dm0, dm4, dm0, r18 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 1982 0x02 0x01 0xd4 0x00 0x92 0x88 0x3d 0x62 VLDB.POP.576 ex7, [p0, lf0, r24]; VADD.f dm2, dm4, dm2, r18 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 1990 0x75 0x01 0x74 0x12 0x14 0x1d 0x93 0x8c 0x3d 0x4a VLDA.POP.576 ex0, [p1, lf1, r25, m5];VLDB.POP.576.3D ex8, [p0, lf0, r24, d0]; VADD.f dm3, dm4, dm3, r18 +.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2000 +.loop_nesting 2 +.begin_of_loop +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 2000 0x82 0x82 0x82 0x16 0xaf 0xb4 VLDB.FILL.512 [p0, lf0, r24]; VSHUFFLE ex2, ex1, ex6, r21 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 2006 0x08 0x28 0x2a 0x3c 0x5b 0xc6 0xa1 0x2a 0x09 0x4a VLDB.FILL.512 [p0, lf0, r24]; VSHUFFLE ex4, ex7, ex8, r22; VMAC.f dm1, dm1, ex5, ex0, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 2016 0x04 0x00 0xa9 0xbc 0x57 0xc6 0xa0 0x04 0x09 0x4a VLDB.POP.576 ex1, [p0, lf0, r24]; VSHUFFLE ex3, ex7, ex8, r21; VMAC.f dm0, dm0, ex2, ex0, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 2026 0x02 0x01 0x94 0x00 0xa3 0x68 0x09 0x62 VLDB.POP.576 ex6, [p0, lf0, r24]; VMAC.f dm3, dm3, ex4, ex0, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 2034 0x22 0x81 0x74 0x03 0xa8 0x00 0x00 0x00 0x05 0x6c 0xa2 0x46 0x09 0x6e VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex7, [p0, lf0, r24];NOPS; NOPX; VMAC.f dm2, dm2, ex3, ex0, r20 +.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2048 +.end_of_loop +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 2048 0x75 0x01 0x78 0x24 0x28 0x01 0x5b 0x00 0x00 0x01 0x45 0xad 0xe8 0x00 0x00 0xe1 VLDA.POP.576 ex0, [p1, lf1, r25, m5];VLDB.POP.576.3D ex8, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex5, ex1, ex6, r22; NOPV +.loop_nesting 1 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 2064 0x27 0x0c 0xff 0x97 0x25 0x9c 0x8b 0x00 0x85 0xab 0xe0 0xf6 PADDA.3D [p1], d1; PADDB [p7], m6; MOVS p5, p7; VSHUFFLE ex2, ex1, ex6, r21 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 2076 0x93 0x91 0x6f 0x17 0x22 0x3c 0x5b 0xc6 0xa1 0x2a 0x09 0x66 PADDB [p7], m4; MOVS p4, p7; VSHUFFLE ex4, ex7, ex8, r22; VMAC.f dm1, dm1, ex5, ex0, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 2088 0x73 0x91 0x6f 0x97 0x21 0xbc 0x57 0xc6 0xa0 0x04 0x09 0x66 PADDB [p7], m6; MOVS p3, p7; VSHUFFLE ex3, ex7, ex8, r21; VMAC.f dm0, dm0, ex2, ex0, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 2100 0x02 0x88 0xa0 0xe6 0xa3 0x68 0x09 0x62 MOV dj2, r17; VMAC.f dm3, dm3, ex4, ex0, r20 +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2108 0x00 0xb7 0x20 0x9b 0x80 0xe6 0xa2 0x46 0x09 0x4a PADDB.3D [p0], d2; MOV r2, dc5; VMAC.f dm2, dm2, ex3, ex0, r20 + 2118 0x19 0x0b 0x57 0xd8 VSHUFFLE ex2, ex1, ex6, r21 + 2122 0x1a 0x8b 0x5b 0xd8 VSHUFFLE ex5, ex1, ex6, r22 + 2126 0x01 0xbc 0x57 0xc6 0xa0 0x04 0x09 0x62 VSHUFFLE ex3, ex7, ex8, r21; VMAC.f dm0, dm0, ex2, ex0, r20 + 2134 0x02 0x3c 0x5b 0xc6 0xa1 0x2a 0x09 0x62 VSHUFFLE ex4, ex7, ex8, r22; VMAC.f dm1, dm1, ex5, ex0, r20 + 2142 0xa2 0x46 0x09 0x48 VMAC.f dm2, dm2, ex3, ex0, r20 + 2146 0xa3 0x68 0x09 0x48 VMAC.f dm3, dm3, ex4, ex0, r20 + 2150 0x00 0x00 NOPX + 2152 0x00 0x00 NOPX + 2154 0x0d 0x10 0x16 0x18 VCONV.bf16.fp32 x10, cml0 + 2158 0x0d 0x90 0x96 0x18 VCONV.bf16.fp32 x11, cml1 + 2162 0x12 0x1a 0xc0 0x34 0x01 0x1c 0x00 0x00 0x61 0x3a VCONV.bf16.fp32 x1, cmh1; JZ r26, #TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2272 +.delay_slot + 2172 0x0b 0x10 0x56 0x18 VCONV.bf16.fp32 x6, cmh0 +.delay_slot + 2176 0x09 0x11 0x96 0x18 VCONV.bf16.fp32 x2, cml3 +.delay_slot + 2180 0x0b 0x91 0xd6 0x18 VCONV.bf16.fp32 x7, cmh3 +.delay_slot + 2184 0x0c 0x11 0x16 0x18 VCONV.bf16.fp32 x8, cml2 +.delay_slot + 2188 0x0a 0x91 0x56 0x18 VCONV.bf16.fp32 x5, cmh2 + 2192 0x1d 0xdc 0xec 0xf8 VMAX_LT.bf16 x11, r16, x11, x9 + 2196 0x18 0x8c 0xec 0xf8 VMAX_LT.bf16 x1, r16, x1, x9 + 2200 0xac 0x5a 0x60 0x02 0xaa 0x76 0x70 0x02 VST x11, [p5, dj3]; VMAX_LT.bf16 x10, r16, x10, x9 + 2208 0x82 0x8a 0x60 0x00 0x5a 0x76 0x70 0x02 VST x1, [p4, #64]; VMAX_LT.bf16 x1, r16, x6, x9 + 2216 0xa0 0xd2 0x60 0x00 0x01 0x20 0x00 0x00 0x21 0x3a VST x10, [p5]; J #TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2304 +.delay_slot + 2226 0xa2 0x8a 0x60 0x02 0x8a 0x76 0x70 0x02 VST x1, [p5, #64]; VMAX_LT.bf16 x10, r16, x2, x9 +.delay_slot + 2234 0x18 0xbc 0xec 0xf8 VMAX_LT.bf16 x1, r16, x7, x9 +.delay_slot + 2238 0x6c 0x52 0x60 0x02 0xa2 0x76 0x70 0x02 VST x10, [p3, dj3]; VMAX_LT.bf16 x10, r16, x8, x9 +.delay_slot + 2246 0x00 0x2c 0xf7 0x14 0x53 0x01 0x56 0x76 0x72 0xba NOPA; VST x1, [p7, #64]; VMAX_LT.bf16 x5, r16, x5, x9 +.delay_slot + 2256 0x00 0x2c 0xf0 0x00 0x24 0xe2 0x93 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST x10, [p4, dj7]; NOPX; NOPM; NOPV +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2272 + 2272 0x0d 0x60 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p5, dj3] + 2276 0x0c 0x14 0xe3 0x18 VST.CONV.bf16.fp32 cmh1, [p4, #64] + 2280 0x0d 0x04 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p5] + 2284 0x0d 0x14 0x63 0x18 VST.CONV.bf16.fp32 cmh0, [p5, #64] + 2288 0x0b 0x61 0xa3 0x18 VST.CONV.bf16.fp32 cml3, [p3, dj3] + 2292 0x0f 0x15 0xe3 0x18 VST.CONV.bf16.fp32 cmh3, [p7, #64] + 2296 0x9c 0x24 0x60 0x00 0x01 0xa5 0x70 0x02 VST.CONV.bf16.fp32 cml2, [p4, dj7]; NOPM +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2304 + 2304 0x62 0xaa 0x6e 0x71 0x40 0x5c VST x5, [p3, #64]; JNZD r28, r28, p2 +.delay_slot + 2310 0x3f 0x8b 0x90 0x18 PADDB [p7], m4 +.delay_slot +.swstall delay_slot + 2314 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2316 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2318 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2320 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2336 +.loop_nesting 0 + 2336 0x07 0xed 0xf1 0x18 LDA r15, [sp, #-20] + 2340 0x07 0xf1 0x71 0x18 LDA r11, [sp, #-16] + 2344 0x07 0xf5 0x11 0x18 LDA r8, [sp, #-12] + 2348 0x07 0xeb 0x19 0x18 LDA p6, [sp, #-24] + 2352 0x07 0xfb 0x99 0x18 LDA p7, [sp, #-8] + 2356 0x07 0xfd 0x31 0x18 LDA r9, [sp, #-4] + 2360 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 2364 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 2370 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2372 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2374 0x00 0x00 NOPX +.label _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params__end last +.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_end0 last +.delay_slot +.swstall delay_slot + 2376 0x00 0x00 NOPX + +.undef global data conv2d_params + +.undef global data _ZN12me_primitive11control_rndE + +.undef global text _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams + +.text_segment_name +.text global 10 _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.function_start + 0 0x00 0x20 0x00 0x00 0x00 0x02 0x30 0x00 0x10 0xba MOVA r0, #1; MOVXM p4, #_ZL9curr_iter + 10 0x80 0xc2 0xd0 0x00 0x10 0x08 0x4b 0xd0 0x78 0xba LDA r16, [p4]; MOVX r1, #0; MOV r2, r15 + 20 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 26 0x0f 0xf0 0x55 0x98 ST r2, [sp, #-16] + 30 0x00 0x00 NOPX + 32 0x00 0x00 NOPX + 34 0x00 0x00 NOPX + 36 0x00 0x00 NOPX + 38 0x80 0x00 0x68 0x40 0x01 0x84 JNZ r16, #TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.delay_slot + 44 0x0f 0xfb 0x9d 0x98 ST p7, [sp, #-8] +.delay_slot + 48 0x0f 0xff 0x1d 0x98 ST p6, [sp, #-4] +.delay_slot + 52 0x0f 0xed 0x9d 0x98 ST p3, [sp, #-20] +.delay_slot + 56 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12] +.delay_slot + 60 0x00 0x00 0x07 0xa0 0x00 0x44 MOVXM r15, #conv2d_params +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 66 0xd0 0x91 0x60 0x00 0x00 0x03 0xb0 0x00 0x11 0x3a MOVS p6, p1; MOVXM p7, #_ZN12me_primitive11control_rndE +.aggressive_scheduled_block_id 1 +.noswbrkpt + 76 0xe0 0xc0 0xe1 0x8f 0x0b 0x00 0x00 0x03 0xb0 0x00 0x10 0x76 ST.s8 r16, [p7]; MOVS p1, r15; MOVXM p7, #_ZN12me_primitive11control_satE +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 88 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 90 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 92 0x00 0x00 0x00 0x00 0x01 0x04 JL #_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 98 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 100 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 104 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 108 0xe0 0xc2 0x30 0x03 0xb0 0x60 0x70 0x02 ST r16, [p7]; MOV p7, p0 +.delay_slot + 116 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x32 0x60 0x70 0xf6 NOPA; NOPB; NOPS; MOV p0, p2 +.return_address + 128 0x1a 0x67 0x85 0x98 ADD.NC p2, r15, #11 + 132 0x4f 0xc1 0x50 0x00 0x00 0x00 0xb0 0x00 0x10 0xba LDA.u8 r16, [p2], #7; MOVXM p1, #_ZL14num_depth_iter + 142 0x43 0xcf 0x50 0x00 0x00 0x00 0x30 0x00 0x10 0xba LDA.u16 r19, [p2], #2; MOVXM p0, #_ZL8num_iter + 152 0x02 0x06 0x3a 0x98 LDA.u16 r17, [p2] + 156 0x00 0x00 NOPX + 158 0x02 0x16 0x5a 0x98 LDA.u16 r18, [p2, #2] + 162 0x00 0x00 NOPX + 164 0x00 0x00 NOPX + 166 0x20 0xc2 0x30 0x00 0xb6 0x60 0x70 0x02 ST r16, [p1]; MOV p1, p6 + 174 0x14 0xe1 0x0f 0x98 MUL r16, r19, r16 + 178 0x00 0x00 NOPX + 180 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 + 184 0x00 0x00 NOPX + 186 0x14 0xa1 0x0f 0x98 MUL r16, r18, r16 + 190 0x00 0x00 NOPX + 192 0x00 0x2c 0xf0 0x00 0x20 0x06 0x11 0x80 0x00 0x00 0x37 0x60 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p0]; NOPX; MOV p0, p7; NOPV +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 + 208 0xfd 0xbe 0x20 0x00 0x00 0x03 0x30 0x00 0x10 0xba LDA r15, [sp, #-20]; MOVXM p6, #_ZL10depth_iter + 218 0xc0 0xc2 0xd0 0x00 0x00 0x01 0x30 0x00 0x10 0xba LDA r16, [p6]; MOVXM p2, #_ZL11total_iters + 228 0x40 0xc6 0xd0 0x00 0x00 0x03 0xb0 0x00 0x10 0xba LDA r17, [p2]; MOVXM p7, #_ZL9curr_iter + 238 0x07 0x06 0x56 0x98 LDA r18, [p7] + 242 0x00 0x00 NOPX + 244 0x00 0x00 NOPX + 246 0x00 0x00 NOPX + 248 0x00 0x00 NOPX + 250 0x80 0x00 0xa8 0x40 0x01 0x84 JNZ r16, #TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_336 +.delay_slot + 256 0x14 0x62 0x07 0x18 ADD r17, r17, #1 +.delay_slot + 260 0x40 0xc6 0x39 0x44 0x0e 0x5c ST r17, [p2]; ADD r17, r18, #1 +.delay_slot + 266 0x14 0x26 0x07 0x18 ADD r19, r16, #1 +.delay_slot + 270 0x0e 0x06 0x71 0x98 ST r19, [p6] +.delay_slot + 274 0x0f 0x06 0x31 0x98 ST r17, [p7] + 278 0x1a 0x67 0x86 0x18 ADD.NC p2, r15, #12 + 282 0x02 0xff 0x76 0x98 LDA r27, [p2], #-4 + 286 0x02 0xfe 0x16 0x98 LDA r16, [p2], #-4 + 290 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 294 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.noswbrkpt + 296 0x02 0x46 0x16 0x98 LDA r16, [p2, #16] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 300 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 302 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 304 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 306 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 308 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 312 0x0a 0x06 0x11 0x98 ST r16, [p2] + 316 0x17 0xe2 0xfd 0x18 MOVX r17, #-1 + 320 0x00 0x00 NOPX + 322 0x00 0x00 NOPX + 324 0x00 0x00 NOPX + 326 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x04 0x13 0x18 0x7a NOPA; NOPS; ACQ r16, r17 +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_336 +.no_stack_arguments + 336 0x00 0x00 0x00 0x00 0x01 0x04 JL #_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params +.delay_slot + 342 0x00 0x00 0x06 0xc0 0x00 0x44 MOVXM p3, #conv2d_params +.delay_slot +.swstall delay_slot + 348 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 350 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 352 0x00 0x00 NOPX +.delay_slot + 354 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x53 0x3d 0x07 0x00 0x00 0x1c 0x2e NOPA; NOPS; MOV p2, r15; NOPV +.return_address + 368 0xc0 0xc2 0xd0 0x00 0x00 0x00 0xb0 0x00 0x10 0xba LDA r16, [p6]; MOVXM p1, #_ZL14num_depth_iter + 378 0x01 0x06 0x36 0x98 LDA r17, [p1] + 382 0x07 0xf0 0x11 0x18 LDA r0, [sp, #-16] + 386 0x00 0x00 NOPX + 388 0x00 0x00 NOPX + 390 0x00 0x00 NOPX + 392 0x00 0x00 NOPX + 394 0x00 0x00 NOPX + 396 0x14 0x61 0x08 0x98 NE r16, r17, r16 + 400 0x80 0x00 0xf0 0x40 0x01 0x84 JNZ r16, #TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_480 +.delay_slot + 406 0x10 0x30 0x01 0x18 MOVX r24, #0 +.delay_slot +.swstall delay_slot + 410 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 412 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 414 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 416 0x00 0x00 NOPX + 418 0x04 0x00 0xa2 0xcf 0x14 0x24 MOVX r16, #1; ADD.NC p1, r15, #20 + 424 0x01 0x06 0x36 0x98 LDA r17, [p1] + 428 0x00 0x00 NOPX + 430 0x00 0x00 NOPX + 432 0x00 0x00 NOPX + 434 0x00 0x00 NOPX + 436 0x00 0x00 NOPX + 438 0x00 0x00 NOPX + 440 0x14 0x51 0x08 0x18 REL r17, r16 + 444 0x3c 0xc6 0xdc 0x0e 0x23 0x0c LDA r17, [p1, #-8]; ST r24, [p6] + 450 0x00 0x00 NOPX + 452 0x00 0x00 NOPX + 454 0x00 0x00 NOPX + 456 0x00 0x00 NOPX + 458 0x00 0x00 NOPX + 460 0x00 0x00 NOPX + 462 0x14 0x21 0x11 0x98 SUB r16, r16, r17 + 466 0x00 0x2c 0xf3 0xcc 0x23 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; ST r16, [p1, #-8]; NOPM; NOPV +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_480 + 480 0xe0 0xc2 0xd0 0x00 0x00 0x03 0x30 0x00 0x10 0xba LDA r16, [p7]; MOVXM p6, #_ZL8num_iter + 490 0x06 0x06 0x36 0x98 LDA r17, [p6] + 494 0x07 0xf8 0x99 0x18 LDA p1, [sp, #-8] + 498 0x07 0xf4 0x39 0x18 LDA lr, [sp, #-12] + 502 0x00 0x00 NOPX + 504 0x00 0x00 NOPX + 506 0x00 0x00 NOPX + 508 0x00 0x00 NOPX + 510 0x14 0x61 0x08 0x98 NE r16, r17, r16 + 514 0x80 0x01 0x10 0x40 0x01 0x84 JNZ r16, #TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_544 +.delay_slot +.swstall delay_slot + 520 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 522 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 524 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 526 0x00 0x00 NOPX +.delay_slot + 528 0x1b 0xd0 0x20 0xf8 MOV r15, r0 + 532 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x03 0x83 0x88 0xc1 0x36 NOPA; NOPB; ST r24, [p7]; NOPX +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_544 + 544 0x07 0xff 0x19 0x18 LDA p6, [sp, #-4] + 548 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 552 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 558 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 560 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 562 0x00 0x00 NOPX +.label _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end last +.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 last +.delay_slot + 564 0x0f 0x84 0x8b 0x18 MOVS p7, p1 + +.undef global data _ZN12me_primitive11control_satE + +.undef global data _ZN12me_primitive11control_rndE + +.undef global data conv2d_params + +.undef global text _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh + +.undef global text _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params + +.text_segment_name +.text weak 10 _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.function_start + 0 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 4 0x00 0x00 0x00 0xc0 0x40 0x44 MOVXM p0, #(add1d_attribute_broadcasting_params + 32) +.delay_slot + 10 0x18 0x00 0x80 0xb8 MOV m0, #64 +.delay_slot + 14 0x08 0x04 0x01 0x98 ST m0, [p0] +.delay_slot + 18 0x08 0x14 0x01 0x98 ST m0, [p0, #4] +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E__end last +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_end0 last +.delay_slot +.swstall delay_slot + 22 0x00 0x00 NOPX + +.undef global data add1d_attribute_broadcasting_params + +.text_segment_name +.text weak 10 _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.function_start + 0 0x23 0x85 0xd0 0x00 0x00 0x00 0x30 0x00 0x10 0xba LDA el0, [p1], #4; MOVXM p0, #add1d_attribute_broadcasting_params + 10 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 16 0x0f 0xf8 0x3d 0x98 ST lr, [sp, #-8] + 20 0x0f 0xfd 0xf5 0x98 ST r15, [sp, #-4] + 24 0x00 0x00 NOPX + 26 0x00 0x00 NOPX + 28 0x00 0x00 NOPX + 30 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 34 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 38 0x00 0x00 NOPX + 40 0x00 0x00 NOPX + 42 0x00 0x00 NOPX + 44 0x00 0x00 NOPX + 46 0x00 0x00 NOPX + 48 0x00 0x00 NOPX + 50 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 54 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 58 0x00 0x00 NOPX + 60 0x00 0x00 NOPX + 62 0x00 0x00 NOPX + 64 0x00 0x00 NOPX + 66 0x00 0x00 NOPX + 68 0x00 0x00 NOPX + 70 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 74 0x01 0x14 0x2e 0x98 LDA el0, [p1, #4] + 78 0x00 0x00 NOPX + 80 0x00 0x00 NOPX +.no_stack_arguments + 82 0x00 0x00 0x00 0x00 0x01 0x04 JL #_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E +.delay_slot +.swstall delay_slot + 88 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 90 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 92 0x00 0x00 NOPX +.delay_slot + 94 0x08 0xdc 0x29 0x98 ST el0, [p0], #-12 +.delay_slot + 98 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x5e 0x86 0x07 0x00 0x00 0x1c 0x2e NOPA; NOPS; MOV r15, p0; NOPV +.return_address + 112 0xff 0x07 0x20 0x01 0x00 0x68 0x33 0xc4 0x08 0xba LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16 + 122 0x01 0xe2 0x80 0x01 0x80 0x08 0x07 0xfd 0x58 0xba MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 + 132 0xff 0xbe 0x20 0x0a 0x11 0x80 0x07 0xa0 0x01 0x7a LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128 + 142 0x00 0x06 0x4a 0x98 LDA.u8 r18, [p0] + 146 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 148 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 150 0x00 0x02 0x17 0x18 ST.s16 r16, [p0, dj0] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 154 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 158 0x10 0x22 0x05 0x18 MOVX r17, #1 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 162 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 168 0x14 0x77 0x27 0x98 EQ r27, r17, r18 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 172 0x14 0x21 0x82 0x18 SEL.EQZ r16, r16, r24, r27 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv__end last +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 last +.delay_slot +.swstall delay_slot + 176 0x00 0x00 NOPX + +.undef global data add1d_attribute_broadcasting_params + +.undef global text _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E + +.text_segment_name +.text weak 10 _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.function_start + 0 0x02 0x80 0x80 0x00 0x00 0x00 0x30 0x06 0x10 0xba MOVA m0, #20; MOVXM p0, #(add1d_attribute_broadcasting_params + 12) + 10 0x01 0x01 0x50 0x00 0x20 0x28 0x28 0x06 0x58 0xba LDA.u8 r0, [p0], m0; MOVX r2, #1; MOV r1, #6 + 20 0x00 0x00 NOPX + 22 0x00 0x00 NOPX + 24 0x00 0x00 NOPX + 26 0x00 0x00 NOPX + 28 0x00 0x00 NOPX + 30 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 34 0x10 0x06 0xf0 0x18 NEZ r3, r0 +.delay_slot + 38 0x10 0x80 0x08 0x98 NE r0, r2, r0 +.delay_slot + 42 0x10 0x00 0x1d 0x98 LSHL r0, r0, r1 +.delay_slot + 46 0x02 0x82 0x31 0x88 0x3b 0x5c ST r0, [p0, #4]; LSHL r2, r3, r1 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E__end last +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_end0 last +.delay_slot + 52 0x08 0x04 0x51 0x98 ST r2, [p0] + +.undef global data add1d_attribute_broadcasting_params + +.text_segment_name +.text weak 10 _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.function_start + 0 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 6 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4] +.no_stack_arguments + 10 0x00 0x00 0x00 0x00 0x01 0x04 JL #_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv +.delay_slot + 16 0x00 0x00 0x00 0xc0 0x00 0x44 MOVXM p0, #add1d_attribute_broadcasting_params +.delay_slot +.swstall delay_slot + 22 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 24 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 26 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 28 0x00 0x01 0x67 0x98 NOPA +.return_address + 32 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] +.tail_call + 36 0x00 0x00 0x00 0x00 0x00 0x84 J #_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E +.delay_slot + 42 0x00 0x00 0x00 0xc0 0x00 0x44 MOVXM p0, #add1d_attribute_broadcasting_params +.delay_slot + 48 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 54 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 56 0x00 0x00 NOPX +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv__end last +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 last +.delay_slot +.swstall delay_slot + 58 0x00 0x00 NOPX + +.undef global data add1d_attribute_broadcasting_params + +.undef global text _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv + +.undef global text _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E + +.text_segment_name +.text weak 10 _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.function_start + 0 0x02 0x80 0x80 0x00 0x00 0x01 0xb0 0x00 0x10 0xba MOVA m0, #20; MOVXM p3, #add1d_attribute_broadcasting_params + 10 0x03 0x3c 0x16 0x98 LDA r0, [p3], #12 + 14 0x61 0x05 0x58 0xcd 0x81 0xd4 LDA.u8 r1, [p3], m0; MOV p4, p3 + 20 0x00 0x00 NOPX + 22 0x00 0x00 NOPX + 24 0x00 0x00 NOPX + 26 0x00 0x00 NOPX + 28 0x00 0x00 NOPX + 30 0x00 0x00 NOPX + 32 0x08 0x00 0x30 0x40 0x01 0x84 JNZ r1, #TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_96 +.delay_slot + 38 0x17 0xc4 0xe9 0x18 MOVX r2, #-6 +.delay_slot + 42 0x10 0x00 0x2d 0x98 LSHL r0, r0, r2 +.delay_slot +.swstall delay_slot + 46 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 48 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 50 0x00 0x00 NOPX + 52 0x00 0x04 0x32 0x98 LDA.s16 r1, [p0] + 56 0x00 0x00 NOPX + 58 0x00 0x00 NOPX + 60 0x00 0x00 NOPX + 62 0x00 0x00 0x40 0x00 0x00 0x84 J #TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_128 +.delay_slot +.swstall delay_slot + 68 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 70 0x00 0x00 NOPX +.delay_slot + 72 0x18 0x05 0x72 0xf8 VBCST.16 x0, r1 +.delay_slot +.swstall delay_slot + 76 0x00 0x01 0x67 0x98 NOPA +.delay_slot + 80 0x00 0x2c 0xf0 0x00 0x20 0x04 0x13 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST x0, [p0]; NOPX; NOPM; NOPV +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_96 + 96 0x01 0x04 0x32 0x98 LDA.s16 r1, [p1] + 100 0x00 0x00 NOPX + 102 0x00 0x00 NOPX + 104 0x00 0x00 NOPX + 106 0x00 0x00 NOPX + 108 0x00 0x00 NOPX + 110 0x00 0x00 NOPX + 112 0x18 0x05 0x72 0xf8 VBCST.16 x0, r1 + 116 0x00 0x00 NOPX + 118 0x00 0x2c 0xf1 0x04 0x13 0x00 0x00 0x00 0x00 0x7a NOPA; VST x0, [p1]; NOPX +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_128 + 128 0x8a 0x80 0xd0 0x00 0x07 0x8a 0xb8 0x3f 0x48 0xba LDA m0, [p4, #20]; MOVX r0, #60; ADD.NC lc, r0, #-3 + 138 0x62 0x90 0xd0 0x00 0x00 0x00 0x78 0x78 0x10 0xba LDA m1, [p3, #4]; MOVXM ls, #(ZLS_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_240 + 0) + 148 0x00 0x00 0x06 0xe2 0x20 0x44 MOVXM le, #(ZLE_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_272 + 0) + 154 0x00 0x00 0x08 0xc0 0x00 0x44 MOVXM p4, #_ZN12me_primitive11control_rndE + 160 0x04 0x04 0x22 0x98 LDA.s8 r1, [p4] + 164 0x00 0x00 NOPX + 166 0x00 0x00 NOPX + 168 0x00 0x08 0xab 0x98 VLDA.CONV.fp32.bf16 cml1, [p0], m0 + 172 0x01 0x29 0x2b 0x98 VLDA.CONV.fp32.bf16 cml2, [p1], m1 + 176 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 180 0x01 0x2a 0x2b 0x98 VLDA.CONV.fp32.bf16 cml4, [p1], m1 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 184 0x01 0x15 0x70 0xf5 0x00 0x2c VLDA.CONV.fp32.bf16 cml1, [p0], m0; MOVX crRnd, r1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 190 0x25 0x25 0x70 0x04 0x03 0x28 0x3d 0x62 VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 198 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 202 0x25 0x45 0x70 0x04 0x04 0x10 0x3d 0x62 VLDA.CONV.fp32.bf16 cml4, [p1], m1; VADD.f dm4, dm0, dm4, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 210 0x00 0x08 0xab 0x98 VLDA.CONV.fp32.bf16 cml1, [p0], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 214 0x25 0x25 0x70 0x04 0x03 0x28 0x3d 0x62 VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 222 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 226 0x25 0x45 0x74 0x3b 0x46 0x00 0x00 0x40 0x1a 0x57 0x04 0x10 0x3d 0x6e VLDA.CONV.fp32.bf16 cml4, [p1], m1; VST.CONV.bf16.fp32 cml3, [p2], #64; NOPM; VADD.f dm4, dm0, dm4, r0 +.label ZLS_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_240 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 240 0x00 0x08 0xab 0x98 VLDA.CONV.fp32.bf16 cml1, [p0], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 244 0x25 0x25 0x70 0x00 0x21 0x0f 0x11 0x8e 0x03 0x28 0x3d 0x66 VLDA.CONV.fp32.bf16 cml2, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 256 0x01 0x05 0x70 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLE_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_272 +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 272 0x25 0x45 0x70 0x00 0x22 0x1d 0xa3 0x00 0x00 0x00 0x01 0xa5 0x78 0x20 0x81 0xeb VLDA.CONV.fp32.bf16 cml4, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml3, [p2], #64;NOPX; NOPM; VADD.f dm4, dm0, dm4, r0 +.loop_nesting 0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 288 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 290 0x43 0xc4 0x60 0x02 0x03 0x28 0x3d 0x62 VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 298 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 300 0x43 0xb4 0x60 0x02 0x04 0x10 0x3d 0x62 VST.CONV.bf16.fp32 cml3, [p2], #64; VADD.f dm4, dm0, dm4, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 308 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 310 0x43 0xc4 0x60 0x50 0x00 0x5c VST.CONV.bf16.fp32 cml4, [p2], #64;RET lr +.delay_slot +.swstall delay_slot + 316 0x00 0x00 NOPX +.delay_slot + 318 0x0a 0x1d 0xa3 0x18 VST.CONV.bf16.fp32 cml3, [p2], #64 +.delay_slot +.swstall delay_slot + 322 0x00 0x00 NOPX +.delay_slot + 324 0x0a 0x1e 0x23 0x18 VST.CONV.bf16.fp32 cml4, [p2], #64 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end last +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0 last +.delay_slot +.swstall delay_slot + 328 0x00 0x00 NOPX + +.undef global data add1d_attribute_broadcasting_params + +.undef global data _ZN12me_primitive11control_rndE + +.text_segment_name +.text weak 10 _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.function_start + 0 0x50 0x91 0x60 0x00 0x04 0x00 0x00 0x00 0x71 0x3a MOVS p2, p1; PADDXM [sp], #128 + 10 0xff 0x87 0xb0 0x02 0x08 0x60 0x70 0x02 ST lr, [sp, #-4]; MOV r16, p0 + 18 0x1c 0x55 0xe0 0xf8 MOV r17, sp + 22 0x00 0x00 0x06 0xc0 0x18 0x44 MOVXM p3, #(add1d_attribute_broadcasting_params + 12) + 28 0x65 0xed 0x50 0xd1 0x80 0x14 LDA.u8 r27, [p3], #2; ADD.NC p0, r17, #-128 + 34 0x73 0xca 0x50 0x0e 0x56 0x0c LDA.s16 r18, [p3], #-14; VST sfh, [p0] + 40 0x00 0x06 0x57 0x18 ST.s16 r18, [p0] + 44 0x00 0x00 NOPX + 46 0x00 0x00 NOPX +.no_stack_arguments + 48 0x00 0x00 0x00 0x00 0x01 0x04 JL #_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.delay_slot + 54 0x1c 0x50 0xc0 0xf8 MOV r17, p0 +.delay_slot +.swstall delay_slot + 58 0x00 0x00 NOPX +.delay_slot + 60 0x14 0x25 0x12 0x18 SEL.EQZ r18, r16, r17, r27 +.delay_slot + 64 0x8c 0x20 0x42 0xd2 0x41 0xe4 SEL.EQZ r16, r17, r16, r27; MOV p1, r18 +.delay_slot + 70 0x00 0x2c 0xf0 0x00 0x10 0x00 0x34 0x10 0x7e 0xba NOPA; NOPB; MOV p0, r16 +.return_address + 80 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] + 84 0x00 0x00 NOPX + 86 0x00 0x00 NOPX + 88 0x00 0x00 NOPX + 90 0x00 0x00 NOPX + 92 0x00 0x00 NOPX + 94 0x00 0x00 NOPX + 96 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 100 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128 +.delay_slot +.swstall delay_slot + 106 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 108 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 110 0x00 0x00 NOPX +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E__end last +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_end0 last +.delay_slot +.swstall delay_slot + 112 0x00 0x00 NOPX + +.undef global data add1d_attribute_broadcasting_params + +.undef global text _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E + +.text_segment_name +.text global 10 _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.function_start + 0 0x00 0x00 0x06 0xc0 0x00 0x44 MOVXM p3, #_ZL9curr_iter + 6 0x60 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p3]; MOV r17, CORE_ID + 12 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 18 0xff 0x63 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p6, [sp, #-8]; MOV r0, r15 + 26 0xff 0x82 0xb0 0x00 0x00 0x03 0x30 0x00 0x11 0x3a ST r0, [sp, #-4]; MOVXM p6, #_ZL8core_row + 36 0x1b 0xd4 0xc0 0xf8 MOV r15, p2 + 40 0x00 0x00 NOPX + 42 0x00 0x00 NOPX + 44 0x80 0x00 0x68 0x40 0x01 0x84 JNZ r16, #TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.delay_slot + 50 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17 +.delay_slot + 54 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 58 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12] +.delay_slot + 62 0xc0 0xc6 0x30 0x03 0x30 0x60 0x70 0x02 ST r17, [p6]; MOV p6, p0 +.delay_slot + 70 0x00 0x00 0x00 0xc0 0x00 0x44 MOVXM p0, #add1d_attribute_broadcasting_params +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 76 0x00 0x00 0x04 0xc0 0x00 0x44 MOVXM p2, #_ZN12me_primitive11control_rndE +.aggressive_scheduled_block_id 1 +.noswbrkpt + 82 0x40 0xc0 0xe0 0x00 0x00 0x01 0x30 0x00 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #_ZN12me_primitive11control_satE +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 92 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 94 0x00 0x00 0x00 0x00 0x01 0x04 JL #_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 100 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 102 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 104 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 108 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 112 0x00 0x2c 0xf0 0x00 0x22 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV +.return_address + 128 0x00 0x00 0x04 0xc0 0x00 0x44 MOVXM p2, #_ZL8core_row + 134 0x40 0xc2 0xd0 0x00 0x00 0x01 0x30 0x00 0x10 0xba LDA r16, [p2]; MOVXM p2, #add1d_attribute_broadcasting_params + 144 0x40 0xc6 0xd0 0x00 0x00 0x01 0x30 0x00 0x10 0xba LDA r17, [p2]; MOVXM p2, #add1d_attribute_broadcasting_params + 154 0x4a 0xcb 0x50 0x00 0x00 0x00 0xb0 0x00 0x10 0xba LDA.u16 r18, [p2, #10]; MOVXM p1, #_ZL11ifm1_offset + 164 0x00 0x00 NOPX + 166 0x00 0x00 NOPX + 168 0x00 0x00 0x70 0x00 0x00 0x84 J #TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 +.delay_slot + 174 0x00 0x00 0x00 0xc0 0x00 0x44 MOVXM p0, #_ZL8num_iter +.delay_slot +.swstall delay_slot + 180 0x00 0x00 NOPX +.delay_slot + 182 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 +.delay_slot + 186 0x00 0x2c 0xf0 0x0c 0xa3 0x0c NOPA; ST r18, [p0] +.delay_slot + 192 0x00 0x2c 0xf0 0x00 0x21 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 + 208 0x00 0x2c 0xf0 0x00 0x22 0x80 0x8b 0x00 0x00 0x00 0xb0 0x00 0x10 0x00 0x00 0xe1 NOPA; NOPB; MOVS p2, p0; MOVXM p1, #_ZL11ifm1_offset; NOPV +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 + 224 0x73 0x91 0x60 0x03 0xb3 0xc3 0x00 0x02 MOVS p3, p7; ADD.NC p7, r15, #12 + 232 0xff 0xee 0xd0 0x00 0x00 0x00 0x30 0x00 0x10 0xba LDA r27, [p7], #-4; MOVXM p0, #_ZL9curr_iter + 242 0x07 0xfe 0x16 0x98 LDA r16, [p7], #-4 + 246 0x07 0xfe 0x36 0x98 LDA r17, [p7], #-4 + 250 0x07 0x46 0x56 0x98 LDA r18, [p7, #16] + 254 0x00 0x00 NOPX + 256 0x00 0x00 NOPX + 258 0x00 0x00 NOPX + 260 0x00 0x00 NOPX + 262 0x00 0x00 NOPX + 264 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 268 0x0f 0x06 0x11 0x98 ST r16, [p7] + 272 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 276 0x00 0x00 NOPX + 278 0x00 0x00 NOPX + 280 0x00 0x00 NOPX + 282 0x14 0x93 0x08 0x18 ACQ r18, r16 + 286 0x04 0x00 0xa7 0xad 0x81 0xe4 MOVX r16, #1; MOV r15, p3 + 292 0x00 0x00 NOPX + 294 0x00 0x00 NOPX + 296 0x00 0x06 0x36 0x98 LDA r17, [p0] + 300 0xc0 0xca 0xdc 0xdd 0x81 0xd4 LDA r18, [p6]; MOV p6, p7 + 306 0x01 0x06 0x76 0x98 LDA r19, [p1] + 310 0x07 0x5c 0x9e 0x98 LDA p1, [p7], #20 + 314 0x00 0x00 NOPX +.no_stack_arguments + 316 0x00 0x00 0x00 0x00 0x01 0x04 JL #_ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E +.delay_slot +.swstall delay_slot + 322 0x00 0x00 NOPX +.delay_slot + 324 0x14 0x62 0x07 0x18 ADD r17, r17, #1 +.delay_slot + 328 0x08 0x06 0x31 0x98 ST r17, [p0] +.delay_slot + 332 0x14 0xe1 0x0d 0x98 LSHL r16, r19, r16 +.delay_slot + 336 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xa0 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV +.return_address + 352 0xca 0xc6 0xd0 0x00 0x00 0x03 0x30 0x00 0x10 0xba LDA r17, [p6, #20]; MOVXM p6, #_ZL9curr_iter + 362 0x10 0x20 0x05 0x18 MOVX r16, #1 + 366 0x00 0x00 NOPX + 368 0x00 0x00 NOPX + 370 0x00 0x00 NOPX + 372 0x00 0x00 NOPX + 374 0x00 0x00 NOPX + 376 0x14 0x51 0x08 0x18 REL r17, r16 + 380 0xfc 0xce 0xd0 0x00 0x00 0x01 0x30 0x00 0x10 0xba LDA r19, [p7, #-8]; MOVXM p2, #_ZL8num_iter + 390 0x06 0x06 0x36 0x98 LDA r17, [p6] + 394 0x02 0x06 0x56 0x98 LDA r18, [p2] + 398 0x00 0x00 NOPX + 400 0x00 0x00 NOPX + 402 0x00 0x00 NOPX + 404 0x00 0x00 NOPX + 406 0x14 0x21 0x31 0x98 SUB r16, r16, r19 + 410 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8] + 414 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 418 0x80 0x00 0xe0 0x40 0x01 0x84 JNZ r16, #TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.delay_slot +.swstall delay_slot + 424 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 426 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 428 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 430 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 432 0x00 0x00 NOPX + 434 0x10 0x20 0x01 0x18 MOVX r16, #0 + 438 0x00 0x2c 0xf6 0x06 0x11 0x80 0x00 0x00 0x00 0x7a NOPA; ST r16, [p6]; NOPX +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 + 448 0x07 0xf4 0x39 0x18 LDA lr, [sp, #-12] + 452 0x07 0xfb 0x19 0x18 LDA p6, [sp, #-8] + 456 0x00 0x00 NOPX + 458 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 460 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.noswbrkpt + 462 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 466 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 468 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 472 0x1f 0x67 0xa0 0xf8 MOV p7, r15 +.delay_slot + 476 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 482 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 484 0x00 0x00 NOPX +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end last +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 last +.delay_slot +.swstall delay_slot + 486 0x00 0x00 NOPX + +.undef global data _ZN12me_primitive11control_satE + +.undef global data _ZN12me_primitive11control_rndE + +.undef global data add1d_attribute_broadcasting_params + +.undef global text _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv + +.undef global text _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E + +.text_segment_name +.text weak 10 _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.function_start + 0 0x23 0x85 0xd0 0x00 0x00 0x00 0x30 0x00 0x10 0xba LDA el0, [p1], #4; MOVXM p0, #mul1d_attribute_broadcasting_params + 10 0xf0 0x00 0x00 0x08 0x10 0x0b 0x08 0x00 0x58 0xba MOVA r0, #-128; MOVX r1, #256; MOV r24, #0 + 20 0x00 0x00 NOPX + 22 0x00 0x00 NOPX + 24 0x00 0x00 NOPX + 26 0x00 0x00 NOPX + 28 0x00 0x00 NOPX + 30 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 34 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 38 0x00 0x00 NOPX + 40 0x00 0x00 NOPX + 42 0x00 0x00 NOPX + 44 0x00 0x00 NOPX + 46 0x00 0x00 NOPX + 48 0x00 0x00 NOPX + 50 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 54 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 58 0x00 0x00 NOPX + 60 0x00 0x00 NOPX + 62 0x00 0x00 NOPX + 64 0x00 0x00 NOPX + 66 0x00 0x00 NOPX + 68 0x00 0x00 NOPX + 70 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 74 0x01 0x14 0x76 0x98 LDA r3, [p1, #4] + 78 0x00 0x00 NOPX + 80 0x00 0x00 NOPX + 82 0x00 0x00 NOPX + 84 0x00 0x00 NOPX + 86 0x00 0x00 NOPX + 88 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 90 0x08 0x4c 0x71 0x98 ST r3, [p0], #16 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 94 0x00 0x04 0x17 0x18 ST.s16 r0, [p0] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 98 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 102 0x00 0x00 0xf1 0x3e 0x00 0x44 MOVXM r2, #65280 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 108 0x10 0xc4 0x24 0x98 AND r2, r3, r2 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 112 0x10 0x76 0x27 0x98 EQ r27, r1, r2 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 116 0x10 0x01 0x82 0x18 SEL.EQZ r0, r0, r24, r27 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv__end last +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_end0 last +.delay_slot +.swstall delay_slot + 120 0x00 0x00 NOPX + +.undef global data mul1d_attribute_broadcasting_params + +.text_segment_name +.text weak 10 _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.function_start + 0 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 6 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4] +.no_stack_arguments + 10 0x00 0x00 0x00 0x00 0x01 0x04 JL #_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv +.delay_slot + 16 0x00 0x00 0x00 0xc0 0x00 0x44 MOVXM p0, #mul1d_attribute_broadcasting_params +.delay_slot +.swstall delay_slot + 22 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 24 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 26 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 28 0x00 0x01 0x67 0x98 NOPA +.return_address + 32 0xff 0x87 0x20 0x00 0x00 0x00 0xb0 0x00 0x10 0xba LDA lr, [sp, #-4]; MOVXM p1, #mul1d_attribute_broadcasting_params + 42 0x00 0x00 NOPX + 44 0x00 0x00 NOPX + 46 0x00 0x00 NOPX + 48 0x00 0x00 NOPX + 50 0x00 0x00 NOPX + 52 0x00 0x00 NOPX + 54 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 58 0x10 0x20 0x09 0x18 MOVX r16, #2 +.delay_slot + 62 0x09 0x46 0x11 0x98 ST r16, [p1, #16] +.delay_slot + 66 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 72 0x00 0x00 NOPX +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv__end last +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 last +.delay_slot +.swstall delay_slot + 74 0x00 0x00 NOPX + +.undef global data mul1d_attribute_broadcasting_params + +.undef global text _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv + +.undef global data _ZN12me_primitive11control_rndE + +.text_segment_name +.text weak 10 _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_begin0 +.function_start + 0 0x00 0x00 0x04 0xc0 0x18 0x44 MOVXM p2, #(mul1d_attribute_broadcasting_params + 12) + 6 0x40 0x81 0x52 0x84 0x8b 0x00 0x00 0x01 0xb0 0x00 0x10 0x76 LDA.u8 r0, [p2]; MOVS p2, p1; MOVXM p3, #mul1d_attribute_broadcasting_params + 18 0x00 0x00 NOPX + 20 0x00 0x00 NOPX + 22 0x00 0x00 NOPX + 24 0x00 0x00 NOPX + 26 0x00 0x00 NOPX + 28 0x00 0x00 NOPX + 30 0x00 0x00 0x30 0x00 0x01 0x84 JZ r0, #TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_96 +.delay_slot + 36 0x18 0xc1 0xe0 0xf8 MOV dc0, lr +.delay_slot + 40 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 +.delay_slot + 46 0x18 0x55 0xe0 0xf8 MOV r1, sp +.delay_slot + 50 0x19 0x60 0xe0 0x18 ADD.NC p1, r1, #-64 +.delay_slot + 54 0x09 0x07 0x2b 0x18 VST sfh, [p1] +.no_stack_arguments + 58 0x00 0x00 0x00 0x00 0x01 0x04 JL #_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE +.delay_slot +.swstall delay_slot + 64 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 66 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 68 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 70 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 72 0x00 0x2c 0xf0 0x04 0x00 0x00 0x1c 0x22 NOPA; NOPV +.return_address + 80 0x00 0x00 0x40 0x00 0x00 0x84 J #TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_128 +.delay_slot +.swstall delay_slot + 86 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 88 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 90 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 92 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 94 0x00 0x00 NOPX +.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_96 +.no_stack_arguments + 96 0x00 0x00 0x00 0x00 0x01 0x04 JL #_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE +.delay_slot + 102 0x10 0x91 0x60 0x00 0xb0 0x60 0x70 0x02 MOVS p0, p1; MOV p1, p0 +.delay_slot +.swstall delay_slot + 110 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 112 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 114 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 116 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_128 +.return_address + 128 0x1f 0x71 0x80 0xf8 MOV lr, dc0 + 132 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 136 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 142 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 144 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 146 0x00 0x00 NOPX +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E__end last +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_end0 last +.delay_slot +.swstall delay_slot + 148 0x00 0x00 NOPX + +.undef global data mul1d_attribute_broadcasting_params + +.text_segment_name +.text global 10 _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.function_start + 0 0x00 0x00 0x06 0xc0 0x00 0x44 MOVXM p3, #_ZL9curr_iter + 6 0x60 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p3]; MOV r17, CORE_ID + 12 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 18 0xff 0x63 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p6, [sp, #-8]; MOV r0, r15 + 26 0xff 0x82 0xb0 0x00 0x00 0x03 0x30 0x00 0x11 0x3a ST r0, [sp, #-4]; MOVXM p6, #_ZL8core_row + 36 0x1b 0xd4 0xc0 0xf8 MOV r15, p2 + 40 0x00 0x00 NOPX + 42 0x00 0x00 NOPX + 44 0x80 0x00 0x68 0x40 0x01 0x84 JNZ r16, #TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.delay_slot + 50 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17 +.delay_slot + 54 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 58 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12] +.delay_slot + 62 0xc0 0xc6 0x30 0x03 0x30 0x60 0x70 0x02 ST r17, [p6]; MOV p6, p0 +.delay_slot + 70 0x00 0x00 0x00 0xc0 0x00 0x44 MOVXM p0, #mul1d_attribute_broadcasting_params +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 76 0x00 0x00 0x04 0xc0 0x00 0x44 MOVXM p2, #_ZN12me_primitive11control_rndE +.aggressive_scheduled_block_id 1 +.noswbrkpt + 82 0x40 0xc0 0xe0 0x00 0x00 0x01 0x30 0x00 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #_ZN12me_primitive11control_satE +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 92 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 94 0x00 0x00 0x00 0x00 0x01 0x04 JL #_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 100 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 102 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 104 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 108 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 112 0x00 0x2c 0xf0 0x00 0x22 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV +.return_address + 128 0x00 0x00 0x04 0xc0 0x00 0x44 MOVXM p2, #_ZL8core_row + 134 0x40 0xc2 0xd0 0x00 0x00 0x01 0x30 0x00 0x10 0xba LDA r16, [p2]; MOVXM p2, #mul1d_attribute_broadcasting_params + 144 0x40 0xc6 0xd0 0x00 0x00 0x01 0x30 0x00 0x10 0xba LDA r17, [p2]; MOVXM p2, #mul1d_attribute_broadcasting_params + 154 0x4a 0xcb 0x50 0x00 0x00 0x00 0xb0 0x00 0x10 0xba LDA.u16 r18, [p2, #10]; MOVXM p1, #_ZL11ifm1_offset + 164 0x00 0x00 NOPX + 166 0x00 0x00 NOPX + 168 0x00 0x00 0x70 0x00 0x00 0x84 J #TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 +.delay_slot + 174 0x00 0x00 0x00 0xc0 0x00 0x44 MOVXM p0, #_ZL8num_iter +.delay_slot +.swstall delay_slot + 180 0x00 0x00 NOPX +.delay_slot + 182 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 +.delay_slot + 186 0x00 0x2c 0xf0 0x0c 0xa3 0x0c NOPA; ST r18, [p0] +.delay_slot + 192 0x00 0x2c 0xf0 0x00 0x21 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 + 208 0x00 0x2c 0xf0 0x00 0x22 0x80 0x8b 0x00 0x00 0x00 0xb0 0x00 0x10 0x00 0x00 0xe1 NOPA; NOPB; MOVS p2, p0; MOVXM p1, #_ZL11ifm1_offset; NOPV +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 + 224 0x73 0x91 0x60 0x03 0xb3 0xc3 0x00 0x02 MOVS p3, p7; ADD.NC p7, r15, #12 + 232 0xff 0xee 0xd0 0x00 0x00 0x00 0x30 0x00 0x10 0xba LDA r27, [p7], #-4; MOVXM p0, #_ZL9curr_iter + 242 0x07 0xfe 0x16 0x98 LDA r16, [p7], #-4 + 246 0x07 0xfe 0x36 0x98 LDA r17, [p7], #-4 + 250 0x07 0x46 0x56 0x98 LDA r18, [p7, #16] + 254 0x00 0x00 NOPX + 256 0x00 0x00 NOPX + 258 0x00 0x00 NOPX + 260 0x00 0x00 NOPX + 262 0x00 0x00 NOPX + 264 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 268 0x0f 0x06 0x11 0x98 ST r16, [p7] + 272 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 276 0x00 0x00 NOPX + 278 0x00 0x00 NOPX + 280 0x00 0x00 NOPX + 282 0x14 0x93 0x08 0x18 ACQ r18, r16 + 286 0x04 0x00 0xa7 0xad 0x81 0xe4 MOVX r16, #1; MOV r15, p3 + 292 0x00 0x00 NOPX + 294 0x00 0x00 NOPX + 296 0x00 0x06 0x36 0x98 LDA r17, [p0] + 300 0xc0 0xca 0xdc 0xdd 0x81 0xd4 LDA r18, [p6]; MOV p6, p7 + 306 0x01 0x06 0x76 0x98 LDA r19, [p1] + 310 0x07 0x5c 0x9e 0x98 LDA p1, [p7], #20 + 314 0x00 0x00 NOPX +.no_stack_arguments + 316 0x00 0x00 0x00 0x00 0x01 0x04 JL #_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E +.delay_slot +.swstall delay_slot + 322 0x00 0x00 NOPX +.delay_slot + 324 0x14 0x62 0x07 0x18 ADD r17, r17, #1 +.delay_slot + 328 0x08 0x06 0x31 0x98 ST r17, [p0] +.delay_slot + 332 0x14 0xe1 0x0d 0x98 LSHL r16, r19, r16 +.delay_slot + 336 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xa0 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV +.return_address + 352 0xca 0xc6 0xd0 0x00 0x00 0x03 0x30 0x00 0x10 0xba LDA r17, [p6, #20]; MOVXM p6, #_ZL9curr_iter + 362 0x10 0x20 0x05 0x18 MOVX r16, #1 + 366 0x00 0x00 NOPX + 368 0x00 0x00 NOPX + 370 0x00 0x00 NOPX + 372 0x00 0x00 NOPX + 374 0x00 0x00 NOPX + 376 0x14 0x51 0x08 0x18 REL r17, r16 + 380 0xfc 0xce 0xd0 0x00 0x00 0x01 0x30 0x00 0x10 0xba LDA r19, [p7, #-8]; MOVXM p2, #_ZL8num_iter + 390 0x06 0x06 0x36 0x98 LDA r17, [p6] + 394 0x02 0x06 0x56 0x98 LDA r18, [p2] + 398 0x00 0x00 NOPX + 400 0x00 0x00 NOPX + 402 0x00 0x00 NOPX + 404 0x00 0x00 NOPX + 406 0x14 0x21 0x31 0x98 SUB r16, r16, r19 + 410 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8] + 414 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 418 0x80 0x00 0xe0 0x40 0x01 0x84 JNZ r16, #TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.delay_slot +.swstall delay_slot + 424 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 426 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 428 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 430 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 432 0x00 0x00 NOPX + 434 0x10 0x20 0x01 0x18 MOVX r16, #0 + 438 0x00 0x2c 0xf6 0x06 0x11 0x80 0x00 0x00 0x00 0x7a NOPA; ST r16, [p6]; NOPX +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 + 448 0x07 0xf4 0x39 0x18 LDA lr, [sp, #-12] + 452 0x07 0xfb 0x19 0x18 LDA p6, [sp, #-8] + 456 0x00 0x00 NOPX + 458 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 460 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.noswbrkpt + 462 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 466 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 468 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 472 0x1f 0x67 0xa0 0xf8 MOV p7, r15 +.delay_slot + 476 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 482 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 484 0x00 0x00 NOPX +.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end last +.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 last +.delay_slot +.swstall delay_slot + 486 0x00 0x00 NOPX + +.undef global data _ZN12me_primitive11control_satE + +.undef global data _ZN12me_primitive11control_rndE + +.undef global data mul1d_attribute_broadcasting_params + +.undef global text _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv + +.undef global text _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E + +.text_segment_name +.text weak 10 _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_begin0 +.function_start + 0 0x23 0x85 0xd0 0x00 0x00 0x00 0x30 0x00 0x10 0xba LDA el0, [p1], #4; MOVXM p0, #sigmoid1d_params + 10 0x00 0x00 NOPX + 12 0x00 0x00 NOPX + 14 0x00 0x00 NOPX + 16 0x00 0x00 NOPX + 18 0x00 0x00 NOPX + 20 0x00 0x00 NOPX + 22 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 26 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 30 0x00 0x00 NOPX + 32 0x00 0x00 NOPX + 34 0x00 0x00 NOPX + 36 0x00 0x00 NOPX + 38 0x00 0x00 NOPX + 40 0x00 0x00 NOPX + 42 0x08 0x04 0x29 0x98 ST el0, [p0] + 46 0x01 0x14 0x2e 0x98 LDA el0, [p1, #4] + 50 0x00 0x00 NOPX + 52 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot + 56 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 58 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 60 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 62 0x00 0x00 NOPX +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv__end last +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_end0 last +.delay_slot + 64 0x08 0x14 0x29 0x98 ST el0, [p0, #4] + +.undef global data sigmoid1d_params + +.text_segment_name +.text weak 10 _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_begin0 +.function_start + 0 0xff 0x40 0x00 0x3d 0x68 0x00 0x00 0x01 0x30 0x00 0x10 0xb6 MOVA r0, #-6; VLDB x10, [p0], #64; MOVXM p2, #sigmoid1d_params +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 12 0x40 0x8a 0xd0 0x3b 0xe8 0x00 0x00 0x01 0x30 0x00 0x10 0xb6 LDA r2, [p2]; VLDB x7, [p0], #64; MOVXM p2, #_ZN12me_primitive11control_rndE +.aggressive_scheduled_block_id 1 +.noswbrkpt + 24 0x40 0x84 0x50 0x3d 0x68 0x00 0x00 0x10 0xc8 0x40 0x10 0xb6 LDA.s8 r1, [p2]; VLDB x10, [p0], #64; MOVXM r6, #16512 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 36 0x03 0xbe 0x80 0x32 0xe5 0xf4 VLDB x7, [p0], #64; VBCST.16 x0, r6 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 42 0x00 0x00 0xc2 0x21 0x00 0x44 MOVXM r4, #49280 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 48 0x18 0x91 0x72 0xf8 VBCST.16 x1, r4 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 52 0x00 0x00 0x71 0xbf 0xfe 0x44 MOVXM r3, #32767 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 58 0x1c 0x50 0x2c 0xf8 VMIN_GE.bf16 x8, r16, x10, x0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 62 0x10 0x01 0xb6 0x81 0xd9 0xe4 LSHL r0, r2, r0; VMAX_LT.bf16 x6, r16, x8, x1 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 68 0x0f 0x50 0x08 0x70 0x59 0xe4 MOVX crRnd, r1; VMIN_GE.bf16 x8, r16, x7, x0 + 74 0x19 0x0d 0x72 0xf8 VBCST.16 x2, r3 + 78 0x00 0x00 0x32 0xba 0x00 0x44 MOVXM r5, #15616 + 84 0x19 0x95 0x72 0xf8 VBCST.16 x3, r5 + 88 0x00 0x00 0x38 0xbe 0x00 0x44 MOVXM r17, #16128 + 94 0x1d 0xb1 0x2b 0x78 VBAND x11, x6, x2 + 98 0x64 0x5e 0x25 0x8a 0xe5 0xe4 MOVX r17, #828; VBCST.16 x5, r17 + 104 0x04 0xc0 0xec 0xe6 0x8c 0xe7 0x61 0x62 VMAX_LT.bf16 x9, r16, x8, x1; VMUL.f dm4, x3, x11, r17 + 112 0x1c 0x49 0x2b 0x78 VBAND x8, x9, x2 + 116 0x00 0x00 0x31 0x3d 0x00 0x44 MOVXM r2, #16000 + 122 0x02 0x09 0x72 0xe6 0x8a 0xe7 0x01 0x62 VBCST.16 x4, r2; VMUL.f dm2, x3, x8, r17 + 130 0x18 0x0b 0x8a 0xf8 VCONV.fp32.bf16 cml0, x5 + 134 0x04 0x50 0x2c 0xe6 0x8b 0x0c 0x81 0x62 VMIN_GE.bf16 x8, r16, x10, x0; VMAC.f dm3, dm0, x6, x4, r17 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 142 0xb2 0x42 0xc0 0x00 0x00 0x0f 0x0c 0x02 0x89 0x12 0x81 0x56 VCONV.bf16.fp32 x11, cml4; MOVXM ls, #(ZLS_F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_192 + 0);VMAC.f dm1, dm0, x9, x4, r17 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 154 0x1b 0x40 0xec 0xf8 VMAX_LT.bf16 x6, r16, x8, x1 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 158 0x00 0x00 0x00 0x37 0x12 0x02 0x8a 0x76 0xc3 0x5a MOVXM le, #(ZLE_F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_288 + 0);VMSC.f dm2, dm3, x11, x6, r17 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 168 0x52 0x22 0xc0 0x02 0xb8 0x3f 0x80 0x02 VCONV.bf16.fp32 x5, cml2; ADD.NC lc, r0, #-2 + 176 0x1c 0x38 0x2c 0xf8 VMIN_GE.bf16 x8, r16, x7, x0 + 180 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x02 0xd8 0x95 0xb0 0xf6 NOPA; NOPB; NOPS; VBAND x11, x6, x2 +.label ZLS_F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_192 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 192 0x00 0x3d 0x6c 0xc0 0xec 0xe6 0x8c 0x2b 0x23 0x4a VLDB x10, [p0], #64; VMAX_LT.bf16 x9, r16, x8, x1; VMSC.f dm4, dm1, x5, x9, r17 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 202 0x00 0x3b 0xec 0x49 0x2b 0x66 0x8c 0xe7 0x61 0x4a VLDB x7, [p0], #64; VBAND x8, x9, x2; VMUL.f dm4, x3, x11, r17 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 212 0x8b 0x0c 0x81 0x48 VMAC.f dm3, dm0, x6, x4, r17 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 216 0x8a 0xe7 0x01 0x48 VMUL.f dm2, x3, x8, r17 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 220 0x23 0xa4 0x60 0x02 0x89 0x12 0x81 0x62 VST.CONV.bf16.fp32 cml2, [p1], #64; VMAC.f dm1, dm0, x9, x4, r17 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 228 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 230 0x00 0x2c 0xf1 0x1e 0x23 0x00 0x00 0x00 0x00 0x7a NOPA; VST.CONV.bf16.fp32 cml4, [p1], #64;NOPX +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 240 0x00 0x2c 0xf0 0x00 0x25 0x92 0x16 0x00 0x00 0x02 0x28 0x16 0x78 0x00 0x00 0xe1 NOPA; NOPB; VCONV.bf16.fp32 x11, cml4; NOPX; VMIN_GE.bf16 x8, r16, x10, x0; NOPV +.aggressive_scheduled_block_id 4 +.noswbrkpt + 256 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x01 0xa0 0x76 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x8, x1; NOPV +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 272 0x00 0x2c 0xf0 0x00 0x22 0x91 0x16 0x00 0x00 0x02 0x1c 0x16 0x7c 0x53 0xb6 0x1b NOPA; NOPB; VCONV.bf16.fp32 x5, cml2; NOPX; VMIN_GE.bf16 x8, r16, x7, x0; VMSC.f dm2, dm3, x11, x6, r17 +.label ZLE_F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_288 +.end_of_loop + 288 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x02 0xd8 0x95 0xb8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VBAND x11, x6, x2; NOPV +.loop_nesting 0 + 304 0x04 0xc0 0xec 0xe6 0x8c 0x2b 0x23 0x62 VMAX_LT.bf16 x9, r16, x8, x1; VMSC.f dm4, dm1, x5, x9, r17 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 312 0x1c 0x49 0x2b 0x78 VBAND x8, x9, x2 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 316 0x8c 0xe7 0x61 0x48 VMUL.f dm4, x3, x11, r17 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 320 0x8a 0xe7 0x01 0x48 VMUL.f dm2, x3, x8, r17 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 324 0x09 0x1d 0x23 0x18 VST.CONV.bf16.fp32 cml2, [p1], #64 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 328 0x00 0x00 NOPX +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 330 0x09 0x1e 0x23 0x18 VST.CONV.bf16.fp32 cml4, [p1], #64 + 334 0x8b 0x0c 0x81 0x48 VMAC.f dm3, dm0, x6, x4, r17 + 338 0xb2 0x42 0xc0 0x02 0x89 0x12 0x81 0x62 VCONV.bf16.fp32 x11, cml4; VMAC.f dm1, dm0, x9, x4, r17 + 346 0x0a 0x91 0x16 0x18 VCONV.bf16.fp32 x5, cml2 + 350 0x8a 0x76 0xc3 0x48 VMSC.f dm2, dm3, x11, x6, r17 + 354 0x8c 0x2b 0x23 0x48 VMSC.f dm4, dm1, x5, x9, r17 + 358 0x00 0x00 NOPX + 360 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot + 364 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 366 0x00 0x00 NOPX +.delay_slot + 368 0x09 0x1d 0x23 0x18 VST.CONV.bf16.fp32 cml2, [p1], #64 +.delay_slot + 372 0x09 0x1e 0x23 0x18 VST.CONV.bf16.fp32 cml4, [p1], #64 +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E__end last +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_end0 last +.delay_slot +.swstall delay_slot + 376 0x00 0x00 NOPX + +.undef global data sigmoid1d_params + +.undef global data _ZN12me_primitive11control_rndE + +.text_segment_name +.text global 10 _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.label __Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.function_start + 0 0x00 0x00 0x06 0xc0 0x00 0x44 MOVXM p3, #_ZL9curr_iter + 6 0x60 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p3]; MOV r17, CORE_ID + 12 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 18 0xff 0x63 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p6, [sp, #-8]; MOV r0, r15 + 26 0xff 0x82 0xb0 0x00 0x00 0x03 0x30 0x00 0x11 0x3a ST r0, [sp, #-4]; MOVXM p6, #_ZL8core_row + 36 0x1b 0xd4 0xc0 0xf8 MOV r15, p2 + 40 0x00 0x00 NOPX + 42 0x00 0x00 NOPX + 44 0x80 0x00 0x68 0x40 0x01 0x84 JNZ r16, #TGT_F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.delay_slot + 50 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17 +.delay_slot + 54 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 58 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12] +.delay_slot + 62 0xc0 0xc6 0x30 0x03 0x30 0x60 0x70 0x02 ST r17, [p6]; MOV p6, p0 +.delay_slot + 70 0x00 0x00 0x00 0xc0 0x00 0x44 MOVXM p0, #sigmoid1d_params +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 76 0x00 0x00 0x04 0xc0 0x00 0x44 MOVXM p2, #_ZN12me_primitive11control_rndE +.aggressive_scheduled_block_id 1 +.noswbrkpt + 82 0x40 0xc0 0xe0 0x00 0x00 0x01 0x30 0x00 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #_ZN12me_primitive11control_satE +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 92 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 94 0x00 0x00 0x00 0x00 0x01 0x04 JL #_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 100 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 102 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 104 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 108 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 112 0x00 0x2c 0xf0 0x00 0x22 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV +.return_address + 128 0x00 0x00 0x04 0xc0 0x00 0x44 MOVXM p2, #_ZL8core_row + 134 0x40 0xc2 0xd0 0x00 0x00 0x01 0x30 0x00 0x10 0xba LDA r16, [p2]; MOVXM p2, #sigmoid1d_params + 144 0x40 0xc6 0xd0 0x00 0x00 0x01 0x30 0x00 0x10 0xba LDA r17, [p2]; MOVXM p2, #sigmoid1d_params + 154 0x48 0xcb 0x50 0x00 0x00 0x00 0xb0 0x00 0x10 0xba LDA.u16 r18, [p2, #8]; MOVXM p1, #_ZL11ifm1_offset + 164 0x00 0x00 NOPX + 166 0x00 0x00 NOPX + 168 0x00 0x00 0x70 0x00 0x00 0x84 J #TGT_F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 +.delay_slot + 174 0x00 0x00 0x00 0xc0 0x00 0x44 MOVXM p0, #_ZL8num_iter +.delay_slot +.swstall delay_slot + 180 0x00 0x00 NOPX +.delay_slot + 182 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 +.delay_slot + 186 0x00 0x2c 0xf0 0x0c 0xa3 0x0c NOPA; ST r18, [p0] +.delay_slot + 192 0x00 0x2c 0xf0 0x00 0x21 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV +.label TGT_F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 + 208 0x00 0x2c 0xf0 0x00 0x22 0x80 0x8b 0x00 0x00 0x00 0xb0 0x00 0x10 0x00 0x00 0xe1 NOPA; NOPB; MOVS p2, p0; MOVXM p1, #_ZL11ifm1_offset; NOPV +.label TGT_F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 + 224 0x73 0x91 0x60 0x03 0xb3 0xc3 0x00 0x02 MOVS p3, p7; ADD.NC p7, r15, #12 + 232 0xff 0xee 0xd0 0x00 0x00 0x00 0x30 0x00 0x10 0xba LDA r27, [p7], #-4; MOVXM p0, #_ZL9curr_iter + 242 0x07 0xfe 0x16 0x98 LDA r16, [p7], #-4 + 246 0x07 0xfe 0x36 0x98 LDA r17, [p7], #-4 + 250 0x07 0x46 0x56 0x98 LDA r18, [p7, #16] + 254 0x00 0x00 NOPX + 256 0x00 0x00 NOPX + 258 0x00 0x00 NOPX + 260 0x00 0x00 NOPX + 262 0x00 0x00 NOPX + 264 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 268 0x0f 0x06 0x11 0x98 ST r16, [p7] + 272 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 276 0x00 0x00 NOPX + 278 0x00 0x00 NOPX + 280 0x00 0x00 NOPX + 282 0x14 0x93 0x08 0x18 ACQ r18, r16 + 286 0x04 0x00 0xa7 0xad 0x81 0xe4 MOVX r16, #1; MOV r15, p3 + 292 0x00 0x00 NOPX + 294 0x00 0x00 NOPX + 296 0x00 0x06 0x36 0x98 LDA r17, [p0] + 300 0xc0 0xca 0xdc 0xdd 0x81 0xd4 LDA r18, [p6]; MOV p6, p7 + 306 0x01 0x06 0x76 0x98 LDA r19, [p1] + 310 0x07 0x5c 0x9e 0x98 LDA p1, [p7], #20 + 314 0x00 0x00 NOPX +.no_stack_arguments + 316 0x00 0x00 0x00 0x00 0x01 0x04 JL #_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E +.delay_slot +.swstall delay_slot + 322 0x00 0x00 NOPX +.delay_slot + 324 0x14 0x62 0x07 0x18 ADD r17, r17, #1 +.delay_slot + 328 0x08 0x06 0x31 0x98 ST r17, [p0] +.delay_slot + 332 0x14 0xe1 0x0d 0x98 LSHL r16, r19, r16 +.delay_slot + 336 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xa0 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV +.return_address + 352 0xca 0xc6 0xd0 0x00 0x00 0x03 0x30 0x00 0x10 0xba LDA r17, [p6, #20]; MOVXM p6, #_ZL9curr_iter + 362 0x10 0x20 0x05 0x18 MOVX r16, #1 + 366 0x00 0x00 NOPX + 368 0x00 0x00 NOPX + 370 0x00 0x00 NOPX + 372 0x00 0x00 NOPX + 374 0x00 0x00 NOPX + 376 0x14 0x51 0x08 0x18 REL r17, r16 + 380 0xfc 0xce 0xd0 0x00 0x00 0x01 0x30 0x00 0x10 0xba LDA r19, [p7, #-8]; MOVXM p2, #_ZL8num_iter + 390 0x06 0x06 0x36 0x98 LDA r17, [p6] + 394 0x02 0x06 0x56 0x98 LDA r18, [p2] + 398 0x00 0x00 NOPX + 400 0x00 0x00 NOPX + 402 0x00 0x00 NOPX + 404 0x00 0x00 NOPX + 406 0x14 0x21 0x31 0x98 SUB r16, r16, r19 + 410 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8] + 414 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 418 0x80 0x00 0xe0 0x40 0x01 0x84 JNZ r16, #TGT_F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.delay_slot +.swstall delay_slot + 424 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 426 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 428 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 430 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 432 0x00 0x00 NOPX + 434 0x10 0x20 0x01 0x18 MOVX r16, #0 + 438 0x00 0x2c 0xf6 0x06 0x11 0x80 0x00 0x00 0x00 0x7a NOPA; ST r16, [p6]; NOPX +.label TGT_F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 + 448 0x07 0xf4 0x39 0x18 LDA lr, [sp, #-12] + 452 0x07 0xfb 0x19 0x18 LDA p6, [sp, #-8] + 456 0x00 0x00 NOPX + 458 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 460 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.noswbrkpt + 462 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 466 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 468 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 472 0x1f 0x67 0xa0 0xf8 MOV p7, r15 +.delay_slot + 476 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 482 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 484 0x00 0x00 NOPX +.label _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end last +.label __Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 last +.delay_slot +.swstall delay_slot + 486 0x00 0x00 NOPX + +.undef global data _ZN12me_primitive11control_satE + +.undef global data _ZN12me_primitive11control_rndE + +.undef global data sigmoid1d_params + +.undef global text _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv + +.undef global text _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E + +.text_segment_name +.text weak 10 _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.function_start + 0 0x23 0x85 0xd0 0x00 0x00 0x00 0x30 0x00 0x10 0xba LDA el0, [p1], #4; MOVXM p0, #add1d_params + 10 0xf0 0x00 0x00 0x08 0x10 0x0b 0x08 0x00 0x58 0xba MOVA r0, #-128; MOVX r1, #256; MOV r24, #0 + 20 0x00 0x00 NOPX + 22 0x00 0x00 NOPX + 24 0x00 0x00 NOPX + 26 0x00 0x00 NOPX + 28 0x00 0x00 NOPX + 30 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 34 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 38 0x00 0x00 NOPX + 40 0x00 0x00 NOPX + 42 0x00 0x00 NOPX + 44 0x00 0x00 NOPX + 46 0x00 0x00 NOPX + 48 0x00 0x00 NOPX + 50 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 54 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 58 0x00 0x00 NOPX + 60 0x00 0x00 NOPX + 62 0x00 0x00 NOPX + 64 0x00 0x00 NOPX + 66 0x00 0x00 NOPX + 68 0x00 0x00 NOPX + 70 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 74 0x01 0x14 0x76 0x98 LDA r3, [p1, #4] + 78 0x00 0x00 NOPX + 80 0x00 0x00 NOPX + 82 0x00 0x00 NOPX + 84 0x00 0x00 NOPX + 86 0x00 0x00 NOPX + 88 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 90 0x08 0x4c 0x71 0x98 ST r3, [p0], #16 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 94 0x00 0x04 0x17 0x18 ST.s16 r0, [p0] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 98 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 102 0x00 0x00 0xf1 0x3e 0x00 0x44 MOVXM r2, #65280 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 108 0x10 0xc4 0x24 0x98 AND r2, r3, r2 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 112 0x10 0x76 0x27 0x98 EQ r27, r1, r2 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 116 0x10 0x01 0x82 0x18 SEL.EQZ r0, r0, r24, r27 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv__end last +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_end0 last +.delay_slot +.swstall delay_slot + 120 0x00 0x00 NOPX + +.undef global data add1d_params + +.text_segment_name +.text weak 10 _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.function_start + 0 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 6 0x0f 0xf8 0x3d 0x98 ST lr, [sp, #-8] +.no_stack_arguments + 10 0x00 0x00 0x00 0x00 0x01 0x04 JL #_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv +.delay_slot + 16 0x18 0x17 0xa0 0xf8 MOV r0, r15 +.delay_slot + 20 0xff 0x82 0xb0 0x00 0x00 0x01 0xe8 0x00 0x11 0x3a ST r0, [sp, #-4]; MOVXM r15, #add1d_params +.delay_slot + 30 0x18 0x67 0xa0 0xf8 MOV p0, r15 +.delay_slot +.swstall delay_slot + 34 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 36 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.return_address + 48 0xff 0x07 0x20 0x01 0x00 0x68 0xb3 0xc4 0x08 0xba LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p1, r15, #16 + 58 0x01 0xe2 0x80 0x01 0x80 0x08 0x07 0xfd 0x58 0xba MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 + 68 0xff 0xbe 0x21 0x0a 0x11 0x80 0x07 0xa0 0x01 0x7a LDA r15, [sp, #-4]; ST r16, [p1], m0; MOVX r16, #-128 + 78 0x01 0x06 0x4a 0x98 LDA.u8 r18, [p1] + 82 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 84 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 86 0x01 0x02 0x17 0x18 ST.s16 r16, [p1, dj0] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 90 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 94 0x10 0x22 0x05 0x18 MOVX r17, #1 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 98 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 104 0x14 0x77 0x27 0x98 EQ r27, r17, r18 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 108 0x14 0x21 0x82 0x18 SEL.EQZ r16, r16, r24, r27 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv__end last +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 last +.delay_slot +.swstall delay_slot + 112 0x00 0x00 NOPX + +.undef global data add1d_params + +.undef global text _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv + +.text_segment_name +.text weak 10 _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.tail_call +.function_start + 0 0x00 0x00 0x00 0x00 0x00 0x84 J #_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE +.delay_slot + 6 0x00 0x00 0x06 0xc0 0x00 0x44 MOVXM p3, #add1d_params +.delay_slot +.swstall delay_slot + 12 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 16 0x00 0x00 NOPX +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end last +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0 last +.delay_slot +.swstall delay_slot + 18 0x00 0x00 NOPX + +.undef global data add1d_params + +.text_segment_name +.text global 10 _Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.label __Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0 +.function_start + 0 0x00 0x00 0x08 0xc0 0x00 0x44 MOVXM p4, #_ZL9curr_iter + 6 0x80 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p4]; MOV r17, CORE_ID + 12 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 18 0xff 0x3a 0xb0 0x23 0x14 0x81 0xca 0x60 0x79 0x3a ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 + 28 0xfd 0x83 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p0, [sp, #-20]; MOV r0, r15 + 36 0x0f 0xfc 0x15 0x98 ST r0, [sp, #-4] + 40 0x0f 0xf0 0x3d 0x98 ST lr, [sp, #-16] + 44 0x00 0x00 NOPX + 46 0x80 0x00 0x60 0x40 0x01 0x84 JNZ r16, #TGT_F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 +.delay_slot + 52 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 56 0x00 0x00 0x04 0xc0 0x00 0x44 MOVXM p2, #_ZL8core_row +.delay_slot + 62 0x40 0xc6 0x30 0x01 0x37 0x60 0x70 0x02 ST r17, [p2]; MOV p2, p7 +.delay_slot + 70 0x1b 0xd6 0xc0 0xf8 MOV r15, p3 +.delay_slot + 74 0xfe 0xa3 0xb0 0x00 0x00 0x03 0xb0 0x00 0x11 0x3a ST p2, [sp, #-12]; MOVXM p7, #add1d_params +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 84 0x13 0x91 0x60 0x00 0x00 0x01 0x30 0x00 0x11 0x3a MOVS p0, p7; MOVXM p2, #_ZN12me_primitive11control_rndE +.aggressive_scheduled_block_id 1 +.noswbrkpt + 94 0x40 0xc0 0xe0 0x00 0x00 0x01 0x30 0x00 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #_ZN12me_primitive11control_satE +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 104 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 106 0x00 0x00 0x00 0x00 0x01 0x04 JL #_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 112 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 114 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 116 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 120 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 124 0x0a 0x06 0x11 0x98 ST r16, [p2] +.return_address + 128 0xe0 0xc2 0xd0 0x00 0x00 0x00 0xb0 0x00 0x10 0xba LDA r16, [p7]; MOVXM p1, #_ZL8core_row + 138 0x20 0xc6 0xd0 0x00 0x00 0x01 0xb0 0x00 0x10 0xba LDA r17, [p1]; MOVXM p3, #_ZL11ifm1_offset + 148 0xea 0xcb 0x50 0x00 0x00 0x00 0xb0 0x00 0x10 0xba LDA.u16 r18, [p7, #10]; MOVXM p1, #_ZL11ifm2_offset + 158 0x00 0x00 NOPX + 160 0x00 0x00 NOPX + 162 0x00 0x00 NOPX + 164 0x00 0x00 0x68 0x00 0x00 0x84 J #TGT_F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 +.delay_slot + 170 0x00 0x00 0x04 0xc0 0x00 0x44 MOVXM p2, #_ZL8num_iter +.delay_slot + 176 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 +.delay_slot + 180 0x0a 0x06 0x51 0x98 ST r18, [p2] +.delay_slot + 184 0x0b 0x06 0x11 0x98 ST r16, [p3] +.delay_slot + 188 0x09 0x06 0x11 0x98 ST r16, [p1] +.label TGT_F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 + 192 0x00 0x00 0x06 0xc0 0x00 0x44 MOVXM p3, #_ZL11ifm1_offset + 198 0x00 0x2c 0xf0 0x00 0x00 0x00 0xb0 0x00 0x10 0xba NOPA; MOVXM p1, #_ZL11ifm2_offset +.label TGT_F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 + 208 0x18 0x67 0x86 0x18 ADD.NC p0, r15, #12 + 212 0x1f 0xee 0xd0 0x00 0x00 0x01 0x30 0x00 0x10 0xba LDA r27, [p0], #-4; MOVXM p2, #_ZL9curr_iter + 222 0x00 0xfe 0x16 0x98 LDA r16, [p0], #-4 + 226 0x00 0xfe 0x36 0x98 LDA r17, [p0], #-4 + 230 0x02 0x06 0x56 0x98 LDA r18, [p2] + 234 0x00 0x46 0x76 0x98 LDA r19, [p0, #16] + 238 0x00 0x00 NOPX + 240 0x00 0x00 NOPX + 242 0x00 0x00 NOPX + 244 0x00 0x00 NOPX + 246 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 250 0x00 0xc2 0x39 0x40 0x0e 0x5c ST r16, [p0]; ADD r16, r18, #1 + 256 0x0a 0x06 0x11 0x98 ST r16, [p2] + 260 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 264 0x00 0x00 NOPX + 266 0x00 0x00 NOPX + 268 0x00 0x00 NOPX + 270 0x14 0xd3 0x08 0x18 ACQ r19, r16 + 274 0x1a 0x67 0x06 0x18 ADD.NC p2, r14, #12 + 278 0x00 0x00 NOPX + 280 0x00 0x00 NOPX + 282 0x02 0xff 0x76 0x98 LDA r27, [p2], #-4 + 286 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4 + 290 0x02 0xfe 0x56 0x98 LDA r18, [p2], #-4 + 294 0x02 0x56 0x76 0x98 LDA r19, [p2, #20] + 298 0x00 0x00 NOPX + 300 0x00 0x00 NOPX + 302 0x00 0x00 NOPX + 304 0x00 0x00 NOPX + 306 0x00 0x00 NOPX + 308 0x14 0xa3 0x12 0x18 SEL.EQZ r17, r18, r17, r27 + 312 0x0a 0x06 0x31 0x98 ST r17, [p2] + 316 0x00 0x00 NOPX + 318 0x00 0x00 NOPX + 320 0x00 0x00 NOPX + 322 0x00 0x00 NOPX + 324 0x14 0xd3 0x08 0x18 ACQ r19, r16 + 328 0xd1 0x11 0x60 0x01 0x00 0x29 0xce 0x60 0x79 0x3a MOVS p6, p2; MOVX r16, #1; MOV r14, p6 + 338 0x00 0x00 NOPX + 340 0x00 0x00 NOPX + 342 0x07 0xee 0x19 0x18 LDA p4, [sp, #-20] + 346 0x60 0xc6 0xdf 0xd8 0x3b 0x0c LDA r17, [p3]; ST p0, [sp, #-20] + 352 0x20 0xd2 0xd6 0xdd 0x81 0xd4 LDA r20, [p1]; MOV p3, p7 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 358 0x02 0x4e 0x56 0x98 LDA r18, [p2], #16 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 362 0x00 0x5d 0x1e 0x98 LDA p2, [p0], #20 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 366 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 370 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 372 0x04 0x06 0x76 0x98 LDA r19, [p4] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 376 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 378 0x00 0x00 0x00 0x00 0x01 0x04 JL #_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 384 0x1b 0xd4 0xc0 0xf8 MOV r15, p2 +.delay_slot + 388 0x14 0x63 0x0d 0x98 LSHL r17, r17, r16 +.delay_slot + 392 0x15 0x21 0x0d 0x98 LSHL r16, r20, r16 +.delay_slot + 396 0x19 0x69 0x41 0x58 ADD.NC p1, r18, r16 +.delay_slot + 400 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xe2 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV +.return_address + 416 0xc8 0xc6 0xd0 0x01 0x00 0x28 0xb3 0xd0 0x78 0xba LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 + 426 0x00 0x00 0x0c 0xc0 0x00 0x44 MOVXM p6, #_ZL8num_iter + 432 0x00 0x00 NOPX + 434 0x00 0x00 NOPX + 436 0x00 0x00 NOPX + 438 0x00 0x00 NOPX + 440 0x00 0x00 NOPX + 442 0x14 0x51 0x08 0x18 REL r17, r16 + 446 0x01 0xf6 0x36 0x98 LDA r17, [p1, #-4] + 450 0x07 0xed 0x19 0x18 LDA p2, [sp, #-20] + 454 0x00 0x00 NOPX + 456 0x00 0x00 NOPX + 458 0x00 0x00 NOPX + 460 0x00 0x00 NOPX + 462 0x00 0x00 NOPX + 464 0x14 0x23 0x11 0x98 SUB r17, r16, r17 + 468 0x4a 0xc6 0xd3 0xec 0x63 0x0c LDA r17, [p2, #20]; ST r17, [p1, #-4] + 474 0x00 0x00 NOPX + 476 0x00 0x00 NOPX + 478 0x00 0x00 NOPX + 480 0x00 0x00 NOPX + 482 0x00 0x00 NOPX + 484 0x00 0x00 NOPX + 486 0x14 0x51 0x08 0x18 REL r17, r16 + 490 0xfc 0xce 0xd0 0x00 0x00 0x00 0xb0 0x00 0x10 0xba LDA r19, [p7, #-8]; MOVXM p1, #_ZL9curr_iter + 500 0x06 0x06 0x56 0x98 LDA r18, [p6] + 504 0x01 0x06 0x36 0x98 LDA r17, [p1] + 508 0x00 0x00 NOPX + 510 0x00 0x00 NOPX + 512 0x00 0x00 NOPX + 514 0x00 0x00 NOPX + 516 0x14 0x21 0x31 0x98 SUB r16, r16, r19 + 520 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8] + 524 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 528 0x80 0x01 0x18 0x40 0x01 0x84 JNZ r16, #TGT_F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 +.delay_slot +.swstall delay_slot + 534 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 536 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 538 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 540 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 542 0x00 0x00 NOPX + 544 0x10 0x20 0x01 0x18 MOVX r16, #0 + 548 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x83 0x08 0xc1 0x36 NOPA; NOPB; ST r16, [p1]; NOPX +.label TGT_F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 + 560 0x07 0xf0 0x39 0x18 LDA lr, [sp, #-16] + 564 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] + 568 0x07 0xf7 0x99 0x18 LDA p7, [sp, #-12] +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 572 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.noswbrkpt + 574 0x07 0xf9 0xd1 0x18 LDA r14, [sp, #-8] +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 578 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 580 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 582 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 586 0x0e 0x8e 0x0b 0x18 MOVS p6, r14 +.delay_slot + 590 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 596 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 598 0x00 0x00 NOPX +.label _Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end last +.label __Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0 last +.delay_slot +.swstall delay_slot + 600 0x00 0x00 NOPX + +.undef global data _ZN12me_primitive11control_satE + +.undef global data _ZN12me_primitive11control_rndE + +.undef global data add1d_params + +.undef global text _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv + +.undef global text _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E + +.text_segment_name +.text weak 10 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_begin0 +.function_start + 0 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 4 0x00 0x00 0x00 0xc0 0x40 0x44 MOVXM p0, #(mul1d_params + 32) +.delay_slot + 10 0x18 0x00 0x80 0xb8 MOV m0, #64 +.delay_slot + 14 0x08 0x04 0x01 0x98 ST m0, [p0] +.delay_slot + 18 0x08 0x14 0x01 0x98 ST m0, [p0, #4] +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E__end last +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_end0 last +.delay_slot +.swstall delay_slot + 22 0x00 0x00 NOPX + +.undef global data mul1d_params + +.text_segment_name +.text weak 10 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.function_start + 0 0x23 0x85 0xd0 0x00 0x00 0x00 0x30 0x00 0x10 0xba LDA el0, [p1], #4; MOVXM p0, #mul1d_params + 10 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 16 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4] + 20 0x00 0x00 NOPX + 22 0x00 0x00 NOPX + 24 0x00 0x00 NOPX + 26 0x00 0x00 NOPX + 28 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 32 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 36 0x00 0x00 NOPX + 38 0x00 0x00 NOPX + 40 0x00 0x00 NOPX + 42 0x00 0x00 NOPX + 44 0x00 0x00 NOPX + 46 0x00 0x00 NOPX + 48 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 52 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 56 0x00 0x00 NOPX + 58 0x00 0x00 NOPX + 60 0x00 0x00 NOPX + 62 0x00 0x00 NOPX + 64 0x00 0x00 NOPX + 66 0x00 0x00 NOPX + 68 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 72 0x01 0x14 0x2e 0x98 LDA el0, [p1, #4] + 76 0x00 0x00 NOPX + 78 0x00 0x00 NOPX +.no_stack_arguments + 80 0x00 0x00 0x00 0x00 0x01 0x04 JL #_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.delay_slot + 86 0x0f 0xfb 0x9d 0x98 ST p7, [sp, #-8] +.delay_slot +.swstall delay_slot + 90 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 92 0x00 0x00 NOPX +.delay_slot + 94 0x08 0xdc 0x29 0x98 ST el0, [p0], #-12 +.delay_slot + 98 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x7b 0x06 0x07 0x00 0x00 0x1c 0x2e NOPA; NOPS; MOV p7, p0; NOPV +.return_address + 112 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] + 116 0x00 0x00 NOPX + 118 0x00 0x00 NOPX + 120 0x00 0x00 NOPX + 122 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 124 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 126 0x07 0xfb 0x99 0x18 LDA p7, [sp, #-8] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 130 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 134 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 136 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 138 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 140 0x10 0x20 0x01 0x18 MOVX r16, #0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv__end last +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 last +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 144 0xe8 0xc2 0x30 0x3f 0xfe 0x00 0x00 0x00 0x71 0x3a ST r16, [p7, #16]; PADDXM [sp], #-64 + +.undef global data mul1d_params + +.undef global text _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E + +.text_segment_name +.text weak 10 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_begin0 +.function_start + 0 0x04 0x00 0x80 0x00 0x00 0x01 0xb0 0x00 0x10 0xba MOVA m0, #32; MOVXM p3, #mul1d_params + 10 0x61 0x06 0xd0 0x00 0x00 0x02 0x30 0x00 0x10 0xba LDA r1, [p3], m0; MOVXM p4, #_ZN12me_primitive11control_rndE + 20 0x60 0x90 0xd0 0x18 0x07 0x88 0x6f 0xfa 0x58 0xba LDA m1, [p3]; MOVX r0, #828; MOV r3, #-6 + 30 0x62 0x80 0xd0 0x00 0x00 0x00 0x78 0x60 0x10 0xba LDA m0, [p3, #4]; MOVXM ls, #(ZLS_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_192 + 0) + 40 0x80 0x88 0x50 0x00 0x00 0x01 0xb8 0x68 0x10 0xba LDA.s8 r2, [p4]; MOVXM le, #(ZLE_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_208 + 0) + 50 0x00 0x00 NOPX + 52 0x00 0x00 NOPX + 54 0x00 0x00 NOPX + 56 0x10 0x42 0x3d 0x98 LSHL r1, r1, r3 + 60 0x1d 0x70 0xfc 0x98 ADD.NC lc, r1, #-7 + 64 0x21 0x13 0x70 0x50 0xe8 0x3c VLDA x2, [p1], m0; VLDB x1, [p0], m1 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 70 0x21 0x1b 0x70 0x50 0x68 0xba 0x80 0x12 VLDA x3, [p1], m0; VLDB x0, [p0], m1; MOVX crRnd, r2 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 78 0x21 0x13 0x70 0x50 0xe8 0x3c VLDA x2, [p1], m0; VLDB x1, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 84 0x21 0x1b 0x70 0x50 0x68 0x3c VLDA x3, [p1], m0; VLDB x0, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 90 0x21 0x13 0x70 0x50 0xe8 0x3c VLDA x2, [p1], m0; VLDB x1, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 96 0x21 0x1b 0x70 0x50 0x68 0x3c VLDA x3, [p1], m0; VLDB x0, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 102 0x21 0x13 0x70 0x50 0xe8 0x3c VLDA x2, [p1], m0; VLDB x1, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 108 0x21 0x1b 0x70 0x28 0x34 0x1d 0x00 0xe2 0x41 0x4a VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 118 0x21 0x13 0x70 0x28 0x74 0x1d 0x01 0xe0 0x61 0x4a VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 128 0x21 0x1b 0x70 0x28 0x34 0x1d 0x00 0xe2 0x41 0x4a VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 138 0x21 0x13 0x70 0x28 0x74 0x1d 0x01 0xe0 0x61 0x4a VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 148 0x21 0x1b 0x70 0x50 0x68 0x00 0xad 0x8e 0x00 0xe2 0x41 0x66 VLDA x3, [p1], m0; VLDB x0, [p0], m1; NOPS; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 160 0x21 0x13 0x70 0x50 0xe8 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x0f 0x03 0x0b VLDA x2, [p1], m0; VLDB x1, [p0], m1; NOPS; NOPX; NOPM; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 176 0x21 0x1b 0x70 0x50 0x6a 0x1c 0x23 0x00 0x00 0x00 0x01 0xa5 0x78 0x07 0x12 0x0b VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 +.label ZLS_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_192 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 192 0x21 0x13 0x70 0x50 0xea 0x1c 0xa3 0x00 0x00 0x00 0x01 0xa5 0x78 0x0f 0x03 0x0b VLDA x2, [p1], m0; VLDB x1, [p0], m1; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; VMUL.f dm1, x0, x3, r0 +.label ZLE_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_208 +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 208 0x21 0x1b 0x70 0x50 0x6a 0x1c 0x23 0x00 0x00 0x00 0x01 0xa5 0x78 0x07 0x12 0x0b VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 +.loop_nesting 0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 224 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 232 0x43 0x84 0x60 0x02 0x00 0xe2 0x41 0x62 VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 240 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 248 0x43 0x84 0x60 0x02 0x00 0xe2 0x41 0x62 VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 256 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 264 0x43 0x84 0x60 0x02 0x00 0xe2 0x41 0x62 VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 272 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 280 0x0a 0x1c 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p2], #64 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 284 0x43 0x94 0x60 0x50 0x00 0x5c VST.CONV.bf16.fp32 cml1, [p2], #64;RET lr +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 290 0x0a 0x1c 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p2], #64 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 294 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64 +.delay_slot + 298 0x0a 0x1c 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p2], #64 +.delay_slot + 302 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E__end last +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_end0 last +.delay_slot +.swstall delay_slot + 306 0x00 0x00 NOPX + +.undef global data mul1d_params + +.undef global data _ZN12me_primitive11control_rndE + +.text_segment_name +.text global 10 _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0 +.function_start + 0 0x00 0x00 0x08 0xc0 0x00 0x44 MOVXM p4, #_ZL9curr_iter + 6 0x80 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p4]; MOV r17, CORE_ID + 12 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 18 0xff 0x3a 0xb0 0x23 0x14 0x81 0xca 0x60 0x79 0x3a ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 + 28 0xfd 0x83 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p0, [sp, #-20]; MOV r0, r15 + 36 0x0f 0xfc 0x15 0x98 ST r0, [sp, #-4] + 40 0x0f 0xf0 0x3d 0x98 ST lr, [sp, #-16] + 44 0x00 0x00 NOPX + 46 0x80 0x00 0x60 0x40 0x01 0x84 JNZ r16, #TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 +.delay_slot + 52 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 56 0x00 0x00 0x04 0xc0 0x00 0x44 MOVXM p2, #_ZL8core_row +.delay_slot + 62 0x40 0xc6 0x30 0x01 0x37 0x60 0x70 0x02 ST r17, [p2]; MOV p2, p7 +.delay_slot + 70 0x1b 0xd6 0xc0 0xf8 MOV r15, p3 +.delay_slot + 74 0xfe 0xa3 0xb0 0x00 0x00 0x03 0xb0 0x00 0x11 0x3a ST p2, [sp, #-12]; MOVXM p7, #mul1d_params +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 84 0x13 0x91 0x60 0x00 0x00 0x01 0x30 0x00 0x11 0x3a MOVS p0, p7; MOVXM p2, #_ZN12me_primitive11control_rndE +.aggressive_scheduled_block_id 1 +.noswbrkpt + 94 0x40 0xc0 0xe0 0x00 0x00 0x01 0x30 0x00 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #_ZN12me_primitive11control_satE +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 104 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 106 0x00 0x00 0x00 0x00 0x01 0x04 JL #_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 112 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 114 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 116 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 120 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 124 0x0a 0x06 0x11 0x98 ST r16, [p2] +.return_address + 128 0xe0 0xc2 0xd0 0x00 0x00 0x00 0xb0 0x00 0x10 0xba LDA r16, [p7]; MOVXM p1, #_ZL8core_row + 138 0x20 0xc6 0xd0 0x00 0x00 0x01 0xb0 0x00 0x10 0xba LDA r17, [p1]; MOVXM p3, #_ZL11ifm1_offset + 148 0xea 0xcb 0x50 0x00 0x00 0x00 0xb0 0x00 0x10 0xba LDA.u16 r18, [p7, #10]; MOVXM p1, #_ZL11ifm2_offset + 158 0x00 0x00 NOPX + 160 0x00 0x00 NOPX + 162 0x00 0x00 NOPX + 164 0x00 0x00 0x68 0x00 0x00 0x84 J #TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 +.delay_slot + 170 0x00 0x00 0x04 0xc0 0x00 0x44 MOVXM p2, #_ZL8num_iter +.delay_slot + 176 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 +.delay_slot + 180 0x0a 0x06 0x51 0x98 ST r18, [p2] +.delay_slot + 184 0x0b 0x06 0x11 0x98 ST r16, [p3] +.delay_slot + 188 0x09 0x06 0x11 0x98 ST r16, [p1] +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 + 192 0x00 0x00 0x06 0xc0 0x00 0x44 MOVXM p3, #_ZL11ifm1_offset + 198 0x00 0x2c 0xf0 0x00 0x00 0x00 0xb0 0x00 0x10 0xba NOPA; MOVXM p1, #_ZL11ifm2_offset +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 + 208 0x18 0x67 0x86 0x18 ADD.NC p0, r15, #12 + 212 0x1f 0xee 0xd0 0x00 0x00 0x01 0x30 0x00 0x10 0xba LDA r27, [p0], #-4; MOVXM p2, #_ZL9curr_iter + 222 0x00 0xfe 0x16 0x98 LDA r16, [p0], #-4 + 226 0x00 0xfe 0x36 0x98 LDA r17, [p0], #-4 + 230 0x02 0x06 0x56 0x98 LDA r18, [p2] + 234 0x00 0x46 0x76 0x98 LDA r19, [p0, #16] + 238 0x00 0x00 NOPX + 240 0x00 0x00 NOPX + 242 0x00 0x00 NOPX + 244 0x00 0x00 NOPX + 246 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 250 0x00 0xc2 0x39 0x40 0x0e 0x5c ST r16, [p0]; ADD r16, r18, #1 + 256 0x0a 0x06 0x11 0x98 ST r16, [p2] + 260 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 264 0x00 0x00 NOPX + 266 0x00 0x00 NOPX + 268 0x00 0x00 NOPX + 270 0x14 0xd3 0x08 0x18 ACQ r19, r16 + 274 0x1a 0x67 0x06 0x18 ADD.NC p2, r14, #12 + 278 0x00 0x00 NOPX + 280 0x00 0x00 NOPX + 282 0x02 0xff 0x76 0x98 LDA r27, [p2], #-4 + 286 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4 + 290 0x02 0xfe 0x56 0x98 LDA r18, [p2], #-4 + 294 0x02 0x56 0x76 0x98 LDA r19, [p2, #20] + 298 0x00 0x00 NOPX + 300 0x00 0x00 NOPX + 302 0x00 0x00 NOPX + 304 0x00 0x00 NOPX + 306 0x00 0x00 NOPX + 308 0x14 0xa3 0x12 0x18 SEL.EQZ r17, r18, r17, r27 + 312 0x0a 0x06 0x31 0x98 ST r17, [p2] + 316 0x00 0x00 NOPX + 318 0x00 0x00 NOPX + 320 0x00 0x00 NOPX + 322 0x00 0x00 NOPX + 324 0x14 0xd3 0x08 0x18 ACQ r19, r16 + 328 0xd1 0x11 0x60 0x01 0x00 0x29 0xce 0x60 0x79 0x3a MOVS p6, p2; MOVX r16, #1; MOV r14, p6 + 338 0x00 0x00 NOPX + 340 0x00 0x00 NOPX + 342 0x07 0xee 0x19 0x18 LDA p4, [sp, #-20] + 346 0x60 0xc6 0xdf 0xd8 0x3b 0x0c LDA r17, [p3]; ST p0, [sp, #-20] + 352 0x20 0xd2 0xd6 0xdd 0x81 0xd4 LDA r20, [p1]; MOV p3, p7 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 358 0x02 0x4e 0x56 0x98 LDA r18, [p2], #16 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 362 0x00 0x5d 0x1e 0x98 LDA p2, [p0], #20 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 366 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 370 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 372 0x04 0x06 0x76 0x98 LDA r19, [p4] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 376 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 378 0x00 0x00 0x00 0x00 0x01 0x04 JL #_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 384 0x1b 0xd4 0xc0 0xf8 MOV r15, p2 +.delay_slot + 388 0x14 0x63 0x0d 0x98 LSHL r17, r17, r16 +.delay_slot + 392 0x15 0x21 0x0d 0x98 LSHL r16, r20, r16 +.delay_slot + 396 0x19 0x69 0x41 0x58 ADD.NC p1, r18, r16 +.delay_slot + 400 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xe2 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV +.return_address + 416 0xc8 0xc6 0xd0 0x01 0x00 0x28 0xb3 0xd0 0x78 0xba LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 + 426 0x00 0x00 0x0c 0xc0 0x00 0x44 MOVXM p6, #_ZL8num_iter + 432 0x00 0x00 NOPX + 434 0x00 0x00 NOPX + 436 0x00 0x00 NOPX + 438 0x00 0x00 NOPX + 440 0x00 0x00 NOPX + 442 0x14 0x51 0x08 0x18 REL r17, r16 + 446 0x01 0xf6 0x36 0x98 LDA r17, [p1, #-4] + 450 0x07 0xed 0x19 0x18 LDA p2, [sp, #-20] + 454 0x00 0x00 NOPX + 456 0x00 0x00 NOPX + 458 0x00 0x00 NOPX + 460 0x00 0x00 NOPX + 462 0x00 0x00 NOPX + 464 0x14 0x23 0x11 0x98 SUB r17, r16, r17 + 468 0x4a 0xc6 0xd3 0xec 0x63 0x0c LDA r17, [p2, #20]; ST r17, [p1, #-4] + 474 0x00 0x00 NOPX + 476 0x00 0x00 NOPX + 478 0x00 0x00 NOPX + 480 0x00 0x00 NOPX + 482 0x00 0x00 NOPX + 484 0x00 0x00 NOPX + 486 0x14 0x51 0x08 0x18 REL r17, r16 + 490 0xfc 0xce 0xd0 0x00 0x00 0x00 0xb0 0x00 0x10 0xba LDA r19, [p7, #-8]; MOVXM p1, #_ZL9curr_iter + 500 0x06 0x06 0x56 0x98 LDA r18, [p6] + 504 0x01 0x06 0x36 0x98 LDA r17, [p1] + 508 0x00 0x00 NOPX + 510 0x00 0x00 NOPX + 512 0x00 0x00 NOPX + 514 0x00 0x00 NOPX + 516 0x14 0x21 0x31 0x98 SUB r16, r16, r19 + 520 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8] + 524 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 528 0x80 0x01 0x18 0x40 0x01 0x84 JNZ r16, #TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 +.delay_slot +.swstall delay_slot + 534 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 536 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 538 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 540 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 542 0x00 0x00 NOPX + 544 0x10 0x20 0x01 0x18 MOVX r16, #0 + 548 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x83 0x08 0xc1 0x36 NOPA; NOPB; ST r16, [p1]; NOPX +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 + 560 0x07 0xf0 0x39 0x18 LDA lr, [sp, #-16] + 564 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] + 568 0x07 0xf7 0x99 0x18 LDA p7, [sp, #-12] +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 572 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.noswbrkpt + 574 0x07 0xf9 0xd1 0x18 LDA r14, [sp, #-8] +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 578 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 580 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 582 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 586 0x0e 0x8e 0x0b 0x18 MOVS p6, r14 +.delay_slot + 590 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 596 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 598 0x00 0x00 NOPX +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end last +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0 last +.delay_slot +.swstall delay_slot + 600 0x00 0x00 NOPX + +.undef global data _ZN12me_primitive11control_satE + +.undef global data _ZN12me_primitive11control_rndE + +.undef global data mul1d_params + +.undef global text _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv + +.undef global text _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E + +.text_segment_name +.text weak 10 _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv +.label __ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.function_start + 0 0x23 0x85 0xd0 0x00 0x00 0x00 0x30 0x00 0x10 0xba LDA el0, [p1], #4; MOVXM p0, #sub1d_params + 10 0xf0 0x00 0x00 0x08 0x10 0x0b 0x08 0x00 0x58 0xba MOVA r0, #-128; MOVX r1, #256; MOV r24, #0 + 20 0x00 0x00 NOPX + 22 0x00 0x00 NOPX + 24 0x00 0x00 NOPX + 26 0x00 0x00 NOPX + 28 0x00 0x00 NOPX + 30 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 34 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 38 0x00 0x00 NOPX + 40 0x00 0x00 NOPX + 42 0x00 0x00 NOPX + 44 0x00 0x00 NOPX + 46 0x00 0x00 NOPX + 48 0x00 0x00 NOPX + 50 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 54 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 58 0x00 0x00 NOPX + 60 0x00 0x00 NOPX + 62 0x00 0x00 NOPX + 64 0x00 0x00 NOPX + 66 0x00 0x00 NOPX + 68 0x00 0x00 NOPX + 70 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 74 0x01 0x14 0x76 0x98 LDA r3, [p1, #4] + 78 0x00 0x00 NOPX + 80 0x00 0x00 NOPX + 82 0x00 0x00 NOPX + 84 0x00 0x00 NOPX + 86 0x00 0x00 NOPX + 88 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 90 0x08 0x4c 0x71 0x98 ST r3, [p0], #16 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 94 0x00 0x04 0x17 0x18 ST.s16 r0, [p0] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 98 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 102 0x00 0x00 0xf1 0x3e 0x00 0x44 MOVXM r2, #65280 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 108 0x10 0xc4 0x24 0x98 AND r2, r3, r2 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 112 0x10 0x76 0x27 0x98 EQ r27, r1, r2 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 116 0x10 0x01 0x82 0x18 SEL.EQZ r0, r0, r24, r27 +.label _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv__end last +.label __ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_end0 last +.delay_slot +.swstall delay_slot + 120 0x00 0x00 NOPX + +.undef global data sub1d_params + +.text_segment_name +.text weak 10 _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv +.label __ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.function_start + 0 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 6 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4] +.no_stack_arguments + 10 0x00 0x00 0x00 0x00 0x01 0x04 JL #_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv +.delay_slot + 16 0x00 0x00 0x00 0xc0 0x00 0x44 MOVXM p0, #sub1d_params +.delay_slot +.swstall delay_slot + 22 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 24 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 26 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 28 0x00 0x01 0x67 0x98 NOPA +.return_address + 32 0xff 0x87 0x20 0x00 0x00 0x00 0xb0 0x00 0x10 0xba LDA lr, [sp, #-4]; MOVXM p1, #sub1d_params + 42 0x00 0x00 NOPX + 44 0x00 0x00 NOPX + 46 0x00 0x00 NOPX + 48 0x00 0x00 NOPX + 50 0x00 0x00 NOPX + 52 0x00 0x00 NOPX + 54 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 58 0x10 0x20 0x19 0x18 MOVX r16, #6 +.delay_slot + 62 0x09 0x46 0x11 0x98 ST r16, [p1, #16] +.delay_slot + 66 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 72 0x00 0x00 NOPX +.label _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv__end last +.label __ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 last +.delay_slot +.swstall delay_slot + 74 0x00 0x00 NOPX + +.undef global data sub1d_params + +.undef global text _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv + +.text_segment_name +.text weak 10 _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS4_E +.label __ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS4_E___func_begin0 +.tail_call +.function_start + 0 0x00 0x00 0x00 0x00 0x00 0x84 J #_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE +.delay_slot + 6 0x00 0x00 0x06 0xc0 0x00 0x44 MOVXM p3, #sub1d_params +.delay_slot +.swstall delay_slot + 12 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 16 0x00 0x00 NOPX +.label _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS4_E__end last +.label __ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS4_E___func_end0 last +.delay_slot +.swstall delay_slot + 18 0x00 0x00 NOPX + +.undef global data sub1d_params + +.text_segment_name +.text global 10 _Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.label __Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0 +.function_start + 0 0x00 0x00 0x08 0xc0 0x00 0x44 MOVXM p4, #_ZL9curr_iter + 6 0x80 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p4]; MOV r17, CORE_ID + 12 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 18 0xff 0x3a 0xb0 0x23 0x14 0x81 0xca 0x60 0x79 0x3a ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 + 28 0xfd 0x83 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p0, [sp, #-20]; MOV r0, r15 + 36 0x0f 0xfc 0x15 0x98 ST r0, [sp, #-4] + 40 0x0f 0xf0 0x3d 0x98 ST lr, [sp, #-16] + 44 0x00 0x00 NOPX + 46 0x80 0x00 0x60 0x40 0x01 0x84 JNZ r16, #TGT_F_Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 +.delay_slot + 52 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 56 0x00 0x00 0x04 0xc0 0x00 0x44 MOVXM p2, #_ZL8core_row +.delay_slot + 62 0x40 0xc6 0x30 0x01 0x37 0x60 0x70 0x02 ST r17, [p2]; MOV p2, p7 +.delay_slot + 70 0x1b 0xd6 0xc0 0xf8 MOV r15, p3 +.delay_slot + 74 0xfe 0xa3 0xb0 0x00 0x00 0x03 0xb0 0x00 0x11 0x3a ST p2, [sp, #-12]; MOVXM p7, #sub1d_params +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 84 0x13 0x91 0x60 0x00 0x00 0x01 0x30 0x00 0x11 0x3a MOVS p0, p7; MOVXM p2, #_ZN12me_primitive11control_rndE +.aggressive_scheduled_block_id 1 +.noswbrkpt + 94 0x40 0xc0 0xe0 0x00 0x00 0x01 0x30 0x00 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #_ZN12me_primitive11control_satE +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 104 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 106 0x00 0x00 0x00 0x00 0x01 0x04 JL #_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 112 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 114 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 116 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 120 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 124 0x0a 0x06 0x11 0x98 ST r16, [p2] +.return_address + 128 0xe0 0xc2 0xd0 0x00 0x00 0x00 0xb0 0x00 0x10 0xba LDA r16, [p7]; MOVXM p1, #_ZL8core_row + 138 0x20 0xc6 0xd0 0x00 0x00 0x01 0xb0 0x00 0x10 0xba LDA r17, [p1]; MOVXM p3, #_ZL11ifm1_offset + 148 0xea 0xcb 0x50 0x00 0x00 0x00 0xb0 0x00 0x10 0xba LDA.u16 r18, [p7, #10]; MOVXM p1, #_ZL11ifm2_offset + 158 0x00 0x00 NOPX + 160 0x00 0x00 NOPX + 162 0x00 0x00 NOPX + 164 0x00 0x00 0x68 0x00 0x00 0x84 J #TGT_F_Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 +.delay_slot + 170 0x00 0x00 0x04 0xc0 0x00 0x44 MOVXM p2, #_ZL8num_iter +.delay_slot + 176 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 +.delay_slot + 180 0x0a 0x06 0x51 0x98 ST r18, [p2] +.delay_slot + 184 0x0b 0x06 0x11 0x98 ST r16, [p3] +.delay_slot + 188 0x09 0x06 0x11 0x98 ST r16, [p1] +.label TGT_F_Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 + 192 0x00 0x00 0x06 0xc0 0x00 0x44 MOVXM p3, #_ZL11ifm1_offset + 198 0x00 0x2c 0xf0 0x00 0x00 0x00 0xb0 0x00 0x10 0xba NOPA; MOVXM p1, #_ZL11ifm2_offset +.label TGT_F_Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 + 208 0x18 0x67 0x86 0x18 ADD.NC p0, r15, #12 + 212 0x1f 0xee 0xd0 0x00 0x00 0x01 0x30 0x00 0x10 0xba LDA r27, [p0], #-4; MOVXM p2, #_ZL9curr_iter + 222 0x00 0xfe 0x16 0x98 LDA r16, [p0], #-4 + 226 0x00 0xfe 0x36 0x98 LDA r17, [p0], #-4 + 230 0x02 0x06 0x56 0x98 LDA r18, [p2] + 234 0x00 0x46 0x76 0x98 LDA r19, [p0, #16] + 238 0x00 0x00 NOPX + 240 0x00 0x00 NOPX + 242 0x00 0x00 NOPX + 244 0x00 0x00 NOPX + 246 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 250 0x00 0xc2 0x39 0x40 0x0e 0x5c ST r16, [p0]; ADD r16, r18, #1 + 256 0x0a 0x06 0x11 0x98 ST r16, [p2] + 260 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 264 0x00 0x00 NOPX + 266 0x00 0x00 NOPX + 268 0x00 0x00 NOPX + 270 0x14 0xd3 0x08 0x18 ACQ r19, r16 + 274 0x1a 0x67 0x06 0x18 ADD.NC p2, r14, #12 + 278 0x00 0x00 NOPX + 280 0x00 0x00 NOPX + 282 0x02 0xff 0x76 0x98 LDA r27, [p2], #-4 + 286 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4 + 290 0x02 0xfe 0x56 0x98 LDA r18, [p2], #-4 + 294 0x02 0x56 0x76 0x98 LDA r19, [p2, #20] + 298 0x00 0x00 NOPX + 300 0x00 0x00 NOPX + 302 0x00 0x00 NOPX + 304 0x00 0x00 NOPX + 306 0x00 0x00 NOPX + 308 0x14 0xa3 0x12 0x18 SEL.EQZ r17, r18, r17, r27 + 312 0x0a 0x06 0x31 0x98 ST r17, [p2] + 316 0x00 0x00 NOPX + 318 0x00 0x00 NOPX + 320 0x00 0x00 NOPX + 322 0x00 0x00 NOPX + 324 0x14 0xd3 0x08 0x18 ACQ r19, r16 + 328 0xd1 0x11 0x60 0x01 0x00 0x29 0xce 0x60 0x79 0x3a MOVS p6, p2; MOVX r16, #1; MOV r14, p6 + 338 0x00 0x00 NOPX + 340 0x00 0x00 NOPX + 342 0x07 0xee 0x19 0x18 LDA p4, [sp, #-20] + 346 0x60 0xc6 0xdf 0xd8 0x3b 0x0c LDA r17, [p3]; ST p0, [sp, #-20] + 352 0x20 0xd2 0xd6 0xdd 0x81 0xd4 LDA r20, [p1]; MOV p3, p7 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 358 0x02 0x4e 0x56 0x98 LDA r18, [p2], #16 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 362 0x00 0x5d 0x1e 0x98 LDA p2, [p0], #20 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 366 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 370 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 372 0x04 0x06 0x76 0x98 LDA r19, [p4] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 376 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 378 0x00 0x00 0x00 0x00 0x01 0x04 JL #_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS4_E +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 384 0x1b 0xd4 0xc0 0xf8 MOV r15, p2 +.delay_slot + 388 0x14 0x63 0x0d 0x98 LSHL r17, r17, r16 +.delay_slot + 392 0x15 0x21 0x0d 0x98 LSHL r16, r20, r16 +.delay_slot + 396 0x19 0x69 0x41 0x58 ADD.NC p1, r18, r16 +.delay_slot + 400 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xe2 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV +.return_address + 416 0xc8 0xc6 0xd0 0x01 0x00 0x28 0xb3 0xd0 0x78 0xba LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 + 426 0x00 0x00 0x0c 0xc0 0x00 0x44 MOVXM p6, #_ZL8num_iter + 432 0x00 0x00 NOPX + 434 0x00 0x00 NOPX + 436 0x00 0x00 NOPX + 438 0x00 0x00 NOPX + 440 0x00 0x00 NOPX + 442 0x14 0x51 0x08 0x18 REL r17, r16 + 446 0x01 0xf6 0x36 0x98 LDA r17, [p1, #-4] + 450 0x07 0xed 0x19 0x18 LDA p2, [sp, #-20] + 454 0x00 0x00 NOPX + 456 0x00 0x00 NOPX + 458 0x00 0x00 NOPX + 460 0x00 0x00 NOPX + 462 0x00 0x00 NOPX + 464 0x14 0x23 0x11 0x98 SUB r17, r16, r17 + 468 0x4a 0xc6 0xd3 0xec 0x63 0x0c LDA r17, [p2, #20]; ST r17, [p1, #-4] + 474 0x00 0x00 NOPX + 476 0x00 0x00 NOPX + 478 0x00 0x00 NOPX + 480 0x00 0x00 NOPX + 482 0x00 0x00 NOPX + 484 0x00 0x00 NOPX + 486 0x14 0x51 0x08 0x18 REL r17, r16 + 490 0xfc 0xce 0xd0 0x00 0x00 0x00 0xb0 0x00 0x10 0xba LDA r19, [p7, #-8]; MOVXM p1, #_ZL9curr_iter + 500 0x06 0x06 0x56 0x98 LDA r18, [p6] + 504 0x01 0x06 0x36 0x98 LDA r17, [p1] + 508 0x00 0x00 NOPX + 510 0x00 0x00 NOPX + 512 0x00 0x00 NOPX + 514 0x00 0x00 NOPX + 516 0x14 0x21 0x31 0x98 SUB r16, r16, r19 + 520 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8] + 524 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 528 0x80 0x01 0x18 0x40 0x01 0x84 JNZ r16, #TGT_F_Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 +.delay_slot +.swstall delay_slot + 534 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 536 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 538 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 540 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 542 0x00 0x00 NOPX + 544 0x10 0x20 0x01 0x18 MOVX r16, #0 + 548 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x83 0x08 0xc1 0x36 NOPA; NOPB; ST r16, [p1]; NOPX +.label TGT_F_Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 + 560 0x07 0xf0 0x39 0x18 LDA lr, [sp, #-16] + 564 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] + 568 0x07 0xf7 0x99 0x18 LDA p7, [sp, #-12] +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 572 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.noswbrkpt + 574 0x07 0xf9 0xd1 0x18 LDA r14, [sp, #-8] +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 578 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 580 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 582 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 586 0x0e 0x8e 0x0b 0x18 MOVS p6, r14 +.delay_slot + 590 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 596 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 598 0x00 0x00 NOPX +.label _Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end last +.label __Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0 last +.delay_slot +.swstall delay_slot + 600 0x00 0x00 NOPX + +.undef global data _ZN12me_primitive11control_satE + +.undef global data _ZN12me_primitive11control_rndE + +.undef global data sub1d_params + +.undef global text _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv + +.undef global text _ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS4_E + +.undef global data conv2d_dw_params + +.undef global data _ZN12me_primitive11control_rndE + +.undef global text _ZN12me_primitive10udiv_dstepEjjRjS0_ + +.text_segment_name +.text weak 10 _Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params +.label __Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params___func_begin0 +.function_start + 0 0xf1 0x18 0x80 0x3b 0x68 0x00 0x00 0x02 0x30 0x36 0x10 0xb6 MOVA m6, #-120; VLDB x6, [p0], #64; MOVXM p4, #(conv2d_dw_params + 108) + 12 0x9f 0xa8 0xd0 0x38 0xe8 0x00 0x12 0x0a 0x80 0x80 0x58 0xb6 LDA dj2, [p4], #-4; VLDB x1, [p0], #64; MOVX r1, #16; MOV m5, #128 + 24 0x87 0xa4 0xd0 0x00 0x07 0x8a 0x07 0x90 0x58 0xba LDA dn2, [p4], #12; MOVX r0, #60; MOV m4, #-112 + 34 0x9f 0xe8 0xd0 0x00 0x24 0x0a 0x60 0x00 0x58 0xba LDA dj6, [p4], #-4; MOVX r2, #32; MOV dc4, #0 + 44 0x85 0xe4 0xd7 0x10 0x4b 0x00 0x00 0x00 0x78 0x78 0x10 0x76 LDA dn6, [p4], #8; MOVS dc7, dc4; MOVXM ls, #(ZLS_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_240 + 0) + 56 0x85 0xa0 0xd2 0x10 0x4b 0x00 0x00 0x01 0xb8 0xa0 0x10 0x76 LDA m2, [p4], #8; MOVS dc2, dc4; MOVXM le, #(ZLE_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_320 + 0) + 68 0x9f 0x88 0xd6 0x10 0x4b 0x00 0x00 0x02 0xb0 0x00 0x10 0x76 LDA dj0, [p4], #-4; MOVS dc6, dc4; MOVXM p5, #_ZN12me_primitive11control_rndE + 80 0x87 0x84 0xd1 0x10 0x4b 0x00 0x36 0x09 0xe4 0xc0 0x78 0x76 LDA dn0, [p4], #12; MOVS dc1, dc4; MOVX r3, #48; MOV dc3, dc4 + 92 0x9f 0xc8 0xd0 0x10 0x4b 0x01 0x18 0x43 0x62 0xba LDA dj4, [p4], #-4; MOVS dc0, dc4; VSHIFT x4, x6, x1, r1 + 102 0x85 0xc4 0xdb 0x93 0x01 0xd4 LDA dn4, [p4], #8; MOV dc5, dc4 + 108 0x04 0x2c 0x06 0x98 LDA m0, [p4], #8 + 112 0x04 0xfc 0xc6 0x98 LDA dj1, [p4], #-4 + 116 0x87 0x94 0xd0 0xb1 0x68 0x3c LDA dn1, [p4], #12; VLDB.3D x2, [p0], d2 + 122 0x04 0xfe 0xc6 0x98 LDA dj5, [p4], #-4 + 126 0x04 0x2e 0xa6 0x98 LDA dn5, [p4], #8 + 130 0x04 0x2c 0x86 0x98 LDA m1, [p4], #8 + 134 0x04 0xff 0xc6 0x98 LDA dj7, [p4], #-4 + 138 0x04 0x2f 0xa6 0x98 LDA dn7, [p4], #8 + 142 0x04 0x2f 0x86 0x98 LDA m7, [p4], #8 + 146 0x04 0xfd 0xc6 0x98 LDA dj3, [p4], #-4 + 150 0x04 0x2d 0xa6 0x98 LDA dn3, [p4], #8 + 154 0x04 0xc9 0x86 0x98 LDA m3, [p4], m6 + 158 0x04 0xa8 0x96 0x98 LDA r4, [p4], m5 + 162 0x04 0x88 0xf2 0x98 LDA.s16 r7, [p4], m4 + 166 0x04 0x4e 0x06 0x98 LDA m4, [p4], #16 + 170 0x92 0x96 0xd3 0xe1 0xe8 0x3c LDA r5, [p4, #-28]; VLDB.2D x3, [p1], d7 + 176 0x02 0x04 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p2] + 180 0x00 0x00 NOPX + 182 0x05 0x04 0xc2 0x98 LDA.s8 r6, [p5] + 186 0x11 0x09 0xfb 0x18 ADD r4, r4, #-2 + 190 0x80 0xc6 0xd0 0x00 0x00 0x02 0x30 0xc0 0x10 0xba LDA r17, [p4]; MOVXM p4, #(TGT_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_384 + 0) + 200 0x18 0x1d 0x72 0xf8 VBCST.16 x0, r7 + 204 0x1d 0x1c 0x03 0x58 VEXTBCST.128 x10, x3, #0 + 208 0x1d 0x72 0xff 0x98 ADD.NC lc, r5, #-1 + 212 0x00 0x2c 0xf0 0x00 0x23 0x00 0x8a 0xe2 0x04 0x6d 0x41 0x66 NOPA; NOPB; VMOV cml3, cml0; VMAC.f dm4, dm3, x6, x10, r0 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 224 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x0d 0xd4 0x02 0x0e 0x03 0xa8 0x08 0x1a 0x0b NOPA; NOPB; NOPS; MOVX crRnd, r6; VEXTBCST.128 x8, x3, #1; VMAC.f dm1, dm0, x1, x10, r0 +.label ZLS_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_240 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.noswbrkpt + 240 0x03 0xb3 0x71 0xf0 0xf4 0x02 0x84 0x83 0x6e 0xba VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 250 0x00 0x38 0xea 0x9c 0x0b 0x46 0x02 0x89 0x01 0x4a VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 260 0x00 0xb1 0x6a 0x30 0x8a 0xc6 0x01 0x35 0x01 0x4a VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r2; VMAC.f dm1, dm1, x10, x8, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 270 0x1d 0x89 0x0a 0xd8 VSHIFT x11, x1, x2, r2 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 274 0x04 0xb0 0x8e 0xc6 0x04 0x48 0xa1 0x62 VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 282 0x03 0x9c 0x0f 0x46 0x02 0x36 0xa1 0x62 VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r0 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 290 0x1d 0x89 0x0e 0xd8 VSHIFT x11, x1, x2, r3 + 294 0x05 0x1c 0x03 0x46 0x03 0x92 0xe1 0x62 VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r0 + 302 0x04 0x1c 0x07 0x46 0x00 0x56 0xe1 0x62 VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r0 + 310 0x00 0x2c 0xf4 0x61 0x0d 0x94 NOPA; VSHIFT x4, x6, x1, r1 + 316 0x04 0x6d 0x41 0x48 VMAC.f dm4, dm3, x6, x10, r0 +.label ZLE_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_320 +.end_of_loop +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 320 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x08 0x1a 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r0 +.loop_nesting 0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 336 0x03 0x0c 0xf2 0x73 0x90 0x02 0x84 0x83 0x6e 0xba PADDA.3D [p0], d0; PADDB.2D [p2], d3; VSHIFT x10, x1, x2, r1 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 346 0x02 0x9c 0x0b 0x46 0x02 0x89 0x01 0x62 VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 354 0x02 0x30 0x8a 0xc6 0x01 0x35 0x01 0x62 VSHIFT x4, x6, x1, r2; VMAC.f dm1, dm1, x10, x8, r0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 362 0x1d 0x89 0x0a 0xd8 VSHIFT x11, x1, x2, r2 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 366 0x03 0x9c 0x0f 0x46 0x04 0x48 0xa1 0x62 VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 374 0x00 0x2c 0xf4 0xb0 0x8e 0xc2 0x02 0x36 0xa1 0x4a NOPA; VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r0 +.label TGT_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_384 +.loop_nesting 1 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 384 0x3e 0x1e 0x8b 0x12 0x1d 0xb4 VLDB.2D x3, [p1], d7; VSHIFT x11, x1, x2, r3 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 390 0x00 0x00 0x00 0x37 0x23 0x02 0x03 0x92 0xe1 0x5a MOVXM le, #(ZLE_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_560 + 0);VMAC.f dm3, dm4, x9, x7, r0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 400 0x40 0x85 0x70 0x00 0x00 0x0f 0x1e 0x02 0x00 0x56 0xe1 0x46 VLDA.CONV.fp32.bf16 cml0, [p2]; MOVXM ls, #(ZLS_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_480 + 0);VMAC.f dm0, dm2, x11, x7, r0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 412 0x1d 0x72 0xff 0x98 ADD.NC lc, r5, #-1 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 416 0x00 0x1d 0x9b 0x98 VLDA x6, [p0], #64 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 420 0x38 0x1c 0x74 0x18 VLDB x1, [p0], #64 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 424 0x38 0x58 0xb4 0x18 VLDB.3D x2, [p0], d2 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 428 0x0d 0x11 0x96 0x18 VCONV.bf16.fp32 x10, cml3 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 432 0x0b 0x10 0x16 0x18 VCONV.bf16.fp32 x6, cml0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 436 0x1c 0x50 0x6c 0xf8 VMAX_LT.bf16 x8, r16, x10, x0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 440 0x1d 0x53 0x44 0x78 VSHUFFLE x10, x10, x6, r17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 444 0x71 0x42 0x60 0x02 0xa8 0x36 0x70 0x02 VST x8, [p3], m4; VMAX_LT.bf16 x10, r16, x10, x0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 452 0x1d 0x1c 0x03 0x58 VEXTBCST.128 x10, x3, #0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 456 0x67 0x52 0x60 0x01 0x80 0x45 0x70 0x02 VST.3D x10, [p3], d1; VMOV cml3, cml0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 464 0x04 0x1c 0x07 0x46 0x04 0x6d 0x41 0x62 VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 472 0x02 0x30 0x86 0xc6 0x01 0x03 0x41 0x62 VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm0, x1, x10, r0 +.label ZLS_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_480 +.loop_nesting 2 +.begin_of_loop +.aggressive_scheduled_block_id 2 +.noswbrkpt + 480 0x03 0xb3 0x71 0xf0 0xf4 0x02 0x84 0x83 0x6e 0xba VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r1 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 490 0x00 0x38 0xea 0x9c 0x0b 0x46 0x02 0x89 0x01 0x4a VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 500 0x00 0xb1 0x6a 0x30 0x8a 0xc6 0x01 0x35 0x01 0x4a VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r2; VMAC.f dm1, dm1, x10, x8, r0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 510 0x1d 0x89 0x0a 0xd8 VSHIFT x11, x1, x2, r2 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 514 0x04 0xb0 0x8e 0xc6 0x04 0x48 0xa1 0x62 VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 522 0x03 0x9c 0x0f 0x46 0x02 0x36 0xa1 0x62 VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r0 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 530 0x1d 0x89 0x0e 0xd8 VSHIFT x11, x1, x2, r3 + 534 0x05 0x1c 0x03 0x46 0x03 0x92 0xe1 0x62 VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r0 + 542 0x04 0x1c 0x07 0x46 0x00 0x56 0xe1 0x62 VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r0 + 550 0x00 0x2c 0xf4 0x61 0x0d 0x94 NOPA; VSHIFT x4, x6, x1, r1 + 556 0x04 0x6d 0x41 0x48 VMAC.f dm4, dm3, x6, x10, r0 +.label ZLE_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_560 +.end_of_loop +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 560 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x08 0x1a 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r0 +.loop_nesting 1 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 576 0x03 0x0c 0xf4 0xe7 0x20 0x08 0x49 0x02 0x84 0x83 0x68 0xb6 PADDA.3D [p0], d0; PADDB.2D [p2], d3; JNZD r4, r4, p4; VSHIFT x10, x1, x2, r1 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 588 0x02 0x9c 0x0b 0x46 0x02 0x89 0x01 0x62 VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r0 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 596 0x02 0x30 0x8a 0xc6 0x01 0x35 0x01 0x62 VSHIFT x4, x6, x1, r2; VMAC.f dm1, dm1, x10, x8, r0 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 604 0x1d 0x89 0x0a 0xd8 VSHIFT x11, x1, x2, r2 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 608 0x03 0x9c 0x0f 0x46 0x04 0x48 0xa1 0x62 VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r0 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 616 0x04 0xb0 0x8e 0xc6 0x02 0x36 0xa1 0x62 VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r0 +.loop_nesting 0 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 624 0x1d 0x89 0x0e 0xd8 VSHIFT x11, x1, x2, r3 + 628 0x03 0x92 0xe1 0x48 VMAC.f dm3, dm4, x9, x7, r0 + 632 0x00 0x56 0xe1 0x48 VMAC.f dm0, dm2, x11, x7, r0 + 636 0x00 0x00 NOPX + 638 0x00 0x00 NOPX + 640 0x00 0x00 NOPX + 642 0x00 0x00 NOPX + 644 0x0d 0x11 0x96 0x18 VCONV.bf16.fp32 x10, cml3 + 648 0x62 0x02 0xc0 0x50 0x00 0x5c VCONV.bf16.fp32 x6, cml0; RET lr +.delay_slot + 654 0x1c 0x50 0x6c 0xf8 VMAX_LT.bf16 x8, r16, x10, x0 +.delay_slot + 658 0x1d 0x53 0x44 0x78 VSHUFFLE x10, x10, x6, r17 +.delay_slot + 662 0x1d 0x50 0x6c 0xf8 VMAX_LT.bf16 x10, r16, x10, x0 +.delay_slot + 666 0x0b 0x8a 0x13 0x18 VST x8, [p3], m4 +.label _Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params__end last +.label __Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params___func_end0 last +.delay_slot + 670 0x0b 0x3a 0x93 0x18 VST.3D x10, [p3], d1 + +.undef global data conv2d_dw_params + +.undef global data _ZN12me_primitive11control_rndE + +.text_segment_name +.text weak 10 _Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params +.label __Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params___func_begin0 +.function_start + 0 0xfb 0x90 0x82 0x39 0x68 0x00 0x00 0x01 0xb0 0x6c 0x10 0xb6 MOVA m4, #-36; VLDB x2, [p1], #64; MOVXM p3, #(conv2d_dw_params + 216) + 12 0x63 0x84 0xd4 0x38 0x68 0x3e 0x47 0x68 0x68 0x01 0x58 0xb6 LDA dn0, [p3], #4; VLDB x0, [p2], #64; MOVX r4, #-5; MOV r3, #1 + 24 0x63 0x88 0xd0 0x00 0x00 0x00 0x78 0x38 0x10 0xba LDA dj0, [p3], #4; MOVXM ls, #(ZLS_F_Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params_112 + 0) + 34 0x63 0xc4 0xd0 0x00 0x00 0x01 0xb8 0x68 0x10 0xba LDA dn4, [p3], #4; MOVXM le, #(ZLE_F_Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params_208 + 0) + 44 0x63 0xc8 0xd0 0x00 0x16 0x48 0x08 0x12 0x58 0xba LDA dj4, [p3], #4; MOVX r1, #50; MOV r0, #18 + 54 0x63 0x80 0xd0 0x08 0x9a 0x2c LDA m0, [p3], #4; MOVX r2, #19 + 60 0x03 0x1c 0x66 0x98 LDA dc0, [p3], #4 + 64 0x03 0x8a 0x66 0x98 LDA dc4, [p3], m4 + 68 0x03 0x04 0xb6 0x98 LDA r5, [p3] + 72 0x03 0x24 0xd6 0x98 LDA r6, [p3, #8] + 76 0x00 0x00 NOPX + 78 0x00 0x00 NOPX + 80 0x00 0x00 NOPX + 82 0x00 0x00 NOPX + 84 0x00 0x00 NOPX + 86 0x11 0x48 0x4d 0x98 LSHL r4, r5, r4 + 90 0x30 0xc7 0xba 0xe4 0xff 0x24 LSHL r3, r6, r3; ADD.NC lc, r4, #-1 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 96 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0xc0 0xd0 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; MOV dj1, r3; NOPV +.label ZLS_F_Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params_112 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.noswbrkpt + 112 0x43 0x83 0x72 0x39 0x6c 0x80 0x8b 0x00 0x00 0x00 0x48 0x02 0x38 0x00 0x00 0xe1 VLDA x0, [p2], #64; VLDB x2, [p1], #64; MOVS p4, p0; NOPX; VSHUFFLE x1, x2, x0, r1; NOPV +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 128 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0xc0 0x02 0x38 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VSHUFFLE x3, x0, x0, r1; NOPV + 144 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x44 0xc0 0xe8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmlh0, x1, x3, r0; NOPV + 160 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x04 0xc4 0xe8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x1, x3, r2; NOPV + 176 0x00 0x2c 0xf0 0x00 0x20 0x18 0x26 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST.3D bmlh0, [p0], d0; NOPX; NOPM; NOPV + 192 0x00 0x2c 0xf0 0x00 0x24 0x20 0x06 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmll0, [p4, dj1]; NOPX; NOPM; NOPV +.label ZLE_F_Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params_208 +.end_of_loop + 208 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.loop_nesting 0 + 224 0x90 0x11 0x60 0x01 0x40 0x00 0x48 0x02 0x39 0x3a MOVS p4, p0; RET lr; VSHUFFLE x1, x2, x0, r1 +.delay_slot + 234 0x19 0x80 0x04 0x78 VSHUFFLE x3, x0, x0, r1 +.delay_slot + 238 0x18 0x89 0x81 0xd8 VSHUFFLE bmlh0, x1, x3, r0 +.delay_slot + 242 0x18 0x09 0x89 0xd8 VSHUFFLE bmll0, x1, x3, r2 +.delay_slot + 246 0x08 0x18 0x26 0x98 VST.3D bmlh0, [p0], d0 +.label _Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params__end last +.label __Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params___func_end0 last +.delay_slot + 250 0x0c 0x20 0x06 0x98 VST bmll0, [p4, dj1] + +.undef global data conv2d_dw_params + +.text_segment_name +.text weak 10 _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params +.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_begin0 +.function_start + 0 0x20 0x93 0xd0 0x01 0x10 0x28 0x07 0x3f 0x58 0xba LDA p1, [p1]; MOVX r17, #1; MOV m0, #-193 + 10 0xe6 0x04 0x80 0x00 0x00 0x02 0x30 0x64 0x10 0xba MOVA m1, #-208; MOVXM p4, #(conv2d_dw_params + 200) + 20 0x81 0x42 0xd0 0x03 0x25 0x54 LDA r16, [p4], m0; MOV m0, #201 + 26 0x04 0x0a 0x6a 0x98 LDA.u8 r19, [p4], m0 + 30 0x04 0x2a 0x56 0x98 LDA r18, [p4], m1 + 34 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 40 0x00 0x83 0xdf 0xf0 0x7b 0x0c LDA p0, [p0]; ST lr, [sp, #-8] +.aggressive_scheduled_block_id 1 +.noswbrkpt + 46 0x40 0xbe 0xdf 0xe2 0x3b 0x0c LDA r15, [p2]; ST p2, [sp, #-16] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 52 0x0f 0xff 0x9d 0x98 ST p7, [sp, #-4] +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 56 0xfd 0xe3 0xb0 0x00 0x00 0x00 0x00 0x00 0x41 0x3a ST p6, [sp, #-20]; JL #_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 66 0xfe 0xbe 0xb0 0x27 0x08 0x7d 0x31 0x60 0x79 0x3a ST r15, [sp, #-12]; MUL r16, r19, r16; MOV p2, p1 +.delay_slot + 76 0x1e 0x68 0xc0 0xf8 MOV p6, p4 +.delay_slot + 80 0xfd 0x13 0xb8 0x42 0x3b 0x5c ST p1, [sp, #-24]; LSHL r16, r16, r17 +.delay_slot + 86 0xf0 0x11 0x60 0x25 0x08 0xec 0x04 0x10 0x79 0x3a MOVS p7, p0; LSHL r16, r18, r17; MOV m0, r16 +.delay_slot + 96 0x00 0x2c 0xf2 0x17 0x20 0x01 0x5b 0x00 0x00 0x01 0xb3 0xe0 0xa8 0x00 0x00 0xe1 NOPA; PADDB [p1], m0; NOPS; NOPX; ADD.NC p3, r15, r16; NOPV +.return_address + 112 0xce 0xc1 0x50 0x44 0x12 0x2c LDA.u8 r16, [p6, #7]; MOVX r17, #2 + 118 0x00 0x00 NOPX + 120 0x00 0x00 NOPX + 122 0x00 0x00 NOPX + 124 0x00 0x00 NOPX + 126 0x00 0x00 NOPX + 128 0x00 0x00 NOPX + 130 0x14 0x61 0x08 0x98 NE r16, r17, r16 + 134 0x80 0x00 0xa0 0x40 0x01 0x84 JNZ r16, #TGT_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_320 +.delay_slot + 140 0x00 0x00 0x08 0xc1 0x90 0x44 MOVXM p4, #(conv2d_dw_params + 200) +.delay_slot +.swstall delay_slot + 146 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 148 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 150 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 152 0x00 0x00 NOPX + 154 0x9f 0xc2 0xd0 0x00 0x00 0x28 0x07 0x30 0x58 0xba LDA r16, [p4], #-4; MOVX r0, #1; MOV m0, #-208 + 164 0x04 0x2e 0xb6 0x98 LDA r21, [p4], #8 + 168 0x04 0x1e 0x56 0x98 LDA r18, [p4], #4 + 172 0xfd 0x4e 0x20 0xd1 0x81 0xd4 LDA r19, [sp, #-24]; MOV p0, p4 + 178 0x81 0x52 0xd0 0x9c 0x8b 0x03 0xb0 0x60 0x72 0xba LDA r20, [p4], m0; MOVS p0, p7; MOV p7, p0 + 188 0x07 0xef 0x19 0x18 LDA p6, [sp, #-20] + 192 0x00 0x00 NOPX + 194 0x14 0x23 0x1d 0x98 LSHL r17, r16, r17 + 198 0x14 0x20 0x0d 0x98 LSHL r16, r16, r0 +.no_stack_arguments + 202 0x00 0x00 0x00 0x00 0x01 0x04 JL #_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params +.delay_slot + 208 0x94 0x81 0xbb 0x33 0x8a 0xa4 LSHL r18, r18, r0; ADD.NC r22, r19, r17 +.delay_slot + 214 0xac 0x41 0xba 0xaf 0x92 0xa4 LSHL r17, r21, r0; ADD.NC r21, r15, r18 +.delay_slot + 220 0xa4 0x81 0xb2 0xd1 0xb2 0xa4 LSHL r18, r20, r0; ADD.NC p1, r17, r22 +.delay_slot + 226 0x1a 0x69 0xc1 0x58 ADD.NC p2, r19, r16 +.delay_slot + 230 0x00 0x2c 0xf0 0x00 0x10 0x01 0xb5 0x64 0xae 0xba NOPA; NOPB; ADD.NC p3, r21, r18 +.return_address + 240 0xfe 0x43 0x20 0x00 0x00 0x28 0x07 0x34 0x58 0xba LDA p4, [sp, #-16]; MOVX r0, #1; MOV m0, #-204 + 250 0x07 0xfe 0x16 0x98 LDA r16, [p7], #-4 + 254 0xff 0xf3 0x26 0xdd 0x81 0xd4 LDA p7, [sp, #-4]; MOV p3, p7 + 260 0x03 0x0a 0x36 0x98 LDA r17, [p3], m0 + 264 0x07 0xf8 0x39 0x18 LDA lr, [sp, #-8] + 268 0x07 0xf5 0xf1 0x18 LDA r15, [sp, #-12] + 272 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 + 278 0x04 0x06 0x56 0x98 LDA r18, [p4] + 282 0x14 0x20 0x0d 0x98 LSHL r16, r16, r0 + 286 0x00 0x00 NOPX + 288 0x00 0x00 NOPX + 290 0x00 0x00 NOPX +.tail_call + 292 0x00 0x00 0x00 0x00 0x00 0x84 J #_Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params +.delay_slot + 298 0x14 0x62 0x0d 0x98 LSHL r17, r17, r0 +.delay_slot + 302 0x1c 0x58 0xc9 0x58 ADD.NC r17, r17, r18 +.delay_slot + 306 0x19 0x69 0x41 0x58 ADD.NC p1, r18, r16 +.delay_slot + 310 0x18 0x69 0x20 0xf8 MOV p0, r18 +.delay_slot + 314 0x00 0x2c 0xf4 0xd1 0x82 0x94 NOPA; ADD.NC p2, r17, r16 +.label TGT_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_320 +.return_address + 320 0x07 0xf8 0x39 0x18 LDA lr, [sp, #-8] + 324 0x07 0xf5 0xf1 0x18 LDA r15, [sp, #-12] + 328 0x07 0xff 0x99 0x18 LDA p7, [sp, #-4] + 332 0x07 0xef 0x19 0x18 LDA p6, [sp, #-20] + 336 0x00 0x00 NOPX + 338 0x00 0x00 NOPX + 340 0x00 0x00 NOPX + 342 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 346 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 352 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 354 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 356 0x00 0x00 NOPX +.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params__end last +.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_end0 last +.delay_slot +.swstall delay_slot + 358 0x00 0x00 NOPX + +.undef global data conv2d_dw_params + +.undef global text _Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params + +.undef global text _Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params + +.text_segment_name +.text global 10 _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.function_start + 0 0x00 0x00 0x08 0xc0 0x00 0x44 MOVXM p4, #_ZL9curr_iter + 6 0x80 0xc2 0xd0 0x2f 0x41 0xd4 LDA r16, [p4]; MOV r0, r15 + 12 0x00 0x10 0x00 0x00 0x01 0xc4 PADDXM [sp], #128 + 18 0xff 0x3a 0xb0 0x02 0x2d 0x70 0x70 0x02 ST r14, [sp, #-8]; MOV r17, CORE_ID + 26 0xff 0xb6 0xb0 0x01 0xa8 0xf0 0x70 0x02 ST r13, [sp, #-4]; MOV r13, lr + 34 0x0f 0xec 0x1d 0x98 ST p0, [sp, #-20] + 38 0x0f 0xf7 0x9d 0x98 ST p7, [sp, #-12] + 42 0xfe 0x02 0xb0 0x01 0xca 0x60 0x70 0x02 ST r0, [sp, #-16]; MOV r14, p2 + 50 0x80 0x00 0x58 0x40 0x01 0x84 JNZ r16, #TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_176 +.delay_slot + 56 0x1b 0xd6 0xc0 0xf8 MOV r15, p3 +.delay_slot + 60 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17 +.delay_slot + 64 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 68 0x00 0x00 0x06 0xc0 0x00 0x44 MOVXM p3, #_ZL8core_row +.delay_slot + 74 0x0b 0x06 0x31 0x98 ST r17, [p3] +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 78 0xf0 0x91 0x60 0x00 0x00 0x00 0xb0 0x00 0x11 0x3a MOVS p7, p1; MOVXM p1, #_ZN12me_primitive11control_rndE +.aggressive_scheduled_block_id 1 +.noswbrkpt + 88 0x20 0xc0 0xe0 0x88 0x8b 0x00 0x00 0x00 0xb0 0x00 0x10 0x76 ST.s8 r16, [p1]; MOVS p0, p2; MOVXM p1, #_ZN12me_primitive11control_satE +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 100 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 102 0x00 0x00 0x00 0x00 0x01 0x04 JL #_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 108 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 110 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 112 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 116 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 120 0x20 0xc2 0x30 0x00 0x01 0xa5 0x70 0x02 ST r16, [p1]; NOPM +.return_address + 128 0x33 0x91 0x60 0x01 0x33 0x82 0x00 0x02 MOVS p1, p7; ADD.NC p2, r14, #8 + 136 0x02 0x06 0x3a 0x98 LDA.u16 r17, [p2] + 140 0x44 0xc3 0x50 0x00 0x00 0x01 0x30 0x00 0x10 0xba LDA.u16 r16, [p2, #4]; MOVXM p2, #_ZL8num_iter + 150 0x00 0x00 NOPX + 152 0x00 0x00 0x60 0x00 0x00 0x84 J #TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_192 +.delay_slot + 158 0x00 0x00 0x06 0xc0 0x00 0x44 MOVXM p3, #_ZL10ifmsv_size +.delay_slot +.swstall delay_slot + 164 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 166 0x00 0x00 NOPX +.delay_slot + 168 0x0b 0x06 0x31 0x98 ST r17, [p3] +.delay_slot + 172 0x0a 0x06 0x11 0x98 ST r16, [p2] +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_176 + 176 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x01 0xb0 0x00 0x10 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVXM p3, #_ZL10ifmsv_size; NOPV +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_192 + 192 0x1a 0x67 0x86 0x18 ADD.NC p2, r15, #12 + 196 0x5f 0xee 0xd0 0x00 0x00 0x02 0x30 0x00 0x10 0xba LDA r27, [p2], #-4; MOVXM p4, #_ZL8core_row + 206 0x02 0xfe 0x16 0x98 LDA r16, [p2], #-4 + 210 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4 + 214 0x02 0x46 0x56 0x98 LDA r18, [p2, #16] + 218 0x00 0x00 NOPX + 220 0x00 0x00 NOPX + 222 0x00 0x00 NOPX + 224 0x00 0x00 NOPX + 226 0x00 0x00 NOPX + 228 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 232 0x0a 0x06 0x11 0x98 ST r16, [p2] + 236 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 240 0x00 0x00 NOPX + 242 0x00 0x00 NOPX + 244 0x00 0x00 NOPX + 246 0x14 0x93 0x08 0x18 ACQ r18, r16 + 250 0x00 0x2f 0x00 0x00 0x00 0x03 0xb0 0x00 0x10 0xba MOVA r15, #1; MOVXM p7, #_ZL9curr_iter + 260 0x06 0x00 0x28 0x2b 0xc1 0xe4 MOVX r24, #0; MOV r16, sp + 266 0x18 0x68 0x5a 0x18 ADD.NC p0, r16, #-76 + 270 0xfd 0xd3 0x27 0x29 0x81 0xd4 LDA p5, [sp, #-20]; MOV r14, p2 + 276 0x04 0x06 0x36 0x98 LDA r17, [p4] + 280 0x60 0xc2 0xd0 0x00 0x00 0x01 0xb0 0x00 0x10 0xba LDA r16, [p3]; MOVXM p3, #conv2d_dw_params + 290 0x07 0x06 0x56 0x98 LDA r18, [p7] + 294 0x00 0x00 NOPX + 296 0x00 0x00 NOPX + 298 0x00 0x00 NOPX + 300 0x05 0x06 0x76 0x98 LDA r19, [p5] + 304 0x00 0x00 NOPX + 306 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 + 310 0x14 0xa2 0x07 0x18 ADD r17, r18, #1 + 314 0x14 0x20 0xfd 0x98 LSHL r16, r16, r15 +.no_stack_arguments + 318 0x00 0x00 0x00 0x00 0x01 0x04 JL #_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params +.delay_slot + 324 0x0f 0x06 0x31 0x98 ST r17, [p7] +.delay_slot + 328 0x18 0x49 0xc1 0x58 ADD.NC dn0, r19, r16 +.delay_slot + 332 0x0f 0xb4 0x25 0x98 ST dn0, [sp, #-76] +.delay_slot + 336 0x0f 0xbb 0x15 0x98 ST r24, [sp, #-72] +.delay_slot + 340 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x03 0xdf 0x8a 0xc1 0x36 NOPA; NOPB; ST r24, [sp, #-68]; NOPX +.return_address + 352 0x1a 0x67 0x0a 0x18 ADD.NC p2, r14, #20 + 356 0x02 0x06 0x16 0x98 LDA r16, [p2] + 360 0x00 0x00 NOPX + 362 0x00 0x00 NOPX + 364 0x00 0x00 NOPX + 366 0x00 0x00 NOPX + 368 0x00 0x00 NOPX + 370 0x00 0x00 NOPX + 372 0x14 0x10 0xf8 0x18 REL r16, r15 + 376 0x5c 0xc2 0xd0 0x00 0x00 0x00 0xb0 0x00 0x10 0xba LDA r16, [p2, #-8]; MOVXM p1, #_ZL8num_iter + 386 0x01 0x06 0x56 0x98 LDA r18, [p1] + 390 0x07 0x06 0x36 0x98 LDA r17, [p7] + 394 0x07 0xf4 0x99 0x18 LDA p1, [sp, #-12] + 398 0x07 0xf9 0xd1 0x18 LDA r14, [sp, #-8] + 402 0x00 0x00 NOPX + 404 0x00 0x00 NOPX + 406 0x13 0xe1 0x01 0x98 SUB r16, r15, r16 + 410 0x0a 0xe6 0x11 0x98 ST r16, [p2, #-8] + 414 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 418 0x80 0x00 0xe0 0x40 0x01 0x84 JNZ r16, #TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.delay_slot + 424 0x10 0x30 0x01 0x18 MOVX r24, #0 +.delay_slot +.swstall delay_slot + 428 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 430 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 432 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 434 0x00 0x00 NOPX + 436 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x03 0x83 0x88 0xc1 0x36 NOPA; NOPB; ST r24, [p7]; NOPX +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 + 448 0xff 0xb6 0x2e 0xed 0x41 0xd4 LDA r13, [sp, #-4]; MOV lr, r13 + 454 0x07 0xf1 0xf1 0x18 LDA r15, [sp, #-16] + 458 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 462 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128 +.delay_slot +.swstall delay_slot + 468 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 470 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 472 0x00 0x00 NOPX +.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end last +.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 last +.delay_slot + 474 0x1f 0x62 0xc0 0xf8 MOV p7, p1 + +.undef global data _ZN12me_primitive11control_satE + +.undef global data _ZN12me_primitive11control_rndE + +.undef global data conv2d_dw_params + +.undef global text _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params + +.undef global data gem_bfp_param + +.text_segment_name +.text weak 10 _Z8init_accILt1EEvPaS0_iii +.label __Z8init_accILt1EEvPaS0_iii___func_begin0 +.function_start + 0 0x00 0x00 0x04 0xc0 0x00 0x44 MOVXM p2, #_ZN12me_primitive11control_rndE + 6 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 12 0x02 0x04 0x82 0x98 LDA.s8 r4, [p2] + 16 0x00 0x00 0x01 0xe1 0x60 0x44 MOVXM ls, #(ZLS_F_Z8init_accILt1EEvPaS0_iii_176 + 0) + 22 0x00 0x1a 0x00 0x00 0x00 0x01 0xb8 0x78 0x10 0xba MOVA r26, #0; MOVXM le, #(ZLE_F_Z8init_accILt1EEvPaS0_iii_240 + 0) + 32 0xff 0x85 0x00 0x00 0x00 0x01 0xb0 0x40 0x10 0xba MOVA r5, #-4; MOVXM p3, #(TGT_F_Z8init_accILt1EEvPaS0_iii_128 + 0) + 42 0x00 0xa3 0x00 0x02 0x52 0xec 0x41 0xa8 0xb8 0xba MOVA r3, #5; LSHL r5, r1, r5; VINSERT.32 x1, x0, #0, r26 + 52 0x01 0xc7 0xb4 0xcb 0xc1 0xe4 LSHL r7, r0, r3; MOV p2, sp + 58 0x10 0xc7 0xb0 0x05 0x25 0xe4 LSHL r3, r2, r3; VMOV bmll0, x1 + 64 0x27 0x50 0x00 0xa5 0x81 0xe4 MOVX crRnd, r4; MOV r1, p1 + 70 0x08 0x02 0xc5 0xff 0x20 0x01 0x00 0x28 0x80 0xd0 0x78 0x36 PADDB [p2], #-64; VCONV.bf16.fp32 wl0, bmll0; MOVX r16, #1; MOV m1, r3 + 82 0xff 0xa6 0x00 0x05 0xb8 0x3c 0x6a 0x60 0x78 0xba MOVA r6, #-3; EQ r27, r2, r16; MOV r3, p2 + 92 0x00 0x0d 0xb1 0x02 0x06 0xa4 LSHL r0, r0, r6; VEXTBCST.16 x1, x0, #0 + 98 0x00 0x2c 0xf0 0x00 0x20 0xc2 0x12 0x00 0x3a 0x0f 0x30 0x11 0x60 0x7e NOPA; NOPB; MOVS p1, p0; SEL.EQZ r1, r3, r1, r27; MOV m0, r7 + 112 0x00 0x2c 0xf0 0x17 0x22 0x04 0x53 0x0a 0x2f 0xf9 0x30 0x50 0x78 0x00 0x00 0xe1 NOPA; PADDB [p0], m0; VST x1, [p2]; ADD r2, r5, #-1; MOV p2, r1; NOPV +.label TGT_F_Z8init_accILt1EEvPaS0_iii_128 +.loop_nesting 1 + 128 0x40 0x84 0x8a 0xe0 0x41 0xf4 VLDB wl0, [p2]; MOV lc, r0 + 134 0x00 0x00 NOPX + 136 0x00 0x00 NOPX + 138 0x00 0x00 NOPX + 140 0x00 0x00 NOPX + 142 0x00 0x00 NOPX + 144 0x00 0x00 NOPX + 146 0x18 0x01 0x22 0xf8 VMOV wh0, wl0 + 150 0x19 0x84 0x03 0x58 VEXTBCST.128 x3, x0, #0 + 154 0x18 0x84 0x07 0x58 VEXTBCST.128 x1, x0, #1 + 158 0x18 0x07 0x8a 0xf8 VCONV.fp32.bf16 cml0, x3 + 162 0x18 0x83 0x8a 0xf8 VCONV.fp32.bf16 cmh0, x1 + 166 0x19 0x00 0x12 0xf8 VMOV bmll1, bmll0 + 170 0x00 0x2c 0xf2 0x84 0x25 0xd4 NOPA; VMOV bmlh1, bmhl0 +.label ZLS_F_Z8init_accILt1EEvPaS0_iii_176 +.loop_nesting 2 +.begin_of_loop + 176 0x09 0x14 0x26 0x98 VST bmlh0, [p1, #64] + 180 0x09 0x2c 0x86 0x98 VST bmll1, [p1], #128 + 184 0x09 0x14 0x26 0x98 VST bmlh0, [p1, #64] + 188 0x09 0x2c 0x86 0x98 VST bmll1, [p1], #128 + 192 0x00 0x2c 0xf0 0x00 0x20 0x14 0x66 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmhh0, [p0, #64]; NOPX; NOPM; NOPV + 208 0x00 0x2c 0xf0 0x00 0x20 0x2c 0xa6 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmlh1, [p0], #128; NOPX; NOPM; NOPV + 224 0x00 0x2c 0xf0 0x00 0x20 0x14 0x66 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmhh0, [p0, #64]; NOPX; NOPM; NOPV +.label ZLE_F_Z8init_accILt1EEvPaS0_iii_240 +.end_of_loop + 240 0x00 0x2c 0xf0 0x00 0x20 0x2c 0xa6 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmlh1, [p0], #128; NOPX; NOPM; NOPV +.loop_nesting 1 + 256 0x21 0x0c 0xf4 0x57 0x20 0x84 0xe0 0x12 PADDA [p1], m0; PADDB [p2], m1; JNZD r2, r2, p3 +.delay_slot + 264 0x38 0x0b 0x90 0x18 PADDB [p0], m0 +.delay_slot +.swstall delay_slot + 268 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 270 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 272 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 274 0x00 0x00 NOPX +.loop_nesting 0 + 276 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 280 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 286 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 288 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 290 0x00 0x00 NOPX +.label _Z8init_accILt1EEvPaS0_iii__end last +.label __Z8init_accILt1EEvPaS0_iii___func_end0 last +.delay_slot +.swstall delay_slot + 292 0x00 0x00 NOPX + +.undef global data _ZN12me_primitive11control_rndE + +.text_segment_name +.text weak 10 _Z12post_processPai +.label __Z12post_processPai___func_begin0 +.function_start + 0 0x40 0x00 0x82 0x80 0x8b 0x00 0x00 0x00 0xb0 0x00 0x10 0x76 MOVA m0, #512; MOVS p2, p0; MOVXM p1, #_ZN12me_primitive11control_rndE + 12 0xff 0x21 0x00 0x00 0x20 0x08 0x88 0x01 0x58 0xba MOVA r1, #-7; MOVX r2, #0; MOV r4, #1 + 22 0x20 0xe0 0x51 0x80 0x8b 0x05 0x02 0x2c 0x68 0x07 0x58 0x76 LDA.s8 r24, [p1]; MOVS p1, p0; OR r16, r2, r4; MOV r3, #7 + 34 0x22 0x96 0xb0 0x00 0x10 0xec 0xa8 0x02 0x58 0xba VLDA bmlh1, [p1, #64]; LSHL r1, r0, r1; MOV r5, #2 + 44 0x21 0x12 0xb0 0x21 0x21 0xec 0x08 0x60 0x78 0xba VLDA bmll1, [p1], m0; LSHL r18, r16, r3; MOV r0, p0 + 54 0x00 0x66 0x00 0x0a 0x71 0x2d 0xb4 0x80 0xa8 0xba MOVA r6, #3; OR r7, r5, r2; ADD.NC p3, r18, r0 + 64 0x62 0x8e 0xb0 0x0f 0x31 0xee 0xb8 0x7f 0xc8 0xba VLDA bmhh0, [p3, #64]; LSHL r19, r7, r3; ADD.NC lc, r1, #-1 + 74 0x60 0x8a 0xb0 0x0d 0x11 0x2e 0x34 0xc0 0xa8 0xba VLDA bmhl0, [p3]; OR r17, r6, r2; ADD.NC p4, r19, r0 + 84 0x82 0x86 0xb0 0x23 0x41 0xec 0x48 0x81 0x08 0xba VLDA bmlh0, [p4, #64]; LSHL r20, r17, r3; ADD.NC r2, r2, #4 + 94 0x80 0x82 0xb0 0x31 0xd4 0x02 0xb5 0x00 0xa8 0xba VLDA bmll0, [p4]; MOVX crRnd, r24; ADD.NC p5, r20, r0 + 104 0xa2 0x9e 0xb0 0x00 0x00 0x00 0x78 0x48 0x10 0xba VLDA bmhh1, [p5, #64]; MOVXM ls, #(ZLS_F_Z12post_processPai_144 + 0) + 114 0xa0 0x9a 0xb0 0x00 0x00 0x01 0xb8 0x78 0x10 0xba VLDA bmhl1, [p5]; MOVXM le, #(ZLE_F_Z12post_processPai_240 + 0) + 124 0x10 0x02 0x19 0x18 MOVX r1, #6 + 128 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLS_F_Z12post_processPai_144 +.loop_nesting 1 +.begin_of_loop + 144 0x49 0x94 0x68 0x54 0x3b 0x5c VST.CONV.bf16.fp32 cml1, [p2], #256;LSHL r21, r16, r1 + 150 0x3d 0x83 0xb5 0x15 0x41 0xe4 LSHL r22, r7, r1; MOV dj2, r21 + 156 0x08 0x0c 0x60 0x23 0x70 0xec 0x45 0x90 0x79 0x3a VST.CONV.bf16.fp32 cmh0, [p0, dj2];LSHL r23, r17, r1; MOV dj0, r22 + 166 0x00 0x04 0x60 0x05 0x02 0x2c 0xc5 0xd0 0x79 0x3a VST.CONV.bf16.fp32 cml0, [p0, dj0];OR r16, r2, r4; MOV dj1, r23 + 176 0x11 0x4e 0x25 0x98 OR r7, r5, r2 + 180 0x04 0x1c 0x60 0x0d 0x11 0x2c 0x48 0x81 0x09 0x3a VST.CONV.bf16.fp32 cmh1, [p0, dj1];OR r17, r6, r2; ADD.NC r2, r2, #4 + 190 0x22 0x96 0xb8 0x48 0x7b 0x2c VLDA bmlh1, [p1, #64]; LSHL r18, r16, r3 + 196 0x21 0x12 0xb0 0x0f 0x31 0xed 0xb4 0x80 0xa8 0xba VLDA bmll1, [p1], m0; LSHL r19, r7, r3; ADD.NC p3, r18, r0 + 206 0x62 0x8e 0xb0 0x23 0x41 0xee 0x34 0xc0 0xa8 0xba VLDA bmhh0, [p3, #64]; LSHL r20, r17, r3; ADD.NC p4, r19, r0 + 216 0x60 0x8a 0xba 0xd4 0x02 0x94 VLDA bmhl0, [p3]; ADD.NC p5, r20, r0 + 222 0x04 0x14 0x35 0x98 VLDA bmlh0, [p4, #64] + 226 0x04 0x04 0x15 0x98 VLDA bmll0, [p4] + 230 0x05 0x14 0xf5 0x98 VLDA bmhh1, [p5, #64] + 234 0xa0 0x9a 0xb0 0x00 0x20 0x3c VLDA bmhl1, [p5]; NOPB +.label ZLE_F_Z12post_processPai_240 +.end_of_loop + 240 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.loop_nesting 0 + 256 0x49 0x94 0x68 0x54 0x3b 0x5c VST.CONV.bf16.fp32 cml1, [p2], #256;LSHL r21, r16, r1 + 262 0x05 0x00 0x05 0x15 0x41 0xe4 RET lr; MOV dj2, r21 +.delay_slot + 268 0x08 0x0c 0x63 0xd8 0x3b 0x5c VST.CONV.bf16.fp32 cmh0, [p0, dj2];LSHL r22, r7, r1 +.delay_slot + 274 0x8d 0xc3 0xb1 0x16 0x41 0xe4 LSHL r23, r17, r1; MOV dj0, r22 +.delay_slot + 280 0x00 0x04 0x60 0x00 0xc5 0xd0 0x70 0x02 VST.CONV.bf16.fp32 cml0, [p0, dj0]; MOV dj1, r23 +.delay_slot + 288 0x08 0x20 0xe3 0x18 VST.CONV.bf16.fp32 cmh1, [p0, dj1] +.label _Z12post_processPai__end last +.label __Z12post_processPai___func_end0 last +.delay_slot +.swstall delay_slot + 292 0x00 0x00 NOPX + +.undef global data _ZN12me_primitive11control_rndE + +.text_segment_name +.text weak 10 _Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params +.label __Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params___func_begin0 +.function_start + 0 0xda 0x90 0x84 0x9c 0x8b 0x00 0x00 0x03 0xb0 0xa0 0x10 0x76 MOVA m4, #-300; MOVS p4, p7; MOVXM p7, #(gem_bfp_param + 320) + 12 0x07 0x8a 0x16 0x98 LDA r16, [p7], m4 + 16 0x00 0x00 NOPX + 18 0x00 0x00 NOPX + 20 0x00 0x00 NOPX + 22 0x13 0x11 0x60 0x03 0x30 0x60 0x70 0x02 MOVS p0, p6; MOV p6, p0 + 30 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 36 0x0f 0xfc 0x1d 0x98 ST p0, [sp, #-4] + 40 0xfe 0x43 0xb0 0x20 0x00 0x10 0x10 0x00 0x61 0x3a ST p4, [sp, #-16]; JNZ r16, #TGT_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_128 +.delay_slot + 50 0x0f 0xf5 0x1d 0x98 ST p2, [sp, #-12] +.delay_slot + 54 0x0f 0xec 0x9d 0x98 ST p1, [sp, #-20] +.delay_slot + 58 0x00 0x32 0x07 0xf8 0x3d 0x80 0x00 0x00 0x30 0x00 0x10 0x76 MOVA r18, #1; ST lr, [sp, #-8]; MOVXM p0, #_ZN12me_primitive11control_satE +.delay_slot + 70 0x01 0x71 0x00 0x06 0x51 0x80 0x00 0x00 0x30 0x00 0x10 0x76 MOVA r17, #11; ST r18, [p0]; MOVXM p0, #_ZN12me_primitive11control_rndE +.delay_slot + 82 0x00 0xc4 0xe0 0x00 0x00 0x02 0xb0 0x00 0x10 0xba ST.s8 r17, [p0]; MOVXM p5, #gem_bfp_param + 92 0xa5 0x82 0xd0 0xc9 0x81 0xd4 LDA r0, [p5], #8; MOV p0, p2 + 98 0x05 0x04 0x36 0x98 LDA r1, [p5] + 102 0x05 0x14 0x56 0x98 LDA r2, [p5, #4] +.no_stack_arguments + 106 0x00 0x00 0x00 0x00 0x01 0x04 JL #_Z8init_accILt1EEvPaS0_iii +.delay_slot + 112 0x19 0x66 0xc0 0xf8 MOV p1, p3 +.delay_slot +.swstall delay_slot + 116 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 118 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 120 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 122 0x00 0x2c 0xf0 0x00 0x20 0x3c NOPA; NOPB +.label TGT_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_128 +.return_address + 128 0x17 0x10 0x00 0x3b 0x21 0x0a 0x00 0xdc 0x58 0xba MOVA r16, #184; MOVX r18, #-184; MOV m4, #220 + 138 0x07 0x8b 0x76 0x98 LDA r27, [p7], m4 + 142 0x1c 0xde 0xc0 0xf8 MOV r19, p7 + 146 0x1d 0x19 0xe4 0x18 ADD.NC r20, r19, #-56 + 150 0x1f 0x69 0x51 0x58 ADD.NC p7, r18, r20 + 154 0x10 0xe2 0xc1 0x18 MOVX r17, #240 + 158 0x00 0x00 NOPX + 160 0x00 0x00 NOPX + 162 0x15 0x25 0x32 0x18 SEL.EQZ r18, r20, r19, r27 + 166 0x84 0x22 0x46 0xd2 0x04 0x24 SEL.EQZ r16, r16, r17, r27; ADD.NC p3, r18, #4 + 172 0x63 0x98 0xd0 0x36 0x00 0x36 0x00 0x00 0x60 0xba LDA dj1, [p3], #4; JZ r27, #TGT_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_432 +.delay_slot + 182 0x63 0xd4 0xd7 0x10 0x41 0xd4 LDA dn5, [p3], #4; MOV dj3, r16 +.delay_slot + 188 0x03 0x1e 0xc6 0x98 LDA dj5, [p3], #4 +.delay_slot + 192 0x07 0x60 0xa6 0x98 LDA dn1, [p7, dj3] +.delay_slot + 196 0x03 0x06 0x16 0x98 LDA r16, [p3] +.delay_slot + 200 0x03 0x16 0x06 0x98 LDA m4, [p3, #4] + 204 0xfd 0xb3 0x20 0x00 0x00 0x01 0x30 0x10 0x10 0xba LDA p3, [sp, #-20]; MOVXM p2, #(gem_bfp_param + 32) + 214 0x40 0xce 0xd0 0x00 0x00 0x00 0x78 0x98 0x10 0xba LDA r19, [p2]; MOVXM ls, #(ZLS_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_304 + 0) + 224 0xff 0x54 0x00 0x00 0x00 0x01 0xb8 0xb8 0x10 0xba MOVA r20, #-6; MOVXM le, #(ZLE_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_368 + 0) + 234 0x04 0x5a 0x29 0x20 0xd5 0x64 MOVX r17, #52; MOV r18, #53 + 240 0x00 0x00 NOPX + 242 0x00 0x00 NOPX + 244 0x00 0x00 NOPX + 246 0x1a 0x66 0xc0 0xf8 MOV p2, p3 + 250 0x42 0x80 0xf9 0xce 0x9b 0x2c VLDA lfh0, [p2, #64]; LSHL r19, r19, r20 + 256 0x45 0x90 0xfa 0xf3 0xfe 0x14 VLDA lfl0, [p2], #128; ADD.NC lc, r19, #-2 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 262 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 264 0x02 0x14 0x07 0x98 VLDA lfh0, [p2, #64] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 268 0x02 0x2c 0x87 0x98 VLDA lfl0, [p2], #128 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 272 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 274 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 276 0x1c 0x21 0x92 0xf8 VMOV x8, lfh0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 280 0x00 0x2b 0x60 0x00 0x50 0xc9 0x70 0x02 NOPS; VMOV x1, lfh0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 288 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x12 0xc9 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VMOV x0, lfl0; NOPV +.label ZLS_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_304 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.noswbrkpt + 304 0x42 0x80 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0xc2 0x24 0x38 0x00 0x00 0xe1 VLDA lfh0, [p2, #64]; NOPB; NOPS; NOPX; VSHUFFLE x3, x0, x8, r18; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 320 0x45 0x90 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x82 0x22 0x38 0x00 0x00 0xe1 VLDA lfl0, [p2], #128; NOPB; NOPS; NOPX; VSHUFFLE x2, x0, x8, r17; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 336 0x00 0x2c 0xf0 0x00 0x23 0x14 0xd3 0x00 0x00 0x02 0x10 0xc9 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST x3, [p3, #64]; NOPX; VMOV x8, lfh0; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 352 0x00 0x2c 0xf0 0x00 0x23 0x2c 0x93 0x00 0x00 0x00 0x50 0xc9 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST x2, [p3], #128; NOPX; VMOV x1, lfh0; NOPV +.label ZLE_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_368 +.end_of_loop +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 368 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x12 0xc9 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VMOV x0, lfl0; NOPV +.loop_nesting 0 + 384 0x19 0x84 0x48 0x78 VSHUFFLE x3, x0, x8, r18 + 388 0x19 0x04 0x44 0x78 VSHUFFLE x2, x0, x8, r17 + 392 0x62 0x9a 0x60 0x02 0x10 0xc9 0x70 0x02 VST x3, [p3, #64]; VMOV x8, lfh0 + 400 0x65 0x92 0x60 0x00 0x12 0xc9 0x70 0x02 VST x2, [p3], #128; VMOV x0, lfl0 + 408 0x19 0x84 0x48 0x78 VSHUFFLE x3, x0, x8, r18 + 412 0x19 0x04 0x44 0x78 VSHUFFLE x2, x0, x8, r17 + 416 0x62 0x9a 0x60 0x00 0x50 0xc9 0x70 0x02 VST x3, [p3, #64]; VMOV x1, lfh0 + 424 0x65 0x92 0x60 0x00 0x01 0xa5 0x70 0x02 VST x2, [p3], #128; NOPM +.label TGT_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_432 + 432 0x3f 0x1f 0x90 0x18 PADDB [p7], #64 + 436 0x07 0x1d 0xc6 0x98 LDA dj3, [p7], #4 + 440 0xe3 0xb4 0xda 0x1f 0x71 0x54 LDA dn3, [p7], #4; MOV m5, #-36 + 446 0x07 0xaa 0x56 0x98 LDA r18, [p7], m5 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 450 0xe7 0xd2 0xd0 0x00 0x00 0x01 0xb0 0x00 0x10 0xba LDA r20, [p7], #12; MOVXM p3, #_ZN12me_primitive11control_rndE +.aggressive_scheduled_block_id 2 +.noswbrkpt + 460 0x60 0xd0 0x50 0x00 0x00 0x0e 0xef 0xc0 0x10 0xba LDA.s8 r20, [p3]; MOVXM r23, #16256 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 470 0xfe 0xb3 0x25 0xba 0xe5 0xd4 LDA p3, [sp, #-12]; VBCST.16 x5, r23 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 476 0xc2 0xcd 0x7c 0x01 0x51 0x54 VLDA.CONV.fp32.bf16 cmh4, [p6, #64]; MOV m6, #84 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 482 0xe3 0xf0 0xd4 0xba 0xe5 0xd4 LDA m7, [p7], #4; VBCST.16 x4, r23 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 488 0xe3 0xb0 0xda 0x51 0x25 0xd4 LDA m3, [p7], #4; VMOV x10, x4 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 494 0xe3 0x90 0xdb 0x55 0x25 0xd4 LDA m1, [p7], #4; VMOV x11, x5 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 500 0xf9 0x60 0xda 0x5f 0xf6 0x2c LDA m6, [p7], m6; ADD r23, r20, #-2 + 506 0xf9 0x80 0xd4 0x04 0x61 0x54 LDA m0, [p7], #-16; MOV m2, #280 + 512 0xe3 0x84 0xda 0x1e 0x51 0x54 LDA dn0, [p7], #4; MOV m5, #-108 + 518 0x07 0x1c 0x46 0x98 LDA dj0, [p7], #4 + 522 0x07 0x1e 0x26 0x98 LDA dn4, [p7], #4 + 526 0x07 0x2e 0x46 0x98 LDA dj4, [p7], #8 + 530 0xf5 0x50 0xd9 0x80 0x01 0x54 LDA m5, [p7], m5; MOV dc4, #0 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 536 0xe9 0x6a 0xd0 0x8c 0x8b 0x03 0x2f 0x60 0x72 0xba LDA r26, [p7], m2; MOVS p0, p3; MOV r25, p7 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 546 0xfd 0xf3 0x20 0xd7 0x20 0x00 0x00 0x26 0x29 0x02 0x02 0x09 0x60 0x7e LDA p7, [sp, #-20]; PADDB [p0], m3; MOVS dc0, dc4; MOVXM p2, #(TGT_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_656 + 0) +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 560 0x02 0xa6 0xb2 0x10 0x4b 0x02 0xe4 0xc0 0x72 0xba VLDA bmlh2, [p0, #64]; MOVS dc2, dc4; MOV dc5, dc4 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 570 0x04 0xaa 0xb4 0x8c 0x8b 0x01 0x43 0x80 0x72 0xba VLDA bmhl2, [p0, #128]; MOVS p4, p3; MOV dj2, dj3 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 580 0x06 0xae 0xb7 0x97 0x23 0x22 0x31 0x61 0x20 0x0f 0x31 0x91 0x60 0x7e VLDA bmhh2, [p0, #192]; PADDB [p3], m6; MOVS p1, p3; MOVX r17, #780; MOV r24, m1 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 594 0x62 0x86 0xb0 0x00 0x25 0x3a 0x80 0x51 0xec 0x0f 0x60 0x09 0x60 0x7e VLDA bmlh0, [p3, #64]; NOPB; MOVS dc3, dc0; MOVX crRnd, r20; MOV r20, p7 + 608 0x64 0x8a 0xb0 0x00 0x22 0x4e 0x4b 0x01 0x36 0x89 0x03 0x00 0x78 0x00 0x00 0xe1 VLDA bmhl0, [p3, #128]; NOPB; MOVS dn2, dn3; MOVX r19, #52; MOV m2, m3; NOPV +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 624 0x66 0x8e 0xb0 0x00 0x21 0x0c 0x4b 0x01 0x56 0xa9 0x84 0x90 0x78 0x00 0x00 0xe1 VLDA bmhh0, [p3, #192]; NOPB; MOVS dc1, dc3; MOVX r21, #53; MOV m3, r18; NOPV +.aggressive_scheduled_block_id 4 +.noswbrkpt + 640 0x60 0x82 0xbe 0x2a 0x6c 0x28 0x5b 0x01 0x67 0x8a 0xb4 0x60 0x78 0x00 0x00 0xe1 VLDA bmll0, [p3]; VLDB x4, [p7, #64]; PADDS [p4], m1; MOVX r22, #60; MOV p5, p4; NOPV +.label TGT_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_656 +.loop_nesting 1 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 656 0x82 0x96 0xbb 0xd7 0x23 0x94 0x8b 0x00 0x84 0x10 0x70 0xf6 VLDA bmlh1, [p4, #64]; PADDB [p5], m7; MOVS p3, p5; MOV m1, r16 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 668 0x84 0x9a 0xb3 0x73 0x90 0x02 0xbe 0xbf 0x4e 0xba VLDA bmhl1, [p4, #128]; PADDB.2D [p3], d3; ADD.NC lc, r26, #-3 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 678 0x86 0x9e 0xb0 0x00 0x00 0x00 0x79 0xb0 0x10 0xba VLDA bmhh1, [p4, #192]; MOVXM ls, #(ZLS_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_864 + 0) +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 688 0x80 0x92 0xbe 0x73 0xe8 0x00 0x00 0x37 0x3b 0x02 0x93 0x91 0x60 0x7e VLDA bmll1, [p4]; VLDB.3D x7, [p7], d1; MOVS p4, p7; MOVXM le, #(ZLE_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_944 + 0) +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 702 0xa2 0xb6 0xb4 0x8b 0x90 0x01 0x82 0x00 0x7e 0xba VLDA bmlh3, [p5, #64]; PADDB [p4], m4; MOV m3, m2 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 712 0x80 0xcb 0x78 0x2a 0xec 0x9c 0x8b 0x32 VLDA x9, [p4]; VLDB x5, [p4, #64]; MOVS p4, p7 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 720 0xa4 0xba 0xbe 0x2a 0x6c 0x88 0x5b 0x32 VLDA bmhl3, [p5, #128]; VLDB x4, [p7, #64]; PADDS [p4], m4 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 728 0xa6 0xbe 0xbe 0x73 0xe8 0x3c VLDA bmhh3, [p5, #192]; VLDB.3D x7, [p7], d1 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 734 0xa0 0xb2 0xb4 0x15 0x74 0x02 0xb6 0x60 0x7e 0xba VLDA bmll3, [p5]; VLDB x5, [p4, #64]; MOV p5, p6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 744 0xc3 0x45 0x7b 0x57 0x20 0x3c VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;PADDB [p5], m5 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 750 0x00 0xa2 0xb4 0x06 0x74 0x01 0x9d 0x26 0x3e 0xba VLDA bmll2, [p0]; VLDB x9, [p4]; VSHUFFLE x6, x7, x4, r19 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 760 0x1b 0xba 0x54 0x78 VSHUFFLE x7, x7, x4, r21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 764 0x93 0x91 0x67 0x15 0x34 0x02 0x25 0x66 0x36 0xba VLDB x4, [p7, #64]; MOVS p4, p7; VSHUFFLE x8, x9, x5, r19 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 774 0xa0 0xc5 0x79 0x17 0x24 0xca 0xd4 0x62 0xb4 0xed 0x51 0x66 VLDA.CONV.fp32.bf16 cml4, [p5];PADDB [p4], m4; VSHUFFLE x9, x9, x5, r21; VMUL.f dm4, y3, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 786 0xa2 0xcd 0x78 0x2a 0xed 0x98 0x8b 0x01 0x9d 0x26 0x30 0xf6 VLDA.CONV.fp32.bf16 cmh4, [p5, #64];VLDB x5, [p4, #64]; MOVS p5, p6; VSHUFFLE x6, x7, x4, r19 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 798 0xb5 0x0c 0xf7 0x74 0xa8 0xd4 PADDA [p5], m5; VSHUFFLE x7, x7, x4, r21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 804 0xc2 0xcd 0x70 0x0a 0x36 0x09 0xb4 0xf1 0x51 0x4a VLDA.CONV.fp32.bf16 cmh4, [p6, #64]; VCONV.bfp16ebs8.fp32 ex0, dm4; VMUL.f dm4, y4, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 814 0xc3 0x45 0x77 0x39 0xf4 0x02 0x25 0x66 0x3e 0xba VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;VLDB.3D x7, [p7], d1; VSHUFFLE x8, x9, x5, r19 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 824 0x1c 0xca 0xd4 0x78 VSHUFFLE x9, x9, x5, r21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 828 0x08 0x8a 0x36 0x18 VCONV.bfp16ebs8.fp32 ex1, dm4 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 832 0xa0 0xc5 0x74 0x06 0x74 0x1d 0xb4 0xed 0x51 0x4a VLDA.CONV.fp32.bf16 cml4, [p5];VLDB x9, [p4]; VMUL.f dm4, y3, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 842 0xa2 0xcd 0x72 0x14 0x6c 0x0c VLDA.CONV.fp32.bf16 cmh4, [p5, #64]; VCONV.bfp16ebs8.fp32 ex2, dm4 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 848 0x00 0x2c 0xf0 0x00 0x21 0x8a 0x36 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VCONV.bfp16ebs8.fp32 ex3, dm4;NOPX; NOPM; NOPV +.label ZLS_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_864 +.loop_nesting 2 +.begin_of_loop +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 864 0x0e 0x2a 0x6d 0x6c 0xc0 0xe6 0xb4 0xf1 0x51 0x4a VLDB x4, [p7, #64]; MOV p5, p6; VMUL.f dm4, y4, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 874 0xc2 0xcd 0x7e 0x73 0xec 0x9c 0x8b 0x00 0x00 0x01 0x9d 0x26 0x3c 0x5b 0x01 0x4b VLDA.CONV.fp32.bf16 cmh4, [p6, #64];VLDB.3D x7, [p7], d1; MOVS p4, p7; NOPX; VSHUFFLE x6, x7, x4, r19; VMAC.f dm3, dm3, ex0, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 890 0xc3 0x45 0x79 0x17 0x20 0x0a 0x36 0x00 0x00 0x01 0xdd 0x2a 0x3c 0x49 0x21 0x4b VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;PADDB [p4], m4; VCONV.bfp16ebs8.fp32 ex0, dm4;NOPX; VSHUFFLE x7, x7, x4, r21; VMAC.f dm1, dm1, ex2, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 906 0xb5 0x0c 0xf8 0x2a 0xe8 0x45 0x1b 0x0e 0x8a 0x40 0x69 0x66 PADDA [p5], m5; VLDB x5, [p4, #64]; VCONV.bfp16ebs8.fp32 ex1, dm4; VMAC.f dm2, dm2, ex0, ex3, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 918 0xa0 0xc5 0x78 0x0c 0xec 0x4a 0xcc 0x62 0xb4 0xed 0x51 0x66 VLDA.CONV.fp32.bf16 cml4, [p5];VLDB x9, [p4]; VSHUFFLE x8, x9, x5, r19; VMUL.f dm4, y3, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 930 0xa2 0xcd 0x72 0x14 0x6c 0x00 0x00 0x66 0x56 0xa3 0x88 0x04 0x69 0x6e VLDA.CONV.fp32.bf16 cmh4, [p5, #64]; VCONV.bfp16ebs8.fp32 ex2, dm4; VSHUFFLE x9, x9, x5, r21; VMAC.f dm0, dm0, ex2, ex3, r17 +.label ZLE_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_944 +.end_of_loop +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 944 0x00 0x2c 0xf0 0x00 0x21 0x8a 0x36 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VCONV.bfp16ebs8.fp32 ex3, dm4;NOPX; NOPM; NOPV +.loop_nesting 1 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 960 0xc2 0xcd 0x7e 0x2a 0x6c 0x84 0x8b 0x00 0x00 0x02 0xb6 0x60 0x7d 0xa7 0x8a 0x8b VLDA.CONV.fp32.bf16 cmh4, [p6, #64];VLDB x4, [p7, #64]; MOVS p4, p1; NOPX; MOV p5, p6; VMUL.f dm4, y4, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 976 0xc3 0x45 0x7b 0x57 0x20 0x84 0x8b 0x00 0x00 0x01 0x9d 0x26 0x3c 0x5b 0x01 0x4b VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;PADDB [p5], m5; MOVS p0, p1; NOPX; VSHUFFLE x6, x7, x4, r19; VMAC.f dm3, dm3, ex0, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 992 0x0d 0x0c 0xf3 0xd7 0x20 0x0a 0x36 0x00 0x00 0x01 0xdd 0x2a 0x3c 0x49 0x21 0x4b PADDA [p0], m3; PADDB [p1], m7; VCONV.bfp16ebs8.fp32 ex0, dm4;NOPX; VSHUFFLE x7, x7, x4, r21; VMAC.f dm1, dm1, ex2, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1008 0x11 0x46 0xc1 0x0c 0x20 0xe4 0x8a 0x40 0x69 0x4a VCONV.bfp16ebs8.fp32 ex1, dm4; MOV m1, r24; VMAC.f dm2, dm2, ex0, ex3, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1018 0xa0 0xc5 0x74 0x4a 0xcc 0x62 0xb4 0xed 0x51 0x4a VLDA.CONV.fp32.bf16 cml4, [p5]; VSHUFFLE x8, x9, x5, r19; VMUL.f dm4, y3, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1028 0xa2 0xcd 0x72 0x14 0x6c 0x00 0x00 0x66 0x56 0xa3 0x88 0x04 0x69 0x6e VLDA.CONV.fp32.bf16 cmh4, [p5, #64]; VCONV.bfp16ebs8.fp32 ex2, dm4; VSHUFFLE x9, x9, x5, r21; VMAC.f dm0, dm0, ex2, ex3, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1042 0x31 0x46 0xc4 0x2b 0x90 0x02 0xb4 0x60 0x76 0xba PADDB [p4], m1; VCONV.bfp16ebs8.fp32 ex3, dm4; MOV p5, p4 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1052 0x02 0x09 0x20 0xe6 0x8b 0x60 0x29 0x62 MOV m2, r18; VMAC.f dm3, dm3, ex0, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1060 0xc2 0xcd 0x70 0x0a 0x36 0x09 0xb4 0xf1 0x51 0x4a VLDA.CONV.fp32.bf16 cmh4, [p6, #64]; VCONV.bfp16ebs8.fp32 ex0, dm4; VMUL.f dm4, y4, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1070 0x89 0x24 0x29 0x48 VMAC.f dm1, dm1, ex2, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1074 0x11 0x46 0xc0 0x02 0x8a 0x40 0x69 0x62 VCONV.bfp16ebs8.fp32 ex1, dm4; VMAC.f dm2, dm2, ex0, ex3, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1082 0x88 0x04 0x69 0x48 VMAC.f dm0, dm0, ex2, ex3, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1086 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1088 0x09 0x0a 0x36 0x18 VCONV.bfp16ebs8.fp32 ex2, dm4 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1092 0x31 0x46 0xc0 0x02 0x8b 0x60 0x29 0x62 VCONV.bfp16ebs8.fp32 ex3, dm4; VMAC.f dm3, dm3, ex0, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1100 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1102 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1104 0x89 0x24 0x29 0x48 VMAC.f dm1, dm1, ex2, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1108 0x8a 0x40 0x69 0x48 VMAC.f dm2, dm2, ex0, ex3, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1112 0x88 0x04 0x69 0x48 VMAC.f dm0, dm0, ex2, ex3, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1116 0x09 0x15 0xa6 0x98 VST bmlh3, [p1, #64] +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1120 0x09 0x25 0xc6 0x98 VST bmhl3, [p1, #128] +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1124 0x09 0x35 0xe6 0x98 VST bmhh3, [p1, #192] +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1128 0x20 0xb0 0xd5 0xcb 0x90 0x00 0xb5 0x60 0x76 0xba PADDB [p5], m6; VST bmll3, [p1]; MOV p1, p5 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1138 0x02 0xa4 0xd1 0x53 0x90 0x01 0x03 0x00 0x76 0xba PADDB.2D [p1], d2; VST bmlh2, [p0, #64]; MOV m2, m3 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1148 0x08 0x25 0x46 0x98 VST bmhl2, [p0, #128] +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1152 0x08 0x35 0x66 0x98 VST bmhh2, [p0, #192] +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1156 0x00 0xa0 0xd0 0x00 0x33 0x60 0x70 0x02 VST bmll2, [p0]; MOV p0, p3 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1164 0x82 0x94 0xd0 0x6b 0x90 0x01 0x84 0x90 0x76 0xba PADDB [p0], m3; VST bmlh1, [p4, #64]; MOV m3, r18 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1174 0x02 0xa6 0xb8 0x49 0x8d 0x0c VLDA bmlh2, [p0, #64]; VST bmhl1, [p4, #128] +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1180 0x04 0xaa 0xb4 0x34 0xe6 0x80 0x05 0xee 0xa0 0x7a VLDA bmhl2, [p0, #128]; VST bmhh1, [p4, #192]; JNZD r23, r23, p2 +.delay_slot +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1190 0x06 0xae 0xb7 0x97 0x24 0x04 0x86 0x82 0x33 0x60 0x70 0xf6 VLDA bmhh2, [p0, #192]; PADDB [p3], m6; VST bmll1, [p4]; MOV p4, p3 +.delay_slot +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1202 0x62 0x86 0xba 0x28 0x4d 0x0c VLDA bmlh0, [p3, #64]; VST bmlh0, [p5, #64] +.delay_slot +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1208 0x64 0x8a 0xba 0x48 0x8d 0x0c VLDA bmhl0, [p3, #128]; VST bmhl0, [p5, #128] +.delay_slot +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1214 0x66 0x8e 0xba 0x68 0xcd 0x0c VLDA bmhh0, [p3, #192]; VST bmhh0, [p5, #192] +.delay_slot +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1220 0x60 0x82 0xb8 0x57 0x25 0x04 0x06 0x82 0xb4 0x60 0x70 0xf6 VLDA bmll0, [p3]; PADDB [p4], m1; VST bmll0, [p5]; MOV p5, p4 +.loop_nesting 0 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1232 0x82 0x96 0xbb 0xd7 0x23 0x94 0x8b 0x00 0x84 0x10 0x70 0xf6 VLDA bmlh1, [p4, #64]; PADDB [p5], m7; MOVS p3, p5; MOV m1, r16 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1244 0x84 0x9a 0xb3 0x73 0x90 0x02 0xbe 0xbf 0x4e 0xba VLDA bmhl1, [p4, #128]; PADDB.2D [p3], d3; ADD.NC lc, r26, #-3 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1254 0x86 0x9e 0xb0 0x00 0x00 0x00 0x7a 0xd0 0x10 0xba VLDA bmhh1, [p4, #192]; MOVXM ls, #(ZLS_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_1440 + 0) +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1264 0x80 0x92 0xbe 0x73 0xe8 0x00 0x00 0x37 0x5f 0x02 0x93 0x91 0x60 0x7e VLDA bmll1, [p4]; VLDB.3D x7, [p7], d1; MOVS p4, p7; MOVXM le, #(ZLE_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_1520 + 0) +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1278 0xa2 0xb6 0xb4 0x8b 0x90 0x01 0x82 0x00 0x7e 0xba VLDA bmlh3, [p5, #64]; PADDB [p4], m4; MOV m3, m2 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1288 0x80 0xcb 0x78 0x2a 0xec 0x9c 0x8b 0x32 VLDA x9, [p4]; VLDB x5, [p4, #64]; MOVS p4, p7 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1296 0xa4 0xba 0xbe 0x2a 0x6c 0x88 0x5b 0x32 VLDA bmhl3, [p5, #128]; VLDB x4, [p7, #64]; PADDS [p4], m4 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1304 0xa6 0xbe 0xbe 0x73 0xe8 0x3c VLDA bmhh3, [p5, #192]; VLDB.3D x7, [p7], d1 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1310 0xa0 0xb2 0xb4 0x15 0x74 0x02 0xb6 0x60 0x7e 0xba VLDA bmll3, [p5]; VLDB x5, [p4, #64]; MOV p5, p6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1320 0xc3 0x45 0x7b 0x57 0x20 0x3c VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;PADDB [p5], m5 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1326 0x00 0xa2 0xb4 0x06 0x74 0x01 0x9d 0x26 0x3e 0xba VLDA bmll2, [p0]; VLDB x9, [p4]; VSHUFFLE x6, x7, x4, r19 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1336 0x1b 0xba 0x54 0x78 VSHUFFLE x7, x7, x4, r21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1340 0x93 0x91 0x67 0x15 0x34 0x02 0x25 0x66 0x36 0xba VLDB x4, [p7, #64]; MOVS p4, p7; VSHUFFLE x8, x9, x5, r19 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1350 0xa0 0xc5 0x79 0x17 0x24 0xca 0xd4 0x62 0xb4 0xed 0x51 0x66 VLDA.CONV.fp32.bf16 cml4, [p5];PADDB [p4], m4; VSHUFFLE x9, x9, x5, r21; VMUL.f dm4, y3, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1362 0xa2 0xcd 0x78 0x2a 0xed 0x98 0x8b 0x01 0x9d 0x26 0x30 0xf6 VLDA.CONV.fp32.bf16 cmh4, [p5, #64];VLDB x5, [p4, #64]; MOVS p5, p6; VSHUFFLE x6, x7, x4, r19 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1374 0xb5 0x0c 0xf7 0x74 0xa8 0xd4 PADDA [p5], m5; VSHUFFLE x7, x7, x4, r21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1380 0xc2 0xcd 0x70 0x0a 0x36 0x09 0xb4 0xf1 0x51 0x4a VLDA.CONV.fp32.bf16 cmh4, [p6, #64]; VCONV.bfp16ebs8.fp32 ex0, dm4; VMUL.f dm4, y4, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1390 0xc3 0x45 0x77 0x39 0xf4 0x02 0x25 0x66 0x3e 0xba VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;VLDB.3D x7, [p7], d1; VSHUFFLE x8, x9, x5, r19 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1400 0x1c 0xca 0xd4 0x78 VSHUFFLE x9, x9, x5, r21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1404 0x08 0x8a 0x36 0x18 VCONV.bfp16ebs8.fp32 ex1, dm4 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1408 0xa0 0xc5 0x74 0x06 0x74 0x1d 0xb4 0xed 0x51 0x4a VLDA.CONV.fp32.bf16 cml4, [p5];VLDB x9, [p4]; VMUL.f dm4, y3, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1418 0xa2 0xcd 0x72 0x14 0x6c 0x0c VLDA.CONV.fp32.bf16 cmh4, [p5, #64]; VCONV.bfp16ebs8.fp32 ex2, dm4 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1424 0x00 0x2c 0xf0 0x00 0x21 0x8a 0x36 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VCONV.bfp16ebs8.fp32 ex3, dm4;NOPX; NOPM; NOPV +.label ZLS_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_1440 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1440 0x0e 0x2a 0x6d 0x6c 0xc0 0xe6 0xb4 0xf1 0x51 0x4a VLDB x4, [p7, #64]; MOV p5, p6; VMUL.f dm4, y4, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1450 0xc2 0xcd 0x7e 0x73 0xec 0x9c 0x8b 0x00 0x00 0x01 0x9d 0x26 0x3c 0x5b 0x01 0x4b VLDA.CONV.fp32.bf16 cmh4, [p6, #64];VLDB.3D x7, [p7], d1; MOVS p4, p7; NOPX; VSHUFFLE x6, x7, x4, r19; VMAC.f dm3, dm3, ex0, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1466 0xc3 0x45 0x79 0x17 0x20 0x0a 0x36 0x00 0x00 0x01 0xdd 0x2a 0x3c 0x49 0x21 0x4b VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;PADDB [p4], m4; VCONV.bfp16ebs8.fp32 ex0, dm4;NOPX; VSHUFFLE x7, x7, x4, r21; VMAC.f dm1, dm1, ex2, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1482 0xb5 0x0c 0xf8 0x2a 0xe8 0x45 0x1b 0x0e 0x8a 0x40 0x69 0x66 PADDA [p5], m5; VLDB x5, [p4, #64]; VCONV.bfp16ebs8.fp32 ex1, dm4; VMAC.f dm2, dm2, ex0, ex3, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1494 0xa0 0xc5 0x78 0x0c 0xec 0x4a 0xcc 0x62 0xb4 0xed 0x51 0x66 VLDA.CONV.fp32.bf16 cml4, [p5];VLDB x9, [p4]; VSHUFFLE x8, x9, x5, r19; VMUL.f dm4, y3, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1506 0xa2 0xcd 0x72 0x14 0x6c 0x00 0x00 0x66 0x56 0xa3 0x88 0x04 0x69 0x6e VLDA.CONV.fp32.bf16 cmh4, [p5, #64]; VCONV.bfp16ebs8.fp32 ex2, dm4; VSHUFFLE x9, x9, x5, r21; VMAC.f dm0, dm0, ex2, ex3, r17 +.label ZLE_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_1520 +.end_of_loop +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1520 0x00 0x2c 0xf0 0x00 0x21 0x8a 0x36 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VCONV.bfp16ebs8.fp32 ex3, dm4;NOPX; NOPM; NOPV +.loop_nesting 0 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1536 0xc2 0xcd 0x79 0x09 0x16 0x00 0x00 0x6b 0x66 0x07 0xb4 0xf1 0x51 0x6e VLDA.CONV.fp32.bf16 cmh4, [p6, #64]; MOVS p4, p1; MOV p5, p6; VMUL.f dm4, y4, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1550 0xc3 0x45 0x7b 0x57 0x20 0x84 0x8b 0x00 0x00 0x01 0x9d 0x26 0x3c 0x5b 0x01 0x4b VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;PADDB [p5], m5; MOVS p0, p1; NOPX; VSHUFFLE x6, x7, x4, r19; VMAC.f dm3, dm3, ex0, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1566 0xda 0x06 0x83 0xd7 0x20 0x0a 0x36 0x00 0x00 0x01 0xdd 0x2a 0x3c 0x49 0x21 0x4b MOVA dj1, #-304; PADDB [p1], m7; VCONV.bfp16ebs8.fp32 ex0, dm4;NOPX; VSHUFFLE x7, x7, x4, r21; VMAC.f dm1, dm1, ex2, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1582 0x0d 0x0c 0xf1 0x14 0x6c 0x00 0x00 0x7b 0x51 0x07 0x8a 0x40 0x69 0x6e PADDA [p0], m3; VCONV.bfp16ebs8.fp32 ex1, dm4; MOV p7, r20; VMAC.f dm2, dm2, ex0, ex3, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1596 0xa0 0xc5 0x7d 0x32 0x16 0x00 0x00 0x62 0x56 0x63 0xb4 0xed 0x51 0x6e VLDA.CONV.fp32.bf16 cml4, [p5]; MOVS p6, r25; VSHUFFLE x8, x9, x5, r19; VMUL.f dm4, y3, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1610 0xa2 0xcd 0x72 0x14 0x6c 0x00 0x00 0x66 0x56 0xa3 0x88 0x04 0x69 0x6e VLDA.CONV.fp32.bf16 cmh4, [p5, #64]; VCONV.bfp16ebs8.fp32 ex2, dm4; VSHUFFLE x9, x9, x5, r21; VMAC.f dm0, dm0, ex2, ex3, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1624 0x31 0x46 0xc0 0x00 0x86 0x10 0x70 0x02 VCONV.bfp16ebs8.fp32 ex3, dm4; MOV m1, r24 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1632 0x08 0x57 0x25 0x68 0xc0 0xe6 0x8b 0x60 0x29 0x4a PADDB [p4], m1; MOV p5, p4; VMAC.f dm3, dm3, ex0, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1642 0x01 0x46 0xc0 0x02 0xb4 0xf1 0x51 0x62 VCONV.bfp16ebs8.fp32 ex0, dm4; VMUL.f dm4, y4, y5, r22 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1650 0x89 0x24 0x29 0x48 VMAC.f dm1, dm1, ex2, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1654 0x11 0x46 0xc0 0x02 0x8a 0x40 0x69 0x62 VCONV.bfp16ebs8.fp32 ex1, dm4; VMAC.f dm2, dm2, ex0, ex3, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1662 0x88 0x04 0x69 0x48 VMAC.f dm0, dm0, ex2, ex3, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1666 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1668 0xe4 0x46 0xd1 0x0a 0x36 0x00 0xc1 0x18 0x52 0xba LDA r17, [p7, dj1]; VCONV.bfp16ebs8.fp32 ex2, dm4; MOV dj1, #280 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1678 0xc4 0x42 0xd0 0x00 0x21 0x8a 0x36 0x00 0x00 0x03 0xb0 0x00 0x14 0x5b 0x01 0x4b LDA r16, [p6, dj1]; NOPB; VCONV.bfp16ebs8.fp32 ex3, dm4;MOVXM p7, #gem_bfp_param; VMAC.f dm3, dm3, ex0, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1694 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1696 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1698 0x89 0x24 0x29 0x48 VMAC.f dm1, dm1, ex2, ex1, r17 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 1702 0x8a 0x40 0x69 0x48 VMAC.f dm2, dm2, ex0, ex3, r17 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 1706 0x88 0x04 0x69 0x48 VMAC.f dm0, dm0, ex2, ex3, r17 + 1710 0x22 0xb4 0xd8 0xc7 0xfe 0x5c VST bmlh3, [p1, #64]; ADD r17, r17, #-1 + 1716 0x24 0xb8 0xd8 0xc6 0x11 0x5c VST bmhl3, [p1, #128]; NE r17, r17, r16 + 1722 0x09 0x35 0xe6 0x98 VST bmhh3, [p1, #192] + 1726 0x20 0xb0 0xd5 0xcb 0x90 0x00 0xb5 0x60 0x76 0xba PADDB [p5], m6; VST bmll3, [p1]; MOV p1, p5 + 1736 0x08 0x15 0x26 0x98 VST bmlh2, [p0, #64] + 1740 0x08 0x25 0x46 0x98 VST bmhl2, [p0, #128] + 1744 0x08 0x35 0x66 0x98 VST bmhh2, [p0, #192] + 1748 0x08 0x05 0x06 0x98 VST bmll2, [p0] + 1752 0x0c 0x14 0xa6 0x98 VST bmlh1, [p4, #64] + 1756 0x0c 0x24 0xc6 0x98 VST bmhl1, [p4, #128] + 1760 0x86 0x9c 0xd0 0x22 0x00 0xea 0x10 0x00 0x61 0x3a VST bmhh1, [p4, #192]; JNZ r17, #TGT_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_1872 +.delay_slot + 1770 0x0c 0x04 0x86 0x98 VST bmll1, [p4] +.delay_slot + 1774 0x0d 0x14 0x26 0x98 VST bmlh0, [p5, #64] +.delay_slot + 1778 0x0d 0x24 0x46 0x98 VST bmhl0, [p5, #128] +.delay_slot + 1782 0xa6 0x8c 0xd0 0x01 0x04 0x90 0x70 0x02 VST bmhh0, [p5, #192]; MOV m2, r18 +.delay_slot + 1790 0x2a 0x72 0x0a 0x08 0x0d 0x4c PADDB.2D [p1], d2; VST bmll0, [p5] + 1796 0x07 0x06 0x16 0x98 LDA r16, [p7] + 1800 0x07 0x26 0x36 0x98 LDA r17, [p7, #8] + 1804 0x07 0xf4 0x19 0x18 LDA p0, [sp, #-12] + 1808 0x00 0x00 NOPX +.no_stack_arguments + 1810 0x00 0x00 0x00 0x00 0x01 0x04 JL #_Z12post_processPai +.delay_slot +.swstall delay_slot + 1816 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1818 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1820 0x00 0x00 NOPX +.delay_slot + 1822 0x14 0x41 0x0f 0x98 MUL r0, r17, r16 +.delay_slot +.swstall delay_slot + 1826 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV +.return_address + 1840 0x00 0x03 0xb0 0x00 0x00 0x84 J #TGT_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_1888 +.delay_slot + 1846 0x10 0x20 0x01 0x18 MOVX r16, #0 +.delay_slot +.swstall delay_slot + 1850 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1852 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1854 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1856 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_1872 + 1872 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x21 0x00 0x38 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; ADD r16, r16, #1; NOPM; NOPV +.label TGT_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_1888 + 1888 0x07 0xf8 0x39 0x18 LDA lr, [sp, #-8] + 1892 0x07 0xf3 0x99 0x18 LDA p7, [sp, #-16] + 1896 0x00 0x00 NOPX + 1898 0x00 0x00 NOPX + 1900 0x00 0x00 NOPX +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 1902 0x00 0x00 NOPX +.aggressive_scheduled_block_id 5 +.noswbrkpt + 1904 0x07 0xff 0x19 0x18 LDA p6, [sp, #-4] +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 1908 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 1912 0x19 0x82 0x30 0xb8 MOV dj1, #280 +.delay_slot +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 1916 0x0e 0x22 0x11 0x98 ST r16, [p6, dj1] +.delay_slot + 1920 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 1926 0x00 0x00 NOPX +.label _Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params__end last +.label __Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params___func_end0 last +.delay_slot +.swstall delay_slot + 1928 0x00 0x00 NOPX + +.undef global data _ZN12me_primitive11control_satE + +.undef global data _ZN12me_primitive11control_rndE + +.undef global data gem_bfp_param + +.undef global text _Z8init_accILt1EEvPaS0_iii + +.undef global text _Z12post_processPai + +.text_segment_name +.text global 10 _Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.label __Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.function_start + 0 0x90 0x91 0x60 0x00 0x00 0x02 0xb0 0x00 0x11 0x3a MOVS p4, p1; MOVXM p5, #_ZL9curr_iter + 10 0x05 0x06 0x16 0x98 LDA r16, [p5] + 14 0x00 0x00 NOPX + 16 0x00 0x00 NOPX + 18 0x00 0x00 NOPX + 20 0x00 0x00 NOPX + 22 0x00 0x00 NOPX + 24 0x00 0x00 NOPX + 26 0x80 0x00 0x48 0x40 0x01 0x84 JNZ r16, #TGT_F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_144 +.delay_slot + 32 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 +.delay_slot + 38 0x0f 0xff 0x1d 0x98 ST p6, [sp, #-4] +.delay_slot + 42 0x0f 0xf7 0x9d 0x98 ST p7, [sp, #-12] +.delay_slot + 46 0xff 0x07 0xb0 0x03 0xb0 0x60 0x70 0x02 ST lr, [sp, #-8]; MOV p7, p0 +.delay_slot + 54 0x1e 0x66 0xc0 0xf8 MOV p6, p3 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 58 0x11 0x11 0x60 0x00 0x00 0x01 0xb0 0x00 0x11 0x3a MOVS p0, p2; MOVXM p3, #_ZN12me_primitive11control_rndE +.aggressive_scheduled_block_id 1 +.noswbrkpt + 68 0x60 0xc0 0xe0 0x00 0x00 0x01 0xb0 0x00 0x10 0xba ST.s8 r16, [p3]; MOVXM p3, #_ZN12me_primitive11control_satE +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 78 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 80 0x00 0x00 0x00 0x00 0x01 0x04 JL #_ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 86 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 88 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 90 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 94 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 98 0x00 0x2c 0xf6 0x0c 0x23 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; ST r16, [p3]; NOPM; NOPV +.return_address + 112 0x4c 0x85 0xd0 0x00 0x00 0x01 0x30 0x00 0x10 0xba LDA el0, [p2, #24]; MOVXM p2, #_ZL11total_iters + 122 0x00 0x00 NOPX + 124 0x00 0x00 NOPX + 126 0x00 0x00 NOPX + 128 0x00 0x00 NOPX + 130 0x00 0x00 NOPX + 132 0x00 0x00 NOPX + 134 0x00 0x2c 0xf2 0x04 0x29 0x80 0x00 0x00 0x00 0x7a NOPA; ST el0, [p2]; NOPX +.label TGT_F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_144 + 144 0xa0 0xc2 0xd0 0x00 0x00 0x01 0x30 0x00 0x10 0xba LDA r16, [p5]; MOVXM p2, #_ZL10depth_iter + 154 0x02 0x06 0x36 0x98 LDA r17, [p2] + 158 0x00 0x00 NOPX + 160 0x00 0x00 NOPX + 162 0x00 0x00 NOPX + 164 0x00 0x00 NOPX + 166 0x00 0x00 NOPX + 168 0x00 0x00 NOPX + 170 0x88 0x00 0x80 0x40 0x01 0x84 JNZ r17, #TGT_F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_256 +.delay_slot + 176 0x14 0x20 0x07 0x18 ADD r16, r16, #1 +.delay_slot + 180 0x0d 0x06 0x11 0x98 ST r16, [p5] +.delay_slot + 184 0x14 0x60 0x07 0x18 ADD r16, r17, #1 +.delay_slot + 188 0x0a 0x06 0x11 0x98 ST r16, [p2] +.delay_slot +.swstall delay_slot + 192 0x00 0x00 NOPX + 194 0x1c 0x1c 0xc0 0xf8 MOV r16, p6 + 198 0x1a 0x68 0x06 0x18 ADD.NC p2, r16, #12 + 202 0x02 0xff 0x76 0x98 LDA r27, [p2], #-4 + 206 0x02 0xfe 0x16 0x98 LDA r16, [p2], #-4 + 210 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 214 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.noswbrkpt + 216 0x02 0x46 0x16 0x98 LDA r16, [p2, #16] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 220 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 222 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 224 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 226 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 228 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 232 0x0a 0x06 0x11 0x98 ST r16, [p2] + 236 0x17 0xe2 0xfd 0x18 MOVX r17, #-1 + 240 0x00 0x00 NOPX + 242 0x00 0x00 NOPX + 244 0x00 0x00 NOPX + 246 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x04 0x13 0x18 0x7a NOPA; NOPS; ACQ r16, r17 +.label TGT_F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_256 + 256 0x00 0x18 0x00 0x1f 0xff 0xfe 0x0f 0xf0 0x10 0xba MOVA r24, #0; MOVXM r16, #2147483616 + 266 0x10 0x22 0x05 0x18 MOVX r17, #1 + 270 0x00 0x00 NOPX + 272 0x80 0xb3 0xd0 0x00 0x00 0x02 0x30 0x06 0x10 0xba LDA p3, [p4]; MOVXM p4, #(gem_bfp_param + 12) + 282 0x04 0xff 0x76 0x98 LDA r27, [p4], #-4 + 286 0x04 0xee 0x56 0x98 LDA r18, [p4], #-8 + 290 0xe0 0x83 0xde 0xd1 0x81 0xd4 LDA p0, [p7]; MOV p7, p4 + 296 0x06 0x05 0x1e 0x98 LDA p2, [p6] + 300 0x00 0x00 NOPX + 302 0x00 0x00 NOPX + 304 0x1c 0xd6 0xc0 0xf8 MOV r19, p3 +.no_stack_arguments + 308 0x00 0x00 0x00 0x00 0x01 0x04 JL #_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params +.delay_slot + 314 0x14 0xa4 0x7f 0x18 ADD r18, r18, #31 +.delay_slot + 318 0x14 0xa1 0x04 0x98 AND r16, r18, r16 +.delay_slot + 322 0x16 0x21 0x02 0x18 SEL.EQZ r16, r24, r16, r27 +.delay_slot + 326 0x14 0x21 0x1d 0x98 LSHL r16, r16, r17 +.delay_slot + 330 0x00 0x2c 0xf2 0xd3 0x82 0x94 NOPA; ADD.NC p1, r19, r16 +.return_address + 336 0xe8 0xc2 0xd0 0x00 0x00 0x01 0x30 0x00 0x10 0xba LDA r16, [p7, #16]; MOVXM p2, #_ZL10depth_iter + 346 0x40 0xca 0xd8 0xb9 0x81 0xd4 LDA r18, [p2]; MOV r17, p6 + 352 0x07 0xf7 0x99 0x18 LDA p7, [sp, #-12] + 356 0x00 0x00 NOPX + 358 0x00 0x00 NOPX + 360 0x00 0x00 NOPX + 362 0x00 0x00 NOPX + 364 0x00 0x00 NOPX + 366 0x14 0xa1 0x08 0x98 NE r16, r18, r16 + 370 0x80 0x00 0xe0 0x40 0x01 0x84 JNZ r16, #TGT_F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.delay_slot + 376 0x10 0x30 0x01 0x18 MOVX r24, #0 +.delay_slot +.swstall delay_slot + 380 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 382 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 384 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 386 0x00 0x00 NOPX + 388 0x40 0xe2 0x30 0x01 0x00 0x2b 0x34 0x45 0x09 0x3a ST r24, [p2]; MOVX r16, #1; ADD.NC p6, r17, #20 + 398 0x06 0x06 0x36 0x98 LDA r17, [p6] + 402 0x00 0x00 NOPX + 404 0x00 0x00 NOPX + 406 0x00 0x00 NOPX + 408 0x00 0x00 NOPX + 410 0x00 0x00 NOPX + 412 0x00 0x00 NOPX + 414 0x14 0x51 0x08 0x18 REL r17, r16 + 418 0x06 0xe6 0x36 0x98 LDA r17, [p6, #-8] + 422 0x00 0x00 NOPX + 424 0x00 0x00 NOPX + 426 0x00 0x00 NOPX + 428 0x00 0x00 NOPX + 430 0x00 0x00 NOPX + 432 0x00 0x00 NOPX + 434 0x14 0x21 0x11 0x98 SUB r16, r16, r17 + 438 0x00 0x2c 0xf6 0xe6 0x11 0x80 0x00 0x00 0x00 0x7a NOPA; ST r16, [p6, #-8]; NOPX +.label TGT_F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 + 448 0x00 0x00 0x04 0xc0 0x00 0x44 MOVXM p2, #_ZL9curr_iter + 454 0x40 0xc2 0xd0 0x00 0x00 0x01 0xb0 0x00 0x10 0xba LDA r16, [p2]; MOVXM p3, #_ZL11total_iters + 464 0x03 0x06 0x36 0x98 LDA r17, [p3] + 468 0x00 0x00 NOPX + 470 0x00 0x00 NOPX + 472 0x00 0x00 NOPX + 474 0x00 0x00 NOPX + 476 0x00 0x00 NOPX + 478 0x00 0x00 NOPX + 480 0x14 0x61 0x08 0x98 NE r16, r17, r16 + 484 0x80 0x01 0x00 0x40 0x01 0x84 JNZ r16, #TGT_F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_512 +.delay_slot + 490 0x07 0xff 0x19 0x18 LDA p6, [sp, #-4] +.delay_slot +.swstall delay_slot + 494 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 496 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 498 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 500 0x00 0x00 NOPX + 502 0x00 0x2c 0xf2 0x07 0x11 0x80 0x00 0x00 0x00 0x7a NOPA; ST r24, [p2]; NOPX +.label TGT_F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_512 + 512 0x07 0xf8 0x39 0x18 LDA lr, [sp, #-8] + 516 0x00 0x00 NOPX + 518 0x00 0x00 NOPX + 520 0x00 0x00 NOPX + 522 0x00 0x00 NOPX + 524 0x00 0x00 NOPX + 526 0x00 0x00 NOPX + 528 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 532 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 538 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 540 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 542 0x00 0x00 NOPX +.label _Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end last +.label __Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 last +.delay_slot +.swstall delay_slot + 544 0x00 0x00 NOPX + +.undef global data _ZN12me_primitive11control_satE + +.undef global data _ZN12me_primitive11control_rndE + +.undef global data gem_bfp_param + +.undef global text _Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params + +.text_segment_name +.text weak 10 _ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv +.label __ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv___func_begin0 +.function_start + 0 0x23 0x86 0xd0 0x00 0x00 0x00 0x30 0x00 0x10 0xba LDA r1, [p1], #4; MOVXM p0, #reducesum_params + 10 0x00 0x18 0x00 0x00 0x40 0x88 0x00 0x06 0x58 0xba MOVA r24, #0; MOVX r4, #4; MOV m0, #6 + 20 0x00 0x00 NOPX + 22 0x00 0x00 NOPX + 24 0x00 0x00 NOPX + 26 0x00 0x00 NOPX + 28 0x00 0x00 NOPX + 30 0x08 0x1c 0x31 0x98 ST r1, [p0], #4 + 34 0x01 0x1c 0xd6 0x98 LDA r6, [p1], #4 + 38 0x00 0x00 NOPX + 40 0x00 0x00 NOPX + 42 0x00 0x00 NOPX + 44 0x00 0x00 NOPX + 46 0x00 0x00 NOPX + 48 0x00 0x00 NOPX + 50 0x03 0x9a 0x33 0x03 0xfe 0x5c ST r6, [p0], #4; ADD r0, r6, #-1 + 56 0x01 0x1c 0xb6 0x98 LDA r5, [p1], #4 + 60 0x00 0x00 NOPX + 62 0x00 0x00 NOPX + 64 0x00 0x00 NOPX + 66 0x00 0x00 NOPX + 68 0x00 0x00 NOPX + 70 0x00 0x00 NOPX + 72 0x03 0x96 0x32 0x9c 0xdf 0x5c ST r5, [p0], #4; MUL r7, r5, r6 + 78 0x01 0x1e 0x16 0x98 LDA r16, [p1], #4 + 82 0x00 0x00 NOPX + 84 0x00 0x00 NOPX + 86 0x00 0x00 NOPX + 88 0x00 0x00 NOPX + 90 0x00 0x00 NOPX + 92 0x00 0x00 NOPX + 94 0x03 0xc2 0x32 0x12 0x0f 0x5c ST r16, [p0], #4; EQ r4, r4, r16 + 100 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 104 0x00 0x00 NOPX + 106 0x00 0x00 NOPX + 108 0x00 0x00 NOPX + 110 0x00 0x00 NOPX + 112 0x00 0x00 NOPX + 114 0x00 0x00 NOPX + 116 0x03 0x85 0x30 0x02 0x50 0x0e 0x70 0x02 ST el0, [p0], #4; MOV r18, el0 + 124 0x01 0x1e 0x76 0x98 LDA r19, [p1], #4 + 128 0x00 0x00 NOPX + 130 0x00 0x00 NOPX + 132 0x00 0x00 NOPX + 134 0x00 0x00 NOPX + 136 0x00 0x00 NOPX + 138 0x00 0x00 NOPX + 140 0x03 0xce 0x39 0xca 0x5f 0x5c ST r19, [p0], #4; MUL r18, r19, r18 + 146 0x01 0x04 0x0e 0x98 LDA eh0, [p1] + 150 0x00 0x00 NOPX + 152 0x00 0x00 NOPX + 154 0x00 0x00 NOPX + 156 0x00 0x00 NOPX + 158 0x00 0x00 NOPX + 160 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 162 0x01 0x01 0x30 0x02 0x90 0x8e 0x70 0x02 ST eh0, [p0], m0; MOV r20, eh0 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 170 0x00 0x2e 0x57 0x18 ST.s16 r18, [p0], #4 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 174 0x14 0xa5 0x4f 0x98 MUL r18, r18, r20 + 178 0x20 0x00 0x88 0x40 0x01 0x84 JNZ r4, #TGT_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv_272 +.delay_slot + 184 0x17 0xc4 0xed 0x18 MOVX r2, #-5 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 188 0x11 0x46 0x2d 0x98 LSHL r3, r5, r2 +.delay_slot +.aggressive_scheduled_block_id 2 +.noswbrkpt + 192 0x11 0xc6 0x1f 0x98 MUL r3, r7, r1 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 196 0x10 0xe3 0xff 0x18 ADD r17, r3, #-1 +.delay_slot + 200 0x00 0xe2 0xe1 0x8c 0x5b 0x2c ST.s16 r24, [p0]; LSHL r3, r3, r2 + 206 0x10 0x24 0x09 0x18 MOVX r18, #2 + 210 0x14 0xa5 0x07 0x98 EQ r18, r18, r16 + 214 0x90 0x01 0x50 0x40 0x01 0x84 JNZ r18, #TGT_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv_672 +.delay_slot +.swstall delay_slot + 220 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 222 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 224 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 226 0x00 0x00 NOPX +.delay_slot + 228 0x10 0x49 0xff 0x18 ADD r4, r1, #-1 + 232 0x10 0x0c 0x05 0x18 MOVX r6, #1 + 236 0x11 0x8d 0x07 0x98 EQ r6, r6, r16 + 240 0x30 0x01 0x00 0x40 0x01 0x84 JNZ r6, #TGT_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv_512 +.delay_slot +.swstall delay_slot + 246 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 248 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 250 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 252 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 254 0x00 0x00 NOPX + 256 0x00 0x00 0xe8 0x00 0x00 0x84 J #TGT_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv_464 +.delay_slot +.swstall delay_slot + 262 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 264 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 266 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 268 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 270 0x00 0x00 NOPX +.label TGT_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv_272 + 272 0x04 0x01 0x00 0x00 0x00 0x01 0x30 0x13 0x10 0xba MOVA r1, #32; MOVXM p2, #(reducesum_params + 38) + 282 0x04 0x04 0x00 0x0f 0x01 0x6c 0x50 0x0e 0x78 0xba MOVA r4, #32; LSHL r16, r7, r2; MOV r2, el0 + 292 0x29 0x0d 0xc1 0x00 0x69 0x64 MSC r4, r4, r5, r6; MOV dj0, #26 + 298 0x00 0x00 NOPX + 300 0x00 0x00 NOPX + 302 0x00 0x00 NOPX + 304 0x02 0x1c 0x37 0x18 ST.s16 r1, [p2], #2 + 308 0x00 0x00 NOPX + 310 0x00 0x00 NOPX + 312 0x00 0x00 NOPX + 314 0x00 0x00 NOPX + 316 0x00 0x00 NOPX + 318 0x00 0x00 NOPX + 320 0x02 0x1e 0x37 0x18 ST.s16 r17, [p2], #2 + 324 0x00 0x00 NOPX + 326 0x00 0x00 NOPX + 328 0x00 0x00 NOPX + 330 0x00 0x00 NOPX + 332 0x00 0x00 NOPX + 334 0x00 0x00 NOPX + 336 0x02 0x1c 0x37 0x18 ST.s16 r1, [p2], #2 + 340 0x00 0x00 NOPX + 342 0x00 0x00 NOPX + 344 0x00 0x00 NOPX + 346 0x00 0x00 NOPX + 348 0x00 0x00 NOPX + 350 0x00 0x00 NOPX + 352 0x02 0x1c 0x17 0x18 ST.s16 r0, [p2], #2 + 356 0x00 0x00 NOPX + 358 0x00 0x00 NOPX + 360 0x00 0x00 NOPX + 362 0x00 0x00 NOPX + 364 0x00 0x00 NOPX + 366 0x00 0x00 NOPX + 368 0x02 0x1c 0x37 0x18 ST.s16 r1, [p2], #2 + 372 0x00 0x00 NOPX + 374 0x00 0x00 NOPX + 376 0x00 0x00 NOPX + 378 0x00 0x00 NOPX + 380 0x00 0x00 NOPX + 382 0x00 0x00 NOPX + 384 0x0a 0xcc 0xf1 0x98 ST r7, [p2], #-16 + 388 0x02 0xec 0x57 0x18 ST.s16 r2, [p2], #-4 + 392 0x00 0x00 NOPX + 394 0x00 0x00 NOPX + 396 0x00 0x00 NOPX + 398 0x00 0x00 NOPX + 400 0x00 0x00 NOPX + 402 0x00 0x00 NOPX + 404 0x02 0x00 0x97 0x18 ST.s16 r4, [p2, dj0] + 408 0x00 0x00 NOPX + 410 0x00 0x00 NOPX + 412 0x00 0x00 NOPX + 414 0x00 0x00 NOPX + 416 0x00 0x00 NOPX + 418 0x00 0x00 NOPX + 420 0x40 0x8e 0xe0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 ST.s16 r3, [p2]; NOPB; NOPS; NOPX +.label __ll7__ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv + 432 0x14 0x01 0xff 0x18 ADD r0, r16, #-1 + 436 0x00 0x00 0x04 0xc0 0x68 0x44 MOVXM p2, #(reducesum_params + 52) + 442 0x00 0x00 NOPX + 444 0x00 0x00 NOPX + 446 0x00 0x00 NOPX + 448 0x00 0x00 NOPX + 450 0x40 0x82 0xe0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e ST.s16 r0, [p2]; NOPS; NOPM; NOPV +.label TGT_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv_464 + 464 0x22 0x82 0xd1 0x00 0x59 0x54 LDA r0, [p1, #4]; MOV dj0, #22 + 470 0x00 0x00 NOPX + 472 0x00 0x00 NOPX + 474 0x00 0x00 NOPX + 476 0x00 0x00 NOPX + 478 0x00 0x00 NOPX + 480 0x00 0x00 0x07 0x18 ST.s8 r0, [p0, dj0] + 484 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot + 488 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 490 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 492 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 494 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 496 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv_512 + 512 0x04 0x06 0x00 0x00 0x00 0x01 0x30 0x13 0x10 0xba MOVA r6, #32; MOVXM p2, #(reducesum_params + 38) + 522 0x43 0x9a 0xe2 0x84 0x3f 0x2c ST.s16 r6, [p2], #2; MUL r1, r5, r1 + 528 0x00 0x00 NOPX + 530 0x10 0x60 0x2d 0x98 LSHL r16, r1, r2 + 534 0x00 0x00 NOPX + 536 0x00 0x00 NOPX + 538 0x00 0x00 NOPX + 540 0x04 0x00 0x00 0x0a 0x60 0x30 0xc8 0x20 0x58 0xba MOVA r0, #32; MAC r6, r6, r5, r0; MOV r6, #32 + 550 0x43 0xc6 0xe0 0x14 0x23 0x2c ST.s16 r17, [p2], #2; SUB r5, r0, r1 + 556 0x00 0x00 NOPX + 558 0x00 0x00 NOPX + 560 0x00 0x00 NOPX + 562 0x00 0x00 NOPX + 564 0x00 0x00 NOPX + 566 0x00 0x00 NOPX + 568 0x02 0x1c 0xd7 0x18 ST.s16 r6, [p2], #2 + 572 0x00 0x00 NOPX + 574 0x00 0x00 NOPX + 576 0x00 0x00 NOPX + 578 0x00 0x00 NOPX + 580 0x00 0x00 NOPX + 582 0x00 0x00 NOPX + 584 0x02 0x1c 0x97 0x18 ST.s16 r4, [p2], #2 + 588 0x00 0x00 NOPX + 590 0x00 0x00 NOPX + 592 0x00 0x00 NOPX + 594 0x00 0x00 NOPX + 596 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 598 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.noswbrkpt + 600 0x02 0x1c 0x17 0x18 ST.s16 r0, [p2], #2 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 604 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 606 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 608 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 610 0x11 0xc0 0x4e 0x18 MSC r0, r0, r7, r4 + 614 0x00 0x00 NOPX + 616 0x00 0x00 NOPX + 618 0x0a 0xcc 0x31 0x98 ST r1, [p2], #-16 + 622 0x02 0xee 0x77 0x18 ST.s16 r19, [p2], #-4 + 626 0x00 0x00 NOPX + 628 0x00 0x00 NOPX + 630 0x00 0x00 NOPX + 632 0x00 0x00 NOPX + 634 0x00 0x00 NOPX + 636 0x00 0x00 NOPX + 638 0x02 0x04 0x77 0x18 ST.s16 r3, [p2] + 642 0x00 0x00 NOPX + 644 0x00 0x00 0xd8 0x00 0x00 0x84 J #__ll7__ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv +.delay_slot + 650 0x18 0x80 0x34 0xb8 MOV dj0, #26 +.delay_slot +.swstall delay_slot + 654 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 656 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 658 0x00 0x00 NOPX +.delay_slot + 660 0x40 0x16 0xe0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 ST.s16 r5, [p2, dj0]; NOPB; NOPS; NOPX +.label TGT_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv_672 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 672 0x02 0x04 0x80 0x00 0x00 0x01 0x30 0x0e 0x10 0xba MOVA m1, #16; MOVXM p2, #(reducesum_params + 28) +.aggressive_scheduled_block_id 4 +.noswbrkpt + 682 0x45 0x86 0xe0 0x01 0x10 0xa8 0xf0 0x8e 0x78 0xba ST.s16 r1, [p2], #4; MOVX r17, #5; MOV r7, eh0 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 692 0x04 0x03 0x00 0x03 0x03 0x7c 0x07 0xf6 0x58 0xba MOVA r3, #32; MUL r16, r1, r6; MOV m0, #-10 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 702 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 704 0x11 0x43 0x0f 0x98 MUL r1, r5, r16 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 708 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 710 0x10 0x42 0x2d 0x98 LSHL r1, r1, r2 + 714 0x00 0x00 NOPX + 716 0x02 0x28 0xf7 0x18 ST.s16 r7, [p2], m1 + 720 0x00 0x00 NOPX + 722 0x00 0x00 NOPX + 724 0x00 0x00 NOPX + 726 0x00 0x00 NOPX + 728 0x00 0x00 NOPX + 730 0x00 0x00 NOPX + 732 0x0a 0x0a 0x11 0x98 ST r16, [p2], m0 + 736 0x02 0x1c 0xb7 0x18 ST.s16 r5, [p2], #2 + 740 0x00 0x00 NOPX + 742 0x00 0x00 NOPX + 744 0x00 0x00 NOPX + 746 0x00 0x00 NOPX + 748 0x00 0x00 NOPX + 750 0x00 0x00 NOPX + 752 0x02 0x1c 0x17 0x18 ST.s16 r0, [p2], #2 + 756 0x00 0x00 NOPX + 758 0x00 0x00 NOPX + 760 0x00 0x00 NOPX + 762 0x00 0x00 NOPX + 764 0x11 0x80 0x46 0x18 MAC r0, r0, r6, r4 + 768 0x00 0x00 NOPX + 770 0x02 0x1c 0xb7 0x18 ST.s16 r5, [p2], #2 + 774 0x00 0x00 NOPX + 776 0x00 0x00 NOPX + 778 0x00 0x00 NOPX + 780 0x00 0x00 NOPX + 782 0x00 0x00 NOPX + 784 0x00 0x00 NOPX + 786 0x02 0x1c 0x97 0x18 ST.s16 r4, [p2], #2 + 790 0x00 0x00 NOPX + 792 0x00 0x00 NOPX + 794 0x00 0x00 NOPX + 796 0x00 0x00 NOPX + 798 0x00 0x00 NOPX +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 800 0x00 0x00 NOPX +.aggressive_scheduled_block_id 5 +.noswbrkpt + 802 0x02 0x04 0x77 0x18 ST.s16 r3, [p2] +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 806 0x00 0x00 NOPX +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 808 0x00 0x00 0xd8 0x00 0x00 0x84 J #__ll7__ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv +.delay_slot +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 814 0x14 0x23 0x1d 0x98 LSHL r17, r16, r17 +.delay_slot +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 818 0x11 0x46 0x0e 0x18 MSC r3, r3, r5, r0 +.delay_slot +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 822 0x10 0xc3 0x11 0x98 SUB r1, r3, r17 +.delay_slot +.swstall delay_slot + 826 0x00 0x00 NOPX +.label _ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv__end last +.label __ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv___func_end0 last +.delay_slot + 828 0x02 0x44 0x37 0x18 ST.s16 r1, [p2, #8] + +.undef global data reducesum_params + +.text_segment_name +.text weak 10 _ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E +.label __ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E___func_begin0 +.function_start + 0 0xfd 0x10 0x80 0x00 0x00 0x01 0x30 0x12 0x10 0xba MOVA m4, #-24; MOVXM p2, #(reducesum_params + 36) + 10 0x51 0x43 0x50 0x60 0x02 0x2c LDA.u16 r16, [p2], m4; MOVX r24, #0 + 16 0x18 0x03 0x11 0x78 VINSERT.32 x0, x0, #0, r24 + 20 0x00 0x00 NOPX + 22 0x00 0x00 NOPX + 24 0x00 0x00 NOPX + 26 0x00 0x00 NOPX + 28 0x00 0x00 NOPX + 30 0x80 0x00 0x80 0x40 0x01 0x84 JNZ r16, #TGT_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_256 +.delay_slot + 36 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 +.delay_slot + 42 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4] +.delay_slot + 46 0x0f 0xfb 0x9d 0x98 ST p7, [sp, #-8] +.delay_slot +.swstall delay_slot + 50 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 52 0x00 0x00 NOPX + 54 0x00 0x00 0x0e 0xc0 0x00 0x44 MOVXM p7, #_ZN12me_primitive11control_rndE + 60 0xe0 0xc4 0x50 0x00 0x00 0x03 0xb0 0x18 0x10 0xba LDA.s8 r17, [p7]; MOVXM p7, #(reducesum_params + 48) + 70 0x07 0x06 0x16 0x98 LDA r16, [p7] + 74 0x00 0x00 NOPX + 76 0x00 0x00 NOPX + 78 0x00 0x00 NOPX + 80 0x00 0x00 NOPX + 82 0x00 0x00 NOPX + 84 0x00 0x00 NOPX + 86 0x80 0x00 0x80 0x00 0x01 0x84 JZ r16, #TGT_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_256 +.delay_slot + 92 0x19 0x80 0x92 0xf8 VMOV bmhl1, x0 +.delay_slot + 96 0x14 0x7a 0x80 0x18 MOVX crRnd, r17 +.delay_slot + 100 0x09 0x40 0xd6 0x18 VCONV.bf16.fp32 wl2, bmhl1 +.delay_slot +.swstall delay_slot + 104 0x00 0x00 NOPX +.delay_slot + 106 0x18 0x91 0x03 0x58 VEXTBCST.16 x1, x2, #0 + 110 0xff 0x71 0x07 0x84 0x8b 0x00 0x00 0x00 0x78 0x78 0x10 0x76 MOVA r17, #-5; MOVS p7, p1; MOVXM ls, #(ZLS_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_240 + 0) + 122 0x00 0x00 0x06 0xe1 0xe0 0x44 MOVXM le, #(ZLS_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_240 + 0) + 128 0x84 0x3f 0xe3 0x05 0x25 0xe4 ADD r16, r16, #-1; VMOV bmhl1, x1 + 134 0x14 0x21 0x1d 0x98 LSHL r16, r16, r17 + 138 0x00 0x2c 0xfa 0xf0 0x01 0x14 NOPA; ADD.NC lc, r16, #1 + 144 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 160 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 176 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 192 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 208 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 224 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLS_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_240 +.loop_nesting 1 +.begin_of_loop +.end_of_loop + 240 0x00 0x2c 0xf0 0x00 0x27 0x1c 0xc6 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmhl1, [p7], #64; NOPX; NOPM; NOPV +.label TGT_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_256 +.loop_nesting 0 + 256 0x5b 0xc2 0xd0 0x01 0x20 0x49 0xb1 0x60 0x78 0xba LDA r16, [p2], #-12; MOVX r18, #2; MOV p3, p1 + 266 0x00 0x00 NOPX + 268 0x00 0x00 NOPX + 270 0x00 0x00 NOPX + 272 0x00 0x00 NOPX + 274 0x00 0x00 NOPX + 276 0x00 0x00 NOPX + 278 0x14 0xa1 0x08 0x98 NE r16, r18, r16 + 282 0x80 0x01 0x20 0x40 0x01 0x84 JNZ r16, #TGT_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_576 +.delay_slot +.swstall delay_slot + 288 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 290 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 292 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 294 0x00 0x00 NOPX +.delay_slot + 296 0x1c 0x54 0xc0 0xf8 MOV r17, p2 + 300 0x00 0x00 0x0e 0xc0 0x60 0x44 MOVXM p7, #(reducesum_params + 48) + 306 0x07 0x06 0x56 0x98 LDA r18, [p7] + 310 0x00 0x00 NOPX + 312 0x00 0x00 NOPX + 314 0x00 0x00 NOPX + 316 0x00 0x00 NOPX + 318 0x00 0x00 NOPX + 320 0x00 0x00 NOPX + 322 0x90 0x01 0x20 0x00 0x01 0x84 JZ r18, #TGT_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_576 +.delay_slot +.swstall delay_slot + 328 0x00 0x00 NOPX +.delay_slot + 330 0x10 0x26 0x05 0x18 MOVX r19, #1 +.delay_slot + 334 0x94 0xe7 0xba 0x25 0x81 0xe4 LSHL r19, r18, r19; MOV r20, p1 +.delay_slot + 340 0x1b 0x69 0xd1 0x58 ADD.NC p3, r19, r20 +.delay_slot + 344 0x00 0x00 0x0e 0xc0 0x00 0x44 MOVXM p7, #_ZN12me_primitive11control_rndE + 350 0xe0 0xd0 0x52 0x8c 0x8b 0x00 0x00 0x00 0x79 0x00 0x10 0x76 LDA.s8 r20, [p7]; MOVS p2, p3; MOVXM ls, #(ZLS_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_512 + 0) + 362 0xff 0x73 0x04 0x84 0x8b 0x00 0x00 0x01 0xb9 0x08 0x10 0x76 MOVA r19, #-5; MOVS p4, p1; MOVXM le, #(ZLE_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_528 + 0) + 374 0x00 0x00 0x0e 0xc3 0x40 0x44 MOVXM p7, #(TGT_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_416 + 0) + 380 0x10 0x5a 0x40 0x18 MOVX vaddSign0, #1 + 384 0x00 0x00 NOPX + 386 0x00 0x00 NOPX + 388 0x19 0x80 0x92 0xf8 VMOV bmhl1, x0 + 392 0xa7 0x50 0x0a 0xb2 0xff 0x24 MOVX crRnd, r20; ADD.NC r21, r18, #-1 + 398 0x08 0x1a 0xca 0xce 0x7b 0x5c VCONV.bf16.fp32 wl0, bmhl1; LSHL r19, r21, r19 + 404 0x00 0x00 NOPX + 406 0x00 0x2c 0xf0 0x00 0x10 0x00 0x00 0x81 0xae 0xba NOPA; NOPB; VEXTBCST.16 x0, x0, #0 +.label TGT_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_416 +.loop_nesting 1 + 416 0x83 0x8e 0x80 0x02 0xf2 0x1c VLDB x1, [p4], #64; MOVX lc, #30 + 422 0x00 0x00 NOPX + 424 0x00 0x00 NOPX + 426 0x00 0x00 NOPX + 428 0x00 0x01 0x67 0x98 NOPA + 432 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 448 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x01 0x40 0x08 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVX r20, #0; NOPM; NOPV +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 464 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x29 0x40 0x3a 0xa2 0xa8 0x68 0x00 0x00 0xe1 NOPA; NOPB; NOPS; ADD r20, r20, #1; VEXTRACT.16 r21, x1, r20, vaddSign0; NOPV +.aggressive_scheduled_block_id 1 +.noswbrkpt + 480 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x29 0x40 0x3a 0xa2 0xa8 0x68 0x00 0x00 0xe1 NOPA; NOPB; NOPS; ADD r20, r20, #1; VEXTRACT.16 r21, x1, r20, vaddSign0; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 496 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x81 0x54 0xb8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VINSERT.16 x2, x0, #0, r21; NOPV +.label ZLS_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_512 +.loop_nesting 2 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.noswbrkpt + 512 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x02 0xa2 0xa8 0x68 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VEXTRACT.16 r21, x1, r20, vaddSign0; NOPV +.label ZLE_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_528 +.end_of_loop +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 528 0x00 0x2c 0xf0 0x00 0x22 0x1c 0x93 0x29 0x40 0x38 0x81 0x54 0xb8 0x00 0x00 0xe1 NOPA; NOPB; VST x2, [p2], #64; ADD r20, r20, #1; VINSERT.16 x2, x0, #0, r21; NOPV +.loop_nesting 1 + 544 0x14 0xe7 0xe0 0x18 JNZD r19, r19, p7 +.delay_slot + 548 0x0a 0x1c 0x93 0x18 VST x2, [p2], #64 +.delay_slot +.swstall delay_slot + 552 0x00 0x00 NOPX +.delay_slot + 554 0x19 0x02 0xa9 0x78 VINSERT.16 x2, x0, #0, r21 +.delay_slot +.swstall delay_slot + 558 0x00 0x00 NOPX +.delay_slot + 560 0x00 0x2c 0xf0 0x00 0x22 0x1c 0x93 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST x2, [p2], #64; NOPX; NOPM; NOPV +.label TGT_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_576 +.loop_nesting 0 + 576 0x00 0x0b 0x80 0x02 0xd2 0x02 0x34 0x4d 0x88 0xba MOVA dc2, #0; MOVX vaddSign0, #1; ADD.NC p4, r17, #54 + 586 0x9f 0xce 0x53 0x08 0x4b 0x00 0x00 0x01 0x30 0x00 0x10 0x76 LDA.s16 r19, [p4], #-2; MOVS dc3, dc2; MOVXM p2, #_ZN12me_primitive11control_rndE + 598 0x9b 0xeb 0x54 0x08 0x4b 0x01 0x17 0x8b 0xb4 0x47 0x08 0x76 LDA.u16 r26, [p4], #-6; MOVS dc4, dc2; MOVX r17, #60; ADD.NC p7, r17, #28 + 610 0x9b 0xd2 0x51 0x10 0x4b 0x01 0xc0 0x40 0x52 0xba LDA.s16 r20, [p4], #-6; MOVS dc1, dc4; MOV dj3, #64 + 620 0x9f 0xf3 0x51 0x80 0x01 0x54 LDA.u16 r28, [p4], #-2; MOV dc0, #0 + 626 0x87 0xd6 0x50 0x00 0x00 0x00 0x79 0x98 0x10 0xba LDA.s16 r21, [p4], #6; MOVXM ls, #(ZLS_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_816 + 0) +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 636 0x9e 0xda 0x50 0x00 0x00 0x01 0xb9 0xa0 0x10 0xba LDA.s16 r22, [p4, #-2]; MOVXM le, #(ZLE_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_832 + 0) +.aggressive_scheduled_block_id 2 +.noswbrkpt + 646 0x80 0xeb 0x54 0x8c 0x8b 0x01 0x40 0x40 0x52 0xba LDA.u16 r26, [p4]; MOVS p4, p3; MOV dj2, #64 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 656 0x40 0xdc 0x54 0xdd 0x81 0xd4 LDA.s8 r23, [p2]; MOV p2, p7 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 662 0xe7 0xcf 0x50 0x01 0x20 0x29 0xa6 0x90 0x78 0xba LDA.u16 r19, [p7], #6; MOVX r18, #1; MOV dn3, r26 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 672 0x9e 0xe5 0xb4 0x9a 0x41 0xe4 LSHL r27, r19, r18; MOV dn2, r26 + 678 0x0b 0x81 0x60 0x29 0x49 0x6d 0x86 0xd0 0x79 0x3a MOVS dn0, r28; LSHL r20, r20, r18; MOV m3, r27 + 688 0xad 0x25 0xb0 0x14 0x41 0xe4 LSHL r20, r21, r18; MOV m0, r20 + 694 0xb5 0x25 0xb1 0x14 0x41 0xe4 LSHL r20, r22, r18; MOV dj0, r20 + 700 0x6e 0x15 0x74 0x5a 0x0b 0x02 0x45 0x10 0x72 0xba VLDA.2D.CONV.fp32.bf16 cml1, [p3], d3; MOVS dn4, r26; MOV dj4, r20 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 710 0x03 0x25 0x70 0x2f 0xd4 0x01 0x03 0x00 0x78 0xba VLDA.3D.CONV.fp32.bf16 cml2, [p0], d0; MOVX crRnd, r23; MOV m2, m3 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 720 0x6e 0x15 0x7a 0xf3 0xfb 0x14 VLDA.2D.CONV.fp32.bf16 cml1, [p3], d3; ADD.NC lc, r19, #-5 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 726 0x00 0x19 0x2b 0x98 VLDA.3D.CONV.fp32.bf16 cml2, [p0], d0 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 730 0x6e 0x15 0x70 0x00 0x20 0x3c VLDA.2D.CONV.fp32.bf16 cml1, [p3], d3;NOPB +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 736 0x03 0x25 0x70 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x7c 0x41 0x41 0xeb VLDA.3D.CONV.fp32.bf16 cml2, [p0], d0;NOPB; NOPS; NOPX; NOPM; VADD.f dm0, dm1, dm2, r17 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 752 0x6e 0x15 0x70 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA.2D.CONV.fp32.bf16 cml1, [p3], d3;NOPB; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 768 0x03 0x25 0x70 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x7c 0x41 0x41 0xeb VLDA.3D.CONV.fp32.bf16 cml2, [p0], d0;NOPB; NOPS; NOPX; NOPM; VADD.f dm0, dm1, dm2, r17 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 784 0x6e 0x15 0x70 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA.2D.CONV.fp32.bf16 cml1, [p3], d3;NOPB; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 800 0x03 0x25 0x70 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x7c 0x41 0x41 0xeb VLDA.3D.CONV.fp32.bf16 cml2, [p0], d0;NOPB; NOPS; NOPX; NOPM; VADD.f dm0, dm1, dm2, r17 +.label ZLS_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_816 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 816 0x6e 0x15 0x70 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA.2D.CONV.fp32.bf16 cml1, [p3], d3;NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLE_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_832 +.end_of_loop +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 832 0x03 0x25 0x70 0x00 0x24 0x50 0x23 0x00 0x00 0x00 0x01 0xa5 0x7c 0x41 0x41 0xeb VLDA.3D.CONV.fp32.bf16 cml2, [p0], d0;NOPB; VST.2D.CONV.bf16.fp32 cml0, [p4], d2;NOPX; NOPM; VADD.f dm0, dm1, dm2, r17 +.loop_nesting 0 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 848 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 850 0x8a 0x04 0x60 0x02 0x88 0x28 0x3d 0x62 VST.2D.CONV.bf16.fp32 cml0, [p4], d2; VADD.f dm0, dm1, dm2, r17 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 858 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 860 0x8a 0x04 0x60 0x02 0x88 0x28 0x3d 0x62 VST.2D.CONV.bf16.fp32 cml0, [p4], d2; VADD.f dm0, dm1, dm2, r17 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 868 0x80 0x02 0xc8 0x40 0x01 0x84 JNZ r16, #TGT_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_1424 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 874 0x0c 0x50 0x23 0x18 VST.2D.CONV.bf16.fp32 cml0, [p4], d2 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 878 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 880 0x0c 0x50 0x23 0x18 VST.2D.CONV.bf16.fp32 cml0, [p4], d2 +.delay_slot +.swstall delay_slot + 884 0x00 0x00 NOPX +.delay_slot + 886 0x0c 0x50 0x23 0x18 VST.2D.CONV.bf16.fp32 cml0, [p4], d2 + 890 0x08 0x06 0x80 0x00 0x00 0x00 0x30 0x1b 0x10 0xba MOVA dj1, #64; MOVXM p0, #(reducesum_params + 54) + 900 0x1f 0x9e 0x50 0x01 0x04 0x0a 0xe9 0x60 0x78 0xba LDA.s16 r7, [p0], #-2; MOVX r16, #32; MOV r23, p1 + 910 0x1e 0xf6 0xd0 0x01 0x32 0x0a 0x88 0x08 0x58 0xba LDA r29, [p0, #-4]; MOVX r19, #16; MOV r20, #8 + 920 0x00 0xef 0x50 0x00 0x00 0x00 0x7a 0x48 0x10 0xba LDA.u16 r27, [p0]; MOVXM ls, #(ZLS_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_1168 + 0) + 930 0x00 0x96 0x00 0x00 0x00 0x01 0xba 0x78 0x10 0xba MOVA r22, #4; MOVXM le, #(ZLE_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_1264 + 0) + 940 0x00 0x00 NOPX + 942 0x00 0x00 NOPX + 944 0x00 0x00 NOPX + 946 0x11 0xcf 0x2d 0x98 LSHL r7, r7, r18 + 950 0xec 0xa5 0xb2 0x07 0x41 0xe4 LSHL r18, r29, r18; MOV m1, r7 + 956 0x2b 0x61 0x60 0x02 0xbf 0x7f 0x40 0x02 MOVS dn1, r27; ADD.NC lc, r29, #-3 + 964 0x18 0x6b 0xc9 0x58 ADD.NC p0, r23, r18 + 968 0x00 0x30 0x2b 0x98 VLDA.2D.CONV.fp32.bf16 cml0, [p0], d1 + 972 0x00 0x00 NOPX + 974 0x00 0x00 NOPX + 976 0x00 0x00 NOPX + 978 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 980 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.noswbrkpt + 982 0x88 0x0c 0x3d 0x48 VADD.f dm0, dm0, dm3, r17 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 986 0x1b 0x01 0x12 0xf8 VMOV bmll3, bmlh0 + 990 0x00 0x00 NOPX +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 992 0x00 0x00 NOPX +.aggressive_scheduled_block_id 5 +.noswbrkpt + 994 0x00 0x30 0x2b 0x98 VLDA.2D.CONV.fp32.bf16 cml0, [p0], d1 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 998 0x00 0x00 NOPX +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 1000 0x18 0x20 0x12 0xf8 VMOV x0, bmll0 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 1004 0x00 0x80 0x42 0xc6 0x89 0x0c 0x3d 0x62 VSHIFT x1, x0, x0, r16; VADD.f dm1, dm0, dm3, r17 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 1012 0x1b 0x02 0x92 0xf8 VMOV bmll3, x1 + 1016 0x00 0x00 NOPX + 1018 0x00 0x00 NOPX + 1020 0x00 0x00 NOPX + 1022 0x00 0x00 NOPX +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 1024 0x19 0x24 0x12 0xf8 VMOV x2, bmll1 +.aggressive_scheduled_block_id 6 +.noswbrkpt + 1028 0x01 0x90 0x4e 0xc6 0x89 0x2c 0x3d 0x62 VSHIFT x3, x2, x0, r19; VADD.f dm1, dm1, dm3, r17 +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 1036 0x1b 0x06 0x92 0xf8 VMOV bmll3, x3 + 1040 0x03 0x01 0x12 0xe6 0x88 0x0c 0x3d 0x62 VMOV bmll3, bmlh0; VADD.f dm0, dm0, dm3, r17 + 1048 0x00 0x00 NOPX + 1050 0x00 0x00 NOPX + 1052 0x00 0x00 NOPX +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 1054 0x1a 0x24 0x12 0xf8 VMOV x4, bmll1 +.aggressive_scheduled_block_id 7 +.noswbrkpt + 1058 0x06 0x05 0x75 0x40 0xa5 0x94 VLDA.2D.CONV.fp32.bf16 cml0, [p0], d1; VSHIFT x5, x4, x0, r20 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 1064 0x18 0x20 0x12 0xf8 VMOV x0, bmll0 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 1068 0x00 0x80 0x42 0xc6 0x89 0x0c 0x3d 0x62 VSHIFT x1, x0, x0, r16; VADD.f dm1, dm0, dm3, r17 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 1076 0x1b 0x02 0x92 0xf8 VMOV bmll3, x1 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 1080 0x04 0x0a 0x92 0xe6 0x8a 0x30 0x3d 0x62 VMOV bmll4, x5; VADD.f dm2, dm1, dm4, r17 + 1088 0x00 0x00 NOPX + 1090 0x00 0x00 NOPX + 1092 0x00 0x00 NOPX +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id first + 1094 0x19 0x24 0x12 0xf8 VMOV x2, bmll1 +.aggressive_scheduled_block_id 8 +.noswbrkpt + 1098 0x01 0x90 0x4e 0xc6 0x89 0x2c 0x3d 0x62 VSHIFT x3, x2, x0, r19; VADD.f dm1, dm1, dm3, r17 +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 1106 0x1b 0x06 0x92 0xf8 VMOV bmll3, x3 + 1110 0x03 0x01 0x12 0xe6 0x88 0x0c 0x3d 0x62 VMOV bmll3, bmlh0; VADD.f dm0, dm0, dm3, r17 + 1118 0x00 0x00 NOPX + 1120 0x1b 0x28 0x12 0xf8 VMOV x6, bmll2 + 1124 0x1b 0xb0 0x5a 0xd8 VSHIFT x7, x6, x0, r22 + 1128 0x1c 0x0e 0x92 0xf8 VMOV bmll4, x7 + 1132 0x02 0x24 0x12 0xe6 0x8a 0x50 0x3d 0x62 VMOV x4, bmll1; VADD.f dm2, dm2, dm4, r17 + 1140 0x18 0x20 0x12 0xf8 VMOV x0, bmll0 +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id first + 1144 0x18 0x80 0x42 0xd8 VSHIFT x1, x0, x0, r16 +.aggressive_scheduled_block_id 9 +.noswbrkpt + 1148 0x03 0x02 0x92 0xe6 0x89 0x0c 0x3d 0x62 VMOV bmll3, x1; VADD.f dm1, dm0, dm3, r17 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 1156 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.label ZLS_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_1168 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 1168 0x06 0x05 0x72 0xa0 0x52 0xc2 0x8a 0x30 0x3d 0x4a VLDA.2D.CONV.fp32.bf16 cml0, [p0], d1; VSHIFT x5, x4, x0, r20; VADD.f dm2, dm1, dm4, r17 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 1178 0x82 0x22 0xc0 0x02 0x05 0x49 0x70 0x02 VCONV.bf16.fp32 x8, cml2; VMOV bmll4, x5 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 1186 0x00 0x00 NOPX +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 1188 0x1d 0x61 0x01 0xb8 VEXTRACT.16 r21, x8, #0, vaddSign0 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 1192 0x19 0x24 0x12 0xf8 VMOV x2, bmll1 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 1196 0x19 0x90 0x4e 0xd8 VSHIFT x3, x2, x0, r19 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 1200 0x03 0x06 0x92 0xe6 0x89 0x2c 0x3d 0x62 VMOV bmll3, x3; VADD.f dm1, dm1, dm3, r17 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 1208 0x03 0x28 0x12 0xe6 0x88 0x0c 0x3d 0x62 VMOV x6, bmll2; VADD.f dm0, dm0, dm3, r17 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 1216 0x1b 0x01 0x12 0xf8 VMOV bmll3, bmlh0 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 1220 0x1b 0xb0 0x5a 0xd8 VSHIFT x7, x6, x0, r22 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 1224 0x1c 0x0e 0x92 0xf8 VMOV bmll4, x7 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 1228 0x8a 0x50 0x3d 0x48 VADD.f dm2, dm2, dm4, r17 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 1232 0x1a 0x24 0x12 0xf8 VMOV x4, bmll1 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 1236 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x10 0x09 0x70 0xf6 NOPA; NOPB; NOPS; VMOV x0, bmll0 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 1248 0x23 0xd6 0xe0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x40 0x21 0x6c 0x48 0x61 0xeb ST.s16 r21, [p1], #2; NOPB; NOPS; NOPX; VSHIFT x1, x0, x0, r16; VADD.f dm1, dm0, dm3, r17 +.label ZLE_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_1264 +.end_of_loop +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 1264 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x01 0x81 0x49 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VMOV bmll3, x1; NOPV +.loop_nesting 0 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 1280 0x02 0xa0 0x52 0xc6 0x8a 0x30 0x3d 0x62 VSHIFT x5, x4, x0, r20; VADD.f dm2, dm1, dm4, r17 +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 1288 0x82 0x22 0xc0 0x02 0x05 0x49 0x70 0x02 VCONV.bf16.fp32 x8, cml2; VMOV bmll4, x5 + 1296 0x00 0x00 NOPX + 1298 0x1d 0x61 0x01 0xb8 VEXTRACT.16 r21, x8, #0, vaddSign0 + 1302 0x19 0x24 0x12 0xf8 VMOV x2, bmll1 + 1306 0x23 0xd6 0xe3 0x20 0x9d 0x94 ST.s16 r21, [p1], #2; VSHIFT x3, x2, x0, r19 +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id first + 1312 0x1b 0x28 0x12 0xf8 VMOV x6, bmll2 +.aggressive_scheduled_block_id 10 +.noswbrkpt + 1316 0x03 0xb0 0x5a 0xc6 0x8a 0x50 0x3d 0x62 VSHIFT x7, x6, x0, r22; VADD.f dm2, dm2, dm4, r17 +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 1324 0x1c 0x0e 0x92 0xf8 VMOV bmll4, x7 + 1328 0x03 0x06 0x92 0xe6 0x89 0x2c 0x3d 0x62 VMOV bmll3, x3; VADD.f dm1, dm1, dm3, r17 + 1336 0x00 0x00 NOPX + 1338 0x00 0x00 NOPX + 1340 0x00 0x00 NOPX + 1342 0x0c 0x11 0x16 0x18 VCONV.bf16.fp32 x8, cml2 +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id first + 1346 0x00 0x00 NOPX +.aggressive_scheduled_block_id 11 +.noswbrkpt + 1348 0x23 0xd6 0xe4 0x48 0x25 0xd4 ST.s16 r21, [p1], #2; VMOV x4, bmll1 +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 1354 0x02 0xa0 0x52 0xc6 0x8a 0x30 0x3d 0x62 VSHIFT x5, x4, x0, r20; VADD.f dm2, dm1, dm4, r17 +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 1362 0x1c 0x0a 0x92 0xf8 VMOV bmll4, x5 +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 1366 0x1d 0x61 0x01 0xb8 VEXTRACT.16 r21, x8, #0, vaddSign0 + 1370 0x00 0x00 NOPX + 1372 0x00 0x00 NOPX + 1374 0x00 0x00 NOPX +.aggressive_scheduled_block_id 12 +.aggressive_scheduled_block_id first + 1376 0x1b 0x28 0x12 0xf8 VMOV x6, bmll2 +.aggressive_scheduled_block_id 12 +.noswbrkpt + 1380 0x03 0xb0 0x5a 0xc6 0x8a 0x50 0x3d 0x62 VSHIFT x7, x6, x0, r22; VADD.f dm2, dm2, dm4, r17 +.aggressive_scheduled_block_id 12 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 1388 0x1c 0x0e 0x92 0xf8 VMOV bmll4, x7 + 1392 0x00 0x00 NOPX + 1394 0x00 0x00 NOPX + 1396 0x00 0x00 NOPX + 1398 0x00 0x00 NOPX + 1400 0x0c 0x11 0x16 0x18 VCONV.bf16.fp32 x8, cml2 + 1404 0x00 0x01 0x67 0x98 NOPA + 1408 0x23 0xd6 0xe0 0x00 0x20 0x01 0x5b 0x00 0x00 0x02 0xb0 0x80 0xd8 0x00 0x00 0xe1 ST.s16 r21, [p1], #2; NOPB; NOPS; NOPX; VEXTRACT.16 r21, x8, #0, vaddSign0; NOPV +.label TGT_F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_1424 + 1424 0x02 0x36 0x1a 0x98 LDA.u16 r16, [p2, #6] + 1428 0x00 0x00 NOPX + 1430 0x00 0x00 NOPX + 1432 0x00 0x00 NOPX + 1434 0x00 0x00 NOPX +.aggressive_scheduled_block_id 13 +.aggressive_scheduled_block_id first + 1436 0x00 0x00 NOPX +.aggressive_scheduled_block_id 13 +.noswbrkpt + 1438 0x07 0xfe 0x17 0x18 ST.s16 r16, [p7], #-2 +.aggressive_scheduled_block_id 13 +.nohwbrkpt +.noswbrkpt + 1442 0x07 0x04 0x3a 0x98 LDA.u16 r1, [p7] +.aggressive_scheduled_block_id 13 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 1446 0x00 0x00 0x00 0x00 0x01 0x04 JL #_ZN12me_primitive10udiv_dstepEjjRjS0_ +.delay_slot +.aggressive_scheduled_block_id 13 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 1452 0x14 0x20 0x07 0x18 ADD r16, r16, #1 +.delay_slot + 1456 0x14 0x00 0xb0 0x18 EXTEND.u16 r0, r16 +.delay_slot +.swstall delay_slot + 1460 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1462 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1464 0x00 0x2c 0xf0 0x04 0x00 0x00 0x1c 0x22 NOPA; NOPV +.return_address + 1472 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] + 1476 0x07 0xf8 0x99 0x18 LDA p1, [sp, #-8] + 1480 0x07 0x24 0x77 0x18 ST.s16 r3, [p7, #4] + 1484 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 + 1490 0x00 0x00 NOPX + 1492 0x00 0x00 NOPX + 1494 0x00 0x00 NOPX + 1496 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 1500 0x1f 0x62 0xc0 0xf8 MOV p7, p1 +.delay_slot +.swstall delay_slot + 1504 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1506 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1508 0x00 0x00 NOPX +.label _ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E__end last +.label __ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E___func_end0 last +.delay_slot +.swstall delay_slot + 1510 0x00 0x00 NOPX + +.undef global data reducesum_params + +.undef global data _ZN12me_primitive11control_rndE + +.undef global text _ZN12me_primitive10udiv_dstepEjjRjS0_ + +.text_segment_name +.text global 10 _Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.label __Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.function_start + 0 0x00 0x00 0x06 0xc0 0x00 0x44 MOVXM p3, #_ZL9curr_iter + 6 0x60 0xc2 0xd1 0xae 0x41 0xd4 LDA r16, [p3]; MOV r3, r14 + 12 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 18 0xfd 0xf3 0xb0 0x00 0x2b 0xd0 0x70 0x02 ST p7, [sp, #-20]; MOV r1, r15 + 26 0xff 0xe3 0xb0 0x02 0x2d 0x70 0x70 0x02 ST p6, [sp, #-4]; MOV r17, CORE_ID + 34 0xff 0x36 0xb0 0x23 0x14 0x81 0xea 0x60 0x79 0x3a ST r13, [sp, #-8]; EXTEND.u8 r17, r17; MOV r15, p2 + 44 0xfe 0x06 0xb0 0x01 0xc8 0xf0 0x70 0x02 ST r1, [sp, #-16]; MOV r14, lr + 52 0x00 0x00 NOPX + 54 0x80 0x00 0x80 0x40 0x01 0x84 JNZ r16, #TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_256 +.delay_slot + 60 0x0f 0xf4 0x75 0x98 ST r3, [sp, #-12] +.delay_slot + 64 0xf0 0x11 0x60 0x00 0x00 0x00 0x30 0x00 0x11 0x3a MOVS p7, p0; MOVXM p0, #reducesum_params +.delay_slot + 74 0x00 0x00 0x0c 0xc0 0x00 0x44 MOVXM p6, #_ZL8core_row +.delay_slot + 80 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 84 0x0e 0x06 0x31 0x98 ST r17, [p6] +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 88 0x00 0x00 0x0c 0xc0 0x00 0x44 MOVXM p6, #_ZN12me_primitive11control_rndE +.aggressive_scheduled_block_id 1 +.noswbrkpt + 94 0xc0 0xc0 0xe0 0x00 0x00 0x03 0x30 0x00 0x10 0xba ST.s8 r16, [p6]; MOVXM p6, #_ZN12me_primitive11control_satE +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 104 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 106 0x00 0x00 0x00 0x00 0x01 0x04 JL #_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 112 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 114 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 116 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 120 0x00 0x01 0x00 0x29 0x00 0x00 0x1c 0x22 MOVX r16, #1; NOPV +.delay_slot + 128 0x00 0x2c 0xf0 0x00 0x26 0x06 0x11 0x80 0x00 0x03 0x31 0x60 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p6]; NOPX; MOV p6, p1; NOPV +.return_address + 144 0x00 0x00 0x06 0xc0 0x00 0x44 MOVXM p3, #reducesum_params + 150 0x63 0xca 0xd0 0x00 0x00 0x00 0xb0 0x00 0x10 0xba LDA r18, [p3], #4; MOVXM p1, #_ZL8core_row + 160 0x01 0x06 0x96 0x98 LDA r20, [p1] + 164 0x03 0x1e 0x36 0x98 LDA r17, [p3], #4 + 168 0x00 0x00 NOPX + 170 0x03 0x06 0x16 0x98 LDA r16, [p3] + 174 0x00 0x00 NOPX + 176 0x03 0x16 0x76 0x98 LDA r19, [p3, #4] + 180 0x00 0x00 NOPX + 182 0x14 0xa5 0x4f 0x98 MUL r18, r18, r20 + 186 0x00 0x00 NOPX + 188 0x14 0x63 0x2f 0x98 MUL r17, r17, r18 + 192 0x00 0x00 0x02 0xc0 0x00 0x44 MOVXM p1, #_ZL11reduce_axis + 198 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 + 202 0x20 0xce 0x30 0x00 0x00 0x01 0x30 0x00 0x11 0x3a ST r19, [p1]; MOVXM p2, #_ZL11ifm1_offset + 212 0x40 0xc2 0x30 0x00 0x40 0x28 0x50 0x02 ST r16, [p2]; MOV dj0, #40 + 220 0x06 0x00 0x2e 0x98 LDA el0, [p6, dj0] + 224 0x00 0x00 NOPX + 226 0x00 0x00 0x88 0x00 0x00 0x84 J #TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_272 +.delay_slot +.swstall delay_slot + 232 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 234 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 236 0x00 0x00 NOPX +.delay_slot + 238 0x00 0x00 0x00 0xc0 0x00 0x44 MOVXM p0, #_ZL8num_iter +.delay_slot + 244 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x02 0x14 0xc1 0x36 NOPA; NOPB; ST el0, [p0]; NOPX +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_256 + 256 0x00 0x00 0x02 0xc0 0x00 0x44 MOVXM p1, #_ZL11reduce_axis + 262 0x20 0xce 0xd0 0x00 0x10 0x00 0x01 0xa5 0x7e 0xba LDA r19, [p1]; NOPB; NOPM +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_272 + 272 0x00 0x00 NOPX + 274 0x00 0x00 NOPX + 276 0x00 0x00 NOPX + 278 0x00 0x00 NOPX + 280 0x00 0x00 NOPX + 282 0x10 0x20 0x05 0x18 MOVX r16, #1 + 286 0x14 0xe1 0x08 0x98 NE r16, r19, r16 + 290 0x80 0x00 0xe8 0x40 0x01 0x84 JNZ r16, #TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_464 +.delay_slot + 296 0x1e 0x67 0x86 0x18 ADD.NC p6, r15, #12 +.delay_slot +.swstall delay_slot + 300 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 302 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 304 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 306 0x00 0x00 NOPX + 308 0xfd 0x3e 0xb0 0x00 0x00 0x00 0xb0 0x00 0x11 0x3a ST r15, [sp, #-24]; MOVXM p1, #_ZL10depth_iter + 318 0x20 0xc2 0xd0 0x00 0x00 0x00 0xb0 0x10 0x10 0xba LDA r16, [p1]; MOVXM p1, #(reducesum_params + 32) + 328 0x01 0x04 0x3a 0x98 LDA.u16 r1, [p1] + 332 0x00 0x00 NOPX + 334 0x00 0x00 NOPX +.no_stack_arguments + 336 0x00 0x00 0x00 0x00 0x01 0x04 JL #_ZN12me_primitive10udiv_dstepEjjRjS0_ +.delay_slot + 342 0x10 0x1a 0x01 0x18 MOVX r13, #0 +.delay_slot +.swstall delay_slot + 346 0x00 0x00 NOPX +.delay_slot + 348 0x14 0x36 0xda 0x98 LT r27, r16, r13 +.delay_slot + 352 0x6c 0x60 0x37 0xbb 0x41 0xe4 SUB r17, r13, r16; MOV r15, r27 +.delay_slot + 358 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x04 0x01 0x12 0x7a NOPA; NOPS; SEL.EQZ r0, r16, r17, r27 +.return_address + 368 0x6c 0x06 0x3d 0xaf 0x41 0xe4 SUB r16, r13, r3; MOV r27, r15 + 374 0x10 0xe1 0x02 0x18 SEL.EQZ r16, r3, r16, r27 + 378 0x80 0x00 0xe8 0x40 0x01 0x84 JNZ r16, #TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_464 +.delay_slot + 384 0x00 0x00 0x02 0xc0 0x00 0x44 MOVXM p1, #_ZL11reduce_axis +.delay_slot + 390 0x07 0xe9 0xf1 0x18 LDA r15, [sp, #-24] +.delay_slot +.swstall delay_slot + 394 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 396 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 398 0x00 0x00 NOPX + 400 0xfc 0x1f 0xa4 0xd9 0x81 0xe4 MOVX r16, #-1; MOV p2, p6 + 406 0x02 0xff 0x76 0x98 LDA r27, [p2], #-4 + 410 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4 + 414 0x02 0xfe 0x56 0x98 LDA r18, [p2], #-4 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 418 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.noswbrkpt + 420 0x02 0x46 0x36 0x98 LDA r17, [p2, #16] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 424 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 426 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 428 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 430 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 432 0x14 0xa3 0x12 0x18 SEL.EQZ r17, r18, r17, r27 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 436 0x0a 0x06 0x31 0x98 ST r17, [p2] + 440 0x00 0x00 NOPX + 442 0x00 0x00 NOPX + 444 0x00 0x00 NOPX + 446 0x00 0x00 NOPX + 448 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x22 0x98 0x40 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; ACQ r17, r16; NOPM; NOPV +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_464 + 464 0x10 0x22 0x09 0x18 MOVX r17, #2 + 468 0x00 0x00 NOPX + 470 0x00 0x00 NOPX + 472 0x01 0x06 0x16 0x98 LDA r16, [p1] + 476 0x00 0x00 NOPX + 478 0x00 0x00 NOPX + 480 0x00 0x00 NOPX + 482 0x00 0x00 NOPX + 484 0x00 0x00 NOPX + 486 0x00 0x00 NOPX + 488 0x14 0x63 0x08 0x98 NE r17, r17, r16 + 492 0x88 0x01 0x58 0x40 0x01 0x84 JNZ r17, #TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_688 +.delay_slot +.swstall delay_slot + 498 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 500 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 502 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 504 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 506 0x00 0x00 NOPX + 508 0xfd 0x3e 0xb0 0x00 0x00 0x00 0xb0 0x00 0x11 0x3a ST r15, [sp, #-24]; MOVXM p1, #_ZL10width_iter + 518 0x20 0xc2 0xd0 0x00 0x00 0x00 0xb0 0x10 0x10 0xba LDA r16, [p1]; MOVXM p1, #(reducesum_params + 32) + 528 0x01 0x04 0x3a 0x98 LDA.u16 r1, [p1] + 532 0x00 0x00 NOPX + 534 0x00 0x00 NOPX +.no_stack_arguments + 536 0x00 0x00 0x00 0x00 0x01 0x04 JL #_ZN12me_primitive10udiv_dstepEjjRjS0_ +.delay_slot + 542 0x10 0x1a 0x01 0x18 MOVX r13, #0 +.delay_slot +.swstall delay_slot + 546 0x00 0x00 NOPX +.delay_slot + 548 0x00 0x2c 0xf8 0x6d 0xb5 0x2c NOPA; LT r27, r16, r13 +.delay_slot + 554 0x6c 0x60 0x37 0xbb 0x41 0xe4 SUB r17, r13, r16; MOV r15, r27 +.delay_slot + 560 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x20 0x08 0x90 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; SEL.EQZ r0, r16, r17, r27; NOPM; NOPV +.return_address + 576 0xfd 0x3e 0x20 0x1b 0x01 0x8f 0x6b 0xd0 0x78 0xba LDA r15, [sp, #-24]; SUB r16, r13, r3; MOV r27, r15 + 586 0x10 0xe1 0x02 0x18 SEL.EQZ r16, r3, r16, r27 + 590 0x80 0x01 0x50 0x40 0x01 0x84 JNZ r16, #TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_672 +.delay_slot + 596 0x00 0x00 0x02 0xc0 0x00 0x44 MOVXM p1, #_ZL11reduce_axis +.delay_slot +.swstall delay_slot + 602 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 604 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 606 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 608 0x00 0x00 NOPX + 610 0xfc 0x1f 0xa4 0xd9 0x81 0xe4 MOVX r16, #-1; MOV p2, p6 + 616 0x02 0xff 0x76 0x98 LDA r27, [p2], #-4 + 620 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4 + 624 0x02 0xfe 0x56 0x98 LDA r18, [p2], #-4 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 628 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.noswbrkpt + 630 0x02 0x46 0x36 0x98 LDA r17, [p2, #16] +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 634 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 636 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 638 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 640 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 642 0x14 0xa3 0x12 0x18 SEL.EQZ r17, r18, r17, r27 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 646 0x0a 0x06 0x31 0x98 ST r17, [p2] + 650 0x00 0x00 NOPX + 652 0x00 0x00 NOPX + 654 0x00 0x00 NOPX + 656 0x00 0x00 NOPX + 658 0x00 0x2c 0xf0 0x00 0x24 0x53 0x08 0x00 0x34 0xaf 0x00 0x2b 0x60 0x7e NOPA; NOPB; NOPS; ACQ r17, r16; NOPM +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_672 + 672 0x00 0x00 NOPX + 674 0x00 0x00 NOPX + 676 0x00 0x00 NOPX + 678 0x20 0xc2 0xd0 0x00 0x10 0x00 0x01 0xa5 0x7e 0xba LDA r16, [p1]; NOPB; NOPM +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_688 + 688 0x10 0x1a 0x01 0x18 MOVX r13, #0 + 692 0x00 0x00 NOPX + 694 0x00 0x00 NOPX + 696 0x00 0x00 NOPX + 698 0x00 0x00 NOPX + 700 0x10 0x22 0x11 0x18 MOVX r17, #4 + 704 0x14 0x61 0x08 0x98 NE r16, r17, r16 + 708 0x80 0x01 0xd0 0x40 0x01 0x84 JNZ r16, #TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_928 +.delay_slot + 714 0x00 0x00 0x02 0xc0 0x00 0x44 MOVXM p1, #_ZL11height_iter +.delay_slot + 720 0x00 0x00 0x04 0xc0 0x40 0x44 MOVXM p2, #(reducesum_params + 32) +.delay_slot +.swstall delay_slot + 726 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 728 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 730 0x00 0x00 NOPX + 732 0x01 0x06 0x16 0x98 LDA r16, [p1] + 736 0x02 0x04 0x3a 0x98 LDA.u16 r1, [p2] + 740 0x00 0x00 NOPX + 742 0x00 0x00 NOPX +.no_stack_arguments + 744 0x00 0x00 0x00 0x00 0x01 0x04 JL #_ZN12me_primitive10udiv_dstepEjjRjS0_ +.delay_slot +.swstall delay_slot + 750 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 752 0x00 0x00 NOPX +.delay_slot + 754 0x14 0x36 0xda 0x98 LT r27, r16, r13 +.delay_slot + 758 0xfd 0x6e 0xb6 0xc6 0x03 0x5c ST r27, [sp, #-24]; SUB r17, r13, r16 +.delay_slot + 764 0x14 0x01 0x12 0x18 SEL.EQZ r0, r16, r17, r27 +.return_address + 768 0xfd 0x6e 0x26 0xc0 0x63 0x2c LDA r27, [sp, #-24]; SUB r16, r13, r3 + 774 0x00 0x00 NOPX + 776 0x00 0x00 NOPX + 778 0x00 0x00 NOPX + 780 0x00 0x00 NOPX + 782 0x00 0x00 NOPX + 784 0x00 0x00 NOPX + 786 0x10 0xe1 0x02 0x18 SEL.EQZ r16, r3, r16, r27 + 790 0x80 0x01 0xc0 0x40 0x01 0x84 JNZ r16, #TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_896 +.delay_slot + 796 0x19 0x6e 0xc0 0xf8 MOV p1, p7 +.delay_slot +.swstall delay_slot + 800 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 802 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 804 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 806 0x00 0x00 NOPX + 808 0xdf 0xee 0xd0 0x00 0x00 0x01 0x30 0x00 0x10 0xba LDA r27, [p6], #-4; MOVXM p2, #reducesum_params + 818 0xdf 0xc6 0xd0 0x00 0x00 0x03 0xb0 0x00 0x10 0xba LDA r17, [p6], #-4; MOVXM p7, #_ZL11reduce_axis + 828 0xdf 0xca 0xd0 0x00 0xf0 0x28 0x2b 0xd0 0x78 0xba LDA r18, [p6], #-4; MOVX r15, #1; MOV r1, r15 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 838 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 +.aggressive_scheduled_block_id 4 +.noswbrkpt + 842 0x06 0x46 0x36 0x98 LDA r17, [p6, #16] +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 846 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 848 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 850 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 852 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 854 0x14 0xa3 0x12 0x18 SEL.EQZ r17, r18, r17, r27 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 858 0x0e 0x06 0x31 0x98 ST r17, [p6] + 862 0x00 0x00 NOPX + 864 0x00 0x00 NOPX + 866 0x00 0x01 0xe0 0x00 0x00 0x84 J #TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_960 +.delay_slot + 872 0x1e 0x60 0xa0 0xf8 MOV p6, r1 +.delay_slot + 876 0x14 0x53 0x08 0x18 ACQ r17, r16 +.delay_slot +.swstall delay_slot + 880 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 882 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 884 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_896 + 896 0xd1 0xe1 0x60 0x00 0x00 0x78 0x00 0x00 0x21 0x3a MOVS p6, r15; J #TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_960 +.delay_slot + 906 0x00 0x00 0x0e 0xc0 0x00 0x44 MOVXM p7, #_ZL11reduce_axis +.delay_slot + 912 0x00 0x00 0x04 0xc0 0x00 0x44 MOVXM p2, #reducesum_params +.delay_slot + 918 0x10 0x1e 0x05 0x18 MOVX r15, #1 +.delay_slot +.swstall delay_slot + 922 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 924 0x00 0x01 0x67 0x98 NOPA +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_928 + 928 0x00 0x2c 0xf0 0x00 0x21 0x9c 0x8b 0x00 0x00 0x01 0x30 0x00 0x10 0x00 0x00 0xe1 NOPA; NOPB; MOVS p1, p7; MOVXM p2, #reducesum_params; NOPV + 944 0x00 0x2f 0x00 0x00 0x26 0x8f 0x0b 0x00 0x00 0x03 0xb0 0x00 0x10 0x00 0x00 0xe1 MOVA r15, #1; NOPB; MOVS p6, r15; MOVXM p7, #_ZL11reduce_axis; NOPV +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_960 + 960 0x20 0xc2 0xd7 0xeb 0x1d 0x80 0x00 0x01 0xb0 0x00 0x10 0x76 LDA r16, [p1]; ST p6, [sp, #-24]; MOVXM p3, #_ZL9curr_iter + 972 0x60 0xc6 0xd0 0x00 0x00 0x00 0xb0 0x00 0x10 0xba LDA r17, [p3]; MOVXM p1, #_ZL11ifm1_offset + 982 0x01 0x06 0x56 0x98 LDA r18, [p1] + 986 0x06 0x5c 0x9e 0x98 LDA p1, [p6], #20 + 990 0x00 0x00 NOPX +.no_stack_arguments + 992 0x00 0x00 0x00 0x00 0x01 0x04 JL #_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E +.delay_slot +.swstall delay_slot + 998 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1000 0x00 0x00 NOPX +.delay_slot + 1002 0x14 0x62 0x07 0x18 ADD r17, r17, #1 +.delay_slot + 1006 0x60 0xc6 0x39 0x49 0xfb 0x5c ST r17, [p3]; LSHL r18, r18, r15 +.delay_slot + 1012 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x34 0xa0 0xa0 0xf6 NOPA; NOPB; NOPS; ADD.NC p0, r18, r16 +.return_address + 1024 0x07 0x06 0x36 0x98 LDA r17, [p7] + 1028 0x00 0x00 NOPX + 1030 0x00 0x00 NOPX + 1032 0x00 0x00 NOPX + 1034 0x00 0x00 NOPX + 1036 0x00 0x00 NOPX + 1038 0x00 0x00 NOPX + 1040 0x13 0xe5 0x18 0x98 NE r18, r15, r17 + 1044 0x90 0x02 0x70 0x40 0x01 0x84 JNZ r18, #TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1248 +.delay_slot + 1050 0x10 0x20 0x09 0x18 MOVX r16, #2 +.delay_slot +.swstall delay_slot + 1054 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1056 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1058 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1060 0x00 0x00 NOPX + 1062 0x00 0x00 0x0e 0xc0 0x00 0x44 MOVXM p7, #_ZL10depth_iter + 1068 0xe0 0xc2 0xd0 0x00 0x00 0x00 0xb0 0x10 0x10 0xba LDA r16, [p7]; MOVXM p1, #(reducesum_params + 32) + 1078 0x01 0x04 0x3a 0x98 LDA.u16 r1, [p1] + 1082 0x00 0x00 NOPX + 1084 0x00 0x00 NOPX + 1086 0x00 0x00 NOPX +.no_stack_arguments + 1088 0x00 0x00 0x00 0x00 0x01 0x04 JL #_ZN12me_primitive10udiv_dstepEjjRjS0_ +.delay_slot +.swstall delay_slot + 1094 0x00 0x00 NOPX +.delay_slot + 1096 0x14 0x20 0x07 0x18 ADD r16, r16, #1 +.delay_slot + 1100 0xe0 0xc2 0x38 0x6d 0xb5 0x5c ST r16, [p7]; LT r27, r16, r13 +.delay_slot + 1106 0x6c 0x60 0x37 0xbb 0x41 0xe4 SUB r17, r13, r16; MOV r15, r27 +.delay_slot + 1112 0x00 0x20 0x08 0x91 0x00 0x00 0x1c 0x22 SEL.EQZ r0, r16, r17, r27; NOPV +.return_address + 1120 0xfd 0x13 0x20 0x1b 0x01 0x8f 0x6b 0xd0 0x78 0xba LDA p1, [sp, #-24]; SUB r16, r13, r3; MOV r27, r15 + 1130 0x1c 0xa0 0x48 0xa0 0x05 0x64 SEL.EQZ r18, r3, r16, r27; MOV r17, #1 + 1136 0x90 0x02 0x60 0x40 0x01 0x84 JNZ r18, #TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1216 +.delay_slot + 1142 0x10 0x20 0x09 0x18 MOVX r16, #2 +.delay_slot +.swstall delay_slot + 1146 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1148 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1150 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1152 0x00 0x00 NOPX + 1154 0x2a 0xca 0xde 0x0b 0x63 0x0c LDA r18, [p1, #20]; ST r13, [p7] + 1160 0x00 0x00 NOPX + 1162 0x00 0x00 NOPX + 1164 0x00 0x00 NOPX + 1166 0x00 0x00 NOPX + 1168 0x00 0x00 NOPX + 1170 0x00 0x00 NOPX + 1172 0x14 0x91 0x18 0x18 REL r18, r17 + 1176 0xdc 0xca 0xd0 0x00 0x00 0x03 0xb0 0x00 0x10 0xba LDA r18, [p6, #-8]; MOVXM p7, #_ZL11reduce_axis + 1186 0x00 0x00 NOPX + 1188 0x00 0x00 NOPX + 1190 0x00 0x02 0x68 0x00 0x00 0x84 J #TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1232 +.delay_slot +.swstall delay_slot + 1196 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1198 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1200 0x00 0x00 NOPX +.delay_slot + 1202 0x14 0x63 0x21 0x98 SUB r17, r17, r18 +.delay_slot + 1206 0x00 0x2c 0xf6 0xe6 0x31 0x80 0x00 0x00 0x00 0x7a NOPA; ST r17, [p6, #-8]; NOPX +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1216 + 1216 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x03 0xb0 0x00 0x10 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVXM p7, #_ZL11reduce_axis; NOPV +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1232 + 1232 0xe0 0xc6 0xd0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 LDA r17, [p7]; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1248 + 1248 0x00 0x00 NOPX + 1250 0x00 0x00 NOPX + 1252 0x00 0x00 NOPX + 1254 0x00 0x00 NOPX + 1256 0x00 0x00 NOPX + 1258 0x00 0x00 NOPX + 1260 0x14 0x61 0x08 0x98 NE r16, r17, r16 + 1264 0x80 0x02 0xe0 0x40 0x01 0x84 JNZ r16, #TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1472 +.delay_slot + 1270 0x00 0x00 0x02 0xc0 0x40 0x44 MOVXM p1, #(reducesum_params + 32) +.delay_slot + 1276 0x00 0x00 0x0e 0xc0 0x00 0x44 MOVXM p7, #_ZL10width_iter +.delay_slot +.swstall delay_slot + 1282 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1284 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1286 0x00 0x00 NOPX + 1288 0x07 0x06 0x16 0x98 LDA r16, [p7] + 1292 0x01 0x04 0x3a 0x98 LDA.u16 r1, [p1] + 1296 0x00 0x00 NOPX + 1298 0x00 0x00 NOPX + 1300 0x00 0x00 NOPX +.no_stack_arguments + 1302 0x00 0x00 0x00 0x00 0x01 0x04 JL #_ZN12me_primitive10udiv_dstepEjjRjS0_ +.delay_slot +.swstall delay_slot + 1308 0x00 0x00 NOPX +.delay_slot + 1310 0x14 0x20 0x07 0x18 ADD r16, r16, #1 +.delay_slot + 1314 0xe0 0xc2 0x30 0x00 0x24 0x36 0xda 0xd2 NOPB; ST r16, [p7]; LT r27, r16, r13 +.delay_slot + 1322 0x6c 0x60 0x37 0xbb 0x41 0xe4 SUB r17, r13, r16; MOV r15, r27 +.delay_slot + 1328 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x20 0x08 0x90 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; SEL.EQZ r0, r16, r17, r27; NOPM; NOPV +.return_address + 1344 0xfd 0x23 0x20 0x1b 0x01 0x8f 0x6b 0xd0 0x78 0xba LDA p2, [sp, #-24]; SUB r16, r13, r3; MOV r27, r15 + 1354 0x10 0xe3 0x02 0x18 SEL.EQZ r17, r3, r16, r27 + 1358 0x88 0x02 0xd0 0x40 0x01 0x84 JNZ r17, #TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1440 +.delay_slot + 1364 0x00 0x00 0x02 0xc0 0x00 0x44 MOVXM p1, #_ZL11reduce_axis +.delay_slot + 1370 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot +.swstall delay_slot + 1374 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1376 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1378 0x00 0x00 NOPX + 1380 0x4a 0xc6 0xde 0x0b 0x63 0x0c LDA r17, [p2, #20]; ST r13, [p7] + 1386 0x00 0x00 NOPX + 1388 0x00 0x00 NOPX + 1390 0x00 0x00 NOPX + 1392 0x00 0x00 NOPX + 1394 0x00 0x00 NOPX + 1396 0x00 0x00 NOPX + 1398 0x14 0x51 0x08 0x18 REL r17, r16 + 1402 0x06 0xe6 0x36 0x98 LDA r17, [p6, #-8] + 1406 0x00 0x00 NOPX + 1408 0x00 0x00 NOPX + 1410 0x00 0x00 NOPX + 1412 0x00 0x00 NOPX + 1414 0x00 0x00 NOPX + 1416 0x00 0x00 NOPX + 1418 0x00 0x2c 0xf8 0x42 0x23 0x2c NOPA; SUB r16, r16, r17 + 1424 0x00 0x2c 0xf0 0x00 0x26 0xe6 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p6, #-8]; NOPX; NOPM; NOPV +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1440 + 1440 0x00 0x02 0xe8 0x00 0x00 0x84 J #TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1488 +.delay_slot + 1446 0x1f 0x64 0xc0 0xf8 MOV p7, p2 +.delay_slot +.swstall delay_slot + 1450 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1452 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1454 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1456 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1472 + 1472 0xfd 0x73 0x20 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0xb0 0x00 0x10 0x00 0x00 0xe1 LDA p7, [sp, #-24]; NOPB; NOPS; MOVXM p1, #_ZL11reduce_axis; NOPV +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1488 + 1488 0x01 0x06 0x16 0x98 LDA r16, [p1] + 1492 0x00 0x00 NOPX + 1494 0x00 0x00 NOPX + 1496 0x00 0x00 NOPX + 1498 0x00 0x00 NOPX + 1500 0x00 0x00 NOPX + 1502 0x10 0x22 0x11 0x18 MOVX r17, #4 + 1506 0x14 0x61 0x08 0x98 NE r16, r17, r16 + 1510 0x80 0x03 0x40 0x40 0x01 0x84 JNZ r16, #TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1664 +.delay_slot + 1516 0x00 0x00 0x02 0xc0 0x00 0x44 MOVXM p1, #_ZL11height_iter +.delay_slot +.swstall delay_slot + 1522 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1524 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1526 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1528 0x00 0x00 NOPX + 1530 0x20 0xc2 0xd0 0x00 0x00 0x01 0x30 0x10 0x10 0xba LDA r16, [p1]; MOVXM p2, #(reducesum_params + 32) + 1540 0x02 0x04 0x3a 0x98 LDA.u16 r1, [p2] + 1544 0x00 0x00 NOPX + 1546 0x00 0x00 NOPX + 1548 0x00 0x00 NOPX +.no_stack_arguments + 1550 0x00 0x00 0x00 0x00 0x01 0x04 JL #_ZN12me_primitive10udiv_dstepEjjRjS0_ +.delay_slot +.swstall delay_slot + 1556 0x00 0x00 NOPX +.delay_slot + 1558 0x14 0x20 0x07 0x18 ADD r16, r16, #1 +.delay_slot + 1562 0x20 0xc2 0x38 0x6d 0xb5 0x5c ST r16, [p1]; LT r27, r16, r13 +.delay_slot + 1568 0x6c 0x60 0x37 0xbb 0x41 0xe4 SUB r17, r13, r16; MOV r15, r27 +.delay_slot + 1574 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x04 0x01 0x12 0x7a NOPA; NOPS; SEL.EQZ r0, r16, r17, r27 +.return_address + 1584 0x6c 0x06 0x3d 0xaf 0x41 0xe4 SUB r16, r13, r3; MOV r27, r15 + 1590 0x10 0xe1 0x02 0x18 SEL.EQZ r16, r3, r16, r27 + 1594 0x80 0x03 0x40 0x40 0x01 0x84 JNZ r16, #TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1664 +.delay_slot + 1600 0x00 0x00 0x02 0xc0 0x00 0x44 MOVXM p1, #_ZL11height_iter +.delay_slot +.swstall delay_slot + 1606 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1608 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1610 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1612 0x00 0x00 NOPX + 1614 0xea 0xc6 0xd1 0x05 0xb1 0x80 0x00 0x20 0x05 0x7a LDA r17, [p7, #20]; ST r13, [p1]; MOVX r16, #1 + 1624 0x00 0x00 NOPX + 1626 0x00 0x00 NOPX + 1628 0x00 0x00 NOPX + 1630 0x00 0x00 NOPX + 1632 0x00 0x00 NOPX + 1634 0x00 0x00 NOPX + 1636 0x14 0x51 0x08 0x18 REL r17, r16 + 1640 0x06 0xe6 0x36 0x98 LDA r17, [p6, #-8] + 1644 0x00 0x00 NOPX + 1646 0x00 0x00 NOPX + 1648 0x00 0x00 NOPX + 1650 0x00 0x00 NOPX + 1652 0x00 0x00 NOPX + 1654 0x00 0x00 NOPX + 1656 0x14 0x21 0x11 0x98 SUB r16, r16, r17 + 1660 0x0e 0xe6 0x11 0x98 ST r16, [p6, #-8] +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1664 + 1664 0xfe 0x86 0x20 0x00 0x00 0x03 0x30 0x00 0x10 0xba LDA r1, [sp, #-12]; MOVXM p6, #_ZL9curr_iter + 1674 0xc0 0xc2 0xd0 0x00 0x00 0x00 0xb0 0x00 0x10 0xba LDA r16, [p6]; MOVXM p1, #_ZL8num_iter + 1684 0x01 0x06 0x36 0x98 LDA r17, [p1] + 1688 0x00 0x00 NOPX + 1690 0x00 0x00 NOPX + 1692 0x00 0x00 NOPX + 1694 0x00 0x00 NOPX + 1696 0x00 0x00 NOPX + 1698 0x00 0x00 NOPX + 1700 0x14 0x61 0x08 0x98 NE r16, r17, r16 + 1704 0x80 0x03 0x60 0x40 0x01 0x84 JNZ r16, #TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1728 +.delay_slot + 1710 0x07 0xef 0x99 0x18 LDA p7, [sp, #-20] +.delay_slot + 1714 0x07 0xf1 0xf1 0x18 LDA r15, [sp, #-16] +.delay_slot +.swstall delay_slot + 1718 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1720 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1722 0x00 0x00 NOPX + 1724 0x0e 0x05 0xb1 0x98 ST r13, [p6] +.label TGT_F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1728 + 1728 0xff 0xe3 0x2e 0xee 0x41 0xd4 LDA p6, [sp, #-4]; MOV lr, r14 + 1734 0x07 0xf9 0xb1 0x18 LDA r13, [sp, #-8] + 1738 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 1742 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 1748 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1750 0x00 0x00 NOPX +.delay_slot + 1752 0x1b 0x90 0xa0 0xf8 MOV r14, r1 +.label _Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end last +.label __Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 last +.delay_slot +.swstall delay_slot + 1756 0x00 0x00 NOPX + +.undef global data _ZN12me_primitive11control_satE + +.undef global data _ZN12me_primitive11control_rndE + +.undef global data reducesum_params + +.undef global text _ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv + +.undef global text _ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E + +.undef global text _ZN12me_primitive10udiv_dstepEjjRjS0_ + +.text_segment_name +.text global 10 _Z20transpose4d_adf_initv +.label __Z20transpose4d_adf_initv___func_begin0 +.function_start +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 0 0x00 0x20 0x00 0x00 0x00 0x00 0x30 0x00 0x10 0xba MOVA r0, #1; MOVXM p0, #_ZN12me_primitive11control_rndE +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10 0x00 0x04 0x27 0x18 ST.s8 r1, [p0] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 16 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 20 0x10 0x02 0x31 0x18 MOVX r1, #12 +.delay_slot + 24 0x00 0x00 0x00 0xc0 0x00 0x44 MOVXM p0, #_ZN12me_primitive11control_satE +.delay_slot +.swstall delay_slot + 30 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 32 0x00 0x00 NOPX +.label _Z20transpose4d_adf_initv__end last +.label __Z20transpose4d_adf_initv___func_end0 last +.delay_slot + 34 0x08 0x04 0x11 0x98 ST r0, [p0] + +.undef global data _ZN12me_primitive11control_satE + +.undef global data _ZN12me_primitive11control_rndE + +.text_segment_name +.text global 10 _Z15_b13786_wrapperPPv +.label __Z15_b13786_wrapperPPv___func_begin0 +.function_start + 0 0x1a 0x60 0xc0 0xf8 MOV p2, p0 + 4 0x02 0x1c 0x1e 0x98 LDA p0, [p2], #4 + 8 0x02 0x2c 0x9e 0x98 LDA p1, [p2], #8 + 12 0x02 0xf5 0x9e 0x98 LDA p3, [p2, #-4] + 16 0x02 0x05 0x1e 0x98 LDA p2, [p2] +.tail_call + 20 0x00 0x00 0x00 0x00 0x00 0x84 J #_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.delay_slot +.swstall delay_slot + 26 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 28 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 30 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 32 0x00 0x00 NOPX +.label _Z15_b13786_wrapperPPv__end last +.label __Z15_b13786_wrapperPPv___func_end0 last +.delay_slot +.swstall delay_slot + 34 0x00 0x00 NOPX + +.undef global text _Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + +.text_segment_name +.text weak 10 _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj +.label __ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj___func_begin0 +.function_start + 0 0x20 0x85 0xd8 0xa9 0x81 0xd4 LDA el0, [p1]; MOV r17, p2 + 6 0x19 0x68 0x82 0x18 ADD.NC p1, r17, #4 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 10 0x01 0x1e 0x56 0x98 LDA r18, [p1], #4 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 14 0x01 0x05 0xf6 0x98 LDA r15, [p1] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 18 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 20 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 22 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 24 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 26 0x18 0x17 0xa0 0xf8 MOV r0, r15 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 30 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 36 0x7c 0xa5 0xf8 0x3f 0xfd 0x64 MUL r18, r15, r18; MOV r16, #-1 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 42 0xfd 0xca 0xb0 0x0f 0xff 0xfe 0x2f 0xff 0x91 0x3a ST r18, [sp, #-20]; MOVXM r17, #1073741823 + 52 0x14 0xa1 0x0d 0x98 LSHL r16, r18, r16 + 56 0x14 0x61 0x04 0x98 AND r16, r17, r16 + 60 0x80 0x00 0x48 0x00 0x01 0x84 JZ r16, #TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_144 +.delay_slot + 66 0x00 0xf3 0xd0 0xdd 0x81 0xd4 LDA p7, [p0]; MOV p0, p7 +.delay_slot + 72 0x0f 0xf8 0x1d 0x98 ST p0, [sp, #-8] +.delay_slot + 76 0x0f 0xf5 0xd5 0x98 ST r14, [sp, #-12] +.delay_slot + 80 0x0f 0xf0 0x3d 0x98 ST lr, [sp, #-16] +.delay_slot + 84 0x3c 0xba 0xdf 0xf8 0x2b 0x0c LDA r14, [p1, #-8]; ST r0, [sp, #-4] + 90 0xfd 0x05 0xb0 0x00 0x02 0x5c ST el0, [sp, #-24]; MOVX r0, #0 + 96 0x07 0xe8 0x99 0x18 LDA p1, [sp, #-24] +.no_stack_arguments + 100 0x00 0x00 0x00 0x00 0x01 0x04 JL #memset +.delay_slot + 106 0x10 0x22 0x09 0x18 MOVX r17, #2 +.delay_slot + 110 0x14 0x03 0x1d 0x98 LSHL r1, r16, r17 +.delay_slot +.swstall delay_slot + 114 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 116 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 118 0x00 0x2c 0xf0 0x00 0x10 0x00 0x01 0xa5 0x7e 0xba NOPA; NOPB; NOPM +.return_address + 128 0x00 0x00 0x50 0x00 0x00 0x84 J #TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_160 +.delay_slot +.swstall delay_slot + 134 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 136 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 138 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 140 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 142 0x00 0x00 NOPX +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_144 + 144 0x00 0x2c 0xf0 0x00 0x27 0xe8 0x2d 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST el0, [sp, #-24]; NOPX; NOPM; NOPV +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_160 + 160 0x78 0x00 0xd8 0x00 0x01 0x84 JZ r15, #TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_432 +.delay_slot +.swstall delay_slot + 166 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 168 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 170 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 172 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 174 0x00 0x00 NOPX + 176 0xfd 0xc6 0x20 0x00 0x00 0x00 0x78 0x88 0x10 0xba LDA r17, [sp, #-20]; MOVXM ls, #(ZLS_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_272 + 0) + 186 0x00 0x33 0x00 0x00 0x00 0x01 0xb8 0xb8 0x10 0xba MOVA r19, #1; MOVXM le, #(ZLE_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_368 + 0) + 196 0xfd 0x4a 0x20 0x1d 0x49 0xee 0x0b 0xff 0xc8 0xba LDA r18, [sp, #-24]; LSHL r20, r14, r19; ADD.NC r16, r15, #-1 + 206 0xfe 0x07 0x20 0x00 0x00 0x00 0x30 0x78 0x10 0xba LDA lr, [sp, #-16]; MOVXM p0, #(TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_240 + 0) + 216 0x18 0x0a 0x20 0xf8 MOV m0, r20 + 220 0x00 0x00 NOPX + 222 0x00 0x00 NOPX + 224 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x23 0x19 0xec 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; LSHL r17, r17, r19; NOPM; NOPV +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_240 +.loop_nesting 1 + 240 0x70 0x00 0xc0 0x00 0x01 0x84 JZ r14, #TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_384 +.delay_slot +.swstall delay_slot + 246 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 248 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 250 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 252 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 254 0x00 0x00 NOPX + 256 0x53 0x91 0x60 0x02 0xbb 0x90 0x70 0x02 MOVS p2, p7; MOV lc, r14 + 264 0x00 0x2b 0x60 0x00 0xb4 0x90 0x70 0x02 NOPS; MOV p1, r18 +.label ZLS_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_272 +.loop_nesting 2 +.begin_of_loop + 272 0x43 0xce 0x50 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 LDA.s16 r19, [p2], #2; NOPB; NOPS; NOPX; NOPM; NOPV + 288 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 304 0x23 0xce 0xe0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 ST.s16 r19, [p1], #2; NOPB; NOPS; NOPX; NOPM; NOPV + 320 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 336 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 352 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLE_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_368 +.end_of_loop + 368 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_384 +.loop_nesting 1 + 384 0xe1 0x72 0x08 0x40 0x40 0x1c PADDB [p7], m0; JNZD r16, r16, p0 +.delay_slot +.swstall delay_slot + 390 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 392 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 394 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 396 0x00 0x00 NOPX +.delay_slot + 398 0x1c 0x98 0xc9 0x58 ADD.NC r18, r17, r18 +.loop_nesting 0 + 402 0x00 0x00 0xe0 0x00 0x00 0x84 J #TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_448 +.delay_slot +.swstall delay_slot + 408 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 410 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 412 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 414 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 416 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_432 + 432 0xfe 0x07 0x20 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 LDA lr, [sp, #-16]; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_448 + 448 0x07 0xf5 0xd1 0x18 LDA r14, [sp, #-12] + 452 0x07 0xfb 0x99 0x18 LDA p7, [sp, #-8] + 456 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] + 460 0x00 0x00 NOPX + 462 0x00 0x00 NOPX + 464 0x00 0x00 NOPX + 466 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 470 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 476 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 478 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 480 0x00 0x00 NOPX +.label _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj__end last +.label __ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj___func_end0 last +.delay_slot +.swstall delay_slot + 482 0x00 0x00 NOPX + +.undef global text memset + +.text_segment_name +.text global 10 _Z14_b8148_wrapperPPv +.label __Z14_b8148_wrapperPPv___func_begin0 +.function_start + 0 0x19 0x60 0xc0 0xf8 MOV p1, p0 + 4 0x01 0x1c 0x1e 0x98 LDA p0, [p1], #4 + 8 0x01 0x15 0x1e 0x98 LDA p2, [p1, #4] + 12 0x01 0x04 0x9e 0x98 LDA p1, [p1] +.tail_call + 16 0x00 0x00 0x00 0x00 0x00 0x84 J #_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj +.delay_slot +.swstall delay_slot + 22 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 24 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 26 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 28 0x00 0x00 NOPX +.label _Z14_b8148_wrapperPPv__end last +.label __Z14_b8148_wrapperPPv___func_end0 last +.delay_slot +.swstall delay_slot + 30 0x00 0x00 NOPX + +.undef global text _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj + +.text_segment_name +.text weak 10 _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params +.label __Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params___func_begin0 +.function_start + 0 0xb0 0x91 0x60 0x00 0x0a 0x60 0x70 0x02 MOVS p5, p1; MOV r0, p2 + 8 0x1b 0x60 0x12 0x18 ADD.NC p3, r0, #36 + 12 0x63 0xa0 0xd0 0x3d 0x81 0xd4 LDA m2, [p3], #4; MOV r0, p7 + 18 0x03 0x1c 0x06 0x98 LDA m0, [p3], #4 + 22 0x03 0xd4 0x56 0x98 LDA r2, [p3, #-12] + 26 0x03 0x04 0x86 0x98 LDA m1, [p3] + 30 0x00 0x00 NOPX + 32 0x00 0x00 NOPX + 34 0x00 0x00 NOPX + 36 0x00 0x00 NOPX + 38 0x00 0x00 NOPX + 40 0x10 0x00 0xe8 0x00 0x01 0x84 JZ r2, #TGT_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_464 +.delay_slot + 46 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.delay_slot + 50 0xe1 0x72 0x06 0xdd 0x81 0xf4 PADDB [p7], m0; MOV p3, p7 +.delay_slot + 56 0x38 0x4b 0x90 0x18 PADDB [p0], m2 +.delay_slot + 60 0x01 0x72 0x08 0xc1 0x81 0xf4 PADDB [p0], m0; MOV p4, p0 +.delay_slot + 66 0x39 0x2b 0x90 0x18 PADDB [p1], m1 + 70 0x10 0x02 0x11 0x18 MOVX r1, #4 + 74 0x10 0x86 0x1c 0x98 LTU r3, r2, r1 + 78 0x18 0x00 0xa0 0x40 0x01 0x84 JNZ r3, #TGT_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_320 +.delay_slot + 84 0x18 0x80 0x60 0xb8 MOV dj0, #48 +.delay_slot + 88 0x02 0x00 0x36 0x98 LDA r1, [p2, dj0] +.delay_slot +.swstall delay_slot + 92 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 94 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 96 0x00 0x00 NOPX + 98 0x81 0x13 0x76 0x10 0xe8 0x00 0x00 0x00 0x78 0x60 0x10 0xb6 VLDA x2, [p4], m0; VLDB x1, [p3], m0; MOVXM ls, #(ZLS_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_192 + 0) +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 110 0x81 0x0c 0xfe 0x10 0x68 0x00 0x00 0x37 0x0f 0x02 0x61 0x0b 0x60 0x7e PADDA [p4], m0; VLDB x0, [p7], m0; PADDS [p3], m0; MOVXM le, #(ZLE_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_240 + 0) +.aggressive_scheduled_block_id 1 +.noswbrkpt + 124 0x61 0x0b 0x70 0x11 0xef 0x08 0x5b 0x02 0xb8 0xbf 0x40 0xf6 VLDA x1, [p3], m0; VLDB x3, [p0], m0; PADDS [p7], m0; ADD.NC lc, r2, #-3 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 136 0x01 0x0c 0xf8 0x11 0x6b 0x08 0x5b 0x32 PADDA [p0], m0; VLDB x2, [p4], m0; PADDS [p3], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 144 0x81 0x0c 0xfe 0x10 0x68 0x3c PADDA [p4], m0; VLDB x0, [p7], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 150 0x01 0x1e 0x8e 0x10 0xb6 0x4c VLDB x3, [p0], m0; PADDS [p7], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 156 0x01 0x0c 0xf6 0x10 0xe8 0x3c PADDA [p0], m0; VLDB x1, [p3], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 162 0x81 0x16 0x80 0x12 0x0b 0xb4 VLDB x2, [p4], m0; VSHUFFLE bmll0, x1, x2, r1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 168 0x00 0x2c 0xfe 0x10 0x6b 0x08 0x5b 0x32 NOPA; VLDB x0, [p7], m0; PADDS [p3], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 176 0x00 0x2c 0xf0 0x11 0xed 0x28 0x06 0x80 0x00 0x00 0x40 0xc2 0xe8 0x00 0x00 0xe1 NOPA; VLDB x3, [p0], m0; VST bmll0, [p5], m1; NOPX; VSHUFFLE bmlh0, x0, x3, r1; NOPV +.label ZLS_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_192 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 192 0x81 0x0c 0xf6 0x10 0xef 0x08 0x5b 0x00 0x00 0x00 0x04 0x82 0xe8 0x00 0x00 0xe1 PADDA [p4], m0; VLDB x1, [p3], m0; PADDS [p7], m0; NOPX; VSHUFFLE bmll0, x1, x2, r1; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 208 0x01 0x0c 0xf8 0x11 0x69 0x28 0x26 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 PADDA [p0], m0; VLDB x2, [p4], m0; VST bmlh0, [p1], m1; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 224 0xa5 0x0c 0xfe 0x10 0x6b 0x08 0x5b 0x00 0x00 0x00 0x40 0xc2 0xe8 0x00 0x00 0xe1 PADDA [p5], m1; VLDB x0, [p7], m0; PADDS [p3], m0; NOPX; VSHUFFLE bmlh0, x0, x3, r1; NOPV +.label ZLE_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_240 +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 240 0x25 0x0c 0xf0 0x11 0xed 0x28 0x06 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 PADDA [p1], m1; VLDB x3, [p0], m0; VST bmll0, [p5], m1; NOPX; NOPM; NOPV +.loop_nesting 0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 256 0x18 0x09 0x05 0xd8 VSHUFFLE bmll0, x1, x2, r1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 260 0x09 0x28 0x26 0x98 VST bmlh0, [p1], m1 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 264 0x25 0x0c 0xf1 0x03 0x0b 0x94 PADDA [p1], m1; VSHUFFLE bmlh0, x0, x3, r1 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 270 0x00 0x00 0xe8 0x00 0x00 0x84 J #TGT_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_464 +.delay_slot +.aggressive_scheduled_block_id 2 +.noswbrkpt + 276 0xa5 0x0c 0xf1 0x28 0x26 0x80 0x04 0x82 0xe2 0xba PADDA [p5], m1; VST bmlh0, [p1], m1; VSHUFFLE bmll0, x1, x2, r1 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 286 0x25 0x0c 0xfa 0x50 0x0d 0x0c PADDA [p1], m1; VST bmll0, [p5], m1 +.delay_slot + 292 0xa5 0x0c 0xf1 0x03 0x0b 0x94 PADDA [p5], m1; VSHUFFLE bmlh0, x0, x3, r1 +.delay_slot + 298 0x00 0x2c 0xfa 0x50 0x0d 0x0c NOPA; VST bmll0, [p5], m1 +.delay_slot + 304 0x00 0x2c 0xf0 0x00 0x21 0x28 0x26 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmlh0, [p1], m1; NOPX; NOPM; NOPV +.label TGT_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_320 + 320 0x1d 0x71 0x20 0xf8 MOV lc, r2 + 324 0x00 0x00 0x01 0xe2 0xa0 0x44 MOVXM ls, #(ZLS_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_336 + 0) + 330 0x00 0x00 0x06 0xe3 0x80 0x44 MOVXM le, #(ZLE_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_448 + 0) +.label ZLS_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_336 +.loop_nesting 1 +.begin_of_loop + 336 0x81 0x0b 0x76 0x11 0x68 0x3c VLDA x1, [p4], m0; VLDB x2, [p3], m0 + 342 0x61 0x0c 0xfe 0x10 0x6c 0x08 0x5b 0x32 PADDA [p3], m0; VLDB x0, [p7], m0; PADDS [p4], m0 + 350 0xe1 0x0c 0xf0 0x11 0xe8 0x3c PADDA [p7], m0; VLDB x3, [p0], m0 + 356 0x38 0x0b 0x90 0x18 PADDB [p0], m0 + 360 0x00 0x00 NOPX + 362 0x00 0x00 NOPX + 364 0x00 0x01 0x67 0x98 NOPA + 368 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x08 0x42 0xe8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x2, x1, r1; NOPV + 384 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 400 0x00 0x2c 0xf0 0x00 0x25 0x28 0x06 0x80 0x00 0x00 0x40 0xc2 0xe8 0x00 0x00 0xe1 NOPA; NOPB; VST bmll0, [p5], m1; NOPX; VSHUFFLE bmlh0, x0, x3, r1; NOPV + 416 0x00 0x2c 0xfa 0x57 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; PADDB [p5], m1; NOPS; NOPX; NOPM; NOPV + 432 0x00 0x2c 0xf0 0x00 0x21 0x28 0x26 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmlh0, [p1], m1; NOPX; NOPM; NOPV +.label ZLE_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_448 +.end_of_loop + 448 0x00 0x2c 0xf2 0x57 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; PADDB [p1], m1; NOPS; NOPX; NOPM; NOPV +.label TGT_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_464 +.loop_nesting 0 + 464 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 468 0x1f 0x60 0x20 0xf8 MOV p7, r0 +.delay_slot +.swstall delay_slot + 472 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 474 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 476 0x00 0x00 NOPX +.label _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params__end last +.label __Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params___func_end0 last +.delay_slot +.swstall delay_slot + 478 0x00 0x00 NOPX + +.text_segment_name +.text weak 10 _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj +.label __ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj___func_begin0 +.function_start + 0 0xb0 0x11 0x60 0x00 0x04 0x00 0x00 0x00 0x71 0x3a MOVS p5, p0; PADDXM [sp], #128 + 10 0xff 0x87 0xb0 0x01 0xb1 0x60 0x70 0x02 ST lr, [sp, #-4]; MOV p3, p1 +.no_stack_arguments + 18 0x31 0x11 0x60 0x00 0x00 0x00 0x00 0x00 0x41 0x3a MOVS p1, p2; JL #_ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj +.delay_slot + 28 0x18 0x65 0xe0 0xf8 MOV p0, sp +.delay_slot + 32 0x38 0xef 0x90 0x18 PADDB [p0], #-128 +.delay_slot + 36 0x1c 0x60 0xc0 0xf8 MOV p4, p0 +.delay_slot +.swstall delay_slot + 40 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 42 0x00 0x2c 0xf0 0x00 0x20 0x3c NOPA; NOPB +.return_address + 48 0xf0 0x4a 0x22 0x90 0x8b 0x02 0x2d 0x70 0x72 0xba LDA r18, [sp, #-128]; MOVS p2, p4; MOV r17, CORE_ID + 58 0xf0 0xda 0x28 0xc5 0x20 0x2c LDA r22, [sp, #-124]; EXTEND.u8 r17, r17 + 64 0xf1 0x52 0x20 0x00 0x00 0x3e 0x6f 0xff 0x10 0xba LDA r20, [sp, #-120]; MOVXM r19, #65534 + 74 0x60 0x93 0xd9 0xc6 0x21 0x2c LDA p1, [p3]; ADD r17, r19, r17 + 80 0xf1 0xce 0x28 0xd5 0x60 0x2c LDA r19, [sp, #-116]; EXTEND.u16 r21, r17 + 86 0x00 0x00 NOPX + 88 0x05 0x06 0x36 0x98 LDA r17, [p5] + 92 0x00 0x00 NOPX + 94 0x15 0xa5 0x2f 0x98 MUL r18, r22, r18 + 98 0x00 0x00 NOPX + 100 0x14 0xa5 0x4f 0x98 MUL r18, r18, r20 + 104 0x00 0x00 NOPX + 106 0x15 0x65 0x2f 0x98 MUL r18, r21, r18 +.no_stack_arguments + 110 0x00 0x00 0x00 0x00 0x01 0x04 JL #_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params +.delay_slot + 116 0x14 0xe5 0x2f 0x98 MUL r18, r19, r18 +.delay_slot + 120 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 124 0x14 0xa1 0x0d 0x98 LSHL r16, r18, r16 +.delay_slot + 128 0x18 0x68 0xc1 0x58 ADD.NC p0, r17, r16 +.delay_slot +.swstall delay_slot + 132 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.return_address + 144 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] + 148 0x00 0x00 NOPX + 150 0x00 0x00 NOPX + 152 0x00 0x00 NOPX + 154 0x00 0x00 NOPX + 156 0x00 0x00 NOPX + 158 0x00 0x00 NOPX + 160 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 164 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128 +.delay_slot +.swstall delay_slot + 170 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 172 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 174 0x00 0x00 NOPX +.label _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj__end last +.label __ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj___func_end0 last +.delay_slot +.swstall delay_slot + 176 0x00 0x00 NOPX + +.undef global text _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params + +.text_segment_name +.text global 10 _Z14_b8170_wrapperPPv +.label __Z14_b8170_wrapperPPv___func_begin0 +.function_start + 0 0x19 0x60 0xc0 0xf8 MOV p1, p0 + 4 0x01 0x1c 0x1e 0x98 LDA p0, [p1], #4 + 8 0x01 0x15 0x1e 0x98 LDA p2, [p1, #4] + 12 0x01 0x04 0x9e 0x98 LDA p1, [p1] +.tail_call + 16 0x00 0x00 0x00 0x00 0x00 0x84 J #_ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj +.delay_slot +.swstall delay_slot + 22 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 24 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 26 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 28 0x00 0x00 NOPX +.label _Z14_b8170_wrapperPPv__end last +.label __Z14_b8170_wrapperPPv___func_end0 last +.delay_slot +.swstall delay_slot + 30 0x00 0x00 NOPX + +.undef global text _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj + +.text_segment_name +.text weak 10 _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj +.label __Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj___func_begin0 +.function_start + 0 0x23 0x85 0xd0 0x00 0x00 0x00 0x08 0x00 0x10 0xba LDA el0, [p1], #4; MOVXM r0, #_ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6params + 10 0x08 0x00 0x80 0x80 0x0b 0x3e 0x27 0xa9 0x30 0x01 0x08 0x76 MOVA m0, #64; MOVS p0, r0; MOVX r2, #-3; ADD.NC p2, r0, #4 + 22 0x00 0x42 0x20 0x22 0x01 0x64 MOVX r1, #4; MOV r0, #128 + 28 0x00 0x00 NOPX + 30 0x00 0x00 NOPX + 32 0x00 0x00 NOPX + 34 0x00 0x00 NOPX + 36 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 40 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 44 0x01 0x1c 0x0e 0x98 LDA eh0, [p1], #4 + 48 0x00 0x00 NOPX + 50 0x00 0x00 NOPX + 52 0x00 0x00 NOPX + 54 0x00 0x00 NOPX + 56 0x00 0x00 NOPX + 58 0x0a 0x1c 0x29 0x98 ST el0, [p2], #4 + 62 0x0a 0x1c 0x09 0x98 ST eh0, [p2], #4 + 66 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 70 0x01 0x1c 0x0e 0x98 LDA eh0, [p1], #4 + 74 0x00 0x00 NOPX + 76 0x00 0x00 NOPX + 78 0x00 0x00 NOPX + 80 0x00 0x00 NOPX + 82 0x00 0x00 NOPX + 84 0x0a 0x1c 0x29 0x98 ST el0, [p2], #4 + 88 0x0a 0x1c 0x09 0x98 ST eh0, [p2], #4 + 92 0x01 0x04 0x0e 0x98 LDA eh0, [p1] + 96 0x01 0x14 0x2e 0x98 LDA el0, [p1, #4] + 100 0x00 0x00 NOPX + 102 0x00 0x00 NOPX + 104 0x00 0x00 NOPX + 106 0x00 0x00 NOPX + 108 0x00 0x00 NOPX + 110 0x0a 0x04 0x09 0x98 ST eh0, [p2] + 114 0x0a 0x14 0x29 0x98 ST el0, [p2, #4] + 118 0x00 0x08 0x76 0x98 LDA r3, [p0], m0 + 122 0x00 0x00 NOPX + 124 0x00 0x00 NOPX + 126 0x00 0x00 NOPX + 128 0x00 0x00 NOPX + 130 0x00 0x00 NOPX + 132 0x00 0x00 NOPX + 134 0x10 0xc8 0x2d 0x98 LSHL r4, r3, r2 + 138 0x18 0xc3 0xb0 0xa4 0xff 0x24 LSHL r3, r3, r1; ADD.NC r1, r4, #-1 + 144 0x00 0x86 0x30 0x00 0x88 0x60 0x70 0x02 ST r1, [p0]; MOV r4, p0 + 152 0x19 0x62 0x62 0x18 ADD.NC p1, r4, #-60 + 156 0x01 0x08 0x96 0x98 LDA r4, [p1], m0 + 160 0x00 0x00 NOPX + 162 0x00 0x00 NOPX + 164 0x00 0x00 NOPX + 166 0x00 0x00 NOPX + 168 0x00 0x00 NOPX + 170 0x00 0x00 NOPX + 172 0x20 0x85 0xb2 0x22 0x01 0x64 LSHL r2, r4, r2; MOV r4, #128 + 178 0x10 0x85 0xff 0x18 ADD r2, r2, #-1 + 182 0x23 0x8a 0x31 0x90 0x5c 0x5c ST r2, [p1], #4; MSC r4, r4, r3, r2 + 188 0x09 0x1c 0x11 0x98 ST r0, [p1], #4 + 192 0x09 0x1c 0x11 0x98 ST r0, [p1], #4 + 196 0x09 0x2c 0x11 0x98 ST r0, [p1], #8 + 200 0x09 0xfc 0x71 0x98 ST r3, [p1], #-4 + 204 0x09 0x2c 0x91 0x98 ST r4, [p1], #8 + 208 0x20 0x82 0x30 0x00 0xa9 0x60 0x70 0x02 ST r0, [p1]; MOV r5, p1 + 216 0x19 0x62 0xde 0x18 ADD.NC p1, r5, #-68 + 220 0x09 0x1c 0x11 0x98 ST r0, [p1], #4 + 224 0x09 0x1c 0x11 0x98 ST r0, [p1], #4 + 228 0x09 0x1c 0x51 0x98 ST r2, [p1], #4 + 232 0x09 0x1c 0x31 0x98 ST r1, [p1], #4 + 236 0x23 0x82 0x30 0x50 0x00 0x5c ST r0, [p1], #4; RET lr +.delay_slot + 242 0x09 0x2c 0x71 0x98 ST r3, [p1], #8 +.delay_slot + 246 0x09 0xfc 0x51 0x98 ST r2, [p1], #-4 +.delay_slot + 250 0x09 0x2c 0x91 0x98 ST r4, [p1], #8 +.delay_slot + 254 0x09 0x04 0x31 0x98 ST r1, [p1] +.label _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj__end last +.label __Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj___func_end0 last +.delay_slot + 258 0x09 0x14 0x11 0x98 ST r0, [p1, #4] + +.undef weak data _ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6params + +.text_segment_name +.text weak 10 _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params +.label __Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params___func_begin0 +.function_start + 0 0x00 0x41 0x00 0x00 0x00 0x01 0x30 0x06 0x10 0xba MOVA r1, #2; MOVXM p2, #(_ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6params + 12) + 10 0x40 0xee 0xd0 0x00 0xb2 0x2c LDA r27, [p2]; MOVX r0, #22 + 16 0x00 0x00 NOPX + 18 0x00 0x00 NOPX + 20 0x00 0x00 NOPX + 22 0x00 0x00 NOPX + 24 0x00 0x00 NOPX + 26 0x00 0x00 NOPX + 28 0x16 0xc2 0x17 0x98 EQ r1, r27, r1 + 32 0x08 0x00 0xf8 0x40 0x01 0x84 JNZ r1, #TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_496 +.delay_slot + 38 0x10 0x04 0x75 0x18 MOVX r2, #29 +.delay_slot + 42 0x10 0x00 0x22 0x18 SEL.EQZ r0, r0, r2, r27 +.delay_slot +.swstall delay_slot + 46 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 48 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 50 0x00 0x00 NOPX + 52 0x00 0x00 0x04 0xc0 0x20 0x44 MOVXM p2, #(_ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6params + 16) + 58 0x02 0x04 0x36 0x98 LDA r1, [p2] + 62 0x00 0x00 NOPX + 64 0x00 0x00 NOPX + 66 0x00 0x00 NOPX + 68 0x00 0x00 NOPX + 70 0x00 0x00 NOPX + 72 0x00 0x00 NOPX + 74 0x08 0x02 0x38 0x00 0x01 0x84 JZ r1, #TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1136 +.delay_slot +.swstall delay_slot + 80 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 82 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 84 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 86 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 88 0x00 0x00 NOPX + 90 0x10 0x04 0x29 0x18 MOVX r2, #10 + 94 0x10 0x44 0x2c 0x98 LTU r2, r1, r2 + 98 0x10 0x00 0xa8 0x40 0x01 0x84 JNZ r2, #TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_336 +.delay_slot +.swstall delay_slot + 104 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 106 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 108 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 110 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 112 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 114 0x00 0x38 0x68 0x00 0x00 0x00 0x78 0x78 0x10 0x3a VLDB x0, [p0], #64; MOVXM ls, #(ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_240 + 0) +.aggressive_scheduled_block_id 1 +.noswbrkpt + 124 0x00 0x38 0x68 0x00 0x00 0x01 0xb8 0x78 0x10 0x3a VLDB x0, [p0], #64; MOVXM le, #(ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_240 + 0) +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 134 0x00 0x2c 0xf0 0x1c 0x34 0x02 0xb8 0x7d 0xce 0xba NOPA; VLDB x0, [p0], #64; ADD.NC lc, r1, #-9 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 144 0x00 0x2c 0xf0 0x38 0x68 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 160 0x00 0x2c 0xf0 0x38 0x68 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 176 0x00 0x2c 0xf0 0x38 0x68 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 192 0x00 0x2c 0xf0 0x38 0x68 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 208 0x00 0x2c 0xf0 0x38 0x68 0x01 0x5b 0x00 0x00 0x00 0x00 0x00 0xe8 0x00 0x00 0xe1 NOPA; VLDB x0, [p0], #64; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 224 0x00 0x2c 0xf0 0x38 0x68 0x01 0x5b 0x00 0x00 0x00 0x00 0x00 0xe8 0x00 0x00 0xe1 NOPA; VLDB x0, [p0], #64; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_240 +.loop_nesting 1 +.begin_of_loop +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 240 0x00 0x2c 0xf0 0x38 0x69 0x1c 0x06 0x80 0x00 0x00 0x00 0x00 0xe8 0x00 0x00 0xe1 NOPA; VLDB x0, [p0], #64; VST bmll0, [p1], #64; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV +.loop_nesting 0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 256 0x23 0x80 0xd0 0x00 0x00 0x00 0xe0 0x02 VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 264 0x23 0x80 0xd0 0x00 0x00 0x00 0xe0 0x02 VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 272 0x23 0x80 0xd0 0x00 0x00 0x00 0xe0 0x02 VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 280 0x23 0x80 0xd0 0x01 0x40 0x00 0x00 0x00 0xe9 0x3a VST bmll0, [p1], #64; RET lr; VSHUFFLE bmll0, x0, x0, r0 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 290 0x23 0x80 0xd0 0x00 0x00 0x00 0xe0 0x02 VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 298 0x23 0x80 0xd0 0x00 0x00 0x00 0xe0 0x02 VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 306 0x23 0x80 0xd0 0x00 0x00 0x00 0xe0 0x02 VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 314 0x00 0x2c 0xf2 0x38 0x0d 0x0c NOPA; VST bmll0, [p1], #64 +.delay_slot + 320 0x00 0x2c 0xf0 0x00 0x21 0x1c 0x06 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmll0, [p1], #64; NOPX; NOPM; NOPV +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_336 + 336 0x1d 0x70 0xa0 0xf8 MOV lc, r1 + 340 0x00 0x00 0x01 0xe2 0xc0 0x44 MOVXM ls, #(ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_352 + 0) + 346 0x00 0x00 0x06 0xe3 0xa0 0x44 MOVXM le, #(ZLE_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_464 + 0) +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_352 +.loop_nesting 1 +.begin_of_loop + 352 0x38 0x1c 0x34 0x18 VLDB x0, [p0], #64 + 356 0x00 0x00 NOPX + 358 0x00 0x2c 0xf0 0x00 0x10 0x00 0x01 0xa5 0x7e 0xba NOPA; NOPB; NOPM + 368 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 384 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 400 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 416 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 432 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x00 0x00 0xe8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV + 448 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLE_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_464 +.end_of_loop + 464 0x00 0x2c 0xf0 0x00 0x21 0x1c 0x06 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmll0, [p1], #64; NOPX; NOPM; NOPV +.loop_nesting 0 + 480 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot + 484 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 486 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 488 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 490 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 492 0x00 0x01 0x67 0x98 NOPA +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_496 + 496 0x00 0x00 0x04 0xc0 0x00 0x44 MOVXM p2, #_ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6params + 502 0x02 0x04 0x16 0x98 LDA r0, [p2] + 506 0x00 0x00 NOPX + 508 0x00 0x00 NOPX + 510 0x00 0x00 NOPX + 512 0x00 0x00 NOPX + 514 0x00 0x00 NOPX + 516 0x00 0x00 NOPX + 518 0x00 0x02 0x38 0x00 0x01 0x84 JZ r0, #TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1136 +.delay_slot +.swstall delay_slot + 524 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 526 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 528 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 530 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 532 0x00 0x00 NOPX + 534 0x04 0x94 0x80 0x00 0x00 0x02 0x30 0x02 0x10 0xba MOVA m5, #36; MOVXM p4, #(_ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6params + 4) + 544 0x83 0x86 0xd0 0x00 0x51 0x08 0x4f 0xfd 0x58 0xba LDA r1, [p4], #4; MOVX r5, #8; MOV r2, #-3 + 554 0x95 0x12 0xd0 0x00 0x30 0x2a 0x60 0x00 0x58 0xba LDA r4, [p4], m5; MOVX r3, #1; MOV dc4, #0 + 564 0x9d 0x90 0xd0 0x10 0x4b 0x00 0x60 0x8a 0x00 0x20 0x58 0x76 LDA m1, [p4], #-8; MOVS dc0, dc4; MOVX r6, #4; MOV m4, #32 + 576 0x9d 0x94 0xd1 0x10 0x4b 0x00 0x0f 0xf8 0xe8 0x34 0x58 0x76 LDA dn1, [p4], #-8; MOVS dc1, dc4; ADD r0, r0, #-1; MOV r7, #52 + 588 0x87 0x98 0xd5 0x10 0x4b 0x00 0x00 0x01 0x31 0x50 0x10 0x76 LDA dj1, [p4], #12; MOVS dc5, dc4; MOVXM p2, #(TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_672 + 0) + 600 0x9d 0xd4 0xd0 0x00 0x00 0x01 0xb1 0x60 0x10 0xba LDA dn5, [p4], #-8; MOVXM p3, #(TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_704 + 0) + 610 0x91 0x58 0xd0 0x41 0xaa 0x2c LDA dj5, [p4], m4; MOVX r16, #53 + 616 0x9d 0x80 0xd0 0x0b 0xb0 0xe4 0xa8 0x7f 0xc8 0xba LDA m0, [p4], #-8; LTU r27, r5, r1; ADD.NC r5, r1, #-1 + 626 0x9d 0x84 0xd0 0x0b 0x11 0x6c 0xa9 0x3f 0xc8 0xba LDA dn0, [p4], #-8; LSHL r17, r5, r2; ADD.NC r5, r4, #-1 + 636 0x87 0x88 0xd0 0x0a 0x21 0x6c 0xac 0x40 0x48 0xba LDA dj0, [p4], #12; LSHL r2, r5, r2; ADD.NC r5, r17, #1 + 646 0x80 0xc4 0xd0 0x06 0x52 0x90 0x68 0x80 0x48 0xba LDA dn4, [p4]; SEL.EQZ r5, r3, r5, r27; ADD.NC r3, r2, #1 + 656 0x9c 0xc8 0xd0 0x00 0x20 0x01 0x5b 0x0a 0x5f 0xf8 0x01 0xa5 0x78 0x00 0x00 0xe1 LDA dj4, [p4, #-8]; NOPB; NOPS; ADD r5, r5, #-1; NOPM; NOPV +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_672 +.loop_nesting 1 + 672 0x08 0x02 0x30 0x00 0x01 0x84 JZ r1, #TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1120 +.delay_slot +.swstall delay_slot + 678 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 680 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 682 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 684 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 686 0x00 0x00 NOPX + 688 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x02 0x29 0x50 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; MOV r17, r5; NOPV +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_704 +.loop_nesting 2 + 704 0x20 0x02 0x28 0x00 0x01 0x84 JZ r4, #TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1104 +.delay_slot +.swstall delay_slot + 710 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 712 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 714 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 716 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 718 0x00 0x00 NOPX + 720 0x10 0xe4 0x6c 0x98 LTU r18, r3, r6 + 724 0x90 0x01 0xe0 0x40 0x01 0x84 JNZ r18, #TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_960 +.delay_slot +.swstall delay_slot + 730 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 732 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 734 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 736 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 738 0x00 0x00 NOPX + 740 0x00 0x28 0x68 0x00 0x00 0x00 0x79 0xa8 0x10 0x3a VLDB x0, [p0, #64]; MOVXM ls, #(ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_848 + 0) + 750 0x00 0x70 0xe8 0x00 0x00 0x01 0xb9 0xb8 0x10 0x3a VLDB.3D x1, [p0], d1; MOVXM le, #(ZLE_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_880 + 0) +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 760 0x1d 0x71 0xfe 0x98 ADD.NC lc, r3, #-3 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 764 0x38 0x14 0x34 0x18 VLDB x0, [p0, #64] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 768 0x00 0x2c 0xf0 0x70 0xe8 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB.3D x1, [p0], d1; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 784 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 800 0x00 0x2c 0xf0 0x28 0x68 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB x0, [p0, #64]; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 816 0x00 0x2c 0xf0 0x70 0xe8 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB.3D x1, [p0], d1; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 832 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x44 0x0e 0xe8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmlh0, x1, x0, r7; NOPV +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_848 +.loop_nesting 3 +.begin_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 848 0x00 0x2c 0xf0 0x28 0x6c 0x84 0x8b 0x00 0x00 0x00 0x04 0x20 0xe8 0x00 0x00 0xe1 NOPA; VLDB x0, [p0, #64]; MOVS p4, p1; NOPX; VSHUFFLE bmll0, x1, x0, r16; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 864 0x00 0x2c 0xf0 0x70 0xe9 0x18 0x26 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB.3D x1, [p0], d1; VST.3D bmlh0, [p1], d0; NOPX; NOPM; NOPV +.label ZLE_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_880 +.end_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 880 0x00 0x2c 0xf0 0x00 0x24 0x14 0x06 0x80 0x00 0x00 0x44 0x0e 0xe8 0x00 0x00 0xe1 NOPA; NOPB; VST bmll0, [p4, #64]; NOPX; VSHUFFLE bmlh0, x1, x0, r7; NOPV +.loop_nesting 2 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 896 0x90 0x91 0x60 0x00 0x04 0x20 0xe0 0x02 MOVS p4, p1; VSHUFFLE bmll0, x1, x0, r16 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 904 0x09 0x18 0x26 0x98 VST.3D bmlh0, [p1], d0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 908 0x82 0x80 0xd0 0x00 0x44 0x0e 0xe0 0x02 VST bmll0, [p4, #64]; VSHUFFLE bmlh0, x1, x0, r7 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 916 0x18 0x08 0x41 0xd8 VSHUFFLE bmll0, x1, x0, r16 + 920 0x00 0x02 0x28 0x00 0x00 0x84 J #TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1104 +.delay_slot + 926 0x23 0x04 0xd0 0x02 0x31 0x60 0x70 0x02 VST.3D bmlh0, [p1], d0; MOV p4, p1 +.delay_slot + 934 0x82 0x80 0xd0 0x00 0x44 0x0e 0xe0 0x02 VST bmll0, [p4, #64]; VSHUFFLE bmlh0, x1, x0, r7 +.delay_slot + 942 0x90 0x91 0x60 0x00 0x04 0x20 0xe0 0x02 MOVS p4, p1; VSHUFFLE bmll0, x1, x0, r16 +.delay_slot + 950 0x09 0x18 0x26 0x98 VST.3D bmlh0, [p1], d0 +.delay_slot + 954 0x00 0x2c 0xf8 0x28 0x0d 0x0c NOPA; VST bmll0, [p4, #64] +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_960 + 960 0x00 0x00 0x01 0xe7 0xa0 0x44 MOVXM ls, #(ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_976 + 0) + 966 0x00 0x00 0x06 0xe8 0x80 0x44 MOVXM le, #(ZLE_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1088 + 0) + 972 0x1d 0x71 0x00 0x98 ADD.NC lc, r2, #1 +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_976 +.loop_nesting 3 +.begin_of_loop + 976 0x02 0x86 0x88 0xc5 0x81 0xf4 VLDB x0, [p0, #64]; MOV p4, p1 + 982 0x38 0x38 0x74 0x18 VLDB.3D x1, [p0], d1 + 986 0x00 0x00 NOPX + 988 0x00 0x00 NOPX + 990 0x00 0x00 NOPX + 992 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 1008 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 1024 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 1040 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x44 0x0e 0xe8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmlh0, x1, x0, r7; NOPV + 1056 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x04 0x20 0xe8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x1, x0, r16; NOPV + 1072 0x00 0x2c 0xf0 0x00 0x21 0x18 0x26 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST.3D bmlh0, [p1], d0; NOPX; NOPM; NOPV +.label ZLE_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1088 +.end_of_loop + 1088 0x00 0x2c 0xf0 0x00 0x24 0x14 0x06 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmll0, [p4, #64]; NOPX; NOPM; NOPV +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1104 +.loop_nesting 2 + 1104 0x14 0x62 0xe0 0x18 JNZD r17, r17, p3 +.delay_slot +.swstall delay_slot + 1108 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1110 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1112 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1114 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1116 0x00 0x01 0x67 0x98 NOPA +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1120 +.loop_nesting 1 + 1120 0x10 0x00 0xa0 0x18 JNZD r0, r0, p2 +.delay_slot +.swstall delay_slot + 1124 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1126 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1128 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1130 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1132 0x00 0x01 0x67 0x98 NOPA +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1136 +.loop_nesting 0 + 1136 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot + 1140 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1142 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1144 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 1146 0x00 0x00 NOPX +.label _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params__end last +.label __Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params___func_end0 last +.delay_slot +.swstall delay_slot + 1148 0x00 0x00 NOPX + +.undef weak data _ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6params + +.text_segment_name +.text weak 10 _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj +.label __ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj___func_begin0 +.function_start + 0 0x1b 0x6c 0xc0 0xf8 MOV p3, p6 + 4 0xd0 0x91 0x60 0x00 0x00 0x00 0xb0 0x00 0x11 0x3a MOVS p6, p1; MOVXM p1, #_ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6kn_rep + 14 0x01 0x06 0x16 0x98 LDA r16, [p1] + 18 0x00 0x00 NOPX + 20 0x00 0x00 NOPX + 22 0x00 0x00 NOPX + 24 0x00 0x00 NOPX + 26 0x00 0x00 NOPX + 28 0x00 0x00 NOPX + 30 0x80 0x00 0x30 0x40 0x01 0x84 JNZ r16, #TGT_F_ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj_96 +.delay_slot + 36 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 +.delay_slot + 42 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12] +.delay_slot + 46 0xf0 0x11 0x60 0x00 0xb7 0x60 0x70 0x02 MOVS p7, p0; MOV p1, p7 +.delay_slot + 54 0x0f 0xf9 0x9d 0x98 ST p3, [sp, #-8] +.delay_slot + 58 0xff 0x93 0xb0 0x00 0x00 0x00 0x30 0x00 0x11 0x3a ST p1, [sp, #-4]; MOVXM p0, #_ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6params +.no_stack_arguments + 68 0x00 0x00 0x00 0x00 0x01 0x04 JL #_Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj +.delay_slot + 74 0x19 0x64 0xc0 0xf8 MOV p1, p2 +.delay_slot +.swstall delay_slot + 78 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 80 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 82 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 84 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.label TGT_F_ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj_96 +.return_address + 96 0xe0 0xc2 0xd0 0x00 0x00 0x03 0xb0 0x0a 0x10 0xba LDA r16, [p7]; MOVXM p7, #(_ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6params + 20) + 106 0x07 0x06 0x36 0x98 LDA r17, [p7] + 110 0x06 0x04 0x9e 0x98 LDA p1, [p6] + 114 0x00 0x00 NOPX +.no_stack_arguments + 116 0x00 0x00 0x00 0x00 0x01 0x04 JL #_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params +.delay_slot + 122 0x10 0x24 0x05 0x18 MOVX r18, #1 +.delay_slot + 126 0x00 0x00 0x04 0xc0 0x00 0x44 MOVXM p2, #_ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6params +.delay_slot + 132 0x1e 0x64 0xc0 0xf8 MOV p6, p2 +.delay_slot + 136 0x14 0x63 0x2d 0x98 LSHL r17, r17, r18 +.delay_slot + 140 0x18 0x68 0xc1 0x58 ADD.NC p0, r17, r16 +.return_address + 144 0xfe 0x87 0x20 0x00 0x00 0x01 0x30 0x00 0x10 0xba LDA lr, [sp, #-12]; MOVXM p2, #_ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6kn_rep + 154 0x40 0xc2 0xd0 0x60 0x02 0x2c LDA r16, [p2]; MOVX r24, #0 + 160 0x06 0x66 0x36 0x98 LDA r17, [p6, #24] + 164 0x07 0xfb 0x19 0x18 LDA p6, [sp, #-8] + 168 0x07 0xff 0x99 0x18 LDA p7, [sp, #-4] + 172 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 + 178 0x00 0x00 NOPX + 180 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 184 0x14 0x20 0x07 0x18 ADD r16, r16, #1 +.delay_slot + 188 0x14 0x77 0x07 0x98 EQ r27, r17, r16 +.delay_slot + 192 0x14 0x21 0x82 0x18 SEL.EQZ r16, r16, r24, r27 +.delay_slot + 196 0x0a 0x06 0x11 0x98 ST r16, [p2] +.label _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj__end last +.label __ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj___func_end0 last +.delay_slot +.swstall delay_slot + 200 0x00 0x00 NOPX + +.undef weak data _ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6kn_rep + +.undef weak data _ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6params + +.undef global text _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj + +.undef global text _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params + +.text_segment_name +.text global 10 _Z14_b7835_wrapperPPv +.label __Z14_b7835_wrapperPPv___func_begin0 +.function_start + 0 0x19 0x60 0xc0 0xf8 MOV p1, p0 + 4 0x01 0x1c 0x1e 0x98 LDA p0, [p1], #4 + 8 0x01 0x15 0x1e 0x98 LDA p2, [p1, #4] + 12 0x01 0x04 0x9e 0x98 LDA p1, [p1] +.tail_call + 16 0x00 0x00 0x00 0x00 0x00 0x84 J #_ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj +.delay_slot +.swstall delay_slot + 22 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 24 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 26 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 28 0x00 0x00 NOPX +.label _Z14_b7835_wrapperPPv__end last +.label __Z14_b7835_wrapperPPv___func_end0 last +.delay_slot +.swstall delay_slot + 30 0x00 0x00 NOPX + +.undef global text _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj + +.text_segment_name +.text global 10 _Z13kernelWrapperPPvjjjj +.label __Z13kernelWrapperPPvjjjj___func_begin0 +.function_start + 0 0x00 0xc2 0xd0 0xe9 0xe0 0x2c LDA r16, [p0]; NEZ r26, r1 + 6 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 12 0x0f 0xef 0x1d 0x98 ST p6, [sp, #-20] + 16 0xfe 0x3a 0xb0 0x01 0xc8 0xd0 0x70 0x02 ST r14, [sp, #-16]; MOV r14, r3 + 24 0xff 0x3e 0xb0 0x01 0xe8 0x50 0x70 0x02 ST r15, [sp, #-8]; MOV r15, r1 + 32 0x0f 0xf7 0x9d 0x98 ST p7, [sp, #-12] + 36 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4] + 40 0x1e 0x68 0x02 0x18 ADD.NC p6, r16, #4 + 44 0x06 0x1e 0x16 0x98 LDA r16, [p6], #4 + 48 0x06 0x3e 0x56 0x98 LDA r18, [p6], #12 + 52 0x06 0xee 0x36 0x98 LDA r17, [p6], #-8 + 56 0x06 0x07 0x76 0x98 LDA r27, [p6] + 60 0x00 0x00 NOPX + 62 0x00 0x00 NOPX + 64 0x00 0x00 NOPX + 66 0x00 0x00 NOPX + 68 0x00 0x00 NOPX + 70 0x00 0x00 NOPX + 72 0x14 0x21 0x22 0x18 SEL.EQZ r16, r16, r18, r27 + 76 0x0e 0xd6 0x11 0x98 ST r16, [p6, #-12] + 80 0xfc 0x1f 0xa0 0x35 0x39 0xe4 MOVX r16, #-1; MOV el0, r26 + 86 0x00 0x00 NOPX + 88 0x00 0x00 NOPX + 90 0x00 0x00 NOPX + 92 0x14 0x57 0x08 0x18 ACQ.COND r17, r16, r26 + 96 0x04 0x41 0x29 0xa0 0x05 0x64 MOVX r17, #2; MOV r19, #1 + 102 0xd5 0x23 0xb9 0x21 0x81 0xe4 LSHL r20, r26, r17; MOV r18, p0 + 108 0x9c 0x9f 0x9c 0xd2 0xa2 0xa4 LTU r18, r19, r15; ADD.NC p6, r18, r20 + 114 0xc0 0xd2 0xd7 0xe6 0x95 0x82 0x6e 0x60 0x72 0xba LDA r20, [p6]; ST r20, [sp, #-28]; MOV r19, p6 + 124 0xfd 0x4a 0xb0 0x03 0x4c 0x90 0x70 0x02 ST r18, [sp, #-24]; MOV r26, r18 + 132 0x00 0x00 NOPX + 134 0x00 0x00 NOPX + 136 0x00 0x00 NOPX + 138 0x00 0x00 NOPX + 140 0x00 0x00 NOPX + 142 0x1e 0x6a 0x02 0x18 ADD.NC p6, r20, #4 + 146 0x06 0x1e 0x96 0x98 LDA r20, [p6], #4 + 150 0x06 0x3e 0xd6 0x98 LDA r22, [p6], #12 + 154 0x06 0xee 0xb6 0x98 LDA r21, [p6], #-8 + 158 0x06 0x07 0x76 0x98 LDA r27, [p6] + 162 0x00 0x00 NOPX + 164 0x00 0x00 NOPX + 166 0x00 0x00 NOPX + 168 0x00 0x00 NOPX + 170 0x00 0x00 NOPX + 172 0x00 0x00 NOPX + 174 0x15 0x29 0x62 0x18 SEL.EQZ r20, r20, r22, r27 + 178 0x0e 0xd6 0x91 0x98 ST r20, [p6, #-12] + 182 0x00 0x00 NOPX + 184 0x00 0x00 NOPX + 186 0x00 0x00 NOPX + 188 0x00 0x00 NOPX + 190 0x15 0x57 0x08 0x18 ACQ.COND r21, r16, r26 + 194 0x14 0xa5 0x1d 0x98 LSHL r18, r18, r17 + 198 0x14 0xa3 0xb9 0xb3 0x92 0xa4 LSHL r18, r2, r17; ADD.NC r19, r19, r18 + 204 0x76 0x9e 0x0c 0xd3 0x92 0xa4 NEZ r26, r14; ADD.NC p6, r19, r18 + 210 0xc0 0xca 0xdf 0xc6 0xab 0x0c LDA r18, [p6]; ST r26, [sp, #-32] + 216 0x00 0x00 NOPX + 218 0x00 0x00 NOPX + 220 0x00 0x00 NOPX + 222 0x00 0x00 NOPX + 224 0x00 0x00 NOPX + 226 0x00 0x00 NOPX + 228 0x1f 0x69 0x02 0x18 ADD.NC p7, r18, #4 + 232 0x07 0x3e 0x76 0x98 LDA r19, [p7], #12 + 236 0x07 0xee 0x56 0x98 LDA r18, [p7], #-8 + 240 0x07 0x1e 0x96 0x98 LDA r20, [p7], #4 + 244 0x07 0x07 0x76 0x98 LDA r27, [p7] + 248 0x00 0x00 NOPX + 250 0x00 0x00 NOPX + 252 0x00 0x00 NOPX + 254 0x00 0x00 NOPX + 256 0x00 0x00 NOPX + 258 0x00 0x00 NOPX + 260 0x14 0xe7 0x42 0x18 SEL.EQZ r19, r19, r20, r27 + 264 0x0f 0xd6 0x71 0x98 ST r19, [p7, #-12] + 268 0x00 0x00 NOPX + 270 0x00 0x00 NOPX + 272 0x00 0x00 NOPX + 274 0x00 0x00 NOPX + 276 0x14 0x97 0x08 0x18 ACQ.COND r18, r16, r26 + 280 0x10 0x21 0x1d 0x98 LSHL r16, r0, r17 + 284 0x18 0x88 0x20 0xf8 MOV dj0, r16 + 288 0x00 0x00 0x0e 0xc0 0x00 0x44 MOVXM p7, #_ZL20g_uniformKernelFuncs + 294 0xe0 0x13 0xdf 0xb8 0x5b 0x0c LDA p1, [p7, dj0]; ST el0, [sp, #-36] + 300 0x00 0x00 NOPX + 302 0x00 0x00 NOPX + 304 0x00 0x00 NOPX + 306 0x00 0x00 NOPX + 308 0x00 0x00 NOPX + 310 0x00 0x00 NOPX +.no_stack_arguments + 312 0x10 0x30 0x40 0x18 JL p1 +.delay_slot + 316 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.delay_slot +.swstall delay_slot + 320 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 322 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 324 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 326 0x00 0x2c 0xf0 0x00 0x10 0x00 0x01 0xa5 0x7e 0xba NOPA; NOPB; NOPM +.return_address + 336 0xe0 0xc6 0xd0 0x40 0x0a 0x2c LDA r17, [p7]; MOVX r16, #1 + 342 0x07 0xdf 0x51 0x18 LDA r26, [sp, #-36] + 346 0x07 0xe4 0x41 0x18 LDA dj0, [sp, #-28] + 350 0x07 0xe8 0x29 0x18 LDA el0, [sp, #-24] + 354 0x07 0xe0 0x09 0x18 LDA eh0, [sp, #-32] + 358 0x00 0x00 NOPX + 360 0x00 0x00 NOPX + 362 0x18 0x68 0x88 0x18 ADD.NC p0, r17, #16 + 366 0x00 0x06 0x36 0x98 LDA r17, [p0] + 370 0x00 0x00 NOPX + 372 0x00 0x00 NOPX + 374 0x00 0x00 NOPX + 376 0x00 0x00 NOPX + 378 0x00 0x00 NOPX + 380 0x00 0x00 NOPX + 382 0x14 0x55 0x08 0x18 REL.COND r17, r16, r26 + 386 0x1e 0xc6 0xdd 0xaf 0x41 0xd4 LDA r17, [p0, #-4]; MOV r27, r15 + 392 0xe0 0x4a 0xdd 0x40 0x39 0xd4 LDA r18, [p7, dj0]; MOV r26, el0 + 398 0x00 0x00 NOPX + 400 0x00 0x00 NOPX + 402 0x00 0x00 NOPX + 404 0x00 0x00 NOPX + 406 0x00 0x00 NOPX + 408 0x14 0x27 0x11 0x98 SUB r19, r16, r17 + 412 0x8c 0x66 0x4e 0xd2 0x10 0x24 SEL.EQZ r17, r17, r19, r27; ADD.NC p7, r18, #16 + 418 0xe0 0xc6 0xd1 0xec 0x63 0x0c LDA r17, [p7]; ST r17, [p0, #-4] + 424 0x00 0x00 NOPX + 426 0x00 0x00 NOPX + 428 0x00 0x00 NOPX + 430 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 432 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 434 0x1e 0xa1 0x1c 0xf8 MOV r26, eh0 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 438 0x14 0x55 0x08 0x18 REL.COND r17, r16, r26 + 442 0xfe 0xc6 0xdd 0xc0 0x39 0xd4 LDA r17, [p7, #-4]; MOV r27, el0 + 448 0x06 0x06 0x56 0x98 LDA r18, [p6] + 452 0x00 0x00 NOPX + 454 0x00 0x00 NOPX + 456 0x00 0x00 NOPX + 458 0x00 0x00 NOPX + 460 0x00 0x00 NOPX + 462 0x14 0x27 0x11 0x98 SUB r19, r16, r17 + 466 0x8c 0x66 0x40 0xd2 0x14 0x24 SEL.EQZ r17, r17, r19, r27; ADD.NC p0, r18, #20 + 472 0x00 0xc6 0xdf 0xec 0x63 0x0c LDA r17, [p0]; ST r17, [p7, #-4] + 478 0x00 0x00 NOPX + 480 0x00 0x00 NOPX + 482 0x00 0x00 NOPX + 484 0x00 0x00 NOPX + 486 0x00 0x00 NOPX + 488 0x00 0x00 NOPX + 490 0x14 0x55 0x08 0x18 REL.COND r17, r16, r26 + 494 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] + 498 0x00 0xe6 0x36 0x98 LDA r17, [p0, #-8] + 502 0x07 0xef 0x19 0x18 LDA p6, [sp, #-20] +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 506 0x07 0xf7 0x99 0x18 LDA p7, [sp, #-12] +.aggressive_scheduled_block_id 2 +.noswbrkpt + 510 0x07 0xf1 0xd1 0x18 LDA r14, [sp, #-16] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 514 0x07 0xf9 0xf1 0x18 LDA r15, [sp, #-8] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 518 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 524 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 528 0x14 0x21 0x11 0x98 SUB r16, r16, r17 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 532 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 534 0x1e 0xd7 0x20 0xf8 MOV r27, r14 +.delay_slot + 538 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 +.label _Z13kernelWrapperPPvjjjj__end last +.label __Z13kernelWrapperPPvjjjj___func_end0 last +.delay_slot + 542 0x08 0xe6 0x11 0x98 ST r16, [p0, #-8] + + + +.direct_eval +,-,(,) diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/Release/0_0_reloadable82.sdr b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/Release/0_0_reloadable82.sdr new file mode 100644 index 0000000000000000000000000000000000000000..72afd20ce92a23a4375259b003dc4fd011788361 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/Release/0_0_reloadable82.sdr @@ -0,0 +1,104 @@ + +// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri May 30 11:36:48 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// bridge -o../Release/0_0_reloadable82 ../Release/0_0_reloadable82.o -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/isg -g -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable82.bcf -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/softfloat/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork3593526 -pme + +// Release: ipp V-2024.06-TGT-241219 + +// Symbols in memory 'DM_bankA': +// Symbols in memory 'DM_bankAB': +// Symbols in memory 'DM_bankAC': +// Symbols in memory 'DM_bankAD': +// Symbols in memory 'DM_bankB': +// Symbols in memory 'DM_bankBC': +// Symbols in memory 'DM_bankBD': +// Symbols in memory 'DM_bankC': +// Symbols in memory 'DM_bankCD': +// Symbols in memory 'DM_bankD': +// Symbols in memory 'DM_stack': +// Symbols in memory 'DM_test': +// Symbols in memory 'DMb': +_symbol gem_bfp_param 0x0007c200 +_symbol _ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6kn_rep 0x0007c36c +_symbol _ZN12me_primitive11control_satE 0x0007c370 +_symbol _ZN12me_primitive11control_rndE 0x0007c374 +_symbol _ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6params 0x0007c3c0 +// Symbols in memory 'DMh': +// Symbols in memory 'DMh_bankA': +// Symbols in memory 'DMh_bankAB': +// Symbols in memory 'DMh_bankAC': +// Symbols in memory 'DMh_bankAD': +// Symbols in memory 'DMh_bankB': +// Symbols in memory 'DMh_bankBC': +// Symbols in memory 'DMh_bankBD': +// Symbols in memory 'DMh_bankC': +// Symbols in memory 'DMh_bankCD': +// Symbols in memory 'DMh_bankD': +// Symbols in memory 'DMh_stack': +// Symbols in memory 'DMs': +// Symbols in memory 'DMs_bankA': +// Symbols in memory 'DMs_bankAB': +// Symbols in memory 'DMs_bankAC': +// Symbols in memory 'DMs_bankAD': +// Symbols in memory 'DMs_bankB': +// Symbols in memory 'DMs_bankBC': +// Symbols in memory 'DMs_bankBD': +// Symbols in memory 'DMs_bankC': +// Symbols in memory 'DMs_bankCD': +// Symbols in memory 'DMs_bankD': +// Symbols in memory 'DMs_stack': +// Symbols in memory 'DMv': +// Symbols in memory 'DMv_bankA': +// Symbols in memory 'DMv_bankAB': +// Symbols in memory 'DMv_bankAC': +// Symbols in memory 'DMv_bankAD': +// Symbols in memory 'DMv_bankB': +// Symbols in memory 'DMv_bankBC': +// Symbols in memory 'DMv_bankBD': +// Symbols in memory 'DMv_bankC': +// Symbols in memory 'DMv_bankCD': +// Symbols in memory 'DMv_bankD': +// Symbols in memory 'DMv_stack': +// Symbols in memory 'DMw': +// Symbols in memory 'DMw_bankA': +// Symbols in memory 'DMw_bankAB': +// Symbols in memory 'DMw_bankAC': +// Symbols in memory 'DMw_bankAD': +// Symbols in memory 'DMw_bankB': +// Symbols in memory 'DMw_bankBC': +// Symbols in memory 'DMw_bankBD': +// Symbols in memory 'DMw_bankC': +// Symbols in memory 'DMw_bankCD': +// Symbols in memory 'DMw_bankD': +// Symbols in memory 'DMw_stack': +// Symbols in memory 'DMx': +// Symbols in memory 'DMx_bankA': +// Symbols in memory 'DMx_bankAB': +// Symbols in memory 'DMx_bankAC': +// Symbols in memory 'DMx_bankAD': +// Symbols in memory 'DMx_bankB': +// Symbols in memory 'DMx_bankBC': +// Symbols in memory 'DMx_bankBD': +// Symbols in memory 'DMx_bankC': +// Symbols in memory 'DMx_bankCD': +// Symbols in memory 'DMx_bankD': +// Symbols in memory 'DMx_stack': +// Symbols in memory 'PM': +_symbol _Z13kernelWrapperPPvjjjj 0x000009e0 +_symbol _Z8init_accILt1EEvPaS0_iii 0x00000e40 +_symbol _Z12post_processPai 0x00000f70 +_symbol _Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params 0x000010a0 +_symbol _Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x00001830 +_symbol _Z15_b13786_wrapperPPv 0x00001a60 +_symbol _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj 0x00001a90 +_symbol _Z14_b8148_wrapperPPv 0x00001c80 +_symbol _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params 0x00001df0 +_symbol _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj 0x00001fd0 +_symbol _Z14_b8170_wrapperPPv 0x00002090 +_symbol _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj 0x000020b0 +_symbol _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params 0x000021c0 +_symbol _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj 0x00002640 +_symbol _Z14_b7835_wrapperPPv 0x00002710 +_symbol memset 0x00002730 +// Symbols in memory 'PMw': +// Symbols in memory 'TM4': diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/Release/0_0_reloadable82.srv b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/Release/0_0_reloadable82.srv new file mode 100644 index 0000000000000000000000000000000000000000..4e4279673be0cc4fde6427ea9a9279b7c1bb08c4 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/Release/0_0_reloadable82.srv @@ -0,0 +1,10727 @@ + +// File generated by darts version V-2024.06#84922c0d9f#241219, Fri May 30 11:36:49 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// darts -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -d -h -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable82 me + +// Release: ipp V-2024.06-TGT-241219 +.label __Z13kernelWrapperPPvjjjj___func_begin0 +.label _Z13kernelWrapperPPvjjjj +.function kernelWrapper _Z13kernelWrapperPPvjjjj +.src_ref 0 "0_0_reloadable82.cc" 60 first +.src_ref 0 "0_0_reloadable82.cc" 62 60 +.src_ref 0 "0_0_reloadable82.cc" 62 110 first +.function_start + 2528 "00101100" // LDA r16, [p0]; NEZ r26, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2529 "11100000" // /* MW 5 */ + 2530 "11101001" // /* MW 4 */ + 2531 "11010000" // /* MW 3 */ + 2532 "11000010" // /* MW 2 */ + 2533 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 60 + 2534 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2535 "00000001" // /* MW 5 */ + 2536 "00000000" // /* MW 4 */ + 2537 "00000000" // /* MW 3 */ + 2538 "00001000" // /* MW 2 */ + 2539 "00000000" // /* MW 1 */ + 2540 "10011000" // ST p6, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2541 "00011101" // /* MW 3 */ + 2542 "11101111" // /* MW 2 */ + 2543 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 0 "0_0_reloadable82.cc" 67 112 + 2544 "00000010" // ST r14, [sp, #-16]; MOV r14, r3 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2545 "01110000" // /* MW 7 */ + 2546 "11010000" // /* MW 6 */ + 2547 "11001000" // /* MW 5 */ + 2548 "00000001" // /* MW 4 */ + 2549 "10110000" // /* MW 3 */ + 2550 "00111010" // /* MW 2 */ + 2551 "11111110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 0 "0_0_reloadable82.cc" 64 110 + 2552 "00000010" // ST r15, [sp, #-8]; MOV r15, r1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2553 "01110000" // /* MW 7 */ + 2554 "01010000" // /* MW 6 */ + 2555 "11101000" // /* MW 5 */ + 2556 "00000001" // /* MW 4 */ + 2557 "10110000" // /* MW 3 */ + 2558 "00111110" // /* MW 2 */ + 2559 "11111111" // /* MW 1 */ + 2560 "10011000" // ST p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2561 "10011101" // /* MW 3 */ + 2562 "11110111" // /* MW 2 */ + 2563 "00001111" // /* MW 1 */ + 2564 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2565 "00111101" // /* MW 3 */ + 2566 "11111100" // /* MW 2 */ + 2567 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 first + 2568 "00011000" // ADD.NC p6, r16, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2569 "00000010" // /* MW 3 */ + 2570 "01101000" // /* MW 2 */ + 2571 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 + 2572 "10011000" // LDA r16, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2573 "00010110" // /* MW 3 */ + 2574 "00011110" // /* MW 2 */ + 2575 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 36 + 2576 "10011000" // LDA r18, [p6], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2577 "01010110" // /* MW 3 */ + 2578 "00111110" // /* MW 2 */ + 2579 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 46 + 2580 "10011000" // LDA r17, [p6], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2581 "00110110" // /* MW 3 */ + 2582 "11101110" // /* MW 2 */ + 2583 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 60 + 2584 "10011000" // LDA r27, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2585 "01110110" // /* MW 3 */ + 2586 "00000111" // /* MW 2 */ + 2587 "00000110" // /* MW 1 */ + 2588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2589 "00000000" // /* MW 1 */ + 2590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2591 "00000000" // /* MW 1 */ + 2592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2593 "00000000" // /* MW 1 */ + 2594 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2595 "00000000" // /* MW 1 */ + 2596 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2597 "00000000" // /* MW 1 */ + 2598 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2599 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 219 30 first +.src_ref 1 "io_buffer_compiler.h" 219 37 first + 2600 "00011000" // SEL.EQZ r16, r16, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2601 "00100010" // /* MW 3 */ + 2602 "00100001" // /* MW 2 */ + 2603 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 218 23 first + 2604 "10011000" // ST r16, [p6, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2605 "00010001" // /* MW 3 */ + 2606 "11010110" // /* MW 2 */ + 2607 "00001110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 +.src_ref 1 "io_buffer_main.h" 434 8 +.src_ref 1 "io_buffer_main.h" 434 8 + 2608 "11100100" // MOVX r16, #-1; MOV el0, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2609 "00111001" // /* MW 5 */ + 2610 "00110101" // /* MW 4 */ + 2611 "10100000" // /* MW 3 */ + 2612 "00011111" // /* MW 2 */ + 2613 "11111100" // /* MW 1 */ + 2614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2615 "00000000" // /* MW 1 */ + 2616 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2617 "00000000" // /* MW 1 */ + 2618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2619 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 first + 2620 "00011000" // ACQ.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2621 "00001000" // /* MW 3 */ + 2622 "01010111" // /* MW 2 */ + 2623 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 64 60 +.src_ref 0 "0_0_reloadable82.cc" 64 110 +.src_ref 0 "0_0_reloadable82.cc" 67 60 +.src_ref 0 "0_0_reloadable82.cc" 70 7 + 2624 "01100100" // MOVX r17, #2; MOV r19, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2625 "00000101" // /* MW 5 */ + 2626 "10100000" // /* MW 4 */ + 2627 "00101001" // /* MW 3 */ + 2628 "01000001" // /* MW 2 */ + 2629 "00000100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 64 60 +.src_ref 0 "0_0_reloadable82.cc" 64 60 first + 2630 "11100100" // LSHL r20, r26, r17; MOV r18, p0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2631 "10000001" // /* MW 5 */ + 2632 "00100001" // /* MW 4 */ + 2633 "10111001" // /* MW 3 */ + 2634 "00100011" // /* MW 2 */ + 2635 "11010101" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 64 60 +.src_ref 0 "0_0_reloadable82.cc" 64 110 + 2636 "10100100" // LTU r18, r19, r15; ADD.NC p6, r18, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2637 "10100010" // /* MW 5 */ + 2638 "11010010" // /* MW 4 */ + 2639 "10011100" // /* MW 3 */ + 2640 "10011111" // /* MW 2 */ + 2641 "10011100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 64 60 +.src_ref 0 "0_0_reloadable82.cc" 67 60 + 2642 "10111010" // LDA r20, [p6]; ST r20, [sp, #-28]; MOV r19, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2643 "01110010" // /* MW 9 */ + 2644 "01100000" // /* MW 8 */ + 2645 "01101110" // /* MW 7 */ + 2646 "10000010" // /* MW 6 */ + 2647 "10010101" // /* MW 5 */ + 2648 "11100110" // /* MW 4 */ + 2649 "11010111" // /* MW 3 */ + 2650 "11010010" // /* MW 2 */ + 2651 "11000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 + 2652 "00000010" // ST r18, [sp, #-24]; MOV r26, r18 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2653 "01110000" // /* MW 7 */ + 2654 "10010000" // /* MW 6 */ + 2655 "01001100" // /* MW 5 */ + 2656 "00000011" // /* MW 4 */ + 2657 "10110000" // /* MW 3 */ + 2658 "01001010" // /* MW 2 */ + 2659 "11111101" // /* MW 1 */ + 2660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2661 "00000000" // /* MW 1 */ + 2662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2663 "00000000" // /* MW 1 */ + 2664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2665 "00000000" // /* MW 1 */ + 2666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2667 "00000000" // /* MW 1 */ + 2668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2669 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 first + 2670 "00011000" // ADD.NC p6, r20, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2671 "00000010" // /* MW 3 */ + 2672 "01101010" // /* MW 2 */ + 2673 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 + 2674 "10011000" // LDA r20, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2675 "10010110" // /* MW 3 */ + 2676 "00011110" // /* MW 2 */ + 2677 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 36 + 2678 "10011000" // LDA r22, [p6], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2679 "11010110" // /* MW 3 */ + 2680 "00111110" // /* MW 2 */ + 2681 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 46 + 2682 "10011000" // LDA r21, [p6], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2683 "10110110" // /* MW 3 */ + 2684 "11101110" // /* MW 2 */ + 2685 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 60 + 2686 "10011000" // LDA r27, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2687 "01110110" // /* MW 3 */ + 2688 "00000111" // /* MW 2 */ + 2689 "00000110" // /* MW 1 */ + 2690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2691 "00000000" // /* MW 1 */ + 2692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2693 "00000000" // /* MW 1 */ + 2694 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2695 "00000000" // /* MW 1 */ + 2696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2697 "00000000" // /* MW 1 */ + 2698 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2699 "00000000" // /* MW 1 */ + 2700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2701 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 219 30 first +.src_ref 1 "io_buffer_compiler.h" 219 37 first + 2702 "00011000" // SEL.EQZ r20, r20, r22, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2703 "01100010" // /* MW 3 */ + 2704 "00101001" // /* MW 2 */ + 2705 "00010101" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 218 23 first + 2706 "10011000" // ST r20, [p6, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2707 "10010001" // /* MW 3 */ + 2708 "11010110" // /* MW 2 */ + 2709 "00001110" // /* MW 1 */ + 2710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2711 "00000000" // /* MW 1 */ + 2712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2713 "00000000" // /* MW 1 */ + 2714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2715 "00000000" // /* MW 1 */ + 2716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2717 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 first + 2718 "00011000" // ACQ.COND r21, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2719 "00001000" // /* MW 3 */ + 2720 "01010111" // /* MW 2 */ + 2721 "00010101" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 67 60 first + 2722 "10011000" // LSHL r18, r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2723 "00011101" // /* MW 3 */ + 2724 "10100101" // /* MW 2 */ + 2725 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 67 60 +.src_ref 0 "0_0_reloadable82.cc" 67 60 + 2726 "10100100" // LSHL r18, r2, r17; ADD.NC r19, r19, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2727 "10010010" // /* MW 5 */ + 2728 "10110011" // /* MW 4 */ + 2729 "10111001" // /* MW 3 */ + 2730 "10100011" // /* MW 2 */ + 2731 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 67 60 +.src_ref 0 "0_0_reloadable82.cc" 67 112 + 2732 "10100100" // NEZ r26, r14; ADD.NC p6, r19, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2733 "10010010" // /* MW 5 */ + 2734 "11010011" // /* MW 4 */ + 2735 "00001100" // /* MW 3 */ + 2736 "10011110" // /* MW 2 */ + 2737 "01110110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 67 60 + 2738 "00001100" // LDA r18, [p6]; ST r26, [sp, #-32] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2739 "10101011" // /* MW 5 */ + 2740 "11000110" // /* MW 4 */ + 2741 "11011111" // /* MW 3 */ + 2742 "11001010" // /* MW 2 */ + 2743 "11000000" // /* MW 1 */ + 2744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2745 "00000000" // /* MW 1 */ + 2746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2747 "00000000" // /* MW 1 */ + 2748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2749 "00000000" // /* MW 1 */ + 2750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2751 "00000000" // /* MW 1 */ + 2752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2753 "00000000" // /* MW 1 */ + 2754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2755 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 first + 2756 "00011000" // ADD.NC p7, r18, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2757 "00000010" // /* MW 3 */ + 2758 "01101001" // /* MW 2 */ + 2759 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 + 2760 "10011000" // LDA r19, [p7], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2761 "01110110" // /* MW 3 */ + 2762 "00111110" // /* MW 2 */ + 2763 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 23 + 2764 "10011000" // LDA r18, [p7], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2765 "01010110" // /* MW 3 */ + 2766 "11101110" // /* MW 2 */ + 2767 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 36 + 2768 "10011000" // LDA r20, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2769 "10010110" // /* MW 3 */ + 2770 "00011110" // /* MW 2 */ + 2771 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 60 + 2772 "10011000" // LDA r27, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2773 "01110110" // /* MW 3 */ + 2774 "00000111" // /* MW 2 */ + 2775 "00000111" // /* MW 1 */ + 2776 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2777 "00000000" // /* MW 1 */ + 2778 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2779 "00000000" // /* MW 1 */ + 2780 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2781 "00000000" // /* MW 1 */ + 2782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2783 "00000000" // /* MW 1 */ + 2784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2785 "00000000" // /* MW 1 */ + 2786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2787 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 219 30 first +.src_ref 1 "io_buffer_compiler.h" 219 37 first + 2788 "00011000" // SEL.EQZ r19, r19, r20, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2789 "01000010" // /* MW 3 */ + 2790 "11100111" // /* MW 2 */ + 2791 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 218 23 first + 2792 "10011000" // ST r19, [p7, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2793 "01110001" // /* MW 3 */ + 2794 "11010110" // /* MW 2 */ + 2795 "00001111" // /* MW 1 */ + 2796 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2797 "00000000" // /* MW 1 */ + 2798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2799 "00000000" // /* MW 1 */ + 2800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2801 "00000000" // /* MW 1 */ + 2802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2803 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 first + 2804 "00011000" // ACQ.COND r18, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2805 "00001000" // /* MW 3 */ + 2806 "10010111" // /* MW 2 */ + 2807 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 70 7 first + 2808 "10011000" // LSHL r16, r0, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2809 "00011101" // /* MW 3 */ + 2810 "00100001" // /* MW 2 */ + 2811 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 70 7 + 2812 "11111000" // MOV dj0, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2813 "00100000" // /* MW 3 */ + 2814 "10001000" // /* MW 2 */ + 2815 "00011000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 70 7 + 2816 "01000100" // MOVXM p7, #508800 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2817 "00000000" // /* MW 5 */ + 2818 "11000111" // /* MW 4 */ + 2819 "11001110" // /* MW 3 */ + 2820 "00000111" // /* MW 2 */ + 2821 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 70 7 + 2822 "00001100" // LDA p1, [p7, dj0]; ST el0, [sp, #-36] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2823 "01011011" // /* MW 5 */ + 2824 "10111000" // /* MW 4 */ + 2825 "11011111" // /* MW 3 */ + 2826 "00010011" // /* MW 2 */ + 2827 "11100000" // /* MW 1 */ + 2828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2829 "00000000" // /* MW 1 */ + 2830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2831 "00000000" // /* MW 1 */ + 2832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2833 "00000000" // /* MW 1 */ + 2834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2835 "00000000" // /* MW 1 */ + 2836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2837 "00000000" // /* MW 1 */ + 2838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2839 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 70 4 +.no_stack_arguments + 2840 "00011000" // JL p1 /* MW 4 */ /* control_operation: words=4 call unconditional cycles_taken=1 indirect absolute delay_slots=5 */ + 2841 "01000000" // /* MW 3 */ + 2842 "00110000" // /* MW 2 */ + 2843 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 73 60 +.src_ref 0 "0_0_reloadable82.cc" 75 60 +.delay_slot + 2844 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2845 "11000000" // /* MW 3 */ + 2846 "01100000" // /* MW 2 */ + 2847 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2849 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2851 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2853 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2854 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2855 "01111110" // /* MW 9 */ + 2856 "10100101" // /* MW 8 */ + 2857 "00000001" // /* MW 7 */ + 2858 "00000000" // /* MW 6 */ + 2859 "00010000" // /* MW 5 */ + 2860 "00000000" // /* MW 4 */ + 2861 "11110000" // /* MW 3 */ + 2862 "00101100" // /* MW 2 */ + 2863 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_main.h" 464 8 +.src_ref 1 "io_buffer_main.h" 464 8 +.src_ref 1 "io_buffer_main.h" 464 8 +.src_ref 0 "0_0_reloadable82.cc" 73 60 first +.return_address + 2864 "00101100" // LDA r17, [p7]; MOVX r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2865 "00001010" // /* MW 5 */ + 2866 "01000000" // /* MW 4 */ + 2867 "11010000" // /* MW 3 */ + 2868 "11000110" // /* MW 2 */ + 2869 "11100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 + 2870 "00011000" // LDA r26, [sp, #-36] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2871 "01010001" // /* MW 3 */ + 2872 "11011111" // /* MW 2 */ + 2873 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 75 60 + 2874 "00011000" // LDA dj0, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2875 "01000001" // /* MW 3 */ + 2876 "11100100" // /* MW 2 */ + 2877 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_main.h" 464 8 + 2878 "00011000" // LDA el0, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2879 "00101001" // /* MW 3 */ + 2880 "11101000" // /* MW 2 */ + 2881 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 + 2882 "00011000" // LDA eh0, [sp, #-32] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2883 "00001001" // /* MW 3 */ + 2884 "11100000" // /* MW 2 */ + 2885 "00000111" // /* MW 1 */ + 2886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2887 "00000000" // /* MW 1 */ + 2888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2889 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 12 first + 2890 "00011000" // ADD.NC p0, r17, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2891 "10001000" // /* MW 3 */ + 2892 "01101000" // /* MW 2 */ + 2893 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 12 + 2894 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2895 "00110110" // /* MW 3 */ + 2896 "00000110" // /* MW 2 */ + 2897 "00000000" // /* MW 1 */ + 2898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2899 "00000000" // /* MW 1 */ + 2900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2901 "00000000" // /* MW 1 */ + 2902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2903 "00000000" // /* MW 1 */ + 2904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2905 "00000000" // /* MW 1 */ + 2906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2907 "00000000" // /* MW 1 */ + 2908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2909 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 first + 2910 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2911 "00001000" // /* MW 3 */ + 2912 "01010101" // /* MW 2 */ + 2913 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 2914 "11010100" // LDA r17, [p0, #-4]; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2915 "01000001" // /* MW 5 */ + 2916 "10101111" // /* MW 4 */ + 2917 "11011101" // /* MW 3 */ + 2918 "11000110" // /* MW 2 */ + 2919 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 +.src_ref 0 "0_0_reloadable82.cc" 75 60 first + 2920 "11010100" // LDA r18, [p7, dj0]; MOV r26, el0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2921 "00111001" // /* MW 5 */ + 2922 "01000000" // /* MW 4 */ + 2923 "11011101" // /* MW 3 */ + 2924 "01001010" // /* MW 2 */ + 2925 "11100000" // /* MW 1 */ + 2926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2927 "00000000" // /* MW 1 */ + 2928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2929 "00000000" // /* MW 1 */ + 2930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2931 "00000000" // /* MW 1 */ + 2932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2933 "00000000" // /* MW 1 */ + 2934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2935 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 2936 "10011000" // SUB r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2937 "00010001" // /* MW 3 */ + 2938 "00100111" // /* MW 2 */ + 2939 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 12 first +.src_ref 1 "io_buffer_compiler.h" 630 24 + 2940 "00100100" // SEL.EQZ r17, r17, r19, r27; ADD.NC p7, r18, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2941 "00010000" // /* MW 5 */ + 2942 "11010010" // /* MW 4 */ + 2943 "01001110" // /* MW 3 */ + 2944 "01100110" // /* MW 2 */ + 2945 "10001100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 12 +.src_ref 1 "io_buffer_compiler.h" 630 22 first + 2946 "00001100" // LDA r17, [p7]; ST r17, [p0, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2947 "01100011" // /* MW 5 */ + 2948 "11101100" // /* MW 4 */ + 2949 "11010001" // /* MW 3 */ + 2950 "11000110" // /* MW 2 */ + 2951 "11100000" // /* MW 1 */ + 2952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2953 "00000000" // /* MW 1 */ + 2954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2955 "00000000" // /* MW 1 */ + 2956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2957 "00000000" // /* MW 1 */ + 2958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2959 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 2960 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2961 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 2962 "11111000" // MOV r26, eh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2963 "00011100" // /* MW 3 */ + 2964 "10100001" // /* MW 2 */ + 2965 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2966 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2967 "00001000" // /* MW 3 */ + 2968 "01010101" // /* MW 2 */ + 2969 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 2970 "11010100" // LDA r17, [p7, #-4]; MOV r27, el0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2971 "00111001" // /* MW 5 */ + 2972 "11000000" // /* MW 4 */ + 2973 "11011101" // /* MW 3 */ + 2974 "11000110" // /* MW 2 */ + 2975 "11111110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 78 60 first + 2976 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2977 "01010110" // /* MW 3 */ + 2978 "00000110" // /* MW 2 */ + 2979 "00000110" // /* MW 1 */ + 2980 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2981 "00000000" // /* MW 1 */ + 2982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2983 "00000000" // /* MW 1 */ + 2984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2985 "00000000" // /* MW 1 */ + 2986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2987 "00000000" // /* MW 1 */ + 2988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2989 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 2990 "10011000" // SUB r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2991 "00010001" // /* MW 3 */ + 2992 "00100111" // /* MW 2 */ + 2993 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 25 first +.src_ref 1 "io_buffer_compiler.h" 630 24 + 2994 "00100100" // SEL.EQZ r17, r17, r19, r27; ADD.NC p0, r18, #20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2995 "00010100" // /* MW 5 */ + 2996 "11010010" // /* MW 4 */ + 2997 "01000000" // /* MW 3 */ + 2998 "01100110" // /* MW 2 */ + 2999 "10001100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 25 +.src_ref 1 "io_buffer_compiler.h" 630 22 first + 3000 "00001100" // LDA r17, [p0]; ST r17, [p7, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3001 "01100011" // /* MW 5 */ + 3002 "11101100" // /* MW 4 */ + 3003 "11011111" // /* MW 3 */ + 3004 "11000110" // /* MW 2 */ + 3005 "00000000" // /* MW 1 */ + 3006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3007 "00000000" // /* MW 1 */ + 3008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3009 "00000000" // /* MW 1 */ + 3010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3011 "00000000" // /* MW 1 */ + 3012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3013 "00000000" // /* MW 1 */ + 3014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3015 "00000000" // /* MW 1 */ + 3016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3017 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 first + 3018 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3019 "00001000" // /* MW 3 */ + 3020 "01010101" // /* MW 2 */ + 3021 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 80 + 3022 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3023 "00111001" // /* MW 3 */ + 3024 "11111100" // /* MW 2 */ + 3025 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 3026 "10011000" // LDA r17, [p0, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3027 "00110110" // /* MW 3 */ + 3028 "11100110" // /* MW 2 */ + 3029 "00000000" // /* MW 1 */ + 3030 "00011000" // LDA p6, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3031 "00011001" // /* MW 3 */ + 3032 "11101111" // /* MW 2 */ + 3033 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 3034 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3035 "10011001" // /* MW 3 */ + 3036 "11110111" // /* MW 2 */ + 3037 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 3038 "00011000" // LDA r14, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3039 "11010001" // /* MW 3 */ + 3040 "11110001" // /* MW 2 */ + 3041 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3042 "00011000" // LDA r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3043 "11110001" // /* MW 3 */ + 3044 "11111001" // /* MW 2 */ + 3045 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 80 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3046 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3047 "00000001" // /* MW 5 */ + 3048 "00000000" // /* MW 4 */ + 3049 "00000000" // /* MW 3 */ + 3050 "11111000" // /* MW 2 */ + 3051 "11111111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 80 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3052 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3053 "00000000" // /* MW 3 */ + 3054 "00101000" // /* MW 2 */ + 3055 "00010000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 first +.delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3056 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3057 "00010001" // /* MW 3 */ + 3058 "00100001" // /* MW 2 */ + 3059 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3061 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3062 "11111000" // MOV r27, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3063 "00100000" // /* MW 3 */ + 3064 "11010111" // /* MW 2 */ + 3065 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.delay_slot + 3066 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3067 "00000010" // /* MW 3 */ + 3068 "01100001" // /* MW 2 */ + 3069 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 22 +.delay_slot + 3070 "10011000" // ST r16, [p0, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3071 "00010001" // /* MW 3 */ + 3072 "11100110" // /* MW 2 */ +.label _Z13kernelWrapperPPvjjjj__end +.label __Z13kernelWrapperPPvjjjj___func_end0 + 3073 "00001000" // /* MW 1 */ +.label _ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv +.label __ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv___func_begin0 +.function setup_gemm_bfp16_params _ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv +.src_ref 2 "gemm_bfp16_params.h" 128 first +.src_ref 2 "gemm_bfp16_params.h" 130 24 +.src_ref 2 "gemm_bfp16_params.h" 130 26 first +.function_start + 3088 "10111010" // LDA r3, [p0], #4; MOVXM p1, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3089 "00010000" // /* MW 9 */ + 3090 "00000000" // /* MW 8 */ + 3091 "10110001" // /* MW 7 */ + 3092 "11110000" // /* MW 6 */ + 3093 "00000001" // /* MW 5 */ + 3094 "00000000" // /* MW 4 */ + 3095 "11010000" // /* MW 3 */ + 3096 "10001110" // /* MW 2 */ + 3097 "00000011" // /* MW 1 */ +.src_ref 2 "gemm_bfp16_params.h" 58 39 +.src_ref 2 "gemm_bfp16_params.h" 59 38 +.src_ref 2 "gemm_bfp16_params.h" 61 39 +.src_ref 2 "gemm_bfp16_params.h" 71 52 +.src_ref 2 "gemm_bfp16_params.h" 86 29 +.src_ref 2 "gemm_bfp16_params.h" 93 56 + 3098 "10111010" // MOVA r29, #-2; MOVX r6, #-3; MOV r5, #-4 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3099 "01011000" // /* MW 9 */ + 3100 "11111100" // /* MW 8 */ + 3101 "10101111" // /* MW 7 */ + 3102 "10101000" // /* MW 6 */ + 3103 "01100111" // /* MW 5 */ + 3104 "00111110" // /* MW 4 */ + 3105 "00000000" // /* MW 3 */ + 3106 "11011101" // /* MW 2 */ + 3107 "11111111" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 +.src_ref 3 "aie.hpp" 6982 6 +.src_ref 3 "aie.hpp" 6982 6 +.src_ref 3 "aie.hpp" 6982 6 +.src_ref 3 "aie.hpp" 6982 6 +.src_ref 3 "aie.hpp" 7054 44 +.src_ref 3 "aie.hpp" 7056 79 +.src_ref 3 "aie.hpp" 7057 21 +.src_ref 3 "aie.hpp" 7072 95 +.src_ref 2 "gemm_bfp16_params.h" 44 26 +.src_ref 2 "gemm_bfp16_params.h" 44 26 +.src_ref 2 "gemm_bfp16_params.h" 80 39 +.src_ref 2 "gemm_bfp16_params.h" 99 73 +.src_ref 2 "gemm_bfp16_params.h" 138 24 + 3108 "10111010" // MOVA r24, #0; MOVX r1, #1; MOV r0, #8 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3109 "01011000" // /* MW 9 */ + 3110 "00001000" // /* MW 8 */ + 3111 "00001000" // /* MW 7 */ + 3112 "00101000" // /* MW 6 */ + 3113 "00010000" // /* MW 5 */ + 3114 "00000000" // /* MW 4 */ + 3115 "00000000" // /* MW 3 */ + 3116 "00011000" // /* MW 2 */ + 3117 "00000000" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7054 44 +.src_ref 3 "aie.hpp" 7072 95 +.src_ref 3 "aie.hpp" 7073 95 +.src_ref 2 "gemm_bfp16_params.h" 44 26 +.src_ref 2 "gemm_bfp16_params.h" 88 55 + 3118 "10111010" // MOVA r4, #256; MOVXM r28, #16777214 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3119 "00010000" // /* MW 9 */ + 3120 "11111111" // /* MW 8 */ + 3121 "10001111" // /* MW 7 */ + 3122 "11111111" // /* MW 6 */ + 3123 "00111111" // /* MW 5 */ + 3124 "00000000" // /* MW 4 */ + 3125 "00000000" // /* MW 3 */ + 3126 "00000100" // /* MW 2 */ + 3127 "00100000" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7053 42 +.src_ref 3 "aie.hpp" 7053 42 +.src_ref 3 "aie.hpp" 7053 42 +.src_ref 3 "aie.hpp" 7057 21 +.src_ref 2 "gemm_bfp16_params.h" 85 38 +.src_ref 2 "gemm_bfp16_params.h" 88 66 + 3128 "10111010" // MOVA r16, #7; MOVX r19, #9; MOV r2, #512 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3129 "01011000" // /* MW 9 */ + 3130 "00000000" // /* MW 8 */ + 3131 "01001010" // /* MW 7 */ + 3132 "00101000" // /* MW 6 */ + 3133 "00110001" // /* MW 5 */ + 3134 "00000001" // /* MW 4 */ + 3135 "00000000" // /* MW 3 */ + 3136 "11110000" // /* MW 2 */ + 3137 "00000000" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 +.src_ref 4 "tuple" 562 47 +.src_ref 4 "tuple" 562 47 +.src_ref 4 "tuple" 562 47 +.src_ref 3 "aie.hpp" 6982 6 +.src_ref 3 "aie.hpp" 6982 6 +.src_ref 3 "aie.hpp" 7056 79 +.src_ref 3 "aie.hpp" 7056 79 +.src_ref 2 "gemm_bfp16_params.h" 138 24 + 3138 "01100100" // MOVX r7, #128; MOV m0, #52 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3139 "11010001" // /* MW 5 */ + 3140 "00000000" // /* MW 4 */ + 3141 "00100000" // /* MW 3 */ + 3142 "11000000" // /* MW 2 */ + 3143 "00010001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16_params.h" 138 24 + 3144 "11111000" // MOV dj0, m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3145 "00000000" // /* MW 3 */ + 3146 "10000000" // /* MW 2 */ + 3147 "00011000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16_params.h" 58 39 first +.src_ref 2 "gemm_bfp16_params.h" 130 24 first + 3148 "01011100" // ST r3, [p1], #4; LSHL r27, r3, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3149 "11011011" // /* MW 5 */ + 3150 "11101100" // /* MW 4 */ + 3151 "00110001" // /* MW 3 */ + 3152 "10001110" // /* MW 2 */ + 3153 "00100011" // /* MW 1 */ +.src_ref 2 "gemm_bfp16_params.h" 93 56 first +.src_ref 2 "gemm_bfp16_params.h" 131 26 first + 3154 "00101100" // LDA r3, [p0], #4; LSHL r17, r3, r5 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3155 "10111011" // /* MW 5 */ + 3156 "11000100" // /* MW 4 */ + 3157 "11010001" // /* MW 3 */ + 3158 "10001110" // /* MW 2 */ + 3159 "00000011" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7054 59 first +.src_ref 2 "gemm_bfp16_params.h" 80 39 first + 3160 "00100100" // LSHL r31, r27, r0; ADD.NC r17, r17, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3161 "11111111" // /* MW 5 */ + 3162 "10110001" // /* MW 4 */ + 3163 "10111000" // /* MW 3 */ + 3164 "11000001" // /* MW 2 */ + 3165 "11011111" // /* MW 1 */ +.src_ref 2 "gemm_bfp16_params.h" 82 45 +.src_ref 2 "gemm_bfp16_params.h" 85 38 first + 3166 "10100100" // LSHL r19, r27, r19; ADD.NC r18, r31, r4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3167 "00100010" // /* MW 5 */ + 3168 "00111111" // /* MW 4 */ + 3169 "10111001" // /* MW 3 */ + 3170 "11100111" // /* MW 2 */ + 3171 "11011100" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7053 42 first + 3172 "10011000" // LSHL r22, r27, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3173 "00001101" // /* MW 3 */ + 3174 "11101101" // /* MW 2 */ + 3175 "00010110" // /* MW 1 */ + 3176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3177 "00000000" // /* MW 1 */ + 3178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3179 "00000000" // /* MW 1 */ + 3180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3181 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16_params.h" 59 38 first +.src_ref 2 "gemm_bfp16_params.h" 131 24 first + 3182 "01011100" // ST r3, [p1], #4; LSHL r26, r3, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3183 "11011011" // /* MW 5 */ + 3184 "11101000" // /* MW 4 */ + 3185 "00110001" // /* MW 3 */ + 3186 "10001110" // /* MW 2 */ + 3187 "00100011" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7054 59 first +.src_ref 2 "gemm_bfp16_params.h" 132 26 first + 3188 "00101100" // LDA r21, [p0], #4; ADD r20, r26, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3189 "11111110" // /* MW 5 */ + 3190 "01010011" // /* MW 4 */ + 3191 "11011101" // /* MW 3 */ + 3192 "11010110" // /* MW 2 */ + 3193 "00000011" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7054 44 + 3194 "10011000" // MUL r23, r22, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3195 "01001111" // /* MW 3 */ + 3196 "10101111" // /* MW 2 */ + 3197 "00010101" // /* MW 1 */ + 3198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3199 "00000000" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7056 79 first + 3200 "10011000" // SUB r30, r7, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3201 "01110001" // /* MW 3 */ + 3202 "11111101" // /* MW 2 */ + 3203 "00010001" // /* MW 1 */ + 3204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3205 "00000000" // /* MW 1 */ + 3206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3207 "00000000" // /* MW 1 */ + 3208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3209 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16_params.h" 65 24 first +.src_ref 2 "gemm_bfp16_params.h" 132 24 first + 3210 "01011100" // ST r21, [p1], #4; MUL r3, r3, r21 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3211 "10111111" // /* MW 5 */ + 3212 "10001110" // /* MW 4 */ + 3213 "00110001" // /* MW 3 */ + 3214 "11010110" // /* MW 2 */ + 3215 "00100011" // /* MW 1 */ +.src_ref 2 "gemm_bfp16_params.h" 61 39 first +.src_ref 2 "gemm_bfp16_params.h" 133 26 first + 3216 "00101100" // LDA el0, [p0], #4; LSHL r6, r21, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3217 "11011011" // /* MW 5 */ + 3218 "10011000" // /* MW 4 */ + 3219 "11011010" // /* MW 3 */ + 3220 "10000101" // /* MW 2 */ + 3221 "00000011" // /* MW 1 */ +.src_ref 2 "gemm_bfp16_params.h" 71 36 first +.src_ref 2 "gemm_bfp16_params.h" 88 55 + 3222 "10100100" // MUL r25, r27, r6; ADD.NC r28, r6, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3223 "11100010" // /* MW 5 */ + 3224 "00100110" // /* MW 4 */ + 3225 "11111110" // /* MW 3 */ + 3226 "01001101" // /* MW 2 */ + 3227 "11011110" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7057 21 first +.src_ref 2 "gemm_bfp16_params.h" 86 29 first + 3228 "10100100" // LSHL r5, r21, r5; ADD.NC r21, r26, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3229 "10100010" // /* MW 5 */ + 3230 "10111010" // /* MW 4 */ + 3231 "10111010" // /* MW 3 */ + 3232 "01001011" // /* MW 2 */ + 3233 "10101001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7054 44 +.src_ref 3 "aie.hpp" 7056 79 +.src_ref 2 "gemm_bfp16_params.h" 71 52 first +.src_ref 2 "gemm_bfp16_params.h" 86 38 + 3234 "10111010" // MOVA r25, #128; LSHL r29, r25, r29; ADD.NC r5, r5, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3235 "11001000" // /* MW 9 */ + 3236 "01111111" // /* MW 8 */ + 3237 "10101001" // /* MW 7 */ + 3238 "11101100" // /* MW 6 */ + 3239 "11011110" // /* MW 5 */ + 3240 "00110011" // /* MW 4 */ + 3241 "00000000" // /* MW 3 */ + 3242 "00011001" // /* MW 2 */ + 3243 "00010000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16_params.h" 88 66 first + 3244 "00011000" // MSC r2, r2, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3245 "11001110" // /* MW 3 */ + 3246 "11000101" // /* MW 2 */ + 3247 "00010111" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7053 42 first + 3248 "10011000" // LSHL r6, r6, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3249 "00001101" // /* MW 3 */ + 3250 "10001101" // /* MW 2 */ + 3251 "00010001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7057 21 first + 3252 "10011000" // LSHL r21, r21, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3253 "00001101" // /* MW 3 */ + 3254 "01101011" // /* MW 2 */ + 3255 "00010101" // /* MW 1 */ +.src_ref 2 "gemm_bfp16_params.h" 99 73 first +.src_ref 2 "gemm_bfp16_params.h" 133 24 first + 3256 "01011100" // ST el0, [p1], #4; LSHL r28, r26, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3257 "00011011" // /* MW 5 */ + 3258 "01110000" // /* MW 4 */ + 3259 "00111101" // /* MW 3 */ + 3260 "10000101" // /* MW 2 */ + 3261 "00100011" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7053 42 first +.src_ref 2 "gemm_bfp16_params.h" 134 26 first + 3262 "00101100" // LDA el0, [p0]; LSHL r16, r26, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3263 "00011011" // /* MW 5 */ + 3264 "01000010" // /* MW 4 */ + 3265 "11011101" // /* MW 3 */ + 3266 "10000101" // /* MW 2 */ + 3267 "00000000" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7056 79 first + 3268 "10011000" // SUB r27, r28, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3269 "01010001" // /* MW 3 */ + 3270 "00110111" // /* MW 2 */ + 3271 "00010111" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7054 44 first + 3272 "10011000" // LSHL r0, r5, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3273 "00001101" // /* MW 3 */ + 3274 "01000000" // /* MW 2 */ + 3275 "00010001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7054 44 +.src_ref 3 "aie.hpp" 7057 21 first + 3276 "00011000" // MAC r0, r0, r6, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3277 "01000110" // /* MW 3 */ + 3278 "10000001" // /* MW 2 */ + 3279 "00010001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7054 44 first +.src_ref 3 "aie.hpp" 7056 79 first + 3280 "00011000" // MSC r25, r25, r6, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3281 "01001110" // /* MW 3 */ + 3282 "10110011" // /* MW 2 */ + 3283 "00010001" // /* MW 1 */ + 3284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3285 "00000000" // /* MW 1 */ + 3286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3287 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16_params.h" 134 24 first + 3288 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3289 "00101001" // /* MW 3 */ + 3290 "00011100" // /* MW 2 */ + 3291 "00001001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16_params.h" 135 26 first + 3292 "10011000" // LDA el0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3293 "00101110" // /* MW 3 */ + 3294 "00010100" // /* MW 2 */ + 3295 "00000000" // /* MW 1 */ + 3296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3297 "00000000" // /* MW 1 */ + 3298 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3299 "00000000" // /* MW 1 */ + 3300 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3301 "00000000" // /* MW 1 */ + 3302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3303 "00000000" // /* MW 1 */ + 3304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3305 "00000000" // /* MW 1 */ + 3306 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3307 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16_params.h" 135 24 + 3308 "10011000" // ST el0, [p1], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3309 "00101001" // /* MW 3 */ + 3310 "00111100" // /* MW 2 */ + 3311 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7056 79 +.src_ref 2 "gemm_bfp16_params.h" 44 26 first + 3312 "00000010" // ST r3, [p1], #4; ADD.NC r3, r6, #-128 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3313 "00000000" // /* MW 7 */ + 3314 "10100000" // /* MW 6 */ + 3315 "01101001" // /* MW 5 */ + 3316 "00000000" // /* MW 4 */ + 3317 "00110000" // /* MW 3 */ + 3318 "10001110" // /* MW 2 */ + 3319 "00100011" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7056 79 first +.src_ref 2 "gemm_bfp16_params.h" 44 26 + 3320 "01011100" // ST r29, [p1], #4; SUB r29, r7, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3321 "00000011" // /* MW 5 */ + 3322 "11110110" // /* MW 4 */ + 3323 "00110011" // /* MW 3 */ + 3324 "11110110" // /* MW 2 */ + 3325 "00100011" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7056 79 +.src_ref 2 "gemm_bfp16_params.h" 44 26 first + 3326 "00000010" // ST r26, [p1], #4; ADD.NC r26, r22, #-128 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3327 "00000000" // /* MW 7 */ + 3328 "10100000" // /* MW 6 */ + 3329 "01001101" // /* MW 5 */ + 3330 "00000011" // /* MW 4 */ + 3331 "00110000" // /* MW 3 */ + 3332 "11101010" // /* MW 2 */ + 3333 "00100011" // /* MW 1 */ +.src_ref 2 "gemm_bfp16_params.h" 44 26 + 3334 "10011000" // ST r1, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3335 "00110001" // /* MW 3 */ + 3336 "00011100" // /* MW 2 */ + 3337 "00001001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16_params.h" 44 26 + 3338 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3339 "00010001" // /* MW 3 */ + 3340 "00011111" // /* MW 2 */ + 3341 "00001001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16_params.h" 44 26 + 3342 "10011000" // ST r31, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3343 "11110001" // /* MW 3 */ + 3344 "00011111" // /* MW 2 */ + 3345 "00001001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16_params.h" 44 26 + 3346 "10011000" // ST r4, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3347 "10010001" // /* MW 3 */ + 3348 "00011100" // /* MW 2 */ + 3349 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7072 95 first +.src_ref 2 "gemm_bfp16_params.h" 44 26 + 3350 "01011100" // ST r18, [p1], #4; ADD r18, r27, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3351 "00000001" // /* MW 5 */ + 3352 "11001010" // /* MW 4 */ + 3353 "00111101" // /* MW 3 */ + 3354 "11001010" // /* MW 2 */ + 3355 "00100011" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7073 95 first +.src_ref 2 "gemm_bfp16_params.h" 44 26 first + 3356 "01011100" // ST r19, [p1], #4; SUB r19, r4, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3357 "11100011" // /* MW 5 */ + 3358 "01001110" // /* MW 4 */ + 3359 "00110010" // /* MW 3 */ + 3360 "11001110" // /* MW 2 */ + 3361 "00100011" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7054 44 first +.src_ref 3 "aie.hpp" 7072 95 first +.src_ref 2 "gemm_bfp16_params.h" 44 26 + 3362 "01011100" // ST r5, [p1], #4; MSC r4, r4, r6, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3363 "10011100" // /* MW 5 */ + 3364 "00010010" // /* MW 4 */ + 3365 "00110011" // /* MW 3 */ + 3366 "10010110" // /* MW 2 */ + 3367 "00100011" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7056 79 +.src_ref 3 "aie.hpp" 7057 21 +.src_ref 2 "gemm_bfp16_params.h" 44 26 first + 3368 "01011100" // ST r2, [p1], #16; MOVX r2, #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3369 "00000010" // /* MW 5 */ + 3370 "00001000" // /* MW 4 */ + 3371 "00111111" // /* MW 3 */ + 3372 "10001010" // /* MW 2 */ + 3373 "00101001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7056 79 first +.src_ref 2 "gemm_bfp16_params.h" 44 26 + 3374 "01011100" // ST r24, [p1], #4; XOR r31, r23, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3375 "01001101" // /* MW 5 */ + 3376 "11111100" // /* MW 4 */ + 3377 "00111011" // /* MW 3 */ + 3378 "11100010" // /* MW 2 */ + 3379 "00100011" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7072 95 first +.src_ref 2 "gemm_bfp16_params.h" 44 26 first + 3380 "01011100" // ST r24, [p1], #-12; SUB r23, r24, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3381 "11100011" // /* MW 5 */ + 3382 "01011110" // /* MW 4 */ + 3383 "00111100" // /* MW 3 */ + 3384 "11100010" // /* MW 2 */ + 3385 "00111011" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7057 21 first +.src_ref 2 "gemm_bfp16_params.h" 44 26 + 3386 "01011100" // ST r24, [p1], #4; XOR r2, r2, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3387 "00001101" // /* MW 5 */ + 3388 "00001000" // /* MW 4 */ + 3389 "00110001" // /* MW 3 */ + 3390 "11100010" // /* MW 2 */ + 3391 "00100011" // /* MW 1 */ +.src_ref 3 "aie.hpp" 7057 21 +.src_ref 2 "gemm_bfp16_params.h" 44 26 first + 3392 "01011100" // ST r24, [p1], #-8; SUB r0, r24, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3393 "00000011" // /* MW 5 */ + 3394 "00000000" // /* MW 4 */ + 3395 "00111100" // /* MW 3 */ + 3396 "11100010" // /* MW 2 */ + 3397 "00111101" // /* MW 1 */ +.src_ref 2 "gemm_bfp16_params.h" 44 26 + 3398 "10011000" // ST r24, [p1], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3399 "00010001" // /* MW 3 */ + 3400 "01011111" // /* MW 2 */ + 3401 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 first + 3402 "10011000" // ST r17, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3403 "00110001" // /* MW 3 */ + 3404 "00011110" // /* MW 2 */ + 3405 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3406 "10011000" // ST r30, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3407 "11010001" // /* MW 3 */ + 3408 "00011111" // /* MW 2 */ + 3409 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3410 "10011000" // ST r5, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3411 "10110001" // /* MW 3 */ + 3412 "00011100" // /* MW 2 */ + 3413 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3414 "10011000" // ST r31, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3415 "11110001" // /* MW 3 */ + 3416 "00011111" // /* MW 2 */ + 3417 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3418 "10011000" // ST r20, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3419 "10010001" // /* MW 3 */ + 3420 "00011110" // /* MW 2 */ + 3421 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3422 "10011000" // ST r26, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3423 "01010001" // /* MW 3 */ + 3424 "00011111" // /* MW 2 */ + 3425 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3426 "10011000" // ST r1, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3427 "00110001" // /* MW 3 */ + 3428 "00011100" // /* MW 2 */ + 3429 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3430 "10011000" // ST r7, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3431 "11110001" // /* MW 3 */ + 3432 "00011100" // /* MW 2 */ + 3433 "00001001" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 first + 3434 "10011000" // ST r20, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3435 "10010001" // /* MW 3 */ + 3436 "00011110" // /* MW 2 */ + 3437 "00001001" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 + 3438 "10011000" // ST r22, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3439 "11010001" // /* MW 3 */ + 3440 "00011110" // /* MW 2 */ + 3441 "00001001" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 + 3442 "10011000" // ST r5, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3443 "10110001" // /* MW 3 */ + 3444 "00011100" // /* MW 2 */ + 3445 "00001001" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 + 3446 "10011000" // ST r23, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3447 "11110001" // /* MW 3 */ + 3448 "00011110" // /* MW 2 */ + 3449 "00001001" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 +.src_ref 3 "aie.hpp" 7054 44 first +.src_ref 3 "aie.hpp" 7057 21 first + 3450 "01011100" // ST r19, [p1], #4; MAC r21, r21, r5, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3451 "10001100" // /* MW 5 */ + 3452 "11010111" // /* MW 4 */ + 3453 "00110010" // /* MW 3 */ + 3454 "11001110" // /* MW 2 */ + 3455 "00100011" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 first + 3456 "10011000" // ST r7, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3457 "11110001" // /* MW 3 */ + 3458 "00011100" // /* MW 2 */ + 3459 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 first +.src_ref 3 "aie.hpp" 7056 79 first + 3460 "01011100" // ST r17, [p1], #4; SUB r28, r24, r21 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3461 "10100011" // /* MW 5 */ + 3462 "01110010" // /* MW 4 */ + 3463 "00111100" // /* MW 3 */ + 3464 "11000110" // /* MW 2 */ + 3465 "00100011" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 +.src_ref 3 "aie.hpp" 7073 95 first + 3466 "01011100" // ST r28, [p1], #4; SUB r21, r16, r21 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3467 "10100011" // /* MW 5 */ + 3468 "01010110" // /* MW 4 */ + 3469 "00111000" // /* MW 3 */ + 3470 "11110010" // /* MW 2 */ + 3471 "00100011" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 first + 3472 "10011000" // ST r5, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3473 "10110001" // /* MW 3 */ + 3474 "00011100" // /* MW 2 */ + 3475 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3476 "10011000" // ST r27, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3477 "01110001" // /* MW 3 */ + 3478 "00011111" // /* MW 2 */ + 3479 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3480 "10011000" // ST r20, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3481 "10010001" // /* MW 3 */ + 3482 "00011110" // /* MW 2 */ + 3483 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3484 "10011000" // ST r29, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3485 "10110001" // /* MW 3 */ + 3486 "00011111" // /* MW 2 */ + 3487 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3488 "10011000" // ST r1, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3489 "00110001" // /* MW 3 */ + 3490 "00011100" // /* MW 2 */ + 3491 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3492 "10011000" // ST r16, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3493 "00010001" // /* MW 3 */ + 3494 "00011110" // /* MW 2 */ + 3495 "00001001" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 first + 3496 "10011000" // ST r20, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3497 "10010001" // /* MW 3 */ + 3498 "00011110" // /* MW 2 */ + 3499 "00001001" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 + 3500 "10011000" // ST r7, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3501 "11110001" // /* MW 3 */ + 3502 "00011100" // /* MW 2 */ + 3503 "00001001" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 + 3504 "10011000" // ST r5, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3505 "10110001" // /* MW 3 */ + 3506 "00011100" // /* MW 2 */ + 3507 "00001001" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 + 3508 "10011000" // ST r18, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3509 "01010001" // /* MW 3 */ + 3510 "00011110" // /* MW 2 */ + 3511 "00001001" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 + 3512 "10011000" // ST r21, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3513 "10110001" // /* MW 3 */ + 3514 "00011110" // /* MW 2 */ + 3515 "00001001" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 + 3516 "10011000" // ST r16, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3517 "00010001" // /* MW 3 */ + 3518 "00011110" // /* MW 2 */ + 3519 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 first + 3520 "10011000" // ST r17, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3521 "00110001" // /* MW 3 */ + 3522 "00011110" // /* MW 2 */ + 3523 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3524 "10011000" // ST r2, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3525 "01010001" // /* MW 3 */ + 3526 "00011100" // /* MW 2 */ + 3527 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3528 "10011000" // ST r5, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3529 "10110001" // /* MW 3 */ + 3530 "00011100" // /* MW 2 */ + 3531 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3532 "10011000" // ST r25, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3533 "00110001" // /* MW 3 */ + 3534 "00011111" // /* MW 2 */ + 3535 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3536 "10011000" // ST r20, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3537 "10010001" // /* MW 3 */ + 3538 "00011110" // /* MW 2 */ + 3539 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3540 "10011000" // ST r3, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3541 "01110001" // /* MW 3 */ + 3542 "00011100" // /* MW 2 */ + 3543 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3544 "10011000" // ST r1, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3545 "00110001" // /* MW 3 */ + 3546 "00011100" // /* MW 2 */ + 3547 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3548 "10011000" // ST r7, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3549 "11110001" // /* MW 3 */ + 3550 "00011100" // /* MW 2 */ + 3551 "00001001" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 first + 3552 "10011000" // ST r20, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3553 "10010001" // /* MW 3 */ + 3554 "00011110" // /* MW 2 */ + 3555 "00001001" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 + 3556 "10011000" // ST r6, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3557 "11010001" // /* MW 3 */ + 3558 "00011100" // /* MW 2 */ + 3559 "00001001" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 + 3560 "10011000" // ST r5, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3561 "10110001" // /* MW 3 */ + 3562 "00011100" // /* MW 2 */ + 3563 "00001001" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 + 3564 "10011000" // ST r4, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3565 "10010001" // /* MW 3 */ + 3566 "00011100" // /* MW 2 */ + 3567 "00001001" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 + 3568 "10011000" // ST r0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3569 "00010001" // /* MW 3 */ + 3570 "00011100" // /* MW 2 */ + 3571 "00001001" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 + 3572 "10011000" // ST r7, [p1], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3573 "11110001" // /* MW 3 */ + 3574 "00001000" // /* MW 2 */ + 3575 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 first + 3576 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3577 "00010001" // /* MW 3 */ + 3578 "00011111" // /* MW 2 */ + 3579 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3580 "10011000" // ST r24, [p1], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3581 "00010001" // /* MW 3 */ + 3582 "11011111" // /* MW 2 */ + 3583 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3584 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3585 "00010001" // /* MW 3 */ + 3586 "00011111" // /* MW 2 */ + 3587 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3588 "10011000" // ST r24, [p1], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3589 "00010001" // /* MW 3 */ + 3590 "11011111" // /* MW 2 */ + 3591 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3592 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3593 "00010001" // /* MW 3 */ + 3594 "00011111" // /* MW 2 */ + 3595 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3596 "10011000" // ST r24, [p1], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3597 "00010001" // /* MW 3 */ + 3598 "11011111" // /* MW 2 */ + 3599 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3600 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3601 "00010001" // /* MW 3 */ + 3602 "00011111" // /* MW 2 */ + 3603 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3604 "10011000" // ST r24, [p1], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3605 "00010001" // /* MW 3 */ + 3606 "11011111" // /* MW 2 */ + 3607 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 + 3608 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3609 "00010001" // /* MW 3 */ + 3610 "00011111" // /* MW 2 */ + 3611 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 +.src_ref 2 "gemm_bfp16_params.h" 139 first + 3612 "01011100" // ST r24, [p1], #-12; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 3613 "00000000" // /* MW 5 */ + 3614 "01010000" // /* MW 4 */ + 3615 "00110000" // /* MW 3 */ + 3616 "11100010" // /* MW 2 */ + 3617 "00111011" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 first +.delay_slot + 3618 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3619 "00010001" // /* MW 3 */ + 3620 "00011111" // /* MW 2 */ + 3621 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 +.delay_slot + 3622 "10011000" // ST r24, [p1], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3623 "00010001" // /* MW 3 */ + 3624 "11011111" // /* MW 2 */ + 3625 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 +.delay_slot + 3626 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3627 "00010001" // /* MW 3 */ + 3628 "00011111" // /* MW 2 */ + 3629 "00001001" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 +.delay_slot + 3630 "10011000" // ST r24, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3631 "00010001" // /* MW 3 */ + 3632 "00000111" // /* MW 2 */ + 3633 "00001001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16_params.h" 138 24 first +.delay_slot + 3634 "10011000" // ST r24, [p1, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3635 "00010001" // /* MW 3 */ + 3636 "00000011" // /* MW 2 */ +.label _ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv__end +.label __ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv___func_end0 + 3637 "00001001" // /* MW 1 */ +.label __Z8init_accILt1EEvPaS0_iii___func_begin0 +.label _Z8init_accILt1EEvPaS0_iii +.function init_acc<(unsigned short)1> _Z8init_accILt1EEvPaS0_iii +.src_ref 2 "gemm_bfp16.h" 38 first +.src_ref 2 "gemm_bfp16.h" 41 47 +.function_start + 3648 "01000100" // MOVXM p2, #508788 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3649 "11101000" // /* MW 5 */ + 3650 "11000110" // /* MW 4 */ + 3651 "11000100" // /* MW 3 */ + 3652 "00000111" // /* MW 2 */ + 3653 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 38 + 3654 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3655 "00000001" // /* MW 5 */ + 3656 "00000000" // /* MW 4 */ + 3657 "00000000" // /* MW 3 */ + 3658 "00001000" // /* MW 2 */ + 3659 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 41 47 first + 3660 "10011000" // LDA.s8 r4, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3661 "10000010" // /* MW 3 */ + 3662 "00000100" // /* MW 2 */ + 3663 "00000010" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 68 6 first + 3664 "01000100" // MOVXM ls, #3824 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3665 "11100000" // /* MW 5 */ + 3666 "11111101" // /* MW 4 */ + 3667 "00000001" // /* MW 3 */ + 3668 "00000000" // /* MW 2 */ + 3669 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 41 47 +.src_ref 2 "gemm_bfp16.h" 68 6 + 3670 "10111010" // MOVA r26, #0; MOVXM le, #3888 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3671 "00010000" // /* MW 9 */ + 3672 "10011000" // /* MW 8 */ + 3673 "10111111" // /* MW 7 */ + 3674 "00000001" // /* MW 6 */ + 3675 "00000000" // /* MW 5 */ + 3676 "00000000" // /* MW 4 */ + 3677 "00000000" // /* MW 3 */ + 3678 "00011010" // /* MW 2 */ + 3679 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 53 4 +.src_ref 2 "gemm_bfp16.h" 53 29 + 3680 "10111010" // MOVA r5, #-4; MOVXM p3, #3776 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3681 "00010000" // /* MW 9 */ + 3682 "01100000" // /* MW 8 */ + 3683 "10110111" // /* MW 7 */ + 3684 "00000001" // /* MW 6 */ + 3685 "00000000" // /* MW 5 */ + 3686 "00000000" // /* MW 4 */ + 3687 "00000000" // /* MW 3 */ + 3688 "10000101" // /* MW 2 */ + 3689 "11111111" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 39 33 +.src_ref 2 "gemm_bfp16.h" 41 47 first +.src_ref 2 "gemm_bfp16.h" 53 29 first +.src_ref 2 "gemm_bfp16.h" 75 43 + 3690 "10111010" // MOVA r3, #5; LSHL r5, r1, r5; VINSERT.32 x1, x0, #0, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3691 "10111000" // /* MW 9 */ + 3692 "10101000" // /* MW 8 */ + 3693 "01000001" // /* MW 7 */ + 3694 "11101100" // /* MW 6 */ + 3695 "01010010" // /* MW 5 */ + 3696 "00000010" // /* MW 4 */ + 3697 "00000000" // /* MW 3 */ + 3698 "10100011" // /* MW 2 */ + 3699 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 39 33 first + 3700 "11100100" // LSHL r7, r0, r3; MOV p2, sp /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3701 "11000001" // /* MW 5 */ + 3702 "11001011" // /* MW 4 */ + 3703 "10110100" // /* MW 3 */ + 3704 "11000111" // /* MW 2 */ + 3705 "00000001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 41 47 +.src_ref 2 "gemm_bfp16.h" 75 43 first + 3706 "11100100" // LSHL r3, r2, r3; VMOV bmll0, x1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3707 "00100101" // /* MW 5 */ + 3708 "00000101" // /* MW 4 */ + 3709 "10110000" // /* MW 3 */ + 3710 "11000111" // /* MW 2 */ + 3711 "00010000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 41 47 +.src_ref 2 "gemm_bfp16.h" 42 54 + 3712 "11100100" // MOVX crRnd, r4; MOV r1, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3713 "10000001" // /* MW 5 */ + 3714 "10100101" // /* MW 4 */ + 3715 "00000000" // /* MW 3 */ + 3716 "01010000" // /* MW 2 */ + 3717 "00100111" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 41 47 first +.src_ref 2 "gemm_bfp16.h" 42 69 +.src_ref 2 "gemm_bfp16.h" 75 14 + 3718 "00110110" // PADDB [p2], #-64; VCONV.bf16.fp32 wl0, bmll0; MOVX r16, #1; MOV m1, r3 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3719 "01111000" // /* MW 11 */ + 3720 "11010000" // /* MW 10 */ + 3721 "10000000" // /* MW 9 */ + 3722 "00101000" // /* MW 8 */ + 3723 "00000000" // /* MW 7 */ + 3724 "00000001" // /* MW 6 */ + 3725 "00100000" // /* MW 5 */ + 3726 "11111111" // /* MW 4 */ + 3727 "11000101" // /* MW 3 */ + 3728 "00000010" // /* MW 2 */ + 3729 "00001000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 42 54 +.src_ref 2 "gemm_bfp16.h" 42 69 first +.src_ref 2 "gemm_bfp16.h" 75 43 + 3730 "10111010" // MOVA r6, #-3; EQ r27, r2, r16; MOV r3, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3731 "01111000" // /* MW 9 */ + 3732 "01100000" // /* MW 8 */ + 3733 "01101010" // /* MW 7 */ + 3734 "00111100" // /* MW 6 */ + 3735 "10111000" // /* MW 5 */ + 3736 "00000101" // /* MW 4 */ + 3737 "00000000" // /* MW 3 */ + 3738 "10100110" // /* MW 2 */ + 3739 "11111111" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 41 22 first +.src_ref 2 "gemm_bfp16.h" 41 47 first +.src_ref 2 "gemm_bfp16.h" 75 43 first + 3740 "10100100" // LSHL r0, r0, r6; VEXTBCST.16 x1, x0, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3741 "00000110" // /* MW 5 */ + 3742 "00000010" // /* MW 4 */ + 3743 "10110001" // /* MW 3 */ + 3744 "00001101" // /* MW 2 */ + 3745 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 42 54 first +.src_ref 2 "gemm_bfp16.h" 44 44 +.src_ref 2 "gemm_bfp16.h" 69 17 +.src_ref 2 "gemm_bfp16.h" 76 14 +.src_ref 2 "gemm_bfp16.h" 77 16 + 3746 "01111110" // NOPA; NOPB; MOVS p1, p0; SEL.EQZ r1, r3, r1, r27; MOV m0, r7 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 3747 "01100000" // /* MW 13 */ + 3748 "00010001" // /* MW 12 */ + 3749 "00110000" // /* MW 11 */ + 3750 "00001111" // /* MW 10 */ + 3751 "00111010" // /* MW 9 */ + 3752 "00000000" // /* MW 8 */ + 3753 "00010010" // /* MW 7 */ + 3754 "11000010" // /* MW 6 */ + 3755 "00100000" // /* MW 5 */ + 3756 "00000000" // /* MW 4 */ + 3757 "11110000" // /* MW 3 */ + 3758 "00101100" // /* MW 2 */ + 3759 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 41 22 first +.src_ref 2 "gemm_bfp16.h" 44 44 first +.src_ref 2 "gemm_bfp16.h" 54 24 +.src_ref 2 "gemm_bfp16.h" 75 14 + 3760 "11100001" // NOPA; PADDB [p0], m0; VST x1, [p2]; ADD r2, r5, #-1; MOV p2, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3761 "00000000" // /* MW 15 */ + 3762 "00000000" // /* MW 14 */ + 3763 "01111000" // /* MW 13 */ + 3764 "01010000" // /* MW 12 */ + 3765 "00110000" // /* MW 11 */ + 3766 "11111001" // /* MW 10 */ + 3767 "00101111" // /* MW 9 */ + 3768 "00001010" // /* MW 8 */ + 3769 "01010011" // /* MW 7 */ + 3770 "00000100" // /* MW 6 */ + 3771 "00100010" // /* MW 5 */ + 3772 "00010111" // /* MW 4 */ + 3773 "11110000" // /* MW 3 */ + 3774 "00101100" // /* MW 2 */ + 3775 "00000000" // /* MW 1 */ +.label TGT_F_Z8init_accILt1EEvPaS0_iii_128 +.src_ref 2 "gemm_bfp16.h" 54 24 first +.src_ref 2 "gemm_bfp16.h" 68 6 first +.loop_nesting 1 + 3776 "11110100" // VLDB wl0, [p2]; MOV lc, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3777 "01000001" // /* MW 5 */ + 3778 "11100000" // /* MW 4 */ + 3779 "10001010" // /* MW 3 */ + 3780 "10000100" // /* MW 2 */ + 3781 "01000000" // /* MW 1 */ + 3782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3783 "00000000" // /* MW 1 */ + 3784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3785 "00000000" // /* MW 1 */ + 3786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3787 "00000000" // /* MW 1 */ + 3788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3789 "00000000" // /* MW 1 */ + 3790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3791 "00000000" // /* MW 1 */ + 3792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3793 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 54 24 +.src_ref 2 "gemm_bfp16.h" 63 39 +.src_ref 2 "gemm_bfp16.h" 64 39 + 3794 "11111000" // VMOV wh0, wl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3795 "00100010" // /* MW 3 */ + 3796 "00000001" // /* MW 2 */ + 3797 "00011000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 54 24 +.src_ref 2 "gemm_bfp16.h" 63 39 first + 3798 "01011000" // VEXTBCST.128 x3, x0, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3799 "00000011" // /* MW 3 */ + 3800 "10000100" // /* MW 2 */ + 3801 "00011001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 54 24 first +.src_ref 2 "gemm_bfp16.h" 64 39 first + 3802 "01011000" // VEXTBCST.128 x1, x0, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3803 "00000111" // /* MW 3 */ + 3804 "10000100" // /* MW 2 */ + 3805 "00011000" // /* MW 1 */ + 3806 "11111000" // VCONV.fp32.bf16 cml0, x3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3807 "10001010" // /* MW 3 */ + 3808 "00000111" // /* MW 2 */ + 3809 "00011000" // /* MW 1 */ + 3810 "11111000" // VCONV.fp32.bf16 cmh0, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3811 "10001010" // /* MW 3 */ + 3812 "10000011" // /* MW 2 */ + 3813 "00011000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 69 17 first + 3814 "11111000" // VMOV bmll1, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3815 "00010010" // /* MW 3 */ + 3816 "00000000" // /* MW 2 */ + 3817 "00011001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 71 19 first + 3818 "11010100" // NOPA; VMOV bmlh1, bmhl0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3819 "00100101" // /* MW 5 */ + 3820 "10000100" // /* MW 4 */ + 3821 "11110010" // /* MW 3 */ + 3822 "00101100" // /* MW 2 */ + 3823 "00000000" // /* MW 1 */ +.label ZLS_F_Z8init_accILt1EEvPaS0_iii_176 +.src_ref 2 "gemm_bfp16.h" 69 17 first +.begin_of_loop +.loop_nesting 2 + 3824 "10011000" // VST bmlh0, [p1, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3825 "00100110" // /* MW 3 */ + 3826 "00010100" // /* MW 2 */ + 3827 "00001001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 69 17 + 3828 "10011000" // VST bmll1, [p1], #128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3829 "10000110" // /* MW 3 */ + 3830 "00101100" // /* MW 2 */ + 3831 "00001001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 70 17 first + 3832 "10011000" // VST bmlh0, [p1, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3833 "00100110" // /* MW 3 */ + 3834 "00010100" // /* MW 2 */ + 3835 "00001001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 70 17 + 3836 "10011000" // VST bmll1, [p1], #128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3837 "10000110" // /* MW 3 */ + 3838 "00101100" // /* MW 2 */ + 3839 "00001001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 71 19 first + 3840 "11100001" // NOPA; NOPB; VST bmhh0, [p0, #64]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3841 "00000000" // /* MW 15 */ + 3842 "00000000" // /* MW 14 */ + 3843 "01111000" // /* MW 13 */ + 3844 "10100101" // /* MW 12 */ + 3845 "00000001" // /* MW 11 */ + 3846 "00000000" // /* MW 10 */ + 3847 "00000000" // /* MW 9 */ + 3848 "10000000" // /* MW 8 */ + 3849 "01100110" // /* MW 7 */ + 3850 "00010100" // /* MW 6 */ + 3851 "00100000" // /* MW 5 */ + 3852 "00000000" // /* MW 4 */ + 3853 "11110000" // /* MW 3 */ + 3854 "00101100" // /* MW 2 */ + 3855 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 71 19 + 3856 "11100001" // NOPA; NOPB; VST bmlh1, [p0], #128; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3857 "00000000" // /* MW 15 */ + 3858 "00000000" // /* MW 14 */ + 3859 "01111000" // /* MW 13 */ + 3860 "10100101" // /* MW 12 */ + 3861 "00000001" // /* MW 11 */ + 3862 "00000000" // /* MW 10 */ + 3863 "00000000" // /* MW 9 */ + 3864 "10000000" // /* MW 8 */ + 3865 "10100110" // /* MW 7 */ + 3866 "00101100" // /* MW 6 */ + 3867 "00100000" // /* MW 5 */ + 3868 "00000000" // /* MW 4 */ + 3869 "11110000" // /* MW 3 */ + 3870 "00101100" // /* MW 2 */ + 3871 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 72 19 first + 3872 "11100001" // NOPA; NOPB; VST bmhh0, [p0, #64]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3873 "00000000" // /* MW 15 */ + 3874 "00000000" // /* MW 14 */ + 3875 "01111000" // /* MW 13 */ + 3876 "10100101" // /* MW 12 */ + 3877 "00000001" // /* MW 11 */ + 3878 "00000000" // /* MW 10 */ + 3879 "00000000" // /* MW 9 */ + 3880 "10000000" // /* MW 8 */ + 3881 "01100110" // /* MW 7 */ + 3882 "00010100" // /* MW 6 */ + 3883 "00100000" // /* MW 5 */ + 3884 "00000000" // /* MW 4 */ + 3885 "11110000" // /* MW 3 */ + 3886 "00101100" // /* MW 2 */ + 3887 "00000000" // /* MW 1 */ +.label ZLE_F_Z8init_accILt1EEvPaS0_iii_240 +.src_ref 2 "gemm_bfp16.h" 72 19 +.end_of_loop + 3888 "11100001" // NOPA; NOPB; VST bmlh1, [p0], #128; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3889 "00000000" // /* MW 15 */ + 3890 "00000000" // /* MW 14 */ + 3891 "01111000" // /* MW 13 */ + 3892 "10100101" // /* MW 12 */ + 3893 "00000001" // /* MW 11 */ + 3894 "00000000" // /* MW 10 */ + 3895 "00000000" // /* MW 9 */ + 3896 "10000000" // /* MW 8 */ + 3897 "10100110" // /* MW 7 */ + 3898 "00101100" // /* MW 6 */ + 3899 "00100000" // /* MW 5 */ + 3900 "00000000" // /* MW 4 */ + 3901 "11110000" // /* MW 3 */ + 3902 "00101100" // /* MW 2 */ + 3903 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 53 4 first +.src_ref 2 "gemm_bfp16.h" 75 14 first +.src_ref 2 "gemm_bfp16.h" 76 14 first +.loop_nesting 1 + 3904 "00010010" // PADDA [p1], m0; PADDB [p2], m1; JNZD r2, r2, p3 /* MW 8 */ /* control_operation: words=8 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 3905 "11100000" // /* MW 7 */ + 3906 "10000100" // /* MW 6 */ + 3907 "00100000" // /* MW 5 */ + 3908 "01010111" // /* MW 4 */ + 3909 "11110100" // /* MW 3 */ + 3910 "00001100" // /* MW 2 */ + 3911 "00100001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 77 16 first +.delay_slot + 3912 "00011000" // PADDB [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3913 "10010000" // /* MW 3 */ + 3914 "00001011" // /* MW 2 */ + 3915 "00111000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3917 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3919 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3920 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3921 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3923 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 80 first +.loop_nesting 0 + 3924 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3925 "00000000" // /* MW 3 */ + 3926 "00101000" // /* MW 2 */ + 3927 "00010000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 80 +.delay_slot + 3928 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3929 "00000001" // /* MW 5 */ + 3930 "00000000" // /* MW 4 */ + 3931 "00000000" // /* MW 3 */ + 3932 "11111000" // /* MW 2 */ + 3933 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3935 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3937 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3939 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z8init_accILt1EEvPaS0_iii__end +.label __Z8init_accILt1EEvPaS0_iii___func_end0 + 3941 "00000000" // /* MW 1 */ +.label __Z12post_processPai___func_begin0 +.label _Z12post_processPai +.function post_process _Z12post_processPai +.src_ref 2 "gemm_bfp16.h" 83 first +.src_ref 2 "gemm_bfp16.h" 92 26 +.src_ref 2 "gemm_bfp16.h" 93 12 +.src_ref 2 "gemm_bfp16.h" 97 21 +.src_ref 2 "gemm_bfp16.h" 97 23 +.function_start + 3952 "01110110" // MOVA m0, #512; MOVS p2, p0; MOVXM p1, #508788 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3953 "00010000" // /* MW 11 */ + 3954 "10111010" // /* MW 10 */ + 3955 "10110001" // /* MW 9 */ + 3956 "11110000" // /* MW 8 */ + 3957 "00000001" // /* MW 7 */ + 3958 "00000000" // /* MW 6 */ + 3959 "10001011" // /* MW 5 */ + 3960 "10000000" // /* MW 4 */ + 3961 "10000010" // /* MW 3 */ + 3962 "00000000" // /* MW 2 */ + 3963 "01000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 92 26 +.src_ref 2 "gemm_bfp16.h" 94 26 +.src_ref 2 "gemm_bfp16.h" 94 26 +.src_ref 2 "gemm_bfp16.h" 95 26 +.src_ref 2 "gemm_bfp16.h" 96 26 + 3964 "10111010" // MOVA r1, #-7; MOVX r2, #0; MOV r4, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3965 "01011000" // /* MW 9 */ + 3966 "00000001" // /* MW 8 */ + 3967 "10001000" // /* MW 7 */ + 3968 "00001000" // /* MW 6 */ + 3969 "00100000" // /* MW 5 */ + 3970 "00000000" // /* MW 4 */ + 3971 "00000000" // /* MW 3 */ + 3972 "00100001" // /* MW 2 */ + 3973 "11111111" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 92 26 first +.src_ref 2 "gemm_bfp16.h" 93 12 +.src_ref 2 "gemm_bfp16.h" 94 14 +.src_ref 2 "gemm_bfp16.h" 94 26 first +.src_ref 2 "gemm_bfp16.h" 95 14 +.src_ref 2 "gemm_bfp16.h" 96 14 + 3974 "01110110" // LDA.s8 r24, [p1]; MOVS p1, p0; OR r16, r2, r4; MOV r3, #7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3975 "01011000" // /* MW 11 */ + 3976 "00000111" // /* MW 10 */ + 3977 "01101000" // /* MW 9 */ + 3978 "00101100" // /* MW 8 */ + 3979 "00000010" // /* MW 7 */ + 3980 "00000101" // /* MW 6 */ + 3981 "10001011" // /* MW 5 */ + 3982 "10000000" // /* MW 4 */ + 3983 "01010001" // /* MW 3 */ + 3984 "11100000" // /* MW 2 */ + 3985 "00100000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 92 26 +.src_ref 2 "gemm_bfp16.h" 93 12 first +.src_ref 2 "gemm_bfp16.h" 95 26 + 3986 "10111010" // VLDA bmlh1, [p1, #64]; LSHL r1, r0, r1; MOV r5, #2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3987 "01011000" // /* MW 9 */ + 3988 "00000010" // /* MW 8 */ + 3989 "10101000" // /* MW 7 */ + 3990 "11101100" // /* MW 6 */ + 3991 "00010000" // /* MW 5 */ + 3992 "00000000" // /* MW 4 */ + 3993 "10110000" // /* MW 3 */ + 3994 "10010110" // /* MW 2 */ + 3995 "00100010" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 93 12 +.src_ref 2 "gemm_bfp16.h" 94 14 +.src_ref 2 "gemm_bfp16.h" 94 14 first +.src_ref 2 "gemm_bfp16.h" 95 14 +.src_ref 2 "gemm_bfp16.h" 96 14 + 3996 "10111010" // VLDA bmll1, [p1], m0; LSHL r18, r16, r3; MOV r0, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3997 "01111000" // /* MW 9 */ + 3998 "01100000" // /* MW 8 */ + 3999 "00001000" // /* MW 7 */ + 4000 "11101100" // /* MW 6 */ + 4001 "00100001" // /* MW 5 */ + 4002 "00100001" // /* MW 4 */ + 4003 "10110000" // /* MW 3 */ + 4004 "00010010" // /* MW 2 */ + 4005 "00100001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 94 14 +.src_ref 2 "gemm_bfp16.h" 95 26 first +.src_ref 2 "gemm_bfp16.h" 96 26 + 4006 "10111010" // MOVA r6, #3; OR r7, r5, r2; ADD.NC p3, r18, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4007 "10101000" // /* MW 9 */ + 4008 "10000000" // /* MW 8 */ + 4009 "10110100" // /* MW 7 */ + 4010 "00101101" // /* MW 6 */ + 4011 "01110001" // /* MW 5 */ + 4012 "00001010" // /* MW 4 */ + 4013 "00000000" // /* MW 3 */ + 4014 "01100110" // /* MW 2 */ + 4015 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 92 2 first +.src_ref 2 "gemm_bfp16.h" 94 12 first +.src_ref 2 "gemm_bfp16.h" 95 14 + 4016 "10111010" // VLDA bmhh0, [p3, #64]; LSHL r19, r7, r3; ADD.NC lc, r1, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4017 "11001000" // /* MW 9 */ + 4018 "01111111" // /* MW 8 */ + 4019 "10111000" // /* MW 7 */ + 4020 "11101110" // /* MW 6 */ + 4021 "00110001" // /* MW 5 */ + 4022 "00001111" // /* MW 4 */ + 4023 "10110000" // /* MW 3 */ + 4024 "10001110" // /* MW 2 */ + 4025 "01100010" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 94 12 +.src_ref 2 "gemm_bfp16.h" 95 14 first +.src_ref 2 "gemm_bfp16.h" 96 26 first + 4026 "10111010" // VLDA bmhl0, [p3]; OR r17, r6, r2; ADD.NC p4, r19, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4027 "10101000" // /* MW 9 */ + 4028 "11000000" // /* MW 8 */ + 4029 "00110100" // /* MW 7 */ + 4030 "00101110" // /* MW 6 */ + 4031 "00010001" // /* MW 5 */ + 4032 "00001101" // /* MW 4 */ + 4033 "10110000" // /* MW 3 */ + 4034 "10001010" // /* MW 2 */ + 4035 "01100000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 95 12 +.src_ref 2 "gemm_bfp16.h" 96 14 + 4036 "10111010" // VLDA bmlh0, [p4, #64]; LSHL r20, r17, r3; ADD.NC r2, r2, #4 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4037 "00001000" // /* MW 9 */ + 4038 "10000001" // /* MW 8 */ + 4039 "01001000" // /* MW 7 */ + 4040 "11101100" // /* MW 6 */ + 4041 "01000001" // /* MW 5 */ + 4042 "00100011" // /* MW 4 */ + 4043 "10110000" // /* MW 3 */ + 4044 "10000110" // /* MW 2 */ + 4045 "10000010" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 95 12 +.src_ref 2 "gemm_bfp16.h" 96 14 +.src_ref 2 "gemm_bfp16.h" 97 21 +.src_ref 2 "gemm_bfp16.h" 97 23 +.src_ref 2 "gemm_bfp16.h" 98 4 +.src_ref 2 "gemm_bfp16.h" 98 21 +.src_ref 2 "gemm_bfp16.h" 98 23 +.src_ref 2 "gemm_bfp16.h" 99 4 +.src_ref 2 "gemm_bfp16.h" 99 21 +.src_ref 2 "gemm_bfp16.h" 99 23 +.src_ref 2 "gemm_bfp16.h" 100 4 +.src_ref 2 "gemm_bfp16.h" 100 21 +.src_ref 2 "gemm_bfp16.h" 100 23 + 4046 "10111010" // VLDA bmll0, [p4]; MOVX crRnd, r24; ADD.NC p5, r20, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4047 "10101000" // /* MW 9 */ + 4048 "00000000" // /* MW 8 */ + 4049 "10110101" // /* MW 7 */ + 4050 "00000010" // /* MW 6 */ + 4051 "11010100" // /* MW 5 */ + 4052 "00110001" // /* MW 4 */ + 4053 "10110000" // /* MW 3 */ + 4054 "10000010" // /* MW 2 */ + 4055 "10000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 92 2 first +.src_ref 2 "gemm_bfp16.h" 96 12 + 4056 "10111010" // VLDA bmhh1, [p5, #64]; MOVXM ls, #4096 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4057 "00010000" // /* MW 9 */ + 4058 "00000000" // /* MW 8 */ + 4059 "01111000" // /* MW 7 */ + 4060 "00000100" // /* MW 6 */ + 4061 "00000000" // /* MW 5 */ + 4062 "00000000" // /* MW 4 */ + 4063 "10110000" // /* MW 3 */ + 4064 "10011110" // /* MW 2 */ + 4065 "10100010" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 92 2 +.src_ref 2 "gemm_bfp16.h" 96 12 first + 4066 "10111010" // VLDA bmhl1, [p5]; MOVXM le, #4192 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4067 "00010000" // /* MW 9 */ + 4068 "00110000" // /* MW 8 */ + 4069 "10111000" // /* MW 7 */ + 4070 "00000101" // /* MW 6 */ + 4071 "00000000" // /* MW 5 */ + 4072 "00000000" // /* MW 4 */ + 4073 "10110000" // /* MW 3 */ + 4074 "10011010" // /* MW 2 */ + 4075 "10100000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 98 4 +.src_ref 2 "gemm_bfp16.h" 99 4 +.src_ref 2 "gemm_bfp16.h" 100 4 + 4076 "00011000" // MOVX r1, #6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4077 "00011001" // /* MW 3 */ + 4078 "00000010" // /* MW 2 */ + 4079 "00010000" // /* MW 1 */ + 4080 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4081 "00000000" // /* MW 15 */ + 4082 "00000000" // /* MW 14 */ + 4083 "01111000" // /* MW 13 */ + 4084 "10100101" // /* MW 12 */ + 4085 "00000001" // /* MW 11 */ + 4086 "00000000" // /* MW 10 */ + 4087 "00000000" // /* MW 9 */ + 4088 "00000000" // /* MW 8 */ + 4089 "01011011" // /* MW 7 */ + 4090 "00000001" // /* MW 6 */ + 4091 "00100000" // /* MW 5 */ + 4092 "00000000" // /* MW 4 */ + 4093 "11110000" // /* MW 3 */ + 4094 "00101100" // /* MW 2 */ + 4095 "00000000" // /* MW 1 */ +.label ZLS_F_Z12post_processPai_144 +.src_ref 2 "gemm_bfp16.h" 97 21 first +.src_ref 2 "gemm_bfp16.h" 97 23 first +.src_ref 2 "gemm_bfp16.h" 98 4 first +.begin_of_loop +.loop_nesting 1 + 4096 "01011100" // VST.CONV.bf16.fp32 cml1, [p2], #256;LSHL r21, r16, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4097 "00111011" // /* MW 5 */ + 4098 "01010100" // /* MW 4 */ + 4099 "01101000" // /* MW 3 */ + 4100 "10010100" // /* MW 2 */ + 4101 "01001001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 98 4 +.src_ref 2 "gemm_bfp16.h" 98 21 +.src_ref 2 "gemm_bfp16.h" 98 23 +.src_ref 2 "gemm_bfp16.h" 99 4 first + 4102 "11100100" // LSHL r22, r7, r1; MOV dj2, r21 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4103 "01000001" // /* MW 5 */ + 4104 "00010101" // /* MW 4 */ + 4105 "10110101" // /* MW 3 */ + 4106 "10000011" // /* MW 2 */ + 4107 "00111101" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 98 4 first +.src_ref 2 "gemm_bfp16.h" 98 21 first +.src_ref 2 "gemm_bfp16.h" 98 23 first +.src_ref 2 "gemm_bfp16.h" 99 4 +.src_ref 2 "gemm_bfp16.h" 99 21 +.src_ref 2 "gemm_bfp16.h" 99 23 +.src_ref 2 "gemm_bfp16.h" 100 4 first + 4108 "00111010" // VST.CONV.bf16.fp32 cmh0, [p0, dj2];LSHL r23, r17, r1; MOV dj0, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4109 "01111001" // /* MW 9 */ + 4110 "10010000" // /* MW 8 */ + 4111 "01000101" // /* MW 7 */ + 4112 "11101100" // /* MW 6 */ + 4113 "01110000" // /* MW 5 */ + 4114 "00100011" // /* MW 4 */ + 4115 "01100000" // /* MW 3 */ + 4116 "00001100" // /* MW 2 */ + 4117 "00001000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 94 26 first +.src_ref 2 "gemm_bfp16.h" 99 4 first +.src_ref 2 "gemm_bfp16.h" 99 21 first +.src_ref 2 "gemm_bfp16.h" 99 23 first +.src_ref 2 "gemm_bfp16.h" 100 4 +.src_ref 2 "gemm_bfp16.h" 100 21 +.src_ref 2 "gemm_bfp16.h" 100 23 + 4118 "00111010" // VST.CONV.bf16.fp32 cml0, [p0, dj0];OR r16, r2, r4; MOV dj1, r23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4119 "01111001" // /* MW 9 */ + 4120 "11010000" // /* MW 8 */ + 4121 "11000101" // /* MW 7 */ + 4122 "00101100" // /* MW 6 */ + 4123 "00000010" // /* MW 5 */ + 4124 "00000101" // /* MW 4 */ + 4125 "01100000" // /* MW 3 */ + 4126 "00000100" // /* MW 2 */ + 4127 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 95 26 first + 4128 "10011000" // OR r7, r5, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4129 "00100101" // /* MW 3 */ + 4130 "01001110" // /* MW 2 */ + 4131 "00010001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 96 26 first +.src_ref 2 "gemm_bfp16.h" 100 4 first +.src_ref 2 "gemm_bfp16.h" 100 21 first +.src_ref 2 "gemm_bfp16.h" 100 23 first + 4132 "00111010" // VST.CONV.bf16.fp32 cmh1, [p0, dj1];OR r17, r6, r2; ADD.NC r2, r2, #4 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4133 "00001001" // /* MW 9 */ + 4134 "10000001" // /* MW 8 */ + 4135 "01001000" // /* MW 7 */ + 4136 "00101100" // /* MW 6 */ + 4137 "00010001" // /* MW 5 */ + 4138 "00001101" // /* MW 4 */ + 4139 "01100000" // /* MW 3 */ + 4140 "00011100" // /* MW 2 */ + 4141 "00000100" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 93 12 first +.src_ref 2 "gemm_bfp16.h" 94 14 first + 4142 "00101100" // VLDA bmlh1, [p1, #64]; LSHL r18, r16, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4143 "01111011" // /* MW 5 */ + 4144 "01001000" // /* MW 4 */ + 4145 "10111000" // /* MW 3 */ + 4146 "10010110" // /* MW 2 */ + 4147 "00100010" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 93 12 +.src_ref 2 "gemm_bfp16.h" 94 14 +.src_ref 2 "gemm_bfp16.h" 95 14 first + 4148 "10111010" // VLDA bmll1, [p1], m0; LSHL r19, r7, r3; ADD.NC p3, r18, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4149 "10101000" // /* MW 9 */ + 4150 "10000000" // /* MW 8 */ + 4151 "10110100" // /* MW 7 */ + 4152 "11101101" // /* MW 6 */ + 4153 "00110001" // /* MW 5 */ + 4154 "00001111" // /* MW 4 */ + 4155 "10110000" // /* MW 3 */ + 4156 "00010010" // /* MW 2 */ + 4157 "00100001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 94 12 first +.src_ref 2 "gemm_bfp16.h" 95 14 +.src_ref 2 "gemm_bfp16.h" 96 14 first + 4158 "10111010" // VLDA bmhh0, [p3, #64]; LSHL r20, r17, r3; ADD.NC p4, r19, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4159 "10101000" // /* MW 9 */ + 4160 "11000000" // /* MW 8 */ + 4161 "00110100" // /* MW 7 */ + 4162 "11101110" // /* MW 6 */ + 4163 "01000001" // /* MW 5 */ + 4164 "00100011" // /* MW 4 */ + 4165 "10110000" // /* MW 3 */ + 4166 "10001110" // /* MW 2 */ + 4167 "01100010" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 94 12 +.src_ref 2 "gemm_bfp16.h" 96 14 + 4168 "10010100" // VLDA bmhl0, [p3]; ADD.NC p5, r20, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4169 "00000010" // /* MW 5 */ + 4170 "11010100" // /* MW 4 */ + 4171 "10111010" // /* MW 3 */ + 4172 "10001010" // /* MW 2 */ + 4173 "01100000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 95 12 first + 4174 "10011000" // VLDA bmlh0, [p4, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4175 "00110101" // /* MW 3 */ + 4176 "00010100" // /* MW 2 */ + 4177 "00000100" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 95 12 + 4178 "10011000" // VLDA bmll0, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4179 "00010101" // /* MW 3 */ + 4180 "00000100" // /* MW 2 */ + 4181 "00000100" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 96 12 first + 4182 "10011000" // VLDA bmhh1, [p5, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4183 "11110101" // /* MW 3 */ + 4184 "00010100" // /* MW 2 */ + 4185 "00000101" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 96 12 + 4186 "00111100" // VLDA bmhl1, [p5]; NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4187 "00100000" // /* MW 5 */ + 4188 "00000000" // /* MW 4 */ + 4189 "10110000" // /* MW 3 */ + 4190 "10011010" // /* MW 2 */ + 4191 "10100000" // /* MW 1 */ +.label ZLE_F_Z12post_processPai_240 +.end_of_loop + 4192 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4193 "00000000" // /* MW 15 */ + 4194 "00000000" // /* MW 14 */ + 4195 "01111000" // /* MW 13 */ + 4196 "10100101" // /* MW 12 */ + 4197 "00000001" // /* MW 11 */ + 4198 "00000000" // /* MW 10 */ + 4199 "00000000" // /* MW 9 */ + 4200 "00000000" // /* MW 8 */ + 4201 "01011011" // /* MW 7 */ + 4202 "00000001" // /* MW 6 */ + 4203 "00100000" // /* MW 5 */ + 4204 "00000000" // /* MW 4 */ + 4205 "11110000" // /* MW 3 */ + 4206 "00101100" // /* MW 2 */ + 4207 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 97 21 first +.src_ref 2 "gemm_bfp16.h" 97 23 first +.src_ref 2 "gemm_bfp16.h" 98 4 first +.loop_nesting 0 + 4208 "01011100" // VST.CONV.bf16.fp32 cml1, [p2], #256;LSHL r21, r16, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4209 "00111011" // /* MW 5 */ + 4210 "01010100" // /* MW 4 */ + 4211 "01101000" // /* MW 3 */ + 4212 "10010100" // /* MW 2 */ + 4213 "01001001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 98 4 +.src_ref 2 "gemm_bfp16.h" 98 21 +.src_ref 2 "gemm_bfp16.h" 98 23 +.src_ref 2 "gemm_bfp16.h" 102 first + 4214 "11100100" // RET lr; MOV dj2, r21 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 4215 "01000001" // /* MW 5 */ + 4216 "00010101" // /* MW 4 */ + 4217 "00000101" // /* MW 3 */ + 4218 "00000000" // /* MW 2 */ + 4219 "00000101" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 98 4 first +.src_ref 2 "gemm_bfp16.h" 98 21 first +.src_ref 2 "gemm_bfp16.h" 98 23 first +.src_ref 2 "gemm_bfp16.h" 99 4 first +.delay_slot + 4220 "01011100" // VST.CONV.bf16.fp32 cmh0, [p0, dj2];LSHL r22, r7, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4221 "00111011" // /* MW 5 */ + 4222 "11011000" // /* MW 4 */ + 4223 "01100011" // /* MW 3 */ + 4224 "00001100" // /* MW 2 */ + 4225 "00001000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 99 4 +.src_ref 2 "gemm_bfp16.h" 99 21 +.src_ref 2 "gemm_bfp16.h" 99 23 +.src_ref 2 "gemm_bfp16.h" 100 4 first +.delay_slot + 4226 "11100100" // LSHL r23, r17, r1; MOV dj0, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4227 "01000001" // /* MW 5 */ + 4228 "00010110" // /* MW 4 */ + 4229 "10110001" // /* MW 3 */ + 4230 "11000011" // /* MW 2 */ + 4231 "10001101" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 99 4 first +.src_ref 2 "gemm_bfp16.h" 99 21 first +.src_ref 2 "gemm_bfp16.h" 99 23 first +.src_ref 2 "gemm_bfp16.h" 100 4 +.src_ref 2 "gemm_bfp16.h" 100 21 +.src_ref 2 "gemm_bfp16.h" 100 23 +.delay_slot + 4232 "00000010" // VST.CONV.bf16.fp32 cml0, [p0, dj0]; MOV dj1, r23 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4233 "01110000" // /* MW 7 */ + 4234 "11010000" // /* MW 6 */ + 4235 "11000101" // /* MW 5 */ + 4236 "00000000" // /* MW 4 */ + 4237 "01100000" // /* MW 3 */ + 4238 "00000100" // /* MW 2 */ + 4239 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 100 4 first +.src_ref 2 "gemm_bfp16.h" 100 21 first +.src_ref 2 "gemm_bfp16.h" 100 23 first +.delay_slot + 4240 "00011000" // VST.CONV.bf16.fp32 cmh1, [p0, dj1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4241 "11100011" // /* MW 3 */ + 4242 "00100000" // /* MW 2 */ + 4243 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4244 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z12post_processPai__end +.label __Z12post_processPai___func_end0 + 4245 "00000000" // /* MW 1 */ +.label __Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params___func_begin0 +.label _Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params +.function gemm_bfp16 _Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params +.src_ref 2 "gemm_bfp16.h" 225 first +.src_ref 2 "gemm_bfp16.h" 231 12 +.src_ref 2 "gemm_bfp16.h" 231 12 +.function_start + 4256 "01110110" // MOVA m4, #-300; MOVS p4, p7; MOVXM p7, #508736 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4257 "00010000" // /* MW 11 */ + 4258 "10100000" // /* MW 10 */ + 4259 "10110001" // /* MW 9 */ + 4260 "11110011" // /* MW 8 */ + 4261 "00000001" // /* MW 7 */ + 4262 "00000000" // /* MW 6 */ + 4263 "10001011" // /* MW 5 */ + 4264 "10011100" // /* MW 4 */ + 4265 "10000100" // /* MW 3 */ + 4266 "10010000" // /* MW 2 */ + 4267 "11011010" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 231 12 first + 4268 "10011000" // LDA r16, [p7], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4269 "00010110" // /* MW 3 */ + 4270 "10001010" // /* MW 2 */ + 4271 "00000111" // /* MW 1 */ + 4272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4273 "00000000" // /* MW 1 */ + 4274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4275 "00000000" // /* MW 1 */ + 4276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4277 "00000000" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 6 "array_helpers.hpp" 313 19 +.src_ref 7 "accum.hpp" 903 19 +.src_ref 7 "accum.hpp" 940 83 +.src_ref 7 "accum.hpp" 940 83 + 4278 "00000010" // MOVS p0, p6; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4279 "01110000" // /* MW 7 */ + 4280 "01100000" // /* MW 6 */ + 4281 "00110000" // /* MW 5 */ + 4282 "00000011" // /* MW 4 */ + 4283 "01100000" // /* MW 3 */ + 4284 "00010001" // /* MW 2 */ + 4285 "00010011" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 225 + 4286 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4287 "00000001" // /* MW 5 */ + 4288 "00000000" // /* MW 4 */ + 4289 "00000000" // /* MW 3 */ + 4290 "00001000" // /* MW 2 */ + 4291 "00000000" // /* MW 1 */ + 4292 "10011000" // ST p0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4293 "00011101" // /* MW 3 */ + 4294 "11111100" // /* MW 2 */ + 4295 "00001111" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 231 6 +.src_ref 2 "gemm_bfp16.h" 231 28 + 4296 "00111010" // ST p4, [sp, #-16]; JNZ r16, #4384 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4384 delay_slots=5 */ + 4297 "01100001" // /* MW 9 */ + 4298 "00000000" // /* MW 8 */ + 4299 "00010000" // /* MW 7 */ + 4300 "00100100" // /* MW 6 */ + 4301 "00000010" // /* MW 5 */ + 4302 "00100000" // /* MW 4 */ + 4303 "10110000" // /* MW 3 */ + 4304 "01000011" // /* MW 2 */ + 4305 "11111110" // /* MW 1 */ +.delay_slot + 4306 "10011000" // ST p2, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4307 "00011101" // /* MW 3 */ + 4308 "11110101" // /* MW 2 */ + 4309 "00001111" // /* MW 1 */ +.delay_slot + 4310 "10011000" // ST p1, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4311 "10011101" // /* MW 3 */ + 4312 "11101100" // /* MW 2 */ + 4313 "00001111" // /* MW 1 */ +.src_ref 8 "tile.hpp" 74 8 +.src_ref 8 "tile.hpp" 74 8 +.delay_slot + 4314 "01110110" // MOVA r18, #1; ST lr, [sp, #-8]; MOVXM p0, #508784 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4315 "00010000" // /* MW 11 */ + 4316 "10111000" // /* MW 10 */ + 4317 "00110001" // /* MW 9 */ + 4318 "11110000" // /* MW 8 */ + 4319 "00000001" // /* MW 7 */ + 4320 "10000000" // /* MW 6 */ + 4321 "00111101" // /* MW 5 */ + 4322 "11111000" // /* MW 4 */ + 4323 "00000111" // /* MW 3 */ + 4324 "00110010" // /* MW 2 */ + 4325 "00000000" // /* MW 1 */ +.src_ref 8 "tile.hpp" 74 8 first +.src_ref 8 "tile.hpp" 86 8 +.src_ref 8 "tile.hpp" 86 8 +.delay_slot + 4326 "01110110" // MOVA r17, #11; ST r18, [p0]; MOVXM p0, #508788 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4327 "00010000" // /* MW 11 */ + 4328 "10111010" // /* MW 10 */ + 4329 "00110001" // /* MW 9 */ + 4330 "11110000" // /* MW 8 */ + 4331 "00000001" // /* MW 7 */ + 4332 "10000000" // /* MW 6 */ + 4333 "01010001" // /* MW 5 */ + 4334 "00000110" // /* MW 4 */ + 4335 "00000000" // /* MW 3 */ + 4336 "01110001" // /* MW 2 */ + 4337 "00000001" // /* MW 1 */ +.src_ref 8 "tile.hpp" 86 8 first +.src_ref 2 "gemm_bfp16.h" 235 66 +.delay_slot + 4338 "10111010" // ST.s8 r17, [p0]; MOVXM p5, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4339 "00010000" // /* MW 9 */ + 4340 "00000000" // /* MW 8 */ + 4341 "10110001" // /* MW 7 */ + 4342 "11110010" // /* MW 6 */ + 4343 "00000001" // /* MW 5 */ + 4344 "00000000" // /* MW 4 */ + 4345 "11100000" // /* MW 3 */ + 4346 "11000100" // /* MW 2 */ + 4347 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 235 4 +.src_ref 2 "gemm_bfp16.h" 235 66 first + 4348 "11010100" // LDA r0, [p5], #8; MOV p0, p2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4349 "10000001" // /* MW 5 */ + 4350 "11001001" // /* MW 4 */ + 4351 "11010000" // /* MW 3 */ + 4352 "10000010" // /* MW 2 */ + 4353 "10100101" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 235 79 + 4354 "10011000" // LDA r1, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4355 "00110110" // /* MW 3 */ + 4356 "00000100" // /* MW 2 */ + 4357 "00000101" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 235 92 + 4358 "10011000" // LDA r2, [p5, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4359 "01010110" // /* MW 3 */ + 4360 "00010100" // /* MW 2 */ + 4361 "00000101" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 235 4 +.no_stack_arguments + 4362 "00000100" // JL #3648 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3648 delay_slots=5 */ + 4363 "00000001" // /* MW 5 */ + 4364 "00000000" // /* MW 4 */ + 4365 "00100000" // /* MW 3 */ + 4366 "00000111" // /* MW 2 */ + 4367 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 235 4 +.delay_slot + 4368 "11111000" // MOV p1, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4369 "11000000" // /* MW 3 */ + 4370 "01100110" // /* MW 2 */ + 4371 "00011001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4373 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4375 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4377 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4378 "00111100" // NOPA; NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4379 "00100000" // /* MW 5 */ + 4380 "00000000" // /* MW 4 */ + 4381 "11110000" // /* MW 3 */ + 4382 "00101100" // /* MW 2 */ + 4383 "00000000" // /* MW 1 */ +.label TGT_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_128 +.src_ref 4 "tuple" 562 47 +.src_ref 8 "tile.hpp" 86 8 +.src_ref 2 "gemm_bfp16.h" 252 79 +.src_ref 2 "gemm_bfp16.h" 252 85 +.return_address + 4384 "10111010" // MOVA r16, #184; MOVX r18, #-184; MOV m4, #220 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4385 "01011000" // /* MW 9 */ + 4386 "11011100" // /* MW 8 */ + 4387 "00000000" // /* MW 7 */ + 4388 "00001010" // /* MW 6 */ + 4389 "00100001" // /* MW 5 */ + 4390 "00111011" // /* MW 4 */ + 4391 "00000000" // /* MW 3 */ + 4392 "00010000" // /* MW 2 */ + 4393 "00010111" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 252 85 first + 4394 "10011000" // LDA r27, [p7], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4395 "01110110" // /* MW 3 */ + 4396 "10001011" // /* MW 2 */ + 4397 "00000111" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 +.src_ref 2 "gemm_bfp16.h" 252 79 + 4398 "11111000" // MOV r19, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4399 "11000000" // /* MW 3 */ + 4400 "11011110" // /* MW 2 */ + 4401 "00011100" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 126 19 + 4402 "00011000" // ADD.NC r20, r19, #-56 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4403 "11100100" // /* MW 3 */ + 4404 "00011001" // /* MW 2 */ + 4405 "00011101" // /* MW 1 */ +.src_ref 8 "tile.hpp" 86 8 + 4406 "01011000" // ADD.NC p7, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4407 "01010001" // /* MW 3 */ + 4408 "01101001" // /* MW 2 */ + 4409 "00011111" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 +.src_ref 2 "gemm_bfp16.h" 252 79 + 4410 "00011000" // MOVX r17, #240 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4411 "11000001" // /* MW 3 */ + 4412 "11100010" // /* MW 2 */ + 4413 "00010000" // /* MW 1 */ + 4414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4415 "00000000" // /* MW 1 */ + 4416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4417 "00000000" // /* MW 1 */ +.src_ref 3 "aie.hpp" 6982 6 first +.src_ref 2 "gemm_bfp16.h" 252 79 + 4418 "00011000" // SEL.EQZ r18, r20, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4419 "00110010" // /* MW 3 */ + 4420 "00100101" // /* MW 2 */ + 4421 "00010101" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 first +.src_ref 4 "tuple" 562 47 first +.src_ref 2 "gemm_bfp16.h" 252 79 first + 4422 "00100100" // SEL.EQZ r16, r16, r17, r27; ADD.NC p3, r18, #4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4423 "00000100" // /* MW 5 */ + 4424 "11010010" // /* MW 4 */ + 4425 "01000110" // /* MW 3 */ + 4426 "00100010" // /* MW 2 */ + 4427 "10000100" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 +.src_ref 2 "gemm_bfp16.h" 134 10 first +.src_ref 2 "gemm_bfp16.h" 252 79 + 4428 "10111010" // LDA dj1, [p3], #4; JZ r27, #4688 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4688 delay_slots=5 */ + 4429 "01100000" // /* MW 9 */ + 4430 "00000000" // /* MW 8 */ + 4431 "00000000" // /* MW 7 */ + 4432 "01001010" // /* MW 6 */ + 4433 "00000010" // /* MW 5 */ + 4434 "00110110" // /* MW 4 */ + 4435 "11010000" // /* MW 3 */ + 4436 "10011000" // /* MW 2 */ + 4437 "01100011" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 +.src_ref 4 "tuple" 562 47 +.src_ref 4 "tuple" 562 47 first +.delay_slot + 4438 "11010100" // LDA dn5, [p3], #4; MOV dj3, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4439 "01000001" // /* MW 5 */ + 4440 "00010000" // /* MW 4 */ + 4441 "11010111" // /* MW 3 */ + 4442 "11010100" // /* MW 2 */ + 4443 "01100011" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 +.delay_slot + 4444 "10011000" // LDA dj5, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4445 "11000110" // /* MW 3 */ + 4446 "00011110" // /* MW 2 */ + 4447 "00000011" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 +.src_ref 4 "tuple" 562 47 +.delay_slot + 4448 "10011000" // LDA dn1, [p7, dj3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4449 "10100110" // /* MW 3 */ + 4450 "01100000" // /* MW 2 */ + 4451 "00000111" // /* MW 1 */ +.src_ref 4 "tuple" 562 47 +.delay_slot + 4452 "10011000" // LDA r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4453 "00010110" // /* MW 3 */ + 4454 "00000110" // /* MW 2 */ + 4455 "00000011" // /* MW 1 */ +.src_ref 4 "tuple" 562 49 +.delay_slot + 4456 "10011000" // LDA m4, [p3, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4457 "00000110" // /* MW 3 */ + 4458 "00010110" // /* MW 2 */ + 4459 "00000011" // /* MW 1 */ +.src_ref 8 "vector.hpp" 109 101 +.src_ref 2 "gemm_bfp16.h" 113 16 +.src_ref 2 "gemm_bfp16.h" 135 60 + 4460 "10111010" // LDA p3, [sp, #-20]; MOVXM p2, #508448 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4461 "00010000" // /* MW 9 */ + 4462 "00010000" // /* MW 8 */ + 4463 "00110001" // /* MW 7 */ + 4464 "11110001" // /* MW 6 */ + 4465 "00000001" // /* MW 5 */ + 4466 "00000000" // /* MW 4 */ + 4467 "00100000" // /* MW 3 */ + 4468 "10110011" // /* MW 2 */ + 4469 "11111101" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 110 4 first +.src_ref 2 "gemm_bfp16.h" 135 60 first + 4470 "10111010" // LDA r19, [p2]; MOVXM ls, #4560 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4471 "00010000" // /* MW 9 */ + 4472 "11101000" // /* MW 8 */ + 4473 "01111000" // /* MW 7 */ + 4474 "00000100" // /* MW 6 */ + 4475 "00000000" // /* MW 5 */ + 4476 "00000000" // /* MW 4 */ + 4477 "11010000" // /* MW 3 */ + 4478 "11001110" // /* MW 2 */ + 4479 "01000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 110 4 +.src_ref 2 "gemm_bfp16.h" 135 68 + 4480 "10111010" // MOVA r20, #-6; MOVXM le, #4624 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4481 "00010000" // /* MW 9 */ + 4482 "00001000" // /* MW 8 */ + 4483 "10111001" // /* MW 7 */ + 4484 "00000101" // /* MW 6 */ + 4485 "00000000" // /* MW 5 */ + 4486 "00000000" // /* MW 4 */ + 4487 "00000000" // /* MW 3 */ + 4488 "01010100" // /* MW 2 */ + 4489 "11111111" // /* MW 1 */ +.src_ref 8 "vector.hpp" 109 101 +.src_ref 8 "vector.hpp" 109 101 +.src_ref 8 "vector.hpp" 1365 19 +.src_ref 8 "transpose.hpp" 224 15 +.src_ref 8 "transpose.hpp" 225 15 + 4490 "01100100" // MOVX r17, #52; MOV r18, #53 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4491 "11010101" // /* MW 5 */ + 4492 "00100000" // /* MW 4 */ + 4493 "00101001" // /* MW 3 */ + 4494 "01011010" // /* MW 2 */ + 4495 "00000100" // /* MW 1 */ + 4496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4497 "00000000" // /* MW 1 */ + 4498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4499 "00000000" // /* MW 1 */ + 4500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4501 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 109 101 + 4502 "11111000" // MOV p2, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4503 "11000000" // /* MW 3 */ + 4504 "01100110" // /* MW 2 */ + 4505 "00011010" // /* MW 1 */ +.src_ref 8 "vector.hpp" 109 101 first +.src_ref 2 "gemm_bfp16.h" 135 68 + 4506 "00101100" // VLDA lfh0, [p2, #64]; LSHL r19, r19, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4507 "10011011" // /* MW 5 */ + 4508 "11001110" // /* MW 4 */ + 4509 "11111001" // /* MW 3 */ + 4510 "10000000" // /* MW 2 */ + 4511 "01000010" // /* MW 1 */ +.src_ref 8 "vector.hpp" 109 101 +.src_ref 2 "gemm_bfp16.h" 110 4 first + 4512 "00010100" // VLDA lfl0, [p2], #128; ADD.NC lc, r19, #-2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4513 "11111110" // /* MW 5 */ + 4514 "11110011" // /* MW 4 */ + 4515 "11111010" // /* MW 3 */ + 4516 "10010000" // /* MW 2 */ + 4517 "01000101" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4519 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 109 101 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4520 "10011000" // VLDA lfh0, [p2, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4521 "00000111" // /* MW 3 */ + 4522 "00010100" // /* MW 2 */ + 4523 "00000010" // /* MW 1 */ +.src_ref 8 "vector.hpp" 109 101 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4524 "10011000" // VLDA lfl0, [p2], #128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4525 "10000111" // /* MW 3 */ + 4526 "00101100" // /* MW 2 */ + 4527 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4529 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4531 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 109 101 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4532 "11111000" // VMOV x8, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4533 "10010010" // /* MW 3 */ + 4534 "00100001" // /* MW 2 */ + 4535 "00011100" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4536 "00000010" // NOPS; VMOV x1, lfh0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4537 "01110000" // /* MW 7 */ + 4538 "11001001" // /* MW 6 */ + 4539 "01010000" // /* MW 5 */ + 4540 "00000000" // /* MW 4 */ + 4541 "01100000" // /* MW 3 */ + 4542 "00101011" // /* MW 2 */ + 4543 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4544 "11100001" // NOPA; NOPB; NOPS; NOPX; VMOV x0, lfl0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4545 "00000000" // /* MW 15 */ + 4546 "00000000" // /* MW 14 */ + 4547 "01111000" // /* MW 13 */ + 4548 "11001001" // /* MW 12 */ + 4549 "00010010" // /* MW 11 */ + 4550 "00000000" // /* MW 10 */ + 4551 "00000000" // /* MW 9 */ + 4552 "00000000" // /* MW 8 */ + 4553 "01011011" // /* MW 7 */ + 4554 "00000001" // /* MW 6 */ + 4555 "00100000" // /* MW 5 */ + 4556 "00000000" // /* MW 4 */ + 4557 "11110000" // /* MW 3 */ + 4558 "00101100" // /* MW 2 */ + 4559 "00000000" // /* MW 1 */ +.label ZLS_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_304 +.src_ref 8 "vector.hpp" 109 101 +.src_ref 8 "vector.hpp" 109 101 +.src_ref 8 "vector.hpp" 1365 19 first +.src_ref 8 "transpose.hpp" 225 15 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.noswbrkpt +.loop_nesting 1 + 4560 "11100001" // VLDA lfh0, [p2, #64]; NOPB; NOPS; NOPX; VSHUFFLE x3, x0, x8, r18; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4561 "00000000" // /* MW 15 */ + 4562 "00000000" // /* MW 14 */ + 4563 "00111000" // /* MW 13 */ + 4564 "00100100" // /* MW 12 */ + 4565 "11000010" // /* MW 11 */ + 4566 "00000000" // /* MW 10 */ + 4567 "00000000" // /* MW 9 */ + 4568 "00000000" // /* MW 8 */ + 4569 "01011011" // /* MW 7 */ + 4570 "00000001" // /* MW 6 */ + 4571 "00100000" // /* MW 5 */ + 4572 "00000000" // /* MW 4 */ + 4573 "11110000" // /* MW 3 */ + 4574 "10000000" // /* MW 2 */ + 4575 "01000010" // /* MW 1 */ +.src_ref 8 "vector.hpp" 109 101 first +.src_ref 8 "vector.hpp" 109 101 first +.src_ref 8 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4576 "11100001" // VLDA lfl0, [p2], #128; NOPB; NOPS; NOPX; VSHUFFLE x2, x0, x8, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4577 "00000000" // /* MW 15 */ + 4578 "00000000" // /* MW 14 */ + 4579 "00111000" // /* MW 13 */ + 4580 "00100010" // /* MW 12 */ + 4581 "10000010" // /* MW 11 */ + 4582 "00000000" // /* MW 10 */ + 4583 "00000000" // /* MW 9 */ + 4584 "00000000" // /* MW 8 */ + 4585 "01011011" // /* MW 7 */ + 4586 "00000001" // /* MW 6 */ + 4587 "00100000" // /* MW 5 */ + 4588 "00000000" // /* MW 4 */ + 4589 "11110000" // /* MW 3 */ + 4590 "10010000" // /* MW 2 */ + 4591 "01000101" // /* MW 1 */ +.src_ref 8 "vector.hpp" 109 101 +.src_ref 2 "gemm_bfp16.h" 113 16 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4592 "11100001" // NOPA; NOPB; VST x3, [p3, #64]; NOPX; VMOV x8, lfh0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4593 "00000000" // /* MW 15 */ + 4594 "00000000" // /* MW 14 */ + 4595 "01111000" // /* MW 13 */ + 4596 "11001001" // /* MW 12 */ + 4597 "00010000" // /* MW 11 */ + 4598 "00000010" // /* MW 10 */ + 4599 "00000000" // /* MW 9 */ + 4600 "00000000" // /* MW 8 */ + 4601 "11010011" // /* MW 7 */ + 4602 "00010100" // /* MW 6 */ + 4603 "00100011" // /* MW 5 */ + 4604 "00000000" // /* MW 4 */ + 4605 "11110000" // /* MW 3 */ + 4606 "00101100" // /* MW 2 */ + 4607 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 113 16 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4608 "11100001" // NOPA; NOPB; VST x2, [p3], #128; NOPX; VMOV x1, lfh0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4609 "00000000" // /* MW 15 */ + 4610 "00000000" // /* MW 14 */ + 4611 "01111000" // /* MW 13 */ + 4612 "11001001" // /* MW 12 */ + 4613 "01010000" // /* MW 11 */ + 4614 "00000000" // /* MW 10 */ + 4615 "00000000" // /* MW 9 */ + 4616 "00000000" // /* MW 8 */ + 4617 "10010011" // /* MW 7 */ + 4618 "00101100" // /* MW 6 */ + 4619 "00100011" // /* MW 5 */ + 4620 "00000000" // /* MW 4 */ + 4621 "11110000" // /* MW 3 */ + 4622 "00101100" // /* MW 2 */ + 4623 "00000000" // /* MW 1 */ +.label ZLE_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_368 +.end_of_loop +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4624 "11100001" // NOPA; NOPB; NOPS; NOPX; VMOV x0, lfl0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4625 "00000000" // /* MW 15 */ + 4626 "00000000" // /* MW 14 */ + 4627 "01111000" // /* MW 13 */ + 4628 "11001001" // /* MW 12 */ + 4629 "00010010" // /* MW 11 */ + 4630 "00000000" // /* MW 10 */ + 4631 "00000000" // /* MW 9 */ + 4632 "00000000" // /* MW 8 */ + 4633 "01011011" // /* MW 7 */ + 4634 "00000001" // /* MW 6 */ + 4635 "00100000" // /* MW 5 */ + 4636 "00000000" // /* MW 4 */ + 4637 "11110000" // /* MW 3 */ + 4638 "00101100" // /* MW 2 */ + 4639 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 109 101 first +.src_ref 8 "vector.hpp" 1365 19 first +.src_ref 8 "transpose.hpp" 225 15 first +.loop_nesting 0 + 4640 "01111000" // VSHUFFLE x3, x0, x8, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4641 "01001000" // /* MW 3 */ + 4642 "10000100" // /* MW 2 */ + 4643 "00011001" // /* MW 1 */ +.src_ref 8 "vector.hpp" 109 101 +.src_ref 8 "transpose.hpp" 224 15 first + 4644 "01111000" // VSHUFFLE x2, x0, x8, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4645 "01000100" // /* MW 3 */ + 4646 "00000100" // /* MW 2 */ + 4647 "00011001" // /* MW 1 */ +.src_ref 8 "vector.hpp" 109 101 first +.src_ref 2 "gemm_bfp16.h" 113 16 first + 4648 "00000010" // VST x3, [p3, #64]; VMOV x8, lfh0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4649 "01110000" // /* MW 7 */ + 4650 "11001001" // /* MW 6 */ + 4651 "00010000" // /* MW 5 */ + 4652 "00000010" // /* MW 4 */ + 4653 "01100000" // /* MW 3 */ + 4654 "10011010" // /* MW 2 */ + 4655 "01100010" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 113 16 + 4656 "00000010" // VST x2, [p3], #128; VMOV x0, lfl0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4657 "01110000" // /* MW 7 */ + 4658 "11001001" // /* MW 6 */ + 4659 "00010010" // /* MW 5 */ + 4660 "00000000" // /* MW 4 */ + 4661 "01100000" // /* MW 3 */ + 4662 "10010010" // /* MW 2 */ + 4663 "01100101" // /* MW 1 */ +.src_ref 8 "vector.hpp" 109 101 +.src_ref 8 "vector.hpp" 1365 19 first +.src_ref 8 "transpose.hpp" 225 15 first + 4664 "01111000" // VSHUFFLE x3, x0, x8, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4665 "01001000" // /* MW 3 */ + 4666 "10000100" // /* MW 2 */ + 4667 "00011001" // /* MW 1 */ +.src_ref 8 "vector.hpp" 109 101 first +.src_ref 8 "transpose.hpp" 224 15 first + 4668 "01111000" // VSHUFFLE x2, x0, x8, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4669 "01000100" // /* MW 3 */ + 4670 "00000100" // /* MW 2 */ + 4671 "00011001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 113 16 first + 4672 "00000010" // VST x3, [p3, #64]; VMOV x1, lfh0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4673 "01110000" // /* MW 7 */ + 4674 "11001001" // /* MW 6 */ + 4675 "01010000" // /* MW 5 */ + 4676 "00000000" // /* MW 4 */ + 4677 "01100000" // /* MW 3 */ + 4678 "10011010" // /* MW 2 */ + 4679 "01100010" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 113 16 + 4680 "00000010" // VST x2, [p3], #128; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4681 "01110000" // /* MW 7 */ + 4682 "10100101" // /* MW 6 */ + 4683 "00000001" // /* MW 5 */ + 4684 "00000000" // /* MW 4 */ + 4685 "01100000" // /* MW 3 */ + 4686 "10010010" // /* MW 2 */ + 4687 "01100101" // /* MW 1 */ +.label TGT_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_432 +.src_ref 2 "gemm_bfp16.h" 141 44 first + 4688 "00011000" // PADDB [p7], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4689 "10010000" // /* MW 3 */ + 4690 "00011111" // /* MW 2 */ + 4691 "00111111" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 148 2 first + 4692 "10011000" // LDA dj3, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4693 "11000110" // /* MW 3 */ + 4694 "00011101" // /* MW 2 */ + 4695 "00000111" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 148 2 +.src_ref 2 "gemm_bfp16.h" 148 2 + 4696 "01010100" // LDA dn3, [p7], #4; MOV m5, #-36 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4697 "01110001" // /* MW 5 */ + 4698 "00011111" // /* MW 4 */ + 4699 "11011010" // /* MW 3 */ + 4700 "10110100" // /* MW 2 */ + 4701 "11100011" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 148 2 + 4702 "10011000" // LDA r18, [p7], m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4703 "01010110" // /* MW 3 */ + 4704 "10101010" // /* MW 2 */ + 4705 "00000111" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 172 37 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 4706 "10111010" // LDA r20, [p7], #12; MOVXM p3, #508788 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4707 "00010000" // /* MW 9 */ + 4708 "10111010" // /* MW 8 */ + 4709 "10110001" // /* MW 7 */ + 4710 "11110001" // /* MW 6 */ + 4711 "00000001" // /* MW 5 */ + 4712 "00000000" // /* MW 4 */ + 4713 "11010000" // /* MW 3 */ + 4714 "11010010" // /* MW 2 */ + 4715 "11100111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4716 "10111010" // LDA.s8 r20, [p3]; MOVXM r23, #16256 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4717 "00010000" // /* MW 9 */ + 4718 "11000000" // /* MW 8 */ + 4719 "11101111" // /* MW 7 */ + 4720 "00001110" // /* MW 6 */ + 4721 "00000000" // /* MW 5 */ + 4722 "00000000" // /* MW 4 */ + 4723 "01010000" // /* MW 3 */ + 4724 "11010000" // /* MW 2 */ + 4725 "01100000" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 5 "aie_core.h" 81 15 +.src_ref 2 "gemm_bfp16.h" 174 6 +.src_ref 2 "gemm_bfp16.h" 175 6 +.src_ref 2 "gemm_bfp16.h" 176 6 +.src_ref 2 "gemm_bfp16.h" 177 6 +.src_ref 2 "gemm_bfp16.h" 202 6 +.src_ref 2 "gemm_bfp16.h" 203 6 +.src_ref 2 "gemm_bfp16.h" 204 6 +.src_ref 2 "gemm_bfp16.h" 205 6 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4726 "11010100" // LDA p3, [sp, #-12]; VBCST.16 x5, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4727 "11100101" // /* MW 5 */ + 4728 "10111010" // /* MW 4 */ + 4729 "00100101" // /* MW 3 */ + 4730 "10110011" // /* MW 2 */ + 4731 "11111110" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 7 "accum.hpp" 940 83 first +.src_ref 2 "gemm_bfp16.h" 172 37 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4732 "01010100" // VLDA.CONV.fp32.bf16 cmh4, [p6, #64]; MOV m6, #84 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4733 "01010001" // /* MW 5 */ + 4734 "00000001" // /* MW 4 */ + 4735 "01111100" // /* MW 3 */ + 4736 "11001101" // /* MW 2 */ + 4737 "11000010" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 172 37 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4738 "11010100" // LDA m7, [p7], #4; VBCST.16 x4, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4739 "11100101" // /* MW 5 */ + 4740 "10111010" // /* MW 4 */ + 4741 "11010100" // /* MW 3 */ + 4742 "11110000" // /* MW 2 */ + 4743 "11100011" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 172 37 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4744 "11010100" // LDA m3, [p7], #4; VMOV x10, x4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4745 "00100101" // /* MW 5 */ + 4746 "01010001" // /* MW 4 */ + 4747 "11011010" // /* MW 3 */ + 4748 "10110000" // /* MW 2 */ + 4749 "11100011" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 172 37 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4750 "11010100" // LDA m1, [p7], #4; VMOV x11, x5 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4751 "00100101" // /* MW 5 */ + 4752 "01010101" // /* MW 4 */ + 4753 "11011011" // /* MW 3 */ + 4754 "10010000" // /* MW 2 */ + 4755 "11100011" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 172 37 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4756 "00101100" // LDA m6, [p7], m6; ADD r23, r20, #-2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4757 "11110110" // /* MW 5 */ + 4758 "01011111" // /* MW 4 */ + 4759 "11011010" // /* MW 3 */ + 4760 "01100000" // /* MW 2 */ + 4761 "11111001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 172 37 +.src_ref 2 "gemm_bfp16.h" 172 37 + 4762 "01010100" // LDA m0, [p7], #-16; MOV m2, #280 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4763 "01100001" // /* MW 5 */ + 4764 "00000100" // /* MW 4 */ + 4765 "11010100" // /* MW 3 */ + 4766 "10000000" // /* MW 2 */ + 4767 "11111001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 172 37 +.src_ref 2 "gemm_bfp16.h" 172 37 + 4768 "01010100" // LDA dn0, [p7], #4; MOV m5, #-108 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4769 "01010001" // /* MW 5 */ + 4770 "00011110" // /* MW 4 */ + 4771 "11011010" // /* MW 3 */ + 4772 "10000100" // /* MW 2 */ + 4773 "11100011" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 172 37 + 4774 "10011000" // LDA dj0, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4775 "01000110" // /* MW 3 */ + 4776 "00011100" // /* MW 2 */ + 4777 "00000111" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 172 37 + 4778 "10011000" // LDA dn4, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4779 "00100110" // /* MW 3 */ + 4780 "00011110" // /* MW 2 */ + 4781 "00000111" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 172 37 + 4782 "10011000" // LDA dj4, [p7], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4783 "01000110" // /* MW 3 */ + 4784 "00101110" // /* MW 2 */ + 4785 "00000111" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 5 "aie_core.h" 81 15 +.src_ref 5 "aie_core.h" 100 15 +.src_ref 5 "aie_core.h" 100 15 +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 7 "accum.hpp" 903 19 +.src_ref 7 "accum.hpp" 940 83 +.src_ref 2 "gemm_bfp16.h" 172 37 + 4786 "01010100" // LDA m5, [p7], m5; MOV dc4, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4787 "00000001" // /* MW 5 */ + 4788 "10000000" // /* MW 4 */ + 4789 "11011001" // /* MW 3 */ + 4790 "01010000" // /* MW 2 */ + 4791 "11110101" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 172 37 +.src_ref 2 "gemm_bfp16.h" 175 6 +.src_ref 2 "gemm_bfp16.h" 268 12 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 4792 "10111010" // LDA r26, [p7], m2; MOVS p0, p3; MOV r25, p7 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4793 "01110010" // /* MW 9 */ + 4794 "01100000" // /* MW 8 */ + 4795 "00101111" // /* MW 7 */ + 4796 "00000011" // /* MW 6 */ + 4797 "10001011" // /* MW 5 */ + 4798 "10001100" // /* MW 4 */ + 4799 "11010000" // /* MW 3 */ + 4800 "01101010" // /* MW 2 */ + 4801 "11101001" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 5 "aie_core.h" 100 15 +.src_ref 5 "aie_core.h" 100 15 +.src_ref 5 "aie_core.h" 100 15 +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 6 "array_helpers.hpp" 313 19 +.src_ref 7 "accum.hpp" 903 19 +.src_ref 7 "accum.hpp" 940 83 +.src_ref 2 "gemm_bfp16.h" 172 2 +.src_ref 2 "gemm_bfp16.h" 175 6 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 4802 "01111110" // LDA p7, [sp, #-20]; PADDB [p0], m3; MOVS dc0, dc4; MOVXM p2, #4912 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 4803 "01100000" // /* MW 13 */ + 4804 "00001001" // /* MW 12 */ + 4805 "00000010" // /* MW 11 */ + 4806 "00000010" // /* MW 10 */ + 4807 "00110011" // /* MW 9 */ + 4808 "10100110" // /* MW 8 */ + 4809 "00000000" // /* MW 7 */ + 4810 "00000000" // /* MW 6 */ + 4811 "00100000" // /* MW 5 */ + 4812 "11010111" // /* MW 4 */ + 4813 "00100000" // /* MW 3 */ + 4814 "11110011" // /* MW 2 */ + 4815 "11111101" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 5 "aie_core.h" 100 15 +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 2 "gemm_bfp16.h" 175 6 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4816 "10111010" // VLDA bmlh2, [p0, #64]; MOVS dc2, dc4; MOV dc5, dc4 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4817 "01110010" // /* MW 9 */ + 4818 "11000000" // /* MW 8 */ + 4819 "11100100" // /* MW 7 */ + 4820 "00000010" // /* MW 6 */ + 4821 "01001011" // /* MW 5 */ + 4822 "00010000" // /* MW 4 */ + 4823 "10110010" // /* MW 3 */ + 4824 "10100110" // /* MW 2 */ + 4825 "00000010" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 5 "aie_core.h" 81 15 +.src_ref 2 "gemm_bfp16.h" 174 6 +.src_ref 2 "gemm_bfp16.h" 175 6 +.src_ref 2 "gemm_bfp16.h" 176 6 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4826 "10111010" // VLDA bmhl2, [p0, #128]; MOVS p4, p3; MOV dj2, dj3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4827 "01110010" // /* MW 9 */ + 4828 "10000000" // /* MW 8 */ + 4829 "01000011" // /* MW 7 */ + 4830 "00000001" // /* MW 6 */ + 4831 "10001011" // /* MW 5 */ + 4832 "10001100" // /* MW 4 */ + 4833 "10110100" // /* MW 3 */ + 4834 "10101010" // /* MW 2 */ + 4835 "00000100" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 +.src_ref 2 "gemm_bfp16.h" 175 6 +.src_ref 2 "gemm_bfp16.h" 176 6 +.src_ref 2 "gemm_bfp16.h" 177 6 first +.src_ref 2 "gemm_bfp16.h" 202 6 +.src_ref 2 "gemm_bfp16.h" 203 6 +.src_ref 2 "gemm_bfp16.h" 204 6 +.src_ref 2 "gemm_bfp16.h" 204 6 +.src_ref 2 "gemm_bfp16.h" 205 6 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4836 "01111110" // VLDA bmhh2, [p0, #192]; PADDB [p3], m6; MOVS p1, p3; MOVX r17, #780; MOV r24, m1 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 4837 "01100000" // /* MW 13 */ + 4838 "10010001" // /* MW 12 */ + 4839 "00110001" // /* MW 11 */ + 4840 "00001111" // /* MW 10 */ + 4841 "00100000" // /* MW 9 */ + 4842 "01100001" // /* MW 8 */ + 4843 "00110001" // /* MW 7 */ + 4844 "00100010" // /* MW 6 */ + 4845 "00100011" // /* MW 5 */ + 4846 "10010111" // /* MW 4 */ + 4847 "10110111" // /* MW 3 */ + 4848 "10101110" // /* MW 2 */ + 4849 "00000110" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 5 "aie_core.h" 100 15 +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 7 "mmul_bf16_bf16.hpp" 113 29 +.src_ref 7 "mmul_bf16_bf16.hpp" 113 29 +.src_ref 7 "mmul_bf16_bf16.hpp" 114 29 +.src_ref 7 "mmul_bf16_bf16.hpp" 114 29 +.src_ref 2 "gemm_bfp16.h" 177 6 +.src_ref 2 "gemm_bfp16.h" 268 37 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4850 "01111110" // VLDA bmlh0, [p3, #64]; NOPB; MOVS dc3, dc0; MOVX crRnd, r20; MOV r20, p7 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 4851 "01100000" // /* MW 13 */ + 4852 "00001001" // /* MW 12 */ + 4853 "01100000" // /* MW 11 */ + 4854 "00001111" // /* MW 10 */ + 4855 "11101100" // /* MW 9 */ + 4856 "01010001" // /* MW 8 */ + 4857 "10000000" // /* MW 7 */ + 4858 "00111010" // /* MW 6 */ + 4859 "00100101" // /* MW 5 */ + 4860 "00000000" // /* MW 4 */ + 4861 "10110000" // /* MW 3 */ + 4862 "10000110" // /* MW 2 */ + 4863 "01100010" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 8 "transpose.hpp" 224 15 +.src_ref 8 "transpose.hpp" 224 15 +.src_ref 2 "gemm_bfp16.h" 175 6 +.src_ref 2 "gemm_bfp16.h" 177 6 +.src_ref 2 "gemm_bfp16.h" 203 6 + 4864 "11100001" // VLDA bmhl0, [p3, #128]; NOPB; MOVS dn2, dn3; MOVX r19, #52; MOV m2, m3; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4865 "00000000" // /* MW 15 */ + 4866 "00000000" // /* MW 14 */ + 4867 "01111000" // /* MW 13 */ + 4868 "00000000" // /* MW 12 */ + 4869 "00000011" // /* MW 11 */ + 4870 "10001001" // /* MW 10 */ + 4871 "00110110" // /* MW 9 */ + 4872 "00000001" // /* MW 8 */ + 4873 "01001011" // /* MW 7 */ + 4874 "01001110" // /* MW 6 */ + 4875 "00100010" // /* MW 5 */ + 4876 "00000000" // /* MW 4 */ + 4877 "10110000" // /* MW 3 */ + 4878 "10001010" // /* MW 2 */ + 4879 "01100100" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 5 "aie_core.h" 100 15 +.src_ref 8 "vector.hpp" 1365 19 +.src_ref 8 "vector.hpp" 1365 19 +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 8 "transpose.hpp" 225 15 +.src_ref 8 "transpose.hpp" 225 15 +.src_ref 2 "gemm_bfp16.h" 177 6 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 4880 "11100001" // VLDA bmhh0, [p3, #192]; NOPB; MOVS dc1, dc3; MOVX r21, #53; MOV m3, r18; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4881 "00000000" // /* MW 15 */ + 4882 "00000000" // /* MW 14 */ + 4883 "01111000" // /* MW 13 */ + 4884 "10010000" // /* MW 12 */ + 4885 "10000100" // /* MW 11 */ + 4886 "10101001" // /* MW 10 */ + 4887 "01010110" // /* MW 9 */ + 4888 "00000001" // /* MW 8 */ + 4889 "01001011" // /* MW 7 */ + 4890 "00001100" // /* MW 6 */ + 4891 "00100001" // /* MW 5 */ + 4892 "00000000" // /* MW 4 */ + 4893 "10110000" // /* MW 3 */ + 4894 "10001110" // /* MW 2 */ + 4895 "01100110" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 7 "mmul_bf16_bf16.hpp" 111 36 +.src_ref 7 "mmul_bf16_bf16.hpp" 111 36 +.src_ref 2 "gemm_bfp16.h" 174 6 +.src_ref 2 "gemm_bfp16.h" 176 6 first +.src_ref 2 "gemm_bfp16.h" 177 6 +.aggressive_scheduled_block_id 4 +.noswbrkpt + 4896 "11100001" // VLDA bmll0, [p3]; VLDB x4, [p7, #64]; PADDS [p4], m1; MOVX r22, #60; MOV p5, p4; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4897 "00000000" // /* MW 15 */ + 4898 "00000000" // /* MW 14 */ + 4899 "01111000" // /* MW 13 */ + 4900 "01100000" // /* MW 12 */ + 4901 "10110100" // /* MW 11 */ + 4902 "10001010" // /* MW 10 */ + 4903 "01100111" // /* MW 9 */ + 4904 "00000001" // /* MW 8 */ + 4905 "01011011" // /* MW 7 */ + 4906 "00101000" // /* MW 6 */ + 4907 "01101100" // /* MW 5 */ + 4908 "00101010" // /* MW 4 */ + 4909 "10111110" // /* MW 3 */ + 4910 "10000010" // /* MW 2 */ + 4911 "01100000" // /* MW 1 */ +.label TGT_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_656 +.src_ref 5 "aie_core.h" 81 15 +.src_ref 5 "aie_core.h" 100 15 +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 2 "gemm_bfp16.h" 174 6 first +.src_ref 2 "gemm_bfp16.h" 176 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 4912 "11110110" // VLDA bmlh1, [p4, #64]; PADDB [p5], m7; MOVS p3, p5; MOV m1, r16 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4913 "01110000" // /* MW 11 */ + 4914 "00010000" // /* MW 10 */ + 4915 "10000100" // /* MW 9 */ + 4916 "00000000" // /* MW 8 */ + 4917 "10001011" // /* MW 7 */ + 4918 "10010100" // /* MW 6 */ + 4919 "00100011" // /* MW 5 */ + 4920 "11010111" // /* MW 4 */ + 4921 "10111011" // /* MW 3 */ + 4922 "10010110" // /* MW 2 */ + 4923 "10000010" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 first +.src_ref 2 "gemm_bfp16.h" 176 6 first +.src_ref 2 "gemm_bfp16.h" 182 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 4924 "10111010" // VLDA bmhl1, [p4, #128]; PADDB.2D [p3], d3; ADD.NC lc, r26, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4925 "01001110" // /* MW 9 */ + 4926 "10111111" // /* MW 8 */ + 4927 "10111110" // /* MW 7 */ + 4928 "00000010" // /* MW 6 */ + 4929 "10010000" // /* MW 5 */ + 4930 "01110011" // /* MW 4 */ + 4931 "10110011" // /* MW 3 */ + 4932 "10011010" // /* MW 2 */ + 4933 "10000100" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 176 6 +.src_ref 2 "gemm_bfp16.h" 182 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 4934 "10111010" // VLDA bmhh1, [p4, #192]; MOVXM ls, #5120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4935 "00010000" // /* MW 9 */ + 4936 "00000000" // /* MW 8 */ + 4937 "01111010" // /* MW 7 */ + 4938 "00000100" // /* MW 6 */ + 4939 "00000000" // /* MW 5 */ + 4940 "00000000" // /* MW 4 */ + 4941 "10110000" // /* MW 3 */ + 4942 "10011110" // /* MW 2 */ + 4943 "10000110" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 313 19 +.src_ref 2 "gemm_bfp16.h" 176 6 +.src_ref 2 "gemm_bfp16.h" 182 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 4944 "01111110" // VLDA bmll1, [p4]; VLDB.3D x7, [p7], d1; MOVS p4, p7; MOVXM le, #5200 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 4945 "01100000" // /* MW 13 */ + 4946 "10010001" // /* MW 12 */ + 4947 "10010011" // /* MW 11 */ + 4948 "00000010" // /* MW 10 */ + 4949 "01000101" // /* MW 9 */ + 4950 "10110111" // /* MW 8 */ + 4951 "00000000" // /* MW 7 */ + 4952 "00000000" // /* MW 6 */ + 4953 "11101000" // /* MW 5 */ + 4954 "01110011" // /* MW 4 */ + 4955 "10111110" // /* MW 3 */ + 4956 "10010010" // /* MW 2 */ + 4957 "10000000" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 313 19 first +.src_ref 2 "gemm_bfp16.h" 174 6 first +.src_ref 2 "gemm_bfp16.h" 175 6 +.src_ref 2 "gemm_bfp16.h" 203 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 4958 "10111010" // VLDA bmlh3, [p5, #64]; PADDB [p4], m4; MOV m3, m2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4959 "01111110" // /* MW 9 */ + 4960 "00000000" // /* MW 8 */ + 4961 "10000010" // /* MW 7 */ + 4962 "00000001" // /* MW 6 */ + 4963 "10010000" // /* MW 5 */ + 4964 "10001011" // /* MW 4 */ + 4965 "10110100" // /* MW 3 */ + 4966 "10110110" // /* MW 2 */ + 4967 "10100010" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 313 19 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 4968 "00110010" // VLDA x9, [p4]; VLDB x5, [p4, #64]; MOVS p4, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4969 "10001011" // /* MW 7 */ + 4970 "10011100" // /* MW 6 */ + 4971 "11101100" // /* MW 5 */ + 4972 "00101010" // /* MW 4 */ + 4973 "01111000" // /* MW 3 */ + 4974 "11001011" // /* MW 2 */ + 4975 "10000000" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 6 "array_helpers.hpp" 313 19 first +.src_ref 2 "gemm_bfp16.h" 174 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 4976 "00110010" // VLDA bmhl3, [p5, #128]; VLDB x4, [p7, #64]; PADDS [p4], m4 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4977 "01011011" // /* MW 7 */ + 4978 "10001000" // /* MW 6 */ + 4979 "01101100" // /* MW 5 */ + 4980 "00101010" // /* MW 4 */ + 4981 "10111110" // /* MW 3 */ + 4982 "10111010" // /* MW 2 */ + 4983 "10100100" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 2 "gemm_bfp16.h" 174 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 4984 "00111100" // VLDA bmhh3, [p5, #192]; VLDB.3D x7, [p7], d1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4985 "11101000" // /* MW 5 */ + 4986 "01110011" // /* MW 4 */ + 4987 "10111110" // /* MW 3 */ + 4988 "10111110" // /* MW 2 */ + 4989 "10100110" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 6 "array_helpers.hpp" 313 19 +.src_ref 2 "gemm_bfp16.h" 174 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 4990 "10111010" // VLDA bmll3, [p5]; VLDB x5, [p4, #64]; MOV p5, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4991 "01111110" // /* MW 9 */ + 4992 "01100000" // /* MW 8 */ + 4993 "10110110" // /* MW 7 */ + 4994 "00000010" // /* MW 6 */ + 4995 "01110100" // /* MW 5 */ + 4996 "00010101" // /* MW 4 */ + 4997 "10110100" // /* MW 3 */ + 4998 "10110010" // /* MW 2 */ + 4999 "10100000" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 313 19 first +.src_ref 7 "accum.hpp" 903 19 first +.src_ref 7 "accum.hpp" 940 83 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5000 "00111100" // VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;PADDB [p5], m5 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5001 "00100000" // /* MW 5 */ + 5002 "01010111" // /* MW 4 */ + 5003 "01111011" // /* MW 3 */ + 5004 "01000101" // /* MW 2 */ + 5005 "11000011" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 8 "transpose.hpp" 224 15 first +.src_ref 2 "gemm_bfp16.h" 175 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5006 "10111010" // VLDA bmll2, [p0]; VLDB x9, [p4]; VSHUFFLE x6, x7, x4, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5007 "00111110" // /* MW 9 */ + 5008 "00100110" // /* MW 8 */ + 5009 "10011101" // /* MW 7 */ + 5010 "00000001" // /* MW 6 */ + 5011 "01110100" // /* MW 5 */ + 5012 "00000110" // /* MW 4 */ + 5013 "10110100" // /* MW 3 */ + 5014 "10100010" // /* MW 2 */ + 5015 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1365 19 first +.src_ref 8 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5016 "01111000" // VSHUFFLE x7, x7, x4, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5017 "01010100" // /* MW 3 */ + 5018 "10111010" // /* MW 2 */ + 5019 "00011011" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 313 19 +.src_ref 8 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5020 "10111010" // VLDB x4, [p7, #64]; MOVS p4, p7; VSHUFFLE x8, x9, x5, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5021 "00110110" // /* MW 9 */ + 5022 "01100110" // /* MW 8 */ + 5023 "00100101" // /* MW 7 */ + 5024 "00000010" // /* MW 6 */ + 5025 "00110100" // /* MW 5 */ + 5026 "00010101" // /* MW 4 */ + 5027 "01100111" // /* MW 3 */ + 5028 "10010001" // /* MW 2 */ + 5029 "10010011" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1365 19 first +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 6 "array_helpers.hpp" 313 19 first +.src_ref 7 "accum.hpp" 940 83 first +.src_ref 7 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 8 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5030 "01100110" // VLDA.CONV.fp32.bf16 cml4, [p5];PADDB [p4], m4; VSHUFFLE x9, x9, x5, r21; VMUL.f dm4, y3, y5, r22 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5031 "01010001" // /* MW 11 */ + 5032 "11101101" // /* MW 10 */ + 5033 "10110100" // /* MW 9 */ + 5034 "01100010" // /* MW 8 */ + 5035 "11010100" // /* MW 7 */ + 5036 "11001010" // /* MW 6 */ + 5037 "00100100" // /* MW 5 */ + 5038 "00010111" // /* MW 4 */ + 5039 "01111001" // /* MW 3 */ + 5040 "11000101" // /* MW 2 */ + 5041 "10100000" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 313 19 +.src_ref 7 "accum.hpp" 903 19 first +.src_ref 7 "accum.hpp" 940 83 +.src_ref 8 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5042 "11110110" // VLDA.CONV.fp32.bf16 cmh4, [p5, #64];VLDB x5, [p4, #64]; MOVS p5, p6; VSHUFFLE x6, x7, x4, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5043 "00110000" // /* MW 11 */ + 5044 "00100110" // /* MW 10 */ + 5045 "10011101" // /* MW 9 */ + 5046 "00000001" // /* MW 8 */ + 5047 "10001011" // /* MW 7 */ + 5048 "10011000" // /* MW 6 */ + 5049 "11101101" // /* MW 5 */ + 5050 "00101010" // /* MW 4 */ + 5051 "01111000" // /* MW 3 */ + 5052 "11001101" // /* MW 2 */ + 5053 "10100010" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1365 19 first +.src_ref 6 "array_helpers.hpp" 313 19 first +.src_ref 8 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5054 "11010100" // PADDA [p5], m5; VSHUFFLE x7, x7, x4, r21 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5055 "10101000" // /* MW 5 */ + 5056 "01110100" // /* MW 4 */ + 5057 "11110111" // /* MW 3 */ + 5058 "00001100" // /* MW 2 */ + 5059 "10110101" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 7 "accum.hpp" 940 83 first +.src_ref 7 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 7 "mmul_bf16_bf16.hpp" 113 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5060 "01001010" // VLDA.CONV.fp32.bf16 cmh4, [p6, #64]; VCONV.bfp16ebs8.fp32 ex0, dm4; VMUL.f dm4, y4, y5, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5061 "01010001" // /* MW 9 */ + 5062 "11110001" // /* MW 8 */ + 5063 "10110100" // /* MW 7 */ + 5064 "00001001" // /* MW 6 */ + 5065 "00110110" // /* MW 5 */ + 5066 "00001010" // /* MW 4 */ + 5067 "01110000" // /* MW 3 */ + 5068 "11001101" // /* MW 2 */ + 5069 "11000010" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 7 "accum.hpp" 903 19 first +.src_ref 7 "accum.hpp" 940 83 +.src_ref 8 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5070 "10111010" // VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;VLDB.3D x7, [p7], d1; VSHUFFLE x8, x9, x5, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5071 "00111110" // /* MW 9 */ + 5072 "01100110" // /* MW 8 */ + 5073 "00100101" // /* MW 7 */ + 5074 "00000010" // /* MW 6 */ + 5075 "11110100" // /* MW 5 */ + 5076 "00111001" // /* MW 4 */ + 5077 "01110111" // /* MW 3 */ + 5078 "01000101" // /* MW 2 */ + 5079 "11000011" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1365 19 first +.src_ref 8 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5080 "01111000" // VSHUFFLE x9, x9, x5, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5081 "11010100" // /* MW 3 */ + 5082 "11001010" // /* MW 2 */ + 5083 "00011100" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 114 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5084 "00011000" // VCONV.bfp16ebs8.fp32 ex1, dm4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5085 "00110110" // /* MW 3 */ + 5086 "10001010" // /* MW 2 */ + 5087 "00001000" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 7 "accum.hpp" 940 83 first +.src_ref 7 "mmul_bf16_bf16.hpp" 111 36 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5088 "01001010" // VLDA.CONV.fp32.bf16 cml4, [p5];VLDB x9, [p4]; VMUL.f dm4, y3, y5, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5089 "01010001" // /* MW 9 */ + 5090 "11101101" // /* MW 8 */ + 5091 "10110100" // /* MW 7 */ + 5092 "00011101" // /* MW 6 */ + 5093 "01110100" // /* MW 5 */ + 5094 "00000110" // /* MW 4 */ + 5095 "01110100" // /* MW 3 */ + 5096 "11000101" // /* MW 2 */ + 5097 "10100000" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 7 "accum.hpp" 903 19 first +.src_ref 7 "accum.hpp" 940 83 +.src_ref 7 "mmul_bf16_bf16.hpp" 113 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5098 "00001100" // VLDA.CONV.fp32.bf16 cmh4, [p5, #64]; VCONV.bfp16ebs8.fp32 ex2, dm4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5099 "01101100" // /* MW 5 */ + 5100 "00010100" // /* MW 4 */ + 5101 "01110010" // /* MW 3 */ + 5102 "11001101" // /* MW 2 */ + 5103 "10100010" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 114 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5104 "11100001" // NOPA; NOPB; VCONV.bfp16ebs8.fp32 ex3, dm4;NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5105 "00000000" // /* MW 15 */ + 5106 "00000000" // /* MW 14 */ + 5107 "01111000" // /* MW 13 */ + 5108 "10100101" // /* MW 12 */ + 5109 "00000001" // /* MW 11 */ + 5110 "00000000" // /* MW 10 */ + 5111 "00000000" // /* MW 9 */ + 5112 "00000000" // /* MW 8 */ + 5113 "00110110" // /* MW 7 */ + 5114 "10001010" // /* MW 6 */ + 5115 "00100001" // /* MW 5 */ + 5116 "00000000" // /* MW 4 */ + 5117 "11110000" // /* MW 3 */ + 5118 "00101100" // /* MW 2 */ + 5119 "00000000" // /* MW 1 */ +.label ZLS_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_864 +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 313 19 +.src_ref 7 "mmul_bf16_bf16.hpp" 111 36 first +.begin_of_loop +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt +.loop_nesting 2 + 5120 "01001010" // VLDB x4, [p7, #64]; MOV p5, p6; VMUL.f dm4, y4, y5, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5121 "01010001" // /* MW 9 */ + 5122 "11110001" // /* MW 8 */ + 5123 "10110100" // /* MW 7 */ + 5124 "11100110" // /* MW 6 */ + 5125 "11000000" // /* MW 5 */ + 5126 "01101100" // /* MW 4 */ + 5127 "01101101" // /* MW 3 */ + 5128 "00101010" // /* MW 2 */ + 5129 "00001110" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 6 "array_helpers.hpp" 313 19 +.src_ref 7 "accum.hpp" 940 83 first +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 8 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5130 "01001011" // VLDA.CONV.fp32.bf16 cmh4, [p6, #64];VLDB.3D x7, [p7], d1; MOVS p4, p7; NOPX; VSHUFFLE x6, x7, x4, r19; VMAC.f dm3, dm3, ex0, ex1, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5131 "00000001" // /* MW 15 */ + 5132 "01011011" // /* MW 14 */ + 5133 "00111100" // /* MW 13 */ + 5134 "00100110" // /* MW 12 */ + 5135 "10011101" // /* MW 11 */ + 5136 "00000001" // /* MW 10 */ + 5137 "00000000" // /* MW 9 */ + 5138 "00000000" // /* MW 8 */ + 5139 "10001011" // /* MW 7 */ + 5140 "10011100" // /* MW 6 */ + 5141 "11101100" // /* MW 5 */ + 5142 "01110011" // /* MW 4 */ + 5143 "01111110" // /* MW 3 */ + 5144 "11001101" // /* MW 2 */ + 5145 "11000010" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 +.src_ref 8 "vector.hpp" 1365 19 first +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 313 19 first +.src_ref 7 "accum.hpp" 903 19 first +.src_ref 7 "accum.hpp" 940 83 +.src_ref 7 "mmul_bf16_bf16.hpp" 113 29 first +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 +.src_ref 8 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5146 "01001011" // VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;PADDB [p4], m4; VCONV.bfp16ebs8.fp32 ex0, dm4;NOPX; VSHUFFLE x7, x7, x4, r21; VMAC.f dm1, dm1, ex2, ex1, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5147 "00100001" // /* MW 15 */ + 5148 "01001001" // /* MW 14 */ + 5149 "00111100" // /* MW 13 */ + 5150 "00101010" // /* MW 12 */ + 5151 "11011101" // /* MW 11 */ + 5152 "00000001" // /* MW 10 */ + 5153 "00000000" // /* MW 9 */ + 5154 "00000000" // /* MW 8 */ + 5155 "00110110" // /* MW 7 */ + 5156 "00001010" // /* MW 6 */ + 5157 "00100000" // /* MW 5 */ + 5158 "00010111" // /* MW 4 */ + 5159 "01111001" // /* MW 3 */ + 5160 "01000101" // /* MW 2 */ + 5161 "11000011" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 6 "array_helpers.hpp" 313 19 +.src_ref 7 "mmul_bf16_bf16.hpp" 114 29 first +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5162 "01100110" // PADDA [p5], m5; VLDB x5, [p4, #64]; VCONV.bfp16ebs8.fp32 ex1, dm4; VMAC.f dm2, dm2, ex0, ex3, r17 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5163 "01101001" // /* MW 11 */ + 5164 "01000000" // /* MW 10 */ + 5165 "10001010" // /* MW 9 */ + 5166 "00001110" // /* MW 8 */ + 5167 "00011011" // /* MW 7 */ + 5168 "01000101" // /* MW 6 */ + 5169 "11101000" // /* MW 5 */ + 5170 "00101010" // /* MW 4 */ + 5171 "11111000" // /* MW 3 */ + 5172 "00001100" // /* MW 2 */ + 5173 "10110101" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 7 "accum.hpp" 940 83 first +.src_ref 7 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 8 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5174 "01100110" // VLDA.CONV.fp32.bf16 cml4, [p5];VLDB x9, [p4]; VSHUFFLE x8, x9, x5, r19; VMUL.f dm4, y3, y5, r22 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5175 "01010001" // /* MW 11 */ + 5176 "11101101" // /* MW 10 */ + 5177 "10110100" // /* MW 9 */ + 5178 "01100010" // /* MW 8 */ + 5179 "11001100" // /* MW 7 */ + 5180 "01001010" // /* MW 6 */ + 5181 "11101100" // /* MW 5 */ + 5182 "00001100" // /* MW 4 */ + 5183 "01111000" // /* MW 3 */ + 5184 "11000101" // /* MW 2 */ + 5185 "10100000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1365 19 first +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 7 "accum.hpp" 903 19 first +.src_ref 7 "accum.hpp" 940 83 +.src_ref 7 "mmul_bf16_bf16.hpp" 113 29 first +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 8 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5186 "01101110" // VLDA.CONV.fp32.bf16 cmh4, [p5, #64]; VCONV.bfp16ebs8.fp32 ex2, dm4; VSHUFFLE x9, x9, x5, r21; VMAC.f dm0, dm0, ex2, ex3, r17 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5187 "01101001" // /* MW 13 */ + 5188 "00000100" // /* MW 12 */ + 5189 "10001000" // /* MW 11 */ + 5190 "10100011" // /* MW 10 */ + 5191 "01010110" // /* MW 9 */ + 5192 "01100110" // /* MW 8 */ + 5193 "00000000" // /* MW 7 */ + 5194 "00000000" // /* MW 6 */ + 5195 "01101100" // /* MW 5 */ + 5196 "00010100" // /* MW 4 */ + 5197 "01110010" // /* MW 3 */ + 5198 "11001101" // /* MW 2 */ + 5199 "10100010" // /* MW 1 */ +.label ZLE_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_944 +.src_ref 7 "mmul_bf16_bf16.hpp" 114 29 first +.end_of_loop +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5200 "11100001" // NOPA; NOPB; VCONV.bfp16ebs8.fp32 ex3, dm4;NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5201 "00000000" // /* MW 15 */ + 5202 "00000000" // /* MW 14 */ + 5203 "01111000" // /* MW 13 */ + 5204 "10100101" // /* MW 12 */ + 5205 "00000001" // /* MW 11 */ + 5206 "00000000" // /* MW 10 */ + 5207 "00000000" // /* MW 9 */ + 5208 "00000000" // /* MW 8 */ + 5209 "00110110" // /* MW 7 */ + 5210 "10001010" // /* MW 6 */ + 5211 "00100001" // /* MW 5 */ + 5212 "00000000" // /* MW 4 */ + 5213 "11110000" // /* MW 3 */ + 5214 "00101100" // /* MW 2 */ + 5215 "00000000" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 313 19 +.src_ref 7 "accum.hpp" 940 83 first +.src_ref 7 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 2 "gemm_bfp16.h" 204 6 +.src_ref 2 "gemm_bfp16.h" 205 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 5216 "10001011" // VLDA.CONV.fp32.bf16 cmh4, [p6, #64];VLDB x4, [p7, #64]; MOVS p4, p1; NOPX; MOV p5, p6; VMUL.f dm4, y4, y5, r22 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5217 "10001010" // /* MW 15 */ + 5218 "10100111" // /* MW 14 */ + 5219 "01111101" // /* MW 13 */ + 5220 "01100000" // /* MW 12 */ + 5221 "10110110" // /* MW 11 */ + 5222 "00000010" // /* MW 10 */ + 5223 "00000000" // /* MW 9 */ + 5224 "00000000" // /* MW 8 */ + 5225 "10001011" // /* MW 7 */ + 5226 "10000100" // /* MW 6 */ + 5227 "01101100" // /* MW 5 */ + 5228 "00101010" // /* MW 4 */ + 5229 "01111110" // /* MW 3 */ + 5230 "11001101" // /* MW 2 */ + 5231 "11000010" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 6 "array_helpers.hpp" 313 19 first +.src_ref 7 "accum.hpp" 903 19 first +.src_ref 7 "accum.hpp" 940 83 +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 8 "transpose.hpp" 224 15 first +.src_ref 2 "gemm_bfp16.h" 203 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5232 "01001011" // VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;PADDB [p5], m5; MOVS p0, p1; NOPX; VSHUFFLE x6, x7, x4, r19; VMAC.f dm3, dm3, ex0, ex1, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5233 "00000001" // /* MW 15 */ + 5234 "01011011" // /* MW 14 */ + 5235 "00111100" // /* MW 13 */ + 5236 "00100110" // /* MW 12 */ + 5237 "10011101" // /* MW 11 */ + 5238 "00000001" // /* MW 10 */ + 5239 "00000000" // /* MW 9 */ + 5240 "00000000" // /* MW 8 */ + 5241 "10001011" // /* MW 7 */ + 5242 "10000100" // /* MW 6 */ + 5243 "00100000" // /* MW 5 */ + 5244 "01010111" // /* MW 4 */ + 5245 "01111011" // /* MW 3 */ + 5246 "01000101" // /* MW 2 */ + 5247 "11000011" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1365 19 first +.src_ref 7 "mmul_bf16_bf16.hpp" 113 29 first +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 +.src_ref 8 "transpose.hpp" 225 15 first +.src_ref 2 "gemm_bfp16.h" 202 6 first +.src_ref 2 "gemm_bfp16.h" 203 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5248 "01001011" // PADDA [p0], m3; PADDB [p1], m7; VCONV.bfp16ebs8.fp32 ex0, dm4;NOPX; VSHUFFLE x7, x7, x4, r21; VMAC.f dm1, dm1, ex2, ex1, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5249 "00100001" // /* MW 15 */ + 5250 "01001001" // /* MW 14 */ + 5251 "00111100" // /* MW 13 */ + 5252 "00101010" // /* MW 12 */ + 5253 "11011101" // /* MW 11 */ + 5254 "00000001" // /* MW 10 */ + 5255 "00000000" // /* MW 9 */ + 5256 "00000000" // /* MW 8 */ + 5257 "00110110" // /* MW 7 */ + 5258 "00001010" // /* MW 6 */ + 5259 "00100000" // /* MW 5 */ + 5260 "11010111" // /* MW 4 */ + 5261 "11110011" // /* MW 3 */ + 5262 "00001100" // /* MW 2 */ + 5263 "00001101" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 114 29 first +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 2 "gemm_bfp16.h" 176 6 +.src_ref 2 "gemm_bfp16.h" 204 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5264 "01001010" // VCONV.bfp16ebs8.fp32 ex1, dm4; MOV m1, r24; VMAC.f dm2, dm2, ex0, ex3, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5265 "01101001" // /* MW 9 */ + 5266 "01000000" // /* MW 8 */ + 5267 "10001010" // /* MW 7 */ + 5268 "11100100" // /* MW 6 */ + 5269 "00100000" // /* MW 5 */ + 5270 "00001100" // /* MW 4 */ + 5271 "11000001" // /* MW 3 */ + 5272 "01000110" // /* MW 2 */ + 5273 "00010001" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 7 "accum.hpp" 940 83 first +.src_ref 7 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 8 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5274 "01001010" // VLDA.CONV.fp32.bf16 cml4, [p5]; VSHUFFLE x8, x9, x5, r19; VMUL.f dm4, y3, y5, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5275 "01010001" // /* MW 9 */ + 5276 "11101101" // /* MW 8 */ + 5277 "10110100" // /* MW 7 */ + 5278 "01100010" // /* MW 6 */ + 5279 "11001100" // /* MW 5 */ + 5280 "01001010" // /* MW 4 */ + 5281 "01110100" // /* MW 3 */ + 5282 "11000101" // /* MW 2 */ + 5283 "10100000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1365 19 first +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 7 "accum.hpp" 903 19 first +.src_ref 7 "accum.hpp" 940 83 +.src_ref 7 "mmul_bf16_bf16.hpp" 113 29 first +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 8 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5284 "01101110" // VLDA.CONV.fp32.bf16 cmh4, [p5, #64]; VCONV.bfp16ebs8.fp32 ex2, dm4; VSHUFFLE x9, x9, x5, r21; VMAC.f dm0, dm0, ex2, ex3, r17 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5285 "01101001" // /* MW 13 */ + 5286 "00000100" // /* MW 12 */ + 5287 "10001000" // /* MW 11 */ + 5288 "10100011" // /* MW 10 */ + 5289 "01010110" // /* MW 9 */ + 5290 "01100110" // /* MW 8 */ + 5291 "00000000" // /* MW 7 */ + 5292 "00000000" // /* MW 6 */ + 5293 "01101100" // /* MW 5 */ + 5294 "00010100" // /* MW 4 */ + 5295 "01110010" // /* MW 3 */ + 5296 "11001101" // /* MW 2 */ + 5297 "10100010" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 7 "mmul_bf16_bf16.hpp" 114 29 first +.src_ref 2 "gemm_bfp16.h" 204 6 first +.src_ref 2 "gemm_bfp16.h" 205 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5298 "10111010" // PADDB [p4], m1; VCONV.bfp16ebs8.fp32 ex3, dm4; MOV p5, p4 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5299 "01110110" // /* MW 9 */ + 5300 "01100000" // /* MW 8 */ + 5301 "10110100" // /* MW 7 */ + 5302 "00000010" // /* MW 6 */ + 5303 "10010000" // /* MW 5 */ + 5304 "00101011" // /* MW 4 */ + 5305 "11000100" // /* MW 3 */ + 5306 "01000110" // /* MW 2 */ + 5307 "00110001" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5308 "01100010" // MOV m2, r18; VMAC.f dm3, dm3, ex0, ex1, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5309 "00101001" // /* MW 7 */ + 5310 "01100000" // /* MW 6 */ + 5311 "10001011" // /* MW 5 */ + 5312 "11100110" // /* MW 4 */ + 5313 "00100000" // /* MW 3 */ + 5314 "00001001" // /* MW 2 */ + 5315 "00000010" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 7 "accum.hpp" 940 83 first +.src_ref 7 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 7 "mmul_bf16_bf16.hpp" 113 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5316 "01001010" // VLDA.CONV.fp32.bf16 cmh4, [p6, #64]; VCONV.bfp16ebs8.fp32 ex0, dm4; VMUL.f dm4, y4, y5, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5317 "01010001" // /* MW 9 */ + 5318 "11110001" // /* MW 8 */ + 5319 "10110100" // /* MW 7 */ + 5320 "00001001" // /* MW 6 */ + 5321 "00110110" // /* MW 5 */ + 5322 "00001010" // /* MW 4 */ + 5323 "01110000" // /* MW 3 */ + 5324 "11001101" // /* MW 2 */ + 5325 "11000010" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5326 "01001000" // VMAC.f dm1, dm1, ex2, ex1, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5327 "00101001" // /* MW 3 */ + 5328 "00100100" // /* MW 2 */ + 5329 "10001001" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 114 29 first +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5330 "01100010" // VCONV.bfp16ebs8.fp32 ex1, dm4; VMAC.f dm2, dm2, ex0, ex3, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5331 "01101001" // /* MW 7 */ + 5332 "01000000" // /* MW 6 */ + 5333 "10001010" // /* MW 5 */ + 5334 "00000010" // /* MW 4 */ + 5335 "11000000" // /* MW 3 */ + 5336 "01000110" // /* MW 2 */ + 5337 "00010001" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5338 "01001000" // VMAC.f dm0, dm0, ex2, ex3, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5339 "01101001" // /* MW 3 */ + 5340 "00000100" // /* MW 2 */ + 5341 "10001000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5343 "00000000" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 113 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5344 "00011000" // VCONV.bfp16ebs8.fp32 ex2, dm4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5345 "00110110" // /* MW 3 */ + 5346 "00001010" // /* MW 2 */ + 5347 "00001001" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 114 29 first +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5348 "01100010" // VCONV.bfp16ebs8.fp32 ex3, dm4; VMAC.f dm3, dm3, ex0, ex1, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5349 "00101001" // /* MW 7 */ + 5350 "01100000" // /* MW 6 */ + 5351 "10001011" // /* MW 5 */ + 5352 "00000010" // /* MW 4 */ + 5353 "11000000" // /* MW 3 */ + 5354 "01000110" // /* MW 2 */ + 5355 "00110001" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5357 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5359 "00000000" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5360 "01001000" // VMAC.f dm1, dm1, ex2, ex1, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5361 "00101001" // /* MW 3 */ + 5362 "00100100" // /* MW 2 */ + 5363 "10001001" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5364 "01001000" // VMAC.f dm2, dm2, ex0, ex3, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5365 "01101001" // /* MW 3 */ + 5366 "01000000" // /* MW 2 */ + 5367 "10001010" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5368 "01001000" // VMAC.f dm0, dm0, ex2, ex3, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5369 "01101001" // /* MW 3 */ + 5370 "00000100" // /* MW 2 */ + 5371 "10001000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 202 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5372 "10011000" // VST bmlh3, [p1, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5373 "10100110" // /* MW 3 */ + 5374 "00010101" // /* MW 2 */ + 5375 "00001001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 202 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5376 "10011000" // VST bmhl3, [p1, #128] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5377 "11000110" // /* MW 3 */ + 5378 "00100101" // /* MW 2 */ + 5379 "00001001" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 202 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5380 "10011000" // VST bmhh3, [p1, #192] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5381 "11100110" // /* MW 3 */ + 5382 "00110101" // /* MW 2 */ + 5383 "00001001" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 2 "gemm_bfp16.h" 202 6 +.src_ref 2 "gemm_bfp16.h" 205 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5384 "10111010" // PADDB [p5], m6; VST bmll3, [p1]; MOV p1, p5 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5385 "01110110" // /* MW 9 */ + 5386 "01100000" // /* MW 8 */ + 5387 "10110101" // /* MW 7 */ + 5388 "00000000" // /* MW 6 */ + 5389 "10010000" // /* MW 5 */ + 5390 "11001011" // /* MW 4 */ + 5391 "11010101" // /* MW 3 */ + 5392 "10110000" // /* MW 2 */ + 5393 "00100000" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 first +.src_ref 2 "gemm_bfp16.h" 175 6 +.src_ref 2 "gemm_bfp16.h" 203 6 +.src_ref 2 "gemm_bfp16.h" 203 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5394 "10111010" // PADDB.2D [p1], d2; VST bmlh2, [p0, #64]; MOV m2, m3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5395 "01110110" // /* MW 9 */ + 5396 "00000000" // /* MW 8 */ + 5397 "00000011" // /* MW 7 */ + 5398 "00000001" // /* MW 6 */ + 5399 "10010000" // /* MW 5 */ + 5400 "01010011" // /* MW 4 */ + 5401 "11010001" // /* MW 3 */ + 5402 "10100100" // /* MW 2 */ + 5403 "00000010" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 203 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5404 "10011000" // VST bmhl2, [p0, #128] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5405 "01000110" // /* MW 3 */ + 5406 "00100101" // /* MW 2 */ + 5407 "00001000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 203 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5408 "10011000" // VST bmhh2, [p0, #192] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5409 "01100110" // /* MW 3 */ + 5410 "00110101" // /* MW 2 */ + 5411 "00001000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 175 6 +.src_ref 2 "gemm_bfp16.h" 203 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5412 "00000010" // VST bmll2, [p0]; MOV p0, p3 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5413 "01110000" // /* MW 7 */ + 5414 "01100000" // /* MW 6 */ + 5415 "00110011" // /* MW 5 */ + 5416 "00000000" // /* MW 4 */ + 5417 "11010000" // /* MW 3 */ + 5418 "10100000" // /* MW 2 */ + 5419 "00000000" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 2 "gemm_bfp16.h" 175 6 first +.src_ref 2 "gemm_bfp16.h" 204 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5420 "10111010" // PADDB [p0], m3; VST bmlh1, [p4, #64]; MOV m3, r18 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5421 "01110110" // /* MW 9 */ + 5422 "10010000" // /* MW 8 */ + 5423 "10000100" // /* MW 7 */ + 5424 "00000001" // /* MW 6 */ + 5425 "10010000" // /* MW 5 */ + 5426 "01101011" // /* MW 4 */ + 5427 "11010000" // /* MW 3 */ + 5428 "10010100" // /* MW 2 */ + 5429 "10000010" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 175 6 +.src_ref 2 "gemm_bfp16.h" 204 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5430 "00001100" // VLDA bmlh2, [p0, #64]; VST bmhl1, [p4, #128] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5431 "10001101" // /* MW 5 */ + 5432 "01001001" // /* MW 4 */ + 5433 "10111000" // /* MW 3 */ + 5434 "10100110" // /* MW 2 */ + 5435 "00000010" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 172 2 first +.src_ref 2 "gemm_bfp16.h" 175 6 +.src_ref 2 "gemm_bfp16.h" 204 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5436 "01111010" // VLDA bmhl2, [p0, #128]; VST bmhh1, [p4, #192]; JNZD r23, r23, p2 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 5437 "10100000" // /* MW 9 */ + 5438 "11101110" // /* MW 8 */ + 5439 "00000101" // /* MW 7 */ + 5440 "10000000" // /* MW 6 */ + 5441 "11100110" // /* MW 5 */ + 5442 "00110100" // /* MW 4 */ + 5443 "10110100" // /* MW 3 */ + 5444 "10101010" // /* MW 2 */ + 5445 "00000100" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 2 "gemm_bfp16.h" 174 6 +.src_ref 2 "gemm_bfp16.h" 175 6 first +.src_ref 2 "gemm_bfp16.h" 176 6 +.src_ref 2 "gemm_bfp16.h" 177 6 first +.src_ref 2 "gemm_bfp16.h" 204 6 first +.delay_slot +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5446 "11110110" // VLDA bmhh2, [p0, #192]; PADDB [p3], m6; VST bmll1, [p4]; MOV p4, p3 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5447 "01110000" // /* MW 11 */ + 5448 "01100000" // /* MW 10 */ + 5449 "00110011" // /* MW 9 */ + 5450 "10000010" // /* MW 8 */ + 5451 "10000110" // /* MW 7 */ + 5452 "00000100" // /* MW 6 */ + 5453 "00100100" // /* MW 5 */ + 5454 "10010111" // /* MW 4 */ + 5455 "10110111" // /* MW 3 */ + 5456 "10101110" // /* MW 2 */ + 5457 "00000110" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 177 6 +.src_ref 2 "gemm_bfp16.h" 205 6 first +.delay_slot +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5458 "00001100" // VLDA bmlh0, [p3, #64]; VST bmlh0, [p5, #64] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5459 "01001101" // /* MW 5 */ + 5460 "00101000" // /* MW 4 */ + 5461 "10111010" // /* MW 3 */ + 5462 "10000110" // /* MW 2 */ + 5463 "01100010" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 177 6 first +.src_ref 2 "gemm_bfp16.h" 205 6 +.delay_slot +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5464 "00001100" // VLDA bmhl0, [p3, #128]; VST bmhl0, [p5, #128] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5465 "10001101" // /* MW 5 */ + 5466 "01001000" // /* MW 4 */ + 5467 "10111010" // /* MW 3 */ + 5468 "10001010" // /* MW 2 */ + 5469 "01100100" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 177 6 +.src_ref 2 "gemm_bfp16.h" 205 6 first +.delay_slot +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5470 "00001100" // VLDA bmhh0, [p3, #192]; VST bmhh0, [p5, #192] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5471 "11001101" // /* MW 5 */ + 5472 "01101000" // /* MW 4 */ + 5473 "10111010" // /* MW 3 */ + 5474 "10001110" // /* MW 2 */ + 5475 "01100110" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 2 "gemm_bfp16.h" 174 6 +.src_ref 2 "gemm_bfp16.h" 176 6 first +.src_ref 2 "gemm_bfp16.h" 177 6 first +.src_ref 2 "gemm_bfp16.h" 205 6 +.delay_slot +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5476 "11110110" // VLDA bmll0, [p3]; PADDB [p4], m1; VST bmll0, [p5]; MOV p5, p4 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5477 "01110000" // /* MW 11 */ + 5478 "01100000" // /* MW 10 */ + 5479 "10110100" // /* MW 9 */ + 5480 "10000010" // /* MW 8 */ + 5481 "00000110" // /* MW 7 */ + 5482 "00000100" // /* MW 6 */ + 5483 "00100101" // /* MW 5 */ + 5484 "01010111" // /* MW 4 */ + 5485 "10111000" // /* MW 3 */ + 5486 "10000010" // /* MW 2 */ + 5487 "01100000" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 5 "aie_core.h" 100 15 +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 2 "gemm_bfp16.h" 174 6 first +.src_ref 2 "gemm_bfp16.h" 176 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 5488 "11110110" // VLDA bmlh1, [p4, #64]; PADDB [p5], m7; MOVS p3, p5; MOV m1, r16 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5489 "01110000" // /* MW 11 */ + 5490 "00010000" // /* MW 10 */ + 5491 "10000100" // /* MW 9 */ + 5492 "00000000" // /* MW 8 */ + 5493 "10001011" // /* MW 7 */ + 5494 "10010100" // /* MW 6 */ + 5495 "00100011" // /* MW 5 */ + 5496 "11010111" // /* MW 4 */ + 5497 "10111011" // /* MW 3 */ + 5498 "10010110" // /* MW 2 */ + 5499 "10000010" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 first +.src_ref 2 "gemm_bfp16.h" 176 6 first +.src_ref 2 "gemm_bfp16.h" 182 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5500 "10111010" // VLDA bmhl1, [p4, #128]; PADDB.2D [p3], d3; ADD.NC lc, r26, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5501 "01001110" // /* MW 9 */ + 5502 "10111111" // /* MW 8 */ + 5503 "10111110" // /* MW 7 */ + 5504 "00000010" // /* MW 6 */ + 5505 "10010000" // /* MW 5 */ + 5506 "01110011" // /* MW 4 */ + 5507 "10110011" // /* MW 3 */ + 5508 "10011010" // /* MW 2 */ + 5509 "10000100" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 176 6 +.src_ref 2 "gemm_bfp16.h" 182 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5510 "10111010" // VLDA bmhh1, [p4, #192]; MOVXM ls, #5696 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5511 "00010000" // /* MW 9 */ + 5512 "00100000" // /* MW 8 */ + 5513 "01111011" // /* MW 7 */ + 5514 "00000100" // /* MW 6 */ + 5515 "00000000" // /* MW 5 */ + 5516 "00000000" // /* MW 4 */ + 5517 "10110000" // /* MW 3 */ + 5518 "10011110" // /* MW 2 */ + 5519 "10000110" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 313 19 +.src_ref 2 "gemm_bfp16.h" 176 6 +.src_ref 2 "gemm_bfp16.h" 182 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5520 "01111110" // VLDA bmll1, [p4]; VLDB.3D x7, [p7], d1; MOVS p4, p7; MOVXM le, #5776 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5521 "01100000" // /* MW 13 */ + 5522 "10010001" // /* MW 12 */ + 5523 "10010011" // /* MW 11 */ + 5524 "00000010" // /* MW 10 */ + 5525 "01101001" // /* MW 9 */ + 5526 "10110111" // /* MW 8 */ + 5527 "00000000" // /* MW 7 */ + 5528 "00000000" // /* MW 6 */ + 5529 "11101000" // /* MW 5 */ + 5530 "01110011" // /* MW 4 */ + 5531 "10111110" // /* MW 3 */ + 5532 "10010010" // /* MW 2 */ + 5533 "10000000" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 313 19 first +.src_ref 2 "gemm_bfp16.h" 174 6 first +.src_ref 2 "gemm_bfp16.h" 175 6 +.src_ref 2 "gemm_bfp16.h" 203 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5534 "10111010" // VLDA bmlh3, [p5, #64]; PADDB [p4], m4; MOV m3, m2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5535 "01111110" // /* MW 9 */ + 5536 "00000000" // /* MW 8 */ + 5537 "10000010" // /* MW 7 */ + 5538 "00000001" // /* MW 6 */ + 5539 "10010000" // /* MW 5 */ + 5540 "10001011" // /* MW 4 */ + 5541 "10110100" // /* MW 3 */ + 5542 "10110110" // /* MW 2 */ + 5543 "10100010" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 313 19 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5544 "00110010" // VLDA x9, [p4]; VLDB x5, [p4, #64]; MOVS p4, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5545 "10001011" // /* MW 7 */ + 5546 "10011100" // /* MW 6 */ + 5547 "11101100" // /* MW 5 */ + 5548 "00101010" // /* MW 4 */ + 5549 "01111000" // /* MW 3 */ + 5550 "11001011" // /* MW 2 */ + 5551 "10000000" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 6 "array_helpers.hpp" 313 19 first +.src_ref 2 "gemm_bfp16.h" 174 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5552 "00110010" // VLDA bmhl3, [p5, #128]; VLDB x4, [p7, #64]; PADDS [p4], m4 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5553 "01011011" // /* MW 7 */ + 5554 "10001000" // /* MW 6 */ + 5555 "01101100" // /* MW 5 */ + 5556 "00101010" // /* MW 4 */ + 5557 "10111110" // /* MW 3 */ + 5558 "10111010" // /* MW 2 */ + 5559 "10100100" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 2 "gemm_bfp16.h" 174 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5560 "00111100" // VLDA bmhh3, [p5, #192]; VLDB.3D x7, [p7], d1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5561 "11101000" // /* MW 5 */ + 5562 "01110011" // /* MW 4 */ + 5563 "10111110" // /* MW 3 */ + 5564 "10111110" // /* MW 2 */ + 5565 "10100110" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 6 "array_helpers.hpp" 313 19 +.src_ref 2 "gemm_bfp16.h" 174 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5566 "10111010" // VLDA bmll3, [p5]; VLDB x5, [p4, #64]; MOV p5, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5567 "01111110" // /* MW 9 */ + 5568 "01100000" // /* MW 8 */ + 5569 "10110110" // /* MW 7 */ + 5570 "00000010" // /* MW 6 */ + 5571 "01110100" // /* MW 5 */ + 5572 "00010101" // /* MW 4 */ + 5573 "10110100" // /* MW 3 */ + 5574 "10110010" // /* MW 2 */ + 5575 "10100000" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 313 19 first +.src_ref 7 "accum.hpp" 903 19 first +.src_ref 7 "accum.hpp" 940 83 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5576 "00111100" // VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;PADDB [p5], m5 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5577 "00100000" // /* MW 5 */ + 5578 "01010111" // /* MW 4 */ + 5579 "01111011" // /* MW 3 */ + 5580 "01000101" // /* MW 2 */ + 5581 "11000011" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 8 "transpose.hpp" 224 15 first +.src_ref 2 "gemm_bfp16.h" 175 6 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5582 "10111010" // VLDA bmll2, [p0]; VLDB x9, [p4]; VSHUFFLE x6, x7, x4, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5583 "00111110" // /* MW 9 */ + 5584 "00100110" // /* MW 8 */ + 5585 "10011101" // /* MW 7 */ + 5586 "00000001" // /* MW 6 */ + 5587 "01110100" // /* MW 5 */ + 5588 "00000110" // /* MW 4 */ + 5589 "10110100" // /* MW 3 */ + 5590 "10100010" // /* MW 2 */ + 5591 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1365 19 first +.src_ref 8 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5592 "01111000" // VSHUFFLE x7, x7, x4, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5593 "01010100" // /* MW 3 */ + 5594 "10111010" // /* MW 2 */ + 5595 "00011011" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 313 19 +.src_ref 8 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5596 "10111010" // VLDB x4, [p7, #64]; MOVS p4, p7; VSHUFFLE x8, x9, x5, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5597 "00110110" // /* MW 9 */ + 5598 "01100110" // /* MW 8 */ + 5599 "00100101" // /* MW 7 */ + 5600 "00000010" // /* MW 6 */ + 5601 "00110100" // /* MW 5 */ + 5602 "00010101" // /* MW 4 */ + 5603 "01100111" // /* MW 3 */ + 5604 "10010001" // /* MW 2 */ + 5605 "10010011" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1365 19 first +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 6 "array_helpers.hpp" 313 19 first +.src_ref 7 "accum.hpp" 940 83 first +.src_ref 7 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 8 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5606 "01100110" // VLDA.CONV.fp32.bf16 cml4, [p5];PADDB [p4], m4; VSHUFFLE x9, x9, x5, r21; VMUL.f dm4, y3, y5, r22 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5607 "01010001" // /* MW 11 */ + 5608 "11101101" // /* MW 10 */ + 5609 "10110100" // /* MW 9 */ + 5610 "01100010" // /* MW 8 */ + 5611 "11010100" // /* MW 7 */ + 5612 "11001010" // /* MW 6 */ + 5613 "00100100" // /* MW 5 */ + 5614 "00010111" // /* MW 4 */ + 5615 "01111001" // /* MW 3 */ + 5616 "11000101" // /* MW 2 */ + 5617 "10100000" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 313 19 +.src_ref 7 "accum.hpp" 903 19 first +.src_ref 7 "accum.hpp" 940 83 +.src_ref 8 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5618 "11110110" // VLDA.CONV.fp32.bf16 cmh4, [p5, #64];VLDB x5, [p4, #64]; MOVS p5, p6; VSHUFFLE x6, x7, x4, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5619 "00110000" // /* MW 11 */ + 5620 "00100110" // /* MW 10 */ + 5621 "10011101" // /* MW 9 */ + 5622 "00000001" // /* MW 8 */ + 5623 "10001011" // /* MW 7 */ + 5624 "10011000" // /* MW 6 */ + 5625 "11101101" // /* MW 5 */ + 5626 "00101010" // /* MW 4 */ + 5627 "01111000" // /* MW 3 */ + 5628 "11001101" // /* MW 2 */ + 5629 "10100010" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1365 19 first +.src_ref 6 "array_helpers.hpp" 313 19 first +.src_ref 8 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5630 "11010100" // PADDA [p5], m5; VSHUFFLE x7, x7, x4, r21 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5631 "10101000" // /* MW 5 */ + 5632 "01110100" // /* MW 4 */ + 5633 "11110111" // /* MW 3 */ + 5634 "00001100" // /* MW 2 */ + 5635 "10110101" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 7 "accum.hpp" 940 83 first +.src_ref 7 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 7 "mmul_bf16_bf16.hpp" 113 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5636 "01001010" // VLDA.CONV.fp32.bf16 cmh4, [p6, #64]; VCONV.bfp16ebs8.fp32 ex0, dm4; VMUL.f dm4, y4, y5, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5637 "01010001" // /* MW 9 */ + 5638 "11110001" // /* MW 8 */ + 5639 "10110100" // /* MW 7 */ + 5640 "00001001" // /* MW 6 */ + 5641 "00110110" // /* MW 5 */ + 5642 "00001010" // /* MW 4 */ + 5643 "01110000" // /* MW 3 */ + 5644 "11001101" // /* MW 2 */ + 5645 "11000010" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 7 "accum.hpp" 903 19 first +.src_ref 7 "accum.hpp" 940 83 +.src_ref 8 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5646 "10111010" // VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;VLDB.3D x7, [p7], d1; VSHUFFLE x8, x9, x5, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5647 "00111110" // /* MW 9 */ + 5648 "01100110" // /* MW 8 */ + 5649 "00100101" // /* MW 7 */ + 5650 "00000010" // /* MW 6 */ + 5651 "11110100" // /* MW 5 */ + 5652 "00111001" // /* MW 4 */ + 5653 "01110111" // /* MW 3 */ + 5654 "01000101" // /* MW 2 */ + 5655 "11000011" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1365 19 first +.src_ref 8 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5656 "01111000" // VSHUFFLE x9, x9, x5, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5657 "11010100" // /* MW 3 */ + 5658 "11001010" // /* MW 2 */ + 5659 "00011100" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 114 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5660 "00011000" // VCONV.bfp16ebs8.fp32 ex1, dm4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5661 "00110110" // /* MW 3 */ + 5662 "10001010" // /* MW 2 */ + 5663 "00001000" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 7 "accum.hpp" 940 83 first +.src_ref 7 "mmul_bf16_bf16.hpp" 111 36 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5664 "01001010" // VLDA.CONV.fp32.bf16 cml4, [p5];VLDB x9, [p4]; VMUL.f dm4, y3, y5, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5665 "01010001" // /* MW 9 */ + 5666 "11101101" // /* MW 8 */ + 5667 "10110100" // /* MW 7 */ + 5668 "00011101" // /* MW 6 */ + 5669 "01110100" // /* MW 5 */ + 5670 "00000110" // /* MW 4 */ + 5671 "01110100" // /* MW 3 */ + 5672 "11000101" // /* MW 2 */ + 5673 "10100000" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 7 "accum.hpp" 903 19 first +.src_ref 7 "accum.hpp" 940 83 +.src_ref 7 "mmul_bf16_bf16.hpp" 113 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5674 "00001100" // VLDA.CONV.fp32.bf16 cmh4, [p5, #64]; VCONV.bfp16ebs8.fp32 ex2, dm4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5675 "01101100" // /* MW 5 */ + 5676 "00010100" // /* MW 4 */ + 5677 "01110010" // /* MW 3 */ + 5678 "11001101" // /* MW 2 */ + 5679 "10100010" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 114 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5680 "11100001" // NOPA; NOPB; VCONV.bfp16ebs8.fp32 ex3, dm4;NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5681 "00000000" // /* MW 15 */ + 5682 "00000000" // /* MW 14 */ + 5683 "01111000" // /* MW 13 */ + 5684 "10100101" // /* MW 12 */ + 5685 "00000001" // /* MW 11 */ + 5686 "00000000" // /* MW 10 */ + 5687 "00000000" // /* MW 9 */ + 5688 "00000000" // /* MW 8 */ + 5689 "00110110" // /* MW 7 */ + 5690 "10001010" // /* MW 6 */ + 5691 "00100001" // /* MW 5 */ + 5692 "00000000" // /* MW 4 */ + 5693 "11110000" // /* MW 3 */ + 5694 "00101100" // /* MW 2 */ + 5695 "00000000" // /* MW 1 */ +.label ZLS_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_1440 +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 313 19 +.src_ref 7 "mmul_bf16_bf16.hpp" 111 36 first +.begin_of_loop +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 5696 "01001010" // VLDB x4, [p7, #64]; MOV p5, p6; VMUL.f dm4, y4, y5, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5697 "01010001" // /* MW 9 */ + 5698 "11110001" // /* MW 8 */ + 5699 "10110100" // /* MW 7 */ + 5700 "11100110" // /* MW 6 */ + 5701 "11000000" // /* MW 5 */ + 5702 "01101100" // /* MW 4 */ + 5703 "01101101" // /* MW 3 */ + 5704 "00101010" // /* MW 2 */ + 5705 "00001110" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 6 "array_helpers.hpp" 313 19 +.src_ref 7 "accum.hpp" 940 83 first +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 8 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5706 "01001011" // VLDA.CONV.fp32.bf16 cmh4, [p6, #64];VLDB.3D x7, [p7], d1; MOVS p4, p7; NOPX; VSHUFFLE x6, x7, x4, r19; VMAC.f dm3, dm3, ex0, ex1, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5707 "00000001" // /* MW 15 */ + 5708 "01011011" // /* MW 14 */ + 5709 "00111100" // /* MW 13 */ + 5710 "00100110" // /* MW 12 */ + 5711 "10011101" // /* MW 11 */ + 5712 "00000001" // /* MW 10 */ + 5713 "00000000" // /* MW 9 */ + 5714 "00000000" // /* MW 8 */ + 5715 "10001011" // /* MW 7 */ + 5716 "10011100" // /* MW 6 */ + 5717 "11101100" // /* MW 5 */ + 5718 "01110011" // /* MW 4 */ + 5719 "01111110" // /* MW 3 */ + 5720 "11001101" // /* MW 2 */ + 5721 "11000010" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 +.src_ref 8 "vector.hpp" 1365 19 first +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 313 19 first +.src_ref 7 "accum.hpp" 903 19 first +.src_ref 7 "accum.hpp" 940 83 +.src_ref 7 "mmul_bf16_bf16.hpp" 113 29 first +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 +.src_ref 8 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5722 "01001011" // VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;PADDB [p4], m4; VCONV.bfp16ebs8.fp32 ex0, dm4;NOPX; VSHUFFLE x7, x7, x4, r21; VMAC.f dm1, dm1, ex2, ex1, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5723 "00100001" // /* MW 15 */ + 5724 "01001001" // /* MW 14 */ + 5725 "00111100" // /* MW 13 */ + 5726 "00101010" // /* MW 12 */ + 5727 "11011101" // /* MW 11 */ + 5728 "00000001" // /* MW 10 */ + 5729 "00000000" // /* MW 9 */ + 5730 "00000000" // /* MW 8 */ + 5731 "00110110" // /* MW 7 */ + 5732 "00001010" // /* MW 6 */ + 5733 "00100000" // /* MW 5 */ + 5734 "00010111" // /* MW 4 */ + 5735 "01111001" // /* MW 3 */ + 5736 "01000101" // /* MW 2 */ + 5737 "11000011" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 6 "array_helpers.hpp" 313 19 +.src_ref 7 "mmul_bf16_bf16.hpp" 114 29 first +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5738 "01100110" // PADDA [p5], m5; VLDB x5, [p4, #64]; VCONV.bfp16ebs8.fp32 ex1, dm4; VMAC.f dm2, dm2, ex0, ex3, r17 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5739 "01101001" // /* MW 11 */ + 5740 "01000000" // /* MW 10 */ + 5741 "10001010" // /* MW 9 */ + 5742 "00001110" // /* MW 8 */ + 5743 "00011011" // /* MW 7 */ + 5744 "01000101" // /* MW 6 */ + 5745 "11101000" // /* MW 5 */ + 5746 "00101010" // /* MW 4 */ + 5747 "11111000" // /* MW 3 */ + 5748 "00001100" // /* MW 2 */ + 5749 "10110101" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 7 "accum.hpp" 940 83 first +.src_ref 7 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 8 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5750 "01100110" // VLDA.CONV.fp32.bf16 cml4, [p5];VLDB x9, [p4]; VSHUFFLE x8, x9, x5, r19; VMUL.f dm4, y3, y5, r22 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5751 "01010001" // /* MW 11 */ + 5752 "11101101" // /* MW 10 */ + 5753 "10110100" // /* MW 9 */ + 5754 "01100010" // /* MW 8 */ + 5755 "11001100" // /* MW 7 */ + 5756 "01001010" // /* MW 6 */ + 5757 "11101100" // /* MW 5 */ + 5758 "00001100" // /* MW 4 */ + 5759 "01111000" // /* MW 3 */ + 5760 "11000101" // /* MW 2 */ + 5761 "10100000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1365 19 first +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 7 "accum.hpp" 903 19 first +.src_ref 7 "accum.hpp" 940 83 +.src_ref 7 "mmul_bf16_bf16.hpp" 113 29 first +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 8 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5762 "01101110" // VLDA.CONV.fp32.bf16 cmh4, [p5, #64]; VCONV.bfp16ebs8.fp32 ex2, dm4; VSHUFFLE x9, x9, x5, r21; VMAC.f dm0, dm0, ex2, ex3, r17 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5763 "01101001" // /* MW 13 */ + 5764 "00000100" // /* MW 12 */ + 5765 "10001000" // /* MW 11 */ + 5766 "10100011" // /* MW 10 */ + 5767 "01010110" // /* MW 9 */ + 5768 "01100110" // /* MW 8 */ + 5769 "00000000" // /* MW 7 */ + 5770 "00000000" // /* MW 6 */ + 5771 "01101100" // /* MW 5 */ + 5772 "00010100" // /* MW 4 */ + 5773 "01110010" // /* MW 3 */ + 5774 "11001101" // /* MW 2 */ + 5775 "10100010" // /* MW 1 */ +.label ZLE_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_1520 +.src_ref 7 "mmul_bf16_bf16.hpp" 114 29 first +.end_of_loop +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5776 "11100001" // NOPA; NOPB; VCONV.bfp16ebs8.fp32 ex3, dm4;NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5777 "00000000" // /* MW 15 */ + 5778 "00000000" // /* MW 14 */ + 5779 "01111000" // /* MW 13 */ + 5780 "10100101" // /* MW 12 */ + 5781 "00000001" // /* MW 11 */ + 5782 "00000000" // /* MW 10 */ + 5783 "00000000" // /* MW 9 */ + 5784 "00000000" // /* MW 8 */ + 5785 "00110110" // /* MW 7 */ + 5786 "10001010" // /* MW 6 */ + 5787 "00100001" // /* MW 5 */ + 5788 "00000000" // /* MW 4 */ + 5789 "11110000" // /* MW 3 */ + 5790 "00101100" // /* MW 2 */ + 5791 "00000000" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 6 "array_helpers.hpp" 313 19 +.src_ref 7 "accum.hpp" 940 83 first +.src_ref 7 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 2 "gemm_bfp16.h" 204 6 +.src_ref 2 "gemm_bfp16.h" 205 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 5792 "01101110" // VLDA.CONV.fp32.bf16 cmh4, [p6, #64]; MOVS p4, p1; MOV p5, p6; VMUL.f dm4, y4, y5, r22 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5793 "01010001" // /* MW 13 */ + 5794 "11110001" // /* MW 12 */ + 5795 "10110100" // /* MW 11 */ + 5796 "00000111" // /* MW 10 */ + 5797 "01100110" // /* MW 9 */ + 5798 "01101011" // /* MW 8 */ + 5799 "00000000" // /* MW 7 */ + 5800 "00000000" // /* MW 6 */ + 5801 "00010110" // /* MW 5 */ + 5802 "00001001" // /* MW 4 */ + 5803 "01111001" // /* MW 3 */ + 5804 "11001101" // /* MW 2 */ + 5805 "11000010" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 6 "array_helpers.hpp" 313 19 first +.src_ref 7 "accum.hpp" 903 19 first +.src_ref 7 "accum.hpp" 940 83 +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 8 "transpose.hpp" 224 15 first +.src_ref 2 "gemm_bfp16.h" 203 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5806 "01001011" // VLDA.3D.CONV.fp32.bf16 cml4, [p6], d0;PADDB [p5], m5; MOVS p0, p1; NOPX; VSHUFFLE x6, x7, x4, r19; VMAC.f dm3, dm3, ex0, ex1, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5807 "00000001" // /* MW 15 */ + 5808 "01011011" // /* MW 14 */ + 5809 "00111100" // /* MW 13 */ + 5810 "00100110" // /* MW 12 */ + 5811 "10011101" // /* MW 11 */ + 5812 "00000001" // /* MW 10 */ + 5813 "00000000" // /* MW 9 */ + 5814 "00000000" // /* MW 8 */ + 5815 "10001011" // /* MW 7 */ + 5816 "10000100" // /* MW 6 */ + 5817 "00100000" // /* MW 5 */ + 5818 "01010111" // /* MW 4 */ + 5819 "01111011" // /* MW 3 */ + 5820 "01000101" // /* MW 2 */ + 5821 "11000011" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1365 19 first +.src_ref 7 "mmul_bf16_bf16.hpp" 113 29 first +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 +.src_ref 8 "transpose.hpp" 225 15 first +.src_ref 2 "gemm_bfp16.h" 202 6 first +.src_ref 2 "gemm_bfp16.h" 268 37 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5822 "01001011" // MOVA dj1, #-304; PADDB [p1], m7; VCONV.bfp16ebs8.fp32 ex0, dm4;NOPX; VSHUFFLE x7, x7, x4, r21; VMAC.f dm1, dm1, ex2, ex1, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5823 "00100001" // /* MW 15 */ + 5824 "01001001" // /* MW 14 */ + 5825 "00111100" // /* MW 13 */ + 5826 "00101010" // /* MW 12 */ + 5827 "11011101" // /* MW 11 */ + 5828 "00000001" // /* MW 10 */ + 5829 "00000000" // /* MW 9 */ + 5830 "00000000" // /* MW 8 */ + 5831 "00110110" // /* MW 7 */ + 5832 "00001010" // /* MW 6 */ + 5833 "00100000" // /* MW 5 */ + 5834 "11010111" // /* MW 4 */ + 5835 "10000011" // /* MW 3 */ + 5836 "00000110" // /* MW 2 */ + 5837 "11011010" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 114 29 first +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 2 "gemm_bfp16.h" 203 6 first +.src_ref 2 "gemm_bfp16.h" 268 37 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5838 "01101110" // PADDA [p0], m3; VCONV.bfp16ebs8.fp32 ex1, dm4; MOV p7, r20; VMAC.f dm2, dm2, ex0, ex3, r17 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5839 "01101001" // /* MW 13 */ + 5840 "01000000" // /* MW 12 */ + 5841 "10001010" // /* MW 11 */ + 5842 "00000111" // /* MW 10 */ + 5843 "01010001" // /* MW 9 */ + 5844 "01111011" // /* MW 8 */ + 5845 "00000000" // /* MW 7 */ + 5846 "00000000" // /* MW 6 */ + 5847 "01101100" // /* MW 5 */ + 5848 "00010100" // /* MW 4 */ + 5849 "11110001" // /* MW 3 */ + 5850 "00001100" // /* MW 2 */ + 5851 "00001101" // /* MW 1 */ +.src_ref 6 "array_helpers.hpp" 252 27 first +.src_ref 7 "accum.hpp" 940 83 first +.src_ref 7 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 8 "transpose.hpp" 224 15 first +.src_ref 2 "gemm_bfp16.h" 268 12 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5852 "01101110" // VLDA.CONV.fp32.bf16 cml4, [p5]; MOVS p6, r25; VSHUFFLE x8, x9, x5, r19; VMUL.f dm4, y3, y5, r22 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5853 "01010001" // /* MW 13 */ + 5854 "11101101" // /* MW 12 */ + 5855 "10110100" // /* MW 11 */ + 5856 "01100011" // /* MW 10 */ + 5857 "01010110" // /* MW 9 */ + 5858 "01100010" // /* MW 8 */ + 5859 "00000000" // /* MW 7 */ + 5860 "00000000" // /* MW 6 */ + 5861 "00010110" // /* MW 5 */ + 5862 "00110010" // /* MW 4 */ + 5863 "01111101" // /* MW 3 */ + 5864 "11000101" // /* MW 2 */ + 5865 "10100000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1365 19 first +.src_ref 6 "array_helpers.hpp" 252 27 +.src_ref 7 "accum.hpp" 903 19 first +.src_ref 7 "accum.hpp" 940 83 +.src_ref 7 "mmul_bf16_bf16.hpp" 113 29 first +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 8 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5866 "01101110" // VLDA.CONV.fp32.bf16 cmh4, [p5, #64]; VCONV.bfp16ebs8.fp32 ex2, dm4; VSHUFFLE x9, x9, x5, r21; VMAC.f dm0, dm0, ex2, ex3, r17 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5867 "01101001" // /* MW 13 */ + 5868 "00000100" // /* MW 12 */ + 5869 "10001000" // /* MW 11 */ + 5870 "10100011" // /* MW 10 */ + 5871 "01010110" // /* MW 9 */ + 5872 "01100110" // /* MW 8 */ + 5873 "00000000" // /* MW 7 */ + 5874 "00000000" // /* MW 6 */ + 5875 "01101100" // /* MW 5 */ + 5876 "00010100" // /* MW 4 */ + 5877 "01110010" // /* MW 3 */ + 5878 "11001101" // /* MW 2 */ + 5879 "10100010" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 114 29 first +.src_ref 2 "gemm_bfp16.h" 176 6 +.src_ref 2 "gemm_bfp16.h" 204 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5880 "00000010" // VCONV.bfp16ebs8.fp32 ex3, dm4; MOV m1, r24 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5881 "01110000" // /* MW 7 */ + 5882 "00010000" // /* MW 6 */ + 5883 "10000110" // /* MW 5 */ + 5884 "00000000" // /* MW 4 */ + 5885 "11000000" // /* MW 3 */ + 5886 "01000110" // /* MW 2 */ + 5887 "00110001" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 2 "gemm_bfp16.h" 204 6 first +.src_ref 2 "gemm_bfp16.h" 205 6 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5888 "01001010" // PADDB [p4], m1; MOV p5, p4; VMAC.f dm3, dm3, ex0, ex1, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5889 "00101001" // /* MW 9 */ + 5890 "01100000" // /* MW 8 */ + 5891 "10001011" // /* MW 7 */ + 5892 "11100110" // /* MW 6 */ + 5893 "11000000" // /* MW 5 */ + 5894 "01101000" // /* MW 4 */ + 5895 "00100101" // /* MW 3 */ + 5896 "01010111" // /* MW 2 */ + 5897 "00001000" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 111 36 first +.src_ref 7 "mmul_bf16_bf16.hpp" 113 29 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5898 "01100010" // VCONV.bfp16ebs8.fp32 ex0, dm4; VMUL.f dm4, y4, y5, r22 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5899 "01010001" // /* MW 7 */ + 5900 "11110001" // /* MW 6 */ + 5901 "10110100" // /* MW 5 */ + 5902 "00000010" // /* MW 4 */ + 5903 "11000000" // /* MW 3 */ + 5904 "01000110" // /* MW 2 */ + 5905 "00000001" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5906 "01001000" // VMAC.f dm1, dm1, ex2, ex1, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5907 "00101001" // /* MW 3 */ + 5908 "00100100" // /* MW 2 */ + 5909 "10001001" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 114 29 first +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5910 "01100010" // VCONV.bfp16ebs8.fp32 ex1, dm4; VMAC.f dm2, dm2, ex0, ex3, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5911 "01101001" // /* MW 7 */ + 5912 "01000000" // /* MW 6 */ + 5913 "10001010" // /* MW 5 */ + 5914 "00000010" // /* MW 4 */ + 5915 "11000000" // /* MW 3 */ + 5916 "01000110" // /* MW 2 */ + 5917 "00010001" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5918 "01001000" // VMAC.f dm0, dm0, ex2, ex3, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5919 "01101001" // /* MW 3 */ + 5920 "00000100" // /* MW 2 */ + 5921 "10001000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5923 "00000000" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 113 29 first +.src_ref 2 "gemm_bfp16.h" 268 12 +.src_ref 2 "gemm_bfp16.h" 268 37 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5924 "10111010" // LDA r17, [p7, dj1]; VCONV.bfp16ebs8.fp32 ex2, dm4; MOV dj1, #280 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5925 "01010010" // /* MW 9 */ + 5926 "00011000" // /* MW 8 */ + 5927 "11000001" // /* MW 7 */ + 5928 "00000000" // /* MW 6 */ + 5929 "00110110" // /* MW 5 */ + 5930 "00001010" // /* MW 4 */ + 5931 "11010001" // /* MW 3 */ + 5932 "01000110" // /* MW 2 */ + 5933 "11100100" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 114 29 first +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 first +.src_ref 2 "gemm_bfp16.h" 268 12 +.src_ref 2 "gemm_bfp16.h" 269 34 +.src_ref 2 "gemm_bfp16.h" 269 48 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5934 "01001011" // LDA r16, [p6, dj1]; NOPB; VCONV.bfp16ebs8.fp32 ex3, dm4;MOVXM p7, #508416; VMAC.f dm3, dm3, ex0, ex1, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5935 "00000001" // /* MW 15 */ + 5936 "01011011" // /* MW 14 */ + 5937 "00010100" // /* MW 13 */ + 5938 "00000000" // /* MW 12 */ + 5939 "10110001" // /* MW 11 */ + 5940 "11110011" // /* MW 10 */ + 5941 "00000001" // /* MW 9 */ + 5942 "00000000" // /* MW 8 */ + 5943 "00110110" // /* MW 7 */ + 5944 "10001010" // /* MW 6 */ + 5945 "00100001" // /* MW 5 */ + 5946 "00000000" // /* MW 4 */ + 5947 "11010000" // /* MW 3 */ + 5948 "01000010" // /* MW 2 */ + 5949 "11000100" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5951 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5953 "00000000" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5954 "01001000" // VMAC.f dm1, dm1, ex2, ex1, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5955 "00101001" // /* MW 3 */ + 5956 "00100100" // /* MW 2 */ + 5957 "10001001" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5958 "01001000" // VMAC.f dm2, dm2, ex0, ex3, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5959 "01101001" // /* MW 3 */ + 5960 "01000000" // /* MW 2 */ + 5961 "10001010" // /* MW 1 */ +.src_ref 7 "mmul_bf16_bf16.hpp" 116 21 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5962 "01001000" // VMAC.f dm0, dm0, ex2, ex3, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5963 "01101001" // /* MW 3 */ + 5964 "00000100" // /* MW 2 */ + 5965 "10001000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 202 6 first +.src_ref 2 "gemm_bfp16.h" 268 45 first + 5966 "01011100" // VST bmlh3, [p1, #64]; ADD r17, r17, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5967 "11111110" // /* MW 5 */ + 5968 "11000111" // /* MW 4 */ + 5969 "11011000" // /* MW 3 */ + 5970 "10110100" // /* MW 2 */ + 5971 "00100010" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 202 6 +.src_ref 2 "gemm_bfp16.h" 268 28 + 5972 "01011100" // VST bmhl3, [p1, #128]; NE r17, r17, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5973 "00010001" // /* MW 5 */ + 5974 "11000110" // /* MW 4 */ + 5975 "11011000" // /* MW 3 */ + 5976 "10111000" // /* MW 2 */ + 5977 "00100100" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 202 6 + 5978 "10011000" // VST bmhh3, [p1, #192] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5979 "11100110" // /* MW 3 */ + 5980 "00110101" // /* MW 2 */ + 5981 "00001001" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 2 "gemm_bfp16.h" 202 6 +.src_ref 2 "gemm_bfp16.h" 205 6 first + 5982 "10111010" // PADDB [p5], m6; VST bmll3, [p1]; MOV p1, p5 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5983 "01110110" // /* MW 9 */ + 5984 "01100000" // /* MW 8 */ + 5985 "10110101" // /* MW 7 */ + 5986 "00000000" // /* MW 6 */ + 5987 "10010000" // /* MW 5 */ + 5988 "11001011" // /* MW 4 */ + 5989 "11010101" // /* MW 3 */ + 5990 "10110000" // /* MW 2 */ + 5991 "00100000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 203 6 first + 5992 "10011000" // VST bmlh2, [p0, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5993 "00100110" // /* MW 3 */ + 5994 "00010101" // /* MW 2 */ + 5995 "00001000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 203 6 + 5996 "10011000" // VST bmhl2, [p0, #128] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5997 "01000110" // /* MW 3 */ + 5998 "00100101" // /* MW 2 */ + 5999 "00001000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 203 6 + 6000 "10011000" // VST bmhh2, [p0, #192] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6001 "01100110" // /* MW 3 */ + 6002 "00110101" // /* MW 2 */ + 6003 "00001000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 203 6 + 6004 "10011000" // VST bmll2, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6005 "00000110" // /* MW 3 */ + 6006 "00000101" // /* MW 2 */ + 6007 "00001000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 204 6 first + 6008 "10011000" // VST bmlh1, [p4, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6009 "10100110" // /* MW 3 */ + 6010 "00010100" // /* MW 2 */ + 6011 "00001100" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 204 6 + 6012 "10011000" // VST bmhl1, [p4, #128] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6013 "11000110" // /* MW 3 */ + 6014 "00100100" // /* MW 2 */ + 6015 "00001100" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 204 6 +.src_ref 2 "gemm_bfp16.h" 268 6 first + 6016 "00111010" // VST bmhh1, [p4, #192]; JNZ r17, #6128 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6128 delay_slots=5 */ + 6017 "01100001" // /* MW 9 */ + 6018 "00000000" // /* MW 8 */ + 6019 "00010000" // /* MW 7 */ + 6020 "11111110" // /* MW 6 */ + 6021 "00000010" // /* MW 5 */ + 6022 "00100010" // /* MW 4 */ + 6023 "11010000" // /* MW 3 */ + 6024 "10011100" // /* MW 2 */ + 6025 "10000110" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 204 6 first +.delay_slot + 6026 "10011000" // VST bmll1, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6027 "10000110" // /* MW 3 */ + 6028 "00000100" // /* MW 2 */ + 6029 "00001100" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 205 6 first +.delay_slot + 6030 "10011000" // VST bmlh0, [p5, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6031 "00100110" // /* MW 3 */ + 6032 "00010100" // /* MW 2 */ + 6033 "00001101" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 205 6 +.delay_slot + 6034 "10011000" // VST bmhl0, [p5, #128] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6035 "01000110" // /* MW 3 */ + 6036 "00100100" // /* MW 2 */ + 6037 "00001101" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 +.src_ref 2 "gemm_bfp16.h" 205 6 +.delay_slot + 6038 "00000010" // VST bmhh0, [p5, #192]; MOV m2, r18 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6039 "01110000" // /* MW 7 */ + 6040 "10010000" // /* MW 6 */ + 6041 "00000100" // /* MW 5 */ + 6042 "00000001" // /* MW 4 */ + 6043 "11010000" // /* MW 3 */ + 6044 "10001100" // /* MW 2 */ + 6045 "10100110" // /* MW 1 */ +.src_ref 5 "aie_core.h" 81 15 first +.src_ref 2 "gemm_bfp16.h" 205 6 +.delay_slot + 6046 "01001100" // PADDB.2D [p1], d2; VST bmll0, [p5] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6047 "00001101" // /* MW 5 */ + 6048 "00001000" // /* MW 4 */ + 6049 "00001010" // /* MW 3 */ + 6050 "01110010" // /* MW 2 */ + 6051 "00101010" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 269 34 first + 6052 "10011000" // LDA r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6053 "00010110" // /* MW 3 */ + 6054 "00000110" // /* MW 2 */ + 6055 "00000111" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 269 48 + 6056 "10011000" // LDA r17, [p7, #8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6057 "00110110" // /* MW 3 */ + 6058 "00100110" // /* MW 2 */ + 6059 "00000111" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 269 4 + 6060 "00011000" // LDA p0, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6061 "00011001" // /* MW 3 */ + 6062 "11110100" // /* MW 2 */ + 6063 "00000111" // /* MW 1 */ + 6064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6065 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 269 4 +.no_stack_arguments + 6066 "00000100" // JL #3952 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3952 delay_slots=5 */ + 6067 "00000001" // /* MW 5 */ + 6068 "00000000" // /* MW 4 */ + 6069 "10111000" // /* MW 3 */ + 6070 "00000111" // /* MW 2 */ + 6071 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6073 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6075 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6077 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 269 40 +.delay_slot + 6078 "10011000" // MUL r0, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6079 "00001111" // /* MW 3 */ + 6080 "01000001" // /* MW 2 */ + 6081 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6082 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 6083 "00011100" // /* MW 13 */ + 6084 "00000000" // /* MW 12 */ + 6085 "00000000" // /* MW 11 */ + 6086 "01010111" // /* MW 10 */ + 6087 "00011010" // /* MW 9 */ + 6088 "01000000" // /* MW 8 */ + 6089 "00000000" // /* MW 7 */ + 6090 "00000000" // /* MW 6 */ + 6091 "10110110" // /* MW 5 */ + 6092 "00000010" // /* MW 4 */ + 6093 "11110000" // /* MW 3 */ + 6094 "00101100" // /* MW 2 */ + 6095 "00000000" // /* MW 1 */ +.return_address + 6096 "10000100" // J #6144 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6144 delay_slots=5 */ + 6097 "00000000" // /* MW 5 */ + 6098 "00000000" // /* MW 4 */ + 6099 "00000000" // /* MW 3 */ + 6100 "00001100" // /* MW 2 */ + 6101 "00000000" // /* MW 1 */ +.delay_slot + 6102 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6103 "00000001" // /* MW 3 */ + 6104 "00100000" // /* MW 2 */ + 6105 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6107 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6108 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6109 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6111 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6112 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6113 "00000000" // /* MW 15 */ + 6114 "00000000" // /* MW 14 */ + 6115 "01111000" // /* MW 13 */ + 6116 "10100101" // /* MW 12 */ + 6117 "00000001" // /* MW 11 */ + 6118 "00000000" // /* MW 10 */ + 6119 "00000000" // /* MW 9 */ + 6120 "00000000" // /* MW 8 */ + 6121 "01011011" // /* MW 7 */ + 6122 "00000001" // /* MW 6 */ + 6123 "00100000" // /* MW 5 */ + 6124 "00000000" // /* MW 4 */ + 6125 "11110000" // /* MW 3 */ + 6126 "00101100" // /* MW 2 */ + 6127 "00000000" // /* MW 1 */ +.label TGT_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_1872 +.src_ref 2 "gemm_bfp16.h" 272 25 first + 6128 "11100001" // NOPA; NOPB; NOPS; ADD r16, r16, #1; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6129 "00000000" // /* MW 15 */ + 6130 "00000000" // /* MW 14 */ + 6131 "01111000" // /* MW 13 */ + 6132 "10100101" // /* MW 12 */ + 6133 "00000001" // /* MW 11 */ + 6134 "00111000" // /* MW 10 */ + 6135 "00000000" // /* MW 9 */ + 6136 "00100001" // /* MW 8 */ + 6137 "01011011" // /* MW 7 */ + 6138 "00000001" // /* MW 6 */ + 6139 "00100000" // /* MW 5 */ + 6140 "00000000" // /* MW 4 */ + 6141 "11110000" // /* MW 3 */ + 6142 "00101100" // /* MW 2 */ + 6143 "00000000" // /* MW 1 */ +.label TGT_F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_1888 +.src_ref 2 "gemm_bfp16.h" 274 + 6144 "00011000" // LDA lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6145 "00111001" // /* MW 3 */ + 6146 "11111000" // /* MW 2 */ + 6147 "00000111" // /* MW 1 */ + 6148 "00011000" // LDA p7, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6149 "10011001" // /* MW 3 */ + 6150 "11110011" // /* MW 2 */ + 6151 "00000111" // /* MW 1 */ + 6152 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6153 "00000000" // /* MW 1 */ + 6154 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6155 "00000000" // /* MW 1 */ + 6156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6157 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 6158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6159 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.noswbrkpt + 6160 "00011000" // LDA p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6161 "00011001" // /* MW 3 */ + 6162 "11111111" // /* MW 2 */ + 6163 "00000111" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 274 first +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 6164 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 6165 "00000000" // /* MW 3 */ + 6166 "00101000" // /* MW 2 */ + 6167 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 6168 "10111000" // MOV dj1, #280 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6169 "00110000" // /* MW 3 */ + 6170 "10000010" // /* MW 2 */ + 6171 "00011001" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6172 "10011000" // ST r16, [p6, dj1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6173 "00010001" // /* MW 3 */ + 6174 "00100010" // /* MW 2 */ + 6175 "00001110" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 274 first +.delay_slot + 6176 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6177 "00000001" // /* MW 5 */ + 6178 "00000000" // /* MW 4 */ + 6179 "00000000" // /* MW 3 */ + 6180 "11111000" // /* MW 2 */ + 6181 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6183 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params__end +.label __Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params___func_end0 + 6185 "00000000" // /* MW 1 */ +.label __Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_GemmBfp16 _Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 9 "superkernels.cpp" 381 first +.src_ref 9 "superkernels.cpp" 382 6 +.src_ref 9 "superkernels.cpp" 388 11 +.function_start + 6192 "00111010" // MOVS p4, p1; MOVXM p5, #508768 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6193 "00010001" // /* MW 9 */ + 6194 "10110000" // /* MW 8 */ + 6195 "10110001" // /* MW 7 */ + 6196 "11110010" // /* MW 6 */ + 6197 "00000001" // /* MW 5 */ + 6198 "00000000" // /* MW 4 */ + 6199 "01100000" // /* MW 3 */ + 6200 "10010001" // /* MW 2 */ + 6201 "10010000" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 382 6 first + 6202 "10011000" // LDA r16, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6203 "00010110" // /* MW 3 */ + 6204 "00000110" // /* MW 2 */ + 6205 "00000101" // /* MW 1 */ + 6206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6207 "00000000" // /* MW 1 */ + 6208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6209 "00000000" // /* MW 1 */ + 6210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6211 "00000000" // /* MW 1 */ + 6212 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6213 "00000000" // /* MW 1 */ + 6214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6215 "00000000" // /* MW 1 */ + 6216 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6217 "00000000" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 382 6 +.src_ref 9 "superkernels.cpp" 382 16 + 6218 "10000100" // JNZ r16, #6336 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6336 delay_slots=5 */ + 6219 "00000001" // /* MW 5 */ + 6220 "01000000" // /* MW 4 */ + 6221 "01100000" // /* MW 3 */ + 6222 "00001100" // /* MW 2 */ + 6223 "10000000" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 381 +.delay_slot + 6224 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6225 "00000001" // /* MW 5 */ + 6226 "00000000" // /* MW 4 */ + 6227 "00000000" // /* MW 3 */ + 6228 "00001000" // /* MW 2 */ + 6229 "00000000" // /* MW 1 */ +.delay_slot + 6230 "10011000" // ST p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6231 "00011101" // /* MW 3 */ + 6232 "11111111" // /* MW 2 */ + 6233 "00001111" // /* MW 1 */ +.delay_slot + 6234 "10011000" // ST p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6235 "10011101" // /* MW 3 */ + 6236 "11110111" // /* MW 2 */ + 6237 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.delay_slot + 6238 "00000010" // ST lr, [sp, #-8]; MOV p7, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6239 "01110000" // /* MW 7 */ + 6240 "01100000" // /* MW 6 */ + 6241 "10110000" // /* MW 5 */ + 6242 "00000011" // /* MW 4 */ + 6243 "10110000" // /* MW 3 */ + 6244 "00000111" // /* MW 2 */ + 6245 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 1 "io_buffer_main.h" 348 51 +.delay_slot + 6246 "11111000" // MOV p6, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6247 "11000000" // /* MW 3 */ + 6248 "01100110" // /* MW 2 */ + 6249 "00011110" // /* MW 1 */ +.src_ref 8 "tile.hpp" 86 8 +.src_ref 9 "superkernels.cpp" 384 6 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 6250 "00111010" // MOVS p0, p2; MOVXM p3, #508788 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6251 "00010001" // /* MW 9 */ + 6252 "10111010" // /* MW 8 */ + 6253 "10110001" // /* MW 7 */ + 6254 "11110001" // /* MW 6 */ + 6255 "00000001" // /* MW 5 */ + 6256 "00000000" // /* MW 4 */ + 6257 "01100000" // /* MW 3 */ + 6258 "00010001" // /* MW 2 */ + 6259 "00010001" // /* MW 1 */ +.src_ref 8 "tile.hpp" 74 8 +.src_ref 8 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 6260 "10111010" // ST.s8 r16, [p3]; MOVXM p3, #508784 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6261 "00010000" // /* MW 9 */ + 6262 "10111000" // /* MW 8 */ + 6263 "10110001" // /* MW 7 */ + 6264 "11110001" // /* MW 6 */ + 6265 "00000001" // /* MW 5 */ + 6266 "00000000" // /* MW 4 */ + 6267 "11100000" // /* MW 3 */ + 6268 "11000000" // /* MW 2 */ + 6269 "01100000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6271 "00000000" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 384 6 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 6272 "00000100" // JL #3088 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3088 delay_slots=5 */ + 6273 "00000001" // /* MW 5 */ + 6274 "00000000" // /* MW 4 */ + 6275 "00001000" // /* MW 3 */ + 6276 "00000110" // /* MW 2 */ + 6277 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6279 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6281 "00000000" // /* MW 1 */ +.src_ref 8 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6282 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6283 "00110001" // /* MW 3 */ + 6284 "00100000" // /* MW 2 */ + 6285 "00010000" // /* MW 1 */ +.src_ref 8 "tile.hpp" 74 8 +.delay_slot + 6286 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6287 "00000101" // /* MW 3 */ + 6288 "00100000" // /* MW 2 */ + 6289 "00010000" // /* MW 1 */ +.src_ref 8 "tile.hpp" 74 8 first +.delay_slot + 6290 "00101110" // NOPA; ST r16, [p3]; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 6291 "00011100" // /* MW 13 */ + 6292 "00000000" // /* MW 12 */ + 6293 "00000000" // /* MW 11 */ + 6294 "01010111" // /* MW 10 */ + 6295 "00011010" // /* MW 9 */ + 6296 "01000000" // /* MW 8 */ + 6297 "00000000" // /* MW 7 */ + 6298 "00000000" // /* MW 6 */ + 6299 "00100011" // /* MW 5 */ + 6300 "00001100" // /* MW 4 */ + 6301 "11110110" // /* MW 3 */ + 6302 "00101100" // /* MW 2 */ + 6303 "00000000" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 385 18 +.src_ref 9 "superkernels.cpp" 385 20 first +.return_address + 6304 "10111010" // LDA el0, [p2, #24]; MOVXM p2, #508776 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6305 "00010000" // /* MW 9 */ + 6306 "10110100" // /* MW 8 */ + 6307 "00110001" // /* MW 7 */ + 6308 "11110001" // /* MW 6 */ + 6309 "00000001" // /* MW 5 */ + 6310 "00000000" // /* MW 4 */ + 6311 "11010000" // /* MW 3 */ + 6312 "10000101" // /* MW 2 */ + 6313 "01001100" // /* MW 1 */ + 6314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6315 "00000000" // /* MW 1 */ + 6316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6317 "00000000" // /* MW 1 */ + 6318 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6319 "00000000" // /* MW 1 */ + 6320 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6321 "00000000" // /* MW 1 */ + 6322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6323 "00000000" // /* MW 1 */ + 6324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6325 "00000000" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 385 18 + 6326 "01111010" // NOPA; ST el0, [p2]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6327 "00000000" // /* MW 9 */ + 6328 "00000000" // /* MW 8 */ + 6329 "00000000" // /* MW 7 */ + 6330 "10000000" // /* MW 6 */ + 6331 "00101001" // /* MW 5 */ + 6332 "00000100" // /* MW 4 */ + 6333 "11110010" // /* MW 3 */ + 6334 "00101100" // /* MW 2 */ + 6335 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_144 +.src_ref 9 "superkernels.cpp" 387 12 +.src_ref 9 "superkernels.cpp" 388 11 first + 6336 "10111010" // LDA r16, [p5]; MOVXM p2, #508772 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6337 "00010000" // /* MW 9 */ + 6338 "10110010" // /* MW 8 */ + 6339 "00110001" // /* MW 7 */ + 6340 "11110001" // /* MW 6 */ + 6341 "00000001" // /* MW 5 */ + 6342 "00000000" // /* MW 4 */ + 6343 "11010000" // /* MW 3 */ + 6344 "11000010" // /* MW 2 */ + 6345 "10100000" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 387 12 first + 6346 "10011000" // LDA r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6347 "00110110" // /* MW 3 */ + 6348 "00000110" // /* MW 2 */ + 6349 "00000010" // /* MW 1 */ + 6350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6351 "00000000" // /* MW 1 */ + 6352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6353 "00000000" // /* MW 1 */ + 6354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6355 "00000000" // /* MW 1 */ + 6356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6357 "00000000" // /* MW 1 */ + 6358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6359 "00000000" // /* MW 1 */ + 6360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6361 "00000000" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 389 6 first +.src_ref 9 "superkernels.cpp" 389 17 first + 6362 "10000100" // JNZ r17, #6448 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6448 delay_slots=5 */ + 6363 "00000001" // /* MW 5 */ + 6364 "01000000" // /* MW 4 */ + 6365 "10011000" // /* MW 3 */ + 6366 "00001100" // /* MW 2 */ + 6367 "10001000" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 388 11 first +.delay_slot + 6368 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6369 "00000111" // /* MW 3 */ + 6370 "00100000" // /* MW 2 */ + 6371 "00010100" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 388 11 +.delay_slot + 6372 "10011000" // ST r16, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6373 "00010001" // /* MW 3 */ + 6374 "00000110" // /* MW 2 */ + 6375 "00001101" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 387 12 first +.delay_slot + 6376 "00011000" // ADD r16, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6377 "00000111" // /* MW 3 */ + 6378 "01100000" // /* MW 2 */ + 6379 "00010100" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 387 12 +.delay_slot + 6380 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6381 "00010001" // /* MW 3 */ + 6382 "00000110" // /* MW 2 */ + 6383 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6385 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 + 6386 "11111000" // MOV r16, p6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6387 "11000000" // /* MW 3 */ + 6388 "00011100" // /* MW 2 */ + 6389 "00011100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 first + 6390 "00011000" // ADD.NC p2, r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6391 "00000110" // /* MW 3 */ + 6392 "01101000" // /* MW 2 */ + 6393 "00011010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 + 6394 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6395 "01110110" // /* MW 3 */ + 6396 "11111111" // /* MW 2 */ + 6397 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 6398 "10011000" // LDA r16, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6399 "00010110" // /* MW 3 */ + 6400 "11111110" // /* MW 2 */ + 6401 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 6402 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6403 "00110110" // /* MW 3 */ + 6404 "11111110" // /* MW 2 */ + 6405 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 6406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6407 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 6408 "10011000" // LDA r16, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6409 "00010110" // /* MW 3 */ + 6410 "01000110" // /* MW 2 */ + 6411 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6413 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6415 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6417 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6419 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6420 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6421 "00000010" // /* MW 3 */ + 6422 "01100001" // /* MW 2 */ + 6423 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6424 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6425 "00010001" // /* MW 3 */ + 6426 "00000110" // /* MW 2 */ + 6427 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 + 6428 "00011000" // MOVX r17, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6429 "11111101" // /* MW 3 */ + 6430 "11100010" // /* MW 2 */ + 6431 "00010111" // /* MW 1 */ + 6432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6433 "00000000" // /* MW 1 */ + 6434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6435 "00000000" // /* MW 1 */ + 6436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6437 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 6438 "01111010" // NOPA; NOPS; ACQ r16, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6439 "00011000" // /* MW 9 */ + 6440 "00010011" // /* MW 8 */ + 6441 "00000100" // /* MW 7 */ + 6442 "00000000" // /* MW 6 */ + 6443 "01011011" // /* MW 5 */ + 6444 "00000001" // /* MW 4 */ + 6445 "11110000" // /* MW 3 */ + 6446 "00101100" // /* MW 2 */ + 6447 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_256 +.src_ref 2 "gemm_bfp16.h" 285 80 +.src_ref 2 "gemm_bfp16.h" 285 80 + 6448 "10111010" // MOVA r24, #0; MOVXM r16, #2147483616 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6449 "00010000" // /* MW 9 */ + 6450 "11110000" // /* MW 8 */ + 6451 "00001111" // /* MW 7 */ + 6452 "11111110" // /* MW 6 */ + 6453 "11111111" // /* MW 5 */ + 6454 "00011111" // /* MW 4 */ + 6455 "00000000" // /* MW 3 */ + 6456 "00011000" // /* MW 2 */ + 6457 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 285 74 + 6458 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6459 "00000101" // /* MW 3 */ + 6460 "00100010" // /* MW 2 */ + 6461 "00010000" // /* MW 1 */ + 6462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6463 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 2 "gemm_bfp16.h" 285 86 + 6464 "10111010" // LDA p3, [p4]; MOVXM p4, #508428 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6465 "00010000" // /* MW 9 */ + 6466 "00000110" // /* MW 8 */ + 6467 "00110001" // /* MW 7 */ + 6468 "11110010" // /* MW 6 */ + 6469 "00000001" // /* MW 5 */ + 6470 "00000000" // /* MW 4 */ + 6471 "11010000" // /* MW 3 */ + 6472 "10110011" // /* MW 2 */ + 6473 "10000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 285 86 first + 6474 "10011000" // LDA r27, [p4], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6475 "01110110" // /* MW 3 */ + 6476 "11111111" // /* MW 2 */ + 6477 "00000100" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 285 80 + 6478 "10011000" // LDA r18, [p4], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6479 "01010110" // /* MW 3 */ + 6480 "11101110" // /* MW 2 */ + 6481 "00000100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 9 "superkernels.cpp" 393 34 + 6482 "11010100" // LDA p0, [p7]; MOV p7, p4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6483 "10000001" // /* MW 5 */ + 6484 "11010001" // /* MW 4 */ + 6485 "11011110" // /* MW 3 */ + 6486 "10000011" // /* MW 2 */ + 6487 "11100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 + 6488 "10011000" // LDA p2, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6489 "00011110" // /* MW 3 */ + 6490 "00000101" // /* MW 2 */ + 6491 "00000110" // /* MW 1 */ + 6492 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6493 "00000000" // /* MW 1 */ + 6494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6495 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 285 74 + 6496 "11111000" // MOV r19, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6497 "11000000" // /* MW 3 */ + 6498 "11010110" // /* MW 2 */ + 6499 "00011100" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 285 4 first +.no_stack_arguments + 6500 "00000100" // JL #4256 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4256 delay_slots=5 */ + 6501 "00000001" // /* MW 5 */ + 6502 "00000000" // /* MW 4 */ + 6503 "01010000" // /* MW 3 */ + 6504 "00001000" // /* MW 2 */ + 6505 "00000000" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 285 80 +.delay_slot + 6506 "00011000" // ADD r18, r18, #31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6507 "01111111" // /* MW 3 */ + 6508 "10100100" // /* MW 2 */ + 6509 "00010100" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 285 80 +.delay_slot + 6510 "10011000" // AND r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6511 "00000100" // /* MW 3 */ + 6512 "10100001" // /* MW 2 */ + 6513 "00010100" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 285 80 +.delay_slot + 6514 "00011000" // SEL.EQZ r16, r24, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6515 "00000010" // /* MW 3 */ + 6516 "00100001" // /* MW 2 */ + 6517 "00010110" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 285 74 +.delay_slot + 6518 "10011000" // LSHL r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6519 "00011101" // /* MW 3 */ + 6520 "00100001" // /* MW 2 */ + 6521 "00010100" // /* MW 1 */ +.src_ref 2 "gemm_bfp16.h" 285 74 +.delay_slot + 6522 "10010100" // NOPA; ADD.NC p1, r19, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6523 "10000010" // /* MW 5 */ + 6524 "11010011" // /* MW 4 */ + 6525 "11110010" // /* MW 3 */ + 6526 "00101100" // /* MW 2 */ + 6527 "00000000" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 393 6 +.src_ref 9 "superkernels.cpp" 393 34 first +.src_ref 9 "superkernels.cpp" 394 17 +.return_address + 6528 "10111010" // LDA r16, [p7, #16]; MOVXM p2, #508772 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6529 "00010000" // /* MW 9 */ + 6530 "10110010" // /* MW 8 */ + 6531 "00110001" // /* MW 7 */ + 6532 "11110001" // /* MW 6 */ + 6533 "00000001" // /* MW 5 */ + 6534 "00000000" // /* MW 4 */ + 6535 "11010000" // /* MW 3 */ + 6536 "11000010" // /* MW 2 */ + 6537 "11101000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 +.src_ref 9 "superkernels.cpp" 393 6 + 6538 "11010100" // LDA r18, [p2]; MOV r17, p6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6539 "10000001" // /* MW 5 */ + 6540 "10111001" // /* MW 4 */ + 6541 "11011000" // /* MW 3 */ + 6542 "11001010" // /* MW 2 */ + 6543 "01000000" // /* MW 1 */ + 6544 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6545 "10011001" // /* MW 3 */ + 6546 "11110111" // /* MW 2 */ + 6547 "00000111" // /* MW 1 */ + 6548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6549 "00000000" // /* MW 1 */ + 6550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6551 "00000000" // /* MW 1 */ + 6552 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6553 "00000000" // /* MW 1 */ + 6554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6555 "00000000" // /* MW 1 */ + 6556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6557 "00000000" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 393 17 + 6558 "10011000" // NE r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6559 "00001000" // /* MW 3 */ + 6560 "10100001" // /* MW 2 */ + 6561 "00010100" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 393 6 + 6562 "10000100" // JNZ r16, #6640 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6640 delay_slots=5 */ + 6563 "00000001" // /* MW 5 */ + 6564 "01000000" // /* MW 4 */ + 6565 "11111000" // /* MW 3 */ + 6566 "00001100" // /* MW 2 */ + 6567 "10000000" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 394 17 +.src_ref 9 "superkernels.cpp" 398 16 +.delay_slot + 6568 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6569 "00000001" // /* MW 3 */ + 6570 "00110000" // /* MW 2 */ + 6571 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6572 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6573 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6574 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6575 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6577 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6579 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 449 8 +.src_ref 9 "superkernels.cpp" 394 17 first + 6580 "00111010" // ST r24, [p2]; MOVX r16, #1; ADD.NC p6, r17, #20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6581 "00001001" // /* MW 9 */ + 6582 "01000101" // /* MW 8 */ + 6583 "00110100" // /* MW 7 */ + 6584 "00101011" // /* MW 6 */ + 6585 "00000000" // /* MW 5 */ + 6586 "00000001" // /* MW 4 */ + 6587 "00110000" // /* MW 3 */ + 6588 "11100010" // /* MW 2 */ + 6589 "01000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 + 6590 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6591 "00110110" // /* MW 3 */ + 6592 "00000110" // /* MW 2 */ + 6593 "00000110" // /* MW 1 */ + 6594 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6595 "00000000" // /* MW 1 */ + 6596 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6597 "00000000" // /* MW 1 */ + 6598 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6599 "00000000" // /* MW 1 */ + 6600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6601 "00000000" // /* MW 1 */ + 6602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6603 "00000000" // /* MW 1 */ + 6604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6605 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 6606 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6607 "00001000" // /* MW 3 */ + 6608 "01010001" // /* MW 2 */ + 6609 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first + 6610 "10011000" // LDA r17, [p6, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6611 "00110110" // /* MW 3 */ + 6612 "11100110" // /* MW 2 */ + 6613 "00000110" // /* MW 1 */ + 6614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6615 "00000000" // /* MW 1 */ + 6616 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6617 "00000000" // /* MW 1 */ + 6618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6619 "00000000" // /* MW 1 */ + 6620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6621 "00000000" // /* MW 1 */ + 6622 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6623 "00000000" // /* MW 1 */ + 6624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6625 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 + 6626 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6627 "00010001" // /* MW 3 */ + 6628 "00100001" // /* MW 2 */ + 6629 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 6630 "01111010" // NOPA; ST r16, [p6, #-8]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6631 "00000000" // /* MW 9 */ + 6632 "00000000" // /* MW 8 */ + 6633 "00000000" // /* MW 7 */ + 6634 "10000000" // /* MW 6 */ + 6635 "00010001" // /* MW 5 */ + 6636 "11100110" // /* MW 4 */ + 6637 "11110110" // /* MW 3 */ + 6638 "00101100" // /* MW 2 */ + 6639 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 9 "superkernels.cpp" 397 6 +.src_ref 9 "superkernels.cpp" 398 16 + 6640 "01000100" // MOVXM p2, #508768 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6641 "11000000" // /* MW 5 */ + 6642 "11000110" // /* MW 4 */ + 6643 "11000100" // /* MW 3 */ + 6644 "00000111" // /* MW 2 */ + 6645 "00000000" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 397 6 first +.src_ref 9 "superkernels.cpp" 397 19 + 6646 "10111010" // LDA r16, [p2]; MOVXM p3, #508776 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6647 "00010000" // /* MW 9 */ + 6648 "10110100" // /* MW 8 */ + 6649 "10110001" // /* MW 7 */ + 6650 "11110001" // /* MW 6 */ + 6651 "00000001" // /* MW 5 */ + 6652 "00000000" // /* MW 4 */ + 6653 "11010000" // /* MW 3 */ + 6654 "11000010" // /* MW 2 */ + 6655 "01000000" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 397 19 + 6656 "10011000" // LDA r17, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6657 "00110110" // /* MW 3 */ + 6658 "00000110" // /* MW 2 */ + 6659 "00000011" // /* MW 1 */ + 6660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6661 "00000000" // /* MW 1 */ + 6662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6663 "00000000" // /* MW 1 */ + 6664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6665 "00000000" // /* MW 1 */ + 6666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6667 "00000000" // /* MW 1 */ + 6668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6669 "00000000" // /* MW 1 */ + 6670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6671 "00000000" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 397 16 + 6672 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6673 "00001000" // /* MW 3 */ + 6674 "01100001" // /* MW 2 */ + 6675 "00010100" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 397 6 + 6676 "10000100" // JNZ r16, #6704 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6704 delay_slots=5 */ + 6677 "00000001" // /* MW 5 */ + 6678 "01000000" // /* MW 4 */ + 6679 "00011000" // /* MW 3 */ + 6680 "00001101" // /* MW 2 */ + 6681 "10000000" // /* MW 1 */ +.delay_slot + 6682 "00011000" // LDA p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6683 "00011001" // /* MW 3 */ + 6684 "11111111" // /* MW 2 */ + 6685 "00000111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6687 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6689 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6691 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6693 "00000000" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 398 16 first + 6694 "01111010" // NOPA; ST r24, [p2]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6695 "00000000" // /* MW 9 */ + 6696 "00000000" // /* MW 8 */ + 6697 "00000000" // /* MW 7 */ + 6698 "10000000" // /* MW 6 */ + 6699 "00010001" // /* MW 5 */ + 6700 "00000111" // /* MW 4 */ + 6701 "11110010" // /* MW 3 */ + 6702 "00101100" // /* MW 2 */ + 6703 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_512 +.src_ref 9 "superkernels.cpp" 400 + 6704 "00011000" // LDA lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6705 "00111001" // /* MW 3 */ + 6706 "11111000" // /* MW 2 */ + 6707 "00000111" // /* MW 1 */ + 6708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6709 "00000000" // /* MW 1 */ + 6710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6711 "00000000" // /* MW 1 */ + 6712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6713 "00000000" // /* MW 1 */ + 6714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6715 "00000000" // /* MW 1 */ + 6716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6717 "00000000" // /* MW 1 */ + 6718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6719 "00000000" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 400 first + 6720 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 6721 "00000000" // /* MW 3 */ + 6722 "00101000" // /* MW 2 */ + 6723 "00010000" // /* MW 1 */ +.src_ref 9 "superkernels.cpp" 400 +.delay_slot + 6724 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6725 "00000001" // /* MW 5 */ + 6726 "00000000" // /* MW 4 */ + 6727 "00000000" // /* MW 3 */ + 6728 "11111000" // /* MW 2 */ + 6729 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6731 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6732 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6733 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6734 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6735 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6736 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 6737 "00000000" // /* MW 1 */ +.label __Z15_b13786_wrapperPPv___func_begin0 +.label _Z15_b13786_wrapperPPv +.function _b13786_wrapper _Z15_b13786_wrapperPPv +.src_ref 0 "0_0_reloadable82.cc" 18 first +.src_ref 0 "0_0_reloadable82.cc" 20 79 +.function_start + 6752 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6753 "11000000" // /* MW 3 */ + 6754 "01100000" // /* MW 2 */ + 6755 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 20 79 first + 6756 "10011000" // LDA p0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6757 "00011110" // /* MW 3 */ + 6758 "00011100" // /* MW 2 */ + 6759 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 21 79 first + 6760 "10011000" // LDA p1, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6761 "10011110" // /* MW 3 */ + 6762 "00101100" // /* MW 2 */ + 6763 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 23 81 first + 6764 "10011000" // LDA p3, [p2, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6765 "10011110" // /* MW 3 */ + 6766 "11110101" // /* MW 2 */ + 6767 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 22 46 first + 6768 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6769 "00011110" // /* MW 3 */ + 6770 "00000101" // /* MW 2 */ + 6771 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 19 4 first +.tail_call + 6772 "10000100" // J #6192 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=6192 delay_slots=5 */ + 6773 "00000000" // /* MW 5 */ + 6774 "00000000" // /* MW 4 */ + 6775 "00011000" // /* MW 3 */ + 6776 "00001100" // /* MW 2 */ + 6777 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6778 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6779 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6780 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6781 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6783 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6785 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z15_b13786_wrapperPPv__end +.label __Z15_b13786_wrapperPPv___func_end0 + 6787 "00000000" // /* MW 1 */ +.label __ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj___func_begin0 +.label _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj +.function buffer_pad_adf_wrapper, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> > > _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 24 first +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 26 25 +.function_start + 6800 "11010100" // LDA el0, [p1]; MOV r17, p2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6801 "10000001" // /* MW 5 */ + 6802 "10101001" // /* MW 4 */ + 6803 "11011000" // /* MW 3 */ + 6804 "10000101" // /* MW 2 */ + 6805 "00100000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 26 25 first + 6806 "00011000" // ADD.NC p1, r17, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6807 "10000010" // /* MW 3 */ + 6808 "01101000" // /* MW 2 */ + 6809 "00011001" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 26 25 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 6810 "10011000" // LDA r18, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6811 "01010110" // /* MW 3 */ + 6812 "00011110" // /* MW 2 */ + 6813 "00000001" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 27 33 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 6814 "10011000" // LDA r15, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6815 "11110110" // /* MW 3 */ + 6816 "00000101" // /* MW 2 */ + 6817 "00000001" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6819 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6821 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6823 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6824 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6825 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6826 "11111000" // MOV r0, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6827 "10100000" // /* MW 3 */ + 6828 "00010111" // /* MW 2 */ + 6829 "00011000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 24 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 6830 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6831 "00000001" // /* MW 5 */ + 6832 "00000000" // /* MW 4 */ + 6833 "00000000" // /* MW 3 */ + 6834 "00001000" // /* MW 2 */ + 6835 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 43 first +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 72 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 6836 "01100100" // MUL r18, r15, r18; MOV r16, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6837 "11111101" // /* MW 5 */ + 6838 "00111111" // /* MW 4 */ + 6839 "11111000" // /* MW 3 */ + 6840 "10100101" // /* MW 2 */ + 6841 "01111100" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 72 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6842 "00111010" // ST r18, [sp, #-20]; MOVXM r17, #1073741823 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6843 "10010001" // /* MW 9 */ + 6844 "11111111" // /* MW 8 */ + 6845 "00101111" // /* MW 7 */ + 6846 "11111110" // /* MW 6 */ + 6847 "11111111" // /* MW 5 */ + 6848 "00001111" // /* MW 4 */ + 6849 "10110000" // /* MW 3 */ + 6850 "11001010" // /* MW 2 */ + 6851 "11111101" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 72 + 6852 "10011000" // LSHL r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6853 "00001101" // /* MW 3 */ + 6854 "10100001" // /* MW 2 */ + 6855 "00010100" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 72 + 6856 "10011000" // AND r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6857 "00000100" // /* MW 3 */ + 6858 "01100001" // /* MW 2 */ + 6859 "00010100" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 4 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 22 + 6860 "10000100" // JZ r16, #6944 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6944 delay_slots=5 */ + 6861 "00000001" // /* MW 5 */ + 6862 "00000000" // /* MW 4 */ + 6863 "10010000" // /* MW 3 */ + 6864 "00001101" // /* MW 2 */ + 6865 "10000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.delay_slot + 6866 "11010100" // LDA p7, [p0]; MOV p0, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6867 "10000001" // /* MW 5 */ + 6868 "11011101" // /* MW 4 */ + 6869 "11010000" // /* MW 3 */ + 6870 "11110011" // /* MW 2 */ + 6871 "00000000" // /* MW 1 */ +.delay_slot + 6872 "10011000" // ST p0, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6873 "00011101" // /* MW 3 */ + 6874 "11111000" // /* MW 2 */ + 6875 "00001111" // /* MW 1 */ +.delay_slot + 6876 "10011000" // ST r14, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6877 "11010101" // /* MW 3 */ + 6878 "11110101" // /* MW 2 */ + 6879 "00001111" // /* MW 1 */ +.delay_slot + 6880 "10011000" // ST lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6881 "00111101" // /* MW 3 */ + 6882 "11110000" // /* MW 2 */ + 6883 "00001111" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 25 24 first +.delay_slot + 6884 "00001100" // LDA r14, [p1, #-8]; ST r0, [sp, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6885 "00101011" // /* MW 5 */ + 6886 "11111000" // /* MW 4 */ + 6887 "11011111" // /* MW 3 */ + 6888 "10111010" // /* MW 2 */ + 6889 "00111100" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 37 31 + 6890 "01011100" // ST el0, [sp, #-24]; MOVX r0, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6891 "00000010" // /* MW 5 */ + 6892 "00000000" // /* MW 4 */ + 6893 "10110000" // /* MW 3 */ + 6894 "00000101" // /* MW 2 */ + 6895 "11111101" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 37 31 + 6896 "00011000" // LDA p1, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6897 "10011001" // /* MW 3 */ + 6898 "11101000" // /* MW 2 */ + 6899 "00000111" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 37 31 first +.no_stack_arguments + 6900 "00000100" // JL #10032 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10032 delay_slots=5 */ + 6901 "00000001" // /* MW 5 */ + 6902 "00000000" // /* MW 4 */ + 6903 "10011000" // /* MW 3 */ + 6904 "00010011" // /* MW 2 */ + 6905 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 4 +.delay_slot + 6906 "00011000" // MOVX r17, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6907 "00001001" // /* MW 3 */ + 6908 "00100010" // /* MW 2 */ + 6909 "00010000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 4 first +.delay_slot + 6910 "10011000" // LSHL r1, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6911 "00011101" // /* MW 3 */ + 6912 "00000011" // /* MW 2 */ + 6913 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6915 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6917 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6918 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6919 "01111110" // /* MW 9 */ + 6920 "10100101" // /* MW 8 */ + 6921 "00000001" // /* MW 7 */ + 6922 "00000000" // /* MW 6 */ + 6923 "00010000" // /* MW 5 */ + 6924 "00000000" // /* MW 4 */ + 6925 "11110000" // /* MW 3 */ + 6926 "00101100" // /* MW 2 */ + 6927 "00000000" // /* MW 1 */ +.return_address + 6928 "10000100" // J #6960 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6960 delay_slots=5 */ + 6929 "00000000" // /* MW 5 */ + 6930 "00000000" // /* MW 4 */ + 6931 "10011000" // /* MW 3 */ + 6932 "00001101" // /* MW 2 */ + 6933 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6935 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6937 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6939 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6941 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6943 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_144 + 6944 "11100001" // NOPA; NOPB; ST el0, [sp, #-24]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6945 "00000000" // /* MW 15 */ + 6946 "00000000" // /* MW 14 */ + 6947 "01111000" // /* MW 13 */ + 6948 "10100101" // /* MW 12 */ + 6949 "00000001" // /* MW 11 */ + 6950 "00000000" // /* MW 10 */ + 6951 "00000000" // /* MW 9 */ + 6952 "10000000" // /* MW 8 */ + 6953 "00101101" // /* MW 7 */ + 6954 "11101000" // /* MW 6 */ + 6955 "00100111" // /* MW 5 */ + 6956 "00000000" // /* MW 4 */ + 6957 "11110000" // /* MW 3 */ + 6958 "00101100" // /* MW 2 */ + 6959 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_160 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 40 4 first +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 40 22 first + 6960 "10000100" // JZ r15, #7232 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7232 delay_slots=5 */ + 6961 "00000001" // /* MW 5 */ + 6962 "00000000" // /* MW 4 */ + 6963 "00100000" // /* MW 3 */ + 6964 "00001110" // /* MW 2 */ + 6965 "01111000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6967 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6969 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6971 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6973 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6975 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 41 8 first + 6976 "10111010" // LDA r17, [sp, #-20]; MOVXM ls, #7072 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6977 "00010000" // /* MW 9 */ + 6978 "11010000" // /* MW 8 */ + 6979 "01111101" // /* MW 7 */ + 6980 "00000100" // /* MW 6 */ + 6981 "00000000" // /* MW 5 */ + 6982 "00000000" // /* MW 4 */ + 6983 "00100000" // /* MW 3 */ + 6984 "11000110" // /* MW 2 */ + 6985 "11111101" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 41 8 + 6986 "10111010" // MOVA r19, #1; MOVXM le, #7168 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6987 "00010000" // /* MW 9 */ + 6988 "00000000" // /* MW 8 */ + 6989 "10111110" // /* MW 7 */ + 6990 "00000101" // /* MW 6 */ + 6991 "00000000" // /* MW 5 */ + 6992 "00000000" // /* MW 4 */ + 6993 "00000000" // /* MW 3 */ + 6994 "00110011" // /* MW 2 */ + 6995 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 42 36 + 6996 "10111010" // LDA r18, [sp, #-24]; LSHL r20, r14, r19; ADD.NC r16, r15, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6997 "11001000" // /* MW 9 */ + 6998 "11111111" // /* MW 8 */ + 6999 "00001011" // /* MW 7 */ + 7000 "11101110" // /* MW 6 */ + 7001 "01001001" // /* MW 5 */ + 7002 "00011101" // /* MW 4 */ + 7003 "00100000" // /* MW 3 */ + 7004 "01001010" // /* MW 2 */ + 7005 "11111101" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 40 4 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 45 + 7006 "10111010" // LDA lr, [sp, #-16]; MOVXM p0, #7040 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7007 "00010000" // /* MW 9 */ + 7008 "11000000" // /* MW 8 */ + 7009 "00110101" // /* MW 7 */ + 7010 "00000100" // /* MW 6 */ + 7011 "00000000" // /* MW 5 */ + 7012 "00000000" // /* MW 4 */ + 7013 "00100000" // /* MW 3 */ + 7014 "00000111" // /* MW 2 */ + 7015 "11111110" // /* MW 1 */ + 7016 "11111000" // MOV m0, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7017 "00100000" // /* MW 3 */ + 7018 "00001010" // /* MW 2 */ + 7019 "00011000" // /* MW 1 */ + 7020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7021 "00000000" // /* MW 1 */ + 7022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7023 "00000000" // /* MW 1 */ + 7024 "11100001" // NOPA; NOPB; NOPS; LSHL r17, r17, r19; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7025 "00000000" // /* MW 15 */ + 7026 "00000000" // /* MW 14 */ + 7027 "01111000" // /* MW 13 */ + 7028 "10100101" // /* MW 12 */ + 7029 "00000001" // /* MW 11 */ + 7030 "11101100" // /* MW 10 */ + 7031 "00011001" // /* MW 9 */ + 7032 "00100011" // /* MW 8 */ + 7033 "01011011" // /* MW 7 */ + 7034 "00000001" // /* MW 6 */ + 7035 "00100000" // /* MW 5 */ + 7036 "00000000" // /* MW 4 */ + 7037 "11110000" // /* MW 3 */ + 7038 "00101100" // /* MW 2 */ + 7039 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_240 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 41 8 +.loop_nesting 1 + 7040 "10000100" // JZ r14, #7184 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7184 delay_slots=5 */ + 7041 "00000001" // /* MW 5 */ + 7042 "00000000" // /* MW 4 */ + 7043 "00001000" // /* MW 3 */ + 7044 "00001110" // /* MW 2 */ + 7045 "01110000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7047 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7049 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7051 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7053 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7055 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 41 8 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 42 36 + 7056 "00000010" // MOVS p2, p7; MOV lc, r14 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7057 "01110000" // /* MW 7 */ + 7058 "10010000" // /* MW 6 */ + 7059 "10111011" // /* MW 5 */ + 7060 "00000010" // /* MW 4 */ + 7061 "01100000" // /* MW 3 */ + 7062 "10010001" // /* MW 2 */ + 7063 "01010011" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 42 36 + 7064 "00000010" // NOPS; MOV p1, r18 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7065 "01110000" // /* MW 7 */ + 7066 "10010000" // /* MW 6 */ + 7067 "10110100" // /* MW 5 */ + 7068 "00000000" // /* MW 4 */ + 7069 "01100000" // /* MW 3 */ + 7070 "00101011" // /* MW 2 */ + 7071 "00000000" // /* MW 1 */ +.label ZLS_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_272 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 42 36 first +.begin_of_loop +.loop_nesting 2 + 7072 "11100001" // LDA.s16 r19, [p2], #2; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7073 "00000000" // /* MW 15 */ + 7074 "00000000" // /* MW 14 */ + 7075 "01111000" // /* MW 13 */ + 7076 "10100101" // /* MW 12 */ + 7077 "00000001" // /* MW 11 */ + 7078 "00000000" // /* MW 10 */ + 7079 "00000000" // /* MW 9 */ + 7080 "00000000" // /* MW 8 */ + 7081 "01011011" // /* MW 7 */ + 7082 "00000001" // /* MW 6 */ + 7083 "00100000" // /* MW 5 */ + 7084 "00000000" // /* MW 4 */ + 7085 "01010000" // /* MW 3 */ + 7086 "11001110" // /* MW 2 */ + 7087 "01000011" // /* MW 1 */ + 7088 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7089 "00000000" // /* MW 15 */ + 7090 "00000000" // /* MW 14 */ + 7091 "01111000" // /* MW 13 */ + 7092 "10100101" // /* MW 12 */ + 7093 "00000001" // /* MW 11 */ + 7094 "00000000" // /* MW 10 */ + 7095 "00000000" // /* MW 9 */ + 7096 "00000000" // /* MW 8 */ + 7097 "01011011" // /* MW 7 */ + 7098 "00000001" // /* MW 6 */ + 7099 "00100000" // /* MW 5 */ + 7100 "00000000" // /* MW 4 */ + 7101 "11110000" // /* MW 3 */ + 7102 "00101100" // /* MW 2 */ + 7103 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 42 36 + 7104 "11100001" // ST.s16 r19, [p1], #2; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7105 "00000000" // /* MW 15 */ + 7106 "00000000" // /* MW 14 */ + 7107 "01111000" // /* MW 13 */ + 7108 "10100101" // /* MW 12 */ + 7109 "00000001" // /* MW 11 */ + 7110 "00000000" // /* MW 10 */ + 7111 "00000000" // /* MW 9 */ + 7112 "00000000" // /* MW 8 */ + 7113 "01011011" // /* MW 7 */ + 7114 "00000001" // /* MW 6 */ + 7115 "00100000" // /* MW 5 */ + 7116 "00000000" // /* MW 4 */ + 7117 "11100000" // /* MW 3 */ + 7118 "11001110" // /* MW 2 */ + 7119 "00100011" // /* MW 1 */ + 7120 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7121 "00000000" // /* MW 15 */ + 7122 "00000000" // /* MW 14 */ + 7123 "01111000" // /* MW 13 */ + 7124 "10100101" // /* MW 12 */ + 7125 "00000001" // /* MW 11 */ + 7126 "00000000" // /* MW 10 */ + 7127 "00000000" // /* MW 9 */ + 7128 "00000000" // /* MW 8 */ + 7129 "01011011" // /* MW 7 */ + 7130 "00000001" // /* MW 6 */ + 7131 "00100000" // /* MW 5 */ + 7132 "00000000" // /* MW 4 */ + 7133 "11110000" // /* MW 3 */ + 7134 "00101100" // /* MW 2 */ + 7135 "00000000" // /* MW 1 */ + 7136 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7137 "00000000" // /* MW 15 */ + 7138 "00000000" // /* MW 14 */ + 7139 "01111000" // /* MW 13 */ + 7140 "10100101" // /* MW 12 */ + 7141 "00000001" // /* MW 11 */ + 7142 "00000000" // /* MW 10 */ + 7143 "00000000" // /* MW 9 */ + 7144 "00000000" // /* MW 8 */ + 7145 "01011011" // /* MW 7 */ + 7146 "00000001" // /* MW 6 */ + 7147 "00100000" // /* MW 5 */ + 7148 "00000000" // /* MW 4 */ + 7149 "11110000" // /* MW 3 */ + 7150 "00101100" // /* MW 2 */ + 7151 "00000000" // /* MW 1 */ + 7152 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7153 "00000000" // /* MW 15 */ + 7154 "00000000" // /* MW 14 */ + 7155 "01111000" // /* MW 13 */ + 7156 "10100101" // /* MW 12 */ + 7157 "00000001" // /* MW 11 */ + 7158 "00000000" // /* MW 10 */ + 7159 "00000000" // /* MW 9 */ + 7160 "00000000" // /* MW 8 */ + 7161 "01011011" // /* MW 7 */ + 7162 "00000001" // /* MW 6 */ + 7163 "00100000" // /* MW 5 */ + 7164 "00000000" // /* MW 4 */ + 7165 "11110000" // /* MW 3 */ + 7166 "00101100" // /* MW 2 */ + 7167 "00000000" // /* MW 1 */ +.label ZLE_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_368 +.end_of_loop + 7168 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7169 "00000000" // /* MW 15 */ + 7170 "00000000" // /* MW 14 */ + 7171 "01111000" // /* MW 13 */ + 7172 "10100101" // /* MW 12 */ + 7173 "00000001" // /* MW 11 */ + 7174 "00000000" // /* MW 10 */ + 7175 "00000000" // /* MW 9 */ + 7176 "00000000" // /* MW 8 */ + 7177 "01011011" // /* MW 7 */ + 7178 "00000001" // /* MW 6 */ + 7179 "00100000" // /* MW 5 */ + 7180 "00000000" // /* MW 4 */ + 7181 "11110000" // /* MW 3 */ + 7182 "00101100" // /* MW 2 */ + 7183 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_384 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 40 4 first +.loop_nesting 1 + 7184 "00011100" // PADDB [p7], m0; JNZD r16, r16, p0 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 7185 "01000000" // /* MW 5 */ + 7186 "01000000" // /* MW 4 */ + 7187 "00001000" // /* MW 3 */ + 7188 "01110010" // /* MW 2 */ + 7189 "11100001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7191 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7193 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7195 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7197 "00000000" // /* MW 1 */ +.delay_slot + 7198 "01011000" // ADD.NC r18, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7199 "11001001" // /* MW 3 */ + 7200 "10011000" // /* MW 2 */ + 7201 "00011100" // /* MW 1 */ +.loop_nesting 0 + 7202 "10000100" // J #7248 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=7248 delay_slots=5 */ + 7203 "00000000" // /* MW 5 */ + 7204 "00000000" // /* MW 4 */ + 7205 "00101000" // /* MW 3 */ + 7206 "00001110" // /* MW 2 */ + 7207 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7209 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7211 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7212 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7213 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7215 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7216 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7217 "00000000" // /* MW 15 */ + 7218 "00000000" // /* MW 14 */ + 7219 "01111000" // /* MW 13 */ + 7220 "10100101" // /* MW 12 */ + 7221 "00000001" // /* MW 11 */ + 7222 "00000000" // /* MW 10 */ + 7223 "00000000" // /* MW 9 */ + 7224 "00000000" // /* MW 8 */ + 7225 "01011011" // /* MW 7 */ + 7226 "00000001" // /* MW 6 */ + 7227 "00100000" // /* MW 5 */ + 7228 "00000000" // /* MW 4 */ + 7229 "11110000" // /* MW 3 */ + 7230 "00101100" // /* MW 2 */ + 7231 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_432 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 45 + 7232 "11100001" // LDA lr, [sp, #-16]; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7233 "00000000" // /* MW 15 */ + 7234 "00000000" // /* MW 14 */ + 7235 "01111000" // /* MW 13 */ + 7236 "10100101" // /* MW 12 */ + 7237 "00000001" // /* MW 11 */ + 7238 "00000000" // /* MW 10 */ + 7239 "00000000" // /* MW 9 */ + 7240 "00000000" // /* MW 8 */ + 7241 "01011011" // /* MW 7 */ + 7242 "00000001" // /* MW 6 */ + 7243 "00100000" // /* MW 5 */ + 7244 "00000000" // /* MW 4 */ + 7245 "00100000" // /* MW 3 */ + 7246 "00000111" // /* MW 2 */ + 7247 "11111110" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_448 + 7248 "00011000" // LDA r14, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7249 "11010001" // /* MW 3 */ + 7250 "11110101" // /* MW 2 */ + 7251 "00000111" // /* MW 1 */ + 7252 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7253 "10011001" // /* MW 3 */ + 7254 "11111011" // /* MW 2 */ + 7255 "00000111" // /* MW 1 */ + 7256 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7257 "11110001" // /* MW 3 */ + 7258 "11111101" // /* MW 2 */ + 7259 "00000111" // /* MW 1 */ + 7260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7261 "00000000" // /* MW 1 */ + 7262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7263 "00000000" // /* MW 1 */ + 7264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7265 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 45 first + 7266 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 7267 "00000000" // /* MW 3 */ + 7268 "00101000" // /* MW 2 */ + 7269 "00010000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 45 +.delay_slot + 7270 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7271 "00000001" // /* MW 5 */ + 7272 "00000000" // /* MW 4 */ + 7273 "00000000" // /* MW 3 */ + 7274 "11111000" // /* MW 2 */ + 7275 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7277 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7279 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7281 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj__end +.label __ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj___func_end0 + 7283 "00000000" // /* MW 1 */ +.label __Z14_b8148_wrapperPPv___func_begin0 +.label _Z14_b8148_wrapperPPv +.function _b8148_wrapper _Z14_b8148_wrapperPPv +.src_ref 0 "0_0_reloadable82.cc" 27 first +.src_ref 0 "0_0_reloadable82.cc" 29 79 +.function_start + 7296 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7297 "11000000" // /* MW 3 */ + 7298 "01100000" // /* MW 2 */ + 7299 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 29 79 first + 7300 "10011000" // LDA p0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7301 "00011110" // /* MW 3 */ + 7302 "00011100" // /* MW 2 */ + 7303 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 31 46 first + 7304 "10011000" // LDA p2, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7305 "00011110" // /* MW 3 */ + 7306 "00010101" // /* MW 2 */ + 7307 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 30 80 first + 7308 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7309 "10011110" // /* MW 3 */ + 7310 "00000100" // /* MW 2 */ + 7311 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 28 4 first +.tail_call + 7312 "10000100" // J #6800 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=6800 delay_slots=5 */ + 7313 "00000000" // /* MW 5 */ + 7314 "00000000" // /* MW 4 */ + 7315 "01001000" // /* MW 3 */ + 7316 "00001101" // /* MW 2 */ + 7317 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7318 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7319 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7320 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7321 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7323 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7325 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z14_b8148_wrapperPPv__end +.label __Z14_b8148_wrapperPPv___func_end0 + 7327 "00000000" // /* MW 1 */ +.label _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj +.label __ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj___func_begin0 +.function load_slice_generic_innermost_rtp _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj +.src_ref 11 "slice_generic_innermost_params.h" 40 first +.src_ref 11 "slice_generic_innermost_params.h" 41 19 first +.function_start + 7328 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7329 "00101110" // /* MW 3 */ + 7330 "00011100" // /* MW 2 */ + 7331 "00000001" // /* MW 1 */ + 7332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7333 "00000000" // /* MW 1 */ + 7334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7335 "00000000" // /* MW 1 */ + 7336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7337 "00000000" // /* MW 1 */ + 7338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7339 "00000000" // /* MW 1 */ + 7340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7341 "00000000" // /* MW 1 */ + 7342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7343 "00000000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 41 17 first + 7344 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7345 "00101001" // /* MW 3 */ + 7346 "00011100" // /* MW 2 */ + 7347 "00001000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 42 19 first + 7348 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7349 "00101110" // /* MW 3 */ + 7350 "00011100" // /* MW 2 */ + 7351 "00000001" // /* MW 1 */ + 7352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7353 "00000000" // /* MW 1 */ + 7354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7355 "00000000" // /* MW 1 */ + 7356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7357 "00000000" // /* MW 1 */ + 7358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7359 "00000000" // /* MW 1 */ + 7360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7361 "00000000" // /* MW 1 */ + 7362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7363 "00000000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 42 17 + 7364 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7365 "00101001" // /* MW 3 */ + 7366 "00011100" // /* MW 2 */ + 7367 "00001000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 43 19 first + 7368 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7369 "00101110" // /* MW 3 */ + 7370 "00011100" // /* MW 2 */ + 7371 "00000001" // /* MW 1 */ + 7372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7373 "00000000" // /* MW 1 */ + 7374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7375 "00000000" // /* MW 1 */ + 7376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7377 "00000000" // /* MW 1 */ + 7378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7379 "00000000" // /* MW 1 */ + 7380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7381 "00000000" // /* MW 1 */ + 7382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7383 "00000000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 43 17 + 7384 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7385 "00101001" // /* MW 3 */ + 7386 "00011100" // /* MW 2 */ + 7387 "00001000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 44 19 first + 7388 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7389 "00101110" // /* MW 3 */ + 7390 "00011100" // /* MW 2 */ + 7391 "00000001" // /* MW 1 */ + 7392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7393 "00000000" // /* MW 1 */ + 7394 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7395 "00000000" // /* MW 1 */ + 7396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7397 "00000000" // /* MW 1 */ + 7398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7399 "00000000" // /* MW 1 */ + 7400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7401 "00000000" // /* MW 1 */ + 7402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7403 "00000000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 44 17 + 7404 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7405 "00101001" // /* MW 3 */ + 7406 "00011100" // /* MW 2 */ + 7407 "00001000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 45 19 first + 7408 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7409 "00101110" // /* MW 3 */ + 7410 "00011100" // /* MW 2 */ + 7411 "00000001" // /* MW 1 */ + 7412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7413 "00000000" // /* MW 1 */ + 7414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7415 "00000000" // /* MW 1 */ + 7416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7417 "00000000" // /* MW 1 */ + 7418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7419 "00000000" // /* MW 1 */ + 7420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7421 "00000000" // /* MW 1 */ + 7422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7423 "00000000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 45 17 + 7424 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7425 "00101001" // /* MW 3 */ + 7426 "00011100" // /* MW 2 */ + 7427 "00001000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 46 17 first + 7428 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7429 "00101110" // /* MW 3 */ + 7430 "00011100" // /* MW 2 */ + 7431 "00000001" // /* MW 1 */ + 7432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7433 "00000000" // /* MW 1 */ + 7434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7435 "00000000" // /* MW 1 */ + 7436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7437 "00000000" // /* MW 1 */ + 7438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7439 "00000000" // /* MW 1 */ + 7440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7441 "00000000" // /* MW 1 */ + 7442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7443 "00000000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 46 15 + 7444 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7445 "00101001" // /* MW 3 */ + 7446 "00011100" // /* MW 2 */ + 7447 "00001000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 47 18 first + 7448 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7449 "00101110" // /* MW 3 */ + 7450 "00000100" // /* MW 2 */ + 7451 "00000001" // /* MW 1 */ + 7452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7453 "00000000" // /* MW 1 */ + 7454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7455 "00000000" // /* MW 1 */ + 7456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7457 "00000000" // /* MW 1 */ + 7458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7459 "00000000" // /* MW 1 */ + 7460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7461 "00000000" // /* MW 1 */ + 7462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7463 "00000000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 47 16 + 7464 "10011000" // ST el0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7465 "00101001" // /* MW 3 */ + 7466 "00000100" // /* MW 2 */ + 7467 "00001000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 48 18 first + 7468 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7469 "00101110" // /* MW 3 */ + 7470 "00010100" // /* MW 2 */ + 7471 "00000001" // /* MW 1 */ + 7472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7473 "00000000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 49 first + 7474 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 7475 "00000000" // /* MW 3 */ + 7476 "00101000" // /* MW 2 */ + 7477 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7479 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7481 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7483 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7485 "00000000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 48 16 first +.delay_slot + 7486 "10011000" // ST el0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7487 "00101001" // /* MW 3 */ + 7488 "00010100" // /* MW 2 */ +.label _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj__end +.label __ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj___func_end0 + 7489 "00001000" // /* MW 1 */ +.label _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params +.label __ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params___func_begin0 +.function setup_slice_generic_innermost _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params +.src_ref 11 "slice_generic_innermost_params.h" 52 first +.src_ref 11 "slice_generic_innermost_params.h" 53 25 first +.src_ref 11 "slice_generic_innermost_params.h" 55 42 +.src_ref 11 "slice_generic_innermost_params.h" 58 40 +.function_start + 7504 "10111010" // LDA r1, [p0], #4; MOVX r0, #1; MOV m1, #32 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7505 "01011000" // /* MW 9 */ + 7506 "00100000" // /* MW 8 */ + 7507 "10000000" // /* MW 7 */ + 7508 "00101000" // /* MW 6 */ + 7509 "00000000" // /* MW 5 */ + 7510 "00000000" // /* MW 4 */ + 7511 "11010000" // /* MW 3 */ + 7512 "10000110" // /* MW 2 */ + 7513 "00000011" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 53 38 first +.src_ref 11 "slice_generic_innermost_params.h" 58 30 +.src_ref 11 "slice_generic_innermost_params.h" 59 31 + 7514 "10111010" // LDA r5, [p0], #4; MOVX r3, #2; MOV r2, #-6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7515 "01011000" // /* MW 9 */ + 7516 "11111010" // /* MW 8 */ + 7517 "01001111" // /* MW 7 */ + 7518 "01001000" // /* MW 6 */ + 7519 "00110000" // /* MW 5 */ + 7520 "00000000" // /* MW 4 */ + 7521 "11010000" // /* MW 3 */ + 7522 "10010110" // /* MW 2 */ + 7523 "00000011" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 53 51 +.src_ref 11 "slice_generic_innermost_params.h" 60 27 +.src_ref 11 "slice_generic_innermost_params.h" 62 27 + 7524 "01010100" // LDA r4, [p0], #8; MOV m0, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7525 "00000001" // /* MW 5 */ + 7526 "00000001" // /* MW 4 */ + 7527 "11010000" // /* MW 3 */ + 7528 "10010010" // /* MW 2 */ + 7529 "00000101" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 55 42 first +.src_ref 11 "slice_generic_innermost_params.h" 60 27 + 7530 "01010100" // LDA r6, [p0], m1; MOV dj0, #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7531 "00000001" // /* MW 5 */ + 7532 "00000010" // /* MW 4 */ + 7533 "11010001" // /* MW 3 */ + 7534 "00011010" // /* MW 2 */ + 7535 "00000101" // /* MW 1 */ + 7536 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7537 "00000000" // /* MW 1 */ + 7538 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7539 "00000000" // /* MW 1 */ + 7540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7541 "00000000" // /* MW 1 */ + 7542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7543 "00000000" // /* MW 1 */ + 7544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7545 "00000000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 53 30 first + 7546 "10011000" // MUL r1, r5, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7547 "00011111" // /* MW 3 */ + 7548 "01000010" // /* MW 2 */ + 7549 "00010001" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 58 40 first + 7550 "10011000" // AND r0, r6, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7551 "00000100" // /* MW 3 */ + 7552 "10000000" // /* MW 2 */ + 7553 "00010001" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 58 30 + 7554 "10011000" // OR r0, r3, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7555 "00000101" // /* MW 3 */ + 7556 "11000000" // /* MW 2 */ + 7557 "00010000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 53 43 first +.src_ref 11 "slice_generic_innermost_params.h" 58 28 + 7558 "01011100" // ST r0, [p0], #-16; MUL r1, r1, r4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7559 "10011111" // /* MW 5 */ + 7560 "10000100" // /* MW 4 */ + 7561 "00110000" // /* MW 3 */ + 7562 "10000010" // /* MW 2 */ + 7563 "00011001" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 75 first + 7564 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 7565 "00000000" // /* MW 3 */ + 7566 "00101000" // /* MW 2 */ + 7567 "00010000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 59 31 first +.delay_slot + 7568 "10011000" // LSHL r0, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7569 "00101101" // /* MW 3 */ + 7570 "01000000" // /* MW 2 */ + 7571 "00010000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 59 25 +.delay_slot + 7572 "10011000" // ST r0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7573 "00010001" // /* MW 3 */ + 7574 "00011100" // /* MW 2 */ + 7575 "00001000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 60 27 first +.delay_slot + 7576 "10011000" // ST m0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7577 "00000001" // /* MW 3 */ + 7578 "00011100" // /* MW 2 */ + 7579 "00001000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 60 27 +.delay_slot + 7580 "10011000" // ST dj0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7581 "01000001" // /* MW 3 */ + 7582 "00000100" // /* MW 2 */ + 7583 "00001000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 62 27 first +.delay_slot + 7584 "10011000" // ST m0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7585 "00000001" // /* MW 3 */ + 7586 "00010100" // /* MW 2 */ +.label _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params__end +.label __ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params___func_end0 + 7587 "00001000" // /* MW 1 */ +.label _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj +.label __ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj___func_begin0 +.function setup_slice_generic_innermost_params _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj +.src_ref 11 "slice_generic_innermost_params.h" 79 first +.src_ref 11 "slice_generic_innermost_params.h" 80 4 first +.function_start +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.noswbrkpt + 7600 "00000100" // JL #7328 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7328 delay_slots=5 */ + 7601 "00000001" // /* MW 5 */ + 7602 "00000000" // /* MW 4 */ + 7603 "01010000" // /* MW 3 */ + 7604 "00001110" // /* MW 2 */ + 7605 "00000000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 81 4 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7606 "11111000" // MOV dc0, lr /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7607 "11100000" // /* MW 3 */ + 7608 "11000001" // /* MW 2 */ + 7609 "00011000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 81 4 +.delay_slot + 7610 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7611 "11000000" // /* MW 3 */ + 7612 "01100000" // /* MW 2 */ + 7613 "00011010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7615 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7616 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7617 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7618 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 7619 "00011100" // /* MW 13 */ + 7620 "00000000" // /* MW 12 */ + 7621 "00000000" // /* MW 11 */ + 7622 "01010111" // /* MW 10 */ + 7623 "00011010" // /* MW 9 */ + 7624 "01000000" // /* MW 8 */ + 7625 "00000000" // /* MW 7 */ + 7626 "00000000" // /* MW 6 */ + 7627 "10110110" // /* MW 5 */ + 7628 "00000010" // /* MW 4 */ + 7629 "11110000" // /* MW 3 */ + 7630 "00101100" // /* MW 2 */ + 7631 "00000000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 81 4 first +.tail_call +.return_address + 7632 "10000100" // J #7504 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=7504 delay_slots=5 */ + 7633 "00000000" // /* MW 5 */ + 7634 "00000000" // /* MW 4 */ + 7635 "10101000" // /* MW 3 */ + 7636 "00001110" // /* MW 2 */ + 7637 "00000000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 81 4 +.delay_slot + 7638 "11111000" // MOV lr, dc0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7639 "10000000" // /* MW 3 */ + 7640 "01110001" // /* MW 2 */ + 7641 "00011111" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost_params.h" 81 4 +.delay_slot + 7642 "11111000" // MOV p0, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7643 "11000000" // /* MW 3 */ + 7644 "01100100" // /* MW 2 */ + 7645 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7647 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7649 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj__end +.label __ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj___func_end0 + 7651 "00000000" // /* MW 1 */ +.label __Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params___func_begin0 +.label _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params +.function slice_generic_innermost _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params +.src_ref 8 "vector.hpp" 1159 33 +.src_ref 11 "slice_generic_innermost.h" 25 first +.src_ref 11 "slice_generic_innermost.h" 35 60 +.src_ref 11 "slice_generic_innermost.h" 54 19 +.function_start + 7664 "00000010" // MOVS p5, p1; MOV r0, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7665 "01110000" // /* MW 7 */ + 7666 "01100000" // /* MW 6 */ + 7667 "00001010" // /* MW 5 */ + 7668 "00000000" // /* MW 4 */ + 7669 "01100000" // /* MW 3 */ + 7670 "10010001" // /* MW 2 */ + 7671 "10110000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost.h" 35 60 first + 7672 "00011000" // ADD.NC p3, r0, #36 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7673 "00010010" // /* MW 3 */ + 7674 "01100000" // /* MW 2 */ + 7675 "00011011" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost.h" 35 60 + 7676 "11010100" // LDA m2, [p3], #4; MOV r0, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7677 "10000001" // /* MW 5 */ + 7678 "00111101" // /* MW 4 */ + 7679 "11010000" // /* MW 3 */ + 7680 "10100000" // /* MW 2 */ + 7681 "01100011" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost.h" 36 61 first + 7682 "10011000" // LDA m0, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7683 "00000110" // /* MW 3 */ + 7684 "00011100" // /* MW 2 */ + 7685 "00000011" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost.h" 40 35 first + 7686 "10011000" // LDA r2, [p3, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7687 "01010110" // /* MW 3 */ + 7688 "11010100" // /* MW 2 */ + 7689 "00000011" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost.h" 38 59 first + 7690 "10011000" // LDA m1, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7691 "10000110" // /* MW 3 */ + 7692 "00000100" // /* MW 2 */ + 7693 "00000011" // /* MW 1 */ + 7694 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7695 "00000000" // /* MW 1 */ + 7696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7697 "00000000" // /* MW 1 */ + 7698 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7699 "00000000" // /* MW 1 */ + 7700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7701 "00000000" // /* MW 1 */ + 7702 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7703 "00000000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost.h" 40 8 first +.src_ref 11 "slice_generic_innermost.h" 40 26 first + 7704 "10000100" // JZ r2, #8128 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8128 delay_slots=5 */ + 7705 "00000001" // /* MW 5 */ + 7706 "00000000" // /* MW 4 */ + 7707 "11100000" // /* MW 3 */ + 7708 "00001111" // /* MW 2 */ + 7709 "00010000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 +.src_ref 11 "slice_generic_innermost.h" 36 39 +.src_ref 11 "slice_generic_innermost.h" 50 19 +.delay_slot + 7710 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7711 "11000000" // /* MW 3 */ + 7712 "01100000" // /* MW 2 */ + 7713 "00011111" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 +.src_ref 11 "slice_generic_innermost.h" 36 39 first +.src_ref 11 "slice_generic_innermost.h" 50 19 +.delay_slot + 7714 "11110100" // PADDB [p7], m0; MOV p3, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7715 "10000001" // /* MW 5 */ + 7716 "11011101" // /* MW 4 */ + 7717 "00000110" // /* MW 3 */ + 7718 "01110010" // /* MW 2 */ + 7719 "11100001" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost.h" 35 38 first +.delay_slot + 7720 "00011000" // PADDB [p0], m2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7721 "10010000" // /* MW 3 */ + 7722 "01001011" // /* MW 2 */ + 7723 "00111000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 +.src_ref 11 "slice_generic_innermost.h" 37 39 first +.src_ref 11 "slice_generic_innermost.h" 52 20 +.delay_slot + 7724 "11110100" // PADDB [p0], m0; MOV p4, p0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7725 "10000001" // /* MW 5 */ + 7726 "11000001" // /* MW 4 */ + 7727 "00001000" // /* MW 3 */ + 7728 "01110010" // /* MW 2 */ + 7729 "00000001" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost.h" 38 37 first +.delay_slot + 7730 "00011000" // PADDB [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7731 "10010000" // /* MW 3 */ + 7732 "00101011" // /* MW 2 */ + 7733 "00111001" // /* MW 1 */ + 7734 "00011000" // MOVX r1, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7735 "00010001" // /* MW 3 */ + 7736 "00000010" // /* MW 2 */ + 7737 "00010000" // /* MW 1 */ + 7738 "10011000" // LTU r3, r2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7739 "00011100" // /* MW 3 */ + 7740 "10000110" // /* MW 2 */ + 7741 "00010000" // /* MW 1 */ + 7742 "10000100" // JNZ r3, #7984 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7984 delay_slots=5 */ + 7743 "00000001" // /* MW 5 */ + 7744 "01000000" // /* MW 4 */ + 7745 "10011000" // /* MW 3 */ + 7746 "00001111" // /* MW 2 */ + 7747 "00011000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost.h" 40 8 +.delay_slot + 7748 "10111000" // MOV dj0, #48 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7749 "01100000" // /* MW 3 */ + 7750 "10000000" // /* MW 2 */ + 7751 "00011000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost.h" 40 8 first +.delay_slot + 7752 "10011000" // LDA r1, [p2, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7753 "00110110" // /* MW 3 */ + 7754 "00000000" // /* MW 2 */ + 7755 "00000010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7756 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7757 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7758 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7759 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7760 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7761 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 first +.src_ref 8 "vector.hpp" 1139 17 first +.src_ref 11 "slice_generic_innermost.h" 40 8 +.src_ref 11 "slice_generic_innermost.h" 50 19 first +.src_ref 11 "slice_generic_innermost.h" 52 20 first + 7762 "10110110" // VLDA x2, [p4], m0; VLDB x1, [p3], m0; MOVXM ls, #7856 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7763 "00010000" // /* MW 11 */ + 7764 "01011000" // /* MW 10 */ + 7765 "01111111" // /* MW 9 */ + 7766 "00000100" // /* MW 8 */ + 7767 "00000000" // /* MW 7 */ + 7768 "00000000" // /* MW 6 */ + 7769 "11101000" // /* MW 5 */ + 7770 "00010000" // /* MW 4 */ + 7771 "01110110" // /* MW 3 */ + 7772 "00010011" // /* MW 2 */ + 7773 "10000001" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 +.src_ref 11 "slice_generic_innermost.h" 40 8 first +.src_ref 11 "slice_generic_innermost.h" 51 19 first +.src_ref 11 "slice_generic_innermost.h" 53 20 first +.src_ref 11 "slice_generic_innermost.h" 56 21 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7774 "01111110" // PADDA [p4], m0; VLDB x0, [p7], m0; PADDS [p3], m0; MOVXM le, #7904 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 7775 "01100000" // /* MW 13 */ + 7776 "00001011" // /* MW 12 */ + 7777 "01100001" // /* MW 11 */ + 7778 "00000010" // /* MW 10 */ + 7779 "11101110" // /* MW 9 */ + 7780 "10110111" // /* MW 8 */ + 7781 "00000000" // /* MW 7 */ + 7782 "00000000" // /* MW 6 */ + 7783 "01101000" // /* MW 5 */ + 7784 "00010000" // /* MW 4 */ + 7785 "11111110" // /* MW 3 */ + 7786 "00001100" // /* MW 2 */ + 7787 "10000001" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 first +.src_ref 8 "vector.hpp" 1139 17 first +.src_ref 11 "slice_generic_innermost.h" 40 8 +.src_ref 11 "slice_generic_innermost.h" 50 19 first +.src_ref 11 "slice_generic_innermost.h" 57 21 first +.src_ref 11 "slice_generic_innermost.h" 58 21 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7788 "11110110" // VLDA x1, [p3], m0; VLDB x3, [p0], m0; PADDS [p7], m0; ADD.NC lc, r2, #-3 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7789 "01000000" // /* MW 11 */ + 7790 "10111111" // /* MW 10 */ + 7791 "10111000" // /* MW 9 */ + 7792 "00000010" // /* MW 8 */ + 7793 "01011011" // /* MW 7 */ + 7794 "00001000" // /* MW 6 */ + 7795 "11101111" // /* MW 5 */ + 7796 "00010001" // /* MW 4 */ + 7797 "01110000" // /* MW 3 */ + 7798 "00001011" // /* MW 2 */ + 7799 "01100001" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 +.src_ref 11 "slice_generic_innermost.h" 51 19 first +.src_ref 11 "slice_generic_innermost.h" 52 20 first +.src_ref 11 "slice_generic_innermost.h" 59 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7800 "00110010" // PADDA [p0], m0; VLDB x2, [p4], m0; PADDS [p3], m0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7801 "01011011" // /* MW 7 */ + 7802 "00001000" // /* MW 6 */ + 7803 "01101011" // /* MW 5 */ + 7804 "00010001" // /* MW 4 */ + 7805 "11111000" // /* MW 3 */ + 7806 "00001100" // /* MW 2 */ + 7807 "00000001" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 first +.src_ref 11 "slice_generic_innermost.h" 53 20 first +.src_ref 11 "slice_generic_innermost.h" 56 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7808 "00111100" // PADDA [p4], m0; VLDB x0, [p7], m0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7809 "01101000" // /* MW 5 */ + 7810 "00010000" // /* MW 4 */ + 7811 "11111110" // /* MW 3 */ + 7812 "00001100" // /* MW 2 */ + 7813 "10000001" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 +.src_ref 11 "slice_generic_innermost.h" 57 21 first +.src_ref 11 "slice_generic_innermost.h" 58 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7814 "01001100" // VLDB x3, [p0], m0; PADDS [p7], m0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7815 "10110110" // /* MW 5 */ + 7816 "00010000" // /* MW 4 */ + 7817 "10001110" // /* MW 3 */ + 7818 "00011110" // /* MW 2 */ + 7819 "00000001" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 first +.src_ref 11 "slice_generic_innermost.h" 50 19 first +.src_ref 11 "slice_generic_innermost.h" 59 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7820 "00111100" // PADDA [p0], m0; VLDB x1, [p3], m0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7821 "11101000" // /* MW 5 */ + 7822 "00010000" // /* MW 4 */ + 7823 "11110110" // /* MW 3 */ + 7824 "00001100" // /* MW 2 */ + 7825 "00000001" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 +.src_ref 11 "slice_generic_innermost.h" 46 17 first +.src_ref 11 "slice_generic_innermost.h" 52 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7826 "10110100" // VLDB x2, [p4], m0; VSHUFFLE bmll0, x1, x2, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7827 "00001011" // /* MW 5 */ + 7828 "00010010" // /* MW 4 */ + 7829 "10000000" // /* MW 3 */ + 7830 "00010110" // /* MW 2 */ + 7831 "10000001" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 first +.src_ref 11 "slice_generic_innermost.h" 51 19 first +.src_ref 11 "slice_generic_innermost.h" 56 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7832 "00110010" // NOPA; VLDB x0, [p7], m0; PADDS [p3], m0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7833 "01011011" // /* MW 7 */ + 7834 "00001000" // /* MW 6 */ + 7835 "01101011" // /* MW 5 */ + 7836 "00010000" // /* MW 4 */ + 7837 "11111110" // /* MW 3 */ + 7838 "00101100" // /* MW 2 */ + 7839 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 +.src_ref 8 "vector.hpp" 1159 33 first +.src_ref 11 "slice_generic_innermost.h" 47 18 first +.src_ref 11 "slice_generic_innermost.h" 54 19 first +.src_ref 11 "slice_generic_innermost.h" 58 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7840 "11100001" // NOPA; VLDB x3, [p0], m0; VST bmll0, [p5], m1; NOPX; VSHUFFLE bmlh0, x0, x3, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7841 "00000000" // /* MW 15 */ + 7842 "00000000" // /* MW 14 */ + 7843 "11101000" // /* MW 13 */ + 7844 "11000010" // /* MW 12 */ + 7845 "01000000" // /* MW 11 */ + 7846 "00000000" // /* MW 10 */ + 7847 "00000000" // /* MW 9 */ + 7848 "10000000" // /* MW 8 */ + 7849 "00000110" // /* MW 7 */ + 7850 "00101000" // /* MW 6 */ + 7851 "11101101" // /* MW 5 */ + 7852 "00010001" // /* MW 4 */ + 7853 "11110000" // /* MW 3 */ + 7854 "00101100" // /* MW 2 */ + 7855 "00000000" // /* MW 1 */ +.label ZLS_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_192 +.src_ref 8 "vector.hpp" 1139 17 first +.src_ref 11 "slice_generic_innermost.h" 46 17 first +.src_ref 11 "slice_generic_innermost.h" 50 19 first +.src_ref 11 "slice_generic_innermost.h" 53 20 first +.src_ref 11 "slice_generic_innermost.h" 57 21 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 7856 "11100001" // PADDA [p4], m0; VLDB x1, [p3], m0; PADDS [p7], m0; NOPX; VSHUFFLE bmll0, x1, x2, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7857 "00000000" // /* MW 15 */ + 7858 "00000000" // /* MW 14 */ + 7859 "11101000" // /* MW 13 */ + 7860 "10000010" // /* MW 12 */ + 7861 "00000100" // /* MW 11 */ + 7862 "00000000" // /* MW 10 */ + 7863 "00000000" // /* MW 9 */ + 7864 "00000000" // /* MW 8 */ + 7865 "01011011" // /* MW 7 */ + 7866 "00001000" // /* MW 6 */ + 7867 "11101111" // /* MW 5 */ + 7868 "00010000" // /* MW 4 */ + 7869 "11110110" // /* MW 3 */ + 7870 "00001100" // /* MW 2 */ + 7871 "10000001" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 +.src_ref 8 "vector.hpp" 1159 33 first +.src_ref 11 "slice_generic_innermost.h" 52 20 first +.src_ref 11 "slice_generic_innermost.h" 59 21 first +.src_ref 11 "slice_generic_innermost.h" 60 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7872 "11100001" // PADDA [p0], m0; VLDB x2, [p4], m0; VST bmlh0, [p1], m1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7873 "00000000" // /* MW 15 */ + 7874 "00000000" // /* MW 14 */ + 7875 "01111000" // /* MW 13 */ + 7876 "10100101" // /* MW 12 */ + 7877 "00000001" // /* MW 11 */ + 7878 "00000000" // /* MW 10 */ + 7879 "00000000" // /* MW 9 */ + 7880 "10000000" // /* MW 8 */ + 7881 "00100110" // /* MW 7 */ + 7882 "00101000" // /* MW 6 */ + 7883 "01101001" // /* MW 5 */ + 7884 "00010001" // /* MW 4 */ + 7885 "11111000" // /* MW 3 */ + 7886 "00001100" // /* MW 2 */ + 7887 "00000001" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 first +.src_ref 11 "slice_generic_innermost.h" 47 18 first +.src_ref 11 "slice_generic_innermost.h" 51 19 first +.src_ref 11 "slice_generic_innermost.h" 55 19 first +.src_ref 11 "slice_generic_innermost.h" 56 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7888 "11100001" // PADDA [p5], m1; VLDB x0, [p7], m0; PADDS [p3], m0; NOPX; VSHUFFLE bmlh0, x0, x3, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7889 "00000000" // /* MW 15 */ + 7890 "00000000" // /* MW 14 */ + 7891 "11101000" // /* MW 13 */ + 7892 "11000010" // /* MW 12 */ + 7893 "01000000" // /* MW 11 */ + 7894 "00000000" // /* MW 10 */ + 7895 "00000000" // /* MW 9 */ + 7896 "00000000" // /* MW 8 */ + 7897 "01011011" // /* MW 7 */ + 7898 "00001000" // /* MW 6 */ + 7899 "01101011" // /* MW 5 */ + 7900 "00010000" // /* MW 4 */ + 7901 "11111110" // /* MW 3 */ + 7902 "00001100" // /* MW 2 */ + 7903 "10100101" // /* MW 1 */ +.label ZLE_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_240 +.src_ref 8 "vector.hpp" 1139 17 +.src_ref 8 "vector.hpp" 1159 33 first +.src_ref 11 "slice_generic_innermost.h" 54 19 first +.src_ref 11 "slice_generic_innermost.h" 58 21 first +.src_ref 11 "slice_generic_innermost.h" 61 19 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7904 "11100001" // PADDA [p1], m1; VLDB x3, [p0], m0; VST bmll0, [p5], m1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7905 "00000000" // /* MW 15 */ + 7906 "00000000" // /* MW 14 */ + 7907 "01111000" // /* MW 13 */ + 7908 "10100101" // /* MW 12 */ + 7909 "00000001" // /* MW 11 */ + 7910 "00000000" // /* MW 10 */ + 7911 "00000000" // /* MW 9 */ + 7912 "10000000" // /* MW 8 */ + 7913 "00000110" // /* MW 7 */ + 7914 "00101000" // /* MW 6 */ + 7915 "11101101" // /* MW 5 */ + 7916 "00010001" // /* MW 4 */ + 7917 "11110000" // /* MW 3 */ + 7918 "00001100" // /* MW 2 */ + 7919 "00100101" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost.h" 46 17 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 7920 "11011000" // VSHUFFLE bmll0, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7921 "00000101" // /* MW 3 */ + 7922 "00001001" // /* MW 2 */ + 7923 "00011000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1159 33 first +.src_ref 11 "slice_generic_innermost.h" 60 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7924 "10011000" // VST bmlh0, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7925 "00100110" // /* MW 3 */ + 7926 "00101000" // /* MW 2 */ + 7927 "00001001" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost.h" 47 18 first +.src_ref 11 "slice_generic_innermost.h" 61 19 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7928 "10010100" // PADDA [p1], m1; VSHUFFLE bmlh0, x0, x3, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7929 "00001011" // /* MW 5 */ + 7930 "00000011" // /* MW 4 */ + 7931 "11110001" // /* MW 3 */ + 7932 "00001100" // /* MW 2 */ + 7933 "00100101" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 7934 "10000100" // J #8128 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8128 delay_slots=5 */ + 7935 "00000000" // /* MW 5 */ + 7936 "00000000" // /* MW 4 */ + 7937 "11100000" // /* MW 3 */ + 7938 "00001111" // /* MW 2 */ + 7939 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1159 33 first +.src_ref 11 "slice_generic_innermost.h" 46 17 first +.src_ref 11 "slice_generic_innermost.h" 55 19 first +.src_ref 11 "slice_generic_innermost.h" 60 19 first +.delay_slot +.aggressive_scheduled_block_id 2 +.noswbrkpt + 7940 "10111010" // PADDA [p5], m1; VST bmlh0, [p1], m1; VSHUFFLE bmll0, x1, x2, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7941 "11100010" // /* MW 9 */ + 7942 "10000010" // /* MW 8 */ + 7943 "00000100" // /* MW 7 */ + 7944 "10000000" // /* MW 6 */ + 7945 "00100110" // /* MW 5 */ + 7946 "00101000" // /* MW 4 */ + 7947 "11110001" // /* MW 3 */ + 7948 "00001100" // /* MW 2 */ + 7949 "10100101" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1159 33 +.src_ref 11 "slice_generic_innermost.h" 54 19 first +.src_ref 11 "slice_generic_innermost.h" 61 19 first +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7950 "00001100" // PADDA [p1], m1; VST bmll0, [p5], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7951 "00001101" // /* MW 5 */ + 7952 "01010000" // /* MW 4 */ + 7953 "11111010" // /* MW 3 */ + 7954 "00001100" // /* MW 2 */ + 7955 "00100101" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost.h" 47 18 first +.src_ref 11 "slice_generic_innermost.h" 55 19 first +.delay_slot + 7956 "10010100" // PADDA [p5], m1; VSHUFFLE bmlh0, x0, x3, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7957 "00001011" // /* MW 5 */ + 7958 "00000011" // /* MW 4 */ + 7959 "11110001" // /* MW 3 */ + 7960 "00001100" // /* MW 2 */ + 7961 "10100101" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1159 33 first +.src_ref 11 "slice_generic_innermost.h" 54 19 first +.delay_slot + 7962 "00001100" // NOPA; VST bmll0, [p5], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7963 "00001101" // /* MW 5 */ + 7964 "01010000" // /* MW 4 */ + 7965 "11111010" // /* MW 3 */ + 7966 "00101100" // /* MW 2 */ + 7967 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1159 33 +.src_ref 11 "slice_generic_innermost.h" 60 19 first +.delay_slot + 7968 "11100001" // NOPA; NOPB; VST bmlh0, [p1], m1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7969 "00000000" // /* MW 15 */ + 7970 "00000000" // /* MW 14 */ + 7971 "01111000" // /* MW 13 */ + 7972 "10100101" // /* MW 12 */ + 7973 "00000001" // /* MW 11 */ + 7974 "00000000" // /* MW 10 */ + 7975 "00000000" // /* MW 9 */ + 7976 "10000000" // /* MW 8 */ + 7977 "00100110" // /* MW 7 */ + 7978 "00101000" // /* MW 6 */ + 7979 "00100001" // /* MW 5 */ + 7980 "00000000" // /* MW 4 */ + 7981 "11110000" // /* MW 3 */ + 7982 "00101100" // /* MW 2 */ + 7983 "00000000" // /* MW 1 */ +.label TGT_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_320 +.src_ref 11 "slice_generic_innermost.h" 40 8 first + 7984 "11111000" // MOV lc, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7985 "00100000" // /* MW 3 */ + 7986 "01110001" // /* MW 2 */ + 7987 "00011101" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost.h" 40 8 + 7988 "01000100" // MOVXM ls, #8000 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7989 "10000000" // /* MW 5 */ + 7990 "11111110" // /* MW 4 */ + 7991 "00010001" // /* MW 3 */ + 7992 "00000000" // /* MW 2 */ + 7993 "00000000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost.h" 40 8 + 7994 "01000100" // MOVXM le, #8112 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7995 "01100000" // /* MW 5 */ + 7996 "11111111" // /* MW 4 */ + 7997 "00010110" // /* MW 3 */ + 7998 "00000000" // /* MW 2 */ + 7999 "00000000" // /* MW 1 */ +.label ZLS_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_336 +.src_ref 8 "vector.hpp" 1139 17 first +.src_ref 8 "vector.hpp" 1139 17 first +.src_ref 11 "slice_generic_innermost.h" 50 19 first +.src_ref 11 "slice_generic_innermost.h" 52 20 first +.begin_of_loop +.loop_nesting 1 + 8000 "00111100" // VLDA x1, [p4], m0; VLDB x2, [p3], m0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8001 "01101000" // /* MW 5 */ + 8002 "00010001" // /* MW 4 */ + 8003 "01110110" // /* MW 3 */ + 8004 "00001011" // /* MW 2 */ + 8005 "10000001" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 +.src_ref 11 "slice_generic_innermost.h" 51 19 first +.src_ref 11 "slice_generic_innermost.h" 53 20 first +.src_ref 11 "slice_generic_innermost.h" 56 21 first + 8006 "00110010" // PADDA [p3], m0; VLDB x0, [p7], m0; PADDS [p4], m0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8007 "01011011" // /* MW 7 */ + 8008 "00001000" // /* MW 6 */ + 8009 "01101100" // /* MW 5 */ + 8010 "00010000" // /* MW 4 */ + 8011 "11111110" // /* MW 3 */ + 8012 "00001100" // /* MW 2 */ + 8013 "01100001" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 first +.src_ref 11 "slice_generic_innermost.h" 57 21 first +.src_ref 11 "slice_generic_innermost.h" 58 21 first + 8014 "00111100" // PADDA [p7], m0; VLDB x3, [p0], m0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8015 "11101000" // /* MW 5 */ + 8016 "00010001" // /* MW 4 */ + 8017 "11110000" // /* MW 3 */ + 8018 "00001100" // /* MW 2 */ + 8019 "11100001" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost.h" 59 21 first + 8020 "00011000" // PADDB [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8021 "10010000" // /* MW 3 */ + 8022 "00001011" // /* MW 2 */ + 8023 "00111000" // /* MW 1 */ + 8024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8025 "00000000" // /* MW 1 */ + 8026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8027 "00000000" // /* MW 1 */ + 8028 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8029 "01100111" // /* MW 3 */ + 8030 "00000001" // /* MW 2 */ + 8031 "00000000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost.h" 46 17 first + 8032 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x2, x1, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8033 "00000000" // /* MW 15 */ + 8034 "00000000" // /* MW 14 */ + 8035 "11101000" // /* MW 13 */ + 8036 "01000010" // /* MW 12 */ + 8037 "00001000" // /* MW 11 */ + 8038 "00000000" // /* MW 10 */ + 8039 "00000000" // /* MW 9 */ + 8040 "00000000" // /* MW 8 */ + 8041 "01011011" // /* MW 7 */ + 8042 "00000001" // /* MW 6 */ + 8043 "00100000" // /* MW 5 */ + 8044 "00000000" // /* MW 4 */ + 8045 "11110000" // /* MW 3 */ + 8046 "00101100" // /* MW 2 */ + 8047 "00000000" // /* MW 1 */ + 8048 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8049 "00000000" // /* MW 15 */ + 8050 "00000000" // /* MW 14 */ + 8051 "01111000" // /* MW 13 */ + 8052 "10100101" // /* MW 12 */ + 8053 "00000001" // /* MW 11 */ + 8054 "00000000" // /* MW 10 */ + 8055 "00000000" // /* MW 9 */ + 8056 "00000000" // /* MW 8 */ + 8057 "01011011" // /* MW 7 */ + 8058 "00000001" // /* MW 6 */ + 8059 "00100000" // /* MW 5 */ + 8060 "00000000" // /* MW 4 */ + 8061 "11110000" // /* MW 3 */ + 8062 "00101100" // /* MW 2 */ + 8063 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1159 33 first +.src_ref 11 "slice_generic_innermost.h" 47 18 first +.src_ref 11 "slice_generic_innermost.h" 54 19 first + 8064 "11100001" // NOPA; NOPB; VST bmll0, [p5], m1; NOPX; VSHUFFLE bmlh0, x0, x3, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8065 "00000000" // /* MW 15 */ + 8066 "00000000" // /* MW 14 */ + 8067 "11101000" // /* MW 13 */ + 8068 "11000010" // /* MW 12 */ + 8069 "01000000" // /* MW 11 */ + 8070 "00000000" // /* MW 10 */ + 8071 "00000000" // /* MW 9 */ + 8072 "10000000" // /* MW 8 */ + 8073 "00000110" // /* MW 7 */ + 8074 "00101000" // /* MW 6 */ + 8075 "00100101" // /* MW 5 */ + 8076 "00000000" // /* MW 4 */ + 8077 "11110000" // /* MW 3 */ + 8078 "00101100" // /* MW 2 */ + 8079 "00000000" // /* MW 1 */ +.src_ref 11 "slice_generic_innermost.h" 55 19 first + 8080 "11100001" // NOPA; PADDB [p5], m1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8081 "00000000" // /* MW 15 */ + 8082 "00000000" // /* MW 14 */ + 8083 "01111000" // /* MW 13 */ + 8084 "10100101" // /* MW 12 */ + 8085 "00000001" // /* MW 11 */ + 8086 "00000000" // /* MW 10 */ + 8087 "00000000" // /* MW 9 */ + 8088 "00000000" // /* MW 8 */ + 8089 "01011011" // /* MW 7 */ + 8090 "00000001" // /* MW 6 */ + 8091 "00100000" // /* MW 5 */ + 8092 "01010111" // /* MW 4 */ + 8093 "11111010" // /* MW 3 */ + 8094 "00101100" // /* MW 2 */ + 8095 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1159 33 first +.src_ref 11 "slice_generic_innermost.h" 60 19 first + 8096 "11100001" // NOPA; NOPB; VST bmlh0, [p1], m1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8097 "00000000" // /* MW 15 */ + 8098 "00000000" // /* MW 14 */ + 8099 "01111000" // /* MW 13 */ + 8100 "10100101" // /* MW 12 */ + 8101 "00000001" // /* MW 11 */ + 8102 "00000000" // /* MW 10 */ + 8103 "00000000" // /* MW 9 */ + 8104 "10000000" // /* MW 8 */ + 8105 "00100110" // /* MW 7 */ + 8106 "00101000" // /* MW 6 */ + 8107 "00100001" // /* MW 5 */ + 8108 "00000000" // /* MW 4 */ + 8109 "11110000" // /* MW 3 */ + 8110 "00101100" // /* MW 2 */ + 8111 "00000000" // /* MW 1 */ +.label ZLE_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_448 +.src_ref 11 "slice_generic_innermost.h" 61 19 first +.end_of_loop + 8112 "11100001" // NOPA; PADDB [p1], m1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8113 "00000000" // /* MW 15 */ + 8114 "00000000" // /* MW 14 */ + 8115 "01111000" // /* MW 13 */ + 8116 "10100101" // /* MW 12 */ + 8117 "00000001" // /* MW 11 */ + 8118 "00000000" // /* MW 10 */ + 8119 "00000000" // /* MW 9 */ + 8120 "00000000" // /* MW 8 */ + 8121 "01011011" // /* MW 7 */ + 8122 "00000001" // /* MW 6 */ + 8123 "00100000" // /* MW 5 */ + 8124 "01010111" // /* MW 4 */ + 8125 "11110010" // /* MW 3 */ + 8126 "00101100" // /* MW 2 */ + 8127 "00000000" // /* MW 1 */ +.label TGT_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_464 +.src_ref 11 "slice_generic_innermost.h" 76 first +.loop_nesting 0 + 8128 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 8129 "00000000" // /* MW 3 */ + 8130 "00101000" // /* MW 2 */ + 8131 "00010000" // /* MW 1 */ +.delay_slot + 8132 "11111000" // MOV p7, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8133 "00100000" // /* MW 3 */ + 8134 "01100000" // /* MW 2 */ + 8135 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8137 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8139 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8141 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params__end +.label __Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params___func_end0 + 8143 "00000000" // /* MW 1 */ +.label __ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj___func_begin0 +.label _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj +.function slice_generic_innermost_adf_wrapper, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> > > _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 29 +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 29 first +.function_start + 8144 "00111010" // MOVS p5, p0; PADDXM [sp], #128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8145 "01110001" // /* MW 9 */ + 8146 "00000000" // /* MW 8 */ + 8147 "00000000" // /* MW 7 */ + 8148 "00000000" // /* MW 6 */ + 8149 "00000100" // /* MW 5 */ + 8150 "00000000" // /* MW 4 */ + 8151 "01100000" // /* MW 3 */ + 8152 "00010001" // /* MW 2 */ + 8153 "10110000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 + 8154 "00000010" // ST lr, [sp, #-4]; MOV p3, p1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8155 "01110000" // /* MW 7 */ + 8156 "01100000" // /* MW 6 */ + 8157 "10110001" // /* MW 5 */ + 8158 "00000001" // /* MW 4 */ + 8159 "10110000" // /* MW 3 */ + 8160 "10000111" // /* MW 2 */ + 8161 "11111111" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 33 4 +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 33 4 first +.no_stack_arguments + 8162 "00111010" // MOVS p1, p2; JL #7600 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=7600 delay_slots=5 */ + 8163 "01000001" // /* MW 9 */ + 8164 "00000000" // /* MW 8 */ + 8165 "00000000" // /* MW 7 */ + 8166 "10110110" // /* MW 6 */ + 8167 "00000011" // /* MW 5 */ + 8168 "00000000" // /* MW 4 */ + 8169 "01100000" // /* MW 3 */ + 8170 "00010001" // /* MW 2 */ + 8171 "00110001" // /* MW 1 */ +.delay_slot + 8172 "11111000" // MOV p0, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8173 "11100000" // /* MW 3 */ + 8174 "01100101" // /* MW 2 */ + 8175 "00011000" // /* MW 1 */ +.delay_slot + 8176 "00011000" // PADDB [p0], #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8177 "10010000" // /* MW 3 */ + 8178 "11101111" // /* MW 2 */ + 8179 "00111000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 4 +.delay_slot + 8180 "11111000" // MOV p4, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8181 "11000000" // /* MW 3 */ + 8182 "01100000" // /* MW 2 */ + 8183 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8185 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8186 "00111100" // NOPA; NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8187 "00100000" // /* MW 5 */ + 8188 "00000000" // /* MW 4 */ + 8189 "11110000" // /* MW 3 */ + 8190 "00101100" // /* MW 2 */ + 8191 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 31 first +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 4 +.return_address + 8192 "10111010" // LDA r18, [sp, #-128]; MOVS p2, p4; MOV r17, CORE_ID /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8193 "01110010" // /* MW 9 */ + 8194 "01110000" // /* MW 8 */ + 8195 "00101101" // /* MW 7 */ + 8196 "00000010" // /* MW 6 */ + 8197 "10001011" // /* MW 5 */ + 8198 "10010000" // /* MW 4 */ + 8199 "00100010" // /* MW 3 */ + 8200 "01001010" // /* MW 2 */ + 8201 "11110000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 34 35 first +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 44 + 8202 "00101100" // LDA r22, [sp, #-124]; EXTEND.u8 r17, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8203 "00100000" // /* MW 5 */ + 8204 "11000101" // /* MW 4 */ + 8205 "00101000" // /* MW 3 */ + 8206 "11011010" // /* MW 2 */ + 8207 "11110000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 34 35 +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 57 first + 8208 "10111010" // LDA r20, [sp, #-120]; MOVXM r19, #65534 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8209 "00010000" // /* MW 9 */ + 8210 "11111111" // /* MW 8 */ + 8211 "01101111" // /* MW 7 */ + 8212 "00111110" // /* MW 6 */ + 8213 "00000000" // /* MW 5 */ + 8214 "00000000" // /* MW 4 */ + 8215 "00100000" // /* MW 3 */ + 8216 "01010010" // /* MW 2 */ + 8217 "11110001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 34 35 first + 8218 "00101100" // LDA p1, [p3]; ADD r17, r19, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8219 "00100001" // /* MW 5 */ + 8220 "11000110" // /* MW 4 */ + 8221 "11011001" // /* MW 3 */ + 8222 "10010011" // /* MW 2 */ + 8223 "01100000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 70 first +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 59 first + 8224 "00101100" // LDA r19, [sp, #-116]; EXTEND.u16 r21, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8225 "01100000" // /* MW 5 */ + 8226 "11010101" // /* MW 4 */ + 8227 "00101000" // /* MW 3 */ + 8228 "11001110" // /* MW 2 */ + 8229 "11110001" // /* MW 1 */ + 8230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8231 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 8232 "10011000" // LDA r17, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8233 "00110110" // /* MW 3 */ + 8234 "00000110" // /* MW 2 */ + 8235 "00000101" // /* MW 1 */ + 8236 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8237 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 36 first + 8238 "10011000" // MUL r18, r22, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8239 "00101111" // /* MW 3 */ + 8240 "10100101" // /* MW 2 */ + 8241 "00010101" // /* MW 1 */ + 8242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8243 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 49 + 8244 "10011000" // MUL r18, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8245 "01001111" // /* MW 3 */ + 8246 "10100101" // /* MW 2 */ + 8247 "00010100" // /* MW 1 */ + 8248 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8249 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 62 + 8250 "10011000" // MUL r18, r21, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8251 "00101111" // /* MW 3 */ + 8252 "01100101" // /* MW 2 */ + 8253 "00010101" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 4 first +.no_stack_arguments + 8254 "00000100" // JL #7664 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7664 delay_slots=5 */ + 8255 "00000001" // /* MW 5 */ + 8256 "00000000" // /* MW 4 */ + 8257 "11111000" // /* MW 3 */ + 8258 "00001110" // /* MW 2 */ + 8259 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 57 +.delay_slot + 8260 "10011000" // MUL r18, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8261 "00101111" // /* MW 3 */ + 8262 "11100101" // /* MW 2 */ + 8263 "00010100" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 45 +.delay_slot + 8264 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8265 "00000101" // /* MW 3 */ + 8266 "00100000" // /* MW 2 */ + 8267 "00010000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 45 +.delay_slot + 8268 "10011000" // LSHL r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8269 "00001101" // /* MW 3 */ + 8270 "10100001" // /* MW 2 */ + 8271 "00010100" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 45 +.delay_slot + 8272 "01011000" // ADD.NC p0, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8273 "11000001" // /* MW 3 */ + 8274 "01101000" // /* MW 2 */ + 8275 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8276 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8277 "10000001" // /* MW 11 */ + 8278 "10101101" // /* MW 10 */ + 8279 "00000000" // /* MW 9 */ + 8280 "00000000" // /* MW 8 */ + 8281 "00000000" // /* MW 7 */ + 8282 "00000000" // /* MW 6 */ + 8283 "00100000" // /* MW 5 */ + 8284 "00000000" // /* MW 4 */ + 8285 "11110000" // /* MW 3 */ + 8286 "00101100" // /* MW 2 */ + 8287 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 39 +.return_address + 8288 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8289 "00111001" // /* MW 3 */ + 8290 "11111100" // /* MW 2 */ + 8291 "00000111" // /* MW 1 */ + 8292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8293 "00000000" // /* MW 1 */ + 8294 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8295 "00000000" // /* MW 1 */ + 8296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8297 "00000000" // /* MW 1 */ + 8298 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8299 "00000000" // /* MW 1 */ + 8300 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8301 "00000000" // /* MW 1 */ + 8302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8303 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 39 first + 8304 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 8305 "00000000" // /* MW 3 */ + 8306 "00101000" // /* MW 2 */ + 8307 "00010000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 39 +.delay_slot + 8308 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8309 "00000001" // /* MW 5 */ + 8310 "00000000" // /* MW 4 */ + 8311 "00000000" // /* MW 3 */ + 8312 "11110000" // /* MW 2 */ + 8313 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8315 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8317 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8318 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8319 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8320 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj__end +.label __ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj___func_end0 + 8321 "00000000" // /* MW 1 */ +.label __Z14_b8170_wrapperPPv___func_begin0 +.label _Z14_b8170_wrapperPPv +.function _b8170_wrapper _Z14_b8170_wrapperPPv +.src_ref 0 "0_0_reloadable82.cc" 35 first +.src_ref 0 "0_0_reloadable82.cc" 37 79 +.function_start + 8336 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8337 "11000000" // /* MW 3 */ + 8338 "01100000" // /* MW 2 */ + 8339 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 37 79 first + 8340 "10011000" // LDA p0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8341 "00011110" // /* MW 3 */ + 8342 "00011100" // /* MW 2 */ + 8343 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 39 47 first + 8344 "10011000" // LDA p2, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8345 "00011110" // /* MW 3 */ + 8346 "00010101" // /* MW 2 */ + 8347 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 38 80 first + 8348 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8349 "10011110" // /* MW 3 */ + 8350 "00000100" // /* MW 2 */ + 8351 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 36 4 first +.tail_call + 8352 "10000100" // J #8144 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=8144 delay_slots=5 */ + 8353 "00000000" // /* MW 5 */ + 8354 "00000000" // /* MW 4 */ + 8355 "11101000" // /* MW 3 */ + 8356 "00001111" // /* MW 2 */ + 8357 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8359 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8361 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8363 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8365 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z14_b8170_wrapperPPv__end +.label __Z14_b8170_wrapperPPv___func_end0 + 8367 "00000000" // /* MW 1 */ +.label __Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj___func_begin0 +.label _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj +.function setup_transposeshuffle_params _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj +.src_ref 11 "transposeshuffle_params.h" 71 first +.src_ref 11 "transposeshuffle_params.h" 76 16 +.src_ref 11 "transposeshuffle_params.h" 76 18 first +.function_start + 8368 "10111010" // LDA el0, [p1], #4; MOVXM r0, #508864 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8369 "00010000" // /* MW 9 */ + 8370 "11100000" // /* MW 8 */ + 8371 "00001001" // /* MW 7 */ + 8372 "11110000" // /* MW 6 */ + 8373 "00000001" // /* MW 5 */ + 8374 "00000000" // /* MW 4 */ + 8375 "11010000" // /* MW 3 */ + 8376 "10000101" // /* MW 2 */ + 8377 "00100011" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 76 9 +.src_ref 11 "transposeshuffle_params.h" 76 16 +.src_ref 11 "transposeshuffle_params.h" 80 28 +.src_ref 11 "transposeshuffle_params.h" 80 36 +.src_ref 11 "transposeshuffle_params.h" 81 28 +.src_ref 11 "transposeshuffle_params.h" 81 36 + 8378 "01110110" // MOVA m0, #64; MOVS p0, r0; MOVX r2, #-3; ADD.NC p2, r0, #4 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8379 "00001000" // /* MW 11 */ + 8380 "00000001" // /* MW 10 */ + 8381 "00110000" // /* MW 9 */ + 8382 "10101001" // /* MW 8 */ + 8383 "00100111" // /* MW 7 */ + 8384 "00111110" // /* MW 6 */ + 8385 "00001011" // /* MW 5 */ + 8386 "10000000" // /* MW 4 */ + 8387 "10000000" // /* MW 3 */ + 8388 "00000000" // /* MW 2 */ + 8389 "00001000" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 85 22 +.src_ref 11 "transposeshuffle_params.h" 86 17 +.src_ref 11 "transposeshuffle_params.h" 89 43 +.src_ref 11 "transposeshuffle_params.h" 91 18 +.src_ref 11 "transposeshuffle_params.h" 93 4 +.src_ref 11 "transposeshuffle_params.h" 94 4 + 8390 "01100100" // MOVX r1, #4; MOV r0, #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8391 "00000001" // /* MW 5 */ + 8392 "00100010" // /* MW 4 */ + 8393 "00100000" // /* MW 3 */ + 8394 "01000010" // /* MW 2 */ + 8395 "00000000" // /* MW 1 */ + 8396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8397 "00000000" // /* MW 1 */ + 8398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8399 "00000000" // /* MW 1 */ + 8400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8401 "00000000" // /* MW 1 */ + 8402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8403 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 76 16 first + 8404 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8405 "00101001" // /* MW 3 */ + 8406 "00011100" // /* MW 2 */ + 8407 "00001000" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 76 18 + 8408 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8409 "00101110" // /* MW 3 */ + 8410 "00011100" // /* MW 2 */ + 8411 "00000001" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 76 18 + 8412 "10011000" // LDA eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8413 "00001110" // /* MW 3 */ + 8414 "00011100" // /* MW 2 */ + 8415 "00000001" // /* MW 1 */ + 8416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8417 "00000000" // /* MW 1 */ + 8418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8419 "00000000" // /* MW 1 */ + 8420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8421 "00000000" // /* MW 1 */ + 8422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8423 "00000000" // /* MW 1 */ + 8424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8425 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 76 16 + 8426 "10011000" // ST el0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8427 "00101001" // /* MW 3 */ + 8428 "00011100" // /* MW 2 */ + 8429 "00001010" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 76 16 + 8430 "10011000" // ST eh0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8431 "00001001" // /* MW 3 */ + 8432 "00011100" // /* MW 2 */ + 8433 "00001010" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 76 18 + 8434 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8435 "00101110" // /* MW 3 */ + 8436 "00011100" // /* MW 2 */ + 8437 "00000001" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 76 18 + 8438 "10011000" // LDA eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8439 "00001110" // /* MW 3 */ + 8440 "00011100" // /* MW 2 */ + 8441 "00000001" // /* MW 1 */ + 8442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8443 "00000000" // /* MW 1 */ + 8444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8445 "00000000" // /* MW 1 */ + 8446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8447 "00000000" // /* MW 1 */ + 8448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8449 "00000000" // /* MW 1 */ + 8450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8451 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 76 16 + 8452 "10011000" // ST el0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8453 "00101001" // /* MW 3 */ + 8454 "00011100" // /* MW 2 */ + 8455 "00001010" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 76 16 + 8456 "10011000" // ST eh0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8457 "00001001" // /* MW 3 */ + 8458 "00011100" // /* MW 2 */ + 8459 "00001010" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 76 18 + 8460 "10011000" // LDA eh0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8461 "00001110" // /* MW 3 */ + 8462 "00000100" // /* MW 2 */ + 8463 "00000001" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 76 18 + 8464 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8465 "00101110" // /* MW 3 */ + 8466 "00010100" // /* MW 2 */ + 8467 "00000001" // /* MW 1 */ + 8468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8469 "00000000" // /* MW 1 */ + 8470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8471 "00000000" // /* MW 1 */ + 8472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8473 "00000000" // /* MW 1 */ + 8474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8475 "00000000" // /* MW 1 */ + 8476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8477 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 76 16 + 8478 "10011000" // ST eh0, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8479 "00001001" // /* MW 3 */ + 8480 "00000100" // /* MW 2 */ + 8481 "00001010" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 76 16 + 8482 "10011000" // ST el0, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8483 "00101001" // /* MW 3 */ + 8484 "00010100" // /* MW 2 */ + 8485 "00001010" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 80 28 first + 8486 "10011000" // LDA r3, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8487 "01110110" // /* MW 3 */ + 8488 "00001000" // /* MW 2 */ + 8489 "00000000" // /* MW 1 */ + 8490 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8491 "00000000" // /* MW 1 */ + 8492 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8493 "00000000" // /* MW 1 */ + 8494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8495 "00000000" // /* MW 1 */ + 8496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8497 "00000000" // /* MW 1 */ + 8498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8499 "00000000" // /* MW 1 */ + 8500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8501 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 80 36 + 8502 "10011000" // LSHL r4, r3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8503 "00101101" // /* MW 3 */ + 8504 "11001000" // /* MW 2 */ + 8505 "00010000" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 80 42 +.src_ref 11 "transposeshuffle_params.h" 89 43 first + 8506 "00100100" // LSHL r3, r3, r1; ADD.NC r1, r4, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8507 "11111111" // /* MW 5 */ + 8508 "10100100" // /* MW 4 */ + 8509 "10110000" // /* MW 3 */ + 8510 "11000011" // /* MW 2 */ + 8511 "00011000" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 80 19 +.src_ref 11 "transposeshuffle_params.h" 80 19 first + 8512 "00000010" // ST r1, [p0]; MOV r4, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8513 "01110000" // /* MW 7 */ + 8514 "01100000" // /* MW 6 */ + 8515 "10001000" // /* MW 5 */ + 8516 "00000000" // /* MW 4 */ + 8517 "00110000" // /* MW 3 */ + 8518 "10000110" // /* MW 2 */ + 8519 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 80 19 + 8520 "00011000" // ADD.NC p1, r4, #-60 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8521 "01100010" // /* MW 3 */ + 8522 "01100010" // /* MW 2 */ + 8523 "00011001" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 81 28 first + 8524 "10011000" // LDA r4, [p1], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8525 "10010110" // /* MW 3 */ + 8526 "00001000" // /* MW 2 */ + 8527 "00000001" // /* MW 1 */ + 8528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8529 "00000000" // /* MW 1 */ + 8530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8531 "00000000" // /* MW 1 */ + 8532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8533 "00000000" // /* MW 1 */ + 8534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8535 "00000000" // /* MW 1 */ + 8536 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8537 "00000000" // /* MW 1 */ + 8538 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8539 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 81 36 +.src_ref 11 "transposeshuffle_params.h" 90 77 + 8540 "01100100" // LSHL r2, r4, r2; MOV r4, #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8541 "00000001" // /* MW 5 */ + 8542 "00100010" // /* MW 4 */ + 8543 "10110010" // /* MW 3 */ + 8544 "10000101" // /* MW 2 */ + 8545 "00100000" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 81 42 + 8546 "00011000" // ADD r2, r2, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8547 "11111111" // /* MW 3 */ + 8548 "10000101" // /* MW 2 */ + 8549 "00010000" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 81 19 +.src_ref 11 "transposeshuffle_params.h" 90 77 first + 8550 "01011100" // ST r2, [p1], #4; MSC r4, r4, r3, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8551 "01011100" // /* MW 5 */ + 8552 "10010000" // /* MW 4 */ + 8553 "00110001" // /* MW 3 */ + 8554 "10001010" // /* MW 2 */ + 8555 "00100011" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 85 22 first + 8556 "10011000" // ST r0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8557 "00010001" // /* MW 3 */ + 8558 "00011100" // /* MW 2 */ + 8559 "00001001" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 85 22 + 8560 "10011000" // ST r0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8561 "00010001" // /* MW 3 */ + 8562 "00011100" // /* MW 2 */ + 8563 "00001001" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 86 17 first + 8564 "10011000" // ST r0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8565 "00010001" // /* MW 3 */ + 8566 "00101100" // /* MW 2 */ + 8567 "00001001" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 89 23 first + 8568 "10011000" // ST r3, [p1], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8569 "01110001" // /* MW 3 */ + 8570 "11111100" // /* MW 2 */ + 8571 "00001001" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 90 23 first + 8572 "10011000" // ST r4, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8573 "10010001" // /* MW 3 */ + 8574 "00101100" // /* MW 2 */ + 8575 "00001001" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 91 18 +.src_ref 11 "transposeshuffle_params.h" 91 18 first + 8576 "00000010" // ST r0, [p1]; MOV r5, p1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8577 "01110000" // /* MW 7 */ + 8578 "01100000" // /* MW 6 */ + 8579 "10101001" // /* MW 5 */ + 8580 "00000000" // /* MW 4 */ + 8581 "00110000" // /* MW 3 */ + 8582 "10000010" // /* MW 2 */ + 8583 "00100000" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 91 18 + 8584 "00011000" // ADD.NC p1, r5, #-68 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8585 "11011110" // /* MW 3 */ + 8586 "01100010" // /* MW 2 */ + 8587 "00011001" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 93 4 first + 8588 "10011000" // ST r0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8589 "00010001" // /* MW 3 */ + 8590 "00011100" // /* MW 2 */ + 8591 "00001001" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 93 4 + 8592 "10011000" // ST r0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8593 "00010001" // /* MW 3 */ + 8594 "00011100" // /* MW 2 */ + 8595 "00001001" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 93 4 + 8596 "10011000" // ST r2, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8597 "01010001" // /* MW 3 */ + 8598 "00011100" // /* MW 2 */ + 8599 "00001001" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 93 4 + 8600 "10011000" // ST r1, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8601 "00110001" // /* MW 3 */ + 8602 "00011100" // /* MW 2 */ + 8603 "00001001" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 93 4 +.src_ref 11 "transposeshuffle_params.h" 95 first + 8604 "01011100" // ST r0, [p1], #4; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 8605 "00000000" // /* MW 5 */ + 8606 "01010000" // /* MW 4 */ + 8607 "00110000" // /* MW 3 */ + 8608 "10000010" // /* MW 2 */ + 8609 "00100011" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 94 4 first +.delay_slot + 8610 "10011000" // ST r3, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8611 "01110001" // /* MW 3 */ + 8612 "00101100" // /* MW 2 */ + 8613 "00001001" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 94 4 +.delay_slot + 8614 "10011000" // ST r2, [p1], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8615 "01010001" // /* MW 3 */ + 8616 "11111100" // /* MW 2 */ + 8617 "00001001" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 94 4 +.delay_slot + 8618 "10011000" // ST r4, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8619 "10010001" // /* MW 3 */ + 8620 "00101100" // /* MW 2 */ + 8621 "00001001" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 94 4 +.delay_slot + 8622 "10011000" // ST r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8623 "00110001" // /* MW 3 */ + 8624 "00000100" // /* MW 2 */ + 8625 "00001001" // /* MW 1 */ +.src_ref 11 "transposeshuffle_params.h" 94 4 +.delay_slot + 8626 "10011000" // ST r0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8627 "00010001" // /* MW 3 */ + 8628 "00010100" // /* MW 2 */ +.label _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj__end +.label __Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj___func_end0 + 8629 "00001001" // /* MW 1 */ +.label __Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params___func_begin0 +.label _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params +.function transposeshuffle _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params +.src_ref 11 "transposeshuffle.h" 38 first +.src_ref 11 "transposeshuffle.h" 72 14 +.src_ref 11 "transposeshuffle.h" 79 23 +.function_start + 8640 "10111010" // MOVA r1, #2; MOVXM p2, #508876 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8641 "00010000" // /* MW 9 */ + 8642 "11100110" // /* MW 8 */ + 8643 "00110001" // /* MW 7 */ + 8644 "11110001" // /* MW 6 */ + 8645 "00000001" // /* MW 5 */ + 8646 "00000000" // /* MW 4 */ + 8647 "00000000" // /* MW 3 */ + 8648 "01000001" // /* MW 2 */ + 8649 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 72 8 +.src_ref 11 "transposeshuffle.h" 72 14 first +.src_ref 11 "transposeshuffle.h" 72 23 + 8650 "00101100" // LDA r27, [p2]; MOVX r0, #22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8651 "10110010" // /* MW 5 */ + 8652 "00000000" // /* MW 4 */ + 8653 "11010000" // /* MW 3 */ + 8654 "11101110" // /* MW 2 */ + 8655 "01000000" // /* MW 1 */ + 8656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8657 "00000000" // /* MW 1 */ + 8658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8659 "00000000" // /* MW 1 */ + 8660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8661 "00000000" // /* MW 1 */ + 8662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8663 "00000000" // /* MW 1 */ + 8664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8665 "00000000" // /* MW 1 */ + 8666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8667 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 79 23 first + 8668 "10011000" // EQ r1, r27, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8669 "00010111" // /* MW 3 */ + 8670 "11000010" // /* MW 2 */ + 8671 "00010110" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 79 8 + 8672 "10000100" // JNZ r1, #9136 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9136 delay_slots=5 */ + 8673 "00000001" // /* MW 5 */ + 8674 "01000000" // /* MW 4 */ + 8675 "11011000" // /* MW 3 */ + 8676 "00010001" // /* MW 2 */ + 8677 "00001000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 72 8 +.src_ref 11 "transposeshuffle.h" 72 23 +.delay_slot + 8678 "00011000" // MOVX r2, #29 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8679 "01110101" // /* MW 3 */ + 8680 "00000100" // /* MW 2 */ + 8681 "00010000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 72 8 first +.src_ref 11 "transposeshuffle.h" 72 23 first +.delay_slot + 8682 "00011000" // SEL.EQZ r0, r0, r2, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8683 "00100010" // /* MW 3 */ + 8684 "00000000" // /* MW 2 */ + 8685 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8687 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8689 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8691 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 116 34 + 8692 "01000100" // MOVXM p2, #508880 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8693 "10100000" // /* MW 5 */ + 8694 "11000111" // /* MW 4 */ + 8695 "11000100" // /* MW 3 */ + 8696 "00000111" // /* MW 2 */ + 8697 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 116 34 first + 8698 "10011000" // LDA r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8699 "00110110" // /* MW 3 */ + 8700 "00000100" // /* MW 2 */ + 8701 "00000010" // /* MW 1 */ + 8702 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8703 "00000000" // /* MW 1 */ + 8704 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8705 "00000000" // /* MW 1 */ + 8706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8707 "00000000" // /* MW 1 */ + 8708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8709 "00000000" // /* MW 1 */ + 8710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8711 "00000000" // /* MW 1 */ + 8712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8713 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 116 8 +.src_ref 11 "transposeshuffle.h" 116 26 + 8714 "10000100" // JZ r1, #9776 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9776 delay_slots=5 */ + 8715 "00000001" // /* MW 5 */ + 8716 "00000000" // /* MW 4 */ + 8717 "00011000" // /* MW 3 */ + 8718 "00010011" // /* MW 2 */ + 8719 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8720 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8721 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8723 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8725 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8727 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8728 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8729 "00000000" // /* MW 1 */ + 8730 "00011000" // MOVX r2, #10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8731 "00101001" // /* MW 3 */ + 8732 "00000100" // /* MW 2 */ + 8733 "00010000" // /* MW 1 */ + 8734 "10011000" // LTU r2, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8735 "00101100" // /* MW 3 */ + 8736 "01000100" // /* MW 2 */ + 8737 "00010000" // /* MW 1 */ + 8738 "10000100" // JNZ r2, #8976 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8976 delay_slots=5 */ + 8739 "00000001" // /* MW 5 */ + 8740 "01000000" // /* MW 4 */ + 8741 "10001000" // /* MW 3 */ + 8742 "00010001" // /* MW 2 */ + 8743 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8745 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8747 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8749 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8751 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8753 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 first +.src_ref 11 "transposeshuffle.h" 116 8 +.src_ref 11 "transposeshuffle.h" 119 21 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8754 "00111010" // VLDB x0, [p0], #64; MOVXM ls, #8880 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8755 "00010000" // /* MW 9 */ + 8756 "01011000" // /* MW 8 */ + 8757 "01111001" // /* MW 7 */ + 8758 "00001000" // /* MW 6 */ + 8759 "00000000" // /* MW 5 */ + 8760 "00000000" // /* MW 4 */ + 8761 "01101000" // /* MW 3 */ + 8762 "00111000" // /* MW 2 */ + 8763 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 +.src_ref 11 "transposeshuffle.h" 116 8 first +.src_ref 11 "transposeshuffle.h" 119 21 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8764 "00111010" // VLDB x0, [p0], #64; MOVXM le, #8880 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8765 "00010000" // /* MW 9 */ + 8766 "01011000" // /* MW 8 */ + 8767 "10111001" // /* MW 7 */ + 8768 "00001001" // /* MW 6 */ + 8769 "00000000" // /* MW 5 */ + 8770 "00000000" // /* MW 4 */ + 8771 "01101000" // /* MW 3 */ + 8772 "00111000" // /* MW 2 */ + 8773 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 first +.src_ref 11 "transposeshuffle.h" 116 8 +.src_ref 11 "transposeshuffle.h" 119 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8774 "10111010" // NOPA; VLDB x0, [p0], #64; ADD.NC lc, r1, #-9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8775 "11001110" // /* MW 9 */ + 8776 "01111101" // /* MW 8 */ + 8777 "10111000" // /* MW 7 */ + 8778 "00000010" // /* MW 6 */ + 8779 "00110100" // /* MW 5 */ + 8780 "00011100" // /* MW 4 */ + 8781 "11110000" // /* MW 3 */ + 8782 "00101100" // /* MW 2 */ + 8783 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 +.src_ref 11 "transposeshuffle.h" 119 21 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8784 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8785 "00000000" // /* MW 15 */ + 8786 "00000000" // /* MW 14 */ + 8787 "01111000" // /* MW 13 */ + 8788 "10100101" // /* MW 12 */ + 8789 "00000001" // /* MW 11 */ + 8790 "00000000" // /* MW 10 */ + 8791 "00000000" // /* MW 9 */ + 8792 "00000000" // /* MW 8 */ + 8793 "01011011" // /* MW 7 */ + 8794 "00000001" // /* MW 6 */ + 8795 "01101000" // /* MW 5 */ + 8796 "00111000" // /* MW 4 */ + 8797 "11110000" // /* MW 3 */ + 8798 "00101100" // /* MW 2 */ + 8799 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 +.src_ref 11 "transposeshuffle.h" 119 21 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8800 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8801 "00000000" // /* MW 15 */ + 8802 "00000000" // /* MW 14 */ + 8803 "01111000" // /* MW 13 */ + 8804 "10100101" // /* MW 12 */ + 8805 "00000001" // /* MW 11 */ + 8806 "00000000" // /* MW 10 */ + 8807 "00000000" // /* MW 9 */ + 8808 "00000000" // /* MW 8 */ + 8809 "01011011" // /* MW 7 */ + 8810 "00000001" // /* MW 6 */ + 8811 "01101000" // /* MW 5 */ + 8812 "00111000" // /* MW 4 */ + 8813 "11110000" // /* MW 3 */ + 8814 "00101100" // /* MW 2 */ + 8815 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 +.src_ref 11 "transposeshuffle.h" 119 21 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8816 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8817 "00000000" // /* MW 15 */ + 8818 "00000000" // /* MW 14 */ + 8819 "01111000" // /* MW 13 */ + 8820 "10100101" // /* MW 12 */ + 8821 "00000001" // /* MW 11 */ + 8822 "00000000" // /* MW 10 */ + 8823 "00000000" // /* MW 9 */ + 8824 "00000000" // /* MW 8 */ + 8825 "01011011" // /* MW 7 */ + 8826 "00000001" // /* MW 6 */ + 8827 "01101000" // /* MW 5 */ + 8828 "00111000" // /* MW 4 */ + 8829 "11110000" // /* MW 3 */ + 8830 "00101100" // /* MW 2 */ + 8831 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 +.src_ref 11 "transposeshuffle.h" 119 21 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8832 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8833 "00000000" // /* MW 15 */ + 8834 "00000000" // /* MW 14 */ + 8835 "01111000" // /* MW 13 */ + 8836 "10100101" // /* MW 12 */ + 8837 "00000001" // /* MW 11 */ + 8838 "00000000" // /* MW 10 */ + 8839 "00000000" // /* MW 9 */ + 8840 "00000000" // /* MW 8 */ + 8841 "01011011" // /* MW 7 */ + 8842 "00000001" // /* MW 6 */ + 8843 "01101000" // /* MW 5 */ + 8844 "00111000" // /* MW 4 */ + 8845 "11110000" // /* MW 3 */ + 8846 "00101100" // /* MW 2 */ + 8847 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 +.src_ref 11 "transposeshuffle.h" 119 21 +.src_ref 11 "transposeshuffle.h" 120 17 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8848 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8849 "00000000" // /* MW 15 */ + 8850 "00000000" // /* MW 14 */ + 8851 "11101000" // /* MW 13 */ + 8852 "00000000" // /* MW 12 */ + 8853 "00000000" // /* MW 11 */ + 8854 "00000000" // /* MW 10 */ + 8855 "00000000" // /* MW 9 */ + 8856 "00000000" // /* MW 8 */ + 8857 "01011011" // /* MW 7 */ + 8858 "00000001" // /* MW 6 */ + 8859 "01101000" // /* MW 5 */ + 8860 "00111000" // /* MW 4 */ + 8861 "11110000" // /* MW 3 */ + 8862 "00101100" // /* MW 2 */ + 8863 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1139 17 first +.src_ref 11 "transposeshuffle.h" 119 21 first +.src_ref 11 "transposeshuffle.h" 120 17 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8864 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8865 "00000000" // /* MW 15 */ + 8866 "00000000" // /* MW 14 */ + 8867 "11101000" // /* MW 13 */ + 8868 "00000000" // /* MW 12 */ + 8869 "00000000" // /* MW 11 */ + 8870 "00000000" // /* MW 10 */ + 8871 "00000000" // /* MW 9 */ + 8872 "00000000" // /* MW 8 */ + 8873 "01011011" // /* MW 7 */ + 8874 "00000001" // /* MW 6 */ + 8875 "01101000" // /* MW 5 */ + 8876 "00111000" // /* MW 4 */ + 8877 "11110000" // /* MW 3 */ + 8878 "00101100" // /* MW 2 */ + 8879 "00000000" // /* MW 1 */ +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_240 +.src_ref 8 "vector.hpp" 1139 17 +.src_ref 8 "vector.hpp" 1159 33 first +.src_ref 11 "transposeshuffle.h" 119 21 +.src_ref 11 "transposeshuffle.h" 120 17 first +.src_ref 11 "transposeshuffle.h" 122 22 first +.begin_of_loop +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 8880 "11100001" // NOPA; VLDB x0, [p0], #64; VST bmll0, [p1], #64; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8881 "00000000" // /* MW 15 */ + 8882 "00000000" // /* MW 14 */ + 8883 "11101000" // /* MW 13 */ + 8884 "00000000" // /* MW 12 */ + 8885 "00000000" // /* MW 11 */ + 8886 "00000000" // /* MW 10 */ + 8887 "00000000" // /* MW 9 */ + 8888 "10000000" // /* MW 8 */ + 8889 "00000110" // /* MW 7 */ + 8890 "00011100" // /* MW 6 */ + 8891 "01101001" // /* MW 5 */ + 8892 "00111000" // /* MW 4 */ + 8893 "11110000" // /* MW 3 */ + 8894 "00101100" // /* MW 2 */ + 8895 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1159 33 +.src_ref 11 "transposeshuffle.h" 120 17 +.src_ref 11 "transposeshuffle.h" 122 22 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 8896 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8897 "11100000" // /* MW 7 */ + 8898 "00000000" // /* MW 6 */ + 8899 "00000000" // /* MW 5 */ + 8900 "00000000" // /* MW 4 */ + 8901 "11010000" // /* MW 3 */ + 8902 "10000000" // /* MW 2 */ + 8903 "00100011" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1159 33 +.src_ref 11 "transposeshuffle.h" 120 17 +.src_ref 11 "transposeshuffle.h" 122 22 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8904 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8905 "11100000" // /* MW 7 */ + 8906 "00000000" // /* MW 6 */ + 8907 "00000000" // /* MW 5 */ + 8908 "00000000" // /* MW 4 */ + 8909 "11010000" // /* MW 3 */ + 8910 "10000000" // /* MW 2 */ + 8911 "00100011" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1159 33 +.src_ref 11 "transposeshuffle.h" 120 17 +.src_ref 11 "transposeshuffle.h" 122 22 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8912 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8913 "11100000" // /* MW 7 */ + 8914 "00000000" // /* MW 6 */ + 8915 "00000000" // /* MW 5 */ + 8916 "00000000" // /* MW 4 */ + 8917 "11010000" // /* MW 3 */ + 8918 "10000000" // /* MW 2 */ + 8919 "00100011" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1159 33 +.src_ref 11 "transposeshuffle.h" 120 17 +.src_ref 11 "transposeshuffle.h" 122 22 +.src_ref 11 "transposeshuffle.h" 126 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8920 "00111010" // VST bmll0, [p1], #64; RET lr; VSHUFFLE bmll0, x0, x0, r0 /* MW 10 */ /* control_operation: words=10 rts unconditional cycles_taken=1 delay_slots=5 */ + 8921 "11101001" // /* MW 9 */ + 8922 "00000000" // /* MW 8 */ + 8923 "00000000" // /* MW 7 */ + 8924 "00000000" // /* MW 6 */ + 8925 "01000000" // /* MW 5 */ + 8926 "00000001" // /* MW 4 */ + 8927 "11010000" // /* MW 3 */ + 8928 "10000000" // /* MW 2 */ + 8929 "00100011" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1159 33 first +.src_ref 11 "transposeshuffle.h" 120 17 first +.src_ref 11 "transposeshuffle.h" 122 22 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8930 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8931 "11100000" // /* MW 7 */ + 8932 "00000000" // /* MW 6 */ + 8933 "00000000" // /* MW 5 */ + 8934 "00000000" // /* MW 4 */ + 8935 "11010000" // /* MW 3 */ + 8936 "10000000" // /* MW 2 */ + 8937 "00100011" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1159 33 +.src_ref 11 "transposeshuffle.h" 120 17 +.src_ref 11 "transposeshuffle.h" 122 22 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8938 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8939 "11100000" // /* MW 7 */ + 8940 "00000000" // /* MW 6 */ + 8941 "00000000" // /* MW 5 */ + 8942 "00000000" // /* MW 4 */ + 8943 "11010000" // /* MW 3 */ + 8944 "10000000" // /* MW 2 */ + 8945 "00100011" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1159 33 +.src_ref 11 "transposeshuffle.h" 120 17 +.src_ref 11 "transposeshuffle.h" 122 22 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8946 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8947 "11100000" // /* MW 7 */ + 8948 "00000000" // /* MW 6 */ + 8949 "00000000" // /* MW 5 */ + 8950 "00000000" // /* MW 4 */ + 8951 "11010000" // /* MW 3 */ + 8952 "10000000" // /* MW 2 */ + 8953 "00100011" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1159 33 +.src_ref 11 "transposeshuffle.h" 122 22 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8954 "00001100" // NOPA; VST bmll0, [p1], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8955 "00001101" // /* MW 5 */ + 8956 "00111000" // /* MW 4 */ + 8957 "11110010" // /* MW 3 */ + 8958 "00101100" // /* MW 2 */ + 8959 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1159 33 +.src_ref 11 "transposeshuffle.h" 122 22 +.delay_slot + 8960 "11100001" // NOPA; NOPB; VST bmll0, [p1], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8961 "00000000" // /* MW 15 */ + 8962 "00000000" // /* MW 14 */ + 8963 "01111000" // /* MW 13 */ + 8964 "10100101" // /* MW 12 */ + 8965 "00000001" // /* MW 11 */ + 8966 "00000000" // /* MW 10 */ + 8967 "00000000" // /* MW 9 */ + 8968 "10000000" // /* MW 8 */ + 8969 "00000110" // /* MW 7 */ + 8970 "00011100" // /* MW 6 */ + 8971 "00100001" // /* MW 5 */ + 8972 "00000000" // /* MW 4 */ + 8973 "11110000" // /* MW 3 */ + 8974 "00101100" // /* MW 2 */ + 8975 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_336 +.src_ref 11 "transposeshuffle.h" 116 8 first + 8976 "11111000" // MOV lc, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8977 "10100000" // /* MW 3 */ + 8978 "01110000" // /* MW 2 */ + 8979 "00011101" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 116 8 + 8980 "01000100" // MOVXM ls, #8992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8981 "01000000" // /* MW 5 */ + 8982 "11100110" // /* MW 4 */ + 8983 "00100001" // /* MW 3 */ + 8984 "00000000" // /* MW 2 */ + 8985 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 116 8 + 8986 "01000100" // MOVXM le, #9104 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8987 "00100000" // /* MW 5 */ + 8988 "11100111" // /* MW 4 */ + 8989 "00100110" // /* MW 3 */ + 8990 "00000000" // /* MW 2 */ + 8991 "00000000" // /* MW 1 */ +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_352 +.src_ref 8 "vector.hpp" 1139 17 first +.src_ref 11 "transposeshuffle.h" 119 21 first +.begin_of_loop +.loop_nesting 1 + 8992 "00011000" // VLDB x0, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8993 "00110100" // /* MW 3 */ + 8994 "00011100" // /* MW 2 */ + 8995 "00111000" // /* MW 1 */ + 8996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8997 "00000000" // /* MW 1 */ + 8998 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8999 "01111110" // /* MW 9 */ + 9000 "10100101" // /* MW 8 */ + 9001 "00000001" // /* MW 7 */ + 9002 "00000000" // /* MW 6 */ + 9003 "00010000" // /* MW 5 */ + 9004 "00000000" // /* MW 4 */ + 9005 "11110000" // /* MW 3 */ + 9006 "00101100" // /* MW 2 */ + 9007 "00000000" // /* MW 1 */ + 9008 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9009 "00000000" // /* MW 15 */ + 9010 "00000000" // /* MW 14 */ + 9011 "01111000" // /* MW 13 */ + 9012 "10100101" // /* MW 12 */ + 9013 "00000001" // /* MW 11 */ + 9014 "00000000" // /* MW 10 */ + 9015 "00000000" // /* MW 9 */ + 9016 "00000000" // /* MW 8 */ + 9017 "01011011" // /* MW 7 */ + 9018 "00000001" // /* MW 6 */ + 9019 "00100000" // /* MW 5 */ + 9020 "00000000" // /* MW 4 */ + 9021 "11110000" // /* MW 3 */ + 9022 "00101100" // /* MW 2 */ + 9023 "00000000" // /* MW 1 */ + 9024 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9025 "00000000" // /* MW 15 */ + 9026 "00000000" // /* MW 14 */ + 9027 "01111000" // /* MW 13 */ + 9028 "10100101" // /* MW 12 */ + 9029 "00000001" // /* MW 11 */ + 9030 "00000000" // /* MW 10 */ + 9031 "00000000" // /* MW 9 */ + 9032 "00000000" // /* MW 8 */ + 9033 "01011011" // /* MW 7 */ + 9034 "00000001" // /* MW 6 */ + 9035 "00100000" // /* MW 5 */ + 9036 "00000000" // /* MW 4 */ + 9037 "11110000" // /* MW 3 */ + 9038 "00101100" // /* MW 2 */ + 9039 "00000000" // /* MW 1 */ + 9040 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9041 "00000000" // /* MW 15 */ + 9042 "00000000" // /* MW 14 */ + 9043 "01111000" // /* MW 13 */ + 9044 "10100101" // /* MW 12 */ + 9045 "00000001" // /* MW 11 */ + 9046 "00000000" // /* MW 10 */ + 9047 "00000000" // /* MW 9 */ + 9048 "00000000" // /* MW 8 */ + 9049 "01011011" // /* MW 7 */ + 9050 "00000001" // /* MW 6 */ + 9051 "00100000" // /* MW 5 */ + 9052 "00000000" // /* MW 4 */ + 9053 "11110000" // /* MW 3 */ + 9054 "00101100" // /* MW 2 */ + 9055 "00000000" // /* MW 1 */ + 9056 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9057 "00000000" // /* MW 15 */ + 9058 "00000000" // /* MW 14 */ + 9059 "01111000" // /* MW 13 */ + 9060 "10100101" // /* MW 12 */ + 9061 "00000001" // /* MW 11 */ + 9062 "00000000" // /* MW 10 */ + 9063 "00000000" // /* MW 9 */ + 9064 "00000000" // /* MW 8 */ + 9065 "01011011" // /* MW 7 */ + 9066 "00000001" // /* MW 6 */ + 9067 "00100000" // /* MW 5 */ + 9068 "00000000" // /* MW 4 */ + 9069 "11110000" // /* MW 3 */ + 9070 "00101100" // /* MW 2 */ + 9071 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 120 17 first + 9072 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9073 "00000000" // /* MW 15 */ + 9074 "00000000" // /* MW 14 */ + 9075 "11101000" // /* MW 13 */ + 9076 "00000000" // /* MW 12 */ + 9077 "00000000" // /* MW 11 */ + 9078 "00000000" // /* MW 10 */ + 9079 "00000000" // /* MW 9 */ + 9080 "00000000" // /* MW 8 */ + 9081 "01011011" // /* MW 7 */ + 9082 "00000001" // /* MW 6 */ + 9083 "00100000" // /* MW 5 */ + 9084 "00000000" // /* MW 4 */ + 9085 "11110000" // /* MW 3 */ + 9086 "00101100" // /* MW 2 */ + 9087 "00000000" // /* MW 1 */ + 9088 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9089 "00000000" // /* MW 15 */ + 9090 "00000000" // /* MW 14 */ + 9091 "01111000" // /* MW 13 */ + 9092 "10100101" // /* MW 12 */ + 9093 "00000001" // /* MW 11 */ + 9094 "00000000" // /* MW 10 */ + 9095 "00000000" // /* MW 9 */ + 9096 "00000000" // /* MW 8 */ + 9097 "01011011" // /* MW 7 */ + 9098 "00000001" // /* MW 6 */ + 9099 "00100000" // /* MW 5 */ + 9100 "00000000" // /* MW 4 */ + 9101 "11110000" // /* MW 3 */ + 9102 "00101100" // /* MW 2 */ + 9103 "00000000" // /* MW 1 */ +.label ZLE_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_464 +.src_ref 8 "vector.hpp" 1159 33 first +.src_ref 11 "transposeshuffle.h" 122 22 first +.end_of_loop + 9104 "11100001" // NOPA; NOPB; VST bmll0, [p1], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9105 "00000000" // /* MW 15 */ + 9106 "00000000" // /* MW 14 */ + 9107 "01111000" // /* MW 13 */ + 9108 "10100101" // /* MW 12 */ + 9109 "00000001" // /* MW 11 */ + 9110 "00000000" // /* MW 10 */ + 9111 "00000000" // /* MW 9 */ + 9112 "10000000" // /* MW 8 */ + 9113 "00000110" // /* MW 7 */ + 9114 "00011100" // /* MW 6 */ + 9115 "00100001" // /* MW 5 */ + 9116 "00000000" // /* MW 4 */ + 9117 "11110000" // /* MW 3 */ + 9118 "00101100" // /* MW 2 */ + 9119 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 126 first +.loop_nesting 0 + 9120 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9121 "00000000" // /* MW 3 */ + 9122 "00101000" // /* MW 2 */ + 9123 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9125 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9127 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9129 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9131 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9132 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9133 "01100111" // /* MW 3 */ + 9134 "00000001" // /* MW 2 */ + 9135 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_496 +.src_ref 11 "transposeshuffle.h" 86 34 + 9136 "01000100" // MOVXM p2, #508864 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9137 "10000000" // /* MW 5 */ + 9138 "11000111" // /* MW 4 */ + 9139 "11000100" // /* MW 3 */ + 9140 "00000111" // /* MW 2 */ + 9141 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 86 34 first + 9142 "10011000" // LDA r0, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9143 "00010110" // /* MW 3 */ + 9144 "00000100" // /* MW 2 */ + 9145 "00000010" // /* MW 1 */ + 9146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9147 "00000000" // /* MW 1 */ + 9148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9149 "00000000" // /* MW 1 */ + 9150 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9151 "00000000" // /* MW 1 */ + 9152 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9153 "00000000" // /* MW 1 */ + 9154 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9155 "00000000" // /* MW 1 */ + 9156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9157 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 86 8 +.src_ref 11 "transposeshuffle.h" 86 26 + 9158 "10000100" // JZ r0, #9776 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9776 delay_slots=5 */ + 9159 "00000001" // /* MW 5 */ + 9160 "00000000" // /* MW 4 */ + 9161 "00011000" // /* MW 3 */ + 9162 "00010011" // /* MW 2 */ + 9163 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9165 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9167 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9168 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9169 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9171 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9173 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 86 8 +.src_ref 11 "transposeshuffle.h" 86 8 + 9174 "10111010" // MOVA m5, #36; MOVXM p4, #508868 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9175 "00010000" // /* MW 9 */ + 9176 "11100010" // /* MW 8 */ + 9177 "00110001" // /* MW 7 */ + 9178 "11110010" // /* MW 6 */ + 9179 "00000001" // /* MW 5 */ + 9180 "00000000" // /* MW 4 */ + 9181 "10000000" // /* MW 3 */ + 9182 "10010100" // /* MW 2 */ + 9183 "00000100" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 86 8 +.src_ref 11 "transposeshuffle.h" 86 8 +.src_ref 11 "transposeshuffle.h" 87 35 + 9184 "10111010" // LDA r1, [p4], #4; MOVX r5, #8; MOV r2, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9185 "01011000" // /* MW 9 */ + 9186 "11111101" // /* MW 8 */ + 9187 "01001111" // /* MW 7 */ + 9188 "00001000" // /* MW 6 */ + 9189 "01010001" // /* MW 5 */ + 9190 "00000000" // /* MW 4 */ + 9191 "11010000" // /* MW 3 */ + 9192 "10000110" // /* MW 2 */ + 9193 "10000011" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 +.src_ref 5 "aie_core.h" 100 15 +.src_ref 8 "vector.hpp" 1132 26 +.src_ref 8 "vector.hpp" 1152 43 +.src_ref 11 "transposeshuffle.h" 86 8 + 9194 "10111010" // LDA r4, [p4], m5; MOVX r3, #1; MOV dc4, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9195 "01011000" // /* MW 9 */ + 9196 "00000000" // /* MW 8 */ + 9197 "01100000" // /* MW 7 */ + 9198 "00101010" // /* MW 6 */ + 9199 "00110000" // /* MW 5 */ + 9200 "00000000" // /* MW 4 */ + 9201 "11010000" // /* MW 3 */ + 9202 "00010010" // /* MW 2 */ + 9203 "10010101" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 +.src_ref 8 "vector.hpp" 1152 43 +.src_ref 11 "transposeshuffle.h" 86 8 +.src_ref 11 "transposeshuffle.h" 86 8 + 9204 "01110110" // LDA m1, [p4], #-8; MOVS dc0, dc4; MOVX r6, #4; MOV m4, #32 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9205 "01011000" // /* MW 11 */ + 9206 "00100000" // /* MW 10 */ + 9207 "00000000" // /* MW 9 */ + 9208 "10001010" // /* MW 8 */ + 9209 "01100000" // /* MW 7 */ + 9210 "00000000" // /* MW 6 */ + 9211 "01001011" // /* MW 5 */ + 9212 "00010000" // /* MW 4 */ + 9213 "11010000" // /* MW 3 */ + 9214 "10010000" // /* MW 2 */ + 9215 "10011101" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 +.src_ref 8 "vector.hpp" 1132 26 +.src_ref 8 "transpose.hpp" 224 15 +.src_ref 11 "transposeshuffle.h" 86 8 + 9216 "01110110" // LDA dn1, [p4], #-8; MOVS dc1, dc4; ADD r0, r0, #-1; MOV r7, #52 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9217 "01011000" // /* MW 11 */ + 9218 "00110100" // /* MW 10 */ + 9219 "11101000" // /* MW 9 */ + 9220 "11111000" // /* MW 8 */ + 9221 "00001111" // /* MW 7 */ + 9222 "00000000" // /* MW 6 */ + 9223 "01001011" // /* MW 5 */ + 9224 "00010000" // /* MW 4 */ + 9225 "11010001" // /* MW 3 */ + 9226 "10010100" // /* MW 2 */ + 9227 "10011101" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 +.src_ref 8 "vector.hpp" 1132 26 +.src_ref 11 "transposeshuffle.h" 86 8 +.src_ref 11 "transposeshuffle.h" 86 8 + 9228 "01110110" // LDA dj1, [p4], #12; MOVS dc5, dc4; MOVXM p2, #9312 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9229 "00010000" // /* MW 11 */ + 9230 "00110000" // /* MW 10 */ + 9231 "00110010" // /* MW 9 */ + 9232 "00001001" // /* MW 8 */ + 9233 "00000000" // /* MW 7 */ + 9234 "00000000" // /* MW 6 */ + 9235 "01001011" // /* MW 5 */ + 9236 "00010000" // /* MW 4 */ + 9237 "11010101" // /* MW 3 */ + 9238 "10011000" // /* MW 2 */ + 9239 "10000111" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 86 8 +.src_ref 11 "transposeshuffle.h" 87 12 + 9240 "10111010" // LDA dn5, [p4], #-8; MOVXM p3, #9344 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9241 "00010000" // /* MW 9 */ + 9242 "01000000" // /* MW 8 */ + 9243 "10110010" // /* MW 7 */ + 9244 "00001001" // /* MW 6 */ + 9245 "00000000" // /* MW 5 */ + 9246 "00000000" // /* MW 4 */ + 9247 "11010000" // /* MW 3 */ + 9248 "11010100" // /* MW 2 */ + 9249 "10011101" // /* MW 1 */ +.src_ref 8 "transpose.hpp" 225 15 +.src_ref 11 "transposeshuffle.h" 86 8 + 9250 "00101100" // LDA dj5, [p4], m4; MOVX r16, #53 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9251 "10101010" // /* MW 5 */ + 9252 "01000001" // /* MW 4 */ + 9253 "11010000" // /* MW 3 */ + 9254 "01011000" // /* MW 2 */ + 9255 "10010001" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 86 8 +.src_ref 11 "transposeshuffle.h" 87 35 first + 9256 "10111010" // LDA m0, [p4], #-8; LTU r27, r5, r1; ADD.NC r5, r1, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9257 "11001000" // /* MW 9 */ + 9258 "01111111" // /* MW 8 */ + 9259 "10101000" // /* MW 7 */ + 9260 "11100100" // /* MW 6 */ + 9261 "10110000" // /* MW 5 */ + 9262 "00001011" // /* MW 4 */ + 9263 "11010000" // /* MW 3 */ + 9264 "10000000" // /* MW 2 */ + 9265 "10011101" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 86 8 first +.src_ref 11 "transposeshuffle.h" 86 8 first + 9266 "10111010" // LDA dn0, [p4], #-8; LSHL r17, r5, r2; ADD.NC r5, r4, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9267 "11001000" // /* MW 9 */ + 9268 "00111111" // /* MW 8 */ + 9269 "10101001" // /* MW 7 */ + 9270 "01101100" // /* MW 6 */ + 9271 "00010001" // /* MW 5 */ + 9272 "00001011" // /* MW 4 */ + 9273 "11010000" // /* MW 3 */ + 9274 "10000100" // /* MW 2 */ + 9275 "10011101" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 86 8 +.src_ref 11 "transposeshuffle.h" 86 8 + 9276 "10111010" // LDA dj0, [p4], #12; LSHL r2, r5, r2; ADD.NC r5, r17, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9277 "01001000" // /* MW 9 */ + 9278 "01000000" // /* MW 8 */ + 9279 "10101100" // /* MW 7 */ + 9280 "01101100" // /* MW 6 */ + 9281 "00100001" // /* MW 5 */ + 9282 "00001010" // /* MW 4 */ + 9283 "11010000" // /* MW 3 */ + 9284 "10001000" // /* MW 2 */ + 9285 "10000111" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 86 8 + 9286 "10111010" // LDA dn4, [p4]; SEL.EQZ r5, r3, r5, r27; ADD.NC r3, r2, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9287 "01001000" // /* MW 9 */ + 9288 "10000000" // /* MW 8 */ + 9289 "01101000" // /* MW 7 */ + 9290 "10010000" // /* MW 6 */ + 9291 "01010010" // /* MW 5 */ + 9292 "00000110" // /* MW 4 */ + 9293 "11010000" // /* MW 3 */ + 9294 "11000100" // /* MW 2 */ + 9295 "10000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 86 8 + 9296 "11100001" // LDA dj4, [p4, #-8]; NOPB; NOPS; ADD r5, r5, #-1; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9297 "00000000" // /* MW 15 */ + 9298 "00000000" // /* MW 14 */ + 9299 "01111000" // /* MW 13 */ + 9300 "10100101" // /* MW 12 */ + 9301 "00000001" // /* MW 11 */ + 9302 "11111000" // /* MW 10 */ + 9303 "01011111" // /* MW 9 */ + 9304 "00001010" // /* MW 8 */ + 9305 "01011011" // /* MW 7 */ + 9306 "00000001" // /* MW 6 */ + 9307 "00100000" // /* MW 5 */ + 9308 "00000000" // /* MW 4 */ + 9309 "11010000" // /* MW 3 */ + 9310 "11001000" // /* MW 2 */ + 9311 "10011100" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_672 +.src_ref 11 "transposeshuffle.h" 87 12 first +.loop_nesting 1 + 9312 "10000100" // JZ r1, #9760 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9760 delay_slots=5 */ + 9313 "00000001" // /* MW 5 */ + 9314 "00000000" // /* MW 4 */ + 9315 "00010000" // /* MW 3 */ + 9316 "00010011" // /* MW 2 */ + 9317 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9318 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9319 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9320 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9321 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9323 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9325 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9327 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 87 12 + 9328 "11100001" // NOPA; NOPB; NOPS; NOPX; MOV r17, r5; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9329 "00000000" // /* MW 15 */ + 9330 "00000000" // /* MW 14 */ + 9331 "01111000" // /* MW 13 */ + 9332 "01010000" // /* MW 12 */ + 9333 "00101001" // /* MW 11 */ + 9334 "00000010" // /* MW 10 */ + 9335 "00000000" // /* MW 9 */ + 9336 "00000000" // /* MW 8 */ + 9337 "01011011" // /* MW 7 */ + 9338 "00000001" // /* MW 6 */ + 9339 "00100000" // /* MW 5 */ + 9340 "00000000" // /* MW 4 */ + 9341 "11110000" // /* MW 3 */ + 9342 "00101100" // /* MW 2 */ + 9343 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_704 +.src_ref 11 "transposeshuffle.h" 88 16 first +.loop_nesting 2 + 9344 "10000100" // JZ r4, #9744 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9744 delay_slots=5 */ + 9345 "00000001" // /* MW 5 */ + 9346 "00000000" // /* MW 4 */ + 9347 "00001000" // /* MW 3 */ + 9348 "00010011" // /* MW 2 */ + 9349 "00100000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9351 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9353 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9355 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9357 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9359 "00000000" // /* MW 1 */ + 9360 "10011000" // LTU r18, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9361 "01101100" // /* MW 3 */ + 9362 "11100100" // /* MW 2 */ + 9363 "00010000" // /* MW 1 */ + 9364 "10000100" // JNZ r18, #9600 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9600 delay_slots=5 */ + 9365 "00000001" // /* MW 5 */ + 9366 "01000000" // /* MW 4 */ + 9367 "11000000" // /* MW 3 */ + 9368 "00010010" // /* MW 2 */ + 9369 "10010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9371 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9373 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9375 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9377 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9379 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1132 26 first +.src_ref 11 "transposeshuffle.h" 88 16 + 9380 "00111010" // VLDB x0, [p0, #64]; MOVXM ls, #9488 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9381 "00010000" // /* MW 9 */ + 9382 "10001000" // /* MW 8 */ + 9383 "01111010" // /* MW 7 */ + 9384 "00001000" // /* MW 6 */ + 9385 "00000000" // /* MW 5 */ + 9386 "00000000" // /* MW 4 */ + 9387 "01101000" // /* MW 3 */ + 9388 "00101000" // /* MW 2 */ + 9389 "00000000" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 8 "vector.hpp" 1132 26 +.src_ref 11 "transposeshuffle.h" 88 16 first + 9390 "00111010" // VLDB.3D x1, [p0], d1; MOVXM le, #9520 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9391 "00010000" // /* MW 9 */ + 9392 "10011000" // /* MW 8 */ + 9393 "10111010" // /* MW 7 */ + 9394 "00001001" // /* MW 6 */ + 9395 "00000000" // /* MW 5 */ + 9396 "00000000" // /* MW 4 */ + 9397 "11101000" // /* MW 3 */ + 9398 "01110000" // /* MW 2 */ + 9399 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 88 16 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 9400 "10011000" // ADD.NC lc, r3, #-3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9401 "11111110" // /* MW 3 */ + 9402 "01110001" // /* MW 2 */ + 9403 "00011101" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1132 26 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 9404 "00011000" // VLDB x0, [p0, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9405 "00110100" // /* MW 3 */ + 9406 "00010100" // /* MW 2 */ + 9407 "00111000" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 8 "vector.hpp" 1132 26 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9408 "11100001" // NOPA; VLDB.3D x1, [p0], d1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9409 "00000000" // /* MW 15 */ + 9410 "00000000" // /* MW 14 */ + 9411 "01111000" // /* MW 13 */ + 9412 "10100101" // /* MW 12 */ + 9413 "00000001" // /* MW 11 */ + 9414 "00000000" // /* MW 10 */ + 9415 "00000000" // /* MW 9 */ + 9416 "00000000" // /* MW 8 */ + 9417 "01011011" // /* MW 7 */ + 9418 "00000001" // /* MW 6 */ + 9419 "11101000" // /* MW 5 */ + 9420 "01110000" // /* MW 4 */ + 9421 "11110000" // /* MW 3 */ + 9422 "00101100" // /* MW 2 */ + 9423 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9424 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9425 "00000000" // /* MW 15 */ + 9426 "00000000" // /* MW 14 */ + 9427 "01111000" // /* MW 13 */ + 9428 "10100101" // /* MW 12 */ + 9429 "00000001" // /* MW 11 */ + 9430 "00000000" // /* MW 10 */ + 9431 "00000000" // /* MW 9 */ + 9432 "00000000" // /* MW 8 */ + 9433 "01011011" // /* MW 7 */ + 9434 "00000001" // /* MW 6 */ + 9435 "00100000" // /* MW 5 */ + 9436 "00000000" // /* MW 4 */ + 9437 "11110000" // /* MW 3 */ + 9438 "00101100" // /* MW 2 */ + 9439 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1132 26 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9440 "11100001" // NOPA; VLDB x0, [p0, #64]; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9441 "00000000" // /* MW 15 */ + 9442 "00000000" // /* MW 14 */ + 9443 "01111000" // /* MW 13 */ + 9444 "10100101" // /* MW 12 */ + 9445 "00000001" // /* MW 11 */ + 9446 "00000000" // /* MW 10 */ + 9447 "00000000" // /* MW 9 */ + 9448 "00000000" // /* MW 8 */ + 9449 "01011011" // /* MW 7 */ + 9450 "00000001" // /* MW 6 */ + 9451 "01101000" // /* MW 5 */ + 9452 "00101000" // /* MW 4 */ + 9453 "11110000" // /* MW 3 */ + 9454 "00101100" // /* MW 2 */ + 9455 "00000000" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 8 "vector.hpp" 1132 26 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9456 "11100001" // NOPA; VLDB.3D x1, [p0], d1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9457 "00000000" // /* MW 15 */ + 9458 "00000000" // /* MW 14 */ + 9459 "01111000" // /* MW 13 */ + 9460 "10100101" // /* MW 12 */ + 9461 "00000001" // /* MW 11 */ + 9462 "00000000" // /* MW 10 */ + 9463 "00000000" // /* MW 9 */ + 9464 "00000000" // /* MW 8 */ + 9465 "01011011" // /* MW 7 */ + 9466 "00000001" // /* MW 6 */ + 9467 "11101000" // /* MW 5 */ + 9468 "01110000" // /* MW 4 */ + 9469 "11110000" // /* MW 3 */ + 9470 "00101100" // /* MW 2 */ + 9471 "00000000" // /* MW 1 */ +.src_ref 8 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9472 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmlh0, x1, x0, r7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9473 "00000000" // /* MW 15 */ + 9474 "00000000" // /* MW 14 */ + 9475 "11101000" // /* MW 13 */ + 9476 "00001110" // /* MW 12 */ + 9477 "01000100" // /* MW 11 */ + 9478 "00000000" // /* MW 10 */ + 9479 "00000000" // /* MW 9 */ + 9480 "00000000" // /* MW 8 */ + 9481 "01011011" // /* MW 7 */ + 9482 "00000001" // /* MW 6 */ + 9483 "00100000" // /* MW 5 */ + 9484 "00000000" // /* MW 4 */ + 9485 "11110000" // /* MW 3 */ + 9486 "00101100" // /* MW 2 */ + 9487 "00000000" // /* MW 1 */ +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_848 +.src_ref 8 "vector.hpp" 1132 26 first +.src_ref 8 "vector.hpp" 1152 43 +.src_ref 8 "transpose.hpp" 225 15 first +.begin_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 3 + 9488 "11100001" // NOPA; VLDB x0, [p0, #64]; MOVS p4, p1; NOPX; VSHUFFLE bmll0, x1, x0, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9489 "00000000" // /* MW 15 */ + 9490 "00000000" // /* MW 14 */ + 9491 "11101000" // /* MW 13 */ + 9492 "00100000" // /* MW 12 */ + 9493 "00000100" // /* MW 11 */ + 9494 "00000000" // /* MW 10 */ + 9495 "00000000" // /* MW 9 */ + 9496 "00000000" // /* MW 8 */ + 9497 "10001011" // /* MW 7 */ + 9498 "10000100" // /* MW 6 */ + 9499 "01101100" // /* MW 5 */ + 9500 "00101000" // /* MW 4 */ + 9501 "11110000" // /* MW 3 */ + 9502 "00101100" // /* MW 2 */ + 9503 "00000000" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 8 "vector.hpp" 1132 26 +.src_ref 8 "vector.hpp" 1152 43 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9504 "11100001" // NOPA; VLDB.3D x1, [p0], d1; VST.3D bmlh0, [p1], d0; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9505 "00000000" // /* MW 15 */ + 9506 "00000000" // /* MW 14 */ + 9507 "01111000" // /* MW 13 */ + 9508 "10100101" // /* MW 12 */ + 9509 "00000001" // /* MW 11 */ + 9510 "00000000" // /* MW 10 */ + 9511 "00000000" // /* MW 9 */ + 9512 "10000000" // /* MW 8 */ + 9513 "00100110" // /* MW 7 */ + 9514 "00011000" // /* MW 6 */ + 9515 "11101001" // /* MW 5 */ + 9516 "01110000" // /* MW 4 */ + 9517 "11110000" // /* MW 3 */ + 9518 "00101100" // /* MW 2 */ + 9519 "00000000" // /* MW 1 */ +.label ZLE_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_880 +.src_ref 8 "vector.hpp" 1152 43 +.src_ref 8 "transpose.hpp" 224 15 first +.end_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9520 "11100001" // NOPA; NOPB; VST bmll0, [p4, #64]; NOPX; VSHUFFLE bmlh0, x1, x0, r7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9521 "00000000" // /* MW 15 */ + 9522 "00000000" // /* MW 14 */ + 9523 "11101000" // /* MW 13 */ + 9524 "00001110" // /* MW 12 */ + 9525 "01000100" // /* MW 11 */ + 9526 "00000000" // /* MW 10 */ + 9527 "00000000" // /* MW 9 */ + 9528 "10000000" // /* MW 8 */ + 9529 "00000110" // /* MW 7 */ + 9530 "00010100" // /* MW 6 */ + 9531 "00100100" // /* MW 5 */ + 9532 "00000000" // /* MW 4 */ + 9533 "11110000" // /* MW 3 */ + 9534 "00101100" // /* MW 2 */ + 9535 "00000000" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1152 43 +.src_ref 8 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 2 + 9536 "00000010" // MOVS p4, p1; VSHUFFLE bmll0, x1, x0, r16 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9537 "11100000" // /* MW 7 */ + 9538 "00100000" // /* MW 6 */ + 9539 "00000100" // /* MW 5 */ + 9540 "00000000" // /* MW 4 */ + 9541 "01100000" // /* MW 3 */ + 9542 "10010001" // /* MW 2 */ + 9543 "10010000" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 8 "vector.hpp" 1152 43 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9544 "10011000" // VST.3D bmlh0, [p1], d0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9545 "00100110" // /* MW 3 */ + 9546 "00011000" // /* MW 2 */ + 9547 "00001001" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1152 43 +.src_ref 8 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9548 "00000010" // VST bmll0, [p4, #64]; VSHUFFLE bmlh0, x1, x0, r7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9549 "11100000" // /* MW 7 */ + 9550 "00001110" // /* MW 6 */ + 9551 "01000100" // /* MW 5 */ + 9552 "00000000" // /* MW 4 */ + 9553 "11010000" // /* MW 3 */ + 9554 "10000000" // /* MW 2 */ + 9555 "10000010" // /* MW 1 */ +.src_ref 8 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9556 "11011000" // VSHUFFLE bmll0, x1, x0, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9557 "01000001" // /* MW 3 */ + 9558 "00001000" // /* MW 2 */ + 9559 "00011000" // /* MW 1 */ + 9560 "10000100" // J #9744 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9744 delay_slots=5 */ + 9561 "00000000" // /* MW 5 */ + 9562 "00000000" // /* MW 4 */ + 9563 "00001000" // /* MW 3 */ + 9564 "00010011" // /* MW 2 */ + 9565 "00000000" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 8 "vector.hpp" 1152 43 +.src_ref 8 "vector.hpp" 1152 43 first +.delay_slot + 9566 "00000010" // VST.3D bmlh0, [p1], d0; MOV p4, p1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9567 "01110000" // /* MW 7 */ + 9568 "01100000" // /* MW 6 */ + 9569 "00110001" // /* MW 5 */ + 9570 "00000010" // /* MW 4 */ + 9571 "11010000" // /* MW 3 */ + 9572 "00000100" // /* MW 2 */ + 9573 "00100011" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1152 43 +.src_ref 8 "transpose.hpp" 224 15 first +.delay_slot + 9574 "00000010" // VST bmll0, [p4, #64]; VSHUFFLE bmlh0, x1, x0, r7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9575 "11100000" // /* MW 7 */ + 9576 "00001110" // /* MW 6 */ + 9577 "01000100" // /* MW 5 */ + 9578 "00000000" // /* MW 4 */ + 9579 "11010000" // /* MW 3 */ + 9580 "10000000" // /* MW 2 */ + 9581 "10000010" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1152 43 +.src_ref 8 "transpose.hpp" 225 15 first +.delay_slot + 9582 "00000010" // MOVS p4, p1; VSHUFFLE bmll0, x1, x0, r16 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9583 "11100000" // /* MW 7 */ + 9584 "00100000" // /* MW 6 */ + 9585 "00000100" // /* MW 5 */ + 9586 "00000000" // /* MW 4 */ + 9587 "01100000" // /* MW 3 */ + 9588 "10010001" // /* MW 2 */ + 9589 "10010000" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 8 "vector.hpp" 1152 43 first +.delay_slot + 9590 "10011000" // VST.3D bmlh0, [p1], d0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9591 "00100110" // /* MW 3 */ + 9592 "00011000" // /* MW 2 */ + 9593 "00001001" // /* MW 1 */ +.src_ref 8 "vector.hpp" 1152 43 +.delay_slot + 9594 "00001100" // NOPA; VST bmll0, [p4, #64] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9595 "00001101" // /* MW 5 */ + 9596 "00101000" // /* MW 4 */ + 9597 "11111000" // /* MW 3 */ + 9598 "00101100" // /* MW 2 */ + 9599 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_960 +.src_ref 11 "transposeshuffle.h" 88 16 first + 9600 "01000100" // MOVXM ls, #9616 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9601 "00100000" // /* MW 5 */ + 9602 "11101011" // /* MW 4 */ + 9603 "00100001" // /* MW 3 */ + 9604 "00000000" // /* MW 2 */ + 9605 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 88 16 + 9606 "01000100" // MOVXM le, #9728 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9607 "00000000" // /* MW 5 */ + 9608 "11101100" // /* MW 4 */ + 9609 "00100110" // /* MW 3 */ + 9610 "00000000" // /* MW 2 */ + 9611 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 88 16 + 9612 "10011000" // ADD.NC lc, r2, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9613 "00000000" // /* MW 3 */ + 9614 "01110001" // /* MW 2 */ + 9615 "00011101" // /* MW 1 */ +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_976 +.src_ref 8 "vector.hpp" 1132 26 first +.src_ref 8 "vector.hpp" 1152 43 +.begin_of_loop +.loop_nesting 3 + 9616 "11110100" // VLDB x0, [p0, #64]; MOV p4, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9617 "10000001" // /* MW 5 */ + 9618 "11000101" // /* MW 4 */ + 9619 "10001000" // /* MW 3 */ + 9620 "10000110" // /* MW 2 */ + 9621 "00000010" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 8 "vector.hpp" 1132 26 + 9622 "00011000" // VLDB.3D x1, [p0], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9623 "01110100" // /* MW 3 */ + 9624 "00111000" // /* MW 2 */ + 9625 "00111000" // /* MW 1 */ + 9626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9627 "00000000" // /* MW 1 */ + 9628 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9629 "00000000" // /* MW 1 */ + 9630 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9631 "00000000" // /* MW 1 */ + 9632 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9633 "00000000" // /* MW 15 */ + 9634 "00000000" // /* MW 14 */ + 9635 "01111000" // /* MW 13 */ + 9636 "10100101" // /* MW 12 */ + 9637 "00000001" // /* MW 11 */ + 9638 "00000000" // /* MW 10 */ + 9639 "00000000" // /* MW 9 */ + 9640 "00000000" // /* MW 8 */ + 9641 "01011011" // /* MW 7 */ + 9642 "00000001" // /* MW 6 */ + 9643 "00100000" // /* MW 5 */ + 9644 "00000000" // /* MW 4 */ + 9645 "11110000" // /* MW 3 */ + 9646 "00101100" // /* MW 2 */ + 9647 "00000000" // /* MW 1 */ + 9648 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9649 "00000000" // /* MW 15 */ + 9650 "00000000" // /* MW 14 */ + 9651 "01111000" // /* MW 13 */ + 9652 "10100101" // /* MW 12 */ + 9653 "00000001" // /* MW 11 */ + 9654 "00000000" // /* MW 10 */ + 9655 "00000000" // /* MW 9 */ + 9656 "00000000" // /* MW 8 */ + 9657 "01011011" // /* MW 7 */ + 9658 "00000001" // /* MW 6 */ + 9659 "00100000" // /* MW 5 */ + 9660 "00000000" // /* MW 4 */ + 9661 "11110000" // /* MW 3 */ + 9662 "00101100" // /* MW 2 */ + 9663 "00000000" // /* MW 1 */ + 9664 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9665 "00000000" // /* MW 15 */ + 9666 "00000000" // /* MW 14 */ + 9667 "01111000" // /* MW 13 */ + 9668 "10100101" // /* MW 12 */ + 9669 "00000001" // /* MW 11 */ + 9670 "00000000" // /* MW 10 */ + 9671 "00000000" // /* MW 9 */ + 9672 "00000000" // /* MW 8 */ + 9673 "01011011" // /* MW 7 */ + 9674 "00000001" // /* MW 6 */ + 9675 "00100000" // /* MW 5 */ + 9676 "00000000" // /* MW 4 */ + 9677 "11110000" // /* MW 3 */ + 9678 "00101100" // /* MW 2 */ + 9679 "00000000" // /* MW 1 */ +.src_ref 8 "transpose.hpp" 224 15 first + 9680 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmlh0, x1, x0, r7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9681 "00000000" // /* MW 15 */ + 9682 "00000000" // /* MW 14 */ + 9683 "11101000" // /* MW 13 */ + 9684 "00001110" // /* MW 12 */ + 9685 "01000100" // /* MW 11 */ + 9686 "00000000" // /* MW 10 */ + 9687 "00000000" // /* MW 9 */ + 9688 "00000000" // /* MW 8 */ + 9689 "01011011" // /* MW 7 */ + 9690 "00000001" // /* MW 6 */ + 9691 "00100000" // /* MW 5 */ + 9692 "00000000" // /* MW 4 */ + 9693 "11110000" // /* MW 3 */ + 9694 "00101100" // /* MW 2 */ + 9695 "00000000" // /* MW 1 */ +.src_ref 8 "transpose.hpp" 225 15 first + 9696 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x1, x0, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9697 "00000000" // /* MW 15 */ + 9698 "00000000" // /* MW 14 */ + 9699 "11101000" // /* MW 13 */ + 9700 "00100000" // /* MW 12 */ + 9701 "00000100" // /* MW 11 */ + 9702 "00000000" // /* MW 10 */ + 9703 "00000000" // /* MW 9 */ + 9704 "00000000" // /* MW 8 */ + 9705 "01011011" // /* MW 7 */ + 9706 "00000001" // /* MW 6 */ + 9707 "00100000" // /* MW 5 */ + 9708 "00000000" // /* MW 4 */ + 9709 "11110000" // /* MW 3 */ + 9710 "00101100" // /* MW 2 */ + 9711 "00000000" // /* MW 1 */ +.src_ref 5 "aie_core.h" 100 15 first +.src_ref 8 "vector.hpp" 1152 43 first + 9712 "11100001" // NOPA; NOPB; VST.3D bmlh0, [p1], d0; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9713 "00000000" // /* MW 15 */ + 9714 "00000000" // /* MW 14 */ + 9715 "01111000" // /* MW 13 */ + 9716 "10100101" // /* MW 12 */ + 9717 "00000001" // /* MW 11 */ + 9718 "00000000" // /* MW 10 */ + 9719 "00000000" // /* MW 9 */ + 9720 "10000000" // /* MW 8 */ + 9721 "00100110" // /* MW 7 */ + 9722 "00011000" // /* MW 6 */ + 9723 "00100001" // /* MW 5 */ + 9724 "00000000" // /* MW 4 */ + 9725 "11110000" // /* MW 3 */ + 9726 "00101100" // /* MW 2 */ + 9727 "00000000" // /* MW 1 */ +.label ZLE_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1088 +.src_ref 8 "vector.hpp" 1152 43 +.end_of_loop + 9728 "11100001" // NOPA; NOPB; VST bmll0, [p4, #64]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9729 "00000000" // /* MW 15 */ + 9730 "00000000" // /* MW 14 */ + 9731 "01111000" // /* MW 13 */ + 9732 "10100101" // /* MW 12 */ + 9733 "00000001" // /* MW 11 */ + 9734 "00000000" // /* MW 10 */ + 9735 "00000000" // /* MW 9 */ + 9736 "10000000" // /* MW 8 */ + 9737 "00000110" // /* MW 7 */ + 9738 "00010100" // /* MW 6 */ + 9739 "00100100" // /* MW 5 */ + 9740 "00000000" // /* MW 4 */ + 9741 "11110000" // /* MW 3 */ + 9742 "00101100" // /* MW 2 */ + 9743 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1104 +.src_ref 11 "transposeshuffle.h" 87 12 first +.loop_nesting 2 + 9744 "00011000" // JNZD r17, r17, p3 /* MW 4 */ /* control_operation: words=4 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 9745 "11100000" // /* MW 3 */ + 9746 "01100010" // /* MW 2 */ + 9747 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9749 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9751 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9753 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9755 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9756 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9757 "01100111" // /* MW 3 */ + 9758 "00000001" // /* MW 2 */ + 9759 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1120 +.src_ref 11 "transposeshuffle.h" 86 8 first +.loop_nesting 1 + 9760 "00011000" // JNZD r0, r0, p2 /* MW 4 */ /* control_operation: words=4 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 9761 "10100000" // /* MW 3 */ + 9762 "00000000" // /* MW 2 */ + 9763 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9765 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9767 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9769 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9771 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9772 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9773 "01100111" // /* MW 3 */ + 9774 "00000001" // /* MW 2 */ + 9775 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1136 +.src_ref 11 "transposeshuffle.h" 126 first +.loop_nesting 0 + 9776 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9777 "00000000" // /* MW 3 */ + 9778 "00101000" // /* MW 2 */ + 9779 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9780 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9781 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9783 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9785 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9787 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params__end +.label __Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params___func_end0 + 9789 "00000000" // /* MW 1 */ +.label __ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj___func_begin0 +.label _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj +.function transpose4d_adf_wrapper, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> > > _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj +.src_ref 10 "transpose4d_adf_wrapper.cpp" 33 first +.function_start + 9792 "11111000" // MOV p3, p6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9793 "11000000" // /* MW 3 */ + 9794 "01101100" // /* MW 2 */ + 9795 "00011011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 10 "transpose4d_adf_wrapper.cpp" 37 8 + 9796 "00111010" // MOVS p6, p1; MOVXM p1, #508780 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9797 "00010001" // /* MW 9 */ + 9798 "10110110" // /* MW 8 */ + 9799 "10110001" // /* MW 7 */ + 9800 "11110000" // /* MW 6 */ + 9801 "00000001" // /* MW 5 */ + 9802 "00000000" // /* MW 4 */ + 9803 "01100000" // /* MW 3 */ + 9804 "10010001" // /* MW 2 */ + 9805 "11010000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 37 8 first + 9806 "10011000" // LDA r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9807 "00010110" // /* MW 3 */ + 9808 "00000110" // /* MW 2 */ + 9809 "00000001" // /* MW 1 */ + 9810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9811 "00000000" // /* MW 1 */ + 9812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9813 "00000000" // /* MW 1 */ + 9814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9815 "00000000" // /* MW 1 */ + 9816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9817 "00000000" // /* MW 1 */ + 9818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9819 "00000000" // /* MW 1 */ + 9820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9821 "00000000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 37 8 +.src_ref 10 "transpose4d_adf_wrapper.cpp" 37 15 + 9822 "10000100" // JNZ r16, #9888 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9888 delay_slots=5 */ + 9823 "00000001" // /* MW 5 */ + 9824 "01000000" // /* MW 4 */ + 9825 "01010000" // /* MW 3 */ + 9826 "00010011" // /* MW 2 */ + 9827 "10000000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 33 +.delay_slot + 9828 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9829 "00000001" // /* MW 5 */ + 9830 "00000000" // /* MW 4 */ + 9831 "00000000" // /* MW 3 */ + 9832 "00001000" // /* MW 2 */ + 9833 "00000000" // /* MW 1 */ +.delay_slot + 9834 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9835 "00111101" // /* MW 3 */ + 9836 "11110100" // /* MW 2 */ + 9837 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.delay_slot + 9838 "00000010" // MOVS p7, p0; MOV p1, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9839 "01110000" // /* MW 7 */ + 9840 "01100000" // /* MW 6 */ + 9841 "10110111" // /* MW 5 */ + 9842 "00000000" // /* MW 4 */ + 9843 "01100000" // /* MW 3 */ + 9844 "00010001" // /* MW 2 */ + 9845 "11110000" // /* MW 1 */ +.delay_slot + 9846 "10011000" // ST p3, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9847 "10011101" // /* MW 3 */ + 9848 "11111001" // /* MW 2 */ + 9849 "00001111" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 38 8 +.delay_slot + 9850 "00111010" // ST p1, [sp, #-4]; MOVXM p0, #508864 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9851 "00010001" // /* MW 9 */ + 9852 "11100000" // /* MW 8 */ + 9853 "00110001" // /* MW 7 */ + 9854 "11110000" // /* MW 6 */ + 9855 "00000001" // /* MW 5 */ + 9856 "00000000" // /* MW 4 */ + 9857 "10110000" // /* MW 3 */ + 9858 "10010011" // /* MW 2 */ + 9859 "11111111" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 38 8 first +.no_stack_arguments + 9860 "00000100" // JL #8368 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8368 delay_slots=5 */ + 9861 "00000001" // /* MW 5 */ + 9862 "00000000" // /* MW 4 */ + 9863 "01011000" // /* MW 3 */ + 9864 "00010000" // /* MW 2 */ + 9865 "00000000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 38 8 +.delay_slot + 9866 "11111000" // MOV p1, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9867 "11000000" // /* MW 3 */ + 9868 "01100100" // /* MW 2 */ + 9869 "00011001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9871 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9873 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9874 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9875 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9876 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9877 "10000001" // /* MW 11 */ + 9878 "10101101" // /* MW 10 */ + 9879 "00000000" // /* MW 9 */ + 9880 "00000000" // /* MW 8 */ + 9881 "00000000" // /* MW 7 */ + 9882 "00000000" // /* MW 6 */ + 9883 "00100000" // /* MW 5 */ + 9884 "00000000" // /* MW 4 */ + 9885 "11110000" // /* MW 3 */ + 9886 "00101100" // /* MW 2 */ + 9887 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj_96 +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 11 "transposeshuffle.h" 137 72 +.return_address + 9888 "10111010" // LDA r16, [p7]; MOVXM p7, #508884 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9889 "00010000" // /* MW 9 */ + 9890 "11101010" // /* MW 8 */ + 9891 "10110001" // /* MW 7 */ + 9892 "11110011" // /* MW 6 */ + 9893 "00000001" // /* MW 5 */ + 9894 "00000000" // /* MW 4 */ + 9895 "11010000" // /* MW 3 */ + 9896 "11000010" // /* MW 2 */ + 9897 "11100000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 137 72 first + 9898 "10011000" // LDA r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9899 "00110110" // /* MW 3 */ + 9900 "00000110" // /* MW 2 */ + 9901 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 9902 "10011000" // LDA p1, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9903 "10011110" // /* MW 3 */ + 9904 "00000100" // /* MW 2 */ + 9905 "00000110" // /* MW 1 */ + 9906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9907 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 137 11 first +.no_stack_arguments + 9908 "00000100" // JL #8640 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8640 delay_slots=5 */ + 9909 "00000001" // /* MW 5 */ + 9910 "00000000" // /* MW 4 */ + 9911 "11100000" // /* MW 3 */ + 9912 "00010000" // /* MW 2 */ + 9913 "00000000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 137 64 +.delay_slot + 9914 "00011000" // MOVX r18, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9915 "00000101" // /* MW 3 */ + 9916 "00100100" // /* MW 2 */ + 9917 "00010000" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 137 11 +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 27 +.delay_slot + 9918 "01000100" // MOVXM p2, #508864 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9919 "10000000" // /* MW 5 */ + 9920 "11000111" // /* MW 4 */ + 9921 "11000100" // /* MW 3 */ + 9922 "00000111" // /* MW 2 */ + 9923 "00000000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 27 +.delay_slot + 9924 "11111000" // MOV p6, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9925 "11000000" // /* MW 3 */ + 9926 "01100100" // /* MW 2 */ + 9927 "00011110" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 137 64 +.delay_slot + 9928 "10011000" // LSHL r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9929 "00101101" // /* MW 3 */ + 9930 "01100011" // /* MW 2 */ + 9931 "00010100" // /* MW 1 */ +.src_ref 11 "transposeshuffle.h" 137 64 +.delay_slot + 9932 "01011000" // ADD.NC p0, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9933 "11000001" // /* MW 3 */ + 9934 "01101000" // /* MW 2 */ + 9935 "00011000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 8 +.src_ref 10 "transpose4d_adf_wrapper.cpp" 46 +.return_address + 9936 "10111010" // LDA lr, [sp, #-12]; MOVXM p2, #508780 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9937 "00010000" // /* MW 9 */ + 9938 "10110110" // /* MW 8 */ + 9939 "00110001" // /* MW 7 */ + 9940 "11110001" // /* MW 6 */ + 9941 "00000001" // /* MW 5 */ + 9942 "00000000" // /* MW 4 */ + 9943 "00100000" // /* MW 3 */ + 9944 "10000111" // /* MW 2 */ + 9945 "11111110" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 8 +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 8 first + 9946 "00101100" // LDA r16, [p2]; MOVX r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9947 "00000010" // /* MW 5 */ + 9948 "01100000" // /* MW 4 */ + 9949 "11010000" // /* MW 3 */ + 9950 "11000010" // /* MW 2 */ + 9951 "01000000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 27 + 9952 "10011000" // LDA r17, [p6, #24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9953 "00110110" // /* MW 3 */ + 9954 "01100110" // /* MW 2 */ + 9955 "00000110" // /* MW 1 */ + 9956 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9957 "00011001" // /* MW 3 */ + 9958 "11111011" // /* MW 2 */ + 9959 "00000111" // /* MW 1 */ + 9960 "00011000" // LDA p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9961 "10011001" // /* MW 3 */ + 9962 "11111111" // /* MW 2 */ + 9963 "00000111" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 46 first + 9964 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9965 "00000001" // /* MW 5 */ + 9966 "00000000" // /* MW 4 */ + 9967 "00000000" // /* MW 3 */ + 9968 "11111000" // /* MW 2 */ + 9969 "11111111" // /* MW 1 */ + 9970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9971 "00000000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 46 + 9972 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9973 "00000000" // /* MW 3 */ + 9974 "00101000" // /* MW 2 */ + 9975 "00010000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 8 first +.delay_slot + 9976 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9977 "00000111" // /* MW 3 */ + 9978 "00100000" // /* MW 2 */ + 9979 "00010100" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 17 +.delay_slot + 9980 "10011000" // EQ r27, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9981 "00000111" // /* MW 3 */ + 9982 "01110111" // /* MW 2 */ + 9983 "00010100" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 8 +.delay_slot + 9984 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9985 "10000010" // /* MW 3 */ + 9986 "00100001" // /* MW 2 */ + 9987 "00010100" // /* MW 1 */ +.delay_slot + 9988 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9989 "00010001" // /* MW 3 */ + 9990 "00000110" // /* MW 2 */ + 9991 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj__end +.label __ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj___func_end0 + 9993 "00000000" // /* MW 1 */ +.label __Z14_b7835_wrapperPPv___func_begin0 +.label _Z14_b7835_wrapperPPv +.function _b7835_wrapper _Z14_b7835_wrapperPPv +.src_ref 0 "0_0_reloadable82.cc" 43 first +.src_ref 0 "0_0_reloadable82.cc" 45 79 +.function_start + 10000 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10001 "11000000" // /* MW 3 */ + 10002 "01100000" // /* MW 2 */ + 10003 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 45 79 first + 10004 "10011000" // LDA p0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10005 "00011110" // /* MW 3 */ + 10006 "00011100" // /* MW 2 */ + 10007 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 47 46 first + 10008 "10011000" // LDA p2, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10009 "00011110" // /* MW 3 */ + 10010 "00010101" // /* MW 2 */ + 10011 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 46 80 first + 10012 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10013 "10011110" // /* MW 3 */ + 10014 "00000100" // /* MW 2 */ + 10015 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable82.cc" 44 4 first +.tail_call + 10016 "10000100" // J #9792 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=9792 delay_slots=5 */ + 10017 "00000000" // /* MW 5 */ + 10018 "00000000" // /* MW 4 */ + 10019 "00100000" // /* MW 3 */ + 10020 "00010011" // /* MW 2 */ + 10021 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10023 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10025 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10027 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10029 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z14_b7835_wrapperPPv__end +.label __Z14_b7835_wrapperPPv___func_end0 + 10031 "00000000" // /* MW 1 */ +.label memset +.function memset memset +.src_ref 12 "string.c" 325 first +.src_ref 12 "string.c" 328 4 first +.function_start + 10032 "10000100" // JZ r1, #10192 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10192 delay_slots=5 */ + 10033 "00000001" // /* MW 5 */ + 10034 "00000000" // /* MW 4 */ + 10035 "11101000" // /* MW 3 */ + 10036 "00010011" // /* MW 2 */ + 10037 "00001000" // /* MW 1 */ +.src_ref 12 "string.c" 329 3 +.delay_slot + 10038 "11111000" // MOV p0, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10039 "11000000" // /* MW 3 */ + 10040 "01100010" // /* MW 2 */ + 10041 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10043 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10045 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10047 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10049 "00000000" // /* MW 1 */ +.src_ref 12 "string.c" 328 4 first +.src_ref 12 "string.c" 329 3 + 10050 "00000010" // MOVS p1, p0; MOV lc, r1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10051 "01110000" // /* MW 7 */ + 10052 "01010000" // /* MW 6 */ + 10053 "10111000" // /* MW 5 */ + 10054 "00000010" // /* MW 4 */ + 10055 "01100000" // /* MW 3 */ + 10056 "00010001" // /* MW 2 */ + 10057 "00110000" // /* MW 1 */ +.src_ref 12 "string.c" 328 4 + 10058 "01000100" // MOVXM ls, #10080 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10059 "11000000" // /* MW 5 */ + 10060 "11101110" // /* MW 4 */ + 10061 "00100001" // /* MW 3 */ + 10062 "00000000" // /* MW 2 */ + 10063 "00000000" // /* MW 1 */ +.src_ref 12 "string.c" 328 4 + 10064 "11100001" // NOPA; NOPB; NOPS; MOVXM le, #10176; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10065 "00000000" // /* MW 15 */ + 10066 "00000000" // /* MW 14 */ + 10067 "00010000" // /* MW 13 */ + 10068 "11100000" // /* MW 12 */ + 10069 "10111011" // /* MW 11 */ + 10070 "00001001" // /* MW 10 */ + 10071 "00000000" // /* MW 9 */ + 10072 "00000000" // /* MW 8 */ + 10073 "01011011" // /* MW 7 */ + 10074 "00000001" // /* MW 6 */ + 10075 "00100000" // /* MW 5 */ + 10076 "00000000" // /* MW 4 */ + 10077 "11110000" // /* MW 3 */ + 10078 "00101100" // /* MW 2 */ + 10079 "00000000" // /* MW 1 */ +.label ZLS_Fmemset_48 +.src_ref 12 "string.c" 329 3 first +.begin_of_loop +.loop_nesting 1 + 10080 "11100001" // ST.s8 r0, [p1], #1; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10081 "00000000" // /* MW 15 */ + 10082 "00000000" // /* MW 14 */ + 10083 "01111000" // /* MW 13 */ + 10084 "10100101" // /* MW 12 */ + 10085 "00000001" // /* MW 11 */ + 10086 "00000000" // /* MW 10 */ + 10087 "00000000" // /* MW 9 */ + 10088 "00000000" // /* MW 8 */ + 10089 "01011011" // /* MW 7 */ + 10090 "00000001" // /* MW 6 */ + 10091 "00100000" // /* MW 5 */ + 10092 "00000000" // /* MW 4 */ + 10093 "11100000" // /* MW 3 */ + 10094 "10000000" // /* MW 2 */ + 10095 "00100011" // /* MW 1 */ + 10096 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10097 "00000000" // /* MW 15 */ + 10098 "00000000" // /* MW 14 */ + 10099 "01111000" // /* MW 13 */ + 10100 "10100101" // /* MW 12 */ + 10101 "00000001" // /* MW 11 */ + 10102 "00000000" // /* MW 10 */ + 10103 "00000000" // /* MW 9 */ + 10104 "00000000" // /* MW 8 */ + 10105 "01011011" // /* MW 7 */ + 10106 "00000001" // /* MW 6 */ + 10107 "00100000" // /* MW 5 */ + 10108 "00000000" // /* MW 4 */ + 10109 "11110000" // /* MW 3 */ + 10110 "00101100" // /* MW 2 */ + 10111 "00000000" // /* MW 1 */ + 10112 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10113 "00000000" // /* MW 15 */ + 10114 "00000000" // /* MW 14 */ + 10115 "01111000" // /* MW 13 */ + 10116 "10100101" // /* MW 12 */ + 10117 "00000001" // /* MW 11 */ + 10118 "00000000" // /* MW 10 */ + 10119 "00000000" // /* MW 9 */ + 10120 "00000000" // /* MW 8 */ + 10121 "01011011" // /* MW 7 */ + 10122 "00000001" // /* MW 6 */ + 10123 "00100000" // /* MW 5 */ + 10124 "00000000" // /* MW 4 */ + 10125 "11110000" // /* MW 3 */ + 10126 "00101100" // /* MW 2 */ + 10127 "00000000" // /* MW 1 */ + 10128 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10129 "00000000" // /* MW 15 */ + 10130 "00000000" // /* MW 14 */ + 10131 "01111000" // /* MW 13 */ + 10132 "10100101" // /* MW 12 */ + 10133 "00000001" // /* MW 11 */ + 10134 "00000000" // /* MW 10 */ + 10135 "00000000" // /* MW 9 */ + 10136 "00000000" // /* MW 8 */ + 10137 "01011011" // /* MW 7 */ + 10138 "00000001" // /* MW 6 */ + 10139 "00100000" // /* MW 5 */ + 10140 "00000000" // /* MW 4 */ + 10141 "11110000" // /* MW 3 */ + 10142 "00101100" // /* MW 2 */ + 10143 "00000000" // /* MW 1 */ + 10144 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10145 "00000000" // /* MW 15 */ + 10146 "00000000" // /* MW 14 */ + 10147 "01111000" // /* MW 13 */ + 10148 "10100101" // /* MW 12 */ + 10149 "00000001" // /* MW 11 */ + 10150 "00000000" // /* MW 10 */ + 10151 "00000000" // /* MW 9 */ + 10152 "00000000" // /* MW 8 */ + 10153 "01011011" // /* MW 7 */ + 10154 "00000001" // /* MW 6 */ + 10155 "00100000" // /* MW 5 */ + 10156 "00000000" // /* MW 4 */ + 10157 "11110000" // /* MW 3 */ + 10158 "00101100" // /* MW 2 */ + 10159 "00000000" // /* MW 1 */ + 10160 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10161 "00000000" // /* MW 15 */ + 10162 "00000000" // /* MW 14 */ + 10163 "01111000" // /* MW 13 */ + 10164 "10100101" // /* MW 12 */ + 10165 "00000001" // /* MW 11 */ + 10166 "00000000" // /* MW 10 */ + 10167 "00000000" // /* MW 9 */ + 10168 "00000000" // /* MW 8 */ + 10169 "01011011" // /* MW 7 */ + 10170 "00000001" // /* MW 6 */ + 10171 "00100000" // /* MW 5 */ + 10172 "00000000" // /* MW 4 */ + 10173 "11110000" // /* MW 3 */ + 10174 "00101100" // /* MW 2 */ + 10175 "00000000" // /* MW 1 */ +.label ZLE_Fmemset_144 +.end_of_loop + 10176 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10177 "00000000" // /* MW 15 */ + 10178 "00000000" // /* MW 14 */ + 10179 "01111000" // /* MW 13 */ + 10180 "10100101" // /* MW 12 */ + 10181 "00000001" // /* MW 11 */ + 10182 "00000000" // /* MW 10 */ + 10183 "00000000" // /* MW 9 */ + 10184 "00000000" // /* MW 8 */ + 10185 "01011011" // /* MW 7 */ + 10186 "00000001" // /* MW 6 */ + 10187 "00100000" // /* MW 5 */ + 10188 "00000000" // /* MW 4 */ + 10189 "11110000" // /* MW 3 */ + 10190 "00101100" // /* MW 2 */ + 10191 "00000000" // /* MW 1 */ +.label TGT_Fmemset_160 +.src_ref 12 "string.c" 330 4 first +.loop_nesting 0 + 10192 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10193 "00000000" // /* MW 3 */ + 10194 "00101000" // /* MW 2 */ + 10195 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10197 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10199 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10201 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10203 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label memset__end + 10205 "00000000" // /* MW 1 */ +.dir 0 "/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/src" +.dir 1 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer" +.dir 2 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/gemm" +.dir 3 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api" +.dir 4 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include" +.dir 5 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib" +.dir 6 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail" +.dir 7 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p" +.dir 8 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2" +.dir 9 "/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend" +.dir 10 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf" +.dir 11 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc" +.dir 12 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21990/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib/runtime/src" diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/Release/0_0_reloadable82.txt b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/Release/0_0_reloadable82.txt new file mode 100644 index 0000000000000000000000000000000000000000..d90a9f103e8dc309baf126f9c016c07f52c446ee --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/Release/0_0_reloadable82.txt @@ -0,0 +1,3015 @@ +Contents of the .debug_line section: + +sigmoid_carf_templated_lut.h: +File name Line number Starting address View Stmt + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 128 0xc10 x +gemm_bfp16_params.h 130 0xc10 1 +gemm_bfp16_params.h 130 0xc10 2 x +gemm_bfp16_params.h 58 0xc1a +gemm_bfp16_params.h 59 0xc1a 1 +gemm_bfp16_params.h 61 0xc1a 2 +gemm_bfp16_params.h 71 0xc1a 3 +gemm_bfp16_params.h 86 0xc1a 4 +gemm_bfp16_params.h 93 0xc1a 5 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 6982 0xc24 +aie.hpp 6982 0xc24 1 +aie.hpp 6982 0xc24 2 +aie.hpp 6982 0xc24 3 +aie.hpp 6982 0xc24 4 +aie.hpp 7054 0xc24 5 +aie.hpp 7056 0xc24 6 +aie.hpp 7057 0xc24 7 +aie.hpp 7072 0xc24 8 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 44 0xc24 9 +gemm_bfp16_params.h 44 0xc24 10 +gemm_bfp16_params.h 80 0xc24 11 +gemm_bfp16_params.h 99 0xc24 12 +gemm_bfp16_params.h 138 0xc24 13 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7054 0xc2e +aie.hpp 7072 0xc2e 1 +aie.hpp 7073 0xc2e 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 44 0xc2e 3 +gemm_bfp16_params.h 88 0xc2e 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7053 0xc38 +aie.hpp 7053 0xc38 1 +aie.hpp 7053 0xc38 2 +aie.hpp 7057 0xc38 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 85 0xc38 4 +gemm_bfp16_params.h 88 0xc38 5 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include-lite/../include/tuple: +tuple 562 0xc42 +tuple 562 0xc42 1 +tuple 562 0xc42 2 +tuple 562 0xc42 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 6982 0xc42 4 +aie.hpp 6982 0xc42 5 +aie.hpp 7056 0xc42 6 +aie.hpp 7056 0xc42 7 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 138 0xc42 8 +gemm_bfp16_params.h 138 0xc48 +gemm_bfp16_params.h 58 0xc4c x +gemm_bfp16_params.h 130 0xc4c 1 x +gemm_bfp16_params.h 93 0xc52 x +gemm_bfp16_params.h 131 0xc52 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7054 0xc58 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 80 0xc58 1 x +gemm_bfp16_params.h 82 0xc5e +gemm_bfp16_params.h 85 0xc5e 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7053 0xc64 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 59 0xc6e x +gemm_bfp16_params.h 131 0xc6e 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7054 0xc74 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 132 0xc74 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7054 0xc7a +aie.hpp 7056 0xc80 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 65 0xc8a x +gemm_bfp16_params.h 132 0xc8a 1 x +gemm_bfp16_params.h 61 0xc90 x +gemm_bfp16_params.h 133 0xc90 1 x +gemm_bfp16_params.h 71 0xc96 x +gemm_bfp16_params.h 88 0xc96 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7057 0xc9c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 86 0xc9c 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7054 0xca2 +aie.hpp 7056 0xca2 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 71 0xca2 2 x +gemm_bfp16_params.h 86 0xca2 3 +gemm_bfp16_params.h 88 0xcac x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7053 0xcb0 x +aie.hpp 7057 0xcb4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 99 0xcb8 x +gemm_bfp16_params.h 133 0xcb8 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7053 0xcbe x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 134 0xcbe 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7056 0xcc4 x +aie.hpp 7054 0xcc8 x +aie.hpp 7054 0xccc +aie.hpp 7057 0xccc 1 x +aie.hpp 7054 0xcd0 x +aie.hpp 7056 0xcd0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 134 0xcd8 x +gemm_bfp16_params.h 135 0xcdc x +gemm_bfp16_params.h 135 0xcec + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7056 0xcf0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 44 0xcf0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7056 0xcf8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 44 0xcf8 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7056 0xcfe + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 44 0xcfe 1 x +gemm_bfp16_params.h 44 0xd06 +gemm_bfp16_params.h 44 0xd0a +gemm_bfp16_params.h 44 0xd0e +gemm_bfp16_params.h 44 0xd12 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7072 0xd16 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 44 0xd16 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7073 0xd1c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 44 0xd1c 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7054 0xd22 x +aie.hpp 7072 0xd22 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 44 0xd22 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7056 0xd28 +aie.hpp 7057 0xd28 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 44 0xd28 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7056 0xd2e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 44 0xd2e 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7072 0xd34 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 44 0xd34 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7057 0xd3a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 44 0xd3a 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7057 0xd40 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 44 0xd40 1 x +gemm_bfp16_params.h 44 0xd46 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 6982 0xd4a x +aie.hpp 6982 0xd4e +aie.hpp 6982 0xd52 +aie.hpp 6982 0xd56 +aie.hpp 6982 0xd5a +aie.hpp 6982 0xd5e +aie.hpp 6982 0xd62 +aie.hpp 6982 0xd66 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include-lite/../include/tuple: +tuple 562 0xd6a x +tuple 562 0xd6e +tuple 562 0xd72 +tuple 562 0xd76 +tuple 562 0xd7a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 7054 0xd7a 1 x +aie.hpp 7057 0xd7a 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include-lite/../include/tuple: +tuple 562 0xd80 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 6982 0xd84 x +aie.hpp 7056 0xd84 1 x +aie.hpp 6982 0xd8a +aie.hpp 7073 0xd8a 1 x +aie.hpp 6982 0xd90 x +aie.hpp 6982 0xd94 +aie.hpp 6982 0xd98 +aie.hpp 6982 0xd9c +aie.hpp 6982 0xda0 +aie.hpp 6982 0xda4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include-lite/../include/tuple: +tuple 562 0xda8 x +tuple 562 0xdac +tuple 562 0xdb0 +tuple 562 0xdb4 +tuple 562 0xdb8 +tuple 562 0xdbc + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 6982 0xdc0 x +aie.hpp 6982 0xdc4 +aie.hpp 6982 0xdc8 +aie.hpp 6982 0xdcc +aie.hpp 6982 0xdd0 +aie.hpp 6982 0xdd4 +aie.hpp 6982 0xdd8 +aie.hpp 6982 0xddc + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include-lite/../include/tuple: +tuple 562 0xde0 x +tuple 562 0xde4 +tuple 562 0xde8 +tuple 562 0xdec +tuple 562 0xdf0 +tuple 562 0xdf4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 6982 0xdf8 x +aie.hpp 6982 0xdfc +aie.hpp 6982 0xe00 +aie.hpp 6982 0xe04 +aie.hpp 6982 0xe08 +aie.hpp 6982 0xe0c +aie.hpp 6982 0xe10 +aie.hpp 6982 0xe14 +aie.hpp 6982 0xe18 +aie.hpp 6982 0xe1c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 139 0xe1c 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 6982 0xe22 x +aie.hpp 6982 0xe26 +aie.hpp 6982 0xe2a +aie.hpp 6982 0xe2e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16_params.h: +gemm_bfp16_params.h 138 0xe32 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 38 0xe40 x +gemm_bfp16.h 41 0xe40 1 +gemm_bfp16.h 38 0xe46 +gemm_bfp16.h 41 0xe4c x +gemm_bfp16.h 68 0xe50 x +gemm_bfp16.h 41 0xe56 +gemm_bfp16.h 68 0xe56 1 +gemm_bfp16.h 53 0xe60 +gemm_bfp16.h 53 0xe60 1 +gemm_bfp16.h 39 0xe6a +gemm_bfp16.h 41 0xe6a 1 x +gemm_bfp16.h 53 0xe6a 2 x +gemm_bfp16.h 75 0xe6a 3 +gemm_bfp16.h 39 0xe74 x +gemm_bfp16.h 41 0xe7a +gemm_bfp16.h 75 0xe7a 1 x +gemm_bfp16.h 41 0xe80 +gemm_bfp16.h 42 0xe80 1 +gemm_bfp16.h 41 0xe86 x +gemm_bfp16.h 42 0xe86 1 +gemm_bfp16.h 75 0xe86 2 +gemm_bfp16.h 42 0xe92 +gemm_bfp16.h 42 0xe92 1 x +gemm_bfp16.h 75 0xe92 2 +gemm_bfp16.h 41 0xe9c x +gemm_bfp16.h 41 0xe9c 1 x +gemm_bfp16.h 75 0xe9c 2 x +gemm_bfp16.h 42 0xea2 x +gemm_bfp16.h 44 0xea2 1 +gemm_bfp16.h 69 0xea2 2 +gemm_bfp16.h 76 0xea2 3 +gemm_bfp16.h 77 0xea2 4 +gemm_bfp16.h 41 0xeb0 x +gemm_bfp16.h 44 0xeb0 1 x +gemm_bfp16.h 54 0xeb0 2 +gemm_bfp16.h 75 0xeb0 3 +gemm_bfp16.h 54 0xec0 x +gemm_bfp16.h 68 0xec0 1 x +gemm_bfp16.h 54 0xed2 +gemm_bfp16.h 63 0xed2 1 +gemm_bfp16.h 64 0xed2 2 +gemm_bfp16.h 54 0xed6 +gemm_bfp16.h 63 0xed6 1 x +gemm_bfp16.h 54 0xeda x +gemm_bfp16.h 64 0xeda 1 x +gemm_bfp16.h 69 0xee6 x +gemm_bfp16.h 71 0xeea x +gemm_bfp16.h 69 0xef0 x +gemm_bfp16.h 69 0xef4 +gemm_bfp16.h 70 0xef8 x +gemm_bfp16.h 70 0xefc +gemm_bfp16.h 71 0xf00 x +gemm_bfp16.h 71 0xf10 +gemm_bfp16.h 72 0xf20 x +gemm_bfp16.h 72 0xf30 +gemm_bfp16.h 53 0xf40 x +gemm_bfp16.h 75 0xf40 1 x +gemm_bfp16.h 76 0xf40 2 x +gemm_bfp16.h 77 0xf48 x +gemm_bfp16.h 80 0xf54 x +gemm_bfp16.h 80 0xf58 +gemm_bfp16.h 83 0xf70 x +gemm_bfp16.h 92 0xf70 1 +gemm_bfp16.h 93 0xf70 2 +gemm_bfp16.h 97 0xf70 3 +gemm_bfp16.h 97 0xf70 4 +gemm_bfp16.h 92 0xf7c +gemm_bfp16.h 94 0xf7c 1 +gemm_bfp16.h 94 0xf7c 2 +gemm_bfp16.h 95 0xf7c 3 +gemm_bfp16.h 96 0xf7c 4 +gemm_bfp16.h 92 0xf86 x +gemm_bfp16.h 93 0xf86 1 +gemm_bfp16.h 94 0xf86 2 +gemm_bfp16.h 94 0xf86 3 x +gemm_bfp16.h 95 0xf86 4 +gemm_bfp16.h 96 0xf86 5 +gemm_bfp16.h 92 0xf92 +gemm_bfp16.h 93 0xf92 1 x +gemm_bfp16.h 95 0xf92 2 +gemm_bfp16.h 93 0xf9c +gemm_bfp16.h 94 0xf9c 1 +gemm_bfp16.h 94 0xf9c 2 x +gemm_bfp16.h 95 0xf9c 3 +gemm_bfp16.h 96 0xf9c 4 +gemm_bfp16.h 94 0xfa6 +gemm_bfp16.h 95 0xfa6 1 x +gemm_bfp16.h 96 0xfa6 2 +gemm_bfp16.h 92 0xfb0 x +gemm_bfp16.h 94 0xfb0 1 x +gemm_bfp16.h 95 0xfb0 2 +gemm_bfp16.h 94 0xfba +gemm_bfp16.h 95 0xfba 1 x +gemm_bfp16.h 96 0xfba 2 x +gemm_bfp16.h 95 0xfc4 +gemm_bfp16.h 96 0xfc4 1 +gemm_bfp16.h 95 0xfce +gemm_bfp16.h 96 0xfce 1 +gemm_bfp16.h 97 0xfce 2 +gemm_bfp16.h 97 0xfce 3 +gemm_bfp16.h 98 0xfce 4 +gemm_bfp16.h 98 0xfce 5 +gemm_bfp16.h 98 0xfce 6 +gemm_bfp16.h 99 0xfce 7 +gemm_bfp16.h 99 0xfce 8 +gemm_bfp16.h 99 0xfce 9 +gemm_bfp16.h 100 0xfce 10 +gemm_bfp16.h 100 0xfce 11 +gemm_bfp16.h 100 0xfce 12 +gemm_bfp16.h 92 0xfd8 x +gemm_bfp16.h 96 0xfd8 1 +gemm_bfp16.h 92 0xfe2 +gemm_bfp16.h 96 0xfe2 1 x +gemm_bfp16.h 98 0xfec +gemm_bfp16.h 99 0xfec 1 +gemm_bfp16.h 100 0xfec 2 +gemm_bfp16.h 97 0x1000 x +gemm_bfp16.h 97 0x1000 1 x +gemm_bfp16.h 98 0x1000 2 x +gemm_bfp16.h 98 0x1006 +gemm_bfp16.h 98 0x1006 1 +gemm_bfp16.h 98 0x1006 2 +gemm_bfp16.h 99 0x1006 3 x +gemm_bfp16.h 98 0x100c x +gemm_bfp16.h 98 0x100c 1 x +gemm_bfp16.h 98 0x100c 2 x +gemm_bfp16.h 99 0x100c 3 +gemm_bfp16.h 99 0x100c 4 +gemm_bfp16.h 99 0x100c 5 +gemm_bfp16.h 100 0x100c 6 x +gemm_bfp16.h 94 0x1016 x +gemm_bfp16.h 99 0x1016 1 x +gemm_bfp16.h 99 0x1016 2 x +gemm_bfp16.h 99 0x1016 3 x +gemm_bfp16.h 100 0x1016 4 +gemm_bfp16.h 100 0x1016 5 +gemm_bfp16.h 100 0x1016 6 +gemm_bfp16.h 95 0x1020 x +gemm_bfp16.h 96 0x1024 x +gemm_bfp16.h 100 0x1024 1 x +gemm_bfp16.h 100 0x1024 2 x +gemm_bfp16.h 100 0x1024 3 x +gemm_bfp16.h 93 0x102e x +gemm_bfp16.h 94 0x102e 1 x +gemm_bfp16.h 93 0x1034 +gemm_bfp16.h 94 0x1034 1 +gemm_bfp16.h 95 0x1034 2 x +gemm_bfp16.h 94 0x103e x +gemm_bfp16.h 95 0x103e 1 +gemm_bfp16.h 96 0x103e 2 x +gemm_bfp16.h 94 0x1048 +gemm_bfp16.h 96 0x1048 1 +gemm_bfp16.h 95 0x104e x +gemm_bfp16.h 95 0x1052 +gemm_bfp16.h 96 0x1056 x +gemm_bfp16.h 96 0x105a +gemm_bfp16.h 97 0x1070 x +gemm_bfp16.h 97 0x1070 1 x +gemm_bfp16.h 98 0x1070 2 x +gemm_bfp16.h 98 0x1076 +gemm_bfp16.h 98 0x1076 1 +gemm_bfp16.h 98 0x1076 2 +gemm_bfp16.h 102 0x1076 3 x +gemm_bfp16.h 98 0x107c x +gemm_bfp16.h 98 0x107c 1 x +gemm_bfp16.h 98 0x107c 2 x +gemm_bfp16.h 99 0x107c 3 x +gemm_bfp16.h 99 0x1082 +gemm_bfp16.h 99 0x1082 1 +gemm_bfp16.h 99 0x1082 2 +gemm_bfp16.h 100 0x1082 3 x +gemm_bfp16.h 99 0x1088 x +gemm_bfp16.h 99 0x1088 1 x +gemm_bfp16.h 99 0x1088 2 x +gemm_bfp16.h 100 0x1088 3 +gemm_bfp16.h 100 0x1088 4 +gemm_bfp16.h 100 0x1088 5 +gemm_bfp16.h 100 0x1090 x +gemm_bfp16.h 100 0x1090 1 x +gemm_bfp16.h 100 0x1090 2 x +gemm_bfp16.h 225 0x10a0 x +gemm_bfp16.h 231 0x10a0 1 +gemm_bfp16.h 231 0x10a0 2 +gemm_bfp16.h 231 0x10ac x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x10b6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x10b6 1 +array_helpers.hpp 313 0x10b6 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 903 0x10b6 3 +accum.hpp 940 0x10b6 4 +accum.hpp 940 0x10b6 5 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 225 0x10be +gemm_bfp16.h 231 0x10c8 +gemm_bfp16.h 231 0x10c8 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 74 0x10da +tile.hpp 74 0x10da 1 +tile.hpp 74 0x10e6 x +tile.hpp 86 0x10e6 1 +tile.hpp 86 0x10e6 2 +tile.hpp 86 0x10f2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 235 0x10f2 1 +gemm_bfp16.h 235 0x10fc +gemm_bfp16.h 235 0x10fc 1 x +gemm_bfp16.h 235 0x1102 +gemm_bfp16.h 235 0x1106 +gemm_bfp16.h 235 0x110a +gemm_bfp16.h 235 0x1110 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include-lite/../include/tuple: +tuple 562 0x1120 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x1120 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 252 0x1120 2 +gemm_bfp16.h 252 0x1120 3 +gemm_bfp16.h 252 0x112a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 6982 0x112e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 252 0x112e 1 +gemm_bfp16.h 126 0x1132 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x1136 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include-lite/../include/tuple: +tuple 562 0x113a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 252 0x113a 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp: +aie.hpp 6982 0x1142 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 252 0x1142 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include-lite/../include/tuple: +tuple 562 0x1146 x +tuple 562 0x1146 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 252 0x1146 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include-lite/../include/tuple: +tuple 562 0x114c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 134 0x114c 1 x +gemm_bfp16.h 252 0x114c 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include-lite/../include/tuple: +tuple 562 0x1156 +tuple 562 0x1156 1 +tuple 562 0x1156 2 x +tuple 562 0x115c +tuple 562 0x1160 +tuple 562 0x1160 1 +tuple 562 0x1164 +tuple 562 0x1168 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 109 0x116c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 113 0x116c 1 +gemm_bfp16.h 135 0x116c 2 +gemm_bfp16.h 110 0x1176 x +gemm_bfp16.h 135 0x1176 1 x +gemm_bfp16.h 110 0x1180 +gemm_bfp16.h 135 0x1180 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 109 0x118a +vector.hpp 109 0x118a 1 +vector.hpp 1365 0x118a 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x118a 3 +transpose.hpp 225 0x118a 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 109 0x1196 +vector.hpp 109 0x119a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 135 0x119a 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 109 0x11a0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 110 0x11a0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 109 0x11a8 x +vector.hpp 109 0x11ac +vector.hpp 109 0x11b4 +vector.hpp 109 0x11d0 +vector.hpp 109 0x11d0 1 +vector.hpp 1365 0x11d0 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x11d0 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 109 0x11e0 x +vector.hpp 109 0x11e0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x11e0 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 109 0x11f0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 113 0x11f0 1 x +gemm_bfp16.h 113 0x1200 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 109 0x1220 x +vector.hpp 1365 0x1220 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x1220 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 109 0x1224 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x1224 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 109 0x1228 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 113 0x1228 1 x +gemm_bfp16.h 113 0x1230 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 109 0x1238 +vector.hpp 1365 0x1238 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x1238 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 109 0x123c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x123c 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 113 0x1240 x +gemm_bfp16.h 113 0x1248 +gemm_bfp16.h 141 0x1250 x +gemm_bfp16.h 148 0x1254 x +gemm_bfp16.h 148 0x1258 +gemm_bfp16.h 148 0x1258 1 +gemm_bfp16.h 148 0x125e +gemm_bfp16.h 172 0x1262 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x1276 +aie_core.h 81 0x1276 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 174 0x1276 2 +gemm_bfp16.h 175 0x1276 3 +gemm_bfp16.h 176 0x1276 4 +gemm_bfp16.h 177 0x1276 5 +gemm_bfp16.h 202 0x1276 6 +gemm_bfp16.h 203 0x1276 7 +gemm_bfp16.h 204 0x1276 8 +gemm_bfp16.h 205 0x1276 9 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x127c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x127c 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 172 0x127c 2 +gemm_bfp16.h 172 0x1282 x +gemm_bfp16.h 172 0x1288 +gemm_bfp16.h 172 0x128e +gemm_bfp16.h 172 0x1294 +gemm_bfp16.h 172 0x129a +gemm_bfp16.h 172 0x129a 1 +gemm_bfp16.h 172 0x12a0 +gemm_bfp16.h 172 0x12a0 1 +gemm_bfp16.h 172 0x12a6 +gemm_bfp16.h 172 0x12aa +gemm_bfp16.h 172 0x12ae + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x12b2 +aie_core.h 81 0x12b2 1 +aie_core.h 100 0x12b2 2 +aie_core.h 100 0x12b2 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x12b2 4 +array_helpers.hpp 252 0x12b2 5 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 903 0x12b2 6 +accum.hpp 940 0x12b2 7 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 172 0x12b2 8 +gemm_bfp16.h 172 0x12b8 +gemm_bfp16.h 175 0x12b8 1 +gemm_bfp16.h 268 0x12b8 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x12c2 +aie_core.h 100 0x12c2 1 +aie_core.h 100 0x12c2 2 +aie_core.h 100 0x12c2 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x12c2 4 +array_helpers.hpp 252 0x12c2 5 +array_helpers.hpp 252 0x12c2 6 +array_helpers.hpp 313 0x12c2 7 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 903 0x12c2 8 +accum.hpp 940 0x12c2 9 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 172 0x12c2 10 +gemm_bfp16.h 175 0x12c2 11 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x12d0 +aie_core.h 100 0x12d0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x12d0 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 175 0x12d0 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x12da +aie_core.h 81 0x12da 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 174 0x12da 2 +gemm_bfp16.h 175 0x12da 3 +gemm_bfp16.h 176 0x12da 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x12e4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 116 0x12e4 1 +mmul_bf16_bf16.hpp 116 0x12e4 2 +mmul_bf16_bf16.hpp 116 0x12e4 3 +mmul_bf16_bf16.hpp 116 0x12e4 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 175 0x12e4 5 +gemm_bfp16.h 176 0x12e4 6 +gemm_bfp16.h 177 0x12e4 7 x +gemm_bfp16.h 202 0x12e4 8 +gemm_bfp16.h 203 0x12e4 9 +gemm_bfp16.h 204 0x12e4 10 +gemm_bfp16.h 204 0x12e4 11 +gemm_bfp16.h 205 0x12e4 12 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x12f2 +aie_core.h 100 0x12f2 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x12f2 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 113 0x12f2 3 +mmul_bf16_bf16.hpp 113 0x12f2 4 +mmul_bf16_bf16.hpp 114 0x12f2 5 +mmul_bf16_bf16.hpp 114 0x12f2 6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 177 0x12f2 7 +gemm_bfp16.h 268 0x12f2 8 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x1300 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x1300 1 +transpose.hpp 224 0x1300 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 175 0x1300 3 +gemm_bfp16.h 177 0x1300 4 +gemm_bfp16.h 203 0x1300 5 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x1310 +aie_core.h 100 0x1310 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1365 0x1310 2 +vector.hpp 1365 0x1310 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x1310 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x1310 5 +transpose.hpp 225 0x1310 6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 177 0x1310 7 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x1320 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x1320 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 111 0x1320 2 +mmul_bf16_bf16.hpp 111 0x1320 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 174 0x1320 4 +gemm_bfp16.h 176 0x1320 5 x +gemm_bfp16.h 177 0x1320 6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x1330 +aie_core.h 100 0x1330 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x1330 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 174 0x1330 3 x +gemm_bfp16.h 176 0x1330 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x133c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 176 0x133c 1 x +gemm_bfp16.h 182 0x133c 2 x +gemm_bfp16.h 176 0x1346 +gemm_bfp16.h 182 0x1346 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1350 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x1350 1 x +array_helpers.hpp 313 0x1350 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 176 0x1350 3 +gemm_bfp16.h 182 0x1350 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 313 0x135e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 174 0x135e 1 x +gemm_bfp16.h 175 0x135e 2 +gemm_bfp16.h 203 0x135e 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x1368 x +array_helpers.hpp 252 0x1368 1 x +array_helpers.hpp 313 0x1368 2 +array_helpers.hpp 252 0x1370 +array_helpers.hpp 313 0x1370 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 174 0x1370 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1378 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x1378 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 174 0x1378 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x137e +array_helpers.hpp 313 0x137e 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 174 0x137e 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1388 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x1388 1 x +array_helpers.hpp 313 0x1388 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 903 0x1388 3 x +accum.hpp 940 0x1388 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x138e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x138e 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 175 0x138e 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1365 0x1398 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x1398 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x139c x +array_helpers.hpp 313 0x139c 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x139c 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1365 0x13a6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x13a6 1 +array_helpers.hpp 313 0x13a6 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x13a6 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 111 0x13a6 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x13a6 5 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x13b2 x +array_helpers.hpp 252 0x13b2 1 x +array_helpers.hpp 313 0x13b2 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 903 0x13b2 3 x +accum.hpp 940 0x13b2 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x13b2 5 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1365 0x13be x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 313 0x13be 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x13be 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x13c4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x13c4 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 111 0x13c4 2 x +mmul_bf16_bf16.hpp 113 0x13c4 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x13ce x +aie_core.h 100 0x13ce 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x13ce 2 +array_helpers.hpp 252 0x13ce 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 903 0x13ce 4 x +accum.hpp 940 0x13ce 5 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x13ce 6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1365 0x13d8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x13d8 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 114 0x13dc x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x13e0 x +array_helpers.hpp 252 0x13e0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x13e0 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 111 0x13e0 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x13ea + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 903 0x13ea 1 x +accum.hpp 940 0x13ea 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 113 0x13ea 3 x +mmul_bf16_bf16.hpp 114 0x13f0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x1400 x +array_helpers.hpp 313 0x1400 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 111 0x1400 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x140a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x140a 1 +array_helpers.hpp 252 0x140a 2 +array_helpers.hpp 313 0x140a 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x140a 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 116 0x140a 5 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x140a 6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x141a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1365 0x141a 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x141a 2 x +array_helpers.hpp 313 0x141a 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 903 0x141a 4 x +accum.hpp 940 0x141a 5 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 113 0x141a 6 x +mmul_bf16_bf16.hpp 116 0x141a 7 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x141a 8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x142a +array_helpers.hpp 313 0x142a 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 114 0x142a 2 x +mmul_bf16_bf16.hpp 116 0x142a 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x1436 x +array_helpers.hpp 252 0x1436 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x1436 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 111 0x1436 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x1436 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1365 0x1442 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x1442 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 903 0x1442 2 x +accum.hpp 940 0x1442 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 113 0x1442 4 x +mmul_bf16_bf16.hpp 116 0x1442 5 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x1442 6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 114 0x1450 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x1460 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x1460 1 x +array_helpers.hpp 252 0x1460 2 x +array_helpers.hpp 313 0x1460 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x1460 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 111 0x1460 5 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 204 0x1460 6 +gemm_bfp16.h 205 0x1460 7 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1470 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x1470 1 +array_helpers.hpp 313 0x1470 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 903 0x1470 3 x +accum.hpp 940 0x1470 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 116 0x1470 5 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x1470 6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 203 0x1470 7 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1365 0x1480 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 113 0x1480 1 x +mmul_bf16_bf16.hpp 116 0x1480 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x1480 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 202 0x1480 4 x +gemm_bfp16.h 203 0x1480 5 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 114 0x1490 x +mmul_bf16_bf16.hpp 116 0x1490 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 176 0x1490 2 +gemm_bfp16.h 204 0x1490 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x149a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x149a 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 111 0x149a 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x149a 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1365 0x14a4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x14a4 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 903 0x14a4 2 x +accum.hpp 940 0x14a4 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 113 0x14a4 4 x +mmul_bf16_bf16.hpp 116 0x14a4 5 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x14a4 6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x14b2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 114 0x14b2 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 204 0x14b2 2 x +gemm_bfp16.h 205 0x14b2 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x14bc + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 116 0x14bc 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x14c4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x14c4 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 111 0x14c4 2 x +mmul_bf16_bf16.hpp 113 0x14c4 3 x +mmul_bf16_bf16.hpp 116 0x14ce x +mmul_bf16_bf16.hpp 114 0x14d2 x +mmul_bf16_bf16.hpp 116 0x14d2 1 +mmul_bf16_bf16.hpp 116 0x14da x +mmul_bf16_bf16.hpp 113 0x14e0 x +mmul_bf16_bf16.hpp 114 0x14e4 x +mmul_bf16_bf16.hpp 116 0x14e4 1 x +mmul_bf16_bf16.hpp 116 0x14f0 +mmul_bf16_bf16.hpp 116 0x14f4 +mmul_bf16_bf16.hpp 116 0x14f8 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 202 0x14fc x +gemm_bfp16.h 202 0x1500 +gemm_bfp16.h 202 0x1504 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x1508 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 202 0x1508 1 +gemm_bfp16.h 205 0x1508 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x1512 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 175 0x1512 1 +gemm_bfp16.h 203 0x1512 2 +gemm_bfp16.h 203 0x1512 3 x +gemm_bfp16.h 203 0x151c +gemm_bfp16.h 203 0x1520 +gemm_bfp16.h 175 0x1524 +gemm_bfp16.h 203 0x1524 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x152c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 175 0x152c 1 x +gemm_bfp16.h 204 0x152c 2 x +gemm_bfp16.h 175 0x1536 +gemm_bfp16.h 204 0x1536 1 +gemm_bfp16.h 172 0x153c x +gemm_bfp16.h 175 0x153c 1 +gemm_bfp16.h 204 0x153c 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x1546 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 174 0x1546 1 +gemm_bfp16.h 175 0x1546 2 x +gemm_bfp16.h 176 0x1546 3 +gemm_bfp16.h 177 0x1546 4 x +gemm_bfp16.h 204 0x1546 5 x +gemm_bfp16.h 177 0x1552 +gemm_bfp16.h 205 0x1552 1 x +gemm_bfp16.h 177 0x1558 x +gemm_bfp16.h 205 0x1558 1 +gemm_bfp16.h 177 0x155e +gemm_bfp16.h 205 0x155e 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x1564 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 174 0x1564 1 +gemm_bfp16.h 176 0x1564 2 x +gemm_bfp16.h 177 0x1564 3 x +gemm_bfp16.h 205 0x1564 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x1570 +aie_core.h 100 0x1570 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x1570 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 174 0x1570 3 x +gemm_bfp16.h 176 0x1570 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x157c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 176 0x157c 1 x +gemm_bfp16.h 182 0x157c 2 x +gemm_bfp16.h 176 0x1586 +gemm_bfp16.h 182 0x1586 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1590 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x1590 1 x +array_helpers.hpp 313 0x1590 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 176 0x1590 3 +gemm_bfp16.h 182 0x1590 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 313 0x159e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 174 0x159e 1 x +gemm_bfp16.h 175 0x159e 2 +gemm_bfp16.h 203 0x159e 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x15a8 x +array_helpers.hpp 252 0x15a8 1 x +array_helpers.hpp 313 0x15a8 2 +array_helpers.hpp 252 0x15b0 +array_helpers.hpp 313 0x15b0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 174 0x15b0 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x15b8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x15b8 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 174 0x15b8 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x15be +array_helpers.hpp 313 0x15be 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 174 0x15be 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x15c8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x15c8 1 x +array_helpers.hpp 313 0x15c8 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 903 0x15c8 3 x +accum.hpp 940 0x15c8 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x15ce + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x15ce 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 175 0x15ce 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1365 0x15d8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x15d8 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x15dc x +array_helpers.hpp 313 0x15dc 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x15dc 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1365 0x15e6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x15e6 1 +array_helpers.hpp 313 0x15e6 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x15e6 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 111 0x15e6 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x15e6 5 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x15f2 x +array_helpers.hpp 252 0x15f2 1 x +array_helpers.hpp 313 0x15f2 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 903 0x15f2 3 x +accum.hpp 940 0x15f2 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x15f2 5 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1365 0x15fe x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 313 0x15fe 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x15fe 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x1604 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x1604 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 111 0x1604 2 x +mmul_bf16_bf16.hpp 113 0x1604 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x160e x +aie_core.h 100 0x160e 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x160e 2 +array_helpers.hpp 252 0x160e 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 903 0x160e 4 x +accum.hpp 940 0x160e 5 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x160e 6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1365 0x1618 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x1618 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 114 0x161c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x1620 x +array_helpers.hpp 252 0x1620 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x1620 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 111 0x1620 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x162a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 903 0x162a 1 x +accum.hpp 940 0x162a 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 113 0x162a 3 x +mmul_bf16_bf16.hpp 114 0x1630 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x1640 x +array_helpers.hpp 313 0x1640 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 111 0x1640 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x164a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x164a 1 +array_helpers.hpp 252 0x164a 2 +array_helpers.hpp 313 0x164a 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x164a 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 116 0x164a 5 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x164a 6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x165a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1365 0x165a 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x165a 2 x +array_helpers.hpp 313 0x165a 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 903 0x165a 4 x +accum.hpp 940 0x165a 5 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 113 0x165a 6 x +mmul_bf16_bf16.hpp 116 0x165a 7 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x165a 8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x166a +array_helpers.hpp 313 0x166a 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 114 0x166a 2 x +mmul_bf16_bf16.hpp 116 0x166a 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x1676 x +array_helpers.hpp 252 0x1676 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x1676 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 111 0x1676 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x1676 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1365 0x1682 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x1682 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 903 0x1682 2 x +accum.hpp 940 0x1682 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 113 0x1682 4 x +mmul_bf16_bf16.hpp 116 0x1682 5 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x1682 6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 114 0x1690 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x16a0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x16a0 1 x +array_helpers.hpp 313 0x16a0 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x16a0 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 111 0x16a0 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 204 0x16a0 5 +gemm_bfp16.h 205 0x16a0 6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x16ae x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x16ae 1 +array_helpers.hpp 313 0x16ae 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 903 0x16ae 3 x +accum.hpp 940 0x16ae 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 116 0x16ae 5 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x16ae 6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 203 0x16ae 7 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1365 0x16be x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 113 0x16be 1 x +mmul_bf16_bf16.hpp 116 0x16be 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x16be 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 202 0x16be 4 x +gemm_bfp16.h 268 0x16be 5 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 114 0x16ce x +mmul_bf16_bf16.hpp 116 0x16ce 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 203 0x16ce 2 x +gemm_bfp16.h 268 0x16ce 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x16dc x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x16dc 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 111 0x16dc 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x16dc 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 268 0x16dc 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1365 0x16ea x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 252 0x16ea 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 903 0x16ea 2 x +accum.hpp 940 0x16ea 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 113 0x16ea 4 x +mmul_bf16_bf16.hpp 116 0x16ea 5 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x16ea 6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 114 0x16f8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 176 0x16f8 1 +gemm_bfp16.h 204 0x16f8 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x1700 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 116 0x1700 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 204 0x1700 2 x +gemm_bfp16.h 205 0x1700 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 111 0x170a x +mmul_bf16_bf16.hpp 113 0x170a 1 x +mmul_bf16_bf16.hpp 116 0x1712 x +mmul_bf16_bf16.hpp 114 0x1716 x +mmul_bf16_bf16.hpp 116 0x1716 1 +mmul_bf16_bf16.hpp 116 0x171e x +mmul_bf16_bf16.hpp 113 0x1724 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 268 0x1724 1 +gemm_bfp16.h 268 0x1724 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 114 0x172e x +mmul_bf16_bf16.hpp 116 0x172e 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 268 0x172e 2 +gemm_bfp16.h 269 0x172e 3 +gemm_bfp16.h 269 0x172e 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/mmul_bf16_bf16.hpp: +mmul_bf16_bf16.hpp 116 0x1742 +mmul_bf16_bf16.hpp 116 0x1746 +mmul_bf16_bf16.hpp 116 0x174a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 202 0x174e x +gemm_bfp16.h 268 0x174e 1 x +gemm_bfp16.h 202 0x1754 +gemm_bfp16.h 268 0x1754 1 +gemm_bfp16.h 202 0x175a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x175e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 202 0x175e 1 +gemm_bfp16.h 205 0x175e 2 x +gemm_bfp16.h 203 0x1768 x +gemm_bfp16.h 203 0x176c +gemm_bfp16.h 203 0x1770 +gemm_bfp16.h 203 0x1774 +gemm_bfp16.h 204 0x1778 x +gemm_bfp16.h 204 0x177c +gemm_bfp16.h 204 0x1780 +gemm_bfp16.h 268 0x1780 1 x +gemm_bfp16.h 204 0x178a x +gemm_bfp16.h 205 0x178e x +gemm_bfp16.h 205 0x1792 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x1796 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 205 0x1796 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x179e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 205 0x179e 1 +gemm_bfp16.h 269 0x17a4 x +gemm_bfp16.h 269 0x17a8 +gemm_bfp16.h 269 0x17ac +gemm_bfp16.h 269 0x17b2 +gemm_bfp16.h 269 0x17be +gemm_bfp16.h 272 0x17f0 x +gemm_bfp16.h 274 0x1800 +gemm_bfp16.h 274 0x1814 x +gemm_bfp16.h 274 0x1820 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x1830 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 381 0x1830 1 x +superkernels.cpp 382 0x1830 2 +superkernels.cpp 388 0x1830 3 +superkernels.cpp 382 0x183a x +superkernels.cpp 382 0x184a +superkernels.cpp 382 0x184a 1 +superkernels.cpp 381 0x1850 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x185e +io_buffer_main.h 149 0x1866 +io_buffer_main.h 242 0x1866 1 +io_buffer_main.h 348 0x1866 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x186a + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 384 0x186a 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 74 0x1874 +tile.hpp 86 0x1874 1 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 384 0x1880 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x188a +tile.hpp 74 0x188e +tile.hpp 74 0x1892 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 385 0x18a0 +superkernels.cpp 385 0x18a0 1 x +superkernels.cpp 385 0x18b6 +superkernels.cpp 387 0x18c0 +superkernels.cpp 388 0x18c0 1 x +superkernels.cpp 387 0x18ca x +superkernels.cpp 389 0x18da x +superkernels.cpp 389 0x18da 1 x +superkernels.cpp 388 0x18e0 x +superkernels.cpp 388 0x18e4 +superkernels.cpp 387 0x18e8 x +superkernels.cpp 387 0x18ec + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x18f2 +io_buffer_main.h 242 0x18f6 x +io_buffer_main.h 242 0x18fa +io_buffer_main.h 242 0x18fe +io_buffer_main.h 242 0x1902 +io_buffer_main.h 259 0x1908 x +io_buffer_main.h 242 0x1914 x +io_buffer_main.h 242 0x1914 1 x +io_buffer_main.h 242 0x1918 +io_buffer_main.h 419 0x191c +io_buffer_main.h 419 0x1926 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 285 0x1930 +gemm_bfp16.h 285 0x1930 1 +gemm_bfp16.h 285 0x193a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x1940 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 285 0x1940 1 +gemm_bfp16.h 285 0x194a x +gemm_bfp16.h 285 0x194e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x1952 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 393 0x1952 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x1958 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h: +gemm_bfp16.h 285 0x1960 +gemm_bfp16.h 285 0x1964 x +gemm_bfp16.h 285 0x196a +gemm_bfp16.h 285 0x196e +gemm_bfp16.h 285 0x1972 +gemm_bfp16.h 285 0x1976 +gemm_bfp16.h 285 0x197a + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 393 0x1980 +superkernels.cpp 393 0x1980 1 x +superkernels.cpp 394 0x1980 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 348 0x198a + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 393 0x198a 1 +superkernels.cpp 393 0x199e +superkernels.cpp 393 0x19a2 +superkernels.cpp 394 0x19a8 +superkernels.cpp 398 0x19a8 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 348 0x19b4 x +io_buffer_main.h 351 0x19b4 1 +io_buffer_main.h 449 0x19b4 2 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 394 0x19b4 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 348 0x19be +io_buffer_main.h 449 0x19ce x +io_buffer_main.h 351 0x19d2 x +io_buffer_main.h 351 0x19e2 +io_buffer_main.h 351 0x19e6 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 397 0x19f0 +superkernels.cpp 398 0x19f0 1 +superkernels.cpp 397 0x19f6 x +superkernels.cpp 397 0x19f6 1 +superkernels.cpp 397 0x1a00 +superkernels.cpp 397 0x1a10 +superkernels.cpp 397 0x1a14 +superkernels.cpp 398 0x1a26 x +superkernels.cpp 400 0x1a30 +superkernels.cpp 400 0x1a40 x +superkernels.cpp 400 0x1a44 +superkernels.cpp - 0x1a45 + + +buffer_pad_adf_wrapper.cpp: +File name Line number Starting address View Stmt + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/transpose4d_adf_wrapper.cpp: +transpose4d_adf_wrapper.cpp 33 0x2640 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x2644 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/transpose4d_adf_wrapper.cpp: +transpose4d_adf_wrapper.cpp 37 0x2644 1 +transpose4d_adf_wrapper.cpp 37 0x264e x +transpose4d_adf_wrapper.cpp 37 0x265e +transpose4d_adf_wrapper.cpp 37 0x265e 1 +transpose4d_adf_wrapper.cpp 33 0x2664 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x266e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/transpose4d_adf_wrapper.cpp: +transpose4d_adf_wrapper.cpp 38 0x267a +transpose4d_adf_wrapper.cpp 38 0x2684 x +transpose4d_adf_wrapper.cpp 38 0x268a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x26a0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 137 0x26a0 1 +transposeshuffle.h 137 0x26aa x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x26ae x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 137 0x26b4 x +transposeshuffle.h 137 0x26ba +transposeshuffle.h 137 0x26be + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/transpose4d_adf_wrapper.cpp: +transpose4d_adf_wrapper.cpp 43 0x26be 1 +transpose4d_adf_wrapper.cpp 43 0x26c4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 137 0x26c8 +transposeshuffle.h 137 0x26cc + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/transpose4d_adf_wrapper.cpp: +transpose4d_adf_wrapper.cpp 43 0x26d0 +transpose4d_adf_wrapper.cpp 46 0x26d0 1 +transpose4d_adf_wrapper.cpp 43 0x26da +transpose4d_adf_wrapper.cpp 43 0x26da 1 x +transpose4d_adf_wrapper.cpp 43 0x26e0 +transpose4d_adf_wrapper.cpp 46 0x26ec x +transpose4d_adf_wrapper.cpp 46 0x26f4 +transpose4d_adf_wrapper.cpp 43 0x26f8 x +transpose4d_adf_wrapper.cpp 43 0x26fc +transpose4d_adf_wrapper.cpp 43 0x2700 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x1a90 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/buffer_pad_adf_wrapper.cpp: +buffer_pad_adf_wrapper.cpp 24 0x1a90 1 x +buffer_pad_adf_wrapper.cpp 26 0x1a90 2 +buffer_pad_adf_wrapper.cpp 26 0x1a96 x +buffer_pad_adf_wrapper.cpp 26 0x1a9a +buffer_pad_adf_wrapper.cpp 27 0x1a9e x +buffer_pad_adf_wrapper.cpp 24 0x1aae +buffer_pad_adf_wrapper.cpp 36 0x1ab4 x +buffer_pad_adf_wrapper.cpp 36 0x1ab4 1 +buffer_pad_adf_wrapper.cpp 36 0x1aba +buffer_pad_adf_wrapper.cpp 36 0x1ac4 +buffer_pad_adf_wrapper.cpp 36 0x1ac8 +buffer_pad_adf_wrapper.cpp 36 0x1acc +buffer_pad_adf_wrapper.cpp 36 0x1acc 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x1ad2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/buffer_pad_adf_wrapper.cpp: +buffer_pad_adf_wrapper.cpp 25 0x1ae4 x +buffer_pad_adf_wrapper.cpp 37 0x1aea +buffer_pad_adf_wrapper.cpp 37 0x1af0 +buffer_pad_adf_wrapper.cpp 37 0x1af4 x +buffer_pad_adf_wrapper.cpp 36 0x1afa +buffer_pad_adf_wrapper.cpp 36 0x1afe x +buffer_pad_adf_wrapper.cpp 40 0x1b30 x +buffer_pad_adf_wrapper.cpp 40 0x1b30 1 x +buffer_pad_adf_wrapper.cpp 41 0x1b40 x +buffer_pad_adf_wrapper.cpp 41 0x1b4a +buffer_pad_adf_wrapper.cpp 42 0x1b54 +buffer_pad_adf_wrapper.cpp 40 0x1b5e +buffer_pad_adf_wrapper.cpp 45 0x1b5e 1 +buffer_pad_adf_wrapper.cpp 41 0x1b80 +buffer_pad_adf_wrapper.cpp 41 0x1b90 +buffer_pad_adf_wrapper.cpp 42 0x1b90 1 +buffer_pad_adf_wrapper.cpp 42 0x1b98 +buffer_pad_adf_wrapper.cpp 42 0x1ba0 x +buffer_pad_adf_wrapper.cpp 42 0x1bc0 +buffer_pad_adf_wrapper.cpp 40 0x1c10 x +buffer_pad_adf_wrapper.cpp 45 0x1c40 +buffer_pad_adf_wrapper.cpp 45 0x1c62 x +buffer_pad_adf_wrapper.cpp 45 0x1c66 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x1fd0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/slice_generic_innermost_adf_wrapper.cpp: +e_generic_innermost_adf_wrapper.cpp 29 0x1fd0 1 +e_generic_innermost_adf_wrapper.cpp 29 0x1fd0 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x1fda + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/slice_generic_innermost_adf_wrapper.cpp: +e_generic_innermost_adf_wrapper.cpp 33 0x1fe2 +e_generic_innermost_adf_wrapper.cpp 33 0x1fe2 1 x +e_generic_innermost_adf_wrapper.cpp 37 0x1ff4 +e_generic_innermost_adf_wrapper.cpp 36 0x2000 x +e_generic_innermost_adf_wrapper.cpp 37 0x2000 1 +e_generic_innermost_adf_wrapper.cpp 34 0x200a x +e_generic_innermost_adf_wrapper.cpp 36 0x200a 1 +e_generic_innermost_adf_wrapper.cpp 34 0x2010 +e_generic_innermost_adf_wrapper.cpp 36 0x2010 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x201a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/slice_generic_innermost_adf_wrapper.cpp: +e_generic_innermost_adf_wrapper.cpp 34 0x201a 1 x +e_generic_innermost_adf_wrapper.cpp 36 0x2020 x +e_generic_innermost_adf_wrapper.cpp 37 0x2020 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x2028 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/slice_generic_innermost_adf_wrapper.cpp: +e_generic_innermost_adf_wrapper.cpp 36 0x202e x +e_generic_innermost_adf_wrapper.cpp 36 0x2034 +e_generic_innermost_adf_wrapper.cpp 36 0x203a +e_generic_innermost_adf_wrapper.cpp 37 0x203e x +e_generic_innermost_adf_wrapper.cpp 37 0x2044 +e_generic_innermost_adf_wrapper.cpp 37 0x2048 +e_generic_innermost_adf_wrapper.cpp 37 0x204c +e_generic_innermost_adf_wrapper.cpp 37 0x2050 +e_generic_innermost_adf_wrapper.cpp 39 0x2060 +e_generic_innermost_adf_wrapper.cpp 39 0x2070 x +e_generic_innermost_adf_wrapper.cpp 39 0x2074 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1df0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 25 0x1df0 1 x +slice_generic_innermost.h 35 0x1df0 2 +slice_generic_innermost.h 54 0x1df0 3 +slice_generic_innermost.h 35 0x1df8 x +slice_generic_innermost.h 35 0x1dfc +slice_generic_innermost.h 36 0x1e02 x +slice_generic_innermost.h 40 0x1e06 x +slice_generic_innermost.h 38 0x1e0a x +slice_generic_innermost.h 40 0x1e18 x +slice_generic_innermost.h 40 0x1e18 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1e1e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 36 0x1e1e 1 +slice_generic_innermost.h 50 0x1e1e 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1e22 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 36 0x1e22 1 x +slice_generic_innermost.h 50 0x1e22 2 +slice_generic_innermost.h 35 0x1e28 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1e2c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 37 0x1e2c 1 x +slice_generic_innermost.h 52 0x1e2c 2 +slice_generic_innermost.h 38 0x1e32 x +slice_generic_innermost.h 40 0x1e44 +slice_generic_innermost.h 40 0x1e48 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1e52 x +vector.hpp 1139 0x1e52 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 40 0x1e52 2 +slice_generic_innermost.h 50 0x1e52 3 x +slice_generic_innermost.h 52 0x1e52 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1e5e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 40 0x1e5e 1 x +slice_generic_innermost.h 51 0x1e5e 2 x +slice_generic_innermost.h 53 0x1e5e 3 x +slice_generic_innermost.h 56 0x1e5e 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1e6c x +vector.hpp 1139 0x1e6c 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 40 0x1e6c 2 +slice_generic_innermost.h 50 0x1e6c 3 x +slice_generic_innermost.h 57 0x1e6c 4 x +slice_generic_innermost.h 58 0x1e6c 5 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1e78 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 51 0x1e78 1 x +slice_generic_innermost.h 52 0x1e78 2 x +slice_generic_innermost.h 59 0x1e78 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1e80 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 53 0x1e80 1 x +slice_generic_innermost.h 56 0x1e80 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1e86 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 57 0x1e86 1 x +slice_generic_innermost.h 58 0x1e86 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1e8c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 50 0x1e8c 1 x +slice_generic_innermost.h 59 0x1e8c 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1e92 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 46 0x1e92 1 x +slice_generic_innermost.h 52 0x1e92 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1e98 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 51 0x1e98 1 x +slice_generic_innermost.h 56 0x1e98 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1ea0 +vector.hpp 1159 0x1ea0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 47 0x1ea0 2 x +slice_generic_innermost.h 54 0x1ea0 3 x +slice_generic_innermost.h 58 0x1ea0 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1eb0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 46 0x1eb0 1 x +slice_generic_innermost.h 50 0x1eb0 2 x +slice_generic_innermost.h 53 0x1eb0 3 x +slice_generic_innermost.h 57 0x1eb0 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1ec0 +vector.hpp 1159 0x1ec0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 52 0x1ec0 2 x +slice_generic_innermost.h 59 0x1ec0 3 x +slice_generic_innermost.h 60 0x1ec0 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1ed0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 47 0x1ed0 1 x +slice_generic_innermost.h 51 0x1ed0 2 x +slice_generic_innermost.h 55 0x1ed0 3 x +slice_generic_innermost.h 56 0x1ed0 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1ee0 +vector.hpp 1159 0x1ee0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 54 0x1ee0 2 x +slice_generic_innermost.h 58 0x1ee0 3 x +slice_generic_innermost.h 61 0x1ee0 4 x +slice_generic_innermost.h 46 0x1ef0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1ef4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 60 0x1ef4 1 x +slice_generic_innermost.h 47 0x1ef8 x +slice_generic_innermost.h 61 0x1ef8 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1f04 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 46 0x1f04 1 x +slice_generic_innermost.h 55 0x1f04 2 x +slice_generic_innermost.h 60 0x1f04 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1f0e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 54 0x1f0e 1 x +slice_generic_innermost.h 61 0x1f0e 2 x +slice_generic_innermost.h 47 0x1f14 x +slice_generic_innermost.h 55 0x1f14 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1f1a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 54 0x1f1a 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1f20 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 60 0x1f20 1 x +slice_generic_innermost.h 40 0x1f30 x +slice_generic_innermost.h 40 0x1f34 +slice_generic_innermost.h 40 0x1f3a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f40 x +vector.hpp 1139 0x1f40 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 50 0x1f40 2 x +slice_generic_innermost.h 52 0x1f40 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f46 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 51 0x1f46 1 x +slice_generic_innermost.h 53 0x1f46 2 x +slice_generic_innermost.h 56 0x1f46 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f4e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 57 0x1f4e 1 x +slice_generic_innermost.h 58 0x1f4e 2 x +slice_generic_innermost.h 59 0x1f54 x +slice_generic_innermost.h 46 0x1f60 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1f80 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 47 0x1f80 1 x +slice_generic_innermost.h 54 0x1f80 2 x +slice_generic_innermost.h 55 0x1f90 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1fa0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 60 0x1fa0 1 x +slice_generic_innermost.h 61 0x1fb0 x +slice_generic_innermost.h 76 0x1fc0 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/src/0_0_reloadable82.cc: +0_0_reloadable82.cc 18 0x1a60 x +0_0_reloadable82.cc 20 0x1a60 1 +0_0_reloadable82.cc 20 0x1a64 x +0_0_reloadable82.cc 21 0x1a68 x +0_0_reloadable82.cc 23 0x1a6c x +0_0_reloadable82.cc 22 0x1a70 x +0_0_reloadable82.cc 19 0x1a74 x +0_0_reloadable82.cc 27 0x1c80 x +0_0_reloadable82.cc 29 0x1c80 1 +0_0_reloadable82.cc 29 0x1c84 x +0_0_reloadable82.cc 31 0x1c88 x +0_0_reloadable82.cc 30 0x1c8c x +0_0_reloadable82.cc 28 0x1c90 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost_params.h: +slice_generic_innermost_params.h 40 0x1ca0 x +slice_generic_innermost_params.h 41 0x1ca0 1 x +slice_generic_innermost_params.h 41 0x1cb0 x +slice_generic_innermost_params.h 42 0x1cb4 x +slice_generic_innermost_params.h 42 0x1cc4 +slice_generic_innermost_params.h 43 0x1cc8 x +slice_generic_innermost_params.h 43 0x1cd8 +slice_generic_innermost_params.h 44 0x1cdc x +slice_generic_innermost_params.h 44 0x1cec +slice_generic_innermost_params.h 45 0x1cf0 x +slice_generic_innermost_params.h 45 0x1d00 +slice_generic_innermost_params.h 46 0x1d04 x +slice_generic_innermost_params.h 46 0x1d14 +slice_generic_innermost_params.h 47 0x1d18 x +slice_generic_innermost_params.h 47 0x1d28 +slice_generic_innermost_params.h 48 0x1d2c x +slice_generic_innermost_params.h 49 0x1d32 x +slice_generic_innermost_params.h 48 0x1d3e x +slice_generic_innermost_params.h 52 0x1d50 x +slice_generic_innermost_params.h 53 0x1d50 1 x +slice_generic_innermost_params.h 55 0x1d50 2 +slice_generic_innermost_params.h 58 0x1d50 3 +slice_generic_innermost_params.h 53 0x1d5a x +slice_generic_innermost_params.h 58 0x1d5a 1 +slice_generic_innermost_params.h 59 0x1d5a 2 +slice_generic_innermost_params.h 53 0x1d64 +slice_generic_innermost_params.h 60 0x1d64 1 +slice_generic_innermost_params.h 62 0x1d64 2 +slice_generic_innermost_params.h 55 0x1d6a x +slice_generic_innermost_params.h 60 0x1d6a 1 +slice_generic_innermost_params.h 53 0x1d7a x +slice_generic_innermost_params.h 58 0x1d7e x +slice_generic_innermost_params.h 58 0x1d82 +slice_generic_innermost_params.h 53 0x1d86 x +slice_generic_innermost_params.h 58 0x1d86 1 +slice_generic_innermost_params.h 75 0x1d8c x +slice_generic_innermost_params.h 59 0x1d90 x +slice_generic_innermost_params.h 59 0x1d94 +slice_generic_innermost_params.h 60 0x1d98 x +slice_generic_innermost_params.h 60 0x1d9c +slice_generic_innermost_params.h 62 0x1da0 x +slice_generic_innermost_params.h 79 0x1db0 x +slice_generic_innermost_params.h 80 0x1db0 1 x +slice_generic_innermost_params.h 81 0x1db6 +slice_generic_innermost_params.h 81 0x1dba +slice_generic_innermost_params.h 81 0x1dd0 x +slice_generic_innermost_params.h 81 0x1dd6 +slice_generic_innermost_params.h 81 0x1dda + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/src/0_0_reloadable82.cc: +0_0_reloadable82.cc 35 0x2090 x +0_0_reloadable82.cc 37 0x2090 1 +0_0_reloadable82.cc 37 0x2094 x +0_0_reloadable82.cc 39 0x2098 x +0_0_reloadable82.cc 38 0x209c x +0_0_reloadable82.cc 36 0x20a0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle_params.h: +transposeshuffle_params.h 71 0x20b0 x +transposeshuffle_params.h 76 0x20b0 1 +transposeshuffle_params.h 76 0x20b0 2 x +transposeshuffle_params.h 76 0x20ba +transposeshuffle_params.h 76 0x20ba 1 +transposeshuffle_params.h 80 0x20ba 2 +transposeshuffle_params.h 80 0x20ba 3 +transposeshuffle_params.h 81 0x20ba 4 +transposeshuffle_params.h 81 0x20ba 5 +transposeshuffle_params.h 85 0x20c6 +transposeshuffle_params.h 86 0x20c6 1 +transposeshuffle_params.h 89 0x20c6 2 +transposeshuffle_params.h 91 0x20c6 3 +transposeshuffle_params.h 93 0x20c6 4 +transposeshuffle_params.h 94 0x20c6 5 +transposeshuffle_params.h 76 0x20d4 x +transposeshuffle_params.h 76 0x20d8 +transposeshuffle_params.h 76 0x20dc +transposeshuffle_params.h 76 0x20ea +transposeshuffle_params.h 76 0x20ee +transposeshuffle_params.h 76 0x20f2 +transposeshuffle_params.h 76 0x20f6 +transposeshuffle_params.h 76 0x2104 +transposeshuffle_params.h 76 0x2108 +transposeshuffle_params.h 76 0x210c +transposeshuffle_params.h 76 0x2110 +transposeshuffle_params.h 76 0x211e +transposeshuffle_params.h 76 0x2122 +transposeshuffle_params.h 80 0x2126 x +transposeshuffle_params.h 80 0x2136 +transposeshuffle_params.h 80 0x213a +transposeshuffle_params.h 89 0x213a 1 x +transposeshuffle_params.h 80 0x2140 +transposeshuffle_params.h 80 0x2140 1 x +transposeshuffle_params.h 80 0x2148 +transposeshuffle_params.h 81 0x214c x +transposeshuffle_params.h 81 0x215c +transposeshuffle_params.h 90 0x215c 1 +transposeshuffle_params.h 81 0x2162 +transposeshuffle_params.h 81 0x2166 +transposeshuffle_params.h 90 0x2166 1 x +transposeshuffle_params.h 85 0x216c x +transposeshuffle_params.h 85 0x2170 +transposeshuffle_params.h 86 0x2174 x +transposeshuffle_params.h 89 0x2178 x +transposeshuffle_params.h 90 0x217c x +transposeshuffle_params.h 91 0x2180 +transposeshuffle_params.h 91 0x2180 1 x +transposeshuffle_params.h 91 0x2188 +transposeshuffle_params.h 93 0x218c x +transposeshuffle_params.h 93 0x2190 +transposeshuffle_params.h 93 0x2194 +transposeshuffle_params.h 93 0x2198 +transposeshuffle_params.h 93 0x219c +transposeshuffle_params.h 95 0x219c 1 x +transposeshuffle_params.h 94 0x21a2 x +transposeshuffle_params.h 94 0x21a6 +transposeshuffle_params.h 94 0x21aa +transposeshuffle_params.h 94 0x21ae +transposeshuffle_params.h 94 0x21b2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 38 0x21c0 x +transposeshuffle.h 72 0x21c0 1 +transposeshuffle.h 79 0x21c0 2 +transposeshuffle.h 72 0x21ca +transposeshuffle.h 72 0x21ca 1 x +transposeshuffle.h 72 0x21ca 2 +transposeshuffle.h 79 0x21dc x +transposeshuffle.h 79 0x21e0 +transposeshuffle.h 72 0x21e6 +transposeshuffle.h 72 0x21e6 1 +transposeshuffle.h 72 0x21ea x +transposeshuffle.h 72 0x21ea 1 x +transposeshuffle.h 116 0x21f4 +transposeshuffle.h 116 0x21fa x +transposeshuffle.h 116 0x220a +transposeshuffle.h 116 0x220a 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2232 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 116 0x2232 1 +transposeshuffle.h 119 0x2232 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x223c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 116 0x223c 1 x +transposeshuffle.h 119 0x223c 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2246 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 116 0x2246 1 +transposeshuffle.h 119 0x2246 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2250 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 119 0x2250 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2260 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 119 0x2260 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2270 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 119 0x2270 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2280 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 119 0x2280 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2290 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 119 0x2290 1 +transposeshuffle.h 120 0x2290 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x22a0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 119 0x22a0 1 x +transposeshuffle.h 120 0x22a0 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x22b0 +vector.hpp 1159 0x22b0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 119 0x22b0 2 +transposeshuffle.h 120 0x22b0 3 x +transposeshuffle.h 122 0x22b0 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x22c0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 120 0x22c0 1 +transposeshuffle.h 122 0x22c0 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x22c8 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 120 0x22c8 1 +transposeshuffle.h 122 0x22c8 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x22d0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 120 0x22d0 1 +transposeshuffle.h 122 0x22d0 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x22d8 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 120 0x22d8 1 +transposeshuffle.h 122 0x22d8 2 +transposeshuffle.h 126 0x22d8 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x22e2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 120 0x22e2 1 x +transposeshuffle.h 122 0x22e2 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x22ea + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 120 0x22ea 1 +transposeshuffle.h 122 0x22ea 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x22f2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 120 0x22f2 1 +transposeshuffle.h 122 0x22f2 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x22fa + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 122 0x22fa 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2300 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 122 0x2300 1 +transposeshuffle.h 116 0x2310 x +transposeshuffle.h 116 0x2314 +transposeshuffle.h 116 0x231a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2320 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 119 0x2320 1 x +transposeshuffle.h 120 0x2370 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2390 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 122 0x2390 1 x +transposeshuffle.h 126 0x23a0 x +transposeshuffle.h 86 0x23b0 +transposeshuffle.h 86 0x23b6 x +transposeshuffle.h 86 0x23c6 +transposeshuffle.h 86 0x23c6 1 +transposeshuffle.h 86 0x23d6 +transposeshuffle.h 86 0x23d6 1 +transposeshuffle.h 86 0x23e0 +transposeshuffle.h 86 0x23e0 1 +transposeshuffle.h 87 0x23e0 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x23ea +aie_core.h 100 0x23ea 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1132 0x23ea 2 +vector.hpp 1152 0x23ea 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 86 0x23ea 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x23f4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x23f4 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 86 0x23f4 2 +transposeshuffle.h 86 0x23f4 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x2400 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1132 0x2400 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x2400 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 86 0x2400 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x240c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1132 0x240c 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 86 0x240c 2 +transposeshuffle.h 86 0x240c 3 +transposeshuffle.h 86 0x2418 +transposeshuffle.h 87 0x2418 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x2422 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 86 0x2422 1 +transposeshuffle.h 86 0x2428 +transposeshuffle.h 87 0x2428 1 x +transposeshuffle.h 86 0x2432 x +transposeshuffle.h 86 0x2432 1 x +transposeshuffle.h 86 0x243c +transposeshuffle.h 86 0x243c 1 +transposeshuffle.h 86 0x2446 +transposeshuffle.h 86 0x2450 +transposeshuffle.h 87 0x2460 x +transposeshuffle.h 87 0x2470 +transposeshuffle.h 88 0x2480 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1132 0x24a4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 88 0x24a4 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x24ae x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1132 0x24ae 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 88 0x24ae 2 x +transposeshuffle.h 88 0x24b8 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1132 0x24bc x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x24c0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1132 0x24c0 1 +vector.hpp 1132 0x24e0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x24f0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1132 0x24f0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x2500 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1132 0x2510 x +vector.hpp 1152 0x2510 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x2510 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x2520 x +aie_core.h 100 0x2520 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1132 0x2520 2 +vector.hpp 1152 0x2520 3 x +vector.hpp 1152 0x2530 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x2530 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x2540 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x2540 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x2548 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x2548 1 x +vector.hpp 1152 0x254c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x254c 1 x +transpose.hpp 225 0x2554 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x255e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x255e 1 +vector.hpp 1152 0x255e 2 x +vector.hpp 1152 0x2566 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x2566 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x256e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x256e 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x2576 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x2576 1 x +vector.hpp 1152 0x257a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 88 0x2580 x +transposeshuffle.h 88 0x2586 +transposeshuffle.h 88 0x258c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1132 0x2590 x +vector.hpp 1152 0x2590 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x2596 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1132 0x2596 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x25d0 x +transpose.hpp 225 0x25e0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x25f0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x25f0 1 x +vector.hpp 1152 0x2600 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 87 0x2610 x +transposeshuffle.h 86 0x2620 x +transposeshuffle.h 126 0x2630 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/src/0_0_reloadable82.cc: +0_0_reloadable82.cc 43 0x2710 x +0_0_reloadable82.cc 45 0x2710 1 +0_0_reloadable82.cc 45 0x2714 x +0_0_reloadable82.cc 47 0x2718 x +0_0_reloadable82.cc 46 0x271c x +0_0_reloadable82.cc 44 0x2720 x +0_0_reloadable82.cc 60 0x9e0 x +0_0_reloadable82.cc 62 0x9e0 1 +0_0_reloadable82.cc 62 0x9e0 2 x +0_0_reloadable82.cc 60 0x9e6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0x9f0 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/src/0_0_reloadable82.cc: +0_0_reloadable82.cc 67 0x9f0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0x9f8 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/src/0_0_reloadable82.cc: +0_0_reloadable82.cc 64 0x9f8 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 614 0xa08 x +io_buffer_compiler.h 614 0xa0c +io_buffer_compiler.h 614 0xa10 +io_buffer_compiler.h 614 0xa14 +io_buffer_compiler.h 614 0xa18 +io_buffer_compiler.h 219 0xa28 x +io_buffer_compiler.h 219 0xa28 1 x +io_buffer_compiler.h 218 0xa2c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 434 0xa30 +io_buffer_main.h 434 0xa30 1 +io_buffer_main.h 434 0xa30 2 +io_buffer_main.h 434 0xa3c x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/src/0_0_reloadable82.cc: +0_0_reloadable82.cc 64 0xa40 +0_0_reloadable82.cc 64 0xa40 1 +0_0_reloadable82.cc 67 0xa40 2 +0_0_reloadable82.cc 70 0xa40 3 +0_0_reloadable82.cc 64 0xa46 +0_0_reloadable82.cc 64 0xa46 1 x +0_0_reloadable82.cc 64 0xa4c +0_0_reloadable82.cc 64 0xa4c 1 +0_0_reloadable82.cc 64 0xa52 +0_0_reloadable82.cc 67 0xa52 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 434 0xa5c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 614 0xa6e x +io_buffer_compiler.h 614 0xa72 +io_buffer_compiler.h 614 0xa76 +io_buffer_compiler.h 614 0xa7a +io_buffer_compiler.h 614 0xa7e +io_buffer_compiler.h 219 0xa8e x +io_buffer_compiler.h 219 0xa8e 1 x +io_buffer_compiler.h 218 0xa92 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 434 0xa9e x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/src/0_0_reloadable82.cc: +0_0_reloadable82.cc 67 0xaa2 x +0_0_reloadable82.cc 67 0xaa6 +0_0_reloadable82.cc 67 0xaa6 1 +0_0_reloadable82.cc 67 0xaac +0_0_reloadable82.cc 67 0xaac 1 +0_0_reloadable82.cc 67 0xab2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 614 0xac4 x +io_buffer_compiler.h 614 0xac8 +io_buffer_compiler.h 614 0xacc +io_buffer_compiler.h 614 0xad0 +io_buffer_compiler.h 614 0xad4 +io_buffer_compiler.h 219 0xae4 x +io_buffer_compiler.h 219 0xae4 1 x +io_buffer_compiler.h 218 0xae8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 434 0xaf4 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/src/0_0_reloadable82.cc: +0_0_reloadable82.cc 70 0xaf8 x +0_0_reloadable82.cc 70 0xafc +0_0_reloadable82.cc 70 0xb00 +0_0_reloadable82.cc 70 0xb06 +0_0_reloadable82.cc 70 0xb18 +0_0_reloadable82.cc 73 0xb1c +0_0_reloadable82.cc 75 0xb1c 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0xb30 +io_buffer_compiler.h 630 0xb30 1 +io_buffer_compiler.h 630 0xb30 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 464 0xb30 3 +io_buffer_main.h 464 0xb30 4 +io_buffer_main.h 464 0xb30 5 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/src/0_0_reloadable82.cc: +0_0_reloadable82.cc 73 0xb30 6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 464 0xb36 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/src/0_0_reloadable82.cc: +0_0_reloadable82.cc 75 0xb3a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0xb3e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 464 0xb3e 1 +io_buffer_main.h 464 0xb42 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 629 0xb4a x +io_buffer_compiler.h 629 0xb4e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 464 0xb5e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0xb62 +io_buffer_compiler.h 630 0xb62 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 464 0xb68 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/src/0_0_reloadable82.cc: +0_0_reloadable82.cc 75 0xb68 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0xb78 x +io_buffer_compiler.h 629 0xb7c x +io_buffer_compiler.h 630 0xb7c 1 +io_buffer_compiler.h 629 0xb82 +io_buffer_compiler.h 630 0xb82 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 464 0xb92 +io_buffer_main.h 464 0xb96 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0xb9a +io_buffer_compiler.h 630 0xb9a 1 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/src/0_0_reloadable82.cc: +0_0_reloadable82.cc 78 0xba0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0xbae x +io_buffer_compiler.h 629 0xbb2 x +io_buffer_compiler.h 630 0xbb2 1 +io_buffer_compiler.h 629 0xbb8 +io_buffer_compiler.h 630 0xbb8 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 464 0xbca x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/src/0_0_reloadable82.cc: +0_0_reloadable82.cc 80 0xbce + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0xbd2 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/src/0_0_reloadable82.cc: +0_0_reloadable82.cc 80 0xbe6 x +0_0_reloadable82.cc 80 0xbec + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0xbf0 x +io_buffer_compiler.h 630 0xbf6 +io_buffer_compiler.h 630 0xbfa +io_buffer_compiler.h 630 0xbfe +io_buffer_compiler.h - 0xbff + + +CU: No directory table +CU: Empty file name table + - 0x1 + + +CU: No directory table +CU: Empty file name table + - 0x1 + + +CU: src/string.c: +File name Line number Starting address View Stmt + +src/string.c: +string.c 325 0x2730 x +string.c 328 0x2730 1 x +string.c 329 0x2736 +string.c 328 0x2742 x +string.c 329 0x2742 1 +string.c 328 0x274a +string.c 328 0x2750 +string.c 329 0x2760 x +string.c 330 0x27d0 x +string.c - 0x27d1 + + diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/scripts/0_0_reloadable82.bcf b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/scripts/0_0_reloadable82.bcf new file mode 100644 index 0000000000000000000000000000000000000000..b5025c34b99f02de39e461699cdc760aa2cbe456 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/scripts/0_0_reloadable82.bcf @@ -0,0 +1,16 @@ +_reserved DMb 0x0 0x40000 + +_reserved PM 0x0 0x9e0 //reserved for main elf + +_entry_point _Z13kernelWrapperPPvjjjj +_symbol _Z13kernelWrapperPPvjjjj 0x9e0 + +_reserved DMb 0x7b540 0x800 //reserved for lcp ping-pong buffers +_reserved DMb 0x7bd40 0x40 //reserved for sync buffer +_stack DM_stack 0x7bd80 0x440 //stack for core +_reserved DMb 0x7c1c0 0x40 //reserved for main elf heap +//space for synopsys compiler at 0x7c200 0x800//heap +_reserved DMb 0x40000 0x3b540 + +_reserved DMb 0x7ca00 0x3600 +_reserved DMb 0x80000 0x80000 // And everything else the core can't see diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/scripts/0_0_reloadable82.prx b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/scripts/0_0_reloadable82.prx new file mode 100644 index 0000000000000000000000000000000000000000..753ff27e979b985242ebf1e77e12fa0bd1564534 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/scripts/0_0_reloadable82.prx @@ -0,0 +1,13 @@ + + + + \ No newline at end of file diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/src/0_0_reloadable82.cc b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/src/0_0_reloadable82.cc new file mode 100644 index 0000000000000000000000000000000000000000..6bfa51ec0dc7a2226fbf804762928ed7647251b0 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/src/0_0_reloadable82.cc @@ -0,0 +1,80 @@ +// Automatically generated processor driver using AIEngine tool-chain + +#include +#include +#include + + +// Declare Kernel functions and initializers +void superkernel_GemmBfp16(adf::io_buffer>> &__restrict,adf::io_buffer>> &__restrict,const unsigned int (&)[7],adf::io_buffer, adf::locking::async>> &__restrict); +#include "buffer_pad_adf_wrapper.cpp" +#include "slice_generic_innermost_adf_wrapper.cpp" +#include "transpose4d_adf_wrapper.cpp" + +// Declare Kernel objects and external arrays + + +void _b13786_wrapper(void* args[]) +{ + superkernel_GemmBfp16( + *reinterpret_cast*>(args[0]), + *reinterpret_cast*>(args[1]), + *reinterpret_cast(args[3]), + *reinterpret_cast*>(args[2])); +} + +void _b8148_wrapper(void* args[]) +{ + mllib_graphs::buffer_pad_adf_wrapper>, adf::io_buffer_config>>( + *reinterpret_cast*>(args[0]), + *reinterpret_cast*>(args[1]), + *reinterpret_cast(args[2])); +} + +void _b8170_wrapper(void* args[]) +{ + mllib_graphs::slice_generic_innermost_adf_wrapper>, adf::io_buffer_config>>( + *reinterpret_cast*>(args[0]), + *reinterpret_cast*>(args[1]), + *reinterpret_cast(args[2])); +} + +void _b7835_wrapper(void* args[]) +{ + mllib_graphs::transpose4d_adf_wrapper>, adf::io_buffer_config>>( + *reinterpret_cast*>(args[0]), + *reinterpret_cast*>(args[1]), + *reinterpret_cast(args[2])); +} + +using UniformKernelFunc = void (*)(void **); + +static UniformKernelFunc g_uniformKernelFuncs[4] = { + _b13786_wrapper, + _b8148_wrapper, + _b8170_wrapper, + _b7835_wrapper +}; + +__attribute__((always_inline)) void kernelWrapper(void* args[], uint32 kernelId, uint32 numSyncIn, uint32 numAsyncIn, uint32 numSyncOut) +{ + uint32 idx = 0; + reinterpret_cast(args[idx])->acquire(numSyncIn > 0); + idx += (numSyncIn > 0) ? 1 : 0; + reinterpret_cast(args[idx])->acquire(numSyncIn > 1); + idx += (numSyncIn > 1) ? 1 : 0; + idx += numAsyncIn; + reinterpret_cast(args[idx])->acquire(numSyncOut > 0); + idx += (numSyncOut > 0) ? 1 : 0; + + (*(g_uniformKernelFuncs[kernelId]))(args); + + idx = 0; + reinterpret_cast(args[idx])->release(numSyncIn > 0); + idx += (numSyncIn > 0) ? 1 : 0; + reinterpret_cast(args[idx])->release(numSyncIn > 1); + idx += (numSyncIn > 1) ? 1 : 0; + idx += numAsyncIn; + reinterpret_cast(args[idx])->release(numSyncOut > 0); + idx += (numSyncOut > 0) ? 1 : 0; +} diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/timestamped_log/0_0_reloadable82.log b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/timestamped_log/0_0_reloadable82.log new file mode 100644 index 0000000000000000000000000000000000000000..09ff3e10125937f83e30df1e09e984656f49f8d5 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/timestamped_log/0_0_reloadable82.log @@ -0,0 +1,483 @@ +Configuration: Release_LLVM +Compiling "0_0_reloadable82.ll" +chess-clang --chess-proc-dir=/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -S -O2 -std=c++2a -fno-builtin-memcpy -mllvm -instcombine-code-sinking=false -mllvm -disable-lsr -mllvm -replexitval=never -mllvm -enable-load-pre=false -mllvm -chess-disable-add-to-or -mllvm -chess-combine-gep-indices=none -mllvm -chess-disable-fold-phi-of-loads -mllvm -chess-aainfo2chains-algo=4 -mllvm -chess-aggressive-aainfo=false -mllvm -chess-enable-indvarsimplify=0 -mllvm -chess-disable-cse-across-loopboundary -mllvm -chess-tbaa-detect-common-underlying-object=true -mllvm -chess-protect-llvm-global-reg-access=true -fno-jump-tables -fno-discard-value-names -g ../../ir/0_0_reloadable82.ll -o../Release/chesswork3593526/0_0_reloadable82.sfg --chess-proc-name=me +noodle -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/isg -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -iaie_core.h +Sinl +Olbb=200 +Opmsa +NOpld +Olzyinl +w../Release/chesswork3593526 ../Release/chesswork3593526/0_0_reloadable82.sfg +Q1=+Sinl,+Olbb=200,+Opmsa,+NOpld,+Olzyinl +Q2=+Sinl,+Olbb=200,+Opmsa,+NOpld,+Olzyinl +Q3=+Sinl,+Olbb=1000,+Opmsa,+NOpld,+Olzyinl +Qfast=+Sinl,+Olbb=1000,+Opmsa,+NOpld,+Olzyinl,+Opfp +Qs=+Sinl,+Olbb=200,+Opmsa,+NOpld,+Olzyinl +Qz=+Sinl,+Olbb=200,+Opmsa,+NOpld,+Olzyinl me +chess-backend 0_0_reloadable82-F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +chess-backend 0_0_reloadable82-F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +chess-backend 0_0_reloadable82-F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +chess-backend 0_0_reloadable82-F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +Warning in "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h", line 662, column 4: in "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h", line 662: (loop #3) + further loop software pipelining (to 3 cycles) is feasible with `chess_prepare_for_pipelining' + but requires a minimum loop count of 6 + ... consider annotating the loop with `chess_loop_range(6,)' if applicable, + ... or remove the current `chess_loop_range(4,)` annotation + +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outEN_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outEN_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outEN_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outEN_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outEN_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outEN_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outEN_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +chess-backend 0_0_reloadable82-F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +chess-backend 0_0_reloadable82-F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outEN_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outEN_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outEN_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outEN_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outEN_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outEN_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +Warning in "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h", line 125, column 4: in "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h", line 125: (loop #19) + further loop software pipelining (to 2 cycles) is feasible with `chess_prepare_for_pipelining' + but requires a minimum loop count of 7 + ... consider annotating the loop with `chess_loop_range(7,)' if applicable, + ... or remove the current `chess_loop_range(4,)` annotation + +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outEN_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +chess-backend 0_0_reloadable82-F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncES_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncES_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncES_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncES_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncES_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncES_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncES_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEER_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEER_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEER_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEER_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEER_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEER_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEER_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEER_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEER_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEER_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEER_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEER_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEER_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEER_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +chess-backend 0_0_reloadable82-F_Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEER_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEER_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN25elementwise_binary_sharedI8bfloat168sub_implIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEER_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +chess-backend 0_0_reloadable82-F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEER_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEER_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEER_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z17superkernel_sub1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEER_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +chess-backend 0_0_reloadable82-F_Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +Warning in "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h", line 258, column 4: in "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h", line 258: (loop #3) + further loop software pipelining (to 4 cycles) is feasible with `chess_prepare_for_pipelining' + but requires a minimum loop count of 4 + ... consider annotating the loop with `chess_loop_range(4,)' if applicable, + ... or remove the current `chess_loop_range(2,)` annotation + +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE__ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE__ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE__ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE__ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE__ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE__ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asy_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asy_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asy_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE__ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_Z8init_accILt1EEvPaS0_iii_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z8init_accILt1EEvPaS0_iii_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asy_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asy_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z8init_accILt1EEvPaS0_iii_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asy_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_Z8init_accILt1EEvPaS0_iii_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z8init_accILt1EEvPaS0_iii_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z8init_accILt1EEvPaS0_iii_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asy_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +chess-backend 0_0_reloadable82-F_Z12post_processPai_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--mist1 -k64 --common 0_0_reloadable82-F_ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z12post_processPai_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z8init_accILt1EEvPaS0_iii_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +chess-backend 0_0_reloadable82-F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z12post_processPai_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_Z12post_processPai_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z12post_processPai_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z12post_processPai_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZL23setup_gemm_bfp16_paramsR17gemm_bfp16_paramsPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +chess-backend 0_0_reloadable82-F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5async_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5async_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5async_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5async_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z12post_processPai_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5async_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +chess-backend 0_0_reloadable82-F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5async_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z21superkernel_GemmBfp16RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA7_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5async_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncES_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncES_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncES_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE5setupER15reduce_params_tIS4_EPKv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_Z20transpose4d_adf_initv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z20transpose4d_adf_initv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z20transpose4d_adf_initv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_Z20transpose4d_adf_initv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z20transpose4d_adf_initv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncES_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z20transpose4d_adf_initv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z20transpose4d_adf_initv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncES_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_Z15_b13786_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z15_b13786_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z15_b13786_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_Z15_b13786_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z15_b13786_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z15_b13786_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncES_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z15_b13786_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NS_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NS_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NS_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NS_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NS_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NS_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z21superkernel_reducesumRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA18_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncES_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +chess-backend 0_0_reloadable82-F_Z14_b8148_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z14_b8148_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NS_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z14_b8148_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_Z14_b8148_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z14_b8148_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z14_b8148_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z14_b8148_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +chess-backend 0_0_reloadable82-F_ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +chess-backend 0_0_reloadable82-F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +chess-backend 0_0_reloadable82-F_Z14_b8170_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z14_b8170_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z14_b8170_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +chess-backend 0_0_reloadable82-F_ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--mist1 -k64 --common 0_0_reloadable82-F_Z14_b8170_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z14_b8170_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z14_b8170_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z14_b8170_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +chess-backend 0_0_reloadable82-F_Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +chess-backend 0_0_reloadable82-F_ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0__ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0__ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0__ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0__ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0__ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0__ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0__ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_N_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--mist1 -k64 --common 0_0_reloadable82-F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_N_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_N_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_N_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_N_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_N_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_N_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +chess-backend 0_0_reloadable82-F_Z14_b7835_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-F_Z14_b7835_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z14_b7835_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-F_Z14_b7835_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z14_b7835_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z14_b7835_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z14_b7835_wrapperPPv_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +chess-backend 0_0_reloadable82-kernelWrapper_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x --print-subtools --cosel -m +ef +s -M3 --amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --mist1 -k64 --showcolor -b -Obbl --mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--cosel -m +ef +s -M3 --common 0_0_reloadable82-kernelWrapper_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +chess-backend --gvt me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation --tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-kernelWrapper_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist1 -k64 --common 0_0_reloadable82-kernelWrapper_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--showcolor -b -Obbl --common 0_0_reloadable82-kernelWrapper_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-kernelWrapper_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-kernelWrapper_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation -x +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z10gemm_bfp16I8bfloat16Lt1ELj1EQsr3stdE9is_same_vIT_S0_EEvPS1_S2_S2_S2_R17gemm_bfp16_params_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--amnesia -p10 -q2 -ecrSCDEn -ecrMCDEn -ecrVaddSign -ecrUnpackSign -ecrPackSign -ecrUPSSign -ecrUPSMode -ecrSRSSign -ecrSRSMode -ecrF2IMask -ecrUnpackSize -ecrPackSize -ecrSat -ecrRnd +Oefc +Opbr +Odhls +Oprefer-local-reg-moves -Onocb --common 0_0_reloadable82-F_Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist1 -k64 --common 0_0_reloadable82-F_Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--showcolor -b -Obbl --common 0_0_reloadable82-F_Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--mist2 -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 +Omod -k64 +Omsbr=100 +Opnll +A +pnopALU +pnopLDA +pnopLDB +pnopSTS +pnopVEC +Ofexm +Omsanafs +Onzmem +Onombt +Ochex +Omsmfi +Omslactc=lckLdaRsrc_E1,lckLdbRsrc_E1:2 +Odra +Oslr=crSRSSign +Oslr=crUPSMode +Oslr=crSRSMode +Oslr=crRnd +Oslr=crSat +Onop-syntax=NOPA +Onop-syntax=NOPB +Onop-syntax=NOPM +Onop-syntax=NOPV +Onop-syntax=NOPS +Onop-syntax=NOPX +Onop-syntax=NOPXM --common 0_0_reloadable82-F_Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +--tale -g -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -V0_0_reloadable82 -L --common 0_0_reloadable82-F_Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj_ me /proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib +H/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/elongation +bridge -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/isg -i -g -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 0_0_reloadable82.objlist -o../0_0_reloadable82.o -pme +darts -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -d -h -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno ../Release/0_0_reloadable82.o me +Linking "../Release/0_0_reloadable82" +bridge -o../Release/0_0_reloadable82 ../Release/0_0_reloadable82.o -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/isg -g -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable82.bcf -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/softfloat/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork3593526 -pme +darts -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -d -h -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable82 me +Compilation finished successfully (0 errors, 3 warnings) diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/xlopt.log b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/xlopt.log new file mode 100644 index 0000000000000000000000000000000000000000..62897a4bf385fd8652ab8542743948ead9124df5 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable82/xlopt.log @@ -0,0 +1,461 @@ + + +--------------- FILTER ANALYSIS INFO LOG --------------- + +Reading Header IR from ir/_header.ll + +-------------------------------------------------------- + +Add module pass *1*{anonymous}::GuidancePass +Add module pass *1*{anonymous}::ChessOptionsPass +Add module pass *1*{anonymous}::DisableInliningInMainPass +Add module pass *1*cdno::xlopt::AIEMergeSubWordStoresOpt +Add module pass *1*{anonymous}::XLModuleAdaptor +Add module pass *1*{anonymous}::IpConstPropPass +Add module pass *1*{anonymous}::XLModuleAdaptor +Add module pass *1*{anonymous}::XLModuleAdaptor +Add module pass *1*{anonymous}::XLModuleAdaptor +Add module pass *1*{anonymous}::XLModuleAdaptor<{anonymous}::AIELoopInfoPass> +Add module pass *1*cdno::xlopt::AIEAnnotatePragmaPass +Add module pass *1*{anonymous}::XLModuleAdaptor<{anonymous}::AIELoopPeelPass> +Add module pass *1*{anonymous}::AIEAliasAnalysisPass + + +--------------- MEMORY MANAGEMENT GUIDANCE LOG --------------- + +ALIGNMENT_HINT: Alignment of global array g_uniformKernelFuncs is 4 bytes; automatically aligning it to 64 bytes. +ALIGNMENT_HINT: Alignment of global array aie::detail::transpose_bits_impl<16u, bfloat16, 64u>::shuffle_modes is 4 bytes; automatically aligning it to 64 bytes. +SIZE_HINT: Global array sigmoid_lut<0u, 256u>::data_ab is 1024 bytes. Consider making it mapper-managed LUT or memory buffer. +SIZE_HINT: Global array sigmoid_lut<0u, 256u>::data_cd is 1024 bytes. Consider making it mapper-managed LUT or memory buffer. +SIZE_HINT: Global array sigmoid_lut_fp16<0u, 256u>::data_ab is 1024 bytes. Consider making it mapper-managed LUT or memory buffer. +SIZE_HINT: Global array sigmoid_lut_fp16<0u, 256u>::data_cd is 1024 bytes. Consider making it mapper-managed LUT or memory buffer. +SIZE_HINT: Global array gelu_lut_32<0u, 512u>::data_ab is 2048 bytes. Consider making it mapper-managed LUT or memory buffer. +SIZE_HINT: Global array gelu_lut_32<0u, 512u>::data_cd is 2048 bytes. Consider making it mapper-managed LUT or memory buffer. +SIZE_HINT: Global array tanh_lut<0u, 512u>::data_ab is 2048 bytes. Consider making it mapper-managed LUT or memory buffer. +SIZE_HINT: Global array tanh_lut<0u, 512u>::data_cd is 2048 bytes. Consider making it mapper-managed LUT or memory buffer. +SIZE_HINT: Global array log_f32_lut<256u>::fraction_table_ab_f32 is 1024 bytes. Consider making it mapper-managed LUT or memory buffer. +SIZE_HINT: Global array log_f32_lut<256u>::fraction_table_cd_f32 is 1024 bytes. Consider making it mapper-managed LUT or memory buffer. +SIZE_HINT: Global array exp2_lut<512u>::exp2_table_ab is 1024 bytes. Consider making it mapper-managed LUT or memory buffer. +SIZE_HINT: Global array exp2_lut<512u>::exp2_table_cd is 1024 bytes. Consider making it mapper-managed LUT or memory buffer. + +-------------------------------------------------------------- + + + +--------------- MERGING SUBWORD STORES OPT LOG --------------- + + + +--------------- LOOP STATISTICS : _ZN18conv2d_bf16_paramsC2Ev --------------- + +Total loops = 1 +Loops with prepare for pipelining pragma = 0 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 0 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +----------------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh --------------- + +Total loops = 1 +Loops with prepare for pipelining pragma = 0 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 0 +Loops with max range pragma = 0 +Loops with known trip count = 1 + +------------------------------------------------------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _Z14conv2d_genericILh1EL5act_t0ELb0ELb1ELb0E8bfloat16EvPS1_S2_S2_S2_R18conv2d_bf16_params10out_mode_t --------------- + +Total loops = 4 +Loops with prepare for pipelining pragma = 4 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 4 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +------------------------------------------------------------------------------------------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams --------------- + +Total loops = 1 +Loops with prepare for pipelining pragma = 1 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 1 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +------------------------------------------------------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E --------------- + +Total loops = 1 +Loops with prepare for pipelining pragma = 1 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 1 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + + + +--------------- LOOP STATISTICS : _ZNSt3__25arrayIN3aie6vectorI8bfloat16Lj32EEELj2EEC2Ev --------------- + +Total loops = 1 +Loops with prepare for pipelining pragma = 0 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 0 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +-------------------------------------------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE --------------- + +Total loops = 1 +Loops with prepare for pipelining pragma = 1 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 1 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +--------------------------------------------------------------------------------------------------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E --------------- + +Total loops = 1 +Loops with prepare for pipelining pragma = 1 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 1 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E --------------- + +Total loops = 1 +Loops with prepare for pipelining pragma = 1 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 1 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh --------------- + +Total loops = 1 +Loops with prepare for pipelining pragma = 0 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 0 +Loops with max range pragma = 0 +Loops with known trip count = 1 + +-------------------------------------------------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params --------------- + +Total loops = 2 +Loops with prepare for pipelining pragma = 2 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 2 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +-------------------------------------------------------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params --------------- + +Total loops = 1 +Loops with prepare for pipelining pragma = 1 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 1 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +--------------------------------------------------------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _Z8init_accILt1EEvPaS0_iii --------------- + +Total loops = 2 +Loops with prepare for pipelining pragma = 0 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 2 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +---------------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _ZL12gemm_bf16x16ILj2ELj2ELj1EEvP8bfloat16S1_PaR10MMultIncrsb --------------- + +Total loops = 2 +Loops with prepare for pipelining pragma = 2 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 2 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +--------------------------------------------------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _Z12post_processPai --------------- + +Total loops = 1 +Loops with prepare for pipelining pragma = 0 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 1 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +--------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _ZL14transpose_bf16P8bfloat16S0_j --------------- + +Total loops = 1 +Loops with prepare for pipelining pragma = 1 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 1 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +----------------------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _ZNSt3__25arrayIN3aie4mmulILj8ELj8ELj8E8bfloat16S3_7accautoEELj4EEC2Ev --------------- + +Total loops = 1 +Loops with prepare for pipelining pragma = 0 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 0 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +------------------------------------------------------------------------------------------------------------------------ + + + +--------------- LOOP STATISTICS : _ZNSt3__25arrayIN3aie6vectorI8bfloat16Lj64EEELj2EEC2Ev --------------- + +Total loops = 1 +Loops with prepare for pipelining pragma = 0 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 0 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +-------------------------------------------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _ZNSt3__25arrayIN3aie6detail10accum_baseILNS2_10AccumClassE2ELj32ELj32EEELj2EEC2Ev --------------- + +Total loops = 1 +Loops with prepare for pipelining pragma = 0 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 0 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +------------------------------------------------------------------------------------------------------------------------------------ + + + +--------------- LOOP STATISTICS : _ZN3aie17tensor_descriptorILj4E8bfloat16Lj64ENSt3__25tupleIJNS_6detail6dim_3dEiEEEE14steps_to_incrsERKNS2_5arrayINS_10tensor_dimELj4EEEb --------------- + +Total loops = 1 +Loops with prepare for pipelining pragma = 0 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 0 +Loops with max range pragma = 0 +Loops with known trip count = 1 + +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + + + +--------------- LOOP STATISTICS : _ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E --------------- + +Total loops = 7 +Loops with prepare for pipelining pragma = 3 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 3 +Loops with max range pragma = 0 +Loops with known trip count = 3 + +------------------------------------------------------------------------------------------------------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _ZNSt3__25arrayIN3aie6vectorI8bfloat16Lj32EEELj1EEC2Ev --------------- + +Total loops = 1 +Loops with prepare for pipelining pragma = 0 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 0 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +-------------------------------------------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _ZNSt3__28__fill_nB7v160003IP6addr_tjS1_EET_S3_T0_RKT1_ --------------- + +Total loops = 1 +Loops with prepare for pipelining pragma = 0 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 0 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +--------------------------------------------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj --------------- + +Total loops = 3 +Loops with prepare for pipelining pragma = 0 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 0 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params --------------- + +Total loops = 1 +Loops with prepare for pipelining pragma = 1 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 0 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +-------------------------------------------------------------------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj --------------- + +Total loops = 1 +Loops with prepare for pipelining pragma = 0 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 0 +Loops with max range pragma = 0 +Loops with known trip count = 1 + +------------------------------------------------------------------------------------------------------------------------------- + + + +--------------- LOOP STATISTICS : _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params --------------- + +Total loops = 4 +Loops with prepare for pipelining pragma = 0 +Loops with unroll pragma = 0 +Loops with flatten pragma = 0 +Loops with min range pragma = 0 +Loops with max range pragma = 0 +Loops with known trip count = 0 + +---------------------------------------------------------------------------------------------------------------------------------------------------------------- + + + +--------------- PRAGMA INSERTION LOG (unroll threshold = 3000, max unroll factor = 1) --------------- + +Adding pragma to function _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh : + Pragma added to loop at line 398 in file aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h : chess_loop_range(8,8) +Adding pragma to function _ZNSt3__25arrayIN3aie6vectorI8bfloat16Lj32EEELj2EEC2Ev : + Pragma added to loop at line 158 in file aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include-lite/../include/array : chess_prepare_for_pipelining +Adding pragma to function _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh : + Pragma added to loop at line 180 in file aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16_params.h : chess_loop_range(4,4) +Adding pragma to function _ZN3aie17tensor_descriptorILj4E8bfloat16Lj64ENSt3__25tupleIJNS_6detail6dim_3dEiEEEE14steps_to_incrsERKNS2_5arrayINS_10tensor_dimELj4EEEb : + Pragma added to loop at line 7052 in file aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/aie.hpp : chess_loop_range(4,4) +Adding pragma to function _Z8init_accILt1EEvPaS0_iii : + Pragma added to loop at line 68 in file aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h : chess_prepare_for_pipelining + Pragma added to loop at line 53 in file aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h : chess_prepare_for_pipelining +Adding pragma to function _ZNSt3__25arrayIN3aie4mmulILj8ELj8ELj8E8bfloat16S3_7accautoEELj4EEC2Ev : + Pragma added to loop at line 158 in file aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include-lite/../include/array : chess_prepare_for_pipelining +Adding pragma to function _ZNSt3__25arrayIN3aie6detail10accum_baseILNS2_10AccumClassE2ELj32ELj32EEELj2EEC2Ev : + Pragma added to loop at line 158 in file aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include-lite/../include/array : chess_prepare_for_pipelining +Adding pragma to function _Z12post_processPai : + Pragma added to loop at line 92 in file aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/gemm/gemm_bfp16.h : chess_prepare_for_pipelining +Adding pragma to function _ZNSt3__25arrayIN3aie6vectorI8bfloat16Lj32EEELj1EEC2Ev : + Pragma added to loop at line 158 in file aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include-lite/../include/array : chess_prepare_for_pipelining +Adding pragma to function _ZN15reduce_skeletonI8bfloat1614reducesum_implIS0_E18reducesum_params_tIS0_EiE3runEPS0_S6_R15reduce_params_tIS4_E : + Pragma added to loop at line 280 in file aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h : chess_loop_range(32,32) + Pragma added to loop at line 268 in file aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h : chess_loop_range(1,1) + Pragma added to loop at line 268 in file aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h : chess_flatten_loop + Pragma added to loop at line 258 in file aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h : chess_loop_range(1,1) + Pragma added to loop at line 258 in file aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/reduce_base.h : chess_flatten_loop +Adding pragma to function _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj : + Pragma added to loop at line 75 in file aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle_params.h : chess_loop_range(7,7) +Adding pragma to function _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params : + Pragma added to loop at line 88 in file aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h : chess_prepare_for_pipelining + Pragma added to loop at line 116 in file aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h : chess_prepare_for_pipelining + +----------------------------------------------------------------------------------------------------- + diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable9/Release/0_0_reloadable9.calltree b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable9/Release/0_0_reloadable9.calltree new file mode 100644 index 0000000000000000000000000000000000000000..d14fb6d090940167cca43c8c6a97196883ac208d --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable9/Release/0_0_reloadable9.calltree @@ -0,0 +1,96 @@ + +// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri May 30 11:30:05 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// bridge -o../Release/0_0_reloadable4 ../Release/0_0_reloadable4.o -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/isg -g -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable4.bcf -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/softfloat/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork3577691 -pme + + +// Release: ipp V-2024.06-TGT-241219 + +_Z13kernelWrapperPPvjjjj + _Z15_b14160_wrapperPPv (referenced text) + _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh + _ZN12me_primitive10udiv_dstepEjjRjS0_ + _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params + _Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params + _Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params + _Z14_b7835_wrapperPPv (referenced text) + _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj + _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj + _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params + _Z14_b8148_wrapperPPv (referenced text) + _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj + memset + _Z15_b13739_wrapperPPv (referenced text) + _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv + _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv + _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E + _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E + _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E + _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E + _Z15_b13744_wrapperPPv (referenced text) + _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv + _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E + _Z15_b13749_wrapperPPv (referenced text) + _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E + _Z14_b8170_wrapperPPv (referenced text) + _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj + _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj + _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj + _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params + _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params + + +Call tree stack and functions sizes: + +stack stack stack call func func function name + desc level level desc +----- ----- ----- ----- ----- ----- -------------------------------------------------------------- + 64 256 0 0 546 9978 _Z13kernelWrapperPPvjjjj + 0 192 1 1 36 2736 _Z15_b14160_wrapperPPv + 128 192 1 2 478 2700 _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + 64 64 2 3 792 934 _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh + 0 0 3 4 142 142 _ZN12me_primitive10udiv_dstepEjjRjS0_ + 64 64 2 3 360 1288 _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params + 0 0 3 4 674 674 _Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params + 0 0 2 4 254 254 _Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params + 0 64 1 1 32 1646 _Z14_b7835_wrapperPPv + 64 64 1 2 202 1614 _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj + 0 0 2 3 262 262 _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj + 0 0 2 3 1150 1150 _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params + 0 64 1 1 32 690 _Z14_b8148_wrapperPPv + 64 64 1 2 484 658 _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj + 0 0 2 3 174 174 memset + 0 192 1 1 32 1282 _Z15_b13739_wrapperPPv + 64 192 1 2 488 1250 _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + 64 128 2 3 60 318 _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv + 64 64 3 4 178 202 _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv + 0 0 4 5 24 24 _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E + 0 0 2 4 56 56 _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E + 128 128 2 3 114 444 _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E + 0 0 3 4 330 330 _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E + 0 64 1 1 32 966 _Z15_b13744_wrapperPPv + 64 64 1 2 488 934 _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + 0 0 2 3 68 68 _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv + 0 0 2 3 378 378 _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E + 0 128 1 1 36 1124 _Z15_b13749_wrapperPPv + 64 128 1 2 602 1088 _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE + 64 64 2 3 154 178 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv + 0 0 3 4 24 24 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E + 0 0 2 3 308 308 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E + 0 128 1 1 32 988 _Z14_b8170_wrapperPPv + 128 128 1 2 178 956 _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj + 0 0 2 3 52 298 _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj + 0 0 3 4 162 162 _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj + 0 0 2 4 84 84 _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params + 0 0 2 3 480 480 _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params + + +Maximum call level : 5 +Maximum stack level: 4 +Maximum stack size : 256 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable9/Release/0_0_reloadable9.cmic2 b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable9/Release/0_0_reloadable9.cmic2 new file mode 100644 index 0000000000000000000000000000000000000000..f5e6b3e5828701d92c85b709bbc3b7c45a16ad8f --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable9/Release/0_0_reloadable9.cmic2 @@ -0,0 +1,14042 @@ + +// File generated by darts version V-2024.06#84922c0d9f#241219, Fri May 30 11:30:06 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// darts -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -d -h -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable4 me + +// Release: ipp V-2024.06-TGT-241219 +.label __Z13kernelWrapperPPvjjjj___func_begin0 +.label _Z13kernelWrapperPPvjjjj +.function kernelWrapper _Z13kernelWrapperPPvjjjj +.src_ref 0 "0_0_reloadable4.cc" 91 first +.src_ref 0 "0_0_reloadable4.cc" 93 60 +.src_ref 0 "0_0_reloadable4.cc" 93 110 first +.function_start + 2528 "00101100" // LDA r16, [p0]; NEZ r26, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2529 "11100000" // /* MW 5 */ + 2530 "11101001" // /* MW 4 */ + 2531 "11010000" // /* MW 3 */ + 2532 "11000010" // /* MW 2 */ + 2533 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 91 + 2534 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2535 "00000001" // /* MW 5 */ + 2536 "00000000" // /* MW 4 */ + 2537 "00000000" // /* MW 3 */ + 2538 "00001000" // /* MW 2 */ + 2539 "00000000" // /* MW 1 */ + 2540 "10011000" // ST p6, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2541 "00011101" // /* MW 3 */ + 2542 "11101111" // /* MW 2 */ + 2543 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 0 "0_0_reloadable4.cc" 98 112 + 2544 "00000010" // ST r14, [sp, #-16]; MOV r14, r3 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2545 "01110000" // /* MW 7 */ + 2546 "11010000" // /* MW 6 */ + 2547 "11001000" // /* MW 5 */ + 2548 "00000001" // /* MW 4 */ + 2549 "10110000" // /* MW 3 */ + 2550 "00111010" // /* MW 2 */ + 2551 "11111110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 0 "0_0_reloadable4.cc" 95 110 + 2552 "00000010" // ST r15, [sp, #-8]; MOV r15, r1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2553 "01110000" // /* MW 7 */ + 2554 "01010000" // /* MW 6 */ + 2555 "11101000" // /* MW 5 */ + 2556 "00000001" // /* MW 4 */ + 2557 "10110000" // /* MW 3 */ + 2558 "00111110" // /* MW 2 */ + 2559 "11111111" // /* MW 1 */ + 2560 "10011000" // ST p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2561 "10011101" // /* MW 3 */ + 2562 "11110111" // /* MW 2 */ + 2563 "00001111" // /* MW 1 */ + 2564 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2565 "00111101" // /* MW 3 */ + 2566 "11111100" // /* MW 2 */ + 2567 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 first + 2568 "00011000" // ADD.NC p6, r16, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2569 "00000010" // /* MW 3 */ + 2570 "01101000" // /* MW 2 */ + 2571 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 + 2572 "10011000" // LDA r16, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2573 "00010110" // /* MW 3 */ + 2574 "00011110" // /* MW 2 */ + 2575 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 36 + 2576 "10011000" // LDA r18, [p6], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2577 "01010110" // /* MW 3 */ + 2578 "00111110" // /* MW 2 */ + 2579 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 46 + 2580 "10011000" // LDA r17, [p6], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2581 "00110110" // /* MW 3 */ + 2582 "11101110" // /* MW 2 */ + 2583 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 60 + 2584 "10011000" // LDA r27, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2585 "01110110" // /* MW 3 */ + 2586 "00000111" // /* MW 2 */ + 2587 "00000110" // /* MW 1 */ + 2588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2589 "00000000" // /* MW 1 */ + 2590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2591 "00000000" // /* MW 1 */ + 2592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2593 "00000000" // /* MW 1 */ + 2594 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2595 "00000000" // /* MW 1 */ + 2596 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2597 "00000000" // /* MW 1 */ + 2598 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2599 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 219 30 first +.src_ref 1 "io_buffer_compiler.h" 219 37 first + 2600 "00011000" // SEL.EQZ r16, r16, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2601 "00100010" // /* MW 3 */ + 2602 "00100001" // /* MW 2 */ + 2603 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 218 23 first + 2604 "10011000" // ST r16, [p6, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2605 "00010001" // /* MW 3 */ + 2606 "11010110" // /* MW 2 */ + 2607 "00001110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 +.src_ref 1 "io_buffer_main.h" 434 8 +.src_ref 1 "io_buffer_main.h" 434 8 + 2608 "11100100" // MOVX r16, #-1; MOV el0, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2609 "00111001" // /* MW 5 */ + 2610 "00110101" // /* MW 4 */ + 2611 "10100000" // /* MW 3 */ + 2612 "00011111" // /* MW 2 */ + 2613 "11111100" // /* MW 1 */ + 2614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2615 "00000000" // /* MW 1 */ + 2616 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2617 "00000000" // /* MW 1 */ + 2618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2619 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 first + 2620 "00011000" // ACQ.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2621 "00001000" // /* MW 3 */ + 2622 "01010111" // /* MW 2 */ + 2623 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 95 60 +.src_ref 0 "0_0_reloadable4.cc" 95 110 +.src_ref 0 "0_0_reloadable4.cc" 98 60 +.src_ref 0 "0_0_reloadable4.cc" 101 7 + 2624 "01100100" // MOVX r17, #2; MOV r19, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2625 "00000101" // /* MW 5 */ + 2626 "10100000" // /* MW 4 */ + 2627 "00101001" // /* MW 3 */ + 2628 "01000001" // /* MW 2 */ + 2629 "00000100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 95 60 +.src_ref 0 "0_0_reloadable4.cc" 95 60 first + 2630 "11100100" // LSHL r20, r26, r17; MOV r18, p0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2631 "10000001" // /* MW 5 */ + 2632 "00100001" // /* MW 4 */ + 2633 "10111001" // /* MW 3 */ + 2634 "00100011" // /* MW 2 */ + 2635 "11010101" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 95 60 +.src_ref 0 "0_0_reloadable4.cc" 95 110 + 2636 "10100100" // LTU r18, r19, r15; ADD.NC p6, r18, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2637 "10100010" // /* MW 5 */ + 2638 "11010010" // /* MW 4 */ + 2639 "10011100" // /* MW 3 */ + 2640 "10011111" // /* MW 2 */ + 2641 "10011100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 95 60 +.src_ref 0 "0_0_reloadable4.cc" 98 60 + 2642 "10111010" // LDA r20, [p6]; ST r20, [sp, #-28]; MOV r19, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2643 "01110010" // /* MW 9 */ + 2644 "01100000" // /* MW 8 */ + 2645 "01101110" // /* MW 7 */ + 2646 "10000010" // /* MW 6 */ + 2647 "10010101" // /* MW 5 */ + 2648 "11100110" // /* MW 4 */ + 2649 "11010111" // /* MW 3 */ + 2650 "11010010" // /* MW 2 */ + 2651 "11000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 + 2652 "00000010" // ST r18, [sp, #-24]; MOV r26, r18 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2653 "01110000" // /* MW 7 */ + 2654 "10010000" // /* MW 6 */ + 2655 "01001100" // /* MW 5 */ + 2656 "00000011" // /* MW 4 */ + 2657 "10110000" // /* MW 3 */ + 2658 "01001010" // /* MW 2 */ + 2659 "11111101" // /* MW 1 */ + 2660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2661 "00000000" // /* MW 1 */ + 2662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2663 "00000000" // /* MW 1 */ + 2664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2665 "00000000" // /* MW 1 */ + 2666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2667 "00000000" // /* MW 1 */ + 2668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2669 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 first + 2670 "00011000" // ADD.NC p6, r20, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2671 "00000010" // /* MW 3 */ + 2672 "01101010" // /* MW 2 */ + 2673 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 + 2674 "10011000" // LDA r20, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2675 "10010110" // /* MW 3 */ + 2676 "00011110" // /* MW 2 */ + 2677 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 36 + 2678 "10011000" // LDA r22, [p6], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2679 "11010110" // /* MW 3 */ + 2680 "00111110" // /* MW 2 */ + 2681 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 46 + 2682 "10011000" // LDA r21, [p6], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2683 "10110110" // /* MW 3 */ + 2684 "11101110" // /* MW 2 */ + 2685 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 60 + 2686 "10011000" // LDA r27, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2687 "01110110" // /* MW 3 */ + 2688 "00000111" // /* MW 2 */ + 2689 "00000110" // /* MW 1 */ + 2690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2691 "00000000" // /* MW 1 */ + 2692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2693 "00000000" // /* MW 1 */ + 2694 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2695 "00000000" // /* MW 1 */ + 2696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2697 "00000000" // /* MW 1 */ + 2698 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2699 "00000000" // /* MW 1 */ + 2700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2701 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 219 30 first +.src_ref 1 "io_buffer_compiler.h" 219 37 first + 2702 "00011000" // SEL.EQZ r20, r20, r22, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2703 "01100010" // /* MW 3 */ + 2704 "00101001" // /* MW 2 */ + 2705 "00010101" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 218 23 first + 2706 "10011000" // ST r20, [p6, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2707 "10010001" // /* MW 3 */ + 2708 "11010110" // /* MW 2 */ + 2709 "00001110" // /* MW 1 */ + 2710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2711 "00000000" // /* MW 1 */ + 2712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2713 "00000000" // /* MW 1 */ + 2714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2715 "00000000" // /* MW 1 */ + 2716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2717 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 first + 2718 "00011000" // ACQ.COND r21, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2719 "00001000" // /* MW 3 */ + 2720 "01010111" // /* MW 2 */ + 2721 "00010101" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 98 60 first + 2722 "10011000" // LSHL r18, r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2723 "00011101" // /* MW 3 */ + 2724 "10100101" // /* MW 2 */ + 2725 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 98 60 +.src_ref 0 "0_0_reloadable4.cc" 98 60 + 2726 "10100100" // LSHL r18, r2, r17; ADD.NC r19, r19, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2727 "10010010" // /* MW 5 */ + 2728 "10110011" // /* MW 4 */ + 2729 "10111001" // /* MW 3 */ + 2730 "10100011" // /* MW 2 */ + 2731 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 98 60 +.src_ref 0 "0_0_reloadable4.cc" 98 112 + 2732 "10100100" // NEZ r26, r14; ADD.NC p6, r19, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2733 "10010010" // /* MW 5 */ + 2734 "11010011" // /* MW 4 */ + 2735 "00001100" // /* MW 3 */ + 2736 "10011110" // /* MW 2 */ + 2737 "01110110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 98 60 + 2738 "00001100" // LDA r18, [p6]; ST r26, [sp, #-32] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2739 "10101011" // /* MW 5 */ + 2740 "11000110" // /* MW 4 */ + 2741 "11011111" // /* MW 3 */ + 2742 "11001010" // /* MW 2 */ + 2743 "11000000" // /* MW 1 */ + 2744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2745 "00000000" // /* MW 1 */ + 2746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2747 "00000000" // /* MW 1 */ + 2748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2749 "00000000" // /* MW 1 */ + 2750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2751 "00000000" // /* MW 1 */ + 2752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2753 "00000000" // /* MW 1 */ + 2754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2755 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 first + 2756 "00011000" // ADD.NC p7, r18, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2757 "00000010" // /* MW 3 */ + 2758 "01101001" // /* MW 2 */ + 2759 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 + 2760 "10011000" // LDA r19, [p7], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2761 "01110110" // /* MW 3 */ + 2762 "00111110" // /* MW 2 */ + 2763 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 23 + 2764 "10011000" // LDA r18, [p7], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2765 "01010110" // /* MW 3 */ + 2766 "11101110" // /* MW 2 */ + 2767 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 36 + 2768 "10011000" // LDA r20, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2769 "10010110" // /* MW 3 */ + 2770 "00011110" // /* MW 2 */ + 2771 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 60 + 2772 "10011000" // LDA r27, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2773 "01110110" // /* MW 3 */ + 2774 "00000111" // /* MW 2 */ + 2775 "00000111" // /* MW 1 */ + 2776 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2777 "00000000" // /* MW 1 */ + 2778 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2779 "00000000" // /* MW 1 */ + 2780 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2781 "00000000" // /* MW 1 */ + 2782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2783 "00000000" // /* MW 1 */ + 2784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2785 "00000000" // /* MW 1 */ + 2786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2787 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 219 30 first +.src_ref 1 "io_buffer_compiler.h" 219 37 first + 2788 "00011000" // SEL.EQZ r19, r19, r20, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2789 "01000010" // /* MW 3 */ + 2790 "11100111" // /* MW 2 */ + 2791 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 218 23 first + 2792 "10011000" // ST r19, [p7, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2793 "01110001" // /* MW 3 */ + 2794 "11010110" // /* MW 2 */ + 2795 "00001111" // /* MW 1 */ + 2796 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2797 "00000000" // /* MW 1 */ + 2798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2799 "00000000" // /* MW 1 */ + 2800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2801 "00000000" // /* MW 1 */ + 2802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2803 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 first + 2804 "00011000" // ACQ.COND r18, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2805 "00001000" // /* MW 3 */ + 2806 "10010111" // /* MW 2 */ + 2807 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 101 7 first + 2808 "10011000" // LSHL r16, r0, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2809 "00011101" // /* MW 3 */ + 2810 "00100001" // /* MW 2 */ + 2811 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 101 7 + 2812 "11111000" // MOV dj0, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2813 "00100000" // /* MW 3 */ + 2814 "10001000" // /* MW 2 */ + 2815 "00011000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 101 7 + 2816 "01000100" // MOVXM p7, #508480 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2817 "10000000" // /* MW 5 */ + 2818 "11000100" // /* MW 4 */ + 2819 "11001110" // /* MW 3 */ + 2820 "00000111" // /* MW 2 */ + 2821 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 101 7 + 2822 "00001100" // LDA p1, [p7, dj0]; ST el0, [sp, #-36] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2823 "01011011" // /* MW 5 */ + 2824 "10111000" // /* MW 4 */ + 2825 "11011111" // /* MW 3 */ + 2826 "00010011" // /* MW 2 */ + 2827 "11100000" // /* MW 1 */ + 2828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2829 "00000000" // /* MW 1 */ + 2830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2831 "00000000" // /* MW 1 */ + 2832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2833 "00000000" // /* MW 1 */ + 2834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2835 "00000000" // /* MW 1 */ + 2836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2837 "00000000" // /* MW 1 */ + 2838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2839 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 101 4 +.no_stack_arguments + 2840 "00011000" // JL p1 /* MW 4 */ /* control_operation: words=4 call unconditional cycles_taken=1 indirect absolute delay_slots=5 */ + 2841 "01000000" // /* MW 3 */ + 2842 "00110000" // /* MW 2 */ + 2843 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 104 60 +.src_ref 0 "0_0_reloadable4.cc" 106 60 +.delay_slot + 2844 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2845 "11000000" // /* MW 3 */ + 2846 "01100000" // /* MW 2 */ + 2847 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2849 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2851 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2853 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2854 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2855 "01111110" // /* MW 9 */ + 2856 "10100101" // /* MW 8 */ + 2857 "00000001" // /* MW 7 */ + 2858 "00000000" // /* MW 6 */ + 2859 "00010000" // /* MW 5 */ + 2860 "00000000" // /* MW 4 */ + 2861 "11110000" // /* MW 3 */ + 2862 "00101100" // /* MW 2 */ + 2863 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_main.h" 464 8 +.src_ref 1 "io_buffer_main.h" 464 8 +.src_ref 1 "io_buffer_main.h" 464 8 +.src_ref 0 "0_0_reloadable4.cc" 104 60 first +.return_address + 2864 "00101100" // LDA r17, [p7]; MOVX r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2865 "00001010" // /* MW 5 */ + 2866 "01000000" // /* MW 4 */ + 2867 "11010000" // /* MW 3 */ + 2868 "11000110" // /* MW 2 */ + 2869 "11100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 + 2870 "00011000" // LDA r26, [sp, #-36] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2871 "01010001" // /* MW 3 */ + 2872 "11011111" // /* MW 2 */ + 2873 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 106 60 + 2874 "00011000" // LDA dj0, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2875 "01000001" // /* MW 3 */ + 2876 "11100100" // /* MW 2 */ + 2877 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_main.h" 464 8 + 2878 "00011000" // LDA el0, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2879 "00101001" // /* MW 3 */ + 2880 "11101000" // /* MW 2 */ + 2881 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 + 2882 "00011000" // LDA eh0, [sp, #-32] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2883 "00001001" // /* MW 3 */ + 2884 "11100000" // /* MW 2 */ + 2885 "00000111" // /* MW 1 */ + 2886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2887 "00000000" // /* MW 1 */ + 2888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2889 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 12 first + 2890 "00011000" // ADD.NC p0, r17, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2891 "10001000" // /* MW 3 */ + 2892 "01101000" // /* MW 2 */ + 2893 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 12 + 2894 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2895 "00110110" // /* MW 3 */ + 2896 "00000110" // /* MW 2 */ + 2897 "00000000" // /* MW 1 */ + 2898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2899 "00000000" // /* MW 1 */ + 2900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2901 "00000000" // /* MW 1 */ + 2902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2903 "00000000" // /* MW 1 */ + 2904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2905 "00000000" // /* MW 1 */ + 2906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2907 "00000000" // /* MW 1 */ + 2908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2909 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 first + 2910 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2911 "00001000" // /* MW 3 */ + 2912 "01010101" // /* MW 2 */ + 2913 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 2914 "11010100" // LDA r17, [p0, #-4]; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2915 "01000001" // /* MW 5 */ + 2916 "10101111" // /* MW 4 */ + 2917 "11011101" // /* MW 3 */ + 2918 "11000110" // /* MW 2 */ + 2919 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 +.src_ref 0 "0_0_reloadable4.cc" 106 60 first + 2920 "11010100" // LDA r18, [p7, dj0]; MOV r26, el0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2921 "00111001" // /* MW 5 */ + 2922 "01000000" // /* MW 4 */ + 2923 "11011101" // /* MW 3 */ + 2924 "01001010" // /* MW 2 */ + 2925 "11100000" // /* MW 1 */ + 2926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2927 "00000000" // /* MW 1 */ + 2928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2929 "00000000" // /* MW 1 */ + 2930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2931 "00000000" // /* MW 1 */ + 2932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2933 "00000000" // /* MW 1 */ + 2934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2935 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 2936 "10011000" // SUB r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2937 "00010001" // /* MW 3 */ + 2938 "00100111" // /* MW 2 */ + 2939 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 12 first +.src_ref 1 "io_buffer_compiler.h" 630 24 + 2940 "00100100" // SEL.EQZ r17, r17, r19, r27; ADD.NC p7, r18, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2941 "00010000" // /* MW 5 */ + 2942 "11010010" // /* MW 4 */ + 2943 "01001110" // /* MW 3 */ + 2944 "01100110" // /* MW 2 */ + 2945 "10001100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 12 +.src_ref 1 "io_buffer_compiler.h" 630 22 first + 2946 "00001100" // LDA r17, [p7]; ST r17, [p0, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2947 "01100011" // /* MW 5 */ + 2948 "11101100" // /* MW 4 */ + 2949 "11010001" // /* MW 3 */ + 2950 "11000110" // /* MW 2 */ + 2951 "11100000" // /* MW 1 */ + 2952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2953 "00000000" // /* MW 1 */ + 2954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2955 "00000000" // /* MW 1 */ + 2956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2957 "00000000" // /* MW 1 */ + 2958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2959 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 2960 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2961 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 2962 "11111000" // MOV r26, eh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2963 "00011100" // /* MW 3 */ + 2964 "10100001" // /* MW 2 */ + 2965 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2966 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2967 "00001000" // /* MW 3 */ + 2968 "01010101" // /* MW 2 */ + 2969 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 2970 "11010100" // LDA r17, [p7, #-4]; MOV r27, el0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2971 "00111001" // /* MW 5 */ + 2972 "11000000" // /* MW 4 */ + 2973 "11011101" // /* MW 3 */ + 2974 "11000110" // /* MW 2 */ + 2975 "11111110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 109 60 first + 2976 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2977 "01010110" // /* MW 3 */ + 2978 "00000110" // /* MW 2 */ + 2979 "00000110" // /* MW 1 */ + 2980 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2981 "00000000" // /* MW 1 */ + 2982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2983 "00000000" // /* MW 1 */ + 2984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2985 "00000000" // /* MW 1 */ + 2986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2987 "00000000" // /* MW 1 */ + 2988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2989 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 2990 "10011000" // SUB r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2991 "00010001" // /* MW 3 */ + 2992 "00100111" // /* MW 2 */ + 2993 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 25 first +.src_ref 1 "io_buffer_compiler.h" 630 24 + 2994 "00100100" // SEL.EQZ r17, r17, r19, r27; ADD.NC p0, r18, #20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2995 "00010100" // /* MW 5 */ + 2996 "11010010" // /* MW 4 */ + 2997 "01000000" // /* MW 3 */ + 2998 "01100110" // /* MW 2 */ + 2999 "10001100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 25 +.src_ref 1 "io_buffer_compiler.h" 630 22 first + 3000 "00001100" // LDA r17, [p0]; ST r17, [p7, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3001 "01100011" // /* MW 5 */ + 3002 "11101100" // /* MW 4 */ + 3003 "11011111" // /* MW 3 */ + 3004 "11000110" // /* MW 2 */ + 3005 "00000000" // /* MW 1 */ + 3006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3007 "00000000" // /* MW 1 */ + 3008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3009 "00000000" // /* MW 1 */ + 3010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3011 "00000000" // /* MW 1 */ + 3012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3013 "00000000" // /* MW 1 */ + 3014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3015 "00000000" // /* MW 1 */ + 3016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3017 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 first + 3018 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3019 "00001000" // /* MW 3 */ + 3020 "01010101" // /* MW 2 */ + 3021 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 111 + 3022 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3023 "00111001" // /* MW 3 */ + 3024 "11111100" // /* MW 2 */ + 3025 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 3026 "10011000" // LDA r17, [p0, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3027 "00110110" // /* MW 3 */ + 3028 "11100110" // /* MW 2 */ + 3029 "00000000" // /* MW 1 */ + 3030 "00011000" // LDA p6, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3031 "00011001" // /* MW 3 */ + 3032 "11101111" // /* MW 2 */ + 3033 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 3034 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3035 "10011001" // /* MW 3 */ + 3036 "11110111" // /* MW 2 */ + 3037 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 3038 "00011000" // LDA r14, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3039 "11010001" // /* MW 3 */ + 3040 "11110001" // /* MW 2 */ + 3041 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3042 "00011000" // LDA r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3043 "11110001" // /* MW 3 */ + 3044 "11111001" // /* MW 2 */ + 3045 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 111 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3046 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3047 "00000001" // /* MW 5 */ + 3048 "00000000" // /* MW 4 */ + 3049 "00000000" // /* MW 3 */ + 3050 "11111000" // /* MW 2 */ + 3051 "11111111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 111 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3052 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3053 "00000000" // /* MW 3 */ + 3054 "00101000" // /* MW 2 */ + 3055 "00010000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 first +.delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3056 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3057 "00010001" // /* MW 3 */ + 3058 "00100001" // /* MW 2 */ + 3059 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3061 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3062 "11111000" // MOV r27, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3063 "00100000" // /* MW 3 */ + 3064 "11010111" // /* MW 2 */ + 3065 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.delay_slot + 3066 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3067 "00000010" // /* MW 3 */ + 3068 "01100001" // /* MW 2 */ + 3069 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 22 +.delay_slot + 3070 "10011000" // ST r16, [p0, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3071 "00010001" // /* MW 3 */ + 3072 "11100110" // /* MW 2 */ +.label _Z13kernelWrapperPPvjjjj__end +.label __Z13kernelWrapperPPvjjjj___func_end0 + 3073 "00001000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E +.function setup _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E +.src_ref 2 "elementwise_binary.h" 100 first +.src_ref 2 "elementwise_binary.h" 103 4 first +.function_start + 3088 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3089 "00000000" // /* MW 3 */ + 3090 "00101000" // /* MW 2 */ + 3091 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 3092 "01000100" // MOVXM p0, #508704 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3093 "01000000" // /* MW 5 */ + 3094 "11000110" // /* MW 4 */ + 3095 "11000000" // /* MW 3 */ + 3096 "00000111" // /* MW 2 */ + 3097 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 3098 "10111000" // MOV m0, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3099 "10000000" // /* MW 3 */ + 3100 "00000000" // /* MW 2 */ + 3101 "00011000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 first +.delay_slot + 3102 "10011000" // ST m0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3103 "00000001" // /* MW 3 */ + 3104 "00000100" // /* MW 2 */ + 3105 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 3106 "10011000" // ST m0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3107 "00000001" // /* MW 3 */ + 3108 "00010100" // /* MW 2 */ + 3109 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_end0 + 3111 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv +.function setup _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv +.src_ref 2 "elementwise_binary.h" 89 first +.src_ref 2 "elementwise_binary.h" 92 22 +.src_ref 2 "elementwise_binary.h" 92 24 first +.function_start + 3120 "10111010" // LDA el0, [p1], #4; MOVXM p0, #508672 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3121 "00010000" // /* MW 9 */ + 3122 "10000000" // /* MW 8 */ + 3123 "00110001" // /* MW 7 */ + 3124 "11110000" // /* MW 6 */ + 3125 "00000001" // /* MW 5 */ + 3126 "00000000" // /* MW 4 */ + 3127 "11010000" // /* MW 3 */ + 3128 "10000101" // /* MW 2 */ + 3129 "00100011" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 89 + 3130 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3131 "00000001" // /* MW 5 */ + 3132 "00000000" // /* MW 4 */ + 3133 "00000000" // /* MW 3 */ + 3134 "00001000" // /* MW 2 */ + 3135 "00000000" // /* MW 1 */ + 3136 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3137 "00111101" // /* MW 3 */ + 3138 "11111000" // /* MW 2 */ + 3139 "00001111" // /* MW 1 */ + 3140 "10011000" // ST r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3141 "11110101" // /* MW 3 */ + 3142 "11111101" // /* MW 2 */ + 3143 "00001111" // /* MW 1 */ + 3144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3145 "00000000" // /* MW 1 */ + 3146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3147 "00000000" // /* MW 1 */ + 3148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3149 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 92 22 first + 3150 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3151 "00101001" // /* MW 3 */ + 3152 "00011100" // /* MW 2 */ + 3153 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 93 24 first + 3154 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3155 "00101110" // /* MW 3 */ + 3156 "00011100" // /* MW 2 */ + 3157 "00000001" // /* MW 1 */ + 3158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3159 "00000000" // /* MW 1 */ + 3160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3161 "00000000" // /* MW 1 */ + 3162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3163 "00000000" // /* MW 1 */ + 3164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3165 "00000000" // /* MW 1 */ + 3166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3167 "00000000" // /* MW 1 */ + 3168 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3169 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 93 22 + 3170 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3171 "00101001" // /* MW 3 */ + 3172 "00011100" // /* MW 2 */ + 3173 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 94 24 first + 3174 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3175 "00101110" // /* MW 3 */ + 3176 "00000100" // /* MW 2 */ + 3177 "00000001" // /* MW 1 */ + 3178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3179 "00000000" // /* MW 1 */ + 3180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3181 "00000000" // /* MW 1 */ + 3182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3183 "00000000" // /* MW 1 */ + 3184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3185 "00000000" // /* MW 1 */ + 3186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3187 "00000000" // /* MW 1 */ + 3188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3189 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 94 22 + 3190 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3191 "00101001" // /* MW 3 */ + 3192 "00011100" // /* MW 2 */ + 3193 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 95 24 first + 3194 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3195 "00101110" // /* MW 3 */ + 3196 "00010100" // /* MW 2 */ + 3197 "00000001" // /* MW 1 */ + 3198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3199 "00000000" // /* MW 1 */ + 3200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3201 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 96 8 first +.no_stack_arguments + 3202 "00000100" // JL #3088 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3088 delay_slots=5 */ + 3203 "00000001" // /* MW 5 */ + 3204 "00000000" // /* MW 4 */ + 3205 "00001000" // /* MW 3 */ + 3206 "00000110" // /* MW 2 */ + 3207 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3209 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3211 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3212 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3213 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 95 22 first +.delay_slot + 3214 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3215 "00101001" // /* MW 3 */ + 3216 "11011100" // /* MW 2 */ + 3217 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 96 8 +.delay_slot + 3218 "00101110" // NOPA; NOPS; MOV r15, p0; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 3219 "00011100" // /* MW 13 */ + 3220 "00000000" // /* MW 12 */ + 3221 "00000000" // /* MW 11 */ + 3222 "00000111" // /* MW 10 */ + 3223 "10000110" // /* MW 9 */ + 3224 "01011110" // /* MW 8 */ + 3225 "00000000" // /* MW 7 */ + 3226 "00000000" // /* MW 6 */ + 3227 "10110110" // /* MW 5 */ + 3228 "00000010" // /* MW 4 */ + 3229 "11110000" // /* MW 3 */ + 3230 "00101100" // /* MW 2 */ + 3231 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 96 8 +.src_ref 2 "elementwise_binary.h" 98 4 +.src_ref 3 "add_impl.h" 105 29 +.return_address + 3232 "10111010" // LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3233 "00001000" // /* MW 9 */ + 3234 "11000100" // /* MW 8 */ + 3235 "00110011" // /* MW 7 */ + 3236 "01101000" // /* MW 6 */ + 3237 "00000000" // /* MW 5 */ + 3238 "00000001" // /* MW 4 */ + 3239 "00100000" // /* MW 3 */ + 3240 "00000111" // /* MW 2 */ + 3241 "11111111" // /* MW 1 */ +.src_ref 3 "add_impl.h" 105 29 +.src_ref 3 "add_impl.h" 106 37 +.src_ref 3 "add_impl.h" 106 39 + 3242 "10111010" // MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3243 "01011000" // /* MW 9 */ + 3244 "11111101" // /* MW 8 */ + 3245 "00000111" // /* MW 7 */ + 3246 "00001000" // /* MW 6 */ + 3247 "10000000" // /* MW 5 */ + 3248 "00000001" // /* MW 4 */ + 3249 "10000000" // /* MW 3 */ + 3250 "11100010" // /* MW 2 */ + 3251 "00000001" // /* MW 1 */ +.src_ref 3 "add_impl.h" 105 29 first +.src_ref 3 "add_impl.h" 106 39 + 3252 "01111010" // LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3253 "00000001" // /* MW 9 */ + 3254 "10100000" // /* MW 8 */ + 3255 "00000111" // /* MW 7 */ + 3256 "10000000" // /* MW 6 */ + 3257 "00010001" // /* MW 5 */ + 3258 "00001010" // /* MW 4 */ + 3259 "00100000" // /* MW 3 */ + 3260 "10111110" // /* MW 2 */ + 3261 "11111111" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 50 first + 3262 "10011000" // LDA.u8 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3263 "01001010" // /* MW 3 */ + 3264 "00000110" // /* MW 2 */ + 3265 "00000000" // /* MW 1 */ + 3266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3267 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3269 "00000000" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 37 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3270 "00011000" // ST.s16 r16, [p0, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3271 "00010111" // /* MW 3 */ + 3272 "00000010" // /* MW 2 */ + 3273 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 98 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3274 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3275 "00000000" // /* MW 3 */ + 3276 "00101000" // /* MW 2 */ + 3277 "00010000" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 54 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3278 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3279 "00000101" // /* MW 3 */ + 3280 "00100010" // /* MW 2 */ + 3281 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 98 4 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3282 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3283 "00000001" // /* MW 5 */ + 3284 "00000000" // /* MW 4 */ + 3285 "00000000" // /* MW 3 */ + 3286 "11111000" // /* MW 2 */ + 3287 "11111111" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 54 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3288 "10011000" // EQ r27, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3289 "00100111" // /* MW 3 */ + 3290 "01110111" // /* MW 2 */ + 3291 "00010100" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 39 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3292 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3293 "10000010" // /* MW 3 */ + 3294 "00100001" // /* MW 2 */ + 3295 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + 3297 "00000000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E +.function setup _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E +.src_ref 2 "elementwise_binary_broadcasting.h" 40 first +.src_ref 2 "elementwise_binary_broadcasting.h" 41 33 +.src_ref 2 "elementwise_binary_broadcasting.h" 41 33 +.function_start + 3312 "10111010" // MOVA m0, #20; MOVXM p0, #508684 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3313 "00010000" // /* MW 9 */ + 3314 "10000110" // /* MW 8 */ + 3315 "00110001" // /* MW 7 */ + 3316 "11110000" // /* MW 6 */ + 3317 "00000001" // /* MW 5 */ + 3318 "00000000" // /* MW 4 */ + 3319 "10000000" // /* MW 3 */ + 3320 "10000000" // /* MW 2 */ + 3321 "00000010" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 41 25 +.src_ref 2 "elementwise_binary_broadcasting.h" 41 33 +.src_ref 2 "elementwise_binary_broadcasting.h" 41 33 first +.src_ref 2 "elementwise_binary_broadcasting.h" 42 25 + 3322 "10111010" // LDA.u8 r0, [p0], m0; MOVX r2, #1; MOV r1, #6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3323 "01011000" // /* MW 9 */ + 3324 "00000110" // /* MW 8 */ + 3325 "00101000" // /* MW 7 */ + 3326 "00101000" // /* MW 6 */ + 3327 "00100000" // /* MW 5 */ + 3328 "00000000" // /* MW 4 */ + 3329 "01010000" // /* MW 3 */ + 3330 "00000001" // /* MW 2 */ + 3331 "00000001" // /* MW 1 */ + 3332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3333 "00000000" // /* MW 1 */ + 3334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3335 "00000000" // /* MW 1 */ + 3336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3337 "00000000" // /* MW 1 */ + 3338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3339 "00000000" // /* MW 1 */ + 3340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3341 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 43 4 first + 3342 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3343 "00000000" // /* MW 3 */ + 3344 "00101000" // /* MW 2 */ + 3345 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 41 33 first +.delay_slot + 3346 "00011000" // NEZ r3, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3347 "11110000" // /* MW 3 */ + 3348 "00000110" // /* MW 2 */ + 3349 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 41 33 +.delay_slot + 3350 "10011000" // NE r0, r2, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3351 "00001000" // /* MW 3 */ + 3352 "10000000" // /* MW 2 */ + 3353 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 42 25 first +.delay_slot + 3354 "10011000" // LSHL r0, r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3355 "00011101" // /* MW 3 */ + 3356 "00000000" // /* MW 2 */ + 3357 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 41 25 first +.src_ref 2 "elementwise_binary_broadcasting.h" 42 23 +.delay_slot + 3358 "01011100" // ST r0, [p0, #4]; LSHL r2, r3, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3359 "00111011" // /* MW 5 */ + 3360 "10001000" // /* MW 4 */ + 3361 "00110001" // /* MW 3 */ + 3362 "10000010" // /* MW 2 */ + 3363 "00000010" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 41 23 +.delay_slot + 3364 "10011000" // ST r2, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3365 "01010001" // /* MW 3 */ + 3366 "00000100" // /* MW 2 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_end0 + 3367 "00001000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv +.function setup _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv +.src_ref 2 "elementwise_binary_broadcasting.h" 35 +.src_ref 2 "elementwise_binary_broadcasting.h" 35 first +.function_start + 3376 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3377 "00000001" // /* MW 5 */ + 3378 "00000000" // /* MW 4 */ + 3379 "00000000" // /* MW 3 */ + 3380 "00001000" // /* MW 2 */ + 3381 "00000000" // /* MW 1 */ + 3382 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3383 "00111101" // /* MW 3 */ + 3384 "11111100" // /* MW 2 */ + 3385 "00001111" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 36 8 first +.no_stack_arguments + 3386 "00000100" // JL #3120 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3120 delay_slots=5 */ + 3387 "00000001" // /* MW 5 */ + 3388 "00000000" // /* MW 4 */ + 3389 "00011000" // /* MW 3 */ + 3390 "00000110" // /* MW 2 */ + 3391 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 36 8 +.delay_slot + 3392 "01000100" // MOVXM p0, #508672 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3393 "00000000" // /* MW 5 */ + 3394 "11000110" // /* MW 4 */ + 3395 "11000000" // /* MW 3 */ + 3396 "00000111" // /* MW 2 */ + 3397 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3399 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3401 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3403 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3404 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3405 "01100111" // /* MW 3 */ + 3406 "00000001" // /* MW 2 */ + 3407 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 37 8 +.return_address + 3408 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3409 "00111001" // /* MW 3 */ + 3410 "11111100" // /* MW 2 */ + 3411 "00000111" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 37 8 first +.tail_call + 3412 "10000100" // J #3312 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=3312 delay_slots=5 */ + 3413 "00000000" // /* MW 5 */ + 3414 "00000000" // /* MW 4 */ + 3415 "01111000" // /* MW 3 */ + 3416 "00000110" // /* MW 2 */ + 3417 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 37 8 +.delay_slot + 3418 "01000100" // MOVXM p0, #508672 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3419 "00000000" // /* MW 5 */ + 3420 "11000110" // /* MW 4 */ + 3421 "11000000" // /* MW 3 */ + 3422 "00000111" // /* MW 2 */ + 3423 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 38 4 first +.delay_slot + 3424 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3425 "00000001" // /* MW 5 */ + 3426 "00000000" // /* MW 4 */ + 3427 "00000000" // /* MW 3 */ + 3428 "11111000" // /* MW 2 */ + 3429 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3431 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3433 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + 3435 "00000000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.function run _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.src_ref 2 "elementwise_binary_broadcasting.h" 48 first +.src_ref 2 "elementwise_binary_broadcasting.h" 55 37 +.src_ref 2 "elementwise_binary_broadcasting.h" 61 19 +.function_start + 3440 "10111010" // MOVA m0, #20; MOVXM p3, #508672 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3441 "00010000" // /* MW 9 */ + 3442 "10000000" // /* MW 8 */ + 3443 "10110001" // /* MW 7 */ + 3444 "11110001" // /* MW 6 */ + 3445 "00000001" // /* MW 5 */ + 3446 "00000000" // /* MW 4 */ + 3447 "10000000" // /* MW 3 */ + 3448 "10000000" // /* MW 2 */ + 3449 "00000010" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 55 37 first + 3450 "10011000" // LDA r0, [p3], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3451 "00010110" // /* MW 3 */ + 3452 "00111100" // /* MW 2 */ + 3453 "00000011" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 61 19 first +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 + 3454 "11010100" // LDA.u8 r1, [p3], m0; MOV p4, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3455 "10000001" // /* MW 5 */ + 3456 "11001101" // /* MW 4 */ + 3457 "01011000" // /* MW 3 */ + 3458 "00000101" // /* MW 2 */ + 3459 "01100001" // /* MW 1 */ + 3460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3461 "00000000" // /* MW 1 */ + 3462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3463 "00000000" // /* MW 1 */ + 3464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3465 "00000000" // /* MW 1 */ + 3466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3467 "00000000" // /* MW 1 */ + 3468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3469 "00000000" // /* MW 1 */ + 3470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3471 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 61 12 +.src_ref 2 "elementwise_binary_broadcasting.h" 61 35 + 3472 "10000100" // JNZ r1, #3536 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3536 delay_slots=5 */ + 3473 "00000001" // /* MW 5 */ + 3474 "01000000" // /* MW 4 */ + 3475 "11101000" // /* MW 3 */ + 3476 "00000110" // /* MW 2 */ + 3477 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 55 78 +.delay_slot + 3478 "00011000" // MOVX r2, #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3479 "11101001" // /* MW 3 */ + 3480 "11000100" // /* MW 2 */ + 3481 "00010111" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 55 78 first +.delay_slot + 3482 "10011000" // LSHL r0, r0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3483 "00101101" // /* MW 3 */ + 3484 "00000000" // /* MW 2 */ + 3485 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3487 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3489 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3490 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3491 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 62 28 first + 3492 "10011000" // LDA.s16 r1, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3493 "00110010" // /* MW 3 */ + 3494 "00000100" // /* MW 2 */ + 3495 "00000000" // /* MW 1 */ + 3496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3497 "00000000" // /* MW 1 */ + 3498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3499 "00000000" // /* MW 1 */ + 3500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3501 "00000000" // /* MW 1 */ + 3502 "10000100" // J #3568 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3568 delay_slots=5 */ + 3503 "00000000" // /* MW 5 */ + 3504 "00000000" // /* MW 4 */ + 3505 "11111000" // /* MW 3 */ + 3506 "00000110" // /* MW 2 */ + 3507 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3509 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3511 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 first +.delay_slot + 3512 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3513 "01110010" // /* MW 3 */ + 3514 "00000101" // /* MW 2 */ + 3515 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3516 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3517 "01100111" // /* MW 3 */ + 3518 "00000001" // /* MW 2 */ + 3519 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.delay_slot + 3520 "11100001" // NOPA; NOPB; VST x0, [p0]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3521 "00000000" // /* MW 15 */ + 3522 "00000000" // /* MW 14 */ + 3523 "01111000" // /* MW 13 */ + 3524 "10100101" // /* MW 12 */ + 3525 "00000001" // /* MW 11 */ + 3526 "00000000" // /* MW 10 */ + 3527 "00000000" // /* MW 9 */ + 3528 "00000000" // /* MW 8 */ + 3529 "00010011" // /* MW 7 */ + 3530 "00000100" // /* MW 6 */ + 3531 "00100000" // /* MW 5 */ + 3532 "00000000" // /* MW 4 */ + 3533 "11110000" // /* MW 3 */ + 3534 "00101100" // /* MW 2 */ + 3535 "00000000" // /* MW 1 */ +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_96 +.src_ref 2 "elementwise_binary_broadcasting.h" 65 28 first + 3536 "10011000" // LDA.s16 r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3537 "00110010" // /* MW 3 */ + 3538 "00000100" // /* MW 2 */ + 3539 "00000001" // /* MW 1 */ + 3540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3541 "00000000" // /* MW 1 */ + 3542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3543 "00000000" // /* MW 1 */ + 3544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3545 "00000000" // /* MW 1 */ + 3546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3547 "00000000" // /* MW 1 */ + 3548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3549 "00000000" // /* MW 1 */ + 3550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3551 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 first + 3552 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3553 "01110010" // /* MW 3 */ + 3554 "00000101" // /* MW 2 */ + 3555 "00011000" // /* MW 1 */ + 3556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3557 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first + 3558 "01111010" // NOPA; VST x0, [p1]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3559 "00000000" // /* MW 9 */ + 3560 "00000000" // /* MW 8 */ + 3561 "00000000" // /* MW 7 */ + 3562 "00000000" // /* MW 6 */ + 3563 "00010011" // /* MW 5 */ + 3564 "00000100" // /* MW 4 */ + 3565 "11110001" // /* MW 3 */ + 3566 "00101100" // /* MW 2 */ + 3567 "00000000" // /* MW 1 */ +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_128 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 first +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 first + 3568 "10111010" // LDA m0, [p4, #20]; MOVX r0, #60; ADD.NC lc, r0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3569 "01001000" // /* MW 9 */ + 3570 "00111111" // /* MW 8 */ + 3571 "10111000" // /* MW 7 */ + 3572 "10001010" // /* MW 6 */ + 3573 "00000111" // /* MW 5 */ + 3574 "00000000" // /* MW 4 */ + 3575 "11010000" // /* MW 3 */ + 3576 "10000000" // /* MW 2 */ + 3577 "10001010" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 + 3578 "10111010" // LDA m1, [p3, #4]; MOVXM ls, #3680 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3579 "00010000" // /* MW 9 */ + 3580 "00110000" // /* MW 8 */ + 3581 "01111111" // /* MW 7 */ + 3582 "00000000" // /* MW 6 */ + 3583 "00000000" // /* MW 5 */ + 3584 "00000000" // /* MW 4 */ + 3585 "11010000" // /* MW 3 */ + 3586 "10010000" // /* MW 2 */ + 3587 "01100010" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 + 3588 "01000100" // MOVXM le, #3712 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3589 "00000000" // /* MW 5 */ + 3590 "11111101" // /* MW 4 */ + 3591 "00000110" // /* MW 3 */ + 3592 "00000000" // /* MW 2 */ + 3593 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 + 3594 "01000100" // MOVXM p4, #508448 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3595 "01000000" // /* MW 5 */ + 3596 "11000100" // /* MW 4 */ + 3597 "11001000" // /* MW 3 */ + 3598 "00000111" // /* MW 2 */ + 3599 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 + 3600 "10011000" // LDA.s8 r1, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3601 "00100010" // /* MW 3 */ + 3602 "00000100" // /* MW 2 */ + 3603 "00000100" // /* MW 1 */ + 3604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3605 "00000000" // /* MW 1 */ + 3606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3607 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 146 20 first + 3608 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3609 "10101011" // /* MW 3 */ + 3610 "00001000" // /* MW 2 */ + 3611 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 2 "elementwise_binary.h" 148 20 first + 3612 "10011000" // VLDA.CONV.fp32.bf16 cml2, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3613 "00101011" // /* MW 3 */ + 3614 "00101001" // /* MW 2 */ + 3615 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 170 20 first + 3616 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3617 "00101011" // /* MW 3 */ + 3618 "00001000" // /* MW 2 */ + 3619 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3620 "10011000" // VLDA.CONV.fp32.bf16 cml4, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3621 "00101011" // /* MW 3 */ + 3622 "00101010" // /* MW 2 */ + 3623 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 154 20 +.src_ref 2 "elementwise_binary.h" 177 20 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3624 "00101100" // VLDA.CONV.fp32.bf16 cml1, [p0], m0; MOVX crRnd, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3625 "00000000" // /* MW 5 */ + 3626 "11110101" // /* MW 4 */ + 3627 "01110000" // /* MW 3 */ + 3628 "00010101" // /* MW 2 */ + 3629 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3630 "01100010" // VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3631 "00111101" // /* MW 7 */ + 3632 "00101000" // /* MW 6 */ + 3633 "00000011" // /* MW 5 */ + 3634 "00000100" // /* MW 4 */ + 3635 "01110000" // /* MW 3 */ + 3636 "00100101" // /* MW 2 */ + 3637 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3638 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3639 "00101011" // /* MW 3 */ + 3640 "00001000" // /* MW 2 */ + 3641 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3642 "01100010" // VLDA.CONV.fp32.bf16 cml4, [p1], m1; VADD.f dm4, dm0, dm4, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3643 "00111101" // /* MW 7 */ + 3644 "00010000" // /* MW 6 */ + 3645 "00000100" // /* MW 5 */ + 3646 "00000100" // /* MW 4 */ + 3647 "01110000" // /* MW 3 */ + 3648 "01000101" // /* MW 2 */ + 3649 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3650 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3651 "10101011" // /* MW 3 */ + 3652 "00001000" // /* MW 2 */ + 3653 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3654 "01100010" // VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3655 "00111101" // /* MW 7 */ + 3656 "00101000" // /* MW 6 */ + 3657 "00000011" // /* MW 5 */ + 3658 "00000100" // /* MW 4 */ + 3659 "01110000" // /* MW 3 */ + 3660 "00100101" // /* MW 2 */ + 3661 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3662 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3663 "00101011" // /* MW 3 */ + 3664 "00001000" // /* MW 2 */ + 3665 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3666 "01101110" // VLDA.CONV.fp32.bf16 cml4, [p1], m1; VST.CONV.bf16.fp32 cml3, [p2], #64; NOPM; VADD.f dm4, dm0, dm4, r0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 3667 "00111101" // /* MW 13 */ + 3668 "00010000" // /* MW 12 */ + 3669 "00000100" // /* MW 11 */ + 3670 "01010111" // /* MW 10 */ + 3671 "00011010" // /* MW 9 */ + 3672 "01000000" // /* MW 8 */ + 3673 "00000000" // /* MW 7 */ + 3674 "00000000" // /* MW 6 */ + 3675 "01000110" // /* MW 5 */ + 3676 "00111011" // /* MW 4 */ + 3677 "01110100" // /* MW 3 */ + 3678 "01000101" // /* MW 2 */ + 3679 "00100101" // /* MW 1 */ +.label ZLS_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_240 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 3680 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3681 "10101011" // /* MW 3 */ + 3682 "00001000" // /* MW 2 */ + 3683 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3684 "01100110" // VLDA.CONV.fp32.bf16 cml2, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3685 "00111101" // /* MW 11 */ + 3686 "00101000" // /* MW 10 */ + 3687 "00000011" // /* MW 9 */ + 3688 "10001110" // /* MW 8 */ + 3689 "00010001" // /* MW 7 */ + 3690 "00001111" // /* MW 6 */ + 3691 "00100001" // /* MW 5 */ + 3692 "00000000" // /* MW 4 */ + 3693 "01110000" // /* MW 3 */ + 3694 "00100101" // /* MW 2 */ + 3695 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3696 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3697 "00000000" // /* MW 15 */ + 3698 "00000000" // /* MW 14 */ + 3699 "01111000" // /* MW 13 */ + 3700 "10100101" // /* MW 12 */ + 3701 "00000001" // /* MW 11 */ + 3702 "00000000" // /* MW 10 */ + 3703 "00000000" // /* MW 9 */ + 3704 "00000000" // /* MW 8 */ + 3705 "01011011" // /* MW 7 */ + 3706 "00000001" // /* MW 6 */ + 3707 "00100000" // /* MW 5 */ + 3708 "00000000" // /* MW 4 */ + 3709 "01110000" // /* MW 3 */ + 3710 "00000101" // /* MW 2 */ + 3711 "00000001" // /* MW 1 */ +.label ZLE_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_272 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3712 "11101011" // VLDA.CONV.fp32.bf16 cml4, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml3, [p2], #64;NOPX; NOPM; VADD.f dm4, dm0, dm4, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3713 "10000001" // /* MW 15 */ + 3714 "00100000" // /* MW 14 */ + 3715 "01111000" // /* MW 13 */ + 3716 "10100101" // /* MW 12 */ + 3717 "00000001" // /* MW 11 */ + 3718 "00000000" // /* MW 10 */ + 3719 "00000000" // /* MW 9 */ + 3720 "00000000" // /* MW 8 */ + 3721 "10100011" // /* MW 7 */ + 3722 "00011101" // /* MW 6 */ + 3723 "00100010" // /* MW 5 */ + 3724 "00000000" // /* MW 4 */ + 3725 "01110000" // /* MW 3 */ + 3726 "01000101" // /* MW 2 */ + 3727 "00100101" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 3728 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3729 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3730 "01100010" // VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3731 "00111101" // /* MW 7 */ + 3732 "00101000" // /* MW 6 */ + 3733 "00000011" // /* MW 5 */ + 3734 "00000010" // /* MW 4 */ + 3735 "01100000" // /* MW 3 */ + 3736 "11000100" // /* MW 2 */ + 3737 "01000011" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3738 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3739 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3740 "01100010" // VST.CONV.bf16.fp32 cml3, [p2], #64; VADD.f dm4, dm0, dm4, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3741 "00111101" // /* MW 7 */ + 3742 "00010000" // /* MW 6 */ + 3743 "00000100" // /* MW 5 */ + 3744 "00000010" // /* MW 4 */ + 3745 "01100000" // /* MW 3 */ + 3746 "10110100" // /* MW 2 */ + 3747 "01000011" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3749 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 154 20 first +.src_ref 2 "elementwise_binary_broadcasting.h" 80 4 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3750 "01011100" // VST.CONV.bf16.fp32 cml4, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 3751 "00000000" // /* MW 5 */ + 3752 "01010000" // /* MW 4 */ + 3753 "01100000" // /* MW 3 */ + 3754 "11000100" // /* MW 2 */ + 3755 "01000011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3756 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3757 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.delay_slot + 3758 "00011000" // VST.CONV.bf16.fp32 cml3, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3759 "10100011" // /* MW 3 */ + 3760 "00011101" // /* MW 2 */ + 3761 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3762 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3763 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 154 20 first +.delay_slot + 3764 "00011000" // VST.CONV.bf16.fp32 cml4, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3765 "00100011" // /* MW 3 */ + 3766 "00011110" // /* MW 2 */ + 3767 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0 + 3769 "00000000" // /* MW 1 */ +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E +.function run _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 41 +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 41 first +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 76 8 +.function_start + 3776 "00111010" // MOVS p2, p1; PADDXM [sp], #128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3777 "01110001" // /* MW 9 */ + 3778 "00000000" // /* MW 8 */ + 3779 "00000000" // /* MW 7 */ + 3780 "00000000" // /* MW 6 */ + 3781 "00000100" // /* MW 5 */ + 3782 "00000000" // /* MW 4 */ + 3783 "01100000" // /* MW 3 */ + 3784 "10010001" // /* MW 2 */ + 3785 "01010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 51 35 + 3786 "00000010" // ST lr, [sp, #-4]; MOV r16, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3787 "01110000" // /* MW 7 */ + 3788 "01100000" // /* MW 6 */ + 3789 "00001000" // /* MW 5 */ + 3790 "00000010" // /* MW 4 */ + 3791 "10110000" // /* MW 3 */ + 3792 "10000111" // /* MW 2 */ + 3793 "11111111" // /* MW 1 */ + 3794 "11111000" // MOV r17, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3795 "11100000" // /* MW 3 */ + 3796 "01010101" // /* MW 2 */ + 3797 "00011100" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 51 19 + 3798 "01000100" // MOVXM p3, #508684 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3799 "00011000" // /* MW 5 */ + 3800 "11000110" // /* MW 4 */ + 3801 "11000110" // /* MW 3 */ + 3802 "00000111" // /* MW 2 */ + 3803 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 51 19 first + 3804 "00010100" // LDA.u8 r27, [p3], #2; ADD.NC p0, r17, #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3805 "10000000" // /* MW 5 */ + 3806 "11010001" // /* MW 4 */ + 3807 "01010000" // /* MW 3 */ + 3808 "11101101" // /* MW 2 */ + 3809 "01100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 538 13 first +.src_ref 4 "vector_native_types.hpp" 374 137 first + 3810 "00001100" // LDA.s16 r18, [p3], #-14; VST sfh, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3811 "01010110" // /* MW 5 */ + 3812 "00001110" // /* MW 4 */ + 3813 "01010000" // /* MW 3 */ + 3814 "11001010" // /* MW 2 */ + 3815 "01110011" // /* MW 1 */ + 3816 "00011000" // ST.s16 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3817 "01010111" // /* MW 3 */ + 3818 "00000110" // /* MW 2 */ + 3819 "00000000" // /* MW 1 */ + 3820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3821 "00000000" // /* MW 1 */ + 3822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3823 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 76 8 first +.no_stack_arguments + 3824 "00000100" // JL #3440 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3440 delay_slots=5 */ + 3825 "00000001" // /* MW 5 */ + 3826 "00000000" // /* MW 4 */ + 3827 "10111000" // /* MW 3 */ + 3828 "00000110" // /* MW 2 */ + 3829 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 51 35 +.delay_slot + 3830 "11111000" // MOV r17, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3831 "11000000" // /* MW 3 */ + 3832 "01010000" // /* MW 2 */ + 3833 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3835 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 51 35 first +.delay_slot + 3836 "00011000" // SEL.EQZ r18, r16, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3837 "00010010" // /* MW 3 */ + 3838 "00100101" // /* MW 2 */ + 3839 "00010100" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 51 35 +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 76 8 +.delay_slot + 3840 "11100100" // SEL.EQZ r16, r17, r16, r27; MOV p1, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3841 "01000001" // /* MW 5 */ + 3842 "11010010" // /* MW 4 */ + 3843 "01000010" // /* MW 3 */ + 3844 "00100000" // /* MW 2 */ + 3845 "10001100" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 76 8 +.delay_slot + 3846 "10111010" // NOPA; NOPB; MOV p0, r16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3847 "01111110" // /* MW 9 */ + 3848 "00010000" // /* MW 8 */ + 3849 "00110100" // /* MW 7 */ + 3850 "00000000" // /* MW 6 */ + 3851 "00010000" // /* MW 5 */ + 3852 "00000000" // /* MW 4 */ + 3853 "11110000" // /* MW 3 */ + 3854 "00101100" // /* MW 2 */ + 3855 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 77 4 +.return_address + 3856 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3857 "00111001" // /* MW 3 */ + 3858 "11111100" // /* MW 2 */ + 3859 "00000111" // /* MW 1 */ + 3860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3861 "00000000" // /* MW 1 */ + 3862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3863 "00000000" // /* MW 1 */ + 3864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3865 "00000000" // /* MW 1 */ + 3866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3867 "00000000" // /* MW 1 */ + 3868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3869 "00000000" // /* MW 1 */ + 3870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3871 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 77 4 first + 3872 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3873 "00000000" // /* MW 3 */ + 3874 "00101000" // /* MW 2 */ + 3875 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 77 4 +.delay_slot + 3876 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3877 "00000001" // /* MW 5 */ + 3878 "00000000" // /* MW 4 */ + 3879 "00000000" // /* MW 3 */ + 3880 "11110000" // /* MW 2 */ + 3881 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3883 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3885 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3887 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_end0 + 3889 "00000000" // /* MW 1 */ +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_add1d_attribute_broadcasting _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 6 "superkernels.cpp" 152 first +.src_ref 6 "superkernels.cpp" 157 6 +.function_start + 3904 "01000100" // MOVXM p3, #508416 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3905 "00000000" // /* MW 5 */ + 3906 "11000100" // /* MW 4 */ + 3907 "11000110" // /* MW 3 */ + 3908 "00000111" // /* MW 2 */ + 3909 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 157 6 first + 3910 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3911 "11000001" // /* MW 5 */ + 3912 "10110101" // /* MW 4 */ + 3913 "11011000" // /* MW 3 */ + 3914 "11000010" // /* MW 2 */ + 3915 "01100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 152 + 3916 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3917 "00000001" // /* MW 5 */ + 3918 "00000000" // /* MW 4 */ + 3919 "00000000" // /* MW 3 */ + 3920 "00001000" // /* MW 2 */ + 3921 "00000000" // /* MW 1 */ + 3922 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3923 "01110000" // /* MW 7 */ + 3924 "11010000" // /* MW 6 */ + 3925 "00001011" // /* MW 5 */ + 3926 "00000000" // /* MW 4 */ + 3927 "10110000" // /* MW 3 */ + 3928 "01100011" // /* MW 2 */ + 3929 "11111111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 154 11 + 3930 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #508420 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3931 "00010001" // /* MW 9 */ + 3932 "00000010" // /* MW 8 */ + 3933 "00110001" // /* MW 7 */ + 3934 "11110011" // /* MW 6 */ + 3935 "00000001" // /* MW 5 */ + 3936 "00000000" // /* MW 4 */ + 3937 "10110000" // /* MW 3 */ + 3938 "10000010" // /* MW 2 */ + 3939 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 + 3940 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3941 "11000000" // /* MW 3 */ + 3942 "11010100" // /* MW 2 */ + 3943 "00011011" // /* MW 1 */ + 3944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3945 "00000000" // /* MW 1 */ + 3946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3947 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 157 6 +.src_ref 6 "superkernels.cpp" 157 16 + 3948 "10000100" // JNZ r16, #4112 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4112 delay_slots=5 */ + 3949 "00000001" // /* MW 5 */ + 3950 "01000000" // /* MW 4 */ + 3951 "00001000" // /* MW 3 */ + 3952 "00001000" // /* MW 2 */ + 3953 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 154 22 first +.delay_slot + 3954 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3955 "10010000" // /* MW 3 */ + 3956 "01100010" // /* MW 2 */ + 3957 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 154 30 +.delay_slot + 3958 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3959 "11111011" // /* MW 3 */ + 3960 "01100011" // /* MW 2 */ + 3961 "00010100" // /* MW 1 */ +.delay_slot + 3962 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3963 "00111101" // /* MW 3 */ + 3964 "11110100" // /* MW 2 */ + 3965 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 6 "superkernels.cpp" 154 11 +.delay_slot + 3966 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3967 "01110000" // /* MW 7 */ + 3968 "01100000" // /* MW 6 */ + 3969 "00110000" // /* MW 5 */ + 3970 "00000011" // /* MW 4 */ + 3971 "00110000" // /* MW 3 */ + 3972 "11000110" // /* MW 2 */ + 3973 "11000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 160 4 +.src_ref 6 "superkernels.cpp" 171 2 +.delay_slot + 3974 "01000100" // MOVXM p0, #508672 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3975 "00000000" // /* MW 5 */ + 3976 "11000110" // /* MW 4 */ + 3977 "11000000" // /* MW 3 */ + 3978 "00000111" // /* MW 2 */ + 3979 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3980 "01000100" // MOVXM p2, #508448 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3981 "01000000" // /* MW 5 */ + 3982 "11000100" // /* MW 4 */ + 3983 "11000100" // /* MW 3 */ + 3984 "00000111" // /* MW 2 */ + 3985 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3986 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #508444 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3987 "00010000" // /* MW 9 */ + 3988 "00001110" // /* MW 8 */ + 3989 "00110001" // /* MW 7 */ + 3990 "11110001" // /* MW 6 */ + 3991 "00000001" // /* MW 5 */ + 3992 "00000000" // /* MW 4 */ + 3993 "11100000" // /* MW 3 */ + 3994 "11000000" // /* MW 2 */ + 3995 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3997 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 160 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 3998 "00000100" // JL #3376 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3376 delay_slots=5 */ + 3999 "00000001" // /* MW 5 */ + 4000 "00000000" // /* MW 4 */ + 4001 "10011000" // /* MW 3 */ + 4002 "00000110" // /* MW 2 */ + 4003 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4005 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4007 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4008 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4009 "00110001" // /* MW 3 */ + 4010 "00100000" // /* MW 2 */ + 4011 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 4012 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4013 "00000101" // /* MW 3 */ + 4014 "00100000" // /* MW 2 */ + 4015 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 4016 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4017 "00000000" // /* MW 15 */ + 4018 "00000000" // /* MW 14 */ + 4019 "01111000" // /* MW 13 */ + 4020 "10100101" // /* MW 12 */ + 4021 "00000001" // /* MW 11 */ + 4022 "00000000" // /* MW 10 */ + 4023 "00000000" // /* MW 9 */ + 4024 "10000000" // /* MW 8 */ + 4025 "00010001" // /* MW 7 */ + 4026 "00000110" // /* MW 6 */ + 4027 "00100010" // /* MW 5 */ + 4028 "00000000" // /* MW 4 */ + 4029 "11110000" // /* MW 3 */ + 4030 "00101100" // /* MW 2 */ + 4031 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 164 18 +.return_address + 4032 "01000100" // MOVXM p2, #508420 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4033 "00001000" // /* MW 5 */ + 4034 "11000100" // /* MW 4 */ + 4035 "11000100" // /* MW 3 */ + 4036 "00000111" // /* MW 2 */ + 4037 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 164 18 first +.src_ref 6 "superkernels.cpp" 164 65 + 4038 "10111010" // LDA r16, [p2]; MOVXM p2, #508672 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4039 "00010000" // /* MW 9 */ + 4040 "10000000" // /* MW 8 */ + 4041 "00110001" // /* MW 7 */ + 4042 "11110001" // /* MW 6 */ + 4043 "00000001" // /* MW 5 */ + 4044 "00000000" // /* MW 4 */ + 4045 "11010000" // /* MW 3 */ + 4046 "11000010" // /* MW 2 */ + 4047 "01000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 162 51 +.src_ref 6 "superkernels.cpp" 164 65 +.src_ref 6 "superkernels.cpp" 171 2 + 4048 "10111010" // LDA r17, [p2]; MOVXM p2, #508672 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4049 "00010000" // /* MW 9 */ + 4050 "10000000" // /* MW 8 */ + 4051 "00110001" // /* MW 7 */ + 4052 "11110001" // /* MW 6 */ + 4053 "00000001" // /* MW 5 */ + 4054 "00000000" // /* MW 4 */ + 4055 "11010000" // /* MW 3 */ + 4056 "11000110" // /* MW 2 */ + 4057 "01000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 162 51 first +.src_ref 6 "superkernels.cpp" 164 16 +.src_ref 6 "superkernels.cpp" 169 47 + 4058 "10111010" // LDA.u16 r18, [p2, #10]; MOVXM p1, #508424 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4059 "00010000" // /* MW 9 */ + 4060 "00000100" // /* MW 8 */ + 4061 "10110001" // /* MW 7 */ + 4062 "11110000" // /* MW 6 */ + 4063 "00000001" // /* MW 5 */ + 4064 "00000000" // /* MW 4 */ + 4065 "01010000" // /* MW 3 */ + 4066 "11001011" // /* MW 2 */ + 4067 "01001010" // /* MW 1 */ + 4068 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4069 "00000000" // /* MW 1 */ + 4070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4071 "00000000" // /* MW 1 */ + 4072 "10000100" // J #4128 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=4128 delay_slots=5 */ + 4073 "00000000" // /* MW 5 */ + 4074 "00000000" // /* MW 4 */ + 4075 "00010000" // /* MW 3 */ + 4076 "00001000" // /* MW 2 */ + 4077 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 162 13 +.delay_slot + 4078 "01000100" // MOVXM p0, #508440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4079 "00110000" // /* MW 5 */ + 4080 "11000100" // /* MW 4 */ + 4081 "11000000" // /* MW 3 */ + 4082 "00000111" // /* MW 2 */ + 4083 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4085 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 164 27 first +.delay_slot + 4086 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4087 "00001111" // /* MW 3 */ + 4088 "01100001" // /* MW 2 */ + 4089 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 162 13 first +.delay_slot + 4090 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4091 "10100011" // /* MW 5 */ + 4092 "00001100" // /* MW 4 */ + 4093 "11110000" // /* MW 3 */ + 4094 "00101100" // /* MW 2 */ + 4095 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 164 16 first +.delay_slot + 4096 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4097 "00000000" // /* MW 15 */ + 4098 "00000000" // /* MW 14 */ + 4099 "01111000" // /* MW 13 */ + 4100 "10100101" // /* MW 12 */ + 4101 "00000001" // /* MW 11 */ + 4102 "00000000" // /* MW 10 */ + 4103 "00000000" // /* MW 9 */ + 4104 "10000000" // /* MW 8 */ + 4105 "00010001" // /* MW 7 */ + 4106 "00000110" // /* MW 6 */ + 4107 "00100001" // /* MW 5 */ + 4108 "00000000" // /* MW 4 */ + 4109 "11110000" // /* MW 3 */ + 4110 "00101100" // /* MW 2 */ + 4111 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 6 "superkernels.cpp" 169 47 +.src_ref 6 "superkernels.cpp" 171 2 + 4112 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #508424; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4113 "00000000" // /* MW 15 */ + 4114 "00000000" // /* MW 14 */ + 4115 "00010000" // /* MW 13 */ + 4116 "00000100" // /* MW 12 */ + 4117 "10110001" // /* MW 11 */ + 4118 "11110000" // /* MW 10 */ + 4119 "00000001" // /* MW 9 */ + 4120 "00000000" // /* MW 8 */ + 4121 "10001011" // /* MW 7 */ + 4122 "10000000" // /* MW 6 */ + 4123 "00100010" // /* MW 5 */ + 4124 "00000000" // /* MW 4 */ + 4125 "11110000" // /* MW 3 */ + 4126 "00101100" // /* MW 2 */ + 4127 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 +.src_ref 1 "io_buffer_main.h" 242 49 first + 4128 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4129 "00000000" // /* MW 7 */ + 4130 "11000011" // /* MW 6 */ + 4131 "10110011" // /* MW 5 */ + 4132 "00000011" // /* MW 4 */ + 4133 "01100000" // /* MW 3 */ + 4134 "10010001" // /* MW 2 */ + 4135 "01110011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 168 2 + 4136 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4137 "00010000" // /* MW 9 */ + 4138 "00000000" // /* MW 8 */ + 4139 "00110001" // /* MW 7 */ + 4140 "11110000" // /* MW 6 */ + 4141 "00000001" // /* MW 5 */ + 4142 "00000000" // /* MW 4 */ + 4143 "11010000" // /* MW 3 */ + 4144 "11101110" // /* MW 2 */ + 4145 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 4146 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4147 "00010110" // /* MW 3 */ + 4148 "11111110" // /* MW 2 */ + 4149 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 4150 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4151 "00110110" // /* MW 3 */ + 4152 "11111110" // /* MW 2 */ + 4153 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first + 4154 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4155 "01010110" // /* MW 3 */ + 4156 "01000110" // /* MW 2 */ + 4157 "00000111" // /* MW 1 */ + 4158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4159 "00000000" // /* MW 1 */ + 4160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4161 "00000000" // /* MW 1 */ + 4162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4163 "00000000" // /* MW 1 */ + 4164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4165 "00000000" // /* MW 1 */ + 4166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4167 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 4168 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4169 "00000010" // /* MW 3 */ + 4170 "01100001" // /* MW 2 */ + 4171 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 + 4172 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4173 "00010001" // /* MW 3 */ + 4174 "00000110" // /* MW 2 */ + 4175 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 + 4176 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4177 "11111101" // /* MW 3 */ + 4178 "11100000" // /* MW 2 */ + 4179 "00010111" // /* MW 1 */ + 4180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4181 "00000000" // /* MW 1 */ + 4182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4183 "00000000" // /* MW 1 */ + 4184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4185 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 4186 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4187 "00001000" // /* MW 3 */ + 4188 "10010011" // /* MW 2 */ + 4189 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 169 45 + 4190 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4191 "10000001" // /* MW 5 */ + 4192 "10101101" // /* MW 4 */ + 4193 "10100111" // /* MW 3 */ + 4194 "00000000" // /* MW 2 */ + 4195 "00000100" // /* MW 1 */ + 4196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4197 "00000000" // /* MW 1 */ + 4198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4199 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 168 2 first + 4200 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4201 "00110110" // /* MW 3 */ + 4202 "00000110" // /* MW 2 */ + 4203 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 1 "io_buffer_main.h" 348 51 + 4204 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4205 "10000001" // /* MW 5 */ + 4206 "11011101" // /* MW 4 */ + 4207 "11011100" // /* MW 3 */ + 4208 "11001010" // /* MW 2 */ + 4209 "11000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 169 47 first + 4210 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4211 "01110110" // /* MW 3 */ + 4212 "00000110" // /* MW 2 */ + 4213 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 4214 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4215 "10011110" // /* MW 3 */ + 4216 "01011100" // /* MW 2 */ + 4217 "00000111" // /* MW 1 */ + 4218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4219 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 171 2 first +.no_stack_arguments + 4220 "00000100" // JL #3776 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3776 delay_slots=5 */ + 4221 "00000001" // /* MW 5 */ + 4222 "00000000" // /* MW 4 */ + 4223 "01100000" // /* MW 3 */ + 4224 "00000111" // /* MW 2 */ + 4225 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4226 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4227 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 168 2 first +.delay_slot + 4228 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4229 "00000111" // /* MW 3 */ + 4230 "01100010" // /* MW 2 */ + 4231 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 168 2 +.delay_slot + 4232 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4233 "00110001" // /* MW 3 */ + 4234 "00000110" // /* MW 2 */ + 4235 "00001000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 169 45 first +.delay_slot + 4236 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4237 "00001101" // /* MW 3 */ + 4238 "11100001" // /* MW 2 */ + 4239 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 169 45 +.delay_slot + 4240 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4241 "00000000" // /* MW 15 */ + 4242 "00000000" // /* MW 14 */ + 4243 "10101000" // /* MW 13 */ + 4244 "10100000" // /* MW 12 */ + 4245 "00110100" // /* MW 11 */ + 4246 "00000000" // /* MW 10 */ + 4247 "00000000" // /* MW 9 */ + 4248 "00000000" // /* MW 8 */ + 4249 "01011011" // /* MW 7 */ + 4250 "00000001" // /* MW 6 */ + 4251 "00100000" // /* MW 5 */ + 4252 "00000000" // /* MW 4 */ + 4253 "11110000" // /* MW 3 */ + 4254 "00101100" // /* MW 2 */ + 4255 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first +.src_ref 6 "superkernels.cpp" 173 6 +.src_ref 6 "superkernels.cpp" 174 14 +.return_address + 4256 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4257 "00010000" // /* MW 9 */ + 4258 "00000000" // /* MW 8 */ + 4259 "00110001" // /* MW 7 */ + 4260 "11110011" // /* MW 6 */ + 4261 "00000001" // /* MW 5 */ + 4262 "00000000" // /* MW 4 */ + 4263 "11010000" // /* MW 3 */ + 4264 "11000110" // /* MW 2 */ + 4265 "11001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 449 8 + 4266 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4267 "00000101" // /* MW 3 */ + 4268 "00100000" // /* MW 2 */ + 4269 "00010000" // /* MW 1 */ + 4270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4271 "00000000" // /* MW 1 */ + 4272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4273 "00000000" // /* MW 1 */ + 4274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4275 "00000000" // /* MW 1 */ + 4276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4277 "00000000" // /* MW 1 */ + 4278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4279 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 4280 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4281 "00001000" // /* MW 3 */ + 4282 "01010001" // /* MW 2 */ + 4283 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first +.src_ref 6 "superkernels.cpp" 173 19 + 4284 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #508440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4285 "00010000" // /* MW 9 */ + 4286 "00001100" // /* MW 8 */ + 4287 "00110001" // /* MW 7 */ + 4288 "11110001" // /* MW 6 */ + 4289 "00000001" // /* MW 5 */ + 4290 "00000000" // /* MW 4 */ + 4291 "11010000" // /* MW 3 */ + 4292 "11001110" // /* MW 2 */ + 4293 "11111100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 173 6 first + 4294 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4295 "00110110" // /* MW 3 */ + 4296 "00000110" // /* MW 2 */ + 4297 "00000110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 173 19 + 4298 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4299 "01010110" // /* MW 3 */ + 4300 "00000110" // /* MW 2 */ + 4301 "00000010" // /* MW 1 */ + 4302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4303 "00000000" // /* MW 1 */ + 4304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4305 "00000000" // /* MW 1 */ + 4306 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4307 "00000000" // /* MW 1 */ + 4308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4309 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 first + 4310 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4311 "00110001" // /* MW 3 */ + 4312 "00100001" // /* MW 2 */ + 4313 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 4314 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4315 "00010001" // /* MW 3 */ + 4316 "11100110" // /* MW 2 */ + 4317 "00001111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 173 16 first + 4318 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4319 "00101000" // /* MW 3 */ + 4320 "01100001" // /* MW 2 */ + 4321 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 173 6 + 4322 "10000100" // JNZ r16, #4352 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4352 delay_slots=5 */ + 4323 "00000001" // /* MW 5 */ + 4324 "01000000" // /* MW 4 */ + 4325 "10000000" // /* MW 3 */ + 4326 "00001000" // /* MW 2 */ + 4327 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4329 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4331 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4333 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4335 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4337 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 174 14 + 4338 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4339 "00000001" // /* MW 3 */ + 4340 "00100000" // /* MW 2 */ + 4341 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 174 14 first + 4342 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4343 "00000000" // /* MW 9 */ + 4344 "00000000" // /* MW 8 */ + 4345 "00000000" // /* MW 7 */ + 4346 "10000000" // /* MW 6 */ + 4347 "00010001" // /* MW 5 */ + 4348 "00000110" // /* MW 4 */ + 4349 "11110110" // /* MW 3 */ + 4350 "00101100" // /* MW 2 */ + 4351 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 6 "superkernels.cpp" 176 + 4352 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4353 "00111001" // /* MW 3 */ + 4354 "11110100" // /* MW 2 */ + 4355 "00000111" // /* MW 1 */ + 4356 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4357 "00011001" // /* MW 3 */ + 4358 "11111011" // /* MW 2 */ + 4359 "00000111" // /* MW 1 */ + 4360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4361 "00000000" // /* MW 1 */ + 4362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4363 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 4364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4365 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4366 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4367 "11110001" // /* MW 3 */ + 4368 "11111101" // /* MW 2 */ + 4369 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4371 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 176 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4372 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 4373 "00000000" // /* MW 3 */ + 4374 "00101000" // /* MW 2 */ + 4375 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4376 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4377 "10100000" // /* MW 3 */ + 4378 "01100111" // /* MW 2 */ + 4379 "00011111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 176 +.delay_slot + 4380 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4381 "00000001" // /* MW 5 */ + 4382 "00000000" // /* MW 4 */ + 4383 "00000000" // /* MW 3 */ + 4384 "11111000" // /* MW 2 */ + 4385 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4387 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4389 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 4391 "00000000" // /* MW 1 */ +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv +.function setup _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv +.src_ref 2 "elementwise_unary.h" 95 first +.src_ref 2 "elementwise_unary.h" 97 22 +.src_ref 2 "elementwise_unary.h" 97 24 first +.function_start + 4400 "10111010" // LDA el0, [p1], #4; MOVXM p0, #508800 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4401 "00010000" // /* MW 9 */ + 4402 "11000000" // /* MW 8 */ + 4403 "00110001" // /* MW 7 */ + 4404 "11110000" // /* MW 6 */ + 4405 "00000001" // /* MW 5 */ + 4406 "00000000" // /* MW 4 */ + 4407 "11010000" // /* MW 3 */ + 4408 "10000101" // /* MW 2 */ + 4409 "00100011" // /* MW 1 */ + 4410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4411 "00000000" // /* MW 1 */ + 4412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4413 "00000000" // /* MW 1 */ + 4414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4415 "00000000" // /* MW 1 */ + 4416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4417 "00000000" // /* MW 1 */ + 4418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4419 "00000000" // /* MW 1 */ + 4420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4421 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 97 22 first + 4422 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4423 "00101001" // /* MW 3 */ + 4424 "00011100" // /* MW 2 */ + 4425 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 98 24 first + 4426 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4427 "00101110" // /* MW 3 */ + 4428 "00000100" // /* MW 2 */ + 4429 "00000001" // /* MW 1 */ + 4430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4431 "00000000" // /* MW 1 */ + 4432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4433 "00000000" // /* MW 1 */ + 4434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4435 "00000000" // /* MW 1 */ + 4436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4437 "00000000" // /* MW 1 */ + 4438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4439 "00000000" // /* MW 1 */ + 4440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4441 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 98 22 + 4442 "10011000" // ST el0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4443 "00101001" // /* MW 3 */ + 4444 "00000100" // /* MW 2 */ + 4445 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 99 24 first + 4446 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4447 "00101110" // /* MW 3 */ + 4448 "00010100" // /* MW 2 */ + 4449 "00000001" // /* MW 1 */ + 4450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4451 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 101 4 first + 4452 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 4453 "00000000" // /* MW 3 */ + 4454 "00101000" // /* MW 2 */ + 4455 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4457 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4459 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4461 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4463 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 99 22 first +.delay_slot + 4464 "10011000" // ST el0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4465 "00101001" // /* MW 3 */ + 4466 "00010100" // /* MW 2 */ +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv__end +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_end0 + 4467 "00001000" // /* MW 1 */ +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E +.function run _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_unary.h" 107 first +.src_ref 2 "elementwise_unary.h" 113 37 +.src_ref 2 "elementwise_unary.h" 113 78 +.src_ref 2 "elementwise_unary.h" 142 19 +.function_start + 4480 "10110110" // MOVA r0, #-6; VLDB x10, [p0], #64; MOVXM p2, #508800 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4481 "00010000" // /* MW 11 */ + 4482 "11000000" // /* MW 10 */ + 4483 "00110001" // /* MW 9 */ + 4484 "11110001" // /* MW 8 */ + 4485 "00000001" // /* MW 7 */ + 4486 "00000000" // /* MW 6 */ + 4487 "01101000" // /* MW 5 */ + 4488 "00111101" // /* MW 4 */ + 4489 "00000000" // /* MW 3 */ + 4490 "01000000" // /* MW 2 */ + 4491 "11111111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_unary.h" 113 37 first +.src_ref 2 "elementwise_unary.h" 161 19 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4492 "10110110" // LDA r2, [p2]; VLDB x7, [p0], #64; MOVXM p2, #508448 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4493 "00010000" // /* MW 11 */ + 4494 "00010000" // /* MW 10 */ + 4495 "00110001" // /* MW 9 */ + 4496 "11110001" // /* MW 8 */ + 4497 "00000001" // /* MW 7 */ + 4498 "00000000" // /* MW 6 */ + 4499 "11101000" // /* MW 5 */ + 4500 "00111011" // /* MW 4 */ + 4501 "11010000" // /* MW 3 */ + 4502 "10001010" // /* MW 2 */ + 4503 "01000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_unary.h" 142 19 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4504 "10110110" // LDA.s8 r1, [p2]; VLDB x10, [p0], #64; MOVXM r6, #16512 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4505 "00010000" // /* MW 11 */ + 4506 "01000000" // /* MW 10 */ + 4507 "11001000" // /* MW 9 */ + 4508 "00010000" // /* MW 8 */ + 4509 "00000000" // /* MW 7 */ + 4510 "00000000" // /* MW 6 */ + 4511 "01101000" // /* MW 5 */ + 4512 "00111101" // /* MW 4 */ + 4513 "01010000" // /* MW 3 */ + 4514 "10000100" // /* MW 2 */ + 4515 "01000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_unary.h" 161 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4516 "11110100" // VLDB x7, [p0], #64; VBCST.16 x0, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4517 "11100101" // /* MW 5 */ + 4518 "00110010" // /* MW 4 */ + 4519 "10000000" // /* MW 3 */ + 4520 "10111110" // /* MW 2 */ + 4521 "00000011" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4522 "01000100" // MOVXM r4, #49280 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4523 "00000000" // /* MW 5 */ + 4524 "00100001" // /* MW 4 */ + 4525 "11000010" // /* MW 3 */ + 4526 "00000000" // /* MW 2 */ + 4527 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4528 "11111000" // VBCST.16 x1, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4529 "01110010" // /* MW 3 */ + 4530 "10010001" // /* MW 2 */ + 4531 "00011000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4532 "01000100" // MOVXM r3, #32767 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4533 "11111110" // /* MW 5 */ + 4534 "10111111" // /* MW 4 */ + 4535 "01110001" // /* MW 3 */ + 4536 "00000000" // /* MW 2 */ + 4537 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4538 "11111000" // VMIN_GE.bf16 x8, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4539 "00101100" // /* MW 3 */ + 4540 "01010000" // /* MW 2 */ + 4541 "00011100" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 2 "elementwise_unary.h" 113 78 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4542 "11100100" // LSHL r0, r2, r0; VMAX_LT.bf16 x6, r16, x8, x1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4543 "11011001" // /* MW 5 */ + 4544 "10000001" // /* MW 4 */ + 4545 "10110110" // /* MW 3 */ + 4546 "00000001" // /* MW 2 */ + 4547 "00010000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_unary.h" 147 20 +.src_ref 2 "elementwise_unary.h" 166 20 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4548 "11100100" // MOVX crRnd, r1; VMIN_GE.bf16 x8, r16, x7, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4549 "01011001" // /* MW 5 */ + 4550 "01110000" // /* MW 4 */ + 4551 "00001000" // /* MW 3 */ + 4552 "01010000" // /* MW 2 */ + 4553 "00001111" // /* MW 1 */ + 4554 "11111000" // VBCST.16 x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4555 "01110010" // /* MW 3 */ + 4556 "00001101" // /* MW 2 */ + 4557 "00011001" // /* MW 1 */ + 4558 "01000100" // MOVXM r5, #15616 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4559 "00000000" // /* MW 5 */ + 4560 "10111010" // /* MW 4 */ + 4561 "00110010" // /* MW 3 */ + 4562 "00000000" // /* MW 2 */ + 4563 "00000000" // /* MW 1 */ + 4564 "11111000" // VBCST.16 x3, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4565 "01110010" // /* MW 3 */ + 4566 "10010101" // /* MW 2 */ + 4567 "00011001" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 + 4568 "01000100" // MOVXM r17, #16128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4569 "00000000" // /* MW 5 */ + 4570 "10111110" // /* MW 4 */ + 4571 "00111000" // /* MW 3 */ + 4572 "00000000" // /* MW 2 */ + 4573 "00000000" // /* MW 1 */ +.src_ref 4 "abs.hpp" 32 22 first + 4574 "01111000" // VBAND x11, x6, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4575 "00101011" // /* MW 3 */ + 4576 "10110001" // /* MW 2 */ + 4577 "00011101" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 5 "mul_acc32_fp.hpp" 38 105 +.src_ref 5 "mul_acc32_fp.hpp" 38 105 +.src_ref 5 "mul_acc32_fp.hpp" 39 105 +.src_ref 5 "mul_acc32_fp.hpp" 39 105 + 4578 "11100100" // MOVX r17, #828; VBCST.16 x5, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4579 "11100101" // /* MW 5 */ + 4580 "10001010" // /* MW 4 */ + 4581 "00100101" // /* MW 3 */ + 4582 "01011110" // /* MW 2 */ + 4583 "01100100" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first + 4584 "01100010" // VMAX_LT.bf16 x9, r16, x8, x1; VMUL.f dm4, x3, x11, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4585 "01100001" // /* MW 7 */ + 4586 "11100111" // /* MW 6 */ + 4587 "10001100" // /* MW 5 */ + 4588 "11100110" // /* MW 4 */ + 4589 "11101100" // /* MW 3 */ + 4590 "11000000" // /* MW 2 */ + 4591 "00000100" // /* MW 1 */ +.src_ref 4 "abs.hpp" 32 22 first + 4592 "01111000" // VBAND x8, x9, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4593 "00101011" // /* MW 3 */ + 4594 "01001001" // /* MW 2 */ + 4595 "00011100" // /* MW 1 */ + 4596 "01000100" // MOVXM r2, #16000 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4597 "00000000" // /* MW 5 */ + 4598 "00111101" // /* MW 4 */ + 4599 "00110001" // /* MW 3 */ + 4600 "00000000" // /* MW 2 */ + 4601 "00000000" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first + 4602 "01100010" // VBCST.16 x4, r2; VMUL.f dm2, x3, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4603 "00000001" // /* MW 7 */ + 4604 "11100111" // /* MW 6 */ + 4605 "10001010" // /* MW 5 */ + 4606 "11100110" // /* MW 4 */ + 4607 "01110010" // /* MW 3 */ + 4608 "00001001" // /* MW 2 */ + 4609 "00000010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 940 83 first + 4610 "11111000" // VCONV.fp32.bf16 cml0, x5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4611 "10001010" // /* MW 3 */ + 4612 "00001011" // /* MW 2 */ + 4613 "00011000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 5 "mul_acc32_fp.hpp" 38 105 first + 4614 "01100010" // VMIN_GE.bf16 x8, r16, x10, x0; VMAC.f dm3, dm0, x6, x4, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4615 "10000001" // /* MW 7 */ + 4616 "00001100" // /* MW 6 */ + 4617 "10001011" // /* MW 5 */ + 4618 "11100110" // /* MW 4 */ + 4619 "00101100" // /* MW 3 */ + 4620 "01010000" // /* MW 2 */ + 4621 "00000100" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 38 105 +.src_ref 2 "elementwise_unary.h" 125 8 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 4622 "01010110" // VCONV.bf16.fp32 x11, cml4; MOVXM ls, #4672; VMAC.f dm1, dm0, x9, x4, r17 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4623 "10000001" // /* MW 11 */ + 4624 "00010010" // /* MW 10 */ + 4625 "10001001" // /* MW 9 */ + 4626 "00000010" // /* MW 8 */ + 4627 "00100100" // /* MW 7 */ + 4628 "10001111" // /* MW 6 */ + 4629 "00000000" // /* MW 5 */ + 4630 "00000000" // /* MW 4 */ + 4631 "11000000" // /* MW 3 */ + 4632 "01000010" // /* MW 2 */ + 4633 "10110010" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4634 "11111000" // VMAX_LT.bf16 x6, r16, x8, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4635 "11101100" // /* MW 3 */ + 4636 "01000000" // /* MW 2 */ + 4637 "00011011" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 39 105 first +.src_ref 2 "elementwise_unary.h" 125 8 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4638 "01011010" // MOVXM le, #4768; VMSC.f dm2, dm3, x11, x6, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4639 "11000011" // /* MW 9 */ + 4640 "01110110" // /* MW 8 */ + 4641 "10001010" // /* MW 7 */ + 4642 "00000010" // /* MW 6 */ + 4643 "00101010" // /* MW 5 */ + 4644 "10110111" // /* MW 4 */ + 4645 "00000000" // /* MW 3 */ + 4646 "00000000" // /* MW 2 */ + 4647 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_unary.h" 125 8 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4648 "00000010" // VCONV.bf16.fp32 x5, cml2; ADD.NC lc, r0, #-2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4649 "10000000" // /* MW 7 */ + 4650 "00111111" // /* MW 6 */ + 4651 "10111000" // /* MW 5 */ + 4652 "00000010" // /* MW 4 */ + 4653 "11000000" // /* MW 3 */ + 4654 "00100010" // /* MW 2 */ + 4655 "01010010" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first + 4656 "11111000" // VMIN_GE.bf16 x8, r16, x7, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4657 "00101100" // /* MW 3 */ + 4658 "00111000" // /* MW 2 */ + 4659 "00011100" // /* MW 1 */ +.src_ref 4 "abs.hpp" 32 22 first + 4660 "11110110" // NOPA; NOPB; NOPS; VBAND x11, x6, x2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4661 "10110000" // /* MW 11 */ + 4662 "10010101" // /* MW 10 */ + 4663 "11011000" // /* MW 9 */ + 4664 "00000010" // /* MW 8 */ + 4665 "01011011" // /* MW 7 */ + 4666 "00000001" // /* MW 6 */ + 4667 "00100000" // /* MW 5 */ + 4668 "00000000" // /* MW 4 */ + 4669 "11110000" // /* MW 3 */ + 4670 "00101100" // /* MW 2 */ + 4671 "00000000" // /* MW 1 */ +.label ZLS_F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_192 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 5 "mul_acc32_fp.hpp" 39 105 first +.src_ref 2 "elementwise_unary.h" 142 19 first +.begin_of_loop +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first +.loop_nesting 1 + 4672 "01001010" // VLDB x10, [p0], #64; VMAX_LT.bf16 x9, r16, x8, x1; VMSC.f dm4, dm1, x5, x9, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4673 "00100011" // /* MW 9 */ + 4674 "00101011" // /* MW 8 */ + 4675 "10001100" // /* MW 7 */ + 4676 "11100110" // /* MW 6 */ + 4677 "11101100" // /* MW 5 */ + 4678 "11000000" // /* MW 4 */ + 4679 "01101100" // /* MW 3 */ + 4680 "00111101" // /* MW 2 */ + 4681 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "abs.hpp" 32 22 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_unary.h" 161 19 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 4682 "01001010" // VLDB x7, [p0], #64; VBAND x8, x9, x2; VMUL.f dm4, x3, x11, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4683 "01100001" // /* MW 9 */ + 4684 "11100111" // /* MW 8 */ + 4685 "10001100" // /* MW 7 */ + 4686 "01100110" // /* MW 6 */ + 4687 "00101011" // /* MW 5 */ + 4688 "01001001" // /* MW 4 */ + 4689 "11101100" // /* MW 3 */ + 4690 "00111011" // /* MW 2 */ + 4691 "00000000" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 38 105 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4692 "01001000" // VMAC.f dm3, dm0, x6, x4, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4693 "10000001" // /* MW 3 */ + 4694 "00001100" // /* MW 2 */ + 4695 "10001011" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4696 "01001000" // VMUL.f dm2, x3, x8, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4697 "00000001" // /* MW 3 */ + 4698 "11100111" // /* MW 2 */ + 4699 "10001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 38 105 first +.src_ref 2 "elementwise_unary.h" 166 20 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4700 "01100010" // VST.CONV.bf16.fp32 cml2, [p1], #64; VMAC.f dm1, dm0, x9, x4, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4701 "10000001" // /* MW 7 */ + 4702 "00010010" // /* MW 6 */ + 4703 "10001001" // /* MW 5 */ + 4704 "00000010" // /* MW 4 */ + 4705 "01100000" // /* MW 3 */ + 4706 "10100100" // /* MW 2 */ + 4707 "00100011" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4709 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_unary.h" 147 20 first +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4710 "01111010" // NOPA; VST.CONV.bf16.fp32 cml4, [p1], #64;NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4711 "00000000" // /* MW 9 */ + 4712 "00000000" // /* MW 8 */ + 4713 "00000000" // /* MW 7 */ + 4714 "00000000" // /* MW 6 */ + 4715 "00100011" // /* MW 5 */ + 4716 "00011110" // /* MW 4 */ + 4717 "11110001" // /* MW 3 */ + 4718 "00101100" // /* MW 2 */ + 4719 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 5 "accum.hpp" 1119 102 first +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 4720 "11100001" // NOPA; NOPB; VCONV.bf16.fp32 x11, cml4; NOPX; VMIN_GE.bf16 x8, r16, x10, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4721 "00000000" // /* MW 15 */ + 4722 "00000000" // /* MW 14 */ + 4723 "01111000" // /* MW 13 */ + 4724 "00010110" // /* MW 12 */ + 4725 "00101000" // /* MW 11 */ + 4726 "00000010" // /* MW 10 */ + 4727 "00000000" // /* MW 9 */ + 4728 "00000000" // /* MW 8 */ + 4729 "00010110" // /* MW 7 */ + 4730 "10010010" // /* MW 6 */ + 4731 "00100101" // /* MW 5 */ + 4732 "00000000" // /* MW 4 */ + 4733 "11110000" // /* MW 3 */ + 4734 "00101100" // /* MW 2 */ + 4735 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.aggressive_scheduled_block_id 4 +.noswbrkpt + 4736 "11100001" // NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x8, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4737 "00000000" // /* MW 15 */ + 4738 "00000000" // /* MW 14 */ + 4739 "01111000" // /* MW 13 */ + 4740 "01110110" // /* MW 12 */ + 4741 "10100000" // /* MW 11 */ + 4742 "00000001" // /* MW 10 */ + 4743 "00000000" // /* MW 9 */ + 4744 "00000000" // /* MW 8 */ + 4745 "01011011" // /* MW 7 */ + 4746 "00000001" // /* MW 6 */ + 4747 "00100000" // /* MW 5 */ + 4748 "00000000" // /* MW 4 */ + 4749 "11110000" // /* MW 3 */ + 4750 "00101100" // /* MW 2 */ + 4751 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 39 105 first +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4752 "00011011" // NOPA; NOPB; VCONV.bf16.fp32 x5, cml2; NOPX; VMIN_GE.bf16 x8, r16, x7, x0; VMSC.f dm2, dm3, x11, x6, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4753 "10110110" // /* MW 15 */ + 4754 "01010011" // /* MW 14 */ + 4755 "01111100" // /* MW 13 */ + 4756 "00010110" // /* MW 12 */ + 4757 "00011100" // /* MW 11 */ + 4758 "00000010" // /* MW 10 */ + 4759 "00000000" // /* MW 9 */ + 4760 "00000000" // /* MW 8 */ + 4761 "00010110" // /* MW 7 */ + 4762 "10010001" // /* MW 6 */ + 4763 "00100010" // /* MW 5 */ + 4764 "00000000" // /* MW 4 */ + 4765 "11110000" // /* MW 3 */ + 4766 "00101100" // /* MW 2 */ + 4767 "00000000" // /* MW 1 */ +.label ZLE_F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_288 +.src_ref 4 "abs.hpp" 32 22 first +.end_of_loop + 4768 "11100001" // NOPA; NOPB; NOPS; NOPX; VBAND x11, x6, x2; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4769 "00000000" // /* MW 15 */ + 4770 "00000000" // /* MW 14 */ + 4771 "10111000" // /* MW 13 */ + 4772 "10010101" // /* MW 12 */ + 4773 "11011000" // /* MW 11 */ + 4774 "00000010" // /* MW 10 */ + 4775 "00000000" // /* MW 9 */ + 4776 "00000000" // /* MW 8 */ + 4777 "01011011" // /* MW 7 */ + 4778 "00000001" // /* MW 6 */ + 4779 "00100000" // /* MW 5 */ + 4780 "00000000" // /* MW 4 */ + 4781 "11110000" // /* MW 3 */ + 4782 "00101100" // /* MW 2 */ + 4783 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 5 "mul_acc32_fp.hpp" 39 105 first +.loop_nesting 0 + 4784 "01100010" // VMAX_LT.bf16 x9, r16, x8, x1; VMSC.f dm4, dm1, x5, x9, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4785 "00100011" // /* MW 7 */ + 4786 "00101011" // /* MW 6 */ + 4787 "10001100" // /* MW 5 */ + 4788 "11100110" // /* MW 4 */ + 4789 "11101100" // /* MW 3 */ + 4790 "11000000" // /* MW 2 */ + 4791 "00000100" // /* MW 1 */ +.src_ref 4 "abs.hpp" 32 22 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 4792 "01111000" // VBAND x8, x9, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4793 "00101011" // /* MW 3 */ + 4794 "01001001" // /* MW 2 */ + 4795 "00011100" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.aggressive_scheduled_block_id 5 +.noswbrkpt + 4796 "01001000" // VMUL.f dm4, x3, x11, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4797 "01100001" // /* MW 3 */ + 4798 "11100111" // /* MW 2 */ + 4799 "10001100" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 4800 "01001000" // VMUL.f dm2, x3, x8, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4801 "00000001" // /* MW 3 */ + 4802 "11100111" // /* MW 2 */ + 4803 "10001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_unary.h" 166 20 first +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 4804 "00011000" // VST.CONV.bf16.fp32 cml2, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4805 "00100011" // /* MW 3 */ + 4806 "00011101" // /* MW 2 */ + 4807 "00001001" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 4808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4809 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_unary.h" 147 20 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4810 "00011000" // VST.CONV.bf16.fp32 cml4, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4811 "00100011" // /* MW 3 */ + 4812 "00011110" // /* MW 2 */ + 4813 "00001001" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 38 105 first + 4814 "01001000" // VMAC.f dm3, dm0, x6, x4, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4815 "10000001" // /* MW 3 */ + 4816 "00001100" // /* MW 2 */ + 4817 "10001011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 38 105 + 4818 "01100010" // VCONV.bf16.fp32 x11, cml4; VMAC.f dm1, dm0, x9, x4, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4819 "10000001" // /* MW 7 */ + 4820 "00010010" // /* MW 6 */ + 4821 "10001001" // /* MW 5 */ + 4822 "00000010" // /* MW 4 */ + 4823 "11000000" // /* MW 3 */ + 4824 "01000010" // /* MW 2 */ + 4825 "10110010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 + 4826 "00011000" // VCONV.bf16.fp32 x5, cml2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4827 "00010110" // /* MW 3 */ + 4828 "10010001" // /* MW 2 */ + 4829 "00001010" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 39 105 first + 4830 "01001000" // VMSC.f dm2, dm3, x11, x6, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4831 "11000011" // /* MW 3 */ + 4832 "01110110" // /* MW 2 */ + 4833 "10001010" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 39 105 + 4834 "01001000" // VMSC.f dm4, dm1, x5, x9, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4835 "00100011" // /* MW 3 */ + 4836 "00101011" // /* MW 2 */ + 4837 "10001100" // /* MW 1 */ + 4838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4839 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 129 4 first + 4840 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 4841 "00000000" // /* MW 3 */ + 4842 "00101000" // /* MW 2 */ + 4843 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4845 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4846 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4847 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_unary.h" 166 20 first +.delay_slot + 4848 "00011000" // VST.CONV.bf16.fp32 cml2, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4849 "00100011" // /* MW 3 */ + 4850 "00011101" // /* MW 2 */ + 4851 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_unary.h" 147 20 first +.delay_slot + 4852 "00011000" // VST.CONV.bf16.fp32 cml4, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4853 "00100011" // /* MW 3 */ + 4854 "00011110" // /* MW 2 */ + 4855 "00001001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4856 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E__end +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_end0 + 4857 "00000000" // /* MW 1 */ +.label __Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_sigmoid1d _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 6 "superkernels.cpp" 210 first +.src_ref 6 "superkernels.cpp" 215 6 +.function_start + 4864 "01000100" // MOVXM p3, #508416 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4865 "00000000" // /* MW 5 */ + 4866 "11000100" // /* MW 4 */ + 4867 "11000110" // /* MW 3 */ + 4868 "00000111" // /* MW 2 */ + 4869 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 215 6 first + 4870 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4871 "11000001" // /* MW 5 */ + 4872 "10110101" // /* MW 4 */ + 4873 "11011000" // /* MW 3 */ + 4874 "11000010" // /* MW 2 */ + 4875 "01100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 210 + 4876 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4877 "00000001" // /* MW 5 */ + 4878 "00000000" // /* MW 4 */ + 4879 "00000000" // /* MW 3 */ + 4880 "00001000" // /* MW 2 */ + 4881 "00000000" // /* MW 1 */ + 4882 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4883 "01110000" // /* MW 7 */ + 4884 "11010000" // /* MW 6 */ + 4885 "00001011" // /* MW 5 */ + 4886 "00000000" // /* MW 4 */ + 4887 "10110000" // /* MW 3 */ + 4888 "01100011" // /* MW 2 */ + 4889 "11111111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 212 11 + 4890 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #508420 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4891 "00010001" // /* MW 9 */ + 4892 "00000010" // /* MW 8 */ + 4893 "00110001" // /* MW 7 */ + 4894 "11110011" // /* MW 6 */ + 4895 "00000001" // /* MW 5 */ + 4896 "00000000" // /* MW 4 */ + 4897 "10110000" // /* MW 3 */ + 4898 "10000010" // /* MW 2 */ + 4899 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 + 4900 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4901 "11000000" // /* MW 3 */ + 4902 "11010100" // /* MW 2 */ + 4903 "00011011" // /* MW 1 */ + 4904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4905 "00000000" // /* MW 1 */ + 4906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4907 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 215 6 +.src_ref 6 "superkernels.cpp" 215 16 + 4908 "10000100" // JNZ r16, #5072 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5072 delay_slots=5 */ + 4909 "00000001" // /* MW 5 */ + 4910 "01000000" // /* MW 4 */ + 4911 "11101000" // /* MW 3 */ + 4912 "00001001" // /* MW 2 */ + 4913 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 212 22 first +.delay_slot + 4914 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4915 "10010000" // /* MW 3 */ + 4916 "01100010" // /* MW 2 */ + 4917 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 212 30 +.delay_slot + 4918 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4919 "11111011" // /* MW 3 */ + 4920 "01100011" // /* MW 2 */ + 4921 "00010100" // /* MW 1 */ +.delay_slot + 4922 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4923 "00111101" // /* MW 3 */ + 4924 "11110100" // /* MW 2 */ + 4925 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 6 "superkernels.cpp" 212 11 +.delay_slot + 4926 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4927 "01110000" // /* MW 7 */ + 4928 "01100000" // /* MW 6 */ + 4929 "00110000" // /* MW 5 */ + 4930 "00000011" // /* MW 4 */ + 4931 "00110000" // /* MW 3 */ + 4932 "11000110" // /* MW 2 */ + 4933 "11000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 218 4 +.src_ref 6 "superkernels.cpp" 229 2 +.delay_slot + 4934 "01000100" // MOVXM p0, #508800 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4935 "00000000" // /* MW 5 */ + 4936 "11000111" // /* MW 4 */ + 4937 "11000000" // /* MW 3 */ + 4938 "00000111" // /* MW 2 */ + 4939 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4940 "01000100" // MOVXM p2, #508448 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4941 "01000000" // /* MW 5 */ + 4942 "11000100" // /* MW 4 */ + 4943 "11000100" // /* MW 3 */ + 4944 "00000111" // /* MW 2 */ + 4945 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4946 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #508444 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4947 "00010000" // /* MW 9 */ + 4948 "00001110" // /* MW 8 */ + 4949 "00110001" // /* MW 7 */ + 4950 "11110001" // /* MW 6 */ + 4951 "00000001" // /* MW 5 */ + 4952 "00000000" // /* MW 4 */ + 4953 "11100000" // /* MW 3 */ + 4954 "11000000" // /* MW 2 */ + 4955 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4957 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 218 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 4958 "00000100" // JL #4400 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4400 delay_slots=5 */ + 4959 "00000001" // /* MW 5 */ + 4960 "00000000" // /* MW 4 */ + 4961 "10011000" // /* MW 3 */ + 4962 "00001000" // /* MW 2 */ + 4963 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4964 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4965 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4967 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4968 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4969 "00110001" // /* MW 3 */ + 4970 "00100000" // /* MW 2 */ + 4971 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 4972 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4973 "00000101" // /* MW 3 */ + 4974 "00100000" // /* MW 2 */ + 4975 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 4976 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4977 "00000000" // /* MW 15 */ + 4978 "00000000" // /* MW 14 */ + 4979 "01111000" // /* MW 13 */ + 4980 "10100101" // /* MW 12 */ + 4981 "00000001" // /* MW 11 */ + 4982 "00000000" // /* MW 10 */ + 4983 "00000000" // /* MW 9 */ + 4984 "10000000" // /* MW 8 */ + 4985 "00010001" // /* MW 7 */ + 4986 "00000110" // /* MW 6 */ + 4987 "00100010" // /* MW 5 */ + 4988 "00000000" // /* MW 4 */ + 4989 "11110000" // /* MW 3 */ + 4990 "00101100" // /* MW 2 */ + 4991 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 222 18 +.return_address + 4992 "01000100" // MOVXM p2, #508420 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4993 "00001000" // /* MW 5 */ + 4994 "11000100" // /* MW 4 */ + 4995 "11000100" // /* MW 3 */ + 4996 "00000111" // /* MW 2 */ + 4997 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 222 18 first +.src_ref 6 "superkernels.cpp" 222 46 + 4998 "10111010" // LDA r16, [p2]; MOVXM p2, #508800 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4999 "00010000" // /* MW 9 */ + 5000 "11000000" // /* MW 8 */ + 5001 "00110001" // /* MW 7 */ + 5002 "11110001" // /* MW 6 */ + 5003 "00000001" // /* MW 5 */ + 5004 "00000000" // /* MW 4 */ + 5005 "11010000" // /* MW 3 */ + 5006 "11000010" // /* MW 2 */ + 5007 "01000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 220 32 +.src_ref 6 "superkernels.cpp" 222 46 +.src_ref 6 "superkernels.cpp" 229 2 + 5008 "10111010" // LDA r17, [p2]; MOVXM p2, #508800 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5009 "00010000" // /* MW 9 */ + 5010 "11000000" // /* MW 8 */ + 5011 "00110001" // /* MW 7 */ + 5012 "11110001" // /* MW 6 */ + 5013 "00000001" // /* MW 5 */ + 5014 "00000000" // /* MW 4 */ + 5015 "11010000" // /* MW 3 */ + 5016 "11000110" // /* MW 2 */ + 5017 "01000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 220 32 first +.src_ref 6 "superkernels.cpp" 222 16 +.src_ref 6 "superkernels.cpp" 227 47 + 5018 "10111010" // LDA.u16 r18, [p2, #8]; MOVXM p1, #508424 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5019 "00010000" // /* MW 9 */ + 5020 "00000100" // /* MW 8 */ + 5021 "10110001" // /* MW 7 */ + 5022 "11110000" // /* MW 6 */ + 5023 "00000001" // /* MW 5 */ + 5024 "00000000" // /* MW 4 */ + 5025 "01010000" // /* MW 3 */ + 5026 "11001011" // /* MW 2 */ + 5027 "01001000" // /* MW 1 */ + 5028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5029 "00000000" // /* MW 1 */ + 5030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5031 "00000000" // /* MW 1 */ + 5032 "10000100" // J #5088 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=5088 delay_slots=5 */ + 5033 "00000000" // /* MW 5 */ + 5034 "00000000" // /* MW 4 */ + 5035 "11110000" // /* MW 3 */ + 5036 "00001001" // /* MW 2 */ + 5037 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 220 13 +.delay_slot + 5038 "01000100" // MOVXM p0, #508440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5039 "00110000" // /* MW 5 */ + 5040 "11000100" // /* MW 4 */ + 5041 "11000000" // /* MW 3 */ + 5042 "00000111" // /* MW 2 */ + 5043 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5045 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 222 27 first +.delay_slot + 5046 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5047 "00001111" // /* MW 3 */ + 5048 "01100001" // /* MW 2 */ + 5049 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 220 13 first +.delay_slot + 5050 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5051 "10100011" // /* MW 5 */ + 5052 "00001100" // /* MW 4 */ + 5053 "11110000" // /* MW 3 */ + 5054 "00101100" // /* MW 2 */ + 5055 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 222 16 first +.delay_slot + 5056 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5057 "00000000" // /* MW 15 */ + 5058 "00000000" // /* MW 14 */ + 5059 "01111000" // /* MW 13 */ + 5060 "10100101" // /* MW 12 */ + 5061 "00000001" // /* MW 11 */ + 5062 "00000000" // /* MW 10 */ + 5063 "00000000" // /* MW 9 */ + 5064 "10000000" // /* MW 8 */ + 5065 "00010001" // /* MW 7 */ + 5066 "00000110" // /* MW 6 */ + 5067 "00100001" // /* MW 5 */ + 5068 "00000000" // /* MW 4 */ + 5069 "11110000" // /* MW 3 */ + 5070 "00101100" // /* MW 2 */ + 5071 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 6 "superkernels.cpp" 227 47 +.src_ref 6 "superkernels.cpp" 229 2 + 5072 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #508424; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5073 "00000000" // /* MW 15 */ + 5074 "00000000" // /* MW 14 */ + 5075 "00010000" // /* MW 13 */ + 5076 "00000100" // /* MW 12 */ + 5077 "10110001" // /* MW 11 */ + 5078 "11110000" // /* MW 10 */ + 5079 "00000001" // /* MW 9 */ + 5080 "00000000" // /* MW 8 */ + 5081 "10001011" // /* MW 7 */ + 5082 "10000000" // /* MW 6 */ + 5083 "00100010" // /* MW 5 */ + 5084 "00000000" // /* MW 4 */ + 5085 "11110000" // /* MW 3 */ + 5086 "00101100" // /* MW 2 */ + 5087 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 +.src_ref 1 "io_buffer_main.h" 242 49 first + 5088 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5089 "00000000" // /* MW 7 */ + 5090 "11000011" // /* MW 6 */ + 5091 "10110011" // /* MW 5 */ + 5092 "00000011" // /* MW 4 */ + 5093 "01100000" // /* MW 3 */ + 5094 "10010001" // /* MW 2 */ + 5095 "01110011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 226 2 + 5096 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5097 "00010000" // /* MW 9 */ + 5098 "00000000" // /* MW 8 */ + 5099 "00110001" // /* MW 7 */ + 5100 "11110000" // /* MW 6 */ + 5101 "00000001" // /* MW 5 */ + 5102 "00000000" // /* MW 4 */ + 5103 "11010000" // /* MW 3 */ + 5104 "11101110" // /* MW 2 */ + 5105 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 5106 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5107 "00010110" // /* MW 3 */ + 5108 "11111110" // /* MW 2 */ + 5109 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 5110 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5111 "00110110" // /* MW 3 */ + 5112 "11111110" // /* MW 2 */ + 5113 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first + 5114 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5115 "01010110" // /* MW 3 */ + 5116 "01000110" // /* MW 2 */ + 5117 "00000111" // /* MW 1 */ + 5118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5119 "00000000" // /* MW 1 */ + 5120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5121 "00000000" // /* MW 1 */ + 5122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5123 "00000000" // /* MW 1 */ + 5124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5125 "00000000" // /* MW 1 */ + 5126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5127 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 5128 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5129 "00000010" // /* MW 3 */ + 5130 "01100001" // /* MW 2 */ + 5131 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 + 5132 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5133 "00010001" // /* MW 3 */ + 5134 "00000110" // /* MW 2 */ + 5135 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 + 5136 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5137 "11111101" // /* MW 3 */ + 5138 "11100000" // /* MW 2 */ + 5139 "00010111" // /* MW 1 */ + 5140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5141 "00000000" // /* MW 1 */ + 5142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5143 "00000000" // /* MW 1 */ + 5144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5145 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 5146 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5147 "00001000" // /* MW 3 */ + 5148 "10010011" // /* MW 2 */ + 5149 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 227 45 + 5150 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5151 "10000001" // /* MW 5 */ + 5152 "10101101" // /* MW 4 */ + 5153 "10100111" // /* MW 3 */ + 5154 "00000000" // /* MW 2 */ + 5155 "00000100" // /* MW 1 */ + 5156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5157 "00000000" // /* MW 1 */ + 5158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5159 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 226 2 first + 5160 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5161 "00110110" // /* MW 3 */ + 5162 "00000110" // /* MW 2 */ + 5163 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 1 "io_buffer_main.h" 348 51 + 5164 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5165 "10000001" // /* MW 5 */ + 5166 "11011101" // /* MW 4 */ + 5167 "11011100" // /* MW 3 */ + 5168 "11001010" // /* MW 2 */ + 5169 "11000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 227 47 first + 5170 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5171 "01110110" // /* MW 3 */ + 5172 "00000110" // /* MW 2 */ + 5173 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 5174 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5175 "10011110" // /* MW 3 */ + 5176 "01011100" // /* MW 2 */ + 5177 "00000111" // /* MW 1 */ + 5178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5179 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 229 2 first +.no_stack_arguments + 5180 "00000100" // JL #4480 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4480 delay_slots=5 */ + 5181 "00000001" // /* MW 5 */ + 5182 "00000000" // /* MW 4 */ + 5183 "11000000" // /* MW 3 */ + 5184 "00001000" // /* MW 2 */ + 5185 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5187 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 226 2 first +.delay_slot + 5188 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5189 "00000111" // /* MW 3 */ + 5190 "01100010" // /* MW 2 */ + 5191 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 226 2 +.delay_slot + 5192 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5193 "00110001" // /* MW 3 */ + 5194 "00000110" // /* MW 2 */ + 5195 "00001000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 227 45 first +.delay_slot + 5196 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5197 "00001101" // /* MW 3 */ + 5198 "11100001" // /* MW 2 */ + 5199 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 227 45 +.delay_slot + 5200 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5201 "00000000" // /* MW 15 */ + 5202 "00000000" // /* MW 14 */ + 5203 "10101000" // /* MW 13 */ + 5204 "10100000" // /* MW 12 */ + 5205 "00110100" // /* MW 11 */ + 5206 "00000000" // /* MW 10 */ + 5207 "00000000" // /* MW 9 */ + 5208 "00000000" // /* MW 8 */ + 5209 "01011011" // /* MW 7 */ + 5210 "00000001" // /* MW 6 */ + 5211 "00100000" // /* MW 5 */ + 5212 "00000000" // /* MW 4 */ + 5213 "11110000" // /* MW 3 */ + 5214 "00101100" // /* MW 2 */ + 5215 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first +.src_ref 6 "superkernels.cpp" 231 6 +.src_ref 6 "superkernels.cpp" 232 14 +.return_address + 5216 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5217 "00010000" // /* MW 9 */ + 5218 "00000000" // /* MW 8 */ + 5219 "00110001" // /* MW 7 */ + 5220 "11110011" // /* MW 6 */ + 5221 "00000001" // /* MW 5 */ + 5222 "00000000" // /* MW 4 */ + 5223 "11010000" // /* MW 3 */ + 5224 "11000110" // /* MW 2 */ + 5225 "11001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 449 8 + 5226 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5227 "00000101" // /* MW 3 */ + 5228 "00100000" // /* MW 2 */ + 5229 "00010000" // /* MW 1 */ + 5230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5231 "00000000" // /* MW 1 */ + 5232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5233 "00000000" // /* MW 1 */ + 5234 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5235 "00000000" // /* MW 1 */ + 5236 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5237 "00000000" // /* MW 1 */ + 5238 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5239 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 5240 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5241 "00001000" // /* MW 3 */ + 5242 "01010001" // /* MW 2 */ + 5243 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first +.src_ref 6 "superkernels.cpp" 231 19 + 5244 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #508440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5245 "00010000" // /* MW 9 */ + 5246 "00001100" // /* MW 8 */ + 5247 "00110001" // /* MW 7 */ + 5248 "11110001" // /* MW 6 */ + 5249 "00000001" // /* MW 5 */ + 5250 "00000000" // /* MW 4 */ + 5251 "11010000" // /* MW 3 */ + 5252 "11001110" // /* MW 2 */ + 5253 "11111100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 231 6 first + 5254 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5255 "00110110" // /* MW 3 */ + 5256 "00000110" // /* MW 2 */ + 5257 "00000110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 231 19 + 5258 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5259 "01010110" // /* MW 3 */ + 5260 "00000110" // /* MW 2 */ + 5261 "00000010" // /* MW 1 */ + 5262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5263 "00000000" // /* MW 1 */ + 5264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5265 "00000000" // /* MW 1 */ + 5266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5267 "00000000" // /* MW 1 */ + 5268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5269 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 first + 5270 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5271 "00110001" // /* MW 3 */ + 5272 "00100001" // /* MW 2 */ + 5273 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 5274 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5275 "00010001" // /* MW 3 */ + 5276 "11100110" // /* MW 2 */ + 5277 "00001111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 231 16 first + 5278 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5279 "00101000" // /* MW 3 */ + 5280 "01100001" // /* MW 2 */ + 5281 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 231 6 + 5282 "10000100" // JNZ r16, #5312 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5312 delay_slots=5 */ + 5283 "00000001" // /* MW 5 */ + 5284 "01000000" // /* MW 4 */ + 5285 "01100000" // /* MW 3 */ + 5286 "00001010" // /* MW 2 */ + 5287 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5289 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5291 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5293 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5294 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5295 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5297 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 232 14 + 5298 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5299 "00000001" // /* MW 3 */ + 5300 "00100000" // /* MW 2 */ + 5301 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 232 14 first + 5302 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5303 "00000000" // /* MW 9 */ + 5304 "00000000" // /* MW 8 */ + 5305 "00000000" // /* MW 7 */ + 5306 "10000000" // /* MW 6 */ + 5307 "00010001" // /* MW 5 */ + 5308 "00000110" // /* MW 4 */ + 5309 "11110110" // /* MW 3 */ + 5310 "00101100" // /* MW 2 */ + 5311 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 6 "superkernels.cpp" 234 + 5312 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5313 "00111001" // /* MW 3 */ + 5314 "11110100" // /* MW 2 */ + 5315 "00000111" // /* MW 1 */ + 5316 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5317 "00011001" // /* MW 3 */ + 5318 "11111011" // /* MW 2 */ + 5319 "00000111" // /* MW 1 */ + 5320 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5321 "00000000" // /* MW 1 */ + 5322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5323 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 5324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5325 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 5326 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5327 "11110001" // /* MW 3 */ + 5328 "11111101" // /* MW 2 */ + 5329 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5331 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 234 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5332 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 5333 "00000000" // /* MW 3 */ + 5334 "00101000" // /* MW 2 */ + 5335 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5336 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5337 "10100000" // /* MW 3 */ + 5338 "01100111" // /* MW 2 */ + 5339 "00011111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 234 +.delay_slot + 5340 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5341 "00000001" // /* MW 5 */ + 5342 "00000000" // /* MW 4 */ + 5343 "00000000" // /* MW 3 */ + 5344 "11111000" // /* MW 2 */ + 5345 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5347 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5349 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 5351 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.src_ref 2 "elementwise_binary.h" 100 first +.src_ref 2 "elementwise_binary.h" 103 4 first +.function_start + 5360 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 5361 "00000000" // /* MW 3 */ + 5362 "00101000" // /* MW 2 */ + 5363 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 5364 "01000100" // MOVXM p0, #508768 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5365 "11000000" // /* MW 5 */ + 5366 "11000110" // /* MW 4 */ + 5367 "11000000" // /* MW 3 */ + 5368 "00000111" // /* MW 2 */ + 5369 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 5370 "10111000" // MOV m0, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5371 "10000000" // /* MW 3 */ + 5372 "00000000" // /* MW 2 */ + 5373 "00011000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 first +.delay_slot + 5374 "10011000" // ST m0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5375 "00000001" // /* MW 3 */ + 5376 "00000100" // /* MW 2 */ + 5377 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 5378 "10011000" // ST m0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5379 "00000001" // /* MW 3 */ + 5380 "00010100" // /* MW 2 */ + 5381 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_end0 + 5383 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.src_ref 2 "elementwise_binary.h" 89 first +.src_ref 2 "elementwise_binary.h" 92 22 +.src_ref 2 "elementwise_binary.h" 92 24 first +.function_start + 5392 "10111010" // LDA el0, [p1], #4; MOVXM p0, #508736 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5393 "00010000" // /* MW 9 */ + 5394 "10100000" // /* MW 8 */ + 5395 "00110001" // /* MW 7 */ + 5396 "11110000" // /* MW 6 */ + 5397 "00000001" // /* MW 5 */ + 5398 "00000000" // /* MW 4 */ + 5399 "11010000" // /* MW 3 */ + 5400 "10000101" // /* MW 2 */ + 5401 "00100011" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 89 + 5402 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5403 "00000001" // /* MW 5 */ + 5404 "00000000" // /* MW 4 */ + 5405 "00000000" // /* MW 3 */ + 5406 "00001000" // /* MW 2 */ + 5407 "00000000" // /* MW 1 */ + 5408 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5409 "00111101" // /* MW 3 */ + 5410 "11111100" // /* MW 2 */ + 5411 "00001111" // /* MW 1 */ + 5412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5413 "00000000" // /* MW 1 */ + 5414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5415 "00000000" // /* MW 1 */ + 5416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5417 "00000000" // /* MW 1 */ + 5418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5419 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 92 22 first + 5420 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5421 "00101001" // /* MW 3 */ + 5422 "00011100" // /* MW 2 */ + 5423 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 93 24 first + 5424 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5425 "00101110" // /* MW 3 */ + 5426 "00011100" // /* MW 2 */ + 5427 "00000001" // /* MW 1 */ + 5428 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5429 "00000000" // /* MW 1 */ + 5430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5431 "00000000" // /* MW 1 */ + 5432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5433 "00000000" // /* MW 1 */ + 5434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5435 "00000000" // /* MW 1 */ + 5436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5437 "00000000" // /* MW 1 */ + 5438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5439 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 93 22 + 5440 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5441 "00101001" // /* MW 3 */ + 5442 "00011100" // /* MW 2 */ + 5443 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 94 24 first + 5444 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5445 "00101110" // /* MW 3 */ + 5446 "00000100" // /* MW 2 */ + 5447 "00000001" // /* MW 1 */ + 5448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5449 "00000000" // /* MW 1 */ + 5450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5451 "00000000" // /* MW 1 */ + 5452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5453 "00000000" // /* MW 1 */ + 5454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5455 "00000000" // /* MW 1 */ + 5456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5457 "00000000" // /* MW 1 */ + 5458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5459 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 94 22 + 5460 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5461 "00101001" // /* MW 3 */ + 5462 "00011100" // /* MW 2 */ + 5463 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 95 24 first + 5464 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5465 "00101110" // /* MW 3 */ + 5466 "00010100" // /* MW 2 */ + 5467 "00000001" // /* MW 1 */ + 5468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5469 "00000000" // /* MW 1 */ + 5470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5471 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 96 8 first +.no_stack_arguments + 5472 "00000100" // JL #5360 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=5360 delay_slots=5 */ + 5473 "00000001" // /* MW 5 */ + 5474 "00000000" // /* MW 4 */ + 5475 "01111000" // /* MW 3 */ + 5476 "00001010" // /* MW 2 */ + 5477 "00000000" // /* MW 1 */ +.delay_slot + 5478 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5479 "10011101" // /* MW 3 */ + 5480 "11111011" // /* MW 2 */ + 5481 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5483 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5485 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 95 22 first +.delay_slot + 5486 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5487 "00101001" // /* MW 3 */ + 5488 "11011100" // /* MW 2 */ + 5489 "00001000" // /* MW 1 */ +.src_ref 3 "mul_impl.h" 93 25 +.delay_slot + 5490 "00101110" // NOPA; NOPS; MOV p7, p0; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5491 "00011100" // /* MW 13 */ + 5492 "00000000" // /* MW 12 */ + 5493 "00000000" // /* MW 11 */ + 5494 "00000111" // /* MW 10 */ + 5495 "00000110" // /* MW 9 */ + 5496 "01111011" // /* MW 8 */ + 5497 "00000000" // /* MW 7 */ + 5498 "00000000" // /* MW 6 */ + 5499 "10110110" // /* MW 5 */ + 5500 "00000010" // /* MW 4 */ + 5501 "11110000" // /* MW 3 */ + 5502 "00101100" // /* MW 2 */ + 5503 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 98 4 +.return_address + 5504 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5505 "00111001" // /* MW 3 */ + 5506 "11111100" // /* MW 2 */ + 5507 "00000111" // /* MW 1 */ + 5508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5509 "00000000" // /* MW 1 */ + 5510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5511 "00000000" // /* MW 1 */ + 5512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5513 "00000000" // /* MW 1 */ + 5514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5515 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5517 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5518 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5519 "10011001" // /* MW 3 */ + 5520 "11111011" // /* MW 2 */ + 5521 "00000111" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 98 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5522 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 5523 "00000000" // /* MW 3 */ + 5524 "00101000" // /* MW 2 */ + 5525 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5527 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5529 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5531 "00000000" // /* MW 1 */ +.src_ref 3 "mul_impl.h" 93 25 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5532 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5533 "00000001" // /* MW 3 */ + 5534 "00100000" // /* MW 2 */ + 5535 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 98 4 +.src_ref 3 "mul_impl.h" 93 25 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5536 "00111010" // ST r16, [p7, #16]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5537 "01110001" // /* MW 9 */ + 5538 "00000000" // /* MW 8 */ + 5539 "00000000" // /* MW 7 */ + 5540 "00000000" // /* MW 6 */ + 5541 "11111110" // /* MW 5 */ + 5542 "00111111" // /* MW 4 */ + 5543 "00110000" // /* MW 3 */ + 5544 "11000010" // /* MW 2 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + 5545 "11101000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.function run _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.src_ref 2 "elementwise_binary.h" 108 first +.src_ref 2 "elementwise_binary.h" 115 37 +.src_ref 2 "elementwise_binary.h" 115 37 +.function_start + 5552 "10111010" // MOVA m0, #32; MOVXM p3, #508736 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5553 "00010000" // /* MW 9 */ + 5554 "10100000" // /* MW 8 */ + 5555 "10110001" // /* MW 7 */ + 5556 "11110001" // /* MW 6 */ + 5557 "00000001" // /* MW 5 */ + 5558 "00000000" // /* MW 4 */ + 5559 "10000000" // /* MW 3 */ + 5560 "00000000" // /* MW 2 */ + 5561 "00000100" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 115 37 first +.src_ref 2 "elementwise_binary.h" 115 78 + 5562 "10111010" // LDA r1, [p3], m0; MOVXM p4, #508448 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5563 "00010000" // /* MW 9 */ + 5564 "00010000" // /* MW 8 */ + 5565 "00110001" // /* MW 7 */ + 5566 "11110010" // /* MW 6 */ + 5567 "00000001" // /* MW 5 */ + 5568 "00000000" // /* MW 4 */ + 5569 "11010000" // /* MW 3 */ + 5570 "00000110" // /* MW 2 */ + 5571 "01100001" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 115 78 +.src_ref 2 "elementwise_binary.h" 115 78 + 5572 "10111010" // LDA m1, [p3]; MOVX r0, #828; MOV r3, #-6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5573 "01011000" // /* MW 9 */ + 5574 "11111010" // /* MW 8 */ + 5575 "01101111" // /* MW 7 */ + 5576 "10001000" // /* MW 6 */ + 5577 "00000111" // /* MW 5 */ + 5578 "00011000" // /* MW 4 */ + 5579 "11010000" // /* MW 3 */ + 5580 "10010000" // /* MW 2 */ + 5581 "01100000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 115 78 +.src_ref 2 "elementwise_binary.h" 127 8 first + 5582 "10111010" // LDA m0, [p3, #4]; MOVXM ls, #5744 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5583 "00010000" // /* MW 9 */ + 5584 "00111000" // /* MW 8 */ + 5585 "01111011" // /* MW 7 */ + 5586 "00000100" // /* MW 6 */ + 5587 "00000000" // /* MW 5 */ + 5588 "00000000" // /* MW 4 */ + 5589 "11010000" // /* MW 3 */ + 5590 "10000000" // /* MW 2 */ + 5591 "01100010" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 115 78 first +.src_ref 2 "elementwise_binary.h" 127 8 + 5592 "10111010" // LDA.s8 r2, [p4]; MOVXM le, #5760 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5593 "00010000" // /* MW 9 */ + 5594 "01000000" // /* MW 8 */ + 5595 "10111011" // /* MW 7 */ + 5596 "00000101" // /* MW 6 */ + 5597 "00000000" // /* MW 5 */ + 5598 "00000000" // /* MW 4 */ + 5599 "01010000" // /* MW 3 */ + 5600 "10001000" // /* MW 2 */ + 5601 "10000000" // /* MW 1 */ + 5602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5603 "00000000" // /* MW 1 */ + 5604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5605 "00000000" // /* MW 1 */ + 5606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5607 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 115 78 + 5608 "10011000" // LSHL r1, r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5609 "00111101" // /* MW 3 */ + 5610 "01000010" // /* MW 2 */ + 5611 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 127 8 first + 5612 "10011000" // ADD.NC lc, r1, #-7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5613 "11111100" // /* MW 3 */ + 5614 "01110000" // /* MW 2 */ + 5615 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first + 5616 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5617 "11101000" // /* MW 5 */ + 5618 "01010000" // /* MW 4 */ + 5619 "01110000" // /* MW 3 */ + 5620 "00010011" // /* MW 2 */ + 5621 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 154 20 +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.src_ref 2 "elementwise_binary.h" 177 20 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5622 "00010010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; MOVX crRnd, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5623 "10000000" // /* MW 7 */ + 5624 "10111010" // /* MW 6 */ + 5625 "01101000" // /* MW 5 */ + 5626 "01010000" // /* MW 4 */ + 5627 "01110000" // /* MW 3 */ + 5628 "00011011" // /* MW 2 */ + 5629 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5630 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5631 "11101000" // /* MW 5 */ + 5632 "01010000" // /* MW 4 */ + 5633 "01110000" // /* MW 3 */ + 5634 "00010011" // /* MW 2 */ + 5635 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5636 "00111100" // VLDA x3, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5637 "01101000" // /* MW 5 */ + 5638 "01010000" // /* MW 4 */ + 5639 "01110000" // /* MW 3 */ + 5640 "00011011" // /* MW 2 */ + 5641 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5642 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5643 "11101000" // /* MW 5 */ + 5644 "01010000" // /* MW 4 */ + 5645 "01110000" // /* MW 3 */ + 5646 "00010011" // /* MW 2 */ + 5647 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5648 "00111100" // VLDA x3, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5649 "01101000" // /* MW 5 */ + 5650 "01010000" // /* MW 4 */ + 5651 "01110000" // /* MW 3 */ + 5652 "00011011" // /* MW 2 */ + 5653 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5654 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5655 "11101000" // /* MW 5 */ + 5656 "01010000" // /* MW 4 */ + 5657 "01110000" // /* MW 3 */ + 5658 "00010011" // /* MW 2 */ + 5659 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5660 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5661 "01000001" // /* MW 9 */ + 5662 "11100010" // /* MW 8 */ + 5663 "00000000" // /* MW 7 */ + 5664 "00011101" // /* MW 6 */ + 5665 "00110100" // /* MW 5 */ + 5666 "00101000" // /* MW 4 */ + 5667 "01110000" // /* MW 3 */ + 5668 "00011011" // /* MW 2 */ + 5669 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5670 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5671 "01100001" // /* MW 9 */ + 5672 "11100000" // /* MW 8 */ + 5673 "00000001" // /* MW 7 */ + 5674 "00011101" // /* MW 6 */ + 5675 "01110100" // /* MW 5 */ + 5676 "00101000" // /* MW 4 */ + 5677 "01110000" // /* MW 3 */ + 5678 "00010011" // /* MW 2 */ + 5679 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5680 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5681 "01000001" // /* MW 9 */ + 5682 "11100010" // /* MW 8 */ + 5683 "00000000" // /* MW 7 */ + 5684 "00011101" // /* MW 6 */ + 5685 "00110100" // /* MW 5 */ + 5686 "00101000" // /* MW 4 */ + 5687 "01110000" // /* MW 3 */ + 5688 "00011011" // /* MW 2 */ + 5689 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5690 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5691 "01100001" // /* MW 9 */ + 5692 "11100000" // /* MW 8 */ + 5693 "00000001" // /* MW 7 */ + 5694 "00011101" // /* MW 6 */ + 5695 "01110100" // /* MW 5 */ + 5696 "00101000" // /* MW 4 */ + 5697 "01110000" // /* MW 3 */ + 5698 "00010011" // /* MW 2 */ + 5699 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5700 "01100110" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; NOPS; VMUL.f dm0, x1, x2, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5701 "01000001" // /* MW 11 */ + 5702 "11100010" // /* MW 10 */ + 5703 "00000000" // /* MW 9 */ + 5704 "10001110" // /* MW 8 */ + 5705 "10101101" // /* MW 7 */ + 5706 "00000000" // /* MW 6 */ + 5707 "01101000" // /* MW 5 */ + 5708 "01010000" // /* MW 4 */ + 5709 "01110000" // /* MW 3 */ + 5710 "00011011" // /* MW 2 */ + 5711 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5712 "00001011" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; NOPS; NOPX; NOPM; VMUL.f dm1, x0, x3, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5713 "00000011" // /* MW 15 */ + 5714 "00001111" // /* MW 14 */ + 5715 "01111000" // /* MW 13 */ + 5716 "10100101" // /* MW 12 */ + 5717 "00000001" // /* MW 11 */ + 5718 "00000000" // /* MW 10 */ + 5719 "00000000" // /* MW 9 */ + 5720 "00000000" // /* MW 8 */ + 5721 "01011011" // /* MW 7 */ + 5722 "00000001" // /* MW 6 */ + 5723 "11101000" // /* MW 5 */ + 5724 "01010000" // /* MW 4 */ + 5725 "01110000" // /* MW 3 */ + 5726 "00010011" // /* MW 2 */ + 5727 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5728 "00001011" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5729 "00010010" // /* MW 15 */ + 5730 "00000111" // /* MW 14 */ + 5731 "01111000" // /* MW 13 */ + 5732 "10100101" // /* MW 12 */ + 5733 "00000001" // /* MW 11 */ + 5734 "00000000" // /* MW 10 */ + 5735 "00000000" // /* MW 9 */ + 5736 "00000000" // /* MW 8 */ + 5737 "00100011" // /* MW 7 */ + 5738 "00011100" // /* MW 6 */ + 5739 "01101010" // /* MW 5 */ + 5740 "01010000" // /* MW 4 */ + 5741 "01110000" // /* MW 3 */ + 5742 "00011011" // /* MW 2 */ + 5743 "00100001" // /* MW 1 */ +.label ZLS_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_192 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.src_ref 2 "elementwise_binary.h" 154 20 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 5744 "00001011" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; VMUL.f dm1, x0, x3, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5745 "00000011" // /* MW 15 */ + 5746 "00001111" // /* MW 14 */ + 5747 "01111000" // /* MW 13 */ + 5748 "10100101" // /* MW 12 */ + 5749 "00000001" // /* MW 11 */ + 5750 "00000000" // /* MW 10 */ + 5751 "00000000" // /* MW 9 */ + 5752 "00000000" // /* MW 8 */ + 5753 "10100011" // /* MW 7 */ + 5754 "00011100" // /* MW 6 */ + 5755 "11101010" // /* MW 5 */ + 5756 "01010000" // /* MW 4 */ + 5757 "01110000" // /* MW 3 */ + 5758 "00010011" // /* MW 2 */ + 5759 "00100001" // /* MW 1 */ +.label ZLE_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_208 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5760 "00001011" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5761 "00010010" // /* MW 15 */ + 5762 "00000111" // /* MW 14 */ + 5763 "01111000" // /* MW 13 */ + 5764 "10100101" // /* MW 12 */ + 5765 "00000001" // /* MW 11 */ + 5766 "00000000" // /* MW 10 */ + 5767 "00000000" // /* MW 9 */ + 5768 "00000000" // /* MW 8 */ + 5769 "00100011" // /* MW 7 */ + 5770 "00011100" // /* MW 6 */ + 5771 "01101010" // /* MW 5 */ + 5772 "01010000" // /* MW 4 */ + 5773 "01110000" // /* MW 3 */ + 5774 "00011011" // /* MW 2 */ + 5775 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 5776 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5777 "01100001" // /* MW 7 */ + 5778 "11100000" // /* MW 6 */ + 5779 "00000001" // /* MW 5 */ + 5780 "00000010" // /* MW 4 */ + 5781 "01100000" // /* MW 3 */ + 5782 "10010100" // /* MW 2 */ + 5783 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5784 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5785 "01000001" // /* MW 7 */ + 5786 "11100010" // /* MW 6 */ + 5787 "00000000" // /* MW 5 */ + 5788 "00000010" // /* MW 4 */ + 5789 "01100000" // /* MW 3 */ + 5790 "10000100" // /* MW 2 */ + 5791 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5792 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5793 "01100001" // /* MW 7 */ + 5794 "11100000" // /* MW 6 */ + 5795 "00000001" // /* MW 5 */ + 5796 "00000010" // /* MW 4 */ + 5797 "01100000" // /* MW 3 */ + 5798 "10010100" // /* MW 2 */ + 5799 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5800 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5801 "01000001" // /* MW 7 */ + 5802 "11100010" // /* MW 6 */ + 5803 "00000000" // /* MW 5 */ + 5804 "00000010" // /* MW 4 */ + 5805 "01100000" // /* MW 3 */ + 5806 "10000100" // /* MW 2 */ + 5807 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5808 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5809 "01100001" // /* MW 7 */ + 5810 "11100000" // /* MW 6 */ + 5811 "00000001" // /* MW 5 */ + 5812 "00000010" // /* MW 4 */ + 5813 "01100000" // /* MW 3 */ + 5814 "10010100" // /* MW 2 */ + 5815 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5816 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5817 "01000001" // /* MW 7 */ + 5818 "11100010" // /* MW 6 */ + 5819 "00000000" // /* MW 5 */ + 5820 "00000010" // /* MW 4 */ + 5821 "01100000" // /* MW 3 */ + 5822 "10000100" // /* MW 2 */ + 5823 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5824 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5825 "01100001" // /* MW 7 */ + 5826 "11100000" // /* MW 6 */ + 5827 "00000001" // /* MW 5 */ + 5828 "00000010" // /* MW 4 */ + 5829 "01100000" // /* MW 3 */ + 5830 "10010100" // /* MW 2 */ + 5831 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5832 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5833 "00100011" // /* MW 3 */ + 5834 "00011100" // /* MW 2 */ + 5835 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 131 4 first +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5836 "01011100" // VST.CONV.bf16.fp32 cml1, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 5837 "00000000" // /* MW 5 */ + 5838 "01010000" // /* MW 4 */ + 5839 "01100000" // /* MW 3 */ + 5840 "10010100" // /* MW 2 */ + 5841 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5842 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5843 "00100011" // /* MW 3 */ + 5844 "00011100" // /* MW 2 */ + 5845 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 154 20 first +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5846 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5847 "10100011" // /* MW 3 */ + 5848 "00011100" // /* MW 2 */ + 5849 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.delay_slot + 5850 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5851 "00100011" // /* MW 3 */ + 5852 "00011100" // /* MW 2 */ + 5853 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 154 20 first +.delay_slot + 5854 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5855 "10100011" // /* MW 3 */ + 5856 "00011100" // /* MW 2 */ + 5857 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_end0 + 5859 "00000000" // /* MW 1 */ +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0 +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.function superkernel_mul1d _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.src_ref 6 "superkernels.cpp" 277 first +.src_ref 6 "superkernels.cpp" 282 6 +.function_start + 5872 "01000100" // MOVXM p4, #508416 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5873 "00000000" // /* MW 5 */ + 5874 "11000100" // /* MW 4 */ + 5875 "11001000" // /* MW 3 */ + 5876 "00000111" // /* MW 2 */ + 5877 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 282 6 first + 5878 "11010100" // LDA r16, [p4]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5879 "11000001" // /* MW 5 */ + 5880 "10110101" // /* MW 4 */ + 5881 "11011000" // /* MW 3 */ + 5882 "11000010" // /* MW 2 */ + 5883 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 277 + 5884 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5885 "00000001" // /* MW 5 */ + 5886 "00000000" // /* MW 4 */ + 5887 "00000000" // /* MW 3 */ + 5888 "00001000" // /* MW 2 */ + 5889 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 279 22 first + 5890 "00111010" // ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5891 "01111001" // /* MW 9 */ + 5892 "01100000" // /* MW 8 */ + 5893 "11001010" // /* MW 7 */ + 5894 "10000001" // /* MW 6 */ + 5895 "00010100" // /* MW 5 */ + 5896 "00100011" // /* MW 4 */ + 5897 "10110000" // /* MW 3 */ + 5898 "00111010" // /* MW 2 */ + 5899 "11111111" // /* MW 1 */ + 5900 "00000010" // ST p0, [sp, #-20]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5901 "01110000" // /* MW 7 */ + 5902 "11010000" // /* MW 6 */ + 5903 "00001011" // /* MW 5 */ + 5904 "00000000" // /* MW 4 */ + 5905 "10110000" // /* MW 3 */ + 5906 "10000011" // /* MW 2 */ + 5907 "11111101" // /* MW 1 */ + 5908 "10011000" // ST r0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5909 "00010101" // /* MW 3 */ + 5910 "11111100" // /* MW 2 */ + 5911 "00001111" // /* MW 1 */ + 5912 "10011000" // ST lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5913 "00111101" // /* MW 3 */ + 5914 "11110000" // /* MW 2 */ + 5915 "00001111" // /* MW 1 */ + 5916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5917 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 282 6 first +.src_ref 6 "superkernels.cpp" 282 16 first + 5918 "10000100" // JNZ r16, #6064 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6064 delay_slots=5 */ + 5919 "00000001" // /* MW 5 */ + 5920 "01000000" // /* MW 4 */ + 5921 "11011000" // /* MW 3 */ + 5922 "00001011" // /* MW 2 */ + 5923 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 279 30 first +.delay_slot + 5924 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5925 "11111011" // /* MW 3 */ + 5926 "01100011" // /* MW 2 */ + 5927 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 279 11 +.delay_slot + 5928 "01000100" // MOVXM p2, #508420 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5929 "00001000" // /* MW 5 */ + 5930 "11000100" // /* MW 4 */ + 5931 "11000100" // /* MW 3 */ + 5932 "00000111" // /* MW 2 */ + 5933 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 279 11 +.delay_slot + 5934 "00000010" // ST r17, [p2]; MOV p2, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5935 "01110000" // /* MW 7 */ + 5936 "01100000" // /* MW 6 */ + 5937 "00110111" // /* MW 5 */ + 5938 "00000001" // /* MW 4 */ + 5939 "00110000" // /* MW 3 */ + 5940 "11000110" // /* MW 2 */ + 5941 "01000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.delay_slot + 5942 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5943 "11000000" // /* MW 3 */ + 5944 "11010110" // /* MW 2 */ + 5945 "00011011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 285 4 +.src_ref 6 "superkernels.cpp" 287 28 +.src_ref 6 "superkernels.cpp" 289 42 +.src_ref 6 "superkernels.cpp" 301 2 +.delay_slot + 5946 "00111010" // ST p2, [sp, #-12]; MOVXM p7, #508736 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5947 "00010001" // /* MW 9 */ + 5948 "10100000" // /* MW 8 */ + 5949 "10110001" // /* MW 7 */ + 5950 "11110011" // /* MW 6 */ + 5951 "00000001" // /* MW 5 */ + 5952 "00000000" // /* MW 4 */ + 5953 "10110000" // /* MW 3 */ + 5954 "10100011" // /* MW 2 */ + 5955 "11111110" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.src_ref 6 "superkernels.cpp" 285 4 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5956 "00111010" // MOVS p0, p7; MOVXM p2, #508448 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5957 "00010001" // /* MW 9 */ + 5958 "00010000" // /* MW 8 */ + 5959 "00110001" // /* MW 7 */ + 5960 "11110001" // /* MW 6 */ + 5961 "00000001" // /* MW 5 */ + 5962 "00000000" // /* MW 4 */ + 5963 "01100000" // /* MW 3 */ + 5964 "10010001" // /* MW 2 */ + 5965 "00010011" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5966 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #508444 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5967 "00010000" // /* MW 9 */ + 5968 "00001110" // /* MW 8 */ + 5969 "00110001" // /* MW 7 */ + 5970 "11110001" // /* MW 6 */ + 5971 "00000001" // /* MW 5 */ + 5972 "00000000" // /* MW 4 */ + 5973 "11100000" // /* MW 3 */ + 5974 "11000000" // /* MW 2 */ + 5975 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5977 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 285 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 5978 "00000100" // JL #5392 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=5392 delay_slots=5 */ + 5979 "00000001" // /* MW 5 */ + 5980 "00000000" // /* MW 4 */ + 5981 "10001000" // /* MW 3 */ + 5982 "00001010" // /* MW 2 */ + 5983 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5985 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5987 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5988 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5989 "00110001" // /* MW 3 */ + 5990 "00100000" // /* MW 2 */ + 5991 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 5992 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5993 "00000101" // /* MW 3 */ + 5994 "00100000" // /* MW 2 */ + 5995 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 5996 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5997 "00010001" // /* MW 3 */ + 5998 "00000110" // /* MW 2 */ + 5999 "00001010" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 289 18 +.src_ref 6 "superkernels.cpp" 289 42 first +.return_address + 6000 "10111010" // LDA r16, [p7]; MOVXM p1, #508420 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6001 "00010000" // /* MW 9 */ + 6002 "00000010" // /* MW 8 */ + 6003 "10110001" // /* MW 7 */ + 6004 "11110000" // /* MW 6 */ + 6005 "00000001" // /* MW 5 */ + 6006 "00000000" // /* MW 4 */ + 6007 "11010000" // /* MW 3 */ + 6008 "11000010" // /* MW 2 */ + 6009 "11100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 289 16 +.src_ref 6 "superkernels.cpp" 289 18 +.src_ref 6 "superkernels.cpp" 298 48 + 6010 "10111010" // LDA r17, [p1]; MOVXM p3, #508424 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6011 "00010000" // /* MW 9 */ + 6012 "00000100" // /* MW 8 */ + 6013 "10110001" // /* MW 7 */ + 6014 "11110001" // /* MW 6 */ + 6015 "00000001" // /* MW 5 */ + 6016 "00000000" // /* MW 4 */ + 6017 "11010000" // /* MW 3 */ + 6018 "11000110" // /* MW 2 */ + 6019 "00100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 287 28 first +.src_ref 6 "superkernels.cpp" 290 16 +.src_ref 6 "superkernels.cpp" 299 48 + 6020 "10111010" // LDA.u16 r18, [p7, #10]; MOVXM p1, #508428 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6021 "00010000" // /* MW 9 */ + 6022 "00000110" // /* MW 8 */ + 6023 "10110001" // /* MW 7 */ + 6024 "11110000" // /* MW 6 */ + 6025 "00000001" // /* MW 5 */ + 6026 "00000000" // /* MW 4 */ + 6027 "01010000" // /* MW 3 */ + 6028 "11001011" // /* MW 2 */ + 6029 "11101010" // /* MW 1 */ + 6030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6031 "00000000" // /* MW 1 */ + 6032 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6033 "00000000" // /* MW 1 */ + 6034 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6035 "00000000" // /* MW 1 */ + 6036 "10000100" // J #6080 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6080 delay_slots=5 */ + 6037 "00000000" // /* MW 5 */ + 6038 "00000000" // /* MW 4 */ + 6039 "11100000" // /* MW 3 */ + 6040 "00001011" // /* MW 2 */ + 6041 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 287 13 +.delay_slot + 6042 "01000100" // MOVXM p2, #508440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6043 "00110000" // /* MW 5 */ + 6044 "11000100" // /* MW 4 */ + 6045 "11000100" // /* MW 3 */ + 6046 "00000111" // /* MW 2 */ + 6047 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 289 27 first +.delay_slot + 6048 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6049 "00001111" // /* MW 3 */ + 6050 "01100001" // /* MW 2 */ + 6051 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 287 13 first +.delay_slot + 6052 "10011000" // ST r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6053 "01010001" // /* MW 3 */ + 6054 "00000110" // /* MW 2 */ + 6055 "00001010" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 289 16 first +.delay_slot + 6056 "10011000" // ST r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6057 "00010001" // /* MW 3 */ + 6058 "00000110" // /* MW 2 */ + 6059 "00001011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 290 16 first +.delay_slot + 6060 "10011000" // ST r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6061 "00010001" // /* MW 3 */ + 6062 "00000110" // /* MW 2 */ + 6063 "00001001" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 +.src_ref 6 "superkernels.cpp" 298 48 + 6064 "01000100" // MOVXM p3, #508424 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6065 "00010000" // /* MW 5 */ + 6066 "11000100" // /* MW 4 */ + 6067 "11000110" // /* MW 3 */ + 6068 "00000111" // /* MW 2 */ + 6069 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 299 48 + 6070 "10111010" // NOPA; MOVXM p1, #508428 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6071 "00010000" // /* MW 9 */ + 6072 "00000110" // /* MW 8 */ + 6073 "10110001" // /* MW 7 */ + 6074 "11110000" // /* MW 6 */ + 6075 "00000001" // /* MW 5 */ + 6076 "00000000" // /* MW 4 */ + 6077 "11110000" // /* MW 3 */ + 6078 "00101100" // /* MW 2 */ + 6079 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 +.src_ref 1 "io_buffer_main.h" 242 49 first + 6080 "00011000" // ADD.NC p0, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6081 "10000110" // /* MW 3 */ + 6082 "01100111" // /* MW 2 */ + 6083 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 293 2 + 6084 "10111010" // LDA r27, [p0], #-4; MOVXM p2, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6085 "00010000" // /* MW 9 */ + 6086 "00000000" // /* MW 8 */ + 6087 "00110001" // /* MW 7 */ + 6088 "11110001" // /* MW 6 */ + 6089 "00000001" // /* MW 5 */ + 6090 "00000000" // /* MW 4 */ + 6091 "11010000" // /* MW 3 */ + 6092 "11101110" // /* MW 2 */ + 6093 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 6094 "10011000" // LDA r16, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6095 "00010110" // /* MW 3 */ + 6096 "11111110" // /* MW 2 */ + 6097 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 6098 "10011000" // LDA r17, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6099 "00110110" // /* MW 3 */ + 6100 "11111110" // /* MW 2 */ + 6101 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 293 2 first + 6102 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6103 "01010110" // /* MW 3 */ + 6104 "00000110" // /* MW 2 */ + 6105 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first + 6106 "10011000" // LDA r19, [p0, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6107 "01110110" // /* MW 3 */ + 6108 "01000110" // /* MW 2 */ + 6109 "00000000" // /* MW 1 */ + 6110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6111 "00000000" // /* MW 1 */ + 6112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6113 "00000000" // /* MW 1 */ + 6114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6115 "00000000" // /* MW 1 */ + 6116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6117 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 6118 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6119 "00000010" // /* MW 3 */ + 6120 "01100001" // /* MW 2 */ + 6121 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 +.src_ref 6 "superkernels.cpp" 293 2 first + 6122 "01011100" // ST r16, [p0]; ADD r16, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6123 "00001110" // /* MW 5 */ + 6124 "01000000" // /* MW 4 */ + 6125 "00111001" // /* MW 3 */ + 6126 "11000010" // /* MW 2 */ + 6127 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 293 2 + 6128 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6129 "00010001" // /* MW 3 */ + 6130 "00000110" // /* MW 2 */ + 6131 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 +.src_ref 1 "io_buffer_main.h" 419 8 + 6132 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6133 "11111101" // /* MW 3 */ + 6134 "11100000" // /* MW 2 */ + 6135 "00010111" // /* MW 1 */ + 6136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6137 "00000000" // /* MW 1 */ + 6138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6139 "00000000" // /* MW 1 */ + 6140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6141 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 6142 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6143 "00001000" // /* MW 3 */ + 6144 "11010011" // /* MW 2 */ + 6145 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 first + 6146 "00011000" // ADD.NC p2, r14, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6147 "00000110" // /* MW 3 */ + 6148 "01100111" // /* MW 2 */ + 6149 "00011010" // /* MW 1 */ + 6150 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6151 "00000000" // /* MW 1 */ + 6152 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6153 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 + 6154 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6155 "01110110" // /* MW 3 */ + 6156 "11111111" // /* MW 2 */ + 6157 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 6158 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6159 "00110110" // /* MW 3 */ + 6160 "11111110" // /* MW 2 */ + 6161 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 6162 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6163 "01010110" // /* MW 3 */ + 6164 "11111110" // /* MW 2 */ + 6165 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 47 first + 6166 "10011000" // LDA r19, [p2, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6167 "01110110" // /* MW 3 */ + 6168 "01010110" // /* MW 2 */ + 6169 "00000010" // /* MW 1 */ + 6170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6171 "00000000" // /* MW 1 */ + 6172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6173 "00000000" // /* MW 1 */ + 6174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6175 "00000000" // /* MW 1 */ + 6176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6177 "00000000" // /* MW 1 */ + 6178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6179 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 6180 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6181 "00010010" // /* MW 3 */ + 6182 "10100011" // /* MW 2 */ + 6183 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 + 6184 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6185 "00110001" // /* MW 3 */ + 6186 "00000110" // /* MW 2 */ + 6187 "00001010" // /* MW 1 */ + 6188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6189 "00000000" // /* MW 1 */ + 6190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6191 "00000000" // /* MW 1 */ + 6192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6193 "00000000" // /* MW 1 */ + 6194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6195 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 6196 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6197 "00001000" // /* MW 3 */ + 6198 "11010011" // /* MW 2 */ + 6199 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 32 +.src_ref 6 "superkernels.cpp" 298 46 +.src_ref 6 "superkernels.cpp" 299 46 + 6200 "00111010" // MOVS p6, p2; MOVX r16, #1; MOV r14, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6201 "01111001" // /* MW 9 */ + 6202 "01100000" // /* MW 8 */ + 6203 "11001110" // /* MW 7 */ + 6204 "00101001" // /* MW 6 */ + 6205 "00000000" // /* MW 5 */ + 6206 "00000001" // /* MW 4 */ + 6207 "01100000" // /* MW 3 */ + 6208 "00010001" // /* MW 2 */ + 6209 "11010001" // /* MW 1 */ + 6210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6211 "00000000" // /* MW 1 */ + 6212 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6213 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 + 6214 "00011000" // LDA p4, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6215 "00011001" // /* MW 3 */ + 6216 "11101110" // /* MW 2 */ + 6217 "00000111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 298 48 first + 6218 "00001100" // LDA r17, [p3]; ST p0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6219 "00111011" // /* MW 5 */ + 6220 "11011000" // /* MW 4 */ + 6221 "11011111" // /* MW 3 */ + 6222 "11000110" // /* MW 2 */ + 6223 "01100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 299 48 first +.src_ref 6 "superkernels.cpp" 301 2 + 6224 "11010100" // LDA r20, [p1]; MOV p3, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6225 "10000001" // /* MW 5 */ + 6226 "11011101" // /* MW 4 */ + 6227 "11010110" // /* MW 3 */ + 6228 "11010010" // /* MW 2 */ + 6229 "00100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 6230 "10011000" // LDA r18, [p2], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6231 "01010110" // /* MW 3 */ + 6232 "01001110" // /* MW 2 */ + 6233 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 6234 "10011000" // LDA p2, [p0], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6235 "00011110" // /* MW 3 */ + 6236 "01011101" // /* MW 2 */ + 6237 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 40 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6238 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6239 "11000000" // /* MW 3 */ + 6240 "01100000" // /* MW 2 */ + 6241 "00011111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6243 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6244 "10011000" // LDA r19, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6245 "01110110" // /* MW 3 */ + 6246 "00000110" // /* MW 2 */ + 6247 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6248 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6249 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 301 2 first +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 6250 "00000100" // JL #5552 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=5552 delay_slots=5 */ + 6251 "00000001" // /* MW 5 */ + 6252 "00000000" // /* MW 4 */ + 6253 "11011000" // /* MW 3 */ + 6254 "00001010" // /* MW 2 */ + 6255 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 40 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6256 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6257 "11000000" // /* MW 3 */ + 6258 "11010100" // /* MW 2 */ + 6259 "00011011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 298 46 first +.delay_slot + 6260 "10011000" // LSHL r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6261 "00001101" // /* MW 3 */ + 6262 "01100011" // /* MW 2 */ + 6263 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 299 46 first +.delay_slot + 6264 "10011000" // LSHL r16, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6265 "00001101" // /* MW 3 */ + 6266 "00100001" // /* MW 2 */ + 6267 "00010101" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 299 46 +.delay_slot + 6268 "01011000" // ADD.NC p1, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6269 "01000001" // /* MW 3 */ + 6270 "01101001" // /* MW 2 */ + 6271 "00011001" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 298 46 first +.delay_slot + 6272 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6273 "00000000" // /* MW 15 */ + 6274 "00000000" // /* MW 14 */ + 6275 "10101000" // /* MW 13 */ + 6276 "11100010" // /* MW 12 */ + 6277 "00110100" // /* MW 11 */ + 6278 "00000000" // /* MW 10 */ + 6279 "00000000" // /* MW 9 */ + 6280 "00000000" // /* MW 8 */ + 6281 "01011011" // /* MW 7 */ + 6282 "00000001" // /* MW 6 */ + 6283 "00100000" // /* MW 5 */ + 6284 "00000000" // /* MW 4 */ + 6285 "11110000" // /* MW 3 */ + 6286 "00101100" // /* MW 2 */ + 6287 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 32 first +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 351 40 +.src_ref 1 "io_buffer_main.h" 449 8 +.src_ref 1 "io_buffer_main.h" 449 8 +.return_address + 6288 "10111010" // LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6289 "01111000" // /* MW 9 */ + 6290 "11010000" // /* MW 8 */ + 6291 "10110011" // /* MW 7 */ + 6292 "00101000" // /* MW 6 */ + 6293 "00000000" // /* MW 5 */ + 6294 "00000001" // /* MW 4 */ + 6295 "11010000" // /* MW 3 */ + 6296 "11000110" // /* MW 2 */ + 6297 "11001000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 19 + 6298 "01000100" // MOVXM p6, #508440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6299 "00110000" // /* MW 5 */ + 6300 "11000100" // /* MW 4 */ + 6301 "11001100" // /* MW 3 */ + 6302 "00000111" // /* MW 2 */ + 6303 "00000000" // /* MW 1 */ + 6304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6305 "00000000" // /* MW 1 */ + 6306 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6307 "00000000" // /* MW 1 */ + 6308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6309 "00000000" // /* MW 1 */ + 6310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6311 "00000000" // /* MW 1 */ + 6312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6313 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 6314 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6315 "00001000" // /* MW 3 */ + 6316 "01010001" // /* MW 2 */ + 6317 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first + 6318 "10011000" // LDA r17, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6319 "00110110" // /* MW 3 */ + 6320 "11110110" // /* MW 2 */ + 6321 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 + 6322 "00011000" // LDA p2, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6323 "00011001" // /* MW 3 */ + 6324 "11101101" // /* MW 2 */ + 6325 "00000111" // /* MW 1 */ + 6326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6327 "00000000" // /* MW 1 */ + 6328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6329 "00000000" // /* MW 1 */ + 6330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6331 "00000000" // /* MW 1 */ + 6332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6333 "00000000" // /* MW 1 */ + 6334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6335 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 + 6336 "10011000" // SUB r17, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6337 "00010001" // /* MW 3 */ + 6338 "00100011" // /* MW 2 */ + 6339 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first +.src_ref 1 "io_buffer_main.h" 351 28 + 6340 "00001100" // LDA r17, [p2, #20]; ST r17, [p1, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6341 "01100011" // /* MW 5 */ + 6342 "11101100" // /* MW 4 */ + 6343 "11010011" // /* MW 3 */ + 6344 "11000110" // /* MW 2 */ + 6345 "01001010" // /* MW 1 */ + 6346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6347 "00000000" // /* MW 1 */ + 6348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6349 "00000000" // /* MW 1 */ + 6350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6351 "00000000" // /* MW 1 */ + 6352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6353 "00000000" // /* MW 1 */ + 6354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6355 "00000000" // /* MW 1 */ + 6356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6357 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 6358 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6359 "00001000" // /* MW 3 */ + 6360 "01010001" // /* MW 2 */ + 6361 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first +.src_ref 6 "superkernels.cpp" 305 6 +.src_ref 6 "superkernels.cpp" 306 14 + 6362 "10111010" // LDA r19, [p7, #-8]; MOVXM p1, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6363 "00010000" // /* MW 9 */ + 6364 "00000000" // /* MW 8 */ + 6365 "10110001" // /* MW 7 */ + 6366 "11110000" // /* MW 6 */ + 6367 "00000001" // /* MW 5 */ + 6368 "00000000" // /* MW 4 */ + 6369 "11010000" // /* MW 3 */ + 6370 "11001110" // /* MW 2 */ + 6371 "11111100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 19 first + 6372 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6373 "01010110" // /* MW 3 */ + 6374 "00000110" // /* MW 2 */ + 6375 "00000110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 6 + 6376 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6377 "00110110" // /* MW 3 */ + 6378 "00000110" // /* MW 2 */ + 6379 "00000001" // /* MW 1 */ + 6380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6381 "00000000" // /* MW 1 */ + 6382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6383 "00000000" // /* MW 1 */ + 6384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6385 "00000000" // /* MW 1 */ + 6386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6387 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 first + 6388 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6389 "00110001" // /* MW 3 */ + 6390 "00100001" // /* MW 2 */ + 6391 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 6392 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6393 "00010001" // /* MW 3 */ + 6394 "11100110" // /* MW 2 */ + 6395 "00001111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 16 first + 6396 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6397 "00101000" // /* MW 3 */ + 6398 "01100001" // /* MW 2 */ + 6399 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 6 + 6400 "10000100" // JNZ r16, #6432 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6432 delay_slots=5 */ + 6401 "00000001" // /* MW 5 */ + 6402 "01000000" // /* MW 4 */ + 6403 "10010000" // /* MW 3 */ + 6404 "00001100" // /* MW 2 */ + 6405 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6407 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6408 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6409 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6411 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6413 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6415 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 306 14 + 6416 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6417 "00000001" // /* MW 3 */ + 6418 "00100000" // /* MW 2 */ + 6419 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 306 14 first + 6420 "00110110" // NOPA; NOPB; ST r16, [p1]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6421 "11000001" // /* MW 11 */ + 6422 "00001000" // /* MW 10 */ + 6423 "10000011" // /* MW 9 */ + 6424 "00000000" // /* MW 8 */ + 6425 "00000000" // /* MW 7 */ + 6426 "00000000" // /* MW 6 */ + 6427 "00100000" // /* MW 5 */ + 6428 "00000000" // /* MW 4 */ + 6429 "11110000" // /* MW 3 */ + 6430 "00101100" // /* MW 2 */ + 6431 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 +.src_ref 6 "superkernels.cpp" 308 + 6432 "00011000" // LDA lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6433 "00111001" // /* MW 3 */ + 6434 "11110000" // /* MW 2 */ + 6435 "00000111" // /* MW 1 */ + 6436 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6437 "11110001" // /* MW 3 */ + 6438 "11111101" // /* MW 2 */ + 6439 "00000111" // /* MW 1 */ + 6440 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6441 "10011001" // /* MW 3 */ + 6442 "11110111" // /* MW 2 */ + 6443 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 6444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6445 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.noswbrkpt + 6446 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6447 "11010001" // /* MW 3 */ + 6448 "11111001" // /* MW 2 */ + 6449 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 6450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6451 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 6452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6453 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 308 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 6454 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 6455 "00000000" // /* MW 3 */ + 6456 "00101000" // /* MW 2 */ + 6457 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6458 "00011000" // MOVS p6, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6459 "00001011" // /* MW 3 */ + 6460 "10001110" // /* MW 2 */ + 6461 "00001110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 308 +.delay_slot + 6462 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6463 "00000001" // /* MW 5 */ + 6464 "00000000" // /* MW 4 */ + 6465 "00000000" // /* MW 3 */ + 6466 "11111000" // /* MW 2 */ + 6467 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6469 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6471 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0 + 6473 "00000000" // /* MW 1 */ +.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh +.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_begin0 +.function setup_conv2d_dw_params_bf16 _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh +.src_ref 7 "conv2d_dw_bf16_params.h" 177 first +.src_ref 7 "conv2d_dw_bf16_params.h" 181 15 +.src_ref 7 "conv2d_dw_bf16_params.h" 181 17 first +.function_start + 6480 "10111010" // LDA el0, [p0], #4; MOVXM p1, #508864 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6481 "00010000" // /* MW 9 */ + 6482 "11100000" // /* MW 8 */ + 6483 "10110001" // /* MW 7 */ + 6484 "11110000" // /* MW 6 */ + 6485 "00000001" // /* MW 5 */ + 6486 "00000000" // /* MW 4 */ + 6487 "11010000" // /* MW 3 */ + 6488 "10000101" // /* MW 2 */ + 6489 "00000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 181 17 first +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.src_ref 7 "conv2d_dw_bf16_params.h" 184 80 + 6490 "10111010" // LDA eh0, [p0], #4; MOVX r16, #2; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6491 "01011000" // /* MW 9 */ + 6492 "00000000" // /* MW 8 */ + 6493 "00001000" // /* MW 7 */ + 6494 "01001011" // /* MW 6 */ + 6495 "00000000" // /* MW 5 */ + 6496 "00000001" // /* MW 4 */ + 6497 "11010000" // /* MW 3 */ + 6498 "10000001" // /* MW 2 */ + 6499 "00000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 177 + 6500 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6501 "00000001" // /* MW 5 */ + 6502 "00000000" // /* MW 4 */ + 6503 "00000000" // /* MW 3 */ + 6504 "00001000" // /* MW 2 */ + 6505 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 32 + 6506 "00111010" // ST p7, [sp, #-16]; MOVXM p7, #508864 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6507 "00010001" // /* MW 9 */ + 6508 "11100000" // /* MW 8 */ + 6509 "10110001" // /* MW 7 */ + 6510 "11110011" // /* MW 6 */ + 6511 "00000001" // /* MW 5 */ + 6512 "00000000" // /* MW 4 */ + 6513 "10110000" // /* MW 3 */ + 6514 "01110011" // /* MW 2 */ + 6515 "11111110" // /* MW 1 */ + 6516 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6517 "00111101" // /* MW 3 */ + 6518 "11111100" // /* MW 2 */ + 6519 "00001111" // /* MW 1 */ + 6520 "10011000" // ST r14, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6521 "11010101" // /* MW 3 */ + 6522 "11110101" // /* MW 2 */ + 6523 "00001111" // /* MW 1 */ + 6524 "10011000" // ST r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6525 "11110101" // /* MW 3 */ + 6526 "11111001" // /* MW 2 */ + 6527 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 181 15 + 6528 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6529 "00101001" // /* MW 3 */ + 6530 "00011100" // /* MW 2 */ + 6531 "00001001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 181 15 + 6532 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6533 "00001001" // /* MW 3 */ + 6534 "00011100" // /* MW 2 */ + 6535 "00001001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 181 17 + 6536 "10011000" // LDA el0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6537 "00101110" // /* MW 3 */ + 6538 "00000100" // /* MW 2 */ + 6539 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 181 17 + 6540 "10011000" // LDA eh0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6541 "00001110" // /* MW 3 */ + 6542 "00010100" // /* MW 2 */ + 6543 "00000000" // /* MW 1 */ + 6544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6545 "00000000" // /* MW 1 */ + 6546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6547 "00000000" // /* MW 1 */ + 6548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6549 "00000000" // /* MW 1 */ + 6550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6551 "00000000" // /* MW 1 */ + 6552 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6553 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 181 15 + 6554 "10011000" // ST el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6555 "00101001" // /* MW 3 */ + 6556 "00000100" // /* MW 2 */ + 6557 "00001001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 181 15 + 6558 "10011000" // ST eh0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6559 "00001001" // /* MW 3 */ + 6560 "00010100" // /* MW 2 */ + 6561 "00001001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 32 first + 6562 "10011000" // LDA.u8 r17, [p7], #5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6563 "00101010" // /* MW 3 */ + 6564 "01011110" // /* MW 2 */ + 6565 "00000111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 52 + 6566 "10011000" // LDA.u8 r18, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6567 "01001010" // /* MW 3 */ + 6568 "11101110" // /* MW 2 */ + 6569 "00000111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 80 + 6570 "10011000" // LDA.u8 r1, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6571 "00101010" // /* MW 3 */ + 6572 "11101100" // /* MW 2 */ + 6573 "00000111" // /* MW 1 */ + 6574 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6575 "00000000" // /* MW 1 */ + 6576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6577 "00000000" // /* MW 1 */ + 6578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6579 "00000000" // /* MW 1 */ + 6580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6581 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.no_stack_arguments + 6582 "00000100" // JL #12464 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12464 delay_slots=5 */ + 6583 "00000001" // /* MW 5 */ + 6584 "00000000" // /* MW 4 */ + 6585 "01011000" // /* MW 3 */ + 6586 "00011000" // /* MW 2 */ + 6587 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 38 +.delay_slot + 6588 "01011100" // ST r18, [sp, #-20]; SUB r14, r17, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6589 "01000011" // /* MW 5 */ + 6590 "10111010" // /* MW 4 */ + 6591 "10111000" // /* MW 3 */ + 6592 "11001010" // /* MW 2 */ + 6593 "11111101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.src_ref 7 "conv2d_dw_bf16_params.h" 184 80 +.delay_slot + 6594 "00111010" // ST r1, [sp, #-28]; NE r16, r1, r16; MOV r15, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6595 "01111001" // /* MW 9 */ + 6596 "01010000" // /* MW 8 */ + 6597 "11101000" // /* MW 7 */ + 6598 "01000101" // /* MW 6 */ + 6599 "00001000" // /* MW 5 */ + 6600 "00000011" // /* MW 4 */ + 6601 "10110000" // /* MW 3 */ + 6602 "10000110" // /* MW 2 */ + 6603 "11111100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.delay_slot + 6604 "01011100" // ST r16, [sp, #-24]; LT r27, r14, r24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6605 "00010101" // /* MW 5 */ + 6606 "01101111" // /* MW 4 */ + 6607 "10110111" // /* MW 3 */ + 6608 "01000010" // /* MW 2 */ + 6609 "11111101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.delay_slot + 6610 "10011000" // SUB r17, r24, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6611 "11100001" // /* MW 3 */ + 6612 "00100010" // /* MW 2 */ + 6613 "00010110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.delay_slot + 6614 "01111010" // NOPA; NOPS; SEL.EQZ r0, r14, r17, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6615 "00010010" // /* MW 9 */ + 6616 "10000001" // /* MW 8 */ + 6617 "00000011" // /* MW 7 */ + 6618 "00000000" // /* MW 6 */ + 6619 "01011011" // /* MW 5 */ + 6620 "00000001" // /* MW 4 */ + 6621 "11110000" // /* MW 3 */ + 6622 "00101100" // /* MW 2 */ + 6623 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.src_ref 7 "conv2d_dw_bf16_params.h" 185 32 first +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 +.return_address + 6624 "10111010" // LDA.u8 r17, [p7], #3; XOR r20, r15, r14; MOV r16, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6625 "01011000" // /* MW 9 */ + 6626 "00000000" // /* MW 8 */ + 6627 "00001000" // /* MW 7 */ + 6628 "00110110" // /* MW 6 */ + 6629 "01000111" // /* MW 5 */ + 6630 "00011111" // /* MW 4 */ + 6631 "01010000" // /* MW 3 */ + 6632 "11000101" // /* MW 2 */ + 6633 "11100111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 first +.src_ref 7 "conv2d_dw_bf16_params.h" 185 52 + 6634 "00101100" // LDA.u8 r18, [p7], #-2; SUB r19, r16, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6635 "01000011" // /* MW 5 */ + 6636 "01001100" // /* MW 4 */ + 6637 "01011000" // /* MW 3 */ + 6638 "11001001" // /* MW 2 */ + 6639 "11111101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 + 6640 "00101100" // LDA r1, [sp, #-28]; LT r27, r20, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6641 "00010101" // /* MW 5 */ + 6642 "01101110" // /* MW 4 */ + 6643 "00101010" // /* MW 3 */ + 6644 "10000110" // /* MW 2 */ + 6645 "11111100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 + 6646 "00011000" // SEL.EQZ r19, r2, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6647 "00110010" // /* MW 3 */ + 6648 "10100111" // /* MW 2 */ + 6649 "00010000" // /* MW 1 */ + 6650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6651 "00000000" // /* MW 1 */ + 6652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6653 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 first +.no_stack_arguments + 6654 "00000100" // JL #12464 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12464 delay_slots=5 */ + 6655 "00000001" // /* MW 5 */ + 6656 "00000000" // /* MW 4 */ + 6657 "01011000" // /* MW 3 */ + 6658 "00011000" // /* MW 2 */ + 6659 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 first +.delay_slot + 6660 "00011000" // EXTEND.s16 r19, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6661 "01110000" // /* MW 3 */ + 6662 "11100110" // /* MW 2 */ + 6663 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 87 +.src_ref 7 "conv2d_dw_bf16_params.h" 185 38 first +.delay_slot + 6664 "00111010" // ST r18, [sp, #-32]; SUB r14, r17, r18; ADD.NC r15, r19, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6665 "01001001" // /* MW 9 */ + 6666 "11000000" // /* MW 8 */ + 6667 "11101100" // /* MW 7 */ + 6668 "00001101" // /* MW 6 */ + 6669 "11101001" // /* MW 5 */ + 6670 "00100010" // /* MW 4 */ + 6671 "10110000" // /* MW 3 */ + 6672 "01001010" // /* MW 2 */ + 6673 "11111100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 +.delay_slot + 6674 "10011000" // LT r27, r14, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6675 "00001010" // /* MW 3 */ + 6676 "10110111" // /* MW 2 */ + 6677 "00010011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 +.delay_slot + 6678 "10011000" // SUB r17, r16, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6679 "11100001" // /* MW 3 */ + 6680 "00100010" // /* MW 2 */ + 6681 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 +.delay_slot + 6682 "00101100" // NOPA; SEL.EQZ r0, r14, r17, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6683 "00100100" // /* MW 5 */ + 6684 "00000010" // /* MW 4 */ + 6685 "11110111" // /* MW 3 */ + 6686 "00101100" // /* MW 2 */ + 6687 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 +.src_ref 7 "conv2d_dw_bf16_params.h" 186 23 +.src_ref 7 "conv2d_dw_bf16_params.h" 192 50 +.src_ref 7 "conv2d_dw_bf16_params.h" 198 45 +.src_ref 7 "conv2d_dw_bf16_params.h" 200 64 +.src_ref 7 "conv2d_dw_bf16_params.h" 200 64 +.src_ref 7 "conv2d_dw_bf16_params.h" 216 53 +.src_ref 7 "conv2d_dw_bf16_params.h" 235 59 +.src_ref 7 "conv2d_dw_bf16_params.h" 237 57 +.src_ref 7 "conv2d_dw_bf16_params.h" 239 139 +.return_address + 6688 "10111010" // LDA r1, [sp, #-28]; MOVX r19, #2; MOV m0, #66 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6689 "01011000" // /* MW 9 */ + 6690 "01000010" // /* MW 8 */ + 6691 "00000000" // /* MW 7 */ + 6692 "01001000" // /* MW 6 */ + 6693 "00110000" // /* MW 5 */ + 6694 "00000001" // /* MW 4 */ + 6695 "00100000" // /* MW 3 */ + 6696 "10000110" // /* MW 2 */ + 6697 "11111100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 +.src_ref 7 "conv2d_dw_bf16_params.h" 192 50 first +.src_ref 7 "conv2d_dw_bf16_params.h" 200 45 +.src_ref 7 "conv2d_dw_bf16_params.h" 200 45 +.src_ref 7 "conv2d_dw_bf16_params.h" 209 100 +.src_ref 7 "conv2d_dw_bf16_params.h" 219 63 +.src_ref 7 "conv2d_dw_bf16_params.h" 226 49 +.src_ref 7 "conv2d_dw_bf16_params.h" 230 47 +.src_ref 7 "conv2d_dw_bf16_params.h" 237 41 + 6698 "10111010" // LDA.u8 r20, [p7], m0; MOVX r24, #0; MOV r18, #8 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6699 "01011000" // /* MW 9 */ + 6700 "00001000" // /* MW 8 */ + 6701 "01001000" // /* MW 7 */ + 6702 "00001010" // /* MW 6 */ + 6703 "10000000" // /* MW 5 */ + 6704 "00000001" // /* MW 4 */ + 6705 "01010000" // /* MW 3 */ + 6706 "01010001" // /* MW 2 */ + 6707 "11100001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 197 78 +.src_ref 7 "conv2d_dw_bf16_params.h" 198 76 +.src_ref 7 "conv2d_dw_bf16_params.h" 200 45 +.src_ref 7 "conv2d_dw_bf16_params.h" 234 81 + 6708 "10111010" // LDA r28, [sp, #-32]; MOVX r16, #-6; MOV r31, #23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6709 "01011000" // /* MW 9 */ + 6710 "00010111" // /* MW 8 */ + 6711 "11101000" // /* MW 7 */ + 6712 "01001011" // /* MW 6 */ + 6713 "00000111" // /* MW 5 */ + 6714 "00111111" // /* MW 4 */ + 6715 "00100000" // /* MW 3 */ + 6716 "01110010" // /* MW 2 */ + 6717 "11111100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 186 74 +.src_ref 7 "conv2d_dw_bf16_params.h" 197 68 +.src_ref 7 "conv2d_dw_bf16_params.h" 201 50 +.src_ref 7 "conv2d_dw_bf16_params.h" 208 63 +.src_ref 7 "conv2d_dw_bf16_params.h" 226 49 +.src_ref 7 "conv2d_dw_bf16_params.h" 230 47 +.src_ref 7 "conv2d_dw_bf16_params.h" 239 51 +.src_ref 7 "conv2d_dw_bf16_params.h" 239 74 + 6718 "10111010" // LDA r22, [sp, #-20]; MOVX r26, #-2; MOV r21, #6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6719 "01011000" // /* MW 9 */ + 6720 "00000110" // /* MW 8 */ + 6721 "10101000" // /* MW 7 */ + 6722 "11001010" // /* MW 6 */ + 6723 "10100111" // /* MW 5 */ + 6724 "00111111" // /* MW 4 */ + 6725 "00100000" // /* MW 3 */ + 6726 "11011010" // /* MW 2 */ + 6727 "11111101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 186 84 +.src_ref 7 "conv2d_dw_bf16_params.h" 198 39 +.src_ref 7 "conv2d_dw_bf16_params.h" 201 50 +.src_ref 7 "conv2d_dw_bf16_params.h" 234 77 + 6728 "10111010" // LDA r3, [sp, #-24]; MOVX r29, #508; MOV m2, #32 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6729 "01011000" // /* MW 9 */ + 6730 "00100000" // /* MW 8 */ + 6731 "00000000" // /* MW 7 */ + 6732 "10001001" // /* MW 6 */ + 6733 "11010111" // /* MW 5 */ + 6734 "00001111" // /* MW 4 */ + 6735 "00100000" // /* MW 3 */ + 6736 "00001110" // /* MW 2 */ + 6737 "11111101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 208 16 +.src_ref 7 "conv2d_dw_bf16_params.h" 208 71 +.src_ref 7 "conv2d_dw_bf16_params.h" 209 93 +.src_ref 7 "conv2d_dw_bf16_params.h" 214 72 +.src_ref 7 "conv2d_dw_bf16_params.h" 216 53 +.src_ref 7 "conv2d_dw_bf16_params.h" 221 73 +.src_ref 7 "conv2d_dw_bf16_params.h" 230 41 +.src_ref 7 "conv2d_dw_bf16_params.h" 237 41 + 6738 "10111010" // MOVA m0, #-178; MOVX r23, #1; MOV r0, #128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6739 "01011000" // /* MW 9 */ + 6740 "10000000" // /* MW 8 */ + 6741 "00001000" // /* MW 7 */ + 6742 "00101000" // /* MW 6 */ + 6743 "01110000" // /* MW 5 */ + 6744 "00000001" // /* MW 4 */ + 6745 "10000000" // /* MW 3 */ + 6746 "11000000" // /* MW 2 */ + 6747 "11101001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 first +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 +.src_ref 7 "conv2d_dw_bf16_params.h" 232 54 + 6748 "10111010" // MOVA m1, #186; SUB r17, r24, r2; MOV vaddSign0, crMCDEn /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6749 "01111000" // /* MW 9 */ + 6750 "10110000" // /* MW 8 */ + 6751 "10011101" // /* MW 7 */ + 6752 "00001100" // /* MW 6 */ + 6753 "00010001" // /* MW 5 */ + 6754 "00110001" // /* MW 4 */ + 6755 "10000000" // /* MW 3 */ + 6756 "01000100" // /* MW 2 */ + 6757 "00010111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 + 6758 "10011000" // XOR r30, r1, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6759 "11100110" // /* MW 3 */ + 6760 "01111100" // /* MW 2 */ + 6761 "00010000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 + 6762 "10011000" // LT r27, r30, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6763 "10001010" // /* MW 3 */ + 6764 "10110111" // /* MW 2 */ + 6765 "00010111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 +.src_ref 7 "conv2d_dw_bf16_params.h" 206 70 + 6766 "00100100" // SEL.EQZ r17, r2, r17, r27; ADD.NC r7, r28, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6767 "11111111" // /* MW 5 */ + 6768 "10111100" // /* MW 4 */ + 6769 "01000011" // /* MW 3 */ + 6770 "01100010" // /* MW 2 */ + 6771 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 +.src_ref 7 "conv2d_dw_bf16_params.h" 186 74 + 6772 "00100100" // EXTEND.s16 r30, r17; ADD.NC r22, r22, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6773 "00000010" // /* MW 5 */ + 6774 "00110110" // /* MW 4 */ + 6775 "00001011" // /* MW 3 */ + 6776 "10001110" // /* MW 2 */ + 6777 "10001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 88 +.src_ref 7 "conv2d_dw_bf16_params.h" 192 22 first + 6778 "00100100" // MUL r30, r15, r20; ADD.NC r14, r30, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6779 "00000001" // /* MW 5 */ + 6780 "00111110" // /* MW 4 */ + 6781 "11110111" // /* MW 3 */ + 6782 "10101001" // /* MW 2 */ + 6783 "01111111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 197 68 +.src_ref 7 "conv2d_dw_bf16_params.h" 198 45 first + 6784 "00100100" // MUL r2, r1, r14; ADD.NC r17, r22, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6785 "00000001" // /* MW 5 */ + 6786 "10110110" // /* MW 4 */ + 6787 "11111000" // /* MW 3 */ + 6788 "10011101" // /* MW 2 */ + 6789 "00001000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 200 64 first + 6790 "10011000" // EQ r27, r19, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6791 "00010111" // /* MW 3 */ + 6792 "11110110" // /* MW 2 */ + 6793 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 198 55 first + 6794 "10011000" // MUL r2, r30, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6795 "00101111" // /* MW 3 */ + 6796 "10000100" // /* MW 2 */ + 6797 "00010111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 200 45 first +.src_ref 7 "conv2d_dw_bf16_params.h" 209 42 +.src_ref 7 "conv2d_dw_bf16_params.h" 213 69 + 6798 "01100100" // SEL.EQZ r31, r31, r18, r27; MOV r18, #-3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6799 "11110101" // /* MW 5 */ + 6800 "00111111" // /* MW 4 */ + 6801 "01001001" // /* MW 3 */ + 6802 "11100100" // /* MW 2 */ + 6803 "11111111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 198 76 first + 6804 "10011000" // LSHL r16, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6805 "00001101" // /* MW 3 */ + 6806 "10100001" // /* MW 2 */ + 6807 "00010000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 197 68 first + 6808 "10011000" // LSHL r2, r17, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6809 "10101101" // /* MW 3 */ + 6810 "01000101" // /* MW 2 */ + 6811 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 197 78 +.src_ref 7 "conv2d_dw_bf16_params.h" 204 79 + 6812 "00100100" // MUL r2, r2, r28; ADD.NC r4, r2, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6813 "11111111" // /* MW 5 */ + 6814 "00100010" // /* MW 4 */ + 6815 "11110010" // /* MW 3 */ + 6816 "10111001" // /* MW 2 */ + 6817 "00010000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 201 50 first + 6818 "10011000" // LSHL r3, r3, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6819 "01011101" // /* MW 3 */ + 6820 "11000111" // /* MW 2 */ + 6821 "00010000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 186 23 first +.src_ref 7 "conv2d_dw_bf16_params.h" 197 39 first + 6822 "01011100" // ST r2, [p7], #-4; MUL r5, r15, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6823 "00111111" // /* MW 5 */ + 6824 "10010100" // /* MW 4 */ + 6825 "00110111" // /* MW 3 */ + 6826 "10001010" // /* MW 2 */ + 6827 "11111111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 186 84 +.src_ref 7 "conv2d_dw_bf16_params.h" 198 39 first +.src_ref 7 "conv2d_dw_bf16_params.h" 212 52 +.src_ref 7 "conv2d_dw_bf16_params.h" 219 53 + 6828 "00111010" // ST r16, [p7], m2; AND r22, r29, r22; MOV r16, #4 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6829 "01011001" // /* MW 9 */ + 6830 "00000100" // /* MW 8 */ + 6831 "00001000" // /* MW 7 */ + 6832 "00100110" // /* MW 6 */ + 6833 "01101011" // /* MW 5 */ + 6834 "00111011" // /* MW 4 */ + 6835 "00110000" // /* MW 3 */ + 6836 "01000010" // /* MW 2 */ + 6837 "11101001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 186 44 first +.src_ref 7 "conv2d_dw_bf16_params.h" 200 42 first + 6838 "01011100" // ST r31, [p7], #-16; ADD r22, r5, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6839 "11000001" // /* MW 5 */ + 6840 "11011010" // /* MW 4 */ + 6841 "00110010" // /* MW 3 */ + 6842 "11111110" // /* MW 2 */ + 6843 "11111001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 193 22 first +.src_ref 7 "conv2d_dw_bf16_params.h" 201 47 first + 6844 "01011100" // ST r3, [p7], #24; MUL r31, r22, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6845 "10011111" // /* MW 5 */ + 6846 "01111110" // /* MW 4 */ + 6847 "00111011" // /* MW 3 */ + 6848 "10001110" // /* MW 2 */ + 6849 "11101101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 204 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 212 52 first + 6850 "01011100" // ST r4, [p7], #4; LSHL r22, r22, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6851 "00011011" // /* MW 5 */ + 6852 "01011010" // /* MW 4 */ + 6853 "00111011" // /* MW 3 */ + 6854 "10010010" // /* MW 2 */ + 6855 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 208 16 first + 6856 "10011000" // LSHL r3, r31, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6857 "01111101" // /* MW 3 */ + 6858 "11000111" // /* MW 2 */ + 6859 "00010111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 208 63 + 6860 "10011000" // LSHL r4, r4, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6861 "01011101" // /* MW 3 */ + 6862 "00001001" // /* MW 2 */ + 6863 "00010001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 208 71 +.src_ref 7 "conv2d_dw_bf16_params.h" 214 72 first + 6864 "10100100" // SUB r25, r22, r3; ADD.NC r4, r4, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6865 "00000010" // /* MW 5 */ + 6866 "00100100" // /* MW 4 */ + 6867 "00110010" // /* MW 3 */ + 6868 "01000110" // /* MW 2 */ + 6869 "10110110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 200 45 +.src_ref 7 "conv2d_dw_bf16_params.h" 205 42 +.src_ref 7 "conv2d_dw_bf16_params.h" 208 23 first +.src_ref 7 "conv2d_dw_bf16_params.h" 214 72 +.src_ref 7 "conv2d_dw_bf16_params.h" 219 63 + 6870 "10111010" // MOVA r0, #-64; SUB r6, r3, r4; ADD.NC r25, r25, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6871 "10101000" // /* MW 9 */ + 6872 "01000000" // /* MW 8 */ + 6873 "00101110" // /* MW 7 */ + 6874 "00001111" // /* MW 6 */ + 6875 "01100010" // /* MW 5 */ + 6876 "00000110" // /* MW 4 */ + 6877 "00000000" // /* MW 3 */ + 6878 "00000000" // /* MW 2 */ + 6879 "11111000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 205 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 216 53 first + 6880 "01011100" // ST r0, [p7], #4; MUL r1, r31, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6881 "00111111" // /* MW 5 */ + 6882 "10000100" // /* MW 4 */ + 6883 "00111111" // /* MW 3 */ + 6884 "10000010" // /* MW 2 */ + 6885 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 206 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 209 53 first + 6886 "01011100" // ST r7, [p7], #4; MUL r31, r31, r7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6887 "11111111" // /* MW 5 */ + 6888 "11111100" // /* MW 4 */ + 6889 "00111111" // /* MW 3 */ + 6890 "10011110" // /* MW 2 */ + 6891 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 207 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 213 69 first + 6892 "01011100" // ST r6, [p7], #4; LSHL r5, r5, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6893 "01011011" // /* MW 5 */ + 6894 "10010110" // /* MW 4 */ + 6895 "00110010" // /* MW 3 */ + 6896 "10011010" // /* MW 2 */ + 6897 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 209 93 first +.src_ref 7 "conv2d_dw_bf16_params.h" 213 73 + 6898 "00100100" // LSHL r6, r31, r23; ADD.NC r31, r5, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6899 "11111111" // /* MW 5 */ + 6900 "10100101" // /* MW 4 */ + 6901 "10111111" // /* MW 3 */ + 6902 "10101111" // /* MW 2 */ + 6903 "11111001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 209 42 +.src_ref 7 "conv2d_dw_bf16_params.h" 209 100 +.src_ref 7 "conv2d_dw_bf16_params.h" 216 61 + 6904 "10111010" // MOVA r4, #7; LSHL r5, r20, r18; ADD.NC r18, r6, r4 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6905 "10101000" // /* MW 9 */ + 6906 "10001000" // /* MW 8 */ + 6907 "01001001" // /* MW 7 */ + 6908 "01101110" // /* MW 6 */ + 6909 "01011001" // /* MW 5 */ + 6910 "00101000" // /* MW 4 */ + 6911 "00000000" // /* MW 3 */ + 6912 "11100100" // /* MW 2 */ + 6913 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 209 100 + 6914 "10011000" // SUB r18, r24, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6915 "00100001" // /* MW 3 */ + 6916 "00100101" // /* MW 2 */ + 6917 "00010110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 209 42 +.src_ref 7 "conv2d_dw_bf16_params.h" 211 77 first +.src_ref 7 "conv2d_dw_bf16_params.h" 216 53 first + 6918 "00111010" // ST r18, [p7], #4; LSHL r1, r1, r23; ADD.NC r18, r5, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6919 "11001001" // /* MW 9 */ + 6920 "01111111" // /* MW 8 */ + 6921 "01001001" // /* MW 7 */ + 6922 "11101110" // /* MW 6 */ + 6923 "00011011" // /* MW 5 */ + 6924 "00000010" // /* MW 4 */ + 6925 "00110000" // /* MW 3 */ + 6926 "11001010" // /* MW 2 */ + 6927 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 211 42 +.src_ref 7 "conv2d_dw_bf16_params.h" 216 61 + 6928 "01011100" // ST r18, [p7], #4; ADD r6, r1, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6929 "11000001" // /* MW 5 */ + 6930 "10011010" // /* MW 4 */ + 6931 "00110000" // /* MW 3 */ + 6932 "11001010" // /* MW 2 */ + 6933 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 212 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 216 61 + 6934 "01011100" // ST r22, [p7], #4; LSHL r1, r31, r4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6935 "10011011" // /* MW 5 */ + 6936 "10000100" // /* MW 4 */ + 6937 "00111111" // /* MW 3 */ + 6938 "11011010" // /* MW 2 */ + 6939 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 213 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 216 61 first +.src_ref 7 "conv2d_dw_bf16_params.h" 224 116 +.src_ref 7 "conv2d_dw_bf16_params.h" 224 140 + 6940 "00111010" // ST r31, [p7], #4; ADD r22, r3, r1; MOV r1, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6941 "01011001" // /* MW 9 */ + 6942 "11111111" // /* MW 8 */ + 6943 "00101111" // /* MW 7 */ + 6944 "10000100" // /* MW 6 */ + 6945 "01100000" // /* MW 5 */ + 6946 "00000111" // /* MW 4 */ + 6947 "00110000" // /* MW 3 */ + 6948 "11111110" // /* MW 2 */ + 6949 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 214 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 216 61 +.src_ref 7 "conv2d_dw_bf16_params.h" 222 43 +.src_ref 7 "conv2d_dw_bf16_params.h" 225 43 +.src_ref 7 "conv2d_dw_bf16_params.h" 229 41 +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 + 6950 "00111010" // ST r25, [p7], #4; SUB r3, r6, r22; MOV r22, #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6951 "01011001" // /* MW 9 */ + 6952 "01000000" // /* MW 8 */ + 6953 "11001000" // /* MW 7 */ + 6954 "00001110" // /* MW 6 */ + 6955 "00111011" // /* MW 5 */ + 6956 "00001100" // /* MW 4 */ + 6957 "00110000" // /* MW 3 */ + 6958 "11100110" // /* MW 2 */ + 6959 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 215 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 219 53 first + 6960 "01011100" // ST r3, [p7], #4; LSHL r16, r15, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6961 "00011011" // /* MW 5 */ + 6962 "11000010" // /* MW 4 */ + 6963 "00110111" // /* MW 3 */ + 6964 "10001110" // /* MW 2 */ + 6965 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 200 45 first +.src_ref 7 "conv2d_dw_bf16_params.h" 218 43 first +.src_ref 7 "conv2d_dw_bf16_params.h" 219 63 + 6966 "01011100" // ST r18, [p7], #4; SEL.EQZ r0, r0, r24, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6967 "00000100" // /* MW 5 */ + 6968 "00000011" // /* MW 4 */ + 6969 "00110000" // /* MW 3 */ + 6970 "11001010" // /* MW 2 */ + 6971 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 219 60 first +.src_ref 7 "conv2d_dw_bf16_params.h" 221 73 first + 6972 "10100100" // LSHL r3, r30, r23; ADD.NC r0, r16, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6973 "00000010" // /* MW 5 */ + 6974 "00110000" // /* MW 4 */ + 6975 "10110000" // /* MW 3 */ + 6976 "11101111" // /* MW 2 */ + 6977 "11110000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 219 43 +.src_ref 7 "conv2d_dw_bf16_params.h" 221 73 + 6978 "01011100" // ST r0, [p7], #4; SUB r16, r16, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6979 "01100011" // /* MW 5 */ + 6980 "01000000" // /* MW 4 */ + 6981 "00111000" // /* MW 3 */ + 6982 "10000010" // /* MW 2 */ + 6983 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 220 43 first +.src_ref 7 "conv2d_dw_bf16_params.h" 221 73 +.src_ref 7 "conv2d_dw_bf16_params.h" 224 116 first +.src_ref 7 "conv2d_dw_bf16_params.h" 224 140 first + 6984 "00111010" // ST r31, [p7], #4; MAC r1, r1, r5, r2; ADD.NC r31, r16, #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6985 "00001001" // /* MW 9 */ + 6986 "00010000" // /* MW 8 */ + 6987 "11101100" // /* MW 7 */ + 6988 "00110011" // /* MW 6 */ + 6989 "00010001" // /* MW 5 */ + 6990 "00001010" // /* MW 4 */ + 6991 "00110000" // /* MW 3 */ + 6992 "11111110" // /* MW 2 */ + 6993 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 221 43 first +.src_ref 7 "conv2d_dw_bf16_params.h" 230 47 first + 6994 "01011100" // ST r31, [p7], #4; LSHL r31, r18, r21 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6995 "10111011" // /* MW 5 */ + 6996 "01111110" // /* MW 4 */ + 6997 "00111001" // /* MW 3 */ + 6998 "11111110" // /* MW 2 */ + 6999 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 222 43 first +.src_ref 7 "conv2d_dw_bf16_params.h" 226 49 first + 7000 "01011100" // ST r22, [p7], #4; LSHL r2, r1, r21 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7001 "10111011" // /* MW 5 */ + 7002 "10001010" // /* MW 4 */ + 7003 "00110000" // /* MW 3 */ + 7004 "11011010" // /* MW 2 */ + 7005 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 224 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 226 49 + 7006 "01011100" // ST r1, [p7], #4; SUB r1, r24, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7007 "01000011" // /* MW 5 */ + 7008 "00000100" // /* MW 4 */ + 7009 "00111100" // /* MW 3 */ + 7010 "10000110" // /* MW 2 */ + 7011 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 225 43 first +.src_ref 7 "conv2d_dw_bf16_params.h" 230 47 first + 7012 "01011100" // ST r22, [p7], #4; SUB r2, r24, r31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7013 "11100011" // /* MW 5 */ + 7014 "00001011" // /* MW 4 */ + 7015 "00111100" // /* MW 3 */ + 7016 "11011010" // /* MW 2 */ + 7017 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 226 43 first + 7018 "10011000" // ST r1, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7019 "00110001" // /* MW 3 */ + 7020 "00011100" // /* MW 2 */ + 7021 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 228 40 first + 7022 "10011000" // ST r18, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7023 "01010001" // /* MW 3 */ + 7024 "00011110" // /* MW 2 */ + 7025 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 229 41 first + 7026 "10011000" // ST r22, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7027 "11010001" // /* MW 3 */ + 7028 "00011110" // /* MW 2 */ + 7029 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 230 41 first + 7030 "10011000" // ST r2, [p7], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7031 "01010001" // /* MW 3 */ + 7032 "00001000" // /* MW 2 */ + 7033 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 54 first + 7034 "10011000" // LDA.u8 r1, [p7], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7035 "00101010" // /* MW 3 */ + 7036 "00101000" // /* MW 2 */ + 7037 "00000111" // /* MW 1 */ + 7038 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7039 "00000000" // /* MW 1 */ + 7040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7041 "00000000" // /* MW 1 */ + 7042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7043 "00000000" // /* MW 1 */ + 7044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7045 "00000000" // /* MW 1 */ + 7046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7047 "00000000" // /* MW 1 */ + 7048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7049 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 +.src_ref 7 "conv2d_dw_bf16_params.h" 232 58 + 7050 "10000100" // JZ r1, #7088 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7088 delay_slots=5 */ + 7051 "00000001" // /* MW 5 */ + 7052 "00000000" // /* MW 4 */ + 7053 "11011000" // /* MW 3 */ + 7054 "00001101" // /* MW 2 */ + 7055 "00001000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 190 20 +.delay_slot + 7056 "00011000" // MOVX r16, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7057 "00001101" // /* MW 3 */ + 7058 "00100000" // /* MW 2 */ + 7059 "00010000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 190 20 first +.delay_slot + 7060 "10011000" // LSHL r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7061 "00001101" // /* MW 3 */ + 7062 "11100001" // /* MW 2 */ + 7063 "00010011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 +.delay_slot + 7064 "01000100" // MOVXM r31, #-8454144 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7065 "00000000" // /* MW 5 */ + 7066 "10100000" // /* MW 4 */ + 7067 "00001111" // /* MW 3 */ + 7068 "01111111" // /* MW 2 */ + 7069 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7071 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7073 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 + 7074 "01111110" // NOPA; NOPB; NOPS; MOVX r31, #0; NOPM /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 7075 "01100000" // /* MW 13 */ + 7076 "00101011" // /* MW 12 */ + 7077 "00000000" // /* MW 11 */ + 7078 "10101111" // /* MW 10 */ + 7079 "00110100" // /* MW 9 */ + 7080 "00000000" // /* MW 8 */ + 7081 "00000001" // /* MW 7 */ + 7082 "00111110" // /* MW 6 */ + 7083 "00100000" // /* MW 5 */ + 7084 "00000000" // /* MW 4 */ + 7085 "11110000" // /* MW 3 */ + 7086 "00101100" // /* MW 2 */ + 7087 "00000000" // /* MW 1 */ +.label TGT_F_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh_608 +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 +.src_ref 7 "conv2d_dw_bf16_params.h" 236 40 + 7088 "10111010" // MOVA m0, #-197; MOVXM p0, #508448 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7089 "00010000" // /* MW 9 */ + 7090 "00010000" // /* MW 8 */ + 7091 "00110001" // /* MW 7 */ + 7092 "11110000" // /* MW 6 */ + 7093 "00000001" // /* MW 5 */ + 7094 "00000000" // /* MW 4 */ + 7095 "10000000" // /* MW 3 */ + 7096 "01100000" // /* MW 2 */ + 7097 "11100111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 first +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 first +.src_ref 7 "conv2d_dw_bf16_params.h" 234 77 first + 7098 "10111010" // LDA.s8 r17, [p0]; AND r29, r29, r17; VINSERT.32 x0, x0, #0, r31 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7099 "10111000" // /* MW 9 */ + 7100 "11111000" // /* MW 8 */ + 7101 "00000001" // /* MW 7 */ + 7102 "10100100" // /* MW 6 */ + 7103 "11011000" // /* MW 5 */ + 7104 "00111011" // /* MW 4 */ + 7105 "01010000" // /* MW 3 */ + 7106 "11000100" // /* MW 2 */ + 7107 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 +.src_ref 7 "conv2d_dw_bf16_params.h" 239 122 + 7108 "10111010" // LDA r1, [sp, #-8]; MOVX r31, #5; VMOV bmll0, x0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7109 "01111000" // /* MW 9 */ + 7110 "01001001" // /* MW 8 */ + 7111 "00000000" // /* MW 7 */ + 7112 "10101000" // /* MW 6 */ + 7113 "11110000" // /* MW 5 */ + 7114 "00000001" // /* MW 4 */ + 7115 "00100000" // /* MW 3 */ + 7116 "00000110" // /* MW 2 */ + 7117 "11111111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 237 53 +.src_ref 7 "conv2d_dw_bf16_params.h" 239 122 first +.src_ref 7 "conv2d_dw_bf16_params.h" 240 + 7118 "10111010" // LDA lr, [sp, #-4]; LSHL r31, r15, r31; MOV m1, #201 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7119 "01011000" // /* MW 9 */ + 7120 "11001001" // /* MW 8 */ + 7121 "10000000" // /* MW 7 */ + 7122 "11101100" // /* MW 6 */ + 7123 "11111111" // /* MW 5 */ + 7124 "00011111" // /* MW 4 */ + 7125 "00100000" // /* MW 3 */ + 7126 "10000111" // /* MW 2 */ + 7127 "11111111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 + 7128 "00101100" // LDA p0, [sp, #-16]; MOVX r25, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7129 "00000010" // /* MW 5 */ + 7130 "01100100" // /* MW 4 */ + 7131 "00100000" // /* MW 3 */ + 7132 "00000011" // /* MW 2 */ + 7133 "11111110" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7134 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7135 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7136 "00011000" // LDA r14, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7137 "11010001" // /* MW 3 */ + 7138 "11110101" // /* MW 2 */ + 7139 "00000111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 39 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7140 "00011000" // ST.s16 r1, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7141 "00110111" // /* MW 3 */ + 7142 "00101100" // /* MW 2 */ + 7143 "00000111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 +.src_ref 7 "conv2d_dw_bf16_params.h" 234 81 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7144 "11100100" // MUL r28, r29, r28; MOV crRnd, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7145 "01000001" // /* MW 5 */ + 7146 "01110001" // /* MW 4 */ + 7147 "11111111" // /* MW 3 */ + 7148 "00111001" // /* MW 2 */ + 7149 "11101111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 first +.src_ref 7 "conv2d_dw_bf16_params.h" 239 74 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7150 "00111010" // VCONV.bf16.fp32 wl0, bmll0; LSHL r17, r15, r26; MOV r15, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7151 "01111001" // /* MW 9 */ + 7152 "01010000" // /* MW 8 */ + 7153 "11101000" // /* MW 7 */ + 7154 "01101101" // /* MW 6 */ + 7155 "00011101" // /* MW 5 */ + 7156 "00011111" // /* MW 4 */ + 7157 "11000000" // /* MW 3 */ + 7158 "00000010" // /* MW 2 */ + 7159 "00001000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 234 109 first +.src_ref 7 "conv2d_dw_bf16_params.h" 239 51 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7160 "00100100" // MUL r20, r28, r20; ADD.NC r17, r17, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7161 "11111111" // /* MW 5 */ + 7162 "10110001" // /* MW 4 */ + 7163 "11111000" // /* MW 3 */ + 7164 "00101001" // /* MW 2 */ + 7165 "11100101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 first +.src_ref 7 "conv2d_dw_bf16_params.h" 235 59 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7166 "01100100" // LSHL r29, r20, r19; VEXTRACT.16 r1, x0, #0, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7167 "00000011" // /* MW 5 */ + 7168 "10000010" // /* MW 4 */ + 7169 "10110000" // /* MW 3 */ + 7170 "01100111" // /* MW 2 */ + 7171 "10100111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 236 52 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7172 "10011000" // MUL r28, r30, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7173 "11101111" // /* MW 3 */ + 7174 "10111000" // /* MW 2 */ + 7175 "00010111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 51 first + 7176 "10011000" // LSHL r21, r17, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7177 "01011101" // /* MW 3 */ + 7178 "01101011" // /* MW 2 */ + 7179 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 234 40 first +.src_ref 7 "conv2d_dw_bf16_params.h" 237 41 first + 7180 "01011100" // ST r20, [p7], #4; LSHL r23, r28, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7181 "11111011" // /* MW 5 */ + 7182 "01011110" // /* MW 4 */ + 7183 "00111110" // /* MW 3 */ + 7184 "11010010" // /* MW 2 */ + 7185 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 235 41 first +.src_ref 7 "conv2d_dw_bf16_params.h" 239 51 first + 7186 "01011100" // ST r29, [p7], #4; SUB r26, r31, r21 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7187 "10100011" // /* MW 5 */ + 7188 "11101010" // /* MW 4 */ + 7189 "00111111" // /* MW 3 */ + 7190 "11110110" // /* MW 2 */ + 7191 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 236 40 first +.src_ref 7 "conv2d_dw_bf16_params.h" 239 51 + 7192 "01011100" // ST r28, [p7], m0; MAC r21, r21, r31, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7193 "01001100" // /* MW 5 */ + 7194 "11010110" // /* MW 4 */ + 7195 "00111111" // /* MW 3 */ + 7196 "01110010" // /* MW 2 */ + 7197 "11100001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 237 53 first + 7198 "10011000" // LDA.u8 r20, [p7], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7199 "10001010" // /* MW 3 */ + 7200 "00101010" // /* MW 2 */ + 7201 "00000111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 240 first + 7202 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7203 "00000001" // /* MW 5 */ + 7204 "00000000" // /* MW 4 */ + 7205 "00000000" // /* MW 3 */ + 7206 "11111000" // /* MW 2 */ + 7207 "11111111" // /* MW 1 */ + 7208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7209 "00000000" // /* MW 1 */ + 7210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7211 "00000000" // /* MW 1 */ + 7212 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7213 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 139 first + 7214 "10011000" // LSHL r30, r30, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7215 "00111101" // /* MW 3 */ + 7216 "10111101" // /* MW 2 */ + 7217 "00010111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 51 + 7218 "10011000" // SUB r21, r30, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7219 "01010001" // /* MW 3 */ + 7220 "10101011" // /* MW 2 */ + 7221 "00010111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 237 57 first + 7222 "10011000" // EQ r27, r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7223 "01000111" // /* MW 3 */ + 7224 "11110111" // /* MW 2 */ + 7225 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 237 41 + 7226 "00011000" // SEL.EQZ r19, r24, r23, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7227 "01110010" // /* MW 3 */ + 7228 "00100111" // /* MW 2 */ + 7229 "00010110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 237 39 + 7230 "10011000" // ST r19, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7231 "01110001" // /* MW 3 */ + 7232 "00011110" // /* MW 2 */ + 7233 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 238 39 first + 7234 "10011000" // ST r16, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7235 "00010001" // /* MW 3 */ + 7236 "00011110" // /* MW 2 */ + 7237 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 first + 7238 "10011000" // ST r17, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7239 "00110001" // /* MW 3 */ + 7240 "00011110" // /* MW 2 */ + 7241 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 +.src_ref 7 "conv2d_dw_bf16_params.h" 240 first + 7242 "01011100" // ST r22, [p7], #4; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 7243 "00000000" // /* MW 5 */ + 7244 "01010000" // /* MW 4 */ + 7245 "00110000" // /* MW 3 */ + 7246 "11011010" // /* MW 2 */ + 7247 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 first +.delay_slot + 7248 "10011000" // ST r18, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7249 "01010001" // /* MW 3 */ + 7250 "00011110" // /* MW 2 */ + 7251 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 +.delay_slot + 7252 "10011000" // ST r26, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7253 "01010001" // /* MW 3 */ + 7254 "00011111" // /* MW 2 */ + 7255 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 +.delay_slot + 7256 "10011000" // ST r21, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7257 "10110001" // /* MW 3 */ + 7258 "00011110" // /* MW 2 */ + 7259 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 +.delay_slot + 7260 "10011000" // ST r25, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7261 "00110001" // /* MW 3 */ + 7262 "00000111" // /* MW 2 */ + 7263 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 +.delay_slot + 7264 "00000010" // ST r25, [p7, #4]; MOV p7, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7265 "01110000" // /* MW 7 */ + 7266 "01100000" // /* MW 6 */ + 7267 "10110000" // /* MW 5 */ + 7268 "00000011" // /* MW 4 */ + 7269 "00110000" // /* MW 3 */ + 7270 "11100110" // /* MW 2 */ +.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh__end +.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_end0 + 7271 "11100010" // /* MW 1 */ +.label __Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params___func_begin0 +.label _Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params +.function conv2d_dw_core _Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 7 "conv2d_dw_bf16.h" 158 first +.src_ref 7 "conv2d_dw_bf16.h" 179 4 +.src_ref 7 "conv2d_dw_bf16.h" 183 4 +.function_start + 7280 "10110110" // MOVA m6, #-120; VLDB x6, [p0], #64; MOVXM p4, #508972 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7281 "00010000" // /* MW 11 */ + 7282 "00010110" // /* MW 10 */ + 7283 "00110010" // /* MW 9 */ + 7284 "11110010" // /* MW 8 */ + 7285 "00000001" // /* MW 7 */ + 7286 "00000000" // /* MW 6 */ + 7287 "01101000" // /* MW 5 */ + 7288 "00111011" // /* MW 4 */ + 7289 "10000000" // /* MW 3 */ + 7290 "00011000" // /* MW 2 */ + 7291 "11110001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 153 18 +.src_ref 4 "shuffle.hpp" 153 18 +.src_ref 7 "conv2d_dw_bf16.h" 179 4 first +.src_ref 7 "conv2d_dw_bf16.h" 202 56 + 7292 "10110110" // LDA dj2, [p4], #-4; VLDB x1, [p0], #64; MOVX r1, #16; MOV m5, #128 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7293 "01011000" // /* MW 11 */ + 7294 "10000000" // /* MW 10 */ + 7295 "10000000" // /* MW 9 */ + 7296 "00001010" // /* MW 8 */ + 7297 "00010010" // /* MW 7 */ + 7298 "00000000" // /* MW 6 */ + 7299 "11101000" // /* MW 5 */ + 7300 "00111000" // /* MW 4 */ + 7301 "11010000" // /* MW 3 */ + 7302 "10101000" // /* MW 2 */ + 7303 "10011111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 179 4 +.src_ref 7 "conv2d_dw_bf16.h" 202 56 +.src_ref 7 "conv2d_dw_bf16.h" 228 12 +.src_ref 7 "conv2d_dw_bf16.h" 229 12 +.src_ref 7 "conv2d_dw_bf16.h" 230 12 +.src_ref 7 "conv2d_dw_bf16.h" 231 12 +.src_ref 7 "conv2d_dw_bf16.h" 232 12 +.src_ref 7 "conv2d_dw_bf16.h" 233 12 +.src_ref 7 "conv2d_dw_bf16.h" 234 12 +.src_ref 7 "conv2d_dw_bf16.h" 235 12 + 7304 "10111010" // LDA dn2, [p4], #12; MOVX r0, #60; MOV m4, #-112 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7305 "01011000" // /* MW 9 */ + 7306 "10010000" // /* MW 8 */ + 7307 "00000111" // /* MW 7 */ + 7308 "10001010" // /* MW 6 */ + 7309 "00000111" // /* MW 5 */ + 7310 "00000000" // /* MW 4 */ + 7311 "11010000" // /* MW 3 */ + 7312 "10100100" // /* MW 2 */ + 7313 "10000111" // /* MW 1 */ +.src_ref 8 "aie_core.h" 81 15 +.src_ref 8 "aie_core.h" 81 15 +.src_ref 8 "aie_core.h" 100 15 +.src_ref 8 "aie_core.h" 100 15 +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "shuffle.hpp" 153 18 +.src_ref 4 "shuffle.hpp" 153 18 +.src_ref 7 "conv2d_dw_bf16.h" 179 4 + 7314 "10111010" // LDA dj6, [p4], #-4; MOVX r2, #32; MOV dc4, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7315 "01011000" // /* MW 9 */ + 7316 "00000000" // /* MW 8 */ + 7317 "01100000" // /* MW 7 */ + 7318 "00001010" // /* MW 6 */ + 7319 "00100100" // /* MW 5 */ + 7320 "00000000" // /* MW 4 */ + 7321 "11010000" // /* MW 3 */ + 7322 "11101000" // /* MW 2 */ + 7323 "10011111" // /* MW 1 */ +.src_ref 8 "aie_core.h" 81 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 7 "conv2d_dw_bf16.h" 179 4 +.src_ref 7 "conv2d_dw_bf16.h" 208 8 first + 7324 "01110110" // LDA dn6, [p4], #8; MOVS dc7, dc4; MOVXM ls, #7520 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7325 "00010000" // /* MW 11 */ + 7326 "10110000" // /* MW 10 */ + 7327 "01111110" // /* MW 9 */ + 7328 "00000100" // /* MW 8 */ + 7329 "00000000" // /* MW 7 */ + 7330 "00000000" // /* MW 6 */ + 7331 "01001011" // /* MW 5 */ + 7332 "00010000" // /* MW 4 */ + 7333 "11010111" // /* MW 3 */ + 7334 "11100100" // /* MW 2 */ + 7335 "10000101" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 7 "conv2d_dw_bf16.h" 179 4 first +.src_ref 7 "conv2d_dw_bf16.h" 208 8 + 7336 "01110110" // LDA m2, [p4], #8; MOVS dc2, dc4; MOVXM le, #7600 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7337 "00010000" // /* MW 11 */ + 7338 "11011000" // /* MW 10 */ + 7339 "10111110" // /* MW 9 */ + 7340 "00000101" // /* MW 8 */ + 7341 "00000000" // /* MW 7 */ + 7342 "00000000" // /* MW 6 */ + 7343 "01001011" // /* MW 5 */ + 7344 "00010000" // /* MW 4 */ + 7345 "11010010" // /* MW 3 */ + 7346 "10100000" // /* MW 2 */ + 7347 "10000101" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 7 "conv2d_dw_bf16.h" 180 4 first +.src_ref 7 "conv2d_dw_bf16.h" 202 56 + 7348 "01110110" // LDA dj0, [p4], #-4; MOVS dc6, dc4; MOVXM p5, #508448 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7349 "00010000" // /* MW 11 */ + 7350 "00010000" // /* MW 10 */ + 7351 "10110001" // /* MW 9 */ + 7352 "11110010" // /* MW 8 */ + 7353 "00000001" // /* MW 7 */ + 7354 "00000000" // /* MW 6 */ + 7355 "01001011" // /* MW 5 */ + 7356 "00010000" // /* MW 4 */ + 7357 "11010110" // /* MW 3 */ + 7358 "10001000" // /* MW 2 */ + 7359 "10011111" // /* MW 1 */ +.src_ref 8 "aie_core.h" 81 15 +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "shuffle.hpp" 153 18 +.src_ref 4 "shuffle.hpp" 153 18 +.src_ref 7 "conv2d_dw_bf16.h" 180 4 + 7360 "01110110" // LDA dn0, [p4], #12; MOVS dc1, dc4; MOVX r3, #48; MOV dc3, dc4 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7361 "01111000" // /* MW 11 */ + 7362 "11000000" // /* MW 10 */ + 7363 "11100100" // /* MW 9 */ + 7364 "00001001" // /* MW 8 */ + 7365 "00110110" // /* MW 7 */ + 7366 "00000000" // /* MW 6 */ + 7367 "01001011" // /* MW 5 */ + 7368 "00010000" // /* MW 4 */ + 7369 "11010001" // /* MW 3 */ + 7370 "10000100" // /* MW 2 */ + 7371 "10000111" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 180 4 + 7372 "10111010" // LDA dj4, [p4], #-4; MOVS dc0, dc4; VSHIFT x4, x6, x1, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7373 "01100010" // /* MW 9 */ + 7374 "01000011" // /* MW 8 */ + 7375 "00011000" // /* MW 7 */ + 7376 "00000001" // /* MW 6 */ + 7377 "01001011" // /* MW 5 */ + 7378 "00010000" // /* MW 4 */ + 7379 "11010000" // /* MW 3 */ + 7380 "11001000" // /* MW 2 */ + 7381 "10011111" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 7 "conv2d_dw_bf16.h" 180 4 first + 7382 "11010100" // LDA dn4, [p4], #8; MOV dc5, dc4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7383 "00000001" // /* MW 5 */ + 7384 "10010011" // /* MW 4 */ + 7385 "11011011" // /* MW 3 */ + 7386 "11000100" // /* MW 2 */ + 7387 "10000101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 180 4 + 7388 "10011000" // LDA m0, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7389 "00000110" // /* MW 3 */ + 7390 "00101100" // /* MW 2 */ + 7391 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 181 4 first + 7392 "10011000" // LDA dj1, [p4], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7393 "11000110" // /* MW 3 */ + 7394 "11111100" // /* MW 2 */ + 7395 "00000100" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 7 "conv2d_dw_bf16.h" 181 4 + 7396 "00111100" // LDA dn1, [p4], #12; VLDB.3D x2, [p0], d2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7397 "01101000" // /* MW 5 */ + 7398 "10110001" // /* MW 4 */ + 7399 "11010000" // /* MW 3 */ + 7400 "10010100" // /* MW 2 */ + 7401 "10000111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 181 4 first + 7402 "10011000" // LDA dj5, [p4], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7403 "11000110" // /* MW 3 */ + 7404 "11111110" // /* MW 2 */ + 7405 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 181 4 + 7406 "10011000" // LDA dn5, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7407 "10100110" // /* MW 3 */ + 7408 "00101110" // /* MW 2 */ + 7409 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 181 4 + 7410 "10011000" // LDA m1, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7411 "10000110" // /* MW 3 */ + 7412 "00101100" // /* MW 2 */ + 7413 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 182 4 first + 7414 "10011000" // LDA dj7, [p4], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7415 "11000110" // /* MW 3 */ + 7416 "11111111" // /* MW 2 */ + 7417 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 182 4 + 7418 "10011000" // LDA dn7, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7419 "10100110" // /* MW 3 */ + 7420 "00101111" // /* MW 2 */ + 7421 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 182 4 + 7422 "10011000" // LDA m7, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7423 "10000110" // /* MW 3 */ + 7424 "00101111" // /* MW 2 */ + 7425 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 183 4 first + 7426 "10011000" // LDA dj3, [p4], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7427 "11000110" // /* MW 3 */ + 7428 "11111101" // /* MW 2 */ + 7429 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 183 4 + 7430 "10011000" // LDA dn3, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7431 "10100110" // /* MW 3 */ + 7432 "00101101" // /* MW 2 */ + 7433 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 183 4 + 7434 "10011000" // LDA m3, [p4], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7435 "10000110" // /* MW 3 */ + 7436 "11001001" // /* MW 2 */ + 7437 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 202 56 first + 7438 "10011000" // LDA r4, [p4], m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7439 "10010110" // /* MW 3 */ + 7440 "10101000" // /* MW 2 */ + 7441 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 202 56 + 7442 "10011000" // LDA.s16 r7, [p4], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7443 "11110010" // /* MW 3 */ + 7444 "10001000" // /* MW 2 */ + 7445 "00000100" // /* MW 1 */ + 7446 "10011000" // LDA m4, [p4], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7447 "00000110" // /* MW 3 */ + 7448 "01001110" // /* MW 2 */ + 7449 "00000100" // /* MW 1 */ +.src_ref 8 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 7 "conv2d_dw_bf16.h" 202 56 first + 7450 "00111100" // LDA r5, [p4, #-28]; VLDB.2D x3, [p1], d7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7451 "11101000" // /* MW 5 */ + 7452 "11100001" // /* MW 4 */ + 7453 "11010011" // /* MW 3 */ + 7454 "10010110" // /* MW 2 */ + 7455 "10010010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 first + 7456 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7457 "00101011" // /* MW 3 */ + 7458 "00000100" // /* MW 2 */ + 7459 "00000010" // /* MW 1 */ + 7460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7461 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 202 56 first + 7462 "10011000" // LDA.s8 r6, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7463 "11000010" // /* MW 3 */ + 7464 "00000100" // /* MW 2 */ + 7465 "00000101" // /* MW 1 */ + 7466 "00011000" // ADD r4, r4, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7467 "11111011" // /* MW 3 */ + 7468 "00001001" // /* MW 2 */ + 7469 "00010001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 202 4 + 7470 "10111010" // LDA r17, [p4]; MOVXM p4, #7664 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7471 "00010000" // /* MW 9 */ + 7472 "11111000" // /* MW 8 */ + 7473 "00110110" // /* MW 7 */ + 7474 "00000110" // /* MW 6 */ + 7475 "00000000" // /* MW 5 */ + 7476 "00000000" // /* MW 4 */ + 7477 "11010000" // /* MW 3 */ + 7478 "11000110" // /* MW 2 */ + 7479 "10000000" // /* MW 1 */ + 7480 "11111000" // VBCST.16 x0, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7481 "01110010" // /* MW 3 */ + 7482 "00011101" // /* MW 2 */ + 7483 "00011000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 226 12 first + 7484 "01011000" // VEXTBCST.128 x10, x3, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7485 "00000011" // /* MW 3 */ + 7486 "00011100" // /* MW 2 */ + 7487 "00011101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 208 8 first + 7488 "10011000" // ADD.NC lc, r5, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7489 "11111111" // /* MW 3 */ + 7490 "01110010" // /* MW 2 */ + 7491 "00011101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 228 12 +.src_ref 7 "conv2d_dw_bf16.h" 228 12 first + 7492 "01100110" // NOPA; NOPB; VMOV cml3, cml0; VMAC.f dm4, dm3, x6, x10, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7493 "01000001" // /* MW 11 */ + 7494 "01101101" // /* MW 10 */ + 7495 "00000100" // /* MW 9 */ + 7496 "11100010" // /* MW 8 */ + 7497 "10001010" // /* MW 7 */ + 7498 "00000000" // /* MW 6 */ + 7499 "00100011" // /* MW 5 */ + 7500 "00000000" // /* MW 4 */ + 7501 "11110000" // /* MW 3 */ + 7502 "00101100" // /* MW 2 */ + 7503 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 7 "conv2d_dw_bf16.h" 223 12 first +.src_ref 7 "conv2d_dw_bf16.h" 232 12 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7504 "00001011" // NOPA; NOPB; NOPS; MOVX crRnd, r6; VEXTBCST.128 x8, x3, #1; VMAC.f dm1, dm0, x1, x10, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7505 "00011010" // /* MW 15 */ + 7506 "00001000" // /* MW 14 */ + 7507 "10101000" // /* MW 13 */ + 7508 "00000011" // /* MW 12 */ + 7509 "00001110" // /* MW 11 */ + 7510 "00000010" // /* MW 10 */ + 7511 "11010100" // /* MW 9 */ + 7512 "00001101" // /* MW 8 */ + 7513 "01011011" // /* MW 7 */ + 7514 "00000001" // /* MW 6 */ + 7515 "00100000" // /* MW 5 */ + 7516 "00000000" // /* MW 4 */ + 7517 "11110000" // /* MW 3 */ + 7518 "00101100" // /* MW 2 */ + 7519 "00000000" // /* MW 1 */ +.label ZLS_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_240 +.src_ref 8 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 153 18 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.noswbrkpt +.loop_nesting 1 + 7520 "10111010" // VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7521 "01101110" // /* MW 9 */ + 7522 "10000011" // /* MW 8 */ + 7523 "10000100" // /* MW 7 */ + 7524 "00000010" // /* MW 6 */ + 7525 "11110100" // /* MW 5 */ + 7526 "11110000" // /* MW 4 */ + 7527 "01110001" // /* MW 3 */ + 7528 "10110011" // /* MW 2 */ + 7529 "00000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 7 "conv2d_dw_bf16.h" 224 12 first +.src_ref 7 "conv2d_dw_bf16.h" 229 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7530 "01001010" // VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7531 "00000001" // /* MW 9 */ + 7532 "10001001" // /* MW 8 */ + 7533 "00000010" // /* MW 7 */ + 7534 "01000110" // /* MW 6 */ + 7535 "00001011" // /* MW 5 */ + 7536 "10011100" // /* MW 4 */ + 7537 "11101010" // /* MW 3 */ + 7538 "00111000" // /* MW 2 */ + 7539 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 233 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7540 "01001010" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r2; VMAC.f dm1, dm1, x10, x8, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7541 "00000001" // /* MW 9 */ + 7542 "00110101" // /* MW 8 */ + 7543 "00000001" // /* MW 7 */ + 7544 "11000110" // /* MW 6 */ + 7545 "10001010" // /* MW 5 */ + 7546 "00110000" // /* MW 4 */ + 7547 "01101010" // /* MW 3 */ + 7548 "10110001" // /* MW 2 */ + 7549 "00000000" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7550 "11011000" // VSHIFT x11, x1, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7551 "00001010" // /* MW 3 */ + 7552 "10001001" // /* MW 2 */ + 7553 "00011101" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 +.src_ref 7 "conv2d_dw_bf16.h" 230 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7554 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7555 "10100001" // /* MW 7 */ + 7556 "01001000" // /* MW 6 */ + 7557 "00000100" // /* MW 5 */ + 7558 "11000110" // /* MW 4 */ + 7559 "10001110" // /* MW 3 */ + 7560 "10110000" // /* MW 2 */ + 7561 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 225 12 first +.src_ref 7 "conv2d_dw_bf16.h" 234 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7562 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7563 "10100001" // /* MW 7 */ + 7564 "00110110" // /* MW 6 */ + 7565 "00000010" // /* MW 5 */ + 7566 "01000110" // /* MW 4 */ + 7567 "00001111" // /* MW 3 */ + 7568 "10011100" // /* MW 2 */ + 7569 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7570 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7571 "00001110" // /* MW 3 */ + 7572 "10001001" // /* MW 2 */ + 7573 "00011101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 226 12 first +.src_ref 7 "conv2d_dw_bf16.h" 231 12 first + 7574 "01100010" // VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7575 "11100001" // /* MW 7 */ + 7576 "10010010" // /* MW 6 */ + 7577 "00000011" // /* MW 5 */ + 7578 "01000110" // /* MW 4 */ + 7579 "00000011" // /* MW 3 */ + 7580 "00011100" // /* MW 2 */ + 7581 "00000101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 223 12 first +.src_ref 7 "conv2d_dw_bf16.h" 235 12 first + 7582 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7583 "11100001" // /* MW 7 */ + 7584 "01010110" // /* MW 6 */ + 7585 "00000000" // /* MW 5 */ + 7586 "01000110" // /* MW 4 */ + 7587 "00000111" // /* MW 3 */ + 7588 "00011100" // /* MW 2 */ + 7589 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first + 7590 "10010100" // NOPA; VSHIFT x4, x6, x1, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7591 "00001101" // /* MW 5 */ + 7592 "01100001" // /* MW 4 */ + 7593 "11110100" // /* MW 3 */ + 7594 "00101100" // /* MW 2 */ + 7595 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 228 12 first + 7596 "01001000" // VMAC.f dm4, dm3, x6, x10, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7597 "01000001" // /* MW 3 */ + 7598 "01101101" // /* MW 2 */ + 7599 "00000100" // /* MW 1 */ +.label ZLE_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_320 +.src_ref 7 "conv2d_dw_bf16.h" 232 12 first +.end_of_loop +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 7600 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7601 "00011010" // /* MW 15 */ + 7602 "00001000" // /* MW 14 */ + 7603 "01111000" // /* MW 13 */ + 7604 "10100101" // /* MW 12 */ + 7605 "00000001" // /* MW 11 */ + 7606 "00000000" // /* MW 10 */ + 7607 "00000000" // /* MW 9 */ + 7608 "00000000" // /* MW 8 */ + 7609 "01011011" // /* MW 7 */ + 7610 "00000001" // /* MW 6 */ + 7611 "00100000" // /* MW 5 */ + 7612 "00000000" // /* MW 4 */ + 7613 "11110000" // /* MW 3 */ + 7614 "00101100" // /* MW 2 */ + 7615 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 81 15 first +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "shuffle.hpp" 153 18 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 7616 "10111010" // PADDA.3D [p0], d0; PADDB.2D [p2], d3; VSHIFT x10, x1, x2, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7617 "01101110" // /* MW 9 */ + 7618 "10000011" // /* MW 8 */ + 7619 "10000100" // /* MW 7 */ + 7620 "00000010" // /* MW 6 */ + 7621 "10010000" // /* MW 5 */ + 7622 "01110011" // /* MW 4 */ + 7623 "11110010" // /* MW 3 */ + 7624 "00001100" // /* MW 2 */ + 7625 "00000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 224 12 first +.src_ref 7 "conv2d_dw_bf16.h" 229 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7626 "01100010" // VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7627 "00000001" // /* MW 7 */ + 7628 "10001001" // /* MW 6 */ + 7629 "00000010" // /* MW 5 */ + 7630 "01000110" // /* MW 4 */ + 7631 "00001011" // /* MW 3 */ + 7632 "10011100" // /* MW 2 */ + 7633 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 233 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7634 "01100010" // VSHIFT x4, x6, x1, r2; VMAC.f dm1, dm1, x10, x8, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7635 "00000001" // /* MW 7 */ + 7636 "00110101" // /* MW 6 */ + 7637 "00000001" // /* MW 5 */ + 7638 "11000110" // /* MW 4 */ + 7639 "10001010" // /* MW 3 */ + 7640 "00110000" // /* MW 2 */ + 7641 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7642 "11011000" // VSHIFT x11, x1, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7643 "00001010" // /* MW 3 */ + 7644 "10001001" // /* MW 2 */ + 7645 "00011101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 225 12 first +.src_ref 7 "conv2d_dw_bf16.h" 230 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7646 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7647 "10100001" // /* MW 7 */ + 7648 "01001000" // /* MW 6 */ + 7649 "00000100" // /* MW 5 */ + 7650 "01000110" // /* MW 4 */ + 7651 "00001111" // /* MW 3 */ + 7652 "10011100" // /* MW 2 */ + 7653 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 234 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7654 "01001010" // NOPA; VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7655 "10100001" // /* MW 9 */ + 7656 "00110110" // /* MW 8 */ + 7657 "00000010" // /* MW 7 */ + 7658 "11000010" // /* MW 6 */ + 7659 "10001110" // /* MW 5 */ + 7660 "10110000" // /* MW 4 */ + 7661 "11110100" // /* MW 3 */ + 7662 "00101100" // /* MW 2 */ + 7663 "00000000" // /* MW 1 */ +.label TGT_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_384 +.src_ref 8 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 153 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 7664 "10110100" // VLDB.2D x3, [p1], d7; VSHIFT x11, x1, x2, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7665 "00011101" // /* MW 5 */ + 7666 "00010010" // /* MW 4 */ + 7667 "10001011" // /* MW 3 */ + 7668 "00011110" // /* MW 2 */ + 7669 "00111110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 208 8 first +.src_ref 7 "conv2d_dw_bf16.h" 231 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7670 "01011010" // MOVXM le, #7840; VMAC.f dm3, dm4, x9, x7, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7671 "11100001" // /* MW 9 */ + 7672 "10010010" // /* MW 8 */ + 7673 "00000011" // /* MW 7 */ + 7674 "00000010" // /* MW 6 */ + 7675 "11101010" // /* MW 5 */ + 7676 "10110111" // /* MW 4 */ + 7677 "00000000" // /* MW 3 */ + 7678 "00000000" // /* MW 2 */ + 7679 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 7 "conv2d_dw_bf16.h" 208 8 +.src_ref 7 "conv2d_dw_bf16.h" 235 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7680 "01000110" // VLDA.CONV.fp32.bf16 cml0, [p2]; MOVXM ls, #7760; VMAC.f dm0, dm2, x11, x7, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7681 "11100001" // /* MW 11 */ + 7682 "01010110" // /* MW 10 */ + 7683 "00000000" // /* MW 9 */ + 7684 "00000010" // /* MW 8 */ + 7685 "11100101" // /* MW 7 */ + 7686 "10001111" // /* MW 6 */ + 7687 "00000000" // /* MW 5 */ + 7688 "00000000" // /* MW 4 */ + 7689 "01110000" // /* MW 3 */ + 7690 "10000101" // /* MW 2 */ + 7691 "01000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 208 8 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7692 "10011000" // ADD.NC lc, r5, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7693 "11111111" // /* MW 3 */ + 7694 "01110010" // /* MW 2 */ + 7695 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7696 "10011000" // VLDA x6, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7697 "10011011" // /* MW 3 */ + 7698 "00011101" // /* MW 2 */ + 7699 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7700 "00011000" // VLDB x1, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7701 "01110100" // /* MW 3 */ + 7702 "00011100" // /* MW 2 */ + 7703 "00111000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7704 "00011000" // VLDB.3D x2, [p0], d2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7705 "10110100" // /* MW 3 */ + 7706 "01011000" // /* MW 2 */ + 7707 "00111000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7708 "00011000" // VCONV.bf16.fp32 x10, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7709 "10010110" // /* MW 3 */ + 7710 "00010001" // /* MW 2 */ + 7711 "00001101" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7712 "00011000" // VCONV.bf16.fp32 x6, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7713 "00010110" // /* MW 3 */ + 7714 "00010000" // /* MW 2 */ + 7715 "00001011" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7716 "11111000" // VMAX_LT.bf16 x8, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7717 "01101100" // /* MW 3 */ + 7718 "01010000" // /* MW 2 */ + 7719 "00011100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 244 17 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7720 "01111000" // VSHUFFLE x10, x10, x6, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7721 "01000100" // /* MW 3 */ + 7722 "01010011" // /* MW 2 */ + 7723 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 7 "conv2d_dw_bf16.h" 243 16 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7724 "00000010" // VST x8, [p3], m4; VMAX_LT.bf16 x10, r16, x10, x0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7725 "01110000" // /* MW 7 */ + 7726 "00110110" // /* MW 6 */ + 7727 "10101000" // /* MW 5 */ + 7728 "00000010" // /* MW 4 */ + 7729 "01100000" // /* MW 3 */ + 7730 "01000010" // /* MW 2 */ + 7731 "01110001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 226 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7732 "01011000" // VEXTBCST.128 x10, x3, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7733 "00000011" // /* MW 3 */ + 7734 "00011100" // /* MW 2 */ + 7735 "00011101" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 7 "conv2d_dw_bf16.h" 228 12 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7736 "00000010" // VST.3D x10, [p3], d1; VMOV cml3, cml0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7737 "01110000" // /* MW 7 */ + 7738 "01000101" // /* MW 6 */ + 7739 "10000000" // /* MW 5 */ + 7740 "00000001" // /* MW 4 */ + 7741 "01100000" // /* MW 3 */ + 7742 "01010010" // /* MW 2 */ + 7743 "01100111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 223 12 first +.src_ref 7 "conv2d_dw_bf16.h" 228 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7744 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7745 "01000001" // /* MW 7 */ + 7746 "01101101" // /* MW 6 */ + 7747 "00000100" // /* MW 5 */ + 7748 "01000110" // /* MW 4 */ + 7749 "00000111" // /* MW 3 */ + 7750 "00011100" // /* MW 2 */ + 7751 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 232 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7752 "01100010" // VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm0, x1, x10, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7753 "01000001" // /* MW 7 */ + 7754 "00000011" // /* MW 6 */ + 7755 "00000001" // /* MW 5 */ + 7756 "11000110" // /* MW 4 */ + 7757 "10000110" // /* MW 3 */ + 7758 "00110000" // /* MW 2 */ + 7759 "00000010" // /* MW 1 */ +.label ZLS_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_480 +.src_ref 8 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 153 18 +.begin_of_loop +.aggressive_scheduled_block_id 2 +.noswbrkpt +.loop_nesting 2 + 7760 "10111010" // VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7761 "01101110" // /* MW 9 */ + 7762 "10000011" // /* MW 8 */ + 7763 "10000100" // /* MW 7 */ + 7764 "00000010" // /* MW 6 */ + 7765 "11110100" // /* MW 5 */ + 7766 "11110000" // /* MW 4 */ + 7767 "01110001" // /* MW 3 */ + 7768 "10110011" // /* MW 2 */ + 7769 "00000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 7 "conv2d_dw_bf16.h" 224 12 first +.src_ref 7 "conv2d_dw_bf16.h" 229 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7770 "01001010" // VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7771 "00000001" // /* MW 9 */ + 7772 "10001001" // /* MW 8 */ + 7773 "00000010" // /* MW 7 */ + 7774 "01000110" // /* MW 6 */ + 7775 "00001011" // /* MW 5 */ + 7776 "10011100" // /* MW 4 */ + 7777 "11101010" // /* MW 3 */ + 7778 "00111000" // /* MW 2 */ + 7779 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 233 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7780 "01001010" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r2; VMAC.f dm1, dm1, x10, x8, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7781 "00000001" // /* MW 9 */ + 7782 "00110101" // /* MW 8 */ + 7783 "00000001" // /* MW 7 */ + 7784 "11000110" // /* MW 6 */ + 7785 "10001010" // /* MW 5 */ + 7786 "00110000" // /* MW 4 */ + 7787 "01101010" // /* MW 3 */ + 7788 "10110001" // /* MW 2 */ + 7789 "00000000" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7790 "11011000" // VSHIFT x11, x1, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7791 "00001010" // /* MW 3 */ + 7792 "10001001" // /* MW 2 */ + 7793 "00011101" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 +.src_ref 7 "conv2d_dw_bf16.h" 230 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7794 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7795 "10100001" // /* MW 7 */ + 7796 "01001000" // /* MW 6 */ + 7797 "00000100" // /* MW 5 */ + 7798 "11000110" // /* MW 4 */ + 7799 "10001110" // /* MW 3 */ + 7800 "10110000" // /* MW 2 */ + 7801 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 225 12 first +.src_ref 7 "conv2d_dw_bf16.h" 234 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7802 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7803 "10100001" // /* MW 7 */ + 7804 "00110110" // /* MW 6 */ + 7805 "00000010" // /* MW 5 */ + 7806 "01000110" // /* MW 4 */ + 7807 "00001111" // /* MW 3 */ + 7808 "10011100" // /* MW 2 */ + 7809 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7810 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7811 "00001110" // /* MW 3 */ + 7812 "10001001" // /* MW 2 */ + 7813 "00011101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 226 12 first +.src_ref 7 "conv2d_dw_bf16.h" 231 12 first + 7814 "01100010" // VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7815 "11100001" // /* MW 7 */ + 7816 "10010010" // /* MW 6 */ + 7817 "00000011" // /* MW 5 */ + 7818 "01000110" // /* MW 4 */ + 7819 "00000011" // /* MW 3 */ + 7820 "00011100" // /* MW 2 */ + 7821 "00000101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 223 12 first +.src_ref 7 "conv2d_dw_bf16.h" 235 12 first + 7822 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7823 "11100001" // /* MW 7 */ + 7824 "01010110" // /* MW 6 */ + 7825 "00000000" // /* MW 5 */ + 7826 "01000110" // /* MW 4 */ + 7827 "00000111" // /* MW 3 */ + 7828 "00011100" // /* MW 2 */ + 7829 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first + 7830 "10010100" // NOPA; VSHIFT x4, x6, x1, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7831 "00001101" // /* MW 5 */ + 7832 "01100001" // /* MW 4 */ + 7833 "11110100" // /* MW 3 */ + 7834 "00101100" // /* MW 2 */ + 7835 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 228 12 first + 7836 "01001000" // VMAC.f dm4, dm3, x6, x10, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7837 "01000001" // /* MW 3 */ + 7838 "01101101" // /* MW 2 */ + 7839 "00000100" // /* MW 1 */ +.label ZLE_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_560 +.src_ref 7 "conv2d_dw_bf16.h" 232 12 first +.end_of_loop +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 7840 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7841 "00011010" // /* MW 15 */ + 7842 "00001000" // /* MW 14 */ + 7843 "01111000" // /* MW 13 */ + 7844 "10100101" // /* MW 12 */ + 7845 "00000001" // /* MW 11 */ + 7846 "00000000" // /* MW 10 */ + 7847 "00000000" // /* MW 9 */ + 7848 "00000000" // /* MW 8 */ + 7849 "01011011" // /* MW 7 */ + 7850 "00000001" // /* MW 6 */ + 7851 "00100000" // /* MW 5 */ + 7852 "00000000" // /* MW 4 */ + 7853 "11110000" // /* MW 3 */ + 7854 "00101100" // /* MW 2 */ + 7855 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 81 15 first +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 202 4 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 7856 "10110110" // PADDA.3D [p0], d0; PADDB.2D [p2], d3; JNZD r4, r4, p4; VSHIFT x10, x1, x2, r1 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 7857 "01101000" // /* MW 11 */ + 7858 "10000011" // /* MW 10 */ + 7859 "10000100" // /* MW 9 */ + 7860 "00000010" // /* MW 8 */ + 7861 "01001001" // /* MW 7 */ + 7862 "00001000" // /* MW 6 */ + 7863 "00100000" // /* MW 5 */ + 7864 "11100111" // /* MW 4 */ + 7865 "11110100" // /* MW 3 */ + 7866 "00001100" // /* MW 2 */ + 7867 "00000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 224 12 first +.src_ref 7 "conv2d_dw_bf16.h" 229 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7868 "01100010" // VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7869 "00000001" // /* MW 7 */ + 7870 "10001001" // /* MW 6 */ + 7871 "00000010" // /* MW 5 */ + 7872 "01000110" // /* MW 4 */ + 7873 "00001011" // /* MW 3 */ + 7874 "10011100" // /* MW 2 */ + 7875 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 233 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7876 "01100010" // VSHIFT x4, x6, x1, r2; VMAC.f dm1, dm1, x10, x8, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7877 "00000001" // /* MW 7 */ + 7878 "00110101" // /* MW 6 */ + 7879 "00000001" // /* MW 5 */ + 7880 "11000110" // /* MW 4 */ + 7881 "10001010" // /* MW 3 */ + 7882 "00110000" // /* MW 2 */ + 7883 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7884 "11011000" // VSHIFT x11, x1, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7885 "00001010" // /* MW 3 */ + 7886 "10001001" // /* MW 2 */ + 7887 "00011101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 225 12 first +.src_ref 7 "conv2d_dw_bf16.h" 230 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7888 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7889 "10100001" // /* MW 7 */ + 7890 "01001000" // /* MW 6 */ + 7891 "00000100" // /* MW 5 */ + 7892 "01000110" // /* MW 4 */ + 7893 "00001111" // /* MW 3 */ + 7894 "10011100" // /* MW 2 */ + 7895 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 234 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7896 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7897 "10100001" // /* MW 7 */ + 7898 "00110110" // /* MW 6 */ + 7899 "00000010" // /* MW 5 */ + 7900 "11000110" // /* MW 4 */ + 7901 "10001110" // /* MW 3 */ + 7902 "10110000" // /* MW 2 */ + 7903 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 7904 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7905 "00001110" // /* MW 3 */ + 7906 "10001001" // /* MW 2 */ + 7907 "00011101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 231 12 first + 7908 "01001000" // VMAC.f dm3, dm4, x9, x7, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7909 "11100001" // /* MW 3 */ + 7910 "10010010" // /* MW 2 */ + 7911 "00000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 235 12 first + 7912 "01001000" // VMAC.f dm0, dm2, x11, x7, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7913 "11100001" // /* MW 3 */ + 7914 "01010110" // /* MW 2 */ + 7915 "00000000" // /* MW 1 */ + 7916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7917 "00000000" // /* MW 1 */ + 7918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7919 "00000000" // /* MW 1 */ + 7920 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7921 "00000000" // /* MW 1 */ + 7922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7923 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 first + 7924 "00011000" // VCONV.bf16.fp32 x10, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7925 "10010110" // /* MW 3 */ + 7926 "00010001" // /* MW 2 */ + 7927 "00001101" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 7 "conv2d_dw_bf16.h" 248 first + 7928 "01011100" // VCONV.bf16.fp32 x6, cml0; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 7929 "00000000" // /* MW 5 */ + 7930 "01010000" // /* MW 4 */ + 7931 "11000000" // /* MW 3 */ + 7932 "00000010" // /* MW 2 */ + 7933 "01100010" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 7934 "11111000" // VMAX_LT.bf16 x8, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7935 "01101100" // /* MW 3 */ + 7936 "01010000" // /* MW 2 */ + 7937 "00011100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 244 17 first +.delay_slot + 7938 "01111000" // VSHUFFLE x10, x10, x6, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7939 "01000100" // /* MW 3 */ + 7940 "01010011" // /* MW 2 */ + 7941 "00011101" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 7942 "11111000" // VMAX_LT.bf16 x10, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7943 "01101100" // /* MW 3 */ + 7944 "01010000" // /* MW 2 */ + 7945 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 7 "conv2d_dw_bf16.h" 243 16 first +.delay_slot + 7946 "00011000" // VST x8, [p3], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7947 "00010011" // /* MW 3 */ + 7948 "10001010" // /* MW 2 */ + 7949 "00001011" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1159 33 +.delay_slot + 7950 "00011000" // VST.3D x10, [p3], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7951 "10010011" // /* MW 3 */ + 7952 "00111010" // /* MW 2 */ +.label _Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params__end +.label __Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params___func_end0 + 7953 "00001011" // /* MW 1 */ +.label __Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params___func_begin0 +.label _Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params +.function conv2d_dw_shuffle _Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 7 "conv2d_dw_bf16.h" 254 first +.src_ref 7 "conv2d_dw_bf16.h" 255 22 +.src_ref 7 "conv2d_dw_bf16.h" 255 22 +.src_ref 7 "conv2d_dw_bf16.h" 261 15 +.function_start + 7968 "10110110" // MOVA m4, #-36; VLDB x2, [p1], #64; MOVXM p3, #509080 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7969 "00010000" // /* MW 11 */ + 7970 "01001100" // /* MW 10 */ + 7971 "10110010" // /* MW 9 */ + 7972 "11110001" // /* MW 8 */ + 7973 "00000001" // /* MW 7 */ + 7974 "00000000" // /* MW 6 */ + 7975 "01101000" // /* MW 5 */ + 7976 "00111001" // /* MW 4 */ + 7977 "10000010" // /* MW 3 */ + 7978 "10010000" // /* MW 2 */ + 7979 "11111011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 7 "conv2d_dw_bf16.h" 255 22 first +.src_ref 7 "conv2d_dw_bf16.h" 262 15 first +.src_ref 7 "conv2d_dw_bf16.h" 270 25 + 7980 "10110110" // LDA dn0, [p3], #4; VLDB x0, [p2], #64; MOVX r4, #-5; MOV r3, #1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7981 "01011000" // /* MW 11 */ + 7982 "00000001" // /* MW 10 */ + 7983 "01101000" // /* MW 9 */ + 7984 "01101000" // /* MW 8 */ + 7985 "01000111" // /* MW 7 */ + 7986 "00111110" // /* MW 6 */ + 7987 "01101000" // /* MW 5 */ + 7988 "00111000" // /* MW 4 */ + 7989 "11010100" // /* MW 3 */ + 7990 "10000100" // /* MW 2 */ + 7991 "01100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 255 22 +.src_ref 7 "conv2d_dw_bf16.h" 258 4 first + 7992 "10111010" // LDA dj0, [p3], #4; MOVXM ls, #8080 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7993 "00010000" // /* MW 9 */ + 7994 "11001000" // /* MW 8 */ + 7995 "01111111" // /* MW 7 */ + 7996 "00000100" // /* MW 6 */ + 7997 "00000000" // /* MW 5 */ + 7998 "00000000" // /* MW 4 */ + 7999 "11010000" // /* MW 3 */ + 8000 "10001000" // /* MW 2 */ + 8001 "01100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 255 22 first +.src_ref 7 "conv2d_dw_bf16.h" 258 4 + 8002 "10111010" // LDA dn4, [p3], #4; MOVXM le, #8176 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8003 "00010000" // /* MW 9 */ + 8004 "11111000" // /* MW 8 */ + 8005 "10111111" // /* MW 7 */ + 8006 "00000101" // /* MW 6 */ + 8007 "00000000" // /* MW 5 */ + 8008 "00000000" // /* MW 4 */ + 8009 "11010000" // /* MW 3 */ + 8010 "11000100" // /* MW 2 */ + 8011 "01100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 255 22 +.src_ref 7 "conv2d_dw_bf16.h" 263 16 +.src_ref 7 "conv2d_dw_bf16.h" 264 16 +.src_ref 7 "conv2d_dw_bf16.h" 266 47 + 8012 "10111010" // LDA dj4, [p3], #4; MOVX r1, #50; MOV r0, #18 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8013 "01011000" // /* MW 9 */ + 8014 "00010010" // /* MW 8 */ + 8015 "00001000" // /* MW 7 */ + 8016 "01001000" // /* MW 6 */ + 8017 "00010110" // /* MW 5 */ + 8018 "00000000" // /* MW 4 */ + 8019 "11010000" // /* MW 3 */ + 8020 "11001000" // /* MW 2 */ + 8021 "01100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 255 22 +.src_ref 7 "conv2d_dw_bf16.h" 267 47 + 8022 "00101100" // LDA m0, [p3], #4; MOVX r2, #19 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8023 "10011010" // /* MW 5 */ + 8024 "00001000" // /* MW 4 */ + 8025 "11010000" // /* MW 3 */ + 8026 "10000000" // /* MW 2 */ + 8027 "01100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 255 22 + 8028 "10011000" // LDA dc0, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8029 "01100110" // /* MW 3 */ + 8030 "00011100" // /* MW 2 */ + 8031 "00000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 255 22 + 8032 "10011000" // LDA dc4, [p3], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8033 "01100110" // /* MW 3 */ + 8034 "10001010" // /* MW 2 */ + 8035 "00000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 258 51 first + 8036 "10011000" // LDA r5, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8037 "10110110" // /* MW 3 */ + 8038 "00000100" // /* MW 2 */ + 8039 "00000011" // /* MW 1 */ + 8040 "10011000" // LDA r6, [p3, #8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8041 "11010110" // /* MW 3 */ + 8042 "00100100" // /* MW 2 */ + 8043 "00000011" // /* MW 1 */ + 8044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8045 "00000000" // /* MW 1 */ + 8046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8047 "00000000" // /* MW 1 */ + 8048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8049 "00000000" // /* MW 1 */ + 8050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8051 "00000000" // /* MW 1 */ + 8052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8053 "00000000" // /* MW 1 */ + 8054 "10011000" // LSHL r4, r5, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8055 "01001101" // /* MW 3 */ + 8056 "01001000" // /* MW 2 */ + 8057 "00010001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 258 4 first +.src_ref 7 "conv2d_dw_bf16.h" 270 25 first + 8058 "00100100" // LSHL r3, r6, r3; ADD.NC lc, r4, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8059 "11111111" // /* MW 5 */ + 8060 "11100100" // /* MW 4 */ + 8061 "10111010" // /* MW 3 */ + 8062 "11000111" // /* MW 2 */ + 8063 "00110000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 7 "conv2d_dw_bf16.h" 270 25 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8064 "11100001" // NOPA; NOPB; NOPS; NOPX; MOV dj1, r3; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8065 "00000000" // /* MW 15 */ + 8066 "00000000" // /* MW 14 */ + 8067 "01111000" // /* MW 13 */ + 8068 "11010000" // /* MW 12 */ + 8069 "11000000" // /* MW 11 */ + 8070 "00000000" // /* MW 10 */ + 8071 "00000000" // /* MW 9 */ + 8072 "00000000" // /* MW 8 */ + 8073 "01011011" // /* MW 7 */ + 8074 "00000001" // /* MW 6 */ + 8075 "00100000" // /* MW 5 */ + 8076 "00000000" // /* MW 4 */ + 8077 "11110000" // /* MW 3 */ + 8078 "00101100" // /* MW 2 */ + 8079 "00000000" // /* MW 1 */ +.label ZLS_F_Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params_112 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 7 "conv2d_dw_bf16.h" 261 15 first +.src_ref 7 "conv2d_dw_bf16.h" 262 15 first +.src_ref 7 "conv2d_dw_bf16.h" 263 16 first +.src_ref 7 "conv2d_dw_bf16.h" 270 25 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.noswbrkpt +.loop_nesting 1 + 8080 "11100001" // VLDA x0, [p2], #64; VLDB x2, [p1], #64; MOVS p4, p0; NOPX; VSHUFFLE x1, x2, x0, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8081 "00000000" // /* MW 15 */ + 8082 "00000000" // /* MW 14 */ + 8083 "00111000" // /* MW 13 */ + 8084 "00000010" // /* MW 12 */ + 8085 "01001000" // /* MW 11 */ + 8086 "00000000" // /* MW 10 */ + 8087 "00000000" // /* MW 9 */ + 8088 "00000000" // /* MW 8 */ + 8089 "10001011" // /* MW 7 */ + 8090 "10000000" // /* MW 6 */ + 8091 "01101100" // /* MW 5 */ + 8092 "00111001" // /* MW 4 */ + 8093 "01110010" // /* MW 3 */ + 8094 "10000011" // /* MW 2 */ + 8095 "01000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 264 16 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8096 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE x3, x0, x0, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8097 "00000000" // /* MW 15 */ + 8098 "00000000" // /* MW 14 */ + 8099 "00111000" // /* MW 13 */ + 8100 "00000010" // /* MW 12 */ + 8101 "11000000" // /* MW 11 */ + 8102 "00000000" // /* MW 10 */ + 8103 "00000000" // /* MW 9 */ + 8104 "00000000" // /* MW 8 */ + 8105 "01011011" // /* MW 7 */ + 8106 "00000001" // /* MW 6 */ + 8107 "00100000" // /* MW 5 */ + 8108 "00000000" // /* MW 4 */ + 8109 "11110000" // /* MW 3 */ + 8110 "00101100" // /* MW 2 */ + 8111 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 266 47 first + 8112 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmlh0, x1, x3, r0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8113 "00000000" // /* MW 15 */ + 8114 "00000000" // /* MW 14 */ + 8115 "11101000" // /* MW 13 */ + 8116 "11000000" // /* MW 12 */ + 8117 "01000100" // /* MW 11 */ + 8118 "00000000" // /* MW 10 */ + 8119 "00000000" // /* MW 9 */ + 8120 "00000000" // /* MW 8 */ + 8121 "01011011" // /* MW 7 */ + 8122 "00000001" // /* MW 6 */ + 8123 "00100000" // /* MW 5 */ + 8124 "00000000" // /* MW 4 */ + 8125 "11110000" // /* MW 3 */ + 8126 "00101100" // /* MW 2 */ + 8127 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 267 47 first + 8128 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x1, x3, r2; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8129 "00000000" // /* MW 15 */ + 8130 "00000000" // /* MW 14 */ + 8131 "11101000" // /* MW 13 */ + 8132 "11000100" // /* MW 12 */ + 8133 "00000100" // /* MW 11 */ + 8134 "00000000" // /* MW 10 */ + 8135 "00000000" // /* MW 9 */ + 8136 "00000000" // /* MW 8 */ + 8137 "01011011" // /* MW 7 */ + 8138 "00000001" // /* MW 6 */ + 8139 "00100000" // /* MW 5 */ + 8140 "00000000" // /* MW 4 */ + 8141 "11110000" // /* MW 3 */ + 8142 "00101100" // /* MW 2 */ + 8143 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 143 15 first +.src_ref 4 "vector.hpp" 1159 33 first + 8144 "11100001" // NOPA; NOPB; VST.3D bmlh0, [p0], d0; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8145 "00000000" // /* MW 15 */ + 8146 "00000000" // /* MW 14 */ + 8147 "01111000" // /* MW 13 */ + 8148 "10100101" // /* MW 12 */ + 8149 "00000001" // /* MW 11 */ + 8150 "00000000" // /* MW 10 */ + 8151 "00000000" // /* MW 9 */ + 8152 "10000000" // /* MW 8 */ + 8153 "00100110" // /* MW 7 */ + 8154 "00011000" // /* MW 6 */ + 8155 "00100000" // /* MW 5 */ + 8156 "00000000" // /* MW 4 */ + 8157 "11110000" // /* MW 3 */ + 8158 "00101100" // /* MW 2 */ + 8159 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 7 "conv2d_dw_bf16.h" 270 25 first + 8160 "11100001" // NOPA; NOPB; VST bmll0, [p4, dj1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8161 "00000000" // /* MW 15 */ + 8162 "00000000" // /* MW 14 */ + 8163 "01111000" // /* MW 13 */ + 8164 "10100101" // /* MW 12 */ + 8165 "00000001" // /* MW 11 */ + 8166 "00000000" // /* MW 10 */ + 8167 "00000000" // /* MW 9 */ + 8168 "10000000" // /* MW 8 */ + 8169 "00000110" // /* MW 7 */ + 8170 "00100000" // /* MW 6 */ + 8171 "00100100" // /* MW 5 */ + 8172 "00000000" // /* MW 4 */ + 8173 "11110000" // /* MW 3 */ + 8174 "00101100" // /* MW 2 */ + 8175 "00000000" // /* MW 1 */ +.label ZLE_F_Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params_208 +.end_of_loop + 8176 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8177 "00000000" // /* MW 15 */ + 8178 "00000000" // /* MW 14 */ + 8179 "01111000" // /* MW 13 */ + 8180 "10100101" // /* MW 12 */ + 8181 "00000001" // /* MW 11 */ + 8182 "00000000" // /* MW 10 */ + 8183 "00000000" // /* MW 9 */ + 8184 "00000000" // /* MW 8 */ + 8185 "01011011" // /* MW 7 */ + 8186 "00000001" // /* MW 6 */ + 8187 "00100000" // /* MW 5 */ + 8188 "00000000" // /* MW 4 */ + 8189 "11110000" // /* MW 3 */ + 8190 "00101100" // /* MW 2 */ + 8191 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 7 "conv2d_dw_bf16.h" 263 16 first +.src_ref 7 "conv2d_dw_bf16.h" 270 25 +.src_ref 7 "conv2d_dw_bf16.h" 274 first +.loop_nesting 0 + 8192 "00111010" // MOVS p4, p0; RET lr; VSHUFFLE x1, x2, x0, r1 /* MW 10 */ /* control_operation: words=10 rts unconditional cycles_taken=1 delay_slots=5 */ + 8193 "00111001" // /* MW 9 */ + 8194 "00000010" // /* MW 8 */ + 8195 "01001000" // /* MW 7 */ + 8196 "00000000" // /* MW 6 */ + 8197 "01000000" // /* MW 5 */ + 8198 "00000001" // /* MW 4 */ + 8199 "01100000" // /* MW 3 */ + 8200 "00010001" // /* MW 2 */ + 8201 "10010000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 264 16 first +.delay_slot + 8202 "01111000" // VSHUFFLE x3, x0, x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8203 "00000100" // /* MW 3 */ + 8204 "10000000" // /* MW 2 */ + 8205 "00011001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 266 47 first +.delay_slot + 8206 "11011000" // VSHUFFLE bmlh0, x1, x3, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8207 "10000001" // /* MW 3 */ + 8208 "10001001" // /* MW 2 */ + 8209 "00011000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 267 47 first +.delay_slot + 8210 "11011000" // VSHUFFLE bmll0, x1, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8211 "10001001" // /* MW 3 */ + 8212 "00001001" // /* MW 2 */ + 8213 "00011000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 143 15 first +.src_ref 4 "vector.hpp" 1159 33 first +.delay_slot + 8214 "10011000" // VST.3D bmlh0, [p0], d0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8215 "00100110" // /* MW 3 */ + 8216 "00011000" // /* MW 2 */ + 8217 "00001000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 7 "conv2d_dw_bf16.h" 270 25 first +.delay_slot + 8218 "10011000" // VST bmll0, [p4, dj1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8219 "00000110" // /* MW 3 */ + 8220 "00100000" // /* MW 2 */ +.label _Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params__end +.label __Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params___func_end0 + 8221 "00001100" // /* MW 1 */ +.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_begin0 +.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params +.function conv2d_dw<(unsigned char)'\x01', bfloat16, bfloat16, bfloat16, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::async, adf::addressing::linear, adf::margin<0U> > > _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 7 "conv2d_dw_bf16.h" 282 first +.src_ref 7 "conv2d_dw_bf16.h" 285 49 +.src_ref 7 "conv2d_dw_bf16.h" 285 79 +.src_ref 7 "conv2d_dw_bf16.h" 287 48 +.function_start + 8224 "10111010" // LDA p1, [p1]; MOVX r17, #1; MOV m0, #-193 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8225 "01011000" // /* MW 9 */ + 8226 "00111111" // /* MW 8 */ + 8227 "00000111" // /* MW 7 */ + 8228 "00101000" // /* MW 6 */ + 8229 "00010000" // /* MW 5 */ + 8230 "00000001" // /* MW 4 */ + 8231 "11010000" // /* MW 3 */ + 8232 "10010011" // /* MW 2 */ + 8233 "00100000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 285 79 +.src_ref 7 "conv2d_dw_bf16.h" 287 77 + 8234 "10111010" // MOVA m1, #-208; MOVXM p4, #509064 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8235 "00010000" // /* MW 9 */ + 8236 "01000100" // /* MW 8 */ + 8237 "00110010" // /* MW 7 */ + 8238 "11110010" // /* MW 6 */ + 8239 "00000001" // /* MW 5 */ + 8240 "00000000" // /* MW 4 */ + 8241 "10000000" // /* MW 3 */ + 8242 "00000100" // /* MW 2 */ + 8243 "11100110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 285 79 first +.src_ref 7 "conv2d_dw_bf16.h" 285 103 + 8244 "01010100" // LDA r16, [p4], m0; MOV m0, #201 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8245 "00100101" // /* MW 5 */ + 8246 "00000011" // /* MW 4 */ + 8247 "11010000" // /* MW 3 */ + 8248 "01000010" // /* MW 2 */ + 8249 "10000001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 285 103 + 8250 "10011000" // LDA.u8 r19, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8251 "01101010" // /* MW 3 */ + 8252 "00001010" // /* MW 2 */ + 8253 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 287 77 first + 8254 "10011000" // LDA r18, [p4], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8255 "01010110" // /* MW 3 */ + 8256 "00101010" // /* MW 2 */ + 8257 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 282 + 8258 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8259 "00000001" // /* MW 5 */ + 8260 "00000000" // /* MW 4 */ + 8261 "00000000" // /* MW 3 */ + 8262 "00001000" // /* MW 2 */ + 8263 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8264 "00001100" // LDA p0, [p0]; ST lr, [sp, #-8] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8265 "01111011" // /* MW 5 */ + 8266 "11110000" // /* MW 4 */ + 8267 "11011111" // /* MW 3 */ + 8268 "10000011" // /* MW 2 */ + 8269 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8270 "00001100" // LDA r15, [p2]; ST p2, [sp, #-16] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8271 "00111011" // /* MW 5 */ + 8272 "11100010" // /* MW 4 */ + 8273 "11011111" // /* MW 3 */ + 8274 "10111110" // /* MW 2 */ + 8275 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8276 "10011000" // ST p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8277 "10011101" // /* MW 3 */ + 8278 "11111111" // /* MW 2 */ + 8279 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 284 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 8280 "00111010" // ST p6, [sp, #-20]; JL #7280 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=7280 delay_slots=5 */ + 8281 "01000001" // /* MW 9 */ + 8282 "00000000" // /* MW 8 */ + 8283 "00000000" // /* MW 7 */ + 8284 "10001110" // /* MW 6 */ + 8285 "00000011" // /* MW 5 */ + 8286 "00000000" // /* MW 4 */ + 8287 "10110000" // /* MW 3 */ + 8288 "11100011" // /* MW 2 */ + 8289 "11111101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 284 4 +.src_ref 7 "conv2d_dw_bf16.h" 285 89 first +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8290 "00111010" // ST r15, [sp, #-12]; MUL r16, r19, r16; MOV p2, p1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8291 "01111001" // /* MW 9 */ + 8292 "01100000" // /* MW 8 */ + 8293 "00110001" // /* MW 7 */ + 8294 "01111101" // /* MW 6 */ + 8295 "00001000" // /* MW 5 */ + 8296 "00100111" // /* MW 4 */ + 8297 "10110000" // /* MW 3 */ + 8298 "10111110" // /* MW 2 */ + 8299 "11111110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 290 20 +.delay_slot + 8300 "11111000" // MOV p6, p4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8301 "11000000" // /* MW 3 */ + 8302 "01101000" // /* MW 2 */ + 8303 "00011110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 285 49 +.delay_slot + 8304 "01011100" // ST p1, [sp, #-24]; LSHL r16, r16, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8305 "00111011" // /* MW 5 */ + 8306 "01000010" // /* MW 4 */ + 8307 "10111000" // /* MW 3 */ + 8308 "00010011" // /* MW 2 */ + 8309 "11111101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 285 49 +.src_ref 7 "conv2d_dw_bf16.h" 287 48 first +.src_ref 7 "conv2d_dw_bf16.h" 291 8 +.delay_slot + 8310 "00111010" // MOVS p7, p0; LSHL r16, r18, r17; MOV m0, r16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8311 "01111001" // /* MW 9 */ + 8312 "00010000" // /* MW 8 */ + 8313 "00000100" // /* MW 7 */ + 8314 "11101100" // /* MW 6 */ + 8315 "00001000" // /* MW 5 */ + 8316 "00100101" // /* MW 4 */ + 8317 "01100000" // /* MW 3 */ + 8318 "00010001" // /* MW 2 */ + 8319 "11110000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 285 49 first +.src_ref 7 "conv2d_dw_bf16.h" 287 48 +.delay_slot + 8320 "11100001" // NOPA; PADDB [p1], m0; NOPS; NOPX; ADD.NC p3, r15, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8321 "00000000" // /* MW 15 */ + 8322 "00000000" // /* MW 14 */ + 8323 "10101000" // /* MW 13 */ + 8324 "11100000" // /* MW 12 */ + 8325 "10110011" // /* MW 11 */ + 8326 "00000001" // /* MW 10 */ + 8327 "00000000" // /* MW 9 */ + 8328 "00000000" // /* MW 8 */ + 8329 "01011011" // /* MW 7 */ + 8330 "00000001" // /* MW 6 */ + 8331 "00100000" // /* MW 5 */ + 8332 "00010111" // /* MW 4 */ + 8333 "11110010" // /* MW 3 */ + 8334 "00101100" // /* MW 2 */ + 8335 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 290 20 first +.src_ref 7 "conv2d_dw_bf16.h" 290 24 +.src_ref 7 "conv2d_dw_bf16.h" 292 31 +.return_address + 8336 "00101100" // LDA.u8 r16, [p6, #7]; MOVX r17, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8337 "00010010" // /* MW 5 */ + 8338 "01000100" // /* MW 4 */ + 8339 "01010000" // /* MW 3 */ + 8340 "11000001" // /* MW 2 */ + 8341 "11001110" // /* MW 1 */ + 8342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8343 "00000000" // /* MW 1 */ + 8344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8345 "00000000" // /* MW 1 */ + 8346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8347 "00000000" // /* MW 1 */ + 8348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8349 "00000000" // /* MW 1 */ + 8350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8351 "00000000" // /* MW 1 */ + 8352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8353 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 290 24 + 8354 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8355 "00001000" // /* MW 3 */ + 8356 "01100001" // /* MW 2 */ + 8357 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 290 8 + 8358 "10000100" // JNZ r16, #8544 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8544 delay_slots=5 */ + 8359 "00000001" // /* MW 5 */ + 8360 "01000000" // /* MW 4 */ + 8361 "10110000" // /* MW 3 */ + 8362 "00010000" // /* MW 2 */ + 8363 "10000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 61 +.delay_slot + 8364 "01000100" // MOVXM p4, #509064 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8365 "00010000" // /* MW 5 */ + 8366 "11001001" // /* MW 4 */ + 8367 "11001000" // /* MW 3 */ + 8368 "00000111" // /* MW 2 */ + 8369 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8371 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8373 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8375 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8377 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 31 +.src_ref 7 "conv2d_dw_bf16.h" 292 61 first +.src_ref 7 "conv2d_dw_bf16.h" 292 71 +.src_ref 7 "conv2d_dw_bf16.h" 294 30 +.src_ref 7 "conv2d_dw_bf16.h" 294 68 +.src_ref 7 "conv2d_dw_bf16.h" 294 97 + 8378 "10111010" // LDA r16, [p4], #-4; MOVX r0, #1; MOV m0, #-208 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8379 "01011000" // /* MW 9 */ + 8380 "00110000" // /* MW 8 */ + 8381 "00000111" // /* MW 7 */ + 8382 "00101000" // /* MW 6 */ + 8383 "00000000" // /* MW 5 */ + 8384 "00000000" // /* MW 4 */ + 8385 "11010000" // /* MW 3 */ + 8386 "11000010" // /* MW 2 */ + 8387 "10011111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 118 + 8388 "10011000" // LDA r21, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8389 "10110110" // /* MW 3 */ + 8390 "00101110" // /* MW 2 */ + 8391 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 294 59 first + 8392 "10011000" // LDA r18, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8393 "01010110" // /* MW 3 */ + 8394 "00011110" // /* MW 2 */ + 8395 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 31 +.src_ref 7 "conv2d_dw_bf16.h" 293 31 +.src_ref 7 "conv2d_dw_bf16.h" 297 84 + 8396 "11010100" // LDA r19, [sp, #-24]; MOV p0, p4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8397 "10000001" // /* MW 5 */ + 8398 "11010001" // /* MW 4 */ + 8399 "00100000" // /* MW 3 */ + 8400 "01001110" // /* MW 2 */ + 8401 "11111101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 291 8 +.src_ref 7 "conv2d_dw_bf16.h" 294 97 +.src_ref 7 "conv2d_dw_bf16.h" 297 84 + 8402 "10111010" // LDA r20, [p4], m0; MOVS p0, p7; MOV p7, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8403 "01110010" // /* MW 9 */ + 8404 "01100000" // /* MW 8 */ + 8405 "10110000" // /* MW 7 */ + 8406 "00000011" // /* MW 6 */ + 8407 "10001011" // /* MW 5 */ + 8408 "10011100" // /* MW 4 */ + 8409 "11010000" // /* MW 3 */ + 8410 "01010010" // /* MW 2 */ + 8411 "10000001" // /* MW 1 */ + 8412 "00011000" // LDA p6, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8413 "00011001" // /* MW 3 */ + 8414 "11101111" // /* MW 2 */ + 8415 "00000111" // /* MW 1 */ + 8416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8417 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 31 first + 8418 "10011000" // LSHL r17, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8419 "00011101" // /* MW 3 */ + 8420 "00100011" // /* MW 2 */ + 8421 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 71 + 8422 "10011000" // LSHL r16, r16, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8423 "00001101" // /* MW 3 */ + 8424 "00100000" // /* MW 2 */ + 8425 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 291 8 first +.no_stack_arguments + 8426 "00000100" // JL #7280 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7280 delay_slots=5 */ + 8427 "00000001" // /* MW 5 */ + 8428 "00000000" // /* MW 4 */ + 8429 "00111000" // /* MW 3 */ + 8430 "00001110" // /* MW 2 */ + 8431 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 31 first +.src_ref 7 "conv2d_dw_bf16.h" 294 30 first +.delay_slot + 8432 "10100100" // LSHL r18, r18, r0; ADD.NC r22, r19, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8433 "10001010" // /* MW 5 */ + 8434 "00110011" // /* MW 4 */ + 8435 "10111011" // /* MW 3 */ + 8436 "10000001" // /* MW 2 */ + 8437 "10010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 31 +.src_ref 7 "conv2d_dw_bf16.h" 294 30 +.delay_slot + 8438 "10100100" // LSHL r17, r21, r0; ADD.NC r21, r15, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8439 "10010010" // /* MW 5 */ + 8440 "10101111" // /* MW 4 */ + 8441 "10111010" // /* MW 3 */ + 8442 "01000001" // /* MW 2 */ + 8443 "10101100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 31 +.src_ref 7 "conv2d_dw_bf16.h" 294 68 +.delay_slot + 8444 "10100100" // LSHL r18, r20, r0; ADD.NC p1, r17, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8445 "10110010" // /* MW 5 */ + 8446 "11010001" // /* MW 4 */ + 8447 "10110010" // /* MW 3 */ + 8448 "10000001" // /* MW 2 */ + 8449 "10100100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 293 31 first +.delay_slot + 8450 "01011000" // ADD.NC p2, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8451 "11000001" // /* MW 3 */ + 8452 "01101001" // /* MW 2 */ + 8453 "00011010" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 294 68 first +.delay_slot + 8454 "10111010" // NOPA; NOPB; ADD.NC p3, r21, r18 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8455 "10101110" // /* MW 9 */ + 8456 "01100100" // /* MW 8 */ + 8457 "10110101" // /* MW 7 */ + 8458 "00000001" // /* MW 6 */ + 8459 "00010000" // /* MW 5 */ + 8460 "00000000" // /* MW 4 */ + 8461 "11110000" // /* MW 3 */ + 8462 "00101100" // /* MW 2 */ + 8463 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 7 "conv2d_dw_bf16.h" 297 55 +.src_ref 7 "conv2d_dw_bf16.h" 298 55 +.src_ref 7 "conv2d_dw_bf16.h" 298 84 +.return_address + 8464 "10111010" // LDA p4, [sp, #-16]; MOVX r0, #1; MOV m0, #-204 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8465 "01011000" // /* MW 9 */ + 8466 "00110100" // /* MW 8 */ + 8467 "00000111" // /* MW 7 */ + 8468 "00101000" // /* MW 6 */ + 8469 "00000000" // /* MW 5 */ + 8470 "00000000" // /* MW 4 */ + 8471 "00100000" // /* MW 3 */ + 8472 "01000011" // /* MW 2 */ + 8473 "11111110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 297 84 first + 8474 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8475 "00010110" // /* MW 3 */ + 8476 "11111110" // /* MW 2 */ + 8477 "00000111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 298 84 + 8478 "11010100" // LDA p7, [sp, #-4]; MOV p3, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8479 "10000001" // /* MW 5 */ + 8480 "11011101" // /* MW 4 */ + 8481 "00100110" // /* MW 3 */ + 8482 "11110011" // /* MW 2 */ + 8483 "11111111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 298 84 first + 8484 "10011000" // LDA r17, [p3], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8485 "00110110" // /* MW 3 */ + 8486 "00001010" // /* MW 2 */ + 8487 "00000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 296 8 + 8488 "00011000" // LDA lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8489 "00111001" // /* MW 3 */ + 8490 "11111000" // /* MW 2 */ + 8491 "00000111" // /* MW 1 */ + 8492 "00011000" // LDA r15, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8493 "11110001" // /* MW 3 */ + 8494 "11110101" // /* MW 2 */ + 8495 "00000111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 301 first + 8496 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8497 "00000001" // /* MW 5 */ + 8498 "00000000" // /* MW 4 */ + 8499 "00000000" // /* MW 3 */ + 8500 "11111000" // /* MW 2 */ + 8501 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 8502 "10011000" // LDA r18, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8503 "01010110" // /* MW 3 */ + 8504 "00000110" // /* MW 2 */ + 8505 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 297 55 first + 8506 "10011000" // LSHL r16, r16, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8507 "00001101" // /* MW 3 */ + 8508 "00100000" // /* MW 2 */ + 8509 "00010100" // /* MW 1 */ + 8510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8511 "00000000" // /* MW 1 */ + 8512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8513 "00000000" // /* MW 1 */ + 8514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8515 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 296 8 first +.tail_call + 8516 "10000100" // J #7968 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=7968 delay_slots=5 */ + 8517 "00000000" // /* MW 5 */ + 8518 "00000000" // /* MW 4 */ + 8519 "10010000" // /* MW 3 */ + 8520 "00001111" // /* MW 2 */ + 8521 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 298 55 first +.delay_slot + 8522 "10011000" // LSHL r17, r17, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8523 "00001101" // /* MW 3 */ + 8524 "01100010" // /* MW 2 */ + 8525 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 298 55 +.delay_slot + 8526 "01011000" // ADD.NC r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8527 "11001001" // /* MW 3 */ + 8528 "01011000" // /* MW 2 */ + 8529 "00011100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 297 55 first +.delay_slot + 8530 "01011000" // ADD.NC p1, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8531 "01000001" // /* MW 3 */ + 8532 "01101001" // /* MW 2 */ + 8533 "00011001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 296 8 +.delay_slot + 8534 "11111000" // MOV p0, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8535 "00100000" // /* MW 3 */ + 8536 "01101001" // /* MW 2 */ + 8537 "00011000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 298 93 first +.delay_slot + 8538 "10010100" // NOPA; ADD.NC p2, r17, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8539 "10000010" // /* MW 5 */ + 8540 "11010001" // /* MW 4 */ + 8541 "11110100" // /* MW 3 */ + 8542 "00101100" // /* MW 2 */ + 8543 "00000000" // /* MW 1 */ +.label TGT_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_320 +.src_ref 7 "conv2d_dw_bf16.h" 301 +.return_address + 8544 "00011000" // LDA lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8545 "00111001" // /* MW 3 */ + 8546 "11111000" // /* MW 2 */ + 8547 "00000111" // /* MW 1 */ + 8548 "00011000" // LDA r15, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8549 "11110001" // /* MW 3 */ + 8550 "11110101" // /* MW 2 */ + 8551 "00000111" // /* MW 1 */ + 8552 "00011000" // LDA p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8553 "10011001" // /* MW 3 */ + 8554 "11111111" // /* MW 2 */ + 8555 "00000111" // /* MW 1 */ + 8556 "00011000" // LDA p6, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8557 "00011001" // /* MW 3 */ + 8558 "11101111" // /* MW 2 */ + 8559 "00000111" // /* MW 1 */ + 8560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8561 "00000000" // /* MW 1 */ + 8562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8563 "00000000" // /* MW 1 */ + 8564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8565 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 301 first + 8566 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 8567 "00000000" // /* MW 3 */ + 8568 "00101000" // /* MW 2 */ + 8569 "00010000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 301 +.delay_slot + 8570 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8571 "00000001" // /* MW 5 */ + 8572 "00000000" // /* MW 4 */ + 8573 "00000000" // /* MW 3 */ + 8574 "11111000" // /* MW 2 */ + 8575 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8577 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8579 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8581 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8582 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params__end +.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_end0 + 8583 "00000000" // /* MW 1 */ +.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_conv2d_dwc _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 6 "superkernels.cpp" 352 first +.src_ref 6 "superkernels.cpp" 357 6 +.function_start + 8592 "01000100" // MOVXM p4, #508416 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8593 "00000000" // /* MW 5 */ + 8594 "11000100" // /* MW 4 */ + 8595 "11001000" // /* MW 3 */ + 8596 "00000111" // /* MW 2 */ + 8597 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 357 6 first + 8598 "11010100" // LDA r16, [p4]; MOV r0, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8599 "01000001" // /* MW 5 */ + 8600 "00101111" // /* MW 4 */ + 8601 "11010000" // /* MW 3 */ + 8602 "11000010" // /* MW 2 */ + 8603 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 352 + 8604 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8605 "00000001" // /* MW 5 */ + 8606 "00000000" // /* MW 4 */ + 8607 "00000000" // /* MW 3 */ + 8608 "00010000" // /* MW 2 */ + 8609 "00000000" // /* MW 1 */ + 8610 "00000010" // ST r14, [sp, #-8]; MOV r17, CORE_ID /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8611 "01110000" // /* MW 7 */ + 8612 "01110000" // /* MW 6 */ + 8613 "00101101" // /* MW 5 */ + 8614 "00000010" // /* MW 4 */ + 8615 "10110000" // /* MW 3 */ + 8616 "00111010" // /* MW 2 */ + 8617 "11111111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 375 + 8618 "00000010" // ST r13, [sp, #-4]; MOV r13, lr /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8619 "01110000" // /* MW 7 */ + 8620 "11110000" // /* MW 6 */ + 8621 "10101000" // /* MW 5 */ + 8622 "00000001" // /* MW 4 */ + 8623 "10110000" // /* MW 3 */ + 8624 "10110110" // /* MW 2 */ + 8625 "11111111" // /* MW 1 */ + 8626 "10011000" // ST p0, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8627 "00011101" // /* MW 3 */ + 8628 "11101100" // /* MW 2 */ + 8629 "00001111" // /* MW 1 */ + 8630 "10011000" // ST p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8631 "10011101" // /* MW 3 */ + 8632 "11110111" // /* MW 2 */ + 8633 "00001111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 360 44 + 8634 "00000010" // ST r0, [sp, #-16]; MOV r14, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8635 "01110000" // /* MW 7 */ + 8636 "01100000" // /* MW 6 */ + 8637 "11001010" // /* MW 5 */ + 8638 "00000001" // /* MW 4 */ + 8639 "10110000" // /* MW 3 */ + 8640 "00000010" // /* MW 2 */ + 8641 "11111110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 357 6 +.src_ref 6 "superkernels.cpp" 357 16 + 8642 "10000100" // JNZ r16, #8768 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8768 delay_slots=5 */ + 8643 "00000001" // /* MW 5 */ + 8644 "01000000" // /* MW 4 */ + 8645 "00100000" // /* MW 3 */ + 8646 "00010001" // /* MW 2 */ + 8647 "10000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.delay_slot + 8648 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8649 "11000000" // /* MW 3 */ + 8650 "11010110" // /* MW 2 */ + 8651 "00011011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 354 22 first +.delay_slot + 8652 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8653 "10010000" // /* MW 3 */ + 8654 "01100010" // /* MW 2 */ + 8655 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 354 30 +.delay_slot + 8656 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8657 "11111011" // /* MW 3 */ + 8658 "01100011" // /* MW 2 */ + 8659 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 354 11 +.delay_slot + 8660 "01000100" // MOVXM p3, #508420 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8661 "00001000" // /* MW 5 */ + 8662 "11000100" // /* MW 4 */ + 8663 "11000110" // /* MW 3 */ + 8664 "00000111" // /* MW 2 */ + 8665 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 354 11 +.delay_slot + 8666 "10011000" // ST r17, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8667 "00110001" // /* MW 3 */ + 8668 "00000110" // /* MW 2 */ + 8669 "00001011" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.src_ref 6 "superkernels.cpp" 369 2 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8670 "00111010" // MOVS p7, p1; MOVXM p1, #508448 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8671 "00010001" // /* MW 9 */ + 8672 "00010000" // /* MW 8 */ + 8673 "10110001" // /* MW 7 */ + 8674 "11110000" // /* MW 6 */ + 8675 "00000001" // /* MW 5 */ + 8676 "00000000" // /* MW 4 */ + 8677 "01100000" // /* MW 3 */ + 8678 "10010001" // /* MW 2 */ + 8679 "11110000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.src_ref 6 "superkernels.cpp" 359 4 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8680 "01110110" // ST.s8 r16, [p1]; MOVS p0, p2; MOVXM p1, #508444 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8681 "00010000" // /* MW 11 */ + 8682 "00001110" // /* MW 10 */ + 8683 "10110001" // /* MW 9 */ + 8684 "11110000" // /* MW 8 */ + 8685 "00000001" // /* MW 7 */ + 8686 "00000000" // /* MW 6 */ + 8687 "10001011" // /* MW 5 */ + 8688 "10001000" // /* MW 4 */ + 8689 "11100000" // /* MW 3 */ + 8690 "11000000" // /* MW 2 */ + 8691 "00100000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8693 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 359 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 8694 "00000100" // JL #6480 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=6480 delay_slots=5 */ + 8695 "00000001" // /* MW 5 */ + 8696 "00000000" // /* MW 4 */ + 8697 "10101000" // /* MW 3 */ + 8698 "00001100" // /* MW 2 */ + 8699 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8701 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8702 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8703 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8704 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8705 "00110001" // /* MW 3 */ + 8706 "00100000" // /* MW 2 */ + 8707 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 8708 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8709 "00000101" // /* MW 3 */ + 8710 "00100000" // /* MW 2 */ + 8711 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 8712 "00000010" // ST r16, [p1]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8713 "01110000" // /* MW 7 */ + 8714 "10100101" // /* MW 6 */ + 8715 "00000001" // /* MW 5 */ + 8716 "00000000" // /* MW 4 */ + 8717 "00110000" // /* MW 3 */ + 8718 "11000010" // /* MW 2 */ + 8719 "00100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 360 44 +.src_ref 6 "superkernels.cpp" 369 2 +.return_address + 8720 "00000010" // MOVS p1, p7; ADD.NC p2, r14, #8 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8721 "00000000" // /* MW 7 */ + 8722 "10000010" // /* MW 6 */ + 8723 "00110011" // /* MW 5 */ + 8724 "00000001" // /* MW 4 */ + 8725 "01100000" // /* MW 3 */ + 8726 "10010001" // /* MW 2 */ + 8727 "00110011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 360 17 first + 8728 "10011000" // LDA.u16 r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8729 "00111010" // /* MW 3 */ + 8730 "00000110" // /* MW 2 */ + 8731 "00000010" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 361 13 +.src_ref 6 "superkernels.cpp" 361 15 first + 8732 "10111010" // LDA.u16 r16, [p2, #4]; MOVXM p2, #508440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8733 "00010000" // /* MW 9 */ + 8734 "00001100" // /* MW 8 */ + 8735 "00110001" // /* MW 7 */ + 8736 "11110001" // /* MW 6 */ + 8737 "00000001" // /* MW 5 */ + 8738 "00000000" // /* MW 4 */ + 8739 "01010000" // /* MW 3 */ + 8740 "11000011" // /* MW 2 */ + 8741 "01000100" // /* MW 1 */ + 8742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8743 "00000000" // /* MW 1 */ + 8744 "10000100" // J #8784 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8784 delay_slots=5 */ + 8745 "00000000" // /* MW 5 */ + 8746 "00000000" // /* MW 4 */ + 8747 "00101000" // /* MW 3 */ + 8748 "00010001" // /* MW 2 */ + 8749 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 360 15 +.src_ref 6 "superkernels.cpp" 365 26 +.delay_slot + 8750 "01000100" // MOVXM p3, #508432 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8751 "00100000" // /* MW 5 */ + 8752 "11000100" // /* MW 4 */ + 8753 "11000110" // /* MW 3 */ + 8754 "00000111" // /* MW 2 */ + 8755 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8756 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8757 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8758 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8759 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 360 15 first +.delay_slot + 8760 "10011000" // ST r17, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8761 "00110001" // /* MW 3 */ + 8762 "00000110" // /* MW 2 */ + 8763 "00001011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 361 13 first +.delay_slot + 8764 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8765 "00010001" // /* MW 3 */ + 8766 "00000110" // /* MW 2 */ + 8767 "00001010" // /* MW 1 */ +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_176 +.src_ref 6 "superkernels.cpp" 365 26 + 8768 "11100001" // NOPA; NOPB; NOPS; MOVXM p3, #508432; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8769 "00000000" // /* MW 15 */ + 8770 "00000000" // /* MW 14 */ + 8771 "00010000" // /* MW 13 */ + 8772 "00001000" // /* MW 12 */ + 8773 "10110001" // /* MW 11 */ + 8774 "11110001" // /* MW 10 */ + 8775 "00000001" // /* MW 9 */ + 8776 "00000000" // /* MW 8 */ + 8777 "01011011" // /* MW 7 */ + 8778 "00000001" // /* MW 6 */ + 8779 "00100000" // /* MW 5 */ + 8780 "00000000" // /* MW 4 */ + 8781 "11110000" // /* MW 3 */ + 8782 "00101100" // /* MW 2 */ + 8783 "00000000" // /* MW 1 */ +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_192 +.src_ref 1 "io_buffer_main.h" 242 49 first + 8784 "00011000" // ADD.NC p2, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8785 "10000110" // /* MW 3 */ + 8786 "01100111" // /* MW 2 */ + 8787 "00011010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 365 15 + 8788 "10111010" // LDA r27, [p2], #-4; MOVXM p4, #508420 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8789 "00010000" // /* MW 9 */ + 8790 "00000010" // /* MW 8 */ + 8791 "00110001" // /* MW 7 */ + 8792 "11110010" // /* MW 6 */ + 8793 "00000001" // /* MW 5 */ + 8794 "00000000" // /* MW 4 */ + 8795 "11010000" // /* MW 3 */ + 8796 "11101110" // /* MW 2 */ + 8797 "01011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 8798 "10011000" // LDA r16, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8799 "00010110" // /* MW 3 */ + 8800 "11111110" // /* MW 2 */ + 8801 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 8802 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8803 "00110110" // /* MW 3 */ + 8804 "11111110" // /* MW 2 */ + 8805 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first + 8806 "10011000" // LDA r18, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8807 "01010110" // /* MW 3 */ + 8808 "01000110" // /* MW 2 */ + 8809 "00000010" // /* MW 1 */ + 8810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8811 "00000000" // /* MW 1 */ + 8812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8813 "00000000" // /* MW 1 */ + 8814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8815 "00000000" // /* MW 1 */ + 8816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8817 "00000000" // /* MW 1 */ + 8818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8819 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 8820 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8821 "00000010" // /* MW 3 */ + 8822 "01100001" // /* MW 2 */ + 8823 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 + 8824 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8825 "00010001" // /* MW 3 */ + 8826 "00000110" // /* MW 2 */ + 8827 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 + 8828 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8829 "11111101" // /* MW 3 */ + 8830 "11100000" // /* MW 2 */ + 8831 "00010111" // /* MW 1 */ + 8832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8833 "00000000" // /* MW 1 */ + 8834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8835 "00000000" // /* MW 1 */ + 8836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8837 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 8838 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8839 "00001000" // /* MW 3 */ + 8840 "10010011" // /* MW 2 */ + 8841 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 449 8 +.src_ref 6 "superkernels.cpp" 364 11 +.src_ref 6 "superkernels.cpp" 367 47 +.src_ref 6 "superkernels.cpp" 372 6 +.src_ref 6 "superkernels.cpp" 373 16 + 8842 "10111010" // MOVA r15, #1; MOVXM p7, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8843 "00010000" // /* MW 9 */ + 8844 "00000000" // /* MW 8 */ + 8845 "10110001" // /* MW 7 */ + 8846 "11110011" // /* MW 6 */ + 8847 "00000001" // /* MW 5 */ + 8848 "00000000" // /* MW 4 */ + 8849 "00000000" // /* MW 3 */ + 8850 "00101111" // /* MW 2 */ + 8851 "00000000" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 76 16 + 8852 "11100100" // MOVX r24, #0; MOV r16, sp /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8853 "11000001" // /* MW 5 */ + 8854 "00101011" // /* MW 4 */ + 8855 "00101000" // /* MW 3 */ + 8856 "00000000" // /* MW 2 */ + 8857 "00000110" // /* MW 1 */ + 8858 "00011000" // ADD.NC p0, r16, #-76 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8859 "01011010" // /* MW 3 */ + 8860 "01101000" // /* MW 2 */ + 8861 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 1 "io_buffer_main.h" 348 51 + 8862 "11010100" // LDA p5, [sp, #-20]; MOV r14, p2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8863 "10000001" // /* MW 5 */ + 8864 "00101001" // /* MW 4 */ + 8865 "00100111" // /* MW 3 */ + 8866 "11010011" // /* MW 2 */ + 8867 "11111101" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 365 15 first + 8868 "10011000" // LDA r17, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8869 "00110110" // /* MW 3 */ + 8870 "00000110" // /* MW 2 */ + 8871 "00000100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 365 26 +.src_ref 6 "superkernels.cpp" 369 2 + 8872 "10111010" // LDA r16, [p3]; MOVXM p3, #508864 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8873 "00010000" // /* MW 9 */ + 8874 "11100000" // /* MW 8 */ + 8875 "10110001" // /* MW 7 */ + 8876 "11110001" // /* MW 6 */ + 8877 "00000001" // /* MW 5 */ + 8878 "00000000" // /* MW 4 */ + 8879 "11010000" // /* MW 3 */ + 8880 "11000010" // /* MW 2 */ + 8881 "01100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 364 11 first + 8882 "10011000" // LDA r18, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8883 "01010110" // /* MW 3 */ + 8884 "00000110" // /* MW 2 */ + 8885 "00000111" // /* MW 1 */ + 8886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8887 "00000000" // /* MW 1 */ + 8888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8889 "00000000" // /* MW 1 */ + 8890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8891 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 8892 "10011000" // LDA r19, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8893 "01110110" // /* MW 3 */ + 8894 "00000110" // /* MW 2 */ + 8895 "00000101" // /* MW 1 */ + 8896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8897 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 365 24 first + 8898 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8899 "00001111" // /* MW 3 */ + 8900 "01100001" // /* MW 2 */ + 8901 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 364 11 first + 8902 "00011000" // ADD r17, r18, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8903 "00000111" // /* MW 3 */ + 8904 "10100010" // /* MW 2 */ + 8905 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 367 47 first + 8906 "10011000" // LSHL r16, r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8907 "11111101" // /* MW 3 */ + 8908 "00100000" // /* MW 2 */ + 8909 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 369 2 first +.no_stack_arguments + 8910 "00000100" // JL #8224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8224 delay_slots=5 */ + 8911 "00000001" // /* MW 5 */ + 8912 "00000000" // /* MW 4 */ + 8913 "00010000" // /* MW 3 */ + 8914 "00010000" // /* MW 2 */ + 8915 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 364 11 first +.delay_slot + 8916 "10011000" // ST r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8917 "00110001" // /* MW 3 */ + 8918 "00000110" // /* MW 2 */ + 8919 "00001111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 367 47 first +.delay_slot + 8920 "01011000" // ADD.NC dn0, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8921 "11000001" // /* MW 3 */ + 8922 "01001001" // /* MW 2 */ + 8923 "00011000" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 225 10 first +.delay_slot + 8924 "10011000" // ST dn0, [sp, #-76] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8925 "00100101" // /* MW 3 */ + 8926 "10110100" // /* MW 2 */ + 8927 "00001111" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 76 16 first +.delay_slot + 8928 "10011000" // ST r24, [sp, #-72] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8929 "00010101" // /* MW 3 */ + 8930 "10111011" // /* MW 2 */ + 8931 "00001111" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 76 16 +.delay_slot + 8932 "00110110" // NOPA; NOPB; ST r24, [sp, #-68]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8933 "11000001" // /* MW 11 */ + 8934 "10001010" // /* MW 10 */ + 8935 "11011111" // /* MW 9 */ + 8936 "00000011" // /* MW 8 */ + 8937 "00000000" // /* MW 7 */ + 8938 "00000000" // /* MW 6 */ + 8939 "00100000" // /* MW 5 */ + 8940 "00000000" // /* MW 4 */ + 8941 "11110000" // /* MW 3 */ + 8942 "00101100" // /* MW 2 */ + 8943 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 +.return_address + 8944 "00011000" // ADD.NC p2, r14, #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8945 "00001010" // /* MW 3 */ + 8946 "01100111" // /* MW 2 */ + 8947 "00011010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first + 8948 "10011000" // LDA r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8949 "00010110" // /* MW 3 */ + 8950 "00000110" // /* MW 2 */ + 8951 "00000010" // /* MW 1 */ + 8952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8953 "00000000" // /* MW 1 */ + 8954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8955 "00000000" // /* MW 1 */ + 8956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8957 "00000000" // /* MW 1 */ + 8958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8959 "00000000" // /* MW 1 */ + 8960 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8961 "00000000" // /* MW 1 */ + 8962 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8963 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 8964 "00011000" // REL r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8965 "11111000" // /* MW 3 */ + 8966 "00010000" // /* MW 2 */ + 8967 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first +.src_ref 6 "superkernels.cpp" 372 19 + 8968 "10111010" // LDA r16, [p2, #-8]; MOVXM p1, #508440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8969 "00010000" // /* MW 9 */ + 8970 "00001100" // /* MW 8 */ + 8971 "10110001" // /* MW 7 */ + 8972 "11110000" // /* MW 6 */ + 8973 "00000001" // /* MW 5 */ + 8974 "00000000" // /* MW 4 */ + 8975 "11010000" // /* MW 3 */ + 8976 "11000010" // /* MW 2 */ + 8977 "01011100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 372 19 first + 8978 "10011000" // LDA r18, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8979 "01010110" // /* MW 3 */ + 8980 "00000110" // /* MW 2 */ + 8981 "00000001" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 372 6 + 8982 "10011000" // LDA r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8983 "00110110" // /* MW 3 */ + 8984 "00000110" // /* MW 2 */ + 8985 "00000111" // /* MW 1 */ + 8986 "00011000" // LDA p1, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8987 "10011001" // /* MW 3 */ + 8988 "11110100" // /* MW 2 */ + 8989 "00000111" // /* MW 1 */ + 8990 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8991 "11010001" // /* MW 3 */ + 8992 "11111001" // /* MW 2 */ + 8993 "00000111" // /* MW 1 */ + 8994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8995 "00000000" // /* MW 1 */ + 8996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8997 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 first + 8998 "10011000" // SUB r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8999 "00000001" // /* MW 3 */ + 9000 "11100001" // /* MW 2 */ + 9001 "00010011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 9002 "10011000" // ST r16, [p2, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9003 "00010001" // /* MW 3 */ + 9004 "11100110" // /* MW 2 */ + 9005 "00001010" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 372 16 first + 9006 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9007 "00101000" // /* MW 3 */ + 9008 "01100001" // /* MW 2 */ + 9009 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 372 6 + 9010 "10000100" // JNZ r16, #9040 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9040 delay_slots=5 */ + 9011 "00000001" // /* MW 5 */ + 9012 "01000000" // /* MW 4 */ + 9013 "10101000" // /* MW 3 */ + 9014 "00010001" // /* MW 2 */ + 9015 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 373 16 +.delay_slot + 9016 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9017 "00000001" // /* MW 3 */ + 9018 "00110000" // /* MW 2 */ + 9019 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9021 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9023 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9025 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9027 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 373 16 first + 9028 "00110110" // NOPA; NOPB; ST r24, [p7]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9029 "11000001" // /* MW 11 */ + 9030 "10001000" // /* MW 10 */ + 9031 "10000011" // /* MW 9 */ + 9032 "00000011" // /* MW 8 */ + 9033 "00000000" // /* MW 7 */ + 9034 "00000000" // /* MW 6 */ + 9035 "00100000" // /* MW 5 */ + 9036 "00000000" // /* MW 4 */ + 9037 "11110000" // /* MW 3 */ + 9038 "00101100" // /* MW 2 */ + 9039 "00000000" // /* MW 1 */ +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 6 "superkernels.cpp" 375 + 9040 "11010100" // LDA r13, [sp, #-4]; MOV lr, r13 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9041 "01000001" // /* MW 5 */ + 9042 "11101101" // /* MW 4 */ + 9043 "00101110" // /* MW 3 */ + 9044 "10110110" // /* MW 2 */ + 9045 "11111111" // /* MW 1 */ + 9046 "00011000" // LDA r15, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9047 "11110001" // /* MW 3 */ + 9048 "11110001" // /* MW 2 */ + 9049 "00000111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 375 first + 9050 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9051 "00000000" // /* MW 3 */ + 9052 "00101000" // /* MW 2 */ + 9053 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 375 +.delay_slot + 9054 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9055 "00000001" // /* MW 5 */ + 9056 "00000000" // /* MW 4 */ + 9057 "00000000" // /* MW 3 */ + 9058 "11110000" // /* MW 2 */ + 9059 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9061 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9063 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9065 "00000000" // /* MW 1 */ +.delay_slot + 9066 "11111000" // MOV p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9067 "11000000" // /* MW 3 */ + 9068 "01100010" // /* MW 2 */ +.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 9069 "00011111" // /* MW 1 */ +.label __Z15_b14160_wrapperPPv___func_begin0 +.label _Z15_b14160_wrapperPPv +.function _b14160_wrapper _Z15_b14160_wrapperPPv +.src_ref 0 "0_0_reloadable4.cc" 21 first +.src_ref 0 "0_0_reloadable4.cc" 23 79 +.function_start + 9072 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9073 "11000000" // /* MW 3 */ + 9074 "01100000" // /* MW 2 */ + 9075 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 23 79 first + 9076 "10011000" // LDA p0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9077 "00011110" // /* MW 3 */ + 9078 "00011100" // /* MW 2 */ + 9079 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 24 79 first + 9080 "10011000" // LDA p1, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9081 "10011110" // /* MW 3 */ + 9082 "00101100" // /* MW 2 */ + 9083 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 26 81 first + 9084 "10011000" // LDA p3, [p2, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9085 "10011110" // /* MW 3 */ + 9086 "11110101" // /* MW 2 */ + 9087 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 25 47 first + 9088 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9089 "00011110" // /* MW 3 */ + 9090 "00000101" // /* MW 2 */ + 9091 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 22 4 first +.tail_call + 9092 "10000100" // J #8592 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=8592 delay_slots=5 */ + 9093 "00000000" // /* MW 5 */ + 9094 "00000000" // /* MW 4 */ + 9095 "11001000" // /* MW 3 */ + 9096 "00010000" // /* MW 2 */ + 9097 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9099 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9100 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9101 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9103 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9105 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z15_b14160_wrapperPPv__end +.label __Z15_b14160_wrapperPPv___func_end0 + 9107 "00000000" // /* MW 1 */ +.label __Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj___func_begin0 +.label _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj +.function setup_transposeshuffle_params _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj +.src_ref 3 "transposeshuffle_params.h" 71 first +.src_ref 3 "transposeshuffle_params.h" 76 16 +.src_ref 3 "transposeshuffle_params.h" 76 18 first +.function_start + 9120 "10111010" // LDA el0, [p1], #4; MOVXM r0, #508544 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9121 "00010000" // /* MW 9 */ + 9122 "01000000" // /* MW 8 */ + 9123 "00001001" // /* MW 7 */ + 9124 "11110000" // /* MW 6 */ + 9125 "00000001" // /* MW 5 */ + 9126 "00000000" // /* MW 4 */ + 9127 "11010000" // /* MW 3 */ + 9128 "10000101" // /* MW 2 */ + 9129 "00100011" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 9 +.src_ref 3 "transposeshuffle_params.h" 76 16 +.src_ref 3 "transposeshuffle_params.h" 80 28 +.src_ref 3 "transposeshuffle_params.h" 80 36 +.src_ref 3 "transposeshuffle_params.h" 81 28 +.src_ref 3 "transposeshuffle_params.h" 81 36 + 9130 "01110110" // MOVA m0, #64; MOVS p0, r0; MOVX r2, #-3; ADD.NC p2, r0, #4 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9131 "00001000" // /* MW 11 */ + 9132 "00000001" // /* MW 10 */ + 9133 "00110000" // /* MW 9 */ + 9134 "10101001" // /* MW 8 */ + 9135 "00100111" // /* MW 7 */ + 9136 "00111110" // /* MW 6 */ + 9137 "00001011" // /* MW 5 */ + 9138 "10000000" // /* MW 4 */ + 9139 "10000000" // /* MW 3 */ + 9140 "00000000" // /* MW 2 */ + 9141 "00001000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 85 22 +.src_ref 3 "transposeshuffle_params.h" 86 17 +.src_ref 3 "transposeshuffle_params.h" 89 43 +.src_ref 3 "transposeshuffle_params.h" 91 18 +.src_ref 3 "transposeshuffle_params.h" 93 4 +.src_ref 3 "transposeshuffle_params.h" 94 4 + 9142 "01100100" // MOVX r1, #4; MOV r0, #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9143 "00000001" // /* MW 5 */ + 9144 "00100010" // /* MW 4 */ + 9145 "00100000" // /* MW 3 */ + 9146 "01000010" // /* MW 2 */ + 9147 "00000000" // /* MW 1 */ + 9148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9149 "00000000" // /* MW 1 */ + 9150 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9151 "00000000" // /* MW 1 */ + 9152 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9153 "00000000" // /* MW 1 */ + 9154 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9155 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 16 first + 9156 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9157 "00101001" // /* MW 3 */ + 9158 "00011100" // /* MW 2 */ + 9159 "00001000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 18 + 9160 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9161 "00101110" // /* MW 3 */ + 9162 "00011100" // /* MW 2 */ + 9163 "00000001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 18 + 9164 "10011000" // LDA eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9165 "00001110" // /* MW 3 */ + 9166 "00011100" // /* MW 2 */ + 9167 "00000001" // /* MW 1 */ + 9168 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9169 "00000000" // /* MW 1 */ + 9170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9171 "00000000" // /* MW 1 */ + 9172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9173 "00000000" // /* MW 1 */ + 9174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9175 "00000000" // /* MW 1 */ + 9176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9177 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 16 + 9178 "10011000" // ST el0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9179 "00101001" // /* MW 3 */ + 9180 "00011100" // /* MW 2 */ + 9181 "00001010" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 16 + 9182 "10011000" // ST eh0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9183 "00001001" // /* MW 3 */ + 9184 "00011100" // /* MW 2 */ + 9185 "00001010" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 18 + 9186 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9187 "00101110" // /* MW 3 */ + 9188 "00011100" // /* MW 2 */ + 9189 "00000001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 18 + 9190 "10011000" // LDA eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9191 "00001110" // /* MW 3 */ + 9192 "00011100" // /* MW 2 */ + 9193 "00000001" // /* MW 1 */ + 9194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9195 "00000000" // /* MW 1 */ + 9196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9197 "00000000" // /* MW 1 */ + 9198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9199 "00000000" // /* MW 1 */ + 9200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9201 "00000000" // /* MW 1 */ + 9202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9203 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 16 + 9204 "10011000" // ST el0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9205 "00101001" // /* MW 3 */ + 9206 "00011100" // /* MW 2 */ + 9207 "00001010" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 16 + 9208 "10011000" // ST eh0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9209 "00001001" // /* MW 3 */ + 9210 "00011100" // /* MW 2 */ + 9211 "00001010" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 18 + 9212 "10011000" // LDA eh0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9213 "00001110" // /* MW 3 */ + 9214 "00000100" // /* MW 2 */ + 9215 "00000001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 18 + 9216 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9217 "00101110" // /* MW 3 */ + 9218 "00010100" // /* MW 2 */ + 9219 "00000001" // /* MW 1 */ + 9220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9221 "00000000" // /* MW 1 */ + 9222 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9223 "00000000" // /* MW 1 */ + 9224 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9225 "00000000" // /* MW 1 */ + 9226 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9227 "00000000" // /* MW 1 */ + 9228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9229 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 16 + 9230 "10011000" // ST eh0, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9231 "00001001" // /* MW 3 */ + 9232 "00000100" // /* MW 2 */ + 9233 "00001010" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 16 + 9234 "10011000" // ST el0, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9235 "00101001" // /* MW 3 */ + 9236 "00010100" // /* MW 2 */ + 9237 "00001010" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 80 28 first + 9238 "10011000" // LDA r3, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9239 "01110110" // /* MW 3 */ + 9240 "00001000" // /* MW 2 */ + 9241 "00000000" // /* MW 1 */ + 9242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9243 "00000000" // /* MW 1 */ + 9244 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9245 "00000000" // /* MW 1 */ + 9246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9247 "00000000" // /* MW 1 */ + 9248 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9249 "00000000" // /* MW 1 */ + 9250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9251 "00000000" // /* MW 1 */ + 9252 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9253 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 80 36 + 9254 "10011000" // LSHL r4, r3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9255 "00101101" // /* MW 3 */ + 9256 "11001000" // /* MW 2 */ + 9257 "00010000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 80 42 +.src_ref 3 "transposeshuffle_params.h" 89 43 first + 9258 "00100100" // LSHL r3, r3, r1; ADD.NC r1, r4, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9259 "11111111" // /* MW 5 */ + 9260 "10100100" // /* MW 4 */ + 9261 "10110000" // /* MW 3 */ + 9262 "11000011" // /* MW 2 */ + 9263 "00011000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 80 19 +.src_ref 3 "transposeshuffle_params.h" 80 19 first + 9264 "00000010" // ST r1, [p0]; MOV r4, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9265 "01110000" // /* MW 7 */ + 9266 "01100000" // /* MW 6 */ + 9267 "10001000" // /* MW 5 */ + 9268 "00000000" // /* MW 4 */ + 9269 "00110000" // /* MW 3 */ + 9270 "10000110" // /* MW 2 */ + 9271 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 80 19 + 9272 "00011000" // ADD.NC p1, r4, #-60 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9273 "01100010" // /* MW 3 */ + 9274 "01100010" // /* MW 2 */ + 9275 "00011001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 81 28 first + 9276 "10011000" // LDA r4, [p1], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9277 "10010110" // /* MW 3 */ + 9278 "00001000" // /* MW 2 */ + 9279 "00000001" // /* MW 1 */ + 9280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9281 "00000000" // /* MW 1 */ + 9282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9283 "00000000" // /* MW 1 */ + 9284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9285 "00000000" // /* MW 1 */ + 9286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9287 "00000000" // /* MW 1 */ + 9288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9289 "00000000" // /* MW 1 */ + 9290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9291 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 81 36 +.src_ref 3 "transposeshuffle_params.h" 90 77 + 9292 "01100100" // LSHL r2, r4, r2; MOV r4, #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9293 "00000001" // /* MW 5 */ + 9294 "00100010" // /* MW 4 */ + 9295 "10110010" // /* MW 3 */ + 9296 "10000101" // /* MW 2 */ + 9297 "00100000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 81 42 + 9298 "00011000" // ADD r2, r2, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9299 "11111111" // /* MW 3 */ + 9300 "10000101" // /* MW 2 */ + 9301 "00010000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 81 19 +.src_ref 3 "transposeshuffle_params.h" 90 77 first + 9302 "01011100" // ST r2, [p1], #4; MSC r4, r4, r3, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9303 "01011100" // /* MW 5 */ + 9304 "10010000" // /* MW 4 */ + 9305 "00110001" // /* MW 3 */ + 9306 "10001010" // /* MW 2 */ + 9307 "00100011" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 85 22 first + 9308 "10011000" // ST r0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9309 "00010001" // /* MW 3 */ + 9310 "00011100" // /* MW 2 */ + 9311 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 85 22 + 9312 "10011000" // ST r0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9313 "00010001" // /* MW 3 */ + 9314 "00011100" // /* MW 2 */ + 9315 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 86 17 first + 9316 "10011000" // ST r0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9317 "00010001" // /* MW 3 */ + 9318 "00101100" // /* MW 2 */ + 9319 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 89 23 first + 9320 "10011000" // ST r3, [p1], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9321 "01110001" // /* MW 3 */ + 9322 "11111100" // /* MW 2 */ + 9323 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 90 23 first + 9324 "10011000" // ST r4, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9325 "10010001" // /* MW 3 */ + 9326 "00101100" // /* MW 2 */ + 9327 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 91 18 +.src_ref 3 "transposeshuffle_params.h" 91 18 first + 9328 "00000010" // ST r0, [p1]; MOV r5, p1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9329 "01110000" // /* MW 7 */ + 9330 "01100000" // /* MW 6 */ + 9331 "10101001" // /* MW 5 */ + 9332 "00000000" // /* MW 4 */ + 9333 "00110000" // /* MW 3 */ + 9334 "10000010" // /* MW 2 */ + 9335 "00100000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 91 18 + 9336 "00011000" // ADD.NC p1, r5, #-68 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9337 "11011110" // /* MW 3 */ + 9338 "01100010" // /* MW 2 */ + 9339 "00011001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 93 4 first + 9340 "10011000" // ST r0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9341 "00010001" // /* MW 3 */ + 9342 "00011100" // /* MW 2 */ + 9343 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 93 4 + 9344 "10011000" // ST r0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9345 "00010001" // /* MW 3 */ + 9346 "00011100" // /* MW 2 */ + 9347 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 93 4 + 9348 "10011000" // ST r2, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9349 "01010001" // /* MW 3 */ + 9350 "00011100" // /* MW 2 */ + 9351 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 93 4 + 9352 "10011000" // ST r1, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9353 "00110001" // /* MW 3 */ + 9354 "00011100" // /* MW 2 */ + 9355 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 93 4 +.src_ref 3 "transposeshuffle_params.h" 95 first + 9356 "01011100" // ST r0, [p1], #4; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 9357 "00000000" // /* MW 5 */ + 9358 "01010000" // /* MW 4 */ + 9359 "00110000" // /* MW 3 */ + 9360 "10000010" // /* MW 2 */ + 9361 "00100011" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 94 4 first +.delay_slot + 9362 "10011000" // ST r3, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9363 "01110001" // /* MW 3 */ + 9364 "00101100" // /* MW 2 */ + 9365 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 94 4 +.delay_slot + 9366 "10011000" // ST r2, [p1], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9367 "01010001" // /* MW 3 */ + 9368 "11111100" // /* MW 2 */ + 9369 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 94 4 +.delay_slot + 9370 "10011000" // ST r4, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9371 "10010001" // /* MW 3 */ + 9372 "00101100" // /* MW 2 */ + 9373 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 94 4 +.delay_slot + 9374 "10011000" // ST r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9375 "00110001" // /* MW 3 */ + 9376 "00000100" // /* MW 2 */ + 9377 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 94 4 +.delay_slot + 9378 "10011000" // ST r0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9379 "00010001" // /* MW 3 */ + 9380 "00010100" // /* MW 2 */ +.label _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj__end +.label __Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj___func_end0 + 9381 "00001001" // /* MW 1 */ +.label __Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params___func_begin0 +.label _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params +.function transposeshuffle _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params +.src_ref 3 "transposeshuffle.h" 38 first +.src_ref 3 "transposeshuffle.h" 72 14 +.src_ref 3 "transposeshuffle.h" 79 23 +.function_start + 9392 "10111010" // MOVA r1, #2; MOVXM p2, #508556 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9393 "00010000" // /* MW 9 */ + 9394 "01000110" // /* MW 8 */ + 9395 "00110001" // /* MW 7 */ + 9396 "11110001" // /* MW 6 */ + 9397 "00000001" // /* MW 5 */ + 9398 "00000000" // /* MW 4 */ + 9399 "00000000" // /* MW 3 */ + 9400 "01000001" // /* MW 2 */ + 9401 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 72 8 +.src_ref 3 "transposeshuffle.h" 72 14 first +.src_ref 3 "transposeshuffle.h" 72 23 + 9402 "00101100" // LDA r27, [p2]; MOVX r0, #22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9403 "10110010" // /* MW 5 */ + 9404 "00000000" // /* MW 4 */ + 9405 "11010000" // /* MW 3 */ + 9406 "11101110" // /* MW 2 */ + 9407 "01000000" // /* MW 1 */ + 9408 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9409 "00000000" // /* MW 1 */ + 9410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9411 "00000000" // /* MW 1 */ + 9412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9413 "00000000" // /* MW 1 */ + 9414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9415 "00000000" // /* MW 1 */ + 9416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9417 "00000000" // /* MW 1 */ + 9418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9419 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 79 23 first + 9420 "10011000" // EQ r1, r27, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9421 "00010111" // /* MW 3 */ + 9422 "11000010" // /* MW 2 */ + 9423 "00010110" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 79 8 + 9424 "10000100" // JNZ r1, #9888 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9888 delay_slots=5 */ + 9425 "00000001" // /* MW 5 */ + 9426 "01000000" // /* MW 4 */ + 9427 "01010000" // /* MW 3 */ + 9428 "00010011" // /* MW 2 */ + 9429 "00001000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 72 8 +.src_ref 3 "transposeshuffle.h" 72 23 +.delay_slot + 9430 "00011000" // MOVX r2, #29 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9431 "01110101" // /* MW 3 */ + 9432 "00000100" // /* MW 2 */ + 9433 "00010000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 72 8 first +.src_ref 3 "transposeshuffle.h" 72 23 first +.delay_slot + 9434 "00011000" // SEL.EQZ r0, r0, r2, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9435 "00100010" // /* MW 3 */ + 9436 "00000000" // /* MW 2 */ + 9437 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9439 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9441 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9443 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 116 34 + 9444 "01000100" // MOVXM p2, #508560 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9445 "00100000" // /* MW 5 */ + 9446 "11000101" // /* MW 4 */ + 9447 "11000100" // /* MW 3 */ + 9448 "00000111" // /* MW 2 */ + 9449 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 116 34 first + 9450 "10011000" // LDA r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9451 "00110110" // /* MW 3 */ + 9452 "00000100" // /* MW 2 */ + 9453 "00000010" // /* MW 1 */ + 9454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9455 "00000000" // /* MW 1 */ + 9456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9457 "00000000" // /* MW 1 */ + 9458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9459 "00000000" // /* MW 1 */ + 9460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9461 "00000000" // /* MW 1 */ + 9462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9463 "00000000" // /* MW 1 */ + 9464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9465 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 116 8 +.src_ref 3 "transposeshuffle.h" 116 26 + 9466 "10000100" // JZ r1, #10528 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10528 delay_slots=5 */ + 9467 "00000001" // /* MW 5 */ + 9468 "00000000" // /* MW 4 */ + 9469 "10010000" // /* MW 3 */ + 9470 "00010100" // /* MW 2 */ + 9471 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9473 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9475 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9477 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9479 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9481 "00000000" // /* MW 1 */ + 9482 "00011000" // MOVX r2, #10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9483 "00101001" // /* MW 3 */ + 9484 "00000100" // /* MW 2 */ + 9485 "00010000" // /* MW 1 */ + 9486 "10011000" // LTU r2, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9487 "00101100" // /* MW 3 */ + 9488 "01000100" // /* MW 2 */ + 9489 "00010000" // /* MW 1 */ + 9490 "10000100" // JNZ r2, #9728 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9728 delay_slots=5 */ + 9491 "00000001" // /* MW 5 */ + 9492 "01000000" // /* MW 4 */ + 9493 "00000000" // /* MW 3 */ + 9494 "00010011" // /* MW 2 */ + 9495 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9497 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9499 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9501 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9503 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9505 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "transposeshuffle.h" 116 8 +.src_ref 3 "transposeshuffle.h" 119 21 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9506 "00111010" // VLDB x0, [p0], #64; MOVXM ls, #9632 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9507 "00010000" // /* MW 9 */ + 9508 "11010000" // /* MW 8 */ + 9509 "01111010" // /* MW 7 */ + 9510 "00001000" // /* MW 6 */ + 9511 "00000000" // /* MW 5 */ + 9512 "00000000" // /* MW 4 */ + 9513 "01101000" // /* MW 3 */ + 9514 "00111000" // /* MW 2 */ + 9515 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "transposeshuffle.h" 116 8 first +.src_ref 3 "transposeshuffle.h" 119 21 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9516 "00111010" // VLDB x0, [p0], #64; MOVXM le, #9632 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9517 "00010000" // /* MW 9 */ + 9518 "11010000" // /* MW 8 */ + 9519 "10111010" // /* MW 7 */ + 9520 "00001001" // /* MW 6 */ + 9521 "00000000" // /* MW 5 */ + 9522 "00000000" // /* MW 4 */ + 9523 "01101000" // /* MW 3 */ + 9524 "00111000" // /* MW 2 */ + 9525 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "transposeshuffle.h" 116 8 +.src_ref 3 "transposeshuffle.h" 119 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9526 "10111010" // NOPA; VLDB x0, [p0], #64; ADD.NC lc, r1, #-9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9527 "11001110" // /* MW 9 */ + 9528 "01111101" // /* MW 8 */ + 9529 "10111000" // /* MW 7 */ + 9530 "00000010" // /* MW 6 */ + 9531 "00110100" // /* MW 5 */ + 9532 "00011100" // /* MW 4 */ + 9533 "11110000" // /* MW 3 */ + 9534 "00101100" // /* MW 2 */ + 9535 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "transposeshuffle.h" 119 21 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9536 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9537 "00000000" // /* MW 15 */ + 9538 "00000000" // /* MW 14 */ + 9539 "01111000" // /* MW 13 */ + 9540 "10100101" // /* MW 12 */ + 9541 "00000001" // /* MW 11 */ + 9542 "00000000" // /* MW 10 */ + 9543 "00000000" // /* MW 9 */ + 9544 "00000000" // /* MW 8 */ + 9545 "01011011" // /* MW 7 */ + 9546 "00000001" // /* MW 6 */ + 9547 "01101000" // /* MW 5 */ + 9548 "00111000" // /* MW 4 */ + 9549 "11110000" // /* MW 3 */ + 9550 "00101100" // /* MW 2 */ + 9551 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "transposeshuffle.h" 119 21 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9552 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9553 "00000000" // /* MW 15 */ + 9554 "00000000" // /* MW 14 */ + 9555 "01111000" // /* MW 13 */ + 9556 "10100101" // /* MW 12 */ + 9557 "00000001" // /* MW 11 */ + 9558 "00000000" // /* MW 10 */ + 9559 "00000000" // /* MW 9 */ + 9560 "00000000" // /* MW 8 */ + 9561 "01011011" // /* MW 7 */ + 9562 "00000001" // /* MW 6 */ + 9563 "01101000" // /* MW 5 */ + 9564 "00111000" // /* MW 4 */ + 9565 "11110000" // /* MW 3 */ + 9566 "00101100" // /* MW 2 */ + 9567 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "transposeshuffle.h" 119 21 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9568 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9569 "00000000" // /* MW 15 */ + 9570 "00000000" // /* MW 14 */ + 9571 "01111000" // /* MW 13 */ + 9572 "10100101" // /* MW 12 */ + 9573 "00000001" // /* MW 11 */ + 9574 "00000000" // /* MW 10 */ + 9575 "00000000" // /* MW 9 */ + 9576 "00000000" // /* MW 8 */ + 9577 "01011011" // /* MW 7 */ + 9578 "00000001" // /* MW 6 */ + 9579 "01101000" // /* MW 5 */ + 9580 "00111000" // /* MW 4 */ + 9581 "11110000" // /* MW 3 */ + 9582 "00101100" // /* MW 2 */ + 9583 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "transposeshuffle.h" 119 21 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9584 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9585 "00000000" // /* MW 15 */ + 9586 "00000000" // /* MW 14 */ + 9587 "01111000" // /* MW 13 */ + 9588 "10100101" // /* MW 12 */ + 9589 "00000001" // /* MW 11 */ + 9590 "00000000" // /* MW 10 */ + 9591 "00000000" // /* MW 9 */ + 9592 "00000000" // /* MW 8 */ + 9593 "01011011" // /* MW 7 */ + 9594 "00000001" // /* MW 6 */ + 9595 "01101000" // /* MW 5 */ + 9596 "00111000" // /* MW 4 */ + 9597 "11110000" // /* MW 3 */ + 9598 "00101100" // /* MW 2 */ + 9599 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "transposeshuffle.h" 119 21 +.src_ref 3 "transposeshuffle.h" 120 17 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9600 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9601 "00000000" // /* MW 15 */ + 9602 "00000000" // /* MW 14 */ + 9603 "11101000" // /* MW 13 */ + 9604 "00000000" // /* MW 12 */ + 9605 "00000000" // /* MW 11 */ + 9606 "00000000" // /* MW 10 */ + 9607 "00000000" // /* MW 9 */ + 9608 "00000000" // /* MW 8 */ + 9609 "01011011" // /* MW 7 */ + 9610 "00000001" // /* MW 6 */ + 9611 "01101000" // /* MW 5 */ + 9612 "00111000" // /* MW 4 */ + 9613 "11110000" // /* MW 3 */ + 9614 "00101100" // /* MW 2 */ + 9615 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "transposeshuffle.h" 119 21 first +.src_ref 3 "transposeshuffle.h" 120 17 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9616 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9617 "00000000" // /* MW 15 */ + 9618 "00000000" // /* MW 14 */ + 9619 "11101000" // /* MW 13 */ + 9620 "00000000" // /* MW 12 */ + 9621 "00000000" // /* MW 11 */ + 9622 "00000000" // /* MW 10 */ + 9623 "00000000" // /* MW 9 */ + 9624 "00000000" // /* MW 8 */ + 9625 "01011011" // /* MW 7 */ + 9626 "00000001" // /* MW 6 */ + 9627 "01101000" // /* MW 5 */ + 9628 "00111000" // /* MW 4 */ + 9629 "11110000" // /* MW 3 */ + 9630 "00101100" // /* MW 2 */ + 9631 "00000000" // /* MW 1 */ +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_240 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "transposeshuffle.h" 119 21 +.src_ref 3 "transposeshuffle.h" 120 17 first +.src_ref 3 "transposeshuffle.h" 122 22 first +.begin_of_loop +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 9632 "11100001" // NOPA; VLDB x0, [p0], #64; VST bmll0, [p1], #64; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9633 "00000000" // /* MW 15 */ + 9634 "00000000" // /* MW 14 */ + 9635 "11101000" // /* MW 13 */ + 9636 "00000000" // /* MW 12 */ + 9637 "00000000" // /* MW 11 */ + 9638 "00000000" // /* MW 10 */ + 9639 "00000000" // /* MW 9 */ + 9640 "10000000" // /* MW 8 */ + 9641 "00000110" // /* MW 7 */ + 9642 "00011100" // /* MW 6 */ + 9643 "01101001" // /* MW 5 */ + 9644 "00111000" // /* MW 4 */ + 9645 "11110000" // /* MW 3 */ + 9646 "00101100" // /* MW 2 */ + 9647 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "transposeshuffle.h" 120 17 +.src_ref 3 "transposeshuffle.h" 122 22 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 9648 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9649 "11100000" // /* MW 7 */ + 9650 "00000000" // /* MW 6 */ + 9651 "00000000" // /* MW 5 */ + 9652 "00000000" // /* MW 4 */ + 9653 "11010000" // /* MW 3 */ + 9654 "10000000" // /* MW 2 */ + 9655 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "transposeshuffle.h" 120 17 +.src_ref 3 "transposeshuffle.h" 122 22 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9656 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9657 "11100000" // /* MW 7 */ + 9658 "00000000" // /* MW 6 */ + 9659 "00000000" // /* MW 5 */ + 9660 "00000000" // /* MW 4 */ + 9661 "11010000" // /* MW 3 */ + 9662 "10000000" // /* MW 2 */ + 9663 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "transposeshuffle.h" 120 17 +.src_ref 3 "transposeshuffle.h" 122 22 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9664 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9665 "11100000" // /* MW 7 */ + 9666 "00000000" // /* MW 6 */ + 9667 "00000000" // /* MW 5 */ + 9668 "00000000" // /* MW 4 */ + 9669 "11010000" // /* MW 3 */ + 9670 "10000000" // /* MW 2 */ + 9671 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "transposeshuffle.h" 120 17 +.src_ref 3 "transposeshuffle.h" 122 22 +.src_ref 3 "transposeshuffle.h" 126 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9672 "00111010" // VST bmll0, [p1], #64; RET lr; VSHUFFLE bmll0, x0, x0, r0 /* MW 10 */ /* control_operation: words=10 rts unconditional cycles_taken=1 delay_slots=5 */ + 9673 "11101001" // /* MW 9 */ + 9674 "00000000" // /* MW 8 */ + 9675 "00000000" // /* MW 7 */ + 9676 "00000000" // /* MW 6 */ + 9677 "01000000" // /* MW 5 */ + 9678 "00000001" // /* MW 4 */ + 9679 "11010000" // /* MW 3 */ + 9680 "10000000" // /* MW 2 */ + 9681 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "transposeshuffle.h" 120 17 first +.src_ref 3 "transposeshuffle.h" 122 22 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9682 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9683 "11100000" // /* MW 7 */ + 9684 "00000000" // /* MW 6 */ + 9685 "00000000" // /* MW 5 */ + 9686 "00000000" // /* MW 4 */ + 9687 "11010000" // /* MW 3 */ + 9688 "10000000" // /* MW 2 */ + 9689 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "transposeshuffle.h" 120 17 +.src_ref 3 "transposeshuffle.h" 122 22 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9690 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9691 "11100000" // /* MW 7 */ + 9692 "00000000" // /* MW 6 */ + 9693 "00000000" // /* MW 5 */ + 9694 "00000000" // /* MW 4 */ + 9695 "11010000" // /* MW 3 */ + 9696 "10000000" // /* MW 2 */ + 9697 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "transposeshuffle.h" 120 17 +.src_ref 3 "transposeshuffle.h" 122 22 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9698 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9699 "11100000" // /* MW 7 */ + 9700 "00000000" // /* MW 6 */ + 9701 "00000000" // /* MW 5 */ + 9702 "00000000" // /* MW 4 */ + 9703 "11010000" // /* MW 3 */ + 9704 "10000000" // /* MW 2 */ + 9705 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "transposeshuffle.h" 122 22 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9706 "00001100" // NOPA; VST bmll0, [p1], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9707 "00001101" // /* MW 5 */ + 9708 "00111000" // /* MW 4 */ + 9709 "11110010" // /* MW 3 */ + 9710 "00101100" // /* MW 2 */ + 9711 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "transposeshuffle.h" 122 22 +.delay_slot + 9712 "11100001" // NOPA; NOPB; VST bmll0, [p1], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9713 "00000000" // /* MW 15 */ + 9714 "00000000" // /* MW 14 */ + 9715 "01111000" // /* MW 13 */ + 9716 "10100101" // /* MW 12 */ + 9717 "00000001" // /* MW 11 */ + 9718 "00000000" // /* MW 10 */ + 9719 "00000000" // /* MW 9 */ + 9720 "10000000" // /* MW 8 */ + 9721 "00000110" // /* MW 7 */ + 9722 "00011100" // /* MW 6 */ + 9723 "00100001" // /* MW 5 */ + 9724 "00000000" // /* MW 4 */ + 9725 "11110000" // /* MW 3 */ + 9726 "00101100" // /* MW 2 */ + 9727 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_336 +.src_ref 3 "transposeshuffle.h" 116 8 first + 9728 "11111000" // MOV lc, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9729 "10100000" // /* MW 3 */ + 9730 "01110000" // /* MW 2 */ + 9731 "00011101" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 116 8 + 9732 "01000100" // MOVXM ls, #9744 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9733 "00100000" // /* MW 5 */ + 9734 "11101100" // /* MW 4 */ + 9735 "00100001" // /* MW 3 */ + 9736 "00000000" // /* MW 2 */ + 9737 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 116 8 + 9738 "01000100" // MOVXM le, #9856 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9739 "00000000" // /* MW 5 */ + 9740 "11101101" // /* MW 4 */ + 9741 "00100110" // /* MW 3 */ + 9742 "00000000" // /* MW 2 */ + 9743 "00000000" // /* MW 1 */ +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_352 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "transposeshuffle.h" 119 21 first +.begin_of_loop +.loop_nesting 1 + 9744 "00011000" // VLDB x0, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9745 "00110100" // /* MW 3 */ + 9746 "00011100" // /* MW 2 */ + 9747 "00111000" // /* MW 1 */ + 9748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9749 "00000000" // /* MW 1 */ + 9750 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9751 "01111110" // /* MW 9 */ + 9752 "10100101" // /* MW 8 */ + 9753 "00000001" // /* MW 7 */ + 9754 "00000000" // /* MW 6 */ + 9755 "00010000" // /* MW 5 */ + 9756 "00000000" // /* MW 4 */ + 9757 "11110000" // /* MW 3 */ + 9758 "00101100" // /* MW 2 */ + 9759 "00000000" // /* MW 1 */ + 9760 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9761 "00000000" // /* MW 15 */ + 9762 "00000000" // /* MW 14 */ + 9763 "01111000" // /* MW 13 */ + 9764 "10100101" // /* MW 12 */ + 9765 "00000001" // /* MW 11 */ + 9766 "00000000" // /* MW 10 */ + 9767 "00000000" // /* MW 9 */ + 9768 "00000000" // /* MW 8 */ + 9769 "01011011" // /* MW 7 */ + 9770 "00000001" // /* MW 6 */ + 9771 "00100000" // /* MW 5 */ + 9772 "00000000" // /* MW 4 */ + 9773 "11110000" // /* MW 3 */ + 9774 "00101100" // /* MW 2 */ + 9775 "00000000" // /* MW 1 */ + 9776 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9777 "00000000" // /* MW 15 */ + 9778 "00000000" // /* MW 14 */ + 9779 "01111000" // /* MW 13 */ + 9780 "10100101" // /* MW 12 */ + 9781 "00000001" // /* MW 11 */ + 9782 "00000000" // /* MW 10 */ + 9783 "00000000" // /* MW 9 */ + 9784 "00000000" // /* MW 8 */ + 9785 "01011011" // /* MW 7 */ + 9786 "00000001" // /* MW 6 */ + 9787 "00100000" // /* MW 5 */ + 9788 "00000000" // /* MW 4 */ + 9789 "11110000" // /* MW 3 */ + 9790 "00101100" // /* MW 2 */ + 9791 "00000000" // /* MW 1 */ + 9792 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9793 "00000000" // /* MW 15 */ + 9794 "00000000" // /* MW 14 */ + 9795 "01111000" // /* MW 13 */ + 9796 "10100101" // /* MW 12 */ + 9797 "00000001" // /* MW 11 */ + 9798 "00000000" // /* MW 10 */ + 9799 "00000000" // /* MW 9 */ + 9800 "00000000" // /* MW 8 */ + 9801 "01011011" // /* MW 7 */ + 9802 "00000001" // /* MW 6 */ + 9803 "00100000" // /* MW 5 */ + 9804 "00000000" // /* MW 4 */ + 9805 "11110000" // /* MW 3 */ + 9806 "00101100" // /* MW 2 */ + 9807 "00000000" // /* MW 1 */ + 9808 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9809 "00000000" // /* MW 15 */ + 9810 "00000000" // /* MW 14 */ + 9811 "01111000" // /* MW 13 */ + 9812 "10100101" // /* MW 12 */ + 9813 "00000001" // /* MW 11 */ + 9814 "00000000" // /* MW 10 */ + 9815 "00000000" // /* MW 9 */ + 9816 "00000000" // /* MW 8 */ + 9817 "01011011" // /* MW 7 */ + 9818 "00000001" // /* MW 6 */ + 9819 "00100000" // /* MW 5 */ + 9820 "00000000" // /* MW 4 */ + 9821 "11110000" // /* MW 3 */ + 9822 "00101100" // /* MW 2 */ + 9823 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 120 17 first + 9824 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9825 "00000000" // /* MW 15 */ + 9826 "00000000" // /* MW 14 */ + 9827 "11101000" // /* MW 13 */ + 9828 "00000000" // /* MW 12 */ + 9829 "00000000" // /* MW 11 */ + 9830 "00000000" // /* MW 10 */ + 9831 "00000000" // /* MW 9 */ + 9832 "00000000" // /* MW 8 */ + 9833 "01011011" // /* MW 7 */ + 9834 "00000001" // /* MW 6 */ + 9835 "00100000" // /* MW 5 */ + 9836 "00000000" // /* MW 4 */ + 9837 "11110000" // /* MW 3 */ + 9838 "00101100" // /* MW 2 */ + 9839 "00000000" // /* MW 1 */ + 9840 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9841 "00000000" // /* MW 15 */ + 9842 "00000000" // /* MW 14 */ + 9843 "01111000" // /* MW 13 */ + 9844 "10100101" // /* MW 12 */ + 9845 "00000001" // /* MW 11 */ + 9846 "00000000" // /* MW 10 */ + 9847 "00000000" // /* MW 9 */ + 9848 "00000000" // /* MW 8 */ + 9849 "01011011" // /* MW 7 */ + 9850 "00000001" // /* MW 6 */ + 9851 "00100000" // /* MW 5 */ + 9852 "00000000" // /* MW 4 */ + 9853 "11110000" // /* MW 3 */ + 9854 "00101100" // /* MW 2 */ + 9855 "00000000" // /* MW 1 */ +.label ZLE_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_464 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "transposeshuffle.h" 122 22 first +.end_of_loop + 9856 "11100001" // NOPA; NOPB; VST bmll0, [p1], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9857 "00000000" // /* MW 15 */ + 9858 "00000000" // /* MW 14 */ + 9859 "01111000" // /* MW 13 */ + 9860 "10100101" // /* MW 12 */ + 9861 "00000001" // /* MW 11 */ + 9862 "00000000" // /* MW 10 */ + 9863 "00000000" // /* MW 9 */ + 9864 "10000000" // /* MW 8 */ + 9865 "00000110" // /* MW 7 */ + 9866 "00011100" // /* MW 6 */ + 9867 "00100001" // /* MW 5 */ + 9868 "00000000" // /* MW 4 */ + 9869 "11110000" // /* MW 3 */ + 9870 "00101100" // /* MW 2 */ + 9871 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 126 first +.loop_nesting 0 + 9872 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9873 "00000000" // /* MW 3 */ + 9874 "00101000" // /* MW 2 */ + 9875 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9877 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9879 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9881 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9883 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9884 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9885 "01100111" // /* MW 3 */ + 9886 "00000001" // /* MW 2 */ + 9887 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_496 +.src_ref 3 "transposeshuffle.h" 86 34 + 9888 "01000100" // MOVXM p2, #508544 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9889 "00000000" // /* MW 5 */ + 9890 "11000101" // /* MW 4 */ + 9891 "11000100" // /* MW 3 */ + 9892 "00000111" // /* MW 2 */ + 9893 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 34 first + 9894 "10011000" // LDA r0, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9895 "00010110" // /* MW 3 */ + 9896 "00000100" // /* MW 2 */ + 9897 "00000010" // /* MW 1 */ + 9898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9899 "00000000" // /* MW 1 */ + 9900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9901 "00000000" // /* MW 1 */ + 9902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9903 "00000000" // /* MW 1 */ + 9904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9905 "00000000" // /* MW 1 */ + 9906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9907 "00000000" // /* MW 1 */ + 9908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9909 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 86 26 + 9910 "10000100" // JZ r0, #10528 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10528 delay_slots=5 */ + 9911 "00000001" // /* MW 5 */ + 9912 "00000000" // /* MW 4 */ + 9913 "10010000" // /* MW 3 */ + 9914 "00010100" // /* MW 2 */ + 9915 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9917 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9919 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9920 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9921 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9923 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9925 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 86 8 + 9926 "10111010" // MOVA m5, #36; MOVXM p4, #508548 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9927 "00010000" // /* MW 9 */ + 9928 "01000010" // /* MW 8 */ + 9929 "00110001" // /* MW 7 */ + 9930 "11110010" // /* MW 6 */ + 9931 "00000001" // /* MW 5 */ + 9932 "00000000" // /* MW 4 */ + 9933 "10000000" // /* MW 3 */ + 9934 "10010100" // /* MW 2 */ + 9935 "00000100" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 87 35 + 9936 "10111010" // LDA r1, [p4], #4; MOVX r5, #8; MOV r2, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9937 "01011000" // /* MW 9 */ + 9938 "11111101" // /* MW 8 */ + 9939 "01001111" // /* MW 7 */ + 9940 "00001000" // /* MW 6 */ + 9941 "01010001" // /* MW 5 */ + 9942 "00000000" // /* MW 4 */ + 9943 "11010000" // /* MW 3 */ + 9944 "10000110" // /* MW 2 */ + 9945 "10000011" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1132 26 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 3 "transposeshuffle.h" 86 8 + 9946 "10111010" // LDA r4, [p4], m5; MOVX r3, #1; MOV dc4, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9947 "01011000" // /* MW 9 */ + 9948 "00000000" // /* MW 8 */ + 9949 "01100000" // /* MW 7 */ + 9950 "00101010" // /* MW 6 */ + 9951 "00110000" // /* MW 5 */ + 9952 "00000000" // /* MW 4 */ + 9953 "11010000" // /* MW 3 */ + 9954 "00010010" // /* MW 2 */ + 9955 "10010101" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 86 8 + 9956 "01110110" // LDA m1, [p4], #-8; MOVS dc0, dc4; MOVX r6, #4; MOV m4, #32 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9957 "01011000" // /* MW 11 */ + 9958 "00100000" // /* MW 10 */ + 9959 "00000000" // /* MW 9 */ + 9960 "10001010" // /* MW 8 */ + 9961 "01100000" // /* MW 7 */ + 9962 "00000000" // /* MW 6 */ + 9963 "01001011" // /* MW 5 */ + 9964 "00010000" // /* MW 4 */ + 9965 "11010000" // /* MW 3 */ + 9966 "10010000" // /* MW 2 */ + 9967 "10011101" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1132 26 +.src_ref 4 "transpose.hpp" 224 15 +.src_ref 3 "transposeshuffle.h" 86 8 + 9968 "01110110" // LDA dn1, [p4], #-8; MOVS dc1, dc4; ADD r0, r0, #-1; MOV r7, #52 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9969 "01011000" // /* MW 11 */ + 9970 "00110100" // /* MW 10 */ + 9971 "11101000" // /* MW 9 */ + 9972 "11111000" // /* MW 8 */ + 9973 "00001111" // /* MW 7 */ + 9974 "00000000" // /* MW 6 */ + 9975 "01001011" // /* MW 5 */ + 9976 "00010000" // /* MW 4 */ + 9977 "11010001" // /* MW 3 */ + 9978 "10010100" // /* MW 2 */ + 9979 "10011101" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1132 26 +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 86 8 + 9980 "01110110" // LDA dj1, [p4], #12; MOVS dc5, dc4; MOVXM p2, #10064 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9981 "00010000" // /* MW 11 */ + 9982 "10101000" // /* MW 10 */ + 9983 "00110011" // /* MW 9 */ + 9984 "00001001" // /* MW 8 */ + 9985 "00000000" // /* MW 7 */ + 9986 "00000000" // /* MW 6 */ + 9987 "01001011" // /* MW 5 */ + 9988 "00010000" // /* MW 4 */ + 9989 "11010101" // /* MW 3 */ + 9990 "10011000" // /* MW 2 */ + 9991 "10000111" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 87 12 + 9992 "10111010" // LDA dn5, [p4], #-8; MOVXM p3, #10096 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9993 "00010000" // /* MW 9 */ + 9994 "10111000" // /* MW 8 */ + 9995 "10110011" // /* MW 7 */ + 9996 "00001001" // /* MW 6 */ + 9997 "00000000" // /* MW 5 */ + 9998 "00000000" // /* MW 4 */ + 9999 "11010000" // /* MW 3 */ + 10000 "11010100" // /* MW 2 */ + 10001 "10011101" // /* MW 1 */ +.src_ref 4 "transpose.hpp" 225 15 +.src_ref 3 "transposeshuffle.h" 86 8 + 10002 "00101100" // LDA dj5, [p4], m4; MOVX r16, #53 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10003 "10101010" // /* MW 5 */ + 10004 "01000001" // /* MW 4 */ + 10005 "11010000" // /* MW 3 */ + 10006 "01011000" // /* MW 2 */ + 10007 "10010001" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 87 35 first + 10008 "10111010" // LDA m0, [p4], #-8; LTU r27, r5, r1; ADD.NC r5, r1, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10009 "11001000" // /* MW 9 */ + 10010 "01111111" // /* MW 8 */ + 10011 "10101000" // /* MW 7 */ + 10012 "11100100" // /* MW 6 */ + 10013 "10110000" // /* MW 5 */ + 10014 "00001011" // /* MW 4 */ + 10015 "11010000" // /* MW 3 */ + 10016 "10000000" // /* MW 2 */ + 10017 "10011101" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 first +.src_ref 3 "transposeshuffle.h" 86 8 first + 10018 "10111010" // LDA dn0, [p4], #-8; LSHL r17, r5, r2; ADD.NC r5, r4, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10019 "11001000" // /* MW 9 */ + 10020 "00111111" // /* MW 8 */ + 10021 "10101001" // /* MW 7 */ + 10022 "01101100" // /* MW 6 */ + 10023 "00010001" // /* MW 5 */ + 10024 "00001011" // /* MW 4 */ + 10025 "11010000" // /* MW 3 */ + 10026 "10000100" // /* MW 2 */ + 10027 "10011101" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 86 8 + 10028 "10111010" // LDA dj0, [p4], #12; LSHL r2, r5, r2; ADD.NC r5, r17, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10029 "01001000" // /* MW 9 */ + 10030 "01000000" // /* MW 8 */ + 10031 "10101100" // /* MW 7 */ + 10032 "01101100" // /* MW 6 */ + 10033 "00100001" // /* MW 5 */ + 10034 "00001010" // /* MW 4 */ + 10035 "11010000" // /* MW 3 */ + 10036 "10001000" // /* MW 2 */ + 10037 "10000111" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 + 10038 "10111010" // LDA dn4, [p4]; SEL.EQZ r5, r3, r5, r27; ADD.NC r3, r2, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10039 "01001000" // /* MW 9 */ + 10040 "10000000" // /* MW 8 */ + 10041 "01101000" // /* MW 7 */ + 10042 "10010000" // /* MW 6 */ + 10043 "01010010" // /* MW 5 */ + 10044 "00000110" // /* MW 4 */ + 10045 "11010000" // /* MW 3 */ + 10046 "11000100" // /* MW 2 */ + 10047 "10000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 + 10048 "11100001" // LDA dj4, [p4, #-8]; NOPB; NOPS; ADD r5, r5, #-1; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10049 "00000000" // /* MW 15 */ + 10050 "00000000" // /* MW 14 */ + 10051 "01111000" // /* MW 13 */ + 10052 "10100101" // /* MW 12 */ + 10053 "00000001" // /* MW 11 */ + 10054 "11111000" // /* MW 10 */ + 10055 "01011111" // /* MW 9 */ + 10056 "00001010" // /* MW 8 */ + 10057 "01011011" // /* MW 7 */ + 10058 "00000001" // /* MW 6 */ + 10059 "00100000" // /* MW 5 */ + 10060 "00000000" // /* MW 4 */ + 10061 "11010000" // /* MW 3 */ + 10062 "11001000" // /* MW 2 */ + 10063 "10011100" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_672 +.src_ref 3 "transposeshuffle.h" 87 12 first +.loop_nesting 1 + 10064 "10000100" // JZ r1, #10512 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10512 delay_slots=5 */ + 10065 "00000001" // /* MW 5 */ + 10066 "00000000" // /* MW 4 */ + 10067 "10001000" // /* MW 3 */ + 10068 "00010100" // /* MW 2 */ + 10069 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10071 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10073 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10075 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10077 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10078 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10079 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 87 12 + 10080 "11100001" // NOPA; NOPB; NOPS; NOPX; MOV r17, r5; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10081 "00000000" // /* MW 15 */ + 10082 "00000000" // /* MW 14 */ + 10083 "01111000" // /* MW 13 */ + 10084 "01010000" // /* MW 12 */ + 10085 "00101001" // /* MW 11 */ + 10086 "00000010" // /* MW 10 */ + 10087 "00000000" // /* MW 9 */ + 10088 "00000000" // /* MW 8 */ + 10089 "01011011" // /* MW 7 */ + 10090 "00000001" // /* MW 6 */ + 10091 "00100000" // /* MW 5 */ + 10092 "00000000" // /* MW 4 */ + 10093 "11110000" // /* MW 3 */ + 10094 "00101100" // /* MW 2 */ + 10095 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_704 +.src_ref 3 "transposeshuffle.h" 88 16 first +.loop_nesting 2 + 10096 "10000100" // JZ r4, #10496 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10496 delay_slots=5 */ + 10097 "00000001" // /* MW 5 */ + 10098 "00000000" // /* MW 4 */ + 10099 "10000000" // /* MW 3 */ + 10100 "00010100" // /* MW 2 */ + 10101 "00100000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10103 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10105 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10107 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10108 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10109 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10111 "00000000" // /* MW 1 */ + 10112 "10011000" // LTU r18, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10113 "01101100" // /* MW 3 */ + 10114 "11100100" // /* MW 2 */ + 10115 "00010000" // /* MW 1 */ + 10116 "10000100" // JNZ r18, #10352 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10352 delay_slots=5 */ + 10117 "00000001" // /* MW 5 */ + 10118 "01000000" // /* MW 4 */ + 10119 "00111000" // /* MW 3 */ + 10120 "00010100" // /* MW 2 */ + 10121 "10010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10123 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10125 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10127 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10129 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10131 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1132 26 first +.src_ref 3 "transposeshuffle.h" 88 16 + 10132 "00111010" // VLDB x0, [p0, #64]; MOVXM ls, #10240 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10133 "00010000" // /* MW 9 */ + 10134 "00000000" // /* MW 8 */ + 10135 "01111100" // /* MW 7 */ + 10136 "00001000" // /* MW 6 */ + 10137 "00000000" // /* MW 5 */ + 10138 "00000000" // /* MW 4 */ + 10139 "01101000" // /* MW 3 */ + 10140 "00101000" // /* MW 2 */ + 10141 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1132 26 +.src_ref 3 "transposeshuffle.h" 88 16 first + 10142 "00111010" // VLDB.3D x1, [p0], d1; MOVXM le, #10272 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10143 "00010000" // /* MW 9 */ + 10144 "00010000" // /* MW 8 */ + 10145 "10111100" // /* MW 7 */ + 10146 "00001001" // /* MW 6 */ + 10147 "00000000" // /* MW 5 */ + 10148 "00000000" // /* MW 4 */ + 10149 "11101000" // /* MW 3 */ + 10150 "01110000" // /* MW 2 */ + 10151 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 88 16 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 10152 "10011000" // ADD.NC lc, r3, #-3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10153 "11111110" // /* MW 3 */ + 10154 "01110001" // /* MW 2 */ + 10155 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1132 26 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10156 "00011000" // VLDB x0, [p0, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10157 "00110100" // /* MW 3 */ + 10158 "00010100" // /* MW 2 */ + 10159 "00111000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1132 26 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10160 "11100001" // NOPA; VLDB.3D x1, [p0], d1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10161 "00000000" // /* MW 15 */ + 10162 "00000000" // /* MW 14 */ + 10163 "01111000" // /* MW 13 */ + 10164 "10100101" // /* MW 12 */ + 10165 "00000001" // /* MW 11 */ + 10166 "00000000" // /* MW 10 */ + 10167 "00000000" // /* MW 9 */ + 10168 "00000000" // /* MW 8 */ + 10169 "01011011" // /* MW 7 */ + 10170 "00000001" // /* MW 6 */ + 10171 "11101000" // /* MW 5 */ + 10172 "01110000" // /* MW 4 */ + 10173 "11110000" // /* MW 3 */ + 10174 "00101100" // /* MW 2 */ + 10175 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10176 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10177 "00000000" // /* MW 15 */ + 10178 "00000000" // /* MW 14 */ + 10179 "01111000" // /* MW 13 */ + 10180 "10100101" // /* MW 12 */ + 10181 "00000001" // /* MW 11 */ + 10182 "00000000" // /* MW 10 */ + 10183 "00000000" // /* MW 9 */ + 10184 "00000000" // /* MW 8 */ + 10185 "01011011" // /* MW 7 */ + 10186 "00000001" // /* MW 6 */ + 10187 "00100000" // /* MW 5 */ + 10188 "00000000" // /* MW 4 */ + 10189 "11110000" // /* MW 3 */ + 10190 "00101100" // /* MW 2 */ + 10191 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1132 26 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10192 "11100001" // NOPA; VLDB x0, [p0, #64]; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10193 "00000000" // /* MW 15 */ + 10194 "00000000" // /* MW 14 */ + 10195 "01111000" // /* MW 13 */ + 10196 "10100101" // /* MW 12 */ + 10197 "00000001" // /* MW 11 */ + 10198 "00000000" // /* MW 10 */ + 10199 "00000000" // /* MW 9 */ + 10200 "00000000" // /* MW 8 */ + 10201 "01011011" // /* MW 7 */ + 10202 "00000001" // /* MW 6 */ + 10203 "01101000" // /* MW 5 */ + 10204 "00101000" // /* MW 4 */ + 10205 "11110000" // /* MW 3 */ + 10206 "00101100" // /* MW 2 */ + 10207 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1132 26 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10208 "11100001" // NOPA; VLDB.3D x1, [p0], d1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10209 "00000000" // /* MW 15 */ + 10210 "00000000" // /* MW 14 */ + 10211 "01111000" // /* MW 13 */ + 10212 "10100101" // /* MW 12 */ + 10213 "00000001" // /* MW 11 */ + 10214 "00000000" // /* MW 10 */ + 10215 "00000000" // /* MW 9 */ + 10216 "00000000" // /* MW 8 */ + 10217 "01011011" // /* MW 7 */ + 10218 "00000001" // /* MW 6 */ + 10219 "11101000" // /* MW 5 */ + 10220 "01110000" // /* MW 4 */ + 10221 "11110000" // /* MW 3 */ + 10222 "00101100" // /* MW 2 */ + 10223 "00000000" // /* MW 1 */ +.src_ref 4 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10224 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmlh0, x1, x0, r7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10225 "00000000" // /* MW 15 */ + 10226 "00000000" // /* MW 14 */ + 10227 "11101000" // /* MW 13 */ + 10228 "00001110" // /* MW 12 */ + 10229 "01000100" // /* MW 11 */ + 10230 "00000000" // /* MW 10 */ + 10231 "00000000" // /* MW 9 */ + 10232 "00000000" // /* MW 8 */ + 10233 "01011011" // /* MW 7 */ + 10234 "00000001" // /* MW 6 */ + 10235 "00100000" // /* MW 5 */ + 10236 "00000000" // /* MW 4 */ + 10237 "11110000" // /* MW 3 */ + 10238 "00101100" // /* MW 2 */ + 10239 "00000000" // /* MW 1 */ +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_848 +.src_ref 4 "vector.hpp" 1132 26 first +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "transpose.hpp" 225 15 first +.begin_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 3 + 10240 "11100001" // NOPA; VLDB x0, [p0, #64]; MOVS p4, p1; NOPX; VSHUFFLE bmll0, x1, x0, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10241 "00000000" // /* MW 15 */ + 10242 "00000000" // /* MW 14 */ + 10243 "11101000" // /* MW 13 */ + 10244 "00100000" // /* MW 12 */ + 10245 "00000100" // /* MW 11 */ + 10246 "00000000" // /* MW 10 */ + 10247 "00000000" // /* MW 9 */ + 10248 "00000000" // /* MW 8 */ + 10249 "10001011" // /* MW 7 */ + 10250 "10000100" // /* MW 6 */ + 10251 "01101100" // /* MW 5 */ + 10252 "00101000" // /* MW 4 */ + 10253 "11110000" // /* MW 3 */ + 10254 "00101100" // /* MW 2 */ + 10255 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1132 26 +.src_ref 4 "vector.hpp" 1152 43 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10256 "11100001" // NOPA; VLDB.3D x1, [p0], d1; VST.3D bmlh0, [p1], d0; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10257 "00000000" // /* MW 15 */ + 10258 "00000000" // /* MW 14 */ + 10259 "01111000" // /* MW 13 */ + 10260 "10100101" // /* MW 12 */ + 10261 "00000001" // /* MW 11 */ + 10262 "00000000" // /* MW 10 */ + 10263 "00000000" // /* MW 9 */ + 10264 "10000000" // /* MW 8 */ + 10265 "00100110" // /* MW 7 */ + 10266 "00011000" // /* MW 6 */ + 10267 "11101001" // /* MW 5 */ + 10268 "01110000" // /* MW 4 */ + 10269 "11110000" // /* MW 3 */ + 10270 "00101100" // /* MW 2 */ + 10271 "00000000" // /* MW 1 */ +.label ZLE_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_880 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "transpose.hpp" 224 15 first +.end_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10272 "11100001" // NOPA; NOPB; VST bmll0, [p4, #64]; NOPX; VSHUFFLE bmlh0, x1, x0, r7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10273 "00000000" // /* MW 15 */ + 10274 "00000000" // /* MW 14 */ + 10275 "11101000" // /* MW 13 */ + 10276 "00001110" // /* MW 12 */ + 10277 "01000100" // /* MW 11 */ + 10278 "00000000" // /* MW 10 */ + 10279 "00000000" // /* MW 9 */ + 10280 "10000000" // /* MW 8 */ + 10281 "00000110" // /* MW 7 */ + 10282 "00010100" // /* MW 6 */ + 10283 "00100100" // /* MW 5 */ + 10284 "00000000" // /* MW 4 */ + 10285 "11110000" // /* MW 3 */ + 10286 "00101100" // /* MW 2 */ + 10287 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 2 + 10288 "00000010" // MOVS p4, p1; VSHUFFLE bmll0, x1, x0, r16 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10289 "11100000" // /* MW 7 */ + 10290 "00100000" // /* MW 6 */ + 10291 "00000100" // /* MW 5 */ + 10292 "00000000" // /* MW 4 */ + 10293 "01100000" // /* MW 3 */ + 10294 "10010001" // /* MW 2 */ + 10295 "10010000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1152 43 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10296 "10011000" // VST.3D bmlh0, [p1], d0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10297 "00100110" // /* MW 3 */ + 10298 "00011000" // /* MW 2 */ + 10299 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10300 "00000010" // VST bmll0, [p4, #64]; VSHUFFLE bmlh0, x1, x0, r7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10301 "11100000" // /* MW 7 */ + 10302 "00001110" // /* MW 6 */ + 10303 "01000100" // /* MW 5 */ + 10304 "00000000" // /* MW 4 */ + 10305 "11010000" // /* MW 3 */ + 10306 "10000000" // /* MW 2 */ + 10307 "10000010" // /* MW 1 */ +.src_ref 4 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10308 "11011000" // VSHUFFLE bmll0, x1, x0, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10309 "01000001" // /* MW 3 */ + 10310 "00001000" // /* MW 2 */ + 10311 "00011000" // /* MW 1 */ + 10312 "10000100" // J #10496 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10496 delay_slots=5 */ + 10313 "00000000" // /* MW 5 */ + 10314 "00000000" // /* MW 4 */ + 10315 "10000000" // /* MW 3 */ + 10316 "00010100" // /* MW 2 */ + 10317 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 first +.delay_slot + 10318 "00000010" // VST.3D bmlh0, [p1], d0; MOV p4, p1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10319 "01110000" // /* MW 7 */ + 10320 "01100000" // /* MW 6 */ + 10321 "00110001" // /* MW 5 */ + 10322 "00000010" // /* MW 4 */ + 10323 "11010000" // /* MW 3 */ + 10324 "00000100" // /* MW 2 */ + 10325 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "transpose.hpp" 224 15 first +.delay_slot + 10326 "00000010" // VST bmll0, [p4, #64]; VSHUFFLE bmlh0, x1, x0, r7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10327 "11100000" // /* MW 7 */ + 10328 "00001110" // /* MW 6 */ + 10329 "01000100" // /* MW 5 */ + 10330 "00000000" // /* MW 4 */ + 10331 "11010000" // /* MW 3 */ + 10332 "10000000" // /* MW 2 */ + 10333 "10000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "transpose.hpp" 225 15 first +.delay_slot + 10334 "00000010" // MOVS p4, p1; VSHUFFLE bmll0, x1, x0, r16 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10335 "11100000" // /* MW 7 */ + 10336 "00100000" // /* MW 6 */ + 10337 "00000100" // /* MW 5 */ + 10338 "00000000" // /* MW 4 */ + 10339 "01100000" // /* MW 3 */ + 10340 "10010001" // /* MW 2 */ + 10341 "10010000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1152 43 first +.delay_slot + 10342 "10011000" // VST.3D bmlh0, [p1], d0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10343 "00100110" // /* MW 3 */ + 10344 "00011000" // /* MW 2 */ + 10345 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.delay_slot + 10346 "00001100" // NOPA; VST bmll0, [p4, #64] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10347 "00001101" // /* MW 5 */ + 10348 "00101000" // /* MW 4 */ + 10349 "11111000" // /* MW 3 */ + 10350 "00101100" // /* MW 2 */ + 10351 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_960 +.src_ref 3 "transposeshuffle.h" 88 16 first + 10352 "01000100" // MOVXM ls, #10368 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10353 "00000000" // /* MW 5 */ + 10354 "11110001" // /* MW 4 */ + 10355 "00100001" // /* MW 3 */ + 10356 "00000000" // /* MW 2 */ + 10357 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 88 16 + 10358 "01000100" // MOVXM le, #10480 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10359 "11100000" // /* MW 5 */ + 10360 "11110001" // /* MW 4 */ + 10361 "00100110" // /* MW 3 */ + 10362 "00000000" // /* MW 2 */ + 10363 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 88 16 + 10364 "10011000" // ADD.NC lc, r2, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10365 "00000000" // /* MW 3 */ + 10366 "01110001" // /* MW 2 */ + 10367 "00011101" // /* MW 1 */ +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_976 +.src_ref 4 "vector.hpp" 1132 26 first +.src_ref 4 "vector.hpp" 1152 43 +.begin_of_loop +.loop_nesting 3 + 10368 "11110100" // VLDB x0, [p0, #64]; MOV p4, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10369 "10000001" // /* MW 5 */ + 10370 "11000101" // /* MW 4 */ + 10371 "10001000" // /* MW 3 */ + 10372 "10000110" // /* MW 2 */ + 10373 "00000010" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1132 26 + 10374 "00011000" // VLDB.3D x1, [p0], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10375 "01110100" // /* MW 3 */ + 10376 "00111000" // /* MW 2 */ + 10377 "00111000" // /* MW 1 */ + 10378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10379 "00000000" // /* MW 1 */ + 10380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10381 "00000000" // /* MW 1 */ + 10382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10383 "00000000" // /* MW 1 */ + 10384 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10385 "00000000" // /* MW 15 */ + 10386 "00000000" // /* MW 14 */ + 10387 "01111000" // /* MW 13 */ + 10388 "10100101" // /* MW 12 */ + 10389 "00000001" // /* MW 11 */ + 10390 "00000000" // /* MW 10 */ + 10391 "00000000" // /* MW 9 */ + 10392 "00000000" // /* MW 8 */ + 10393 "01011011" // /* MW 7 */ + 10394 "00000001" // /* MW 6 */ + 10395 "00100000" // /* MW 5 */ + 10396 "00000000" // /* MW 4 */ + 10397 "11110000" // /* MW 3 */ + 10398 "00101100" // /* MW 2 */ + 10399 "00000000" // /* MW 1 */ + 10400 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10401 "00000000" // /* MW 15 */ + 10402 "00000000" // /* MW 14 */ + 10403 "01111000" // /* MW 13 */ + 10404 "10100101" // /* MW 12 */ + 10405 "00000001" // /* MW 11 */ + 10406 "00000000" // /* MW 10 */ + 10407 "00000000" // /* MW 9 */ + 10408 "00000000" // /* MW 8 */ + 10409 "01011011" // /* MW 7 */ + 10410 "00000001" // /* MW 6 */ + 10411 "00100000" // /* MW 5 */ + 10412 "00000000" // /* MW 4 */ + 10413 "11110000" // /* MW 3 */ + 10414 "00101100" // /* MW 2 */ + 10415 "00000000" // /* MW 1 */ + 10416 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10417 "00000000" // /* MW 15 */ + 10418 "00000000" // /* MW 14 */ + 10419 "01111000" // /* MW 13 */ + 10420 "10100101" // /* MW 12 */ + 10421 "00000001" // /* MW 11 */ + 10422 "00000000" // /* MW 10 */ + 10423 "00000000" // /* MW 9 */ + 10424 "00000000" // /* MW 8 */ + 10425 "01011011" // /* MW 7 */ + 10426 "00000001" // /* MW 6 */ + 10427 "00100000" // /* MW 5 */ + 10428 "00000000" // /* MW 4 */ + 10429 "11110000" // /* MW 3 */ + 10430 "00101100" // /* MW 2 */ + 10431 "00000000" // /* MW 1 */ +.src_ref 4 "transpose.hpp" 224 15 first + 10432 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmlh0, x1, x0, r7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10433 "00000000" // /* MW 15 */ + 10434 "00000000" // /* MW 14 */ + 10435 "11101000" // /* MW 13 */ + 10436 "00001110" // /* MW 12 */ + 10437 "01000100" // /* MW 11 */ + 10438 "00000000" // /* MW 10 */ + 10439 "00000000" // /* MW 9 */ + 10440 "00000000" // /* MW 8 */ + 10441 "01011011" // /* MW 7 */ + 10442 "00000001" // /* MW 6 */ + 10443 "00100000" // /* MW 5 */ + 10444 "00000000" // /* MW 4 */ + 10445 "11110000" // /* MW 3 */ + 10446 "00101100" // /* MW 2 */ + 10447 "00000000" // /* MW 1 */ +.src_ref 4 "transpose.hpp" 225 15 first + 10448 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x1, x0, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10449 "00000000" // /* MW 15 */ + 10450 "00000000" // /* MW 14 */ + 10451 "11101000" // /* MW 13 */ + 10452 "00100000" // /* MW 12 */ + 10453 "00000100" // /* MW 11 */ + 10454 "00000000" // /* MW 10 */ + 10455 "00000000" // /* MW 9 */ + 10456 "00000000" // /* MW 8 */ + 10457 "01011011" // /* MW 7 */ + 10458 "00000001" // /* MW 6 */ + 10459 "00100000" // /* MW 5 */ + 10460 "00000000" // /* MW 4 */ + 10461 "11110000" // /* MW 3 */ + 10462 "00101100" // /* MW 2 */ + 10463 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1152 43 first + 10464 "11100001" // NOPA; NOPB; VST.3D bmlh0, [p1], d0; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10465 "00000000" // /* MW 15 */ + 10466 "00000000" // /* MW 14 */ + 10467 "01111000" // /* MW 13 */ + 10468 "10100101" // /* MW 12 */ + 10469 "00000001" // /* MW 11 */ + 10470 "00000000" // /* MW 10 */ + 10471 "00000000" // /* MW 9 */ + 10472 "10000000" // /* MW 8 */ + 10473 "00100110" // /* MW 7 */ + 10474 "00011000" // /* MW 6 */ + 10475 "00100001" // /* MW 5 */ + 10476 "00000000" // /* MW 4 */ + 10477 "11110000" // /* MW 3 */ + 10478 "00101100" // /* MW 2 */ + 10479 "00000000" // /* MW 1 */ +.label ZLE_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1088 +.src_ref 4 "vector.hpp" 1152 43 +.end_of_loop + 10480 "11100001" // NOPA; NOPB; VST bmll0, [p4, #64]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10481 "00000000" // /* MW 15 */ + 10482 "00000000" // /* MW 14 */ + 10483 "01111000" // /* MW 13 */ + 10484 "10100101" // /* MW 12 */ + 10485 "00000001" // /* MW 11 */ + 10486 "00000000" // /* MW 10 */ + 10487 "00000000" // /* MW 9 */ + 10488 "10000000" // /* MW 8 */ + 10489 "00000110" // /* MW 7 */ + 10490 "00010100" // /* MW 6 */ + 10491 "00100100" // /* MW 5 */ + 10492 "00000000" // /* MW 4 */ + 10493 "11110000" // /* MW 3 */ + 10494 "00101100" // /* MW 2 */ + 10495 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1104 +.src_ref 3 "transposeshuffle.h" 87 12 first +.loop_nesting 2 + 10496 "00011000" // JNZD r17, r17, p3 /* MW 4 */ /* control_operation: words=4 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 10497 "11100000" // /* MW 3 */ + 10498 "01100010" // /* MW 2 */ + 10499 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10501 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10503 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10505 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10507 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10508 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10509 "01100111" // /* MW 3 */ + 10510 "00000001" // /* MW 2 */ + 10511 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1120 +.src_ref 3 "transposeshuffle.h" 86 8 first +.loop_nesting 1 + 10512 "00011000" // JNZD r0, r0, p2 /* MW 4 */ /* control_operation: words=4 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 10513 "10100000" // /* MW 3 */ + 10514 "00000000" // /* MW 2 */ + 10515 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10517 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10519 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10521 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10523 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10524 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10525 "01100111" // /* MW 3 */ + 10526 "00000001" // /* MW 2 */ + 10527 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1136 +.src_ref 3 "transposeshuffle.h" 126 first +.loop_nesting 0 + 10528 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10529 "00000000" // /* MW 3 */ + 10530 "00101000" // /* MW 2 */ + 10531 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10533 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10535 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10536 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10537 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10538 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10539 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params__end +.label __Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params___func_end0 + 10541 "00000000" // /* MW 1 */ +.label __ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj___func_begin0 +.label _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj +.function transpose4d_adf_wrapper, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> > > _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj +.src_ref 10 "transpose4d_adf_wrapper.cpp" 33 first +.function_start + 10544 "11111000" // MOV p3, p6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10545 "11000000" // /* MW 3 */ + 10546 "01101100" // /* MW 2 */ + 10547 "00011011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 10 "transpose4d_adf_wrapper.cpp" 37 8 + 10548 "00111010" // MOVS p6, p1; MOVXM p1, #508436 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10549 "00010001" // /* MW 9 */ + 10550 "00001010" // /* MW 8 */ + 10551 "10110001" // /* MW 7 */ + 10552 "11110000" // /* MW 6 */ + 10553 "00000001" // /* MW 5 */ + 10554 "00000000" // /* MW 4 */ + 10555 "01100000" // /* MW 3 */ + 10556 "10010001" // /* MW 2 */ + 10557 "11010000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 37 8 first + 10558 "10011000" // LDA r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10559 "00010110" // /* MW 3 */ + 10560 "00000110" // /* MW 2 */ + 10561 "00000001" // /* MW 1 */ + 10562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10563 "00000000" // /* MW 1 */ + 10564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10565 "00000000" // /* MW 1 */ + 10566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10567 "00000000" // /* MW 1 */ + 10568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10569 "00000000" // /* MW 1 */ + 10570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10571 "00000000" // /* MW 1 */ + 10572 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10573 "00000000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 37 8 +.src_ref 10 "transpose4d_adf_wrapper.cpp" 37 15 + 10574 "10000100" // JNZ r16, #10640 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10640 delay_slots=5 */ + 10575 "00000001" // /* MW 5 */ + 10576 "01000000" // /* MW 4 */ + 10577 "11001000" // /* MW 3 */ + 10578 "00010100" // /* MW 2 */ + 10579 "10000000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 33 +.delay_slot + 10580 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10581 "00000001" // /* MW 5 */ + 10582 "00000000" // /* MW 4 */ + 10583 "00000000" // /* MW 3 */ + 10584 "00001000" // /* MW 2 */ + 10585 "00000000" // /* MW 1 */ +.delay_slot + 10586 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10587 "00111101" // /* MW 3 */ + 10588 "11110100" // /* MW 2 */ + 10589 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.delay_slot + 10590 "00000010" // MOVS p7, p0; MOV p1, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10591 "01110000" // /* MW 7 */ + 10592 "01100000" // /* MW 6 */ + 10593 "10110111" // /* MW 5 */ + 10594 "00000000" // /* MW 4 */ + 10595 "01100000" // /* MW 3 */ + 10596 "00010001" // /* MW 2 */ + 10597 "11110000" // /* MW 1 */ +.delay_slot + 10598 "10011000" // ST p3, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10599 "10011101" // /* MW 3 */ + 10600 "11111001" // /* MW 2 */ + 10601 "00001111" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 38 8 +.delay_slot + 10602 "00111010" // ST p1, [sp, #-4]; MOVXM p0, #508544 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10603 "00010001" // /* MW 9 */ + 10604 "01000000" // /* MW 8 */ + 10605 "00110001" // /* MW 7 */ + 10606 "11110000" // /* MW 6 */ + 10607 "00000001" // /* MW 5 */ + 10608 "00000000" // /* MW 4 */ + 10609 "10110000" // /* MW 3 */ + 10610 "10010011" // /* MW 2 */ + 10611 "11111111" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 38 8 first +.no_stack_arguments + 10612 "00000100" // JL #9120 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9120 delay_slots=5 */ + 10613 "00000001" // /* MW 5 */ + 10614 "00000000" // /* MW 4 */ + 10615 "11010000" // /* MW 3 */ + 10616 "00010001" // /* MW 2 */ + 10617 "00000000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 38 8 +.delay_slot + 10618 "11111000" // MOV p1, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10619 "11000000" // /* MW 3 */ + 10620 "01100100" // /* MW 2 */ + 10621 "00011001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10622 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10623 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10625 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10627 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10628 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10629 "10000001" // /* MW 11 */ + 10630 "10101101" // /* MW 10 */ + 10631 "00000000" // /* MW 9 */ + 10632 "00000000" // /* MW 8 */ + 10633 "00000000" // /* MW 7 */ + 10634 "00000000" // /* MW 6 */ + 10635 "00100000" // /* MW 5 */ + 10636 "00000000" // /* MW 4 */ + 10637 "11110000" // /* MW 3 */ + 10638 "00101100" // /* MW 2 */ + 10639 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj_96 +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 3 "transposeshuffle.h" 137 72 +.return_address + 10640 "10111010" // LDA r16, [p7]; MOVXM p7, #508564 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10641 "00010000" // /* MW 9 */ + 10642 "01001010" // /* MW 8 */ + 10643 "10110001" // /* MW 7 */ + 10644 "11110011" // /* MW 6 */ + 10645 "00000001" // /* MW 5 */ + 10646 "00000000" // /* MW 4 */ + 10647 "11010000" // /* MW 3 */ + 10648 "11000010" // /* MW 2 */ + 10649 "11100000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 137 72 first + 10650 "10011000" // LDA r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10651 "00110110" // /* MW 3 */ + 10652 "00000110" // /* MW 2 */ + 10653 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 10654 "10011000" // LDA p1, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10655 "10011110" // /* MW 3 */ + 10656 "00000100" // /* MW 2 */ + 10657 "00000110" // /* MW 1 */ + 10658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10659 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 137 11 first +.no_stack_arguments + 10660 "00000100" // JL #9392 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9392 delay_slots=5 */ + 10661 "00000001" // /* MW 5 */ + 10662 "00000000" // /* MW 4 */ + 10663 "01011000" // /* MW 3 */ + 10664 "00010010" // /* MW 2 */ + 10665 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 137 64 +.delay_slot + 10666 "00011000" // MOVX r18, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10667 "00000101" // /* MW 3 */ + 10668 "00100100" // /* MW 2 */ + 10669 "00010000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 137 11 +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 27 +.delay_slot + 10670 "01000100" // MOVXM p2, #508544 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10671 "00000000" // /* MW 5 */ + 10672 "11000101" // /* MW 4 */ + 10673 "11000100" // /* MW 3 */ + 10674 "00000111" // /* MW 2 */ + 10675 "00000000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 27 +.delay_slot + 10676 "11111000" // MOV p6, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10677 "11000000" // /* MW 3 */ + 10678 "01100100" // /* MW 2 */ + 10679 "00011110" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 137 64 +.delay_slot + 10680 "10011000" // LSHL r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10681 "00101101" // /* MW 3 */ + 10682 "01100011" // /* MW 2 */ + 10683 "00010100" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 137 64 +.delay_slot + 10684 "01011000" // ADD.NC p0, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10685 "11000001" // /* MW 3 */ + 10686 "01101000" // /* MW 2 */ + 10687 "00011000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 8 +.src_ref 10 "transpose4d_adf_wrapper.cpp" 46 +.return_address + 10688 "10111010" // LDA lr, [sp, #-12]; MOVXM p2, #508436 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10689 "00010000" // /* MW 9 */ + 10690 "00001010" // /* MW 8 */ + 10691 "00110001" // /* MW 7 */ + 10692 "11110001" // /* MW 6 */ + 10693 "00000001" // /* MW 5 */ + 10694 "00000000" // /* MW 4 */ + 10695 "00100000" // /* MW 3 */ + 10696 "10000111" // /* MW 2 */ + 10697 "11111110" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 8 +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 8 first + 10698 "00101100" // LDA r16, [p2]; MOVX r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10699 "00000010" // /* MW 5 */ + 10700 "01100000" // /* MW 4 */ + 10701 "11010000" // /* MW 3 */ + 10702 "11000010" // /* MW 2 */ + 10703 "01000000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 27 + 10704 "10011000" // LDA r17, [p6, #24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10705 "00110110" // /* MW 3 */ + 10706 "01100110" // /* MW 2 */ + 10707 "00000110" // /* MW 1 */ + 10708 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10709 "00011001" // /* MW 3 */ + 10710 "11111011" // /* MW 2 */ + 10711 "00000111" // /* MW 1 */ + 10712 "00011000" // LDA p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10713 "10011001" // /* MW 3 */ + 10714 "11111111" // /* MW 2 */ + 10715 "00000111" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 46 first + 10716 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10717 "00000001" // /* MW 5 */ + 10718 "00000000" // /* MW 4 */ + 10719 "00000000" // /* MW 3 */ + 10720 "11111000" // /* MW 2 */ + 10721 "11111111" // /* MW 1 */ + 10722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10723 "00000000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 46 + 10724 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10725 "00000000" // /* MW 3 */ + 10726 "00101000" // /* MW 2 */ + 10727 "00010000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 8 first +.delay_slot + 10728 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10729 "00000111" // /* MW 3 */ + 10730 "00100000" // /* MW 2 */ + 10731 "00010100" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 17 +.delay_slot + 10732 "10011000" // EQ r27, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10733 "00000111" // /* MW 3 */ + 10734 "01110111" // /* MW 2 */ + 10735 "00010100" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 8 +.delay_slot + 10736 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10737 "10000010" // /* MW 3 */ + 10738 "00100001" // /* MW 2 */ + 10739 "00010100" // /* MW 1 */ +.delay_slot + 10740 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10741 "00010001" // /* MW 3 */ + 10742 "00000110" // /* MW 2 */ + 10743 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj__end +.label __ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj___func_end0 + 10745 "00000000" // /* MW 1 */ +.label __Z14_b7835_wrapperPPv___func_begin0 +.label _Z14_b7835_wrapperPPv +.function _b7835_wrapper _Z14_b7835_wrapperPPv +.src_ref 0 "0_0_reloadable4.cc" 30 first +.src_ref 0 "0_0_reloadable4.cc" 32 79 +.function_start + 10752 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10753 "11000000" // /* MW 3 */ + 10754 "01100000" // /* MW 2 */ + 10755 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 32 79 first + 10756 "10011000" // LDA p0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10757 "00011110" // /* MW 3 */ + 10758 "00011100" // /* MW 2 */ + 10759 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 34 46 first + 10760 "10011000" // LDA p2, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10761 "00011110" // /* MW 3 */ + 10762 "00010101" // /* MW 2 */ + 10763 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 33 80 first + 10764 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10765 "10011110" // /* MW 3 */ + 10766 "00000100" // /* MW 2 */ + 10767 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 31 4 first +.tail_call + 10768 "10000100" // J #10544 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10544 delay_slots=5 */ + 10769 "00000000" // /* MW 5 */ + 10770 "00000000" // /* MW 4 */ + 10771 "10011000" // /* MW 3 */ + 10772 "00010100" // /* MW 2 */ + 10773 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10775 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10776 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10777 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10778 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10779 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10780 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10781 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z14_b7835_wrapperPPv__end +.label __Z14_b7835_wrapperPPv___func_end0 + 10783 "00000000" // /* MW 1 */ +.label __ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj___func_begin0 +.label _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj +.function buffer_pad_adf_wrapper, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> > > _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 24 first +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 26 25 +.function_start + 10784 "11010100" // LDA el0, [p1]; MOV r17, p2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10785 "10000001" // /* MW 5 */ + 10786 "10101001" // /* MW 4 */ + 10787 "11011000" // /* MW 3 */ + 10788 "10000101" // /* MW 2 */ + 10789 "00100000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 26 25 first + 10790 "00011000" // ADD.NC p1, r17, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10791 "10000010" // /* MW 3 */ + 10792 "01101000" // /* MW 2 */ + 10793 "00011001" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 26 25 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 10794 "10011000" // LDA r18, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10795 "01010110" // /* MW 3 */ + 10796 "00011110" // /* MW 2 */ + 10797 "00000001" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 27 33 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10798 "10011000" // LDA r15, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10799 "11110110" // /* MW 3 */ + 10800 "00000101" // /* MW 2 */ + 10801 "00000001" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10803 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10805 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10807 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10809 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10810 "11111000" // MOV r0, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10811 "10100000" // /* MW 3 */ + 10812 "00010111" // /* MW 2 */ + 10813 "00011000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 24 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 10814 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10815 "00000001" // /* MW 5 */ + 10816 "00000000" // /* MW 4 */ + 10817 "00000000" // /* MW 3 */ + 10818 "00001000" // /* MW 2 */ + 10819 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 43 first +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 72 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10820 "01100100" // MUL r18, r15, r18; MOV r16, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10821 "11111101" // /* MW 5 */ + 10822 "00111111" // /* MW 4 */ + 10823 "11111000" // /* MW 3 */ + 10824 "10100101" // /* MW 2 */ + 10825 "01111100" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 72 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10826 "00111010" // ST r18, [sp, #-20]; MOVXM r17, #1073741823 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10827 "10010001" // /* MW 9 */ + 10828 "11111111" // /* MW 8 */ + 10829 "00101111" // /* MW 7 */ + 10830 "11111110" // /* MW 6 */ + 10831 "11111111" // /* MW 5 */ + 10832 "00001111" // /* MW 4 */ + 10833 "10110000" // /* MW 3 */ + 10834 "11001010" // /* MW 2 */ + 10835 "11111101" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 72 + 10836 "10011000" // LSHL r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10837 "00001101" // /* MW 3 */ + 10838 "10100001" // /* MW 2 */ + 10839 "00010100" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 72 + 10840 "10011000" // AND r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10841 "00000100" // /* MW 3 */ + 10842 "01100001" // /* MW 2 */ + 10843 "00010100" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 4 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 22 + 10844 "10000100" // JZ r16, #10928 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10928 delay_slots=5 */ + 10845 "00000001" // /* MW 5 */ + 10846 "00000000" // /* MW 4 */ + 10847 "01011000" // /* MW 3 */ + 10848 "00010101" // /* MW 2 */ + 10849 "10000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.delay_slot + 10850 "11010100" // LDA p7, [p0]; MOV p0, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10851 "10000001" // /* MW 5 */ + 10852 "11011101" // /* MW 4 */ + 10853 "11010000" // /* MW 3 */ + 10854 "11110011" // /* MW 2 */ + 10855 "00000000" // /* MW 1 */ +.delay_slot + 10856 "10011000" // ST p0, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10857 "00011101" // /* MW 3 */ + 10858 "11111000" // /* MW 2 */ + 10859 "00001111" // /* MW 1 */ +.delay_slot + 10860 "10011000" // ST r14, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10861 "11010101" // /* MW 3 */ + 10862 "11110101" // /* MW 2 */ + 10863 "00001111" // /* MW 1 */ +.delay_slot + 10864 "10011000" // ST lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10865 "00111101" // /* MW 3 */ + 10866 "11110000" // /* MW 2 */ + 10867 "00001111" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 25 24 first +.delay_slot + 10868 "00001100" // LDA r14, [p1, #-8]; ST r0, [sp, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10869 "00101011" // /* MW 5 */ + 10870 "11111000" // /* MW 4 */ + 10871 "11011111" // /* MW 3 */ + 10872 "10111010" // /* MW 2 */ + 10873 "00111100" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 37 31 + 10874 "01011100" // ST el0, [sp, #-24]; MOVX r0, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10875 "00000010" // /* MW 5 */ + 10876 "00000000" // /* MW 4 */ + 10877 "10110000" // /* MW 3 */ + 10878 "00000101" // /* MW 2 */ + 10879 "11111101" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 37 31 + 10880 "00011000" // LDA p1, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10881 "10011001" // /* MW 3 */ + 10882 "11101000" // /* MW 2 */ + 10883 "00000111" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 37 31 first +.no_stack_arguments + 10884 "00000100" // JL #12608 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12608 delay_slots=5 */ + 10885 "00000001" // /* MW 5 */ + 10886 "00000000" // /* MW 4 */ + 10887 "10100000" // /* MW 3 */ + 10888 "00011000" // /* MW 2 */ + 10889 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 4 +.delay_slot + 10890 "00011000" // MOVX r17, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10891 "00001001" // /* MW 3 */ + 10892 "00100010" // /* MW 2 */ + 10893 "00010000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 4 first +.delay_slot + 10894 "10011000" // LSHL r1, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10895 "00011101" // /* MW 3 */ + 10896 "00000011" // /* MW 2 */ + 10897 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10899 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10901 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10902 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10903 "01111110" // /* MW 9 */ + 10904 "10100101" // /* MW 8 */ + 10905 "00000001" // /* MW 7 */ + 10906 "00000000" // /* MW 6 */ + 10907 "00010000" // /* MW 5 */ + 10908 "00000000" // /* MW 4 */ + 10909 "11110000" // /* MW 3 */ + 10910 "00101100" // /* MW 2 */ + 10911 "00000000" // /* MW 1 */ +.return_address + 10912 "10000100" // J #10944 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10944 delay_slots=5 */ + 10913 "00000000" // /* MW 5 */ + 10914 "00000000" // /* MW 4 */ + 10915 "01100000" // /* MW 3 */ + 10916 "00010101" // /* MW 2 */ + 10917 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10919 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10920 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10921 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10923 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10925 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10927 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_144 + 10928 "11100001" // NOPA; NOPB; ST el0, [sp, #-24]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10929 "00000000" // /* MW 15 */ + 10930 "00000000" // /* MW 14 */ + 10931 "01111000" // /* MW 13 */ + 10932 "10100101" // /* MW 12 */ + 10933 "00000001" // /* MW 11 */ + 10934 "00000000" // /* MW 10 */ + 10935 "00000000" // /* MW 9 */ + 10936 "10000000" // /* MW 8 */ + 10937 "00101101" // /* MW 7 */ + 10938 "11101000" // /* MW 6 */ + 10939 "00100111" // /* MW 5 */ + 10940 "00000000" // /* MW 4 */ + 10941 "11110000" // /* MW 3 */ + 10942 "00101100" // /* MW 2 */ + 10943 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_160 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 40 4 first +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 40 22 first + 10944 "10000100" // JZ r15, #11216 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11216 delay_slots=5 */ + 10945 "00000001" // /* MW 5 */ + 10946 "00000000" // /* MW 4 */ + 10947 "11101000" // /* MW 3 */ + 10948 "00010101" // /* MW 2 */ + 10949 "01111000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10951 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10953 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10955 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10957 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10959 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 41 8 first + 10960 "10111010" // LDA r17, [sp, #-20]; MOVXM ls, #11056 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10961 "00010000" // /* MW 9 */ + 10962 "10011000" // /* MW 8 */ + 10963 "01111101" // /* MW 7 */ + 10964 "00001000" // /* MW 6 */ + 10965 "00000000" // /* MW 5 */ + 10966 "00000000" // /* MW 4 */ + 10967 "00100000" // /* MW 3 */ + 10968 "11000110" // /* MW 2 */ + 10969 "11111101" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 41 8 + 10970 "10111010" // MOVA r19, #1; MOVXM le, #11152 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10971 "00010000" // /* MW 9 */ + 10972 "11001000" // /* MW 8 */ + 10973 "10111101" // /* MW 7 */ + 10974 "00001001" // /* MW 6 */ + 10975 "00000000" // /* MW 5 */ + 10976 "00000000" // /* MW 4 */ + 10977 "00000000" // /* MW 3 */ + 10978 "00110011" // /* MW 2 */ + 10979 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 42 36 + 10980 "10111010" // LDA r18, [sp, #-24]; LSHL r20, r14, r19; ADD.NC r16, r15, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10981 "11001000" // /* MW 9 */ + 10982 "11111111" // /* MW 8 */ + 10983 "00001011" // /* MW 7 */ + 10984 "11101110" // /* MW 6 */ + 10985 "01001001" // /* MW 5 */ + 10986 "00011101" // /* MW 4 */ + 10987 "00100000" // /* MW 3 */ + 10988 "01001010" // /* MW 2 */ + 10989 "11111101" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 40 4 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 45 + 10990 "10111010" // LDA lr, [sp, #-16]; MOVXM p0, #11024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10991 "00010000" // /* MW 9 */ + 10992 "10001000" // /* MW 8 */ + 10993 "00110101" // /* MW 7 */ + 10994 "00001000" // /* MW 6 */ + 10995 "00000000" // /* MW 5 */ + 10996 "00000000" // /* MW 4 */ + 10997 "00100000" // /* MW 3 */ + 10998 "00000111" // /* MW 2 */ + 10999 "11111110" // /* MW 1 */ + 11000 "11111000" // MOV m0, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11001 "00100000" // /* MW 3 */ + 11002 "00001010" // /* MW 2 */ + 11003 "00011000" // /* MW 1 */ + 11004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11005 "00000000" // /* MW 1 */ + 11006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11007 "00000000" // /* MW 1 */ + 11008 "11100001" // NOPA; NOPB; NOPS; LSHL r17, r17, r19; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11009 "00000000" // /* MW 15 */ + 11010 "00000000" // /* MW 14 */ + 11011 "01111000" // /* MW 13 */ + 11012 "10100101" // /* MW 12 */ + 11013 "00000001" // /* MW 11 */ + 11014 "11101100" // /* MW 10 */ + 11015 "00011001" // /* MW 9 */ + 11016 "00100011" // /* MW 8 */ + 11017 "01011011" // /* MW 7 */ + 11018 "00000001" // /* MW 6 */ + 11019 "00100000" // /* MW 5 */ + 11020 "00000000" // /* MW 4 */ + 11021 "11110000" // /* MW 3 */ + 11022 "00101100" // /* MW 2 */ + 11023 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_240 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 41 8 +.loop_nesting 1 + 11024 "10000100" // JZ r14, #11168 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11168 delay_slots=5 */ + 11025 "00000001" // /* MW 5 */ + 11026 "00000000" // /* MW 4 */ + 11027 "11010000" // /* MW 3 */ + 11028 "00010101" // /* MW 2 */ + 11029 "01110000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11031 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11032 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11033 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11034 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11035 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11036 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11037 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11038 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11039 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 41 8 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 42 36 + 11040 "00000010" // MOVS p2, p7; MOV lc, r14 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11041 "01110000" // /* MW 7 */ + 11042 "10010000" // /* MW 6 */ + 11043 "10111011" // /* MW 5 */ + 11044 "00000010" // /* MW 4 */ + 11045 "01100000" // /* MW 3 */ + 11046 "10010001" // /* MW 2 */ + 11047 "01010011" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 42 36 + 11048 "00000010" // NOPS; MOV p1, r18 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11049 "01110000" // /* MW 7 */ + 11050 "10010000" // /* MW 6 */ + 11051 "10110100" // /* MW 5 */ + 11052 "00000000" // /* MW 4 */ + 11053 "01100000" // /* MW 3 */ + 11054 "00101011" // /* MW 2 */ + 11055 "00000000" // /* MW 1 */ +.label ZLS_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_272 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 42 36 first +.begin_of_loop +.loop_nesting 2 + 11056 "11100001" // LDA.s16 r19, [p2], #2; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11057 "00000000" // /* MW 15 */ + 11058 "00000000" // /* MW 14 */ + 11059 "01111000" // /* MW 13 */ + 11060 "10100101" // /* MW 12 */ + 11061 "00000001" // /* MW 11 */ + 11062 "00000000" // /* MW 10 */ + 11063 "00000000" // /* MW 9 */ + 11064 "00000000" // /* MW 8 */ + 11065 "01011011" // /* MW 7 */ + 11066 "00000001" // /* MW 6 */ + 11067 "00100000" // /* MW 5 */ + 11068 "00000000" // /* MW 4 */ + 11069 "01010000" // /* MW 3 */ + 11070 "11001110" // /* MW 2 */ + 11071 "01000011" // /* MW 1 */ + 11072 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11073 "00000000" // /* MW 15 */ + 11074 "00000000" // /* MW 14 */ + 11075 "01111000" // /* MW 13 */ + 11076 "10100101" // /* MW 12 */ + 11077 "00000001" // /* MW 11 */ + 11078 "00000000" // /* MW 10 */ + 11079 "00000000" // /* MW 9 */ + 11080 "00000000" // /* MW 8 */ + 11081 "01011011" // /* MW 7 */ + 11082 "00000001" // /* MW 6 */ + 11083 "00100000" // /* MW 5 */ + 11084 "00000000" // /* MW 4 */ + 11085 "11110000" // /* MW 3 */ + 11086 "00101100" // /* MW 2 */ + 11087 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 42 36 + 11088 "11100001" // ST.s16 r19, [p1], #2; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11089 "00000000" // /* MW 15 */ + 11090 "00000000" // /* MW 14 */ + 11091 "01111000" // /* MW 13 */ + 11092 "10100101" // /* MW 12 */ + 11093 "00000001" // /* MW 11 */ + 11094 "00000000" // /* MW 10 */ + 11095 "00000000" // /* MW 9 */ + 11096 "00000000" // /* MW 8 */ + 11097 "01011011" // /* MW 7 */ + 11098 "00000001" // /* MW 6 */ + 11099 "00100000" // /* MW 5 */ + 11100 "00000000" // /* MW 4 */ + 11101 "11100000" // /* MW 3 */ + 11102 "11001110" // /* MW 2 */ + 11103 "00100011" // /* MW 1 */ + 11104 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11105 "00000000" // /* MW 15 */ + 11106 "00000000" // /* MW 14 */ + 11107 "01111000" // /* MW 13 */ + 11108 "10100101" // /* MW 12 */ + 11109 "00000001" // /* MW 11 */ + 11110 "00000000" // /* MW 10 */ + 11111 "00000000" // /* MW 9 */ + 11112 "00000000" // /* MW 8 */ + 11113 "01011011" // /* MW 7 */ + 11114 "00000001" // /* MW 6 */ + 11115 "00100000" // /* MW 5 */ + 11116 "00000000" // /* MW 4 */ + 11117 "11110000" // /* MW 3 */ + 11118 "00101100" // /* MW 2 */ + 11119 "00000000" // /* MW 1 */ + 11120 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11121 "00000000" // /* MW 15 */ + 11122 "00000000" // /* MW 14 */ + 11123 "01111000" // /* MW 13 */ + 11124 "10100101" // /* MW 12 */ + 11125 "00000001" // /* MW 11 */ + 11126 "00000000" // /* MW 10 */ + 11127 "00000000" // /* MW 9 */ + 11128 "00000000" // /* MW 8 */ + 11129 "01011011" // /* MW 7 */ + 11130 "00000001" // /* MW 6 */ + 11131 "00100000" // /* MW 5 */ + 11132 "00000000" // /* MW 4 */ + 11133 "11110000" // /* MW 3 */ + 11134 "00101100" // /* MW 2 */ + 11135 "00000000" // /* MW 1 */ + 11136 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11137 "00000000" // /* MW 15 */ + 11138 "00000000" // /* MW 14 */ + 11139 "01111000" // /* MW 13 */ + 11140 "10100101" // /* MW 12 */ + 11141 "00000001" // /* MW 11 */ + 11142 "00000000" // /* MW 10 */ + 11143 "00000000" // /* MW 9 */ + 11144 "00000000" // /* MW 8 */ + 11145 "01011011" // /* MW 7 */ + 11146 "00000001" // /* MW 6 */ + 11147 "00100000" // /* MW 5 */ + 11148 "00000000" // /* MW 4 */ + 11149 "11110000" // /* MW 3 */ + 11150 "00101100" // /* MW 2 */ + 11151 "00000000" // /* MW 1 */ +.label ZLE_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_368 +.end_of_loop + 11152 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11153 "00000000" // /* MW 15 */ + 11154 "00000000" // /* MW 14 */ + 11155 "01111000" // /* MW 13 */ + 11156 "10100101" // /* MW 12 */ + 11157 "00000001" // /* MW 11 */ + 11158 "00000000" // /* MW 10 */ + 11159 "00000000" // /* MW 9 */ + 11160 "00000000" // /* MW 8 */ + 11161 "01011011" // /* MW 7 */ + 11162 "00000001" // /* MW 6 */ + 11163 "00100000" // /* MW 5 */ + 11164 "00000000" // /* MW 4 */ + 11165 "11110000" // /* MW 3 */ + 11166 "00101100" // /* MW 2 */ + 11167 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_384 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 40 4 first +.loop_nesting 1 + 11168 "00011100" // PADDB [p7], m0; JNZD r16, r16, p0 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 11169 "01000000" // /* MW 5 */ + 11170 "01000000" // /* MW 4 */ + 11171 "00001000" // /* MW 3 */ + 11172 "01110010" // /* MW 2 */ + 11173 "11100001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11175 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11177 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11179 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11181 "00000000" // /* MW 1 */ +.delay_slot + 11182 "01011000" // ADD.NC r18, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11183 "11001001" // /* MW 3 */ + 11184 "10011000" // /* MW 2 */ + 11185 "00011100" // /* MW 1 */ +.loop_nesting 0 + 11186 "10000100" // J #11232 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11232 delay_slots=5 */ + 11187 "00000000" // /* MW 5 */ + 11188 "00000000" // /* MW 4 */ + 11189 "11110000" // /* MW 3 */ + 11190 "00010101" // /* MW 2 */ + 11191 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11193 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11195 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11197 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11199 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11200 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11201 "00000000" // /* MW 15 */ + 11202 "00000000" // /* MW 14 */ + 11203 "01111000" // /* MW 13 */ + 11204 "10100101" // /* MW 12 */ + 11205 "00000001" // /* MW 11 */ + 11206 "00000000" // /* MW 10 */ + 11207 "00000000" // /* MW 9 */ + 11208 "00000000" // /* MW 8 */ + 11209 "01011011" // /* MW 7 */ + 11210 "00000001" // /* MW 6 */ + 11211 "00100000" // /* MW 5 */ + 11212 "00000000" // /* MW 4 */ + 11213 "11110000" // /* MW 3 */ + 11214 "00101100" // /* MW 2 */ + 11215 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_432 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 45 + 11216 "11100001" // LDA lr, [sp, #-16]; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11217 "00000000" // /* MW 15 */ + 11218 "00000000" // /* MW 14 */ + 11219 "01111000" // /* MW 13 */ + 11220 "10100101" // /* MW 12 */ + 11221 "00000001" // /* MW 11 */ + 11222 "00000000" // /* MW 10 */ + 11223 "00000000" // /* MW 9 */ + 11224 "00000000" // /* MW 8 */ + 11225 "01011011" // /* MW 7 */ + 11226 "00000001" // /* MW 6 */ + 11227 "00100000" // /* MW 5 */ + 11228 "00000000" // /* MW 4 */ + 11229 "00100000" // /* MW 3 */ + 11230 "00000111" // /* MW 2 */ + 11231 "11111110" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_448 + 11232 "00011000" // LDA r14, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11233 "11010001" // /* MW 3 */ + 11234 "11110101" // /* MW 2 */ + 11235 "00000111" // /* MW 1 */ + 11236 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11237 "10011001" // /* MW 3 */ + 11238 "11111011" // /* MW 2 */ + 11239 "00000111" // /* MW 1 */ + 11240 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11241 "11110001" // /* MW 3 */ + 11242 "11111101" // /* MW 2 */ + 11243 "00000111" // /* MW 1 */ + 11244 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11245 "00000000" // /* MW 1 */ + 11246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11247 "00000000" // /* MW 1 */ + 11248 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11249 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 45 first + 11250 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11251 "00000000" // /* MW 3 */ + 11252 "00101000" // /* MW 2 */ + 11253 "00010000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 45 +.delay_slot + 11254 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11255 "00000001" // /* MW 5 */ + 11256 "00000000" // /* MW 4 */ + 11257 "00000000" // /* MW 3 */ + 11258 "11111000" // /* MW 2 */ + 11259 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11261 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11263 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11265 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj__end +.label __ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj___func_end0 + 11267 "00000000" // /* MW 1 */ +.label __Z14_b8148_wrapperPPv___func_begin0 +.label _Z14_b8148_wrapperPPv +.function _b8148_wrapper _Z14_b8148_wrapperPPv +.src_ref 0 "0_0_reloadable4.cc" 38 first +.src_ref 0 "0_0_reloadable4.cc" 40 79 +.function_start + 11280 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11281 "11000000" // /* MW 3 */ + 11282 "01100000" // /* MW 2 */ + 11283 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 40 79 first + 11284 "10011000" // LDA p0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11285 "00011110" // /* MW 3 */ + 11286 "00011100" // /* MW 2 */ + 11287 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 42 46 first + 11288 "10011000" // LDA p2, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11289 "00011110" // /* MW 3 */ + 11290 "00010101" // /* MW 2 */ + 11291 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 41 80 first + 11292 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11293 "10011110" // /* MW 3 */ + 11294 "00000100" // /* MW 2 */ + 11295 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 39 4 first +.tail_call + 11296 "10000100" // J #10784 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10784 delay_slots=5 */ + 11297 "00000000" // /* MW 5 */ + 11298 "00000000" // /* MW 4 */ + 11299 "00010000" // /* MW 3 */ + 11300 "00010101" // /* MW 2 */ + 11301 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11303 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11305 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11306 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11307 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11309 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z14_b8148_wrapperPPv__end +.label __Z14_b8148_wrapperPPv___func_end0 + 11311 "00000000" // /* MW 1 */ +.label __Z15_b13739_wrapperPPv___func_begin0 +.label _Z15_b13739_wrapperPPv +.function _b13739_wrapper _Z15_b13739_wrapperPPv +.src_ref 0 "0_0_reloadable4.cc" 46 first +.src_ref 0 "0_0_reloadable4.cc" 48 79 +.function_start + 11312 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11313 "11000000" // /* MW 3 */ + 11314 "01100000" // /* MW 2 */ + 11315 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 48 79 first + 11316 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11317 "00011110" // /* MW 3 */ + 11318 "00101100" // /* MW 2 */ + 11319 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 50 81 first + 11320 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11321 "00011110" // /* MW 3 */ + 11322 "11110101" // /* MW 2 */ + 11323 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 49 47 first + 11324 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11325 "10011110" // /* MW 3 */ + 11326 "00000100" // /* MW 2 */ + 11327 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 47 4 first +.tail_call + 11328 "10000100" // J #3904 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=3904 delay_slots=5 */ + 11329 "00000000" // /* MW 5 */ + 11330 "00000000" // /* MW 4 */ + 11331 "10100000" // /* MW 3 */ + 11332 "00000111" // /* MW 2 */ + 11333 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11335 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11337 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11339 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11341 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z15_b13739_wrapperPPv__end +.label __Z15_b13739_wrapperPPv___func_end0 + 11343 "00000000" // /* MW 1 */ +.label __Z15_b13744_wrapperPPv___func_begin0 +.label _Z15_b13744_wrapperPPv +.function _b13744_wrapper _Z15_b13744_wrapperPPv +.src_ref 0 "0_0_reloadable4.cc" 54 first +.src_ref 0 "0_0_reloadable4.cc" 56 79 +.function_start + 11344 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11345 "11000000" // /* MW 3 */ + 11346 "01100000" // /* MW 2 */ + 11347 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 56 79 first + 11348 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11349 "00011110" // /* MW 3 */ + 11350 "00101100" // /* MW 2 */ + 11351 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 58 81 first + 11352 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11353 "00011110" // /* MW 3 */ + 11354 "11110101" // /* MW 2 */ + 11355 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 57 47 first + 11356 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11357 "10011110" // /* MW 3 */ + 11358 "00000100" // /* MW 2 */ + 11359 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 55 4 first +.tail_call + 11360 "10000100" // J #4864 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=4864 delay_slots=5 */ + 11361 "00000000" // /* MW 5 */ + 11362 "00000000" // /* MW 4 */ + 11363 "10000000" // /* MW 3 */ + 11364 "00001001" // /* MW 2 */ + 11365 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11367 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11369 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11371 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11373 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z15_b13744_wrapperPPv__end +.label __Z15_b13744_wrapperPPv___func_end0 + 11375 "00000000" // /* MW 1 */ +.label __Z15_b13749_wrapperPPv___func_begin0 +.label _Z15_b13749_wrapperPPv +.function _b13749_wrapper _Z15_b13749_wrapperPPv +.src_ref 0 "0_0_reloadable4.cc" 62 first +.src_ref 0 "0_0_reloadable4.cc" 64 79 +.function_start + 11376 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11377 "11000000" // /* MW 3 */ + 11378 "01100000" // /* MW 2 */ + 11379 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 64 79 first + 11380 "10011000" // LDA p0, [p2], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11381 "00011110" // /* MW 3 */ + 11382 "00111100" // /* MW 2 */ + 11383 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 65 47 first + 11384 "10011000" // LDA p1, [p2], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11385 "10011110" // /* MW 3 */ + 11386 "11101100" // /* MW 2 */ + 11387 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 67 81 first + 11388 "10011000" // LDA p3, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11389 "10011110" // /* MW 3 */ + 11390 "00010101" // /* MW 2 */ + 11391 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 66 80 first + 11392 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11393 "00011110" // /* MW 3 */ + 11394 "00000101" // /* MW 2 */ + 11395 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 63 4 first +.tail_call + 11396 "10000100" // J #5872 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=5872 delay_slots=5 */ + 11397 "00000000" // /* MW 5 */ + 11398 "00000000" // /* MW 4 */ + 11399 "01111000" // /* MW 3 */ + 11400 "00001011" // /* MW 2 */ + 11401 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11403 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11405 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11407 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11408 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11409 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z15_b13749_wrapperPPv__end +.label __Z15_b13749_wrapperPPv___func_end0 + 11411 "00000000" // /* MW 1 */ +.label _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj +.label __ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj___func_begin0 +.function load_slice_generic_innermost_rtp _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj +.src_ref 3 "slice_generic_innermost_params.h" 40 first +.src_ref 3 "slice_generic_innermost_params.h" 41 19 first +.function_start + 11424 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11425 "00101110" // /* MW 3 */ + 11426 "00011100" // /* MW 2 */ + 11427 "00000001" // /* MW 1 */ + 11428 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11429 "00000000" // /* MW 1 */ + 11430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11431 "00000000" // /* MW 1 */ + 11432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11433 "00000000" // /* MW 1 */ + 11434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11435 "00000000" // /* MW 1 */ + 11436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11437 "00000000" // /* MW 1 */ + 11438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11439 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 41 17 first + 11440 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11441 "00101001" // /* MW 3 */ + 11442 "00011100" // /* MW 2 */ + 11443 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 42 19 first + 11444 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11445 "00101110" // /* MW 3 */ + 11446 "00011100" // /* MW 2 */ + 11447 "00000001" // /* MW 1 */ + 11448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11449 "00000000" // /* MW 1 */ + 11450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11451 "00000000" // /* MW 1 */ + 11452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11453 "00000000" // /* MW 1 */ + 11454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11455 "00000000" // /* MW 1 */ + 11456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11457 "00000000" // /* MW 1 */ + 11458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11459 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 42 17 + 11460 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11461 "00101001" // /* MW 3 */ + 11462 "00011100" // /* MW 2 */ + 11463 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 43 19 first + 11464 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11465 "00101110" // /* MW 3 */ + 11466 "00011100" // /* MW 2 */ + 11467 "00000001" // /* MW 1 */ + 11468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11469 "00000000" // /* MW 1 */ + 11470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11471 "00000000" // /* MW 1 */ + 11472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11473 "00000000" // /* MW 1 */ + 11474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11475 "00000000" // /* MW 1 */ + 11476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11477 "00000000" // /* MW 1 */ + 11478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11479 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 43 17 + 11480 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11481 "00101001" // /* MW 3 */ + 11482 "00011100" // /* MW 2 */ + 11483 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 44 19 first + 11484 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11485 "00101110" // /* MW 3 */ + 11486 "00011100" // /* MW 2 */ + 11487 "00000001" // /* MW 1 */ + 11488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11489 "00000000" // /* MW 1 */ + 11490 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11491 "00000000" // /* MW 1 */ + 11492 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11493 "00000000" // /* MW 1 */ + 11494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11495 "00000000" // /* MW 1 */ + 11496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11497 "00000000" // /* MW 1 */ + 11498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11499 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 44 17 + 11500 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11501 "00101001" // /* MW 3 */ + 11502 "00011100" // /* MW 2 */ + 11503 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 45 19 first + 11504 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11505 "00101110" // /* MW 3 */ + 11506 "00011100" // /* MW 2 */ + 11507 "00000001" // /* MW 1 */ + 11508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11509 "00000000" // /* MW 1 */ + 11510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11511 "00000000" // /* MW 1 */ + 11512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11513 "00000000" // /* MW 1 */ + 11514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11515 "00000000" // /* MW 1 */ + 11516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11517 "00000000" // /* MW 1 */ + 11518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11519 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 45 17 + 11520 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11521 "00101001" // /* MW 3 */ + 11522 "00011100" // /* MW 2 */ + 11523 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 46 17 first + 11524 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11525 "00101110" // /* MW 3 */ + 11526 "00011100" // /* MW 2 */ + 11527 "00000001" // /* MW 1 */ + 11528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11529 "00000000" // /* MW 1 */ + 11530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11531 "00000000" // /* MW 1 */ + 11532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11533 "00000000" // /* MW 1 */ + 11534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11535 "00000000" // /* MW 1 */ + 11536 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11537 "00000000" // /* MW 1 */ + 11538 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11539 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 46 15 + 11540 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11541 "00101001" // /* MW 3 */ + 11542 "00011100" // /* MW 2 */ + 11543 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 47 18 first + 11544 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11545 "00101110" // /* MW 3 */ + 11546 "00000100" // /* MW 2 */ + 11547 "00000001" // /* MW 1 */ + 11548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11549 "00000000" // /* MW 1 */ + 11550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11551 "00000000" // /* MW 1 */ + 11552 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11553 "00000000" // /* MW 1 */ + 11554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11555 "00000000" // /* MW 1 */ + 11556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11557 "00000000" // /* MW 1 */ + 11558 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11559 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 47 16 + 11560 "10011000" // ST el0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11561 "00101001" // /* MW 3 */ + 11562 "00000100" // /* MW 2 */ + 11563 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 48 18 first + 11564 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11565 "00101110" // /* MW 3 */ + 11566 "00010100" // /* MW 2 */ + 11567 "00000001" // /* MW 1 */ + 11568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11569 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 49 first + 11570 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11571 "00000000" // /* MW 3 */ + 11572 "00101000" // /* MW 2 */ + 11573 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11574 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11575 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11577 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11579 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11581 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 48 16 first +.delay_slot + 11582 "10011000" // ST el0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11583 "00101001" // /* MW 3 */ + 11584 "00010100" // /* MW 2 */ +.label _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj__end +.label __ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj___func_end0 + 11585 "00001000" // /* MW 1 */ +.label _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params +.label __ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params___func_begin0 +.function setup_slice_generic_innermost _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params +.src_ref 3 "slice_generic_innermost_params.h" 52 first +.src_ref 3 "slice_generic_innermost_params.h" 53 25 first +.src_ref 3 "slice_generic_innermost_params.h" 55 42 +.src_ref 3 "slice_generic_innermost_params.h" 58 40 +.function_start + 11600 "10111010" // LDA r1, [p0], #4; MOVX r0, #1; MOV m1, #32 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11601 "01011000" // /* MW 9 */ + 11602 "00100000" // /* MW 8 */ + 11603 "10000000" // /* MW 7 */ + 11604 "00101000" // /* MW 6 */ + 11605 "00000000" // /* MW 5 */ + 11606 "00000000" // /* MW 4 */ + 11607 "11010000" // /* MW 3 */ + 11608 "10000110" // /* MW 2 */ + 11609 "00000011" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 53 38 first +.src_ref 3 "slice_generic_innermost_params.h" 58 30 +.src_ref 3 "slice_generic_innermost_params.h" 59 31 + 11610 "10111010" // LDA r5, [p0], #4; MOVX r3, #2; MOV r2, #-6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11611 "01011000" // /* MW 9 */ + 11612 "11111010" // /* MW 8 */ + 11613 "01001111" // /* MW 7 */ + 11614 "01001000" // /* MW 6 */ + 11615 "00110000" // /* MW 5 */ + 11616 "00000000" // /* MW 4 */ + 11617 "11010000" // /* MW 3 */ + 11618 "10010110" // /* MW 2 */ + 11619 "00000011" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 53 51 +.src_ref 3 "slice_generic_innermost_params.h" 60 27 +.src_ref 3 "slice_generic_innermost_params.h" 62 27 + 11620 "01010100" // LDA r4, [p0], #8; MOV m0, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11621 "00000001" // /* MW 5 */ + 11622 "00000001" // /* MW 4 */ + 11623 "11010000" // /* MW 3 */ + 11624 "10010010" // /* MW 2 */ + 11625 "00000101" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 55 42 first +.src_ref 3 "slice_generic_innermost_params.h" 60 27 + 11626 "01010100" // LDA r6, [p0], m1; MOV dj0, #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11627 "00000001" // /* MW 5 */ + 11628 "00000010" // /* MW 4 */ + 11629 "11010001" // /* MW 3 */ + 11630 "00011010" // /* MW 2 */ + 11631 "00000101" // /* MW 1 */ + 11632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11633 "00000000" // /* MW 1 */ + 11634 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11635 "00000000" // /* MW 1 */ + 11636 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11637 "00000000" // /* MW 1 */ + 11638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11639 "00000000" // /* MW 1 */ + 11640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11641 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 53 30 first + 11642 "10011000" // MUL r1, r5, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11643 "00011111" // /* MW 3 */ + 11644 "01000010" // /* MW 2 */ + 11645 "00010001" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 58 40 first + 11646 "10011000" // AND r0, r6, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11647 "00000100" // /* MW 3 */ + 11648 "10000000" // /* MW 2 */ + 11649 "00010001" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 58 30 + 11650 "10011000" // OR r0, r3, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11651 "00000101" // /* MW 3 */ + 11652 "11000000" // /* MW 2 */ + 11653 "00010000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 53 43 first +.src_ref 3 "slice_generic_innermost_params.h" 58 28 + 11654 "01011100" // ST r0, [p0], #-16; MUL r1, r1, r4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11655 "10011111" // /* MW 5 */ + 11656 "10000100" // /* MW 4 */ + 11657 "00110000" // /* MW 3 */ + 11658 "10000010" // /* MW 2 */ + 11659 "00011001" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 75 first + 11660 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11661 "00000000" // /* MW 3 */ + 11662 "00101000" // /* MW 2 */ + 11663 "00010000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 59 31 first +.delay_slot + 11664 "10011000" // LSHL r0, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11665 "00101101" // /* MW 3 */ + 11666 "01000000" // /* MW 2 */ + 11667 "00010000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 59 25 +.delay_slot + 11668 "10011000" // ST r0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11669 "00010001" // /* MW 3 */ + 11670 "00011100" // /* MW 2 */ + 11671 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 60 27 first +.delay_slot + 11672 "10011000" // ST m0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11673 "00000001" // /* MW 3 */ + 11674 "00011100" // /* MW 2 */ + 11675 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 60 27 +.delay_slot + 11676 "10011000" // ST dj0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11677 "01000001" // /* MW 3 */ + 11678 "00000100" // /* MW 2 */ + 11679 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 62 27 first +.delay_slot + 11680 "10011000" // ST m0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11681 "00000001" // /* MW 3 */ + 11682 "00010100" // /* MW 2 */ +.label _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params__end +.label __ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params___func_end0 + 11683 "00001000" // /* MW 1 */ +.label _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj +.label __ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj___func_begin0 +.function setup_slice_generic_innermost_params _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj +.src_ref 3 "slice_generic_innermost_params.h" 79 first +.src_ref 3 "slice_generic_innermost_params.h" 80 4 first +.function_start +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.noswbrkpt + 11696 "00000100" // JL #11424 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11424 delay_slots=5 */ + 11697 "00000001" // /* MW 5 */ + 11698 "00000000" // /* MW 4 */ + 11699 "01010000" // /* MW 3 */ + 11700 "00010110" // /* MW 2 */ + 11701 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 81 4 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11702 "11111000" // MOV dc0, lr /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11703 "11100000" // /* MW 3 */ + 11704 "11000001" // /* MW 2 */ + 11705 "00011000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 81 4 +.delay_slot + 11706 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11707 "11000000" // /* MW 3 */ + 11708 "01100000" // /* MW 2 */ + 11709 "00011010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11711 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11713 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11714 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 11715 "00011100" // /* MW 13 */ + 11716 "00000000" // /* MW 12 */ + 11717 "00000000" // /* MW 11 */ + 11718 "01010111" // /* MW 10 */ + 11719 "00011010" // /* MW 9 */ + 11720 "01000000" // /* MW 8 */ + 11721 "00000000" // /* MW 7 */ + 11722 "00000000" // /* MW 6 */ + 11723 "10110110" // /* MW 5 */ + 11724 "00000010" // /* MW 4 */ + 11725 "11110000" // /* MW 3 */ + 11726 "00101100" // /* MW 2 */ + 11727 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 81 4 first +.tail_call +.return_address + 11728 "10000100" // J #11600 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=11600 delay_slots=5 */ + 11729 "00000000" // /* MW 5 */ + 11730 "00000000" // /* MW 4 */ + 11731 "10101000" // /* MW 3 */ + 11732 "00010110" // /* MW 2 */ + 11733 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 81 4 +.delay_slot + 11734 "11111000" // MOV lr, dc0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11735 "10000000" // /* MW 3 */ + 11736 "01110001" // /* MW 2 */ + 11737 "00011111" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 81 4 +.delay_slot + 11738 "11111000" // MOV p0, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11739 "11000000" // /* MW 3 */ + 11740 "01100100" // /* MW 2 */ + 11741 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11743 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11745 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj__end +.label __ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj___func_end0 + 11747 "00000000" // /* MW 1 */ +.label __Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params___func_begin0 +.label _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params +.function slice_generic_innermost _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "slice_generic_innermost.h" 25 first +.src_ref 3 "slice_generic_innermost.h" 35 60 +.src_ref 3 "slice_generic_innermost.h" 54 19 +.function_start + 11760 "00000010" // MOVS p5, p1; MOV r0, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11761 "01110000" // /* MW 7 */ + 11762 "01100000" // /* MW 6 */ + 11763 "00001010" // /* MW 5 */ + 11764 "00000000" // /* MW 4 */ + 11765 "01100000" // /* MW 3 */ + 11766 "10010001" // /* MW 2 */ + 11767 "10110000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 35 60 first + 11768 "00011000" // ADD.NC p3, r0, #36 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11769 "00010010" // /* MW 3 */ + 11770 "01100000" // /* MW 2 */ + 11771 "00011011" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 35 60 + 11772 "11010100" // LDA m2, [p3], #4; MOV r0, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11773 "10000001" // /* MW 5 */ + 11774 "00111101" // /* MW 4 */ + 11775 "11010000" // /* MW 3 */ + 11776 "10100000" // /* MW 2 */ + 11777 "01100011" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 36 61 first + 11778 "10011000" // LDA m0, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11779 "00000110" // /* MW 3 */ + 11780 "00011100" // /* MW 2 */ + 11781 "00000011" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 40 35 first + 11782 "10011000" // LDA r2, [p3, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11783 "01010110" // /* MW 3 */ + 11784 "11010100" // /* MW 2 */ + 11785 "00000011" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 38 59 first + 11786 "10011000" // LDA m1, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11787 "10000110" // /* MW 3 */ + 11788 "00000100" // /* MW 2 */ + 11789 "00000011" // /* MW 1 */ + 11790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11791 "00000000" // /* MW 1 */ + 11792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11793 "00000000" // /* MW 1 */ + 11794 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11795 "00000000" // /* MW 1 */ + 11796 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11797 "00000000" // /* MW 1 */ + 11798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11799 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 40 8 first +.src_ref 3 "slice_generic_innermost.h" 40 26 first + 11800 "10000100" // JZ r2, #12224 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12224 delay_slots=5 */ + 11801 "00000001" // /* MW 5 */ + 11802 "00000000" // /* MW 4 */ + 11803 "11100000" // /* MW 3 */ + 11804 "00010111" // /* MW 2 */ + 11805 "00010000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "slice_generic_innermost.h" 36 39 +.src_ref 3 "slice_generic_innermost.h" 50 19 +.delay_slot + 11806 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11807 "11000000" // /* MW 3 */ + 11808 "01100000" // /* MW 2 */ + 11809 "00011111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "slice_generic_innermost.h" 36 39 first +.src_ref 3 "slice_generic_innermost.h" 50 19 +.delay_slot + 11810 "11110100" // PADDB [p7], m0; MOV p3, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11811 "10000001" // /* MW 5 */ + 11812 "11011101" // /* MW 4 */ + 11813 "00000110" // /* MW 3 */ + 11814 "01110010" // /* MW 2 */ + 11815 "11100001" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 35 38 first +.delay_slot + 11816 "00011000" // PADDB [p0], m2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11817 "10010000" // /* MW 3 */ + 11818 "01001011" // /* MW 2 */ + 11819 "00111000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "slice_generic_innermost.h" 37 39 first +.src_ref 3 "slice_generic_innermost.h" 52 20 +.delay_slot + 11820 "11110100" // PADDB [p0], m0; MOV p4, p0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11821 "10000001" // /* MW 5 */ + 11822 "11000001" // /* MW 4 */ + 11823 "00001000" // /* MW 3 */ + 11824 "01110010" // /* MW 2 */ + 11825 "00000001" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 38 37 first +.delay_slot + 11826 "00011000" // PADDB [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11827 "10010000" // /* MW 3 */ + 11828 "00101011" // /* MW 2 */ + 11829 "00111001" // /* MW 1 */ + 11830 "00011000" // MOVX r1, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11831 "00010001" // /* MW 3 */ + 11832 "00000010" // /* MW 2 */ + 11833 "00010000" // /* MW 1 */ + 11834 "10011000" // LTU r3, r2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11835 "00011100" // /* MW 3 */ + 11836 "10000110" // /* MW 2 */ + 11837 "00010000" // /* MW 1 */ + 11838 "10000100" // JNZ r3, #12080 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12080 delay_slots=5 */ + 11839 "00000001" // /* MW 5 */ + 11840 "01000000" // /* MW 4 */ + 11841 "10011000" // /* MW 3 */ + 11842 "00010111" // /* MW 2 */ + 11843 "00011000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 40 8 +.delay_slot + 11844 "10111000" // MOV dj0, #48 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11845 "01100000" // /* MW 3 */ + 11846 "10000000" // /* MW 2 */ + 11847 "00011000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 40 8 first +.delay_slot + 11848 "10011000" // LDA r1, [p2, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11849 "00110110" // /* MW 3 */ + 11850 "00000000" // /* MW 2 */ + 11851 "00000010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11853 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11854 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11855 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11856 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11857 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 40 8 +.src_ref 3 "slice_generic_innermost.h" 50 19 first +.src_ref 3 "slice_generic_innermost.h" 52 20 first + 11858 "10110110" // VLDA x2, [p4], m0; VLDB x1, [p3], m0; MOVXM ls, #11952 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 11859 "00010000" // /* MW 11 */ + 11860 "01011000" // /* MW 10 */ + 11861 "01111111" // /* MW 9 */ + 11862 "00001000" // /* MW 8 */ + 11863 "00000000" // /* MW 7 */ + 11864 "00000000" // /* MW 6 */ + 11865 "11101000" // /* MW 5 */ + 11866 "00010000" // /* MW 4 */ + 11867 "01110110" // /* MW 3 */ + 11868 "00010011" // /* MW 2 */ + 11869 "10000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "slice_generic_innermost.h" 40 8 first +.src_ref 3 "slice_generic_innermost.h" 51 19 first +.src_ref 3 "slice_generic_innermost.h" 53 20 first +.src_ref 3 "slice_generic_innermost.h" 56 21 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11870 "01111110" // PADDA [p4], m0; VLDB x0, [p7], m0; PADDS [p3], m0; MOVXM le, #12000 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 11871 "01100000" // /* MW 13 */ + 11872 "00001011" // /* MW 12 */ + 11873 "01100001" // /* MW 11 */ + 11874 "00000010" // /* MW 10 */ + 11875 "11101110" // /* MW 9 */ + 11876 "00110111" // /* MW 8 */ + 11877 "00000001" // /* MW 7 */ + 11878 "00000000" // /* MW 6 */ + 11879 "01101000" // /* MW 5 */ + 11880 "00010000" // /* MW 4 */ + 11881 "11111110" // /* MW 3 */ + 11882 "00001100" // /* MW 2 */ + 11883 "10000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 40 8 +.src_ref 3 "slice_generic_innermost.h" 50 19 first +.src_ref 3 "slice_generic_innermost.h" 57 21 first +.src_ref 3 "slice_generic_innermost.h" 58 21 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11884 "11110110" // VLDA x1, [p3], m0; VLDB x3, [p0], m0; PADDS [p7], m0; ADD.NC lc, r2, #-3 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 11885 "01000000" // /* MW 11 */ + 11886 "10111111" // /* MW 10 */ + 11887 "10111000" // /* MW 9 */ + 11888 "00000010" // /* MW 8 */ + 11889 "01011011" // /* MW 7 */ + 11890 "00001000" // /* MW 6 */ + 11891 "11101111" // /* MW 5 */ + 11892 "00010001" // /* MW 4 */ + 11893 "01110000" // /* MW 3 */ + 11894 "00001011" // /* MW 2 */ + 11895 "01100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "slice_generic_innermost.h" 51 19 first +.src_ref 3 "slice_generic_innermost.h" 52 20 first +.src_ref 3 "slice_generic_innermost.h" 59 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11896 "00110010" // PADDA [p0], m0; VLDB x2, [p4], m0; PADDS [p3], m0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11897 "01011011" // /* MW 7 */ + 11898 "00001000" // /* MW 6 */ + 11899 "01101011" // /* MW 5 */ + 11900 "00010001" // /* MW 4 */ + 11901 "11111000" // /* MW 3 */ + 11902 "00001100" // /* MW 2 */ + 11903 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 53 20 first +.src_ref 3 "slice_generic_innermost.h" 56 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11904 "00111100" // PADDA [p4], m0; VLDB x0, [p7], m0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11905 "01101000" // /* MW 5 */ + 11906 "00010000" // /* MW 4 */ + 11907 "11111110" // /* MW 3 */ + 11908 "00001100" // /* MW 2 */ + 11909 "10000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "slice_generic_innermost.h" 57 21 first +.src_ref 3 "slice_generic_innermost.h" 58 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11910 "01001100" // VLDB x3, [p0], m0; PADDS [p7], m0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11911 "10110110" // /* MW 5 */ + 11912 "00010000" // /* MW 4 */ + 11913 "10001110" // /* MW 3 */ + 11914 "00011110" // /* MW 2 */ + 11915 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 50 19 first +.src_ref 3 "slice_generic_innermost.h" 59 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11916 "00111100" // PADDA [p0], m0; VLDB x1, [p3], m0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11917 "11101000" // /* MW 5 */ + 11918 "00010000" // /* MW 4 */ + 11919 "11110110" // /* MW 3 */ + 11920 "00001100" // /* MW 2 */ + 11921 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "slice_generic_innermost.h" 46 17 first +.src_ref 3 "slice_generic_innermost.h" 52 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11922 "10110100" // VLDB x2, [p4], m0; VSHUFFLE bmll0, x1, x2, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11923 "00001011" // /* MW 5 */ + 11924 "00010010" // /* MW 4 */ + 11925 "10000000" // /* MW 3 */ + 11926 "00010110" // /* MW 2 */ + 11927 "10000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 51 19 first +.src_ref 3 "slice_generic_innermost.h" 56 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11928 "00110010" // NOPA; VLDB x0, [p7], m0; PADDS [p3], m0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11929 "01011011" // /* MW 7 */ + 11930 "00001000" // /* MW 6 */ + 11931 "01101011" // /* MW 5 */ + 11932 "00010000" // /* MW 4 */ + 11933 "11111110" // /* MW 3 */ + 11934 "00101100" // /* MW 2 */ + 11935 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "slice_generic_innermost.h" 47 18 first +.src_ref 3 "slice_generic_innermost.h" 54 19 first +.src_ref 3 "slice_generic_innermost.h" 58 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11936 "11100001" // NOPA; VLDB x3, [p0], m0; VST bmll0, [p5], m1; NOPX; VSHUFFLE bmlh0, x0, x3, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11937 "00000000" // /* MW 15 */ + 11938 "00000000" // /* MW 14 */ + 11939 "11101000" // /* MW 13 */ + 11940 "11000010" // /* MW 12 */ + 11941 "01000000" // /* MW 11 */ + 11942 "00000000" // /* MW 10 */ + 11943 "00000000" // /* MW 9 */ + 11944 "10000000" // /* MW 8 */ + 11945 "00000110" // /* MW 7 */ + 11946 "00101000" // /* MW 6 */ + 11947 "11101101" // /* MW 5 */ + 11948 "00010001" // /* MW 4 */ + 11949 "11110000" // /* MW 3 */ + 11950 "00101100" // /* MW 2 */ + 11951 "00000000" // /* MW 1 */ +.label ZLS_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_192 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 46 17 first +.src_ref 3 "slice_generic_innermost.h" 50 19 first +.src_ref 3 "slice_generic_innermost.h" 53 20 first +.src_ref 3 "slice_generic_innermost.h" 57 21 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 11952 "11100001" // PADDA [p4], m0; VLDB x1, [p3], m0; PADDS [p7], m0; NOPX; VSHUFFLE bmll0, x1, x2, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11953 "00000000" // /* MW 15 */ + 11954 "00000000" // /* MW 14 */ + 11955 "11101000" // /* MW 13 */ + 11956 "10000010" // /* MW 12 */ + 11957 "00000100" // /* MW 11 */ + 11958 "00000000" // /* MW 10 */ + 11959 "00000000" // /* MW 9 */ + 11960 "00000000" // /* MW 8 */ + 11961 "01011011" // /* MW 7 */ + 11962 "00001000" // /* MW 6 */ + 11963 "11101111" // /* MW 5 */ + 11964 "00010000" // /* MW 4 */ + 11965 "11110110" // /* MW 3 */ + 11966 "00001100" // /* MW 2 */ + 11967 "10000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "slice_generic_innermost.h" 52 20 first +.src_ref 3 "slice_generic_innermost.h" 59 21 first +.src_ref 3 "slice_generic_innermost.h" 60 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11968 "11100001" // PADDA [p0], m0; VLDB x2, [p4], m0; VST bmlh0, [p1], m1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11969 "00000000" // /* MW 15 */ + 11970 "00000000" // /* MW 14 */ + 11971 "01111000" // /* MW 13 */ + 11972 "10100101" // /* MW 12 */ + 11973 "00000001" // /* MW 11 */ + 11974 "00000000" // /* MW 10 */ + 11975 "00000000" // /* MW 9 */ + 11976 "10000000" // /* MW 8 */ + 11977 "00100110" // /* MW 7 */ + 11978 "00101000" // /* MW 6 */ + 11979 "01101001" // /* MW 5 */ + 11980 "00010001" // /* MW 4 */ + 11981 "11111000" // /* MW 3 */ + 11982 "00001100" // /* MW 2 */ + 11983 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 47 18 first +.src_ref 3 "slice_generic_innermost.h" 51 19 first +.src_ref 3 "slice_generic_innermost.h" 55 19 first +.src_ref 3 "slice_generic_innermost.h" 56 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11984 "11100001" // PADDA [p5], m1; VLDB x0, [p7], m0; PADDS [p3], m0; NOPX; VSHUFFLE bmlh0, x0, x3, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11985 "00000000" // /* MW 15 */ + 11986 "00000000" // /* MW 14 */ + 11987 "11101000" // /* MW 13 */ + 11988 "11000010" // /* MW 12 */ + 11989 "01000000" // /* MW 11 */ + 11990 "00000000" // /* MW 10 */ + 11991 "00000000" // /* MW 9 */ + 11992 "00000000" // /* MW 8 */ + 11993 "01011011" // /* MW 7 */ + 11994 "00001000" // /* MW 6 */ + 11995 "01101011" // /* MW 5 */ + 11996 "00010000" // /* MW 4 */ + 11997 "11111110" // /* MW 3 */ + 11998 "00001100" // /* MW 2 */ + 11999 "10100101" // /* MW 1 */ +.label ZLE_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_240 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "slice_generic_innermost.h" 54 19 first +.src_ref 3 "slice_generic_innermost.h" 58 21 first +.src_ref 3 "slice_generic_innermost.h" 61 19 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12000 "11100001" // PADDA [p1], m1; VLDB x3, [p0], m0; VST bmll0, [p5], m1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12001 "00000000" // /* MW 15 */ + 12002 "00000000" // /* MW 14 */ + 12003 "01111000" // /* MW 13 */ + 12004 "10100101" // /* MW 12 */ + 12005 "00000001" // /* MW 11 */ + 12006 "00000000" // /* MW 10 */ + 12007 "00000000" // /* MW 9 */ + 12008 "10000000" // /* MW 8 */ + 12009 "00000110" // /* MW 7 */ + 12010 "00101000" // /* MW 6 */ + 12011 "11101101" // /* MW 5 */ + 12012 "00010001" // /* MW 4 */ + 12013 "11110000" // /* MW 3 */ + 12014 "00001100" // /* MW 2 */ + 12015 "00100101" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 46 17 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 12016 "11011000" // VSHUFFLE bmll0, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12017 "00000101" // /* MW 3 */ + 12018 "00001001" // /* MW 2 */ + 12019 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "slice_generic_innermost.h" 60 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12020 "10011000" // VST bmlh0, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12021 "00100110" // /* MW 3 */ + 12022 "00101000" // /* MW 2 */ + 12023 "00001001" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 47 18 first +.src_ref 3 "slice_generic_innermost.h" 61 19 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12024 "10010100" // PADDA [p1], m1; VSHUFFLE bmlh0, x0, x3, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12025 "00001011" // /* MW 5 */ + 12026 "00000011" // /* MW 4 */ + 12027 "11110001" // /* MW 3 */ + 12028 "00001100" // /* MW 2 */ + 12029 "00100101" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 12030 "10000100" // J #12224 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=12224 delay_slots=5 */ + 12031 "00000000" // /* MW 5 */ + 12032 "00000000" // /* MW 4 */ + 12033 "11100000" // /* MW 3 */ + 12034 "00010111" // /* MW 2 */ + 12035 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "slice_generic_innermost.h" 46 17 first +.src_ref 3 "slice_generic_innermost.h" 55 19 first +.src_ref 3 "slice_generic_innermost.h" 60 19 first +.delay_slot +.aggressive_scheduled_block_id 2 +.noswbrkpt + 12036 "10111010" // PADDA [p5], m1; VST bmlh0, [p1], m1; VSHUFFLE bmll0, x1, x2, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12037 "11100010" // /* MW 9 */ + 12038 "10000010" // /* MW 8 */ + 12039 "00000100" // /* MW 7 */ + 12040 "10000000" // /* MW 6 */ + 12041 "00100110" // /* MW 5 */ + 12042 "00101000" // /* MW 4 */ + 12043 "11110001" // /* MW 3 */ + 12044 "00001100" // /* MW 2 */ + 12045 "10100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "slice_generic_innermost.h" 54 19 first +.src_ref 3 "slice_generic_innermost.h" 61 19 first +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12046 "00001100" // PADDA [p1], m1; VST bmll0, [p5], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12047 "00001101" // /* MW 5 */ + 12048 "01010000" // /* MW 4 */ + 12049 "11111010" // /* MW 3 */ + 12050 "00001100" // /* MW 2 */ + 12051 "00100101" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 47 18 first +.src_ref 3 "slice_generic_innermost.h" 55 19 first +.delay_slot + 12052 "10010100" // PADDA [p5], m1; VSHUFFLE bmlh0, x0, x3, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12053 "00001011" // /* MW 5 */ + 12054 "00000011" // /* MW 4 */ + 12055 "11110001" // /* MW 3 */ + 12056 "00001100" // /* MW 2 */ + 12057 "10100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "slice_generic_innermost.h" 54 19 first +.delay_slot + 12058 "00001100" // NOPA; VST bmll0, [p5], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12059 "00001101" // /* MW 5 */ + 12060 "01010000" // /* MW 4 */ + 12061 "11111010" // /* MW 3 */ + 12062 "00101100" // /* MW 2 */ + 12063 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "slice_generic_innermost.h" 60 19 first +.delay_slot + 12064 "11100001" // NOPA; NOPB; VST bmlh0, [p1], m1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12065 "00000000" // /* MW 15 */ + 12066 "00000000" // /* MW 14 */ + 12067 "01111000" // /* MW 13 */ + 12068 "10100101" // /* MW 12 */ + 12069 "00000001" // /* MW 11 */ + 12070 "00000000" // /* MW 10 */ + 12071 "00000000" // /* MW 9 */ + 12072 "10000000" // /* MW 8 */ + 12073 "00100110" // /* MW 7 */ + 12074 "00101000" // /* MW 6 */ + 12075 "00100001" // /* MW 5 */ + 12076 "00000000" // /* MW 4 */ + 12077 "11110000" // /* MW 3 */ + 12078 "00101100" // /* MW 2 */ + 12079 "00000000" // /* MW 1 */ +.label TGT_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_320 +.src_ref 3 "slice_generic_innermost.h" 40 8 first + 12080 "11111000" // MOV lc, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12081 "00100000" // /* MW 3 */ + 12082 "01110001" // /* MW 2 */ + 12083 "00011101" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 40 8 + 12084 "01000100" // MOVXM ls, #12096 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12085 "10000000" // /* MW 5 */ + 12086 "11111110" // /* MW 4 */ + 12087 "00100001" // /* MW 3 */ + 12088 "00000000" // /* MW 2 */ + 12089 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 40 8 + 12090 "01000100" // MOVXM le, #12208 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12091 "01100000" // /* MW 5 */ + 12092 "11111111" // /* MW 4 */ + 12093 "00100110" // /* MW 3 */ + 12094 "00000000" // /* MW 2 */ + 12095 "00000000" // /* MW 1 */ +.label ZLS_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_336 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 50 19 first +.src_ref 3 "slice_generic_innermost.h" 52 20 first +.begin_of_loop +.loop_nesting 1 + 12096 "00111100" // VLDA x1, [p4], m0; VLDB x2, [p3], m0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12097 "01101000" // /* MW 5 */ + 12098 "00010001" // /* MW 4 */ + 12099 "01110110" // /* MW 3 */ + 12100 "00001011" // /* MW 2 */ + 12101 "10000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "slice_generic_innermost.h" 51 19 first +.src_ref 3 "slice_generic_innermost.h" 53 20 first +.src_ref 3 "slice_generic_innermost.h" 56 21 first + 12102 "00110010" // PADDA [p3], m0; VLDB x0, [p7], m0; PADDS [p4], m0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12103 "01011011" // /* MW 7 */ + 12104 "00001000" // /* MW 6 */ + 12105 "01101100" // /* MW 5 */ + 12106 "00010000" // /* MW 4 */ + 12107 "11111110" // /* MW 3 */ + 12108 "00001100" // /* MW 2 */ + 12109 "01100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 57 21 first +.src_ref 3 "slice_generic_innermost.h" 58 21 first + 12110 "00111100" // PADDA [p7], m0; VLDB x3, [p0], m0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12111 "11101000" // /* MW 5 */ + 12112 "00010001" // /* MW 4 */ + 12113 "11110000" // /* MW 3 */ + 12114 "00001100" // /* MW 2 */ + 12115 "11100001" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 59 21 first + 12116 "00011000" // PADDB [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12117 "10010000" // /* MW 3 */ + 12118 "00001011" // /* MW 2 */ + 12119 "00111000" // /* MW 1 */ + 12120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12121 "00000000" // /* MW 1 */ + 12122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12123 "00000000" // /* MW 1 */ + 12124 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12125 "01100111" // /* MW 3 */ + 12126 "00000001" // /* MW 2 */ + 12127 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 46 17 first + 12128 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x2, x1, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12129 "00000000" // /* MW 15 */ + 12130 "00000000" // /* MW 14 */ + 12131 "11101000" // /* MW 13 */ + 12132 "01000010" // /* MW 12 */ + 12133 "00001000" // /* MW 11 */ + 12134 "00000000" // /* MW 10 */ + 12135 "00000000" // /* MW 9 */ + 12136 "00000000" // /* MW 8 */ + 12137 "01011011" // /* MW 7 */ + 12138 "00000001" // /* MW 6 */ + 12139 "00100000" // /* MW 5 */ + 12140 "00000000" // /* MW 4 */ + 12141 "11110000" // /* MW 3 */ + 12142 "00101100" // /* MW 2 */ + 12143 "00000000" // /* MW 1 */ + 12144 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12145 "00000000" // /* MW 15 */ + 12146 "00000000" // /* MW 14 */ + 12147 "01111000" // /* MW 13 */ + 12148 "10100101" // /* MW 12 */ + 12149 "00000001" // /* MW 11 */ + 12150 "00000000" // /* MW 10 */ + 12151 "00000000" // /* MW 9 */ + 12152 "00000000" // /* MW 8 */ + 12153 "01011011" // /* MW 7 */ + 12154 "00000001" // /* MW 6 */ + 12155 "00100000" // /* MW 5 */ + 12156 "00000000" // /* MW 4 */ + 12157 "11110000" // /* MW 3 */ + 12158 "00101100" // /* MW 2 */ + 12159 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "slice_generic_innermost.h" 47 18 first +.src_ref 3 "slice_generic_innermost.h" 54 19 first + 12160 "11100001" // NOPA; NOPB; VST bmll0, [p5], m1; NOPX; VSHUFFLE bmlh0, x0, x3, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12161 "00000000" // /* MW 15 */ + 12162 "00000000" // /* MW 14 */ + 12163 "11101000" // /* MW 13 */ + 12164 "11000010" // /* MW 12 */ + 12165 "01000000" // /* MW 11 */ + 12166 "00000000" // /* MW 10 */ + 12167 "00000000" // /* MW 9 */ + 12168 "10000000" // /* MW 8 */ + 12169 "00000110" // /* MW 7 */ + 12170 "00101000" // /* MW 6 */ + 12171 "00100101" // /* MW 5 */ + 12172 "00000000" // /* MW 4 */ + 12173 "11110000" // /* MW 3 */ + 12174 "00101100" // /* MW 2 */ + 12175 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 55 19 first + 12176 "11100001" // NOPA; PADDB [p5], m1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12177 "00000000" // /* MW 15 */ + 12178 "00000000" // /* MW 14 */ + 12179 "01111000" // /* MW 13 */ + 12180 "10100101" // /* MW 12 */ + 12181 "00000001" // /* MW 11 */ + 12182 "00000000" // /* MW 10 */ + 12183 "00000000" // /* MW 9 */ + 12184 "00000000" // /* MW 8 */ + 12185 "01011011" // /* MW 7 */ + 12186 "00000001" // /* MW 6 */ + 12187 "00100000" // /* MW 5 */ + 12188 "01010111" // /* MW 4 */ + 12189 "11111010" // /* MW 3 */ + 12190 "00101100" // /* MW 2 */ + 12191 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "slice_generic_innermost.h" 60 19 first + 12192 "11100001" // NOPA; NOPB; VST bmlh0, [p1], m1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12193 "00000000" // /* MW 15 */ + 12194 "00000000" // /* MW 14 */ + 12195 "01111000" // /* MW 13 */ + 12196 "10100101" // /* MW 12 */ + 12197 "00000001" // /* MW 11 */ + 12198 "00000000" // /* MW 10 */ + 12199 "00000000" // /* MW 9 */ + 12200 "10000000" // /* MW 8 */ + 12201 "00100110" // /* MW 7 */ + 12202 "00101000" // /* MW 6 */ + 12203 "00100001" // /* MW 5 */ + 12204 "00000000" // /* MW 4 */ + 12205 "11110000" // /* MW 3 */ + 12206 "00101100" // /* MW 2 */ + 12207 "00000000" // /* MW 1 */ +.label ZLE_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_448 +.src_ref 3 "slice_generic_innermost.h" 61 19 first +.end_of_loop + 12208 "11100001" // NOPA; PADDB [p1], m1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12209 "00000000" // /* MW 15 */ + 12210 "00000000" // /* MW 14 */ + 12211 "01111000" // /* MW 13 */ + 12212 "10100101" // /* MW 12 */ + 12213 "00000001" // /* MW 11 */ + 12214 "00000000" // /* MW 10 */ + 12215 "00000000" // /* MW 9 */ + 12216 "00000000" // /* MW 8 */ + 12217 "01011011" // /* MW 7 */ + 12218 "00000001" // /* MW 6 */ + 12219 "00100000" // /* MW 5 */ + 12220 "01010111" // /* MW 4 */ + 12221 "11110010" // /* MW 3 */ + 12222 "00101100" // /* MW 2 */ + 12223 "00000000" // /* MW 1 */ +.label TGT_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_464 +.src_ref 3 "slice_generic_innermost.h" 76 first +.loop_nesting 0 + 12224 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12225 "00000000" // /* MW 3 */ + 12226 "00101000" // /* MW 2 */ + 12227 "00010000" // /* MW 1 */ +.delay_slot + 12228 "11111000" // MOV p7, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12229 "00100000" // /* MW 3 */ + 12230 "01100000" // /* MW 2 */ + 12231 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12233 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12234 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12235 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12236 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12237 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12238 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params__end +.label __Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params___func_end0 + 12239 "00000000" // /* MW 1 */ +.label __ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj___func_begin0 +.label _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj +.function slice_generic_innermost_adf_wrapper, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> > > _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 29 +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 29 first +.function_start + 12240 "00111010" // MOVS p5, p0; PADDXM [sp], #128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12241 "01110001" // /* MW 9 */ + 12242 "00000000" // /* MW 8 */ + 12243 "00000000" // /* MW 7 */ + 12244 "00000000" // /* MW 6 */ + 12245 "00000100" // /* MW 5 */ + 12246 "00000000" // /* MW 4 */ + 12247 "01100000" // /* MW 3 */ + 12248 "00010001" // /* MW 2 */ + 12249 "10110000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 + 12250 "00000010" // ST lr, [sp, #-4]; MOV p3, p1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12251 "01110000" // /* MW 7 */ + 12252 "01100000" // /* MW 6 */ + 12253 "10110001" // /* MW 5 */ + 12254 "00000001" // /* MW 4 */ + 12255 "10110000" // /* MW 3 */ + 12256 "10000111" // /* MW 2 */ + 12257 "11111111" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 33 4 +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 33 4 first +.no_stack_arguments + 12258 "00111010" // MOVS p1, p2; JL #11696 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=11696 delay_slots=5 */ + 12259 "01000001" // /* MW 9 */ + 12260 "00000000" // /* MW 8 */ + 12261 "00000000" // /* MW 7 */ + 12262 "10110110" // /* MW 6 */ + 12263 "00000101" // /* MW 5 */ + 12264 "00000000" // /* MW 4 */ + 12265 "01100000" // /* MW 3 */ + 12266 "00010001" // /* MW 2 */ + 12267 "00110001" // /* MW 1 */ +.delay_slot + 12268 "11111000" // MOV p0, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12269 "11100000" // /* MW 3 */ + 12270 "01100101" // /* MW 2 */ + 12271 "00011000" // /* MW 1 */ +.delay_slot + 12272 "00011000" // PADDB [p0], #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12273 "10010000" // /* MW 3 */ + 12274 "11101111" // /* MW 2 */ + 12275 "00111000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 4 +.delay_slot + 12276 "11111000" // MOV p4, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12277 "11000000" // /* MW 3 */ + 12278 "01100000" // /* MW 2 */ + 12279 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12281 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12282 "00111100" // NOPA; NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12283 "00100000" // /* MW 5 */ + 12284 "00000000" // /* MW 4 */ + 12285 "11110000" // /* MW 3 */ + 12286 "00101100" // /* MW 2 */ + 12287 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 31 first +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 4 +.return_address + 12288 "10111010" // LDA r18, [sp, #-128]; MOVS p2, p4; MOV r17, CORE_ID /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12289 "01110010" // /* MW 9 */ + 12290 "01110000" // /* MW 8 */ + 12291 "00101101" // /* MW 7 */ + 12292 "00000010" // /* MW 6 */ + 12293 "10001011" // /* MW 5 */ + 12294 "10010000" // /* MW 4 */ + 12295 "00100010" // /* MW 3 */ + 12296 "01001010" // /* MW 2 */ + 12297 "11110000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 34 35 first +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 44 + 12298 "00101100" // LDA r22, [sp, #-124]; EXTEND.u8 r17, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12299 "00100000" // /* MW 5 */ + 12300 "11000101" // /* MW 4 */ + 12301 "00101000" // /* MW 3 */ + 12302 "11011010" // /* MW 2 */ + 12303 "11110000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 34 35 +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 57 first + 12304 "10111010" // LDA r20, [sp, #-120]; MOVXM r19, #65534 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12305 "00010000" // /* MW 9 */ + 12306 "11111111" // /* MW 8 */ + 12307 "01101111" // /* MW 7 */ + 12308 "00111110" // /* MW 6 */ + 12309 "00000000" // /* MW 5 */ + 12310 "00000000" // /* MW 4 */ + 12311 "00100000" // /* MW 3 */ + 12312 "01010010" // /* MW 2 */ + 12313 "11110001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 34 35 first + 12314 "00101100" // LDA p1, [p3]; ADD r17, r19, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12315 "00100001" // /* MW 5 */ + 12316 "11000110" // /* MW 4 */ + 12317 "11011001" // /* MW 3 */ + 12318 "10010011" // /* MW 2 */ + 12319 "01100000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 70 first +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 59 first + 12320 "00101100" // LDA r19, [sp, #-116]; EXTEND.u16 r21, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12321 "01100000" // /* MW 5 */ + 12322 "11010101" // /* MW 4 */ + 12323 "00101000" // /* MW 3 */ + 12324 "11001110" // /* MW 2 */ + 12325 "11110001" // /* MW 1 */ + 12326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12327 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 12328 "10011000" // LDA r17, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12329 "00110110" // /* MW 3 */ + 12330 "00000110" // /* MW 2 */ + 12331 "00000101" // /* MW 1 */ + 12332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12333 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 36 first + 12334 "10011000" // MUL r18, r22, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12335 "00101111" // /* MW 3 */ + 12336 "10100101" // /* MW 2 */ + 12337 "00010101" // /* MW 1 */ + 12338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12339 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 49 + 12340 "10011000" // MUL r18, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12341 "01001111" // /* MW 3 */ + 12342 "10100101" // /* MW 2 */ + 12343 "00010100" // /* MW 1 */ + 12344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12345 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 62 + 12346 "10011000" // MUL r18, r21, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12347 "00101111" // /* MW 3 */ + 12348 "01100101" // /* MW 2 */ + 12349 "00010101" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 4 first +.no_stack_arguments + 12350 "00000100" // JL #11760 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11760 delay_slots=5 */ + 12351 "00000001" // /* MW 5 */ + 12352 "00000000" // /* MW 4 */ + 12353 "11111000" // /* MW 3 */ + 12354 "00010110" // /* MW 2 */ + 12355 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 57 +.delay_slot + 12356 "10011000" // MUL r18, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12357 "00101111" // /* MW 3 */ + 12358 "11100101" // /* MW 2 */ + 12359 "00010100" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 45 +.delay_slot + 12360 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12361 "00000101" // /* MW 3 */ + 12362 "00100000" // /* MW 2 */ + 12363 "00010000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 45 +.delay_slot + 12364 "10011000" // LSHL r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12365 "00001101" // /* MW 3 */ + 12366 "10100001" // /* MW 2 */ + 12367 "00010100" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 45 +.delay_slot + 12368 "01011000" // ADD.NC p0, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12369 "11000001" // /* MW 3 */ + 12370 "01101000" // /* MW 2 */ + 12371 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12372 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12373 "10000001" // /* MW 11 */ + 12374 "10101101" // /* MW 10 */ + 12375 "00000000" // /* MW 9 */ + 12376 "00000000" // /* MW 8 */ + 12377 "00000000" // /* MW 7 */ + 12378 "00000000" // /* MW 6 */ + 12379 "00100000" // /* MW 5 */ + 12380 "00000000" // /* MW 4 */ + 12381 "11110000" // /* MW 3 */ + 12382 "00101100" // /* MW 2 */ + 12383 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 39 +.return_address + 12384 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12385 "00111001" // /* MW 3 */ + 12386 "11111100" // /* MW 2 */ + 12387 "00000111" // /* MW 1 */ + 12388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12389 "00000000" // /* MW 1 */ + 12390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12391 "00000000" // /* MW 1 */ + 12392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12393 "00000000" // /* MW 1 */ + 12394 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12395 "00000000" // /* MW 1 */ + 12396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12397 "00000000" // /* MW 1 */ + 12398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12399 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 39 first + 12400 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12401 "00000000" // /* MW 3 */ + 12402 "00101000" // /* MW 2 */ + 12403 "00010000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 39 +.delay_slot + 12404 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12405 "00000001" // /* MW 5 */ + 12406 "00000000" // /* MW 4 */ + 12407 "00000000" // /* MW 3 */ + 12408 "11110000" // /* MW 2 */ + 12409 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12411 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12413 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12415 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj__end +.label __ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj___func_end0 + 12417 "00000000" // /* MW 1 */ +.label __Z14_b8170_wrapperPPv___func_begin0 +.label _Z14_b8170_wrapperPPv +.function _b8170_wrapper _Z14_b8170_wrapperPPv +.src_ref 0 "0_0_reloadable4.cc" 71 first +.src_ref 0 "0_0_reloadable4.cc" 73 79 +.function_start + 12432 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12433 "11000000" // /* MW 3 */ + 12434 "01100000" // /* MW 2 */ + 12435 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 73 79 first + 12436 "10011000" // LDA p0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12437 "00011110" // /* MW 3 */ + 12438 "00011100" // /* MW 2 */ + 12439 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 75 47 first + 12440 "10011000" // LDA p2, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12441 "00011110" // /* MW 3 */ + 12442 "00010101" // /* MW 2 */ + 12443 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 74 80 first + 12444 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12445 "10011110" // /* MW 3 */ + 12446 "00000100" // /* MW 2 */ + 12447 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 72 4 first +.tail_call + 12448 "10000100" // J #12240 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=12240 delay_slots=5 */ + 12449 "00000000" // /* MW 5 */ + 12450 "00000000" // /* MW 4 */ + 12451 "11101000" // /* MW 3 */ + 12452 "00010111" // /* MW 2 */ + 12453 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12455 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12457 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12459 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12461 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z14_b8170_wrapperPPv__end +.label __Z14_b8170_wrapperPPv___func_end0 + 12463 "00000000" // /* MW 1 */ +.label _ZN12me_primitive10udiv_dstepEjjRjS0_ +.function udiv_dstep _ZN12me_primitive10udiv_dstepEjjRjS0_ +.src_ref 11 "me_div.c" 108 19 +.src_ref 11 "me_div.c" 108 19 +.src_ref 11 "me_div.c" 115 4 first +.function_start + 12464 "11100100" // MOVX r3, #0; MOV r31, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12465 "01000001" // /* MW 5 */ + 12466 "10100000" // /* MW 4 */ + 12467 "00101111" // /* MW 3 */ + 12468 "11000000" // /* MW 2 */ + 12469 "00000000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12470 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12471 "00011100" // /* MW 3 */ + 12472 "11000110" // /* MW 2 */ + 12473 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12474 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12475 "00011100" // /* MW 3 */ + 12476 "11000110" // /* MW 2 */ + 12477 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12478 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12479 "00011100" // /* MW 3 */ + 12480 "11000110" // /* MW 2 */ + 12481 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12482 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12483 "00011100" // /* MW 3 */ + 12484 "11000110" // /* MW 2 */ + 12485 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12486 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12487 "00011100" // /* MW 3 */ + 12488 "11000110" // /* MW 2 */ + 12489 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12490 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12491 "00011100" // /* MW 3 */ + 12492 "11000110" // /* MW 2 */ + 12493 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12494 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12495 "00011100" // /* MW 3 */ + 12496 "11000110" // /* MW 2 */ + 12497 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12498 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12499 "00011100" // /* MW 3 */ + 12500 "11000110" // /* MW 2 */ + 12501 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12502 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12503 "00011100" // /* MW 3 */ + 12504 "11000110" // /* MW 2 */ + 12505 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12506 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12507 "00011100" // /* MW 3 */ + 12508 "11000110" // /* MW 2 */ + 12509 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12510 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12511 "00011100" // /* MW 3 */ + 12512 "11000110" // /* MW 2 */ + 12513 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12514 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12515 "00011100" // /* MW 3 */ + 12516 "11000110" // /* MW 2 */ + 12517 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12518 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12519 "00011100" // /* MW 3 */ + 12520 "11000110" // /* MW 2 */ + 12521 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12522 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12523 "00011100" // /* MW 3 */ + 12524 "11000110" // /* MW 2 */ + 12525 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12526 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12527 "00011100" // /* MW 3 */ + 12528 "11000110" // /* MW 2 */ + 12529 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12530 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12531 "00011100" // /* MW 3 */ + 12532 "11000110" // /* MW 2 */ + 12533 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12534 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12535 "00011100" // /* MW 3 */ + 12536 "11000110" // /* MW 2 */ + 12537 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12538 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12539 "00011100" // /* MW 3 */ + 12540 "11000110" // /* MW 2 */ + 12541 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12542 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12543 "00011100" // /* MW 3 */ + 12544 "11000110" // /* MW 2 */ + 12545 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12546 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12547 "00011100" // /* MW 3 */ + 12548 "11000110" // /* MW 2 */ + 12549 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12550 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12551 "00011100" // /* MW 3 */ + 12552 "11000110" // /* MW 2 */ + 12553 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12554 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12555 "00011100" // /* MW 3 */ + 12556 "11000110" // /* MW 2 */ + 12557 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12558 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12559 "00011100" // /* MW 3 */ + 12560 "11000110" // /* MW 2 */ + 12561 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12562 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12563 "00011100" // /* MW 3 */ + 12564 "11000110" // /* MW 2 */ + 12565 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12566 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12567 "00011100" // /* MW 3 */ + 12568 "11000110" // /* MW 2 */ + 12569 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12570 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12571 "00011100" // /* MW 3 */ + 12572 "11000110" // /* MW 2 */ + 12573 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12574 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12575 "00011100" // /* MW 3 */ + 12576 "11000110" // /* MW 2 */ + 12577 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12578 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12579 "00011100" // /* MW 3 */ + 12580 "11000110" // /* MW 2 */ + 12581 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 119 first + 12582 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12583 "00000000" // /* MW 3 */ + 12584 "00101000" // /* MW 2 */ + 12585 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 first +.delay_slot + 12586 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12587 "00011100" // /* MW 3 */ + 12588 "11000110" // /* MW 2 */ + 12589 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 +.delay_slot + 12590 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12591 "00011100" // /* MW 3 */ + 12592 "11000110" // /* MW 2 */ + 12593 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 +.delay_slot + 12594 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12595 "00011100" // /* MW 3 */ + 12596 "11000110" // /* MW 2 */ + 12597 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 +.delay_slot + 12598 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12599 "00011100" // /* MW 3 */ + 12600 "11000110" // /* MW 2 */ + 12601 "00010000" // /* MW 1 */ +.delay_slot + 12602 "11111000" // MOV r2, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12603 "10100000" // /* MW 3 */ + 12604 "10011111" // /* MW 2 */ +.label _ZN12me_primitive10udiv_dstepEjjRjS0___end + 12605 "00011000" // /* MW 1 */ +.label memset +.function memset memset +.src_ref 12 "string.c" 325 first +.src_ref 12 "string.c" 328 4 first +.function_start + 12608 "10000100" // JZ r1, #12768 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12768 delay_slots=5 */ + 12609 "00000001" // /* MW 5 */ + 12610 "00000000" // /* MW 4 */ + 12611 "11110000" // /* MW 3 */ + 12612 "00011000" // /* MW 2 */ + 12613 "00001000" // /* MW 1 */ +.src_ref 12 "string.c" 329 3 +.delay_slot + 12614 "11111000" // MOV p0, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12615 "11000000" // /* MW 3 */ + 12616 "01100010" // /* MW 2 */ + 12617 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12619 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12621 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12622 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12623 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12625 "00000000" // /* MW 1 */ +.src_ref 12 "string.c" 328 4 first +.src_ref 12 "string.c" 329 3 + 12626 "00000010" // MOVS p1, p0; MOV lc, r1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12627 "01110000" // /* MW 7 */ + 12628 "01010000" // /* MW 6 */ + 12629 "10111000" // /* MW 5 */ + 12630 "00000010" // /* MW 4 */ + 12631 "01100000" // /* MW 3 */ + 12632 "00010001" // /* MW 2 */ + 12633 "00110000" // /* MW 1 */ +.src_ref 12 "string.c" 328 4 + 12634 "01000100" // MOVXM ls, #12656 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12635 "11100000" // /* MW 5 */ + 12636 "11100010" // /* MW 4 */ + 12637 "00110001" // /* MW 3 */ + 12638 "00000000" // /* MW 2 */ + 12639 "00000000" // /* MW 1 */ +.src_ref 12 "string.c" 328 4 + 12640 "11100001" // NOPA; NOPB; NOPS; MOVXM le, #12752; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12641 "00000000" // /* MW 15 */ + 12642 "00000000" // /* MW 14 */ + 12643 "00010000" // /* MW 13 */ + 12644 "11101000" // /* MW 12 */ + 12645 "10111000" // /* MW 11 */ + 12646 "00001101" // /* MW 10 */ + 12647 "00000000" // /* MW 9 */ + 12648 "00000000" // /* MW 8 */ + 12649 "01011011" // /* MW 7 */ + 12650 "00000001" // /* MW 6 */ + 12651 "00100000" // /* MW 5 */ + 12652 "00000000" // /* MW 4 */ + 12653 "11110000" // /* MW 3 */ + 12654 "00101100" // /* MW 2 */ + 12655 "00000000" // /* MW 1 */ +.label ZLS_Fmemset_48 +.src_ref 12 "string.c" 329 3 first +.begin_of_loop +.loop_nesting 1 + 12656 "11100001" // ST.s8 r0, [p1], #1; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12657 "00000000" // /* MW 15 */ + 12658 "00000000" // /* MW 14 */ + 12659 "01111000" // /* MW 13 */ + 12660 "10100101" // /* MW 12 */ + 12661 "00000001" // /* MW 11 */ + 12662 "00000000" // /* MW 10 */ + 12663 "00000000" // /* MW 9 */ + 12664 "00000000" // /* MW 8 */ + 12665 "01011011" // /* MW 7 */ + 12666 "00000001" // /* MW 6 */ + 12667 "00100000" // /* MW 5 */ + 12668 "00000000" // /* MW 4 */ + 12669 "11100000" // /* MW 3 */ + 12670 "10000000" // /* MW 2 */ + 12671 "00100011" // /* MW 1 */ + 12672 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12673 "00000000" // /* MW 15 */ + 12674 "00000000" // /* MW 14 */ + 12675 "01111000" // /* MW 13 */ + 12676 "10100101" // /* MW 12 */ + 12677 "00000001" // /* MW 11 */ + 12678 "00000000" // /* MW 10 */ + 12679 "00000000" // /* MW 9 */ + 12680 "00000000" // /* MW 8 */ + 12681 "01011011" // /* MW 7 */ + 12682 "00000001" // /* MW 6 */ + 12683 "00100000" // /* MW 5 */ + 12684 "00000000" // /* MW 4 */ + 12685 "11110000" // /* MW 3 */ + 12686 "00101100" // /* MW 2 */ + 12687 "00000000" // /* MW 1 */ + 12688 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12689 "00000000" // /* MW 15 */ + 12690 "00000000" // /* MW 14 */ + 12691 "01111000" // /* MW 13 */ + 12692 "10100101" // /* MW 12 */ + 12693 "00000001" // /* MW 11 */ + 12694 "00000000" // /* MW 10 */ + 12695 "00000000" // /* MW 9 */ + 12696 "00000000" // /* MW 8 */ + 12697 "01011011" // /* MW 7 */ + 12698 "00000001" // /* MW 6 */ + 12699 "00100000" // /* MW 5 */ + 12700 "00000000" // /* MW 4 */ + 12701 "11110000" // /* MW 3 */ + 12702 "00101100" // /* MW 2 */ + 12703 "00000000" // /* MW 1 */ + 12704 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12705 "00000000" // /* MW 15 */ + 12706 "00000000" // /* MW 14 */ + 12707 "01111000" // /* MW 13 */ + 12708 "10100101" // /* MW 12 */ + 12709 "00000001" // /* MW 11 */ + 12710 "00000000" // /* MW 10 */ + 12711 "00000000" // /* MW 9 */ + 12712 "00000000" // /* MW 8 */ + 12713 "01011011" // /* MW 7 */ + 12714 "00000001" // /* MW 6 */ + 12715 "00100000" // /* MW 5 */ + 12716 "00000000" // /* MW 4 */ + 12717 "11110000" // /* MW 3 */ + 12718 "00101100" // /* MW 2 */ + 12719 "00000000" // /* MW 1 */ + 12720 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12721 "00000000" // /* MW 15 */ + 12722 "00000000" // /* MW 14 */ + 12723 "01111000" // /* MW 13 */ + 12724 "10100101" // /* MW 12 */ + 12725 "00000001" // /* MW 11 */ + 12726 "00000000" // /* MW 10 */ + 12727 "00000000" // /* MW 9 */ + 12728 "00000000" // /* MW 8 */ + 12729 "01011011" // /* MW 7 */ + 12730 "00000001" // /* MW 6 */ + 12731 "00100000" // /* MW 5 */ + 12732 "00000000" // /* MW 4 */ + 12733 "11110000" // /* MW 3 */ + 12734 "00101100" // /* MW 2 */ + 12735 "00000000" // /* MW 1 */ + 12736 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12737 "00000000" // /* MW 15 */ + 12738 "00000000" // /* MW 14 */ + 12739 "01111000" // /* MW 13 */ + 12740 "10100101" // /* MW 12 */ + 12741 "00000001" // /* MW 11 */ + 12742 "00000000" // /* MW 10 */ + 12743 "00000000" // /* MW 9 */ + 12744 "00000000" // /* MW 8 */ + 12745 "01011011" // /* MW 7 */ + 12746 "00000001" // /* MW 6 */ + 12747 "00100000" // /* MW 5 */ + 12748 "00000000" // /* MW 4 */ + 12749 "11110000" // /* MW 3 */ + 12750 "00101100" // /* MW 2 */ + 12751 "00000000" // /* MW 1 */ +.label ZLE_Fmemset_144 +.end_of_loop + 12752 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12753 "00000000" // /* MW 15 */ + 12754 "00000000" // /* MW 14 */ + 12755 "01111000" // /* MW 13 */ + 12756 "10100101" // /* MW 12 */ + 12757 "00000001" // /* MW 11 */ + 12758 "00000000" // /* MW 10 */ + 12759 "00000000" // /* MW 9 */ + 12760 "00000000" // /* MW 8 */ + 12761 "01011011" // /* MW 7 */ + 12762 "00000001" // /* MW 6 */ + 12763 "00100000" // /* MW 5 */ + 12764 "00000000" // /* MW 4 */ + 12765 "11110000" // /* MW 3 */ + 12766 "00101100" // /* MW 2 */ + 12767 "00000000" // /* MW 1 */ +.label TGT_Fmemset_160 +.src_ref 12 "string.c" 330 4 first +.loop_nesting 0 + 12768 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12769 "00000000" // /* MW 3 */ + 12770 "00101000" // /* MW 2 */ + 12771 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12773 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12775 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12776 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12777 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12778 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12779 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12780 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label memset__end + 12781 "00000000" // /* MW 1 */ +.dir 0 "/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable4/src" +.dir 1 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer" +.dir 2 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common" +.dir 3 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc" +.dir 4 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2" +.dir 5 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p" +.dir 6 "/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend" +.dir 7 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/conv" +.dir 8 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib" +.dir 9 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/detail" +.dir 10 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf" +.dir 11 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21990/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib" +.dir 12 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21990/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib/runtime/src" diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable9/Release/0_0_reloadable9.cmico b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable9/Release/0_0_reloadable9.cmico new file mode 100644 index 0000000000000000000000000000000000000000..f377058758269f564988080a1597f499edc1b997 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable9/Release/0_0_reloadable9.cmico @@ -0,0 +1 @@ ++Mdec diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable9/Release/0_0_reloadable9.lst b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable9/Release/0_0_reloadable9.lst new file mode 100644 index 0000000000000000000000000000000000000000..0b60ff24e25725d17ee1d91c96109567c043ad75 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable9/Release/0_0_reloadable9.lst @@ -0,0 +1,4100 @@ + +// File generated by darts version V-2024.06#84922c0d9f#241219, Fri May 30 11:30:06 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// darts -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -d -h -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable4 me + +// Release: ipp V-2024.06-TGT-241219 + +.text_segment PM 2528 +.entry_point +.label __Z13kernelWrapperPPvjjjj___func_begin0 +.label _Z13kernelWrapperPPvjjjj +.function_start + 2528 0x00 0xc2 0xd0 0xe9 0xe0 0x2c LDA r16, [p0]; NEZ r26, r1 + 2534 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 2540 0x0f 0xef 0x1d 0x98 ST p6, [sp, #-20] + 2544 0xfe 0x3a 0xb0 0x01 0xc8 0xd0 0x70 0x02 ST r14, [sp, #-16]; MOV r14, r3 + 2552 0xff 0x3e 0xb0 0x01 0xe8 0x50 0x70 0x02 ST r15, [sp, #-8]; MOV r15, r1 + 2560 0x0f 0xf7 0x9d 0x98 ST p7, [sp, #-12] + 2564 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4] + 2568 0x1e 0x68 0x02 0x18 ADD.NC p6, r16, #4 + 2572 0x06 0x1e 0x16 0x98 LDA r16, [p6], #4 + 2576 0x06 0x3e 0x56 0x98 LDA r18, [p6], #12 + 2580 0x06 0xee 0x36 0x98 LDA r17, [p6], #-8 + 2584 0x06 0x07 0x76 0x98 LDA r27, [p6] + 2588 0x00 0x00 NOPX + 2590 0x00 0x00 NOPX + 2592 0x00 0x00 NOPX + 2594 0x00 0x00 NOPX + 2596 0x00 0x00 NOPX + 2598 0x00 0x00 NOPX + 2600 0x14 0x21 0x22 0x18 SEL.EQZ r16, r16, r18, r27 + 2604 0x0e 0xd6 0x11 0x98 ST r16, [p6, #-12] + 2608 0xfc 0x1f 0xa0 0x35 0x39 0xe4 MOVX r16, #-1; MOV el0, r26 + 2614 0x00 0x00 NOPX + 2616 0x00 0x00 NOPX + 2618 0x00 0x00 NOPX + 2620 0x14 0x57 0x08 0x18 ACQ.COND r17, r16, r26 + 2624 0x04 0x41 0x29 0xa0 0x05 0x64 MOVX r17, #2; MOV r19, #1 + 2630 0xd5 0x23 0xb9 0x21 0x81 0xe4 LSHL r20, r26, r17; MOV r18, p0 + 2636 0x9c 0x9f 0x9c 0xd2 0xa2 0xa4 LTU r18, r19, r15; ADD.NC p6, r18, r20 + 2642 0xc0 0xd2 0xd7 0xe6 0x95 0x82 0x6e 0x60 0x72 0xba LDA r20, [p6]; ST r20, [sp, #-28]; MOV r19, p6 + 2652 0xfd 0x4a 0xb0 0x03 0x4c 0x90 0x70 0x02 ST r18, [sp, #-24]; MOV r26, r18 + 2660 0x00 0x00 NOPX + 2662 0x00 0x00 NOPX + 2664 0x00 0x00 NOPX + 2666 0x00 0x00 NOPX + 2668 0x00 0x00 NOPX + 2670 0x1e 0x6a 0x02 0x18 ADD.NC p6, r20, #4 + 2674 0x06 0x1e 0x96 0x98 LDA r20, [p6], #4 + 2678 0x06 0x3e 0xd6 0x98 LDA r22, [p6], #12 + 2682 0x06 0xee 0xb6 0x98 LDA r21, [p6], #-8 + 2686 0x06 0x07 0x76 0x98 LDA r27, [p6] + 2690 0x00 0x00 NOPX + 2692 0x00 0x00 NOPX + 2694 0x00 0x00 NOPX + 2696 0x00 0x00 NOPX + 2698 0x00 0x00 NOPX + 2700 0x00 0x00 NOPX + 2702 0x15 0x29 0x62 0x18 SEL.EQZ r20, r20, r22, r27 + 2706 0x0e 0xd6 0x91 0x98 ST r20, [p6, #-12] + 2710 0x00 0x00 NOPX + 2712 0x00 0x00 NOPX + 2714 0x00 0x00 NOPX + 2716 0x00 0x00 NOPX + 2718 0x15 0x57 0x08 0x18 ACQ.COND r21, r16, r26 + 2722 0x14 0xa5 0x1d 0x98 LSHL r18, r18, r17 + 2726 0x14 0xa3 0xb9 0xb3 0x92 0xa4 LSHL r18, r2, r17; ADD.NC r19, r19, r18 + 2732 0x76 0x9e 0x0c 0xd3 0x92 0xa4 NEZ r26, r14; ADD.NC p6, r19, r18 + 2738 0xc0 0xca 0xdf 0xc6 0xab 0x0c LDA r18, [p6]; ST r26, [sp, #-32] + 2744 0x00 0x00 NOPX + 2746 0x00 0x00 NOPX + 2748 0x00 0x00 NOPX + 2750 0x00 0x00 NOPX + 2752 0x00 0x00 NOPX + 2754 0x00 0x00 NOPX + 2756 0x1f 0x69 0x02 0x18 ADD.NC p7, r18, #4 + 2760 0x07 0x3e 0x76 0x98 LDA r19, [p7], #12 + 2764 0x07 0xee 0x56 0x98 LDA r18, [p7], #-8 + 2768 0x07 0x1e 0x96 0x98 LDA r20, [p7], #4 + 2772 0x07 0x07 0x76 0x98 LDA r27, [p7] + 2776 0x00 0x00 NOPX + 2778 0x00 0x00 NOPX + 2780 0x00 0x00 NOPX + 2782 0x00 0x00 NOPX + 2784 0x00 0x00 NOPX + 2786 0x00 0x00 NOPX + 2788 0x14 0xe7 0x42 0x18 SEL.EQZ r19, r19, r20, r27 + 2792 0x0f 0xd6 0x71 0x98 ST r19, [p7, #-12] + 2796 0x00 0x00 NOPX + 2798 0x00 0x00 NOPX + 2800 0x00 0x00 NOPX + 2802 0x00 0x00 NOPX + 2804 0x14 0x97 0x08 0x18 ACQ.COND r18, r16, r26 + 2808 0x10 0x21 0x1d 0x98 LSHL r16, r0, r17 + 2812 0x18 0x88 0x20 0xf8 MOV dj0, r16 + 2816 0x00 0x07 0xce 0xc4 0x80 0x44 MOVXM p7, #508480 + 2822 0xe0 0x13 0xdf 0xb8 0x5b 0x0c LDA p1, [p7, dj0]; ST el0, [sp, #-36] + 2828 0x00 0x00 NOPX + 2830 0x00 0x00 NOPX + 2832 0x00 0x00 NOPX + 2834 0x00 0x00 NOPX + 2836 0x00 0x00 NOPX + 2838 0x00 0x00 NOPX +.no_stack_arguments + 2840 0x10 0x30 0x40 0x18 JL p1 +.delay_slot + 2844 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.delay_slot +.swstall delay_slot + 2848 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2850 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2852 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2854 0x00 0x2c 0xf0 0x00 0x10 0x00 0x01 0xa5 0x7e 0xba NOPA; NOPB; NOPM +.return_address + 2864 0xe0 0xc6 0xd0 0x40 0x0a 0x2c LDA r17, [p7]; MOVX r16, #1 + 2870 0x07 0xdf 0x51 0x18 LDA r26, [sp, #-36] + 2874 0x07 0xe4 0x41 0x18 LDA dj0, [sp, #-28] + 2878 0x07 0xe8 0x29 0x18 LDA el0, [sp, #-24] + 2882 0x07 0xe0 0x09 0x18 LDA eh0, [sp, #-32] + 2886 0x00 0x00 NOPX + 2888 0x00 0x00 NOPX + 2890 0x18 0x68 0x88 0x18 ADD.NC p0, r17, #16 + 2894 0x00 0x06 0x36 0x98 LDA r17, [p0] + 2898 0x00 0x00 NOPX + 2900 0x00 0x00 NOPX + 2902 0x00 0x00 NOPX + 2904 0x00 0x00 NOPX + 2906 0x00 0x00 NOPX + 2908 0x00 0x00 NOPX + 2910 0x14 0x55 0x08 0x18 REL.COND r17, r16, r26 + 2914 0x1e 0xc6 0xdd 0xaf 0x41 0xd4 LDA r17, [p0, #-4]; MOV r27, r15 + 2920 0xe0 0x4a 0xdd 0x40 0x39 0xd4 LDA r18, [p7, dj0]; MOV r26, el0 + 2926 0x00 0x00 NOPX + 2928 0x00 0x00 NOPX + 2930 0x00 0x00 NOPX + 2932 0x00 0x00 NOPX + 2934 0x00 0x00 NOPX + 2936 0x14 0x27 0x11 0x98 SUB r19, r16, r17 + 2940 0x8c 0x66 0x4e 0xd2 0x10 0x24 SEL.EQZ r17, r17, r19, r27; ADD.NC p7, r18, #16 + 2946 0xe0 0xc6 0xd1 0xec 0x63 0x0c LDA r17, [p7]; ST r17, [p0, #-4] + 2952 0x00 0x00 NOPX + 2954 0x00 0x00 NOPX + 2956 0x00 0x00 NOPX + 2958 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 2960 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 2962 0x1e 0xa1 0x1c 0xf8 MOV r26, eh0 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2966 0x14 0x55 0x08 0x18 REL.COND r17, r16, r26 + 2970 0xfe 0xc6 0xdd 0xc0 0x39 0xd4 LDA r17, [p7, #-4]; MOV r27, el0 + 2976 0x06 0x06 0x56 0x98 LDA r18, [p6] + 2980 0x00 0x00 NOPX + 2982 0x00 0x00 NOPX + 2984 0x00 0x00 NOPX + 2986 0x00 0x00 NOPX + 2988 0x00 0x00 NOPX + 2990 0x14 0x27 0x11 0x98 SUB r19, r16, r17 + 2994 0x8c 0x66 0x40 0xd2 0x14 0x24 SEL.EQZ r17, r17, r19, r27; ADD.NC p0, r18, #20 + 3000 0x00 0xc6 0xdf 0xec 0x63 0x0c LDA r17, [p0]; ST r17, [p7, #-4] + 3006 0x00 0x00 NOPX + 3008 0x00 0x00 NOPX + 3010 0x00 0x00 NOPX + 3012 0x00 0x00 NOPX + 3014 0x00 0x00 NOPX + 3016 0x00 0x00 NOPX + 3018 0x14 0x55 0x08 0x18 REL.COND r17, r16, r26 + 3022 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] + 3026 0x00 0xe6 0x36 0x98 LDA r17, [p0, #-8] + 3030 0x07 0xef 0x19 0x18 LDA p6, [sp, #-20] +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 3034 0x07 0xf7 0x99 0x18 LDA p7, [sp, #-12] +.aggressive_scheduled_block_id 2 +.noswbrkpt + 3038 0x07 0xf1 0xd1 0x18 LDA r14, [sp, #-16] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3042 0x07 0xf9 0xf1 0x18 LDA r15, [sp, #-8] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3046 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3052 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3056 0x14 0x21 0x11 0x98 SUB r16, r16, r17 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3060 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3062 0x1e 0xd7 0x20 0xf8 MOV r27, r14 +.delay_slot + 3066 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 +.delay_slot + 3070 0x08 0xe6 0x11 0x98 ST r16, [p0, #-8] +.label _Z13kernelWrapperPPvjjjj__end +.label __Z13kernelWrapperPPvjjjj___func_end0 + +.text_segment PM 3088 +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E +.function_start + 3088 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 3092 0x00 0x07 0xc0 0xc6 0x40 0x44 MOVXM p0, #508704 +.delay_slot + 3098 0x18 0x00 0x80 0xb8 MOV m0, #64 +.delay_slot + 3102 0x08 0x04 0x01 0x98 ST m0, [p0] +.delay_slot + 3106 0x08 0x14 0x01 0x98 ST m0, [p0, #4] +.delay_slot +.swstall delay_slot + 3110 0x00 0x00 NOPX +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_end0 + +.text_segment PM 3120 +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv +.function_start + 3120 0x23 0x85 0xd0 0x00 0x01 0xf0 0x31 0x80 0x10 0xba LDA el0, [p1], #4; MOVXM p0, #508672 + 3130 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 3136 0x0f 0xf8 0x3d 0x98 ST lr, [sp, #-8] + 3140 0x0f 0xfd 0xf5 0x98 ST r15, [sp, #-4] + 3144 0x00 0x00 NOPX + 3146 0x00 0x00 NOPX + 3148 0x00 0x00 NOPX + 3150 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 3154 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 3158 0x00 0x00 NOPX + 3160 0x00 0x00 NOPX + 3162 0x00 0x00 NOPX + 3164 0x00 0x00 NOPX + 3166 0x00 0x00 NOPX + 3168 0x00 0x00 NOPX + 3170 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 3174 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 3178 0x00 0x00 NOPX + 3180 0x00 0x00 NOPX + 3182 0x00 0x00 NOPX + 3184 0x00 0x00 NOPX + 3186 0x00 0x00 NOPX + 3188 0x00 0x00 NOPX + 3190 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 3194 0x01 0x14 0x2e 0x98 LDA el0, [p1, #4] + 3198 0x00 0x00 NOPX + 3200 0x00 0x00 NOPX +.no_stack_arguments + 3202 0x00 0x06 0x08 0x00 0x01 0x04 JL #3088 +.delay_slot +.swstall delay_slot + 3208 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3210 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3212 0x00 0x00 NOPX +.delay_slot + 3214 0x08 0xdc 0x29 0x98 ST el0, [p0], #-12 +.delay_slot + 3218 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x5e 0x86 0x07 0x00 0x00 0x1c 0x2e NOPA; NOPS; MOV r15, p0; NOPV +.return_address + 3232 0xff 0x07 0x20 0x01 0x00 0x68 0x33 0xc4 0x08 0xba LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16 + 3242 0x01 0xe2 0x80 0x01 0x80 0x08 0x07 0xfd 0x58 0xba MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 + 3252 0xff 0xbe 0x20 0x0a 0x11 0x80 0x07 0xa0 0x01 0x7a LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128 + 3262 0x00 0x06 0x4a 0x98 LDA.u8 r18, [p0] + 3266 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3268 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3270 0x00 0x02 0x17 0x18 ST.s16 r16, [p0, dj0] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3274 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3278 0x10 0x22 0x05 0x18 MOVX r17, #1 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3282 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3288 0x14 0x77 0x27 0x98 EQ r27, r17, r18 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3292 0x14 0x21 0x82 0x18 SEL.EQZ r16, r16, r24, r27 +.delay_slot +.swstall delay_slot + 3296 0x00 0x00 NOPX +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + +.text_segment PM 3312 +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E +.function_start + 3312 0x02 0x80 0x80 0x00 0x01 0xf0 0x31 0x86 0x10 0xba MOVA m0, #20; MOVXM p0, #508684 + 3322 0x01 0x01 0x50 0x00 0x20 0x28 0x28 0x06 0x58 0xba LDA.u8 r0, [p0], m0; MOVX r2, #1; MOV r1, #6 + 3332 0x00 0x00 NOPX + 3334 0x00 0x00 NOPX + 3336 0x00 0x00 NOPX + 3338 0x00 0x00 NOPX + 3340 0x00 0x00 NOPX + 3342 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 3346 0x10 0x06 0xf0 0x18 NEZ r3, r0 +.delay_slot + 3350 0x10 0x80 0x08 0x98 NE r0, r2, r0 +.delay_slot + 3354 0x10 0x00 0x1d 0x98 LSHL r0, r0, r1 +.delay_slot + 3358 0x02 0x82 0x31 0x88 0x3b 0x5c ST r0, [p0, #4]; LSHL r2, r3, r1 +.delay_slot + 3364 0x08 0x04 0x51 0x98 ST r2, [p0] +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_end0 + +.text_segment PM 3376 +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv +.function_start + 3376 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 3382 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4] +.no_stack_arguments + 3386 0x00 0x06 0x18 0x00 0x01 0x04 JL #3120 +.delay_slot + 3392 0x00 0x07 0xc0 0xc6 0x00 0x44 MOVXM p0, #508672 +.delay_slot +.swstall delay_slot + 3398 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3400 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3402 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3404 0x00 0x01 0x67 0x98 NOPA +.return_address + 3408 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] +.tail_call + 3412 0x00 0x06 0x78 0x00 0x00 0x84 J #3312 +.delay_slot + 3418 0x00 0x07 0xc0 0xc6 0x00 0x44 MOVXM p0, #508672 +.delay_slot + 3424 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 3430 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3432 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3434 0x00 0x00 NOPX +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + +.text_segment PM 3440 +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.function_start + 3440 0x02 0x80 0x80 0x00 0x01 0xf1 0xb1 0x80 0x10 0xba MOVA m0, #20; MOVXM p3, #508672 + 3450 0x03 0x3c 0x16 0x98 LDA r0, [p3], #12 + 3454 0x61 0x05 0x58 0xcd 0x81 0xd4 LDA.u8 r1, [p3], m0; MOV p4, p3 + 3460 0x00 0x00 NOPX + 3462 0x00 0x00 NOPX + 3464 0x00 0x00 NOPX + 3466 0x00 0x00 NOPX + 3468 0x00 0x00 NOPX + 3470 0x00 0x00 NOPX + 3472 0x08 0x06 0xe8 0x40 0x01 0x84 JNZ r1, #3536 +.delay_slot + 3478 0x17 0xc4 0xe9 0x18 MOVX r2, #-6 +.delay_slot + 3482 0x10 0x00 0x2d 0x98 LSHL r0, r0, r2 +.delay_slot +.swstall delay_slot + 3486 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3488 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3490 0x00 0x00 NOPX + 3492 0x00 0x04 0x32 0x98 LDA.s16 r1, [p0] + 3496 0x00 0x00 NOPX + 3498 0x00 0x00 NOPX + 3500 0x00 0x00 NOPX + 3502 0x00 0x06 0xf8 0x00 0x00 0x84 J #3568 +.delay_slot +.swstall delay_slot + 3508 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3510 0x00 0x00 NOPX +.delay_slot + 3512 0x18 0x05 0x72 0xf8 VBCST.16 x0, r1 +.delay_slot +.swstall delay_slot + 3516 0x00 0x01 0x67 0x98 NOPA +.delay_slot + 3520 0x00 0x2c 0xf0 0x00 0x20 0x04 0x13 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST x0, [p0]; NOPX; NOPM; NOPV +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_96 + 3536 0x01 0x04 0x32 0x98 LDA.s16 r1, [p1] + 3540 0x00 0x00 NOPX + 3542 0x00 0x00 NOPX + 3544 0x00 0x00 NOPX + 3546 0x00 0x00 NOPX + 3548 0x00 0x00 NOPX + 3550 0x00 0x00 NOPX + 3552 0x18 0x05 0x72 0xf8 VBCST.16 x0, r1 + 3556 0x00 0x00 NOPX + 3558 0x00 0x2c 0xf1 0x04 0x13 0x00 0x00 0x00 0x00 0x7a NOPA; VST x0, [p1]; NOPX +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_128 + 3568 0x8a 0x80 0xd0 0x00 0x07 0x8a 0xb8 0x3f 0x48 0xba LDA m0, [p4, #20]; MOVX r0, #60; ADD.NC lc, r0, #-3 + 3578 0x62 0x90 0xd0 0x00 0x00 0x00 0x7f 0x30 0x10 0xba LDA m1, [p3, #4]; MOVXM ls, #3680 + 3588 0x00 0x00 0x06 0xfd 0x00 0x44 MOVXM le, #3712 + 3594 0x00 0x07 0xc8 0xc4 0x40 0x44 MOVXM p4, #508448 + 3600 0x04 0x04 0x22 0x98 LDA.s8 r1, [p4] + 3604 0x00 0x00 NOPX + 3606 0x00 0x00 NOPX + 3608 0x00 0x08 0xab 0x98 VLDA.CONV.fp32.bf16 cml1, [p0], m0 + 3612 0x01 0x29 0x2b 0x98 VLDA.CONV.fp32.bf16 cml2, [p1], m1 + 3616 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3620 0x01 0x2a 0x2b 0x98 VLDA.CONV.fp32.bf16 cml4, [p1], m1 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3624 0x01 0x15 0x70 0xf5 0x00 0x2c VLDA.CONV.fp32.bf16 cml1, [p0], m0; MOVX crRnd, r1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3630 0x25 0x25 0x70 0x04 0x03 0x28 0x3d 0x62 VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3638 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3642 0x25 0x45 0x70 0x04 0x04 0x10 0x3d 0x62 VLDA.CONV.fp32.bf16 cml4, [p1], m1; VADD.f dm4, dm0, dm4, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3650 0x00 0x08 0xab 0x98 VLDA.CONV.fp32.bf16 cml1, [p0], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3654 0x25 0x25 0x70 0x04 0x03 0x28 0x3d 0x62 VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3662 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3666 0x25 0x45 0x74 0x3b 0x46 0x00 0x00 0x40 0x1a 0x57 0x04 0x10 0x3d 0x6e VLDA.CONV.fp32.bf16 cml4, [p1], m1; VST.CONV.bf16.fp32 cml3, [p2], #64; NOPM; VADD.f dm4, dm0, dm4, r0 +.label ZLS_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_240 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3680 0x00 0x08 0xab 0x98 VLDA.CONV.fp32.bf16 cml1, [p0], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3684 0x25 0x25 0x70 0x00 0x21 0x0f 0x11 0x8e 0x03 0x28 0x3d 0x66 VLDA.CONV.fp32.bf16 cml2, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3696 0x01 0x05 0x70 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLE_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_272 +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3712 0x25 0x45 0x70 0x00 0x22 0x1d 0xa3 0x00 0x00 0x00 0x01 0xa5 0x78 0x20 0x81 0xeb VLDA.CONV.fp32.bf16 cml4, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml3, [p2], #64;NOPX; NOPM; VADD.f dm4, dm0, dm4, r0 +.loop_nesting 0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3728 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3730 0x43 0xc4 0x60 0x02 0x03 0x28 0x3d 0x62 VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3738 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3740 0x43 0xb4 0x60 0x02 0x04 0x10 0x3d 0x62 VST.CONV.bf16.fp32 cml3, [p2], #64; VADD.f dm4, dm0, dm4, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3748 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3750 0x43 0xc4 0x60 0x50 0x00 0x5c VST.CONV.bf16.fp32 cml4, [p2], #64;RET lr +.delay_slot +.swstall delay_slot + 3756 0x00 0x00 NOPX +.delay_slot + 3758 0x0a 0x1d 0xa3 0x18 VST.CONV.bf16.fp32 cml3, [p2], #64 +.delay_slot +.swstall delay_slot + 3762 0x00 0x00 NOPX +.delay_slot + 3764 0x0a 0x1e 0x23 0x18 VST.CONV.bf16.fp32 cml4, [p2], #64 +.delay_slot +.swstall delay_slot + 3768 0x00 0x00 NOPX +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0 + +.text_segment PM 3776 +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E +.function_start + 3776 0x50 0x91 0x60 0x00 0x04 0x00 0x00 0x00 0x71 0x3a MOVS p2, p1; PADDXM [sp], #128 + 3786 0xff 0x87 0xb0 0x02 0x08 0x60 0x70 0x02 ST lr, [sp, #-4]; MOV r16, p0 + 3794 0x1c 0x55 0xe0 0xf8 MOV r17, sp + 3798 0x00 0x07 0xc6 0xc6 0x18 0x44 MOVXM p3, #508684 + 3804 0x65 0xed 0x50 0xd1 0x80 0x14 LDA.u8 r27, [p3], #2; ADD.NC p0, r17, #-128 + 3810 0x73 0xca 0x50 0x0e 0x56 0x0c LDA.s16 r18, [p3], #-14; VST sfh, [p0] + 3816 0x00 0x06 0x57 0x18 ST.s16 r18, [p0] + 3820 0x00 0x00 NOPX + 3822 0x00 0x00 NOPX +.no_stack_arguments + 3824 0x00 0x06 0xb8 0x00 0x01 0x04 JL #3440 +.delay_slot + 3830 0x1c 0x50 0xc0 0xf8 MOV r17, p0 +.delay_slot +.swstall delay_slot + 3834 0x00 0x00 NOPX +.delay_slot + 3836 0x14 0x25 0x12 0x18 SEL.EQZ r18, r16, r17, r27 +.delay_slot + 3840 0x8c 0x20 0x42 0xd2 0x41 0xe4 SEL.EQZ r16, r17, r16, r27; MOV p1, r18 +.delay_slot + 3846 0x00 0x2c 0xf0 0x00 0x10 0x00 0x34 0x10 0x7e 0xba NOPA; NOPB; MOV p0, r16 +.return_address + 3856 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] + 3860 0x00 0x00 NOPX + 3862 0x00 0x00 NOPX + 3864 0x00 0x00 NOPX + 3866 0x00 0x00 NOPX + 3868 0x00 0x00 NOPX + 3870 0x00 0x00 NOPX + 3872 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 3876 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128 +.delay_slot +.swstall delay_slot + 3882 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3884 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3886 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3888 0x00 0x00 NOPX +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_end0 + +.text_segment PM 3904 +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function_start + 3904 0x00 0x07 0xc6 0xc4 0x00 0x44 MOVXM p3, #508416 + 3910 0x60 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p3]; MOV r17, CORE_ID + 3916 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 3922 0xff 0x63 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p6, [sp, #-8]; MOV r0, r15 + 3930 0xff 0x82 0xb0 0x00 0x01 0xf3 0x31 0x02 0x11 0x3a ST r0, [sp, #-4]; MOVXM p6, #508420 + 3940 0x1b 0xd4 0xc0 0xf8 MOV r15, p2 + 3944 0x00 0x00 NOPX + 3946 0x00 0x00 NOPX + 3948 0x80 0x08 0x08 0x40 0x01 0x84 JNZ r16, #4112 +.delay_slot + 3954 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17 +.delay_slot + 3958 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 3962 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12] +.delay_slot + 3966 0xc0 0xc6 0x30 0x03 0x30 0x60 0x70 0x02 ST r17, [p6]; MOV p6, p0 +.delay_slot + 3974 0x00 0x07 0xc0 0xc6 0x00 0x44 MOVXM p0, #508672 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3980 0x00 0x07 0xc4 0xc4 0x40 0x44 MOVXM p2, #508448 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3986 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x31 0x0e 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #508444 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3996 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 3998 0x00 0x06 0x98 0x00 0x01 0x04 JL #3376 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4004 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4006 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4008 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 4012 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 4016 0x00 0x2c 0xf0 0x00 0x22 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV +.return_address + 4032 0x00 0x07 0xc4 0xc4 0x08 0x44 MOVXM p2, #508420 + 4038 0x40 0xc2 0xd0 0x00 0x01 0xf1 0x31 0x80 0x10 0xba LDA r16, [p2]; MOVXM p2, #508672 + 4048 0x40 0xc6 0xd0 0x00 0x01 0xf1 0x31 0x80 0x10 0xba LDA r17, [p2]; MOVXM p2, #508672 + 4058 0x4a 0xcb 0x50 0x00 0x01 0xf0 0xb1 0x04 0x10 0xba LDA.u16 r18, [p2, #10]; MOVXM p1, #508424 + 4068 0x00 0x00 NOPX + 4070 0x00 0x00 NOPX + 4072 0x00 0x08 0x10 0x00 0x00 0x84 J #4128 +.delay_slot + 4078 0x00 0x07 0xc0 0xc4 0x30 0x44 MOVXM p0, #508440 +.delay_slot +.swstall delay_slot + 4084 0x00 0x00 NOPX +.delay_slot + 4086 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 +.delay_slot + 4090 0x00 0x2c 0xf0 0x0c 0xa3 0x0c NOPA; ST r18, [p0] +.delay_slot + 4096 0x00 0x2c 0xf0 0x00 0x21 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 + 4112 0x00 0x2c 0xf0 0x00 0x22 0x80 0x8b 0x00 0x01 0xf0 0xb1 0x04 0x10 0x00 0x00 0xe1 NOPA; NOPB; MOVS p2, p0; MOVXM p1, #508424; NOPV +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 + 4128 0x73 0x91 0x60 0x03 0xb3 0xc3 0x00 0x02 MOVS p3, p7; ADD.NC p7, r15, #12 + 4136 0xff 0xee 0xd0 0x00 0x01 0xf0 0x31 0x00 0x10 0xba LDA r27, [p7], #-4; MOVXM p0, #508416 + 4146 0x07 0xfe 0x16 0x98 LDA r16, [p7], #-4 + 4150 0x07 0xfe 0x36 0x98 LDA r17, [p7], #-4 + 4154 0x07 0x46 0x56 0x98 LDA r18, [p7, #16] + 4158 0x00 0x00 NOPX + 4160 0x00 0x00 NOPX + 4162 0x00 0x00 NOPX + 4164 0x00 0x00 NOPX + 4166 0x00 0x00 NOPX + 4168 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 4172 0x0f 0x06 0x11 0x98 ST r16, [p7] + 4176 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 4180 0x00 0x00 NOPX + 4182 0x00 0x00 NOPX + 4184 0x00 0x00 NOPX + 4186 0x14 0x93 0x08 0x18 ACQ r18, r16 + 4190 0x04 0x00 0xa7 0xad 0x81 0xe4 MOVX r16, #1; MOV r15, p3 + 4196 0x00 0x00 NOPX + 4198 0x00 0x00 NOPX + 4200 0x00 0x06 0x36 0x98 LDA r17, [p0] + 4204 0xc0 0xca 0xdc 0xdd 0x81 0xd4 LDA r18, [p6]; MOV p6, p7 + 4210 0x01 0x06 0x76 0x98 LDA r19, [p1] + 4214 0x07 0x5c 0x9e 0x98 LDA p1, [p7], #20 + 4218 0x00 0x00 NOPX +.no_stack_arguments + 4220 0x00 0x07 0x60 0x00 0x01 0x04 JL #3776 +.delay_slot +.swstall delay_slot + 4226 0x00 0x00 NOPX +.delay_slot + 4228 0x14 0x62 0x07 0x18 ADD r17, r17, #1 +.delay_slot + 4232 0x08 0x06 0x31 0x98 ST r17, [p0] +.delay_slot + 4236 0x14 0xe1 0x0d 0x98 LSHL r16, r19, r16 +.delay_slot + 4240 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xa0 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV +.return_address + 4256 0xca 0xc6 0xd0 0x00 0x01 0xf3 0x31 0x00 0x10 0xba LDA r17, [p6, #20]; MOVXM p6, #508416 + 4266 0x10 0x20 0x05 0x18 MOVX r16, #1 + 4270 0x00 0x00 NOPX + 4272 0x00 0x00 NOPX + 4274 0x00 0x00 NOPX + 4276 0x00 0x00 NOPX + 4278 0x00 0x00 NOPX + 4280 0x14 0x51 0x08 0x18 REL r17, r16 + 4284 0xfc 0xce 0xd0 0x00 0x01 0xf1 0x31 0x0c 0x10 0xba LDA r19, [p7, #-8]; MOVXM p2, #508440 + 4294 0x06 0x06 0x36 0x98 LDA r17, [p6] + 4298 0x02 0x06 0x56 0x98 LDA r18, [p2] + 4302 0x00 0x00 NOPX + 4304 0x00 0x00 NOPX + 4306 0x00 0x00 NOPX + 4308 0x00 0x00 NOPX + 4310 0x14 0x21 0x31 0x98 SUB r16, r16, r19 + 4314 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8] + 4318 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 4322 0x80 0x08 0x80 0x40 0x01 0x84 JNZ r16, #4352 +.delay_slot +.swstall delay_slot + 4328 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4330 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4332 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4334 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4336 0x00 0x00 NOPX + 4338 0x10 0x20 0x01 0x18 MOVX r16, #0 + 4342 0x00 0x2c 0xf6 0x06 0x11 0x80 0x00 0x00 0x00 0x7a NOPA; ST r16, [p6]; NOPX +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 + 4352 0x07 0xf4 0x39 0x18 LDA lr, [sp, #-12] + 4356 0x07 0xfb 0x19 0x18 LDA p6, [sp, #-8] + 4360 0x00 0x00 NOPX + 4362 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 4364 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4366 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4370 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4372 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4376 0x1f 0x67 0xa0 0xf8 MOV p7, r15 +.delay_slot + 4380 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 4386 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4388 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4390 0x00 0x00 NOPX +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + +.text_segment PM 4400 +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv +.function_start + 4400 0x23 0x85 0xd0 0x00 0x01 0xf0 0x31 0xc0 0x10 0xba LDA el0, [p1], #4; MOVXM p0, #508800 + 4410 0x00 0x00 NOPX + 4412 0x00 0x00 NOPX + 4414 0x00 0x00 NOPX + 4416 0x00 0x00 NOPX + 4418 0x00 0x00 NOPX + 4420 0x00 0x00 NOPX + 4422 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 4426 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 4430 0x00 0x00 NOPX + 4432 0x00 0x00 NOPX + 4434 0x00 0x00 NOPX + 4436 0x00 0x00 NOPX + 4438 0x00 0x00 NOPX + 4440 0x00 0x00 NOPX + 4442 0x08 0x04 0x29 0x98 ST el0, [p0] + 4446 0x01 0x14 0x2e 0x98 LDA el0, [p1, #4] + 4450 0x00 0x00 NOPX + 4452 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot + 4456 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4458 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4460 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4462 0x00 0x00 NOPX +.delay_slot + 4464 0x08 0x14 0x29 0x98 ST el0, [p0, #4] +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv__end +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_end0 + +.text_segment PM 4480 +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E +.function_start + 4480 0xff 0x40 0x00 0x3d 0x68 0x00 0x01 0xf1 0x31 0xc0 0x10 0xb6 MOVA r0, #-6; VLDB x10, [p0], #64; MOVXM p2, #508800 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4492 0x40 0x8a 0xd0 0x3b 0xe8 0x00 0x01 0xf1 0x31 0x10 0x10 0xb6 LDA r2, [p2]; VLDB x7, [p0], #64; MOVXM p2, #508448 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4504 0x40 0x84 0x50 0x3d 0x68 0x00 0x00 0x10 0xc8 0x40 0x10 0xb6 LDA.s8 r1, [p2]; VLDB x10, [p0], #64; MOVXM r6, #16512 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4516 0x03 0xbe 0x80 0x32 0xe5 0xf4 VLDB x7, [p0], #64; VBCST.16 x0, r6 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4522 0x00 0x00 0xc2 0x21 0x00 0x44 MOVXM r4, #49280 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4528 0x18 0x91 0x72 0xf8 VBCST.16 x1, r4 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4532 0x00 0x00 0x71 0xbf 0xfe 0x44 MOVXM r3, #32767 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4538 0x1c 0x50 0x2c 0xf8 VMIN_GE.bf16 x8, r16, x10, x0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4542 0x10 0x01 0xb6 0x81 0xd9 0xe4 LSHL r0, r2, r0; VMAX_LT.bf16 x6, r16, x8, x1 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4548 0x0f 0x50 0x08 0x70 0x59 0xe4 MOVX crRnd, r1; VMIN_GE.bf16 x8, r16, x7, x0 + 4554 0x19 0x0d 0x72 0xf8 VBCST.16 x2, r3 + 4558 0x00 0x00 0x32 0xba 0x00 0x44 MOVXM r5, #15616 + 4564 0x19 0x95 0x72 0xf8 VBCST.16 x3, r5 + 4568 0x00 0x00 0x38 0xbe 0x00 0x44 MOVXM r17, #16128 + 4574 0x1d 0xb1 0x2b 0x78 VBAND x11, x6, x2 + 4578 0x64 0x5e 0x25 0x8a 0xe5 0xe4 MOVX r17, #828; VBCST.16 x5, r17 + 4584 0x04 0xc0 0xec 0xe6 0x8c 0xe7 0x61 0x62 VMAX_LT.bf16 x9, r16, x8, x1; VMUL.f dm4, x3, x11, r17 + 4592 0x1c 0x49 0x2b 0x78 VBAND x8, x9, x2 + 4596 0x00 0x00 0x31 0x3d 0x00 0x44 MOVXM r2, #16000 + 4602 0x02 0x09 0x72 0xe6 0x8a 0xe7 0x01 0x62 VBCST.16 x4, r2; VMUL.f dm2, x3, x8, r17 + 4610 0x18 0x0b 0x8a 0xf8 VCONV.fp32.bf16 cml0, x5 + 4614 0x04 0x50 0x2c 0xe6 0x8b 0x0c 0x81 0x62 VMIN_GE.bf16 x8, r16, x10, x0; VMAC.f dm3, dm0, x6, x4, r17 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 4622 0xb2 0x42 0xc0 0x00 0x00 0x8f 0x24 0x02 0x89 0x12 0x81 0x56 VCONV.bf16.fp32 x11, cml4; MOVXM ls, #4672; VMAC.f dm1, dm0, x9, x4, r17 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4634 0x1b 0x40 0xec 0xf8 VMAX_LT.bf16 x6, r16, x8, x1 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4638 0x00 0x00 0x00 0xb7 0x2a 0x02 0x8a 0x76 0xc3 0x5a MOVXM le, #4768; VMSC.f dm2, dm3, x11, x6, r17 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4648 0x52 0x22 0xc0 0x02 0xb8 0x3f 0x80 0x02 VCONV.bf16.fp32 x5, cml2; ADD.NC lc, r0, #-2 + 4656 0x1c 0x38 0x2c 0xf8 VMIN_GE.bf16 x8, r16, x7, x0 + 4660 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x02 0xd8 0x95 0xb0 0xf6 NOPA; NOPB; NOPS; VBAND x11, x6, x2 +.label ZLS_F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_192 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 4672 0x00 0x3d 0x6c 0xc0 0xec 0xe6 0x8c 0x2b 0x23 0x4a VLDB x10, [p0], #64; VMAX_LT.bf16 x9, r16, x8, x1; VMSC.f dm4, dm1, x5, x9, r17 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 4682 0x00 0x3b 0xec 0x49 0x2b 0x66 0x8c 0xe7 0x61 0x4a VLDB x7, [p0], #64; VBAND x8, x9, x2; VMUL.f dm4, x3, x11, r17 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4692 0x8b 0x0c 0x81 0x48 VMAC.f dm3, dm0, x6, x4, r17 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4696 0x8a 0xe7 0x01 0x48 VMUL.f dm2, x3, x8, r17 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4700 0x23 0xa4 0x60 0x02 0x89 0x12 0x81 0x62 VST.CONV.bf16.fp32 cml2, [p1], #64; VMAC.f dm1, dm0, x9, x4, r17 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4708 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4710 0x00 0x2c 0xf1 0x1e 0x23 0x00 0x00 0x00 0x00 0x7a NOPA; VST.CONV.bf16.fp32 cml4, [p1], #64;NOPX +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 4720 0x00 0x2c 0xf0 0x00 0x25 0x92 0x16 0x00 0x00 0x02 0x28 0x16 0x78 0x00 0x00 0xe1 NOPA; NOPB; VCONV.bf16.fp32 x11, cml4; NOPX; VMIN_GE.bf16 x8, r16, x10, x0; NOPV +.aggressive_scheduled_block_id 4 +.noswbrkpt + 4736 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x01 0xa0 0x76 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x8, x1; NOPV +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4752 0x00 0x2c 0xf0 0x00 0x22 0x91 0x16 0x00 0x00 0x02 0x1c 0x16 0x7c 0x53 0xb6 0x1b NOPA; NOPB; VCONV.bf16.fp32 x5, cml2; NOPX; VMIN_GE.bf16 x8, r16, x7, x0; VMSC.f dm2, dm3, x11, x6, r17 +.label ZLE_F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_288 +.end_of_loop + 4768 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x02 0xd8 0x95 0xb8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VBAND x11, x6, x2; NOPV +.loop_nesting 0 + 4784 0x04 0xc0 0xec 0xe6 0x8c 0x2b 0x23 0x62 VMAX_LT.bf16 x9, r16, x8, x1; VMSC.f dm4, dm1, x5, x9, r17 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 4792 0x1c 0x49 0x2b 0x78 VBAND x8, x9, x2 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 4796 0x8c 0xe7 0x61 0x48 VMUL.f dm4, x3, x11, r17 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 4800 0x8a 0xe7 0x01 0x48 VMUL.f dm2, x3, x8, r17 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 4804 0x09 0x1d 0x23 0x18 VST.CONV.bf16.fp32 cml2, [p1], #64 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 4808 0x00 0x00 NOPX +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4810 0x09 0x1e 0x23 0x18 VST.CONV.bf16.fp32 cml4, [p1], #64 + 4814 0x8b 0x0c 0x81 0x48 VMAC.f dm3, dm0, x6, x4, r17 + 4818 0xb2 0x42 0xc0 0x02 0x89 0x12 0x81 0x62 VCONV.bf16.fp32 x11, cml4; VMAC.f dm1, dm0, x9, x4, r17 + 4826 0x0a 0x91 0x16 0x18 VCONV.bf16.fp32 x5, cml2 + 4830 0x8a 0x76 0xc3 0x48 VMSC.f dm2, dm3, x11, x6, r17 + 4834 0x8c 0x2b 0x23 0x48 VMSC.f dm4, dm1, x5, x9, r17 + 4838 0x00 0x00 NOPX + 4840 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot + 4844 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4846 0x00 0x00 NOPX +.delay_slot + 4848 0x09 0x1d 0x23 0x18 VST.CONV.bf16.fp32 cml2, [p1], #64 +.delay_slot + 4852 0x09 0x1e 0x23 0x18 VST.CONV.bf16.fp32 cml4, [p1], #64 +.delay_slot +.swstall delay_slot + 4856 0x00 0x00 NOPX +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E__end +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_end0 + +.text_segment PM 4864 +.label __Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function_start + 4864 0x00 0x07 0xc6 0xc4 0x00 0x44 MOVXM p3, #508416 + 4870 0x60 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p3]; MOV r17, CORE_ID + 4876 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 4882 0xff 0x63 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p6, [sp, #-8]; MOV r0, r15 + 4890 0xff 0x82 0xb0 0x00 0x01 0xf3 0x31 0x02 0x11 0x3a ST r0, [sp, #-4]; MOVXM p6, #508420 + 4900 0x1b 0xd4 0xc0 0xf8 MOV r15, p2 + 4904 0x00 0x00 NOPX + 4906 0x00 0x00 NOPX + 4908 0x80 0x09 0xe8 0x40 0x01 0x84 JNZ r16, #5072 +.delay_slot + 4914 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17 +.delay_slot + 4918 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 4922 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12] +.delay_slot + 4926 0xc0 0xc6 0x30 0x03 0x30 0x60 0x70 0x02 ST r17, [p6]; MOV p6, p0 +.delay_slot + 4934 0x00 0x07 0xc0 0xc7 0x00 0x44 MOVXM p0, #508800 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4940 0x00 0x07 0xc4 0xc4 0x40 0x44 MOVXM p2, #508448 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4946 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x31 0x0e 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #508444 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4956 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 4958 0x00 0x08 0x98 0x00 0x01 0x04 JL #4400 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4964 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4966 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4968 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 4972 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 4976 0x00 0x2c 0xf0 0x00 0x22 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV +.return_address + 4992 0x00 0x07 0xc4 0xc4 0x08 0x44 MOVXM p2, #508420 + 4998 0x40 0xc2 0xd0 0x00 0x01 0xf1 0x31 0xc0 0x10 0xba LDA r16, [p2]; MOVXM p2, #508800 + 5008 0x40 0xc6 0xd0 0x00 0x01 0xf1 0x31 0xc0 0x10 0xba LDA r17, [p2]; MOVXM p2, #508800 + 5018 0x48 0xcb 0x50 0x00 0x01 0xf0 0xb1 0x04 0x10 0xba LDA.u16 r18, [p2, #8]; MOVXM p1, #508424 + 5028 0x00 0x00 NOPX + 5030 0x00 0x00 NOPX + 5032 0x00 0x09 0xf0 0x00 0x00 0x84 J #5088 +.delay_slot + 5038 0x00 0x07 0xc0 0xc4 0x30 0x44 MOVXM p0, #508440 +.delay_slot +.swstall delay_slot + 5044 0x00 0x00 NOPX +.delay_slot + 5046 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 +.delay_slot + 5050 0x00 0x2c 0xf0 0x0c 0xa3 0x0c NOPA; ST r18, [p0] +.delay_slot + 5056 0x00 0x2c 0xf0 0x00 0x21 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV +.label TGT_F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 + 5072 0x00 0x2c 0xf0 0x00 0x22 0x80 0x8b 0x00 0x01 0xf0 0xb1 0x04 0x10 0x00 0x00 0xe1 NOPA; NOPB; MOVS p2, p0; MOVXM p1, #508424; NOPV +.label TGT_F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 + 5088 0x73 0x91 0x60 0x03 0xb3 0xc3 0x00 0x02 MOVS p3, p7; ADD.NC p7, r15, #12 + 5096 0xff 0xee 0xd0 0x00 0x01 0xf0 0x31 0x00 0x10 0xba LDA r27, [p7], #-4; MOVXM p0, #508416 + 5106 0x07 0xfe 0x16 0x98 LDA r16, [p7], #-4 + 5110 0x07 0xfe 0x36 0x98 LDA r17, [p7], #-4 + 5114 0x07 0x46 0x56 0x98 LDA r18, [p7, #16] + 5118 0x00 0x00 NOPX + 5120 0x00 0x00 NOPX + 5122 0x00 0x00 NOPX + 5124 0x00 0x00 NOPX + 5126 0x00 0x00 NOPX + 5128 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 5132 0x0f 0x06 0x11 0x98 ST r16, [p7] + 5136 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 5140 0x00 0x00 NOPX + 5142 0x00 0x00 NOPX + 5144 0x00 0x00 NOPX + 5146 0x14 0x93 0x08 0x18 ACQ r18, r16 + 5150 0x04 0x00 0xa7 0xad 0x81 0xe4 MOVX r16, #1; MOV r15, p3 + 5156 0x00 0x00 NOPX + 5158 0x00 0x00 NOPX + 5160 0x00 0x06 0x36 0x98 LDA r17, [p0] + 5164 0xc0 0xca 0xdc 0xdd 0x81 0xd4 LDA r18, [p6]; MOV p6, p7 + 5170 0x01 0x06 0x76 0x98 LDA r19, [p1] + 5174 0x07 0x5c 0x9e 0x98 LDA p1, [p7], #20 + 5178 0x00 0x00 NOPX +.no_stack_arguments + 5180 0x00 0x08 0xc0 0x00 0x01 0x04 JL #4480 +.delay_slot +.swstall delay_slot + 5186 0x00 0x00 NOPX +.delay_slot + 5188 0x14 0x62 0x07 0x18 ADD r17, r17, #1 +.delay_slot + 5192 0x08 0x06 0x31 0x98 ST r17, [p0] +.delay_slot + 5196 0x14 0xe1 0x0d 0x98 LSHL r16, r19, r16 +.delay_slot + 5200 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xa0 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV +.return_address + 5216 0xca 0xc6 0xd0 0x00 0x01 0xf3 0x31 0x00 0x10 0xba LDA r17, [p6, #20]; MOVXM p6, #508416 + 5226 0x10 0x20 0x05 0x18 MOVX r16, #1 + 5230 0x00 0x00 NOPX + 5232 0x00 0x00 NOPX + 5234 0x00 0x00 NOPX + 5236 0x00 0x00 NOPX + 5238 0x00 0x00 NOPX + 5240 0x14 0x51 0x08 0x18 REL r17, r16 + 5244 0xfc 0xce 0xd0 0x00 0x01 0xf1 0x31 0x0c 0x10 0xba LDA r19, [p7, #-8]; MOVXM p2, #508440 + 5254 0x06 0x06 0x36 0x98 LDA r17, [p6] + 5258 0x02 0x06 0x56 0x98 LDA r18, [p2] + 5262 0x00 0x00 NOPX + 5264 0x00 0x00 NOPX + 5266 0x00 0x00 NOPX + 5268 0x00 0x00 NOPX + 5270 0x14 0x21 0x31 0x98 SUB r16, r16, r19 + 5274 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8] + 5278 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 5282 0x80 0x0a 0x60 0x40 0x01 0x84 JNZ r16, #5312 +.delay_slot +.swstall delay_slot + 5288 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5290 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5292 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5294 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5296 0x00 0x00 NOPX + 5298 0x10 0x20 0x01 0x18 MOVX r16, #0 + 5302 0x00 0x2c 0xf6 0x06 0x11 0x80 0x00 0x00 0x00 0x7a NOPA; ST r16, [p6]; NOPX +.label TGT_F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 + 5312 0x07 0xf4 0x39 0x18 LDA lr, [sp, #-12] + 5316 0x07 0xfb 0x19 0x18 LDA p6, [sp, #-8] + 5320 0x00 0x00 NOPX + 5322 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 5324 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.noswbrkpt + 5326 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5330 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5332 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5336 0x1f 0x67 0xa0 0xf8 MOV p7, r15 +.delay_slot + 5340 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 5346 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5348 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5350 0x00 0x00 NOPX +.label _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + +.text_segment PM 5360 +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.function_start + 5360 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 5364 0x00 0x07 0xc0 0xc6 0xc0 0x44 MOVXM p0, #508768 +.delay_slot + 5370 0x18 0x00 0x80 0xb8 MOV m0, #64 +.delay_slot + 5374 0x08 0x04 0x01 0x98 ST m0, [p0] +.delay_slot + 5378 0x08 0x14 0x01 0x98 ST m0, [p0, #4] +.delay_slot +.swstall delay_slot + 5382 0x00 0x00 NOPX +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_end0 + +.text_segment PM 5392 +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.function_start + 5392 0x23 0x85 0xd0 0x00 0x01 0xf0 0x31 0xa0 0x10 0xba LDA el0, [p1], #4; MOVXM p0, #508736 + 5402 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 5408 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4] + 5412 0x00 0x00 NOPX + 5414 0x00 0x00 NOPX + 5416 0x00 0x00 NOPX + 5418 0x00 0x00 NOPX + 5420 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 5424 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 5428 0x00 0x00 NOPX + 5430 0x00 0x00 NOPX + 5432 0x00 0x00 NOPX + 5434 0x00 0x00 NOPX + 5436 0x00 0x00 NOPX + 5438 0x00 0x00 NOPX + 5440 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 5444 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 5448 0x00 0x00 NOPX + 5450 0x00 0x00 NOPX + 5452 0x00 0x00 NOPX + 5454 0x00 0x00 NOPX + 5456 0x00 0x00 NOPX + 5458 0x00 0x00 NOPX + 5460 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 5464 0x01 0x14 0x2e 0x98 LDA el0, [p1, #4] + 5468 0x00 0x00 NOPX + 5470 0x00 0x00 NOPX +.no_stack_arguments + 5472 0x00 0x0a 0x78 0x00 0x01 0x04 JL #5360 +.delay_slot + 5478 0x0f 0xfb 0x9d 0x98 ST p7, [sp, #-8] +.delay_slot +.swstall delay_slot + 5482 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5484 0x00 0x00 NOPX +.delay_slot + 5486 0x08 0xdc 0x29 0x98 ST el0, [p0], #-12 +.delay_slot + 5490 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x7b 0x06 0x07 0x00 0x00 0x1c 0x2e NOPA; NOPS; MOV p7, p0; NOPV +.return_address + 5504 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] + 5508 0x00 0x00 NOPX + 5510 0x00 0x00 NOPX + 5512 0x00 0x00 NOPX + 5514 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5516 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5518 0x07 0xfb 0x99 0x18 LDA p7, [sp, #-8] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5522 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5526 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5528 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5530 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5532 0x10 0x20 0x01 0x18 MOVX r16, #0 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5536 0xe8 0xc2 0x30 0x3f 0xfe 0x00 0x00 0x00 0x71 0x3a ST r16, [p7, #16]; PADDXM [sp], #-64 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + +.text_segment PM 5552 +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.function_start + 5552 0x04 0x00 0x80 0x00 0x01 0xf1 0xb1 0xa0 0x10 0xba MOVA m0, #32; MOVXM p3, #508736 + 5562 0x61 0x06 0xd0 0x00 0x01 0xf2 0x31 0x10 0x10 0xba LDA r1, [p3], m0; MOVXM p4, #508448 + 5572 0x60 0x90 0xd0 0x18 0x07 0x88 0x6f 0xfa 0x58 0xba LDA m1, [p3]; MOVX r0, #828; MOV r3, #-6 + 5582 0x62 0x80 0xd0 0x00 0x00 0x04 0x7b 0x38 0x10 0xba LDA m0, [p3, #4]; MOVXM ls, #5744 + 5592 0x80 0x88 0x50 0x00 0x00 0x05 0xbb 0x40 0x10 0xba LDA.s8 r2, [p4]; MOVXM le, #5760 + 5602 0x00 0x00 NOPX + 5604 0x00 0x00 NOPX + 5606 0x00 0x00 NOPX + 5608 0x10 0x42 0x3d 0x98 LSHL r1, r1, r3 + 5612 0x1d 0x70 0xfc 0x98 ADD.NC lc, r1, #-7 + 5616 0x21 0x13 0x70 0x50 0xe8 0x3c VLDA x2, [p1], m0; VLDB x1, [p0], m1 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5622 0x21 0x1b 0x70 0x50 0x68 0xba 0x80 0x12 VLDA x3, [p1], m0; VLDB x0, [p0], m1; MOVX crRnd, r2 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5630 0x21 0x13 0x70 0x50 0xe8 0x3c VLDA x2, [p1], m0; VLDB x1, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5636 0x21 0x1b 0x70 0x50 0x68 0x3c VLDA x3, [p1], m0; VLDB x0, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5642 0x21 0x13 0x70 0x50 0xe8 0x3c VLDA x2, [p1], m0; VLDB x1, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5648 0x21 0x1b 0x70 0x50 0x68 0x3c VLDA x3, [p1], m0; VLDB x0, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5654 0x21 0x13 0x70 0x50 0xe8 0x3c VLDA x2, [p1], m0; VLDB x1, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5660 0x21 0x1b 0x70 0x28 0x34 0x1d 0x00 0xe2 0x41 0x4a VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5670 0x21 0x13 0x70 0x28 0x74 0x1d 0x01 0xe0 0x61 0x4a VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5680 0x21 0x1b 0x70 0x28 0x34 0x1d 0x00 0xe2 0x41 0x4a VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5690 0x21 0x13 0x70 0x28 0x74 0x1d 0x01 0xe0 0x61 0x4a VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5700 0x21 0x1b 0x70 0x50 0x68 0x00 0xad 0x8e 0x00 0xe2 0x41 0x66 VLDA x3, [p1], m0; VLDB x0, [p0], m1; NOPS; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5712 0x21 0x13 0x70 0x50 0xe8 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x0f 0x03 0x0b VLDA x2, [p1], m0; VLDB x1, [p0], m1; NOPS; NOPX; NOPM; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5728 0x21 0x1b 0x70 0x50 0x6a 0x1c 0x23 0x00 0x00 0x00 0x01 0xa5 0x78 0x07 0x12 0x0b VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 +.label ZLS_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_192 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5744 0x21 0x13 0x70 0x50 0xea 0x1c 0xa3 0x00 0x00 0x00 0x01 0xa5 0x78 0x0f 0x03 0x0b VLDA x2, [p1], m0; VLDB x1, [p0], m1; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; VMUL.f dm1, x0, x3, r0 +.label ZLE_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_208 +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5760 0x21 0x1b 0x70 0x50 0x6a 0x1c 0x23 0x00 0x00 0x00 0x01 0xa5 0x78 0x07 0x12 0x0b VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 +.loop_nesting 0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5776 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5784 0x43 0x84 0x60 0x02 0x00 0xe2 0x41 0x62 VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5792 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5800 0x43 0x84 0x60 0x02 0x00 0xe2 0x41 0x62 VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5808 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5816 0x43 0x84 0x60 0x02 0x00 0xe2 0x41 0x62 VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5824 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5832 0x0a 0x1c 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p2], #64 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5836 0x43 0x94 0x60 0x50 0x00 0x5c VST.CONV.bf16.fp32 cml1, [p2], #64;RET lr +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5842 0x0a 0x1c 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p2], #64 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5846 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64 +.delay_slot + 5850 0x0a 0x1c 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p2], #64 +.delay_slot + 5854 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64 +.delay_slot +.swstall delay_slot + 5858 0x00 0x00 NOPX +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_end0 + +.text_segment PM 5872 +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0 +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.function_start + 5872 0x00 0x07 0xc8 0xc4 0x00 0x44 MOVXM p4, #508416 + 5878 0x80 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p4]; MOV r17, CORE_ID + 5884 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 5890 0xff 0x3a 0xb0 0x23 0x14 0x81 0xca 0x60 0x79 0x3a ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 + 5900 0xfd 0x83 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p0, [sp, #-20]; MOV r0, r15 + 5908 0x0f 0xfc 0x15 0x98 ST r0, [sp, #-4] + 5912 0x0f 0xf0 0x3d 0x98 ST lr, [sp, #-16] + 5916 0x00 0x00 NOPX + 5918 0x80 0x0b 0xd8 0x40 0x01 0x84 JNZ r16, #6064 +.delay_slot + 5924 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 5928 0x00 0x07 0xc4 0xc4 0x08 0x44 MOVXM p2, #508420 +.delay_slot + 5934 0x40 0xc6 0x30 0x01 0x37 0x60 0x70 0x02 ST r17, [p2]; MOV p2, p7 +.delay_slot + 5942 0x1b 0xd6 0xc0 0xf8 MOV r15, p3 +.delay_slot + 5946 0xfe 0xa3 0xb0 0x00 0x01 0xf3 0xb1 0xa0 0x11 0x3a ST p2, [sp, #-12]; MOVXM p7, #508736 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5956 0x13 0x91 0x60 0x00 0x01 0xf1 0x31 0x10 0x11 0x3a MOVS p0, p7; MOVXM p2, #508448 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5966 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x31 0x0e 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #508444 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5976 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 5978 0x00 0x0a 0x88 0x00 0x01 0x04 JL #5392 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5984 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5986 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5988 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 5992 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 5996 0x0a 0x06 0x11 0x98 ST r16, [p2] +.return_address + 6000 0xe0 0xc2 0xd0 0x00 0x01 0xf0 0xb1 0x02 0x10 0xba LDA r16, [p7]; MOVXM p1, #508420 + 6010 0x20 0xc6 0xd0 0x00 0x01 0xf1 0xb1 0x04 0x10 0xba LDA r17, [p1]; MOVXM p3, #508424 + 6020 0xea 0xcb 0x50 0x00 0x01 0xf0 0xb1 0x06 0x10 0xba LDA.u16 r18, [p7, #10]; MOVXM p1, #508428 + 6030 0x00 0x00 NOPX + 6032 0x00 0x00 NOPX + 6034 0x00 0x00 NOPX + 6036 0x00 0x0b 0xe0 0x00 0x00 0x84 J #6080 +.delay_slot + 6042 0x00 0x07 0xc4 0xc4 0x30 0x44 MOVXM p2, #508440 +.delay_slot + 6048 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 +.delay_slot + 6052 0x0a 0x06 0x51 0x98 ST r18, [p2] +.delay_slot + 6056 0x0b 0x06 0x11 0x98 ST r16, [p3] +.delay_slot + 6060 0x09 0x06 0x11 0x98 ST r16, [p1] +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 + 6064 0x00 0x07 0xc6 0xc4 0x10 0x44 MOVXM p3, #508424 + 6070 0x00 0x2c 0xf0 0x00 0x01 0xf0 0xb1 0x06 0x10 0xba NOPA; MOVXM p1, #508428 +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 + 6080 0x18 0x67 0x86 0x18 ADD.NC p0, r15, #12 + 6084 0x1f 0xee 0xd0 0x00 0x01 0xf1 0x31 0x00 0x10 0xba LDA r27, [p0], #-4; MOVXM p2, #508416 + 6094 0x00 0xfe 0x16 0x98 LDA r16, [p0], #-4 + 6098 0x00 0xfe 0x36 0x98 LDA r17, [p0], #-4 + 6102 0x02 0x06 0x56 0x98 LDA r18, [p2] + 6106 0x00 0x46 0x76 0x98 LDA r19, [p0, #16] + 6110 0x00 0x00 NOPX + 6112 0x00 0x00 NOPX + 6114 0x00 0x00 NOPX + 6116 0x00 0x00 NOPX + 6118 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 6122 0x00 0xc2 0x39 0x40 0x0e 0x5c ST r16, [p0]; ADD r16, r18, #1 + 6128 0x0a 0x06 0x11 0x98 ST r16, [p2] + 6132 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 6136 0x00 0x00 NOPX + 6138 0x00 0x00 NOPX + 6140 0x00 0x00 NOPX + 6142 0x14 0xd3 0x08 0x18 ACQ r19, r16 + 6146 0x1a 0x67 0x06 0x18 ADD.NC p2, r14, #12 + 6150 0x00 0x00 NOPX + 6152 0x00 0x00 NOPX + 6154 0x02 0xff 0x76 0x98 LDA r27, [p2], #-4 + 6158 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4 + 6162 0x02 0xfe 0x56 0x98 LDA r18, [p2], #-4 + 6166 0x02 0x56 0x76 0x98 LDA r19, [p2, #20] + 6170 0x00 0x00 NOPX + 6172 0x00 0x00 NOPX + 6174 0x00 0x00 NOPX + 6176 0x00 0x00 NOPX + 6178 0x00 0x00 NOPX + 6180 0x14 0xa3 0x12 0x18 SEL.EQZ r17, r18, r17, r27 + 6184 0x0a 0x06 0x31 0x98 ST r17, [p2] + 6188 0x00 0x00 NOPX + 6190 0x00 0x00 NOPX + 6192 0x00 0x00 NOPX + 6194 0x00 0x00 NOPX + 6196 0x14 0xd3 0x08 0x18 ACQ r19, r16 + 6200 0xd1 0x11 0x60 0x01 0x00 0x29 0xce 0x60 0x79 0x3a MOVS p6, p2; MOVX r16, #1; MOV r14, p6 + 6210 0x00 0x00 NOPX + 6212 0x00 0x00 NOPX + 6214 0x07 0xee 0x19 0x18 LDA p4, [sp, #-20] + 6218 0x60 0xc6 0xdf 0xd8 0x3b 0x0c LDA r17, [p3]; ST p0, [sp, #-20] + 6224 0x20 0xd2 0xd6 0xdd 0x81 0xd4 LDA r20, [p1]; MOV p3, p7 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 6230 0x02 0x4e 0x56 0x98 LDA r18, [p2], #16 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 6234 0x00 0x5d 0x1e 0x98 LDA p2, [p0], #20 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6238 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6242 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6244 0x04 0x06 0x76 0x98 LDA r19, [p4] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6248 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 6250 0x00 0x0a 0xd8 0x00 0x01 0x04 JL #5552 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6256 0x1b 0xd4 0xc0 0xf8 MOV r15, p2 +.delay_slot + 6260 0x14 0x63 0x0d 0x98 LSHL r17, r17, r16 +.delay_slot + 6264 0x15 0x21 0x0d 0x98 LSHL r16, r20, r16 +.delay_slot + 6268 0x19 0x69 0x41 0x58 ADD.NC p1, r18, r16 +.delay_slot + 6272 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xe2 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV +.return_address + 6288 0xc8 0xc6 0xd0 0x01 0x00 0x28 0xb3 0xd0 0x78 0xba LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 + 6298 0x00 0x07 0xcc 0xc4 0x30 0x44 MOVXM p6, #508440 + 6304 0x00 0x00 NOPX + 6306 0x00 0x00 NOPX + 6308 0x00 0x00 NOPX + 6310 0x00 0x00 NOPX + 6312 0x00 0x00 NOPX + 6314 0x14 0x51 0x08 0x18 REL r17, r16 + 6318 0x01 0xf6 0x36 0x98 LDA r17, [p1, #-4] + 6322 0x07 0xed 0x19 0x18 LDA p2, [sp, #-20] + 6326 0x00 0x00 NOPX + 6328 0x00 0x00 NOPX + 6330 0x00 0x00 NOPX + 6332 0x00 0x00 NOPX + 6334 0x00 0x00 NOPX + 6336 0x14 0x23 0x11 0x98 SUB r17, r16, r17 + 6340 0x4a 0xc6 0xd3 0xec 0x63 0x0c LDA r17, [p2, #20]; ST r17, [p1, #-4] + 6346 0x00 0x00 NOPX + 6348 0x00 0x00 NOPX + 6350 0x00 0x00 NOPX + 6352 0x00 0x00 NOPX + 6354 0x00 0x00 NOPX + 6356 0x00 0x00 NOPX + 6358 0x14 0x51 0x08 0x18 REL r17, r16 + 6362 0xfc 0xce 0xd0 0x00 0x01 0xf0 0xb1 0x00 0x10 0xba LDA r19, [p7, #-8]; MOVXM p1, #508416 + 6372 0x06 0x06 0x56 0x98 LDA r18, [p6] + 6376 0x01 0x06 0x36 0x98 LDA r17, [p1] + 6380 0x00 0x00 NOPX + 6382 0x00 0x00 NOPX + 6384 0x00 0x00 NOPX + 6386 0x00 0x00 NOPX + 6388 0x14 0x21 0x31 0x98 SUB r16, r16, r19 + 6392 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8] + 6396 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 6400 0x80 0x0c 0x90 0x40 0x01 0x84 JNZ r16, #6432 +.delay_slot +.swstall delay_slot + 6406 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6408 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6410 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6412 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6414 0x00 0x00 NOPX + 6416 0x10 0x20 0x01 0x18 MOVX r16, #0 + 6420 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x83 0x08 0xc1 0x36 NOPA; NOPB; ST r16, [p1]; NOPX +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 + 6432 0x07 0xf0 0x39 0x18 LDA lr, [sp, #-16] + 6436 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] + 6440 0x07 0xf7 0x99 0x18 LDA p7, [sp, #-12] +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 6444 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.noswbrkpt + 6446 0x07 0xf9 0xd1 0x18 LDA r14, [sp, #-8] +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 6450 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 6452 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 6454 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6458 0x0e 0x8e 0x0b 0x18 MOVS p6, r14 +.delay_slot + 6462 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 6468 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6470 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6472 0x00 0x00 NOPX +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0 + +.text_segment PM 6480 +.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh +.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_begin0 +.function_start + 6480 0x03 0x85 0xd0 0x00 0x01 0xf0 0xb1 0xe0 0x10 0xba LDA el0, [p0], #4; MOVXM p1, #508864 + 6490 0x03 0x81 0xd0 0x01 0x00 0x4b 0x08 0x00 0x58 0xba LDA eh0, [p0], #4; MOVX r16, #2; MOV r24, #0 + 6500 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 6506 0xfe 0x73 0xb0 0x00 0x01 0xf3 0xb1 0xe0 0x11 0x3a ST p7, [sp, #-16]; MOVXM p7, #508864 + 6516 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4] + 6520 0x0f 0xf5 0xd5 0x98 ST r14, [sp, #-12] + 6524 0x0f 0xf9 0xf5 0x98 ST r15, [sp, #-8] + 6528 0x09 0x1c 0x29 0x98 ST el0, [p1], #4 + 6532 0x09 0x1c 0x09 0x98 ST eh0, [p1], #4 + 6536 0x00 0x04 0x2e 0x98 LDA el0, [p0] + 6540 0x00 0x14 0x0e 0x98 LDA eh0, [p0, #4] + 6544 0x00 0x00 NOPX + 6546 0x00 0x00 NOPX + 6548 0x00 0x00 NOPX + 6550 0x00 0x00 NOPX + 6552 0x00 0x00 NOPX + 6554 0x09 0x04 0x29 0x98 ST el0, [p1] + 6558 0x09 0x14 0x09 0x98 ST eh0, [p1, #4] + 6562 0x07 0x5e 0x2a 0x98 LDA.u8 r17, [p7], #5 + 6566 0x07 0xee 0x4a 0x98 LDA.u8 r18, [p7], #-2 + 6570 0x07 0xec 0x2a 0x98 LDA.u8 r1, [p7], #-2 + 6574 0x00 0x00 NOPX + 6576 0x00 0x00 NOPX + 6578 0x00 0x00 NOPX + 6580 0x00 0x00 NOPX +.no_stack_arguments + 6582 0x00 0x18 0x58 0x00 0x01 0x04 JL #12464 +.delay_slot + 6588 0xfd 0xca 0xb8 0xba 0x43 0x5c ST r18, [sp, #-20]; SUB r14, r17, r18 +.delay_slot + 6594 0xfc 0x86 0xb0 0x03 0x08 0x45 0xe8 0x50 0x79 0x3a ST r1, [sp, #-28]; NE r16, r1, r16; MOV r15, r1 +.delay_slot + 6604 0xfd 0x42 0xb7 0x6f 0x15 0x5c ST r16, [sp, #-24]; LT r27, r14, r24 +.delay_slot + 6610 0x16 0x22 0xe1 0x98 SUB r17, r24, r14 +.delay_slot + 6614 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x03 0x81 0x12 0x7a NOPA; NOPS; SEL.EQZ r0, r14, r17, r27 +.return_address + 6624 0xe7 0xc5 0x50 0x1f 0x47 0x36 0x08 0x00 0x58 0xba LDA.u8 r17, [p7], #3; XOR r20, r15, r14; MOV r16, #0 + 6634 0xfd 0xc9 0x58 0x4c 0x43 0x2c LDA.u8 r18, [p7], #-2; SUB r19, r16, r2 + 6640 0xfc 0x86 0x2a 0x6e 0x15 0x2c LDA r1, [sp, #-28]; LT r27, r20, r16 + 6646 0x10 0xa7 0x32 0x18 SEL.EQZ r19, r2, r19, r27 + 6650 0x00 0x00 NOPX + 6652 0x00 0x00 NOPX +.no_stack_arguments + 6654 0x00 0x18 0x58 0x00 0x01 0x04 JL #12464 +.delay_slot + 6660 0x14 0xe6 0x70 0x18 EXTEND.s16 r19, r19 +.delay_slot + 6664 0xfc 0x4a 0xb0 0x22 0xe9 0x0d 0xec 0xc0 0x49 0x3a ST r18, [sp, #-32]; SUB r14, r17, r18; ADD.NC r15, r19, #1 +.delay_slot + 6674 0x13 0xb7 0x0a 0x98 LT r27, r14, r16 +.delay_slot + 6678 0x14 0x22 0xe1 0x98 SUB r17, r16, r14 +.delay_slot + 6682 0x00 0x2c 0xf7 0x02 0x24 0x2c NOPA; SEL.EQZ r0, r14, r17, r27 +.return_address + 6688 0xfc 0x86 0x20 0x01 0x30 0x48 0x00 0x42 0x58 0xba LDA r1, [sp, #-28]; MOVX r19, #2; MOV m0, #66 + 6698 0xe1 0x51 0x50 0x01 0x80 0x0a 0x48 0x08 0x58 0xba LDA.u8 r20, [p7], m0; MOVX r24, #0; MOV r18, #8 + 6708 0xfc 0x72 0x20 0x3f 0x07 0x4b 0xe8 0x17 0x58 0xba LDA r28, [sp, #-32]; MOVX r16, #-6; MOV r31, #23 + 6718 0xfd 0xda 0x20 0x3f 0xa7 0xca 0xa8 0x06 0x58 0xba LDA r22, [sp, #-20]; MOVX r26, #-2; MOV r21, #6 + 6728 0xfd 0x0e 0x20 0x0f 0xd7 0x89 0x00 0x20 0x58 0xba LDA r3, [sp, #-24]; MOVX r29, #508; MOV m2, #32 + 6738 0xe9 0xc0 0x80 0x01 0x70 0x28 0x08 0x80 0x58 0xba MOVA m0, #-178; MOVX r23, #1; MOV r0, #128 + 6748 0x17 0x44 0x80 0x31 0x11 0x0c 0x9d 0xb0 0x78 0xba MOVA m1, #186; SUB r17, r24, r2; MOV vaddSign0, crMCDEn + 6758 0x10 0x7c 0xe6 0x98 XOR r30, r1, r14 + 6762 0x17 0xb7 0x8a 0x98 LT r27, r30, r24 + 6766 0x14 0x62 0x43 0xbc 0xff 0x24 SEL.EQZ r17, r2, r17, r27; ADD.NC r7, r28, #-1 + 6772 0x8f 0x8e 0x0b 0x36 0x02 0x24 EXTEND.s16 r30, r17; ADD.NC r22, r22, #2 + 6778 0x7f 0xa9 0xf7 0x3e 0x01 0x24 MUL r30, r15, r20; ADD.NC r14, r30, #1 + 6784 0x08 0x9d 0xf8 0xb6 0x01 0x24 MUL r2, r1, r14; ADD.NC r17, r22, #1 + 6790 0x14 0xf6 0x17 0x98 EQ r27, r19, r1 + 6794 0x17 0x84 0x2f 0x98 MUL r2, r30, r2 + 6798 0xff 0xe4 0x49 0x3f 0xf5 0x64 SEL.EQZ r31, r31, r18, r27; MOV r18, #-3 + 6804 0x10 0xa1 0x0d 0x98 LSHL r16, r2, r16 + 6808 0x14 0x45 0xad 0x98 LSHL r2, r17, r26 + 6812 0x10 0xb9 0xf2 0x22 0xff 0x24 MUL r2, r2, r28; ADD.NC r4, r2, #-1 + 6818 0x10 0xc7 0x5d 0x98 LSHL r3, r3, r21 + 6822 0xff 0x8a 0x37 0x94 0x3f 0x5c ST r2, [p7], #-4; MUL r5, r15, r1 + 6828 0xe9 0x42 0x30 0x3b 0x6b 0x26 0x08 0x04 0x59 0x3a ST r16, [p7], m2; AND r22, r29, r22; MOV r16, #4 + 6838 0xf9 0xfe 0x32 0xda 0xc1 0x5c ST r31, [p7], #-16; ADD r22, r5, r22 + 6844 0xed 0x8e 0x3b 0x7e 0x9f 0x5c ST r3, [p7], #24; MUL r31, r22, r20 + 6850 0xe3 0x92 0x3b 0x5a 0x1b 0x5c ST r4, [p7], #4; LSHL r22, r22, r16 + 6856 0x17 0xc7 0x7d 0x98 LSHL r3, r31, r23 + 6860 0x11 0x09 0x5d 0x98 LSHL r4, r4, r21 + 6864 0xb6 0x46 0x32 0x24 0x02 0xa4 SUB r25, r22, r3; ADD.NC r4, r4, r0 + 6870 0xf8 0x00 0x00 0x06 0x62 0x0f 0x2e 0x40 0xa8 0xba MOVA r0, #-64; SUB r6, r3, r4; ADD.NC r25, r25, r0 + 6880 0xe3 0x82 0x3f 0x84 0x3f 0x5c ST r0, [p7], #4; MUL r1, r31, r1 + 6886 0xe3 0x9e 0x3f 0xfc 0xff 0x5c ST r7, [p7], #4; MUL r31, r31, r7 + 6892 0xe3 0x9a 0x32 0x96 0x5b 0x5c ST r6, [p7], #4; LSHL r5, r5, r18 + 6898 0xf9 0xaf 0xbf 0xa5 0xff 0x24 LSHL r6, r31, r23; ADD.NC r31, r5, #-1 + 6904 0x00 0xe4 0x00 0x28 0x59 0x6e 0x49 0x88 0xa8 0xba MOVA r4, #7; LSHL r5, r20, r18; ADD.NC r18, r6, r4 + 6914 0x16 0x25 0x21 0x98 SUB r18, r24, r18 + 6918 0xe3 0xca 0x30 0x02 0x1b 0xee 0x49 0x7f 0xc9 0x3a ST r18, [p7], #4; LSHL r1, r1, r23; ADD.NC r18, r5, #-1 + 6928 0xe3 0xca 0x30 0x9a 0xc1 0x5c ST r18, [p7], #4; ADD r6, r1, r22 + 6934 0xe3 0xda 0x3f 0x84 0x9b 0x5c ST r22, [p7], #4; LSHL r1, r31, r4 + 6940 0xe3 0xfe 0x30 0x07 0x60 0x84 0x2f 0xff 0x59 0x3a ST r31, [p7], #4; ADD r22, r3, r1; MOV r1, #-1 + 6950 0xe3 0xe6 0x30 0x0c 0x3b 0x0e 0xc8 0x40 0x59 0x3a ST r25, [p7], #4; SUB r3, r6, r22; MOV r22, #64 + 6960 0xe3 0x8e 0x37 0xc2 0x1b 0x5c ST r3, [p7], #4; LSHL r16, r15, r16 + 6966 0xe3 0xca 0x30 0x03 0x04 0x5c ST r18, [p7], #4; SEL.EQZ r0, r0, r24, r27 + 6972 0xf0 0xef 0xb0 0x30 0x02 0xa4 LSHL r3, r30, r23; ADD.NC r0, r16, r0 + 6978 0xe3 0x82 0x38 0x40 0x63 0x5c ST r0, [p7], #4; SUB r16, r16, r3 + 6984 0xe3 0xfe 0x30 0x0a 0x11 0x33 0xec 0x10 0x09 0x3a ST r31, [p7], #4; MAC r1, r1, r5, r2; ADD.NC r31, r16, #64 + 6994 0xe3 0xfe 0x39 0x7e 0xbb 0x5c ST r31, [p7], #4; LSHL r31, r18, r21 + 7000 0xe3 0xda 0x30 0x8a 0xbb 0x5c ST r22, [p7], #4; LSHL r2, r1, r21 + 7006 0xe3 0x86 0x3c 0x04 0x43 0x5c ST r1, [p7], #4; SUB r1, r24, r2 + 7012 0xe3 0xda 0x3c 0x0b 0xe3 0x5c ST r22, [p7], #4; SUB r2, r24, r31 + 7018 0x0f 0x1c 0x31 0x98 ST r1, [p7], #4 + 7022 0x0f 0x1e 0x51 0x98 ST r18, [p7], #4 + 7026 0x0f 0x1e 0xd1 0x98 ST r22, [p7], #4 + 7030 0x0f 0x08 0x51 0x98 ST r2, [p7], m0 + 7034 0x07 0x28 0x2a 0x98 LDA.u8 r1, [p7], m1 + 7038 0x00 0x00 NOPX + 7040 0x00 0x00 NOPX + 7042 0x00 0x00 NOPX + 7044 0x00 0x00 NOPX + 7046 0x00 0x00 NOPX + 7048 0x00 0x00 NOPX + 7050 0x08 0x0d 0xd8 0x00 0x01 0x84 JZ r1, #7088 +.delay_slot + 7056 0x10 0x20 0x0d 0x18 MOVX r16, #3 +.delay_slot + 7060 0x13 0xe1 0x0d 0x98 LSHL r16, r15, r16 +.delay_slot + 7064 0xff 0x7f 0x0f 0xa0 0x00 0x44 MOVXM r31, #-8454144 +.delay_slot +.swstall delay_slot + 7070 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7072 0x00 0x00 NOPX + 7074 0x00 0x2c 0xf0 0x00 0x20 0x3e 0x01 0x00 0x34 0xaf 0x00 0x2b 0x60 0x7e NOPA; NOPB; NOPS; MOVX r31, #0; NOPM +.label TGT_F_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh_608 + 7088 0xe7 0x60 0x80 0x00 0x01 0xf0 0x31 0x10 0x10 0xba MOVA m0, #-197; MOVXM p0, #508448 + 7098 0x00 0xc4 0x50 0x3b 0xd8 0xa4 0x01 0xf8 0xb8 0xba LDA.s8 r17, [p0]; AND r29, r29, r17; VINSERT.32 x0, x0, #0, r31 + 7108 0xff 0x06 0x20 0x01 0xf0 0xa8 0x00 0x49 0x78 0xba LDA r1, [sp, #-8]; MOVX r31, #5; VMOV bmll0, x0 + 7118 0xff 0x87 0x20 0x1f 0xff 0xec 0x80 0xc9 0x58 0xba LDA lr, [sp, #-4]; LSHL r31, r15, r31; MOV m1, #201 + 7128 0xfe 0x03 0x20 0x64 0x02 0x2c LDA p0, [sp, #-16]; MOVX r25, #0 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7134 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7136 0x07 0xf5 0xd1 0x18 LDA r14, [sp, #-12] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7140 0x07 0x2c 0x37 0x18 ST.s16 r1, [p7], #4 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7144 0xef 0x39 0xff 0x71 0x41 0xe4 MUL r28, r29, r28; MOV crRnd, r17 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7150 0x08 0x02 0xc0 0x1f 0x1d 0x6d 0xe8 0x50 0x79 0x3a VCONV.bf16.fp32 wl0, bmll0; LSHL r17, r15, r26; MOV r15, r1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7160 0xe5 0x29 0xf8 0xb1 0xff 0x24 MUL r20, r28, r20; ADD.NC r17, r17, #-1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7166 0xa7 0x67 0xb0 0x82 0x03 0x64 LSHL r29, r20, r19; VEXTRACT.16 r1, x0, #0, vaddSign0 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7172 0x17 0xb8 0xef 0x98 MUL r28, r30, r14 + 7176 0x14 0x6b 0x5d 0x98 LSHL r21, r17, r21 + 7180 0xe3 0xd2 0x3e 0x5e 0xfb 0x5c ST r20, [p7], #4; LSHL r23, r28, r23 + 7186 0xe3 0xf6 0x3f 0xea 0xa3 0x5c ST r29, [p7], #4; SUB r26, r31, r21 + 7192 0xe1 0x72 0x3f 0xd6 0x4c 0x5c ST r28, [p7], m0; MAC r21, r21, r31, r18 + 7198 0x07 0x2a 0x8a 0x98 LDA.u8 r20, [p7], m1 + 7202 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 + 7208 0x00 0x00 NOPX + 7210 0x00 0x00 NOPX + 7212 0x00 0x00 NOPX + 7214 0x17 0xbd 0x3d 0x98 LSHL r30, r30, r19 + 7218 0x17 0xab 0x51 0x98 SUB r21, r30, r21 + 7222 0x14 0xf7 0x47 0x98 EQ r27, r19, r20 + 7226 0x16 0x27 0x72 0x18 SEL.EQZ r19, r24, r23, r27 + 7230 0x0f 0x1e 0x71 0x98 ST r19, [p7], #4 + 7234 0x0f 0x1e 0x11 0x98 ST r16, [p7], #4 + 7238 0x0f 0x1e 0x31 0x98 ST r17, [p7], #4 + 7242 0xe3 0xda 0x30 0x50 0x00 0x5c ST r22, [p7], #4; RET lr +.delay_slot + 7248 0x0f 0x1e 0x51 0x98 ST r18, [p7], #4 +.delay_slot + 7252 0x0f 0x1f 0x51 0x98 ST r26, [p7], #4 +.delay_slot + 7256 0x0f 0x1e 0xb1 0x98 ST r21, [p7], #4 +.delay_slot + 7260 0x0f 0x07 0x31 0x98 ST r25, [p7] +.delay_slot + 7264 0xe2 0xe6 0x30 0x03 0xb0 0x60 0x70 0x02 ST r25, [p7, #4]; MOV p7, p0 +.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh__end +.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_end0 + +.text_segment PM 7280 +.label __Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params___func_begin0 +.label _Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params +.function_start + 7280 0xf1 0x18 0x80 0x3b 0x68 0x00 0x01 0xf2 0x32 0x16 0x10 0xb6 MOVA m6, #-120; VLDB x6, [p0], #64; MOVXM p4, #508972 + 7292 0x9f 0xa8 0xd0 0x38 0xe8 0x00 0x12 0x0a 0x80 0x80 0x58 0xb6 LDA dj2, [p4], #-4; VLDB x1, [p0], #64; MOVX r1, #16; MOV m5, #128 + 7304 0x87 0xa4 0xd0 0x00 0x07 0x8a 0x07 0x90 0x58 0xba LDA dn2, [p4], #12; MOVX r0, #60; MOV m4, #-112 + 7314 0x9f 0xe8 0xd0 0x00 0x24 0x0a 0x60 0x00 0x58 0xba LDA dj6, [p4], #-4; MOVX r2, #32; MOV dc4, #0 + 7324 0x85 0xe4 0xd7 0x10 0x4b 0x00 0x00 0x04 0x7e 0xb0 0x10 0x76 LDA dn6, [p4], #8; MOVS dc7, dc4; MOVXM ls, #7520 + 7336 0x85 0xa0 0xd2 0x10 0x4b 0x00 0x00 0x05 0xbe 0xd8 0x10 0x76 LDA m2, [p4], #8; MOVS dc2, dc4; MOVXM le, #7600 + 7348 0x9f 0x88 0xd6 0x10 0x4b 0x00 0x01 0xf2 0xb1 0x10 0x10 0x76 LDA dj0, [p4], #-4; MOVS dc6, dc4; MOVXM p5, #508448 + 7360 0x87 0x84 0xd1 0x10 0x4b 0x00 0x36 0x09 0xe4 0xc0 0x78 0x76 LDA dn0, [p4], #12; MOVS dc1, dc4; MOVX r3, #48; MOV dc3, dc4 + 7372 0x9f 0xc8 0xd0 0x10 0x4b 0x01 0x18 0x43 0x62 0xba LDA dj4, [p4], #-4; MOVS dc0, dc4; VSHIFT x4, x6, x1, r1 + 7382 0x85 0xc4 0xdb 0x93 0x01 0xd4 LDA dn4, [p4], #8; MOV dc5, dc4 + 7388 0x04 0x2c 0x06 0x98 LDA m0, [p4], #8 + 7392 0x04 0xfc 0xc6 0x98 LDA dj1, [p4], #-4 + 7396 0x87 0x94 0xd0 0xb1 0x68 0x3c LDA dn1, [p4], #12; VLDB.3D x2, [p0], d2 + 7402 0x04 0xfe 0xc6 0x98 LDA dj5, [p4], #-4 + 7406 0x04 0x2e 0xa6 0x98 LDA dn5, [p4], #8 + 7410 0x04 0x2c 0x86 0x98 LDA m1, [p4], #8 + 7414 0x04 0xff 0xc6 0x98 LDA dj7, [p4], #-4 + 7418 0x04 0x2f 0xa6 0x98 LDA dn7, [p4], #8 + 7422 0x04 0x2f 0x86 0x98 LDA m7, [p4], #8 + 7426 0x04 0xfd 0xc6 0x98 LDA dj3, [p4], #-4 + 7430 0x04 0x2d 0xa6 0x98 LDA dn3, [p4], #8 + 7434 0x04 0xc9 0x86 0x98 LDA m3, [p4], m6 + 7438 0x04 0xa8 0x96 0x98 LDA r4, [p4], m5 + 7442 0x04 0x88 0xf2 0x98 LDA.s16 r7, [p4], m4 + 7446 0x04 0x4e 0x06 0x98 LDA m4, [p4], #16 + 7450 0x92 0x96 0xd3 0xe1 0xe8 0x3c LDA r5, [p4, #-28]; VLDB.2D x3, [p1], d7 + 7456 0x02 0x04 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p2] + 7460 0x00 0x00 NOPX + 7462 0x05 0x04 0xc2 0x98 LDA.s8 r6, [p5] + 7466 0x11 0x09 0xfb 0x18 ADD r4, r4, #-2 + 7470 0x80 0xc6 0xd0 0x00 0x00 0x06 0x36 0xf8 0x10 0xba LDA r17, [p4]; MOVXM p4, #7664 + 7480 0x18 0x1d 0x72 0xf8 VBCST.16 x0, r7 + 7484 0x1d 0x1c 0x03 0x58 VEXTBCST.128 x10, x3, #0 + 7488 0x1d 0x72 0xff 0x98 ADD.NC lc, r5, #-1 + 7492 0x00 0x2c 0xf0 0x00 0x23 0x00 0x8a 0xe2 0x04 0x6d 0x41 0x66 NOPA; NOPB; VMOV cml3, cml0; VMAC.f dm4, dm3, x6, x10, r0 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7504 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x0d 0xd4 0x02 0x0e 0x03 0xa8 0x08 0x1a 0x0b NOPA; NOPB; NOPS; MOVX crRnd, r6; VEXTBCST.128 x8, x3, #1; VMAC.f dm1, dm0, x1, x10, r0 +.label ZLS_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_240 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7520 0x03 0xb3 0x71 0xf0 0xf4 0x02 0x84 0x83 0x6e 0xba VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7530 0x00 0x38 0xea 0x9c 0x0b 0x46 0x02 0x89 0x01 0x4a VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7540 0x00 0xb1 0x6a 0x30 0x8a 0xc6 0x01 0x35 0x01 0x4a VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r2; VMAC.f dm1, dm1, x10, x8, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7550 0x1d 0x89 0x0a 0xd8 VSHIFT x11, x1, x2, r2 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7554 0x04 0xb0 0x8e 0xc6 0x04 0x48 0xa1 0x62 VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7562 0x03 0x9c 0x0f 0x46 0x02 0x36 0xa1 0x62 VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r0 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7570 0x1d 0x89 0x0e 0xd8 VSHIFT x11, x1, x2, r3 + 7574 0x05 0x1c 0x03 0x46 0x03 0x92 0xe1 0x62 VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r0 + 7582 0x04 0x1c 0x07 0x46 0x00 0x56 0xe1 0x62 VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r0 + 7590 0x00 0x2c 0xf4 0x61 0x0d 0x94 NOPA; VSHIFT x4, x6, x1, r1 + 7596 0x04 0x6d 0x41 0x48 VMAC.f dm4, dm3, x6, x10, r0 +.label ZLE_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_320 +.end_of_loop +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 7600 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x08 0x1a 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r0 +.loop_nesting 0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7616 0x03 0x0c 0xf2 0x73 0x90 0x02 0x84 0x83 0x6e 0xba PADDA.3D [p0], d0; PADDB.2D [p2], d3; VSHIFT x10, x1, x2, r1 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7626 0x02 0x9c 0x0b 0x46 0x02 0x89 0x01 0x62 VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7634 0x02 0x30 0x8a 0xc6 0x01 0x35 0x01 0x62 VSHIFT x4, x6, x1, r2; VMAC.f dm1, dm1, x10, x8, r0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7642 0x1d 0x89 0x0a 0xd8 VSHIFT x11, x1, x2, r2 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7646 0x03 0x9c 0x0f 0x46 0x04 0x48 0xa1 0x62 VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7654 0x00 0x2c 0xf4 0xb0 0x8e 0xc2 0x02 0x36 0xa1 0x4a NOPA; VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r0 +.label TGT_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_384 +.loop_nesting 1 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7664 0x3e 0x1e 0x8b 0x12 0x1d 0xb4 VLDB.2D x3, [p1], d7; VSHIFT x11, x1, x2, r3 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7670 0x00 0x00 0x00 0xb7 0xea 0x02 0x03 0x92 0xe1 0x5a MOVXM le, #7840; VMAC.f dm3, dm4, x9, x7, r0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7680 0x40 0x85 0x70 0x00 0x00 0x8f 0xe5 0x02 0x00 0x56 0xe1 0x46 VLDA.CONV.fp32.bf16 cml0, [p2]; MOVXM ls, #7760; VMAC.f dm0, dm2, x11, x7, r0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7692 0x1d 0x72 0xff 0x98 ADD.NC lc, r5, #-1 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7696 0x00 0x1d 0x9b 0x98 VLDA x6, [p0], #64 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7700 0x38 0x1c 0x74 0x18 VLDB x1, [p0], #64 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7704 0x38 0x58 0xb4 0x18 VLDB.3D x2, [p0], d2 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7708 0x0d 0x11 0x96 0x18 VCONV.bf16.fp32 x10, cml3 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7712 0x0b 0x10 0x16 0x18 VCONV.bf16.fp32 x6, cml0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7716 0x1c 0x50 0x6c 0xf8 VMAX_LT.bf16 x8, r16, x10, x0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7720 0x1d 0x53 0x44 0x78 VSHUFFLE x10, x10, x6, r17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7724 0x71 0x42 0x60 0x02 0xa8 0x36 0x70 0x02 VST x8, [p3], m4; VMAX_LT.bf16 x10, r16, x10, x0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7732 0x1d 0x1c 0x03 0x58 VEXTBCST.128 x10, x3, #0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7736 0x67 0x52 0x60 0x01 0x80 0x45 0x70 0x02 VST.3D x10, [p3], d1; VMOV cml3, cml0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7744 0x04 0x1c 0x07 0x46 0x04 0x6d 0x41 0x62 VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7752 0x02 0x30 0x86 0xc6 0x01 0x03 0x41 0x62 VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm0, x1, x10, r0 +.label ZLS_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_480 +.loop_nesting 2 +.begin_of_loop +.aggressive_scheduled_block_id 2 +.noswbrkpt + 7760 0x03 0xb3 0x71 0xf0 0xf4 0x02 0x84 0x83 0x6e 0xba VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r1 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7770 0x00 0x38 0xea 0x9c 0x0b 0x46 0x02 0x89 0x01 0x4a VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7780 0x00 0xb1 0x6a 0x30 0x8a 0xc6 0x01 0x35 0x01 0x4a VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r2; VMAC.f dm1, dm1, x10, x8, r0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7790 0x1d 0x89 0x0a 0xd8 VSHIFT x11, x1, x2, r2 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7794 0x04 0xb0 0x8e 0xc6 0x04 0x48 0xa1 0x62 VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7802 0x03 0x9c 0x0f 0x46 0x02 0x36 0xa1 0x62 VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r0 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7810 0x1d 0x89 0x0e 0xd8 VSHIFT x11, x1, x2, r3 + 7814 0x05 0x1c 0x03 0x46 0x03 0x92 0xe1 0x62 VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r0 + 7822 0x04 0x1c 0x07 0x46 0x00 0x56 0xe1 0x62 VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r0 + 7830 0x00 0x2c 0xf4 0x61 0x0d 0x94 NOPA; VSHIFT x4, x6, x1, r1 + 7836 0x04 0x6d 0x41 0x48 VMAC.f dm4, dm3, x6, x10, r0 +.label ZLE_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_560 +.end_of_loop +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 7840 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x08 0x1a 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r0 +.loop_nesting 1 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7856 0x03 0x0c 0xf4 0xe7 0x20 0x08 0x49 0x02 0x84 0x83 0x68 0xb6 PADDA.3D [p0], d0; PADDB.2D [p2], d3; JNZD r4, r4, p4; VSHIFT x10, x1, x2, r1 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7868 0x02 0x9c 0x0b 0x46 0x02 0x89 0x01 0x62 VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r0 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7876 0x02 0x30 0x8a 0xc6 0x01 0x35 0x01 0x62 VSHIFT x4, x6, x1, r2; VMAC.f dm1, dm1, x10, x8, r0 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7884 0x1d 0x89 0x0a 0xd8 VSHIFT x11, x1, x2, r2 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7888 0x03 0x9c 0x0f 0x46 0x04 0x48 0xa1 0x62 VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r0 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7896 0x04 0xb0 0x8e 0xc6 0x02 0x36 0xa1 0x62 VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r0 +.loop_nesting 0 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7904 0x1d 0x89 0x0e 0xd8 VSHIFT x11, x1, x2, r3 + 7908 0x03 0x92 0xe1 0x48 VMAC.f dm3, dm4, x9, x7, r0 + 7912 0x00 0x56 0xe1 0x48 VMAC.f dm0, dm2, x11, x7, r0 + 7916 0x00 0x00 NOPX + 7918 0x00 0x00 NOPX + 7920 0x00 0x00 NOPX + 7922 0x00 0x00 NOPX + 7924 0x0d 0x11 0x96 0x18 VCONV.bf16.fp32 x10, cml3 + 7928 0x62 0x02 0xc0 0x50 0x00 0x5c VCONV.bf16.fp32 x6, cml0; RET lr +.delay_slot + 7934 0x1c 0x50 0x6c 0xf8 VMAX_LT.bf16 x8, r16, x10, x0 +.delay_slot + 7938 0x1d 0x53 0x44 0x78 VSHUFFLE x10, x10, x6, r17 +.delay_slot + 7942 0x1d 0x50 0x6c 0xf8 VMAX_LT.bf16 x10, r16, x10, x0 +.delay_slot + 7946 0x0b 0x8a 0x13 0x18 VST x8, [p3], m4 +.delay_slot + 7950 0x0b 0x3a 0x93 0x18 VST.3D x10, [p3], d1 +.label _Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params__end +.label __Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params___func_end0 + +.text_segment PM 7968 +.label __Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params___func_begin0 +.label _Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params +.function_start + 7968 0xfb 0x90 0x82 0x39 0x68 0x00 0x01 0xf1 0xb2 0x4c 0x10 0xb6 MOVA m4, #-36; VLDB x2, [p1], #64; MOVXM p3, #509080 + 7980 0x63 0x84 0xd4 0x38 0x68 0x3e 0x47 0x68 0x68 0x01 0x58 0xb6 LDA dn0, [p3], #4; VLDB x0, [p2], #64; MOVX r4, #-5; MOV r3, #1 + 7992 0x63 0x88 0xd0 0x00 0x00 0x04 0x7f 0xc8 0x10 0xba LDA dj0, [p3], #4; MOVXM ls, #8080 + 8002 0x63 0xc4 0xd0 0x00 0x00 0x05 0xbf 0xf8 0x10 0xba LDA dn4, [p3], #4; MOVXM le, #8176 + 8012 0x63 0xc8 0xd0 0x00 0x16 0x48 0x08 0x12 0x58 0xba LDA dj4, [p3], #4; MOVX r1, #50; MOV r0, #18 + 8022 0x63 0x80 0xd0 0x08 0x9a 0x2c LDA m0, [p3], #4; MOVX r2, #19 + 8028 0x03 0x1c 0x66 0x98 LDA dc0, [p3], #4 + 8032 0x03 0x8a 0x66 0x98 LDA dc4, [p3], m4 + 8036 0x03 0x04 0xb6 0x98 LDA r5, [p3] + 8040 0x03 0x24 0xd6 0x98 LDA r6, [p3, #8] + 8044 0x00 0x00 NOPX + 8046 0x00 0x00 NOPX + 8048 0x00 0x00 NOPX + 8050 0x00 0x00 NOPX + 8052 0x00 0x00 NOPX + 8054 0x11 0x48 0x4d 0x98 LSHL r4, r5, r4 + 8058 0x30 0xc7 0xba 0xe4 0xff 0x24 LSHL r3, r6, r3; ADD.NC lc, r4, #-1 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8064 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0xc0 0xd0 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; MOV dj1, r3; NOPV +.label ZLS_F_Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params_112 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8080 0x43 0x83 0x72 0x39 0x6c 0x80 0x8b 0x00 0x00 0x00 0x48 0x02 0x38 0x00 0x00 0xe1 VLDA x0, [p2], #64; VLDB x2, [p1], #64; MOVS p4, p0; NOPX; VSHUFFLE x1, x2, x0, r1; NOPV +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8096 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0xc0 0x02 0x38 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VSHUFFLE x3, x0, x0, r1; NOPV + 8112 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x44 0xc0 0xe8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmlh0, x1, x3, r0; NOPV + 8128 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x04 0xc4 0xe8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x1, x3, r2; NOPV + 8144 0x00 0x2c 0xf0 0x00 0x20 0x18 0x26 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST.3D bmlh0, [p0], d0; NOPX; NOPM; NOPV + 8160 0x00 0x2c 0xf0 0x00 0x24 0x20 0x06 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmll0, [p4, dj1]; NOPX; NOPM; NOPV +.label ZLE_F_Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params_208 +.end_of_loop + 8176 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.loop_nesting 0 + 8192 0x90 0x11 0x60 0x01 0x40 0x00 0x48 0x02 0x39 0x3a MOVS p4, p0; RET lr; VSHUFFLE x1, x2, x0, r1 +.delay_slot + 8202 0x19 0x80 0x04 0x78 VSHUFFLE x3, x0, x0, r1 +.delay_slot + 8206 0x18 0x89 0x81 0xd8 VSHUFFLE bmlh0, x1, x3, r0 +.delay_slot + 8210 0x18 0x09 0x89 0xd8 VSHUFFLE bmll0, x1, x3, r2 +.delay_slot + 8214 0x08 0x18 0x26 0x98 VST.3D bmlh0, [p0], d0 +.delay_slot + 8218 0x0c 0x20 0x06 0x98 VST bmll0, [p4, dj1] +.label _Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params__end +.label __Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params___func_end0 + +.text_segment PM 8224 +.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_begin0 +.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params +.function_start + 8224 0x20 0x93 0xd0 0x01 0x10 0x28 0x07 0x3f 0x58 0xba LDA p1, [p1]; MOVX r17, #1; MOV m0, #-193 + 8234 0xe6 0x04 0x80 0x00 0x01 0xf2 0x32 0x44 0x10 0xba MOVA m1, #-208; MOVXM p4, #509064 + 8244 0x81 0x42 0xd0 0x03 0x25 0x54 LDA r16, [p4], m0; MOV m0, #201 + 8250 0x04 0x0a 0x6a 0x98 LDA.u8 r19, [p4], m0 + 8254 0x04 0x2a 0x56 0x98 LDA r18, [p4], m1 + 8258 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8264 0x00 0x83 0xdf 0xf0 0x7b 0x0c LDA p0, [p0]; ST lr, [sp, #-8] +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8270 0x40 0xbe 0xdf 0xe2 0x3b 0x0c LDA r15, [p2]; ST p2, [sp, #-16] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8276 0x0f 0xff 0x9d 0x98 ST p7, [sp, #-4] +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 8280 0xfd 0xe3 0xb0 0x00 0x03 0x8e 0x00 0x00 0x41 0x3a ST p6, [sp, #-20]; JL #7280 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8290 0xfe 0xbe 0xb0 0x27 0x08 0x7d 0x31 0x60 0x79 0x3a ST r15, [sp, #-12]; MUL r16, r19, r16; MOV p2, p1 +.delay_slot + 8300 0x1e 0x68 0xc0 0xf8 MOV p6, p4 +.delay_slot + 8304 0xfd 0x13 0xb8 0x42 0x3b 0x5c ST p1, [sp, #-24]; LSHL r16, r16, r17 +.delay_slot + 8310 0xf0 0x11 0x60 0x25 0x08 0xec 0x04 0x10 0x79 0x3a MOVS p7, p0; LSHL r16, r18, r17; MOV m0, r16 +.delay_slot + 8320 0x00 0x2c 0xf2 0x17 0x20 0x01 0x5b 0x00 0x00 0x01 0xb3 0xe0 0xa8 0x00 0x00 0xe1 NOPA; PADDB [p1], m0; NOPS; NOPX; ADD.NC p3, r15, r16; NOPV +.return_address + 8336 0xce 0xc1 0x50 0x44 0x12 0x2c LDA.u8 r16, [p6, #7]; MOVX r17, #2 + 8342 0x00 0x00 NOPX + 8344 0x00 0x00 NOPX + 8346 0x00 0x00 NOPX + 8348 0x00 0x00 NOPX + 8350 0x00 0x00 NOPX + 8352 0x00 0x00 NOPX + 8354 0x14 0x61 0x08 0x98 NE r16, r17, r16 + 8358 0x80 0x10 0xb0 0x40 0x01 0x84 JNZ r16, #8544 +.delay_slot + 8364 0x00 0x07 0xc8 0xc9 0x10 0x44 MOVXM p4, #509064 +.delay_slot +.swstall delay_slot + 8370 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8372 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8374 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8376 0x00 0x00 NOPX + 8378 0x9f 0xc2 0xd0 0x00 0x00 0x28 0x07 0x30 0x58 0xba LDA r16, [p4], #-4; MOVX r0, #1; MOV m0, #-208 + 8388 0x04 0x2e 0xb6 0x98 LDA r21, [p4], #8 + 8392 0x04 0x1e 0x56 0x98 LDA r18, [p4], #4 + 8396 0xfd 0x4e 0x20 0xd1 0x81 0xd4 LDA r19, [sp, #-24]; MOV p0, p4 + 8402 0x81 0x52 0xd0 0x9c 0x8b 0x03 0xb0 0x60 0x72 0xba LDA r20, [p4], m0; MOVS p0, p7; MOV p7, p0 + 8412 0x07 0xef 0x19 0x18 LDA p6, [sp, #-20] + 8416 0x00 0x00 NOPX + 8418 0x14 0x23 0x1d 0x98 LSHL r17, r16, r17 + 8422 0x14 0x20 0x0d 0x98 LSHL r16, r16, r0 +.no_stack_arguments + 8426 0x00 0x0e 0x38 0x00 0x01 0x04 JL #7280 +.delay_slot + 8432 0x94 0x81 0xbb 0x33 0x8a 0xa4 LSHL r18, r18, r0; ADD.NC r22, r19, r17 +.delay_slot + 8438 0xac 0x41 0xba 0xaf 0x92 0xa4 LSHL r17, r21, r0; ADD.NC r21, r15, r18 +.delay_slot + 8444 0xa4 0x81 0xb2 0xd1 0xb2 0xa4 LSHL r18, r20, r0; ADD.NC p1, r17, r22 +.delay_slot + 8450 0x1a 0x69 0xc1 0x58 ADD.NC p2, r19, r16 +.delay_slot + 8454 0x00 0x2c 0xf0 0x00 0x10 0x01 0xb5 0x64 0xae 0xba NOPA; NOPB; ADD.NC p3, r21, r18 +.return_address + 8464 0xfe 0x43 0x20 0x00 0x00 0x28 0x07 0x34 0x58 0xba LDA p4, [sp, #-16]; MOVX r0, #1; MOV m0, #-204 + 8474 0x07 0xfe 0x16 0x98 LDA r16, [p7], #-4 + 8478 0xff 0xf3 0x26 0xdd 0x81 0xd4 LDA p7, [sp, #-4]; MOV p3, p7 + 8484 0x03 0x0a 0x36 0x98 LDA r17, [p3], m0 + 8488 0x07 0xf8 0x39 0x18 LDA lr, [sp, #-8] + 8492 0x07 0xf5 0xf1 0x18 LDA r15, [sp, #-12] + 8496 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 + 8502 0x04 0x06 0x56 0x98 LDA r18, [p4] + 8506 0x14 0x20 0x0d 0x98 LSHL r16, r16, r0 + 8510 0x00 0x00 NOPX + 8512 0x00 0x00 NOPX + 8514 0x00 0x00 NOPX +.tail_call + 8516 0x00 0x0f 0x90 0x00 0x00 0x84 J #7968 +.delay_slot + 8522 0x14 0x62 0x0d 0x98 LSHL r17, r17, r0 +.delay_slot + 8526 0x1c 0x58 0xc9 0x58 ADD.NC r17, r17, r18 +.delay_slot + 8530 0x19 0x69 0x41 0x58 ADD.NC p1, r18, r16 +.delay_slot + 8534 0x18 0x69 0x20 0xf8 MOV p0, r18 +.delay_slot + 8538 0x00 0x2c 0xf4 0xd1 0x82 0x94 NOPA; ADD.NC p2, r17, r16 +.label TGT_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_320 +.return_address + 8544 0x07 0xf8 0x39 0x18 LDA lr, [sp, #-8] + 8548 0x07 0xf5 0xf1 0x18 LDA r15, [sp, #-12] + 8552 0x07 0xff 0x99 0x18 LDA p7, [sp, #-4] + 8556 0x07 0xef 0x19 0x18 LDA p6, [sp, #-20] + 8560 0x00 0x00 NOPX + 8562 0x00 0x00 NOPX + 8564 0x00 0x00 NOPX + 8566 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 8570 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 8576 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8578 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8580 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8582 0x00 0x00 NOPX +.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params__end +.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_end0 + +.text_segment PM 8592 +.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function_start + 8592 0x00 0x07 0xc8 0xc4 0x00 0x44 MOVXM p4, #508416 + 8598 0x80 0xc2 0xd0 0x2f 0x41 0xd4 LDA r16, [p4]; MOV r0, r15 + 8604 0x00 0x10 0x00 0x00 0x01 0xc4 PADDXM [sp], #128 + 8610 0xff 0x3a 0xb0 0x02 0x2d 0x70 0x70 0x02 ST r14, [sp, #-8]; MOV r17, CORE_ID + 8618 0xff 0xb6 0xb0 0x01 0xa8 0xf0 0x70 0x02 ST r13, [sp, #-4]; MOV r13, lr + 8626 0x0f 0xec 0x1d 0x98 ST p0, [sp, #-20] + 8630 0x0f 0xf7 0x9d 0x98 ST p7, [sp, #-12] + 8634 0xfe 0x02 0xb0 0x01 0xca 0x60 0x70 0x02 ST r0, [sp, #-16]; MOV r14, p2 + 8642 0x80 0x11 0x20 0x40 0x01 0x84 JNZ r16, #8768 +.delay_slot + 8648 0x1b 0xd6 0xc0 0xf8 MOV r15, p3 +.delay_slot + 8652 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17 +.delay_slot + 8656 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 8660 0x00 0x07 0xc6 0xc4 0x08 0x44 MOVXM p3, #508420 +.delay_slot + 8666 0x0b 0x06 0x31 0x98 ST r17, [p3] +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8670 0xf0 0x91 0x60 0x00 0x01 0xf0 0xb1 0x10 0x11 0x3a MOVS p7, p1; MOVXM p1, #508448 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8680 0x20 0xc0 0xe0 0x88 0x8b 0x00 0x01 0xf0 0xb1 0x0e 0x10 0x76 ST.s8 r16, [p1]; MOVS p0, p2; MOVXM p1, #508444 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8692 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 8694 0x00 0x0c 0xa8 0x00 0x01 0x04 JL #6480 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8700 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8702 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8704 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 8708 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 8712 0x20 0xc2 0x30 0x00 0x01 0xa5 0x70 0x02 ST r16, [p1]; NOPM +.return_address + 8720 0x33 0x91 0x60 0x01 0x33 0x82 0x00 0x02 MOVS p1, p7; ADD.NC p2, r14, #8 + 8728 0x02 0x06 0x3a 0x98 LDA.u16 r17, [p2] + 8732 0x44 0xc3 0x50 0x00 0x01 0xf1 0x31 0x0c 0x10 0xba LDA.u16 r16, [p2, #4]; MOVXM p2, #508440 + 8742 0x00 0x00 NOPX + 8744 0x00 0x11 0x28 0x00 0x00 0x84 J #8784 +.delay_slot + 8750 0x00 0x07 0xc6 0xc4 0x20 0x44 MOVXM p3, #508432 +.delay_slot +.swstall delay_slot + 8756 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8758 0x00 0x00 NOPX +.delay_slot + 8760 0x0b 0x06 0x31 0x98 ST r17, [p3] +.delay_slot + 8764 0x0a 0x06 0x11 0x98 ST r16, [p2] +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_176 + 8768 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x01 0xf1 0xb1 0x08 0x10 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVXM p3, #508432; NOPV +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_192 + 8784 0x1a 0x67 0x86 0x18 ADD.NC p2, r15, #12 + 8788 0x5f 0xee 0xd0 0x00 0x01 0xf2 0x31 0x02 0x10 0xba LDA r27, [p2], #-4; MOVXM p4, #508420 + 8798 0x02 0xfe 0x16 0x98 LDA r16, [p2], #-4 + 8802 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4 + 8806 0x02 0x46 0x56 0x98 LDA r18, [p2, #16] + 8810 0x00 0x00 NOPX + 8812 0x00 0x00 NOPX + 8814 0x00 0x00 NOPX + 8816 0x00 0x00 NOPX + 8818 0x00 0x00 NOPX + 8820 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 8824 0x0a 0x06 0x11 0x98 ST r16, [p2] + 8828 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 8832 0x00 0x00 NOPX + 8834 0x00 0x00 NOPX + 8836 0x00 0x00 NOPX + 8838 0x14 0x93 0x08 0x18 ACQ r18, r16 + 8842 0x00 0x2f 0x00 0x00 0x01 0xf3 0xb1 0x00 0x10 0xba MOVA r15, #1; MOVXM p7, #508416 + 8852 0x06 0x00 0x28 0x2b 0xc1 0xe4 MOVX r24, #0; MOV r16, sp + 8858 0x18 0x68 0x5a 0x18 ADD.NC p0, r16, #-76 + 8862 0xfd 0xd3 0x27 0x29 0x81 0xd4 LDA p5, [sp, #-20]; MOV r14, p2 + 8868 0x04 0x06 0x36 0x98 LDA r17, [p4] + 8872 0x60 0xc2 0xd0 0x00 0x01 0xf1 0xb1 0xe0 0x10 0xba LDA r16, [p3]; MOVXM p3, #508864 + 8882 0x07 0x06 0x56 0x98 LDA r18, [p7] + 8886 0x00 0x00 NOPX + 8888 0x00 0x00 NOPX + 8890 0x00 0x00 NOPX + 8892 0x05 0x06 0x76 0x98 LDA r19, [p5] + 8896 0x00 0x00 NOPX + 8898 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 + 8902 0x14 0xa2 0x07 0x18 ADD r17, r18, #1 + 8906 0x14 0x20 0xfd 0x98 LSHL r16, r16, r15 +.no_stack_arguments + 8910 0x00 0x10 0x10 0x00 0x01 0x04 JL #8224 +.delay_slot + 8916 0x0f 0x06 0x31 0x98 ST r17, [p7] +.delay_slot + 8920 0x18 0x49 0xc1 0x58 ADD.NC dn0, r19, r16 +.delay_slot + 8924 0x0f 0xb4 0x25 0x98 ST dn0, [sp, #-76] +.delay_slot + 8928 0x0f 0xbb 0x15 0x98 ST r24, [sp, #-72] +.delay_slot + 8932 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x03 0xdf 0x8a 0xc1 0x36 NOPA; NOPB; ST r24, [sp, #-68]; NOPX +.return_address + 8944 0x1a 0x67 0x0a 0x18 ADD.NC p2, r14, #20 + 8948 0x02 0x06 0x16 0x98 LDA r16, [p2] + 8952 0x00 0x00 NOPX + 8954 0x00 0x00 NOPX + 8956 0x00 0x00 NOPX + 8958 0x00 0x00 NOPX + 8960 0x00 0x00 NOPX + 8962 0x00 0x00 NOPX + 8964 0x14 0x10 0xf8 0x18 REL r16, r15 + 8968 0x5c 0xc2 0xd0 0x00 0x01 0xf0 0xb1 0x0c 0x10 0xba LDA r16, [p2, #-8]; MOVXM p1, #508440 + 8978 0x01 0x06 0x56 0x98 LDA r18, [p1] + 8982 0x07 0x06 0x36 0x98 LDA r17, [p7] + 8986 0x07 0xf4 0x99 0x18 LDA p1, [sp, #-12] + 8990 0x07 0xf9 0xd1 0x18 LDA r14, [sp, #-8] + 8994 0x00 0x00 NOPX + 8996 0x00 0x00 NOPX + 8998 0x13 0xe1 0x01 0x98 SUB r16, r15, r16 + 9002 0x0a 0xe6 0x11 0x98 ST r16, [p2, #-8] + 9006 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 9010 0x80 0x11 0xa8 0x40 0x01 0x84 JNZ r16, #9040 +.delay_slot + 9016 0x10 0x30 0x01 0x18 MOVX r24, #0 +.delay_slot +.swstall delay_slot + 9020 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9022 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9024 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9026 0x00 0x00 NOPX + 9028 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x03 0x83 0x88 0xc1 0x36 NOPA; NOPB; ST r24, [p7]; NOPX +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 + 9040 0xff 0xb6 0x2e 0xed 0x41 0xd4 LDA r13, [sp, #-4]; MOV lr, r13 + 9046 0x07 0xf1 0xf1 0x18 LDA r15, [sp, #-16] + 9050 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 9054 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128 +.delay_slot +.swstall delay_slot + 9060 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9062 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9064 0x00 0x00 NOPX +.delay_slot + 9066 0x1f 0x62 0xc0 0xf8 MOV p7, p1 +.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + +.text_segment PM 9072 +.label __Z15_b14160_wrapperPPv___func_begin0 +.label _Z15_b14160_wrapperPPv +.function_start + 9072 0x1a 0x60 0xc0 0xf8 MOV p2, p0 + 9076 0x02 0x1c 0x1e 0x98 LDA p0, [p2], #4 + 9080 0x02 0x2c 0x9e 0x98 LDA p1, [p2], #8 + 9084 0x02 0xf5 0x9e 0x98 LDA p3, [p2, #-4] + 9088 0x02 0x05 0x1e 0x98 LDA p2, [p2] +.tail_call + 9092 0x00 0x10 0xc8 0x00 0x00 0x84 J #8592 +.delay_slot +.swstall delay_slot + 9098 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9100 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9102 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9104 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9106 0x00 0x00 NOPX +.label _Z15_b14160_wrapperPPv__end +.label __Z15_b14160_wrapperPPv___func_end0 + +.text_segment PM 9120 +.label __Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj___func_begin0 +.label _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj +.function_start + 9120 0x23 0x85 0xd0 0x00 0x01 0xf0 0x09 0x40 0x10 0xba LDA el0, [p1], #4; MOVXM r0, #508544 + 9130 0x08 0x00 0x80 0x80 0x0b 0x3e 0x27 0xa9 0x30 0x01 0x08 0x76 MOVA m0, #64; MOVS p0, r0; MOVX r2, #-3; ADD.NC p2, r0, #4 + 9142 0x00 0x42 0x20 0x22 0x01 0x64 MOVX r1, #4; MOV r0, #128 + 9148 0x00 0x00 NOPX + 9150 0x00 0x00 NOPX + 9152 0x00 0x00 NOPX + 9154 0x00 0x00 NOPX + 9156 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 9160 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 9164 0x01 0x1c 0x0e 0x98 LDA eh0, [p1], #4 + 9168 0x00 0x00 NOPX + 9170 0x00 0x00 NOPX + 9172 0x00 0x00 NOPX + 9174 0x00 0x00 NOPX + 9176 0x00 0x00 NOPX + 9178 0x0a 0x1c 0x29 0x98 ST el0, [p2], #4 + 9182 0x0a 0x1c 0x09 0x98 ST eh0, [p2], #4 + 9186 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 9190 0x01 0x1c 0x0e 0x98 LDA eh0, [p1], #4 + 9194 0x00 0x00 NOPX + 9196 0x00 0x00 NOPX + 9198 0x00 0x00 NOPX + 9200 0x00 0x00 NOPX + 9202 0x00 0x00 NOPX + 9204 0x0a 0x1c 0x29 0x98 ST el0, [p2], #4 + 9208 0x0a 0x1c 0x09 0x98 ST eh0, [p2], #4 + 9212 0x01 0x04 0x0e 0x98 LDA eh0, [p1] + 9216 0x01 0x14 0x2e 0x98 LDA el0, [p1, #4] + 9220 0x00 0x00 NOPX + 9222 0x00 0x00 NOPX + 9224 0x00 0x00 NOPX + 9226 0x00 0x00 NOPX + 9228 0x00 0x00 NOPX + 9230 0x0a 0x04 0x09 0x98 ST eh0, [p2] + 9234 0x0a 0x14 0x29 0x98 ST el0, [p2, #4] + 9238 0x00 0x08 0x76 0x98 LDA r3, [p0], m0 + 9242 0x00 0x00 NOPX + 9244 0x00 0x00 NOPX + 9246 0x00 0x00 NOPX + 9248 0x00 0x00 NOPX + 9250 0x00 0x00 NOPX + 9252 0x00 0x00 NOPX + 9254 0x10 0xc8 0x2d 0x98 LSHL r4, r3, r2 + 9258 0x18 0xc3 0xb0 0xa4 0xff 0x24 LSHL r3, r3, r1; ADD.NC r1, r4, #-1 + 9264 0x00 0x86 0x30 0x00 0x88 0x60 0x70 0x02 ST r1, [p0]; MOV r4, p0 + 9272 0x19 0x62 0x62 0x18 ADD.NC p1, r4, #-60 + 9276 0x01 0x08 0x96 0x98 LDA r4, [p1], m0 + 9280 0x00 0x00 NOPX + 9282 0x00 0x00 NOPX + 9284 0x00 0x00 NOPX + 9286 0x00 0x00 NOPX + 9288 0x00 0x00 NOPX + 9290 0x00 0x00 NOPX + 9292 0x20 0x85 0xb2 0x22 0x01 0x64 LSHL r2, r4, r2; MOV r4, #128 + 9298 0x10 0x85 0xff 0x18 ADD r2, r2, #-1 + 9302 0x23 0x8a 0x31 0x90 0x5c 0x5c ST r2, [p1], #4; MSC r4, r4, r3, r2 + 9308 0x09 0x1c 0x11 0x98 ST r0, [p1], #4 + 9312 0x09 0x1c 0x11 0x98 ST r0, [p1], #4 + 9316 0x09 0x2c 0x11 0x98 ST r0, [p1], #8 + 9320 0x09 0xfc 0x71 0x98 ST r3, [p1], #-4 + 9324 0x09 0x2c 0x91 0x98 ST r4, [p1], #8 + 9328 0x20 0x82 0x30 0x00 0xa9 0x60 0x70 0x02 ST r0, [p1]; MOV r5, p1 + 9336 0x19 0x62 0xde 0x18 ADD.NC p1, r5, #-68 + 9340 0x09 0x1c 0x11 0x98 ST r0, [p1], #4 + 9344 0x09 0x1c 0x11 0x98 ST r0, [p1], #4 + 9348 0x09 0x1c 0x51 0x98 ST r2, [p1], #4 + 9352 0x09 0x1c 0x31 0x98 ST r1, [p1], #4 + 9356 0x23 0x82 0x30 0x50 0x00 0x5c ST r0, [p1], #4; RET lr +.delay_slot + 9362 0x09 0x2c 0x71 0x98 ST r3, [p1], #8 +.delay_slot + 9366 0x09 0xfc 0x51 0x98 ST r2, [p1], #-4 +.delay_slot + 9370 0x09 0x2c 0x91 0x98 ST r4, [p1], #8 +.delay_slot + 9374 0x09 0x04 0x31 0x98 ST r1, [p1] +.delay_slot + 9378 0x09 0x14 0x11 0x98 ST r0, [p1, #4] +.label _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj__end +.label __Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj___func_end0 + +.text_segment PM 9392 +.label __Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params___func_begin0 +.label _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params +.function_start + 9392 0x00 0x41 0x00 0x00 0x01 0xf1 0x31 0x46 0x10 0xba MOVA r1, #2; MOVXM p2, #508556 + 9402 0x40 0xee 0xd0 0x00 0xb2 0x2c LDA r27, [p2]; MOVX r0, #22 + 9408 0x00 0x00 NOPX + 9410 0x00 0x00 NOPX + 9412 0x00 0x00 NOPX + 9414 0x00 0x00 NOPX + 9416 0x00 0x00 NOPX + 9418 0x00 0x00 NOPX + 9420 0x16 0xc2 0x17 0x98 EQ r1, r27, r1 + 9424 0x08 0x13 0x50 0x40 0x01 0x84 JNZ r1, #9888 +.delay_slot + 9430 0x10 0x04 0x75 0x18 MOVX r2, #29 +.delay_slot + 9434 0x10 0x00 0x22 0x18 SEL.EQZ r0, r0, r2, r27 +.delay_slot +.swstall delay_slot + 9438 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9440 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9442 0x00 0x00 NOPX + 9444 0x00 0x07 0xc4 0xc5 0x20 0x44 MOVXM p2, #508560 + 9450 0x02 0x04 0x36 0x98 LDA r1, [p2] + 9454 0x00 0x00 NOPX + 9456 0x00 0x00 NOPX + 9458 0x00 0x00 NOPX + 9460 0x00 0x00 NOPX + 9462 0x00 0x00 NOPX + 9464 0x00 0x00 NOPX + 9466 0x08 0x14 0x90 0x00 0x01 0x84 JZ r1, #10528 +.delay_slot +.swstall delay_slot + 9472 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9474 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9476 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9478 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9480 0x00 0x00 NOPX + 9482 0x10 0x04 0x29 0x18 MOVX r2, #10 + 9486 0x10 0x44 0x2c 0x98 LTU r2, r1, r2 + 9490 0x10 0x13 0x00 0x40 0x01 0x84 JNZ r2, #9728 +.delay_slot +.swstall delay_slot + 9496 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9498 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9500 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9502 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9504 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9506 0x00 0x38 0x68 0x00 0x00 0x08 0x7a 0xd0 0x10 0x3a VLDB x0, [p0], #64; MOVXM ls, #9632 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9516 0x00 0x38 0x68 0x00 0x00 0x09 0xba 0xd0 0x10 0x3a VLDB x0, [p0], #64; MOVXM le, #9632 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9526 0x00 0x2c 0xf0 0x1c 0x34 0x02 0xb8 0x7d 0xce 0xba NOPA; VLDB x0, [p0], #64; ADD.NC lc, r1, #-9 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9536 0x00 0x2c 0xf0 0x38 0x68 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9552 0x00 0x2c 0xf0 0x38 0x68 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9568 0x00 0x2c 0xf0 0x38 0x68 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9584 0x00 0x2c 0xf0 0x38 0x68 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9600 0x00 0x2c 0xf0 0x38 0x68 0x01 0x5b 0x00 0x00 0x00 0x00 0x00 0xe8 0x00 0x00 0xe1 NOPA; VLDB x0, [p0], #64; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9616 0x00 0x2c 0xf0 0x38 0x68 0x01 0x5b 0x00 0x00 0x00 0x00 0x00 0xe8 0x00 0x00 0xe1 NOPA; VLDB x0, [p0], #64; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_240 +.loop_nesting 1 +.begin_of_loop +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9632 0x00 0x2c 0xf0 0x38 0x69 0x1c 0x06 0x80 0x00 0x00 0x00 0x00 0xe8 0x00 0x00 0xe1 NOPA; VLDB x0, [p0], #64; VST bmll0, [p1], #64; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV +.loop_nesting 0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9648 0x23 0x80 0xd0 0x00 0x00 0x00 0xe0 0x02 VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9656 0x23 0x80 0xd0 0x00 0x00 0x00 0xe0 0x02 VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9664 0x23 0x80 0xd0 0x00 0x00 0x00 0xe0 0x02 VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9672 0x23 0x80 0xd0 0x01 0x40 0x00 0x00 0x00 0xe9 0x3a VST bmll0, [p1], #64; RET lr; VSHUFFLE bmll0, x0, x0, r0 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9682 0x23 0x80 0xd0 0x00 0x00 0x00 0xe0 0x02 VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9690 0x23 0x80 0xd0 0x00 0x00 0x00 0xe0 0x02 VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9698 0x23 0x80 0xd0 0x00 0x00 0x00 0xe0 0x02 VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9706 0x00 0x2c 0xf2 0x38 0x0d 0x0c NOPA; VST bmll0, [p1], #64 +.delay_slot + 9712 0x00 0x2c 0xf0 0x00 0x21 0x1c 0x06 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmll0, [p1], #64; NOPX; NOPM; NOPV +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_336 + 9728 0x1d 0x70 0xa0 0xf8 MOV lc, r1 + 9732 0x00 0x00 0x21 0xec 0x20 0x44 MOVXM ls, #9744 + 9738 0x00 0x00 0x26 0xed 0x00 0x44 MOVXM le, #9856 +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_352 +.loop_nesting 1 +.begin_of_loop + 9744 0x38 0x1c 0x34 0x18 VLDB x0, [p0], #64 + 9748 0x00 0x00 NOPX + 9750 0x00 0x2c 0xf0 0x00 0x10 0x00 0x01 0xa5 0x7e 0xba NOPA; NOPB; NOPM + 9760 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 9776 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 9792 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 9808 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 9824 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x00 0x00 0xe8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV + 9840 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLE_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_464 +.end_of_loop + 9856 0x00 0x2c 0xf0 0x00 0x21 0x1c 0x06 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmll0, [p1], #64; NOPX; NOPM; NOPV +.loop_nesting 0 + 9872 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot + 9876 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9878 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9880 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9882 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9884 0x00 0x01 0x67 0x98 NOPA +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_496 + 9888 0x00 0x07 0xc4 0xc5 0x00 0x44 MOVXM p2, #508544 + 9894 0x02 0x04 0x16 0x98 LDA r0, [p2] + 9898 0x00 0x00 NOPX + 9900 0x00 0x00 NOPX + 9902 0x00 0x00 NOPX + 9904 0x00 0x00 NOPX + 9906 0x00 0x00 NOPX + 9908 0x00 0x00 NOPX + 9910 0x00 0x14 0x90 0x00 0x01 0x84 JZ r0, #10528 +.delay_slot +.swstall delay_slot + 9916 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9918 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9920 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9922 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9924 0x00 0x00 NOPX + 9926 0x04 0x94 0x80 0x00 0x01 0xf2 0x31 0x42 0x10 0xba MOVA m5, #36; MOVXM p4, #508548 + 9936 0x83 0x86 0xd0 0x00 0x51 0x08 0x4f 0xfd 0x58 0xba LDA r1, [p4], #4; MOVX r5, #8; MOV r2, #-3 + 9946 0x95 0x12 0xd0 0x00 0x30 0x2a 0x60 0x00 0x58 0xba LDA r4, [p4], m5; MOVX r3, #1; MOV dc4, #0 + 9956 0x9d 0x90 0xd0 0x10 0x4b 0x00 0x60 0x8a 0x00 0x20 0x58 0x76 LDA m1, [p4], #-8; MOVS dc0, dc4; MOVX r6, #4; MOV m4, #32 + 9968 0x9d 0x94 0xd1 0x10 0x4b 0x00 0x0f 0xf8 0xe8 0x34 0x58 0x76 LDA dn1, [p4], #-8; MOVS dc1, dc4; ADD r0, r0, #-1; MOV r7, #52 + 9980 0x87 0x98 0xd5 0x10 0x4b 0x00 0x00 0x09 0x33 0xa8 0x10 0x76 LDA dj1, [p4], #12; MOVS dc5, dc4; MOVXM p2, #10064 + 9992 0x9d 0xd4 0xd0 0x00 0x00 0x09 0xb3 0xb8 0x10 0xba LDA dn5, [p4], #-8; MOVXM p3, #10096 + 10002 0x91 0x58 0xd0 0x41 0xaa 0x2c LDA dj5, [p4], m4; MOVX r16, #53 + 10008 0x9d 0x80 0xd0 0x0b 0xb0 0xe4 0xa8 0x7f 0xc8 0xba LDA m0, [p4], #-8; LTU r27, r5, r1; ADD.NC r5, r1, #-1 + 10018 0x9d 0x84 0xd0 0x0b 0x11 0x6c 0xa9 0x3f 0xc8 0xba LDA dn0, [p4], #-8; LSHL r17, r5, r2; ADD.NC r5, r4, #-1 + 10028 0x87 0x88 0xd0 0x0a 0x21 0x6c 0xac 0x40 0x48 0xba LDA dj0, [p4], #12; LSHL r2, r5, r2; ADD.NC r5, r17, #1 + 10038 0x80 0xc4 0xd0 0x06 0x52 0x90 0x68 0x80 0x48 0xba LDA dn4, [p4]; SEL.EQZ r5, r3, r5, r27; ADD.NC r3, r2, #1 + 10048 0x9c 0xc8 0xd0 0x00 0x20 0x01 0x5b 0x0a 0x5f 0xf8 0x01 0xa5 0x78 0x00 0x00 0xe1 LDA dj4, [p4, #-8]; NOPB; NOPS; ADD r5, r5, #-1; NOPM; NOPV +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_672 +.loop_nesting 1 + 10064 0x08 0x14 0x88 0x00 0x01 0x84 JZ r1, #10512 +.delay_slot +.swstall delay_slot + 10070 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10072 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10074 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10076 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10078 0x00 0x00 NOPX + 10080 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x02 0x29 0x50 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; MOV r17, r5; NOPV +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_704 +.loop_nesting 2 + 10096 0x20 0x14 0x80 0x00 0x01 0x84 JZ r4, #10496 +.delay_slot +.swstall delay_slot + 10102 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10104 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10106 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10108 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10110 0x00 0x00 NOPX + 10112 0x10 0xe4 0x6c 0x98 LTU r18, r3, r6 + 10116 0x90 0x14 0x38 0x40 0x01 0x84 JNZ r18, #10352 +.delay_slot +.swstall delay_slot + 10122 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10124 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10126 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10128 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10130 0x00 0x00 NOPX + 10132 0x00 0x28 0x68 0x00 0x00 0x08 0x7c 0x00 0x10 0x3a VLDB x0, [p0, #64]; MOVXM ls, #10240 + 10142 0x00 0x70 0xe8 0x00 0x00 0x09 0xbc 0x10 0x10 0x3a VLDB.3D x1, [p0], d1; MOVXM le, #10272 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 10152 0x1d 0x71 0xfe 0x98 ADD.NC lc, r3, #-3 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10156 0x38 0x14 0x34 0x18 VLDB x0, [p0, #64] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10160 0x00 0x2c 0xf0 0x70 0xe8 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB.3D x1, [p0], d1; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10176 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10192 0x00 0x2c 0xf0 0x28 0x68 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB x0, [p0, #64]; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10208 0x00 0x2c 0xf0 0x70 0xe8 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB.3D x1, [p0], d1; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10224 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x44 0x0e 0xe8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmlh0, x1, x0, r7; NOPV +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_848 +.loop_nesting 3 +.begin_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10240 0x00 0x2c 0xf0 0x28 0x6c 0x84 0x8b 0x00 0x00 0x00 0x04 0x20 0xe8 0x00 0x00 0xe1 NOPA; VLDB x0, [p0, #64]; MOVS p4, p1; NOPX; VSHUFFLE bmll0, x1, x0, r16; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10256 0x00 0x2c 0xf0 0x70 0xe9 0x18 0x26 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB.3D x1, [p0], d1; VST.3D bmlh0, [p1], d0; NOPX; NOPM; NOPV +.label ZLE_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_880 +.end_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10272 0x00 0x2c 0xf0 0x00 0x24 0x14 0x06 0x80 0x00 0x00 0x44 0x0e 0xe8 0x00 0x00 0xe1 NOPA; NOPB; VST bmll0, [p4, #64]; NOPX; VSHUFFLE bmlh0, x1, x0, r7; NOPV +.loop_nesting 2 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10288 0x90 0x91 0x60 0x00 0x04 0x20 0xe0 0x02 MOVS p4, p1; VSHUFFLE bmll0, x1, x0, r16 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10296 0x09 0x18 0x26 0x98 VST.3D bmlh0, [p1], d0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10300 0x82 0x80 0xd0 0x00 0x44 0x0e 0xe0 0x02 VST bmll0, [p4, #64]; VSHUFFLE bmlh0, x1, x0, r7 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10308 0x18 0x08 0x41 0xd8 VSHUFFLE bmll0, x1, x0, r16 + 10312 0x00 0x14 0x80 0x00 0x00 0x84 J #10496 +.delay_slot + 10318 0x23 0x04 0xd0 0x02 0x31 0x60 0x70 0x02 VST.3D bmlh0, [p1], d0; MOV p4, p1 +.delay_slot + 10326 0x82 0x80 0xd0 0x00 0x44 0x0e 0xe0 0x02 VST bmll0, [p4, #64]; VSHUFFLE bmlh0, x1, x0, r7 +.delay_slot + 10334 0x90 0x91 0x60 0x00 0x04 0x20 0xe0 0x02 MOVS p4, p1; VSHUFFLE bmll0, x1, x0, r16 +.delay_slot + 10342 0x09 0x18 0x26 0x98 VST.3D bmlh0, [p1], d0 +.delay_slot + 10346 0x00 0x2c 0xf8 0x28 0x0d 0x0c NOPA; VST bmll0, [p4, #64] +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_960 + 10352 0x00 0x00 0x21 0xf1 0x00 0x44 MOVXM ls, #10368 + 10358 0x00 0x00 0x26 0xf1 0xe0 0x44 MOVXM le, #10480 + 10364 0x1d 0x71 0x00 0x98 ADD.NC lc, r2, #1 +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_976 +.loop_nesting 3 +.begin_of_loop + 10368 0x02 0x86 0x88 0xc5 0x81 0xf4 VLDB x0, [p0, #64]; MOV p4, p1 + 10374 0x38 0x38 0x74 0x18 VLDB.3D x1, [p0], d1 + 10378 0x00 0x00 NOPX + 10380 0x00 0x00 NOPX + 10382 0x00 0x00 NOPX + 10384 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 10400 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 10416 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 10432 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x44 0x0e 0xe8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmlh0, x1, x0, r7; NOPV + 10448 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x04 0x20 0xe8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x1, x0, r16; NOPV + 10464 0x00 0x2c 0xf0 0x00 0x21 0x18 0x26 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST.3D bmlh0, [p1], d0; NOPX; NOPM; NOPV +.label ZLE_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1088 +.end_of_loop + 10480 0x00 0x2c 0xf0 0x00 0x24 0x14 0x06 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmll0, [p4, #64]; NOPX; NOPM; NOPV +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1104 +.loop_nesting 2 + 10496 0x14 0x62 0xe0 0x18 JNZD r17, r17, p3 +.delay_slot +.swstall delay_slot + 10500 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10502 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10504 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10506 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10508 0x00 0x01 0x67 0x98 NOPA +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1120 +.loop_nesting 1 + 10512 0x10 0x00 0xa0 0x18 JNZD r0, r0, p2 +.delay_slot +.swstall delay_slot + 10516 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10518 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10520 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10522 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10524 0x00 0x01 0x67 0x98 NOPA +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1136 +.loop_nesting 0 + 10528 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot + 10532 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10534 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10536 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10538 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10540 0x00 0x00 NOPX +.label _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params__end +.label __Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params___func_end0 + +.text_segment PM 10544 +.label __ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj___func_begin0 +.label _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj +.function_start + 10544 0x1b 0x6c 0xc0 0xf8 MOV p3, p6 + 10548 0xd0 0x91 0x60 0x00 0x01 0xf0 0xb1 0x0a 0x11 0x3a MOVS p6, p1; MOVXM p1, #508436 + 10558 0x01 0x06 0x16 0x98 LDA r16, [p1] + 10562 0x00 0x00 NOPX + 10564 0x00 0x00 NOPX + 10566 0x00 0x00 NOPX + 10568 0x00 0x00 NOPX + 10570 0x00 0x00 NOPX + 10572 0x00 0x00 NOPX + 10574 0x80 0x14 0xc8 0x40 0x01 0x84 JNZ r16, #10640 +.delay_slot + 10580 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 +.delay_slot + 10586 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12] +.delay_slot + 10590 0xf0 0x11 0x60 0x00 0xb7 0x60 0x70 0x02 MOVS p7, p0; MOV p1, p7 +.delay_slot + 10598 0x0f 0xf9 0x9d 0x98 ST p3, [sp, #-8] +.delay_slot + 10602 0xff 0x93 0xb0 0x00 0x01 0xf0 0x31 0x40 0x11 0x3a ST p1, [sp, #-4]; MOVXM p0, #508544 +.no_stack_arguments + 10612 0x00 0x11 0xd0 0x00 0x01 0x04 JL #9120 +.delay_slot + 10618 0x19 0x64 0xc0 0xf8 MOV p1, p2 +.delay_slot +.swstall delay_slot + 10622 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10624 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10626 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10628 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.label TGT_F_ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj_96 +.return_address + 10640 0xe0 0xc2 0xd0 0x00 0x01 0xf3 0xb1 0x4a 0x10 0xba LDA r16, [p7]; MOVXM p7, #508564 + 10650 0x07 0x06 0x36 0x98 LDA r17, [p7] + 10654 0x06 0x04 0x9e 0x98 LDA p1, [p6] + 10658 0x00 0x00 NOPX +.no_stack_arguments + 10660 0x00 0x12 0x58 0x00 0x01 0x04 JL #9392 +.delay_slot + 10666 0x10 0x24 0x05 0x18 MOVX r18, #1 +.delay_slot + 10670 0x00 0x07 0xc4 0xc5 0x00 0x44 MOVXM p2, #508544 +.delay_slot + 10676 0x1e 0x64 0xc0 0xf8 MOV p6, p2 +.delay_slot + 10680 0x14 0x63 0x2d 0x98 LSHL r17, r17, r18 +.delay_slot + 10684 0x18 0x68 0xc1 0x58 ADD.NC p0, r17, r16 +.return_address + 10688 0xfe 0x87 0x20 0x00 0x01 0xf1 0x31 0x0a 0x10 0xba LDA lr, [sp, #-12]; MOVXM p2, #508436 + 10698 0x40 0xc2 0xd0 0x60 0x02 0x2c LDA r16, [p2]; MOVX r24, #0 + 10704 0x06 0x66 0x36 0x98 LDA r17, [p6, #24] + 10708 0x07 0xfb 0x19 0x18 LDA p6, [sp, #-8] + 10712 0x07 0xff 0x99 0x18 LDA p7, [sp, #-4] + 10716 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 + 10722 0x00 0x00 NOPX + 10724 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 10728 0x14 0x20 0x07 0x18 ADD r16, r16, #1 +.delay_slot + 10732 0x14 0x77 0x07 0x98 EQ r27, r17, r16 +.delay_slot + 10736 0x14 0x21 0x82 0x18 SEL.EQZ r16, r16, r24, r27 +.delay_slot + 10740 0x0a 0x06 0x11 0x98 ST r16, [p2] +.delay_slot +.swstall delay_slot + 10744 0x00 0x00 NOPX +.label _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj__end +.label __ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj___func_end0 + +.text_segment PM 10752 +.label __Z14_b7835_wrapperPPv___func_begin0 +.label _Z14_b7835_wrapperPPv +.function_start + 10752 0x19 0x60 0xc0 0xf8 MOV p1, p0 + 10756 0x01 0x1c 0x1e 0x98 LDA p0, [p1], #4 + 10760 0x01 0x15 0x1e 0x98 LDA p2, [p1, #4] + 10764 0x01 0x04 0x9e 0x98 LDA p1, [p1] +.tail_call + 10768 0x00 0x14 0x98 0x00 0x00 0x84 J #10544 +.delay_slot +.swstall delay_slot + 10774 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10776 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10778 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10780 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10782 0x00 0x00 NOPX +.label _Z14_b7835_wrapperPPv__end +.label __Z14_b7835_wrapperPPv___func_end0 +.label __ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj___func_begin0 +.label _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj +.function_start + 10784 0x20 0x85 0xd8 0xa9 0x81 0xd4 LDA el0, [p1]; MOV r17, p2 + 10790 0x19 0x68 0x82 0x18 ADD.NC p1, r17, #4 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 10794 0x01 0x1e 0x56 0x98 LDA r18, [p1], #4 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10798 0x01 0x05 0xf6 0x98 LDA r15, [p1] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10802 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10804 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10806 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10808 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10810 0x18 0x17 0xa0 0xf8 MOV r0, r15 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 10814 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10820 0x7c 0xa5 0xf8 0x3f 0xfd 0x64 MUL r18, r15, r18; MOV r16, #-1 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10826 0xfd 0xca 0xb0 0x0f 0xff 0xfe 0x2f 0xff 0x91 0x3a ST r18, [sp, #-20]; MOVXM r17, #1073741823 + 10836 0x14 0xa1 0x0d 0x98 LSHL r16, r18, r16 + 10840 0x14 0x61 0x04 0x98 AND r16, r17, r16 + 10844 0x80 0x15 0x58 0x00 0x01 0x84 JZ r16, #10928 +.delay_slot + 10850 0x00 0xf3 0xd0 0xdd 0x81 0xd4 LDA p7, [p0]; MOV p0, p7 +.delay_slot + 10856 0x0f 0xf8 0x1d 0x98 ST p0, [sp, #-8] +.delay_slot + 10860 0x0f 0xf5 0xd5 0x98 ST r14, [sp, #-12] +.delay_slot + 10864 0x0f 0xf0 0x3d 0x98 ST lr, [sp, #-16] +.delay_slot + 10868 0x3c 0xba 0xdf 0xf8 0x2b 0x0c LDA r14, [p1, #-8]; ST r0, [sp, #-4] + 10874 0xfd 0x05 0xb0 0x00 0x02 0x5c ST el0, [sp, #-24]; MOVX r0, #0 + 10880 0x07 0xe8 0x99 0x18 LDA p1, [sp, #-24] +.no_stack_arguments + 10884 0x00 0x18 0xa0 0x00 0x01 0x04 JL #12608 +.delay_slot + 10890 0x10 0x22 0x09 0x18 MOVX r17, #2 +.delay_slot + 10894 0x14 0x03 0x1d 0x98 LSHL r1, r16, r17 +.delay_slot +.swstall delay_slot + 10898 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10900 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10902 0x00 0x2c 0xf0 0x00 0x10 0x00 0x01 0xa5 0x7e 0xba NOPA; NOPB; NOPM +.return_address + 10912 0x00 0x15 0x60 0x00 0x00 0x84 J #10944 +.delay_slot +.swstall delay_slot + 10918 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10920 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10922 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10924 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10926 0x00 0x00 NOPX +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_144 + 10928 0x00 0x2c 0xf0 0x00 0x27 0xe8 0x2d 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST el0, [sp, #-24]; NOPX; NOPM; NOPV +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_160 + 10944 0x78 0x15 0xe8 0x00 0x01 0x84 JZ r15, #11216 +.delay_slot +.swstall delay_slot + 10950 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10952 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10954 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10956 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10958 0x00 0x00 NOPX + 10960 0xfd 0xc6 0x20 0x00 0x00 0x08 0x7d 0x98 0x10 0xba LDA r17, [sp, #-20]; MOVXM ls, #11056 + 10970 0x00 0x33 0x00 0x00 0x00 0x09 0xbd 0xc8 0x10 0xba MOVA r19, #1; MOVXM le, #11152 + 10980 0xfd 0x4a 0x20 0x1d 0x49 0xee 0x0b 0xff 0xc8 0xba LDA r18, [sp, #-24]; LSHL r20, r14, r19; ADD.NC r16, r15, #-1 + 10990 0xfe 0x07 0x20 0x00 0x00 0x08 0x35 0x88 0x10 0xba LDA lr, [sp, #-16]; MOVXM p0, #11024 + 11000 0x18 0x0a 0x20 0xf8 MOV m0, r20 + 11004 0x00 0x00 NOPX + 11006 0x00 0x00 NOPX + 11008 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x23 0x19 0xec 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; LSHL r17, r17, r19; NOPM; NOPV +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_240 +.loop_nesting 1 + 11024 0x70 0x15 0xd0 0x00 0x01 0x84 JZ r14, #11168 +.delay_slot +.swstall delay_slot + 11030 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11032 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11034 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11036 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11038 0x00 0x00 NOPX + 11040 0x53 0x91 0x60 0x02 0xbb 0x90 0x70 0x02 MOVS p2, p7; MOV lc, r14 + 11048 0x00 0x2b 0x60 0x00 0xb4 0x90 0x70 0x02 NOPS; MOV p1, r18 +.label ZLS_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_272 +.loop_nesting 2 +.begin_of_loop + 11056 0x43 0xce 0x50 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 LDA.s16 r19, [p2], #2; NOPB; NOPS; NOPX; NOPM; NOPV + 11072 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 11088 0x23 0xce 0xe0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 ST.s16 r19, [p1], #2; NOPB; NOPS; NOPX; NOPM; NOPV + 11104 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 11120 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 11136 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLE_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_368 +.end_of_loop + 11152 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_384 +.loop_nesting 1 + 11168 0xe1 0x72 0x08 0x40 0x40 0x1c PADDB [p7], m0; JNZD r16, r16, p0 +.delay_slot +.swstall delay_slot + 11174 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11176 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11178 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11180 0x00 0x00 NOPX +.delay_slot + 11182 0x1c 0x98 0xc9 0x58 ADD.NC r18, r17, r18 +.loop_nesting 0 + 11186 0x00 0x15 0xf0 0x00 0x00 0x84 J #11232 +.delay_slot +.swstall delay_slot + 11192 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11194 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11196 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11198 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11200 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_432 + 11216 0xfe 0x07 0x20 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 LDA lr, [sp, #-16]; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_448 + 11232 0x07 0xf5 0xd1 0x18 LDA r14, [sp, #-12] + 11236 0x07 0xfb 0x99 0x18 LDA p7, [sp, #-8] + 11240 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] + 11244 0x00 0x00 NOPX + 11246 0x00 0x00 NOPX + 11248 0x00 0x00 NOPX + 11250 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 11254 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 11260 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11262 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11264 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11266 0x00 0x00 NOPX +.label _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj__end +.label __ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj___func_end0 + +.text_segment PM 11280 +.label __Z14_b8148_wrapperPPv___func_begin0 +.label _Z14_b8148_wrapperPPv +.function_start + 11280 0x19 0x60 0xc0 0xf8 MOV p1, p0 + 11284 0x01 0x1c 0x1e 0x98 LDA p0, [p1], #4 + 11288 0x01 0x15 0x1e 0x98 LDA p2, [p1, #4] + 11292 0x01 0x04 0x9e 0x98 LDA p1, [p1] +.tail_call + 11296 0x00 0x15 0x10 0x00 0x00 0x84 J #10784 +.delay_slot +.swstall delay_slot + 11302 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11304 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11306 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11308 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11310 0x00 0x00 NOPX +.label _Z14_b8148_wrapperPPv__end +.label __Z14_b8148_wrapperPPv___func_end0 +.label __Z15_b13739_wrapperPPv___func_begin0 +.label _Z15_b13739_wrapperPPv +.function_start + 11312 0x19 0x60 0xc0 0xf8 MOV p1, p0 + 11316 0x01 0x2c 0x1e 0x98 LDA p0, [p1], #8 + 11320 0x01 0xf5 0x1e 0x98 LDA p2, [p1, #-4] + 11324 0x01 0x04 0x9e 0x98 LDA p1, [p1] +.tail_call + 11328 0x00 0x07 0xa0 0x00 0x00 0x84 J #3904 +.delay_slot +.swstall delay_slot + 11334 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11336 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11338 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11340 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11342 0x00 0x00 NOPX +.label _Z15_b13739_wrapperPPv__end +.label __Z15_b13739_wrapperPPv___func_end0 +.label __Z15_b13744_wrapperPPv___func_begin0 +.label _Z15_b13744_wrapperPPv +.function_start + 11344 0x19 0x60 0xc0 0xf8 MOV p1, p0 + 11348 0x01 0x2c 0x1e 0x98 LDA p0, [p1], #8 + 11352 0x01 0xf5 0x1e 0x98 LDA p2, [p1, #-4] + 11356 0x01 0x04 0x9e 0x98 LDA p1, [p1] +.tail_call + 11360 0x00 0x09 0x80 0x00 0x00 0x84 J #4864 +.delay_slot +.swstall delay_slot + 11366 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11368 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11370 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11372 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11374 0x00 0x00 NOPX +.label _Z15_b13744_wrapperPPv__end +.label __Z15_b13744_wrapperPPv___func_end0 +.label __Z15_b13749_wrapperPPv___func_begin0 +.label _Z15_b13749_wrapperPPv +.function_start + 11376 0x1a 0x60 0xc0 0xf8 MOV p2, p0 + 11380 0x02 0x3c 0x1e 0x98 LDA p0, [p2], #12 + 11384 0x02 0xec 0x9e 0x98 LDA p1, [p2], #-8 + 11388 0x02 0x15 0x9e 0x98 LDA p3, [p2, #4] + 11392 0x02 0x05 0x1e 0x98 LDA p2, [p2] +.tail_call + 11396 0x00 0x0b 0x78 0x00 0x00 0x84 J #5872 +.delay_slot +.swstall delay_slot + 11402 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11404 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11406 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11408 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11410 0x00 0x00 NOPX +.label _Z15_b13749_wrapperPPv__end +.label __Z15_b13749_wrapperPPv___func_end0 + +.text_segment PM 11424 +.label _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj +.label __ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj___func_begin0 +.function_start + 11424 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 11428 0x00 0x00 NOPX + 11430 0x00 0x00 NOPX + 11432 0x00 0x00 NOPX + 11434 0x00 0x00 NOPX + 11436 0x00 0x00 NOPX + 11438 0x00 0x00 NOPX + 11440 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 11444 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 11448 0x00 0x00 NOPX + 11450 0x00 0x00 NOPX + 11452 0x00 0x00 NOPX + 11454 0x00 0x00 NOPX + 11456 0x00 0x00 NOPX + 11458 0x00 0x00 NOPX + 11460 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 11464 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 11468 0x00 0x00 NOPX + 11470 0x00 0x00 NOPX + 11472 0x00 0x00 NOPX + 11474 0x00 0x00 NOPX + 11476 0x00 0x00 NOPX + 11478 0x00 0x00 NOPX + 11480 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 11484 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 11488 0x00 0x00 NOPX + 11490 0x00 0x00 NOPX + 11492 0x00 0x00 NOPX + 11494 0x00 0x00 NOPX + 11496 0x00 0x00 NOPX + 11498 0x00 0x00 NOPX + 11500 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 11504 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 11508 0x00 0x00 NOPX + 11510 0x00 0x00 NOPX + 11512 0x00 0x00 NOPX + 11514 0x00 0x00 NOPX + 11516 0x00 0x00 NOPX + 11518 0x00 0x00 NOPX + 11520 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 11524 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 11528 0x00 0x00 NOPX + 11530 0x00 0x00 NOPX + 11532 0x00 0x00 NOPX + 11534 0x00 0x00 NOPX + 11536 0x00 0x00 NOPX + 11538 0x00 0x00 NOPX + 11540 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 11544 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 11548 0x00 0x00 NOPX + 11550 0x00 0x00 NOPX + 11552 0x00 0x00 NOPX + 11554 0x00 0x00 NOPX + 11556 0x00 0x00 NOPX + 11558 0x00 0x00 NOPX + 11560 0x08 0x04 0x29 0x98 ST el0, [p0] + 11564 0x01 0x14 0x2e 0x98 LDA el0, [p1, #4] + 11568 0x00 0x00 NOPX + 11570 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot + 11574 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11576 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11578 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11580 0x00 0x00 NOPX +.delay_slot + 11582 0x08 0x14 0x29 0x98 ST el0, [p0, #4] +.label _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj__end +.label __ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj___func_end0 + +.text_segment PM 11600 +.label _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params +.label __ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params___func_begin0 +.function_start + 11600 0x03 0x86 0xd0 0x00 0x00 0x28 0x80 0x20 0x58 0xba LDA r1, [p0], #4; MOVX r0, #1; MOV m1, #32 + 11610 0x03 0x96 0xd0 0x00 0x30 0x48 0x4f 0xfa 0x58 0xba LDA r5, [p0], #4; MOVX r3, #2; MOV r2, #-6 + 11620 0x05 0x92 0xd0 0x01 0x01 0x54 LDA r4, [p0], #8; MOV m0, #64 + 11626 0x05 0x1a 0xd1 0x02 0x01 0x54 LDA r6, [p0], m1; MOV dj0, #128 + 11632 0x00 0x00 NOPX + 11634 0x00 0x00 NOPX + 11636 0x00 0x00 NOPX + 11638 0x00 0x00 NOPX + 11640 0x00 0x00 NOPX + 11642 0x11 0x42 0x1f 0x98 MUL r1, r5, r1 + 11646 0x11 0x80 0x04 0x98 AND r0, r6, r0 + 11650 0x10 0xc0 0x05 0x98 OR r0, r3, r0 + 11654 0x19 0x82 0x30 0x84 0x9f 0x5c ST r0, [p0], #-16; MUL r1, r1, r4 + 11660 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 11664 0x10 0x40 0x2d 0x98 LSHL r0, r1, r2 +.delay_slot + 11668 0x08 0x1c 0x11 0x98 ST r0, [p0], #4 +.delay_slot + 11672 0x08 0x1c 0x01 0x98 ST m0, [p0], #4 +.delay_slot + 11676 0x08 0x04 0x41 0x98 ST dj0, [p0] +.delay_slot + 11680 0x08 0x14 0x01 0x98 ST m0, [p0, #4] +.label _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params__end +.label __ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params___func_end0 + +.text_segment PM 11696 +.label _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj +.label __ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj___func_begin0 +.function_start +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.noswbrkpt + 11696 0x00 0x16 0x50 0x00 0x01 0x04 JL #11424 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11702 0x18 0xc1 0xe0 0xf8 MOV dc0, lr +.delay_slot + 11706 0x1a 0x60 0xc0 0xf8 MOV p2, p0 +.delay_slot +.swstall delay_slot + 11710 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11712 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11714 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV +.tail_call +.return_address + 11728 0x00 0x16 0xa8 0x00 0x00 0x84 J #11600 +.delay_slot + 11734 0x1f 0x71 0x80 0xf8 MOV lr, dc0 +.delay_slot + 11738 0x18 0x64 0xc0 0xf8 MOV p0, p2 +.delay_slot +.swstall delay_slot + 11742 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11744 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11746 0x00 0x00 NOPX +.label _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj__end +.label __ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj___func_end0 + +.text_segment PM 11760 +.label __Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params___func_begin0 +.label _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params +.function_start + 11760 0xb0 0x91 0x60 0x00 0x0a 0x60 0x70 0x02 MOVS p5, p1; MOV r0, p2 + 11768 0x1b 0x60 0x12 0x18 ADD.NC p3, r0, #36 + 11772 0x63 0xa0 0xd0 0x3d 0x81 0xd4 LDA m2, [p3], #4; MOV r0, p7 + 11778 0x03 0x1c 0x06 0x98 LDA m0, [p3], #4 + 11782 0x03 0xd4 0x56 0x98 LDA r2, [p3, #-12] + 11786 0x03 0x04 0x86 0x98 LDA m1, [p3] + 11790 0x00 0x00 NOPX + 11792 0x00 0x00 NOPX + 11794 0x00 0x00 NOPX + 11796 0x00 0x00 NOPX + 11798 0x00 0x00 NOPX + 11800 0x10 0x17 0xe0 0x00 0x01 0x84 JZ r2, #12224 +.delay_slot + 11806 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.delay_slot + 11810 0xe1 0x72 0x06 0xdd 0x81 0xf4 PADDB [p7], m0; MOV p3, p7 +.delay_slot + 11816 0x38 0x4b 0x90 0x18 PADDB [p0], m2 +.delay_slot + 11820 0x01 0x72 0x08 0xc1 0x81 0xf4 PADDB [p0], m0; MOV p4, p0 +.delay_slot + 11826 0x39 0x2b 0x90 0x18 PADDB [p1], m1 + 11830 0x10 0x02 0x11 0x18 MOVX r1, #4 + 11834 0x10 0x86 0x1c 0x98 LTU r3, r2, r1 + 11838 0x18 0x17 0x98 0x40 0x01 0x84 JNZ r3, #12080 +.delay_slot + 11844 0x18 0x80 0x60 0xb8 MOV dj0, #48 +.delay_slot + 11848 0x02 0x00 0x36 0x98 LDA r1, [p2, dj0] +.delay_slot +.swstall delay_slot + 11852 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11854 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11856 0x00 0x00 NOPX + 11858 0x81 0x13 0x76 0x10 0xe8 0x00 0x00 0x08 0x7f 0x58 0x10 0xb6 VLDA x2, [p4], m0; VLDB x1, [p3], m0; MOVXM ls, #11952 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11870 0x81 0x0c 0xfe 0x10 0x68 0x00 0x01 0x37 0xee 0x02 0x61 0x0b 0x60 0x7e PADDA [p4], m0; VLDB x0, [p7], m0; PADDS [p3], m0; MOVXM le, #12000 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11884 0x61 0x0b 0x70 0x11 0xef 0x08 0x5b 0x02 0xb8 0xbf 0x40 0xf6 VLDA x1, [p3], m0; VLDB x3, [p0], m0; PADDS [p7], m0; ADD.NC lc, r2, #-3 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11896 0x01 0x0c 0xf8 0x11 0x6b 0x08 0x5b 0x32 PADDA [p0], m0; VLDB x2, [p4], m0; PADDS [p3], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11904 0x81 0x0c 0xfe 0x10 0x68 0x3c PADDA [p4], m0; VLDB x0, [p7], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11910 0x01 0x1e 0x8e 0x10 0xb6 0x4c VLDB x3, [p0], m0; PADDS [p7], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11916 0x01 0x0c 0xf6 0x10 0xe8 0x3c PADDA [p0], m0; VLDB x1, [p3], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11922 0x81 0x16 0x80 0x12 0x0b 0xb4 VLDB x2, [p4], m0; VSHUFFLE bmll0, x1, x2, r1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11928 0x00 0x2c 0xfe 0x10 0x6b 0x08 0x5b 0x32 NOPA; VLDB x0, [p7], m0; PADDS [p3], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11936 0x00 0x2c 0xf0 0x11 0xed 0x28 0x06 0x80 0x00 0x00 0x40 0xc2 0xe8 0x00 0x00 0xe1 NOPA; VLDB x3, [p0], m0; VST bmll0, [p5], m1; NOPX; VSHUFFLE bmlh0, x0, x3, r1; NOPV +.label ZLS_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_192 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11952 0x81 0x0c 0xf6 0x10 0xef 0x08 0x5b 0x00 0x00 0x00 0x04 0x82 0xe8 0x00 0x00 0xe1 PADDA [p4], m0; VLDB x1, [p3], m0; PADDS [p7], m0; NOPX; VSHUFFLE bmll0, x1, x2, r1; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11968 0x01 0x0c 0xf8 0x11 0x69 0x28 0x26 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 PADDA [p0], m0; VLDB x2, [p4], m0; VST bmlh0, [p1], m1; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11984 0xa5 0x0c 0xfe 0x10 0x6b 0x08 0x5b 0x00 0x00 0x00 0x40 0xc2 0xe8 0x00 0x00 0xe1 PADDA [p5], m1; VLDB x0, [p7], m0; PADDS [p3], m0; NOPX; VSHUFFLE bmlh0, x0, x3, r1; NOPV +.label ZLE_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_240 +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12000 0x25 0x0c 0xf0 0x11 0xed 0x28 0x06 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 PADDA [p1], m1; VLDB x3, [p0], m0; VST bmll0, [p5], m1; NOPX; NOPM; NOPV +.loop_nesting 0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12016 0x18 0x09 0x05 0xd8 VSHUFFLE bmll0, x1, x2, r1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12020 0x09 0x28 0x26 0x98 VST bmlh0, [p1], m1 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12024 0x25 0x0c 0xf1 0x03 0x0b 0x94 PADDA [p1], m1; VSHUFFLE bmlh0, x0, x3, r1 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 12030 0x00 0x17 0xe0 0x00 0x00 0x84 J #12224 +.delay_slot +.aggressive_scheduled_block_id 2 +.noswbrkpt + 12036 0xa5 0x0c 0xf1 0x28 0x26 0x80 0x04 0x82 0xe2 0xba PADDA [p5], m1; VST bmlh0, [p1], m1; VSHUFFLE bmll0, x1, x2, r1 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12046 0x25 0x0c 0xfa 0x50 0x0d 0x0c PADDA [p1], m1; VST bmll0, [p5], m1 +.delay_slot + 12052 0xa5 0x0c 0xf1 0x03 0x0b 0x94 PADDA [p5], m1; VSHUFFLE bmlh0, x0, x3, r1 +.delay_slot + 12058 0x00 0x2c 0xfa 0x50 0x0d 0x0c NOPA; VST bmll0, [p5], m1 +.delay_slot + 12064 0x00 0x2c 0xf0 0x00 0x21 0x28 0x26 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmlh0, [p1], m1; NOPX; NOPM; NOPV +.label TGT_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_320 + 12080 0x1d 0x71 0x20 0xf8 MOV lc, r2 + 12084 0x00 0x00 0x21 0xfe 0x80 0x44 MOVXM ls, #12096 + 12090 0x00 0x00 0x26 0xff 0x60 0x44 MOVXM le, #12208 +.label ZLS_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_336 +.loop_nesting 1 +.begin_of_loop + 12096 0x81 0x0b 0x76 0x11 0x68 0x3c VLDA x1, [p4], m0; VLDB x2, [p3], m0 + 12102 0x61 0x0c 0xfe 0x10 0x6c 0x08 0x5b 0x32 PADDA [p3], m0; VLDB x0, [p7], m0; PADDS [p4], m0 + 12110 0xe1 0x0c 0xf0 0x11 0xe8 0x3c PADDA [p7], m0; VLDB x3, [p0], m0 + 12116 0x38 0x0b 0x90 0x18 PADDB [p0], m0 + 12120 0x00 0x00 NOPX + 12122 0x00 0x00 NOPX + 12124 0x00 0x01 0x67 0x98 NOPA + 12128 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x08 0x42 0xe8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x2, x1, r1; NOPV + 12144 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 12160 0x00 0x2c 0xf0 0x00 0x25 0x28 0x06 0x80 0x00 0x00 0x40 0xc2 0xe8 0x00 0x00 0xe1 NOPA; NOPB; VST bmll0, [p5], m1; NOPX; VSHUFFLE bmlh0, x0, x3, r1; NOPV + 12176 0x00 0x2c 0xfa 0x57 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; PADDB [p5], m1; NOPS; NOPX; NOPM; NOPV + 12192 0x00 0x2c 0xf0 0x00 0x21 0x28 0x26 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmlh0, [p1], m1; NOPX; NOPM; NOPV +.label ZLE_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_448 +.end_of_loop + 12208 0x00 0x2c 0xf2 0x57 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; PADDB [p1], m1; NOPS; NOPX; NOPM; NOPV +.label TGT_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_464 +.loop_nesting 0 + 12224 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 12228 0x1f 0x60 0x20 0xf8 MOV p7, r0 +.delay_slot +.swstall delay_slot + 12232 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12234 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12236 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12238 0x00 0x00 NOPX +.label _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params__end +.label __Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params___func_end0 +.label __ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj___func_begin0 +.label _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj +.function_start + 12240 0xb0 0x11 0x60 0x00 0x04 0x00 0x00 0x00 0x71 0x3a MOVS p5, p0; PADDXM [sp], #128 + 12250 0xff 0x87 0xb0 0x01 0xb1 0x60 0x70 0x02 ST lr, [sp, #-4]; MOV p3, p1 +.no_stack_arguments + 12258 0x31 0x11 0x60 0x00 0x05 0xb6 0x00 0x00 0x41 0x3a MOVS p1, p2; JL #11696 +.delay_slot + 12268 0x18 0x65 0xe0 0xf8 MOV p0, sp +.delay_slot + 12272 0x38 0xef 0x90 0x18 PADDB [p0], #-128 +.delay_slot + 12276 0x1c 0x60 0xc0 0xf8 MOV p4, p0 +.delay_slot +.swstall delay_slot + 12280 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12282 0x00 0x2c 0xf0 0x00 0x20 0x3c NOPA; NOPB +.return_address + 12288 0xf0 0x4a 0x22 0x90 0x8b 0x02 0x2d 0x70 0x72 0xba LDA r18, [sp, #-128]; MOVS p2, p4; MOV r17, CORE_ID + 12298 0xf0 0xda 0x28 0xc5 0x20 0x2c LDA r22, [sp, #-124]; EXTEND.u8 r17, r17 + 12304 0xf1 0x52 0x20 0x00 0x00 0x3e 0x6f 0xff 0x10 0xba LDA r20, [sp, #-120]; MOVXM r19, #65534 + 12314 0x60 0x93 0xd9 0xc6 0x21 0x2c LDA p1, [p3]; ADD r17, r19, r17 + 12320 0xf1 0xce 0x28 0xd5 0x60 0x2c LDA r19, [sp, #-116]; EXTEND.u16 r21, r17 + 12326 0x00 0x00 NOPX + 12328 0x05 0x06 0x36 0x98 LDA r17, [p5] + 12332 0x00 0x00 NOPX + 12334 0x15 0xa5 0x2f 0x98 MUL r18, r22, r18 + 12338 0x00 0x00 NOPX + 12340 0x14 0xa5 0x4f 0x98 MUL r18, r18, r20 + 12344 0x00 0x00 NOPX + 12346 0x15 0x65 0x2f 0x98 MUL r18, r21, r18 +.no_stack_arguments + 12350 0x00 0x16 0xf8 0x00 0x01 0x04 JL #11760 +.delay_slot + 12356 0x14 0xe5 0x2f 0x98 MUL r18, r19, r18 +.delay_slot + 12360 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 12364 0x14 0xa1 0x0d 0x98 LSHL r16, r18, r16 +.delay_slot + 12368 0x18 0x68 0xc1 0x58 ADD.NC p0, r17, r16 +.delay_slot +.swstall delay_slot + 12372 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.return_address + 12384 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] + 12388 0x00 0x00 NOPX + 12390 0x00 0x00 NOPX + 12392 0x00 0x00 NOPX + 12394 0x00 0x00 NOPX + 12396 0x00 0x00 NOPX + 12398 0x00 0x00 NOPX + 12400 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 12404 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128 +.delay_slot +.swstall delay_slot + 12410 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12412 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12414 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12416 0x00 0x00 NOPX +.label _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj__end +.label __ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj___func_end0 + +.text_segment PM 12432 +.label __Z14_b8170_wrapperPPv___func_begin0 +.label _Z14_b8170_wrapperPPv +.function_start + 12432 0x19 0x60 0xc0 0xf8 MOV p1, p0 + 12436 0x01 0x1c 0x1e 0x98 LDA p0, [p1], #4 + 12440 0x01 0x15 0x1e 0x98 LDA p2, [p1, #4] + 12444 0x01 0x04 0x9e 0x98 LDA p1, [p1] +.tail_call + 12448 0x00 0x17 0xe8 0x00 0x00 0x84 J #12240 +.delay_slot +.swstall delay_slot + 12454 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12456 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12458 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12460 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12462 0x00 0x00 NOPX +.label _Z14_b8170_wrapperPPv__end +.label __Z14_b8170_wrapperPPv___func_end0 +.label _ZN12me_primitive10udiv_dstepEjjRjS0_ +.function_start + 12464 0x00 0xc0 0x2f 0xa0 0x41 0xe4 MOVX r3, #0; MOV r31, r0 + 12470 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12474 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12478 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12482 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12486 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12490 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12494 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12498 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12502 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12506 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12510 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12514 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12518 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12522 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12526 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12530 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12534 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12538 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12542 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12546 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12550 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12554 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12558 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12562 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12566 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12570 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12574 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12578 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 12582 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 12586 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 +.delay_slot + 12590 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 +.delay_slot + 12594 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 +.delay_slot + 12598 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 +.delay_slot + 12602 0x18 0x9f 0xa0 0xf8 MOV r2, r31 +.label _ZN12me_primitive10udiv_dstepEjjRjS0___end + +.text_segment PM 12608 +.label memset +.function_start + 12608 0x08 0x18 0xf0 0x00 0x01 0x84 JZ r1, #12768 +.delay_slot + 12614 0x18 0x62 0xc0 0xf8 MOV p0, p1 +.delay_slot +.swstall delay_slot + 12618 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12620 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12622 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12624 0x00 0x00 NOPX + 12626 0x30 0x11 0x60 0x02 0xb8 0x50 0x70 0x02 MOVS p1, p0; MOV lc, r1 + 12634 0x00 0x00 0x31 0xe2 0xe0 0x44 MOVXM ls, #12656 + 12640 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x0d 0xb8 0xe8 0x10 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVXM le, #12752; NOPV +.label ZLS_Fmemset_48 +.loop_nesting 1 +.begin_of_loop + 12656 0x23 0x80 0xe0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 ST.s8 r0, [p1], #1; NOPB; NOPS; NOPX; NOPM; NOPV + 12672 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 12688 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 12704 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 12720 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 12736 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLE_Fmemset_144 +.end_of_loop + 12752 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_Fmemset_160 +.loop_nesting 0 + 12768 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot + 12772 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12774 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12776 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12778 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12780 0x00 0x00 NOPX +.label memset__end + +.bss_segment DMb 508416 24 + +.data_segment DMb 508440 +.label _ZL8num_iter + 0x1 + 0x0 + 0x0 + 0x0 + +.bss_segment DMb 508444 4 + +.bss_segment DMb 508448 1 + +.rodata_segment DMb 508480 +.label _ZL20g_uniformKernelFuncs + 0x70 + 0x23 + 0x0 + 0x0 + 0x0 + 0x2a + 0x0 + 0x0 + 0x10 + 0x2c + 0x0 + 0x0 + 0x30 + 0x2c + 0x0 + 0x0 + 0x50 + 0x2c + 0x0 + 0x0 + 0x70 + 0x2c + 0x0 + 0x0 + 0x90 + 0x30 + 0x0 + 0x0 + +.bss_segment DMb 508544 576 + +.stack DM_stack 507264 508352 diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable9/Release/0_0_reloadable9.map b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable9/Release/0_0_reloadable9.map new file mode 100644 index 0000000000000000000000000000000000000000..983d2fac110a6f03228d0251bef89839729e58af --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable9/Release/0_0_reloadable9.map @@ -0,0 +1,314 @@ + +// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri May 30 11:30:05 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// bridge -o../Release/0_0_reloadable4 ../Release/0_0_reloadable4.o -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/isg -g -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable4.bcf -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/softfloat/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork3577691 -pme + +// Release: ipp V-2024.06-TGT-241219 + +Memory map for memory 'DM_stack': + + Size = 1048576 + Width = 8 bits + Offset = 0 + Used = 1088 + + 0x0007bd80..0x0007c1bf ( 1088 items) : Stack + +Memory map for memory 'DMb': + + Size = 1048576 + Width = 8 bits + Offset = 0 + Used = 1725 + + 0x00000000..0x0007bd7f ( 507264 items) : Reserved + 0x0007bd80..0x0007c1bf ( 1088 items) : Stack + 0x0007c1c0..0x0007c1ff ( 64 items) : Reserved + 0x0007c200..0x0007c203 ( 4 items) : ../Release/0_0_reloadable4.o::_ZL9curr_iter (Data, Local, .bss.DMb.4) + 0x0007c204..0x0007c207 ( 4 items) : ../Release/0_0_reloadable4.o::_ZL8core_row (Data, Local, .bss.DMb.4) + 0x0007c208..0x0007c20b ( 4 items) : ../Release/0_0_reloadable4.o::_ZL11ifm1_offset (Data, Local, .bss.DMb.4) + 0x0007c20c..0x0007c20f ( 4 items) : ../Release/0_0_reloadable4.o::_ZL11ifm2_offset (Data, Local, .bss.DMb.4) + 0x0007c210..0x0007c213 ( 4 items) : ../Release/0_0_reloadable4.o::_ZL10ifmsv_size (Data, Local, .bss.DMb.4) + 0x0007c214..0x0007c217 ( 4 items) : ../Release/0_0_reloadable4.o::_ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6kn_rep (Data, Weak, .bss.DMb.4) + 0x0007c218..0x0007c21b ( 4 items) : ../Release/0_0_reloadable4.o::_ZL8num_iter (Data, Local, .data.DMb.4) + 0x0007c21c..0x0007c21f ( 4 items) : me_defs.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive11control_satE (Data, Global, .bss.DMb.4) + 0x0007c220..0x0007c220 ( 1 items) : me_defs.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive11control_rndE (Data, Global, .bss.DMb.1) + 0x0007c240..0x0007c25b ( 28 items) : ../Release/0_0_reloadable4.o::_ZL20g_uniformKernelFuncs (Data, Local, .rodata.DMb.64) + + Called functions : _Z15_b14160_wrapperPPv + _Z14_b7835_wrapperPPv + _Z14_b8148_wrapperPPv + _Z15_b13739_wrapperPPv + _Z15_b13744_wrapperPPv + _Z15_b13749_wrapperPPv + _Z14_b8170_wrapperPPv + + 0x0007c280..0x0007c2ff ( 128 items) : ../Release/0_0_reloadable4.o::_ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6params (Data, Weak, .bss.DMb.64) + 0x0007c300..0x0007c33f ( 64 items) : ../Release/0_0_reloadable4.o::add1d_attribute_broadcasting_params (Data, Global, .bss.DMb.64) + 0x0007c340..0x0007c37f ( 64 items) : ../Release/0_0_reloadable4.o::mul1d_params (Data, Global, .bss.DMb.64) + 0x0007c380..0x0007c3bf ( 64 items) : ../Release/0_0_reloadable4.o::sigmoid1d_params (Data, Global, .bss.DMb.64) + 0x0007c3c0..0x0007c4bf ( 256 items) : ../Release/0_0_reloadable4.o::conv2d_dw_params (Data, Global, .bss.DMb.64) + 0x0007ca00..0x000fffff ( 538112 items) : Reserved + +Memory map for memory 'PM': + + Size = 1048576 + Width = 8 bits + Offset = 0 + Used = 9978 + + 0x00000000..0x000009df ( 2528 items) : Reserved + 0x000009e0..0x00000c01 ( 546 items) : ../Release/0_0_reloadable4.o::_Z13kernelWrapperPPvjjjj (Function, Global, .text) (stack frame size = 64) + + Referenced symbols: _ZL20g_uniformKernelFuncs + + 0x00000c10..0x00000c27 ( 24 items) : ../Release/0_0_reloadable4.o::_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: add1d_attribute_broadcasting_params + + 0x00000c30..0x00000ce1 ( 178 items) : ../Release/0_0_reloadable4.o::_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv (Function, Weak, .text) (stack frame size = 64) + + Called functions : _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E + + Referenced symbols: add1d_attribute_broadcasting_params + + 0x00000cf0..0x00000d27 ( 56 items) : ../Release/0_0_reloadable4.o::_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: add1d_attribute_broadcasting_params + + 0x00000d30..0x00000d6b ( 60 items) : ../Release/0_0_reloadable4.o::_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv (Function, Weak, .text) (stack frame size = 64) + + Called functions : _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv + _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E + + Referenced symbols: add1d_attribute_broadcasting_params + + 0x00000d70..0x00000eb9 ( 330 items) : ../Release/0_0_reloadable4.o::_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: add1d_attribute_broadcasting_params + _ZN12me_primitive11control_rndE + + 0x00000ec0..0x00000f31 ( 114 items) : ../Release/0_0_reloadable4.o::_ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 128) + + Called functions : _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E + + Referenced symbols: add1d_attribute_broadcasting_params + + 0x00000f40..0x00001127 ( 488 items) : ../Release/0_0_reloadable4.o::_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 64) + + Called functions : _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv + _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + add1d_attribute_broadcasting_params + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL11ifm1_offset + _ZL8num_iter + + 0x00001130..0x00001173 ( 68 items) : ../Release/0_0_reloadable4.o::_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: sigmoid1d_params + + 0x00001180..0x000012f9 ( 378 items) : ../Release/0_0_reloadable4.o::_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: sigmoid1d_params + _ZN12me_primitive11control_rndE + + 0x00001300..0x000014e7 ( 488 items) : ../Release/0_0_reloadable4.o::_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 64) + + Called functions : _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv + _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + sigmoid1d_params + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL11ifm1_offset + _ZL8num_iter + + 0x000014f0..0x00001507 ( 24 items) : ../Release/0_0_reloadable4.o::_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: mul1d_params + + 0x00001510..0x000015a9 ( 154 items) : ../Release/0_0_reloadable4.o::_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 64) + + Called functions : _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E + + Referenced symbols: mul1d_params + + 0x000015b0..0x000016e3 ( 308 items) : ../Release/0_0_reloadable4.o::_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: mul1d_params + _ZN12me_primitive11control_rndE + + 0x000016f0..0x00001949 ( 602 items) : ../Release/0_0_reloadable4.o::_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE (Function, Global, .text) (stack frame size = 64) + + Called functions : _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + mul1d_params + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL11ifm1_offset + _ZL11ifm2_offset + _ZL8num_iter + + 0x00001950..0x00001c67 ( 792 items) : ../Release/0_0_reloadable4.o::_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh (Function, Local, .text) (stack frame size = 64) + + Called functions : _ZN12me_primitive10udiv_dstepEjjRjS0_ + + Referenced symbols: conv2d_dw_params + _ZN12me_primitive11control_rndE + + 0x00001c70..0x00001f11 ( 674 items) : ../Release/0_0_reloadable4.o::_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: conv2d_dw_params + _ZN12me_primitive11control_rndE + + 0x00001f20..0x0000201d ( 254 items) : ../Release/0_0_reloadable4.o::_Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: conv2d_dw_params + + 0x00002020..0x00002187 ( 360 items) : ../Release/0_0_reloadable4.o::_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params (Function, Weak, .text) (stack frame size = 64) + + Called functions : _Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params + _Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params + + Referenced symbols: conv2d_dw_params + + 0x00002190..0x0000236d ( 478 items) : ../Release/0_0_reloadable4.o::_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 128) + + Called functions : _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh + _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL8num_iter + _ZL10ifmsv_size + conv2d_dw_params + + 0x00002370..0x00002393 ( 36 items) : ../Release/0_0_reloadable4.o::_Z15_b14160_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + + 0x000023a0..0x000024a5 ( 262 items) : ../Release/0_0_reloadable4.o::_Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: _ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6params + + 0x000024b0..0x0000292d ( 1150 items) : ../Release/0_0_reloadable4.o::_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: _ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6params + + 0x00002930..0x000029f9 ( 202 items) : ../Release/0_0_reloadable4.o::_ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj (Function, Weak, .text) (stack frame size = 64) + + Called functions : _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj + _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params + + Referenced symbols: _ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6kn_rep + _ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6params + + 0x00002a00..0x00002a1f ( 32 items) : ../Release/0_0_reloadable4.o::_Z14_b7835_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj + + 0x00002a20..0x00002c03 ( 484 items) : ../Release/0_0_reloadable4.o::_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj (Function, Weak, .text) (stack frame size = 64) + + Called functions : memset + + 0x00002c10..0x00002c2f ( 32 items) : ../Release/0_0_reloadable4.o::_Z14_b8148_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj + + 0x00002c30..0x00002c4f ( 32 items) : ../Release/0_0_reloadable4.o::_Z15_b13739_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + + 0x00002c50..0x00002c6f ( 32 items) : ../Release/0_0_reloadable4.o::_Z15_b13744_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + + 0x00002c70..0x00002c93 ( 36 items) : ../Release/0_0_reloadable4.o::_Z15_b13749_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE + + 0x00002ca0..0x00002d41 ( 162 items) : ../Release/0_0_reloadable4.o::_ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj (Function, Local, .text) (stack frame size = 0) + 0x00002d50..0x00002da3 ( 84 items) : ../Release/0_0_reloadable4.o::_ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params (Function, Local, .text) (stack frame size = 0) + 0x00002db0..0x00002de3 ( 52 items) : ../Release/0_0_reloadable4.o::_ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj (Function, Local, .text) (stack frame size = 0) + + Called functions : _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj + _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params + + 0x00002df0..0x00002fcf ( 480 items) : ../Release/0_0_reloadable4.o::_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params (Function, Weak, .text) (stack frame size = 0) + 0x00002fd0..0x00003081 ( 178 items) : ../Release/0_0_reloadable4.o::_ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj (Function, Weak, .text) (stack frame size = 128) + + Called functions : _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj + _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params + + 0x00003090..0x000030af ( 32 items) : ../Release/0_0_reloadable4.o::_Z14_b8170_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj + + 0x000030b0..0x0000313d ( 142 items) : me_div.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive10udiv_dstepEjjRjS0_ (Function, Global, .text) (stack frame size = 0) + 0x00003140..0x000031ed ( 174 items) : string.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/lib/Release/libc.a)::memset (Function, Global, .text) (stack frame size = 0) + +External symbols: + + __dso_handle = 0x0 + _ctors_end = 0x0 + _ctors_start = 0x0 + _dtors_end = 0x0 + _dtors_start = 0x0 + _pc_end = 0x31ee + _pc_start = 0x9e0 + _sp_end_DM_stack = 0x7c1c0 + _sp_start_DM_stack = 0x7bd80 + +Section summary for memory 'DM_stack': + + .stack File + ---------- ---------- + 1088 + ---------- ---------- + 1088 Total + +Section summary for memory 'DMb': + + .bss .data .rodata File + ---------- ---------- ---------- ---------- + 600 4 28 ../Release/0_0_reloadable4.o + 5 0 0 me_defs.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release/libme.a) + ---------- ---------- ---------- ---------- + 605 4 28 Total + +Section summary for memory 'PM': + + .text File + ---------- ---------- + 9662 ../Release/0_0_reloadable4.o + 142 me_div.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release/libme.a) + 174 string.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/lib/Release/libc.a) + ---------- ---------- + 9978 Total + +File summary: + +../Release/0_0_reloadable4.o + DMb 632 + PM 9662 + +me_defs.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release/libme.a) + DMb 5 + +me_div.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release/libme.a) + PM 142 + +string.o(/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/lib/Release/libc.a) + PM 174 + diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable9/Release/0_0_reloadable9.sdr b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable9/Release/0_0_reloadable9.sdr new file mode 100644 index 0000000000000000000000000000000000000000..d4b928c4974d1777a4363132c93bc355c04f2901 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable9/Release/0_0_reloadable9.sdr @@ -0,0 +1,125 @@ + +// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri May 30 11:30:05 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// bridge -o../Release/0_0_reloadable4 ../Release/0_0_reloadable4.o -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/isg -g -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable4.bcf -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/softfloat/lib/Release -L/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork3577691 -pme + +// Release: ipp V-2024.06-TGT-241219 + +// Symbols in memory 'DM_bankA': +// Symbols in memory 'DM_bankAB': +// Symbols in memory 'DM_bankAC': +// Symbols in memory 'DM_bankAD': +// Symbols in memory 'DM_bankB': +// Symbols in memory 'DM_bankBC': +// Symbols in memory 'DM_bankBD': +// Symbols in memory 'DM_bankC': +// Symbols in memory 'DM_bankCD': +// Symbols in memory 'DM_bankD': +// Symbols in memory 'DM_stack': +// Symbols in memory 'DM_test': +// Symbols in memory 'DMb': +_symbol _ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6kn_rep 0x0007c214 +_symbol _ZN12me_primitive11control_satE 0x0007c21c +_symbol _ZN12me_primitive11control_rndE 0x0007c220 +_symbol _ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_KjE6params 0x0007c280 +_symbol add1d_attribute_broadcasting_params 0x0007c300 +_symbol mul1d_params 0x0007c340 +_symbol sigmoid1d_params 0x0007c380 +_symbol conv2d_dw_params 0x0007c3c0 +// Symbols in memory 'DMh': +// Symbols in memory 'DMh_bankA': +// Symbols in memory 'DMh_bankAB': +// Symbols in memory 'DMh_bankAC': +// Symbols in memory 'DMh_bankAD': +// Symbols in memory 'DMh_bankB': +// Symbols in memory 'DMh_bankBC': +// Symbols in memory 'DMh_bankBD': +// Symbols in memory 'DMh_bankC': +// Symbols in memory 'DMh_bankCD': +// Symbols in memory 'DMh_bankD': +// Symbols in memory 'DMh_stack': +// Symbols in memory 'DMs': +// Symbols in memory 'DMs_bankA': +// Symbols in memory 'DMs_bankAB': +// Symbols in memory 'DMs_bankAC': +// Symbols in memory 'DMs_bankAD': +// Symbols in memory 'DMs_bankB': +// Symbols in memory 'DMs_bankBC': +// Symbols in memory 'DMs_bankBD': +// Symbols in memory 'DMs_bankC': +// Symbols in memory 'DMs_bankCD': +// Symbols in memory 'DMs_bankD': +// Symbols in memory 'DMs_stack': +// Symbols in memory 'DMv': +// Symbols in memory 'DMv_bankA': +// Symbols in memory 'DMv_bankAB': +// Symbols in memory 'DMv_bankAC': +// Symbols in memory 'DMv_bankAD': +// Symbols in memory 'DMv_bankB': +// Symbols in memory 'DMv_bankBC': +// Symbols in memory 'DMv_bankBD': +// Symbols in memory 'DMv_bankC': +// Symbols in memory 'DMv_bankCD': +// Symbols in memory 'DMv_bankD': +// Symbols in memory 'DMv_stack': +// Symbols in memory 'DMw': +// Symbols in memory 'DMw_bankA': +// Symbols in memory 'DMw_bankAB': +// Symbols in memory 'DMw_bankAC': +// Symbols in memory 'DMw_bankAD': +// Symbols in memory 'DMw_bankB': +// Symbols in memory 'DMw_bankBC': +// Symbols in memory 'DMw_bankBD': +// Symbols in memory 'DMw_bankC': +// Symbols in memory 'DMw_bankCD': +// Symbols in memory 'DMw_bankD': +// Symbols in memory 'DMw_stack': +// Symbols in memory 'DMx': +// Symbols in memory 'DMx_bankA': +// Symbols in memory 'DMx_bankAB': +// Symbols in memory 'DMx_bankAC': +// Symbols in memory 'DMx_bankAD': +// Symbols in memory 'DMx_bankB': +// Symbols in memory 'DMx_bankBC': +// Symbols in memory 'DMx_bankBD': +// Symbols in memory 'DMx_bankC': +// Symbols in memory 'DMx_bankCD': +// Symbols in memory 'DMx_bankD': +// Symbols in memory 'DMx_stack': +// Symbols in memory 'PM': +_symbol _Z13kernelWrapperPPvjjjj 0x000009e0 +_symbol _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E 0x00000c10 +_symbol _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv 0x00000c30 +_symbol _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E 0x00000cf0 +_symbol _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv 0x00000d30 +_symbol _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E 0x00000d70 +_symbol _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E 0x00000ec0 +_symbol _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x00000f40 +_symbol _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv 0x00001130 +_symbol _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E 0x00001180 +_symbol _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x00001300 +_symbol _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E 0x000014f0 +_symbol _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv 0x00001510 +_symbol _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E 0x000015b0 +_symbol _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE 0x000016f0 +_symbol _Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params 0x00001c70 +_symbol _Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params 0x00001f20 +_symbol _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params 0x00002020 +_symbol _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x00002190 +_symbol _Z15_b14160_wrapperPPv 0x00002370 +_symbol _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj 0x000023a0 +_symbol _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params 0x000024b0 +_symbol _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj 0x00002930 +_symbol _Z14_b7835_wrapperPPv 0x00002a00 +_symbol _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj 0x00002a20 +_symbol _Z14_b8148_wrapperPPv 0x00002c10 +_symbol _Z15_b13739_wrapperPPv 0x00002c30 +_symbol _Z15_b13744_wrapperPPv 0x00002c50 +_symbol _Z15_b13749_wrapperPPv 0x00002c70 +_symbol _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params 0x00002df0 +_symbol _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj 0x00002fd0 +_symbol _Z14_b8170_wrapperPPv 0x00003090 +_symbol _ZN12me_primitive10udiv_dstepEjjRjS0_ 0x000030b0 +_symbol memset 0x00003140 +// Symbols in memory 'PMw': +// Symbols in memory 'TM4': diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable9/Release/0_0_reloadable9.srv b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable9/Release/0_0_reloadable9.srv new file mode 100644 index 0000000000000000000000000000000000000000..f5e6b3e5828701d92c85b709bbc3b7c45a16ad8f --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable9/Release/0_0_reloadable9.srv @@ -0,0 +1,14042 @@ + +// File generated by darts version V-2024.06#84922c0d9f#241219, Fri May 30 11:30:06 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// darts -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib -d -h -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/include/common -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/. -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime_cxx/libs/libcxxabi-16/include -I/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable4 me + +// Release: ipp V-2024.06-TGT-241219 +.label __Z13kernelWrapperPPvjjjj___func_begin0 +.label _Z13kernelWrapperPPvjjjj +.function kernelWrapper _Z13kernelWrapperPPvjjjj +.src_ref 0 "0_0_reloadable4.cc" 91 first +.src_ref 0 "0_0_reloadable4.cc" 93 60 +.src_ref 0 "0_0_reloadable4.cc" 93 110 first +.function_start + 2528 "00101100" // LDA r16, [p0]; NEZ r26, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2529 "11100000" // /* MW 5 */ + 2530 "11101001" // /* MW 4 */ + 2531 "11010000" // /* MW 3 */ + 2532 "11000010" // /* MW 2 */ + 2533 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 91 + 2534 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2535 "00000001" // /* MW 5 */ + 2536 "00000000" // /* MW 4 */ + 2537 "00000000" // /* MW 3 */ + 2538 "00001000" // /* MW 2 */ + 2539 "00000000" // /* MW 1 */ + 2540 "10011000" // ST p6, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2541 "00011101" // /* MW 3 */ + 2542 "11101111" // /* MW 2 */ + 2543 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 0 "0_0_reloadable4.cc" 98 112 + 2544 "00000010" // ST r14, [sp, #-16]; MOV r14, r3 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2545 "01110000" // /* MW 7 */ + 2546 "11010000" // /* MW 6 */ + 2547 "11001000" // /* MW 5 */ + 2548 "00000001" // /* MW 4 */ + 2549 "10110000" // /* MW 3 */ + 2550 "00111010" // /* MW 2 */ + 2551 "11111110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 0 "0_0_reloadable4.cc" 95 110 + 2552 "00000010" // ST r15, [sp, #-8]; MOV r15, r1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2553 "01110000" // /* MW 7 */ + 2554 "01010000" // /* MW 6 */ + 2555 "11101000" // /* MW 5 */ + 2556 "00000001" // /* MW 4 */ + 2557 "10110000" // /* MW 3 */ + 2558 "00111110" // /* MW 2 */ + 2559 "11111111" // /* MW 1 */ + 2560 "10011000" // ST p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2561 "10011101" // /* MW 3 */ + 2562 "11110111" // /* MW 2 */ + 2563 "00001111" // /* MW 1 */ + 2564 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2565 "00111101" // /* MW 3 */ + 2566 "11111100" // /* MW 2 */ + 2567 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 first + 2568 "00011000" // ADD.NC p6, r16, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2569 "00000010" // /* MW 3 */ + 2570 "01101000" // /* MW 2 */ + 2571 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 + 2572 "10011000" // LDA r16, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2573 "00010110" // /* MW 3 */ + 2574 "00011110" // /* MW 2 */ + 2575 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 36 + 2576 "10011000" // LDA r18, [p6], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2577 "01010110" // /* MW 3 */ + 2578 "00111110" // /* MW 2 */ + 2579 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 46 + 2580 "10011000" // LDA r17, [p6], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2581 "00110110" // /* MW 3 */ + 2582 "11101110" // /* MW 2 */ + 2583 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 60 + 2584 "10011000" // LDA r27, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2585 "01110110" // /* MW 3 */ + 2586 "00000111" // /* MW 2 */ + 2587 "00000110" // /* MW 1 */ + 2588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2589 "00000000" // /* MW 1 */ + 2590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2591 "00000000" // /* MW 1 */ + 2592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2593 "00000000" // /* MW 1 */ + 2594 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2595 "00000000" // /* MW 1 */ + 2596 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2597 "00000000" // /* MW 1 */ + 2598 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2599 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 219 30 first +.src_ref 1 "io_buffer_compiler.h" 219 37 first + 2600 "00011000" // SEL.EQZ r16, r16, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2601 "00100010" // /* MW 3 */ + 2602 "00100001" // /* MW 2 */ + 2603 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 218 23 first + 2604 "10011000" // ST r16, [p6, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2605 "00010001" // /* MW 3 */ + 2606 "11010110" // /* MW 2 */ + 2607 "00001110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 +.src_ref 1 "io_buffer_main.h" 434 8 +.src_ref 1 "io_buffer_main.h" 434 8 + 2608 "11100100" // MOVX r16, #-1; MOV el0, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2609 "00111001" // /* MW 5 */ + 2610 "00110101" // /* MW 4 */ + 2611 "10100000" // /* MW 3 */ + 2612 "00011111" // /* MW 2 */ + 2613 "11111100" // /* MW 1 */ + 2614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2615 "00000000" // /* MW 1 */ + 2616 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2617 "00000000" // /* MW 1 */ + 2618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2619 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 first + 2620 "00011000" // ACQ.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2621 "00001000" // /* MW 3 */ + 2622 "01010111" // /* MW 2 */ + 2623 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 95 60 +.src_ref 0 "0_0_reloadable4.cc" 95 110 +.src_ref 0 "0_0_reloadable4.cc" 98 60 +.src_ref 0 "0_0_reloadable4.cc" 101 7 + 2624 "01100100" // MOVX r17, #2; MOV r19, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2625 "00000101" // /* MW 5 */ + 2626 "10100000" // /* MW 4 */ + 2627 "00101001" // /* MW 3 */ + 2628 "01000001" // /* MW 2 */ + 2629 "00000100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 95 60 +.src_ref 0 "0_0_reloadable4.cc" 95 60 first + 2630 "11100100" // LSHL r20, r26, r17; MOV r18, p0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2631 "10000001" // /* MW 5 */ + 2632 "00100001" // /* MW 4 */ + 2633 "10111001" // /* MW 3 */ + 2634 "00100011" // /* MW 2 */ + 2635 "11010101" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 95 60 +.src_ref 0 "0_0_reloadable4.cc" 95 110 + 2636 "10100100" // LTU r18, r19, r15; ADD.NC p6, r18, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2637 "10100010" // /* MW 5 */ + 2638 "11010010" // /* MW 4 */ + 2639 "10011100" // /* MW 3 */ + 2640 "10011111" // /* MW 2 */ + 2641 "10011100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 95 60 +.src_ref 0 "0_0_reloadable4.cc" 98 60 + 2642 "10111010" // LDA r20, [p6]; ST r20, [sp, #-28]; MOV r19, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2643 "01110010" // /* MW 9 */ + 2644 "01100000" // /* MW 8 */ + 2645 "01101110" // /* MW 7 */ + 2646 "10000010" // /* MW 6 */ + 2647 "10010101" // /* MW 5 */ + 2648 "11100110" // /* MW 4 */ + 2649 "11010111" // /* MW 3 */ + 2650 "11010010" // /* MW 2 */ + 2651 "11000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 + 2652 "00000010" // ST r18, [sp, #-24]; MOV r26, r18 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2653 "01110000" // /* MW 7 */ + 2654 "10010000" // /* MW 6 */ + 2655 "01001100" // /* MW 5 */ + 2656 "00000011" // /* MW 4 */ + 2657 "10110000" // /* MW 3 */ + 2658 "01001010" // /* MW 2 */ + 2659 "11111101" // /* MW 1 */ + 2660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2661 "00000000" // /* MW 1 */ + 2662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2663 "00000000" // /* MW 1 */ + 2664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2665 "00000000" // /* MW 1 */ + 2666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2667 "00000000" // /* MW 1 */ + 2668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2669 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 first + 2670 "00011000" // ADD.NC p6, r20, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2671 "00000010" // /* MW 3 */ + 2672 "01101010" // /* MW 2 */ + 2673 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 + 2674 "10011000" // LDA r20, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2675 "10010110" // /* MW 3 */ + 2676 "00011110" // /* MW 2 */ + 2677 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 36 + 2678 "10011000" // LDA r22, [p6], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2679 "11010110" // /* MW 3 */ + 2680 "00111110" // /* MW 2 */ + 2681 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 46 + 2682 "10011000" // LDA r21, [p6], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2683 "10110110" // /* MW 3 */ + 2684 "11101110" // /* MW 2 */ + 2685 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 60 + 2686 "10011000" // LDA r27, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2687 "01110110" // /* MW 3 */ + 2688 "00000111" // /* MW 2 */ + 2689 "00000110" // /* MW 1 */ + 2690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2691 "00000000" // /* MW 1 */ + 2692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2693 "00000000" // /* MW 1 */ + 2694 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2695 "00000000" // /* MW 1 */ + 2696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2697 "00000000" // /* MW 1 */ + 2698 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2699 "00000000" // /* MW 1 */ + 2700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2701 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 219 30 first +.src_ref 1 "io_buffer_compiler.h" 219 37 first + 2702 "00011000" // SEL.EQZ r20, r20, r22, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2703 "01100010" // /* MW 3 */ + 2704 "00101001" // /* MW 2 */ + 2705 "00010101" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 218 23 first + 2706 "10011000" // ST r20, [p6, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2707 "10010001" // /* MW 3 */ + 2708 "11010110" // /* MW 2 */ + 2709 "00001110" // /* MW 1 */ + 2710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2711 "00000000" // /* MW 1 */ + 2712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2713 "00000000" // /* MW 1 */ + 2714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2715 "00000000" // /* MW 1 */ + 2716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2717 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 first + 2718 "00011000" // ACQ.COND r21, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2719 "00001000" // /* MW 3 */ + 2720 "01010111" // /* MW 2 */ + 2721 "00010101" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 98 60 first + 2722 "10011000" // LSHL r18, r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2723 "00011101" // /* MW 3 */ + 2724 "10100101" // /* MW 2 */ + 2725 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 98 60 +.src_ref 0 "0_0_reloadable4.cc" 98 60 + 2726 "10100100" // LSHL r18, r2, r17; ADD.NC r19, r19, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2727 "10010010" // /* MW 5 */ + 2728 "10110011" // /* MW 4 */ + 2729 "10111001" // /* MW 3 */ + 2730 "10100011" // /* MW 2 */ + 2731 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 98 60 +.src_ref 0 "0_0_reloadable4.cc" 98 112 + 2732 "10100100" // NEZ r26, r14; ADD.NC p6, r19, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2733 "10010010" // /* MW 5 */ + 2734 "11010011" // /* MW 4 */ + 2735 "00001100" // /* MW 3 */ + 2736 "10011110" // /* MW 2 */ + 2737 "01110110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 98 60 + 2738 "00001100" // LDA r18, [p6]; ST r26, [sp, #-32] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2739 "10101011" // /* MW 5 */ + 2740 "11000110" // /* MW 4 */ + 2741 "11011111" // /* MW 3 */ + 2742 "11001010" // /* MW 2 */ + 2743 "11000000" // /* MW 1 */ + 2744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2745 "00000000" // /* MW 1 */ + 2746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2747 "00000000" // /* MW 1 */ + 2748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2749 "00000000" // /* MW 1 */ + 2750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2751 "00000000" // /* MW 1 */ + 2752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2753 "00000000" // /* MW 1 */ + 2754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2755 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 first + 2756 "00011000" // ADD.NC p7, r18, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2757 "00000010" // /* MW 3 */ + 2758 "01101001" // /* MW 2 */ + 2759 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 13 + 2760 "10011000" // LDA r19, [p7], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2761 "01110110" // /* MW 3 */ + 2762 "00111110" // /* MW 2 */ + 2763 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 23 + 2764 "10011000" // LDA r18, [p7], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2765 "01010110" // /* MW 3 */ + 2766 "11101110" // /* MW 2 */ + 2767 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 36 + 2768 "10011000" // LDA r20, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2769 "10010110" // /* MW 3 */ + 2770 "00011110" // /* MW 2 */ + 2771 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 614 60 + 2772 "10011000" // LDA r27, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2773 "01110110" // /* MW 3 */ + 2774 "00000111" // /* MW 2 */ + 2775 "00000111" // /* MW 1 */ + 2776 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2777 "00000000" // /* MW 1 */ + 2778 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2779 "00000000" // /* MW 1 */ + 2780 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2781 "00000000" // /* MW 1 */ + 2782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2783 "00000000" // /* MW 1 */ + 2784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2785 "00000000" // /* MW 1 */ + 2786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2787 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 219 30 first +.src_ref 1 "io_buffer_compiler.h" 219 37 first + 2788 "00011000" // SEL.EQZ r19, r19, r20, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2789 "01000010" // /* MW 3 */ + 2790 "11100111" // /* MW 2 */ + 2791 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 218 23 first + 2792 "10011000" // ST r19, [p7, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2793 "01110001" // /* MW 3 */ + 2794 "11010110" // /* MW 2 */ + 2795 "00001111" // /* MW 1 */ + 2796 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2797 "00000000" // /* MW 1 */ + 2798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2799 "00000000" // /* MW 1 */ + 2800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2801 "00000000" // /* MW 1 */ + 2802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2803 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 434 8 first + 2804 "00011000" // ACQ.COND r18, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2805 "00001000" // /* MW 3 */ + 2806 "10010111" // /* MW 2 */ + 2807 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 101 7 first + 2808 "10011000" // LSHL r16, r0, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2809 "00011101" // /* MW 3 */ + 2810 "00100001" // /* MW 2 */ + 2811 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 101 7 + 2812 "11111000" // MOV dj0, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2813 "00100000" // /* MW 3 */ + 2814 "10001000" // /* MW 2 */ + 2815 "00011000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 101 7 + 2816 "01000100" // MOVXM p7, #508480 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2817 "10000000" // /* MW 5 */ + 2818 "11000100" // /* MW 4 */ + 2819 "11001110" // /* MW 3 */ + 2820 "00000111" // /* MW 2 */ + 2821 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 101 7 + 2822 "00001100" // LDA p1, [p7, dj0]; ST el0, [sp, #-36] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2823 "01011011" // /* MW 5 */ + 2824 "10111000" // /* MW 4 */ + 2825 "11011111" // /* MW 3 */ + 2826 "00010011" // /* MW 2 */ + 2827 "11100000" // /* MW 1 */ + 2828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2829 "00000000" // /* MW 1 */ + 2830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2831 "00000000" // /* MW 1 */ + 2832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2833 "00000000" // /* MW 1 */ + 2834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2835 "00000000" // /* MW 1 */ + 2836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2837 "00000000" // /* MW 1 */ + 2838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2839 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 101 4 +.no_stack_arguments + 2840 "00011000" // JL p1 /* MW 4 */ /* control_operation: words=4 call unconditional cycles_taken=1 indirect absolute delay_slots=5 */ + 2841 "01000000" // /* MW 3 */ + 2842 "00110000" // /* MW 2 */ + 2843 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 104 60 +.src_ref 0 "0_0_reloadable4.cc" 106 60 +.delay_slot + 2844 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2845 "11000000" // /* MW 3 */ + 2846 "01100000" // /* MW 2 */ + 2847 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2849 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2851 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2853 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2854 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2855 "01111110" // /* MW 9 */ + 2856 "10100101" // /* MW 8 */ + 2857 "00000001" // /* MW 7 */ + 2858 "00000000" // /* MW 6 */ + 2859 "00010000" // /* MW 5 */ + 2860 "00000000" // /* MW 4 */ + 2861 "11110000" // /* MW 3 */ + 2862 "00101100" // /* MW 2 */ + 2863 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_main.h" 464 8 +.src_ref 1 "io_buffer_main.h" 464 8 +.src_ref 1 "io_buffer_main.h" 464 8 +.src_ref 0 "0_0_reloadable4.cc" 104 60 first +.return_address + 2864 "00101100" // LDA r17, [p7]; MOVX r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2865 "00001010" // /* MW 5 */ + 2866 "01000000" // /* MW 4 */ + 2867 "11010000" // /* MW 3 */ + 2868 "11000110" // /* MW 2 */ + 2869 "11100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 + 2870 "00011000" // LDA r26, [sp, #-36] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2871 "01010001" // /* MW 3 */ + 2872 "11011111" // /* MW 2 */ + 2873 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 106 60 + 2874 "00011000" // LDA dj0, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2875 "01000001" // /* MW 3 */ + 2876 "11100100" // /* MW 2 */ + 2877 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_main.h" 464 8 + 2878 "00011000" // LDA el0, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2879 "00101001" // /* MW 3 */ + 2880 "11101000" // /* MW 2 */ + 2881 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 + 2882 "00011000" // LDA eh0, [sp, #-32] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2883 "00001001" // /* MW 3 */ + 2884 "11100000" // /* MW 2 */ + 2885 "00000111" // /* MW 1 */ + 2886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2887 "00000000" // /* MW 1 */ + 2888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2889 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 12 first + 2890 "00011000" // ADD.NC p0, r17, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2891 "10001000" // /* MW 3 */ + 2892 "01101000" // /* MW 2 */ + 2893 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 12 + 2894 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2895 "00110110" // /* MW 3 */ + 2896 "00000110" // /* MW 2 */ + 2897 "00000000" // /* MW 1 */ + 2898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2899 "00000000" // /* MW 1 */ + 2900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2901 "00000000" // /* MW 1 */ + 2902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2903 "00000000" // /* MW 1 */ + 2904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2905 "00000000" // /* MW 1 */ + 2906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2907 "00000000" // /* MW 1 */ + 2908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2909 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 first + 2910 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2911 "00001000" // /* MW 3 */ + 2912 "01010101" // /* MW 2 */ + 2913 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 2914 "11010100" // LDA r17, [p0, #-4]; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2915 "01000001" // /* MW 5 */ + 2916 "10101111" // /* MW 4 */ + 2917 "11011101" // /* MW 3 */ + 2918 "11000110" // /* MW 2 */ + 2919 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 +.src_ref 0 "0_0_reloadable4.cc" 106 60 first + 2920 "11010100" // LDA r18, [p7, dj0]; MOV r26, el0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2921 "00111001" // /* MW 5 */ + 2922 "01000000" // /* MW 4 */ + 2923 "11011101" // /* MW 3 */ + 2924 "01001010" // /* MW 2 */ + 2925 "11100000" // /* MW 1 */ + 2926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2927 "00000000" // /* MW 1 */ + 2928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2929 "00000000" // /* MW 1 */ + 2930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2931 "00000000" // /* MW 1 */ + 2932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2933 "00000000" // /* MW 1 */ + 2934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2935 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 2936 "10011000" // SUB r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2937 "00010001" // /* MW 3 */ + 2938 "00100111" // /* MW 2 */ + 2939 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 12 first +.src_ref 1 "io_buffer_compiler.h" 630 24 + 2940 "00100100" // SEL.EQZ r17, r17, r19, r27; ADD.NC p7, r18, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2941 "00010000" // /* MW 5 */ + 2942 "11010010" // /* MW 4 */ + 2943 "01001110" // /* MW 3 */ + 2944 "01100110" // /* MW 2 */ + 2945 "10001100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 12 +.src_ref 1 "io_buffer_compiler.h" 630 22 first + 2946 "00001100" // LDA r17, [p7]; ST r17, [p0, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2947 "01100011" // /* MW 5 */ + 2948 "11101100" // /* MW 4 */ + 2949 "11010001" // /* MW 3 */ + 2950 "11000110" // /* MW 2 */ + 2951 "11100000" // /* MW 1 */ + 2952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2953 "00000000" // /* MW 1 */ + 2954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2955 "00000000" // /* MW 1 */ + 2956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2957 "00000000" // /* MW 1 */ + 2958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2959 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 2960 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2961 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 2962 "11111000" // MOV r26, eh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2963 "00011100" // /* MW 3 */ + 2964 "10100001" // /* MW 2 */ + 2965 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2966 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2967 "00001000" // /* MW 3 */ + 2968 "01010101" // /* MW 2 */ + 2969 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 2970 "11010100" // LDA r17, [p7, #-4]; MOV r27, el0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2971 "00111001" // /* MW 5 */ + 2972 "11000000" // /* MW 4 */ + 2973 "11011101" // /* MW 3 */ + 2974 "11000110" // /* MW 2 */ + 2975 "11111110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 109 60 first + 2976 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2977 "01010110" // /* MW 3 */ + 2978 "00000110" // /* MW 2 */ + 2979 "00000110" // /* MW 1 */ + 2980 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2981 "00000000" // /* MW 1 */ + 2982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2983 "00000000" // /* MW 1 */ + 2984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2985 "00000000" // /* MW 1 */ + 2986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2987 "00000000" // /* MW 1 */ + 2988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2989 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 2990 "10011000" // SUB r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2991 "00010001" // /* MW 3 */ + 2992 "00100111" // /* MW 2 */ + 2993 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 25 first +.src_ref 1 "io_buffer_compiler.h" 630 24 + 2994 "00100100" // SEL.EQZ r17, r17, r19, r27; ADD.NC p0, r18, #20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2995 "00010100" // /* MW 5 */ + 2996 "11010010" // /* MW 4 */ + 2997 "01000000" // /* MW 3 */ + 2998 "01100110" // /* MW 2 */ + 2999 "10001100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 629 25 +.src_ref 1 "io_buffer_compiler.h" 630 22 first + 3000 "00001100" // LDA r17, [p0]; ST r17, [p7, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3001 "01100011" // /* MW 5 */ + 3002 "11101100" // /* MW 4 */ + 3003 "11011111" // /* MW 3 */ + 3004 "11000110" // /* MW 2 */ + 3005 "00000000" // /* MW 1 */ + 3006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3007 "00000000" // /* MW 1 */ + 3008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3009 "00000000" // /* MW 1 */ + 3010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3011 "00000000" // /* MW 1 */ + 3012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3013 "00000000" // /* MW 1 */ + 3014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3015 "00000000" // /* MW 1 */ + 3016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3017 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 464 8 first + 3018 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3019 "00001000" // /* MW 3 */ + 3020 "01010101" // /* MW 2 */ + 3021 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 111 + 3022 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3023 "00111001" // /* MW 3 */ + 3024 "11111100" // /* MW 2 */ + 3025 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 first + 3026 "10011000" // LDA r17, [p0, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3027 "00110110" // /* MW 3 */ + 3028 "11100110" // /* MW 2 */ + 3029 "00000000" // /* MW 1 */ + 3030 "00011000" // LDA p6, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3031 "00011001" // /* MW 3 */ + 3032 "11101111" // /* MW 2 */ + 3033 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 3034 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3035 "10011001" // /* MW 3 */ + 3036 "11110111" // /* MW 2 */ + 3037 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 3038 "00011000" // LDA r14, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3039 "11010001" // /* MW 3 */ + 3040 "11110001" // /* MW 2 */ + 3041 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3042 "00011000" // LDA r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3043 "11110001" // /* MW 3 */ + 3044 "11111001" // /* MW 2 */ + 3045 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 111 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3046 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3047 "00000001" // /* MW 5 */ + 3048 "00000000" // /* MW 4 */ + 3049 "00000000" // /* MW 3 */ + 3050 "11111000" // /* MW 2 */ + 3051 "11111111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 111 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3052 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3053 "00000000" // /* MW 3 */ + 3054 "00101000" // /* MW 2 */ + 3055 "00010000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 first +.delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3056 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3057 "00010001" // /* MW 3 */ + 3058 "00100001" // /* MW 2 */ + 3059 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3061 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3062 "11111000" // MOV r27, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3063 "00100000" // /* MW 3 */ + 3064 "11010111" // /* MW 2 */ + 3065 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 24 +.delay_slot + 3066 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3067 "00000010" // /* MW 3 */ + 3068 "01100001" // /* MW 2 */ + 3069 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 630 22 +.delay_slot + 3070 "10011000" // ST r16, [p0, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3071 "00010001" // /* MW 3 */ + 3072 "11100110" // /* MW 2 */ +.label _Z13kernelWrapperPPvjjjj__end +.label __Z13kernelWrapperPPvjjjj___func_end0 + 3073 "00001000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E +.function setup _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E +.src_ref 2 "elementwise_binary.h" 100 first +.src_ref 2 "elementwise_binary.h" 103 4 first +.function_start + 3088 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3089 "00000000" // /* MW 3 */ + 3090 "00101000" // /* MW 2 */ + 3091 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 3092 "01000100" // MOVXM p0, #508704 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3093 "01000000" // /* MW 5 */ + 3094 "11000110" // /* MW 4 */ + 3095 "11000000" // /* MW 3 */ + 3096 "00000111" // /* MW 2 */ + 3097 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 3098 "10111000" // MOV m0, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3099 "10000000" // /* MW 3 */ + 3100 "00000000" // /* MW 2 */ + 3101 "00011000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 first +.delay_slot + 3102 "10011000" // ST m0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3103 "00000001" // /* MW 3 */ + 3104 "00000100" // /* MW 2 */ + 3105 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 3106 "10011000" // ST m0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3107 "00000001" // /* MW 3 */ + 3108 "00010100" // /* MW 2 */ + 3109 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_end0 + 3111 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv +.function setup _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv +.src_ref 2 "elementwise_binary.h" 89 first +.src_ref 2 "elementwise_binary.h" 92 22 +.src_ref 2 "elementwise_binary.h" 92 24 first +.function_start + 3120 "10111010" // LDA el0, [p1], #4; MOVXM p0, #508672 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3121 "00010000" // /* MW 9 */ + 3122 "10000000" // /* MW 8 */ + 3123 "00110001" // /* MW 7 */ + 3124 "11110000" // /* MW 6 */ + 3125 "00000001" // /* MW 5 */ + 3126 "00000000" // /* MW 4 */ + 3127 "11010000" // /* MW 3 */ + 3128 "10000101" // /* MW 2 */ + 3129 "00100011" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 89 + 3130 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3131 "00000001" // /* MW 5 */ + 3132 "00000000" // /* MW 4 */ + 3133 "00000000" // /* MW 3 */ + 3134 "00001000" // /* MW 2 */ + 3135 "00000000" // /* MW 1 */ + 3136 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3137 "00111101" // /* MW 3 */ + 3138 "11111000" // /* MW 2 */ + 3139 "00001111" // /* MW 1 */ + 3140 "10011000" // ST r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3141 "11110101" // /* MW 3 */ + 3142 "11111101" // /* MW 2 */ + 3143 "00001111" // /* MW 1 */ + 3144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3145 "00000000" // /* MW 1 */ + 3146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3147 "00000000" // /* MW 1 */ + 3148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3149 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 92 22 first + 3150 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3151 "00101001" // /* MW 3 */ + 3152 "00011100" // /* MW 2 */ + 3153 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 93 24 first + 3154 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3155 "00101110" // /* MW 3 */ + 3156 "00011100" // /* MW 2 */ + 3157 "00000001" // /* MW 1 */ + 3158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3159 "00000000" // /* MW 1 */ + 3160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3161 "00000000" // /* MW 1 */ + 3162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3163 "00000000" // /* MW 1 */ + 3164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3165 "00000000" // /* MW 1 */ + 3166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3167 "00000000" // /* MW 1 */ + 3168 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3169 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 93 22 + 3170 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3171 "00101001" // /* MW 3 */ + 3172 "00011100" // /* MW 2 */ + 3173 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 94 24 first + 3174 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3175 "00101110" // /* MW 3 */ + 3176 "00000100" // /* MW 2 */ + 3177 "00000001" // /* MW 1 */ + 3178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3179 "00000000" // /* MW 1 */ + 3180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3181 "00000000" // /* MW 1 */ + 3182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3183 "00000000" // /* MW 1 */ + 3184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3185 "00000000" // /* MW 1 */ + 3186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3187 "00000000" // /* MW 1 */ + 3188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3189 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 94 22 + 3190 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3191 "00101001" // /* MW 3 */ + 3192 "00011100" // /* MW 2 */ + 3193 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 95 24 first + 3194 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3195 "00101110" // /* MW 3 */ + 3196 "00010100" // /* MW 2 */ + 3197 "00000001" // /* MW 1 */ + 3198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3199 "00000000" // /* MW 1 */ + 3200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3201 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 96 8 first +.no_stack_arguments + 3202 "00000100" // JL #3088 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3088 delay_slots=5 */ + 3203 "00000001" // /* MW 5 */ + 3204 "00000000" // /* MW 4 */ + 3205 "00001000" // /* MW 3 */ + 3206 "00000110" // /* MW 2 */ + 3207 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3209 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3211 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3212 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3213 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 95 22 first +.delay_slot + 3214 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3215 "00101001" // /* MW 3 */ + 3216 "11011100" // /* MW 2 */ + 3217 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 96 8 +.delay_slot + 3218 "00101110" // NOPA; NOPS; MOV r15, p0; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 3219 "00011100" // /* MW 13 */ + 3220 "00000000" // /* MW 12 */ + 3221 "00000000" // /* MW 11 */ + 3222 "00000111" // /* MW 10 */ + 3223 "10000110" // /* MW 9 */ + 3224 "01011110" // /* MW 8 */ + 3225 "00000000" // /* MW 7 */ + 3226 "00000000" // /* MW 6 */ + 3227 "10110110" // /* MW 5 */ + 3228 "00000010" // /* MW 4 */ + 3229 "11110000" // /* MW 3 */ + 3230 "00101100" // /* MW 2 */ + 3231 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 96 8 +.src_ref 2 "elementwise_binary.h" 98 4 +.src_ref 3 "add_impl.h" 105 29 +.return_address + 3232 "10111010" // LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3233 "00001000" // /* MW 9 */ + 3234 "11000100" // /* MW 8 */ + 3235 "00110011" // /* MW 7 */ + 3236 "01101000" // /* MW 6 */ + 3237 "00000000" // /* MW 5 */ + 3238 "00000001" // /* MW 4 */ + 3239 "00100000" // /* MW 3 */ + 3240 "00000111" // /* MW 2 */ + 3241 "11111111" // /* MW 1 */ +.src_ref 3 "add_impl.h" 105 29 +.src_ref 3 "add_impl.h" 106 37 +.src_ref 3 "add_impl.h" 106 39 + 3242 "10111010" // MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3243 "01011000" // /* MW 9 */ + 3244 "11111101" // /* MW 8 */ + 3245 "00000111" // /* MW 7 */ + 3246 "00001000" // /* MW 6 */ + 3247 "10000000" // /* MW 5 */ + 3248 "00000001" // /* MW 4 */ + 3249 "10000000" // /* MW 3 */ + 3250 "11100010" // /* MW 2 */ + 3251 "00000001" // /* MW 1 */ +.src_ref 3 "add_impl.h" 105 29 first +.src_ref 3 "add_impl.h" 106 39 + 3252 "01111010" // LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3253 "00000001" // /* MW 9 */ + 3254 "10100000" // /* MW 8 */ + 3255 "00000111" // /* MW 7 */ + 3256 "10000000" // /* MW 6 */ + 3257 "00010001" // /* MW 5 */ + 3258 "00001010" // /* MW 4 */ + 3259 "00100000" // /* MW 3 */ + 3260 "10111110" // /* MW 2 */ + 3261 "11111111" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 50 first + 3262 "10011000" // LDA.u8 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3263 "01001010" // /* MW 3 */ + 3264 "00000110" // /* MW 2 */ + 3265 "00000000" // /* MW 1 */ + 3266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3267 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3269 "00000000" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 37 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3270 "00011000" // ST.s16 r16, [p0, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3271 "00010111" // /* MW 3 */ + 3272 "00000010" // /* MW 2 */ + 3273 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 98 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3274 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3275 "00000000" // /* MW 3 */ + 3276 "00101000" // /* MW 2 */ + 3277 "00010000" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 54 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3278 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3279 "00000101" // /* MW 3 */ + 3280 "00100010" // /* MW 2 */ + 3281 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 98 4 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3282 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3283 "00000001" // /* MW 5 */ + 3284 "00000000" // /* MW 4 */ + 3285 "00000000" // /* MW 3 */ + 3286 "11111000" // /* MW 2 */ + 3287 "11111111" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 54 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3288 "10011000" // EQ r27, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3289 "00100111" // /* MW 3 */ + 3290 "01110111" // /* MW 2 */ + 3291 "00010100" // /* MW 1 */ +.src_ref 3 "add_impl.h" 106 39 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3292 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3293 "10000010" // /* MW 3 */ + 3294 "00100001" // /* MW 2 */ + 3295 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + 3297 "00000000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E +.function setup _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E +.src_ref 2 "elementwise_binary_broadcasting.h" 40 first +.src_ref 2 "elementwise_binary_broadcasting.h" 41 33 +.src_ref 2 "elementwise_binary_broadcasting.h" 41 33 +.function_start + 3312 "10111010" // MOVA m0, #20; MOVXM p0, #508684 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3313 "00010000" // /* MW 9 */ + 3314 "10000110" // /* MW 8 */ + 3315 "00110001" // /* MW 7 */ + 3316 "11110000" // /* MW 6 */ + 3317 "00000001" // /* MW 5 */ + 3318 "00000000" // /* MW 4 */ + 3319 "10000000" // /* MW 3 */ + 3320 "10000000" // /* MW 2 */ + 3321 "00000010" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 41 25 +.src_ref 2 "elementwise_binary_broadcasting.h" 41 33 +.src_ref 2 "elementwise_binary_broadcasting.h" 41 33 first +.src_ref 2 "elementwise_binary_broadcasting.h" 42 25 + 3322 "10111010" // LDA.u8 r0, [p0], m0; MOVX r2, #1; MOV r1, #6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3323 "01011000" // /* MW 9 */ + 3324 "00000110" // /* MW 8 */ + 3325 "00101000" // /* MW 7 */ + 3326 "00101000" // /* MW 6 */ + 3327 "00100000" // /* MW 5 */ + 3328 "00000000" // /* MW 4 */ + 3329 "01010000" // /* MW 3 */ + 3330 "00000001" // /* MW 2 */ + 3331 "00000001" // /* MW 1 */ + 3332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3333 "00000000" // /* MW 1 */ + 3334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3335 "00000000" // /* MW 1 */ + 3336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3337 "00000000" // /* MW 1 */ + 3338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3339 "00000000" // /* MW 1 */ + 3340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3341 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 43 4 first + 3342 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3343 "00000000" // /* MW 3 */ + 3344 "00101000" // /* MW 2 */ + 3345 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 41 33 first +.delay_slot + 3346 "00011000" // NEZ r3, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3347 "11110000" // /* MW 3 */ + 3348 "00000110" // /* MW 2 */ + 3349 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 41 33 +.delay_slot + 3350 "10011000" // NE r0, r2, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3351 "00001000" // /* MW 3 */ + 3352 "10000000" // /* MW 2 */ + 3353 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 42 25 first +.delay_slot + 3354 "10011000" // LSHL r0, r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3355 "00011101" // /* MW 3 */ + 3356 "00000000" // /* MW 2 */ + 3357 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 41 25 first +.src_ref 2 "elementwise_binary_broadcasting.h" 42 23 +.delay_slot + 3358 "01011100" // ST r0, [p0, #4]; LSHL r2, r3, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3359 "00111011" // /* MW 5 */ + 3360 "10001000" // /* MW 4 */ + 3361 "00110001" // /* MW 3 */ + 3362 "10000010" // /* MW 2 */ + 3363 "00000010" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 41 23 +.delay_slot + 3364 "10011000" // ST r2, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3365 "01010001" // /* MW 3 */ + 3366 "00000100" // /* MW 2 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_end0 + 3367 "00001000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv +.function setup _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv +.src_ref 2 "elementwise_binary_broadcasting.h" 35 +.src_ref 2 "elementwise_binary_broadcasting.h" 35 first +.function_start + 3376 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3377 "00000001" // /* MW 5 */ + 3378 "00000000" // /* MW 4 */ + 3379 "00000000" // /* MW 3 */ + 3380 "00001000" // /* MW 2 */ + 3381 "00000000" // /* MW 1 */ + 3382 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3383 "00111101" // /* MW 3 */ + 3384 "11111100" // /* MW 2 */ + 3385 "00001111" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 36 8 first +.no_stack_arguments + 3386 "00000100" // JL #3120 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3120 delay_slots=5 */ + 3387 "00000001" // /* MW 5 */ + 3388 "00000000" // /* MW 4 */ + 3389 "00011000" // /* MW 3 */ + 3390 "00000110" // /* MW 2 */ + 3391 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 36 8 +.delay_slot + 3392 "01000100" // MOVXM p0, #508672 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3393 "00000000" // /* MW 5 */ + 3394 "11000110" // /* MW 4 */ + 3395 "11000000" // /* MW 3 */ + 3396 "00000111" // /* MW 2 */ + 3397 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3399 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3401 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3403 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3404 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3405 "01100111" // /* MW 3 */ + 3406 "00000001" // /* MW 2 */ + 3407 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 37 8 +.return_address + 3408 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3409 "00111001" // /* MW 3 */ + 3410 "11111100" // /* MW 2 */ + 3411 "00000111" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 37 8 first +.tail_call + 3412 "10000100" // J #3312 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=3312 delay_slots=5 */ + 3413 "00000000" // /* MW 5 */ + 3414 "00000000" // /* MW 4 */ + 3415 "01111000" // /* MW 3 */ + 3416 "00000110" // /* MW 2 */ + 3417 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 37 8 +.delay_slot + 3418 "01000100" // MOVXM p0, #508672 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3419 "00000000" // /* MW 5 */ + 3420 "11000110" // /* MW 4 */ + 3421 "11000000" // /* MW 3 */ + 3422 "00000111" // /* MW 2 */ + 3423 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 38 4 first +.delay_slot + 3424 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3425 "00000001" // /* MW 5 */ + 3426 "00000000" // /* MW 4 */ + 3427 "00000000" // /* MW 3 */ + 3428 "11111000" // /* MW 2 */ + 3429 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3431 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3433 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + 3435 "00000000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.function run _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.src_ref 2 "elementwise_binary_broadcasting.h" 48 first +.src_ref 2 "elementwise_binary_broadcasting.h" 55 37 +.src_ref 2 "elementwise_binary_broadcasting.h" 61 19 +.function_start + 3440 "10111010" // MOVA m0, #20; MOVXM p3, #508672 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3441 "00010000" // /* MW 9 */ + 3442 "10000000" // /* MW 8 */ + 3443 "10110001" // /* MW 7 */ + 3444 "11110001" // /* MW 6 */ + 3445 "00000001" // /* MW 5 */ + 3446 "00000000" // /* MW 4 */ + 3447 "10000000" // /* MW 3 */ + 3448 "10000000" // /* MW 2 */ + 3449 "00000010" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 55 37 first + 3450 "10011000" // LDA r0, [p3], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3451 "00010110" // /* MW 3 */ + 3452 "00111100" // /* MW 2 */ + 3453 "00000011" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 61 19 first +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 + 3454 "11010100" // LDA.u8 r1, [p3], m0; MOV p4, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3455 "10000001" // /* MW 5 */ + 3456 "11001101" // /* MW 4 */ + 3457 "01011000" // /* MW 3 */ + 3458 "00000101" // /* MW 2 */ + 3459 "01100001" // /* MW 1 */ + 3460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3461 "00000000" // /* MW 1 */ + 3462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3463 "00000000" // /* MW 1 */ + 3464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3465 "00000000" // /* MW 1 */ + 3466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3467 "00000000" // /* MW 1 */ + 3468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3469 "00000000" // /* MW 1 */ + 3470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3471 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 61 12 +.src_ref 2 "elementwise_binary_broadcasting.h" 61 35 + 3472 "10000100" // JNZ r1, #3536 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3536 delay_slots=5 */ + 3473 "00000001" // /* MW 5 */ + 3474 "01000000" // /* MW 4 */ + 3475 "11101000" // /* MW 3 */ + 3476 "00000110" // /* MW 2 */ + 3477 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 55 78 +.delay_slot + 3478 "00011000" // MOVX r2, #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3479 "11101001" // /* MW 3 */ + 3480 "11000100" // /* MW 2 */ + 3481 "00010111" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 55 78 first +.delay_slot + 3482 "10011000" // LSHL r0, r0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3483 "00101101" // /* MW 3 */ + 3484 "00000000" // /* MW 2 */ + 3485 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3487 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3489 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3490 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3491 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 62 28 first + 3492 "10011000" // LDA.s16 r1, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3493 "00110010" // /* MW 3 */ + 3494 "00000100" // /* MW 2 */ + 3495 "00000000" // /* MW 1 */ + 3496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3497 "00000000" // /* MW 1 */ + 3498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3499 "00000000" // /* MW 1 */ + 3500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3501 "00000000" // /* MW 1 */ + 3502 "10000100" // J #3568 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3568 delay_slots=5 */ + 3503 "00000000" // /* MW 5 */ + 3504 "00000000" // /* MW 4 */ + 3505 "11111000" // /* MW 3 */ + 3506 "00000110" // /* MW 2 */ + 3507 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3509 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3511 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 first +.delay_slot + 3512 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3513 "01110010" // /* MW 3 */ + 3514 "00000101" // /* MW 2 */ + 3515 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3516 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3517 "01100111" // /* MW 3 */ + 3518 "00000001" // /* MW 2 */ + 3519 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.delay_slot + 3520 "11100001" // NOPA; NOPB; VST x0, [p0]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3521 "00000000" // /* MW 15 */ + 3522 "00000000" // /* MW 14 */ + 3523 "01111000" // /* MW 13 */ + 3524 "10100101" // /* MW 12 */ + 3525 "00000001" // /* MW 11 */ + 3526 "00000000" // /* MW 10 */ + 3527 "00000000" // /* MW 9 */ + 3528 "00000000" // /* MW 8 */ + 3529 "00010011" // /* MW 7 */ + 3530 "00000100" // /* MW 6 */ + 3531 "00100000" // /* MW 5 */ + 3532 "00000000" // /* MW 4 */ + 3533 "11110000" // /* MW 3 */ + 3534 "00101100" // /* MW 2 */ + 3535 "00000000" // /* MW 1 */ +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_96 +.src_ref 2 "elementwise_binary_broadcasting.h" 65 28 first + 3536 "10011000" // LDA.s16 r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3537 "00110010" // /* MW 3 */ + 3538 "00000100" // /* MW 2 */ + 3539 "00000001" // /* MW 1 */ + 3540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3541 "00000000" // /* MW 1 */ + 3542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3543 "00000000" // /* MW 1 */ + 3544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3545 "00000000" // /* MW 1 */ + 3546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3547 "00000000" // /* MW 1 */ + 3548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3549 "00000000" // /* MW 1 */ + 3550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3551 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 first + 3552 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3553 "01110010" // /* MW 3 */ + 3554 "00000101" // /* MW 2 */ + 3555 "00011000" // /* MW 1 */ + 3556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3557 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first + 3558 "01111010" // NOPA; VST x0, [p1]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3559 "00000000" // /* MW 9 */ + 3560 "00000000" // /* MW 8 */ + 3561 "00000000" // /* MW 7 */ + 3562 "00000000" // /* MW 6 */ + 3563 "00010011" // /* MW 5 */ + 3564 "00000100" // /* MW 4 */ + 3565 "11110001" // /* MW 3 */ + 3566 "00101100" // /* MW 2 */ + 3567 "00000000" // /* MW 1 */ +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_128 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 first +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 first + 3568 "10111010" // LDA m0, [p4, #20]; MOVX r0, #60; ADD.NC lc, r0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3569 "01001000" // /* MW 9 */ + 3570 "00111111" // /* MW 8 */ + 3571 "10111000" // /* MW 7 */ + 3572 "10001010" // /* MW 6 */ + 3573 "00000111" // /* MW 5 */ + 3574 "00000000" // /* MW 4 */ + 3575 "11010000" // /* MW 3 */ + 3576 "10000000" // /* MW 2 */ + 3577 "10001010" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 + 3578 "10111010" // LDA m1, [p3, #4]; MOVXM ls, #3680 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3579 "00010000" // /* MW 9 */ + 3580 "00110000" // /* MW 8 */ + 3581 "01111111" // /* MW 7 */ + 3582 "00000000" // /* MW 6 */ + 3583 "00000000" // /* MW 5 */ + 3584 "00000000" // /* MW 4 */ + 3585 "11010000" // /* MW 3 */ + 3586 "10010000" // /* MW 2 */ + 3587 "01100010" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 + 3588 "01000100" // MOVXM le, #3712 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3589 "00000000" // /* MW 5 */ + 3590 "11111101" // /* MW 4 */ + 3591 "00000110" // /* MW 3 */ + 3592 "00000000" // /* MW 2 */ + 3593 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 + 3594 "01000100" // MOVXM p4, #508448 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3595 "01000000" // /* MW 5 */ + 3596 "11000100" // /* MW 4 */ + 3597 "11001000" // /* MW 3 */ + 3598 "00000111" // /* MW 2 */ + 3599 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_broadcasting.h" 76 8 + 3600 "10011000" // LDA.s8 r1, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3601 "00100010" // /* MW 3 */ + 3602 "00000100" // /* MW 2 */ + 3603 "00000100" // /* MW 1 */ + 3604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3605 "00000000" // /* MW 1 */ + 3606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3607 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 146 20 first + 3608 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3609 "10101011" // /* MW 3 */ + 3610 "00001000" // /* MW 2 */ + 3611 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 2 "elementwise_binary.h" 148 20 first + 3612 "10011000" // VLDA.CONV.fp32.bf16 cml2, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3613 "00101011" // /* MW 3 */ + 3614 "00101001" // /* MW 2 */ + 3615 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 170 20 first + 3616 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3617 "00101011" // /* MW 3 */ + 3618 "00001000" // /* MW 2 */ + 3619 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3620 "10011000" // VLDA.CONV.fp32.bf16 cml4, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3621 "00101011" // /* MW 3 */ + 3622 "00101010" // /* MW 2 */ + 3623 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 154 20 +.src_ref 2 "elementwise_binary.h" 177 20 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3624 "00101100" // VLDA.CONV.fp32.bf16 cml1, [p0], m0; MOVX crRnd, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3625 "00000000" // /* MW 5 */ + 3626 "11110101" // /* MW 4 */ + 3627 "01110000" // /* MW 3 */ + 3628 "00010101" // /* MW 2 */ + 3629 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3630 "01100010" // VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3631 "00111101" // /* MW 7 */ + 3632 "00101000" // /* MW 6 */ + 3633 "00000011" // /* MW 5 */ + 3634 "00000100" // /* MW 4 */ + 3635 "01110000" // /* MW 3 */ + 3636 "00100101" // /* MW 2 */ + 3637 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3638 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3639 "00101011" // /* MW 3 */ + 3640 "00001000" // /* MW 2 */ + 3641 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3642 "01100010" // VLDA.CONV.fp32.bf16 cml4, [p1], m1; VADD.f dm4, dm0, dm4, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3643 "00111101" // /* MW 7 */ + 3644 "00010000" // /* MW 6 */ + 3645 "00000100" // /* MW 5 */ + 3646 "00000100" // /* MW 4 */ + 3647 "01110000" // /* MW 3 */ + 3648 "01000101" // /* MW 2 */ + 3649 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3650 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3651 "10101011" // /* MW 3 */ + 3652 "00001000" // /* MW 2 */ + 3653 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3654 "01100010" // VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3655 "00111101" // /* MW 7 */ + 3656 "00101000" // /* MW 6 */ + 3657 "00000011" // /* MW 5 */ + 3658 "00000100" // /* MW 4 */ + 3659 "01110000" // /* MW 3 */ + 3660 "00100101" // /* MW 2 */ + 3661 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3662 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3663 "00101011" // /* MW 3 */ + 3664 "00001000" // /* MW 2 */ + 3665 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3666 "01101110" // VLDA.CONV.fp32.bf16 cml4, [p1], m1; VST.CONV.bf16.fp32 cml3, [p2], #64; NOPM; VADD.f dm4, dm0, dm4, r0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 3667 "00111101" // /* MW 13 */ + 3668 "00010000" // /* MW 12 */ + 3669 "00000100" // /* MW 11 */ + 3670 "01010111" // /* MW 10 */ + 3671 "00011010" // /* MW 9 */ + 3672 "01000000" // /* MW 8 */ + 3673 "00000000" // /* MW 7 */ + 3674 "00000000" // /* MW 6 */ + 3675 "01000110" // /* MW 5 */ + 3676 "00111011" // /* MW 4 */ + 3677 "01110100" // /* MW 3 */ + 3678 "01000101" // /* MW 2 */ + 3679 "00100101" // /* MW 1 */ +.label ZLS_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_240 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 3680 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3681 "10101011" // /* MW 3 */ + 3682 "00001000" // /* MW 2 */ + 3683 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3684 "01100110" // VLDA.CONV.fp32.bf16 cml2, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3685 "00111101" // /* MW 11 */ + 3686 "00101000" // /* MW 10 */ + 3687 "00000011" // /* MW 9 */ + 3688 "10001110" // /* MW 8 */ + 3689 "00010001" // /* MW 7 */ + 3690 "00001111" // /* MW 6 */ + 3691 "00100001" // /* MW 5 */ + 3692 "00000000" // /* MW 4 */ + 3693 "01110000" // /* MW 3 */ + 3694 "00100101" // /* MW 2 */ + 3695 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3696 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3697 "00000000" // /* MW 15 */ + 3698 "00000000" // /* MW 14 */ + 3699 "01111000" // /* MW 13 */ + 3700 "10100101" // /* MW 12 */ + 3701 "00000001" // /* MW 11 */ + 3702 "00000000" // /* MW 10 */ + 3703 "00000000" // /* MW 9 */ + 3704 "00000000" // /* MW 8 */ + 3705 "01011011" // /* MW 7 */ + 3706 "00000001" // /* MW 6 */ + 3707 "00100000" // /* MW 5 */ + 3708 "00000000" // /* MW 4 */ + 3709 "01110000" // /* MW 3 */ + 3710 "00000101" // /* MW 2 */ + 3711 "00000001" // /* MW 1 */ +.label ZLE_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_272 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 940 83 +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3712 "11101011" // VLDA.CONV.fp32.bf16 cml4, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml3, [p2], #64;NOPX; NOPM; VADD.f dm4, dm0, dm4, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3713 "10000001" // /* MW 15 */ + 3714 "00100000" // /* MW 14 */ + 3715 "01111000" // /* MW 13 */ + 3716 "10100101" // /* MW 12 */ + 3717 "00000001" // /* MW 11 */ + 3718 "00000000" // /* MW 10 */ + 3719 "00000000" // /* MW 9 */ + 3720 "00000000" // /* MW 8 */ + 3721 "10100011" // /* MW 7 */ + 3722 "00011101" // /* MW 6 */ + 3723 "00100010" // /* MW 5 */ + 3724 "00000000" // /* MW 4 */ + 3725 "01110000" // /* MW 3 */ + 3726 "01000101" // /* MW 2 */ + 3727 "00100101" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 3728 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3729 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3730 "01100010" // VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3731 "00111101" // /* MW 7 */ + 3732 "00101000" // /* MW 6 */ + 3733 "00000011" // /* MW 5 */ + 3734 "00000010" // /* MW 4 */ + 3735 "01100000" // /* MW 3 */ + 3736 "11000100" // /* MW 2 */ + 3737 "01000011" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3738 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3739 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3740 "01100010" // VST.CONV.bf16.fp32 cml3, [p2], #64; VADD.f dm4, dm0, dm4, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3741 "00111101" // /* MW 7 */ + 3742 "00010000" // /* MW 6 */ + 3743 "00000100" // /* MW 5 */ + 3744 "00000010" // /* MW 4 */ + 3745 "01100000" // /* MW 3 */ + 3746 "10110100" // /* MW 2 */ + 3747 "01000011" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3749 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 154 20 first +.src_ref 2 "elementwise_binary_broadcasting.h" 80 4 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3750 "01011100" // VST.CONV.bf16.fp32 cml4, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 3751 "00000000" // /* MW 5 */ + 3752 "01010000" // /* MW 4 */ + 3753 "01100000" // /* MW 3 */ + 3754 "11000100" // /* MW 2 */ + 3755 "01000011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3756 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3757 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.delay_slot + 3758 "00011000" // VST.CONV.bf16.fp32 cml3, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3759 "10100011" // /* MW 3 */ + 3760 "00011101" // /* MW 2 */ + 3761 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3762 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3763 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 154 20 first +.delay_slot + 3764 "00011000" // VST.CONV.bf16.fp32 cml4, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3765 "00100011" // /* MW 3 */ + 3766 "00011110" // /* MW 2 */ + 3767 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0 + 3769 "00000000" // /* MW 1 */ +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E +.function run _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 41 +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 41 first +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 76 8 +.function_start + 3776 "00111010" // MOVS p2, p1; PADDXM [sp], #128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3777 "01110001" // /* MW 9 */ + 3778 "00000000" // /* MW 8 */ + 3779 "00000000" // /* MW 7 */ + 3780 "00000000" // /* MW 6 */ + 3781 "00000100" // /* MW 5 */ + 3782 "00000000" // /* MW 4 */ + 3783 "01100000" // /* MW 3 */ + 3784 "10010001" // /* MW 2 */ + 3785 "01010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 51 35 + 3786 "00000010" // ST lr, [sp, #-4]; MOV r16, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3787 "01110000" // /* MW 7 */ + 3788 "01100000" // /* MW 6 */ + 3789 "00001000" // /* MW 5 */ + 3790 "00000010" // /* MW 4 */ + 3791 "10110000" // /* MW 3 */ + 3792 "10000111" // /* MW 2 */ + 3793 "11111111" // /* MW 1 */ + 3794 "11111000" // MOV r17, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3795 "11100000" // /* MW 3 */ + 3796 "01010101" // /* MW 2 */ + 3797 "00011100" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 51 19 + 3798 "01000100" // MOVXM p3, #508684 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3799 "00011000" // /* MW 5 */ + 3800 "11000110" // /* MW 4 */ + 3801 "11000110" // /* MW 3 */ + 3802 "00000111" // /* MW 2 */ + 3803 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 51 19 first + 3804 "00010100" // LDA.u8 r27, [p3], #2; ADD.NC p0, r17, #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3805 "10000000" // /* MW 5 */ + 3806 "11010001" // /* MW 4 */ + 3807 "01010000" // /* MW 3 */ + 3808 "11101101" // /* MW 2 */ + 3809 "01100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 538 13 first +.src_ref 4 "vector_native_types.hpp" 374 137 first + 3810 "00001100" // LDA.s16 r18, [p3], #-14; VST sfh, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3811 "01010110" // /* MW 5 */ + 3812 "00001110" // /* MW 4 */ + 3813 "01010000" // /* MW 3 */ + 3814 "11001010" // /* MW 2 */ + 3815 "01110011" // /* MW 1 */ + 3816 "00011000" // ST.s16 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3817 "01010111" // /* MW 3 */ + 3818 "00000110" // /* MW 2 */ + 3819 "00000000" // /* MW 1 */ + 3820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3821 "00000000" // /* MW 1 */ + 3822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3823 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 76 8 first +.no_stack_arguments + 3824 "00000100" // JL #3440 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3440 delay_slots=5 */ + 3825 "00000001" // /* MW 5 */ + 3826 "00000000" // /* MW 4 */ + 3827 "10111000" // /* MW 3 */ + 3828 "00000110" // /* MW 2 */ + 3829 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 51 35 +.delay_slot + 3830 "11111000" // MOV r17, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3831 "11000000" // /* MW 3 */ + 3832 "01010000" // /* MW 2 */ + 3833 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3835 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 51 35 first +.delay_slot + 3836 "00011000" // SEL.EQZ r18, r16, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3837 "00010010" // /* MW 3 */ + 3838 "00100101" // /* MW 2 */ + 3839 "00010100" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 51 35 +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 76 8 +.delay_slot + 3840 "11100100" // SEL.EQZ r16, r17, r16, r27; MOV p1, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3841 "01000001" // /* MW 5 */ + 3842 "11010010" // /* MW 4 */ + 3843 "01000010" // /* MW 3 */ + 3844 "00100000" // /* MW 2 */ + 3845 "10001100" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 76 8 +.delay_slot + 3846 "10111010" // NOPA; NOPB; MOV p0, r16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3847 "01111110" // /* MW 9 */ + 3848 "00010000" // /* MW 8 */ + 3849 "00110100" // /* MW 7 */ + 3850 "00000000" // /* MW 6 */ + 3851 "00010000" // /* MW 5 */ + 3852 "00000000" // /* MW 4 */ + 3853 "11110000" // /* MW 3 */ + 3854 "00101100" // /* MW 2 */ + 3855 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 77 4 +.return_address + 3856 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3857 "00111001" // /* MW 3 */ + 3858 "11111100" // /* MW 2 */ + 3859 "00000111" // /* MW 1 */ + 3860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3861 "00000000" // /* MW 1 */ + 3862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3863 "00000000" // /* MW 1 */ + 3864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3865 "00000000" // /* MW 1 */ + 3866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3867 "00000000" // /* MW 1 */ + 3868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3869 "00000000" // /* MW 1 */ + 3870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3871 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 77 4 first + 3872 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3873 "00000000" // /* MW 3 */ + 3874 "00101000" // /* MW 2 */ + 3875 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary_attribute_broadcasting.h" 77 4 +.delay_slot + 3876 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3877 "00000001" // /* MW 5 */ + 3878 "00000000" // /* MW 4 */ + 3879 "00000000" // /* MW 3 */ + 3880 "11110000" // /* MW 2 */ + 3881 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3883 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3885 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3887 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_end0 + 3889 "00000000" // /* MW 1 */ +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_add1d_attribute_broadcasting _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 6 "superkernels.cpp" 152 first +.src_ref 6 "superkernels.cpp" 157 6 +.function_start + 3904 "01000100" // MOVXM p3, #508416 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3905 "00000000" // /* MW 5 */ + 3906 "11000100" // /* MW 4 */ + 3907 "11000110" // /* MW 3 */ + 3908 "00000111" // /* MW 2 */ + 3909 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 157 6 first + 3910 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3911 "11000001" // /* MW 5 */ + 3912 "10110101" // /* MW 4 */ + 3913 "11011000" // /* MW 3 */ + 3914 "11000010" // /* MW 2 */ + 3915 "01100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 152 + 3916 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3917 "00000001" // /* MW 5 */ + 3918 "00000000" // /* MW 4 */ + 3919 "00000000" // /* MW 3 */ + 3920 "00001000" // /* MW 2 */ + 3921 "00000000" // /* MW 1 */ + 3922 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3923 "01110000" // /* MW 7 */ + 3924 "11010000" // /* MW 6 */ + 3925 "00001011" // /* MW 5 */ + 3926 "00000000" // /* MW 4 */ + 3927 "10110000" // /* MW 3 */ + 3928 "01100011" // /* MW 2 */ + 3929 "11111111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 154 11 + 3930 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #508420 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3931 "00010001" // /* MW 9 */ + 3932 "00000010" // /* MW 8 */ + 3933 "00110001" // /* MW 7 */ + 3934 "11110011" // /* MW 6 */ + 3935 "00000001" // /* MW 5 */ + 3936 "00000000" // /* MW 4 */ + 3937 "10110000" // /* MW 3 */ + 3938 "10000010" // /* MW 2 */ + 3939 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 + 3940 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3941 "11000000" // /* MW 3 */ + 3942 "11010100" // /* MW 2 */ + 3943 "00011011" // /* MW 1 */ + 3944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3945 "00000000" // /* MW 1 */ + 3946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3947 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 157 6 +.src_ref 6 "superkernels.cpp" 157 16 + 3948 "10000100" // JNZ r16, #4112 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4112 delay_slots=5 */ + 3949 "00000001" // /* MW 5 */ + 3950 "01000000" // /* MW 4 */ + 3951 "00001000" // /* MW 3 */ + 3952 "00001000" // /* MW 2 */ + 3953 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 154 22 first +.delay_slot + 3954 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3955 "10010000" // /* MW 3 */ + 3956 "01100010" // /* MW 2 */ + 3957 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 154 30 +.delay_slot + 3958 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3959 "11111011" // /* MW 3 */ + 3960 "01100011" // /* MW 2 */ + 3961 "00010100" // /* MW 1 */ +.delay_slot + 3962 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3963 "00111101" // /* MW 3 */ + 3964 "11110100" // /* MW 2 */ + 3965 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 6 "superkernels.cpp" 154 11 +.delay_slot + 3966 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3967 "01110000" // /* MW 7 */ + 3968 "01100000" // /* MW 6 */ + 3969 "00110000" // /* MW 5 */ + 3970 "00000011" // /* MW 4 */ + 3971 "00110000" // /* MW 3 */ + 3972 "11000110" // /* MW 2 */ + 3973 "11000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 160 4 +.src_ref 6 "superkernels.cpp" 171 2 +.delay_slot + 3974 "01000100" // MOVXM p0, #508672 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3975 "00000000" // /* MW 5 */ + 3976 "11000110" // /* MW 4 */ + 3977 "11000000" // /* MW 3 */ + 3978 "00000111" // /* MW 2 */ + 3979 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3980 "01000100" // MOVXM p2, #508448 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3981 "01000000" // /* MW 5 */ + 3982 "11000100" // /* MW 4 */ + 3983 "11000100" // /* MW 3 */ + 3984 "00000111" // /* MW 2 */ + 3985 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3986 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #508444 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3987 "00010000" // /* MW 9 */ + 3988 "00001110" // /* MW 8 */ + 3989 "00110001" // /* MW 7 */ + 3990 "11110001" // /* MW 6 */ + 3991 "00000001" // /* MW 5 */ + 3992 "00000000" // /* MW 4 */ + 3993 "11100000" // /* MW 3 */ + 3994 "11000000" // /* MW 2 */ + 3995 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3997 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 160 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 3998 "00000100" // JL #3376 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3376 delay_slots=5 */ + 3999 "00000001" // /* MW 5 */ + 4000 "00000000" // /* MW 4 */ + 4001 "10011000" // /* MW 3 */ + 4002 "00000110" // /* MW 2 */ + 4003 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4005 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4007 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4008 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4009 "00110001" // /* MW 3 */ + 4010 "00100000" // /* MW 2 */ + 4011 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 4012 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4013 "00000101" // /* MW 3 */ + 4014 "00100000" // /* MW 2 */ + 4015 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 4016 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4017 "00000000" // /* MW 15 */ + 4018 "00000000" // /* MW 14 */ + 4019 "01111000" // /* MW 13 */ + 4020 "10100101" // /* MW 12 */ + 4021 "00000001" // /* MW 11 */ + 4022 "00000000" // /* MW 10 */ + 4023 "00000000" // /* MW 9 */ + 4024 "10000000" // /* MW 8 */ + 4025 "00010001" // /* MW 7 */ + 4026 "00000110" // /* MW 6 */ + 4027 "00100010" // /* MW 5 */ + 4028 "00000000" // /* MW 4 */ + 4029 "11110000" // /* MW 3 */ + 4030 "00101100" // /* MW 2 */ + 4031 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 164 18 +.return_address + 4032 "01000100" // MOVXM p2, #508420 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4033 "00001000" // /* MW 5 */ + 4034 "11000100" // /* MW 4 */ + 4035 "11000100" // /* MW 3 */ + 4036 "00000111" // /* MW 2 */ + 4037 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 164 18 first +.src_ref 6 "superkernels.cpp" 164 65 + 4038 "10111010" // LDA r16, [p2]; MOVXM p2, #508672 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4039 "00010000" // /* MW 9 */ + 4040 "10000000" // /* MW 8 */ + 4041 "00110001" // /* MW 7 */ + 4042 "11110001" // /* MW 6 */ + 4043 "00000001" // /* MW 5 */ + 4044 "00000000" // /* MW 4 */ + 4045 "11010000" // /* MW 3 */ + 4046 "11000010" // /* MW 2 */ + 4047 "01000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 162 51 +.src_ref 6 "superkernels.cpp" 164 65 +.src_ref 6 "superkernels.cpp" 171 2 + 4048 "10111010" // LDA r17, [p2]; MOVXM p2, #508672 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4049 "00010000" // /* MW 9 */ + 4050 "10000000" // /* MW 8 */ + 4051 "00110001" // /* MW 7 */ + 4052 "11110001" // /* MW 6 */ + 4053 "00000001" // /* MW 5 */ + 4054 "00000000" // /* MW 4 */ + 4055 "11010000" // /* MW 3 */ + 4056 "11000110" // /* MW 2 */ + 4057 "01000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 162 51 first +.src_ref 6 "superkernels.cpp" 164 16 +.src_ref 6 "superkernels.cpp" 169 47 + 4058 "10111010" // LDA.u16 r18, [p2, #10]; MOVXM p1, #508424 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4059 "00010000" // /* MW 9 */ + 4060 "00000100" // /* MW 8 */ + 4061 "10110001" // /* MW 7 */ + 4062 "11110000" // /* MW 6 */ + 4063 "00000001" // /* MW 5 */ + 4064 "00000000" // /* MW 4 */ + 4065 "01010000" // /* MW 3 */ + 4066 "11001011" // /* MW 2 */ + 4067 "01001010" // /* MW 1 */ + 4068 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4069 "00000000" // /* MW 1 */ + 4070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4071 "00000000" // /* MW 1 */ + 4072 "10000100" // J #4128 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=4128 delay_slots=5 */ + 4073 "00000000" // /* MW 5 */ + 4074 "00000000" // /* MW 4 */ + 4075 "00010000" // /* MW 3 */ + 4076 "00001000" // /* MW 2 */ + 4077 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 162 13 +.delay_slot + 4078 "01000100" // MOVXM p0, #508440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4079 "00110000" // /* MW 5 */ + 4080 "11000100" // /* MW 4 */ + 4081 "11000000" // /* MW 3 */ + 4082 "00000111" // /* MW 2 */ + 4083 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4085 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 164 27 first +.delay_slot + 4086 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4087 "00001111" // /* MW 3 */ + 4088 "01100001" // /* MW 2 */ + 4089 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 162 13 first +.delay_slot + 4090 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4091 "10100011" // /* MW 5 */ + 4092 "00001100" // /* MW 4 */ + 4093 "11110000" // /* MW 3 */ + 4094 "00101100" // /* MW 2 */ + 4095 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 164 16 first +.delay_slot + 4096 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4097 "00000000" // /* MW 15 */ + 4098 "00000000" // /* MW 14 */ + 4099 "01111000" // /* MW 13 */ + 4100 "10100101" // /* MW 12 */ + 4101 "00000001" // /* MW 11 */ + 4102 "00000000" // /* MW 10 */ + 4103 "00000000" // /* MW 9 */ + 4104 "10000000" // /* MW 8 */ + 4105 "00010001" // /* MW 7 */ + 4106 "00000110" // /* MW 6 */ + 4107 "00100001" // /* MW 5 */ + 4108 "00000000" // /* MW 4 */ + 4109 "11110000" // /* MW 3 */ + 4110 "00101100" // /* MW 2 */ + 4111 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 6 "superkernels.cpp" 169 47 +.src_ref 6 "superkernels.cpp" 171 2 + 4112 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #508424; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4113 "00000000" // /* MW 15 */ + 4114 "00000000" // /* MW 14 */ + 4115 "00010000" // /* MW 13 */ + 4116 "00000100" // /* MW 12 */ + 4117 "10110001" // /* MW 11 */ + 4118 "11110000" // /* MW 10 */ + 4119 "00000001" // /* MW 9 */ + 4120 "00000000" // /* MW 8 */ + 4121 "10001011" // /* MW 7 */ + 4122 "10000000" // /* MW 6 */ + 4123 "00100010" // /* MW 5 */ + 4124 "00000000" // /* MW 4 */ + 4125 "11110000" // /* MW 3 */ + 4126 "00101100" // /* MW 2 */ + 4127 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 +.src_ref 1 "io_buffer_main.h" 242 49 first + 4128 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4129 "00000000" // /* MW 7 */ + 4130 "11000011" // /* MW 6 */ + 4131 "10110011" // /* MW 5 */ + 4132 "00000011" // /* MW 4 */ + 4133 "01100000" // /* MW 3 */ + 4134 "10010001" // /* MW 2 */ + 4135 "01110011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 168 2 + 4136 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4137 "00010000" // /* MW 9 */ + 4138 "00000000" // /* MW 8 */ + 4139 "00110001" // /* MW 7 */ + 4140 "11110000" // /* MW 6 */ + 4141 "00000001" // /* MW 5 */ + 4142 "00000000" // /* MW 4 */ + 4143 "11010000" // /* MW 3 */ + 4144 "11101110" // /* MW 2 */ + 4145 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 4146 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4147 "00010110" // /* MW 3 */ + 4148 "11111110" // /* MW 2 */ + 4149 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 4150 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4151 "00110110" // /* MW 3 */ + 4152 "11111110" // /* MW 2 */ + 4153 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first + 4154 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4155 "01010110" // /* MW 3 */ + 4156 "01000110" // /* MW 2 */ + 4157 "00000111" // /* MW 1 */ + 4158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4159 "00000000" // /* MW 1 */ + 4160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4161 "00000000" // /* MW 1 */ + 4162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4163 "00000000" // /* MW 1 */ + 4164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4165 "00000000" // /* MW 1 */ + 4166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4167 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 4168 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4169 "00000010" // /* MW 3 */ + 4170 "01100001" // /* MW 2 */ + 4171 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 + 4172 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4173 "00010001" // /* MW 3 */ + 4174 "00000110" // /* MW 2 */ + 4175 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 + 4176 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4177 "11111101" // /* MW 3 */ + 4178 "11100000" // /* MW 2 */ + 4179 "00010111" // /* MW 1 */ + 4180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4181 "00000000" // /* MW 1 */ + 4182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4183 "00000000" // /* MW 1 */ + 4184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4185 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 4186 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4187 "00001000" // /* MW 3 */ + 4188 "10010011" // /* MW 2 */ + 4189 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 169 45 + 4190 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4191 "10000001" // /* MW 5 */ + 4192 "10101101" // /* MW 4 */ + 4193 "10100111" // /* MW 3 */ + 4194 "00000000" // /* MW 2 */ + 4195 "00000100" // /* MW 1 */ + 4196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4197 "00000000" // /* MW 1 */ + 4198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4199 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 168 2 first + 4200 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4201 "00110110" // /* MW 3 */ + 4202 "00000110" // /* MW 2 */ + 4203 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 1 "io_buffer_main.h" 348 51 + 4204 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4205 "10000001" // /* MW 5 */ + 4206 "11011101" // /* MW 4 */ + 4207 "11011100" // /* MW 3 */ + 4208 "11001010" // /* MW 2 */ + 4209 "11000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 169 47 first + 4210 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4211 "01110110" // /* MW 3 */ + 4212 "00000110" // /* MW 2 */ + 4213 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 4214 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4215 "10011110" // /* MW 3 */ + 4216 "01011100" // /* MW 2 */ + 4217 "00000111" // /* MW 1 */ + 4218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4219 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 171 2 first +.no_stack_arguments + 4220 "00000100" // JL #3776 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=3776 delay_slots=5 */ + 4221 "00000001" // /* MW 5 */ + 4222 "00000000" // /* MW 4 */ + 4223 "01100000" // /* MW 3 */ + 4224 "00000111" // /* MW 2 */ + 4225 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4226 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4227 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 168 2 first +.delay_slot + 4228 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4229 "00000111" // /* MW 3 */ + 4230 "01100010" // /* MW 2 */ + 4231 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 168 2 +.delay_slot + 4232 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4233 "00110001" // /* MW 3 */ + 4234 "00000110" // /* MW 2 */ + 4235 "00001000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 169 45 first +.delay_slot + 4236 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4237 "00001101" // /* MW 3 */ + 4238 "11100001" // /* MW 2 */ + 4239 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 169 45 +.delay_slot + 4240 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4241 "00000000" // /* MW 15 */ + 4242 "00000000" // /* MW 14 */ + 4243 "10101000" // /* MW 13 */ + 4244 "10100000" // /* MW 12 */ + 4245 "00110100" // /* MW 11 */ + 4246 "00000000" // /* MW 10 */ + 4247 "00000000" // /* MW 9 */ + 4248 "00000000" // /* MW 8 */ + 4249 "01011011" // /* MW 7 */ + 4250 "00000001" // /* MW 6 */ + 4251 "00100000" // /* MW 5 */ + 4252 "00000000" // /* MW 4 */ + 4253 "11110000" // /* MW 3 */ + 4254 "00101100" // /* MW 2 */ + 4255 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first +.src_ref 6 "superkernels.cpp" 173 6 +.src_ref 6 "superkernels.cpp" 174 14 +.return_address + 4256 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4257 "00010000" // /* MW 9 */ + 4258 "00000000" // /* MW 8 */ + 4259 "00110001" // /* MW 7 */ + 4260 "11110011" // /* MW 6 */ + 4261 "00000001" // /* MW 5 */ + 4262 "00000000" // /* MW 4 */ + 4263 "11010000" // /* MW 3 */ + 4264 "11000110" // /* MW 2 */ + 4265 "11001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 449 8 + 4266 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4267 "00000101" // /* MW 3 */ + 4268 "00100000" // /* MW 2 */ + 4269 "00010000" // /* MW 1 */ + 4270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4271 "00000000" // /* MW 1 */ + 4272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4273 "00000000" // /* MW 1 */ + 4274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4275 "00000000" // /* MW 1 */ + 4276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4277 "00000000" // /* MW 1 */ + 4278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4279 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 4280 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4281 "00001000" // /* MW 3 */ + 4282 "01010001" // /* MW 2 */ + 4283 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first +.src_ref 6 "superkernels.cpp" 173 19 + 4284 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #508440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4285 "00010000" // /* MW 9 */ + 4286 "00001100" // /* MW 8 */ + 4287 "00110001" // /* MW 7 */ + 4288 "11110001" // /* MW 6 */ + 4289 "00000001" // /* MW 5 */ + 4290 "00000000" // /* MW 4 */ + 4291 "11010000" // /* MW 3 */ + 4292 "11001110" // /* MW 2 */ + 4293 "11111100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 173 6 first + 4294 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4295 "00110110" // /* MW 3 */ + 4296 "00000110" // /* MW 2 */ + 4297 "00000110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 173 19 + 4298 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4299 "01010110" // /* MW 3 */ + 4300 "00000110" // /* MW 2 */ + 4301 "00000010" // /* MW 1 */ + 4302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4303 "00000000" // /* MW 1 */ + 4304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4305 "00000000" // /* MW 1 */ + 4306 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4307 "00000000" // /* MW 1 */ + 4308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4309 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 first + 4310 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4311 "00110001" // /* MW 3 */ + 4312 "00100001" // /* MW 2 */ + 4313 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 4314 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4315 "00010001" // /* MW 3 */ + 4316 "11100110" // /* MW 2 */ + 4317 "00001111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 173 16 first + 4318 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4319 "00101000" // /* MW 3 */ + 4320 "01100001" // /* MW 2 */ + 4321 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 173 6 + 4322 "10000100" // JNZ r16, #4352 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4352 delay_slots=5 */ + 4323 "00000001" // /* MW 5 */ + 4324 "01000000" // /* MW 4 */ + 4325 "10000000" // /* MW 3 */ + 4326 "00001000" // /* MW 2 */ + 4327 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4329 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4331 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4333 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4335 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4337 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 174 14 + 4338 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4339 "00000001" // /* MW 3 */ + 4340 "00100000" // /* MW 2 */ + 4341 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 174 14 first + 4342 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4343 "00000000" // /* MW 9 */ + 4344 "00000000" // /* MW 8 */ + 4345 "00000000" // /* MW 7 */ + 4346 "10000000" // /* MW 6 */ + 4347 "00010001" // /* MW 5 */ + 4348 "00000110" // /* MW 4 */ + 4349 "11110110" // /* MW 3 */ + 4350 "00101100" // /* MW 2 */ + 4351 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 6 "superkernels.cpp" 176 + 4352 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4353 "00111001" // /* MW 3 */ + 4354 "11110100" // /* MW 2 */ + 4355 "00000111" // /* MW 1 */ + 4356 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4357 "00011001" // /* MW 3 */ + 4358 "11111011" // /* MW 2 */ + 4359 "00000111" // /* MW 1 */ + 4360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4361 "00000000" // /* MW 1 */ + 4362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4363 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 4364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4365 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4366 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4367 "11110001" // /* MW 3 */ + 4368 "11111101" // /* MW 2 */ + 4369 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4371 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 176 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4372 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 4373 "00000000" // /* MW 3 */ + 4374 "00101000" // /* MW 2 */ + 4375 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4376 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4377 "10100000" // /* MW 3 */ + 4378 "01100111" // /* MW 2 */ + 4379 "00011111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 176 +.delay_slot + 4380 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4381 "00000001" // /* MW 5 */ + 4382 "00000000" // /* MW 4 */ + 4383 "00000000" // /* MW 3 */ + 4384 "11111000" // /* MW 2 */ + 4385 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4387 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4389 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 4391 "00000000" // /* MW 1 */ +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv +.function setup _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv +.src_ref 2 "elementwise_unary.h" 95 first +.src_ref 2 "elementwise_unary.h" 97 22 +.src_ref 2 "elementwise_unary.h" 97 24 first +.function_start + 4400 "10111010" // LDA el0, [p1], #4; MOVXM p0, #508800 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4401 "00010000" // /* MW 9 */ + 4402 "11000000" // /* MW 8 */ + 4403 "00110001" // /* MW 7 */ + 4404 "11110000" // /* MW 6 */ + 4405 "00000001" // /* MW 5 */ + 4406 "00000000" // /* MW 4 */ + 4407 "11010000" // /* MW 3 */ + 4408 "10000101" // /* MW 2 */ + 4409 "00100011" // /* MW 1 */ + 4410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4411 "00000000" // /* MW 1 */ + 4412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4413 "00000000" // /* MW 1 */ + 4414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4415 "00000000" // /* MW 1 */ + 4416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4417 "00000000" // /* MW 1 */ + 4418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4419 "00000000" // /* MW 1 */ + 4420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4421 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 97 22 first + 4422 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4423 "00101001" // /* MW 3 */ + 4424 "00011100" // /* MW 2 */ + 4425 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 98 24 first + 4426 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4427 "00101110" // /* MW 3 */ + 4428 "00000100" // /* MW 2 */ + 4429 "00000001" // /* MW 1 */ + 4430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4431 "00000000" // /* MW 1 */ + 4432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4433 "00000000" // /* MW 1 */ + 4434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4435 "00000000" // /* MW 1 */ + 4436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4437 "00000000" // /* MW 1 */ + 4438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4439 "00000000" // /* MW 1 */ + 4440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4441 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 98 22 + 4442 "10011000" // ST el0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4443 "00101001" // /* MW 3 */ + 4444 "00000100" // /* MW 2 */ + 4445 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 99 24 first + 4446 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4447 "00101110" // /* MW 3 */ + 4448 "00010100" // /* MW 2 */ + 4449 "00000001" // /* MW 1 */ + 4450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4451 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 101 4 first + 4452 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 4453 "00000000" // /* MW 3 */ + 4454 "00101000" // /* MW 2 */ + 4455 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4457 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4459 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4461 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4463 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 99 22 first +.delay_slot + 4464 "10011000" // ST el0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4465 "00101001" // /* MW 3 */ + 4466 "00010100" // /* MW 2 */ +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv__end +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_end0 + 4467 "00001000" // /* MW 1 */ +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E +.function run _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_unary.h" 107 first +.src_ref 2 "elementwise_unary.h" 113 37 +.src_ref 2 "elementwise_unary.h" 113 78 +.src_ref 2 "elementwise_unary.h" 142 19 +.function_start + 4480 "10110110" // MOVA r0, #-6; VLDB x10, [p0], #64; MOVXM p2, #508800 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4481 "00010000" // /* MW 11 */ + 4482 "11000000" // /* MW 10 */ + 4483 "00110001" // /* MW 9 */ + 4484 "11110001" // /* MW 8 */ + 4485 "00000001" // /* MW 7 */ + 4486 "00000000" // /* MW 6 */ + 4487 "01101000" // /* MW 5 */ + 4488 "00111101" // /* MW 4 */ + 4489 "00000000" // /* MW 3 */ + 4490 "01000000" // /* MW 2 */ + 4491 "11111111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_unary.h" 113 37 first +.src_ref 2 "elementwise_unary.h" 161 19 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4492 "10110110" // LDA r2, [p2]; VLDB x7, [p0], #64; MOVXM p2, #508448 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4493 "00010000" // /* MW 11 */ + 4494 "00010000" // /* MW 10 */ + 4495 "00110001" // /* MW 9 */ + 4496 "11110001" // /* MW 8 */ + 4497 "00000001" // /* MW 7 */ + 4498 "00000000" // /* MW 6 */ + 4499 "11101000" // /* MW 5 */ + 4500 "00111011" // /* MW 4 */ + 4501 "11010000" // /* MW 3 */ + 4502 "10001010" // /* MW 2 */ + 4503 "01000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_unary.h" 142 19 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4504 "10110110" // LDA.s8 r1, [p2]; VLDB x10, [p0], #64; MOVXM r6, #16512 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4505 "00010000" // /* MW 11 */ + 4506 "01000000" // /* MW 10 */ + 4507 "11001000" // /* MW 9 */ + 4508 "00010000" // /* MW 8 */ + 4509 "00000000" // /* MW 7 */ + 4510 "00000000" // /* MW 6 */ + 4511 "01101000" // /* MW 5 */ + 4512 "00111101" // /* MW 4 */ + 4513 "01010000" // /* MW 3 */ + 4514 "10000100" // /* MW 2 */ + 4515 "01000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_unary.h" 161 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4516 "11110100" // VLDB x7, [p0], #64; VBCST.16 x0, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4517 "11100101" // /* MW 5 */ + 4518 "00110010" // /* MW 4 */ + 4519 "10000000" // /* MW 3 */ + 4520 "10111110" // /* MW 2 */ + 4521 "00000011" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4522 "01000100" // MOVXM r4, #49280 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4523 "00000000" // /* MW 5 */ + 4524 "00100001" // /* MW 4 */ + 4525 "11000010" // /* MW 3 */ + 4526 "00000000" // /* MW 2 */ + 4527 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4528 "11111000" // VBCST.16 x1, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4529 "01110010" // /* MW 3 */ + 4530 "10010001" // /* MW 2 */ + 4531 "00011000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4532 "01000100" // MOVXM r3, #32767 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4533 "11111110" // /* MW 5 */ + 4534 "10111111" // /* MW 4 */ + 4535 "01110001" // /* MW 3 */ + 4536 "00000000" // /* MW 2 */ + 4537 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4538 "11111000" // VMIN_GE.bf16 x8, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4539 "00101100" // /* MW 3 */ + 4540 "01010000" // /* MW 2 */ + 4541 "00011100" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 2 "elementwise_unary.h" 113 78 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4542 "11100100" // LSHL r0, r2, r0; VMAX_LT.bf16 x6, r16, x8, x1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4543 "11011001" // /* MW 5 */ + 4544 "10000001" // /* MW 4 */ + 4545 "10110110" // /* MW 3 */ + 4546 "00000001" // /* MW 2 */ + 4547 "00010000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_unary.h" 147 20 +.src_ref 2 "elementwise_unary.h" 166 20 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4548 "11100100" // MOVX crRnd, r1; VMIN_GE.bf16 x8, r16, x7, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4549 "01011001" // /* MW 5 */ + 4550 "01110000" // /* MW 4 */ + 4551 "00001000" // /* MW 3 */ + 4552 "01010000" // /* MW 2 */ + 4553 "00001111" // /* MW 1 */ + 4554 "11111000" // VBCST.16 x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4555 "01110010" // /* MW 3 */ + 4556 "00001101" // /* MW 2 */ + 4557 "00011001" // /* MW 1 */ + 4558 "01000100" // MOVXM r5, #15616 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4559 "00000000" // /* MW 5 */ + 4560 "10111010" // /* MW 4 */ + 4561 "00110010" // /* MW 3 */ + 4562 "00000000" // /* MW 2 */ + 4563 "00000000" // /* MW 1 */ + 4564 "11111000" // VBCST.16 x3, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4565 "01110010" // /* MW 3 */ + 4566 "10010101" // /* MW 2 */ + 4567 "00011001" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 + 4568 "01000100" // MOVXM r17, #16128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4569 "00000000" // /* MW 5 */ + 4570 "10111110" // /* MW 4 */ + 4571 "00111000" // /* MW 3 */ + 4572 "00000000" // /* MW 2 */ + 4573 "00000000" // /* MW 1 */ +.src_ref 4 "abs.hpp" 32 22 first + 4574 "01111000" // VBAND x11, x6, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4575 "00101011" // /* MW 3 */ + 4576 "10110001" // /* MW 2 */ + 4577 "00011101" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 102 58 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 5 "mul_acc32_fp.hpp" 38 105 +.src_ref 5 "mul_acc32_fp.hpp" 38 105 +.src_ref 5 "mul_acc32_fp.hpp" 39 105 +.src_ref 5 "mul_acc32_fp.hpp" 39 105 + 4578 "11100100" // MOVX r17, #828; VBCST.16 x5, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4579 "11100101" // /* MW 5 */ + 4580 "10001010" // /* MW 4 */ + 4581 "00100101" // /* MW 3 */ + 4582 "01011110" // /* MW 2 */ + 4583 "01100100" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first + 4584 "01100010" // VMAX_LT.bf16 x9, r16, x8, x1; VMUL.f dm4, x3, x11, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4585 "01100001" // /* MW 7 */ + 4586 "11100111" // /* MW 6 */ + 4587 "10001100" // /* MW 5 */ + 4588 "11100110" // /* MW 4 */ + 4589 "11101100" // /* MW 3 */ + 4590 "11000000" // /* MW 2 */ + 4591 "00000100" // /* MW 1 */ +.src_ref 4 "abs.hpp" 32 22 first + 4592 "01111000" // VBAND x8, x9, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4593 "00101011" // /* MW 3 */ + 4594 "01001001" // /* MW 2 */ + 4595 "00011100" // /* MW 1 */ + 4596 "01000100" // MOVXM r2, #16000 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4597 "00000000" // /* MW 5 */ + 4598 "00111101" // /* MW 4 */ + 4599 "00110001" // /* MW 3 */ + 4600 "00000000" // /* MW 2 */ + 4601 "00000000" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first + 4602 "01100010" // VBCST.16 x4, r2; VMUL.f dm2, x3, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4603 "00000001" // /* MW 7 */ + 4604 "11100111" // /* MW 6 */ + 4605 "10001010" // /* MW 5 */ + 4606 "11100110" // /* MW 4 */ + 4607 "01110010" // /* MW 3 */ + 4608 "00001001" // /* MW 2 */ + 4609 "00000010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 940 83 first + 4610 "11111000" // VCONV.fp32.bf16 cml0, x5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4611 "10001010" // /* MW 3 */ + 4612 "00001011" // /* MW 2 */ + 4613 "00011000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 5 "mul_acc32_fp.hpp" 38 105 first + 4614 "01100010" // VMIN_GE.bf16 x8, r16, x10, x0; VMAC.f dm3, dm0, x6, x4, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4615 "10000001" // /* MW 7 */ + 4616 "00001100" // /* MW 6 */ + 4617 "10001011" // /* MW 5 */ + 4618 "11100110" // /* MW 4 */ + 4619 "00101100" // /* MW 3 */ + 4620 "01010000" // /* MW 2 */ + 4621 "00000100" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 38 105 +.src_ref 2 "elementwise_unary.h" 125 8 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 4622 "01010110" // VCONV.bf16.fp32 x11, cml4; MOVXM ls, #4672; VMAC.f dm1, dm0, x9, x4, r17 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4623 "10000001" // /* MW 11 */ + 4624 "00010010" // /* MW 10 */ + 4625 "10001001" // /* MW 9 */ + 4626 "00000010" // /* MW 8 */ + 4627 "00100100" // /* MW 7 */ + 4628 "10001111" // /* MW 6 */ + 4629 "00000000" // /* MW 5 */ + 4630 "00000000" // /* MW 4 */ + 4631 "11000000" // /* MW 3 */ + 4632 "01000010" // /* MW 2 */ + 4633 "10110010" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4634 "11111000" // VMAX_LT.bf16 x6, r16, x8, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4635 "11101100" // /* MW 3 */ + 4636 "01000000" // /* MW 2 */ + 4637 "00011011" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 39 105 first +.src_ref 2 "elementwise_unary.h" 125 8 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4638 "01011010" // MOVXM le, #4768; VMSC.f dm2, dm3, x11, x6, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4639 "11000011" // /* MW 9 */ + 4640 "01110110" // /* MW 8 */ + 4641 "10001010" // /* MW 7 */ + 4642 "00000010" // /* MW 6 */ + 4643 "00101010" // /* MW 5 */ + 4644 "10110111" // /* MW 4 */ + 4645 "00000000" // /* MW 3 */ + 4646 "00000000" // /* MW 2 */ + 4647 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_unary.h" 125 8 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4648 "00000010" // VCONV.bf16.fp32 x5, cml2; ADD.NC lc, r0, #-2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4649 "10000000" // /* MW 7 */ + 4650 "00111111" // /* MW 6 */ + 4651 "10111000" // /* MW 5 */ + 4652 "00000010" // /* MW 4 */ + 4653 "11000000" // /* MW 3 */ + 4654 "00100010" // /* MW 2 */ + 4655 "01010010" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first + 4656 "11111000" // VMIN_GE.bf16 x8, r16, x7, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4657 "00101100" // /* MW 3 */ + 4658 "00111000" // /* MW 2 */ + 4659 "00011100" // /* MW 1 */ +.src_ref 4 "abs.hpp" 32 22 first + 4660 "11110110" // NOPA; NOPB; NOPS; VBAND x11, x6, x2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4661 "10110000" // /* MW 11 */ + 4662 "10010101" // /* MW 10 */ + 4663 "11011000" // /* MW 9 */ + 4664 "00000010" // /* MW 8 */ + 4665 "01011011" // /* MW 7 */ + 4666 "00000001" // /* MW 6 */ + 4667 "00100000" // /* MW 5 */ + 4668 "00000000" // /* MW 4 */ + 4669 "11110000" // /* MW 3 */ + 4670 "00101100" // /* MW 2 */ + 4671 "00000000" // /* MW 1 */ +.label ZLS_F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_192 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 5 "mul_acc32_fp.hpp" 39 105 first +.src_ref 2 "elementwise_unary.h" 142 19 first +.begin_of_loop +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first +.loop_nesting 1 + 4672 "01001010" // VLDB x10, [p0], #64; VMAX_LT.bf16 x9, r16, x8, x1; VMSC.f dm4, dm1, x5, x9, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4673 "00100011" // /* MW 9 */ + 4674 "00101011" // /* MW 8 */ + 4675 "10001100" // /* MW 7 */ + 4676 "11100110" // /* MW 6 */ + 4677 "11101100" // /* MW 5 */ + 4678 "11000000" // /* MW 4 */ + 4679 "01101100" // /* MW 3 */ + 4680 "00111101" // /* MW 2 */ + 4681 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "abs.hpp" 32 22 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_unary.h" 161 19 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 4682 "01001010" // VLDB x7, [p0], #64; VBAND x8, x9, x2; VMUL.f dm4, x3, x11, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4683 "01100001" // /* MW 9 */ + 4684 "11100111" // /* MW 8 */ + 4685 "10001100" // /* MW 7 */ + 4686 "01100110" // /* MW 6 */ + 4687 "00101011" // /* MW 5 */ + 4688 "01001001" // /* MW 4 */ + 4689 "11101100" // /* MW 3 */ + 4690 "00111011" // /* MW 2 */ + 4691 "00000000" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 38 105 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4692 "01001000" // VMAC.f dm3, dm0, x6, x4, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4693 "10000001" // /* MW 3 */ + 4694 "00001100" // /* MW 2 */ + 4695 "10001011" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4696 "01001000" // VMUL.f dm2, x3, x8, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4697 "00000001" // /* MW 3 */ + 4698 "11100111" // /* MW 2 */ + 4699 "10001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 38 105 first +.src_ref 2 "elementwise_unary.h" 166 20 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4700 "01100010" // VST.CONV.bf16.fp32 cml2, [p1], #64; VMAC.f dm1, dm0, x9, x4, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4701 "10000001" // /* MW 7 */ + 4702 "00010010" // /* MW 6 */ + 4703 "10001001" // /* MW 5 */ + 4704 "00000010" // /* MW 4 */ + 4705 "01100000" // /* MW 3 */ + 4706 "10100100" // /* MW 2 */ + 4707 "00100011" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4709 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_unary.h" 147 20 first +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4710 "01111010" // NOPA; VST.CONV.bf16.fp32 cml4, [p1], #64;NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4711 "00000000" // /* MW 9 */ + 4712 "00000000" // /* MW 8 */ + 4713 "00000000" // /* MW 7 */ + 4714 "00000000" // /* MW 6 */ + 4715 "00100011" // /* MW 5 */ + 4716 "00011110" // /* MW 4 */ + 4717 "11110001" // /* MW 3 */ + 4718 "00101100" // /* MW 2 */ + 4719 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 5 "accum.hpp" 1119 102 first +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 4720 "11100001" // NOPA; NOPB; VCONV.bf16.fp32 x11, cml4; NOPX; VMIN_GE.bf16 x8, r16, x10, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4721 "00000000" // /* MW 15 */ + 4722 "00000000" // /* MW 14 */ + 4723 "01111000" // /* MW 13 */ + 4724 "00010110" // /* MW 12 */ + 4725 "00101000" // /* MW 11 */ + 4726 "00000010" // /* MW 10 */ + 4727 "00000000" // /* MW 9 */ + 4728 "00000000" // /* MW 8 */ + 4729 "00010110" // /* MW 7 */ + 4730 "10010010" // /* MW 6 */ + 4731 "00100101" // /* MW 5 */ + 4732 "00000000" // /* MW 4 */ + 4733 "11110000" // /* MW 3 */ + 4734 "00101100" // /* MW 2 */ + 4735 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.aggressive_scheduled_block_id 4 +.noswbrkpt + 4736 "11100001" // NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x8, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4737 "00000000" // /* MW 15 */ + 4738 "00000000" // /* MW 14 */ + 4739 "01111000" // /* MW 13 */ + 4740 "01110110" // /* MW 12 */ + 4741 "10100000" // /* MW 11 */ + 4742 "00000001" // /* MW 10 */ + 4743 "00000000" // /* MW 9 */ + 4744 "00000000" // /* MW 8 */ + 4745 "01011011" // /* MW 7 */ + 4746 "00000001" // /* MW 6 */ + 4747 "00100000" // /* MW 5 */ + 4748 "00000000" // /* MW 4 */ + 4749 "11110000" // /* MW 3 */ + 4750 "00101100" // /* MW 2 */ + 4751 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 39 105 first +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4752 "00011011" // NOPA; NOPB; VCONV.bf16.fp32 x5, cml2; NOPX; VMIN_GE.bf16 x8, r16, x7, x0; VMSC.f dm2, dm3, x11, x6, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4753 "10110110" // /* MW 15 */ + 4754 "01010011" // /* MW 14 */ + 4755 "01111100" // /* MW 13 */ + 4756 "00010110" // /* MW 12 */ + 4757 "00011100" // /* MW 11 */ + 4758 "00000010" // /* MW 10 */ + 4759 "00000000" // /* MW 9 */ + 4760 "00000000" // /* MW 8 */ + 4761 "00010110" // /* MW 7 */ + 4762 "10010001" // /* MW 6 */ + 4763 "00100010" // /* MW 5 */ + 4764 "00000000" // /* MW 4 */ + 4765 "11110000" // /* MW 3 */ + 4766 "00101100" // /* MW 2 */ + 4767 "00000000" // /* MW 1 */ +.label ZLE_F_ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_288 +.src_ref 4 "abs.hpp" 32 22 first +.end_of_loop + 4768 "11100001" // NOPA; NOPB; NOPS; NOPX; VBAND x11, x6, x2; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4769 "00000000" // /* MW 15 */ + 4770 "00000000" // /* MW 14 */ + 4771 "10111000" // /* MW 13 */ + 4772 "10010101" // /* MW 12 */ + 4773 "11011000" // /* MW 11 */ + 4774 "00000010" // /* MW 10 */ + 4775 "00000000" // /* MW 9 */ + 4776 "00000000" // /* MW 8 */ + 4777 "01011011" // /* MW 7 */ + 4778 "00000001" // /* MW 6 */ + 4779 "00100000" // /* MW 5 */ + 4780 "00000000" // /* MW 4 */ + 4781 "11110000" // /* MW 3 */ + 4782 "00101100" // /* MW 2 */ + 4783 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 5 "mul_acc32_fp.hpp" 39 105 first +.loop_nesting 0 + 4784 "01100010" // VMAX_LT.bf16 x9, r16, x8, x1; VMSC.f dm4, dm1, x5, x9, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4785 "00100011" // /* MW 7 */ + 4786 "00101011" // /* MW 6 */ + 4787 "10001100" // /* MW 5 */ + 4788 "11100110" // /* MW 4 */ + 4789 "11101100" // /* MW 3 */ + 4790 "11000000" // /* MW 2 */ + 4791 "00000100" // /* MW 1 */ +.src_ref 4 "abs.hpp" 32 22 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 4792 "01111000" // VBAND x8, x9, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4793 "00101011" // /* MW 3 */ + 4794 "01001001" // /* MW 2 */ + 4795 "00011100" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.aggressive_scheduled_block_id 5 +.noswbrkpt + 4796 "01001000" // VMUL.f dm4, x3, x11, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4797 "01100001" // /* MW 3 */ + 4798 "11100111" // /* MW 2 */ + 4799 "10001100" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 4800 "01001000" // VMUL.f dm2, x3, x8, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4801 "00000001" // /* MW 3 */ + 4802 "11100111" // /* MW 2 */ + 4803 "10001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_unary.h" 166 20 first +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 4804 "00011000" // VST.CONV.bf16.fp32 cml2, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4805 "00100011" // /* MW 3 */ + 4806 "00011101" // /* MW 2 */ + 4807 "00001001" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 4808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4809 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_unary.h" 147 20 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4810 "00011000" // VST.CONV.bf16.fp32 cml4, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4811 "00100011" // /* MW 3 */ + 4812 "00011110" // /* MW 2 */ + 4813 "00001001" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 38 105 first + 4814 "01001000" // VMAC.f dm3, dm0, x6, x4, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4815 "10000001" // /* MW 3 */ + 4816 "00001100" // /* MW 2 */ + 4817 "10001011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 38 105 + 4818 "01100010" // VCONV.bf16.fp32 x11, cml4; VMAC.f dm1, dm0, x9, x4, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4819 "10000001" // /* MW 7 */ + 4820 "00010010" // /* MW 6 */ + 4821 "10001001" // /* MW 5 */ + 4822 "00000010" // /* MW 4 */ + 4823 "11000000" // /* MW 3 */ + 4824 "01000010" // /* MW 2 */ + 4825 "10110010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 + 4826 "00011000" // VCONV.bf16.fp32 x5, cml2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4827 "00010110" // /* MW 3 */ + 4828 "10010001" // /* MW 2 */ + 4829 "00001010" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 39 105 first + 4830 "01001000" // VMSC.f dm2, dm3, x11, x6, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4831 "11000011" // /* MW 3 */ + 4832 "01110110" // /* MW 2 */ + 4833 "10001010" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 39 105 + 4834 "01001000" // VMSC.f dm4, dm1, x5, x9, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4835 "00100011" // /* MW 3 */ + 4836 "00101011" // /* MW 2 */ + 4837 "10001100" // /* MW 1 */ + 4838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4839 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_unary.h" 129 4 first + 4840 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 4841 "00000000" // /* MW 3 */ + 4842 "00101000" // /* MW 2 */ + 4843 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4845 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4846 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4847 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_unary.h" 166 20 first +.delay_slot + 4848 "00011000" // VST.CONV.bf16.fp32 cml2, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4849 "00100011" // /* MW 3 */ + 4850 "00011101" // /* MW 2 */ + 4851 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_unary.h" 147 20 first +.delay_slot + 4852 "00011000" // VST.CONV.bf16.fp32 cml4, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4853 "00100011" // /* MW 3 */ + 4854 "00011110" // /* MW 2 */ + 4855 "00001001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4856 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E__end +.label __ZN17elementwise_unaryI8bfloat1619elementwise_sigmoidIS0_E26sigmoid_templated_params_tIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_end0 + 4857 "00000000" // /* MW 1 */ +.label __Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_sigmoid1d _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 6 "superkernels.cpp" 210 first +.src_ref 6 "superkernels.cpp" 215 6 +.function_start + 4864 "01000100" // MOVXM p3, #508416 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4865 "00000000" // /* MW 5 */ + 4866 "11000100" // /* MW 4 */ + 4867 "11000110" // /* MW 3 */ + 4868 "00000111" // /* MW 2 */ + 4869 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 215 6 first + 4870 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4871 "11000001" // /* MW 5 */ + 4872 "10110101" // /* MW 4 */ + 4873 "11011000" // /* MW 3 */ + 4874 "11000010" // /* MW 2 */ + 4875 "01100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 210 + 4876 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4877 "00000001" // /* MW 5 */ + 4878 "00000000" // /* MW 4 */ + 4879 "00000000" // /* MW 3 */ + 4880 "00001000" // /* MW 2 */ + 4881 "00000000" // /* MW 1 */ + 4882 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4883 "01110000" // /* MW 7 */ + 4884 "11010000" // /* MW 6 */ + 4885 "00001011" // /* MW 5 */ + 4886 "00000000" // /* MW 4 */ + 4887 "10110000" // /* MW 3 */ + 4888 "01100011" // /* MW 2 */ + 4889 "11111111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 212 11 + 4890 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #508420 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4891 "00010001" // /* MW 9 */ + 4892 "00000010" // /* MW 8 */ + 4893 "00110001" // /* MW 7 */ + 4894 "11110011" // /* MW 6 */ + 4895 "00000001" // /* MW 5 */ + 4896 "00000000" // /* MW 4 */ + 4897 "10110000" // /* MW 3 */ + 4898 "10000010" // /* MW 2 */ + 4899 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 + 4900 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4901 "11000000" // /* MW 3 */ + 4902 "11010100" // /* MW 2 */ + 4903 "00011011" // /* MW 1 */ + 4904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4905 "00000000" // /* MW 1 */ + 4906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4907 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 215 6 +.src_ref 6 "superkernels.cpp" 215 16 + 4908 "10000100" // JNZ r16, #5072 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5072 delay_slots=5 */ + 4909 "00000001" // /* MW 5 */ + 4910 "01000000" // /* MW 4 */ + 4911 "11101000" // /* MW 3 */ + 4912 "00001001" // /* MW 2 */ + 4913 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 212 22 first +.delay_slot + 4914 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4915 "10010000" // /* MW 3 */ + 4916 "01100010" // /* MW 2 */ + 4917 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 212 30 +.delay_slot + 4918 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4919 "11111011" // /* MW 3 */ + 4920 "01100011" // /* MW 2 */ + 4921 "00010100" // /* MW 1 */ +.delay_slot + 4922 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4923 "00111101" // /* MW 3 */ + 4924 "11110100" // /* MW 2 */ + 4925 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 6 "superkernels.cpp" 212 11 +.delay_slot + 4926 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4927 "01110000" // /* MW 7 */ + 4928 "01100000" // /* MW 6 */ + 4929 "00110000" // /* MW 5 */ + 4930 "00000011" // /* MW 4 */ + 4931 "00110000" // /* MW 3 */ + 4932 "11000110" // /* MW 2 */ + 4933 "11000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 218 4 +.src_ref 6 "superkernels.cpp" 229 2 +.delay_slot + 4934 "01000100" // MOVXM p0, #508800 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4935 "00000000" // /* MW 5 */ + 4936 "11000111" // /* MW 4 */ + 4937 "11000000" // /* MW 3 */ + 4938 "00000111" // /* MW 2 */ + 4939 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4940 "01000100" // MOVXM p2, #508448 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4941 "01000000" // /* MW 5 */ + 4942 "11000100" // /* MW 4 */ + 4943 "11000100" // /* MW 3 */ + 4944 "00000111" // /* MW 2 */ + 4945 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4946 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #508444 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4947 "00010000" // /* MW 9 */ + 4948 "00001110" // /* MW 8 */ + 4949 "00110001" // /* MW 7 */ + 4950 "11110001" // /* MW 6 */ + 4951 "00000001" // /* MW 5 */ + 4952 "00000000" // /* MW 4 */ + 4953 "11100000" // /* MW 3 */ + 4954 "11000000" // /* MW 2 */ + 4955 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4957 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 218 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 4958 "00000100" // JL #4400 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4400 delay_slots=5 */ + 4959 "00000001" // /* MW 5 */ + 4960 "00000000" // /* MW 4 */ + 4961 "10011000" // /* MW 3 */ + 4962 "00001000" // /* MW 2 */ + 4963 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4964 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4965 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4967 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4968 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4969 "00110001" // /* MW 3 */ + 4970 "00100000" // /* MW 2 */ + 4971 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 4972 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4973 "00000101" // /* MW 3 */ + 4974 "00100000" // /* MW 2 */ + 4975 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 4976 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4977 "00000000" // /* MW 15 */ + 4978 "00000000" // /* MW 14 */ + 4979 "01111000" // /* MW 13 */ + 4980 "10100101" // /* MW 12 */ + 4981 "00000001" // /* MW 11 */ + 4982 "00000000" // /* MW 10 */ + 4983 "00000000" // /* MW 9 */ + 4984 "10000000" // /* MW 8 */ + 4985 "00010001" // /* MW 7 */ + 4986 "00000110" // /* MW 6 */ + 4987 "00100010" // /* MW 5 */ + 4988 "00000000" // /* MW 4 */ + 4989 "11110000" // /* MW 3 */ + 4990 "00101100" // /* MW 2 */ + 4991 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 222 18 +.return_address + 4992 "01000100" // MOVXM p2, #508420 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4993 "00001000" // /* MW 5 */ + 4994 "11000100" // /* MW 4 */ + 4995 "11000100" // /* MW 3 */ + 4996 "00000111" // /* MW 2 */ + 4997 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 222 18 first +.src_ref 6 "superkernels.cpp" 222 46 + 4998 "10111010" // LDA r16, [p2]; MOVXM p2, #508800 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4999 "00010000" // /* MW 9 */ + 5000 "11000000" // /* MW 8 */ + 5001 "00110001" // /* MW 7 */ + 5002 "11110001" // /* MW 6 */ + 5003 "00000001" // /* MW 5 */ + 5004 "00000000" // /* MW 4 */ + 5005 "11010000" // /* MW 3 */ + 5006 "11000010" // /* MW 2 */ + 5007 "01000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 220 32 +.src_ref 6 "superkernels.cpp" 222 46 +.src_ref 6 "superkernels.cpp" 229 2 + 5008 "10111010" // LDA r17, [p2]; MOVXM p2, #508800 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5009 "00010000" // /* MW 9 */ + 5010 "11000000" // /* MW 8 */ + 5011 "00110001" // /* MW 7 */ + 5012 "11110001" // /* MW 6 */ + 5013 "00000001" // /* MW 5 */ + 5014 "00000000" // /* MW 4 */ + 5015 "11010000" // /* MW 3 */ + 5016 "11000110" // /* MW 2 */ + 5017 "01000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 220 32 first +.src_ref 6 "superkernels.cpp" 222 16 +.src_ref 6 "superkernels.cpp" 227 47 + 5018 "10111010" // LDA.u16 r18, [p2, #8]; MOVXM p1, #508424 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5019 "00010000" // /* MW 9 */ + 5020 "00000100" // /* MW 8 */ + 5021 "10110001" // /* MW 7 */ + 5022 "11110000" // /* MW 6 */ + 5023 "00000001" // /* MW 5 */ + 5024 "00000000" // /* MW 4 */ + 5025 "01010000" // /* MW 3 */ + 5026 "11001011" // /* MW 2 */ + 5027 "01001000" // /* MW 1 */ + 5028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5029 "00000000" // /* MW 1 */ + 5030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5031 "00000000" // /* MW 1 */ + 5032 "10000100" // J #5088 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=5088 delay_slots=5 */ + 5033 "00000000" // /* MW 5 */ + 5034 "00000000" // /* MW 4 */ + 5035 "11110000" // /* MW 3 */ + 5036 "00001001" // /* MW 2 */ + 5037 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 220 13 +.delay_slot + 5038 "01000100" // MOVXM p0, #508440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5039 "00110000" // /* MW 5 */ + 5040 "11000100" // /* MW 4 */ + 5041 "11000000" // /* MW 3 */ + 5042 "00000111" // /* MW 2 */ + 5043 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5045 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 222 27 first +.delay_slot + 5046 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5047 "00001111" // /* MW 3 */ + 5048 "01100001" // /* MW 2 */ + 5049 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 220 13 first +.delay_slot + 5050 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5051 "10100011" // /* MW 5 */ + 5052 "00001100" // /* MW 4 */ + 5053 "11110000" // /* MW 3 */ + 5054 "00101100" // /* MW 2 */ + 5055 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 222 16 first +.delay_slot + 5056 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5057 "00000000" // /* MW 15 */ + 5058 "00000000" // /* MW 14 */ + 5059 "01111000" // /* MW 13 */ + 5060 "10100101" // /* MW 12 */ + 5061 "00000001" // /* MW 11 */ + 5062 "00000000" // /* MW 10 */ + 5063 "00000000" // /* MW 9 */ + 5064 "10000000" // /* MW 8 */ + 5065 "00010001" // /* MW 7 */ + 5066 "00000110" // /* MW 6 */ + 5067 "00100001" // /* MW 5 */ + 5068 "00000000" // /* MW 4 */ + 5069 "11110000" // /* MW 3 */ + 5070 "00101100" // /* MW 2 */ + 5071 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 6 "superkernels.cpp" 227 47 +.src_ref 6 "superkernels.cpp" 229 2 + 5072 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #508424; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5073 "00000000" // /* MW 15 */ + 5074 "00000000" // /* MW 14 */ + 5075 "00010000" // /* MW 13 */ + 5076 "00000100" // /* MW 12 */ + 5077 "10110001" // /* MW 11 */ + 5078 "11110000" // /* MW 10 */ + 5079 "00000001" // /* MW 9 */ + 5080 "00000000" // /* MW 8 */ + 5081 "10001011" // /* MW 7 */ + 5082 "10000000" // /* MW 6 */ + 5083 "00100010" // /* MW 5 */ + 5084 "00000000" // /* MW 4 */ + 5085 "11110000" // /* MW 3 */ + 5086 "00101100" // /* MW 2 */ + 5087 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 +.src_ref 1 "io_buffer_main.h" 242 49 first + 5088 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5089 "00000000" // /* MW 7 */ + 5090 "11000011" // /* MW 6 */ + 5091 "10110011" // /* MW 5 */ + 5092 "00000011" // /* MW 4 */ + 5093 "01100000" // /* MW 3 */ + 5094 "10010001" // /* MW 2 */ + 5095 "01110011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 226 2 + 5096 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5097 "00010000" // /* MW 9 */ + 5098 "00000000" // /* MW 8 */ + 5099 "00110001" // /* MW 7 */ + 5100 "11110000" // /* MW 6 */ + 5101 "00000001" // /* MW 5 */ + 5102 "00000000" // /* MW 4 */ + 5103 "11010000" // /* MW 3 */ + 5104 "11101110" // /* MW 2 */ + 5105 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 5106 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5107 "00010110" // /* MW 3 */ + 5108 "11111110" // /* MW 2 */ + 5109 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 5110 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5111 "00110110" // /* MW 3 */ + 5112 "11111110" // /* MW 2 */ + 5113 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first + 5114 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5115 "01010110" // /* MW 3 */ + 5116 "01000110" // /* MW 2 */ + 5117 "00000111" // /* MW 1 */ + 5118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5119 "00000000" // /* MW 1 */ + 5120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5121 "00000000" // /* MW 1 */ + 5122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5123 "00000000" // /* MW 1 */ + 5124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5125 "00000000" // /* MW 1 */ + 5126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5127 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 5128 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5129 "00000010" // /* MW 3 */ + 5130 "01100001" // /* MW 2 */ + 5131 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 + 5132 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5133 "00010001" // /* MW 3 */ + 5134 "00000110" // /* MW 2 */ + 5135 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 + 5136 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5137 "11111101" // /* MW 3 */ + 5138 "11100000" // /* MW 2 */ + 5139 "00010111" // /* MW 1 */ + 5140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5141 "00000000" // /* MW 1 */ + 5142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5143 "00000000" // /* MW 1 */ + 5144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5145 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 5146 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5147 "00001000" // /* MW 3 */ + 5148 "10010011" // /* MW 2 */ + 5149 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 227 45 + 5150 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5151 "10000001" // /* MW 5 */ + 5152 "10101101" // /* MW 4 */ + 5153 "10100111" // /* MW 3 */ + 5154 "00000000" // /* MW 2 */ + 5155 "00000100" // /* MW 1 */ + 5156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5157 "00000000" // /* MW 1 */ + 5158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5159 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 226 2 first + 5160 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5161 "00110110" // /* MW 3 */ + 5162 "00000110" // /* MW 2 */ + 5163 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 1 "io_buffer_main.h" 348 51 + 5164 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5165 "10000001" // /* MW 5 */ + 5166 "11011101" // /* MW 4 */ + 5167 "11011100" // /* MW 3 */ + 5168 "11001010" // /* MW 2 */ + 5169 "11000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 227 47 first + 5170 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5171 "01110110" // /* MW 3 */ + 5172 "00000110" // /* MW 2 */ + 5173 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 5174 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5175 "10011110" // /* MW 3 */ + 5176 "01011100" // /* MW 2 */ + 5177 "00000111" // /* MW 1 */ + 5178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5179 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 229 2 first +.no_stack_arguments + 5180 "00000100" // JL #4480 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4480 delay_slots=5 */ + 5181 "00000001" // /* MW 5 */ + 5182 "00000000" // /* MW 4 */ + 5183 "11000000" // /* MW 3 */ + 5184 "00001000" // /* MW 2 */ + 5185 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5187 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 226 2 first +.delay_slot + 5188 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5189 "00000111" // /* MW 3 */ + 5190 "01100010" // /* MW 2 */ + 5191 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 226 2 +.delay_slot + 5192 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5193 "00110001" // /* MW 3 */ + 5194 "00000110" // /* MW 2 */ + 5195 "00001000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 227 45 first +.delay_slot + 5196 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5197 "00001101" // /* MW 3 */ + 5198 "11100001" // /* MW 2 */ + 5199 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 227 45 +.delay_slot + 5200 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5201 "00000000" // /* MW 15 */ + 5202 "00000000" // /* MW 14 */ + 5203 "10101000" // /* MW 13 */ + 5204 "10100000" // /* MW 12 */ + 5205 "00110100" // /* MW 11 */ + 5206 "00000000" // /* MW 10 */ + 5207 "00000000" // /* MW 9 */ + 5208 "00000000" // /* MW 8 */ + 5209 "01011011" // /* MW 7 */ + 5210 "00000001" // /* MW 6 */ + 5211 "00100000" // /* MW 5 */ + 5212 "00000000" // /* MW 4 */ + 5213 "11110000" // /* MW 3 */ + 5214 "00101100" // /* MW 2 */ + 5215 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first +.src_ref 6 "superkernels.cpp" 231 6 +.src_ref 6 "superkernels.cpp" 232 14 +.return_address + 5216 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5217 "00010000" // /* MW 9 */ + 5218 "00000000" // /* MW 8 */ + 5219 "00110001" // /* MW 7 */ + 5220 "11110011" // /* MW 6 */ + 5221 "00000001" // /* MW 5 */ + 5222 "00000000" // /* MW 4 */ + 5223 "11010000" // /* MW 3 */ + 5224 "11000110" // /* MW 2 */ + 5225 "11001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 449 8 + 5226 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5227 "00000101" // /* MW 3 */ + 5228 "00100000" // /* MW 2 */ + 5229 "00010000" // /* MW 1 */ + 5230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5231 "00000000" // /* MW 1 */ + 5232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5233 "00000000" // /* MW 1 */ + 5234 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5235 "00000000" // /* MW 1 */ + 5236 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5237 "00000000" // /* MW 1 */ + 5238 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5239 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 5240 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5241 "00001000" // /* MW 3 */ + 5242 "01010001" // /* MW 2 */ + 5243 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first +.src_ref 6 "superkernels.cpp" 231 19 + 5244 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #508440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5245 "00010000" // /* MW 9 */ + 5246 "00001100" // /* MW 8 */ + 5247 "00110001" // /* MW 7 */ + 5248 "11110001" // /* MW 6 */ + 5249 "00000001" // /* MW 5 */ + 5250 "00000000" // /* MW 4 */ + 5251 "11010000" // /* MW 3 */ + 5252 "11001110" // /* MW 2 */ + 5253 "11111100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 231 6 first + 5254 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5255 "00110110" // /* MW 3 */ + 5256 "00000110" // /* MW 2 */ + 5257 "00000110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 231 19 + 5258 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5259 "01010110" // /* MW 3 */ + 5260 "00000110" // /* MW 2 */ + 5261 "00000010" // /* MW 1 */ + 5262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5263 "00000000" // /* MW 1 */ + 5264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5265 "00000000" // /* MW 1 */ + 5266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5267 "00000000" // /* MW 1 */ + 5268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5269 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 first + 5270 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5271 "00110001" // /* MW 3 */ + 5272 "00100001" // /* MW 2 */ + 5273 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 5274 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5275 "00010001" // /* MW 3 */ + 5276 "11100110" // /* MW 2 */ + 5277 "00001111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 231 16 first + 5278 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5279 "00101000" // /* MW 3 */ + 5280 "01100001" // /* MW 2 */ + 5281 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 231 6 + 5282 "10000100" // JNZ r16, #5312 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5312 delay_slots=5 */ + 5283 "00000001" // /* MW 5 */ + 5284 "01000000" // /* MW 4 */ + 5285 "01100000" // /* MW 3 */ + 5286 "00001010" // /* MW 2 */ + 5287 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5289 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5291 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5293 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5294 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5295 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5297 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 232 14 + 5298 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5299 "00000001" // /* MW 3 */ + 5300 "00100000" // /* MW 2 */ + 5301 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 232 14 first + 5302 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5303 "00000000" // /* MW 9 */ + 5304 "00000000" // /* MW 8 */ + 5305 "00000000" // /* MW 7 */ + 5306 "10000000" // /* MW 6 */ + 5307 "00010001" // /* MW 5 */ + 5308 "00000110" // /* MW 4 */ + 5309 "11110110" // /* MW 3 */ + 5310 "00101100" // /* MW 2 */ + 5311 "00000000" // /* MW 1 */ +.label TGT_F_Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 6 "superkernels.cpp" 234 + 5312 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5313 "00111001" // /* MW 3 */ + 5314 "11110100" // /* MW 2 */ + 5315 "00000111" // /* MW 1 */ + 5316 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5317 "00011001" // /* MW 3 */ + 5318 "11111011" // /* MW 2 */ + 5319 "00000111" // /* MW 1 */ + 5320 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5321 "00000000" // /* MW 1 */ + 5322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5323 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 5324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5325 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 5326 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5327 "11110001" // /* MW 3 */ + 5328 "11111101" // /* MW 2 */ + 5329 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5331 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 234 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5332 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 5333 "00000000" // /* MW 3 */ + 5334 "00101000" // /* MW 2 */ + 5335 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5336 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5337 "10100000" // /* MW 3 */ + 5338 "01100111" // /* MW 2 */ + 5339 "00011111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 234 +.delay_slot + 5340 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5341 "00000001" // /* MW 5 */ + 5342 "00000000" // /* MW 4 */ + 5343 "00000000" // /* MW 3 */ + 5344 "11111000" // /* MW 2 */ + 5345 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5347 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5349 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z21superkernel_sigmoid1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 5351 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.src_ref 2 "elementwise_binary.h" 100 first +.src_ref 2 "elementwise_binary.h" 103 4 first +.function_start + 5360 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 5361 "00000000" // /* MW 3 */ + 5362 "00101000" // /* MW 2 */ + 5363 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 5364 "01000100" // MOVXM p0, #508768 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5365 "11000000" // /* MW 5 */ + 5366 "11000110" // /* MW 4 */ + 5367 "11000000" // /* MW 3 */ + 5368 "00000111" // /* MW 2 */ + 5369 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 5370 "10111000" // MOV m0, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5371 "10000000" // /* MW 3 */ + 5372 "00000000" // /* MW 2 */ + 5373 "00011000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 first +.delay_slot + 5374 "10011000" // ST m0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5375 "00000001" // /* MW 3 */ + 5376 "00000100" // /* MW 2 */ + 5377 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 101 23 +.delay_slot + 5378 "10011000" // ST m0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5379 "00000001" // /* MW 3 */ + 5380 "00010100" // /* MW 2 */ + 5381 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_end0 + 5383 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.src_ref 2 "elementwise_binary.h" 89 first +.src_ref 2 "elementwise_binary.h" 92 22 +.src_ref 2 "elementwise_binary.h" 92 24 first +.function_start + 5392 "10111010" // LDA el0, [p1], #4; MOVXM p0, #508736 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5393 "00010000" // /* MW 9 */ + 5394 "10100000" // /* MW 8 */ + 5395 "00110001" // /* MW 7 */ + 5396 "11110000" // /* MW 6 */ + 5397 "00000001" // /* MW 5 */ + 5398 "00000000" // /* MW 4 */ + 5399 "11010000" // /* MW 3 */ + 5400 "10000101" // /* MW 2 */ + 5401 "00100011" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 89 + 5402 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5403 "00000001" // /* MW 5 */ + 5404 "00000000" // /* MW 4 */ + 5405 "00000000" // /* MW 3 */ + 5406 "00001000" // /* MW 2 */ + 5407 "00000000" // /* MW 1 */ + 5408 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5409 "00111101" // /* MW 3 */ + 5410 "11111100" // /* MW 2 */ + 5411 "00001111" // /* MW 1 */ + 5412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5413 "00000000" // /* MW 1 */ + 5414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5415 "00000000" // /* MW 1 */ + 5416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5417 "00000000" // /* MW 1 */ + 5418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5419 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 92 22 first + 5420 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5421 "00101001" // /* MW 3 */ + 5422 "00011100" // /* MW 2 */ + 5423 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 93 24 first + 5424 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5425 "00101110" // /* MW 3 */ + 5426 "00011100" // /* MW 2 */ + 5427 "00000001" // /* MW 1 */ + 5428 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5429 "00000000" // /* MW 1 */ + 5430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5431 "00000000" // /* MW 1 */ + 5432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5433 "00000000" // /* MW 1 */ + 5434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5435 "00000000" // /* MW 1 */ + 5436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5437 "00000000" // /* MW 1 */ + 5438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5439 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 93 22 + 5440 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5441 "00101001" // /* MW 3 */ + 5442 "00011100" // /* MW 2 */ + 5443 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 94 24 first + 5444 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5445 "00101110" // /* MW 3 */ + 5446 "00000100" // /* MW 2 */ + 5447 "00000001" // /* MW 1 */ + 5448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5449 "00000000" // /* MW 1 */ + 5450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5451 "00000000" // /* MW 1 */ + 5452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5453 "00000000" // /* MW 1 */ + 5454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5455 "00000000" // /* MW 1 */ + 5456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5457 "00000000" // /* MW 1 */ + 5458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5459 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 94 22 + 5460 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5461 "00101001" // /* MW 3 */ + 5462 "00011100" // /* MW 2 */ + 5463 "00001000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 95 24 first + 5464 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5465 "00101110" // /* MW 3 */ + 5466 "00010100" // /* MW 2 */ + 5467 "00000001" // /* MW 1 */ + 5468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5469 "00000000" // /* MW 1 */ + 5470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5471 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 96 8 first +.no_stack_arguments + 5472 "00000100" // JL #5360 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=5360 delay_slots=5 */ + 5473 "00000001" // /* MW 5 */ + 5474 "00000000" // /* MW 4 */ + 5475 "01111000" // /* MW 3 */ + 5476 "00001010" // /* MW 2 */ + 5477 "00000000" // /* MW 1 */ +.delay_slot + 5478 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5479 "10011101" // /* MW 3 */ + 5480 "11111011" // /* MW 2 */ + 5481 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5483 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5485 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 95 22 first +.delay_slot + 5486 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5487 "00101001" // /* MW 3 */ + 5488 "11011100" // /* MW 2 */ + 5489 "00001000" // /* MW 1 */ +.src_ref 3 "mul_impl.h" 93 25 +.delay_slot + 5490 "00101110" // NOPA; NOPS; MOV p7, p0; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5491 "00011100" // /* MW 13 */ + 5492 "00000000" // /* MW 12 */ + 5493 "00000000" // /* MW 11 */ + 5494 "00000111" // /* MW 10 */ + 5495 "00000110" // /* MW 9 */ + 5496 "01111011" // /* MW 8 */ + 5497 "00000000" // /* MW 7 */ + 5498 "00000000" // /* MW 6 */ + 5499 "10110110" // /* MW 5 */ + 5500 "00000010" // /* MW 4 */ + 5501 "11110000" // /* MW 3 */ + 5502 "00101100" // /* MW 2 */ + 5503 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 98 4 +.return_address + 5504 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5505 "00111001" // /* MW 3 */ + 5506 "11111100" // /* MW 2 */ + 5507 "00000111" // /* MW 1 */ + 5508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5509 "00000000" // /* MW 1 */ + 5510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5511 "00000000" // /* MW 1 */ + 5512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5513 "00000000" // /* MW 1 */ + 5514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5515 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5517 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5518 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5519 "10011001" // /* MW 3 */ + 5520 "11111011" // /* MW 2 */ + 5521 "00000111" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 98 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5522 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 5523 "00000000" // /* MW 3 */ + 5524 "00101000" // /* MW 2 */ + 5525 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5527 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5529 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5531 "00000000" // /* MW 1 */ +.src_ref 3 "mul_impl.h" 93 25 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5532 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5533 "00000001" // /* MW 3 */ + 5534 "00100000" // /* MW 2 */ + 5535 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 98 4 +.src_ref 3 "mul_impl.h" 93 25 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5536 "00111010" // ST r16, [p7, #16]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5537 "01110001" // /* MW 9 */ + 5538 "00000000" // /* MW 8 */ + 5539 "00000000" // /* MW 7 */ + 5540 "00000000" // /* MW 6 */ + 5541 "11111110" // /* MW 5 */ + 5542 "00111111" // /* MW 4 */ + 5543 "00110000" // /* MW 3 */ + 5544 "11000010" // /* MW 2 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + 5545 "11101000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.function run _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.src_ref 2 "elementwise_binary.h" 108 first +.src_ref 2 "elementwise_binary.h" 115 37 +.src_ref 2 "elementwise_binary.h" 115 37 +.function_start + 5552 "10111010" // MOVA m0, #32; MOVXM p3, #508736 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5553 "00010000" // /* MW 9 */ + 5554 "10100000" // /* MW 8 */ + 5555 "10110001" // /* MW 7 */ + 5556 "11110001" // /* MW 6 */ + 5557 "00000001" // /* MW 5 */ + 5558 "00000000" // /* MW 4 */ + 5559 "10000000" // /* MW 3 */ + 5560 "00000000" // /* MW 2 */ + 5561 "00000100" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 115 37 first +.src_ref 2 "elementwise_binary.h" 115 78 + 5562 "10111010" // LDA r1, [p3], m0; MOVXM p4, #508448 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5563 "00010000" // /* MW 9 */ + 5564 "00010000" // /* MW 8 */ + 5565 "00110001" // /* MW 7 */ + 5566 "11110010" // /* MW 6 */ + 5567 "00000001" // /* MW 5 */ + 5568 "00000000" // /* MW 4 */ + 5569 "11010000" // /* MW 3 */ + 5570 "00000110" // /* MW 2 */ + 5571 "01100001" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 115 78 +.src_ref 2 "elementwise_binary.h" 115 78 + 5572 "10111010" // LDA m1, [p3]; MOVX r0, #828; MOV r3, #-6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5573 "01011000" // /* MW 9 */ + 5574 "11111010" // /* MW 8 */ + 5575 "01101111" // /* MW 7 */ + 5576 "10001000" // /* MW 6 */ + 5577 "00000111" // /* MW 5 */ + 5578 "00011000" // /* MW 4 */ + 5579 "11010000" // /* MW 3 */ + 5580 "10010000" // /* MW 2 */ + 5581 "01100000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 115 78 +.src_ref 2 "elementwise_binary.h" 127 8 first + 5582 "10111010" // LDA m0, [p3, #4]; MOVXM ls, #5744 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5583 "00010000" // /* MW 9 */ + 5584 "00111000" // /* MW 8 */ + 5585 "01111011" // /* MW 7 */ + 5586 "00000100" // /* MW 6 */ + 5587 "00000000" // /* MW 5 */ + 5588 "00000000" // /* MW 4 */ + 5589 "11010000" // /* MW 3 */ + 5590 "10000000" // /* MW 2 */ + 5591 "01100010" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 115 78 first +.src_ref 2 "elementwise_binary.h" 127 8 + 5592 "10111010" // LDA.s8 r2, [p4]; MOVXM le, #5760 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5593 "00010000" // /* MW 9 */ + 5594 "01000000" // /* MW 8 */ + 5595 "10111011" // /* MW 7 */ + 5596 "00000101" // /* MW 6 */ + 5597 "00000000" // /* MW 5 */ + 5598 "00000000" // /* MW 4 */ + 5599 "01010000" // /* MW 3 */ + 5600 "10001000" // /* MW 2 */ + 5601 "10000000" // /* MW 1 */ + 5602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5603 "00000000" // /* MW 1 */ + 5604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5605 "00000000" // /* MW 1 */ + 5606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5607 "00000000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 115 78 + 5608 "10011000" // LSHL r1, r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5609 "00111101" // /* MW 3 */ + 5610 "01000010" // /* MW 2 */ + 5611 "00010000" // /* MW 1 */ +.src_ref 2 "elementwise_binary.h" 127 8 first + 5612 "10011000" // ADD.NC lc, r1, #-7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5613 "11111100" // /* MW 3 */ + 5614 "01110000" // /* MW 2 */ + 5615 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first + 5616 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5617 "11101000" // /* MW 5 */ + 5618 "01010000" // /* MW 4 */ + 5619 "01110000" // /* MW 3 */ + 5620 "00010011" // /* MW 2 */ + 5621 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 154 20 +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.src_ref 2 "elementwise_binary.h" 177 20 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5622 "00010010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; MOVX crRnd, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5623 "10000000" // /* MW 7 */ + 5624 "10111010" // /* MW 6 */ + 5625 "01101000" // /* MW 5 */ + 5626 "01010000" // /* MW 4 */ + 5627 "01110000" // /* MW 3 */ + 5628 "00011011" // /* MW 2 */ + 5629 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5630 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5631 "11101000" // /* MW 5 */ + 5632 "01010000" // /* MW 4 */ + 5633 "01110000" // /* MW 3 */ + 5634 "00010011" // /* MW 2 */ + 5635 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5636 "00111100" // VLDA x3, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5637 "01101000" // /* MW 5 */ + 5638 "01010000" // /* MW 4 */ + 5639 "01110000" // /* MW 3 */ + 5640 "00011011" // /* MW 2 */ + 5641 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5642 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5643 "11101000" // /* MW 5 */ + 5644 "01010000" // /* MW 4 */ + 5645 "01110000" // /* MW 3 */ + 5646 "00010011" // /* MW 2 */ + 5647 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5648 "00111100" // VLDA x3, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5649 "01101000" // /* MW 5 */ + 5650 "01010000" // /* MW 4 */ + 5651 "01110000" // /* MW 3 */ + 5652 "00011011" // /* MW 2 */ + 5653 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5654 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5655 "11101000" // /* MW 5 */ + 5656 "01010000" // /* MW 4 */ + 5657 "01110000" // /* MW 3 */ + 5658 "00010011" // /* MW 2 */ + 5659 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5660 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5661 "01000001" // /* MW 9 */ + 5662 "11100010" // /* MW 8 */ + 5663 "00000000" // /* MW 7 */ + 5664 "00011101" // /* MW 6 */ + 5665 "00110100" // /* MW 5 */ + 5666 "00101000" // /* MW 4 */ + 5667 "01110000" // /* MW 3 */ + 5668 "00011011" // /* MW 2 */ + 5669 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5670 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5671 "01100001" // /* MW 9 */ + 5672 "11100000" // /* MW 8 */ + 5673 "00000001" // /* MW 7 */ + 5674 "00011101" // /* MW 6 */ + 5675 "01110100" // /* MW 5 */ + 5676 "00101000" // /* MW 4 */ + 5677 "01110000" // /* MW 3 */ + 5678 "00010011" // /* MW 2 */ + 5679 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5680 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5681 "01000001" // /* MW 9 */ + 5682 "11100010" // /* MW 8 */ + 5683 "00000000" // /* MW 7 */ + 5684 "00011101" // /* MW 6 */ + 5685 "00110100" // /* MW 5 */ + 5686 "00101000" // /* MW 4 */ + 5687 "01110000" // /* MW 3 */ + 5688 "00011011" // /* MW 2 */ + 5689 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5690 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5691 "01100001" // /* MW 9 */ + 5692 "11100000" // /* MW 8 */ + 5693 "00000001" // /* MW 7 */ + 5694 "00011101" // /* MW 6 */ + 5695 "01110100" // /* MW 5 */ + 5696 "00101000" // /* MW 4 */ + 5697 "01110000" // /* MW 3 */ + 5698 "00010011" // /* MW 2 */ + 5699 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5700 "01100110" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; NOPS; VMUL.f dm0, x1, x2, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5701 "01000001" // /* MW 11 */ + 5702 "11100010" // /* MW 10 */ + 5703 "00000000" // /* MW 9 */ + 5704 "10001110" // /* MW 8 */ + 5705 "10101101" // /* MW 7 */ + 5706 "00000000" // /* MW 6 */ + 5707 "01101000" // /* MW 5 */ + 5708 "01010000" // /* MW 4 */ + 5709 "01110000" // /* MW 3 */ + 5710 "00011011" // /* MW 2 */ + 5711 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5712 "00001011" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; NOPS; NOPX; NOPM; VMUL.f dm1, x0, x3, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5713 "00000011" // /* MW 15 */ + 5714 "00001111" // /* MW 14 */ + 5715 "01111000" // /* MW 13 */ + 5716 "10100101" // /* MW 12 */ + 5717 "00000001" // /* MW 11 */ + 5718 "00000000" // /* MW 10 */ + 5719 "00000000" // /* MW 9 */ + 5720 "00000000" // /* MW 8 */ + 5721 "01011011" // /* MW 7 */ + 5722 "00000001" // /* MW 6 */ + 5723 "11101000" // /* MW 5 */ + 5724 "01010000" // /* MW 4 */ + 5725 "01110000" // /* MW 3 */ + 5726 "00010011" // /* MW 2 */ + 5727 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5728 "00001011" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5729 "00010010" // /* MW 15 */ + 5730 "00000111" // /* MW 14 */ + 5731 "01111000" // /* MW 13 */ + 5732 "10100101" // /* MW 12 */ + 5733 "00000001" // /* MW 11 */ + 5734 "00000000" // /* MW 10 */ + 5735 "00000000" // /* MW 9 */ + 5736 "00000000" // /* MW 8 */ + 5737 "00100011" // /* MW 7 */ + 5738 "00011100" // /* MW 6 */ + 5739 "01101010" // /* MW 5 */ + 5740 "01010000" // /* MW 4 */ + 5741 "01110000" // /* MW 3 */ + 5742 "00011011" // /* MW 2 */ + 5743 "00100001" // /* MW 1 */ +.label ZLS_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_192 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 146 20 first +.src_ref 2 "elementwise_binary.h" 148 20 first +.src_ref 2 "elementwise_binary.h" 154 20 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 5744 "00001011" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; VMUL.f dm1, x0, x3, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5745 "00000011" // /* MW 15 */ + 5746 "00001111" // /* MW 14 */ + 5747 "01111000" // /* MW 13 */ + 5748 "10100101" // /* MW 12 */ + 5749 "00000001" // /* MW 11 */ + 5750 "00000000" // /* MW 10 */ + 5751 "00000000" // /* MW 9 */ + 5752 "00000000" // /* MW 8 */ + 5753 "10100011" // /* MW 7 */ + 5754 "00011100" // /* MW 6 */ + 5755 "11101010" // /* MW 5 */ + 5756 "01010000" // /* MW 4 */ + 5757 "01110000" // /* MW 3 */ + 5758 "00010011" // /* MW 2 */ + 5759 "00100001" // /* MW 1 */ +.label ZLE_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_208 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 170 20 first +.src_ref 2 "elementwise_binary.h" 172 20 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5760 "00001011" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5761 "00010010" // /* MW 15 */ + 5762 "00000111" // /* MW 14 */ + 5763 "01111000" // /* MW 13 */ + 5764 "10100101" // /* MW 12 */ + 5765 "00000001" // /* MW 11 */ + 5766 "00000000" // /* MW 10 */ + 5767 "00000000" // /* MW 9 */ + 5768 "00000000" // /* MW 8 */ + 5769 "00100011" // /* MW 7 */ + 5770 "00011100" // /* MW 6 */ + 5771 "01101010" // /* MW 5 */ + 5772 "01010000" // /* MW 4 */ + 5773 "01110000" // /* MW 3 */ + 5774 "00011011" // /* MW 2 */ + 5775 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 5776 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5777 "01100001" // /* MW 7 */ + 5778 "11100000" // /* MW 6 */ + 5779 "00000001" // /* MW 5 */ + 5780 "00000010" // /* MW 4 */ + 5781 "01100000" // /* MW 3 */ + 5782 "10010100" // /* MW 2 */ + 5783 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5784 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5785 "01000001" // /* MW 7 */ + 5786 "11100010" // /* MW 6 */ + 5787 "00000000" // /* MW 5 */ + 5788 "00000010" // /* MW 4 */ + 5789 "01100000" // /* MW 3 */ + 5790 "10000100" // /* MW 2 */ + 5791 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5792 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5793 "01100001" // /* MW 7 */ + 5794 "11100000" // /* MW 6 */ + 5795 "00000001" // /* MW 5 */ + 5796 "00000010" // /* MW 4 */ + 5797 "01100000" // /* MW 3 */ + 5798 "10010100" // /* MW 2 */ + 5799 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5800 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5801 "01000001" // /* MW 7 */ + 5802 "11100010" // /* MW 6 */ + 5803 "00000000" // /* MW 5 */ + 5804 "00000010" // /* MW 4 */ + 5805 "01100000" // /* MW 3 */ + 5806 "10000100" // /* MW 2 */ + 5807 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5808 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5809 "01100001" // /* MW 7 */ + 5810 "11100000" // /* MW 6 */ + 5811 "00000001" // /* MW 5 */ + 5812 "00000010" // /* MW 4 */ + 5813 "01100000" // /* MW 3 */ + 5814 "10010100" // /* MW 2 */ + 5815 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5816 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5817 "01000001" // /* MW 7 */ + 5818 "11100010" // /* MW 6 */ + 5819 "00000000" // /* MW 5 */ + 5820 "00000010" // /* MW 4 */ + 5821 "01100000" // /* MW 3 */ + 5822 "10000100" // /* MW 2 */ + 5823 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5824 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5825 "01100001" // /* MW 7 */ + 5826 "11100000" // /* MW 6 */ + 5827 "00000001" // /* MW 5 */ + 5828 "00000010" // /* MW 4 */ + 5829 "01100000" // /* MW 3 */ + 5830 "10010100" // /* MW 2 */ + 5831 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5832 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5833 "00100011" // /* MW 3 */ + 5834 "00011100" // /* MW 2 */ + 5835 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 131 4 first +.src_ref 2 "elementwise_binary.h" 154 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5836 "01011100" // VST.CONV.bf16.fp32 cml1, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 5837 "00000000" // /* MW 5 */ + 5838 "01010000" // /* MW 4 */ + 5839 "01100000" // /* MW 3 */ + 5840 "10010100" // /* MW 2 */ + 5841 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5842 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5843 "00100011" // /* MW 3 */ + 5844 "00011100" // /* MW 2 */ + 5845 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 154 20 first +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5846 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5847 "10100011" // /* MW 3 */ + 5848 "00011100" // /* MW 2 */ + 5849 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1119 102 first +.src_ref 2 "elementwise_binary.h" 177 20 first +.delay_slot + 5850 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5851 "00100011" // /* MW 3 */ + 5852 "00011100" // /* MW 2 */ + 5853 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 2 "elementwise_binary.h" 154 20 first +.delay_slot + 5854 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5855 "10100011" // /* MW 3 */ + 5856 "00011100" // /* MW 2 */ + 5857 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_end0 + 5859 "00000000" // /* MW 1 */ +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0 +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.function superkernel_mul1d _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.src_ref 6 "superkernels.cpp" 277 first +.src_ref 6 "superkernels.cpp" 282 6 +.function_start + 5872 "01000100" // MOVXM p4, #508416 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5873 "00000000" // /* MW 5 */ + 5874 "11000100" // /* MW 4 */ + 5875 "11001000" // /* MW 3 */ + 5876 "00000111" // /* MW 2 */ + 5877 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 282 6 first + 5878 "11010100" // LDA r16, [p4]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5879 "11000001" // /* MW 5 */ + 5880 "10110101" // /* MW 4 */ + 5881 "11011000" // /* MW 3 */ + 5882 "11000010" // /* MW 2 */ + 5883 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 277 + 5884 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5885 "00000001" // /* MW 5 */ + 5886 "00000000" // /* MW 4 */ + 5887 "00000000" // /* MW 3 */ + 5888 "00001000" // /* MW 2 */ + 5889 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 279 22 first + 5890 "00111010" // ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5891 "01111001" // /* MW 9 */ + 5892 "01100000" // /* MW 8 */ + 5893 "11001010" // /* MW 7 */ + 5894 "10000001" // /* MW 6 */ + 5895 "00010100" // /* MW 5 */ + 5896 "00100011" // /* MW 4 */ + 5897 "10110000" // /* MW 3 */ + 5898 "00111010" // /* MW 2 */ + 5899 "11111111" // /* MW 1 */ + 5900 "00000010" // ST p0, [sp, #-20]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5901 "01110000" // /* MW 7 */ + 5902 "11010000" // /* MW 6 */ + 5903 "00001011" // /* MW 5 */ + 5904 "00000000" // /* MW 4 */ + 5905 "10110000" // /* MW 3 */ + 5906 "10000011" // /* MW 2 */ + 5907 "11111101" // /* MW 1 */ + 5908 "10011000" // ST r0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5909 "00010101" // /* MW 3 */ + 5910 "11111100" // /* MW 2 */ + 5911 "00001111" // /* MW 1 */ + 5912 "10011000" // ST lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5913 "00111101" // /* MW 3 */ + 5914 "11110000" // /* MW 2 */ + 5915 "00001111" // /* MW 1 */ + 5916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5917 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 282 6 first +.src_ref 6 "superkernels.cpp" 282 16 first + 5918 "10000100" // JNZ r16, #6064 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6064 delay_slots=5 */ + 5919 "00000001" // /* MW 5 */ + 5920 "01000000" // /* MW 4 */ + 5921 "11011000" // /* MW 3 */ + 5922 "00001011" // /* MW 2 */ + 5923 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 279 30 first +.delay_slot + 5924 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5925 "11111011" // /* MW 3 */ + 5926 "01100011" // /* MW 2 */ + 5927 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 279 11 +.delay_slot + 5928 "01000100" // MOVXM p2, #508420 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5929 "00001000" // /* MW 5 */ + 5930 "11000100" // /* MW 4 */ + 5931 "11000100" // /* MW 3 */ + 5932 "00000111" // /* MW 2 */ + 5933 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 279 11 +.delay_slot + 5934 "00000010" // ST r17, [p2]; MOV p2, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5935 "01110000" // /* MW 7 */ + 5936 "01100000" // /* MW 6 */ + 5937 "00110111" // /* MW 5 */ + 5938 "00000001" // /* MW 4 */ + 5939 "00110000" // /* MW 3 */ + 5940 "11000110" // /* MW 2 */ + 5941 "01000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.delay_slot + 5942 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5943 "11000000" // /* MW 3 */ + 5944 "11010110" // /* MW 2 */ + 5945 "00011011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 285 4 +.src_ref 6 "superkernels.cpp" 287 28 +.src_ref 6 "superkernels.cpp" 289 42 +.src_ref 6 "superkernels.cpp" 301 2 +.delay_slot + 5946 "00111010" // ST p2, [sp, #-12]; MOVXM p7, #508736 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5947 "00010001" // /* MW 9 */ + 5948 "10100000" // /* MW 8 */ + 5949 "10110001" // /* MW 7 */ + 5950 "11110011" // /* MW 6 */ + 5951 "00000001" // /* MW 5 */ + 5952 "00000000" // /* MW 4 */ + 5953 "10110000" // /* MW 3 */ + 5954 "10100011" // /* MW 2 */ + 5955 "11111110" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.src_ref 6 "superkernels.cpp" 285 4 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5956 "00111010" // MOVS p0, p7; MOVXM p2, #508448 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5957 "00010001" // /* MW 9 */ + 5958 "00010000" // /* MW 8 */ + 5959 "00110001" // /* MW 7 */ + 5960 "11110001" // /* MW 6 */ + 5961 "00000001" // /* MW 5 */ + 5962 "00000000" // /* MW 4 */ + 5963 "01100000" // /* MW 3 */ + 5964 "10010001" // /* MW 2 */ + 5965 "00010011" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5966 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #508444 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5967 "00010000" // /* MW 9 */ + 5968 "00001110" // /* MW 8 */ + 5969 "00110001" // /* MW 7 */ + 5970 "11110001" // /* MW 6 */ + 5971 "00000001" // /* MW 5 */ + 5972 "00000000" // /* MW 4 */ + 5973 "11100000" // /* MW 3 */ + 5974 "11000000" // /* MW 2 */ + 5975 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5977 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 285 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 5978 "00000100" // JL #5392 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=5392 delay_slots=5 */ + 5979 "00000001" // /* MW 5 */ + 5980 "00000000" // /* MW 4 */ + 5981 "10001000" // /* MW 3 */ + 5982 "00001010" // /* MW 2 */ + 5983 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5985 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5987 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5988 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5989 "00110001" // /* MW 3 */ + 5990 "00100000" // /* MW 2 */ + 5991 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 5992 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5993 "00000101" // /* MW 3 */ + 5994 "00100000" // /* MW 2 */ + 5995 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 5996 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5997 "00010001" // /* MW 3 */ + 5998 "00000110" // /* MW 2 */ + 5999 "00001010" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 289 18 +.src_ref 6 "superkernels.cpp" 289 42 first +.return_address + 6000 "10111010" // LDA r16, [p7]; MOVXM p1, #508420 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6001 "00010000" // /* MW 9 */ + 6002 "00000010" // /* MW 8 */ + 6003 "10110001" // /* MW 7 */ + 6004 "11110000" // /* MW 6 */ + 6005 "00000001" // /* MW 5 */ + 6006 "00000000" // /* MW 4 */ + 6007 "11010000" // /* MW 3 */ + 6008 "11000010" // /* MW 2 */ + 6009 "11100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 289 16 +.src_ref 6 "superkernels.cpp" 289 18 +.src_ref 6 "superkernels.cpp" 298 48 + 6010 "10111010" // LDA r17, [p1]; MOVXM p3, #508424 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6011 "00010000" // /* MW 9 */ + 6012 "00000100" // /* MW 8 */ + 6013 "10110001" // /* MW 7 */ + 6014 "11110001" // /* MW 6 */ + 6015 "00000001" // /* MW 5 */ + 6016 "00000000" // /* MW 4 */ + 6017 "11010000" // /* MW 3 */ + 6018 "11000110" // /* MW 2 */ + 6019 "00100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 287 28 first +.src_ref 6 "superkernels.cpp" 290 16 +.src_ref 6 "superkernels.cpp" 299 48 + 6020 "10111010" // LDA.u16 r18, [p7, #10]; MOVXM p1, #508428 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6021 "00010000" // /* MW 9 */ + 6022 "00000110" // /* MW 8 */ + 6023 "10110001" // /* MW 7 */ + 6024 "11110000" // /* MW 6 */ + 6025 "00000001" // /* MW 5 */ + 6026 "00000000" // /* MW 4 */ + 6027 "01010000" // /* MW 3 */ + 6028 "11001011" // /* MW 2 */ + 6029 "11101010" // /* MW 1 */ + 6030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6031 "00000000" // /* MW 1 */ + 6032 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6033 "00000000" // /* MW 1 */ + 6034 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6035 "00000000" // /* MW 1 */ + 6036 "10000100" // J #6080 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6080 delay_slots=5 */ + 6037 "00000000" // /* MW 5 */ + 6038 "00000000" // /* MW 4 */ + 6039 "11100000" // /* MW 3 */ + 6040 "00001011" // /* MW 2 */ + 6041 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 287 13 +.delay_slot + 6042 "01000100" // MOVXM p2, #508440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6043 "00110000" // /* MW 5 */ + 6044 "11000100" // /* MW 4 */ + 6045 "11000100" // /* MW 3 */ + 6046 "00000111" // /* MW 2 */ + 6047 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 289 27 first +.delay_slot + 6048 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6049 "00001111" // /* MW 3 */ + 6050 "01100001" // /* MW 2 */ + 6051 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 287 13 first +.delay_slot + 6052 "10011000" // ST r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6053 "01010001" // /* MW 3 */ + 6054 "00000110" // /* MW 2 */ + 6055 "00001010" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 289 16 first +.delay_slot + 6056 "10011000" // ST r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6057 "00010001" // /* MW 3 */ + 6058 "00000110" // /* MW 2 */ + 6059 "00001011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 290 16 first +.delay_slot + 6060 "10011000" // ST r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6061 "00010001" // /* MW 3 */ + 6062 "00000110" // /* MW 2 */ + 6063 "00001001" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 +.src_ref 6 "superkernels.cpp" 298 48 + 6064 "01000100" // MOVXM p3, #508424 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6065 "00010000" // /* MW 5 */ + 6066 "11000100" // /* MW 4 */ + 6067 "11000110" // /* MW 3 */ + 6068 "00000111" // /* MW 2 */ + 6069 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 299 48 + 6070 "10111010" // NOPA; MOVXM p1, #508428 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6071 "00010000" // /* MW 9 */ + 6072 "00000110" // /* MW 8 */ + 6073 "10110001" // /* MW 7 */ + 6074 "11110000" // /* MW 6 */ + 6075 "00000001" // /* MW 5 */ + 6076 "00000000" // /* MW 4 */ + 6077 "11110000" // /* MW 3 */ + 6078 "00101100" // /* MW 2 */ + 6079 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 +.src_ref 1 "io_buffer_main.h" 242 49 first + 6080 "00011000" // ADD.NC p0, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6081 "10000110" // /* MW 3 */ + 6082 "01100111" // /* MW 2 */ + 6083 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 293 2 + 6084 "10111010" // LDA r27, [p0], #-4; MOVXM p2, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6085 "00010000" // /* MW 9 */ + 6086 "00000000" // /* MW 8 */ + 6087 "00110001" // /* MW 7 */ + 6088 "11110001" // /* MW 6 */ + 6089 "00000001" // /* MW 5 */ + 6090 "00000000" // /* MW 4 */ + 6091 "11010000" // /* MW 3 */ + 6092 "11101110" // /* MW 2 */ + 6093 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 6094 "10011000" // LDA r16, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6095 "00010110" // /* MW 3 */ + 6096 "11111110" // /* MW 2 */ + 6097 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 6098 "10011000" // LDA r17, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6099 "00110110" // /* MW 3 */ + 6100 "11111110" // /* MW 2 */ + 6101 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 293 2 first + 6102 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6103 "01010110" // /* MW 3 */ + 6104 "00000110" // /* MW 2 */ + 6105 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first + 6106 "10011000" // LDA r19, [p0, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6107 "01110110" // /* MW 3 */ + 6108 "01000110" // /* MW 2 */ + 6109 "00000000" // /* MW 1 */ + 6110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6111 "00000000" // /* MW 1 */ + 6112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6113 "00000000" // /* MW 1 */ + 6114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6115 "00000000" // /* MW 1 */ + 6116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6117 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 6118 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6119 "00000010" // /* MW 3 */ + 6120 "01100001" // /* MW 2 */ + 6121 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 +.src_ref 6 "superkernels.cpp" 293 2 first + 6122 "01011100" // ST r16, [p0]; ADD r16, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6123 "00001110" // /* MW 5 */ + 6124 "01000000" // /* MW 4 */ + 6125 "00111001" // /* MW 3 */ + 6126 "11000010" // /* MW 2 */ + 6127 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 293 2 + 6128 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6129 "00010001" // /* MW 3 */ + 6130 "00000110" // /* MW 2 */ + 6131 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 +.src_ref 1 "io_buffer_main.h" 419 8 + 6132 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6133 "11111101" // /* MW 3 */ + 6134 "11100000" // /* MW 2 */ + 6135 "00010111" // /* MW 1 */ + 6136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6137 "00000000" // /* MW 1 */ + 6138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6139 "00000000" // /* MW 1 */ + 6140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6141 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 6142 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6143 "00001000" // /* MW 3 */ + 6144 "11010011" // /* MW 2 */ + 6145 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 first + 6146 "00011000" // ADD.NC p2, r14, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6147 "00000110" // /* MW 3 */ + 6148 "01100111" // /* MW 2 */ + 6149 "00011010" // /* MW 1 */ + 6150 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6151 "00000000" // /* MW 1 */ + 6152 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6153 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 + 6154 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6155 "01110110" // /* MW 3 */ + 6156 "11111111" // /* MW 2 */ + 6157 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 6158 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6159 "00110110" // /* MW 3 */ + 6160 "11111110" // /* MW 2 */ + 6161 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 6162 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6163 "01010110" // /* MW 3 */ + 6164 "11111110" // /* MW 2 */ + 6165 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 47 first + 6166 "10011000" // LDA r19, [p2, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6167 "01110110" // /* MW 3 */ + 6168 "01010110" // /* MW 2 */ + 6169 "00000010" // /* MW 1 */ + 6170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6171 "00000000" // /* MW 1 */ + 6172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6173 "00000000" // /* MW 1 */ + 6174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6175 "00000000" // /* MW 1 */ + 6176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6177 "00000000" // /* MW 1 */ + 6178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6179 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 6180 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6181 "00010010" // /* MW 3 */ + 6182 "10100011" // /* MW 2 */ + 6183 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 + 6184 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6185 "00110001" // /* MW 3 */ + 6186 "00000110" // /* MW 2 */ + 6187 "00001010" // /* MW 1 */ + 6188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6189 "00000000" // /* MW 1 */ + 6190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6191 "00000000" // /* MW 1 */ + 6192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6193 "00000000" // /* MW 1 */ + 6194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6195 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 6196 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6197 "00001000" // /* MW 3 */ + 6198 "11010011" // /* MW 2 */ + 6199 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 32 +.src_ref 6 "superkernels.cpp" 298 46 +.src_ref 6 "superkernels.cpp" 299 46 + 6200 "00111010" // MOVS p6, p2; MOVX r16, #1; MOV r14, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6201 "01111001" // /* MW 9 */ + 6202 "01100000" // /* MW 8 */ + 6203 "11001110" // /* MW 7 */ + 6204 "00101001" // /* MW 6 */ + 6205 "00000000" // /* MW 5 */ + 6206 "00000001" // /* MW 4 */ + 6207 "01100000" // /* MW 3 */ + 6208 "00010001" // /* MW 2 */ + 6209 "11010001" // /* MW 1 */ + 6210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6211 "00000000" // /* MW 1 */ + 6212 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6213 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 + 6214 "00011000" // LDA p4, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6215 "00011001" // /* MW 3 */ + 6216 "11101110" // /* MW 2 */ + 6217 "00000111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 298 48 first + 6218 "00001100" // LDA r17, [p3]; ST p0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6219 "00111011" // /* MW 5 */ + 6220 "11011000" // /* MW 4 */ + 6221 "11011111" // /* MW 3 */ + 6222 "11000110" // /* MW 2 */ + 6223 "01100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 299 48 first +.src_ref 6 "superkernels.cpp" 301 2 + 6224 "11010100" // LDA r20, [p1]; MOV p3, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6225 "10000001" // /* MW 5 */ + 6226 "11011101" // /* MW 4 */ + 6227 "11010110" // /* MW 3 */ + 6228 "11010010" // /* MW 2 */ + 6229 "00100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 6230 "10011000" // LDA r18, [p2], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6231 "01010110" // /* MW 3 */ + 6232 "01001110" // /* MW 2 */ + 6233 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 6234 "10011000" // LDA p2, [p0], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6235 "00011110" // /* MW 3 */ + 6236 "01011101" // /* MW 2 */ + 6237 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 40 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6238 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6239 "11000000" // /* MW 3 */ + 6240 "01100000" // /* MW 2 */ + 6241 "00011111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6243 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6244 "10011000" // LDA r19, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6245 "01110110" // /* MW 3 */ + 6246 "00000110" // /* MW 2 */ + 6247 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 6248 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6249 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 301 2 first +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 6250 "00000100" // JL #5552 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=5552 delay_slots=5 */ + 6251 "00000001" // /* MW 5 */ + 6252 "00000000" // /* MW 4 */ + 6253 "11011000" // /* MW 3 */ + 6254 "00001010" // /* MW 2 */ + 6255 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 40 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6256 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6257 "11000000" // /* MW 3 */ + 6258 "11010100" // /* MW 2 */ + 6259 "00011011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 298 46 first +.delay_slot + 6260 "10011000" // LSHL r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6261 "00001101" // /* MW 3 */ + 6262 "01100011" // /* MW 2 */ + 6263 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 299 46 first +.delay_slot + 6264 "10011000" // LSHL r16, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6265 "00001101" // /* MW 3 */ + 6266 "00100001" // /* MW 2 */ + 6267 "00010101" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 299 46 +.delay_slot + 6268 "01011000" // ADD.NC p1, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6269 "01000001" // /* MW 3 */ + 6270 "01101001" // /* MW 2 */ + 6271 "00011001" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 298 46 first +.delay_slot + 6272 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6273 "00000000" // /* MW 15 */ + 6274 "00000000" // /* MW 14 */ + 6275 "10101000" // /* MW 13 */ + 6276 "11100010" // /* MW 12 */ + 6277 "00110100" // /* MW 11 */ + 6278 "00000000" // /* MW 10 */ + 6279 "00000000" // /* MW 9 */ + 6280 "00000000" // /* MW 8 */ + 6281 "01011011" // /* MW 7 */ + 6282 "00000001" // /* MW 6 */ + 6283 "00100000" // /* MW 5 */ + 6284 "00000000" // /* MW 4 */ + 6285 "11110000" // /* MW 3 */ + 6286 "00101100" // /* MW 2 */ + 6287 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 32 first +.src_ref 1 "io_buffer_main.h" 351 28 +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 351 40 +.src_ref 1 "io_buffer_main.h" 449 8 +.src_ref 1 "io_buffer_main.h" 449 8 +.return_address + 6288 "10111010" // LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6289 "01111000" // /* MW 9 */ + 6290 "11010000" // /* MW 8 */ + 6291 "10110011" // /* MW 7 */ + 6292 "00101000" // /* MW 6 */ + 6293 "00000000" // /* MW 5 */ + 6294 "00000001" // /* MW 4 */ + 6295 "11010000" // /* MW 3 */ + 6296 "11000110" // /* MW 2 */ + 6297 "11001000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 19 + 6298 "01000100" // MOVXM p6, #508440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6299 "00110000" // /* MW 5 */ + 6300 "11000100" // /* MW 4 */ + 6301 "11001100" // /* MW 3 */ + 6302 "00000111" // /* MW 2 */ + 6303 "00000000" // /* MW 1 */ + 6304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6305 "00000000" // /* MW 1 */ + 6306 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6307 "00000000" // /* MW 1 */ + 6308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6309 "00000000" // /* MW 1 */ + 6310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6311 "00000000" // /* MW 1 */ + 6312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6313 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 6314 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6315 "00001000" // /* MW 3 */ + 6316 "01010001" // /* MW 2 */ + 6317 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first + 6318 "10011000" // LDA r17, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6319 "00110110" // /* MW 3 */ + 6320 "11110110" // /* MW 2 */ + 6321 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 + 6322 "00011000" // LDA p2, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6323 "00011001" // /* MW 3 */ + 6324 "11101101" // /* MW 2 */ + 6325 "00000111" // /* MW 1 */ + 6326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6327 "00000000" // /* MW 1 */ + 6328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6329 "00000000" // /* MW 1 */ + 6330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6331 "00000000" // /* MW 1 */ + 6332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6333 "00000000" // /* MW 1 */ + 6334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6335 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 + 6336 "10011000" // SUB r17, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6337 "00010001" // /* MW 3 */ + 6338 "00100011" // /* MW 2 */ + 6339 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first +.src_ref 1 "io_buffer_main.h" 351 28 + 6340 "00001100" // LDA r17, [p2, #20]; ST r17, [p1, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6341 "01100011" // /* MW 5 */ + 6342 "11101100" // /* MW 4 */ + 6343 "11010011" // /* MW 3 */ + 6344 "11000110" // /* MW 2 */ + 6345 "01001010" // /* MW 1 */ + 6346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6347 "00000000" // /* MW 1 */ + 6348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6349 "00000000" // /* MW 1 */ + 6350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6351 "00000000" // /* MW 1 */ + 6352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6353 "00000000" // /* MW 1 */ + 6354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6355 "00000000" // /* MW 1 */ + 6356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6357 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 6358 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6359 "00001000" // /* MW 3 */ + 6360 "01010001" // /* MW 2 */ + 6361 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first +.src_ref 6 "superkernels.cpp" 305 6 +.src_ref 6 "superkernels.cpp" 306 14 + 6362 "10111010" // LDA r19, [p7, #-8]; MOVXM p1, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6363 "00010000" // /* MW 9 */ + 6364 "00000000" // /* MW 8 */ + 6365 "10110001" // /* MW 7 */ + 6366 "11110000" // /* MW 6 */ + 6367 "00000001" // /* MW 5 */ + 6368 "00000000" // /* MW 4 */ + 6369 "11010000" // /* MW 3 */ + 6370 "11001110" // /* MW 2 */ + 6371 "11111100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 19 first + 6372 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6373 "01010110" // /* MW 3 */ + 6374 "00000110" // /* MW 2 */ + 6375 "00000110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 6 + 6376 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6377 "00110110" // /* MW 3 */ + 6378 "00000110" // /* MW 2 */ + 6379 "00000001" // /* MW 1 */ + 6380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6381 "00000000" // /* MW 1 */ + 6382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6383 "00000000" // /* MW 1 */ + 6384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6385 "00000000" // /* MW 1 */ + 6386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6387 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 first + 6388 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6389 "00110001" // /* MW 3 */ + 6390 "00100001" // /* MW 2 */ + 6391 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 6392 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6393 "00010001" // /* MW 3 */ + 6394 "11100110" // /* MW 2 */ + 6395 "00001111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 16 first + 6396 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6397 "00101000" // /* MW 3 */ + 6398 "01100001" // /* MW 2 */ + 6399 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 305 6 + 6400 "10000100" // JNZ r16, #6432 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6432 delay_slots=5 */ + 6401 "00000001" // /* MW 5 */ + 6402 "01000000" // /* MW 4 */ + 6403 "10010000" // /* MW 3 */ + 6404 "00001100" // /* MW 2 */ + 6405 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6407 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6408 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6409 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6411 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6413 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6415 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 306 14 + 6416 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6417 "00000001" // /* MW 3 */ + 6418 "00100000" // /* MW 2 */ + 6419 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 306 14 first + 6420 "00110110" // NOPA; NOPB; ST r16, [p1]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6421 "11000001" // /* MW 11 */ + 6422 "00001000" // /* MW 10 */ + 6423 "10000011" // /* MW 9 */ + 6424 "00000000" // /* MW 8 */ + 6425 "00000000" // /* MW 7 */ + 6426 "00000000" // /* MW 6 */ + 6427 "00100000" // /* MW 5 */ + 6428 "00000000" // /* MW 4 */ + 6429 "11110000" // /* MW 3 */ + 6430 "00101100" // /* MW 2 */ + 6431 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 +.src_ref 6 "superkernels.cpp" 308 + 6432 "00011000" // LDA lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6433 "00111001" // /* MW 3 */ + 6434 "11110000" // /* MW 2 */ + 6435 "00000111" // /* MW 1 */ + 6436 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6437 "11110001" // /* MW 3 */ + 6438 "11111101" // /* MW 2 */ + 6439 "00000111" // /* MW 1 */ + 6440 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6441 "10011001" // /* MW 3 */ + 6442 "11110111" // /* MW 2 */ + 6443 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 6444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6445 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.noswbrkpt + 6446 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6447 "11010001" // /* MW 3 */ + 6448 "11111001" // /* MW 2 */ + 6449 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 6450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6451 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 6452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6453 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 308 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 6454 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 6455 "00000000" // /* MW 3 */ + 6456 "00101000" // /* MW 2 */ + 6457 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6458 "00011000" // MOVS p6, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6459 "00001011" // /* MW 3 */ + 6460 "10001110" // /* MW 2 */ + 6461 "00001110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 308 +.delay_slot + 6462 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6463 "00000001" // /* MW 5 */ + 6464 "00000000" // /* MW 4 */ + 6465 "00000000" // /* MW 3 */ + 6466 "11111000" // /* MW 2 */ + 6467 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6469 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6471 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0 + 6473 "00000000" // /* MW 1 */ +.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh +.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_begin0 +.function setup_conv2d_dw_params_bf16 _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh +.src_ref 7 "conv2d_dw_bf16_params.h" 177 first +.src_ref 7 "conv2d_dw_bf16_params.h" 181 15 +.src_ref 7 "conv2d_dw_bf16_params.h" 181 17 first +.function_start + 6480 "10111010" // LDA el0, [p0], #4; MOVXM p1, #508864 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6481 "00010000" // /* MW 9 */ + 6482 "11100000" // /* MW 8 */ + 6483 "10110001" // /* MW 7 */ + 6484 "11110000" // /* MW 6 */ + 6485 "00000001" // /* MW 5 */ + 6486 "00000000" // /* MW 4 */ + 6487 "11010000" // /* MW 3 */ + 6488 "10000101" // /* MW 2 */ + 6489 "00000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 181 17 first +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.src_ref 7 "conv2d_dw_bf16_params.h" 184 80 + 6490 "10111010" // LDA eh0, [p0], #4; MOVX r16, #2; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6491 "01011000" // /* MW 9 */ + 6492 "00000000" // /* MW 8 */ + 6493 "00001000" // /* MW 7 */ + 6494 "01001011" // /* MW 6 */ + 6495 "00000000" // /* MW 5 */ + 6496 "00000001" // /* MW 4 */ + 6497 "11010000" // /* MW 3 */ + 6498 "10000001" // /* MW 2 */ + 6499 "00000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 177 + 6500 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6501 "00000001" // /* MW 5 */ + 6502 "00000000" // /* MW 4 */ + 6503 "00000000" // /* MW 3 */ + 6504 "00001000" // /* MW 2 */ + 6505 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 32 + 6506 "00111010" // ST p7, [sp, #-16]; MOVXM p7, #508864 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6507 "00010001" // /* MW 9 */ + 6508 "11100000" // /* MW 8 */ + 6509 "10110001" // /* MW 7 */ + 6510 "11110011" // /* MW 6 */ + 6511 "00000001" // /* MW 5 */ + 6512 "00000000" // /* MW 4 */ + 6513 "10110000" // /* MW 3 */ + 6514 "01110011" // /* MW 2 */ + 6515 "11111110" // /* MW 1 */ + 6516 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6517 "00111101" // /* MW 3 */ + 6518 "11111100" // /* MW 2 */ + 6519 "00001111" // /* MW 1 */ + 6520 "10011000" // ST r14, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6521 "11010101" // /* MW 3 */ + 6522 "11110101" // /* MW 2 */ + 6523 "00001111" // /* MW 1 */ + 6524 "10011000" // ST r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6525 "11110101" // /* MW 3 */ + 6526 "11111001" // /* MW 2 */ + 6527 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 181 15 + 6528 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6529 "00101001" // /* MW 3 */ + 6530 "00011100" // /* MW 2 */ + 6531 "00001001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 181 15 + 6532 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6533 "00001001" // /* MW 3 */ + 6534 "00011100" // /* MW 2 */ + 6535 "00001001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 181 17 + 6536 "10011000" // LDA el0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6537 "00101110" // /* MW 3 */ + 6538 "00000100" // /* MW 2 */ + 6539 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 181 17 + 6540 "10011000" // LDA eh0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6541 "00001110" // /* MW 3 */ + 6542 "00010100" // /* MW 2 */ + 6543 "00000000" // /* MW 1 */ + 6544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6545 "00000000" // /* MW 1 */ + 6546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6547 "00000000" // /* MW 1 */ + 6548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6549 "00000000" // /* MW 1 */ + 6550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6551 "00000000" // /* MW 1 */ + 6552 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6553 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 181 15 + 6554 "10011000" // ST el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6555 "00101001" // /* MW 3 */ + 6556 "00000100" // /* MW 2 */ + 6557 "00001001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 181 15 + 6558 "10011000" // ST eh0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6559 "00001001" // /* MW 3 */ + 6560 "00010100" // /* MW 2 */ + 6561 "00001001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 32 first + 6562 "10011000" // LDA.u8 r17, [p7], #5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6563 "00101010" // /* MW 3 */ + 6564 "01011110" // /* MW 2 */ + 6565 "00000111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 52 + 6566 "10011000" // LDA.u8 r18, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6567 "01001010" // /* MW 3 */ + 6568 "11101110" // /* MW 2 */ + 6569 "00000111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 80 + 6570 "10011000" // LDA.u8 r1, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6571 "00101010" // /* MW 3 */ + 6572 "11101100" // /* MW 2 */ + 6573 "00000111" // /* MW 1 */ + 6574 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6575 "00000000" // /* MW 1 */ + 6576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6577 "00000000" // /* MW 1 */ + 6578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6579 "00000000" // /* MW 1 */ + 6580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6581 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.no_stack_arguments + 6582 "00000100" // JL #12464 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12464 delay_slots=5 */ + 6583 "00000001" // /* MW 5 */ + 6584 "00000000" // /* MW 4 */ + 6585 "01011000" // /* MW 3 */ + 6586 "00011000" // /* MW 2 */ + 6587 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 38 +.delay_slot + 6588 "01011100" // ST r18, [sp, #-20]; SUB r14, r17, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6589 "01000011" // /* MW 5 */ + 6590 "10111010" // /* MW 4 */ + 6591 "10111000" // /* MW 3 */ + 6592 "11001010" // /* MW 2 */ + 6593 "11111101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.src_ref 7 "conv2d_dw_bf16_params.h" 184 80 +.delay_slot + 6594 "00111010" // ST r1, [sp, #-28]; NE r16, r1, r16; MOV r15, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6595 "01111001" // /* MW 9 */ + 6596 "01010000" // /* MW 8 */ + 6597 "11101000" // /* MW 7 */ + 6598 "01000101" // /* MW 6 */ + 6599 "00001000" // /* MW 5 */ + 6600 "00000011" // /* MW 4 */ + 6601 "10110000" // /* MW 3 */ + 6602 "10000110" // /* MW 2 */ + 6603 "11111100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.delay_slot + 6604 "01011100" // ST r16, [sp, #-24]; LT r27, r14, r24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6605 "00010101" // /* MW 5 */ + 6606 "01101111" // /* MW 4 */ + 6607 "10110111" // /* MW 3 */ + 6608 "01000010" // /* MW 2 */ + 6609 "11111101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.delay_slot + 6610 "10011000" // SUB r17, r24, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6611 "11100001" // /* MW 3 */ + 6612 "00100010" // /* MW 2 */ + 6613 "00010110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.delay_slot + 6614 "01111010" // NOPA; NOPS; SEL.EQZ r0, r14, r17, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6615 "00010010" // /* MW 9 */ + 6616 "10000001" // /* MW 8 */ + 6617 "00000011" // /* MW 7 */ + 6618 "00000000" // /* MW 6 */ + 6619 "01011011" // /* MW 5 */ + 6620 "00000001" // /* MW 4 */ + 6621 "11110000" // /* MW 3 */ + 6622 "00101100" // /* MW 2 */ + 6623 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.src_ref 7 "conv2d_dw_bf16_params.h" 185 32 first +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 +.return_address + 6624 "10111010" // LDA.u8 r17, [p7], #3; XOR r20, r15, r14; MOV r16, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6625 "01011000" // /* MW 9 */ + 6626 "00000000" // /* MW 8 */ + 6627 "00001000" // /* MW 7 */ + 6628 "00110110" // /* MW 6 */ + 6629 "01000111" // /* MW 5 */ + 6630 "00011111" // /* MW 4 */ + 6631 "01010000" // /* MW 3 */ + 6632 "11000101" // /* MW 2 */ + 6633 "11100111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 first +.src_ref 7 "conv2d_dw_bf16_params.h" 185 52 + 6634 "00101100" // LDA.u8 r18, [p7], #-2; SUB r19, r16, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6635 "01000011" // /* MW 5 */ + 6636 "01001100" // /* MW 4 */ + 6637 "01011000" // /* MW 3 */ + 6638 "11001001" // /* MW 2 */ + 6639 "11111101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 + 6640 "00101100" // LDA r1, [sp, #-28]; LT r27, r20, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6641 "00010101" // /* MW 5 */ + 6642 "01101110" // /* MW 4 */ + 6643 "00101010" // /* MW 3 */ + 6644 "10000110" // /* MW 2 */ + 6645 "11111100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 + 6646 "00011000" // SEL.EQZ r19, r2, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6647 "00110010" // /* MW 3 */ + 6648 "10100111" // /* MW 2 */ + 6649 "00010000" // /* MW 1 */ + 6650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6651 "00000000" // /* MW 1 */ + 6652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6653 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 first +.no_stack_arguments + 6654 "00000100" // JL #12464 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12464 delay_slots=5 */ + 6655 "00000001" // /* MW 5 */ + 6656 "00000000" // /* MW 4 */ + 6657 "01011000" // /* MW 3 */ + 6658 "00011000" // /* MW 2 */ + 6659 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 66 first +.delay_slot + 6660 "00011000" // EXTEND.s16 r19, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6661 "01110000" // /* MW 3 */ + 6662 "11100110" // /* MW 2 */ + 6663 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 184 87 +.src_ref 7 "conv2d_dw_bf16_params.h" 185 38 first +.delay_slot + 6664 "00111010" // ST r18, [sp, #-32]; SUB r14, r17, r18; ADD.NC r15, r19, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6665 "01001001" // /* MW 9 */ + 6666 "11000000" // /* MW 8 */ + 6667 "11101100" // /* MW 7 */ + 6668 "00001101" // /* MW 6 */ + 6669 "11101001" // /* MW 5 */ + 6670 "00100010" // /* MW 4 */ + 6671 "10110000" // /* MW 3 */ + 6672 "01001010" // /* MW 2 */ + 6673 "11111100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 +.delay_slot + 6674 "10011000" // LT r27, r14, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6675 "00001010" // /* MW 3 */ + 6676 "10110111" // /* MW 2 */ + 6677 "00010011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 +.delay_slot + 6678 "10011000" // SUB r17, r16, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6679 "11100001" // /* MW 3 */ + 6680 "00100010" // /* MW 2 */ + 6681 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 +.delay_slot + 6682 "00101100" // NOPA; SEL.EQZ r0, r14, r17, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6683 "00100100" // /* MW 5 */ + 6684 "00000010" // /* MW 4 */ + 6685 "11110111" // /* MW 3 */ + 6686 "00101100" // /* MW 2 */ + 6687 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 +.src_ref 7 "conv2d_dw_bf16_params.h" 186 23 +.src_ref 7 "conv2d_dw_bf16_params.h" 192 50 +.src_ref 7 "conv2d_dw_bf16_params.h" 198 45 +.src_ref 7 "conv2d_dw_bf16_params.h" 200 64 +.src_ref 7 "conv2d_dw_bf16_params.h" 200 64 +.src_ref 7 "conv2d_dw_bf16_params.h" 216 53 +.src_ref 7 "conv2d_dw_bf16_params.h" 235 59 +.src_ref 7 "conv2d_dw_bf16_params.h" 237 57 +.src_ref 7 "conv2d_dw_bf16_params.h" 239 139 +.return_address + 6688 "10111010" // LDA r1, [sp, #-28]; MOVX r19, #2; MOV m0, #66 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6689 "01011000" // /* MW 9 */ + 6690 "01000010" // /* MW 8 */ + 6691 "00000000" // /* MW 7 */ + 6692 "01001000" // /* MW 6 */ + 6693 "00110000" // /* MW 5 */ + 6694 "00000001" // /* MW 4 */ + 6695 "00100000" // /* MW 3 */ + 6696 "10000110" // /* MW 2 */ + 6697 "11111100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 +.src_ref 7 "conv2d_dw_bf16_params.h" 192 50 first +.src_ref 7 "conv2d_dw_bf16_params.h" 200 45 +.src_ref 7 "conv2d_dw_bf16_params.h" 200 45 +.src_ref 7 "conv2d_dw_bf16_params.h" 209 100 +.src_ref 7 "conv2d_dw_bf16_params.h" 219 63 +.src_ref 7 "conv2d_dw_bf16_params.h" 226 49 +.src_ref 7 "conv2d_dw_bf16_params.h" 230 47 +.src_ref 7 "conv2d_dw_bf16_params.h" 237 41 + 6698 "10111010" // LDA.u8 r20, [p7], m0; MOVX r24, #0; MOV r18, #8 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6699 "01011000" // /* MW 9 */ + 6700 "00001000" // /* MW 8 */ + 6701 "01001000" // /* MW 7 */ + 6702 "00001010" // /* MW 6 */ + 6703 "10000000" // /* MW 5 */ + 6704 "00000001" // /* MW 4 */ + 6705 "01010000" // /* MW 3 */ + 6706 "01010001" // /* MW 2 */ + 6707 "11100001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 197 78 +.src_ref 7 "conv2d_dw_bf16_params.h" 198 76 +.src_ref 7 "conv2d_dw_bf16_params.h" 200 45 +.src_ref 7 "conv2d_dw_bf16_params.h" 234 81 + 6708 "10111010" // LDA r28, [sp, #-32]; MOVX r16, #-6; MOV r31, #23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6709 "01011000" // /* MW 9 */ + 6710 "00010111" // /* MW 8 */ + 6711 "11101000" // /* MW 7 */ + 6712 "01001011" // /* MW 6 */ + 6713 "00000111" // /* MW 5 */ + 6714 "00111111" // /* MW 4 */ + 6715 "00100000" // /* MW 3 */ + 6716 "01110010" // /* MW 2 */ + 6717 "11111100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 186 74 +.src_ref 7 "conv2d_dw_bf16_params.h" 197 68 +.src_ref 7 "conv2d_dw_bf16_params.h" 201 50 +.src_ref 7 "conv2d_dw_bf16_params.h" 208 63 +.src_ref 7 "conv2d_dw_bf16_params.h" 226 49 +.src_ref 7 "conv2d_dw_bf16_params.h" 230 47 +.src_ref 7 "conv2d_dw_bf16_params.h" 239 51 +.src_ref 7 "conv2d_dw_bf16_params.h" 239 74 + 6718 "10111010" // LDA r22, [sp, #-20]; MOVX r26, #-2; MOV r21, #6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6719 "01011000" // /* MW 9 */ + 6720 "00000110" // /* MW 8 */ + 6721 "10101000" // /* MW 7 */ + 6722 "11001010" // /* MW 6 */ + 6723 "10100111" // /* MW 5 */ + 6724 "00111111" // /* MW 4 */ + 6725 "00100000" // /* MW 3 */ + 6726 "11011010" // /* MW 2 */ + 6727 "11111101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 186 84 +.src_ref 7 "conv2d_dw_bf16_params.h" 198 39 +.src_ref 7 "conv2d_dw_bf16_params.h" 201 50 +.src_ref 7 "conv2d_dw_bf16_params.h" 234 77 + 6728 "10111010" // LDA r3, [sp, #-24]; MOVX r29, #508; MOV m2, #32 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6729 "01011000" // /* MW 9 */ + 6730 "00100000" // /* MW 8 */ + 6731 "00000000" // /* MW 7 */ + 6732 "10001001" // /* MW 6 */ + 6733 "11010111" // /* MW 5 */ + 6734 "00001111" // /* MW 4 */ + 6735 "00100000" // /* MW 3 */ + 6736 "00001110" // /* MW 2 */ + 6737 "11111101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 208 16 +.src_ref 7 "conv2d_dw_bf16_params.h" 208 71 +.src_ref 7 "conv2d_dw_bf16_params.h" 209 93 +.src_ref 7 "conv2d_dw_bf16_params.h" 214 72 +.src_ref 7 "conv2d_dw_bf16_params.h" 216 53 +.src_ref 7 "conv2d_dw_bf16_params.h" 221 73 +.src_ref 7 "conv2d_dw_bf16_params.h" 230 41 +.src_ref 7 "conv2d_dw_bf16_params.h" 237 41 + 6738 "10111010" // MOVA m0, #-178; MOVX r23, #1; MOV r0, #128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6739 "01011000" // /* MW 9 */ + 6740 "10000000" // /* MW 8 */ + 6741 "00001000" // /* MW 7 */ + 6742 "00101000" // /* MW 6 */ + 6743 "01110000" // /* MW 5 */ + 6744 "00000001" // /* MW 4 */ + 6745 "10000000" // /* MW 3 */ + 6746 "11000000" // /* MW 2 */ + 6747 "11101001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 first +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 +.src_ref 7 "conv2d_dw_bf16_params.h" 232 54 + 6748 "10111010" // MOVA m1, #186; SUB r17, r24, r2; MOV vaddSign0, crMCDEn /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6749 "01111000" // /* MW 9 */ + 6750 "10110000" // /* MW 8 */ + 6751 "10011101" // /* MW 7 */ + 6752 "00001100" // /* MW 6 */ + 6753 "00010001" // /* MW 5 */ + 6754 "00110001" // /* MW 4 */ + 6755 "10000000" // /* MW 3 */ + 6756 "01000100" // /* MW 2 */ + 6757 "00010111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 + 6758 "10011000" // XOR r30, r1, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6759 "11100110" // /* MW 3 */ + 6760 "01111100" // /* MW 2 */ + 6761 "00010000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 + 6762 "10011000" // LT r27, r30, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6763 "10001010" // /* MW 3 */ + 6764 "10110111" // /* MW 2 */ + 6765 "00010111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 +.src_ref 7 "conv2d_dw_bf16_params.h" 206 70 + 6766 "00100100" // SEL.EQZ r17, r2, r17, r27; ADD.NC r7, r28, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6767 "11111111" // /* MW 5 */ + 6768 "10111100" // /* MW 4 */ + 6769 "01000011" // /* MW 3 */ + 6770 "01100010" // /* MW 2 */ + 6771 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 67 +.src_ref 7 "conv2d_dw_bf16_params.h" 186 74 + 6772 "00100100" // EXTEND.s16 r30, r17; ADD.NC r22, r22, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6773 "00000010" // /* MW 5 */ + 6774 "00110110" // /* MW 4 */ + 6775 "00001011" // /* MW 3 */ + 6776 "10001110" // /* MW 2 */ + 6777 "10001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 185 88 +.src_ref 7 "conv2d_dw_bf16_params.h" 192 22 first + 6778 "00100100" // MUL r30, r15, r20; ADD.NC r14, r30, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6779 "00000001" // /* MW 5 */ + 6780 "00111110" // /* MW 4 */ + 6781 "11110111" // /* MW 3 */ + 6782 "10101001" // /* MW 2 */ + 6783 "01111111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 197 68 +.src_ref 7 "conv2d_dw_bf16_params.h" 198 45 first + 6784 "00100100" // MUL r2, r1, r14; ADD.NC r17, r22, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6785 "00000001" // /* MW 5 */ + 6786 "10110110" // /* MW 4 */ + 6787 "11111000" // /* MW 3 */ + 6788 "10011101" // /* MW 2 */ + 6789 "00001000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 200 64 first + 6790 "10011000" // EQ r27, r19, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6791 "00010111" // /* MW 3 */ + 6792 "11110110" // /* MW 2 */ + 6793 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 198 55 first + 6794 "10011000" // MUL r2, r30, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6795 "00101111" // /* MW 3 */ + 6796 "10000100" // /* MW 2 */ + 6797 "00010111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 200 45 first +.src_ref 7 "conv2d_dw_bf16_params.h" 209 42 +.src_ref 7 "conv2d_dw_bf16_params.h" 213 69 + 6798 "01100100" // SEL.EQZ r31, r31, r18, r27; MOV r18, #-3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6799 "11110101" // /* MW 5 */ + 6800 "00111111" // /* MW 4 */ + 6801 "01001001" // /* MW 3 */ + 6802 "11100100" // /* MW 2 */ + 6803 "11111111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 198 76 first + 6804 "10011000" // LSHL r16, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6805 "00001101" // /* MW 3 */ + 6806 "10100001" // /* MW 2 */ + 6807 "00010000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 197 68 first + 6808 "10011000" // LSHL r2, r17, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6809 "10101101" // /* MW 3 */ + 6810 "01000101" // /* MW 2 */ + 6811 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 197 78 +.src_ref 7 "conv2d_dw_bf16_params.h" 204 79 + 6812 "00100100" // MUL r2, r2, r28; ADD.NC r4, r2, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6813 "11111111" // /* MW 5 */ + 6814 "00100010" // /* MW 4 */ + 6815 "11110010" // /* MW 3 */ + 6816 "10111001" // /* MW 2 */ + 6817 "00010000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 201 50 first + 6818 "10011000" // LSHL r3, r3, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6819 "01011101" // /* MW 3 */ + 6820 "11000111" // /* MW 2 */ + 6821 "00010000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 186 23 first +.src_ref 7 "conv2d_dw_bf16_params.h" 197 39 first + 6822 "01011100" // ST r2, [p7], #-4; MUL r5, r15, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6823 "00111111" // /* MW 5 */ + 6824 "10010100" // /* MW 4 */ + 6825 "00110111" // /* MW 3 */ + 6826 "10001010" // /* MW 2 */ + 6827 "11111111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 186 84 +.src_ref 7 "conv2d_dw_bf16_params.h" 198 39 first +.src_ref 7 "conv2d_dw_bf16_params.h" 212 52 +.src_ref 7 "conv2d_dw_bf16_params.h" 219 53 + 6828 "00111010" // ST r16, [p7], m2; AND r22, r29, r22; MOV r16, #4 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6829 "01011001" // /* MW 9 */ + 6830 "00000100" // /* MW 8 */ + 6831 "00001000" // /* MW 7 */ + 6832 "00100110" // /* MW 6 */ + 6833 "01101011" // /* MW 5 */ + 6834 "00111011" // /* MW 4 */ + 6835 "00110000" // /* MW 3 */ + 6836 "01000010" // /* MW 2 */ + 6837 "11101001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 186 44 first +.src_ref 7 "conv2d_dw_bf16_params.h" 200 42 first + 6838 "01011100" // ST r31, [p7], #-16; ADD r22, r5, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6839 "11000001" // /* MW 5 */ + 6840 "11011010" // /* MW 4 */ + 6841 "00110010" // /* MW 3 */ + 6842 "11111110" // /* MW 2 */ + 6843 "11111001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 193 22 first +.src_ref 7 "conv2d_dw_bf16_params.h" 201 47 first + 6844 "01011100" // ST r3, [p7], #24; MUL r31, r22, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6845 "10011111" // /* MW 5 */ + 6846 "01111110" // /* MW 4 */ + 6847 "00111011" // /* MW 3 */ + 6848 "10001110" // /* MW 2 */ + 6849 "11101101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 204 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 212 52 first + 6850 "01011100" // ST r4, [p7], #4; LSHL r22, r22, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6851 "00011011" // /* MW 5 */ + 6852 "01011010" // /* MW 4 */ + 6853 "00111011" // /* MW 3 */ + 6854 "10010010" // /* MW 2 */ + 6855 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 208 16 first + 6856 "10011000" // LSHL r3, r31, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6857 "01111101" // /* MW 3 */ + 6858 "11000111" // /* MW 2 */ + 6859 "00010111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 208 63 + 6860 "10011000" // LSHL r4, r4, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6861 "01011101" // /* MW 3 */ + 6862 "00001001" // /* MW 2 */ + 6863 "00010001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 208 71 +.src_ref 7 "conv2d_dw_bf16_params.h" 214 72 first + 6864 "10100100" // SUB r25, r22, r3; ADD.NC r4, r4, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6865 "00000010" // /* MW 5 */ + 6866 "00100100" // /* MW 4 */ + 6867 "00110010" // /* MW 3 */ + 6868 "01000110" // /* MW 2 */ + 6869 "10110110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 200 45 +.src_ref 7 "conv2d_dw_bf16_params.h" 205 42 +.src_ref 7 "conv2d_dw_bf16_params.h" 208 23 first +.src_ref 7 "conv2d_dw_bf16_params.h" 214 72 +.src_ref 7 "conv2d_dw_bf16_params.h" 219 63 + 6870 "10111010" // MOVA r0, #-64; SUB r6, r3, r4; ADD.NC r25, r25, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6871 "10101000" // /* MW 9 */ + 6872 "01000000" // /* MW 8 */ + 6873 "00101110" // /* MW 7 */ + 6874 "00001111" // /* MW 6 */ + 6875 "01100010" // /* MW 5 */ + 6876 "00000110" // /* MW 4 */ + 6877 "00000000" // /* MW 3 */ + 6878 "00000000" // /* MW 2 */ + 6879 "11111000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 205 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 216 53 first + 6880 "01011100" // ST r0, [p7], #4; MUL r1, r31, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6881 "00111111" // /* MW 5 */ + 6882 "10000100" // /* MW 4 */ + 6883 "00111111" // /* MW 3 */ + 6884 "10000010" // /* MW 2 */ + 6885 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 206 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 209 53 first + 6886 "01011100" // ST r7, [p7], #4; MUL r31, r31, r7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6887 "11111111" // /* MW 5 */ + 6888 "11111100" // /* MW 4 */ + 6889 "00111111" // /* MW 3 */ + 6890 "10011110" // /* MW 2 */ + 6891 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 207 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 213 69 first + 6892 "01011100" // ST r6, [p7], #4; LSHL r5, r5, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6893 "01011011" // /* MW 5 */ + 6894 "10010110" // /* MW 4 */ + 6895 "00110010" // /* MW 3 */ + 6896 "10011010" // /* MW 2 */ + 6897 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 209 93 first +.src_ref 7 "conv2d_dw_bf16_params.h" 213 73 + 6898 "00100100" // LSHL r6, r31, r23; ADD.NC r31, r5, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6899 "11111111" // /* MW 5 */ + 6900 "10100101" // /* MW 4 */ + 6901 "10111111" // /* MW 3 */ + 6902 "10101111" // /* MW 2 */ + 6903 "11111001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 209 42 +.src_ref 7 "conv2d_dw_bf16_params.h" 209 100 +.src_ref 7 "conv2d_dw_bf16_params.h" 216 61 + 6904 "10111010" // MOVA r4, #7; LSHL r5, r20, r18; ADD.NC r18, r6, r4 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6905 "10101000" // /* MW 9 */ + 6906 "10001000" // /* MW 8 */ + 6907 "01001001" // /* MW 7 */ + 6908 "01101110" // /* MW 6 */ + 6909 "01011001" // /* MW 5 */ + 6910 "00101000" // /* MW 4 */ + 6911 "00000000" // /* MW 3 */ + 6912 "11100100" // /* MW 2 */ + 6913 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 209 100 + 6914 "10011000" // SUB r18, r24, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6915 "00100001" // /* MW 3 */ + 6916 "00100101" // /* MW 2 */ + 6917 "00010110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 209 42 +.src_ref 7 "conv2d_dw_bf16_params.h" 211 77 first +.src_ref 7 "conv2d_dw_bf16_params.h" 216 53 first + 6918 "00111010" // ST r18, [p7], #4; LSHL r1, r1, r23; ADD.NC r18, r5, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6919 "11001001" // /* MW 9 */ + 6920 "01111111" // /* MW 8 */ + 6921 "01001001" // /* MW 7 */ + 6922 "11101110" // /* MW 6 */ + 6923 "00011011" // /* MW 5 */ + 6924 "00000010" // /* MW 4 */ + 6925 "00110000" // /* MW 3 */ + 6926 "11001010" // /* MW 2 */ + 6927 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 211 42 +.src_ref 7 "conv2d_dw_bf16_params.h" 216 61 + 6928 "01011100" // ST r18, [p7], #4; ADD r6, r1, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6929 "11000001" // /* MW 5 */ + 6930 "10011010" // /* MW 4 */ + 6931 "00110000" // /* MW 3 */ + 6932 "11001010" // /* MW 2 */ + 6933 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 212 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 216 61 + 6934 "01011100" // ST r22, [p7], #4; LSHL r1, r31, r4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6935 "10011011" // /* MW 5 */ + 6936 "10000100" // /* MW 4 */ + 6937 "00111111" // /* MW 3 */ + 6938 "11011010" // /* MW 2 */ + 6939 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 213 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 216 61 first +.src_ref 7 "conv2d_dw_bf16_params.h" 224 116 +.src_ref 7 "conv2d_dw_bf16_params.h" 224 140 + 6940 "00111010" // ST r31, [p7], #4; ADD r22, r3, r1; MOV r1, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6941 "01011001" // /* MW 9 */ + 6942 "11111111" // /* MW 8 */ + 6943 "00101111" // /* MW 7 */ + 6944 "10000100" // /* MW 6 */ + 6945 "01100000" // /* MW 5 */ + 6946 "00000111" // /* MW 4 */ + 6947 "00110000" // /* MW 3 */ + 6948 "11111110" // /* MW 2 */ + 6949 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 214 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 216 61 +.src_ref 7 "conv2d_dw_bf16_params.h" 222 43 +.src_ref 7 "conv2d_dw_bf16_params.h" 225 43 +.src_ref 7 "conv2d_dw_bf16_params.h" 229 41 +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 + 6950 "00111010" // ST r25, [p7], #4; SUB r3, r6, r22; MOV r22, #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6951 "01011001" // /* MW 9 */ + 6952 "01000000" // /* MW 8 */ + 6953 "11001000" // /* MW 7 */ + 6954 "00001110" // /* MW 6 */ + 6955 "00111011" // /* MW 5 */ + 6956 "00001100" // /* MW 4 */ + 6957 "00110000" // /* MW 3 */ + 6958 "11100110" // /* MW 2 */ + 6959 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 215 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 219 53 first + 6960 "01011100" // ST r3, [p7], #4; LSHL r16, r15, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6961 "00011011" // /* MW 5 */ + 6962 "11000010" // /* MW 4 */ + 6963 "00110111" // /* MW 3 */ + 6964 "10001110" // /* MW 2 */ + 6965 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 200 45 first +.src_ref 7 "conv2d_dw_bf16_params.h" 218 43 first +.src_ref 7 "conv2d_dw_bf16_params.h" 219 63 + 6966 "01011100" // ST r18, [p7], #4; SEL.EQZ r0, r0, r24, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6967 "00000100" // /* MW 5 */ + 6968 "00000011" // /* MW 4 */ + 6969 "00110000" // /* MW 3 */ + 6970 "11001010" // /* MW 2 */ + 6971 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 219 60 first +.src_ref 7 "conv2d_dw_bf16_params.h" 221 73 first + 6972 "10100100" // LSHL r3, r30, r23; ADD.NC r0, r16, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6973 "00000010" // /* MW 5 */ + 6974 "00110000" // /* MW 4 */ + 6975 "10110000" // /* MW 3 */ + 6976 "11101111" // /* MW 2 */ + 6977 "11110000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 219 43 +.src_ref 7 "conv2d_dw_bf16_params.h" 221 73 + 6978 "01011100" // ST r0, [p7], #4; SUB r16, r16, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6979 "01100011" // /* MW 5 */ + 6980 "01000000" // /* MW 4 */ + 6981 "00111000" // /* MW 3 */ + 6982 "10000010" // /* MW 2 */ + 6983 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 220 43 first +.src_ref 7 "conv2d_dw_bf16_params.h" 221 73 +.src_ref 7 "conv2d_dw_bf16_params.h" 224 116 first +.src_ref 7 "conv2d_dw_bf16_params.h" 224 140 first + 6984 "00111010" // ST r31, [p7], #4; MAC r1, r1, r5, r2; ADD.NC r31, r16, #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6985 "00001001" // /* MW 9 */ + 6986 "00010000" // /* MW 8 */ + 6987 "11101100" // /* MW 7 */ + 6988 "00110011" // /* MW 6 */ + 6989 "00010001" // /* MW 5 */ + 6990 "00001010" // /* MW 4 */ + 6991 "00110000" // /* MW 3 */ + 6992 "11111110" // /* MW 2 */ + 6993 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 221 43 first +.src_ref 7 "conv2d_dw_bf16_params.h" 230 47 first + 6994 "01011100" // ST r31, [p7], #4; LSHL r31, r18, r21 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6995 "10111011" // /* MW 5 */ + 6996 "01111110" // /* MW 4 */ + 6997 "00111001" // /* MW 3 */ + 6998 "11111110" // /* MW 2 */ + 6999 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 222 43 first +.src_ref 7 "conv2d_dw_bf16_params.h" 226 49 first + 7000 "01011100" // ST r22, [p7], #4; LSHL r2, r1, r21 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7001 "10111011" // /* MW 5 */ + 7002 "10001010" // /* MW 4 */ + 7003 "00110000" // /* MW 3 */ + 7004 "11011010" // /* MW 2 */ + 7005 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 224 42 first +.src_ref 7 "conv2d_dw_bf16_params.h" 226 49 + 7006 "01011100" // ST r1, [p7], #4; SUB r1, r24, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7007 "01000011" // /* MW 5 */ + 7008 "00000100" // /* MW 4 */ + 7009 "00111100" // /* MW 3 */ + 7010 "10000110" // /* MW 2 */ + 7011 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 225 43 first +.src_ref 7 "conv2d_dw_bf16_params.h" 230 47 first + 7012 "01011100" // ST r22, [p7], #4; SUB r2, r24, r31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7013 "11100011" // /* MW 5 */ + 7014 "00001011" // /* MW 4 */ + 7015 "00111100" // /* MW 3 */ + 7016 "11011010" // /* MW 2 */ + 7017 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 226 43 first + 7018 "10011000" // ST r1, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7019 "00110001" // /* MW 3 */ + 7020 "00011100" // /* MW 2 */ + 7021 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 228 40 first + 7022 "10011000" // ST r18, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7023 "01010001" // /* MW 3 */ + 7024 "00011110" // /* MW 2 */ + 7025 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 229 41 first + 7026 "10011000" // ST r22, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7027 "11010001" // /* MW 3 */ + 7028 "00011110" // /* MW 2 */ + 7029 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 230 41 first + 7030 "10011000" // ST r2, [p7], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7031 "01010001" // /* MW 3 */ + 7032 "00001000" // /* MW 2 */ + 7033 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 54 first + 7034 "10011000" // LDA.u8 r1, [p7], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7035 "00101010" // /* MW 3 */ + 7036 "00101000" // /* MW 2 */ + 7037 "00000111" // /* MW 1 */ + 7038 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7039 "00000000" // /* MW 1 */ + 7040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7041 "00000000" // /* MW 1 */ + 7042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7043 "00000000" // /* MW 1 */ + 7044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7045 "00000000" // /* MW 1 */ + 7046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7047 "00000000" // /* MW 1 */ + 7048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7049 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 +.src_ref 7 "conv2d_dw_bf16_params.h" 232 58 + 7050 "10000100" // JZ r1, #7088 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7088 delay_slots=5 */ + 7051 "00000001" // /* MW 5 */ + 7052 "00000000" // /* MW 4 */ + 7053 "11011000" // /* MW 3 */ + 7054 "00001101" // /* MW 2 */ + 7055 "00001000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 190 20 +.delay_slot + 7056 "00011000" // MOVX r16, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7057 "00001101" // /* MW 3 */ + 7058 "00100000" // /* MW 2 */ + 7059 "00010000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 190 20 first +.delay_slot + 7060 "10011000" // LSHL r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7061 "00001101" // /* MW 3 */ + 7062 "11100001" // /* MW 2 */ + 7063 "00010011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 +.delay_slot + 7064 "01000100" // MOVXM r31, #-8454144 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7065 "00000000" // /* MW 5 */ + 7066 "10100000" // /* MW 4 */ + 7067 "00001111" // /* MW 3 */ + 7068 "01111111" // /* MW 2 */ + 7069 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7071 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7073 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 + 7074 "01111110" // NOPA; NOPB; NOPS; MOVX r31, #0; NOPM /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 7075 "01100000" // /* MW 13 */ + 7076 "00101011" // /* MW 12 */ + 7077 "00000000" // /* MW 11 */ + 7078 "10101111" // /* MW 10 */ + 7079 "00110100" // /* MW 9 */ + 7080 "00000000" // /* MW 8 */ + 7081 "00000001" // /* MW 7 */ + 7082 "00111110" // /* MW 6 */ + 7083 "00100000" // /* MW 5 */ + 7084 "00000000" // /* MW 4 */ + 7085 "11110000" // /* MW 3 */ + 7086 "00101100" // /* MW 2 */ + 7087 "00000000" // /* MW 1 */ +.label TGT_F_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh_608 +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 +.src_ref 7 "conv2d_dw_bf16_params.h" 236 40 + 7088 "10111010" // MOVA m0, #-197; MOVXM p0, #508448 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7089 "00010000" // /* MW 9 */ + 7090 "00010000" // /* MW 8 */ + 7091 "00110001" // /* MW 7 */ + 7092 "11110000" // /* MW 6 */ + 7093 "00000001" // /* MW 5 */ + 7094 "00000000" // /* MW 4 */ + 7095 "10000000" // /* MW 3 */ + 7096 "01100000" // /* MW 2 */ + 7097 "11100111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 first +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 first +.src_ref 7 "conv2d_dw_bf16_params.h" 234 77 first + 7098 "10111010" // LDA.s8 r17, [p0]; AND r29, r29, r17; VINSERT.32 x0, x0, #0, r31 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7099 "10111000" // /* MW 9 */ + 7100 "11111000" // /* MW 8 */ + 7101 "00000001" // /* MW 7 */ + 7102 "10100100" // /* MW 6 */ + 7103 "11011000" // /* MW 5 */ + 7104 "00111011" // /* MW 4 */ + 7105 "01010000" // /* MW 3 */ + 7106 "11000100" // /* MW 2 */ + 7107 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 +.src_ref 7 "conv2d_dw_bf16_params.h" 239 122 + 7108 "10111010" // LDA r1, [sp, #-8]; MOVX r31, #5; VMOV bmll0, x0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7109 "01111000" // /* MW 9 */ + 7110 "01001001" // /* MW 8 */ + 7111 "00000000" // /* MW 7 */ + 7112 "10101000" // /* MW 6 */ + 7113 "11110000" // /* MW 5 */ + 7114 "00000001" // /* MW 4 */ + 7115 "00100000" // /* MW 3 */ + 7116 "00000110" // /* MW 2 */ + 7117 "11111111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 237 53 +.src_ref 7 "conv2d_dw_bf16_params.h" 239 122 first +.src_ref 7 "conv2d_dw_bf16_params.h" 240 + 7118 "10111010" // LDA lr, [sp, #-4]; LSHL r31, r15, r31; MOV m1, #201 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7119 "01011000" // /* MW 9 */ + 7120 "11001001" // /* MW 8 */ + 7121 "10000000" // /* MW 7 */ + 7122 "11101100" // /* MW 6 */ + 7123 "11111111" // /* MW 5 */ + 7124 "00011111" // /* MW 4 */ + 7125 "00100000" // /* MW 3 */ + 7126 "10000111" // /* MW 2 */ + 7127 "11111111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 + 7128 "00101100" // LDA p0, [sp, #-16]; MOVX r25, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7129 "00000010" // /* MW 5 */ + 7130 "01100100" // /* MW 4 */ + 7131 "00100000" // /* MW 3 */ + 7132 "00000011" // /* MW 2 */ + 7133 "11111110" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7134 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7135 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7136 "00011000" // LDA r14, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7137 "11010001" // /* MW 3 */ + 7138 "11110101" // /* MW 2 */ + 7139 "00000111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 39 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7140 "00011000" // ST.s16 r1, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7141 "00110111" // /* MW 3 */ + 7142 "00101100" // /* MW 2 */ + 7143 "00000111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 +.src_ref 7 "conv2d_dw_bf16_params.h" 234 81 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7144 "11100100" // MUL r28, r29, r28; MOV crRnd, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7145 "01000001" // /* MW 5 */ + 7146 "01110001" // /* MW 4 */ + 7147 "11111111" // /* MW 3 */ + 7148 "00111001" // /* MW 2 */ + 7149 "11101111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 first +.src_ref 7 "conv2d_dw_bf16_params.h" 239 74 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7150 "00111010" // VCONV.bf16.fp32 wl0, bmll0; LSHL r17, r15, r26; MOV r15, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7151 "01111001" // /* MW 9 */ + 7152 "01010000" // /* MW 8 */ + 7153 "11101000" // /* MW 7 */ + 7154 "01101101" // /* MW 6 */ + 7155 "00011101" // /* MW 5 */ + 7156 "00011111" // /* MW 4 */ + 7157 "11000000" // /* MW 3 */ + 7158 "00000010" // /* MW 2 */ + 7159 "00001000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 234 109 first +.src_ref 7 "conv2d_dw_bf16_params.h" 239 51 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7160 "00100100" // MUL r20, r28, r20; ADD.NC r17, r17, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7161 "11111111" // /* MW 5 */ + 7162 "10110001" // /* MW 4 */ + 7163 "11111000" // /* MW 3 */ + 7164 "00101001" // /* MW 2 */ + 7165 "11100101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 232 41 first +.src_ref 7 "conv2d_dw_bf16_params.h" 235 59 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7166 "01100100" // LSHL r29, r20, r19; VEXTRACT.16 r1, x0, #0, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7167 "00000011" // /* MW 5 */ + 7168 "10000010" // /* MW 4 */ + 7169 "10110000" // /* MW 3 */ + 7170 "01100111" // /* MW 2 */ + 7171 "10100111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 236 52 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7172 "10011000" // MUL r28, r30, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7173 "11101111" // /* MW 3 */ + 7174 "10111000" // /* MW 2 */ + 7175 "00010111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 51 first + 7176 "10011000" // LSHL r21, r17, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7177 "01011101" // /* MW 3 */ + 7178 "01101011" // /* MW 2 */ + 7179 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 234 40 first +.src_ref 7 "conv2d_dw_bf16_params.h" 237 41 first + 7180 "01011100" // ST r20, [p7], #4; LSHL r23, r28, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7181 "11111011" // /* MW 5 */ + 7182 "01011110" // /* MW 4 */ + 7183 "00111110" // /* MW 3 */ + 7184 "11010010" // /* MW 2 */ + 7185 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 235 41 first +.src_ref 7 "conv2d_dw_bf16_params.h" 239 51 first + 7186 "01011100" // ST r29, [p7], #4; SUB r26, r31, r21 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7187 "10100011" // /* MW 5 */ + 7188 "11101010" // /* MW 4 */ + 7189 "00111111" // /* MW 3 */ + 7190 "11110110" // /* MW 2 */ + 7191 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 236 40 first +.src_ref 7 "conv2d_dw_bf16_params.h" 239 51 + 7192 "01011100" // ST r28, [p7], m0; MAC r21, r21, r31, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7193 "01001100" // /* MW 5 */ + 7194 "11010110" // /* MW 4 */ + 7195 "00111111" // /* MW 3 */ + 7196 "01110010" // /* MW 2 */ + 7197 "11100001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 237 53 first + 7198 "10011000" // LDA.u8 r20, [p7], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7199 "10001010" // /* MW 3 */ + 7200 "00101010" // /* MW 2 */ + 7201 "00000111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 240 first + 7202 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7203 "00000001" // /* MW 5 */ + 7204 "00000000" // /* MW 4 */ + 7205 "00000000" // /* MW 3 */ + 7206 "11111000" // /* MW 2 */ + 7207 "11111111" // /* MW 1 */ + 7208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7209 "00000000" // /* MW 1 */ + 7210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7211 "00000000" // /* MW 1 */ + 7212 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7213 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 139 first + 7214 "10011000" // LSHL r30, r30, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7215 "00111101" // /* MW 3 */ + 7216 "10111101" // /* MW 2 */ + 7217 "00010111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 51 + 7218 "10011000" // SUB r21, r30, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7219 "01010001" // /* MW 3 */ + 7220 "10101011" // /* MW 2 */ + 7221 "00010111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 237 57 first + 7222 "10011000" // EQ r27, r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7223 "01000111" // /* MW 3 */ + 7224 "11110111" // /* MW 2 */ + 7225 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 237 41 + 7226 "00011000" // SEL.EQZ r19, r24, r23, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7227 "01110010" // /* MW 3 */ + 7228 "00100111" // /* MW 2 */ + 7229 "00010110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 237 39 + 7230 "10011000" // ST r19, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7231 "01110001" // /* MW 3 */ + 7232 "00011110" // /* MW 2 */ + 7233 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 238 39 first + 7234 "10011000" // ST r16, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7235 "00010001" // /* MW 3 */ + 7236 "00011110" // /* MW 2 */ + 7237 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 first + 7238 "10011000" // ST r17, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7239 "00110001" // /* MW 3 */ + 7240 "00011110" // /* MW 2 */ + 7241 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 +.src_ref 7 "conv2d_dw_bf16_params.h" 240 first + 7242 "01011100" // ST r22, [p7], #4; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 7243 "00000000" // /* MW 5 */ + 7244 "01010000" // /* MW 4 */ + 7245 "00110000" // /* MW 3 */ + 7246 "11011010" // /* MW 2 */ + 7247 "11100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 first +.delay_slot + 7248 "10011000" // ST r18, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7249 "01010001" // /* MW 3 */ + 7250 "00011110" // /* MW 2 */ + 7251 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 +.delay_slot + 7252 "10011000" // ST r26, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7253 "01010001" // /* MW 3 */ + 7254 "00011111" // /* MW 2 */ + 7255 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 +.delay_slot + 7256 "10011000" // ST r21, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7257 "10110001" // /* MW 3 */ + 7258 "00011110" // /* MW 2 */ + 7259 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 +.delay_slot + 7260 "10011000" // ST r25, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7261 "00110001" // /* MW 3 */ + 7262 "00000111" // /* MW 2 */ + 7263 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16_params.h" 239 49 +.delay_slot + 7264 "00000010" // ST r25, [p7, #4]; MOV p7, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7265 "01110000" // /* MW 7 */ + 7266 "01100000" // /* MW 6 */ + 7267 "10110000" // /* MW 5 */ + 7268 "00000011" // /* MW 4 */ + 7269 "00110000" // /* MW 3 */ + 7270 "11100110" // /* MW 2 */ +.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh__end +.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_end0 + 7271 "11100010" // /* MW 1 */ +.label __Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params___func_begin0 +.label _Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params +.function conv2d_dw_core _Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 7 "conv2d_dw_bf16.h" 158 first +.src_ref 7 "conv2d_dw_bf16.h" 179 4 +.src_ref 7 "conv2d_dw_bf16.h" 183 4 +.function_start + 7280 "10110110" // MOVA m6, #-120; VLDB x6, [p0], #64; MOVXM p4, #508972 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7281 "00010000" // /* MW 11 */ + 7282 "00010110" // /* MW 10 */ + 7283 "00110010" // /* MW 9 */ + 7284 "11110010" // /* MW 8 */ + 7285 "00000001" // /* MW 7 */ + 7286 "00000000" // /* MW 6 */ + 7287 "01101000" // /* MW 5 */ + 7288 "00111011" // /* MW 4 */ + 7289 "10000000" // /* MW 3 */ + 7290 "00011000" // /* MW 2 */ + 7291 "11110001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 153 18 +.src_ref 4 "shuffle.hpp" 153 18 +.src_ref 7 "conv2d_dw_bf16.h" 179 4 first +.src_ref 7 "conv2d_dw_bf16.h" 202 56 + 7292 "10110110" // LDA dj2, [p4], #-4; VLDB x1, [p0], #64; MOVX r1, #16; MOV m5, #128 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7293 "01011000" // /* MW 11 */ + 7294 "10000000" // /* MW 10 */ + 7295 "10000000" // /* MW 9 */ + 7296 "00001010" // /* MW 8 */ + 7297 "00010010" // /* MW 7 */ + 7298 "00000000" // /* MW 6 */ + 7299 "11101000" // /* MW 5 */ + 7300 "00111000" // /* MW 4 */ + 7301 "11010000" // /* MW 3 */ + 7302 "10101000" // /* MW 2 */ + 7303 "10011111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 179 4 +.src_ref 7 "conv2d_dw_bf16.h" 202 56 +.src_ref 7 "conv2d_dw_bf16.h" 228 12 +.src_ref 7 "conv2d_dw_bf16.h" 229 12 +.src_ref 7 "conv2d_dw_bf16.h" 230 12 +.src_ref 7 "conv2d_dw_bf16.h" 231 12 +.src_ref 7 "conv2d_dw_bf16.h" 232 12 +.src_ref 7 "conv2d_dw_bf16.h" 233 12 +.src_ref 7 "conv2d_dw_bf16.h" 234 12 +.src_ref 7 "conv2d_dw_bf16.h" 235 12 + 7304 "10111010" // LDA dn2, [p4], #12; MOVX r0, #60; MOV m4, #-112 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7305 "01011000" // /* MW 9 */ + 7306 "10010000" // /* MW 8 */ + 7307 "00000111" // /* MW 7 */ + 7308 "10001010" // /* MW 6 */ + 7309 "00000111" // /* MW 5 */ + 7310 "00000000" // /* MW 4 */ + 7311 "11010000" // /* MW 3 */ + 7312 "10100100" // /* MW 2 */ + 7313 "10000111" // /* MW 1 */ +.src_ref 8 "aie_core.h" 81 15 +.src_ref 8 "aie_core.h" 81 15 +.src_ref 8 "aie_core.h" 100 15 +.src_ref 8 "aie_core.h" 100 15 +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "shuffle.hpp" 153 18 +.src_ref 4 "shuffle.hpp" 153 18 +.src_ref 7 "conv2d_dw_bf16.h" 179 4 + 7314 "10111010" // LDA dj6, [p4], #-4; MOVX r2, #32; MOV dc4, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7315 "01011000" // /* MW 9 */ + 7316 "00000000" // /* MW 8 */ + 7317 "01100000" // /* MW 7 */ + 7318 "00001010" // /* MW 6 */ + 7319 "00100100" // /* MW 5 */ + 7320 "00000000" // /* MW 4 */ + 7321 "11010000" // /* MW 3 */ + 7322 "11101000" // /* MW 2 */ + 7323 "10011111" // /* MW 1 */ +.src_ref 8 "aie_core.h" 81 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 7 "conv2d_dw_bf16.h" 179 4 +.src_ref 7 "conv2d_dw_bf16.h" 208 8 first + 7324 "01110110" // LDA dn6, [p4], #8; MOVS dc7, dc4; MOVXM ls, #7520 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7325 "00010000" // /* MW 11 */ + 7326 "10110000" // /* MW 10 */ + 7327 "01111110" // /* MW 9 */ + 7328 "00000100" // /* MW 8 */ + 7329 "00000000" // /* MW 7 */ + 7330 "00000000" // /* MW 6 */ + 7331 "01001011" // /* MW 5 */ + 7332 "00010000" // /* MW 4 */ + 7333 "11010111" // /* MW 3 */ + 7334 "11100100" // /* MW 2 */ + 7335 "10000101" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 7 "conv2d_dw_bf16.h" 179 4 first +.src_ref 7 "conv2d_dw_bf16.h" 208 8 + 7336 "01110110" // LDA m2, [p4], #8; MOVS dc2, dc4; MOVXM le, #7600 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7337 "00010000" // /* MW 11 */ + 7338 "11011000" // /* MW 10 */ + 7339 "10111110" // /* MW 9 */ + 7340 "00000101" // /* MW 8 */ + 7341 "00000000" // /* MW 7 */ + 7342 "00000000" // /* MW 6 */ + 7343 "01001011" // /* MW 5 */ + 7344 "00010000" // /* MW 4 */ + 7345 "11010010" // /* MW 3 */ + 7346 "10100000" // /* MW 2 */ + 7347 "10000101" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 7 "conv2d_dw_bf16.h" 180 4 first +.src_ref 7 "conv2d_dw_bf16.h" 202 56 + 7348 "01110110" // LDA dj0, [p4], #-4; MOVS dc6, dc4; MOVXM p5, #508448 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7349 "00010000" // /* MW 11 */ + 7350 "00010000" // /* MW 10 */ + 7351 "10110001" // /* MW 9 */ + 7352 "11110010" // /* MW 8 */ + 7353 "00000001" // /* MW 7 */ + 7354 "00000000" // /* MW 6 */ + 7355 "01001011" // /* MW 5 */ + 7356 "00010000" // /* MW 4 */ + 7357 "11010110" // /* MW 3 */ + 7358 "10001000" // /* MW 2 */ + 7359 "10011111" // /* MW 1 */ +.src_ref 8 "aie_core.h" 81 15 +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "shuffle.hpp" 153 18 +.src_ref 4 "shuffle.hpp" 153 18 +.src_ref 7 "conv2d_dw_bf16.h" 180 4 + 7360 "01110110" // LDA dn0, [p4], #12; MOVS dc1, dc4; MOVX r3, #48; MOV dc3, dc4 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7361 "01111000" // /* MW 11 */ + 7362 "11000000" // /* MW 10 */ + 7363 "11100100" // /* MW 9 */ + 7364 "00001001" // /* MW 8 */ + 7365 "00110110" // /* MW 7 */ + 7366 "00000000" // /* MW 6 */ + 7367 "01001011" // /* MW 5 */ + 7368 "00010000" // /* MW 4 */ + 7369 "11010001" // /* MW 3 */ + 7370 "10000100" // /* MW 2 */ + 7371 "10000111" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 180 4 + 7372 "10111010" // LDA dj4, [p4], #-4; MOVS dc0, dc4; VSHIFT x4, x6, x1, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7373 "01100010" // /* MW 9 */ + 7374 "01000011" // /* MW 8 */ + 7375 "00011000" // /* MW 7 */ + 7376 "00000001" // /* MW 6 */ + 7377 "01001011" // /* MW 5 */ + 7378 "00010000" // /* MW 4 */ + 7379 "11010000" // /* MW 3 */ + 7380 "11001000" // /* MW 2 */ + 7381 "10011111" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 7 "conv2d_dw_bf16.h" 180 4 first + 7382 "11010100" // LDA dn4, [p4], #8; MOV dc5, dc4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7383 "00000001" // /* MW 5 */ + 7384 "10010011" // /* MW 4 */ + 7385 "11011011" // /* MW 3 */ + 7386 "11000100" // /* MW 2 */ + 7387 "10000101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 180 4 + 7388 "10011000" // LDA m0, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7389 "00000110" // /* MW 3 */ + 7390 "00101100" // /* MW 2 */ + 7391 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 181 4 first + 7392 "10011000" // LDA dj1, [p4], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7393 "11000110" // /* MW 3 */ + 7394 "11111100" // /* MW 2 */ + 7395 "00000100" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 7 "conv2d_dw_bf16.h" 181 4 + 7396 "00111100" // LDA dn1, [p4], #12; VLDB.3D x2, [p0], d2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7397 "01101000" // /* MW 5 */ + 7398 "10110001" // /* MW 4 */ + 7399 "11010000" // /* MW 3 */ + 7400 "10010100" // /* MW 2 */ + 7401 "10000111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 181 4 first + 7402 "10011000" // LDA dj5, [p4], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7403 "11000110" // /* MW 3 */ + 7404 "11111110" // /* MW 2 */ + 7405 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 181 4 + 7406 "10011000" // LDA dn5, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7407 "10100110" // /* MW 3 */ + 7408 "00101110" // /* MW 2 */ + 7409 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 181 4 + 7410 "10011000" // LDA m1, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7411 "10000110" // /* MW 3 */ + 7412 "00101100" // /* MW 2 */ + 7413 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 182 4 first + 7414 "10011000" // LDA dj7, [p4], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7415 "11000110" // /* MW 3 */ + 7416 "11111111" // /* MW 2 */ + 7417 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 182 4 + 7418 "10011000" // LDA dn7, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7419 "10100110" // /* MW 3 */ + 7420 "00101111" // /* MW 2 */ + 7421 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 182 4 + 7422 "10011000" // LDA m7, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7423 "10000110" // /* MW 3 */ + 7424 "00101111" // /* MW 2 */ + 7425 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 183 4 first + 7426 "10011000" // LDA dj3, [p4], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7427 "11000110" // /* MW 3 */ + 7428 "11111101" // /* MW 2 */ + 7429 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 183 4 + 7430 "10011000" // LDA dn3, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7431 "10100110" // /* MW 3 */ + 7432 "00101101" // /* MW 2 */ + 7433 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 183 4 + 7434 "10011000" // LDA m3, [p4], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7435 "10000110" // /* MW 3 */ + 7436 "11001001" // /* MW 2 */ + 7437 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 202 56 first + 7438 "10011000" // LDA r4, [p4], m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7439 "10010110" // /* MW 3 */ + 7440 "10101000" // /* MW 2 */ + 7441 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 202 56 + 7442 "10011000" // LDA.s16 r7, [p4], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7443 "11110010" // /* MW 3 */ + 7444 "10001000" // /* MW 2 */ + 7445 "00000100" // /* MW 1 */ + 7446 "10011000" // LDA m4, [p4], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7447 "00000110" // /* MW 3 */ + 7448 "01001110" // /* MW 2 */ + 7449 "00000100" // /* MW 1 */ +.src_ref 8 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 7 "conv2d_dw_bf16.h" 202 56 first + 7450 "00111100" // LDA r5, [p4, #-28]; VLDB.2D x3, [p1], d7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7451 "11101000" // /* MW 5 */ + 7452 "11100001" // /* MW 4 */ + 7453 "11010011" // /* MW 3 */ + 7454 "10010110" // /* MW 2 */ + 7455 "10010010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 940 83 first + 7456 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7457 "00101011" // /* MW 3 */ + 7458 "00000100" // /* MW 2 */ + 7459 "00000010" // /* MW 1 */ + 7460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7461 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 202 56 first + 7462 "10011000" // LDA.s8 r6, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7463 "11000010" // /* MW 3 */ + 7464 "00000100" // /* MW 2 */ + 7465 "00000101" // /* MW 1 */ + 7466 "00011000" // ADD r4, r4, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7467 "11111011" // /* MW 3 */ + 7468 "00001001" // /* MW 2 */ + 7469 "00010001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 202 4 + 7470 "10111010" // LDA r17, [p4]; MOVXM p4, #7664 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7471 "00010000" // /* MW 9 */ + 7472 "11111000" // /* MW 8 */ + 7473 "00110110" // /* MW 7 */ + 7474 "00000110" // /* MW 6 */ + 7475 "00000000" // /* MW 5 */ + 7476 "00000000" // /* MW 4 */ + 7477 "11010000" // /* MW 3 */ + 7478 "11000110" // /* MW 2 */ + 7479 "10000000" // /* MW 1 */ + 7480 "11111000" // VBCST.16 x0, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7481 "01110010" // /* MW 3 */ + 7482 "00011101" // /* MW 2 */ + 7483 "00011000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 226 12 first + 7484 "01011000" // VEXTBCST.128 x10, x3, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7485 "00000011" // /* MW 3 */ + 7486 "00011100" // /* MW 2 */ + 7487 "00011101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 208 8 first + 7488 "10011000" // ADD.NC lc, r5, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7489 "11111111" // /* MW 3 */ + 7490 "01110010" // /* MW 2 */ + 7491 "00011101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 228 12 +.src_ref 7 "conv2d_dw_bf16.h" 228 12 first + 7492 "01100110" // NOPA; NOPB; VMOV cml3, cml0; VMAC.f dm4, dm3, x6, x10, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7493 "01000001" // /* MW 11 */ + 7494 "01101101" // /* MW 10 */ + 7495 "00000100" // /* MW 9 */ + 7496 "11100010" // /* MW 8 */ + 7497 "10001010" // /* MW 7 */ + 7498 "00000000" // /* MW 6 */ + 7499 "00100011" // /* MW 5 */ + 7500 "00000000" // /* MW 4 */ + 7501 "11110000" // /* MW 3 */ + 7502 "00101100" // /* MW 2 */ + 7503 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 7 "conv2d_dw_bf16.h" 223 12 first +.src_ref 7 "conv2d_dw_bf16.h" 232 12 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7504 "00001011" // NOPA; NOPB; NOPS; MOVX crRnd, r6; VEXTBCST.128 x8, x3, #1; VMAC.f dm1, dm0, x1, x10, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7505 "00011010" // /* MW 15 */ + 7506 "00001000" // /* MW 14 */ + 7507 "10101000" // /* MW 13 */ + 7508 "00000011" // /* MW 12 */ + 7509 "00001110" // /* MW 11 */ + 7510 "00000010" // /* MW 10 */ + 7511 "11010100" // /* MW 9 */ + 7512 "00001101" // /* MW 8 */ + 7513 "01011011" // /* MW 7 */ + 7514 "00000001" // /* MW 6 */ + 7515 "00100000" // /* MW 5 */ + 7516 "00000000" // /* MW 4 */ + 7517 "11110000" // /* MW 3 */ + 7518 "00101100" // /* MW 2 */ + 7519 "00000000" // /* MW 1 */ +.label ZLS_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_240 +.src_ref 8 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 153 18 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.noswbrkpt +.loop_nesting 1 + 7520 "10111010" // VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7521 "01101110" // /* MW 9 */ + 7522 "10000011" // /* MW 8 */ + 7523 "10000100" // /* MW 7 */ + 7524 "00000010" // /* MW 6 */ + 7525 "11110100" // /* MW 5 */ + 7526 "11110000" // /* MW 4 */ + 7527 "01110001" // /* MW 3 */ + 7528 "10110011" // /* MW 2 */ + 7529 "00000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 7 "conv2d_dw_bf16.h" 224 12 first +.src_ref 7 "conv2d_dw_bf16.h" 229 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7530 "01001010" // VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7531 "00000001" // /* MW 9 */ + 7532 "10001001" // /* MW 8 */ + 7533 "00000010" // /* MW 7 */ + 7534 "01000110" // /* MW 6 */ + 7535 "00001011" // /* MW 5 */ + 7536 "10011100" // /* MW 4 */ + 7537 "11101010" // /* MW 3 */ + 7538 "00111000" // /* MW 2 */ + 7539 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 233 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7540 "01001010" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r2; VMAC.f dm1, dm1, x10, x8, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7541 "00000001" // /* MW 9 */ + 7542 "00110101" // /* MW 8 */ + 7543 "00000001" // /* MW 7 */ + 7544 "11000110" // /* MW 6 */ + 7545 "10001010" // /* MW 5 */ + 7546 "00110000" // /* MW 4 */ + 7547 "01101010" // /* MW 3 */ + 7548 "10110001" // /* MW 2 */ + 7549 "00000000" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7550 "11011000" // VSHIFT x11, x1, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7551 "00001010" // /* MW 3 */ + 7552 "10001001" // /* MW 2 */ + 7553 "00011101" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 +.src_ref 7 "conv2d_dw_bf16.h" 230 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7554 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7555 "10100001" // /* MW 7 */ + 7556 "01001000" // /* MW 6 */ + 7557 "00000100" // /* MW 5 */ + 7558 "11000110" // /* MW 4 */ + 7559 "10001110" // /* MW 3 */ + 7560 "10110000" // /* MW 2 */ + 7561 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 225 12 first +.src_ref 7 "conv2d_dw_bf16.h" 234 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7562 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7563 "10100001" // /* MW 7 */ + 7564 "00110110" // /* MW 6 */ + 7565 "00000010" // /* MW 5 */ + 7566 "01000110" // /* MW 4 */ + 7567 "00001111" // /* MW 3 */ + 7568 "10011100" // /* MW 2 */ + 7569 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7570 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7571 "00001110" // /* MW 3 */ + 7572 "10001001" // /* MW 2 */ + 7573 "00011101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 226 12 first +.src_ref 7 "conv2d_dw_bf16.h" 231 12 first + 7574 "01100010" // VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7575 "11100001" // /* MW 7 */ + 7576 "10010010" // /* MW 6 */ + 7577 "00000011" // /* MW 5 */ + 7578 "01000110" // /* MW 4 */ + 7579 "00000011" // /* MW 3 */ + 7580 "00011100" // /* MW 2 */ + 7581 "00000101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 223 12 first +.src_ref 7 "conv2d_dw_bf16.h" 235 12 first + 7582 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7583 "11100001" // /* MW 7 */ + 7584 "01010110" // /* MW 6 */ + 7585 "00000000" // /* MW 5 */ + 7586 "01000110" // /* MW 4 */ + 7587 "00000111" // /* MW 3 */ + 7588 "00011100" // /* MW 2 */ + 7589 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first + 7590 "10010100" // NOPA; VSHIFT x4, x6, x1, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7591 "00001101" // /* MW 5 */ + 7592 "01100001" // /* MW 4 */ + 7593 "11110100" // /* MW 3 */ + 7594 "00101100" // /* MW 2 */ + 7595 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 228 12 first + 7596 "01001000" // VMAC.f dm4, dm3, x6, x10, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7597 "01000001" // /* MW 3 */ + 7598 "01101101" // /* MW 2 */ + 7599 "00000100" // /* MW 1 */ +.label ZLE_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_320 +.src_ref 7 "conv2d_dw_bf16.h" 232 12 first +.end_of_loop +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 7600 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7601 "00011010" // /* MW 15 */ + 7602 "00001000" // /* MW 14 */ + 7603 "01111000" // /* MW 13 */ + 7604 "10100101" // /* MW 12 */ + 7605 "00000001" // /* MW 11 */ + 7606 "00000000" // /* MW 10 */ + 7607 "00000000" // /* MW 9 */ + 7608 "00000000" // /* MW 8 */ + 7609 "01011011" // /* MW 7 */ + 7610 "00000001" // /* MW 6 */ + 7611 "00100000" // /* MW 5 */ + 7612 "00000000" // /* MW 4 */ + 7613 "11110000" // /* MW 3 */ + 7614 "00101100" // /* MW 2 */ + 7615 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 81 15 first +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "shuffle.hpp" 153 18 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 7616 "10111010" // PADDA.3D [p0], d0; PADDB.2D [p2], d3; VSHIFT x10, x1, x2, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7617 "01101110" // /* MW 9 */ + 7618 "10000011" // /* MW 8 */ + 7619 "10000100" // /* MW 7 */ + 7620 "00000010" // /* MW 6 */ + 7621 "10010000" // /* MW 5 */ + 7622 "01110011" // /* MW 4 */ + 7623 "11110010" // /* MW 3 */ + 7624 "00001100" // /* MW 2 */ + 7625 "00000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 224 12 first +.src_ref 7 "conv2d_dw_bf16.h" 229 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7626 "01100010" // VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7627 "00000001" // /* MW 7 */ + 7628 "10001001" // /* MW 6 */ + 7629 "00000010" // /* MW 5 */ + 7630 "01000110" // /* MW 4 */ + 7631 "00001011" // /* MW 3 */ + 7632 "10011100" // /* MW 2 */ + 7633 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 233 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7634 "01100010" // VSHIFT x4, x6, x1, r2; VMAC.f dm1, dm1, x10, x8, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7635 "00000001" // /* MW 7 */ + 7636 "00110101" // /* MW 6 */ + 7637 "00000001" // /* MW 5 */ + 7638 "11000110" // /* MW 4 */ + 7639 "10001010" // /* MW 3 */ + 7640 "00110000" // /* MW 2 */ + 7641 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7642 "11011000" // VSHIFT x11, x1, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7643 "00001010" // /* MW 3 */ + 7644 "10001001" // /* MW 2 */ + 7645 "00011101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 225 12 first +.src_ref 7 "conv2d_dw_bf16.h" 230 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7646 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7647 "10100001" // /* MW 7 */ + 7648 "01001000" // /* MW 6 */ + 7649 "00000100" // /* MW 5 */ + 7650 "01000110" // /* MW 4 */ + 7651 "00001111" // /* MW 3 */ + 7652 "10011100" // /* MW 2 */ + 7653 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 234 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7654 "01001010" // NOPA; VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7655 "10100001" // /* MW 9 */ + 7656 "00110110" // /* MW 8 */ + 7657 "00000010" // /* MW 7 */ + 7658 "11000010" // /* MW 6 */ + 7659 "10001110" // /* MW 5 */ + 7660 "10110000" // /* MW 4 */ + 7661 "11110100" // /* MW 3 */ + 7662 "00101100" // /* MW 2 */ + 7663 "00000000" // /* MW 1 */ +.label TGT_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_384 +.src_ref 8 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 153 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 7664 "10110100" // VLDB.2D x3, [p1], d7; VSHIFT x11, x1, x2, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7665 "00011101" // /* MW 5 */ + 7666 "00010010" // /* MW 4 */ + 7667 "10001011" // /* MW 3 */ + 7668 "00011110" // /* MW 2 */ + 7669 "00111110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 208 8 first +.src_ref 7 "conv2d_dw_bf16.h" 231 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7670 "01011010" // MOVXM le, #7840; VMAC.f dm3, dm4, x9, x7, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7671 "11100001" // /* MW 9 */ + 7672 "10010010" // /* MW 8 */ + 7673 "00000011" // /* MW 7 */ + 7674 "00000010" // /* MW 6 */ + 7675 "11101010" // /* MW 5 */ + 7676 "10110111" // /* MW 4 */ + 7677 "00000000" // /* MW 3 */ + 7678 "00000000" // /* MW 2 */ + 7679 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 940 83 first +.src_ref 7 "conv2d_dw_bf16.h" 208 8 +.src_ref 7 "conv2d_dw_bf16.h" 235 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7680 "01000110" // VLDA.CONV.fp32.bf16 cml0, [p2]; MOVXM ls, #7760; VMAC.f dm0, dm2, x11, x7, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7681 "11100001" // /* MW 11 */ + 7682 "01010110" // /* MW 10 */ + 7683 "00000000" // /* MW 9 */ + 7684 "00000010" // /* MW 8 */ + 7685 "11100101" // /* MW 7 */ + 7686 "10001111" // /* MW 6 */ + 7687 "00000000" // /* MW 5 */ + 7688 "00000000" // /* MW 4 */ + 7689 "01110000" // /* MW 3 */ + 7690 "10000101" // /* MW 2 */ + 7691 "01000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 208 8 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7692 "10011000" // ADD.NC lc, r5, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7693 "11111111" // /* MW 3 */ + 7694 "01110010" // /* MW 2 */ + 7695 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7696 "10011000" // VLDA x6, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7697 "10011011" // /* MW 3 */ + 7698 "00011101" // /* MW 2 */ + 7699 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7700 "00011000" // VLDB x1, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7701 "01110100" // /* MW 3 */ + 7702 "00011100" // /* MW 2 */ + 7703 "00111000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7704 "00011000" // VLDB.3D x2, [p0], d2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7705 "10110100" // /* MW 3 */ + 7706 "01011000" // /* MW 2 */ + 7707 "00111000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7708 "00011000" // VCONV.bf16.fp32 x10, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7709 "10010110" // /* MW 3 */ + 7710 "00010001" // /* MW 2 */ + 7711 "00001101" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7712 "00011000" // VCONV.bf16.fp32 x6, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7713 "00010110" // /* MW 3 */ + 7714 "00010000" // /* MW 2 */ + 7715 "00001011" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7716 "11111000" // VMAX_LT.bf16 x8, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7717 "01101100" // /* MW 3 */ + 7718 "01010000" // /* MW 2 */ + 7719 "00011100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 244 17 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7720 "01111000" // VSHUFFLE x10, x10, x6, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7721 "01000100" // /* MW 3 */ + 7722 "01010011" // /* MW 2 */ + 7723 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 7 "conv2d_dw_bf16.h" 243 16 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7724 "00000010" // VST x8, [p3], m4; VMAX_LT.bf16 x10, r16, x10, x0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7725 "01110000" // /* MW 7 */ + 7726 "00110110" // /* MW 6 */ + 7727 "10101000" // /* MW 5 */ + 7728 "00000010" // /* MW 4 */ + 7729 "01100000" // /* MW 3 */ + 7730 "01000010" // /* MW 2 */ + 7731 "01110001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 226 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7732 "01011000" // VEXTBCST.128 x10, x3, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7733 "00000011" // /* MW 3 */ + 7734 "00011100" // /* MW 2 */ + 7735 "00011101" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 7 "conv2d_dw_bf16.h" 228 12 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7736 "00000010" // VST.3D x10, [p3], d1; VMOV cml3, cml0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7737 "01110000" // /* MW 7 */ + 7738 "01000101" // /* MW 6 */ + 7739 "10000000" // /* MW 5 */ + 7740 "00000001" // /* MW 4 */ + 7741 "01100000" // /* MW 3 */ + 7742 "01010010" // /* MW 2 */ + 7743 "01100111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 223 12 first +.src_ref 7 "conv2d_dw_bf16.h" 228 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7744 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7745 "01000001" // /* MW 7 */ + 7746 "01101101" // /* MW 6 */ + 7747 "00000100" // /* MW 5 */ + 7748 "01000110" // /* MW 4 */ + 7749 "00000111" // /* MW 3 */ + 7750 "00011100" // /* MW 2 */ + 7751 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 232 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7752 "01100010" // VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm0, x1, x10, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7753 "01000001" // /* MW 7 */ + 7754 "00000011" // /* MW 6 */ + 7755 "00000001" // /* MW 5 */ + 7756 "11000110" // /* MW 4 */ + 7757 "10000110" // /* MW 3 */ + 7758 "00110000" // /* MW 2 */ + 7759 "00000010" // /* MW 1 */ +.label ZLS_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_480 +.src_ref 8 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 153 18 +.begin_of_loop +.aggressive_scheduled_block_id 2 +.noswbrkpt +.loop_nesting 2 + 7760 "10111010" // VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7761 "01101110" // /* MW 9 */ + 7762 "10000011" // /* MW 8 */ + 7763 "10000100" // /* MW 7 */ + 7764 "00000010" // /* MW 6 */ + 7765 "11110100" // /* MW 5 */ + 7766 "11110000" // /* MW 4 */ + 7767 "01110001" // /* MW 3 */ + 7768 "10110011" // /* MW 2 */ + 7769 "00000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 7 "conv2d_dw_bf16.h" 224 12 first +.src_ref 7 "conv2d_dw_bf16.h" 229 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7770 "01001010" // VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7771 "00000001" // /* MW 9 */ + 7772 "10001001" // /* MW 8 */ + 7773 "00000010" // /* MW 7 */ + 7774 "01000110" // /* MW 6 */ + 7775 "00001011" // /* MW 5 */ + 7776 "10011100" // /* MW 4 */ + 7777 "11101010" // /* MW 3 */ + 7778 "00111000" // /* MW 2 */ + 7779 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 233 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7780 "01001010" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r2; VMAC.f dm1, dm1, x10, x8, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7781 "00000001" // /* MW 9 */ + 7782 "00110101" // /* MW 8 */ + 7783 "00000001" // /* MW 7 */ + 7784 "11000110" // /* MW 6 */ + 7785 "10001010" // /* MW 5 */ + 7786 "00110000" // /* MW 4 */ + 7787 "01101010" // /* MW 3 */ + 7788 "10110001" // /* MW 2 */ + 7789 "00000000" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7790 "11011000" // VSHIFT x11, x1, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7791 "00001010" // /* MW 3 */ + 7792 "10001001" // /* MW 2 */ + 7793 "00011101" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 +.src_ref 7 "conv2d_dw_bf16.h" 230 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7794 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7795 "10100001" // /* MW 7 */ + 7796 "01001000" // /* MW 6 */ + 7797 "00000100" // /* MW 5 */ + 7798 "11000110" // /* MW 4 */ + 7799 "10001110" // /* MW 3 */ + 7800 "10110000" // /* MW 2 */ + 7801 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 225 12 first +.src_ref 7 "conv2d_dw_bf16.h" 234 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7802 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7803 "10100001" // /* MW 7 */ + 7804 "00110110" // /* MW 6 */ + 7805 "00000010" // /* MW 5 */ + 7806 "01000110" // /* MW 4 */ + 7807 "00001111" // /* MW 3 */ + 7808 "10011100" // /* MW 2 */ + 7809 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7810 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7811 "00001110" // /* MW 3 */ + 7812 "10001001" // /* MW 2 */ + 7813 "00011101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 226 12 first +.src_ref 7 "conv2d_dw_bf16.h" 231 12 first + 7814 "01100010" // VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7815 "11100001" // /* MW 7 */ + 7816 "10010010" // /* MW 6 */ + 7817 "00000011" // /* MW 5 */ + 7818 "01000110" // /* MW 4 */ + 7819 "00000011" // /* MW 3 */ + 7820 "00011100" // /* MW 2 */ + 7821 "00000101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 223 12 first +.src_ref 7 "conv2d_dw_bf16.h" 235 12 first + 7822 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7823 "11100001" // /* MW 7 */ + 7824 "01010110" // /* MW 6 */ + 7825 "00000000" // /* MW 5 */ + 7826 "01000110" // /* MW 4 */ + 7827 "00000111" // /* MW 3 */ + 7828 "00011100" // /* MW 2 */ + 7829 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first + 7830 "10010100" // NOPA; VSHIFT x4, x6, x1, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7831 "00001101" // /* MW 5 */ + 7832 "01100001" // /* MW 4 */ + 7833 "11110100" // /* MW 3 */ + 7834 "00101100" // /* MW 2 */ + 7835 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 228 12 first + 7836 "01001000" // VMAC.f dm4, dm3, x6, x10, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7837 "01000001" // /* MW 3 */ + 7838 "01101101" // /* MW 2 */ + 7839 "00000100" // /* MW 1 */ +.label ZLE_F_Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params_560 +.src_ref 7 "conv2d_dw_bf16.h" 232 12 first +.end_of_loop +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 7840 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7841 "00011010" // /* MW 15 */ + 7842 "00001000" // /* MW 14 */ + 7843 "01111000" // /* MW 13 */ + 7844 "10100101" // /* MW 12 */ + 7845 "00000001" // /* MW 11 */ + 7846 "00000000" // /* MW 10 */ + 7847 "00000000" // /* MW 9 */ + 7848 "00000000" // /* MW 8 */ + 7849 "01011011" // /* MW 7 */ + 7850 "00000001" // /* MW 6 */ + 7851 "00100000" // /* MW 5 */ + 7852 "00000000" // /* MW 4 */ + 7853 "11110000" // /* MW 3 */ + 7854 "00101100" // /* MW 2 */ + 7855 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 81 15 first +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 202 4 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 7856 "10110110" // PADDA.3D [p0], d0; PADDB.2D [p2], d3; JNZD r4, r4, p4; VSHIFT x10, x1, x2, r1 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 7857 "01101000" // /* MW 11 */ + 7858 "10000011" // /* MW 10 */ + 7859 "10000100" // /* MW 9 */ + 7860 "00000010" // /* MW 8 */ + 7861 "01001001" // /* MW 7 */ + 7862 "00001000" // /* MW 6 */ + 7863 "00100000" // /* MW 5 */ + 7864 "11100111" // /* MW 4 */ + 7865 "11110100" // /* MW 3 */ + 7866 "00001100" // /* MW 2 */ + 7867 "00000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 224 12 first +.src_ref 7 "conv2d_dw_bf16.h" 229 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7868 "01100010" // VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7869 "00000001" // /* MW 7 */ + 7870 "10001001" // /* MW 6 */ + 7871 "00000010" // /* MW 5 */ + 7872 "01000110" // /* MW 4 */ + 7873 "00001011" // /* MW 3 */ + 7874 "10011100" // /* MW 2 */ + 7875 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 233 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7876 "01100010" // VSHIFT x4, x6, x1, r2; VMAC.f dm1, dm1, x10, x8, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7877 "00000001" // /* MW 7 */ + 7878 "00110101" // /* MW 6 */ + 7879 "00000001" // /* MW 5 */ + 7880 "11000110" // /* MW 4 */ + 7881 "10001010" // /* MW 3 */ + 7882 "00110000" // /* MW 2 */ + 7883 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7884 "11011000" // VSHIFT x11, x1, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7885 "00001010" // /* MW 3 */ + 7886 "10001001" // /* MW 2 */ + 7887 "00011101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 225 12 first +.src_ref 7 "conv2d_dw_bf16.h" 230 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7888 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7889 "10100001" // /* MW 7 */ + 7890 "01001000" // /* MW 6 */ + 7891 "00000100" // /* MW 5 */ + 7892 "01000110" // /* MW 4 */ + 7893 "00001111" // /* MW 3 */ + 7894 "10011100" // /* MW 2 */ + 7895 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 first +.src_ref 7 "conv2d_dw_bf16.h" 234 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 7896 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7897 "10100001" // /* MW 7 */ + 7898 "00110110" // /* MW 6 */ + 7899 "00000010" // /* MW 5 */ + 7900 "11000110" // /* MW 4 */ + 7901 "10001110" // /* MW 3 */ + 7902 "10110000" // /* MW 2 */ + 7903 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 153 18 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 7904 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7905 "00001110" // /* MW 3 */ + 7906 "10001001" // /* MW 2 */ + 7907 "00011101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 231 12 first + 7908 "01001000" // VMAC.f dm3, dm4, x9, x7, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7909 "11100001" // /* MW 3 */ + 7910 "10010010" // /* MW 2 */ + 7911 "00000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 235 12 first + 7912 "01001000" // VMAC.f dm0, dm2, x11, x7, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7913 "11100001" // /* MW 3 */ + 7914 "01010110" // /* MW 2 */ + 7915 "00000000" // /* MW 1 */ + 7916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7917 "00000000" // /* MW 1 */ + 7918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7919 "00000000" // /* MW 1 */ + 7920 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7921 "00000000" // /* MW 1 */ + 7922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7923 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 first + 7924 "00011000" // VCONV.bf16.fp32 x10, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7925 "10010110" // /* MW 3 */ + 7926 "00010001" // /* MW 2 */ + 7927 "00001101" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1119 102 +.src_ref 7 "conv2d_dw_bf16.h" 248 first + 7928 "01011100" // VCONV.bf16.fp32 x6, cml0; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 7929 "00000000" // /* MW 5 */ + 7930 "01010000" // /* MW 4 */ + 7931 "11000000" // /* MW 3 */ + 7932 "00000010" // /* MW 2 */ + 7933 "01100010" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 7934 "11111000" // VMAX_LT.bf16 x8, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7935 "01101100" // /* MW 3 */ + 7936 "01010000" // /* MW 2 */ + 7937 "00011100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 244 17 first +.delay_slot + 7938 "01111000" // VSHUFFLE x10, x10, x6, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7939 "01000100" // /* MW 3 */ + 7940 "01010011" // /* MW 2 */ + 7941 "00011101" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 7942 "11111000" // VMAX_LT.bf16 x10, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7943 "01101100" // /* MW 3 */ + 7944 "01010000" // /* MW 2 */ + 7945 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 7 "conv2d_dw_bf16.h" 243 16 first +.delay_slot + 7946 "00011000" // VST x8, [p3], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7947 "00010011" // /* MW 3 */ + 7948 "10001010" // /* MW 2 */ + 7949 "00001011" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1159 33 +.delay_slot + 7950 "00011000" // VST.3D x10, [p3], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7951 "10010011" // /* MW 3 */ + 7952 "00111010" // /* MW 2 */ +.label _Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params__end +.label __Z14conv2d_dw_coreI8bfloat16EvPT_S2_S2_S2_R21conv2d_dw_bf16_params___func_end0 + 7953 "00001011" // /* MW 1 */ +.label __Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params___func_begin0 +.label _Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params +.function conv2d_dw_shuffle _Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 7 "conv2d_dw_bf16.h" 254 first +.src_ref 7 "conv2d_dw_bf16.h" 255 22 +.src_ref 7 "conv2d_dw_bf16.h" 255 22 +.src_ref 7 "conv2d_dw_bf16.h" 261 15 +.function_start + 7968 "10110110" // MOVA m4, #-36; VLDB x2, [p1], #64; MOVXM p3, #509080 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7969 "00010000" // /* MW 11 */ + 7970 "01001100" // /* MW 10 */ + 7971 "10110010" // /* MW 9 */ + 7972 "11110001" // /* MW 8 */ + 7973 "00000001" // /* MW 7 */ + 7974 "00000000" // /* MW 6 */ + 7975 "01101000" // /* MW 5 */ + 7976 "00111001" // /* MW 4 */ + 7977 "10000010" // /* MW 3 */ + 7978 "10010000" // /* MW 2 */ + 7979 "11111011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 7 "conv2d_dw_bf16.h" 255 22 first +.src_ref 7 "conv2d_dw_bf16.h" 262 15 first +.src_ref 7 "conv2d_dw_bf16.h" 270 25 + 7980 "10110110" // LDA dn0, [p3], #4; VLDB x0, [p2], #64; MOVX r4, #-5; MOV r3, #1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7981 "01011000" // /* MW 11 */ + 7982 "00000001" // /* MW 10 */ + 7983 "01101000" // /* MW 9 */ + 7984 "01101000" // /* MW 8 */ + 7985 "01000111" // /* MW 7 */ + 7986 "00111110" // /* MW 6 */ + 7987 "01101000" // /* MW 5 */ + 7988 "00111000" // /* MW 4 */ + 7989 "11010100" // /* MW 3 */ + 7990 "10000100" // /* MW 2 */ + 7991 "01100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 255 22 +.src_ref 7 "conv2d_dw_bf16.h" 258 4 first + 7992 "10111010" // LDA dj0, [p3], #4; MOVXM ls, #8080 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7993 "00010000" // /* MW 9 */ + 7994 "11001000" // /* MW 8 */ + 7995 "01111111" // /* MW 7 */ + 7996 "00000100" // /* MW 6 */ + 7997 "00000000" // /* MW 5 */ + 7998 "00000000" // /* MW 4 */ + 7999 "11010000" // /* MW 3 */ + 8000 "10001000" // /* MW 2 */ + 8001 "01100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 255 22 first +.src_ref 7 "conv2d_dw_bf16.h" 258 4 + 8002 "10111010" // LDA dn4, [p3], #4; MOVXM le, #8176 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8003 "00010000" // /* MW 9 */ + 8004 "11111000" // /* MW 8 */ + 8005 "10111111" // /* MW 7 */ + 8006 "00000101" // /* MW 6 */ + 8007 "00000000" // /* MW 5 */ + 8008 "00000000" // /* MW 4 */ + 8009 "11010000" // /* MW 3 */ + 8010 "11000100" // /* MW 2 */ + 8011 "01100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 255 22 +.src_ref 7 "conv2d_dw_bf16.h" 263 16 +.src_ref 7 "conv2d_dw_bf16.h" 264 16 +.src_ref 7 "conv2d_dw_bf16.h" 266 47 + 8012 "10111010" // LDA dj4, [p3], #4; MOVX r1, #50; MOV r0, #18 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8013 "01011000" // /* MW 9 */ + 8014 "00010010" // /* MW 8 */ + 8015 "00001000" // /* MW 7 */ + 8016 "01001000" // /* MW 6 */ + 8017 "00010110" // /* MW 5 */ + 8018 "00000000" // /* MW 4 */ + 8019 "11010000" // /* MW 3 */ + 8020 "11001000" // /* MW 2 */ + 8021 "01100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 255 22 +.src_ref 7 "conv2d_dw_bf16.h" 267 47 + 8022 "00101100" // LDA m0, [p3], #4; MOVX r2, #19 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8023 "10011010" // /* MW 5 */ + 8024 "00001000" // /* MW 4 */ + 8025 "11010000" // /* MW 3 */ + 8026 "10000000" // /* MW 2 */ + 8027 "01100011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 255 22 + 8028 "10011000" // LDA dc0, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8029 "01100110" // /* MW 3 */ + 8030 "00011100" // /* MW 2 */ + 8031 "00000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 255 22 + 8032 "10011000" // LDA dc4, [p3], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8033 "01100110" // /* MW 3 */ + 8034 "10001010" // /* MW 2 */ + 8035 "00000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 258 51 first + 8036 "10011000" // LDA r5, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8037 "10110110" // /* MW 3 */ + 8038 "00000100" // /* MW 2 */ + 8039 "00000011" // /* MW 1 */ + 8040 "10011000" // LDA r6, [p3, #8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8041 "11010110" // /* MW 3 */ + 8042 "00100100" // /* MW 2 */ + 8043 "00000011" // /* MW 1 */ + 8044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8045 "00000000" // /* MW 1 */ + 8046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8047 "00000000" // /* MW 1 */ + 8048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8049 "00000000" // /* MW 1 */ + 8050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8051 "00000000" // /* MW 1 */ + 8052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8053 "00000000" // /* MW 1 */ + 8054 "10011000" // LSHL r4, r5, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8055 "01001101" // /* MW 3 */ + 8056 "01001000" // /* MW 2 */ + 8057 "00010001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 258 4 first +.src_ref 7 "conv2d_dw_bf16.h" 270 25 first + 8058 "00100100" // LSHL r3, r6, r3; ADD.NC lc, r4, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8059 "11111111" // /* MW 5 */ + 8060 "11100100" // /* MW 4 */ + 8061 "10111010" // /* MW 3 */ + 8062 "11000111" // /* MW 2 */ + 8063 "00110000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 7 "conv2d_dw_bf16.h" 270 25 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8064 "11100001" // NOPA; NOPB; NOPS; NOPX; MOV dj1, r3; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8065 "00000000" // /* MW 15 */ + 8066 "00000000" // /* MW 14 */ + 8067 "01111000" // /* MW 13 */ + 8068 "11010000" // /* MW 12 */ + 8069 "11000000" // /* MW 11 */ + 8070 "00000000" // /* MW 10 */ + 8071 "00000000" // /* MW 9 */ + 8072 "00000000" // /* MW 8 */ + 8073 "01011011" // /* MW 7 */ + 8074 "00000001" // /* MW 6 */ + 8075 "00100000" // /* MW 5 */ + 8076 "00000000" // /* MW 4 */ + 8077 "11110000" // /* MW 3 */ + 8078 "00101100" // /* MW 2 */ + 8079 "00000000" // /* MW 1 */ +.label ZLS_F_Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params_112 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 7 "conv2d_dw_bf16.h" 261 15 first +.src_ref 7 "conv2d_dw_bf16.h" 262 15 first +.src_ref 7 "conv2d_dw_bf16.h" 263 16 first +.src_ref 7 "conv2d_dw_bf16.h" 270 25 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.noswbrkpt +.loop_nesting 1 + 8080 "11100001" // VLDA x0, [p2], #64; VLDB x2, [p1], #64; MOVS p4, p0; NOPX; VSHUFFLE x1, x2, x0, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8081 "00000000" // /* MW 15 */ + 8082 "00000000" // /* MW 14 */ + 8083 "00111000" // /* MW 13 */ + 8084 "00000010" // /* MW 12 */ + 8085 "01001000" // /* MW 11 */ + 8086 "00000000" // /* MW 10 */ + 8087 "00000000" // /* MW 9 */ + 8088 "00000000" // /* MW 8 */ + 8089 "10001011" // /* MW 7 */ + 8090 "10000000" // /* MW 6 */ + 8091 "01101100" // /* MW 5 */ + 8092 "00111001" // /* MW 4 */ + 8093 "01110010" // /* MW 3 */ + 8094 "10000011" // /* MW 2 */ + 8095 "01000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 264 16 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8096 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE x3, x0, x0, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8097 "00000000" // /* MW 15 */ + 8098 "00000000" // /* MW 14 */ + 8099 "00111000" // /* MW 13 */ + 8100 "00000010" // /* MW 12 */ + 8101 "11000000" // /* MW 11 */ + 8102 "00000000" // /* MW 10 */ + 8103 "00000000" // /* MW 9 */ + 8104 "00000000" // /* MW 8 */ + 8105 "01011011" // /* MW 7 */ + 8106 "00000001" // /* MW 6 */ + 8107 "00100000" // /* MW 5 */ + 8108 "00000000" // /* MW 4 */ + 8109 "11110000" // /* MW 3 */ + 8110 "00101100" // /* MW 2 */ + 8111 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 266 47 first + 8112 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmlh0, x1, x3, r0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8113 "00000000" // /* MW 15 */ + 8114 "00000000" // /* MW 14 */ + 8115 "11101000" // /* MW 13 */ + 8116 "11000000" // /* MW 12 */ + 8117 "01000100" // /* MW 11 */ + 8118 "00000000" // /* MW 10 */ + 8119 "00000000" // /* MW 9 */ + 8120 "00000000" // /* MW 8 */ + 8121 "01011011" // /* MW 7 */ + 8122 "00000001" // /* MW 6 */ + 8123 "00100000" // /* MW 5 */ + 8124 "00000000" // /* MW 4 */ + 8125 "11110000" // /* MW 3 */ + 8126 "00101100" // /* MW 2 */ + 8127 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 267 47 first + 8128 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x1, x3, r2; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8129 "00000000" // /* MW 15 */ + 8130 "00000000" // /* MW 14 */ + 8131 "11101000" // /* MW 13 */ + 8132 "11000100" // /* MW 12 */ + 8133 "00000100" // /* MW 11 */ + 8134 "00000000" // /* MW 10 */ + 8135 "00000000" // /* MW 9 */ + 8136 "00000000" // /* MW 8 */ + 8137 "01011011" // /* MW 7 */ + 8138 "00000001" // /* MW 6 */ + 8139 "00100000" // /* MW 5 */ + 8140 "00000000" // /* MW 4 */ + 8141 "11110000" // /* MW 3 */ + 8142 "00101100" // /* MW 2 */ + 8143 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 143 15 first +.src_ref 4 "vector.hpp" 1159 33 first + 8144 "11100001" // NOPA; NOPB; VST.3D bmlh0, [p0], d0; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8145 "00000000" // /* MW 15 */ + 8146 "00000000" // /* MW 14 */ + 8147 "01111000" // /* MW 13 */ + 8148 "10100101" // /* MW 12 */ + 8149 "00000001" // /* MW 11 */ + 8150 "00000000" // /* MW 10 */ + 8151 "00000000" // /* MW 9 */ + 8152 "10000000" // /* MW 8 */ + 8153 "00100110" // /* MW 7 */ + 8154 "00011000" // /* MW 6 */ + 8155 "00100000" // /* MW 5 */ + 8156 "00000000" // /* MW 4 */ + 8157 "11110000" // /* MW 3 */ + 8158 "00101100" // /* MW 2 */ + 8159 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 7 "conv2d_dw_bf16.h" 270 25 first + 8160 "11100001" // NOPA; NOPB; VST bmll0, [p4, dj1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8161 "00000000" // /* MW 15 */ + 8162 "00000000" // /* MW 14 */ + 8163 "01111000" // /* MW 13 */ + 8164 "10100101" // /* MW 12 */ + 8165 "00000001" // /* MW 11 */ + 8166 "00000000" // /* MW 10 */ + 8167 "00000000" // /* MW 9 */ + 8168 "10000000" // /* MW 8 */ + 8169 "00000110" // /* MW 7 */ + 8170 "00100000" // /* MW 6 */ + 8171 "00100100" // /* MW 5 */ + 8172 "00000000" // /* MW 4 */ + 8173 "11110000" // /* MW 3 */ + 8174 "00101100" // /* MW 2 */ + 8175 "00000000" // /* MW 1 */ +.label ZLE_F_Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params_208 +.end_of_loop + 8176 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8177 "00000000" // /* MW 15 */ + 8178 "00000000" // /* MW 14 */ + 8179 "01111000" // /* MW 13 */ + 8180 "10100101" // /* MW 12 */ + 8181 "00000001" // /* MW 11 */ + 8182 "00000000" // /* MW 10 */ + 8183 "00000000" // /* MW 9 */ + 8184 "00000000" // /* MW 8 */ + 8185 "01011011" // /* MW 7 */ + 8186 "00000001" // /* MW 6 */ + 8187 "00100000" // /* MW 5 */ + 8188 "00000000" // /* MW 4 */ + 8189 "11110000" // /* MW 3 */ + 8190 "00101100" // /* MW 2 */ + 8191 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 7 "conv2d_dw_bf16.h" 263 16 first +.src_ref 7 "conv2d_dw_bf16.h" 270 25 +.src_ref 7 "conv2d_dw_bf16.h" 274 first +.loop_nesting 0 + 8192 "00111010" // MOVS p4, p0; RET lr; VSHUFFLE x1, x2, x0, r1 /* MW 10 */ /* control_operation: words=10 rts unconditional cycles_taken=1 delay_slots=5 */ + 8193 "00111001" // /* MW 9 */ + 8194 "00000010" // /* MW 8 */ + 8195 "01001000" // /* MW 7 */ + 8196 "00000000" // /* MW 6 */ + 8197 "01000000" // /* MW 5 */ + 8198 "00000001" // /* MW 4 */ + 8199 "01100000" // /* MW 3 */ + 8200 "00010001" // /* MW 2 */ + 8201 "10010000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 264 16 first +.delay_slot + 8202 "01111000" // VSHUFFLE x3, x0, x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8203 "00000100" // /* MW 3 */ + 8204 "10000000" // /* MW 2 */ + 8205 "00011001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 266 47 first +.delay_slot + 8206 "11011000" // VSHUFFLE bmlh0, x1, x3, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8207 "10000001" // /* MW 3 */ + 8208 "10001001" // /* MW 2 */ + 8209 "00011000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 267 47 first +.delay_slot + 8210 "11011000" // VSHUFFLE bmll0, x1, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8211 "10001001" // /* MW 3 */ + 8212 "00001001" // /* MW 2 */ + 8213 "00011000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 143 15 first +.src_ref 4 "vector.hpp" 1159 33 first +.delay_slot + 8214 "10011000" // VST.3D bmlh0, [p0], d0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8215 "00100110" // /* MW 3 */ + 8216 "00011000" // /* MW 2 */ + 8217 "00001000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 7 "conv2d_dw_bf16.h" 270 25 first +.delay_slot + 8218 "10011000" // VST bmll0, [p4, dj1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8219 "00000110" // /* MW 3 */ + 8220 "00100000" // /* MW 2 */ +.label _Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params__end +.label __Z17conv2d_dw_shuffleI8bfloat16EvPT_S2_S2_RK21conv2d_dw_bf16_params___func_end0 + 8221 "00001100" // /* MW 1 */ +.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_begin0 +.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params +.function conv2d_dw<(unsigned char)'\x01', bfloat16, bfloat16, bfloat16, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::async, adf::addressing::linear, adf::margin<0U> > > _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 7 "conv2d_dw_bf16.h" 282 first +.src_ref 7 "conv2d_dw_bf16.h" 285 49 +.src_ref 7 "conv2d_dw_bf16.h" 285 79 +.src_ref 7 "conv2d_dw_bf16.h" 287 48 +.function_start + 8224 "10111010" // LDA p1, [p1]; MOVX r17, #1; MOV m0, #-193 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8225 "01011000" // /* MW 9 */ + 8226 "00111111" // /* MW 8 */ + 8227 "00000111" // /* MW 7 */ + 8228 "00101000" // /* MW 6 */ + 8229 "00010000" // /* MW 5 */ + 8230 "00000001" // /* MW 4 */ + 8231 "11010000" // /* MW 3 */ + 8232 "10010011" // /* MW 2 */ + 8233 "00100000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 285 79 +.src_ref 7 "conv2d_dw_bf16.h" 287 77 + 8234 "10111010" // MOVA m1, #-208; MOVXM p4, #509064 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8235 "00010000" // /* MW 9 */ + 8236 "01000100" // /* MW 8 */ + 8237 "00110010" // /* MW 7 */ + 8238 "11110010" // /* MW 6 */ + 8239 "00000001" // /* MW 5 */ + 8240 "00000000" // /* MW 4 */ + 8241 "10000000" // /* MW 3 */ + 8242 "00000100" // /* MW 2 */ + 8243 "11100110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 285 79 first +.src_ref 7 "conv2d_dw_bf16.h" 285 103 + 8244 "01010100" // LDA r16, [p4], m0; MOV m0, #201 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8245 "00100101" // /* MW 5 */ + 8246 "00000011" // /* MW 4 */ + 8247 "11010000" // /* MW 3 */ + 8248 "01000010" // /* MW 2 */ + 8249 "10000001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 285 103 + 8250 "10011000" // LDA.u8 r19, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8251 "01101010" // /* MW 3 */ + 8252 "00001010" // /* MW 2 */ + 8253 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 287 77 first + 8254 "10011000" // LDA r18, [p4], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8255 "01010110" // /* MW 3 */ + 8256 "00101010" // /* MW 2 */ + 8257 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 282 + 8258 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8259 "00000001" // /* MW 5 */ + 8260 "00000000" // /* MW 4 */ + 8261 "00000000" // /* MW 3 */ + 8262 "00001000" // /* MW 2 */ + 8263 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8264 "00001100" // LDA p0, [p0]; ST lr, [sp, #-8] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8265 "01111011" // /* MW 5 */ + 8266 "11110000" // /* MW 4 */ + 8267 "11011111" // /* MW 3 */ + 8268 "10000011" // /* MW 2 */ + 8269 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8270 "00001100" // LDA r15, [p2]; ST p2, [sp, #-16] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8271 "00111011" // /* MW 5 */ + 8272 "11100010" // /* MW 4 */ + 8273 "11011111" // /* MW 3 */ + 8274 "10111110" // /* MW 2 */ + 8275 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8276 "10011000" // ST p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8277 "10011101" // /* MW 3 */ + 8278 "11111111" // /* MW 2 */ + 8279 "00001111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 284 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 8280 "00111010" // ST p6, [sp, #-20]; JL #7280 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=7280 delay_slots=5 */ + 8281 "01000001" // /* MW 9 */ + 8282 "00000000" // /* MW 8 */ + 8283 "00000000" // /* MW 7 */ + 8284 "10001110" // /* MW 6 */ + 8285 "00000011" // /* MW 5 */ + 8286 "00000000" // /* MW 4 */ + 8287 "10110000" // /* MW 3 */ + 8288 "11100011" // /* MW 2 */ + 8289 "11111101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 284 4 +.src_ref 7 "conv2d_dw_bf16.h" 285 89 first +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8290 "00111010" // ST r15, [sp, #-12]; MUL r16, r19, r16; MOV p2, p1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8291 "01111001" // /* MW 9 */ + 8292 "01100000" // /* MW 8 */ + 8293 "00110001" // /* MW 7 */ + 8294 "01111101" // /* MW 6 */ + 8295 "00001000" // /* MW 5 */ + 8296 "00100111" // /* MW 4 */ + 8297 "10110000" // /* MW 3 */ + 8298 "10111110" // /* MW 2 */ + 8299 "11111110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 290 20 +.delay_slot + 8300 "11111000" // MOV p6, p4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8301 "11000000" // /* MW 3 */ + 8302 "01101000" // /* MW 2 */ + 8303 "00011110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 285 49 +.delay_slot + 8304 "01011100" // ST p1, [sp, #-24]; LSHL r16, r16, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8305 "00111011" // /* MW 5 */ + 8306 "01000010" // /* MW 4 */ + 8307 "10111000" // /* MW 3 */ + 8308 "00010011" // /* MW 2 */ + 8309 "11111101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 285 49 +.src_ref 7 "conv2d_dw_bf16.h" 287 48 first +.src_ref 7 "conv2d_dw_bf16.h" 291 8 +.delay_slot + 8310 "00111010" // MOVS p7, p0; LSHL r16, r18, r17; MOV m0, r16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8311 "01111001" // /* MW 9 */ + 8312 "00010000" // /* MW 8 */ + 8313 "00000100" // /* MW 7 */ + 8314 "11101100" // /* MW 6 */ + 8315 "00001000" // /* MW 5 */ + 8316 "00100101" // /* MW 4 */ + 8317 "01100000" // /* MW 3 */ + 8318 "00010001" // /* MW 2 */ + 8319 "11110000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 285 49 first +.src_ref 7 "conv2d_dw_bf16.h" 287 48 +.delay_slot + 8320 "11100001" // NOPA; PADDB [p1], m0; NOPS; NOPX; ADD.NC p3, r15, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8321 "00000000" // /* MW 15 */ + 8322 "00000000" // /* MW 14 */ + 8323 "10101000" // /* MW 13 */ + 8324 "11100000" // /* MW 12 */ + 8325 "10110011" // /* MW 11 */ + 8326 "00000001" // /* MW 10 */ + 8327 "00000000" // /* MW 9 */ + 8328 "00000000" // /* MW 8 */ + 8329 "01011011" // /* MW 7 */ + 8330 "00000001" // /* MW 6 */ + 8331 "00100000" // /* MW 5 */ + 8332 "00010111" // /* MW 4 */ + 8333 "11110010" // /* MW 3 */ + 8334 "00101100" // /* MW 2 */ + 8335 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 290 20 first +.src_ref 7 "conv2d_dw_bf16.h" 290 24 +.src_ref 7 "conv2d_dw_bf16.h" 292 31 +.return_address + 8336 "00101100" // LDA.u8 r16, [p6, #7]; MOVX r17, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8337 "00010010" // /* MW 5 */ + 8338 "01000100" // /* MW 4 */ + 8339 "01010000" // /* MW 3 */ + 8340 "11000001" // /* MW 2 */ + 8341 "11001110" // /* MW 1 */ + 8342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8343 "00000000" // /* MW 1 */ + 8344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8345 "00000000" // /* MW 1 */ + 8346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8347 "00000000" // /* MW 1 */ + 8348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8349 "00000000" // /* MW 1 */ + 8350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8351 "00000000" // /* MW 1 */ + 8352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8353 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 290 24 + 8354 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8355 "00001000" // /* MW 3 */ + 8356 "01100001" // /* MW 2 */ + 8357 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 290 8 + 8358 "10000100" // JNZ r16, #8544 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8544 delay_slots=5 */ + 8359 "00000001" // /* MW 5 */ + 8360 "01000000" // /* MW 4 */ + 8361 "10110000" // /* MW 3 */ + 8362 "00010000" // /* MW 2 */ + 8363 "10000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 61 +.delay_slot + 8364 "01000100" // MOVXM p4, #509064 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8365 "00010000" // /* MW 5 */ + 8366 "11001001" // /* MW 4 */ + 8367 "11001000" // /* MW 3 */ + 8368 "00000111" // /* MW 2 */ + 8369 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8371 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8373 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8375 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8377 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 31 +.src_ref 7 "conv2d_dw_bf16.h" 292 61 first +.src_ref 7 "conv2d_dw_bf16.h" 292 71 +.src_ref 7 "conv2d_dw_bf16.h" 294 30 +.src_ref 7 "conv2d_dw_bf16.h" 294 68 +.src_ref 7 "conv2d_dw_bf16.h" 294 97 + 8378 "10111010" // LDA r16, [p4], #-4; MOVX r0, #1; MOV m0, #-208 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8379 "01011000" // /* MW 9 */ + 8380 "00110000" // /* MW 8 */ + 8381 "00000111" // /* MW 7 */ + 8382 "00101000" // /* MW 6 */ + 8383 "00000000" // /* MW 5 */ + 8384 "00000000" // /* MW 4 */ + 8385 "11010000" // /* MW 3 */ + 8386 "11000010" // /* MW 2 */ + 8387 "10011111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 118 + 8388 "10011000" // LDA r21, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8389 "10110110" // /* MW 3 */ + 8390 "00101110" // /* MW 2 */ + 8391 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 294 59 first + 8392 "10011000" // LDA r18, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8393 "01010110" // /* MW 3 */ + 8394 "00011110" // /* MW 2 */ + 8395 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 31 +.src_ref 7 "conv2d_dw_bf16.h" 293 31 +.src_ref 7 "conv2d_dw_bf16.h" 297 84 + 8396 "11010100" // LDA r19, [sp, #-24]; MOV p0, p4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8397 "10000001" // /* MW 5 */ + 8398 "11010001" // /* MW 4 */ + 8399 "00100000" // /* MW 3 */ + 8400 "01001110" // /* MW 2 */ + 8401 "11111101" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 291 8 +.src_ref 7 "conv2d_dw_bf16.h" 294 97 +.src_ref 7 "conv2d_dw_bf16.h" 297 84 + 8402 "10111010" // LDA r20, [p4], m0; MOVS p0, p7; MOV p7, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8403 "01110010" // /* MW 9 */ + 8404 "01100000" // /* MW 8 */ + 8405 "10110000" // /* MW 7 */ + 8406 "00000011" // /* MW 6 */ + 8407 "10001011" // /* MW 5 */ + 8408 "10011100" // /* MW 4 */ + 8409 "11010000" // /* MW 3 */ + 8410 "01010010" // /* MW 2 */ + 8411 "10000001" // /* MW 1 */ + 8412 "00011000" // LDA p6, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8413 "00011001" // /* MW 3 */ + 8414 "11101111" // /* MW 2 */ + 8415 "00000111" // /* MW 1 */ + 8416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8417 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 31 first + 8418 "10011000" // LSHL r17, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8419 "00011101" // /* MW 3 */ + 8420 "00100011" // /* MW 2 */ + 8421 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 71 + 8422 "10011000" // LSHL r16, r16, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8423 "00001101" // /* MW 3 */ + 8424 "00100000" // /* MW 2 */ + 8425 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 291 8 first +.no_stack_arguments + 8426 "00000100" // JL #7280 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7280 delay_slots=5 */ + 8427 "00000001" // /* MW 5 */ + 8428 "00000000" // /* MW 4 */ + 8429 "00111000" // /* MW 3 */ + 8430 "00001110" // /* MW 2 */ + 8431 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 31 first +.src_ref 7 "conv2d_dw_bf16.h" 294 30 first +.delay_slot + 8432 "10100100" // LSHL r18, r18, r0; ADD.NC r22, r19, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8433 "10001010" // /* MW 5 */ + 8434 "00110011" // /* MW 4 */ + 8435 "10111011" // /* MW 3 */ + 8436 "10000001" // /* MW 2 */ + 8437 "10010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 31 +.src_ref 7 "conv2d_dw_bf16.h" 294 30 +.delay_slot + 8438 "10100100" // LSHL r17, r21, r0; ADD.NC r21, r15, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8439 "10010010" // /* MW 5 */ + 8440 "10101111" // /* MW 4 */ + 8441 "10111010" // /* MW 3 */ + 8442 "01000001" // /* MW 2 */ + 8443 "10101100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 292 31 +.src_ref 7 "conv2d_dw_bf16.h" 294 68 +.delay_slot + 8444 "10100100" // LSHL r18, r20, r0; ADD.NC p1, r17, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8445 "10110010" // /* MW 5 */ + 8446 "11010001" // /* MW 4 */ + 8447 "10110010" // /* MW 3 */ + 8448 "10000001" // /* MW 2 */ + 8449 "10100100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 293 31 first +.delay_slot + 8450 "01011000" // ADD.NC p2, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8451 "11000001" // /* MW 3 */ + 8452 "01101001" // /* MW 2 */ + 8453 "00011010" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 294 68 first +.delay_slot + 8454 "10111010" // NOPA; NOPB; ADD.NC p3, r21, r18 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8455 "10101110" // /* MW 9 */ + 8456 "01100100" // /* MW 8 */ + 8457 "10110101" // /* MW 7 */ + 8458 "00000001" // /* MW 6 */ + 8459 "00010000" // /* MW 5 */ + 8460 "00000000" // /* MW 4 */ + 8461 "11110000" // /* MW 3 */ + 8462 "00101100" // /* MW 2 */ + 8463 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 7 "conv2d_dw_bf16.h" 297 55 +.src_ref 7 "conv2d_dw_bf16.h" 298 55 +.src_ref 7 "conv2d_dw_bf16.h" 298 84 +.return_address + 8464 "10111010" // LDA p4, [sp, #-16]; MOVX r0, #1; MOV m0, #-204 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8465 "01011000" // /* MW 9 */ + 8466 "00110100" // /* MW 8 */ + 8467 "00000111" // /* MW 7 */ + 8468 "00101000" // /* MW 6 */ + 8469 "00000000" // /* MW 5 */ + 8470 "00000000" // /* MW 4 */ + 8471 "00100000" // /* MW 3 */ + 8472 "01000011" // /* MW 2 */ + 8473 "11111110" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 297 84 first + 8474 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8475 "00010110" // /* MW 3 */ + 8476 "11111110" // /* MW 2 */ + 8477 "00000111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 298 84 + 8478 "11010100" // LDA p7, [sp, #-4]; MOV p3, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8479 "10000001" // /* MW 5 */ + 8480 "11011101" // /* MW 4 */ + 8481 "00100110" // /* MW 3 */ + 8482 "11110011" // /* MW 2 */ + 8483 "11111111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 298 84 first + 8484 "10011000" // LDA r17, [p3], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8485 "00110110" // /* MW 3 */ + 8486 "00001010" // /* MW 2 */ + 8487 "00000011" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 296 8 + 8488 "00011000" // LDA lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8489 "00111001" // /* MW 3 */ + 8490 "11111000" // /* MW 2 */ + 8491 "00000111" // /* MW 1 */ + 8492 "00011000" // LDA r15, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8493 "11110001" // /* MW 3 */ + 8494 "11110101" // /* MW 2 */ + 8495 "00000111" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 301 first + 8496 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8497 "00000001" // /* MW 5 */ + 8498 "00000000" // /* MW 4 */ + 8499 "00000000" // /* MW 3 */ + 8500 "11111000" // /* MW 2 */ + 8501 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 8502 "10011000" // LDA r18, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8503 "01010110" // /* MW 3 */ + 8504 "00000110" // /* MW 2 */ + 8505 "00000100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 297 55 first + 8506 "10011000" // LSHL r16, r16, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8507 "00001101" // /* MW 3 */ + 8508 "00100000" // /* MW 2 */ + 8509 "00010100" // /* MW 1 */ + 8510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8511 "00000000" // /* MW 1 */ + 8512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8513 "00000000" // /* MW 1 */ + 8514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8515 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 296 8 first +.tail_call + 8516 "10000100" // J #7968 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=7968 delay_slots=5 */ + 8517 "00000000" // /* MW 5 */ + 8518 "00000000" // /* MW 4 */ + 8519 "10010000" // /* MW 3 */ + 8520 "00001111" // /* MW 2 */ + 8521 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 298 55 first +.delay_slot + 8522 "10011000" // LSHL r17, r17, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8523 "00001101" // /* MW 3 */ + 8524 "01100010" // /* MW 2 */ + 8525 "00010100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 298 55 +.delay_slot + 8526 "01011000" // ADD.NC r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8527 "11001001" // /* MW 3 */ + 8528 "01011000" // /* MW 2 */ + 8529 "00011100" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 297 55 first +.delay_slot + 8530 "01011000" // ADD.NC p1, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8531 "01000001" // /* MW 3 */ + 8532 "01101001" // /* MW 2 */ + 8533 "00011001" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 296 8 +.delay_slot + 8534 "11111000" // MOV p0, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8535 "00100000" // /* MW 3 */ + 8536 "01101001" // /* MW 2 */ + 8537 "00011000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 298 93 first +.delay_slot + 8538 "10010100" // NOPA; ADD.NC p2, r17, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8539 "10000010" // /* MW 5 */ + 8540 "11010001" // /* MW 4 */ + 8541 "11110100" // /* MW 3 */ + 8542 "00101100" // /* MW 2 */ + 8543 "00000000" // /* MW 1 */ +.label TGT_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_320 +.src_ref 7 "conv2d_dw_bf16.h" 301 +.return_address + 8544 "00011000" // LDA lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8545 "00111001" // /* MW 3 */ + 8546 "11111000" // /* MW 2 */ + 8547 "00000111" // /* MW 1 */ + 8548 "00011000" // LDA r15, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8549 "11110001" // /* MW 3 */ + 8550 "11110101" // /* MW 2 */ + 8551 "00000111" // /* MW 1 */ + 8552 "00011000" // LDA p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8553 "10011001" // /* MW 3 */ + 8554 "11111111" // /* MW 2 */ + 8555 "00000111" // /* MW 1 */ + 8556 "00011000" // LDA p6, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8557 "00011001" // /* MW 3 */ + 8558 "11101111" // /* MW 2 */ + 8559 "00000111" // /* MW 1 */ + 8560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8561 "00000000" // /* MW 1 */ + 8562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8563 "00000000" // /* MW 1 */ + 8564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8565 "00000000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 301 first + 8566 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 8567 "00000000" // /* MW 3 */ + 8568 "00101000" // /* MW 2 */ + 8569 "00010000" // /* MW 1 */ +.src_ref 7 "conv2d_dw_bf16.h" 301 +.delay_slot + 8570 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8571 "00000001" // /* MW 5 */ + 8572 "00000000" // /* MW 4 */ + 8573 "00000000" // /* MW 3 */ + 8574 "11111000" // /* MW 2 */ + 8575 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8577 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8579 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8581 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8582 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params__end +.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_end0 + 8583 "00000000" // /* MW 1 */ +.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_conv2d_dwc _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 6 "superkernels.cpp" 352 first +.src_ref 6 "superkernels.cpp" 357 6 +.function_start + 8592 "01000100" // MOVXM p4, #508416 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8593 "00000000" // /* MW 5 */ + 8594 "11000100" // /* MW 4 */ + 8595 "11001000" // /* MW 3 */ + 8596 "00000111" // /* MW 2 */ + 8597 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 357 6 first + 8598 "11010100" // LDA r16, [p4]; MOV r0, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8599 "01000001" // /* MW 5 */ + 8600 "00101111" // /* MW 4 */ + 8601 "11010000" // /* MW 3 */ + 8602 "11000010" // /* MW 2 */ + 8603 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 352 + 8604 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8605 "00000001" // /* MW 5 */ + 8606 "00000000" // /* MW 4 */ + 8607 "00000000" // /* MW 3 */ + 8608 "00010000" // /* MW 2 */ + 8609 "00000000" // /* MW 1 */ + 8610 "00000010" // ST r14, [sp, #-8]; MOV r17, CORE_ID /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8611 "01110000" // /* MW 7 */ + 8612 "01110000" // /* MW 6 */ + 8613 "00101101" // /* MW 5 */ + 8614 "00000010" // /* MW 4 */ + 8615 "10110000" // /* MW 3 */ + 8616 "00111010" // /* MW 2 */ + 8617 "11111111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 375 + 8618 "00000010" // ST r13, [sp, #-4]; MOV r13, lr /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8619 "01110000" // /* MW 7 */ + 8620 "11110000" // /* MW 6 */ + 8621 "10101000" // /* MW 5 */ + 8622 "00000001" // /* MW 4 */ + 8623 "10110000" // /* MW 3 */ + 8624 "10110110" // /* MW 2 */ + 8625 "11111111" // /* MW 1 */ + 8626 "10011000" // ST p0, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8627 "00011101" // /* MW 3 */ + 8628 "11101100" // /* MW 2 */ + 8629 "00001111" // /* MW 1 */ + 8630 "10011000" // ST p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8631 "10011101" // /* MW 3 */ + 8632 "11110111" // /* MW 2 */ + 8633 "00001111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 360 44 + 8634 "00000010" // ST r0, [sp, #-16]; MOV r14, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8635 "01110000" // /* MW 7 */ + 8636 "01100000" // /* MW 6 */ + 8637 "11001010" // /* MW 5 */ + 8638 "00000001" // /* MW 4 */ + 8639 "10110000" // /* MW 3 */ + 8640 "00000010" // /* MW 2 */ + 8641 "11111110" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 357 6 +.src_ref 6 "superkernels.cpp" 357 16 + 8642 "10000100" // JNZ r16, #8768 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8768 delay_slots=5 */ + 8643 "00000001" // /* MW 5 */ + 8644 "01000000" // /* MW 4 */ + 8645 "00100000" // /* MW 3 */ + 8646 "00010001" // /* MW 2 */ + 8647 "10000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.delay_slot + 8648 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8649 "11000000" // /* MW 3 */ + 8650 "11010110" // /* MW 2 */ + 8651 "00011011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 354 22 first +.delay_slot + 8652 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8653 "10010000" // /* MW 3 */ + 8654 "01100010" // /* MW 2 */ + 8655 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 354 30 +.delay_slot + 8656 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8657 "11111011" // /* MW 3 */ + 8658 "01100011" // /* MW 2 */ + 8659 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 354 11 +.delay_slot + 8660 "01000100" // MOVXM p3, #508420 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8661 "00001000" // /* MW 5 */ + 8662 "11000100" // /* MW 4 */ + 8663 "11000110" // /* MW 3 */ + 8664 "00000111" // /* MW 2 */ + 8665 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 354 11 +.delay_slot + 8666 "10011000" // ST r17, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8667 "00110001" // /* MW 3 */ + 8668 "00000110" // /* MW 2 */ + 8669 "00001011" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.src_ref 6 "superkernels.cpp" 369 2 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8670 "00111010" // MOVS p7, p1; MOVXM p1, #508448 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8671 "00010001" // /* MW 9 */ + 8672 "00010000" // /* MW 8 */ + 8673 "10110001" // /* MW 7 */ + 8674 "11110000" // /* MW 6 */ + 8675 "00000001" // /* MW 5 */ + 8676 "00000000" // /* MW 4 */ + 8677 "01100000" // /* MW 3 */ + 8678 "10010001" // /* MW 2 */ + 8679 "11110000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.src_ref 6 "superkernels.cpp" 359 4 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8680 "01110110" // ST.s8 r16, [p1]; MOVS p0, p2; MOVXM p1, #508444 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8681 "00010000" // /* MW 11 */ + 8682 "00001110" // /* MW 10 */ + 8683 "10110001" // /* MW 9 */ + 8684 "11110000" // /* MW 8 */ + 8685 "00000001" // /* MW 7 */ + 8686 "00000000" // /* MW 6 */ + 8687 "10001011" // /* MW 5 */ + 8688 "10001000" // /* MW 4 */ + 8689 "11100000" // /* MW 3 */ + 8690 "11000000" // /* MW 2 */ + 8691 "00100000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8693 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 359 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 8694 "00000100" // JL #6480 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=6480 delay_slots=5 */ + 8695 "00000001" // /* MW 5 */ + 8696 "00000000" // /* MW 4 */ + 8697 "10101000" // /* MW 3 */ + 8698 "00001100" // /* MW 2 */ + 8699 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8701 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8702 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8703 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8704 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8705 "00110001" // /* MW 3 */ + 8706 "00100000" // /* MW 2 */ + 8707 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 8708 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8709 "00000101" // /* MW 3 */ + 8710 "00100000" // /* MW 2 */ + 8711 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 8712 "00000010" // ST r16, [p1]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8713 "01110000" // /* MW 7 */ + 8714 "10100101" // /* MW 6 */ + 8715 "00000001" // /* MW 5 */ + 8716 "00000000" // /* MW 4 */ + 8717 "00110000" // /* MW 3 */ + 8718 "11000010" // /* MW 2 */ + 8719 "00100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 360 44 +.src_ref 6 "superkernels.cpp" 369 2 +.return_address + 8720 "00000010" // MOVS p1, p7; ADD.NC p2, r14, #8 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8721 "00000000" // /* MW 7 */ + 8722 "10000010" // /* MW 6 */ + 8723 "00110011" // /* MW 5 */ + 8724 "00000001" // /* MW 4 */ + 8725 "01100000" // /* MW 3 */ + 8726 "10010001" // /* MW 2 */ + 8727 "00110011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 360 17 first + 8728 "10011000" // LDA.u16 r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8729 "00111010" // /* MW 3 */ + 8730 "00000110" // /* MW 2 */ + 8731 "00000010" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 361 13 +.src_ref 6 "superkernels.cpp" 361 15 first + 8732 "10111010" // LDA.u16 r16, [p2, #4]; MOVXM p2, #508440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8733 "00010000" // /* MW 9 */ + 8734 "00001100" // /* MW 8 */ + 8735 "00110001" // /* MW 7 */ + 8736 "11110001" // /* MW 6 */ + 8737 "00000001" // /* MW 5 */ + 8738 "00000000" // /* MW 4 */ + 8739 "01010000" // /* MW 3 */ + 8740 "11000011" // /* MW 2 */ + 8741 "01000100" // /* MW 1 */ + 8742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8743 "00000000" // /* MW 1 */ + 8744 "10000100" // J #8784 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8784 delay_slots=5 */ + 8745 "00000000" // /* MW 5 */ + 8746 "00000000" // /* MW 4 */ + 8747 "00101000" // /* MW 3 */ + 8748 "00010001" // /* MW 2 */ + 8749 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 360 15 +.src_ref 6 "superkernels.cpp" 365 26 +.delay_slot + 8750 "01000100" // MOVXM p3, #508432 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8751 "00100000" // /* MW 5 */ + 8752 "11000100" // /* MW 4 */ + 8753 "11000110" // /* MW 3 */ + 8754 "00000111" // /* MW 2 */ + 8755 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8756 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8757 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8758 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8759 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 360 15 first +.delay_slot + 8760 "10011000" // ST r17, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8761 "00110001" // /* MW 3 */ + 8762 "00000110" // /* MW 2 */ + 8763 "00001011" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 361 13 first +.delay_slot + 8764 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8765 "00010001" // /* MW 3 */ + 8766 "00000110" // /* MW 2 */ + 8767 "00001010" // /* MW 1 */ +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_176 +.src_ref 6 "superkernels.cpp" 365 26 + 8768 "11100001" // NOPA; NOPB; NOPS; MOVXM p3, #508432; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8769 "00000000" // /* MW 15 */ + 8770 "00000000" // /* MW 14 */ + 8771 "00010000" // /* MW 13 */ + 8772 "00001000" // /* MW 12 */ + 8773 "10110001" // /* MW 11 */ + 8774 "11110001" // /* MW 10 */ + 8775 "00000001" // /* MW 9 */ + 8776 "00000000" // /* MW 8 */ + 8777 "01011011" // /* MW 7 */ + 8778 "00000001" // /* MW 6 */ + 8779 "00100000" // /* MW 5 */ + 8780 "00000000" // /* MW 4 */ + 8781 "11110000" // /* MW 3 */ + 8782 "00101100" // /* MW 2 */ + 8783 "00000000" // /* MW 1 */ +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_192 +.src_ref 1 "io_buffer_main.h" 242 49 first + 8784 "00011000" // ADD.NC p2, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8785 "10000110" // /* MW 3 */ + 8786 "01100111" // /* MW 2 */ + 8787 "00011010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 49 +.src_ref 6 "superkernels.cpp" 365 15 + 8788 "10111010" // LDA r27, [p2], #-4; MOVXM p4, #508420 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8789 "00010000" // /* MW 9 */ + 8790 "00000010" // /* MW 8 */ + 8791 "00110001" // /* MW 7 */ + 8792 "11110010" // /* MW 6 */ + 8793 "00000001" // /* MW 5 */ + 8794 "00000000" // /* MW 4 */ + 8795 "11010000" // /* MW 3 */ + 8796 "11101110" // /* MW 2 */ + 8797 "01011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 64 + 8798 "10011000" // LDA r16, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8799 "00010110" // /* MW 3 */ + 8800 "11111110" // /* MW 2 */ + 8801 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 80 + 8802 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8803 "00110110" // /* MW 3 */ + 8804 "11111110" // /* MW 2 */ + 8805 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 259 28 first + 8806 "10011000" // LDA r18, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8807 "01010110" // /* MW 3 */ + 8808 "01000110" // /* MW 2 */ + 8809 "00000010" // /* MW 1 */ + 8810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8811 "00000000" // /* MW 1 */ + 8812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8813 "00000000" // /* MW 1 */ + 8814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8815 "00000000" // /* MW 1 */ + 8816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8817 "00000000" // /* MW 1 */ + 8818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8819 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 36 first +.src_ref 1 "io_buffer_main.h" 242 43 first + 8820 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8821 "00000010" // /* MW 3 */ + 8822 "01100001" // /* MW 2 */ + 8823 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 242 20 + 8824 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8825 "00010001" // /* MW 3 */ + 8826 "00000110" // /* MW 2 */ + 8827 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 + 8828 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8829 "11111101" // /* MW 3 */ + 8830 "11100000" // /* MW 2 */ + 8831 "00010111" // /* MW 1 */ + 8832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8833 "00000000" // /* MW 1 */ + 8834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8835 "00000000" // /* MW 1 */ + 8836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8837 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 419 8 first + 8838 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8839 "00001000" // /* MW 3 */ + 8840 "10010011" // /* MW 2 */ + 8841 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 +.src_ref 1 "io_buffer_main.h" 449 8 +.src_ref 6 "superkernels.cpp" 364 11 +.src_ref 6 "superkernels.cpp" 367 47 +.src_ref 6 "superkernels.cpp" 372 6 +.src_ref 6 "superkernels.cpp" 373 16 + 8842 "10111010" // MOVA r15, #1; MOVXM p7, #508416 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8843 "00010000" // /* MW 9 */ + 8844 "00000000" // /* MW 8 */ + 8845 "10110001" // /* MW 7 */ + 8846 "11110011" // /* MW 6 */ + 8847 "00000001" // /* MW 5 */ + 8848 "00000000" // /* MW 4 */ + 8849 "00000000" // /* MW 3 */ + 8850 "00101111" // /* MW 2 */ + 8851 "00000000" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 76 16 + 8852 "11100100" // MOVX r24, #0; MOV r16, sp /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8853 "11000001" // /* MW 5 */ + 8854 "00101011" // /* MW 4 */ + 8855 "00101000" // /* MW 3 */ + 8856 "00000000" // /* MW 2 */ + 8857 "00000110" // /* MW 1 */ + 8858 "00011000" // ADD.NC p0, r16, #-76 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8859 "01011010" // /* MW 3 */ + 8860 "01101000" // /* MW 2 */ + 8861 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 1 "io_buffer_main.h" 348 51 + 8862 "11010100" // LDA p5, [sp, #-20]; MOV r14, p2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8863 "10000001" // /* MW 5 */ + 8864 "00101001" // /* MW 4 */ + 8865 "00100111" // /* MW 3 */ + 8866 "11010011" // /* MW 2 */ + 8867 "11111101" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 365 15 first + 8868 "10011000" // LDA r17, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8869 "00110110" // /* MW 3 */ + 8870 "00000110" // /* MW 2 */ + 8871 "00000100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 365 26 +.src_ref 6 "superkernels.cpp" 369 2 + 8872 "10111010" // LDA r16, [p3]; MOVXM p3, #508864 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8873 "00010000" // /* MW 9 */ + 8874 "11100000" // /* MW 8 */ + 8875 "10110001" // /* MW 7 */ + 8876 "11110001" // /* MW 6 */ + 8877 "00000001" // /* MW 5 */ + 8878 "00000000" // /* MW 4 */ + 8879 "11010000" // /* MW 3 */ + 8880 "11000010" // /* MW 2 */ + 8881 "01100000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 364 11 first + 8882 "10011000" // LDA r18, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8883 "01010110" // /* MW 3 */ + 8884 "00000110" // /* MW 2 */ + 8885 "00000111" // /* MW 1 */ + 8886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8887 "00000000" // /* MW 1 */ + 8888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8889 "00000000" // /* MW 1 */ + 8890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8891 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 8892 "10011000" // LDA r19, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8893 "01110110" // /* MW 3 */ + 8894 "00000110" // /* MW 2 */ + 8895 "00000101" // /* MW 1 */ + 8896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8897 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 365 24 first + 8898 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8899 "00001111" // /* MW 3 */ + 8900 "01100001" // /* MW 2 */ + 8901 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 364 11 first + 8902 "00011000" // ADD r17, r18, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8903 "00000111" // /* MW 3 */ + 8904 "10100010" // /* MW 2 */ + 8905 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 367 47 first + 8906 "10011000" // LSHL r16, r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8907 "11111101" // /* MW 3 */ + 8908 "00100000" // /* MW 2 */ + 8909 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 369 2 first +.no_stack_arguments + 8910 "00000100" // JL #8224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8224 delay_slots=5 */ + 8911 "00000001" // /* MW 5 */ + 8912 "00000000" // /* MW 4 */ + 8913 "00010000" // /* MW 3 */ + 8914 "00010000" // /* MW 2 */ + 8915 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 364 11 first +.delay_slot + 8916 "10011000" // ST r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8917 "00110001" // /* MW 3 */ + 8918 "00000110" // /* MW 2 */ + 8919 "00001111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 367 47 first +.delay_slot + 8920 "01011000" // ADD.NC dn0, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8921 "11000001" // /* MW 3 */ + 8922 "01001001" // /* MW 2 */ + 8923 "00011000" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 225 10 first +.delay_slot + 8924 "10011000" // ST dn0, [sp, #-76] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8925 "00100101" // /* MW 3 */ + 8926 "10110100" // /* MW 2 */ + 8927 "00001111" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 76 16 first +.delay_slot + 8928 "10011000" // ST r24, [sp, #-72] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8929 "00010101" // /* MW 3 */ + 8930 "10111011" // /* MW 2 */ + 8931 "00001111" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 76 16 +.delay_slot + 8932 "00110110" // NOPA; NOPB; ST r24, [sp, #-68]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8933 "11000001" // /* MW 11 */ + 8934 "10001010" // /* MW 10 */ + 8935 "11011111" // /* MW 9 */ + 8936 "00000011" // /* MW 8 */ + 8937 "00000000" // /* MW 7 */ + 8938 "00000000" // /* MW 6 */ + 8939 "00100000" // /* MW 5 */ + 8940 "00000000" // /* MW 4 */ + 8941 "11110000" // /* MW 3 */ + 8942 "00101100" // /* MW 2 */ + 8943 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 +.return_address + 8944 "00011000" // ADD.NC p2, r14, #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8945 "00001010" // /* MW 3 */ + 8946 "01100111" // /* MW 2 */ + 8947 "00011010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 348 51 first + 8948 "10011000" // LDA r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8949 "00010110" // /* MW 3 */ + 8950 "00000110" // /* MW 2 */ + 8951 "00000010" // /* MW 1 */ + 8952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8953 "00000000" // /* MW 1 */ + 8954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8955 "00000000" // /* MW 1 */ + 8956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8957 "00000000" // /* MW 1 */ + 8958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8959 "00000000" // /* MW 1 */ + 8960 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8961 "00000000" // /* MW 1 */ + 8962 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8963 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 449 8 first + 8964 "00011000" // REL r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8965 "11111000" // /* MW 3 */ + 8966 "00010000" // /* MW 2 */ + 8967 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 40 first +.src_ref 6 "superkernels.cpp" 372 19 + 8968 "10111010" // LDA r16, [p2, #-8]; MOVXM p1, #508440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8969 "00010000" // /* MW 9 */ + 8970 "00001100" // /* MW 8 */ + 8971 "10110001" // /* MW 7 */ + 8972 "11110000" // /* MW 6 */ + 8973 "00000001" // /* MW 5 */ + 8974 "00000000" // /* MW 4 */ + 8975 "11010000" // /* MW 3 */ + 8976 "11000010" // /* MW 2 */ + 8977 "01011100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 372 19 first + 8978 "10011000" // LDA r18, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8979 "01010110" // /* MW 3 */ + 8980 "00000110" // /* MW 2 */ + 8981 "00000001" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 372 6 + 8982 "10011000" // LDA r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8983 "00110110" // /* MW 3 */ + 8984 "00000110" // /* MW 2 */ + 8985 "00000111" // /* MW 1 */ + 8986 "00011000" // LDA p1, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8987 "10011001" // /* MW 3 */ + 8988 "11110100" // /* MW 2 */ + 8989 "00000111" // /* MW 1 */ + 8990 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8991 "11010001" // /* MW 3 */ + 8992 "11111001" // /* MW 2 */ + 8993 "00000111" // /* MW 1 */ + 8994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8995 "00000000" // /* MW 1 */ + 8996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8997 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 32 first + 8998 "10011000" // SUB r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8999 "00000001" // /* MW 3 */ + 9000 "11100001" // /* MW 2 */ + 9001 "00010011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 351 28 + 9002 "10011000" // ST r16, [p2, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9003 "00010001" // /* MW 3 */ + 9004 "11100110" // /* MW 2 */ + 9005 "00001010" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 372 16 first + 9006 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9007 "00101000" // /* MW 3 */ + 9008 "01100001" // /* MW 2 */ + 9009 "00010100" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 372 6 + 9010 "10000100" // JNZ r16, #9040 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9040 delay_slots=5 */ + 9011 "00000001" // /* MW 5 */ + 9012 "01000000" // /* MW 4 */ + 9013 "10101000" // /* MW 3 */ + 9014 "00010001" // /* MW 2 */ + 9015 "10000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 373 16 +.delay_slot + 9016 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9017 "00000001" // /* MW 3 */ + 9018 "00110000" // /* MW 2 */ + 9019 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9021 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9023 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9025 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9027 "00000000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 373 16 first + 9028 "00110110" // NOPA; NOPB; ST r24, [p7]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9029 "11000001" // /* MW 11 */ + 9030 "10001000" // /* MW 10 */ + 9031 "10000011" // /* MW 9 */ + 9032 "00000011" // /* MW 8 */ + 9033 "00000000" // /* MW 7 */ + 9034 "00000000" // /* MW 6 */ + 9035 "00100000" // /* MW 5 */ + 9036 "00000000" // /* MW 4 */ + 9037 "11110000" // /* MW 3 */ + 9038 "00101100" // /* MW 2 */ + 9039 "00000000" // /* MW 1 */ +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 6 "superkernels.cpp" 375 + 9040 "11010100" // LDA r13, [sp, #-4]; MOV lr, r13 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9041 "01000001" // /* MW 5 */ + 9042 "11101101" // /* MW 4 */ + 9043 "00101110" // /* MW 3 */ + 9044 "10110110" // /* MW 2 */ + 9045 "11111111" // /* MW 1 */ + 9046 "00011000" // LDA r15, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9047 "11110001" // /* MW 3 */ + 9048 "11110001" // /* MW 2 */ + 9049 "00000111" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 375 first + 9050 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9051 "00000000" // /* MW 3 */ + 9052 "00101000" // /* MW 2 */ + 9053 "00010000" // /* MW 1 */ +.src_ref 6 "superkernels.cpp" 375 +.delay_slot + 9054 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9055 "00000001" // /* MW 5 */ + 9056 "00000000" // /* MW 4 */ + 9057 "00000000" // /* MW 3 */ + 9058 "11110000" // /* MW 2 */ + 9059 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9061 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9063 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9065 "00000000" // /* MW 1 */ +.delay_slot + 9066 "11111000" // MOV p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9067 "11000000" // /* MW 3 */ + 9068 "01100010" // /* MW 2 */ +.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 9069 "00011111" // /* MW 1 */ +.label __Z15_b14160_wrapperPPv___func_begin0 +.label _Z15_b14160_wrapperPPv +.function _b14160_wrapper _Z15_b14160_wrapperPPv +.src_ref 0 "0_0_reloadable4.cc" 21 first +.src_ref 0 "0_0_reloadable4.cc" 23 79 +.function_start + 9072 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9073 "11000000" // /* MW 3 */ + 9074 "01100000" // /* MW 2 */ + 9075 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 23 79 first + 9076 "10011000" // LDA p0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9077 "00011110" // /* MW 3 */ + 9078 "00011100" // /* MW 2 */ + 9079 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 24 79 first + 9080 "10011000" // LDA p1, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9081 "10011110" // /* MW 3 */ + 9082 "00101100" // /* MW 2 */ + 9083 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 26 81 first + 9084 "10011000" // LDA p3, [p2, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9085 "10011110" // /* MW 3 */ + 9086 "11110101" // /* MW 2 */ + 9087 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 25 47 first + 9088 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9089 "00011110" // /* MW 3 */ + 9090 "00000101" // /* MW 2 */ + 9091 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 22 4 first +.tail_call + 9092 "10000100" // J #8592 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=8592 delay_slots=5 */ + 9093 "00000000" // /* MW 5 */ + 9094 "00000000" // /* MW 4 */ + 9095 "11001000" // /* MW 3 */ + 9096 "00010000" // /* MW 2 */ + 9097 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9099 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9100 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9101 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9103 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9105 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z15_b14160_wrapperPPv__end +.label __Z15_b14160_wrapperPPv___func_end0 + 9107 "00000000" // /* MW 1 */ +.label __Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj___func_begin0 +.label _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj +.function setup_transposeshuffle_params _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj +.src_ref 3 "transposeshuffle_params.h" 71 first +.src_ref 3 "transposeshuffle_params.h" 76 16 +.src_ref 3 "transposeshuffle_params.h" 76 18 first +.function_start + 9120 "10111010" // LDA el0, [p1], #4; MOVXM r0, #508544 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9121 "00010000" // /* MW 9 */ + 9122 "01000000" // /* MW 8 */ + 9123 "00001001" // /* MW 7 */ + 9124 "11110000" // /* MW 6 */ + 9125 "00000001" // /* MW 5 */ + 9126 "00000000" // /* MW 4 */ + 9127 "11010000" // /* MW 3 */ + 9128 "10000101" // /* MW 2 */ + 9129 "00100011" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 9 +.src_ref 3 "transposeshuffle_params.h" 76 16 +.src_ref 3 "transposeshuffle_params.h" 80 28 +.src_ref 3 "transposeshuffle_params.h" 80 36 +.src_ref 3 "transposeshuffle_params.h" 81 28 +.src_ref 3 "transposeshuffle_params.h" 81 36 + 9130 "01110110" // MOVA m0, #64; MOVS p0, r0; MOVX r2, #-3; ADD.NC p2, r0, #4 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9131 "00001000" // /* MW 11 */ + 9132 "00000001" // /* MW 10 */ + 9133 "00110000" // /* MW 9 */ + 9134 "10101001" // /* MW 8 */ + 9135 "00100111" // /* MW 7 */ + 9136 "00111110" // /* MW 6 */ + 9137 "00001011" // /* MW 5 */ + 9138 "10000000" // /* MW 4 */ + 9139 "10000000" // /* MW 3 */ + 9140 "00000000" // /* MW 2 */ + 9141 "00001000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 85 22 +.src_ref 3 "transposeshuffle_params.h" 86 17 +.src_ref 3 "transposeshuffle_params.h" 89 43 +.src_ref 3 "transposeshuffle_params.h" 91 18 +.src_ref 3 "transposeshuffle_params.h" 93 4 +.src_ref 3 "transposeshuffle_params.h" 94 4 + 9142 "01100100" // MOVX r1, #4; MOV r0, #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9143 "00000001" // /* MW 5 */ + 9144 "00100010" // /* MW 4 */ + 9145 "00100000" // /* MW 3 */ + 9146 "01000010" // /* MW 2 */ + 9147 "00000000" // /* MW 1 */ + 9148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9149 "00000000" // /* MW 1 */ + 9150 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9151 "00000000" // /* MW 1 */ + 9152 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9153 "00000000" // /* MW 1 */ + 9154 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9155 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 16 first + 9156 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9157 "00101001" // /* MW 3 */ + 9158 "00011100" // /* MW 2 */ + 9159 "00001000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 18 + 9160 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9161 "00101110" // /* MW 3 */ + 9162 "00011100" // /* MW 2 */ + 9163 "00000001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 18 + 9164 "10011000" // LDA eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9165 "00001110" // /* MW 3 */ + 9166 "00011100" // /* MW 2 */ + 9167 "00000001" // /* MW 1 */ + 9168 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9169 "00000000" // /* MW 1 */ + 9170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9171 "00000000" // /* MW 1 */ + 9172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9173 "00000000" // /* MW 1 */ + 9174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9175 "00000000" // /* MW 1 */ + 9176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9177 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 16 + 9178 "10011000" // ST el0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9179 "00101001" // /* MW 3 */ + 9180 "00011100" // /* MW 2 */ + 9181 "00001010" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 16 + 9182 "10011000" // ST eh0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9183 "00001001" // /* MW 3 */ + 9184 "00011100" // /* MW 2 */ + 9185 "00001010" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 18 + 9186 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9187 "00101110" // /* MW 3 */ + 9188 "00011100" // /* MW 2 */ + 9189 "00000001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 18 + 9190 "10011000" // LDA eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9191 "00001110" // /* MW 3 */ + 9192 "00011100" // /* MW 2 */ + 9193 "00000001" // /* MW 1 */ + 9194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9195 "00000000" // /* MW 1 */ + 9196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9197 "00000000" // /* MW 1 */ + 9198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9199 "00000000" // /* MW 1 */ + 9200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9201 "00000000" // /* MW 1 */ + 9202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9203 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 16 + 9204 "10011000" // ST el0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9205 "00101001" // /* MW 3 */ + 9206 "00011100" // /* MW 2 */ + 9207 "00001010" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 16 + 9208 "10011000" // ST eh0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9209 "00001001" // /* MW 3 */ + 9210 "00011100" // /* MW 2 */ + 9211 "00001010" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 18 + 9212 "10011000" // LDA eh0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9213 "00001110" // /* MW 3 */ + 9214 "00000100" // /* MW 2 */ + 9215 "00000001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 18 + 9216 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9217 "00101110" // /* MW 3 */ + 9218 "00010100" // /* MW 2 */ + 9219 "00000001" // /* MW 1 */ + 9220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9221 "00000000" // /* MW 1 */ + 9222 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9223 "00000000" // /* MW 1 */ + 9224 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9225 "00000000" // /* MW 1 */ + 9226 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9227 "00000000" // /* MW 1 */ + 9228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9229 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 16 + 9230 "10011000" // ST eh0, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9231 "00001001" // /* MW 3 */ + 9232 "00000100" // /* MW 2 */ + 9233 "00001010" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 76 16 + 9234 "10011000" // ST el0, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9235 "00101001" // /* MW 3 */ + 9236 "00010100" // /* MW 2 */ + 9237 "00001010" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 80 28 first + 9238 "10011000" // LDA r3, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9239 "01110110" // /* MW 3 */ + 9240 "00001000" // /* MW 2 */ + 9241 "00000000" // /* MW 1 */ + 9242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9243 "00000000" // /* MW 1 */ + 9244 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9245 "00000000" // /* MW 1 */ + 9246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9247 "00000000" // /* MW 1 */ + 9248 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9249 "00000000" // /* MW 1 */ + 9250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9251 "00000000" // /* MW 1 */ + 9252 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9253 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 80 36 + 9254 "10011000" // LSHL r4, r3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9255 "00101101" // /* MW 3 */ + 9256 "11001000" // /* MW 2 */ + 9257 "00010000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 80 42 +.src_ref 3 "transposeshuffle_params.h" 89 43 first + 9258 "00100100" // LSHL r3, r3, r1; ADD.NC r1, r4, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9259 "11111111" // /* MW 5 */ + 9260 "10100100" // /* MW 4 */ + 9261 "10110000" // /* MW 3 */ + 9262 "11000011" // /* MW 2 */ + 9263 "00011000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 80 19 +.src_ref 3 "transposeshuffle_params.h" 80 19 first + 9264 "00000010" // ST r1, [p0]; MOV r4, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9265 "01110000" // /* MW 7 */ + 9266 "01100000" // /* MW 6 */ + 9267 "10001000" // /* MW 5 */ + 9268 "00000000" // /* MW 4 */ + 9269 "00110000" // /* MW 3 */ + 9270 "10000110" // /* MW 2 */ + 9271 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 80 19 + 9272 "00011000" // ADD.NC p1, r4, #-60 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9273 "01100010" // /* MW 3 */ + 9274 "01100010" // /* MW 2 */ + 9275 "00011001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 81 28 first + 9276 "10011000" // LDA r4, [p1], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9277 "10010110" // /* MW 3 */ + 9278 "00001000" // /* MW 2 */ + 9279 "00000001" // /* MW 1 */ + 9280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9281 "00000000" // /* MW 1 */ + 9282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9283 "00000000" // /* MW 1 */ + 9284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9285 "00000000" // /* MW 1 */ + 9286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9287 "00000000" // /* MW 1 */ + 9288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9289 "00000000" // /* MW 1 */ + 9290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9291 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 81 36 +.src_ref 3 "transposeshuffle_params.h" 90 77 + 9292 "01100100" // LSHL r2, r4, r2; MOV r4, #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9293 "00000001" // /* MW 5 */ + 9294 "00100010" // /* MW 4 */ + 9295 "10110010" // /* MW 3 */ + 9296 "10000101" // /* MW 2 */ + 9297 "00100000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 81 42 + 9298 "00011000" // ADD r2, r2, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9299 "11111111" // /* MW 3 */ + 9300 "10000101" // /* MW 2 */ + 9301 "00010000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 81 19 +.src_ref 3 "transposeshuffle_params.h" 90 77 first + 9302 "01011100" // ST r2, [p1], #4; MSC r4, r4, r3, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9303 "01011100" // /* MW 5 */ + 9304 "10010000" // /* MW 4 */ + 9305 "00110001" // /* MW 3 */ + 9306 "10001010" // /* MW 2 */ + 9307 "00100011" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 85 22 first + 9308 "10011000" // ST r0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9309 "00010001" // /* MW 3 */ + 9310 "00011100" // /* MW 2 */ + 9311 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 85 22 + 9312 "10011000" // ST r0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9313 "00010001" // /* MW 3 */ + 9314 "00011100" // /* MW 2 */ + 9315 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 86 17 first + 9316 "10011000" // ST r0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9317 "00010001" // /* MW 3 */ + 9318 "00101100" // /* MW 2 */ + 9319 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 89 23 first + 9320 "10011000" // ST r3, [p1], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9321 "01110001" // /* MW 3 */ + 9322 "11111100" // /* MW 2 */ + 9323 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 90 23 first + 9324 "10011000" // ST r4, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9325 "10010001" // /* MW 3 */ + 9326 "00101100" // /* MW 2 */ + 9327 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 91 18 +.src_ref 3 "transposeshuffle_params.h" 91 18 first + 9328 "00000010" // ST r0, [p1]; MOV r5, p1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9329 "01110000" // /* MW 7 */ + 9330 "01100000" // /* MW 6 */ + 9331 "10101001" // /* MW 5 */ + 9332 "00000000" // /* MW 4 */ + 9333 "00110000" // /* MW 3 */ + 9334 "10000010" // /* MW 2 */ + 9335 "00100000" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 91 18 + 9336 "00011000" // ADD.NC p1, r5, #-68 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9337 "11011110" // /* MW 3 */ + 9338 "01100010" // /* MW 2 */ + 9339 "00011001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 93 4 first + 9340 "10011000" // ST r0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9341 "00010001" // /* MW 3 */ + 9342 "00011100" // /* MW 2 */ + 9343 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 93 4 + 9344 "10011000" // ST r0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9345 "00010001" // /* MW 3 */ + 9346 "00011100" // /* MW 2 */ + 9347 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 93 4 + 9348 "10011000" // ST r2, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9349 "01010001" // /* MW 3 */ + 9350 "00011100" // /* MW 2 */ + 9351 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 93 4 + 9352 "10011000" // ST r1, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9353 "00110001" // /* MW 3 */ + 9354 "00011100" // /* MW 2 */ + 9355 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 93 4 +.src_ref 3 "transposeshuffle_params.h" 95 first + 9356 "01011100" // ST r0, [p1], #4; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 9357 "00000000" // /* MW 5 */ + 9358 "01010000" // /* MW 4 */ + 9359 "00110000" // /* MW 3 */ + 9360 "10000010" // /* MW 2 */ + 9361 "00100011" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 94 4 first +.delay_slot + 9362 "10011000" // ST r3, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9363 "01110001" // /* MW 3 */ + 9364 "00101100" // /* MW 2 */ + 9365 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 94 4 +.delay_slot + 9366 "10011000" // ST r2, [p1], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9367 "01010001" // /* MW 3 */ + 9368 "11111100" // /* MW 2 */ + 9369 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 94 4 +.delay_slot + 9370 "10011000" // ST r4, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9371 "10010001" // /* MW 3 */ + 9372 "00101100" // /* MW 2 */ + 9373 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 94 4 +.delay_slot + 9374 "10011000" // ST r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9375 "00110001" // /* MW 3 */ + 9376 "00000100" // /* MW 2 */ + 9377 "00001001" // /* MW 1 */ +.src_ref 3 "transposeshuffle_params.h" 94 4 +.delay_slot + 9378 "10011000" // ST r0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9379 "00010001" // /* MW 3 */ + 9380 "00010100" // /* MW 2 */ +.label _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj__end +.label __Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA7_Kj___func_end0 + 9381 "00001001" // /* MW 1 */ +.label __Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params___func_begin0 +.label _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params +.function transposeshuffle _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params +.src_ref 3 "transposeshuffle.h" 38 first +.src_ref 3 "transposeshuffle.h" 72 14 +.src_ref 3 "transposeshuffle.h" 79 23 +.function_start + 9392 "10111010" // MOVA r1, #2; MOVXM p2, #508556 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9393 "00010000" // /* MW 9 */ + 9394 "01000110" // /* MW 8 */ + 9395 "00110001" // /* MW 7 */ + 9396 "11110001" // /* MW 6 */ + 9397 "00000001" // /* MW 5 */ + 9398 "00000000" // /* MW 4 */ + 9399 "00000000" // /* MW 3 */ + 9400 "01000001" // /* MW 2 */ + 9401 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 72 8 +.src_ref 3 "transposeshuffle.h" 72 14 first +.src_ref 3 "transposeshuffle.h" 72 23 + 9402 "00101100" // LDA r27, [p2]; MOVX r0, #22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9403 "10110010" // /* MW 5 */ + 9404 "00000000" // /* MW 4 */ + 9405 "11010000" // /* MW 3 */ + 9406 "11101110" // /* MW 2 */ + 9407 "01000000" // /* MW 1 */ + 9408 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9409 "00000000" // /* MW 1 */ + 9410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9411 "00000000" // /* MW 1 */ + 9412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9413 "00000000" // /* MW 1 */ + 9414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9415 "00000000" // /* MW 1 */ + 9416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9417 "00000000" // /* MW 1 */ + 9418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9419 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 79 23 first + 9420 "10011000" // EQ r1, r27, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9421 "00010111" // /* MW 3 */ + 9422 "11000010" // /* MW 2 */ + 9423 "00010110" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 79 8 + 9424 "10000100" // JNZ r1, #9888 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9888 delay_slots=5 */ + 9425 "00000001" // /* MW 5 */ + 9426 "01000000" // /* MW 4 */ + 9427 "01010000" // /* MW 3 */ + 9428 "00010011" // /* MW 2 */ + 9429 "00001000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 72 8 +.src_ref 3 "transposeshuffle.h" 72 23 +.delay_slot + 9430 "00011000" // MOVX r2, #29 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9431 "01110101" // /* MW 3 */ + 9432 "00000100" // /* MW 2 */ + 9433 "00010000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 72 8 first +.src_ref 3 "transposeshuffle.h" 72 23 first +.delay_slot + 9434 "00011000" // SEL.EQZ r0, r0, r2, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9435 "00100010" // /* MW 3 */ + 9436 "00000000" // /* MW 2 */ + 9437 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9439 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9441 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9443 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 116 34 + 9444 "01000100" // MOVXM p2, #508560 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9445 "00100000" // /* MW 5 */ + 9446 "11000101" // /* MW 4 */ + 9447 "11000100" // /* MW 3 */ + 9448 "00000111" // /* MW 2 */ + 9449 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 116 34 first + 9450 "10011000" // LDA r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9451 "00110110" // /* MW 3 */ + 9452 "00000100" // /* MW 2 */ + 9453 "00000010" // /* MW 1 */ + 9454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9455 "00000000" // /* MW 1 */ + 9456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9457 "00000000" // /* MW 1 */ + 9458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9459 "00000000" // /* MW 1 */ + 9460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9461 "00000000" // /* MW 1 */ + 9462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9463 "00000000" // /* MW 1 */ + 9464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9465 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 116 8 +.src_ref 3 "transposeshuffle.h" 116 26 + 9466 "10000100" // JZ r1, #10528 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10528 delay_slots=5 */ + 9467 "00000001" // /* MW 5 */ + 9468 "00000000" // /* MW 4 */ + 9469 "10010000" // /* MW 3 */ + 9470 "00010100" // /* MW 2 */ + 9471 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9473 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9475 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9477 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9479 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9481 "00000000" // /* MW 1 */ + 9482 "00011000" // MOVX r2, #10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9483 "00101001" // /* MW 3 */ + 9484 "00000100" // /* MW 2 */ + 9485 "00010000" // /* MW 1 */ + 9486 "10011000" // LTU r2, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9487 "00101100" // /* MW 3 */ + 9488 "01000100" // /* MW 2 */ + 9489 "00010000" // /* MW 1 */ + 9490 "10000100" // JNZ r2, #9728 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9728 delay_slots=5 */ + 9491 "00000001" // /* MW 5 */ + 9492 "01000000" // /* MW 4 */ + 9493 "00000000" // /* MW 3 */ + 9494 "00010011" // /* MW 2 */ + 9495 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9497 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9499 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9501 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9503 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9505 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "transposeshuffle.h" 116 8 +.src_ref 3 "transposeshuffle.h" 119 21 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9506 "00111010" // VLDB x0, [p0], #64; MOVXM ls, #9632 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9507 "00010000" // /* MW 9 */ + 9508 "11010000" // /* MW 8 */ + 9509 "01111010" // /* MW 7 */ + 9510 "00001000" // /* MW 6 */ + 9511 "00000000" // /* MW 5 */ + 9512 "00000000" // /* MW 4 */ + 9513 "01101000" // /* MW 3 */ + 9514 "00111000" // /* MW 2 */ + 9515 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "transposeshuffle.h" 116 8 first +.src_ref 3 "transposeshuffle.h" 119 21 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9516 "00111010" // VLDB x0, [p0], #64; MOVXM le, #9632 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9517 "00010000" // /* MW 9 */ + 9518 "11010000" // /* MW 8 */ + 9519 "10111010" // /* MW 7 */ + 9520 "00001001" // /* MW 6 */ + 9521 "00000000" // /* MW 5 */ + 9522 "00000000" // /* MW 4 */ + 9523 "01101000" // /* MW 3 */ + 9524 "00111000" // /* MW 2 */ + 9525 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "transposeshuffle.h" 116 8 +.src_ref 3 "transposeshuffle.h" 119 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9526 "10111010" // NOPA; VLDB x0, [p0], #64; ADD.NC lc, r1, #-9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9527 "11001110" // /* MW 9 */ + 9528 "01111101" // /* MW 8 */ + 9529 "10111000" // /* MW 7 */ + 9530 "00000010" // /* MW 6 */ + 9531 "00110100" // /* MW 5 */ + 9532 "00011100" // /* MW 4 */ + 9533 "11110000" // /* MW 3 */ + 9534 "00101100" // /* MW 2 */ + 9535 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "transposeshuffle.h" 119 21 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9536 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9537 "00000000" // /* MW 15 */ + 9538 "00000000" // /* MW 14 */ + 9539 "01111000" // /* MW 13 */ + 9540 "10100101" // /* MW 12 */ + 9541 "00000001" // /* MW 11 */ + 9542 "00000000" // /* MW 10 */ + 9543 "00000000" // /* MW 9 */ + 9544 "00000000" // /* MW 8 */ + 9545 "01011011" // /* MW 7 */ + 9546 "00000001" // /* MW 6 */ + 9547 "01101000" // /* MW 5 */ + 9548 "00111000" // /* MW 4 */ + 9549 "11110000" // /* MW 3 */ + 9550 "00101100" // /* MW 2 */ + 9551 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "transposeshuffle.h" 119 21 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9552 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9553 "00000000" // /* MW 15 */ + 9554 "00000000" // /* MW 14 */ + 9555 "01111000" // /* MW 13 */ + 9556 "10100101" // /* MW 12 */ + 9557 "00000001" // /* MW 11 */ + 9558 "00000000" // /* MW 10 */ + 9559 "00000000" // /* MW 9 */ + 9560 "00000000" // /* MW 8 */ + 9561 "01011011" // /* MW 7 */ + 9562 "00000001" // /* MW 6 */ + 9563 "01101000" // /* MW 5 */ + 9564 "00111000" // /* MW 4 */ + 9565 "11110000" // /* MW 3 */ + 9566 "00101100" // /* MW 2 */ + 9567 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "transposeshuffle.h" 119 21 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9568 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9569 "00000000" // /* MW 15 */ + 9570 "00000000" // /* MW 14 */ + 9571 "01111000" // /* MW 13 */ + 9572 "10100101" // /* MW 12 */ + 9573 "00000001" // /* MW 11 */ + 9574 "00000000" // /* MW 10 */ + 9575 "00000000" // /* MW 9 */ + 9576 "00000000" // /* MW 8 */ + 9577 "01011011" // /* MW 7 */ + 9578 "00000001" // /* MW 6 */ + 9579 "01101000" // /* MW 5 */ + 9580 "00111000" // /* MW 4 */ + 9581 "11110000" // /* MW 3 */ + 9582 "00101100" // /* MW 2 */ + 9583 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "transposeshuffle.h" 119 21 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9584 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9585 "00000000" // /* MW 15 */ + 9586 "00000000" // /* MW 14 */ + 9587 "01111000" // /* MW 13 */ + 9588 "10100101" // /* MW 12 */ + 9589 "00000001" // /* MW 11 */ + 9590 "00000000" // /* MW 10 */ + 9591 "00000000" // /* MW 9 */ + 9592 "00000000" // /* MW 8 */ + 9593 "01011011" // /* MW 7 */ + 9594 "00000001" // /* MW 6 */ + 9595 "01101000" // /* MW 5 */ + 9596 "00111000" // /* MW 4 */ + 9597 "11110000" // /* MW 3 */ + 9598 "00101100" // /* MW 2 */ + 9599 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "transposeshuffle.h" 119 21 +.src_ref 3 "transposeshuffle.h" 120 17 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9600 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9601 "00000000" // /* MW 15 */ + 9602 "00000000" // /* MW 14 */ + 9603 "11101000" // /* MW 13 */ + 9604 "00000000" // /* MW 12 */ + 9605 "00000000" // /* MW 11 */ + 9606 "00000000" // /* MW 10 */ + 9607 "00000000" // /* MW 9 */ + 9608 "00000000" // /* MW 8 */ + 9609 "01011011" // /* MW 7 */ + 9610 "00000001" // /* MW 6 */ + 9611 "01101000" // /* MW 5 */ + 9612 "00111000" // /* MW 4 */ + 9613 "11110000" // /* MW 3 */ + 9614 "00101100" // /* MW 2 */ + 9615 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "transposeshuffle.h" 119 21 first +.src_ref 3 "transposeshuffle.h" 120 17 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9616 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9617 "00000000" // /* MW 15 */ + 9618 "00000000" // /* MW 14 */ + 9619 "11101000" // /* MW 13 */ + 9620 "00000000" // /* MW 12 */ + 9621 "00000000" // /* MW 11 */ + 9622 "00000000" // /* MW 10 */ + 9623 "00000000" // /* MW 9 */ + 9624 "00000000" // /* MW 8 */ + 9625 "01011011" // /* MW 7 */ + 9626 "00000001" // /* MW 6 */ + 9627 "01101000" // /* MW 5 */ + 9628 "00111000" // /* MW 4 */ + 9629 "11110000" // /* MW 3 */ + 9630 "00101100" // /* MW 2 */ + 9631 "00000000" // /* MW 1 */ +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_240 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "transposeshuffle.h" 119 21 +.src_ref 3 "transposeshuffle.h" 120 17 first +.src_ref 3 "transposeshuffle.h" 122 22 first +.begin_of_loop +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 9632 "11100001" // NOPA; VLDB x0, [p0], #64; VST bmll0, [p1], #64; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9633 "00000000" // /* MW 15 */ + 9634 "00000000" // /* MW 14 */ + 9635 "11101000" // /* MW 13 */ + 9636 "00000000" // /* MW 12 */ + 9637 "00000000" // /* MW 11 */ + 9638 "00000000" // /* MW 10 */ + 9639 "00000000" // /* MW 9 */ + 9640 "10000000" // /* MW 8 */ + 9641 "00000110" // /* MW 7 */ + 9642 "00011100" // /* MW 6 */ + 9643 "01101001" // /* MW 5 */ + 9644 "00111000" // /* MW 4 */ + 9645 "11110000" // /* MW 3 */ + 9646 "00101100" // /* MW 2 */ + 9647 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "transposeshuffle.h" 120 17 +.src_ref 3 "transposeshuffle.h" 122 22 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 9648 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9649 "11100000" // /* MW 7 */ + 9650 "00000000" // /* MW 6 */ + 9651 "00000000" // /* MW 5 */ + 9652 "00000000" // /* MW 4 */ + 9653 "11010000" // /* MW 3 */ + 9654 "10000000" // /* MW 2 */ + 9655 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "transposeshuffle.h" 120 17 +.src_ref 3 "transposeshuffle.h" 122 22 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9656 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9657 "11100000" // /* MW 7 */ + 9658 "00000000" // /* MW 6 */ + 9659 "00000000" // /* MW 5 */ + 9660 "00000000" // /* MW 4 */ + 9661 "11010000" // /* MW 3 */ + 9662 "10000000" // /* MW 2 */ + 9663 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "transposeshuffle.h" 120 17 +.src_ref 3 "transposeshuffle.h" 122 22 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9664 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9665 "11100000" // /* MW 7 */ + 9666 "00000000" // /* MW 6 */ + 9667 "00000000" // /* MW 5 */ + 9668 "00000000" // /* MW 4 */ + 9669 "11010000" // /* MW 3 */ + 9670 "10000000" // /* MW 2 */ + 9671 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "transposeshuffle.h" 120 17 +.src_ref 3 "transposeshuffle.h" 122 22 +.src_ref 3 "transposeshuffle.h" 126 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9672 "00111010" // VST bmll0, [p1], #64; RET lr; VSHUFFLE bmll0, x0, x0, r0 /* MW 10 */ /* control_operation: words=10 rts unconditional cycles_taken=1 delay_slots=5 */ + 9673 "11101001" // /* MW 9 */ + 9674 "00000000" // /* MW 8 */ + 9675 "00000000" // /* MW 7 */ + 9676 "00000000" // /* MW 6 */ + 9677 "01000000" // /* MW 5 */ + 9678 "00000001" // /* MW 4 */ + 9679 "11010000" // /* MW 3 */ + 9680 "10000000" // /* MW 2 */ + 9681 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "transposeshuffle.h" 120 17 first +.src_ref 3 "transposeshuffle.h" 122 22 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9682 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9683 "11100000" // /* MW 7 */ + 9684 "00000000" // /* MW 6 */ + 9685 "00000000" // /* MW 5 */ + 9686 "00000000" // /* MW 4 */ + 9687 "11010000" // /* MW 3 */ + 9688 "10000000" // /* MW 2 */ + 9689 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "transposeshuffle.h" 120 17 +.src_ref 3 "transposeshuffle.h" 122 22 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9690 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9691 "11100000" // /* MW 7 */ + 9692 "00000000" // /* MW 6 */ + 9693 "00000000" // /* MW 5 */ + 9694 "00000000" // /* MW 4 */ + 9695 "11010000" // /* MW 3 */ + 9696 "10000000" // /* MW 2 */ + 9697 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "transposeshuffle.h" 120 17 +.src_ref 3 "transposeshuffle.h" 122 22 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9698 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9699 "11100000" // /* MW 7 */ + 9700 "00000000" // /* MW 6 */ + 9701 "00000000" // /* MW 5 */ + 9702 "00000000" // /* MW 4 */ + 9703 "11010000" // /* MW 3 */ + 9704 "10000000" // /* MW 2 */ + 9705 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "transposeshuffle.h" 122 22 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9706 "00001100" // NOPA; VST bmll0, [p1], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9707 "00001101" // /* MW 5 */ + 9708 "00111000" // /* MW 4 */ + 9709 "11110010" // /* MW 3 */ + 9710 "00101100" // /* MW 2 */ + 9711 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "transposeshuffle.h" 122 22 +.delay_slot + 9712 "11100001" // NOPA; NOPB; VST bmll0, [p1], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9713 "00000000" // /* MW 15 */ + 9714 "00000000" // /* MW 14 */ + 9715 "01111000" // /* MW 13 */ + 9716 "10100101" // /* MW 12 */ + 9717 "00000001" // /* MW 11 */ + 9718 "00000000" // /* MW 10 */ + 9719 "00000000" // /* MW 9 */ + 9720 "10000000" // /* MW 8 */ + 9721 "00000110" // /* MW 7 */ + 9722 "00011100" // /* MW 6 */ + 9723 "00100001" // /* MW 5 */ + 9724 "00000000" // /* MW 4 */ + 9725 "11110000" // /* MW 3 */ + 9726 "00101100" // /* MW 2 */ + 9727 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_336 +.src_ref 3 "transposeshuffle.h" 116 8 first + 9728 "11111000" // MOV lc, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9729 "10100000" // /* MW 3 */ + 9730 "01110000" // /* MW 2 */ + 9731 "00011101" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 116 8 + 9732 "01000100" // MOVXM ls, #9744 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9733 "00100000" // /* MW 5 */ + 9734 "11101100" // /* MW 4 */ + 9735 "00100001" // /* MW 3 */ + 9736 "00000000" // /* MW 2 */ + 9737 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 116 8 + 9738 "01000100" // MOVXM le, #9856 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9739 "00000000" // /* MW 5 */ + 9740 "11101101" // /* MW 4 */ + 9741 "00100110" // /* MW 3 */ + 9742 "00000000" // /* MW 2 */ + 9743 "00000000" // /* MW 1 */ +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_352 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "transposeshuffle.h" 119 21 first +.begin_of_loop +.loop_nesting 1 + 9744 "00011000" // VLDB x0, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9745 "00110100" // /* MW 3 */ + 9746 "00011100" // /* MW 2 */ + 9747 "00111000" // /* MW 1 */ + 9748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9749 "00000000" // /* MW 1 */ + 9750 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9751 "01111110" // /* MW 9 */ + 9752 "10100101" // /* MW 8 */ + 9753 "00000001" // /* MW 7 */ + 9754 "00000000" // /* MW 6 */ + 9755 "00010000" // /* MW 5 */ + 9756 "00000000" // /* MW 4 */ + 9757 "11110000" // /* MW 3 */ + 9758 "00101100" // /* MW 2 */ + 9759 "00000000" // /* MW 1 */ + 9760 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9761 "00000000" // /* MW 15 */ + 9762 "00000000" // /* MW 14 */ + 9763 "01111000" // /* MW 13 */ + 9764 "10100101" // /* MW 12 */ + 9765 "00000001" // /* MW 11 */ + 9766 "00000000" // /* MW 10 */ + 9767 "00000000" // /* MW 9 */ + 9768 "00000000" // /* MW 8 */ + 9769 "01011011" // /* MW 7 */ + 9770 "00000001" // /* MW 6 */ + 9771 "00100000" // /* MW 5 */ + 9772 "00000000" // /* MW 4 */ + 9773 "11110000" // /* MW 3 */ + 9774 "00101100" // /* MW 2 */ + 9775 "00000000" // /* MW 1 */ + 9776 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9777 "00000000" // /* MW 15 */ + 9778 "00000000" // /* MW 14 */ + 9779 "01111000" // /* MW 13 */ + 9780 "10100101" // /* MW 12 */ + 9781 "00000001" // /* MW 11 */ + 9782 "00000000" // /* MW 10 */ + 9783 "00000000" // /* MW 9 */ + 9784 "00000000" // /* MW 8 */ + 9785 "01011011" // /* MW 7 */ + 9786 "00000001" // /* MW 6 */ + 9787 "00100000" // /* MW 5 */ + 9788 "00000000" // /* MW 4 */ + 9789 "11110000" // /* MW 3 */ + 9790 "00101100" // /* MW 2 */ + 9791 "00000000" // /* MW 1 */ + 9792 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9793 "00000000" // /* MW 15 */ + 9794 "00000000" // /* MW 14 */ + 9795 "01111000" // /* MW 13 */ + 9796 "10100101" // /* MW 12 */ + 9797 "00000001" // /* MW 11 */ + 9798 "00000000" // /* MW 10 */ + 9799 "00000000" // /* MW 9 */ + 9800 "00000000" // /* MW 8 */ + 9801 "01011011" // /* MW 7 */ + 9802 "00000001" // /* MW 6 */ + 9803 "00100000" // /* MW 5 */ + 9804 "00000000" // /* MW 4 */ + 9805 "11110000" // /* MW 3 */ + 9806 "00101100" // /* MW 2 */ + 9807 "00000000" // /* MW 1 */ + 9808 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9809 "00000000" // /* MW 15 */ + 9810 "00000000" // /* MW 14 */ + 9811 "01111000" // /* MW 13 */ + 9812 "10100101" // /* MW 12 */ + 9813 "00000001" // /* MW 11 */ + 9814 "00000000" // /* MW 10 */ + 9815 "00000000" // /* MW 9 */ + 9816 "00000000" // /* MW 8 */ + 9817 "01011011" // /* MW 7 */ + 9818 "00000001" // /* MW 6 */ + 9819 "00100000" // /* MW 5 */ + 9820 "00000000" // /* MW 4 */ + 9821 "11110000" // /* MW 3 */ + 9822 "00101100" // /* MW 2 */ + 9823 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 120 17 first + 9824 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9825 "00000000" // /* MW 15 */ + 9826 "00000000" // /* MW 14 */ + 9827 "11101000" // /* MW 13 */ + 9828 "00000000" // /* MW 12 */ + 9829 "00000000" // /* MW 11 */ + 9830 "00000000" // /* MW 10 */ + 9831 "00000000" // /* MW 9 */ + 9832 "00000000" // /* MW 8 */ + 9833 "01011011" // /* MW 7 */ + 9834 "00000001" // /* MW 6 */ + 9835 "00100000" // /* MW 5 */ + 9836 "00000000" // /* MW 4 */ + 9837 "11110000" // /* MW 3 */ + 9838 "00101100" // /* MW 2 */ + 9839 "00000000" // /* MW 1 */ + 9840 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9841 "00000000" // /* MW 15 */ + 9842 "00000000" // /* MW 14 */ + 9843 "01111000" // /* MW 13 */ + 9844 "10100101" // /* MW 12 */ + 9845 "00000001" // /* MW 11 */ + 9846 "00000000" // /* MW 10 */ + 9847 "00000000" // /* MW 9 */ + 9848 "00000000" // /* MW 8 */ + 9849 "01011011" // /* MW 7 */ + 9850 "00000001" // /* MW 6 */ + 9851 "00100000" // /* MW 5 */ + 9852 "00000000" // /* MW 4 */ + 9853 "11110000" // /* MW 3 */ + 9854 "00101100" // /* MW 2 */ + 9855 "00000000" // /* MW 1 */ +.label ZLE_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_464 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "transposeshuffle.h" 122 22 first +.end_of_loop + 9856 "11100001" // NOPA; NOPB; VST bmll0, [p1], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9857 "00000000" // /* MW 15 */ + 9858 "00000000" // /* MW 14 */ + 9859 "01111000" // /* MW 13 */ + 9860 "10100101" // /* MW 12 */ + 9861 "00000001" // /* MW 11 */ + 9862 "00000000" // /* MW 10 */ + 9863 "00000000" // /* MW 9 */ + 9864 "10000000" // /* MW 8 */ + 9865 "00000110" // /* MW 7 */ + 9866 "00011100" // /* MW 6 */ + 9867 "00100001" // /* MW 5 */ + 9868 "00000000" // /* MW 4 */ + 9869 "11110000" // /* MW 3 */ + 9870 "00101100" // /* MW 2 */ + 9871 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 126 first +.loop_nesting 0 + 9872 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9873 "00000000" // /* MW 3 */ + 9874 "00101000" // /* MW 2 */ + 9875 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9877 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9879 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9881 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9883 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9884 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9885 "01100111" // /* MW 3 */ + 9886 "00000001" // /* MW 2 */ + 9887 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_496 +.src_ref 3 "transposeshuffle.h" 86 34 + 9888 "01000100" // MOVXM p2, #508544 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9889 "00000000" // /* MW 5 */ + 9890 "11000101" // /* MW 4 */ + 9891 "11000100" // /* MW 3 */ + 9892 "00000111" // /* MW 2 */ + 9893 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 34 first + 9894 "10011000" // LDA r0, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9895 "00010110" // /* MW 3 */ + 9896 "00000100" // /* MW 2 */ + 9897 "00000010" // /* MW 1 */ + 9898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9899 "00000000" // /* MW 1 */ + 9900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9901 "00000000" // /* MW 1 */ + 9902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9903 "00000000" // /* MW 1 */ + 9904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9905 "00000000" // /* MW 1 */ + 9906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9907 "00000000" // /* MW 1 */ + 9908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9909 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 86 26 + 9910 "10000100" // JZ r0, #10528 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10528 delay_slots=5 */ + 9911 "00000001" // /* MW 5 */ + 9912 "00000000" // /* MW 4 */ + 9913 "10010000" // /* MW 3 */ + 9914 "00010100" // /* MW 2 */ + 9915 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9917 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9919 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9920 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9921 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9923 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9925 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 86 8 + 9926 "10111010" // MOVA m5, #36; MOVXM p4, #508548 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9927 "00010000" // /* MW 9 */ + 9928 "01000010" // /* MW 8 */ + 9929 "00110001" // /* MW 7 */ + 9930 "11110010" // /* MW 6 */ + 9931 "00000001" // /* MW 5 */ + 9932 "00000000" // /* MW 4 */ + 9933 "10000000" // /* MW 3 */ + 9934 "10010100" // /* MW 2 */ + 9935 "00000100" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 87 35 + 9936 "10111010" // LDA r1, [p4], #4; MOVX r5, #8; MOV r2, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9937 "01011000" // /* MW 9 */ + 9938 "11111101" // /* MW 8 */ + 9939 "01001111" // /* MW 7 */ + 9940 "00001000" // /* MW 6 */ + 9941 "01010001" // /* MW 5 */ + 9942 "00000000" // /* MW 4 */ + 9943 "11010000" // /* MW 3 */ + 9944 "10000110" // /* MW 2 */ + 9945 "10000011" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1132 26 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 3 "transposeshuffle.h" 86 8 + 9946 "10111010" // LDA r4, [p4], m5; MOVX r3, #1; MOV dc4, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9947 "01011000" // /* MW 9 */ + 9948 "00000000" // /* MW 8 */ + 9949 "01100000" // /* MW 7 */ + 9950 "00101010" // /* MW 6 */ + 9951 "00110000" // /* MW 5 */ + 9952 "00000000" // /* MW 4 */ + 9953 "11010000" // /* MW 3 */ + 9954 "00010010" // /* MW 2 */ + 9955 "10010101" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 86 8 + 9956 "01110110" // LDA m1, [p4], #-8; MOVS dc0, dc4; MOVX r6, #4; MOV m4, #32 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9957 "01011000" // /* MW 11 */ + 9958 "00100000" // /* MW 10 */ + 9959 "00000000" // /* MW 9 */ + 9960 "10001010" // /* MW 8 */ + 9961 "01100000" // /* MW 7 */ + 9962 "00000000" // /* MW 6 */ + 9963 "01001011" // /* MW 5 */ + 9964 "00010000" // /* MW 4 */ + 9965 "11010000" // /* MW 3 */ + 9966 "10010000" // /* MW 2 */ + 9967 "10011101" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1132 26 +.src_ref 4 "transpose.hpp" 224 15 +.src_ref 3 "transposeshuffle.h" 86 8 + 9968 "01110110" // LDA dn1, [p4], #-8; MOVS dc1, dc4; ADD r0, r0, #-1; MOV r7, #52 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9969 "01011000" // /* MW 11 */ + 9970 "00110100" // /* MW 10 */ + 9971 "11101000" // /* MW 9 */ + 9972 "11111000" // /* MW 8 */ + 9973 "00001111" // /* MW 7 */ + 9974 "00000000" // /* MW 6 */ + 9975 "01001011" // /* MW 5 */ + 9976 "00010000" // /* MW 4 */ + 9977 "11010001" // /* MW 3 */ + 9978 "10010100" // /* MW 2 */ + 9979 "10011101" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1132 26 +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 86 8 + 9980 "01110110" // LDA dj1, [p4], #12; MOVS dc5, dc4; MOVXM p2, #10064 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9981 "00010000" // /* MW 11 */ + 9982 "10101000" // /* MW 10 */ + 9983 "00110011" // /* MW 9 */ + 9984 "00001001" // /* MW 8 */ + 9985 "00000000" // /* MW 7 */ + 9986 "00000000" // /* MW 6 */ + 9987 "01001011" // /* MW 5 */ + 9988 "00010000" // /* MW 4 */ + 9989 "11010101" // /* MW 3 */ + 9990 "10011000" // /* MW 2 */ + 9991 "10000111" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 87 12 + 9992 "10111010" // LDA dn5, [p4], #-8; MOVXM p3, #10096 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9993 "00010000" // /* MW 9 */ + 9994 "10111000" // /* MW 8 */ + 9995 "10110011" // /* MW 7 */ + 9996 "00001001" // /* MW 6 */ + 9997 "00000000" // /* MW 5 */ + 9998 "00000000" // /* MW 4 */ + 9999 "11010000" // /* MW 3 */ + 10000 "11010100" // /* MW 2 */ + 10001 "10011101" // /* MW 1 */ +.src_ref 4 "transpose.hpp" 225 15 +.src_ref 3 "transposeshuffle.h" 86 8 + 10002 "00101100" // LDA dj5, [p4], m4; MOVX r16, #53 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10003 "10101010" // /* MW 5 */ + 10004 "01000001" // /* MW 4 */ + 10005 "11010000" // /* MW 3 */ + 10006 "01011000" // /* MW 2 */ + 10007 "10010001" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 87 35 first + 10008 "10111010" // LDA m0, [p4], #-8; LTU r27, r5, r1; ADD.NC r5, r1, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10009 "11001000" // /* MW 9 */ + 10010 "01111111" // /* MW 8 */ + 10011 "10101000" // /* MW 7 */ + 10012 "11100100" // /* MW 6 */ + 10013 "10110000" // /* MW 5 */ + 10014 "00001011" // /* MW 4 */ + 10015 "11010000" // /* MW 3 */ + 10016 "10000000" // /* MW 2 */ + 10017 "10011101" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 first +.src_ref 3 "transposeshuffle.h" 86 8 first + 10018 "10111010" // LDA dn0, [p4], #-8; LSHL r17, r5, r2; ADD.NC r5, r4, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10019 "11001000" // /* MW 9 */ + 10020 "00111111" // /* MW 8 */ + 10021 "10101001" // /* MW 7 */ + 10022 "01101100" // /* MW 6 */ + 10023 "00010001" // /* MW 5 */ + 10024 "00001011" // /* MW 4 */ + 10025 "11010000" // /* MW 3 */ + 10026 "10000100" // /* MW 2 */ + 10027 "10011101" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 +.src_ref 3 "transposeshuffle.h" 86 8 + 10028 "10111010" // LDA dj0, [p4], #12; LSHL r2, r5, r2; ADD.NC r5, r17, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10029 "01001000" // /* MW 9 */ + 10030 "01000000" // /* MW 8 */ + 10031 "10101100" // /* MW 7 */ + 10032 "01101100" // /* MW 6 */ + 10033 "00100001" // /* MW 5 */ + 10034 "00001010" // /* MW 4 */ + 10035 "11010000" // /* MW 3 */ + 10036 "10001000" // /* MW 2 */ + 10037 "10000111" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 + 10038 "10111010" // LDA dn4, [p4]; SEL.EQZ r5, r3, r5, r27; ADD.NC r3, r2, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10039 "01001000" // /* MW 9 */ + 10040 "10000000" // /* MW 8 */ + 10041 "01101000" // /* MW 7 */ + 10042 "10010000" // /* MW 6 */ + 10043 "01010010" // /* MW 5 */ + 10044 "00000110" // /* MW 4 */ + 10045 "11010000" // /* MW 3 */ + 10046 "11000100" // /* MW 2 */ + 10047 "10000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 86 8 + 10048 "11100001" // LDA dj4, [p4, #-8]; NOPB; NOPS; ADD r5, r5, #-1; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10049 "00000000" // /* MW 15 */ + 10050 "00000000" // /* MW 14 */ + 10051 "01111000" // /* MW 13 */ + 10052 "10100101" // /* MW 12 */ + 10053 "00000001" // /* MW 11 */ + 10054 "11111000" // /* MW 10 */ + 10055 "01011111" // /* MW 9 */ + 10056 "00001010" // /* MW 8 */ + 10057 "01011011" // /* MW 7 */ + 10058 "00000001" // /* MW 6 */ + 10059 "00100000" // /* MW 5 */ + 10060 "00000000" // /* MW 4 */ + 10061 "11010000" // /* MW 3 */ + 10062 "11001000" // /* MW 2 */ + 10063 "10011100" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_672 +.src_ref 3 "transposeshuffle.h" 87 12 first +.loop_nesting 1 + 10064 "10000100" // JZ r1, #10512 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10512 delay_slots=5 */ + 10065 "00000001" // /* MW 5 */ + 10066 "00000000" // /* MW 4 */ + 10067 "10001000" // /* MW 3 */ + 10068 "00010100" // /* MW 2 */ + 10069 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10071 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10073 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10075 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10077 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10078 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10079 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 87 12 + 10080 "11100001" // NOPA; NOPB; NOPS; NOPX; MOV r17, r5; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10081 "00000000" // /* MW 15 */ + 10082 "00000000" // /* MW 14 */ + 10083 "01111000" // /* MW 13 */ + 10084 "01010000" // /* MW 12 */ + 10085 "00101001" // /* MW 11 */ + 10086 "00000010" // /* MW 10 */ + 10087 "00000000" // /* MW 9 */ + 10088 "00000000" // /* MW 8 */ + 10089 "01011011" // /* MW 7 */ + 10090 "00000001" // /* MW 6 */ + 10091 "00100000" // /* MW 5 */ + 10092 "00000000" // /* MW 4 */ + 10093 "11110000" // /* MW 3 */ + 10094 "00101100" // /* MW 2 */ + 10095 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_704 +.src_ref 3 "transposeshuffle.h" 88 16 first +.loop_nesting 2 + 10096 "10000100" // JZ r4, #10496 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10496 delay_slots=5 */ + 10097 "00000001" // /* MW 5 */ + 10098 "00000000" // /* MW 4 */ + 10099 "10000000" // /* MW 3 */ + 10100 "00010100" // /* MW 2 */ + 10101 "00100000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10103 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10105 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10107 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10108 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10109 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10111 "00000000" // /* MW 1 */ + 10112 "10011000" // LTU r18, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10113 "01101100" // /* MW 3 */ + 10114 "11100100" // /* MW 2 */ + 10115 "00010000" // /* MW 1 */ + 10116 "10000100" // JNZ r18, #10352 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10352 delay_slots=5 */ + 10117 "00000001" // /* MW 5 */ + 10118 "01000000" // /* MW 4 */ + 10119 "00111000" // /* MW 3 */ + 10120 "00010100" // /* MW 2 */ + 10121 "10010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10123 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10125 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10127 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10129 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10131 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1132 26 first +.src_ref 3 "transposeshuffle.h" 88 16 + 10132 "00111010" // VLDB x0, [p0, #64]; MOVXM ls, #10240 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10133 "00010000" // /* MW 9 */ + 10134 "00000000" // /* MW 8 */ + 10135 "01111100" // /* MW 7 */ + 10136 "00001000" // /* MW 6 */ + 10137 "00000000" // /* MW 5 */ + 10138 "00000000" // /* MW 4 */ + 10139 "01101000" // /* MW 3 */ + 10140 "00101000" // /* MW 2 */ + 10141 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1132 26 +.src_ref 3 "transposeshuffle.h" 88 16 first + 10142 "00111010" // VLDB.3D x1, [p0], d1; MOVXM le, #10272 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10143 "00010000" // /* MW 9 */ + 10144 "00010000" // /* MW 8 */ + 10145 "10111100" // /* MW 7 */ + 10146 "00001001" // /* MW 6 */ + 10147 "00000000" // /* MW 5 */ + 10148 "00000000" // /* MW 4 */ + 10149 "11101000" // /* MW 3 */ + 10150 "01110000" // /* MW 2 */ + 10151 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 88 16 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 10152 "10011000" // ADD.NC lc, r3, #-3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10153 "11111110" // /* MW 3 */ + 10154 "01110001" // /* MW 2 */ + 10155 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1132 26 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10156 "00011000" // VLDB x0, [p0, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10157 "00110100" // /* MW 3 */ + 10158 "00010100" // /* MW 2 */ + 10159 "00111000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1132 26 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10160 "11100001" // NOPA; VLDB.3D x1, [p0], d1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10161 "00000000" // /* MW 15 */ + 10162 "00000000" // /* MW 14 */ + 10163 "01111000" // /* MW 13 */ + 10164 "10100101" // /* MW 12 */ + 10165 "00000001" // /* MW 11 */ + 10166 "00000000" // /* MW 10 */ + 10167 "00000000" // /* MW 9 */ + 10168 "00000000" // /* MW 8 */ + 10169 "01011011" // /* MW 7 */ + 10170 "00000001" // /* MW 6 */ + 10171 "11101000" // /* MW 5 */ + 10172 "01110000" // /* MW 4 */ + 10173 "11110000" // /* MW 3 */ + 10174 "00101100" // /* MW 2 */ + 10175 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10176 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10177 "00000000" // /* MW 15 */ + 10178 "00000000" // /* MW 14 */ + 10179 "01111000" // /* MW 13 */ + 10180 "10100101" // /* MW 12 */ + 10181 "00000001" // /* MW 11 */ + 10182 "00000000" // /* MW 10 */ + 10183 "00000000" // /* MW 9 */ + 10184 "00000000" // /* MW 8 */ + 10185 "01011011" // /* MW 7 */ + 10186 "00000001" // /* MW 6 */ + 10187 "00100000" // /* MW 5 */ + 10188 "00000000" // /* MW 4 */ + 10189 "11110000" // /* MW 3 */ + 10190 "00101100" // /* MW 2 */ + 10191 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1132 26 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10192 "11100001" // NOPA; VLDB x0, [p0, #64]; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10193 "00000000" // /* MW 15 */ + 10194 "00000000" // /* MW 14 */ + 10195 "01111000" // /* MW 13 */ + 10196 "10100101" // /* MW 12 */ + 10197 "00000001" // /* MW 11 */ + 10198 "00000000" // /* MW 10 */ + 10199 "00000000" // /* MW 9 */ + 10200 "00000000" // /* MW 8 */ + 10201 "01011011" // /* MW 7 */ + 10202 "00000001" // /* MW 6 */ + 10203 "01101000" // /* MW 5 */ + 10204 "00101000" // /* MW 4 */ + 10205 "11110000" // /* MW 3 */ + 10206 "00101100" // /* MW 2 */ + 10207 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1132 26 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10208 "11100001" // NOPA; VLDB.3D x1, [p0], d1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10209 "00000000" // /* MW 15 */ + 10210 "00000000" // /* MW 14 */ + 10211 "01111000" // /* MW 13 */ + 10212 "10100101" // /* MW 12 */ + 10213 "00000001" // /* MW 11 */ + 10214 "00000000" // /* MW 10 */ + 10215 "00000000" // /* MW 9 */ + 10216 "00000000" // /* MW 8 */ + 10217 "01011011" // /* MW 7 */ + 10218 "00000001" // /* MW 6 */ + 10219 "11101000" // /* MW 5 */ + 10220 "01110000" // /* MW 4 */ + 10221 "11110000" // /* MW 3 */ + 10222 "00101100" // /* MW 2 */ + 10223 "00000000" // /* MW 1 */ +.src_ref 4 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10224 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmlh0, x1, x0, r7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10225 "00000000" // /* MW 15 */ + 10226 "00000000" // /* MW 14 */ + 10227 "11101000" // /* MW 13 */ + 10228 "00001110" // /* MW 12 */ + 10229 "01000100" // /* MW 11 */ + 10230 "00000000" // /* MW 10 */ + 10231 "00000000" // /* MW 9 */ + 10232 "00000000" // /* MW 8 */ + 10233 "01011011" // /* MW 7 */ + 10234 "00000001" // /* MW 6 */ + 10235 "00100000" // /* MW 5 */ + 10236 "00000000" // /* MW 4 */ + 10237 "11110000" // /* MW 3 */ + 10238 "00101100" // /* MW 2 */ + 10239 "00000000" // /* MW 1 */ +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_848 +.src_ref 4 "vector.hpp" 1132 26 first +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "transpose.hpp" 225 15 first +.begin_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 3 + 10240 "11100001" // NOPA; VLDB x0, [p0, #64]; MOVS p4, p1; NOPX; VSHUFFLE bmll0, x1, x0, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10241 "00000000" // /* MW 15 */ + 10242 "00000000" // /* MW 14 */ + 10243 "11101000" // /* MW 13 */ + 10244 "00100000" // /* MW 12 */ + 10245 "00000100" // /* MW 11 */ + 10246 "00000000" // /* MW 10 */ + 10247 "00000000" // /* MW 9 */ + 10248 "00000000" // /* MW 8 */ + 10249 "10001011" // /* MW 7 */ + 10250 "10000100" // /* MW 6 */ + 10251 "01101100" // /* MW 5 */ + 10252 "00101000" // /* MW 4 */ + 10253 "11110000" // /* MW 3 */ + 10254 "00101100" // /* MW 2 */ + 10255 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1132 26 +.src_ref 4 "vector.hpp" 1152 43 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10256 "11100001" // NOPA; VLDB.3D x1, [p0], d1; VST.3D bmlh0, [p1], d0; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10257 "00000000" // /* MW 15 */ + 10258 "00000000" // /* MW 14 */ + 10259 "01111000" // /* MW 13 */ + 10260 "10100101" // /* MW 12 */ + 10261 "00000001" // /* MW 11 */ + 10262 "00000000" // /* MW 10 */ + 10263 "00000000" // /* MW 9 */ + 10264 "10000000" // /* MW 8 */ + 10265 "00100110" // /* MW 7 */ + 10266 "00011000" // /* MW 6 */ + 10267 "11101001" // /* MW 5 */ + 10268 "01110000" // /* MW 4 */ + 10269 "11110000" // /* MW 3 */ + 10270 "00101100" // /* MW 2 */ + 10271 "00000000" // /* MW 1 */ +.label ZLE_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_880 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "transpose.hpp" 224 15 first +.end_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10272 "11100001" // NOPA; NOPB; VST bmll0, [p4, #64]; NOPX; VSHUFFLE bmlh0, x1, x0, r7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10273 "00000000" // /* MW 15 */ + 10274 "00000000" // /* MW 14 */ + 10275 "11101000" // /* MW 13 */ + 10276 "00001110" // /* MW 12 */ + 10277 "01000100" // /* MW 11 */ + 10278 "00000000" // /* MW 10 */ + 10279 "00000000" // /* MW 9 */ + 10280 "10000000" // /* MW 8 */ + 10281 "00000110" // /* MW 7 */ + 10282 "00010100" // /* MW 6 */ + 10283 "00100100" // /* MW 5 */ + 10284 "00000000" // /* MW 4 */ + 10285 "11110000" // /* MW 3 */ + 10286 "00101100" // /* MW 2 */ + 10287 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 2 + 10288 "00000010" // MOVS p4, p1; VSHUFFLE bmll0, x1, x0, r16 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10289 "11100000" // /* MW 7 */ + 10290 "00100000" // /* MW 6 */ + 10291 "00000100" // /* MW 5 */ + 10292 "00000000" // /* MW 4 */ + 10293 "01100000" // /* MW 3 */ + 10294 "10010001" // /* MW 2 */ + 10295 "10010000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1152 43 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10296 "10011000" // VST.3D bmlh0, [p1], d0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10297 "00100110" // /* MW 3 */ + 10298 "00011000" // /* MW 2 */ + 10299 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "transpose.hpp" 224 15 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10300 "00000010" // VST bmll0, [p4, #64]; VSHUFFLE bmlh0, x1, x0, r7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10301 "11100000" // /* MW 7 */ + 10302 "00001110" // /* MW 6 */ + 10303 "01000100" // /* MW 5 */ + 10304 "00000000" // /* MW 4 */ + 10305 "11010000" // /* MW 3 */ + 10306 "10000000" // /* MW 2 */ + 10307 "10000010" // /* MW 1 */ +.src_ref 4 "transpose.hpp" 225 15 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10308 "11011000" // VSHUFFLE bmll0, x1, x0, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10309 "01000001" // /* MW 3 */ + 10310 "00001000" // /* MW 2 */ + 10311 "00011000" // /* MW 1 */ + 10312 "10000100" // J #10496 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10496 delay_slots=5 */ + 10313 "00000000" // /* MW 5 */ + 10314 "00000000" // /* MW 4 */ + 10315 "10000000" // /* MW 3 */ + 10316 "00010100" // /* MW 2 */ + 10317 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 first +.delay_slot + 10318 "00000010" // VST.3D bmlh0, [p1], d0; MOV p4, p1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10319 "01110000" // /* MW 7 */ + 10320 "01100000" // /* MW 6 */ + 10321 "00110001" // /* MW 5 */ + 10322 "00000010" // /* MW 4 */ + 10323 "11010000" // /* MW 3 */ + 10324 "00000100" // /* MW 2 */ + 10325 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "transpose.hpp" 224 15 first +.delay_slot + 10326 "00000010" // VST bmll0, [p4, #64]; VSHUFFLE bmlh0, x1, x0, r7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10327 "11100000" // /* MW 7 */ + 10328 "00001110" // /* MW 6 */ + 10329 "01000100" // /* MW 5 */ + 10330 "00000000" // /* MW 4 */ + 10331 "11010000" // /* MW 3 */ + 10332 "10000000" // /* MW 2 */ + 10333 "10000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "transpose.hpp" 225 15 first +.delay_slot + 10334 "00000010" // MOVS p4, p1; VSHUFFLE bmll0, x1, x0, r16 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10335 "11100000" // /* MW 7 */ + 10336 "00100000" // /* MW 6 */ + 10337 "00000100" // /* MW 5 */ + 10338 "00000000" // /* MW 4 */ + 10339 "01100000" // /* MW 3 */ + 10340 "10010001" // /* MW 2 */ + 10341 "10010000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1152 43 first +.delay_slot + 10342 "10011000" // VST.3D bmlh0, [p1], d0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10343 "00100110" // /* MW 3 */ + 10344 "00011000" // /* MW 2 */ + 10345 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.delay_slot + 10346 "00001100" // NOPA; VST bmll0, [p4, #64] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10347 "00001101" // /* MW 5 */ + 10348 "00101000" // /* MW 4 */ + 10349 "11111000" // /* MW 3 */ + 10350 "00101100" // /* MW 2 */ + 10351 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_960 +.src_ref 3 "transposeshuffle.h" 88 16 first + 10352 "01000100" // MOVXM ls, #10368 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10353 "00000000" // /* MW 5 */ + 10354 "11110001" // /* MW 4 */ + 10355 "00100001" // /* MW 3 */ + 10356 "00000000" // /* MW 2 */ + 10357 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 88 16 + 10358 "01000100" // MOVXM le, #10480 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10359 "11100000" // /* MW 5 */ + 10360 "11110001" // /* MW 4 */ + 10361 "00100110" // /* MW 3 */ + 10362 "00000000" // /* MW 2 */ + 10363 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 88 16 + 10364 "10011000" // ADD.NC lc, r2, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10365 "00000000" // /* MW 3 */ + 10366 "01110001" // /* MW 2 */ + 10367 "00011101" // /* MW 1 */ +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_976 +.src_ref 4 "vector.hpp" 1132 26 first +.src_ref 4 "vector.hpp" 1152 43 +.begin_of_loop +.loop_nesting 3 + 10368 "11110100" // VLDB x0, [p0, #64]; MOV p4, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10369 "10000001" // /* MW 5 */ + 10370 "11000101" // /* MW 4 */ + 10371 "10001000" // /* MW 3 */ + 10372 "10000110" // /* MW 2 */ + 10373 "00000010" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1132 26 + 10374 "00011000" // VLDB.3D x1, [p0], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10375 "01110100" // /* MW 3 */ + 10376 "00111000" // /* MW 2 */ + 10377 "00111000" // /* MW 1 */ + 10378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10379 "00000000" // /* MW 1 */ + 10380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10381 "00000000" // /* MW 1 */ + 10382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10383 "00000000" // /* MW 1 */ + 10384 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10385 "00000000" // /* MW 15 */ + 10386 "00000000" // /* MW 14 */ + 10387 "01111000" // /* MW 13 */ + 10388 "10100101" // /* MW 12 */ + 10389 "00000001" // /* MW 11 */ + 10390 "00000000" // /* MW 10 */ + 10391 "00000000" // /* MW 9 */ + 10392 "00000000" // /* MW 8 */ + 10393 "01011011" // /* MW 7 */ + 10394 "00000001" // /* MW 6 */ + 10395 "00100000" // /* MW 5 */ + 10396 "00000000" // /* MW 4 */ + 10397 "11110000" // /* MW 3 */ + 10398 "00101100" // /* MW 2 */ + 10399 "00000000" // /* MW 1 */ + 10400 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10401 "00000000" // /* MW 15 */ + 10402 "00000000" // /* MW 14 */ + 10403 "01111000" // /* MW 13 */ + 10404 "10100101" // /* MW 12 */ + 10405 "00000001" // /* MW 11 */ + 10406 "00000000" // /* MW 10 */ + 10407 "00000000" // /* MW 9 */ + 10408 "00000000" // /* MW 8 */ + 10409 "01011011" // /* MW 7 */ + 10410 "00000001" // /* MW 6 */ + 10411 "00100000" // /* MW 5 */ + 10412 "00000000" // /* MW 4 */ + 10413 "11110000" // /* MW 3 */ + 10414 "00101100" // /* MW 2 */ + 10415 "00000000" // /* MW 1 */ + 10416 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10417 "00000000" // /* MW 15 */ + 10418 "00000000" // /* MW 14 */ + 10419 "01111000" // /* MW 13 */ + 10420 "10100101" // /* MW 12 */ + 10421 "00000001" // /* MW 11 */ + 10422 "00000000" // /* MW 10 */ + 10423 "00000000" // /* MW 9 */ + 10424 "00000000" // /* MW 8 */ + 10425 "01011011" // /* MW 7 */ + 10426 "00000001" // /* MW 6 */ + 10427 "00100000" // /* MW 5 */ + 10428 "00000000" // /* MW 4 */ + 10429 "11110000" // /* MW 3 */ + 10430 "00101100" // /* MW 2 */ + 10431 "00000000" // /* MW 1 */ +.src_ref 4 "transpose.hpp" 224 15 first + 10432 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmlh0, x1, x0, r7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10433 "00000000" // /* MW 15 */ + 10434 "00000000" // /* MW 14 */ + 10435 "11101000" // /* MW 13 */ + 10436 "00001110" // /* MW 12 */ + 10437 "01000100" // /* MW 11 */ + 10438 "00000000" // /* MW 10 */ + 10439 "00000000" // /* MW 9 */ + 10440 "00000000" // /* MW 8 */ + 10441 "01011011" // /* MW 7 */ + 10442 "00000001" // /* MW 6 */ + 10443 "00100000" // /* MW 5 */ + 10444 "00000000" // /* MW 4 */ + 10445 "11110000" // /* MW 3 */ + 10446 "00101100" // /* MW 2 */ + 10447 "00000000" // /* MW 1 */ +.src_ref 4 "transpose.hpp" 225 15 first + 10448 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x1, x0, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10449 "00000000" // /* MW 15 */ + 10450 "00000000" // /* MW 14 */ + 10451 "11101000" // /* MW 13 */ + 10452 "00100000" // /* MW 12 */ + 10453 "00000100" // /* MW 11 */ + 10454 "00000000" // /* MW 10 */ + 10455 "00000000" // /* MW 9 */ + 10456 "00000000" // /* MW 8 */ + 10457 "01011011" // /* MW 7 */ + 10458 "00000001" // /* MW 6 */ + 10459 "00100000" // /* MW 5 */ + 10460 "00000000" // /* MW 4 */ + 10461 "11110000" // /* MW 3 */ + 10462 "00101100" // /* MW 2 */ + 10463 "00000000" // /* MW 1 */ +.src_ref 8 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1152 43 first + 10464 "11100001" // NOPA; NOPB; VST.3D bmlh0, [p1], d0; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10465 "00000000" // /* MW 15 */ + 10466 "00000000" // /* MW 14 */ + 10467 "01111000" // /* MW 13 */ + 10468 "10100101" // /* MW 12 */ + 10469 "00000001" // /* MW 11 */ + 10470 "00000000" // /* MW 10 */ + 10471 "00000000" // /* MW 9 */ + 10472 "10000000" // /* MW 8 */ + 10473 "00100110" // /* MW 7 */ + 10474 "00011000" // /* MW 6 */ + 10475 "00100001" // /* MW 5 */ + 10476 "00000000" // /* MW 4 */ + 10477 "11110000" // /* MW 3 */ + 10478 "00101100" // /* MW 2 */ + 10479 "00000000" // /* MW 1 */ +.label ZLE_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1088 +.src_ref 4 "vector.hpp" 1152 43 +.end_of_loop + 10480 "11100001" // NOPA; NOPB; VST bmll0, [p4, #64]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10481 "00000000" // /* MW 15 */ + 10482 "00000000" // /* MW 14 */ + 10483 "01111000" // /* MW 13 */ + 10484 "10100101" // /* MW 12 */ + 10485 "00000001" // /* MW 11 */ + 10486 "00000000" // /* MW 10 */ + 10487 "00000000" // /* MW 9 */ + 10488 "10000000" // /* MW 8 */ + 10489 "00000110" // /* MW 7 */ + 10490 "00010100" // /* MW 6 */ + 10491 "00100100" // /* MW 5 */ + 10492 "00000000" // /* MW 4 */ + 10493 "11110000" // /* MW 3 */ + 10494 "00101100" // /* MW 2 */ + 10495 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1104 +.src_ref 3 "transposeshuffle.h" 87 12 first +.loop_nesting 2 + 10496 "00011000" // JNZD r17, r17, p3 /* MW 4 */ /* control_operation: words=4 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 10497 "11100000" // /* MW 3 */ + 10498 "01100010" // /* MW 2 */ + 10499 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10501 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10503 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10505 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10507 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10508 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10509 "01100111" // /* MW 3 */ + 10510 "00000001" // /* MW 2 */ + 10511 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1120 +.src_ref 3 "transposeshuffle.h" 86 8 first +.loop_nesting 1 + 10512 "00011000" // JNZD r0, r0, p2 /* MW 4 */ /* control_operation: words=4 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 10513 "10100000" // /* MW 3 */ + 10514 "00000000" // /* MW 2 */ + 10515 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10517 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10519 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10521 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10523 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10524 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10525 "01100111" // /* MW 3 */ + 10526 "00000001" // /* MW 2 */ + 10527 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params_1136 +.src_ref 3 "transposeshuffle.h" 126 first +.loop_nesting 0 + 10528 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10529 "00000000" // /* MW 3 */ + 10530 "00101000" // /* MW 2 */ + 10531 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10533 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10535 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10536 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10537 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10538 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10539 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params__end +.label __Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_7float16EEvPS1_S3_R23transposeshuffle_params___func_end0 + 10541 "00000000" // /* MW 1 */ +.label __ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj___func_begin0 +.label _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj +.function transpose4d_adf_wrapper, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> > > _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj +.src_ref 10 "transpose4d_adf_wrapper.cpp" 33 first +.function_start + 10544 "11111000" // MOV p3, p6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10545 "11000000" // /* MW 3 */ + 10546 "01101100" // /* MW 2 */ + 10547 "00011011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 10 "transpose4d_adf_wrapper.cpp" 37 8 + 10548 "00111010" // MOVS p6, p1; MOVXM p1, #508436 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10549 "00010001" // /* MW 9 */ + 10550 "00001010" // /* MW 8 */ + 10551 "10110001" // /* MW 7 */ + 10552 "11110000" // /* MW 6 */ + 10553 "00000001" // /* MW 5 */ + 10554 "00000000" // /* MW 4 */ + 10555 "01100000" // /* MW 3 */ + 10556 "10010001" // /* MW 2 */ + 10557 "11010000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 37 8 first + 10558 "10011000" // LDA r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10559 "00010110" // /* MW 3 */ + 10560 "00000110" // /* MW 2 */ + 10561 "00000001" // /* MW 1 */ + 10562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10563 "00000000" // /* MW 1 */ + 10564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10565 "00000000" // /* MW 1 */ + 10566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10567 "00000000" // /* MW 1 */ + 10568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10569 "00000000" // /* MW 1 */ + 10570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10571 "00000000" // /* MW 1 */ + 10572 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10573 "00000000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 37 8 +.src_ref 10 "transpose4d_adf_wrapper.cpp" 37 15 + 10574 "10000100" // JNZ r16, #10640 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10640 delay_slots=5 */ + 10575 "00000001" // /* MW 5 */ + 10576 "01000000" // /* MW 4 */ + 10577 "11001000" // /* MW 3 */ + 10578 "00010100" // /* MW 2 */ + 10579 "10000000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 33 +.delay_slot + 10580 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10581 "00000001" // /* MW 5 */ + 10582 "00000000" // /* MW 4 */ + 10583 "00000000" // /* MW 3 */ + 10584 "00001000" // /* MW 2 */ + 10585 "00000000" // /* MW 1 */ +.delay_slot + 10586 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10587 "00111101" // /* MW 3 */ + 10588 "11110100" // /* MW 2 */ + 10589 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 +.delay_slot + 10590 "00000010" // MOVS p7, p0; MOV p1, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10591 "01110000" // /* MW 7 */ + 10592 "01100000" // /* MW 6 */ + 10593 "10110111" // /* MW 5 */ + 10594 "00000000" // /* MW 4 */ + 10595 "01100000" // /* MW 3 */ + 10596 "00010001" // /* MW 2 */ + 10597 "11110000" // /* MW 1 */ +.delay_slot + 10598 "10011000" // ST p3, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10599 "10011101" // /* MW 3 */ + 10600 "11111001" // /* MW 2 */ + 10601 "00001111" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 38 8 +.delay_slot + 10602 "00111010" // ST p1, [sp, #-4]; MOVXM p0, #508544 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10603 "00010001" // /* MW 9 */ + 10604 "01000000" // /* MW 8 */ + 10605 "00110001" // /* MW 7 */ + 10606 "11110000" // /* MW 6 */ + 10607 "00000001" // /* MW 5 */ + 10608 "00000000" // /* MW 4 */ + 10609 "10110000" // /* MW 3 */ + 10610 "10010011" // /* MW 2 */ + 10611 "11111111" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 38 8 first +.no_stack_arguments + 10612 "00000100" // JL #9120 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9120 delay_slots=5 */ + 10613 "00000001" // /* MW 5 */ + 10614 "00000000" // /* MW 4 */ + 10615 "11010000" // /* MW 3 */ + 10616 "00010001" // /* MW 2 */ + 10617 "00000000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 38 8 +.delay_slot + 10618 "11111000" // MOV p1, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10619 "11000000" // /* MW 3 */ + 10620 "01100100" // /* MW 2 */ + 10621 "00011001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10622 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10623 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10625 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10627 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10628 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10629 "10000001" // /* MW 11 */ + 10630 "10101101" // /* MW 10 */ + 10631 "00000000" // /* MW 9 */ + 10632 "00000000" // /* MW 8 */ + 10633 "00000000" // /* MW 7 */ + 10634 "00000000" // /* MW 6 */ + 10635 "00100000" // /* MW 5 */ + 10636 "00000000" // /* MW 4 */ + 10637 "11110000" // /* MW 3 */ + 10638 "00101100" // /* MW 2 */ + 10639 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj_96 +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 3 "transposeshuffle.h" 137 72 +.return_address + 10640 "10111010" // LDA r16, [p7]; MOVXM p7, #508564 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10641 "00010000" // /* MW 9 */ + 10642 "01001010" // /* MW 8 */ + 10643 "10110001" // /* MW 7 */ + 10644 "11110011" // /* MW 6 */ + 10645 "00000001" // /* MW 5 */ + 10646 "00000000" // /* MW 4 */ + 10647 "11010000" // /* MW 3 */ + 10648 "11000010" // /* MW 2 */ + 10649 "11100000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 137 72 first + 10650 "10011000" // LDA r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10651 "00110110" // /* MW 3 */ + 10652 "00000110" // /* MW 2 */ + 10653 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 10654 "10011000" // LDA p1, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10655 "10011110" // /* MW 3 */ + 10656 "00000100" // /* MW 2 */ + 10657 "00000110" // /* MW 1 */ + 10658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10659 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 137 11 first +.no_stack_arguments + 10660 "00000100" // JL #9392 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9392 delay_slots=5 */ + 10661 "00000001" // /* MW 5 */ + 10662 "00000000" // /* MW 4 */ + 10663 "01011000" // /* MW 3 */ + 10664 "00010010" // /* MW 2 */ + 10665 "00000000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 137 64 +.delay_slot + 10666 "00011000" // MOVX r18, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10667 "00000101" // /* MW 3 */ + 10668 "00100100" // /* MW 2 */ + 10669 "00010000" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 137 11 +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 27 +.delay_slot + 10670 "01000100" // MOVXM p2, #508544 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10671 "00000000" // /* MW 5 */ + 10672 "11000101" // /* MW 4 */ + 10673 "11000100" // /* MW 3 */ + 10674 "00000111" // /* MW 2 */ + 10675 "00000000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 27 +.delay_slot + 10676 "11111000" // MOV p6, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10677 "11000000" // /* MW 3 */ + 10678 "01100100" // /* MW 2 */ + 10679 "00011110" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 137 64 +.delay_slot + 10680 "10011000" // LSHL r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10681 "00101101" // /* MW 3 */ + 10682 "01100011" // /* MW 2 */ + 10683 "00010100" // /* MW 1 */ +.src_ref 3 "transposeshuffle.h" 137 64 +.delay_slot + 10684 "01011000" // ADD.NC p0, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10685 "11000001" // /* MW 3 */ + 10686 "01101000" // /* MW 2 */ + 10687 "00011000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 8 +.src_ref 10 "transpose4d_adf_wrapper.cpp" 46 +.return_address + 10688 "10111010" // LDA lr, [sp, #-12]; MOVXM p2, #508436 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10689 "00010000" // /* MW 9 */ + 10690 "00001010" // /* MW 8 */ + 10691 "00110001" // /* MW 7 */ + 10692 "11110001" // /* MW 6 */ + 10693 "00000001" // /* MW 5 */ + 10694 "00000000" // /* MW 4 */ + 10695 "00100000" // /* MW 3 */ + 10696 "10000111" // /* MW 2 */ + 10697 "11111110" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 8 +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 8 first + 10698 "00101100" // LDA r16, [p2]; MOVX r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10699 "00000010" // /* MW 5 */ + 10700 "01100000" // /* MW 4 */ + 10701 "11010000" // /* MW 3 */ + 10702 "11000010" // /* MW 2 */ + 10703 "01000000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 27 + 10704 "10011000" // LDA r17, [p6, #24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10705 "00110110" // /* MW 3 */ + 10706 "01100110" // /* MW 2 */ + 10707 "00000110" // /* MW 1 */ + 10708 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10709 "00011001" // /* MW 3 */ + 10710 "11111011" // /* MW 2 */ + 10711 "00000111" // /* MW 1 */ + 10712 "00011000" // LDA p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10713 "10011001" // /* MW 3 */ + 10714 "11111111" // /* MW 2 */ + 10715 "00000111" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 46 first + 10716 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10717 "00000001" // /* MW 5 */ + 10718 "00000000" // /* MW 4 */ + 10719 "00000000" // /* MW 3 */ + 10720 "11111000" // /* MW 2 */ + 10721 "11111111" // /* MW 1 */ + 10722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10723 "00000000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 46 + 10724 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10725 "00000000" // /* MW 3 */ + 10726 "00101000" // /* MW 2 */ + 10727 "00010000" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 8 first +.delay_slot + 10728 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10729 "00000111" // /* MW 3 */ + 10730 "00100000" // /* MW 2 */ + 10731 "00010100" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 17 +.delay_slot + 10732 "10011000" // EQ r27, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10733 "00000111" // /* MW 3 */ + 10734 "01110111" // /* MW 2 */ + 10735 "00010100" // /* MW 1 */ +.src_ref 10 "transpose4d_adf_wrapper.cpp" 43 8 +.delay_slot + 10736 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10737 "10000010" // /* MW 3 */ + 10738 "00100001" // /* MW 2 */ + 10739 "00010100" // /* MW 1 */ +.delay_slot + 10740 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10741 "00010001" // /* MW 3 */ + 10742 "00000110" // /* MW 2 */ + 10743 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj__end +.label __ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA7_Kj___func_end0 + 10745 "00000000" // /* MW 1 */ +.label __Z14_b7835_wrapperPPv___func_begin0 +.label _Z14_b7835_wrapperPPv +.function _b7835_wrapper _Z14_b7835_wrapperPPv +.src_ref 0 "0_0_reloadable4.cc" 30 first +.src_ref 0 "0_0_reloadable4.cc" 32 79 +.function_start + 10752 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10753 "11000000" // /* MW 3 */ + 10754 "01100000" // /* MW 2 */ + 10755 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 32 79 first + 10756 "10011000" // LDA p0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10757 "00011110" // /* MW 3 */ + 10758 "00011100" // /* MW 2 */ + 10759 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 34 46 first + 10760 "10011000" // LDA p2, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10761 "00011110" // /* MW 3 */ + 10762 "00010101" // /* MW 2 */ + 10763 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 33 80 first + 10764 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10765 "10011110" // /* MW 3 */ + 10766 "00000100" // /* MW 2 */ + 10767 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 31 4 first +.tail_call + 10768 "10000100" // J #10544 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10544 delay_slots=5 */ + 10769 "00000000" // /* MW 5 */ + 10770 "00000000" // /* MW 4 */ + 10771 "10011000" // /* MW 3 */ + 10772 "00010100" // /* MW 2 */ + 10773 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10775 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10776 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10777 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10778 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10779 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10780 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10781 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z14_b7835_wrapperPPv__end +.label __Z14_b7835_wrapperPPv___func_end0 + 10783 "00000000" // /* MW 1 */ +.label __ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj___func_begin0 +.label _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj +.function buffer_pad_adf_wrapper, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> > > _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 24 first +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 26 25 +.function_start + 10784 "11010100" // LDA el0, [p1]; MOV r17, p2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10785 "10000001" // /* MW 5 */ + 10786 "10101001" // /* MW 4 */ + 10787 "11011000" // /* MW 3 */ + 10788 "10000101" // /* MW 2 */ + 10789 "00100000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 26 25 first + 10790 "00011000" // ADD.NC p1, r17, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10791 "10000010" // /* MW 3 */ + 10792 "01101000" // /* MW 2 */ + 10793 "00011001" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 26 25 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 10794 "10011000" // LDA r18, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10795 "01010110" // /* MW 3 */ + 10796 "00011110" // /* MW 2 */ + 10797 "00000001" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 27 33 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10798 "10011000" // LDA r15, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10799 "11110110" // /* MW 3 */ + 10800 "00000101" // /* MW 2 */ + 10801 "00000001" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10803 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10805 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10807 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10809 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10810 "11111000" // MOV r0, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10811 "10100000" // /* MW 3 */ + 10812 "00010111" // /* MW 2 */ + 10813 "00011000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 24 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 10814 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10815 "00000001" // /* MW 5 */ + 10816 "00000000" // /* MW 4 */ + 10817 "00000000" // /* MW 3 */ + 10818 "00001000" // /* MW 2 */ + 10819 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 43 first +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 72 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10820 "01100100" // MUL r18, r15, r18; MOV r16, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10821 "11111101" // /* MW 5 */ + 10822 "00111111" // /* MW 4 */ + 10823 "11111000" // /* MW 3 */ + 10824 "10100101" // /* MW 2 */ + 10825 "01111100" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 72 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10826 "00111010" // ST r18, [sp, #-20]; MOVXM r17, #1073741823 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10827 "10010001" // /* MW 9 */ + 10828 "11111111" // /* MW 8 */ + 10829 "00101111" // /* MW 7 */ + 10830 "11111110" // /* MW 6 */ + 10831 "11111111" // /* MW 5 */ + 10832 "00001111" // /* MW 4 */ + 10833 "10110000" // /* MW 3 */ + 10834 "11001010" // /* MW 2 */ + 10835 "11111101" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 72 + 10836 "10011000" // LSHL r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10837 "00001101" // /* MW 3 */ + 10838 "10100001" // /* MW 2 */ + 10839 "00010100" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 72 + 10840 "10011000" // AND r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10841 "00000100" // /* MW 3 */ + 10842 "01100001" // /* MW 2 */ + 10843 "00010100" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 4 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 22 + 10844 "10000100" // JZ r16, #10928 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10928 delay_slots=5 */ + 10845 "00000001" // /* MW 5 */ + 10846 "00000000" // /* MW 4 */ + 10847 "01011000" // /* MW 3 */ + 10848 "00010101" // /* MW 2 */ + 10849 "10000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.delay_slot + 10850 "11010100" // LDA p7, [p0]; MOV p0, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10851 "10000001" // /* MW 5 */ + 10852 "11011101" // /* MW 4 */ + 10853 "11010000" // /* MW 3 */ + 10854 "11110011" // /* MW 2 */ + 10855 "00000000" // /* MW 1 */ +.delay_slot + 10856 "10011000" // ST p0, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10857 "00011101" // /* MW 3 */ + 10858 "11111000" // /* MW 2 */ + 10859 "00001111" // /* MW 1 */ +.delay_slot + 10860 "10011000" // ST r14, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10861 "11010101" // /* MW 3 */ + 10862 "11110101" // /* MW 2 */ + 10863 "00001111" // /* MW 1 */ +.delay_slot + 10864 "10011000" // ST lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10865 "00111101" // /* MW 3 */ + 10866 "11110000" // /* MW 2 */ + 10867 "00001111" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 25 24 first +.delay_slot + 10868 "00001100" // LDA r14, [p1, #-8]; ST r0, [sp, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10869 "00101011" // /* MW 5 */ + 10870 "11111000" // /* MW 4 */ + 10871 "11011111" // /* MW 3 */ + 10872 "10111010" // /* MW 2 */ + 10873 "00111100" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 37 31 + 10874 "01011100" // ST el0, [sp, #-24]; MOVX r0, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10875 "00000010" // /* MW 5 */ + 10876 "00000000" // /* MW 4 */ + 10877 "10110000" // /* MW 3 */ + 10878 "00000101" // /* MW 2 */ + 10879 "11111101" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 37 31 + 10880 "00011000" // LDA p1, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10881 "10011001" // /* MW 3 */ + 10882 "11101000" // /* MW 2 */ + 10883 "00000111" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 37 31 first +.no_stack_arguments + 10884 "00000100" // JL #12608 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12608 delay_slots=5 */ + 10885 "00000001" // /* MW 5 */ + 10886 "00000000" // /* MW 4 */ + 10887 "10100000" // /* MW 3 */ + 10888 "00011000" // /* MW 2 */ + 10889 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 4 +.delay_slot + 10890 "00011000" // MOVX r17, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10891 "00001001" // /* MW 3 */ + 10892 "00100010" // /* MW 2 */ + 10893 "00010000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 36 4 first +.delay_slot + 10894 "10011000" // LSHL r1, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10895 "00011101" // /* MW 3 */ + 10896 "00000011" // /* MW 2 */ + 10897 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10899 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10901 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10902 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10903 "01111110" // /* MW 9 */ + 10904 "10100101" // /* MW 8 */ + 10905 "00000001" // /* MW 7 */ + 10906 "00000000" // /* MW 6 */ + 10907 "00010000" // /* MW 5 */ + 10908 "00000000" // /* MW 4 */ + 10909 "11110000" // /* MW 3 */ + 10910 "00101100" // /* MW 2 */ + 10911 "00000000" // /* MW 1 */ +.return_address + 10912 "10000100" // J #10944 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10944 delay_slots=5 */ + 10913 "00000000" // /* MW 5 */ + 10914 "00000000" // /* MW 4 */ + 10915 "01100000" // /* MW 3 */ + 10916 "00010101" // /* MW 2 */ + 10917 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10919 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10920 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10921 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10923 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10925 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10927 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_144 + 10928 "11100001" // NOPA; NOPB; ST el0, [sp, #-24]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10929 "00000000" // /* MW 15 */ + 10930 "00000000" // /* MW 14 */ + 10931 "01111000" // /* MW 13 */ + 10932 "10100101" // /* MW 12 */ + 10933 "00000001" // /* MW 11 */ + 10934 "00000000" // /* MW 10 */ + 10935 "00000000" // /* MW 9 */ + 10936 "10000000" // /* MW 8 */ + 10937 "00101101" // /* MW 7 */ + 10938 "11101000" // /* MW 6 */ + 10939 "00100111" // /* MW 5 */ + 10940 "00000000" // /* MW 4 */ + 10941 "11110000" // /* MW 3 */ + 10942 "00101100" // /* MW 2 */ + 10943 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_160 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 40 4 first +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 40 22 first + 10944 "10000100" // JZ r15, #11216 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11216 delay_slots=5 */ + 10945 "00000001" // /* MW 5 */ + 10946 "00000000" // /* MW 4 */ + 10947 "11101000" // /* MW 3 */ + 10948 "00010101" // /* MW 2 */ + 10949 "01111000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10951 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10953 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10955 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10957 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10959 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 41 8 first + 10960 "10111010" // LDA r17, [sp, #-20]; MOVXM ls, #11056 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10961 "00010000" // /* MW 9 */ + 10962 "10011000" // /* MW 8 */ + 10963 "01111101" // /* MW 7 */ + 10964 "00001000" // /* MW 6 */ + 10965 "00000000" // /* MW 5 */ + 10966 "00000000" // /* MW 4 */ + 10967 "00100000" // /* MW 3 */ + 10968 "11000110" // /* MW 2 */ + 10969 "11111101" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 41 8 + 10970 "10111010" // MOVA r19, #1; MOVXM le, #11152 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10971 "00010000" // /* MW 9 */ + 10972 "11001000" // /* MW 8 */ + 10973 "10111101" // /* MW 7 */ + 10974 "00001001" // /* MW 6 */ + 10975 "00000000" // /* MW 5 */ + 10976 "00000000" // /* MW 4 */ + 10977 "00000000" // /* MW 3 */ + 10978 "00110011" // /* MW 2 */ + 10979 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 42 36 + 10980 "10111010" // LDA r18, [sp, #-24]; LSHL r20, r14, r19; ADD.NC r16, r15, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10981 "11001000" // /* MW 9 */ + 10982 "11111111" // /* MW 8 */ + 10983 "00001011" // /* MW 7 */ + 10984 "11101110" // /* MW 6 */ + 10985 "01001001" // /* MW 5 */ + 10986 "00011101" // /* MW 4 */ + 10987 "00100000" // /* MW 3 */ + 10988 "01001010" // /* MW 2 */ + 10989 "11111101" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 40 4 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 45 + 10990 "10111010" // LDA lr, [sp, #-16]; MOVXM p0, #11024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10991 "00010000" // /* MW 9 */ + 10992 "10001000" // /* MW 8 */ + 10993 "00110101" // /* MW 7 */ + 10994 "00001000" // /* MW 6 */ + 10995 "00000000" // /* MW 5 */ + 10996 "00000000" // /* MW 4 */ + 10997 "00100000" // /* MW 3 */ + 10998 "00000111" // /* MW 2 */ + 10999 "11111110" // /* MW 1 */ + 11000 "11111000" // MOV m0, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11001 "00100000" // /* MW 3 */ + 11002 "00001010" // /* MW 2 */ + 11003 "00011000" // /* MW 1 */ + 11004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11005 "00000000" // /* MW 1 */ + 11006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11007 "00000000" // /* MW 1 */ + 11008 "11100001" // NOPA; NOPB; NOPS; LSHL r17, r17, r19; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11009 "00000000" // /* MW 15 */ + 11010 "00000000" // /* MW 14 */ + 11011 "01111000" // /* MW 13 */ + 11012 "10100101" // /* MW 12 */ + 11013 "00000001" // /* MW 11 */ + 11014 "11101100" // /* MW 10 */ + 11015 "00011001" // /* MW 9 */ + 11016 "00100011" // /* MW 8 */ + 11017 "01011011" // /* MW 7 */ + 11018 "00000001" // /* MW 6 */ + 11019 "00100000" // /* MW 5 */ + 11020 "00000000" // /* MW 4 */ + 11021 "11110000" // /* MW 3 */ + 11022 "00101100" // /* MW 2 */ + 11023 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_240 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 41 8 +.loop_nesting 1 + 11024 "10000100" // JZ r14, #11168 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11168 delay_slots=5 */ + 11025 "00000001" // /* MW 5 */ + 11026 "00000000" // /* MW 4 */ + 11027 "11010000" // /* MW 3 */ + 11028 "00010101" // /* MW 2 */ + 11029 "01110000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11031 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11032 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11033 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11034 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11035 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11036 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11037 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11038 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11039 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 41 8 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 42 36 + 11040 "00000010" // MOVS p2, p7; MOV lc, r14 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11041 "01110000" // /* MW 7 */ + 11042 "10010000" // /* MW 6 */ + 11043 "10111011" // /* MW 5 */ + 11044 "00000010" // /* MW 4 */ + 11045 "01100000" // /* MW 3 */ + 11046 "10010001" // /* MW 2 */ + 11047 "01010011" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 42 36 + 11048 "00000010" // NOPS; MOV p1, r18 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11049 "01110000" // /* MW 7 */ + 11050 "10010000" // /* MW 6 */ + 11051 "10110100" // /* MW 5 */ + 11052 "00000000" // /* MW 4 */ + 11053 "01100000" // /* MW 3 */ + 11054 "00101011" // /* MW 2 */ + 11055 "00000000" // /* MW 1 */ +.label ZLS_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_272 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 42 36 first +.begin_of_loop +.loop_nesting 2 + 11056 "11100001" // LDA.s16 r19, [p2], #2; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11057 "00000000" // /* MW 15 */ + 11058 "00000000" // /* MW 14 */ + 11059 "01111000" // /* MW 13 */ + 11060 "10100101" // /* MW 12 */ + 11061 "00000001" // /* MW 11 */ + 11062 "00000000" // /* MW 10 */ + 11063 "00000000" // /* MW 9 */ + 11064 "00000000" // /* MW 8 */ + 11065 "01011011" // /* MW 7 */ + 11066 "00000001" // /* MW 6 */ + 11067 "00100000" // /* MW 5 */ + 11068 "00000000" // /* MW 4 */ + 11069 "01010000" // /* MW 3 */ + 11070 "11001110" // /* MW 2 */ + 11071 "01000011" // /* MW 1 */ + 11072 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11073 "00000000" // /* MW 15 */ + 11074 "00000000" // /* MW 14 */ + 11075 "01111000" // /* MW 13 */ + 11076 "10100101" // /* MW 12 */ + 11077 "00000001" // /* MW 11 */ + 11078 "00000000" // /* MW 10 */ + 11079 "00000000" // /* MW 9 */ + 11080 "00000000" // /* MW 8 */ + 11081 "01011011" // /* MW 7 */ + 11082 "00000001" // /* MW 6 */ + 11083 "00100000" // /* MW 5 */ + 11084 "00000000" // /* MW 4 */ + 11085 "11110000" // /* MW 3 */ + 11086 "00101100" // /* MW 2 */ + 11087 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 42 36 + 11088 "11100001" // ST.s16 r19, [p1], #2; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11089 "00000000" // /* MW 15 */ + 11090 "00000000" // /* MW 14 */ + 11091 "01111000" // /* MW 13 */ + 11092 "10100101" // /* MW 12 */ + 11093 "00000001" // /* MW 11 */ + 11094 "00000000" // /* MW 10 */ + 11095 "00000000" // /* MW 9 */ + 11096 "00000000" // /* MW 8 */ + 11097 "01011011" // /* MW 7 */ + 11098 "00000001" // /* MW 6 */ + 11099 "00100000" // /* MW 5 */ + 11100 "00000000" // /* MW 4 */ + 11101 "11100000" // /* MW 3 */ + 11102 "11001110" // /* MW 2 */ + 11103 "00100011" // /* MW 1 */ + 11104 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11105 "00000000" // /* MW 15 */ + 11106 "00000000" // /* MW 14 */ + 11107 "01111000" // /* MW 13 */ + 11108 "10100101" // /* MW 12 */ + 11109 "00000001" // /* MW 11 */ + 11110 "00000000" // /* MW 10 */ + 11111 "00000000" // /* MW 9 */ + 11112 "00000000" // /* MW 8 */ + 11113 "01011011" // /* MW 7 */ + 11114 "00000001" // /* MW 6 */ + 11115 "00100000" // /* MW 5 */ + 11116 "00000000" // /* MW 4 */ + 11117 "11110000" // /* MW 3 */ + 11118 "00101100" // /* MW 2 */ + 11119 "00000000" // /* MW 1 */ + 11120 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11121 "00000000" // /* MW 15 */ + 11122 "00000000" // /* MW 14 */ + 11123 "01111000" // /* MW 13 */ + 11124 "10100101" // /* MW 12 */ + 11125 "00000001" // /* MW 11 */ + 11126 "00000000" // /* MW 10 */ + 11127 "00000000" // /* MW 9 */ + 11128 "00000000" // /* MW 8 */ + 11129 "01011011" // /* MW 7 */ + 11130 "00000001" // /* MW 6 */ + 11131 "00100000" // /* MW 5 */ + 11132 "00000000" // /* MW 4 */ + 11133 "11110000" // /* MW 3 */ + 11134 "00101100" // /* MW 2 */ + 11135 "00000000" // /* MW 1 */ + 11136 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11137 "00000000" // /* MW 15 */ + 11138 "00000000" // /* MW 14 */ + 11139 "01111000" // /* MW 13 */ + 11140 "10100101" // /* MW 12 */ + 11141 "00000001" // /* MW 11 */ + 11142 "00000000" // /* MW 10 */ + 11143 "00000000" // /* MW 9 */ + 11144 "00000000" // /* MW 8 */ + 11145 "01011011" // /* MW 7 */ + 11146 "00000001" // /* MW 6 */ + 11147 "00100000" // /* MW 5 */ + 11148 "00000000" // /* MW 4 */ + 11149 "11110000" // /* MW 3 */ + 11150 "00101100" // /* MW 2 */ + 11151 "00000000" // /* MW 1 */ +.label ZLE_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_368 +.end_of_loop + 11152 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11153 "00000000" // /* MW 15 */ + 11154 "00000000" // /* MW 14 */ + 11155 "01111000" // /* MW 13 */ + 11156 "10100101" // /* MW 12 */ + 11157 "00000001" // /* MW 11 */ + 11158 "00000000" // /* MW 10 */ + 11159 "00000000" // /* MW 9 */ + 11160 "00000000" // /* MW 8 */ + 11161 "01011011" // /* MW 7 */ + 11162 "00000001" // /* MW 6 */ + 11163 "00100000" // /* MW 5 */ + 11164 "00000000" // /* MW 4 */ + 11165 "11110000" // /* MW 3 */ + 11166 "00101100" // /* MW 2 */ + 11167 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_384 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 40 4 first +.loop_nesting 1 + 11168 "00011100" // PADDB [p7], m0; JNZD r16, r16, p0 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 11169 "01000000" // /* MW 5 */ + 11170 "01000000" // /* MW 4 */ + 11171 "00001000" // /* MW 3 */ + 11172 "01110010" // /* MW 2 */ + 11173 "11100001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11175 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11177 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11179 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11181 "00000000" // /* MW 1 */ +.delay_slot + 11182 "01011000" // ADD.NC r18, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11183 "11001001" // /* MW 3 */ + 11184 "10011000" // /* MW 2 */ + 11185 "00011100" // /* MW 1 */ +.loop_nesting 0 + 11186 "10000100" // J #11232 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11232 delay_slots=5 */ + 11187 "00000000" // /* MW 5 */ + 11188 "00000000" // /* MW 4 */ + 11189 "11110000" // /* MW 3 */ + 11190 "00010101" // /* MW 2 */ + 11191 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11193 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11195 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11197 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11199 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11200 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11201 "00000000" // /* MW 15 */ + 11202 "00000000" // /* MW 14 */ + 11203 "01111000" // /* MW 13 */ + 11204 "10100101" // /* MW 12 */ + 11205 "00000001" // /* MW 11 */ + 11206 "00000000" // /* MW 10 */ + 11207 "00000000" // /* MW 9 */ + 11208 "00000000" // /* MW 8 */ + 11209 "01011011" // /* MW 7 */ + 11210 "00000001" // /* MW 6 */ + 11211 "00100000" // /* MW 5 */ + 11212 "00000000" // /* MW 4 */ + 11213 "11110000" // /* MW 3 */ + 11214 "00101100" // /* MW 2 */ + 11215 "00000000" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_432 +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 45 + 11216 "11100001" // LDA lr, [sp, #-16]; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11217 "00000000" // /* MW 15 */ + 11218 "00000000" // /* MW 14 */ + 11219 "01111000" // /* MW 13 */ + 11220 "10100101" // /* MW 12 */ + 11221 "00000001" // /* MW 11 */ + 11222 "00000000" // /* MW 10 */ + 11223 "00000000" // /* MW 9 */ + 11224 "00000000" // /* MW 8 */ + 11225 "01011011" // /* MW 7 */ + 11226 "00000001" // /* MW 6 */ + 11227 "00100000" // /* MW 5 */ + 11228 "00000000" // /* MW 4 */ + 11229 "00100000" // /* MW 3 */ + 11230 "00000111" // /* MW 2 */ + 11231 "11111110" // /* MW 1 */ +.label TGT_F_ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj_448 + 11232 "00011000" // LDA r14, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11233 "11010001" // /* MW 3 */ + 11234 "11110101" // /* MW 2 */ + 11235 "00000111" // /* MW 1 */ + 11236 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11237 "10011001" // /* MW 3 */ + 11238 "11111011" // /* MW 2 */ + 11239 "00000111" // /* MW 1 */ + 11240 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11241 "11110001" // /* MW 3 */ + 11242 "11111101" // /* MW 2 */ + 11243 "00000111" // /* MW 1 */ + 11244 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11245 "00000000" // /* MW 1 */ + 11246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11247 "00000000" // /* MW 1 */ + 11248 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11249 "00000000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 45 first + 11250 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11251 "00000000" // /* MW 3 */ + 11252 "00101000" // /* MW 2 */ + 11253 "00010000" // /* MW 1 */ +.src_ref 10 "buffer_pad_adf_wrapper.cpp" 45 +.delay_slot + 11254 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11255 "00000001" // /* MW 5 */ + 11256 "00000000" // /* MW 4 */ + 11257 "00000000" // /* MW 3 */ + 11258 "11111000" // /* MW 2 */ + 11259 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11261 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11263 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11265 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj__end +.label __ZN12mllib_graphs22buffer_pad_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA3_Kj___func_end0 + 11267 "00000000" // /* MW 1 */ +.label __Z14_b8148_wrapperPPv___func_begin0 +.label _Z14_b8148_wrapperPPv +.function _b8148_wrapper _Z14_b8148_wrapperPPv +.src_ref 0 "0_0_reloadable4.cc" 38 first +.src_ref 0 "0_0_reloadable4.cc" 40 79 +.function_start + 11280 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11281 "11000000" // /* MW 3 */ + 11282 "01100000" // /* MW 2 */ + 11283 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 40 79 first + 11284 "10011000" // LDA p0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11285 "00011110" // /* MW 3 */ + 11286 "00011100" // /* MW 2 */ + 11287 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 42 46 first + 11288 "10011000" // LDA p2, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11289 "00011110" // /* MW 3 */ + 11290 "00010101" // /* MW 2 */ + 11291 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 41 80 first + 11292 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11293 "10011110" // /* MW 3 */ + 11294 "00000100" // /* MW 2 */ + 11295 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 39 4 first +.tail_call + 11296 "10000100" // J #10784 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10784 delay_slots=5 */ + 11297 "00000000" // /* MW 5 */ + 11298 "00000000" // /* MW 4 */ + 11299 "00010000" // /* MW 3 */ + 11300 "00010101" // /* MW 2 */ + 11301 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11303 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11305 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11306 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11307 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11309 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z14_b8148_wrapperPPv__end +.label __Z14_b8148_wrapperPPv___func_end0 + 11311 "00000000" // /* MW 1 */ +.label __Z15_b13739_wrapperPPv___func_begin0 +.label _Z15_b13739_wrapperPPv +.function _b13739_wrapper _Z15_b13739_wrapperPPv +.src_ref 0 "0_0_reloadable4.cc" 46 first +.src_ref 0 "0_0_reloadable4.cc" 48 79 +.function_start + 11312 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11313 "11000000" // /* MW 3 */ + 11314 "01100000" // /* MW 2 */ + 11315 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 48 79 first + 11316 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11317 "00011110" // /* MW 3 */ + 11318 "00101100" // /* MW 2 */ + 11319 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 50 81 first + 11320 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11321 "00011110" // /* MW 3 */ + 11322 "11110101" // /* MW 2 */ + 11323 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 49 47 first + 11324 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11325 "10011110" // /* MW 3 */ + 11326 "00000100" // /* MW 2 */ + 11327 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 47 4 first +.tail_call + 11328 "10000100" // J #3904 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=3904 delay_slots=5 */ + 11329 "00000000" // /* MW 5 */ + 11330 "00000000" // /* MW 4 */ + 11331 "10100000" // /* MW 3 */ + 11332 "00000111" // /* MW 2 */ + 11333 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11335 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11337 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11339 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11341 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z15_b13739_wrapperPPv__end +.label __Z15_b13739_wrapperPPv___func_end0 + 11343 "00000000" // /* MW 1 */ +.label __Z15_b13744_wrapperPPv___func_begin0 +.label _Z15_b13744_wrapperPPv +.function _b13744_wrapper _Z15_b13744_wrapperPPv +.src_ref 0 "0_0_reloadable4.cc" 54 first +.src_ref 0 "0_0_reloadable4.cc" 56 79 +.function_start + 11344 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11345 "11000000" // /* MW 3 */ + 11346 "01100000" // /* MW 2 */ + 11347 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 56 79 first + 11348 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11349 "00011110" // /* MW 3 */ + 11350 "00101100" // /* MW 2 */ + 11351 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 58 81 first + 11352 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11353 "00011110" // /* MW 3 */ + 11354 "11110101" // /* MW 2 */ + 11355 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 57 47 first + 11356 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11357 "10011110" // /* MW 3 */ + 11358 "00000100" // /* MW 2 */ + 11359 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 55 4 first +.tail_call + 11360 "10000100" // J #4864 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=4864 delay_slots=5 */ + 11361 "00000000" // /* MW 5 */ + 11362 "00000000" // /* MW 4 */ + 11363 "10000000" // /* MW 3 */ + 11364 "00001001" // /* MW 2 */ + 11365 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11367 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11369 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11371 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11373 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z15_b13744_wrapperPPv__end +.label __Z15_b13744_wrapperPPv___func_end0 + 11375 "00000000" // /* MW 1 */ +.label __Z15_b13749_wrapperPPv___func_begin0 +.label _Z15_b13749_wrapperPPv +.function _b13749_wrapper _Z15_b13749_wrapperPPv +.src_ref 0 "0_0_reloadable4.cc" 62 first +.src_ref 0 "0_0_reloadable4.cc" 64 79 +.function_start + 11376 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11377 "11000000" // /* MW 3 */ + 11378 "01100000" // /* MW 2 */ + 11379 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 64 79 first + 11380 "10011000" // LDA p0, [p2], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11381 "00011110" // /* MW 3 */ + 11382 "00111100" // /* MW 2 */ + 11383 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 65 47 first + 11384 "10011000" // LDA p1, [p2], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11385 "10011110" // /* MW 3 */ + 11386 "11101100" // /* MW 2 */ + 11387 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 67 81 first + 11388 "10011000" // LDA p3, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11389 "10011110" // /* MW 3 */ + 11390 "00010101" // /* MW 2 */ + 11391 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 66 80 first + 11392 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11393 "00011110" // /* MW 3 */ + 11394 "00000101" // /* MW 2 */ + 11395 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 63 4 first +.tail_call + 11396 "10000100" // J #5872 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=5872 delay_slots=5 */ + 11397 "00000000" // /* MW 5 */ + 11398 "00000000" // /* MW 4 */ + 11399 "01111000" // /* MW 3 */ + 11400 "00001011" // /* MW 2 */ + 11401 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11403 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11405 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11407 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11408 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11409 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z15_b13749_wrapperPPv__end +.label __Z15_b13749_wrapperPPv___func_end0 + 11411 "00000000" // /* MW 1 */ +.label _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj +.label __ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj___func_begin0 +.function load_slice_generic_innermost_rtp _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj +.src_ref 3 "slice_generic_innermost_params.h" 40 first +.src_ref 3 "slice_generic_innermost_params.h" 41 19 first +.function_start + 11424 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11425 "00101110" // /* MW 3 */ + 11426 "00011100" // /* MW 2 */ + 11427 "00000001" // /* MW 1 */ + 11428 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11429 "00000000" // /* MW 1 */ + 11430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11431 "00000000" // /* MW 1 */ + 11432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11433 "00000000" // /* MW 1 */ + 11434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11435 "00000000" // /* MW 1 */ + 11436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11437 "00000000" // /* MW 1 */ + 11438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11439 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 41 17 first + 11440 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11441 "00101001" // /* MW 3 */ + 11442 "00011100" // /* MW 2 */ + 11443 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 42 19 first + 11444 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11445 "00101110" // /* MW 3 */ + 11446 "00011100" // /* MW 2 */ + 11447 "00000001" // /* MW 1 */ + 11448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11449 "00000000" // /* MW 1 */ + 11450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11451 "00000000" // /* MW 1 */ + 11452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11453 "00000000" // /* MW 1 */ + 11454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11455 "00000000" // /* MW 1 */ + 11456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11457 "00000000" // /* MW 1 */ + 11458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11459 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 42 17 + 11460 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11461 "00101001" // /* MW 3 */ + 11462 "00011100" // /* MW 2 */ + 11463 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 43 19 first + 11464 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11465 "00101110" // /* MW 3 */ + 11466 "00011100" // /* MW 2 */ + 11467 "00000001" // /* MW 1 */ + 11468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11469 "00000000" // /* MW 1 */ + 11470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11471 "00000000" // /* MW 1 */ + 11472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11473 "00000000" // /* MW 1 */ + 11474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11475 "00000000" // /* MW 1 */ + 11476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11477 "00000000" // /* MW 1 */ + 11478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11479 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 43 17 + 11480 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11481 "00101001" // /* MW 3 */ + 11482 "00011100" // /* MW 2 */ + 11483 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 44 19 first + 11484 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11485 "00101110" // /* MW 3 */ + 11486 "00011100" // /* MW 2 */ + 11487 "00000001" // /* MW 1 */ + 11488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11489 "00000000" // /* MW 1 */ + 11490 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11491 "00000000" // /* MW 1 */ + 11492 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11493 "00000000" // /* MW 1 */ + 11494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11495 "00000000" // /* MW 1 */ + 11496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11497 "00000000" // /* MW 1 */ + 11498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11499 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 44 17 + 11500 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11501 "00101001" // /* MW 3 */ + 11502 "00011100" // /* MW 2 */ + 11503 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 45 19 first + 11504 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11505 "00101110" // /* MW 3 */ + 11506 "00011100" // /* MW 2 */ + 11507 "00000001" // /* MW 1 */ + 11508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11509 "00000000" // /* MW 1 */ + 11510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11511 "00000000" // /* MW 1 */ + 11512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11513 "00000000" // /* MW 1 */ + 11514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11515 "00000000" // /* MW 1 */ + 11516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11517 "00000000" // /* MW 1 */ + 11518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11519 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 45 17 + 11520 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11521 "00101001" // /* MW 3 */ + 11522 "00011100" // /* MW 2 */ + 11523 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 46 17 first + 11524 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11525 "00101110" // /* MW 3 */ + 11526 "00011100" // /* MW 2 */ + 11527 "00000001" // /* MW 1 */ + 11528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11529 "00000000" // /* MW 1 */ + 11530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11531 "00000000" // /* MW 1 */ + 11532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11533 "00000000" // /* MW 1 */ + 11534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11535 "00000000" // /* MW 1 */ + 11536 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11537 "00000000" // /* MW 1 */ + 11538 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11539 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 46 15 + 11540 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11541 "00101001" // /* MW 3 */ + 11542 "00011100" // /* MW 2 */ + 11543 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 47 18 first + 11544 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11545 "00101110" // /* MW 3 */ + 11546 "00000100" // /* MW 2 */ + 11547 "00000001" // /* MW 1 */ + 11548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11549 "00000000" // /* MW 1 */ + 11550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11551 "00000000" // /* MW 1 */ + 11552 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11553 "00000000" // /* MW 1 */ + 11554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11555 "00000000" // /* MW 1 */ + 11556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11557 "00000000" // /* MW 1 */ + 11558 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11559 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 47 16 + 11560 "10011000" // ST el0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11561 "00101001" // /* MW 3 */ + 11562 "00000100" // /* MW 2 */ + 11563 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 48 18 first + 11564 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11565 "00101110" // /* MW 3 */ + 11566 "00010100" // /* MW 2 */ + 11567 "00000001" // /* MW 1 */ + 11568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11569 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 49 first + 11570 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11571 "00000000" // /* MW 3 */ + 11572 "00101000" // /* MW 2 */ + 11573 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11574 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11575 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11577 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11579 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11581 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 48 16 first +.delay_slot + 11582 "10011000" // ST el0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11583 "00101001" // /* MW 3 */ + 11584 "00010100" // /* MW 2 */ +.label _ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj__end +.label __ZL32load_slice_generic_innermost_rtpR30slice_generic_innermost_paramsRA16_Kj___func_end0 + 11585 "00001000" // /* MW 1 */ +.label _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params +.label __ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params___func_begin0 +.function setup_slice_generic_innermost _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params +.src_ref 3 "slice_generic_innermost_params.h" 52 first +.src_ref 3 "slice_generic_innermost_params.h" 53 25 first +.src_ref 3 "slice_generic_innermost_params.h" 55 42 +.src_ref 3 "slice_generic_innermost_params.h" 58 40 +.function_start + 11600 "10111010" // LDA r1, [p0], #4; MOVX r0, #1; MOV m1, #32 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11601 "01011000" // /* MW 9 */ + 11602 "00100000" // /* MW 8 */ + 11603 "10000000" // /* MW 7 */ + 11604 "00101000" // /* MW 6 */ + 11605 "00000000" // /* MW 5 */ + 11606 "00000000" // /* MW 4 */ + 11607 "11010000" // /* MW 3 */ + 11608 "10000110" // /* MW 2 */ + 11609 "00000011" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 53 38 first +.src_ref 3 "slice_generic_innermost_params.h" 58 30 +.src_ref 3 "slice_generic_innermost_params.h" 59 31 + 11610 "10111010" // LDA r5, [p0], #4; MOVX r3, #2; MOV r2, #-6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11611 "01011000" // /* MW 9 */ + 11612 "11111010" // /* MW 8 */ + 11613 "01001111" // /* MW 7 */ + 11614 "01001000" // /* MW 6 */ + 11615 "00110000" // /* MW 5 */ + 11616 "00000000" // /* MW 4 */ + 11617 "11010000" // /* MW 3 */ + 11618 "10010110" // /* MW 2 */ + 11619 "00000011" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 53 51 +.src_ref 3 "slice_generic_innermost_params.h" 60 27 +.src_ref 3 "slice_generic_innermost_params.h" 62 27 + 11620 "01010100" // LDA r4, [p0], #8; MOV m0, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11621 "00000001" // /* MW 5 */ + 11622 "00000001" // /* MW 4 */ + 11623 "11010000" // /* MW 3 */ + 11624 "10010010" // /* MW 2 */ + 11625 "00000101" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 55 42 first +.src_ref 3 "slice_generic_innermost_params.h" 60 27 + 11626 "01010100" // LDA r6, [p0], m1; MOV dj0, #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11627 "00000001" // /* MW 5 */ + 11628 "00000010" // /* MW 4 */ + 11629 "11010001" // /* MW 3 */ + 11630 "00011010" // /* MW 2 */ + 11631 "00000101" // /* MW 1 */ + 11632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11633 "00000000" // /* MW 1 */ + 11634 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11635 "00000000" // /* MW 1 */ + 11636 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11637 "00000000" // /* MW 1 */ + 11638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11639 "00000000" // /* MW 1 */ + 11640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11641 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 53 30 first + 11642 "10011000" // MUL r1, r5, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11643 "00011111" // /* MW 3 */ + 11644 "01000010" // /* MW 2 */ + 11645 "00010001" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 58 40 first + 11646 "10011000" // AND r0, r6, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11647 "00000100" // /* MW 3 */ + 11648 "10000000" // /* MW 2 */ + 11649 "00010001" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 58 30 + 11650 "10011000" // OR r0, r3, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11651 "00000101" // /* MW 3 */ + 11652 "11000000" // /* MW 2 */ + 11653 "00010000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 53 43 first +.src_ref 3 "slice_generic_innermost_params.h" 58 28 + 11654 "01011100" // ST r0, [p0], #-16; MUL r1, r1, r4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11655 "10011111" // /* MW 5 */ + 11656 "10000100" // /* MW 4 */ + 11657 "00110000" // /* MW 3 */ + 11658 "10000010" // /* MW 2 */ + 11659 "00011001" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 75 first + 11660 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11661 "00000000" // /* MW 3 */ + 11662 "00101000" // /* MW 2 */ + 11663 "00010000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 59 31 first +.delay_slot + 11664 "10011000" // LSHL r0, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11665 "00101101" // /* MW 3 */ + 11666 "01000000" // /* MW 2 */ + 11667 "00010000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 59 25 +.delay_slot + 11668 "10011000" // ST r0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11669 "00010001" // /* MW 3 */ + 11670 "00011100" // /* MW 2 */ + 11671 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 60 27 first +.delay_slot + 11672 "10011000" // ST m0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11673 "00000001" // /* MW 3 */ + 11674 "00011100" // /* MW 2 */ + 11675 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 60 27 +.delay_slot + 11676 "10011000" // ST dj0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11677 "01000001" // /* MW 3 */ + 11678 "00000100" // /* MW 2 */ + 11679 "00001000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 62 27 first +.delay_slot + 11680 "10011000" // ST m0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11681 "00000001" // /* MW 3 */ + 11682 "00010100" // /* MW 2 */ +.label _ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params__end +.label __ZL29setup_slice_generic_innermostI8bfloat16EvR30slice_generic_innermost_params___func_end0 + 11683 "00001000" // /* MW 1 */ +.label _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj +.label __ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj___func_begin0 +.function setup_slice_generic_innermost_params _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj +.src_ref 3 "slice_generic_innermost_params.h" 79 first +.src_ref 3 "slice_generic_innermost_params.h" 80 4 first +.function_start +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.noswbrkpt + 11696 "00000100" // JL #11424 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11424 delay_slots=5 */ + 11697 "00000001" // /* MW 5 */ + 11698 "00000000" // /* MW 4 */ + 11699 "01010000" // /* MW 3 */ + 11700 "00010110" // /* MW 2 */ + 11701 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 81 4 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11702 "11111000" // MOV dc0, lr /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11703 "11100000" // /* MW 3 */ + 11704 "11000001" // /* MW 2 */ + 11705 "00011000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 81 4 +.delay_slot + 11706 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11707 "11000000" // /* MW 3 */ + 11708 "01100000" // /* MW 2 */ + 11709 "00011010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11711 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11713 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11714 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 11715 "00011100" // /* MW 13 */ + 11716 "00000000" // /* MW 12 */ + 11717 "00000000" // /* MW 11 */ + 11718 "01010111" // /* MW 10 */ + 11719 "00011010" // /* MW 9 */ + 11720 "01000000" // /* MW 8 */ + 11721 "00000000" // /* MW 7 */ + 11722 "00000000" // /* MW 6 */ + 11723 "10110110" // /* MW 5 */ + 11724 "00000010" // /* MW 4 */ + 11725 "11110000" // /* MW 3 */ + 11726 "00101100" // /* MW 2 */ + 11727 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 81 4 first +.tail_call +.return_address + 11728 "10000100" // J #11600 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=11600 delay_slots=5 */ + 11729 "00000000" // /* MW 5 */ + 11730 "00000000" // /* MW 4 */ + 11731 "10101000" // /* MW 3 */ + 11732 "00010110" // /* MW 2 */ + 11733 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 81 4 +.delay_slot + 11734 "11111000" // MOV lr, dc0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11735 "10000000" // /* MW 3 */ + 11736 "01110001" // /* MW 2 */ + 11737 "00011111" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost_params.h" 81 4 +.delay_slot + 11738 "11111000" // MOV p0, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11739 "11000000" // /* MW 3 */ + 11740 "01100100" // /* MW 2 */ + 11741 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11743 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11745 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj__end +.label __ZL36setup_slice_generic_innermost_paramsI8bfloat16EvR30slice_generic_innermost_paramsRA16_Kj___func_end0 + 11747 "00000000" // /* MW 1 */ +.label __Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params___func_begin0 +.label _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params +.function slice_generic_innermost _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "slice_generic_innermost.h" 25 first +.src_ref 3 "slice_generic_innermost.h" 35 60 +.src_ref 3 "slice_generic_innermost.h" 54 19 +.function_start + 11760 "00000010" // MOVS p5, p1; MOV r0, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11761 "01110000" // /* MW 7 */ + 11762 "01100000" // /* MW 6 */ + 11763 "00001010" // /* MW 5 */ + 11764 "00000000" // /* MW 4 */ + 11765 "01100000" // /* MW 3 */ + 11766 "10010001" // /* MW 2 */ + 11767 "10110000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 35 60 first + 11768 "00011000" // ADD.NC p3, r0, #36 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11769 "00010010" // /* MW 3 */ + 11770 "01100000" // /* MW 2 */ + 11771 "00011011" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 35 60 + 11772 "11010100" // LDA m2, [p3], #4; MOV r0, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11773 "10000001" // /* MW 5 */ + 11774 "00111101" // /* MW 4 */ + 11775 "11010000" // /* MW 3 */ + 11776 "10100000" // /* MW 2 */ + 11777 "01100011" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 36 61 first + 11778 "10011000" // LDA m0, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11779 "00000110" // /* MW 3 */ + 11780 "00011100" // /* MW 2 */ + 11781 "00000011" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 40 35 first + 11782 "10011000" // LDA r2, [p3, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11783 "01010110" // /* MW 3 */ + 11784 "11010100" // /* MW 2 */ + 11785 "00000011" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 38 59 first + 11786 "10011000" // LDA m1, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11787 "10000110" // /* MW 3 */ + 11788 "00000100" // /* MW 2 */ + 11789 "00000011" // /* MW 1 */ + 11790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11791 "00000000" // /* MW 1 */ + 11792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11793 "00000000" // /* MW 1 */ + 11794 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11795 "00000000" // /* MW 1 */ + 11796 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11797 "00000000" // /* MW 1 */ + 11798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11799 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 40 8 first +.src_ref 3 "slice_generic_innermost.h" 40 26 first + 11800 "10000100" // JZ r2, #12224 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12224 delay_slots=5 */ + 11801 "00000001" // /* MW 5 */ + 11802 "00000000" // /* MW 4 */ + 11803 "11100000" // /* MW 3 */ + 11804 "00010111" // /* MW 2 */ + 11805 "00010000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "slice_generic_innermost.h" 36 39 +.src_ref 3 "slice_generic_innermost.h" 50 19 +.delay_slot + 11806 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11807 "11000000" // /* MW 3 */ + 11808 "01100000" // /* MW 2 */ + 11809 "00011111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "slice_generic_innermost.h" 36 39 first +.src_ref 3 "slice_generic_innermost.h" 50 19 +.delay_slot + 11810 "11110100" // PADDB [p7], m0; MOV p3, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11811 "10000001" // /* MW 5 */ + 11812 "11011101" // /* MW 4 */ + 11813 "00000110" // /* MW 3 */ + 11814 "01110010" // /* MW 2 */ + 11815 "11100001" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 35 38 first +.delay_slot + 11816 "00011000" // PADDB [p0], m2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11817 "10010000" // /* MW 3 */ + 11818 "01001011" // /* MW 2 */ + 11819 "00111000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "slice_generic_innermost.h" 37 39 first +.src_ref 3 "slice_generic_innermost.h" 52 20 +.delay_slot + 11820 "11110100" // PADDB [p0], m0; MOV p4, p0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11821 "10000001" // /* MW 5 */ + 11822 "11000001" // /* MW 4 */ + 11823 "00001000" // /* MW 3 */ + 11824 "01110010" // /* MW 2 */ + 11825 "00000001" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 38 37 first +.delay_slot + 11826 "00011000" // PADDB [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11827 "10010000" // /* MW 3 */ + 11828 "00101011" // /* MW 2 */ + 11829 "00111001" // /* MW 1 */ + 11830 "00011000" // MOVX r1, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11831 "00010001" // /* MW 3 */ + 11832 "00000010" // /* MW 2 */ + 11833 "00010000" // /* MW 1 */ + 11834 "10011000" // LTU r3, r2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11835 "00011100" // /* MW 3 */ + 11836 "10000110" // /* MW 2 */ + 11837 "00010000" // /* MW 1 */ + 11838 "10000100" // JNZ r3, #12080 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12080 delay_slots=5 */ + 11839 "00000001" // /* MW 5 */ + 11840 "01000000" // /* MW 4 */ + 11841 "10011000" // /* MW 3 */ + 11842 "00010111" // /* MW 2 */ + 11843 "00011000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 40 8 +.delay_slot + 11844 "10111000" // MOV dj0, #48 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11845 "01100000" // /* MW 3 */ + 11846 "10000000" // /* MW 2 */ + 11847 "00011000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 40 8 first +.delay_slot + 11848 "10011000" // LDA r1, [p2, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11849 "00110110" // /* MW 3 */ + 11850 "00000000" // /* MW 2 */ + 11851 "00000010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11853 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11854 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11855 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11856 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11857 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 40 8 +.src_ref 3 "slice_generic_innermost.h" 50 19 first +.src_ref 3 "slice_generic_innermost.h" 52 20 first + 11858 "10110110" // VLDA x2, [p4], m0; VLDB x1, [p3], m0; MOVXM ls, #11952 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 11859 "00010000" // /* MW 11 */ + 11860 "01011000" // /* MW 10 */ + 11861 "01111111" // /* MW 9 */ + 11862 "00001000" // /* MW 8 */ + 11863 "00000000" // /* MW 7 */ + 11864 "00000000" // /* MW 6 */ + 11865 "11101000" // /* MW 5 */ + 11866 "00010000" // /* MW 4 */ + 11867 "01110110" // /* MW 3 */ + 11868 "00010011" // /* MW 2 */ + 11869 "10000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "slice_generic_innermost.h" 40 8 first +.src_ref 3 "slice_generic_innermost.h" 51 19 first +.src_ref 3 "slice_generic_innermost.h" 53 20 first +.src_ref 3 "slice_generic_innermost.h" 56 21 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11870 "01111110" // PADDA [p4], m0; VLDB x0, [p7], m0; PADDS [p3], m0; MOVXM le, #12000 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 11871 "01100000" // /* MW 13 */ + 11872 "00001011" // /* MW 12 */ + 11873 "01100001" // /* MW 11 */ + 11874 "00000010" // /* MW 10 */ + 11875 "11101110" // /* MW 9 */ + 11876 "00110111" // /* MW 8 */ + 11877 "00000001" // /* MW 7 */ + 11878 "00000000" // /* MW 6 */ + 11879 "01101000" // /* MW 5 */ + 11880 "00010000" // /* MW 4 */ + 11881 "11111110" // /* MW 3 */ + 11882 "00001100" // /* MW 2 */ + 11883 "10000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 40 8 +.src_ref 3 "slice_generic_innermost.h" 50 19 first +.src_ref 3 "slice_generic_innermost.h" 57 21 first +.src_ref 3 "slice_generic_innermost.h" 58 21 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11884 "11110110" // VLDA x1, [p3], m0; VLDB x3, [p0], m0; PADDS [p7], m0; ADD.NC lc, r2, #-3 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 11885 "01000000" // /* MW 11 */ + 11886 "10111111" // /* MW 10 */ + 11887 "10111000" // /* MW 9 */ + 11888 "00000010" // /* MW 8 */ + 11889 "01011011" // /* MW 7 */ + 11890 "00001000" // /* MW 6 */ + 11891 "11101111" // /* MW 5 */ + 11892 "00010001" // /* MW 4 */ + 11893 "01110000" // /* MW 3 */ + 11894 "00001011" // /* MW 2 */ + 11895 "01100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "slice_generic_innermost.h" 51 19 first +.src_ref 3 "slice_generic_innermost.h" 52 20 first +.src_ref 3 "slice_generic_innermost.h" 59 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11896 "00110010" // PADDA [p0], m0; VLDB x2, [p4], m0; PADDS [p3], m0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11897 "01011011" // /* MW 7 */ + 11898 "00001000" // /* MW 6 */ + 11899 "01101011" // /* MW 5 */ + 11900 "00010001" // /* MW 4 */ + 11901 "11111000" // /* MW 3 */ + 11902 "00001100" // /* MW 2 */ + 11903 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 53 20 first +.src_ref 3 "slice_generic_innermost.h" 56 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11904 "00111100" // PADDA [p4], m0; VLDB x0, [p7], m0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11905 "01101000" // /* MW 5 */ + 11906 "00010000" // /* MW 4 */ + 11907 "11111110" // /* MW 3 */ + 11908 "00001100" // /* MW 2 */ + 11909 "10000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "slice_generic_innermost.h" 57 21 first +.src_ref 3 "slice_generic_innermost.h" 58 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11910 "01001100" // VLDB x3, [p0], m0; PADDS [p7], m0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11911 "10110110" // /* MW 5 */ + 11912 "00010000" // /* MW 4 */ + 11913 "10001110" // /* MW 3 */ + 11914 "00011110" // /* MW 2 */ + 11915 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 50 19 first +.src_ref 3 "slice_generic_innermost.h" 59 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11916 "00111100" // PADDA [p0], m0; VLDB x1, [p3], m0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11917 "11101000" // /* MW 5 */ + 11918 "00010000" // /* MW 4 */ + 11919 "11110110" // /* MW 3 */ + 11920 "00001100" // /* MW 2 */ + 11921 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "slice_generic_innermost.h" 46 17 first +.src_ref 3 "slice_generic_innermost.h" 52 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11922 "10110100" // VLDB x2, [p4], m0; VSHUFFLE bmll0, x1, x2, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11923 "00001011" // /* MW 5 */ + 11924 "00010010" // /* MW 4 */ + 11925 "10000000" // /* MW 3 */ + 11926 "00010110" // /* MW 2 */ + 11927 "10000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 51 19 first +.src_ref 3 "slice_generic_innermost.h" 56 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11928 "00110010" // NOPA; VLDB x0, [p7], m0; PADDS [p3], m0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11929 "01011011" // /* MW 7 */ + 11930 "00001000" // /* MW 6 */ + 11931 "01101011" // /* MW 5 */ + 11932 "00010000" // /* MW 4 */ + 11933 "11111110" // /* MW 3 */ + 11934 "00101100" // /* MW 2 */ + 11935 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "slice_generic_innermost.h" 47 18 first +.src_ref 3 "slice_generic_innermost.h" 54 19 first +.src_ref 3 "slice_generic_innermost.h" 58 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11936 "11100001" // NOPA; VLDB x3, [p0], m0; VST bmll0, [p5], m1; NOPX; VSHUFFLE bmlh0, x0, x3, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11937 "00000000" // /* MW 15 */ + 11938 "00000000" // /* MW 14 */ + 11939 "11101000" // /* MW 13 */ + 11940 "11000010" // /* MW 12 */ + 11941 "01000000" // /* MW 11 */ + 11942 "00000000" // /* MW 10 */ + 11943 "00000000" // /* MW 9 */ + 11944 "10000000" // /* MW 8 */ + 11945 "00000110" // /* MW 7 */ + 11946 "00101000" // /* MW 6 */ + 11947 "11101101" // /* MW 5 */ + 11948 "00010001" // /* MW 4 */ + 11949 "11110000" // /* MW 3 */ + 11950 "00101100" // /* MW 2 */ + 11951 "00000000" // /* MW 1 */ +.label ZLS_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_192 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 46 17 first +.src_ref 3 "slice_generic_innermost.h" 50 19 first +.src_ref 3 "slice_generic_innermost.h" 53 20 first +.src_ref 3 "slice_generic_innermost.h" 57 21 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 11952 "11100001" // PADDA [p4], m0; VLDB x1, [p3], m0; PADDS [p7], m0; NOPX; VSHUFFLE bmll0, x1, x2, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11953 "00000000" // /* MW 15 */ + 11954 "00000000" // /* MW 14 */ + 11955 "11101000" // /* MW 13 */ + 11956 "10000010" // /* MW 12 */ + 11957 "00000100" // /* MW 11 */ + 11958 "00000000" // /* MW 10 */ + 11959 "00000000" // /* MW 9 */ + 11960 "00000000" // /* MW 8 */ + 11961 "01011011" // /* MW 7 */ + 11962 "00001000" // /* MW 6 */ + 11963 "11101111" // /* MW 5 */ + 11964 "00010000" // /* MW 4 */ + 11965 "11110110" // /* MW 3 */ + 11966 "00001100" // /* MW 2 */ + 11967 "10000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "slice_generic_innermost.h" 52 20 first +.src_ref 3 "slice_generic_innermost.h" 59 21 first +.src_ref 3 "slice_generic_innermost.h" 60 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11968 "11100001" // PADDA [p0], m0; VLDB x2, [p4], m0; VST bmlh0, [p1], m1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11969 "00000000" // /* MW 15 */ + 11970 "00000000" // /* MW 14 */ + 11971 "01111000" // /* MW 13 */ + 11972 "10100101" // /* MW 12 */ + 11973 "00000001" // /* MW 11 */ + 11974 "00000000" // /* MW 10 */ + 11975 "00000000" // /* MW 9 */ + 11976 "10000000" // /* MW 8 */ + 11977 "00100110" // /* MW 7 */ + 11978 "00101000" // /* MW 6 */ + 11979 "01101001" // /* MW 5 */ + 11980 "00010001" // /* MW 4 */ + 11981 "11111000" // /* MW 3 */ + 11982 "00001100" // /* MW 2 */ + 11983 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 47 18 first +.src_ref 3 "slice_generic_innermost.h" 51 19 first +.src_ref 3 "slice_generic_innermost.h" 55 19 first +.src_ref 3 "slice_generic_innermost.h" 56 21 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11984 "11100001" // PADDA [p5], m1; VLDB x0, [p7], m0; PADDS [p3], m0; NOPX; VSHUFFLE bmlh0, x0, x3, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11985 "00000000" // /* MW 15 */ + 11986 "00000000" // /* MW 14 */ + 11987 "11101000" // /* MW 13 */ + 11988 "11000010" // /* MW 12 */ + 11989 "01000000" // /* MW 11 */ + 11990 "00000000" // /* MW 10 */ + 11991 "00000000" // /* MW 9 */ + 11992 "00000000" // /* MW 8 */ + 11993 "01011011" // /* MW 7 */ + 11994 "00001000" // /* MW 6 */ + 11995 "01101011" // /* MW 5 */ + 11996 "00010000" // /* MW 4 */ + 11997 "11111110" // /* MW 3 */ + 11998 "00001100" // /* MW 2 */ + 11999 "10100101" // /* MW 1 */ +.label ZLE_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_240 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "slice_generic_innermost.h" 54 19 first +.src_ref 3 "slice_generic_innermost.h" 58 21 first +.src_ref 3 "slice_generic_innermost.h" 61 19 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12000 "11100001" // PADDA [p1], m1; VLDB x3, [p0], m0; VST bmll0, [p5], m1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12001 "00000000" // /* MW 15 */ + 12002 "00000000" // /* MW 14 */ + 12003 "01111000" // /* MW 13 */ + 12004 "10100101" // /* MW 12 */ + 12005 "00000001" // /* MW 11 */ + 12006 "00000000" // /* MW 10 */ + 12007 "00000000" // /* MW 9 */ + 12008 "10000000" // /* MW 8 */ + 12009 "00000110" // /* MW 7 */ + 12010 "00101000" // /* MW 6 */ + 12011 "11101101" // /* MW 5 */ + 12012 "00010001" // /* MW 4 */ + 12013 "11110000" // /* MW 3 */ + 12014 "00001100" // /* MW 2 */ + 12015 "00100101" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 46 17 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 12016 "11011000" // VSHUFFLE bmll0, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12017 "00000101" // /* MW 3 */ + 12018 "00001001" // /* MW 2 */ + 12019 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "slice_generic_innermost.h" 60 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12020 "10011000" // VST bmlh0, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12021 "00100110" // /* MW 3 */ + 12022 "00101000" // /* MW 2 */ + 12023 "00001001" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 47 18 first +.src_ref 3 "slice_generic_innermost.h" 61 19 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12024 "10010100" // PADDA [p1], m1; VSHUFFLE bmlh0, x0, x3, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12025 "00001011" // /* MW 5 */ + 12026 "00000011" // /* MW 4 */ + 12027 "11110001" // /* MW 3 */ + 12028 "00001100" // /* MW 2 */ + 12029 "00100101" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 12030 "10000100" // J #12224 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=12224 delay_slots=5 */ + 12031 "00000000" // /* MW 5 */ + 12032 "00000000" // /* MW 4 */ + 12033 "11100000" // /* MW 3 */ + 12034 "00010111" // /* MW 2 */ + 12035 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "slice_generic_innermost.h" 46 17 first +.src_ref 3 "slice_generic_innermost.h" 55 19 first +.src_ref 3 "slice_generic_innermost.h" 60 19 first +.delay_slot +.aggressive_scheduled_block_id 2 +.noswbrkpt + 12036 "10111010" // PADDA [p5], m1; VST bmlh0, [p1], m1; VSHUFFLE bmll0, x1, x2, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12037 "11100010" // /* MW 9 */ + 12038 "10000010" // /* MW 8 */ + 12039 "00000100" // /* MW 7 */ + 12040 "10000000" // /* MW 6 */ + 12041 "00100110" // /* MW 5 */ + 12042 "00101000" // /* MW 4 */ + 12043 "11110001" // /* MW 3 */ + 12044 "00001100" // /* MW 2 */ + 12045 "10100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "slice_generic_innermost.h" 54 19 first +.src_ref 3 "slice_generic_innermost.h" 61 19 first +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12046 "00001100" // PADDA [p1], m1; VST bmll0, [p5], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12047 "00001101" // /* MW 5 */ + 12048 "01010000" // /* MW 4 */ + 12049 "11111010" // /* MW 3 */ + 12050 "00001100" // /* MW 2 */ + 12051 "00100101" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 47 18 first +.src_ref 3 "slice_generic_innermost.h" 55 19 first +.delay_slot + 12052 "10010100" // PADDA [p5], m1; VSHUFFLE bmlh0, x0, x3, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12053 "00001011" // /* MW 5 */ + 12054 "00000011" // /* MW 4 */ + 12055 "11110001" // /* MW 3 */ + 12056 "00001100" // /* MW 2 */ + 12057 "10100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "slice_generic_innermost.h" 54 19 first +.delay_slot + 12058 "00001100" // NOPA; VST bmll0, [p5], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12059 "00001101" // /* MW 5 */ + 12060 "01010000" // /* MW 4 */ + 12061 "11111010" // /* MW 3 */ + 12062 "00101100" // /* MW 2 */ + 12063 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "slice_generic_innermost.h" 60 19 first +.delay_slot + 12064 "11100001" // NOPA; NOPB; VST bmlh0, [p1], m1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12065 "00000000" // /* MW 15 */ + 12066 "00000000" // /* MW 14 */ + 12067 "01111000" // /* MW 13 */ + 12068 "10100101" // /* MW 12 */ + 12069 "00000001" // /* MW 11 */ + 12070 "00000000" // /* MW 10 */ + 12071 "00000000" // /* MW 9 */ + 12072 "10000000" // /* MW 8 */ + 12073 "00100110" // /* MW 7 */ + 12074 "00101000" // /* MW 6 */ + 12075 "00100001" // /* MW 5 */ + 12076 "00000000" // /* MW 4 */ + 12077 "11110000" // /* MW 3 */ + 12078 "00101100" // /* MW 2 */ + 12079 "00000000" // /* MW 1 */ +.label TGT_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_320 +.src_ref 3 "slice_generic_innermost.h" 40 8 first + 12080 "11111000" // MOV lc, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12081 "00100000" // /* MW 3 */ + 12082 "01110001" // /* MW 2 */ + 12083 "00011101" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 40 8 + 12084 "01000100" // MOVXM ls, #12096 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12085 "10000000" // /* MW 5 */ + 12086 "11111110" // /* MW 4 */ + 12087 "00100001" // /* MW 3 */ + 12088 "00000000" // /* MW 2 */ + 12089 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 40 8 + 12090 "01000100" // MOVXM le, #12208 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12091 "01100000" // /* MW 5 */ + 12092 "11111111" // /* MW 4 */ + 12093 "00100110" // /* MW 3 */ + 12094 "00000000" // /* MW 2 */ + 12095 "00000000" // /* MW 1 */ +.label ZLS_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_336 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 50 19 first +.src_ref 3 "slice_generic_innermost.h" 52 20 first +.begin_of_loop +.loop_nesting 1 + 12096 "00111100" // VLDA x1, [p4], m0; VLDB x2, [p3], m0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12097 "01101000" // /* MW 5 */ + 12098 "00010001" // /* MW 4 */ + 12099 "01110110" // /* MW 3 */ + 12100 "00001011" // /* MW 2 */ + 12101 "10000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "slice_generic_innermost.h" 51 19 first +.src_ref 3 "slice_generic_innermost.h" 53 20 first +.src_ref 3 "slice_generic_innermost.h" 56 21 first + 12102 "00110010" // PADDA [p3], m0; VLDB x0, [p7], m0; PADDS [p4], m0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12103 "01011011" // /* MW 7 */ + 12104 "00001000" // /* MW 6 */ + 12105 "01101100" // /* MW 5 */ + 12106 "00010000" // /* MW 4 */ + 12107 "11111110" // /* MW 3 */ + 12108 "00001100" // /* MW 2 */ + 12109 "01100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "slice_generic_innermost.h" 57 21 first +.src_ref 3 "slice_generic_innermost.h" 58 21 first + 12110 "00111100" // PADDA [p7], m0; VLDB x3, [p0], m0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12111 "11101000" // /* MW 5 */ + 12112 "00010001" // /* MW 4 */ + 12113 "11110000" // /* MW 3 */ + 12114 "00001100" // /* MW 2 */ + 12115 "11100001" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 59 21 first + 12116 "00011000" // PADDB [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12117 "10010000" // /* MW 3 */ + 12118 "00001011" // /* MW 2 */ + 12119 "00111000" // /* MW 1 */ + 12120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12121 "00000000" // /* MW 1 */ + 12122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12123 "00000000" // /* MW 1 */ + 12124 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12125 "01100111" // /* MW 3 */ + 12126 "00000001" // /* MW 2 */ + 12127 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 46 17 first + 12128 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x2, x1, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12129 "00000000" // /* MW 15 */ + 12130 "00000000" // /* MW 14 */ + 12131 "11101000" // /* MW 13 */ + 12132 "01000010" // /* MW 12 */ + 12133 "00001000" // /* MW 11 */ + 12134 "00000000" // /* MW 10 */ + 12135 "00000000" // /* MW 9 */ + 12136 "00000000" // /* MW 8 */ + 12137 "01011011" // /* MW 7 */ + 12138 "00000001" // /* MW 6 */ + 12139 "00100000" // /* MW 5 */ + 12140 "00000000" // /* MW 4 */ + 12141 "11110000" // /* MW 3 */ + 12142 "00101100" // /* MW 2 */ + 12143 "00000000" // /* MW 1 */ + 12144 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12145 "00000000" // /* MW 15 */ + 12146 "00000000" // /* MW 14 */ + 12147 "01111000" // /* MW 13 */ + 12148 "10100101" // /* MW 12 */ + 12149 "00000001" // /* MW 11 */ + 12150 "00000000" // /* MW 10 */ + 12151 "00000000" // /* MW 9 */ + 12152 "00000000" // /* MW 8 */ + 12153 "01011011" // /* MW 7 */ + 12154 "00000001" // /* MW 6 */ + 12155 "00100000" // /* MW 5 */ + 12156 "00000000" // /* MW 4 */ + 12157 "11110000" // /* MW 3 */ + 12158 "00101100" // /* MW 2 */ + 12159 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "slice_generic_innermost.h" 47 18 first +.src_ref 3 "slice_generic_innermost.h" 54 19 first + 12160 "11100001" // NOPA; NOPB; VST bmll0, [p5], m1; NOPX; VSHUFFLE bmlh0, x0, x3, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12161 "00000000" // /* MW 15 */ + 12162 "00000000" // /* MW 14 */ + 12163 "11101000" // /* MW 13 */ + 12164 "11000010" // /* MW 12 */ + 12165 "01000000" // /* MW 11 */ + 12166 "00000000" // /* MW 10 */ + 12167 "00000000" // /* MW 9 */ + 12168 "10000000" // /* MW 8 */ + 12169 "00000110" // /* MW 7 */ + 12170 "00101000" // /* MW 6 */ + 12171 "00100101" // /* MW 5 */ + 12172 "00000000" // /* MW 4 */ + 12173 "11110000" // /* MW 3 */ + 12174 "00101100" // /* MW 2 */ + 12175 "00000000" // /* MW 1 */ +.src_ref 3 "slice_generic_innermost.h" 55 19 first + 12176 "11100001" // NOPA; PADDB [p5], m1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12177 "00000000" // /* MW 15 */ + 12178 "00000000" // /* MW 14 */ + 12179 "01111000" // /* MW 13 */ + 12180 "10100101" // /* MW 12 */ + 12181 "00000001" // /* MW 11 */ + 12182 "00000000" // /* MW 10 */ + 12183 "00000000" // /* MW 9 */ + 12184 "00000000" // /* MW 8 */ + 12185 "01011011" // /* MW 7 */ + 12186 "00000001" // /* MW 6 */ + 12187 "00100000" // /* MW 5 */ + 12188 "01010111" // /* MW 4 */ + 12189 "11111010" // /* MW 3 */ + 12190 "00101100" // /* MW 2 */ + 12191 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "slice_generic_innermost.h" 60 19 first + 12192 "11100001" // NOPA; NOPB; VST bmlh0, [p1], m1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12193 "00000000" // /* MW 15 */ + 12194 "00000000" // /* MW 14 */ + 12195 "01111000" // /* MW 13 */ + 12196 "10100101" // /* MW 12 */ + 12197 "00000001" // /* MW 11 */ + 12198 "00000000" // /* MW 10 */ + 12199 "00000000" // /* MW 9 */ + 12200 "10000000" // /* MW 8 */ + 12201 "00100110" // /* MW 7 */ + 12202 "00101000" // /* MW 6 */ + 12203 "00100001" // /* MW 5 */ + 12204 "00000000" // /* MW 4 */ + 12205 "11110000" // /* MW 3 */ + 12206 "00101100" // /* MW 2 */ + 12207 "00000000" // /* MW 1 */ +.label ZLE_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_448 +.src_ref 3 "slice_generic_innermost.h" 61 19 first +.end_of_loop + 12208 "11100001" // NOPA; PADDB [p1], m1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12209 "00000000" // /* MW 15 */ + 12210 "00000000" // /* MW 14 */ + 12211 "01111000" // /* MW 13 */ + 12212 "10100101" // /* MW 12 */ + 12213 "00000001" // /* MW 11 */ + 12214 "00000000" // /* MW 10 */ + 12215 "00000000" // /* MW 9 */ + 12216 "00000000" // /* MW 8 */ + 12217 "01011011" // /* MW 7 */ + 12218 "00000001" // /* MW 6 */ + 12219 "00100000" // /* MW 5 */ + 12220 "01010111" // /* MW 4 */ + 12221 "11110010" // /* MW 3 */ + 12222 "00101100" // /* MW 2 */ + 12223 "00000000" // /* MW 1 */ +.label TGT_F_Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params_464 +.src_ref 3 "slice_generic_innermost.h" 76 first +.loop_nesting 0 + 12224 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12225 "00000000" // /* MW 3 */ + 12226 "00101000" // /* MW 2 */ + 12227 "00010000" // /* MW 1 */ +.delay_slot + 12228 "11111000" // MOV p7, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12229 "00100000" // /* MW 3 */ + 12230 "01100000" // /* MW 2 */ + 12231 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12233 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12234 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12235 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12236 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12237 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12238 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params__end +.label __Z23slice_generic_innermostI8bfloat16EvPT_S2_R30slice_generic_innermost_params___func_end0 + 12239 "00000000" // /* MW 1 */ +.label __ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj___func_begin0 +.label _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj +.function slice_generic_innermost_adf_wrapper, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> > > _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj +.src_ref 1 "io_buffer_main.h" 149 25 +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 29 +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 29 first +.function_start + 12240 "00111010" // MOVS p5, p0; PADDXM [sp], #128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12241 "01110001" // /* MW 9 */ + 12242 "00000000" // /* MW 8 */ + 12243 "00000000" // /* MW 7 */ + 12244 "00000000" // /* MW 6 */ + 12245 "00000100" // /* MW 5 */ + 12246 "00000000" // /* MW 4 */ + 12247 "01100000" // /* MW 3 */ + 12248 "00010001" // /* MW 2 */ + 12249 "10110000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 + 12250 "00000010" // ST lr, [sp, #-4]; MOV p3, p1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12251 "01110000" // /* MW 7 */ + 12252 "01100000" // /* MW 6 */ + 12253 "10110001" // /* MW 5 */ + 12254 "00000001" // /* MW 4 */ + 12255 "10110000" // /* MW 3 */ + 12256 "10000111" // /* MW 2 */ + 12257 "11111111" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 33 4 +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 33 4 first +.no_stack_arguments + 12258 "00111010" // MOVS p1, p2; JL #11696 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=11696 delay_slots=5 */ + 12259 "01000001" // /* MW 9 */ + 12260 "00000000" // /* MW 8 */ + 12261 "00000000" // /* MW 7 */ + 12262 "10110110" // /* MW 6 */ + 12263 "00000101" // /* MW 5 */ + 12264 "00000000" // /* MW 4 */ + 12265 "01100000" // /* MW 3 */ + 12266 "00010001" // /* MW 2 */ + 12267 "00110001" // /* MW 1 */ +.delay_slot + 12268 "11111000" // MOV p0, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12269 "11100000" // /* MW 3 */ + 12270 "01100101" // /* MW 2 */ + 12271 "00011000" // /* MW 1 */ +.delay_slot + 12272 "00011000" // PADDB [p0], #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12273 "10010000" // /* MW 3 */ + 12274 "11101111" // /* MW 2 */ + 12275 "00111000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 4 +.delay_slot + 12276 "11111000" // MOV p4, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12277 "11000000" // /* MW 3 */ + 12278 "01100000" // /* MW 2 */ + 12279 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12281 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12282 "00111100" // NOPA; NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12283 "00100000" // /* MW 5 */ + 12284 "00000000" // /* MW 4 */ + 12285 "11110000" // /* MW 3 */ + 12286 "00101100" // /* MW 2 */ + 12287 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 31 first +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 4 +.return_address + 12288 "10111010" // LDA r18, [sp, #-128]; MOVS p2, p4; MOV r17, CORE_ID /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12289 "01110010" // /* MW 9 */ + 12290 "01110000" // /* MW 8 */ + 12291 "00101101" // /* MW 7 */ + 12292 "00000010" // /* MW 6 */ + 12293 "10001011" // /* MW 5 */ + 12294 "10010000" // /* MW 4 */ + 12295 "00100010" // /* MW 3 */ + 12296 "01001010" // /* MW 2 */ + 12297 "11110000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 34 35 first +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 44 + 12298 "00101100" // LDA r22, [sp, #-124]; EXTEND.u8 r17, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12299 "00100000" // /* MW 5 */ + 12300 "11000101" // /* MW 4 */ + 12301 "00101000" // /* MW 3 */ + 12302 "11011010" // /* MW 2 */ + 12303 "11110000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 34 35 +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 57 first + 12304 "10111010" // LDA r20, [sp, #-120]; MOVXM r19, #65534 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12305 "00010000" // /* MW 9 */ + 12306 "11111111" // /* MW 8 */ + 12307 "01101111" // /* MW 7 */ + 12308 "00111110" // /* MW 6 */ + 12309 "00000000" // /* MW 5 */ + 12310 "00000000" // /* MW 4 */ + 12311 "00100000" // /* MW 3 */ + 12312 "01010010" // /* MW 2 */ + 12313 "11110001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 34 35 first + 12314 "00101100" // LDA p1, [p3]; ADD r17, r19, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12315 "00100001" // /* MW 5 */ + 12316 "11000110" // /* MW 4 */ + 12317 "11011001" // /* MW 3 */ + 12318 "10010011" // /* MW 2 */ + 12319 "01100000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 70 first +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 59 first + 12320 "00101100" // LDA r19, [sp, #-116]; EXTEND.u16 r21, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12321 "01100000" // /* MW 5 */ + 12322 "11010101" // /* MW 4 */ + 12323 "00101000" // /* MW 3 */ + 12324 "11001110" // /* MW 2 */ + 12325 "11110001" // /* MW 1 */ + 12326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12327 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 149 25 first + 12328 "10011000" // LDA r17, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12329 "00110110" // /* MW 3 */ + 12330 "00000110" // /* MW 2 */ + 12331 "00000101" // /* MW 1 */ + 12332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12333 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 36 first + 12334 "10011000" // MUL r18, r22, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12335 "00101111" // /* MW 3 */ + 12336 "10100101" // /* MW 2 */ + 12337 "00010101" // /* MW 1 */ + 12338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12339 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 49 + 12340 "10011000" // MUL r18, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12341 "01001111" // /* MW 3 */ + 12342 "10100101" // /* MW 2 */ + 12343 "00010100" // /* MW 1 */ + 12344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12345 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 36 62 + 12346 "10011000" // MUL r18, r21, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12347 "00101111" // /* MW 3 */ + 12348 "01100101" // /* MW 2 */ + 12349 "00010101" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 4 first +.no_stack_arguments + 12350 "00000100" // JL #11760 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11760 delay_slots=5 */ + 12351 "00000001" // /* MW 5 */ + 12352 "00000000" // /* MW 4 */ + 12353 "11111000" // /* MW 3 */ + 12354 "00010110" // /* MW 2 */ + 12355 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 57 +.delay_slot + 12356 "10011000" // MUL r18, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12357 "00101111" // /* MW 3 */ + 12358 "11100101" // /* MW 2 */ + 12359 "00010100" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 45 +.delay_slot + 12360 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12361 "00000101" // /* MW 3 */ + 12362 "00100000" // /* MW 2 */ + 12363 "00010000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 45 +.delay_slot + 12364 "10011000" // LSHL r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12365 "00001101" // /* MW 3 */ + 12366 "10100001" // /* MW 2 */ + 12367 "00010100" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 37 45 +.delay_slot + 12368 "01011000" // ADD.NC p0, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12369 "11000001" // /* MW 3 */ + 12370 "01101000" // /* MW 2 */ + 12371 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12372 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12373 "10000001" // /* MW 11 */ + 12374 "10101101" // /* MW 10 */ + 12375 "00000000" // /* MW 9 */ + 12376 "00000000" // /* MW 8 */ + 12377 "00000000" // /* MW 7 */ + 12378 "00000000" // /* MW 6 */ + 12379 "00100000" // /* MW 5 */ + 12380 "00000000" // /* MW 4 */ + 12381 "11110000" // /* MW 3 */ + 12382 "00101100" // /* MW 2 */ + 12383 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 39 +.return_address + 12384 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12385 "00111001" // /* MW 3 */ + 12386 "11111100" // /* MW 2 */ + 12387 "00000111" // /* MW 1 */ + 12388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12389 "00000000" // /* MW 1 */ + 12390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12391 "00000000" // /* MW 1 */ + 12392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12393 "00000000" // /* MW 1 */ + 12394 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12395 "00000000" // /* MW 1 */ + 12396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12397 "00000000" // /* MW 1 */ + 12398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12399 "00000000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 39 first + 12400 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12401 "00000000" // /* MW 3 */ + 12402 "00101000" // /* MW 2 */ + 12403 "00010000" // /* MW 1 */ +.src_ref 10 "slice_generic_innermost_adf_wrapper.cpp" 39 +.delay_slot + 12404 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12405 "00000001" // /* MW 5 */ + 12406 "00000000" // /* MW 4 */ + 12407 "00000000" // /* MW 3 */ + 12408 "11110000" // /* MW 2 */ + 12409 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12411 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12413 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12415 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj__end +.label __ZN12mllib_graphs35slice_generic_innermost_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA16_Kj___func_end0 + 12417 "00000000" // /* MW 1 */ +.label __Z14_b8170_wrapperPPv___func_begin0 +.label _Z14_b8170_wrapperPPv +.function _b8170_wrapper _Z14_b8170_wrapperPPv +.src_ref 0 "0_0_reloadable4.cc" 71 first +.src_ref 0 "0_0_reloadable4.cc" 73 79 +.function_start + 12432 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12433 "11000000" // /* MW 3 */ + 12434 "01100000" // /* MW 2 */ + 12435 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 73 79 first + 12436 "10011000" // LDA p0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12437 "00011110" // /* MW 3 */ + 12438 "00011100" // /* MW 2 */ + 12439 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 75 47 first + 12440 "10011000" // LDA p2, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12441 "00011110" // /* MW 3 */ + 12442 "00010101" // /* MW 2 */ + 12443 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 74 80 first + 12444 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12445 "10011110" // /* MW 3 */ + 12446 "00000100" // /* MW 2 */ + 12447 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable4.cc" 72 4 first +.tail_call + 12448 "10000100" // J #12240 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=12240 delay_slots=5 */ + 12449 "00000000" // /* MW 5 */ + 12450 "00000000" // /* MW 4 */ + 12451 "11101000" // /* MW 3 */ + 12452 "00010111" // /* MW 2 */ + 12453 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12455 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12457 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12459 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12461 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z14_b8170_wrapperPPv__end +.label __Z14_b8170_wrapperPPv___func_end0 + 12463 "00000000" // /* MW 1 */ +.label _ZN12me_primitive10udiv_dstepEjjRjS0_ +.function udiv_dstep _ZN12me_primitive10udiv_dstepEjjRjS0_ +.src_ref 11 "me_div.c" 108 19 +.src_ref 11 "me_div.c" 108 19 +.src_ref 11 "me_div.c" 115 4 first +.function_start + 12464 "11100100" // MOVX r3, #0; MOV r31, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12465 "01000001" // /* MW 5 */ + 12466 "10100000" // /* MW 4 */ + 12467 "00101111" // /* MW 3 */ + 12468 "11000000" // /* MW 2 */ + 12469 "00000000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12470 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12471 "00011100" // /* MW 3 */ + 12472 "11000110" // /* MW 2 */ + 12473 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12474 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12475 "00011100" // /* MW 3 */ + 12476 "11000110" // /* MW 2 */ + 12477 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12478 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12479 "00011100" // /* MW 3 */ + 12480 "11000110" // /* MW 2 */ + 12481 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12482 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12483 "00011100" // /* MW 3 */ + 12484 "11000110" // /* MW 2 */ + 12485 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12486 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12487 "00011100" // /* MW 3 */ + 12488 "11000110" // /* MW 2 */ + 12489 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12490 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12491 "00011100" // /* MW 3 */ + 12492 "11000110" // /* MW 2 */ + 12493 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12494 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12495 "00011100" // /* MW 3 */ + 12496 "11000110" // /* MW 2 */ + 12497 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12498 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12499 "00011100" // /* MW 3 */ + 12500 "11000110" // /* MW 2 */ + 12501 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12502 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12503 "00011100" // /* MW 3 */ + 12504 "11000110" // /* MW 2 */ + 12505 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12506 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12507 "00011100" // /* MW 3 */ + 12508 "11000110" // /* MW 2 */ + 12509 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12510 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12511 "00011100" // /* MW 3 */ + 12512 "11000110" // /* MW 2 */ + 12513 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12514 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12515 "00011100" // /* MW 3 */ + 12516 "11000110" // /* MW 2 */ + 12517 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12518 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12519 "00011100" // /* MW 3 */ + 12520 "11000110" // /* MW 2 */ + 12521 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12522 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12523 "00011100" // /* MW 3 */ + 12524 "11000110" // /* MW 2 */ + 12525 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12526 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12527 "00011100" // /* MW 3 */ + 12528 "11000110" // /* MW 2 */ + 12529 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12530 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12531 "00011100" // /* MW 3 */ + 12532 "11000110" // /* MW 2 */ + 12533 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12534 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12535 "00011100" // /* MW 3 */ + 12536 "11000110" // /* MW 2 */ + 12537 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12538 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12539 "00011100" // /* MW 3 */ + 12540 "11000110" // /* MW 2 */ + 12541 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12542 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12543 "00011100" // /* MW 3 */ + 12544 "11000110" // /* MW 2 */ + 12545 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12546 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12547 "00011100" // /* MW 3 */ + 12548 "11000110" // /* MW 2 */ + 12549 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12550 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12551 "00011100" // /* MW 3 */ + 12552 "11000110" // /* MW 2 */ + 12553 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12554 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12555 "00011100" // /* MW 3 */ + 12556 "11000110" // /* MW 2 */ + 12557 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12558 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12559 "00011100" // /* MW 3 */ + 12560 "11000110" // /* MW 2 */ + 12561 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12562 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12563 "00011100" // /* MW 3 */ + 12564 "11000110" // /* MW 2 */ + 12565 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12566 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12567 "00011100" // /* MW 3 */ + 12568 "11000110" // /* MW 2 */ + 12569 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12570 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12571 "00011100" // /* MW 3 */ + 12572 "11000110" // /* MW 2 */ + 12573 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12574 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12575 "00011100" // /* MW 3 */ + 12576 "11000110" // /* MW 2 */ + 12577 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 + 12578 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12579 "00011100" // /* MW 3 */ + 12580 "11000110" // /* MW 2 */ + 12581 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 119 first + 12582 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12583 "00000000" // /* MW 3 */ + 12584 "00101000" // /* MW 2 */ + 12585 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 first +.delay_slot + 12586 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12587 "00011100" // /* MW 3 */ + 12588 "11000110" // /* MW 2 */ + 12589 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 +.delay_slot + 12590 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12591 "00011100" // /* MW 3 */ + 12592 "11000110" // /* MW 2 */ + 12593 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 +.delay_slot + 12594 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12595 "00011100" // /* MW 3 */ + 12596 "11000110" // /* MW 2 */ + 12597 "00010000" // /* MW 1 */ +.src_ref 11 "me_div.c" 108 19 +.delay_slot + 12598 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12599 "00011100" // /* MW 3 */ + 12600 "11000110" // /* MW 2 */ + 12601 "00010000" // /* MW 1 */ +.delay_slot + 12602 "11111000" // MOV r2, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12603 "10100000" // /* MW 3 */ + 12604 "10011111" // /* MW 2 */ +.label _ZN12me_primitive10udiv_dstepEjjRjS0___end + 12605 "00011000" // /* MW 1 */ +.label memset +.function memset memset +.src_ref 12 "string.c" 325 first +.src_ref 12 "string.c" 328 4 first +.function_start + 12608 "10000100" // JZ r1, #12768 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12768 delay_slots=5 */ + 12609 "00000001" // /* MW 5 */ + 12610 "00000000" // /* MW 4 */ + 12611 "11110000" // /* MW 3 */ + 12612 "00011000" // /* MW 2 */ + 12613 "00001000" // /* MW 1 */ +.src_ref 12 "string.c" 329 3 +.delay_slot + 12614 "11111000" // MOV p0, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12615 "11000000" // /* MW 3 */ + 12616 "01100010" // /* MW 2 */ + 12617 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12619 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12621 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12622 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12623 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12625 "00000000" // /* MW 1 */ +.src_ref 12 "string.c" 328 4 first +.src_ref 12 "string.c" 329 3 + 12626 "00000010" // MOVS p1, p0; MOV lc, r1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12627 "01110000" // /* MW 7 */ + 12628 "01010000" // /* MW 6 */ + 12629 "10111000" // /* MW 5 */ + 12630 "00000010" // /* MW 4 */ + 12631 "01100000" // /* MW 3 */ + 12632 "00010001" // /* MW 2 */ + 12633 "00110000" // /* MW 1 */ +.src_ref 12 "string.c" 328 4 + 12634 "01000100" // MOVXM ls, #12656 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12635 "11100000" // /* MW 5 */ + 12636 "11100010" // /* MW 4 */ + 12637 "00110001" // /* MW 3 */ + 12638 "00000000" // /* MW 2 */ + 12639 "00000000" // /* MW 1 */ +.src_ref 12 "string.c" 328 4 + 12640 "11100001" // NOPA; NOPB; NOPS; MOVXM le, #12752; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12641 "00000000" // /* MW 15 */ + 12642 "00000000" // /* MW 14 */ + 12643 "00010000" // /* MW 13 */ + 12644 "11101000" // /* MW 12 */ + 12645 "10111000" // /* MW 11 */ + 12646 "00001101" // /* MW 10 */ + 12647 "00000000" // /* MW 9 */ + 12648 "00000000" // /* MW 8 */ + 12649 "01011011" // /* MW 7 */ + 12650 "00000001" // /* MW 6 */ + 12651 "00100000" // /* MW 5 */ + 12652 "00000000" // /* MW 4 */ + 12653 "11110000" // /* MW 3 */ + 12654 "00101100" // /* MW 2 */ + 12655 "00000000" // /* MW 1 */ +.label ZLS_Fmemset_48 +.src_ref 12 "string.c" 329 3 first +.begin_of_loop +.loop_nesting 1 + 12656 "11100001" // ST.s8 r0, [p1], #1; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12657 "00000000" // /* MW 15 */ + 12658 "00000000" // /* MW 14 */ + 12659 "01111000" // /* MW 13 */ + 12660 "10100101" // /* MW 12 */ + 12661 "00000001" // /* MW 11 */ + 12662 "00000000" // /* MW 10 */ + 12663 "00000000" // /* MW 9 */ + 12664 "00000000" // /* MW 8 */ + 12665 "01011011" // /* MW 7 */ + 12666 "00000001" // /* MW 6 */ + 12667 "00100000" // /* MW 5 */ + 12668 "00000000" // /* MW 4 */ + 12669 "11100000" // /* MW 3 */ + 12670 "10000000" // /* MW 2 */ + 12671 "00100011" // /* MW 1 */ + 12672 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12673 "00000000" // /* MW 15 */ + 12674 "00000000" // /* MW 14 */ + 12675 "01111000" // /* MW 13 */ + 12676 "10100101" // /* MW 12 */ + 12677 "00000001" // /* MW 11 */ + 12678 "00000000" // /* MW 10 */ + 12679 "00000000" // /* MW 9 */ + 12680 "00000000" // /* MW 8 */ + 12681 "01011011" // /* MW 7 */ + 12682 "00000001" // /* MW 6 */ + 12683 "00100000" // /* MW 5 */ + 12684 "00000000" // /* MW 4 */ + 12685 "11110000" // /* MW 3 */ + 12686 "00101100" // /* MW 2 */ + 12687 "00000000" // /* MW 1 */ + 12688 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12689 "00000000" // /* MW 15 */ + 12690 "00000000" // /* MW 14 */ + 12691 "01111000" // /* MW 13 */ + 12692 "10100101" // /* MW 12 */ + 12693 "00000001" // /* MW 11 */ + 12694 "00000000" // /* MW 10 */ + 12695 "00000000" // /* MW 9 */ + 12696 "00000000" // /* MW 8 */ + 12697 "01011011" // /* MW 7 */ + 12698 "00000001" // /* MW 6 */ + 12699 "00100000" // /* MW 5 */ + 12700 "00000000" // /* MW 4 */ + 12701 "11110000" // /* MW 3 */ + 12702 "00101100" // /* MW 2 */ + 12703 "00000000" // /* MW 1 */ + 12704 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12705 "00000000" // /* MW 15 */ + 12706 "00000000" // /* MW 14 */ + 12707 "01111000" // /* MW 13 */ + 12708 "10100101" // /* MW 12 */ + 12709 "00000001" // /* MW 11 */ + 12710 "00000000" // /* MW 10 */ + 12711 "00000000" // /* MW 9 */ + 12712 "00000000" // /* MW 8 */ + 12713 "01011011" // /* MW 7 */ + 12714 "00000001" // /* MW 6 */ + 12715 "00100000" // /* MW 5 */ + 12716 "00000000" // /* MW 4 */ + 12717 "11110000" // /* MW 3 */ + 12718 "00101100" // /* MW 2 */ + 12719 "00000000" // /* MW 1 */ + 12720 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12721 "00000000" // /* MW 15 */ + 12722 "00000000" // /* MW 14 */ + 12723 "01111000" // /* MW 13 */ + 12724 "10100101" // /* MW 12 */ + 12725 "00000001" // /* MW 11 */ + 12726 "00000000" // /* MW 10 */ + 12727 "00000000" // /* MW 9 */ + 12728 "00000000" // /* MW 8 */ + 12729 "01011011" // /* MW 7 */ + 12730 "00000001" // /* MW 6 */ + 12731 "00100000" // /* MW 5 */ + 12732 "00000000" // /* MW 4 */ + 12733 "11110000" // /* MW 3 */ + 12734 "00101100" // /* MW 2 */ + 12735 "00000000" // /* MW 1 */ + 12736 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12737 "00000000" // /* MW 15 */ + 12738 "00000000" // /* MW 14 */ + 12739 "01111000" // /* MW 13 */ + 12740 "10100101" // /* MW 12 */ + 12741 "00000001" // /* MW 11 */ + 12742 "00000000" // /* MW 10 */ + 12743 "00000000" // /* MW 9 */ + 12744 "00000000" // /* MW 8 */ + 12745 "01011011" // /* MW 7 */ + 12746 "00000001" // /* MW 6 */ + 12747 "00100000" // /* MW 5 */ + 12748 "00000000" // /* MW 4 */ + 12749 "11110000" // /* MW 3 */ + 12750 "00101100" // /* MW 2 */ + 12751 "00000000" // /* MW 1 */ +.label ZLE_Fmemset_144 +.end_of_loop + 12752 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12753 "00000000" // /* MW 15 */ + 12754 "00000000" // /* MW 14 */ + 12755 "01111000" // /* MW 13 */ + 12756 "10100101" // /* MW 12 */ + 12757 "00000001" // /* MW 11 */ + 12758 "00000000" // /* MW 10 */ + 12759 "00000000" // /* MW 9 */ + 12760 "00000000" // /* MW 8 */ + 12761 "01011011" // /* MW 7 */ + 12762 "00000001" // /* MW 6 */ + 12763 "00100000" // /* MW 5 */ + 12764 "00000000" // /* MW 4 */ + 12765 "11110000" // /* MW 3 */ + 12766 "00101100" // /* MW 2 */ + 12767 "00000000" // /* MW 1 */ +.label TGT_Fmemset_160 +.src_ref 12 "string.c" 330 4 first +.loop_nesting 0 + 12768 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12769 "00000000" // /* MW 3 */ + 12770 "00101000" // /* MW 2 */ + 12771 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12773 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12775 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12776 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12777 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12778 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12779 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12780 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label memset__end + 12781 "00000000" // /* MW 1 */ +.dir 0 "/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable4/src" +.dir 1 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer" +.dir 2 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common" +.dir 3 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc" +.dir 4 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2" +.dir 5 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p" +.dir 6 "/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend" +.dir 7 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/conv" +.dir 8 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib" +.dir 9 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/detail" +.dir 10 "/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf" +.dir 11 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21990/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib" +.dir 12 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21990/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib/runtime/src" diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable9/Release/0_0_reloadable9.txt b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable9/Release/0_0_reloadable9.txt new file mode 100644 index 0000000000000000000000000000000000000000..313c672c901f9483e93de4398d1e94f821c22ed0 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable9/Release/0_0_reloadable9.txt @@ -0,0 +1,3559 @@ +Contents of the .debug_line section: + +sigmoid_carf_templated_lut.h: +File name Line number Starting address View Stmt + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x2020 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 282 0x2020 1 x +conv2d_dw_bf16.h 285 0x2020 2 +conv2d_dw_bf16.h 285 0x2020 3 +conv2d_dw_bf16.h 287 0x2020 4 +conv2d_dw_bf16.h 285 0x202a +conv2d_dw_bf16.h 287 0x202a 1 +conv2d_dw_bf16.h 285 0x2034 x +conv2d_dw_bf16.h 285 0x2034 1 +conv2d_dw_bf16.h 285 0x203a +conv2d_dw_bf16.h 287 0x203e x +conv2d_dw_bf16.h 282 0x2042 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x2048 x +io_buffer_main.h 149 0x204e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 284 0x2058 x +conv2d_dw_bf16.h 284 0x2062 +conv2d_dw_bf16.h 285 0x2062 1 x +conv2d_dw_bf16.h 290 0x206c +conv2d_dw_bf16.h 285 0x2070 +conv2d_dw_bf16.h 285 0x2076 +conv2d_dw_bf16.h 287 0x2076 1 x +conv2d_dw_bf16.h 291 0x2076 2 +conv2d_dw_bf16.h 285 0x2080 x +conv2d_dw_bf16.h 287 0x2080 1 +conv2d_dw_bf16.h 290 0x2090 x +conv2d_dw_bf16.h 290 0x2090 1 +conv2d_dw_bf16.h 292 0x2090 2 +conv2d_dw_bf16.h 290 0x20a2 +conv2d_dw_bf16.h 290 0x20a6 +conv2d_dw_bf16.h 292 0x20ac +conv2d_dw_bf16.h 292 0x20ba +conv2d_dw_bf16.h 292 0x20ba 1 x +conv2d_dw_bf16.h 292 0x20ba 2 +conv2d_dw_bf16.h 294 0x20ba 3 +conv2d_dw_bf16.h 294 0x20ba 4 +conv2d_dw_bf16.h 294 0x20ba 5 +conv2d_dw_bf16.h 292 0x20c4 +conv2d_dw_bf16.h 294 0x20c8 x +conv2d_dw_bf16.h 292 0x20cc +conv2d_dw_bf16.h 293 0x20cc 1 +conv2d_dw_bf16.h 297 0x20cc 2 +conv2d_dw_bf16.h 291 0x20d2 +conv2d_dw_bf16.h 294 0x20d2 1 +conv2d_dw_bf16.h 297 0x20d2 2 +conv2d_dw_bf16.h 292 0x20e2 x +conv2d_dw_bf16.h 292 0x20e6 +conv2d_dw_bf16.h 291 0x20ea x +conv2d_dw_bf16.h 292 0x20f0 x +conv2d_dw_bf16.h 294 0x20f0 1 x +conv2d_dw_bf16.h 292 0x20f6 +conv2d_dw_bf16.h 294 0x20f6 1 +conv2d_dw_bf16.h 292 0x20fc +conv2d_dw_bf16.h 294 0x20fc 1 +conv2d_dw_bf16.h 293 0x2102 x +conv2d_dw_bf16.h 294 0x2106 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x2110 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 297 0x2110 1 +conv2d_dw_bf16.h 298 0x2110 2 +conv2d_dw_bf16.h 298 0x2110 3 +conv2d_dw_bf16.h 297 0x211a x +conv2d_dw_bf16.h 298 0x211e +conv2d_dw_bf16.h 298 0x2124 x +conv2d_dw_bf16.h 296 0x2128 +conv2d_dw_bf16.h 301 0x2130 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x2136 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 297 0x213a x +conv2d_dw_bf16.h 296 0x2144 x +conv2d_dw_bf16.h 298 0x214a x +conv2d_dw_bf16.h 298 0x214e +conv2d_dw_bf16.h 297 0x2152 x +conv2d_dw_bf16.h 296 0x2156 +conv2d_dw_bf16.h 298 0x215a x +conv2d_dw_bf16.h 301 0x2160 +conv2d_dw_bf16.h 301 0x2176 x +conv2d_dw_bf16.h 301 0x217a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 100 0xc10 x +elementwise_binary.h 103 0xc10 1 x +elementwise_binary.h 101 0xc14 +elementwise_binary.h 101 0xc1a +elementwise_binary.h 101 0xc1e x +elementwise_binary.h 101 0xc22 +elementwise_binary.h 89 0xc30 x +elementwise_binary.h 92 0xc30 1 +elementwise_binary.h 92 0xc30 2 x +elementwise_binary.h 89 0xc3a +elementwise_binary.h 92 0xc4e x +elementwise_binary.h 93 0xc52 x +elementwise_binary.h 93 0xc62 +elementwise_binary.h 94 0xc66 x +elementwise_binary.h 94 0xc76 +elementwise_binary.h 95 0xc7a x +elementwise_binary.h 96 0xc82 x +elementwise_binary.h 95 0xc8e x +elementwise_binary.h 96 0xc92 +elementwise_binary.h 96 0xca0 +elementwise_binary.h 98 0xca0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h: +add_impl.h 105 0xca0 2 +add_impl.h 105 0xcaa +add_impl.h 106 0xcaa 1 +add_impl.h 106 0xcaa 2 +add_impl.h 105 0xcb4 x +add_impl.h 106 0xcb4 1 +add_impl.h 106 0xcbe x +add_impl.h 106 0xcc6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 98 0xcca x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h: +add_impl.h 106 0xcce + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 98 0xcd2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h: +add_impl.h 106 0xcd8 x +add_impl.h 106 0xcdc + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_broadcasting.h: +elementwise_binary_broadcasting.h 40 0xcf0 x +elementwise_binary_broadcasting.h 41 0xcf0 1 +elementwise_binary_broadcasting.h 41 0xcf0 2 +elementwise_binary_broadcasting.h 41 0xcfa +elementwise_binary_broadcasting.h 41 0xcfa 1 +elementwise_binary_broadcasting.h 41 0xcfa 2 x +elementwise_binary_broadcasting.h 42 0xcfa 3 +elementwise_binary_broadcasting.h 43 0xd0e x +elementwise_binary_broadcasting.h 41 0xd12 x +elementwise_binary_broadcasting.h 41 0xd16 +elementwise_binary_broadcasting.h 42 0xd1a x +elementwise_binary_broadcasting.h 41 0xd1e x +elementwise_binary_broadcasting.h 42 0xd1e 1 +elementwise_binary_broadcasting.h 41 0xd24 +elementwise_binary_broadcasting.h 35 0xd30 +elementwise_binary_broadcasting.h 35 0xd30 1 x +elementwise_binary_broadcasting.h 36 0xd3a x +elementwise_binary_broadcasting.h 36 0xd40 +elementwise_binary_broadcasting.h 37 0xd50 +elementwise_binary_broadcasting.h 37 0xd54 x +elementwise_binary_broadcasting.h 37 0xd5a +elementwise_binary_broadcasting.h 38 0xd60 x +elementwise_binary_broadcasting.h 48 0xd70 x +elementwise_binary_broadcasting.h 55 0xd70 1 +elementwise_binary_broadcasting.h 61 0xd70 2 +elementwise_binary_broadcasting.h 55 0xd7a x +elementwise_binary_broadcasting.h 61 0xd7e x +elementwise_binary_broadcasting.h 76 0xd7e 1 +elementwise_binary_broadcasting.h 61 0xd90 +elementwise_binary_broadcasting.h 61 0xd90 1 +elementwise_binary_broadcasting.h 55 0xd96 +elementwise_binary_broadcasting.h 55 0xd9a x +elementwise_binary_broadcasting.h 62 0xda4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 102 0xdb8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0xdc0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_broadcasting.h: +elementwise_binary_broadcasting.h 65 0xdd0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 102 0xde0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0xde6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0xdf0 +add_accum.hpp 19 0xdf0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_broadcasting.h: +elementwise_binary_broadcasting.h 76 0xdf0 2 x +elementwise_binary_broadcasting.h 76 0xdf0 3 x +elementwise_binary_broadcasting.h 76 0xdfa +elementwise_binary_broadcasting.h 76 0xdfa 1 +elementwise_binary_broadcasting.h 76 0xe04 +elementwise_binary_broadcasting.h 76 0xe0a +elementwise_binary_broadcasting.h 76 0xe10 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe18 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe18 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0xe18 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe1c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe1c 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 148 0xe1c 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe20 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe20 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 170 0xe20 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe24 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe24 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 172 0xe24 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe28 x +vector.hpp 1159 0xe28 1 +vector.hpp 1159 0xe28 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe28 3 x +accum.hpp 1119 0xe28 4 +accum.hpp 1119 0xe28 5 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0xe28 6 x +elementwise_binary.h 154 0xe28 7 +elementwise_binary.h 177 0xe28 8 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe2e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe2e 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0xe2e 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 148 0xe2e 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe36 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe36 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 170 0xe36 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe3a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe3a 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0xe3a 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 172 0xe3a 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe42 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe42 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0xe42 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe46 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe46 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0xe46 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 148 0xe46 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe4e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe4e 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 170 0xe4e 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe52 +vector.hpp 1159 0xe52 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe52 2 +accum.hpp 1119 0xe52 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0xe52 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 172 0xe52 5 x +elementwise_binary.h 177 0xe52 6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe60 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe60 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0xe60 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe64 +vector.hpp 1159 0xe64 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe64 2 +accum.hpp 1119 0xe64 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0xe64 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 148 0xe64 5 x +elementwise_binary.h 154 0xe64 6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe70 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe70 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 170 0xe70 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0xe80 +vector.hpp 1159 0xe80 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0xe80 2 +accum.hpp 1119 0xe80 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0xe80 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 172 0xe80 5 x +elementwise_binary.h 177 0xe80 6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0xe92 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0xe92 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0xe92 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 154 0xe92 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0xe9c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0xe9c 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0xe9c 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 177 0xe9c 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0xea6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0xea6 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 154 0xea6 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_broadcasting.h: +elementwise_binary_broadcasting.h 80 0xea6 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0xeae x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0xeae 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 177 0xeae 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0xeb4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0xeb4 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 154 0xeb4 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_attribute_broadcasting.h: +ise_binary_attribute_broadcasting.h 41 0xec0 +ise_binary_attribute_broadcasting.h 41 0xec0 1 x +ise_binary_attribute_broadcasting.h 76 0xec0 2 +ise_binary_attribute_broadcasting.h 51 0xeca +ise_binary_attribute_broadcasting.h 51 0xed6 +ise_binary_attribute_broadcasting.h 51 0xedc x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 538 0xee2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector_native_types.hpp: +vector_native_types.hpp 374 0xee2 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary_attribute_broadcasting.h: +ise_binary_attribute_broadcasting.h 76 0xef0 x +ise_binary_attribute_broadcasting.h 51 0xef6 +ise_binary_attribute_broadcasting.h 51 0xefc x +ise_binary_attribute_broadcasting.h 51 0xf00 +ise_binary_attribute_broadcasting.h 76 0xf00 1 +ise_binary_attribute_broadcasting.h 76 0xf06 +ise_binary_attribute_broadcasting.h 77 0xf10 +ise_binary_attribute_broadcasting.h 77 0xf20 x +ise_binary_attribute_broadcasting.h 77 0xf24 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 152 0xf40 x +superkernels.cpp 157 0xf40 1 +superkernels.cpp 157 0xf46 x +superkernels.cpp 152 0xf4c +superkernels.cpp 154 0xf5a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0xf64 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 157 0xf6c +superkernels.cpp 157 0xf6c 1 +superkernels.cpp 154 0xf72 x +superkernels.cpp 154 0xf76 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0xf7e + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 154 0xf7e 1 +superkernels.cpp 160 0xf86 +superkernels.cpp 171 0xf86 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0xf8c +tile.hpp 74 0xf92 +tile.hpp 86 0xf92 1 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 160 0xf9e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0xfa8 +tile.hpp 74 0xfac +tile.hpp 74 0xfb0 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 164 0xfc0 +superkernels.cpp 164 0xfc6 x +superkernels.cpp 164 0xfc6 1 +superkernels.cpp 162 0xfd0 +superkernels.cpp 164 0xfd0 1 +superkernels.cpp 171 0xfd0 2 +superkernels.cpp 162 0xfda x +superkernels.cpp 164 0xfda 1 +superkernels.cpp 169 0xfda 2 +superkernels.cpp 162 0xfee +superkernels.cpp 164 0xff6 x +superkernels.cpp 162 0xffa x +superkernels.cpp 164 0x1000 x +superkernels.cpp 169 0x1010 +superkernels.cpp 171 0x1010 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x1020 x +io_buffer_main.h 242 0x1028 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 168 0x1028 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x1032 +io_buffer_main.h 242 0x1036 +io_buffer_main.h 259 0x103a x +io_buffer_main.h 242 0x1048 x +io_buffer_main.h 242 0x1048 1 x +io_buffer_main.h 242 0x104c +io_buffer_main.h 419 0x1050 +io_buffer_main.h 419 0x105a x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 169 0x105e +superkernels.cpp 168 0x1068 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x106c x +io_buffer_main.h 348 0x106c 1 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 169 0x1072 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x1076 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 171 0x107c x +superkernels.cpp 168 0x1084 x +superkernels.cpp 168 0x1088 +superkernels.cpp 169 0x108c x +superkernels.cpp 169 0x1090 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 348 0x10a0 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 173 0x10a0 1 +superkernels.cpp 174 0x10a0 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 351 0x10aa +io_buffer_main.h 449 0x10aa 1 +io_buffer_main.h 449 0x10b8 x +io_buffer_main.h 351 0x10bc x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 173 0x10bc 1 +superkernels.cpp 173 0x10c6 x +superkernels.cpp 173 0x10ca + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 351 0x10d6 x +io_buffer_main.h 351 0x10da + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 173 0x10de x +superkernels.cpp 173 0x10e2 +superkernels.cpp 174 0x10f2 +superkernels.cpp 174 0x10f6 x +superkernels.cpp 176 0x1100 +superkernels.cpp 176 0x1114 x +superkernels.cpp 176 0x111c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 95 0x1130 x +elementwise_unary.h 97 0x1130 1 +elementwise_unary.h 97 0x1130 2 x +elementwise_unary.h 97 0x1146 x +elementwise_unary.h 98 0x114a x +elementwise_unary.h 98 0x115a +elementwise_unary.h 99 0x115e x +elementwise_unary.h 101 0x1164 x +elementwise_unary.h 99 0x1170 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1180 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 107 0x1180 1 x +elementwise_unary.h 113 0x1180 2 +elementwise_unary.h 113 0x1180 3 +elementwise_unary.h 142 0x1180 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x118c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 113 0x118c 1 x +elementwise_unary.h 161 0x118c 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1198 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 142 0x1198 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x11a4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 161 0x11a4 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 21 0x11ba x +max_min.hpp 20 0x11be x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 113 0x11be 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x11c4 +vector.hpp 1159 0x11c4 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 21 0x11c4 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x11c4 3 +accum.hpp 1119 0x11c4 4 +accum.hpp 1119 0x11c4 5 +accum.hpp 1119 0x11c4 6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 147 0x11c4 7 +elementwise_unary.h 166 0x11c4 8 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 102 0x11d8 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/abs.hpp: +abs.hpp 32 0x11de x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 102 0x11e2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x11e2 1 +mul_acc32_fp.hpp 36 0x11e2 2 +mul_acc32_fp.hpp 38 0x11e2 3 +mul_acc32_fp.hpp 38 0x11e2 4 +mul_acc32_fp.hpp 39 0x11e2 5 +mul_acc32_fp.hpp 39 0x11e2 6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x11e8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x11e8 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/abs.hpp: +abs.hpp 32 0x11f0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x11fa x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x1202 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 21 0x1206 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 38 0x1206 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x120e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 38 0x120e 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 125 0x120e 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x121a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 39 0x121e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 125 0x121e 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1228 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 125 0x1228 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 21 0x1230 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/abs.hpp: +abs.hpp 32 0x1234 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1240 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1240 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 39 0x1240 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 142 0x1240 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x124a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/abs.hpp: +abs.hpp 32 0x124a 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x124a 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 161 0x124a 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 38 0x1254 x +mul_acc32_fp.hpp 36 0x1258 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x125c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x125c 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 38 0x125c 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 166 0x125c 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1266 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1266 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 147 0x1266 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 21 0x1270 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1270 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1280 x +max_min.hpp 21 0x1290 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1290 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 39 0x1290 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/abs.hpp: +abs.hpp 32 0x12a0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x12b0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 39 0x12b0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/abs.hpp: +abs.hpp 32 0x12b8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x12bc x +mul_acc32_fp.hpp 36 0x12c0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x12c4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x12c4 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 166 0x12c4 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x12ca + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x12ca 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 147 0x12ca 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 38 0x12ce x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x12d2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 38 0x12d2 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x12da + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 39 0x12de x +mul_acc32_fp.hpp 39 0x12e2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 129 0x12e8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x12f0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x12f0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 166 0x12f0 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x12f4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x12f4 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 147 0x12f4 2 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 210 0x1300 x +superkernels.cpp 215 0x1300 1 +superkernels.cpp 215 0x1306 x +superkernels.cpp 210 0x130c +superkernels.cpp 212 0x131a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x1324 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 215 0x132c +superkernels.cpp 215 0x132c 1 +superkernels.cpp 212 0x1332 x +superkernels.cpp 212 0x1336 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x133e + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 212 0x133e 1 +superkernels.cpp 218 0x1346 +superkernels.cpp 229 0x1346 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x134c +tile.hpp 74 0x1352 +tile.hpp 86 0x1352 1 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 218 0x135e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x1368 +tile.hpp 74 0x136c +tile.hpp 74 0x1370 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 222 0x1380 +superkernels.cpp 222 0x1386 x +superkernels.cpp 222 0x1386 1 +superkernels.cpp 220 0x1390 +superkernels.cpp 222 0x1390 1 +superkernels.cpp 229 0x1390 2 +superkernels.cpp 220 0x139a x +superkernels.cpp 222 0x139a 1 +superkernels.cpp 227 0x139a 2 +superkernels.cpp 220 0x13ae +superkernels.cpp 222 0x13b6 x +superkernels.cpp 220 0x13ba x +superkernels.cpp 222 0x13c0 x +superkernels.cpp 227 0x13d0 +superkernels.cpp 229 0x13d0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x13e0 x +io_buffer_main.h 242 0x13e8 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 226 0x13e8 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x13f2 +io_buffer_main.h 242 0x13f6 +io_buffer_main.h 259 0x13fa x +io_buffer_main.h 242 0x1408 x +io_buffer_main.h 242 0x1408 1 x +io_buffer_main.h 242 0x140c +io_buffer_main.h 419 0x1410 +io_buffer_main.h 419 0x141a x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 227 0x141e +superkernels.cpp 226 0x1428 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x142c x +io_buffer_main.h 348 0x142c 1 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 227 0x1432 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x1436 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 229 0x143c x +superkernels.cpp 226 0x1444 x +superkernels.cpp 226 0x1448 +superkernels.cpp 227 0x144c x +superkernels.cpp 227 0x1450 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 348 0x1460 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 231 0x1460 1 +superkernels.cpp 232 0x1460 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 351 0x146a +io_buffer_main.h 449 0x146a 1 +io_buffer_main.h 449 0x1478 x +io_buffer_main.h 351 0x147c x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 231 0x147c 1 +superkernels.cpp 231 0x1486 x +superkernels.cpp 231 0x148a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 351 0x1496 x +io_buffer_main.h 351 0x149a + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 231 0x149e x +superkernels.cpp 231 0x14a2 +superkernels.cpp 232 0x14b2 +superkernels.cpp 232 0x14b6 x +superkernels.cpp 234 0x14c0 +superkernels.cpp 234 0x14d4 x +superkernels.cpp 234 0x14dc + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 100 0x14f0 x +elementwise_binary.h 103 0x14f0 1 x +elementwise_binary.h 101 0x14f4 +elementwise_binary.h 101 0x14fa +elementwise_binary.h 101 0x14fe x +elementwise_binary.h 101 0x1502 +elementwise_binary.h 89 0x1510 x +elementwise_binary.h 92 0x1510 1 +elementwise_binary.h 92 0x1510 2 x +elementwise_binary.h 89 0x151a +elementwise_binary.h 92 0x152c x +elementwise_binary.h 93 0x1530 x +elementwise_binary.h 93 0x1540 +elementwise_binary.h 94 0x1544 x +elementwise_binary.h 94 0x1554 +elementwise_binary.h 95 0x1558 x +elementwise_binary.h 96 0x1560 x +elementwise_binary.h 95 0x156e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h: +mul_impl.h 93 0x1572 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 98 0x1580 +elementwise_binary.h 98 0x1592 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h: +mul_impl.h 93 0x159c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 98 0x15a0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h: +mul_impl.h 93 0x15a0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 108 0x15b0 x +elementwise_binary.h 115 0x15b0 1 +elementwise_binary.h 115 0x15b0 2 +elementwise_binary.h 115 0x15ba x +elementwise_binary.h 115 0x15ba 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x15c4 +mul_acc32_fp.hpp 36 0x15c4 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 115 0x15c4 2 +elementwise_binary.h 115 0x15c4 3 +elementwise_binary.h 115 0x15ce +elementwise_binary.h 127 0x15ce 1 x +elementwise_binary.h 115 0x15d8 x +elementwise_binary.h 127 0x15d8 1 +elementwise_binary.h 115 0x15e8 +elementwise_binary.h 127 0x15ec x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x15f0 x +vector.hpp 1139 0x15f0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0x15f0 2 x +elementwise_binary.h 148 0x15f0 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x15f6 +vector.hpp 1139 0x15f6 1 +vector.hpp 1159 0x15f6 2 +vector.hpp 1159 0x15f6 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x15f6 4 +accum.hpp 1119 0x15f6 5 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 154 0x15f6 6 +elementwise_binary.h 170 0x15f6 7 x +elementwise_binary.h 172 0x15f6 8 x +elementwise_binary.h 177 0x15f6 9 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x15fe x +vector.hpp 1139 0x15fe 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0x15fe 2 x +elementwise_binary.h 148 0x15fe 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1604 +vector.hpp 1139 0x1604 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 170 0x1604 2 x +elementwise_binary.h 172 0x1604 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x160a x +vector.hpp 1139 0x160a 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0x160a 2 x +elementwise_binary.h 148 0x160a 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1610 +vector.hpp 1139 0x1610 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 170 0x1610 2 x +elementwise_binary.h 172 0x1610 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1616 x +vector.hpp 1139 0x1616 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0x1616 2 x +elementwise_binary.h 148 0x1616 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x161c +vector.hpp 1139 0x161c 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x161c 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 170 0x161c 3 x +elementwise_binary.h 172 0x161c 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1626 x +vector.hpp 1139 0x1626 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x1626 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0x1626 3 x +elementwise_binary.h 148 0x1626 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1630 +vector.hpp 1139 0x1630 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x1630 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 170 0x1630 3 x +elementwise_binary.h 172 0x1630 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x163a x +vector.hpp 1139 0x163a 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x163a 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0x163a 3 x +elementwise_binary.h 148 0x163a 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1644 +vector.hpp 1139 0x1644 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x1644 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 170 0x1644 3 x +elementwise_binary.h 172 0x1644 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1650 x +vector.hpp 1139 0x1650 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x1650 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0x1650 3 x +elementwise_binary.h 148 0x1650 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1660 +vector.hpp 1139 0x1660 1 +vector.hpp 1159 0x1660 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1660 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x1660 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 170 0x1660 5 x +elementwise_binary.h 172 0x1660 6 x +elementwise_binary.h 177 0x1660 7 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1670 x +vector.hpp 1139 0x1670 1 x +vector.hpp 1159 0x1670 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1670 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x1670 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 146 0x1670 5 x +elementwise_binary.h 148 0x1670 6 x +elementwise_binary.h 154 0x1670 7 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1680 +vector.hpp 1139 0x1680 1 +vector.hpp 1159 0x1680 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1680 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x1680 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 170 0x1680 5 x +elementwise_binary.h 172 0x1680 6 x +elementwise_binary.h 177 0x1680 7 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1690 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1690 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x1690 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 154 0x1690 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1698 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1698 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x1698 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 177 0x1698 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x16a0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x16a0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x16a0 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 154 0x16a0 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x16a8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x16a8 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x16a8 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 177 0x16a8 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x16b0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x16b0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x16b0 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 154 0x16b0 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x16b8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x16b8 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x16b8 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 177 0x16b8 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x16c0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x16c0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x16c0 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 154 0x16c0 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x16c8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x16c8 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 177 0x16c8 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x16cc + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x16cc 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 131 0x16cc 2 x +elementwise_binary.h 154 0x16cc 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x16d2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x16d2 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 177 0x16d2 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x16d6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x16d6 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 154 0x16d6 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x16da x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x16da 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 177 0x16da 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x16de + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x16de 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 154 0x16de 2 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 277 0x16f0 x +superkernels.cpp 282 0x16f0 1 +superkernels.cpp 282 0x16f6 x +superkernels.cpp 277 0x16fc + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x1702 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 279 0x1702 1 x +superkernels.cpp 282 0x171e x +superkernels.cpp 282 0x171e 1 x +superkernels.cpp 279 0x1724 x +superkernels.cpp 279 0x1728 +superkernels.cpp 279 0x172e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x1736 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 285 0x173a +superkernels.cpp 287 0x173a 1 +superkernels.cpp 289 0x173a 2 +superkernels.cpp 301 0x173a 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x1744 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 285 0x1744 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 74 0x174e +tile.hpp 86 0x174e 1 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 285 0x175a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x1764 +tile.hpp 74 0x1768 +tile.hpp 74 0x176c x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 289 0x1770 +superkernels.cpp 289 0x1770 1 x +superkernels.cpp 289 0x177a +superkernels.cpp 289 0x177a 1 +superkernels.cpp 298 0x177a 2 +superkernels.cpp 287 0x1784 x +superkernels.cpp 290 0x1784 1 +superkernels.cpp 299 0x1784 2 +superkernels.cpp 287 0x179a +superkernels.cpp 289 0x17a0 x +superkernels.cpp 287 0x17a4 x +superkernels.cpp 289 0x17a8 x +superkernels.cpp 290 0x17ac x +superkernels.cpp 298 0x17b0 +superkernels.cpp 299 0x17b6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x17c0 x +io_buffer_main.h 242 0x17c4 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 293 0x17c4 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x17ce +io_buffer_main.h 242 0x17d2 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 293 0x17d6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 259 0x17da x +io_buffer_main.h 242 0x17e6 x +io_buffer_main.h 242 0x17e6 1 x +io_buffer_main.h 242 0x17ea + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 293 0x17ea 1 x +superkernels.cpp 293 0x17f0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 419 0x17f4 +io_buffer_main.h 419 0x17f4 1 +io_buffer_main.h 419 0x17fe x +io_buffer_main.h 242 0x1802 x +io_buffer_main.h 242 0x180a +io_buffer_main.h 242 0x180e +io_buffer_main.h 242 0x1812 +io_buffer_main.h 259 0x1816 x +io_buffer_main.h 242 0x1824 x +io_buffer_main.h 242 0x1824 1 x +io_buffer_main.h 242 0x1828 +io_buffer_main.h 419 0x1834 x +io_buffer_main.h 348 0x1838 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 298 0x1838 1 +superkernels.cpp 299 0x1838 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x1846 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 298 0x184a x +superkernels.cpp 299 0x1850 x +superkernels.cpp 301 0x1850 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x1856 x +io_buffer_main.h 149 0x185a +io_buffer_main.h 351 0x185e +io_buffer_main.h 351 0x185e 1 +io_buffer_main.h 149 0x1864 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 301 0x186a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 351 0x1870 +io_buffer_main.h 351 0x1870 1 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 298 0x1874 x +superkernels.cpp 299 0x1878 x +superkernels.cpp 299 0x187c +superkernels.cpp 298 0x1880 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 348 0x1890 x +io_buffer_main.h 351 0x1890 1 +io_buffer_main.h 351 0x1890 2 +io_buffer_main.h 351 0x1890 3 +io_buffer_main.h 351 0x1890 4 +io_buffer_main.h 449 0x1890 5 +io_buffer_main.h 449 0x1890 6 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 305 0x189a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 449 0x18aa x +io_buffer_main.h 351 0x18ae x +io_buffer_main.h 348 0x18b2 +io_buffer_main.h 351 0x18c0 +io_buffer_main.h 348 0x18c4 x +io_buffer_main.h 351 0x18c4 1 +io_buffer_main.h 449 0x18d6 x +io_buffer_main.h 351 0x18da x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 305 0x18da 1 +superkernels.cpp 306 0x18da 2 +superkernels.cpp 305 0x18e4 x +superkernels.cpp 305 0x18e8 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 351 0x18f4 x +io_buffer_main.h 351 0x18f8 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 305 0x18fc x +superkernels.cpp 305 0x1900 +superkernels.cpp 306 0x1910 +superkernels.cpp 306 0x1914 x +superkernels.cpp 308 0x1920 +superkernels.cpp 308 0x1936 x +superkernels.cpp 308 0x193e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16_params.h: +conv2d_dw_bf16_params.h 177 0x1950 x +conv2d_dw_bf16_params.h 181 0x1950 1 +conv2d_dw_bf16_params.h 181 0x1950 2 x +conv2d_dw_bf16_params.h 181 0x195a x +conv2d_dw_bf16_params.h 184 0x195a 1 +conv2d_dw_bf16_params.h 184 0x195a 2 +conv2d_dw_bf16_params.h 177 0x1964 +conv2d_dw_bf16_params.h 184 0x196a +conv2d_dw_bf16_params.h 181 0x1980 +conv2d_dw_bf16_params.h 181 0x1984 +conv2d_dw_bf16_params.h 181 0x1988 +conv2d_dw_bf16_params.h 181 0x198c +conv2d_dw_bf16_params.h 181 0x199a +conv2d_dw_bf16_params.h 181 0x199e +conv2d_dw_bf16_params.h 184 0x19a2 x +conv2d_dw_bf16_params.h 184 0x19a6 +conv2d_dw_bf16_params.h 184 0x19aa +conv2d_dw_bf16_params.h 184 0x19b6 +conv2d_dw_bf16_params.h 184 0x19bc +conv2d_dw_bf16_params.h 184 0x19c2 +conv2d_dw_bf16_params.h 184 0x19c2 1 +conv2d_dw_bf16_params.h 184 0x19cc +conv2d_dw_bf16_params.h 184 0x19d2 +conv2d_dw_bf16_params.h 184 0x19d6 +conv2d_dw_bf16_params.h 184 0x19e0 +conv2d_dw_bf16_params.h 184 0x19e0 1 +conv2d_dw_bf16_params.h 185 0x19e0 2 x +conv2d_dw_bf16_params.h 185 0x19e0 3 +conv2d_dw_bf16_params.h 184 0x19ea x +conv2d_dw_bf16_params.h 185 0x19ea 1 +conv2d_dw_bf16_params.h 184 0x19f0 +conv2d_dw_bf16_params.h 185 0x19f0 1 +conv2d_dw_bf16_params.h 184 0x19f6 +conv2d_dw_bf16_params.h 185 0x19fe x +conv2d_dw_bf16_params.h 184 0x1a04 x +conv2d_dw_bf16_params.h 184 0x1a08 +conv2d_dw_bf16_params.h 185 0x1a08 1 x +conv2d_dw_bf16_params.h 185 0x1a12 +conv2d_dw_bf16_params.h 185 0x1a16 +conv2d_dw_bf16_params.h 185 0x1a1a +conv2d_dw_bf16_params.h 185 0x1a20 +conv2d_dw_bf16_params.h 186 0x1a20 1 +conv2d_dw_bf16_params.h 192 0x1a20 2 +conv2d_dw_bf16_params.h 198 0x1a20 3 +conv2d_dw_bf16_params.h 200 0x1a20 4 +conv2d_dw_bf16_params.h 200 0x1a20 5 +conv2d_dw_bf16_params.h 216 0x1a20 6 +conv2d_dw_bf16_params.h 235 0x1a20 7 +conv2d_dw_bf16_params.h 237 0x1a20 8 +conv2d_dw_bf16_params.h 239 0x1a20 9 +conv2d_dw_bf16_params.h 185 0x1a2a +conv2d_dw_bf16_params.h 192 0x1a2a 1 x +conv2d_dw_bf16_params.h 200 0x1a2a 2 +conv2d_dw_bf16_params.h 200 0x1a2a 3 +conv2d_dw_bf16_params.h 209 0x1a2a 4 +conv2d_dw_bf16_params.h 219 0x1a2a 5 +conv2d_dw_bf16_params.h 226 0x1a2a 6 +conv2d_dw_bf16_params.h 230 0x1a2a 7 +conv2d_dw_bf16_params.h 237 0x1a2a 8 +conv2d_dw_bf16_params.h 197 0x1a34 +conv2d_dw_bf16_params.h 198 0x1a34 1 +conv2d_dw_bf16_params.h 200 0x1a34 2 +conv2d_dw_bf16_params.h 234 0x1a34 3 +conv2d_dw_bf16_params.h 186 0x1a3e +conv2d_dw_bf16_params.h 197 0x1a3e 1 +conv2d_dw_bf16_params.h 201 0x1a3e 2 +conv2d_dw_bf16_params.h 208 0x1a3e 3 +conv2d_dw_bf16_params.h 226 0x1a3e 4 +conv2d_dw_bf16_params.h 230 0x1a3e 5 +conv2d_dw_bf16_params.h 239 0x1a3e 6 +conv2d_dw_bf16_params.h 239 0x1a3e 7 +conv2d_dw_bf16_params.h 186 0x1a48 +conv2d_dw_bf16_params.h 198 0x1a48 1 +conv2d_dw_bf16_params.h 201 0x1a48 2 +conv2d_dw_bf16_params.h 234 0x1a48 3 +conv2d_dw_bf16_params.h 208 0x1a52 +conv2d_dw_bf16_params.h 208 0x1a52 1 +conv2d_dw_bf16_params.h 209 0x1a52 2 +conv2d_dw_bf16_params.h 214 0x1a52 3 +conv2d_dw_bf16_params.h 216 0x1a52 4 +conv2d_dw_bf16_params.h 221 0x1a52 5 +conv2d_dw_bf16_params.h 230 0x1a52 6 +conv2d_dw_bf16_params.h 237 0x1a52 7 +conv2d_dw_bf16_params.h 185 0x1a5c x +conv2d_dw_bf16_params.h 232 0x1a5c 1 +conv2d_dw_bf16_params.h 232 0x1a5c 2 +conv2d_dw_bf16_params.h 185 0x1a66 +conv2d_dw_bf16_params.h 185 0x1a6a +conv2d_dw_bf16_params.h 185 0x1a6e +conv2d_dw_bf16_params.h 206 0x1a6e 1 +conv2d_dw_bf16_params.h 185 0x1a74 +conv2d_dw_bf16_params.h 186 0x1a74 1 +conv2d_dw_bf16_params.h 185 0x1a7a +conv2d_dw_bf16_params.h 192 0x1a7a 1 x +conv2d_dw_bf16_params.h 197 0x1a80 +conv2d_dw_bf16_params.h 198 0x1a80 1 x +conv2d_dw_bf16_params.h 200 0x1a86 x +conv2d_dw_bf16_params.h 198 0x1a8a x +conv2d_dw_bf16_params.h 200 0x1a8e x +conv2d_dw_bf16_params.h 209 0x1a8e 1 +conv2d_dw_bf16_params.h 213 0x1a8e 2 +conv2d_dw_bf16_params.h 198 0x1a94 x +conv2d_dw_bf16_params.h 197 0x1a98 x +conv2d_dw_bf16_params.h 197 0x1a9c +conv2d_dw_bf16_params.h 204 0x1a9c 1 +conv2d_dw_bf16_params.h 201 0x1aa2 x +conv2d_dw_bf16_params.h 186 0x1aa6 x +conv2d_dw_bf16_params.h 197 0x1aa6 1 x +conv2d_dw_bf16_params.h 186 0x1aac +conv2d_dw_bf16_params.h 198 0x1aac 1 x +conv2d_dw_bf16_params.h 212 0x1aac 2 +conv2d_dw_bf16_params.h 219 0x1aac 3 +conv2d_dw_bf16_params.h 186 0x1ab6 x +conv2d_dw_bf16_params.h 200 0x1ab6 1 x +conv2d_dw_bf16_params.h 193 0x1abc x +conv2d_dw_bf16_params.h 201 0x1abc 1 x +conv2d_dw_bf16_params.h 204 0x1ac2 x +conv2d_dw_bf16_params.h 212 0x1ac2 1 x +conv2d_dw_bf16_params.h 208 0x1ac8 x +conv2d_dw_bf16_params.h 208 0x1acc +conv2d_dw_bf16_params.h 208 0x1ad0 +conv2d_dw_bf16_params.h 214 0x1ad0 1 x +conv2d_dw_bf16_params.h 200 0x1ad6 +conv2d_dw_bf16_params.h 205 0x1ad6 1 +conv2d_dw_bf16_params.h 208 0x1ad6 2 x +conv2d_dw_bf16_params.h 214 0x1ad6 3 +conv2d_dw_bf16_params.h 219 0x1ad6 4 +conv2d_dw_bf16_params.h 205 0x1ae0 x +conv2d_dw_bf16_params.h 216 0x1ae0 1 x +conv2d_dw_bf16_params.h 206 0x1ae6 x +conv2d_dw_bf16_params.h 209 0x1ae6 1 x +conv2d_dw_bf16_params.h 207 0x1aec x +conv2d_dw_bf16_params.h 213 0x1aec 1 x +conv2d_dw_bf16_params.h 209 0x1af2 x +conv2d_dw_bf16_params.h 213 0x1af2 1 +conv2d_dw_bf16_params.h 209 0x1af8 +conv2d_dw_bf16_params.h 209 0x1af8 1 +conv2d_dw_bf16_params.h 216 0x1af8 2 +conv2d_dw_bf16_params.h 209 0x1b02 +conv2d_dw_bf16_params.h 209 0x1b06 +conv2d_dw_bf16_params.h 211 0x1b06 1 x +conv2d_dw_bf16_params.h 216 0x1b06 2 x +conv2d_dw_bf16_params.h 211 0x1b10 +conv2d_dw_bf16_params.h 216 0x1b10 1 +conv2d_dw_bf16_params.h 212 0x1b16 x +conv2d_dw_bf16_params.h 216 0x1b16 1 +conv2d_dw_bf16_params.h 213 0x1b1c x +conv2d_dw_bf16_params.h 216 0x1b1c 1 x +conv2d_dw_bf16_params.h 224 0x1b1c 2 +conv2d_dw_bf16_params.h 224 0x1b1c 3 +conv2d_dw_bf16_params.h 214 0x1b26 x +conv2d_dw_bf16_params.h 216 0x1b26 1 +conv2d_dw_bf16_params.h 222 0x1b26 2 +conv2d_dw_bf16_params.h 225 0x1b26 3 +conv2d_dw_bf16_params.h 229 0x1b26 4 +conv2d_dw_bf16_params.h 239 0x1b26 5 +conv2d_dw_bf16_params.h 215 0x1b30 x +conv2d_dw_bf16_params.h 219 0x1b30 1 x +conv2d_dw_bf16_params.h 200 0x1b36 x +conv2d_dw_bf16_params.h 218 0x1b36 1 x +conv2d_dw_bf16_params.h 219 0x1b36 2 +conv2d_dw_bf16_params.h 219 0x1b3c x +conv2d_dw_bf16_params.h 221 0x1b3c 1 x +conv2d_dw_bf16_params.h 219 0x1b42 +conv2d_dw_bf16_params.h 221 0x1b42 1 +conv2d_dw_bf16_params.h 220 0x1b48 x +conv2d_dw_bf16_params.h 221 0x1b48 1 +conv2d_dw_bf16_params.h 224 0x1b48 2 x +conv2d_dw_bf16_params.h 224 0x1b48 3 x +conv2d_dw_bf16_params.h 221 0x1b52 x +conv2d_dw_bf16_params.h 230 0x1b52 1 x +conv2d_dw_bf16_params.h 222 0x1b58 x +conv2d_dw_bf16_params.h 226 0x1b58 1 x +conv2d_dw_bf16_params.h 224 0x1b5e x +conv2d_dw_bf16_params.h 226 0x1b5e 1 +conv2d_dw_bf16_params.h 225 0x1b64 x +conv2d_dw_bf16_params.h 230 0x1b64 1 x +conv2d_dw_bf16_params.h 226 0x1b6a x +conv2d_dw_bf16_params.h 228 0x1b6e x +conv2d_dw_bf16_params.h 229 0x1b72 x +conv2d_dw_bf16_params.h 230 0x1b76 x +conv2d_dw_bf16_params.h 232 0x1b7a x +conv2d_dw_bf16_params.h 232 0x1b8a +conv2d_dw_bf16_params.h 232 0x1b8a 1 +conv2d_dw_bf16_params.h 190 0x1b90 +conv2d_dw_bf16_params.h 190 0x1b94 x +conv2d_dw_bf16_params.h 232 0x1b98 +conv2d_dw_bf16_params.h 232 0x1ba2 +conv2d_dw_bf16_params.h 232 0x1bb0 +conv2d_dw_bf16_params.h 236 0x1bb0 1 +conv2d_dw_bf16_params.h 232 0x1bba x +conv2d_dw_bf16_params.h 232 0x1bba 1 x +conv2d_dw_bf16_params.h 234 0x1bba 2 x +conv2d_dw_bf16_params.h 232 0x1bc4 +conv2d_dw_bf16_params.h 239 0x1bc4 1 +conv2d_dw_bf16_params.h 237 0x1bce +conv2d_dw_bf16_params.h 239 0x1bce 1 x +conv2d_dw_bf16_params.h 240 0x1bce 2 +conv2d_dw_bf16_params.h 239 0x1bd8 +conv2d_dw_bf16_params.h 232 0x1be4 x +conv2d_dw_bf16_params.h 232 0x1be8 +conv2d_dw_bf16_params.h 234 0x1be8 1 x +conv2d_dw_bf16_params.h 232 0x1bee x +conv2d_dw_bf16_params.h 239 0x1bee 1 x +conv2d_dw_bf16_params.h 234 0x1bf8 x +conv2d_dw_bf16_params.h 239 0x1bf8 1 +conv2d_dw_bf16_params.h 232 0x1bfe x +conv2d_dw_bf16_params.h 235 0x1bfe 1 x +conv2d_dw_bf16_params.h 236 0x1c04 x +conv2d_dw_bf16_params.h 239 0x1c08 x +conv2d_dw_bf16_params.h 234 0x1c0c x +conv2d_dw_bf16_params.h 237 0x1c0c 1 x +conv2d_dw_bf16_params.h 235 0x1c12 x +conv2d_dw_bf16_params.h 239 0x1c12 1 x +conv2d_dw_bf16_params.h 236 0x1c18 x +conv2d_dw_bf16_params.h 239 0x1c18 1 +conv2d_dw_bf16_params.h 237 0x1c1e x +conv2d_dw_bf16_params.h 240 0x1c22 x +conv2d_dw_bf16_params.h 239 0x1c2e x +conv2d_dw_bf16_params.h 239 0x1c32 +conv2d_dw_bf16_params.h 237 0x1c36 x +conv2d_dw_bf16_params.h 237 0x1c3a +conv2d_dw_bf16_params.h 237 0x1c3e +conv2d_dw_bf16_params.h 238 0x1c42 x +conv2d_dw_bf16_params.h 239 0x1c46 x +conv2d_dw_bf16_params.h 239 0x1c4a +conv2d_dw_bf16_params.h 240 0x1c4a 1 x +conv2d_dw_bf16_params.h 239 0x1c50 x +conv2d_dw_bf16_params.h 239 0x1c54 +conv2d_dw_bf16_params.h 239 0x1c58 +conv2d_dw_bf16_params.h 239 0x1c5c +conv2d_dw_bf16_params.h 239 0x1c60 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1c70 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 158 0x1c70 1 x +conv2d_dw_bf16.h 179 0x1c70 2 +conv2d_dw_bf16.h 183 0x1c70 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1c7c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1c7c 1 +shuffle.hpp 153 0x1c7c 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 179 0x1c7c 3 x +conv2d_dw_bf16.h 202 0x1c7c 4 +conv2d_dw_bf16.h 179 0x1c88 +conv2d_dw_bf16.h 202 0x1c88 1 +conv2d_dw_bf16.h 228 0x1c88 2 +conv2d_dw_bf16.h 229 0x1c88 3 +conv2d_dw_bf16.h 230 0x1c88 4 +conv2d_dw_bf16.h 231 0x1c88 5 +conv2d_dw_bf16.h 232 0x1c88 6 +conv2d_dw_bf16.h 233 0x1c88 7 +conv2d_dw_bf16.h 234 0x1c88 8 +conv2d_dw_bf16.h 235 0x1c88 9 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x1c92 +aie_core.h 81 0x1c92 1 +aie_core.h 100 0x1c92 2 +aie_core.h 100 0x1c92 3 +aie_core.h 100 0x1c92 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1c92 5 +vector.hpp 1139 0x1c92 6 +vector.hpp 1159 0x1c92 7 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1c92 8 +shuffle.hpp 153 0x1c92 9 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 179 0x1c92 10 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x1c9c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1c9c 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 179 0x1c9c 2 +conv2d_dw_bf16.h 208 0x1c9c 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1ca8 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1ca8 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 179 0x1ca8 2 x +conv2d_dw_bf16.h 208 0x1ca8 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1cb4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1cb4 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 180 0x1cb4 2 x +conv2d_dw_bf16.h 202 0x1cb4 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x1cc0 +aie_core.h 100 0x1cc0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1cc0 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1cc0 3 +shuffle.hpp 153 0x1cc0 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 180 0x1cc0 5 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1ccc + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1ccc 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 180 0x1ccc 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1cd6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1cd6 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 180 0x1cd6 2 x +conv2d_dw_bf16.h 180 0x1cdc +conv2d_dw_bf16.h 181 0x1ce0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1ce4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1ce4 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 181 0x1ce4 2 +conv2d_dw_bf16.h 181 0x1cea x +conv2d_dw_bf16.h 181 0x1cee +conv2d_dw_bf16.h 181 0x1cf2 +conv2d_dw_bf16.h 182 0x1cf6 x +conv2d_dw_bf16.h 182 0x1cfa +conv2d_dw_bf16.h 182 0x1cfe +conv2d_dw_bf16.h 183 0x1d02 x +conv2d_dw_bf16.h 183 0x1d06 +conv2d_dw_bf16.h 183 0x1d0a +conv2d_dw_bf16.h 202 0x1d0e x +conv2d_dw_bf16.h 202 0x1d12 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x1d1a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1d1a 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 202 0x1d1a 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1d20 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x1d20 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 202 0x1d26 x +conv2d_dw_bf16.h 202 0x1d2e +conv2d_dw_bf16.h 226 0x1d3c x +conv2d_dw_bf16.h 208 0x1d40 x +conv2d_dw_bf16.h 228 0x1d44 +conv2d_dw_bf16.h 228 0x1d44 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1d50 +accum.hpp 1119 0x1d50 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 223 0x1d50 2 x +conv2d_dw_bf16.h 232 0x1d50 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x1d60 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1d60 1 x +vector.hpp 1139 0x1d60 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1d60 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1d6a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 224 0x1d6a 1 x +conv2d_dw_bf16.h 229 0x1d6a 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1d74 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1d74 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1d74 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 233 0x1d74 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1d7e +shuffle.hpp 153 0x1d82 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 230 0x1d82 1 x +conv2d_dw_bf16.h 225 0x1d8a x +conv2d_dw_bf16.h 234 0x1d8a 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1d92 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 226 0x1d96 x +conv2d_dw_bf16.h 231 0x1d96 1 x +conv2d_dw_bf16.h 223 0x1d9e x +conv2d_dw_bf16.h 235 0x1d9e 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1da6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 228 0x1dac x +conv2d_dw_bf16.h 232 0x1db0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x1dc0 x +aie_core.h 100 0x1dc0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1dc0 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 224 0x1dca x +conv2d_dw_bf16.h 229 0x1dca 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1dd2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 233 0x1dd2 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1dda + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 225 0x1dde x +conv2d_dw_bf16.h 230 0x1dde 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1de6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 234 0x1de6 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x1df0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1df0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1df0 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 208 0x1df6 x +conv2d_dw_bf16.h 231 0x1df6 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1e00 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 940 0x1e00 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 208 0x1e00 2 +conv2d_dw_bf16.h 235 0x1e00 3 x +conv2d_dw_bf16.h 208 0x1e0c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1e10 x +vector.hpp 1139 0x1e14 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1e18 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1e18 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1e1c x +accum.hpp 1119 0x1e20 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1e24 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 244 0x1e28 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1e2c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1e2c 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 243 0x1e2c 2 x +conv2d_dw_bf16.h 226 0x1e34 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1e38 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1e38 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 228 0x1e38 2 +conv2d_dw_bf16.h 223 0x1e40 x +conv2d_dw_bf16.h 228 0x1e40 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1e48 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 232 0x1e48 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x1e50 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1e50 1 x +vector.hpp 1139 0x1e50 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1e50 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1e5a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 224 0x1e5a 1 x +conv2d_dw_bf16.h 229 0x1e5a 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1e64 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1e64 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1e64 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 233 0x1e64 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1e6e +shuffle.hpp 153 0x1e72 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 230 0x1e72 1 x +conv2d_dw_bf16.h 225 0x1e7a x +conv2d_dw_bf16.h 234 0x1e7a 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1e82 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 226 0x1e86 x +conv2d_dw_bf16.h 231 0x1e86 1 x +conv2d_dw_bf16.h 223 0x1e8e x +conv2d_dw_bf16.h 235 0x1e8e 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1e96 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 228 0x1e9c x +conv2d_dw_bf16.h 232 0x1ea0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x1eb0 x +aie_core.h 100 0x1eb0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1eb0 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 202 0x1eb0 3 x +conv2d_dw_bf16.h 224 0x1ebc x +conv2d_dw_bf16.h 229 0x1ebc 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1ec4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 233 0x1ec4 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1ecc + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 225 0x1ed0 x +conv2d_dw_bf16.h 230 0x1ed0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1ed8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 234 0x1ed8 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../aie2/shuffle.hpp: +shuffle.hpp 153 0x1ee0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 231 0x1ee4 x +conv2d_dw_bf16.h 235 0x1ee8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1119 0x1ef4 x +accum.hpp 1119 0x1ef8 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 248 0x1ef8 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1efe x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 244 0x1f02 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1f06 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1f0a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 243 0x1f0a 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1f0e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1f0e 1 +vector.hpp 1139 0x1f20 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 254 0x1f20 1 x +conv2d_dw_bf16.h 255 0x1f20 2 +conv2d_dw_bf16.h 255 0x1f20 3 +conv2d_dw_bf16.h 261 0x1f20 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f2c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 255 0x1f2c 1 x +conv2d_dw_bf16.h 262 0x1f2c 2 x +conv2d_dw_bf16.h 270 0x1f2c 3 +conv2d_dw_bf16.h 255 0x1f38 +conv2d_dw_bf16.h 258 0x1f38 1 x +conv2d_dw_bf16.h 255 0x1f42 x +conv2d_dw_bf16.h 258 0x1f42 1 +conv2d_dw_bf16.h 255 0x1f4c +conv2d_dw_bf16.h 263 0x1f4c 1 +conv2d_dw_bf16.h 264 0x1f4c 2 +conv2d_dw_bf16.h 266 0x1f4c 3 +conv2d_dw_bf16.h 255 0x1f56 +conv2d_dw_bf16.h 267 0x1f56 1 +conv2d_dw_bf16.h 255 0x1f5c +conv2d_dw_bf16.h 255 0x1f60 +conv2d_dw_bf16.h 258 0x1f64 x +conv2d_dw_bf16.h 258 0x1f7a x +conv2d_dw_bf16.h 270 0x1f7a 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1f80 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 270 0x1f80 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f90 x +vector.hpp 1139 0x1f90 1 x +vector.hpp 1159 0x1f90 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 261 0x1f90 3 x +conv2d_dw_bf16.h 262 0x1f90 4 x +conv2d_dw_bf16.h 263 0x1f90 5 x +conv2d_dw_bf16.h 270 0x1f90 6 +conv2d_dw_bf16.h 264 0x1fa0 x +conv2d_dw_bf16.h 266 0x1fb0 x +conv2d_dw_bf16.h 267 0x1fc0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x1fd0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1fd0 1 x +vector.hpp 1159 0x1fe0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 270 0x1fe0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2000 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 263 0x2000 1 x +conv2d_dw_bf16.h 270 0x2000 2 +conv2d_dw_bf16.h 274 0x2000 3 x +conv2d_dw_bf16.h 264 0x200a x +conv2d_dw_bf16.h 266 0x200e x +conv2d_dw_bf16.h 267 0x2012 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x2016 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2016 1 x +vector.hpp 1159 0x201a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 270 0x201a 1 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 352 0x2190 x +superkernels.cpp 357 0x2190 1 +superkernels.cpp 357 0x2196 x +superkernels.cpp 352 0x219c +superkernels.cpp 375 0x21aa +superkernels.cpp 360 0x21ba +superkernels.cpp 357 0x21c2 +superkernels.cpp 357 0x21c2 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x21c8 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 354 0x21cc x +superkernels.cpp 354 0x21d0 +superkernels.cpp 354 0x21d4 +superkernels.cpp 354 0x21da + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x21de + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 369 0x21de 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 74 0x21e8 +tile.hpp 86 0x21e8 1 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 359 0x21e8 2 +superkernels.cpp 359 0x21f6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x2200 +tile.hpp 74 0x2204 +tile.hpp 74 0x2208 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 360 0x2210 +superkernels.cpp 369 0x2210 1 +superkernels.cpp 360 0x2218 x +superkernels.cpp 361 0x221c +superkernels.cpp 361 0x221c 1 x +superkernels.cpp 360 0x222e +superkernels.cpp 365 0x222e 1 +superkernels.cpp 360 0x2238 x +superkernels.cpp 361 0x223c x +superkernels.cpp 365 0x2240 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x2250 x +io_buffer_main.h 242 0x2254 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 365 0x2254 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 242 0x225e +io_buffer_main.h 242 0x2262 +io_buffer_main.h 259 0x2266 x +io_buffer_main.h 242 0x2274 x +io_buffer_main.h 242 0x2274 1 x +io_buffer_main.h 242 0x2278 +io_buffer_main.h 419 0x227c +io_buffer_main.h 419 0x2286 x +io_buffer_main.h 351 0x228a +io_buffer_main.h 449 0x228a 1 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 364 0x228a 2 +superkernels.cpp 367 0x228a 3 +superkernels.cpp 372 0x228a 4 +superkernels.cpp 373 0x228a 5 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/detail/io_buffer_impl.h: +io_buffer_impl.h 76 0x2294 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x229e +io_buffer_main.h 348 0x229e 1 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 365 0x22a4 x +superkernels.cpp 365 0x22a8 +superkernels.cpp 369 0x22a8 1 +superkernels.cpp 364 0x22b2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x22bc x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 365 0x22c2 x +superkernels.cpp 364 0x22c6 x +superkernels.cpp 367 0x22ca x +superkernels.cpp 369 0x22ce x +superkernels.cpp 364 0x22d4 x +superkernels.cpp 367 0x22d8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/detail/io_buffer_impl.h: +io_buffer_impl.h 225 0x22dc x +io_buffer_impl.h 76 0x22e0 x +io_buffer_impl.h 76 0x22e4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 348 0x22f0 +io_buffer_main.h 348 0x22f4 x +io_buffer_main.h 449 0x2304 x +io_buffer_main.h 351 0x2308 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 372 0x2308 1 +superkernels.cpp 372 0x2312 x +superkernels.cpp 372 0x2316 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 351 0x2326 x +io_buffer_main.h 351 0x232a + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/./encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 372 0x232e x +superkernels.cpp 372 0x2332 +superkernels.cpp 373 0x2338 +superkernels.cpp 373 0x2344 x +superkernels.cpp 375 0x2350 +superkernels.cpp 375 0x235a x +superkernels.cpp 375 0x235e +superkernels.cpp - 0x235f + + +transposeshuffle_params.h: +File name Line number Starting address View Stmt + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/transpose4d_adf_wrapper.cpp: +transpose4d_adf_wrapper.cpp 33 0x2930 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x2934 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/transpose4d_adf_wrapper.cpp: +transpose4d_adf_wrapper.cpp 37 0x2934 1 +transpose4d_adf_wrapper.cpp 37 0x293e x +transpose4d_adf_wrapper.cpp 37 0x294e +transpose4d_adf_wrapper.cpp 37 0x294e 1 +transpose4d_adf_wrapper.cpp 33 0x2954 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x295e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/transpose4d_adf_wrapper.cpp: +transpose4d_adf_wrapper.cpp 38 0x296a +transpose4d_adf_wrapper.cpp 38 0x2974 x +transpose4d_adf_wrapper.cpp 38 0x297a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x2990 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 137 0x2990 1 +transposeshuffle.h 137 0x299a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x299e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 137 0x29a4 x +transposeshuffle.h 137 0x29aa +transposeshuffle.h 137 0x29ae + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/transpose4d_adf_wrapper.cpp: +transpose4d_adf_wrapper.cpp 43 0x29ae 1 +transpose4d_adf_wrapper.cpp 43 0x29b4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 137 0x29b8 +transposeshuffle.h 137 0x29bc + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/transpose4d_adf_wrapper.cpp: +transpose4d_adf_wrapper.cpp 43 0x29c0 +transpose4d_adf_wrapper.cpp 46 0x29c0 1 +transpose4d_adf_wrapper.cpp 43 0x29ca +transpose4d_adf_wrapper.cpp 43 0x29ca 1 x +transpose4d_adf_wrapper.cpp 43 0x29d0 +transpose4d_adf_wrapper.cpp 46 0x29dc x +transpose4d_adf_wrapper.cpp 46 0x29e4 +transpose4d_adf_wrapper.cpp 43 0x29e8 x +transpose4d_adf_wrapper.cpp 43 0x29ec +transpose4d_adf_wrapper.cpp 43 0x29f0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x2a20 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/buffer_pad_adf_wrapper.cpp: +buffer_pad_adf_wrapper.cpp 24 0x2a20 1 x +buffer_pad_adf_wrapper.cpp 26 0x2a20 2 +buffer_pad_adf_wrapper.cpp 26 0x2a26 x +buffer_pad_adf_wrapper.cpp 26 0x2a2a +buffer_pad_adf_wrapper.cpp 27 0x2a2e x +buffer_pad_adf_wrapper.cpp 24 0x2a3e +buffer_pad_adf_wrapper.cpp 36 0x2a44 x +buffer_pad_adf_wrapper.cpp 36 0x2a44 1 +buffer_pad_adf_wrapper.cpp 36 0x2a4a +buffer_pad_adf_wrapper.cpp 36 0x2a54 +buffer_pad_adf_wrapper.cpp 36 0x2a58 +buffer_pad_adf_wrapper.cpp 36 0x2a5c +buffer_pad_adf_wrapper.cpp 36 0x2a5c 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x2a62 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/buffer_pad_adf_wrapper.cpp: +buffer_pad_adf_wrapper.cpp 25 0x2a74 x +buffer_pad_adf_wrapper.cpp 37 0x2a7a +buffer_pad_adf_wrapper.cpp 37 0x2a80 +buffer_pad_adf_wrapper.cpp 37 0x2a84 x +buffer_pad_adf_wrapper.cpp 36 0x2a8a +buffer_pad_adf_wrapper.cpp 36 0x2a8e x +buffer_pad_adf_wrapper.cpp 40 0x2ac0 x +buffer_pad_adf_wrapper.cpp 40 0x2ac0 1 x +buffer_pad_adf_wrapper.cpp 41 0x2ad0 x +buffer_pad_adf_wrapper.cpp 41 0x2ada +buffer_pad_adf_wrapper.cpp 42 0x2ae4 +buffer_pad_adf_wrapper.cpp 40 0x2aee +buffer_pad_adf_wrapper.cpp 45 0x2aee 1 +buffer_pad_adf_wrapper.cpp 41 0x2b10 +buffer_pad_adf_wrapper.cpp 41 0x2b20 +buffer_pad_adf_wrapper.cpp 42 0x2b20 1 +buffer_pad_adf_wrapper.cpp 42 0x2b28 +buffer_pad_adf_wrapper.cpp 42 0x2b30 x +buffer_pad_adf_wrapper.cpp 42 0x2b50 +buffer_pad_adf_wrapper.cpp 40 0x2ba0 x +buffer_pad_adf_wrapper.cpp 45 0x2bd0 +buffer_pad_adf_wrapper.cpp 45 0x2bf2 x +buffer_pad_adf_wrapper.cpp 45 0x2bf6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x2fd0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/slice_generic_innermost_adf_wrapper.cpp: +e_generic_innermost_adf_wrapper.cpp 29 0x2fd0 1 +e_generic_innermost_adf_wrapper.cpp 29 0x2fd0 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x2fda + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/slice_generic_innermost_adf_wrapper.cpp: +e_generic_innermost_adf_wrapper.cpp 33 0x2fe2 +e_generic_innermost_adf_wrapper.cpp 33 0x2fe2 1 x +e_generic_innermost_adf_wrapper.cpp 37 0x2ff4 +e_generic_innermost_adf_wrapper.cpp 36 0x3000 x +e_generic_innermost_adf_wrapper.cpp 37 0x3000 1 +e_generic_innermost_adf_wrapper.cpp 34 0x300a x +e_generic_innermost_adf_wrapper.cpp 36 0x300a 1 +e_generic_innermost_adf_wrapper.cpp 34 0x3010 +e_generic_innermost_adf_wrapper.cpp 36 0x3010 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x301a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/slice_generic_innermost_adf_wrapper.cpp: +e_generic_innermost_adf_wrapper.cpp 34 0x301a 1 x +e_generic_innermost_adf_wrapper.cpp 36 0x3020 x +e_generic_innermost_adf_wrapper.cpp 37 0x3020 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 149 0x3028 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L2/src/ml_adf/slice_generic_innermost_adf_wrapper.cpp: +e_generic_innermost_adf_wrapper.cpp 36 0x302e x +e_generic_innermost_adf_wrapper.cpp 36 0x3034 +e_generic_innermost_adf_wrapper.cpp 36 0x303a +e_generic_innermost_adf_wrapper.cpp 37 0x303e x +e_generic_innermost_adf_wrapper.cpp 37 0x3044 +e_generic_innermost_adf_wrapper.cpp 37 0x3048 +e_generic_innermost_adf_wrapper.cpp 37 0x304c +e_generic_innermost_adf_wrapper.cpp 37 0x3050 +e_generic_innermost_adf_wrapper.cpp 39 0x3060 +e_generic_innermost_adf_wrapper.cpp 39 0x3070 x +e_generic_innermost_adf_wrapper.cpp 39 0x3074 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2df0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 25 0x2df0 1 x +slice_generic_innermost.h 35 0x2df0 2 +slice_generic_innermost.h 54 0x2df0 3 +slice_generic_innermost.h 35 0x2df8 x +slice_generic_innermost.h 35 0x2dfc +slice_generic_innermost.h 36 0x2e02 x +slice_generic_innermost.h 40 0x2e06 x +slice_generic_innermost.h 38 0x2e0a x +slice_generic_innermost.h 40 0x2e18 x +slice_generic_innermost.h 40 0x2e18 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2e1e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 36 0x2e1e 1 +slice_generic_innermost.h 50 0x2e1e 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2e22 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 36 0x2e22 1 x +slice_generic_innermost.h 50 0x2e22 2 +slice_generic_innermost.h 35 0x2e28 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2e2c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 37 0x2e2c 1 x +slice_generic_innermost.h 52 0x2e2c 2 +slice_generic_innermost.h 38 0x2e32 x +slice_generic_innermost.h 40 0x2e44 +slice_generic_innermost.h 40 0x2e48 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2e52 x +vector.hpp 1139 0x2e52 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 40 0x2e52 2 +slice_generic_innermost.h 50 0x2e52 3 x +slice_generic_innermost.h 52 0x2e52 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2e5e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 40 0x2e5e 1 x +slice_generic_innermost.h 51 0x2e5e 2 x +slice_generic_innermost.h 53 0x2e5e 3 x +slice_generic_innermost.h 56 0x2e5e 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2e6c x +vector.hpp 1139 0x2e6c 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 40 0x2e6c 2 +slice_generic_innermost.h 50 0x2e6c 3 x +slice_generic_innermost.h 57 0x2e6c 4 x +slice_generic_innermost.h 58 0x2e6c 5 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2e78 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 51 0x2e78 1 x +slice_generic_innermost.h 52 0x2e78 2 x +slice_generic_innermost.h 59 0x2e78 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2e80 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 53 0x2e80 1 x +slice_generic_innermost.h 56 0x2e80 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2e86 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 57 0x2e86 1 x +slice_generic_innermost.h 58 0x2e86 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2e8c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 50 0x2e8c 1 x +slice_generic_innermost.h 59 0x2e8c 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2e92 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 46 0x2e92 1 x +slice_generic_innermost.h 52 0x2e92 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2e98 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 51 0x2e98 1 x +slice_generic_innermost.h 56 0x2e98 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2ea0 +vector.hpp 1159 0x2ea0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 47 0x2ea0 2 x +slice_generic_innermost.h 54 0x2ea0 3 x +slice_generic_innermost.h 58 0x2ea0 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2eb0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 46 0x2eb0 1 x +slice_generic_innermost.h 50 0x2eb0 2 x +slice_generic_innermost.h 53 0x2eb0 3 x +slice_generic_innermost.h 57 0x2eb0 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2ec0 +vector.hpp 1159 0x2ec0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 52 0x2ec0 2 x +slice_generic_innermost.h 59 0x2ec0 3 x +slice_generic_innermost.h 60 0x2ec0 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2ed0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 47 0x2ed0 1 x +slice_generic_innermost.h 51 0x2ed0 2 x +slice_generic_innermost.h 55 0x2ed0 3 x +slice_generic_innermost.h 56 0x2ed0 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2ee0 +vector.hpp 1159 0x2ee0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 54 0x2ee0 2 x +slice_generic_innermost.h 58 0x2ee0 3 x +slice_generic_innermost.h 61 0x2ee0 4 x +slice_generic_innermost.h 46 0x2ef0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2ef4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 60 0x2ef4 1 x +slice_generic_innermost.h 47 0x2ef8 x +slice_generic_innermost.h 61 0x2ef8 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2f04 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 46 0x2f04 1 x +slice_generic_innermost.h 55 0x2f04 2 x +slice_generic_innermost.h 60 0x2f04 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2f0e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 54 0x2f0e 1 x +slice_generic_innermost.h 61 0x2f0e 2 x +slice_generic_innermost.h 47 0x2f14 x +slice_generic_innermost.h 55 0x2f14 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2f1a x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 54 0x2f1a 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2f20 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 60 0x2f20 1 x +slice_generic_innermost.h 40 0x2f30 x +slice_generic_innermost.h 40 0x2f34 +slice_generic_innermost.h 40 0x2f3a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2f40 x +vector.hpp 1139 0x2f40 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 50 0x2f40 2 x +slice_generic_innermost.h 52 0x2f40 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2f46 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 51 0x2f46 1 x +slice_generic_innermost.h 53 0x2f46 2 x +slice_generic_innermost.h 56 0x2f46 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2f4e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 57 0x2f4e 1 x +slice_generic_innermost.h 58 0x2f4e 2 x +slice_generic_innermost.h 59 0x2f54 x +slice_generic_innermost.h 46 0x2f60 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2f80 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 47 0x2f80 1 x +slice_generic_innermost.h 54 0x2f80 2 x +slice_generic_innermost.h 55 0x2f90 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2fa0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost.h: +slice_generic_innermost.h 60 0x2fa0 1 x +slice_generic_innermost.h 61 0x2fb0 x +slice_generic_innermost.h 76 0x2fc0 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable4/src/0_0_reloadable4.cc: +0_0_reloadable4.cc 21 0x2370 x +0_0_reloadable4.cc 23 0x2370 1 +0_0_reloadable4.cc 23 0x2374 x +0_0_reloadable4.cc 24 0x2378 x +0_0_reloadable4.cc 26 0x237c x +0_0_reloadable4.cc 25 0x2380 x +0_0_reloadable4.cc 22 0x2384 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle_params.h: +transposeshuffle_params.h 71 0x23a0 x +transposeshuffle_params.h 76 0x23a0 1 +transposeshuffle_params.h 76 0x23a0 2 x +transposeshuffle_params.h 76 0x23aa +transposeshuffle_params.h 76 0x23aa 1 +transposeshuffle_params.h 80 0x23aa 2 +transposeshuffle_params.h 80 0x23aa 3 +transposeshuffle_params.h 81 0x23aa 4 +transposeshuffle_params.h 81 0x23aa 5 +transposeshuffle_params.h 85 0x23b6 +transposeshuffle_params.h 86 0x23b6 1 +transposeshuffle_params.h 89 0x23b6 2 +transposeshuffle_params.h 91 0x23b6 3 +transposeshuffle_params.h 93 0x23b6 4 +transposeshuffle_params.h 94 0x23b6 5 +transposeshuffle_params.h 76 0x23c4 x +transposeshuffle_params.h 76 0x23c8 +transposeshuffle_params.h 76 0x23cc +transposeshuffle_params.h 76 0x23da +transposeshuffle_params.h 76 0x23de +transposeshuffle_params.h 76 0x23e2 +transposeshuffle_params.h 76 0x23e6 +transposeshuffle_params.h 76 0x23f4 +transposeshuffle_params.h 76 0x23f8 +transposeshuffle_params.h 76 0x23fc +transposeshuffle_params.h 76 0x2400 +transposeshuffle_params.h 76 0x240e +transposeshuffle_params.h 76 0x2412 +transposeshuffle_params.h 80 0x2416 x +transposeshuffle_params.h 80 0x2426 +transposeshuffle_params.h 80 0x242a +transposeshuffle_params.h 89 0x242a 1 x +transposeshuffle_params.h 80 0x2430 +transposeshuffle_params.h 80 0x2430 1 x +transposeshuffle_params.h 80 0x2438 +transposeshuffle_params.h 81 0x243c x +transposeshuffle_params.h 81 0x244c +transposeshuffle_params.h 90 0x244c 1 +transposeshuffle_params.h 81 0x2452 +transposeshuffle_params.h 81 0x2456 +transposeshuffle_params.h 90 0x2456 1 x +transposeshuffle_params.h 85 0x245c x +transposeshuffle_params.h 85 0x2460 +transposeshuffle_params.h 86 0x2464 x +transposeshuffle_params.h 89 0x2468 x +transposeshuffle_params.h 90 0x246c x +transposeshuffle_params.h 91 0x2470 +transposeshuffle_params.h 91 0x2470 1 x +transposeshuffle_params.h 91 0x2478 +transposeshuffle_params.h 93 0x247c x +transposeshuffle_params.h 93 0x2480 +transposeshuffle_params.h 93 0x2484 +transposeshuffle_params.h 93 0x2488 +transposeshuffle_params.h 93 0x248c +transposeshuffle_params.h 95 0x248c 1 x +transposeshuffle_params.h 94 0x2492 x +transposeshuffle_params.h 94 0x2496 +transposeshuffle_params.h 94 0x249a +transposeshuffle_params.h 94 0x249e +transposeshuffle_params.h 94 0x24a2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 38 0x24b0 x +transposeshuffle.h 72 0x24b0 1 +transposeshuffle.h 79 0x24b0 2 +transposeshuffle.h 72 0x24ba +transposeshuffle.h 72 0x24ba 1 x +transposeshuffle.h 72 0x24ba 2 +transposeshuffle.h 79 0x24cc x +transposeshuffle.h 79 0x24d0 +transposeshuffle.h 72 0x24d6 +transposeshuffle.h 72 0x24d6 1 +transposeshuffle.h 72 0x24da x +transposeshuffle.h 72 0x24da 1 x +transposeshuffle.h 116 0x24e4 +transposeshuffle.h 116 0x24ea x +transposeshuffle.h 116 0x24fa +transposeshuffle.h 116 0x24fa 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2522 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 116 0x2522 1 +transposeshuffle.h 119 0x2522 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x252c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 116 0x252c 1 x +transposeshuffle.h 119 0x252c 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2536 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 116 0x2536 1 +transposeshuffle.h 119 0x2536 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2540 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 119 0x2540 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2550 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 119 0x2550 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2560 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 119 0x2560 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2570 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 119 0x2570 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2580 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 119 0x2580 1 +transposeshuffle.h 120 0x2580 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2590 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 119 0x2590 1 x +transposeshuffle.h 120 0x2590 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x25a0 +vector.hpp 1159 0x25a0 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 119 0x25a0 2 +transposeshuffle.h 120 0x25a0 3 x +transposeshuffle.h 122 0x25a0 4 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x25b0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 120 0x25b0 1 +transposeshuffle.h 122 0x25b0 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x25b8 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 120 0x25b8 1 +transposeshuffle.h 122 0x25b8 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x25c0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 120 0x25c0 1 +transposeshuffle.h 122 0x25c0 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x25c8 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 120 0x25c8 1 +transposeshuffle.h 122 0x25c8 2 +transposeshuffle.h 126 0x25c8 3 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x25d2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 120 0x25d2 1 x +transposeshuffle.h 122 0x25d2 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x25da + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 120 0x25da 1 +transposeshuffle.h 122 0x25da 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x25e2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 120 0x25e2 1 +transposeshuffle.h 122 0x25e2 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x25ea + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 122 0x25ea 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x25f0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 122 0x25f0 1 +transposeshuffle.h 116 0x2600 x +transposeshuffle.h 116 0x2604 +transposeshuffle.h 116 0x260a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2610 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 119 0x2610 1 x +transposeshuffle.h 120 0x2660 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2680 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 122 0x2680 1 x +transposeshuffle.h 126 0x2690 x +transposeshuffle.h 86 0x26a0 +transposeshuffle.h 86 0x26a6 x +transposeshuffle.h 86 0x26b6 +transposeshuffle.h 86 0x26b6 1 +transposeshuffle.h 86 0x26c6 +transposeshuffle.h 86 0x26c6 1 +transposeshuffle.h 86 0x26d0 +transposeshuffle.h 86 0x26d0 1 +transposeshuffle.h 87 0x26d0 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x26da +aie_core.h 100 0x26da 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1132 0x26da 2 +vector.hpp 1152 0x26da 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 86 0x26da 4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x26e4 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x26e4 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 86 0x26e4 2 +transposeshuffle.h 86 0x26e4 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x26f0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1132 0x26f0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x26f0 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 86 0x26f0 3 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x26fc + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1132 0x26fc 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 86 0x26fc 2 +transposeshuffle.h 86 0x26fc 3 +transposeshuffle.h 86 0x2708 +transposeshuffle.h 87 0x2708 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x2712 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 86 0x2712 1 +transposeshuffle.h 86 0x2718 +transposeshuffle.h 87 0x2718 1 x +transposeshuffle.h 86 0x2722 x +transposeshuffle.h 86 0x2722 1 x +transposeshuffle.h 86 0x272c +transposeshuffle.h 86 0x272c 1 +transposeshuffle.h 86 0x2736 +transposeshuffle.h 86 0x2740 +transposeshuffle.h 87 0x2750 x +transposeshuffle.h 87 0x2760 +transposeshuffle.h 88 0x2770 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1132 0x2794 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 88 0x2794 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x279e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1132 0x279e 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 88 0x279e 2 x +transposeshuffle.h 88 0x27a8 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1132 0x27ac x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x27b0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1132 0x27b0 1 +vector.hpp 1132 0x27d0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x27e0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1132 0x27e0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x27f0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1132 0x2800 x +vector.hpp 1152 0x2800 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x2800 2 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x2810 x +aie_core.h 100 0x2810 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1132 0x2810 2 +vector.hpp 1152 0x2810 3 x +vector.hpp 1152 0x2820 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x2820 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x2830 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x2830 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x2838 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x2838 1 x +vector.hpp 1152 0x283c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x283c 1 x +transpose.hpp 225 0x2844 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x284e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x284e 1 +vector.hpp 1152 0x284e 2 x +vector.hpp 1152 0x2856 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x2856 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x285e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 225 0x285e 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x2866 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x2866 1 x +vector.hpp 1152 0x286a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 88 0x2870 x +transposeshuffle.h 88 0x2876 +transposeshuffle.h 88 0x287c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1132 0x2880 x +vector.hpp 1152 0x2880 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x2886 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1132 0x2886 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/aie2p/../aie2/transpose.hpp: +transpose.hpp 224 0x28c0 x +transpose.hpp 225 0x28d0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x28e0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x28e0 1 x +vector.hpp 1152 0x28f0 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 87 0x2900 x +transposeshuffle.h 86 0x2910 x +transposeshuffle.h 126 0x2920 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable4/src/0_0_reloadable4.cc: +0_0_reloadable4.cc 30 0x2a00 x +0_0_reloadable4.cc 32 0x2a00 1 +0_0_reloadable4.cc 32 0x2a04 x +0_0_reloadable4.cc 34 0x2a08 x +0_0_reloadable4.cc 33 0x2a0c x +0_0_reloadable4.cc 31 0x2a10 x +0_0_reloadable4.cc 38 0x2c10 x +0_0_reloadable4.cc 40 0x2c10 1 +0_0_reloadable4.cc 40 0x2c14 x +0_0_reloadable4.cc 42 0x2c18 x +0_0_reloadable4.cc 41 0x2c1c x +0_0_reloadable4.cc 39 0x2c20 x +0_0_reloadable4.cc 46 0x2c30 x +0_0_reloadable4.cc 48 0x2c30 1 +0_0_reloadable4.cc 48 0x2c34 x +0_0_reloadable4.cc 50 0x2c38 x +0_0_reloadable4.cc 49 0x2c3c x +0_0_reloadable4.cc 47 0x2c40 x +0_0_reloadable4.cc 54 0x2c50 x +0_0_reloadable4.cc 56 0x2c50 1 +0_0_reloadable4.cc 56 0x2c54 x +0_0_reloadable4.cc 58 0x2c58 x +0_0_reloadable4.cc 57 0x2c5c x +0_0_reloadable4.cc 55 0x2c60 x +0_0_reloadable4.cc 62 0x2c70 x +0_0_reloadable4.cc 64 0x2c70 1 +0_0_reloadable4.cc 64 0x2c74 x +0_0_reloadable4.cc 65 0x2c78 x +0_0_reloadable4.cc 67 0x2c7c x +0_0_reloadable4.cc 66 0x2c80 x +0_0_reloadable4.cc 63 0x2c84 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/vitis_mllib/L1/include/misc/slice_generic_innermost_params.h: +slice_generic_innermost_params.h 40 0x2ca0 x +slice_generic_innermost_params.h 41 0x2ca0 1 x +slice_generic_innermost_params.h 41 0x2cb0 x +slice_generic_innermost_params.h 42 0x2cb4 x +slice_generic_innermost_params.h 42 0x2cc4 +slice_generic_innermost_params.h 43 0x2cc8 x +slice_generic_innermost_params.h 43 0x2cd8 +slice_generic_innermost_params.h 44 0x2cdc x +slice_generic_innermost_params.h 44 0x2cec +slice_generic_innermost_params.h 45 0x2cf0 x +slice_generic_innermost_params.h 45 0x2d00 +slice_generic_innermost_params.h 46 0x2d04 x +slice_generic_innermost_params.h 46 0x2d14 +slice_generic_innermost_params.h 47 0x2d18 x +slice_generic_innermost_params.h 47 0x2d28 +slice_generic_innermost_params.h 48 0x2d2c x +slice_generic_innermost_params.h 49 0x2d32 x +slice_generic_innermost_params.h 48 0x2d3e x +slice_generic_innermost_params.h 52 0x2d50 x +slice_generic_innermost_params.h 53 0x2d50 1 x +slice_generic_innermost_params.h 55 0x2d50 2 +slice_generic_innermost_params.h 58 0x2d50 3 +slice_generic_innermost_params.h 53 0x2d5a x +slice_generic_innermost_params.h 58 0x2d5a 1 +slice_generic_innermost_params.h 59 0x2d5a 2 +slice_generic_innermost_params.h 53 0x2d64 +slice_generic_innermost_params.h 60 0x2d64 1 +slice_generic_innermost_params.h 62 0x2d64 2 +slice_generic_innermost_params.h 55 0x2d6a x +slice_generic_innermost_params.h 60 0x2d6a 1 +slice_generic_innermost_params.h 53 0x2d7a x +slice_generic_innermost_params.h 58 0x2d7e x +slice_generic_innermost_params.h 58 0x2d82 +slice_generic_innermost_params.h 53 0x2d86 x +slice_generic_innermost_params.h 58 0x2d86 1 +slice_generic_innermost_params.h 75 0x2d8c x +slice_generic_innermost_params.h 59 0x2d90 x +slice_generic_innermost_params.h 59 0x2d94 +slice_generic_innermost_params.h 60 0x2d98 x +slice_generic_innermost_params.h 60 0x2d9c +slice_generic_innermost_params.h 62 0x2da0 x +slice_generic_innermost_params.h 79 0x2db0 x +slice_generic_innermost_params.h 80 0x2db0 1 x +slice_generic_innermost_params.h 81 0x2db6 +slice_generic_innermost_params.h 81 0x2dba +slice_generic_innermost_params.h 81 0x2dd0 x +slice_generic_innermost_params.h 81 0x2dd6 +slice_generic_innermost_params.h 81 0x2dda + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable4/src/0_0_reloadable4.cc: +0_0_reloadable4.cc 71 0x3090 x +0_0_reloadable4.cc 73 0x3090 1 +0_0_reloadable4.cc 73 0x3094 x +0_0_reloadable4.cc 75 0x3098 x +0_0_reloadable4.cc 74 0x309c x +0_0_reloadable4.cc 72 0x30a0 x +0_0_reloadable4.cc 91 0x9e0 x +0_0_reloadable4.cc 93 0x9e0 1 +0_0_reloadable4.cc 93 0x9e0 2 x +0_0_reloadable4.cc 91 0x9e6 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0x9f0 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable4/src/0_0_reloadable4.cc: +0_0_reloadable4.cc 98 0x9f0 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0x9f8 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable4/src/0_0_reloadable4.cc: +0_0_reloadable4.cc 95 0x9f8 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 614 0xa08 x +io_buffer_compiler.h 614 0xa0c +io_buffer_compiler.h 614 0xa10 +io_buffer_compiler.h 614 0xa14 +io_buffer_compiler.h 614 0xa18 +io_buffer_compiler.h 219 0xa28 x +io_buffer_compiler.h 219 0xa28 1 x +io_buffer_compiler.h 218 0xa2c x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 434 0xa30 +io_buffer_main.h 434 0xa30 1 +io_buffer_main.h 434 0xa30 2 +io_buffer_main.h 434 0xa3c x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable4/src/0_0_reloadable4.cc: +0_0_reloadable4.cc 95 0xa40 +0_0_reloadable4.cc 95 0xa40 1 +0_0_reloadable4.cc 98 0xa40 2 +0_0_reloadable4.cc 101 0xa40 3 +0_0_reloadable4.cc 95 0xa46 +0_0_reloadable4.cc 95 0xa46 1 x +0_0_reloadable4.cc 95 0xa4c +0_0_reloadable4.cc 95 0xa4c 1 +0_0_reloadable4.cc 95 0xa52 +0_0_reloadable4.cc 98 0xa52 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 434 0xa5c + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 614 0xa6e x +io_buffer_compiler.h 614 0xa72 +io_buffer_compiler.h 614 0xa76 +io_buffer_compiler.h 614 0xa7a +io_buffer_compiler.h 614 0xa7e +io_buffer_compiler.h 219 0xa8e x +io_buffer_compiler.h 219 0xa8e 1 x +io_buffer_compiler.h 218 0xa92 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 434 0xa9e x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable4/src/0_0_reloadable4.cc: +0_0_reloadable4.cc 98 0xaa2 x +0_0_reloadable4.cc 98 0xaa6 +0_0_reloadable4.cc 98 0xaa6 1 +0_0_reloadable4.cc 98 0xaac +0_0_reloadable4.cc 98 0xaac 1 +0_0_reloadable4.cc 98 0xab2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 614 0xac4 x +io_buffer_compiler.h 614 0xac8 +io_buffer_compiler.h 614 0xacc +io_buffer_compiler.h 614 0xad0 +io_buffer_compiler.h 614 0xad4 +io_buffer_compiler.h 219 0xae4 x +io_buffer_compiler.h 219 0xae4 1 x +io_buffer_compiler.h 218 0xae8 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 434 0xaf4 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable4/src/0_0_reloadable4.cc: +0_0_reloadable4.cc 101 0xaf8 x +0_0_reloadable4.cc 101 0xafc +0_0_reloadable4.cc 101 0xb00 +0_0_reloadable4.cc 101 0xb06 +0_0_reloadable4.cc 101 0xb18 +0_0_reloadable4.cc 104 0xb1c +0_0_reloadable4.cc 106 0xb1c 1 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0xb30 +io_buffer_compiler.h 630 0xb30 1 +io_buffer_compiler.h 630 0xb30 2 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 464 0xb30 3 +io_buffer_main.h 464 0xb30 4 +io_buffer_main.h 464 0xb30 5 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable4/src/0_0_reloadable4.cc: +0_0_reloadable4.cc 104 0xb30 6 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 464 0xb36 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable4/src/0_0_reloadable4.cc: +0_0_reloadable4.cc 106 0xb3a + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0xb3e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 464 0xb3e 1 +io_buffer_main.h 464 0xb42 + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 629 0xb4a x +io_buffer_compiler.h 629 0xb4e + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 464 0xb5e x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0xb62 +io_buffer_compiler.h 630 0xb62 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 464 0xb68 + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable4/src/0_0_reloadable4.cc: +0_0_reloadable4.cc 106 0xb68 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0xb78 x +io_buffer_compiler.h 629 0xb7c x +io_buffer_compiler.h 630 0xb7c 1 +io_buffer_compiler.h 629 0xb82 +io_buffer_compiler.h 630 0xb82 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 464 0xb92 +io_buffer_main.h 464 0xb96 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0xb9a +io_buffer_compiler.h 630 0xb9a 1 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable4/src/0_0_reloadable4.cc: +0_0_reloadable4.cc 109 0xba0 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0xbae x +io_buffer_compiler.h 629 0xbb2 x +io_buffer_compiler.h 630 0xbb2 1 +io_buffer_compiler.h 629 0xbb8 +io_buffer_compiler.h 630 0xbb8 1 x + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 464 0xbca x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable4/src/0_0_reloadable4.cc: +0_0_reloadable4.cc 111 0xbce + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0xbd2 x + +/proj/vaiml_int/staff/praveeni/csr_iswarya_zoom_models/run/results_6562/2025.2_PCIE_ISV_BF16_O2_STX_WIN_hw_lin_lin_x86/encoder-epoch-99-avg-1-T-151_static_20250527144825/TEST_WORK_20250529_154000_CSR_6562_IPU_STX_WIN_flexml_rai_1_4_lnx64/encoder-epoch-99-avg-1-T-151_static_20250527144825/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable4/src/0_0_reloadable4.cc: +0_0_reloadable4.cc 111 0xbe6 x +0_0_reloadable4.cc 111 0xbec + +/proj/aiebuilds/ryzen-ai/ryzen-ai-TA/main/ryzenai_1.5.0_2025_05_28_6217/lnx64/lib/python3.10/site-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 630 0xbf0 x +io_buffer_compiler.h 630 0xbf6 +io_buffer_compiler.h 630 0xbfa +io_buffer_compiler.h 630 0xbfe +io_buffer_compiler.h - 0xbff + + +CU: me_div.c: +File name Line number Starting address View Stmt + +./me_div.c:[++] +me_div.c 108 0x30b0 +me_div.c 108 0x30b0 1 +me_div.c 115 0x30b0 2 x +me_div.c 108 0x30b6 +me_div.c 108 0x30ba +me_div.c 108 0x30be +me_div.c 108 0x30c2 +me_div.c 108 0x30c6 +me_div.c 108 0x30ca +me_div.c 108 0x30ce +me_div.c 108 0x30d2 +me_div.c 108 0x30d6 +me_div.c 108 0x30da +me_div.c 108 0x30de +me_div.c 108 0x30e2 +me_div.c 108 0x30e6 +me_div.c 108 0x30ea +me_div.c 108 0x30ee +me_div.c 108 0x30f2 +me_div.c 108 0x30f6 +me_div.c 108 0x30fa +me_div.c 108 0x30fe +me_div.c 108 0x3102 +me_div.c 108 0x3106 +me_div.c 108 0x310a +me_div.c 108 0x310e +me_div.c 108 0x3112 +me_div.c 108 0x3116 +me_div.c 108 0x311a +me_div.c 108 0x311e +me_div.c 108 0x3122 +me_div.c 119 0x3126 x +me_div.c 108 0x312a x +me_div.c 108 0x312e +me_div.c 108 0x3132 +me_div.c 108 0x3136 +me_div.c - 0x3137 + + +CU: No directory table +CU: Empty file name table + - 0x1 + + +CU: src/string.c: +File name Line number Starting address View Stmt + +src/string.c: +string.c 325 0x3140 x +string.c 328 0x3140 1 x +string.c 329 0x3146 +string.c 328 0x3152 x +string.c 329 0x3152 1 +string.c 328 0x315a +string.c 328 0x3160 +string.c 329 0x3170 x +string.c 330 0x31e0 x +string.c - 0x31e1 + + diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable9/scripts/0_0_reloadable9.bcf b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable9/scripts/0_0_reloadable9.bcf new file mode 100644 index 0000000000000000000000000000000000000000..b5025c34b99f02de39e461699cdc760aa2cbe456 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable9/scripts/0_0_reloadable9.bcf @@ -0,0 +1,16 @@ +_reserved DMb 0x0 0x40000 + +_reserved PM 0x0 0x9e0 //reserved for main elf + +_entry_point _Z13kernelWrapperPPvjjjj +_symbol _Z13kernelWrapperPPvjjjj 0x9e0 + +_reserved DMb 0x7b540 0x800 //reserved for lcp ping-pong buffers +_reserved DMb 0x7bd40 0x40 //reserved for sync buffer +_stack DM_stack 0x7bd80 0x440 //stack for core +_reserved DMb 0x7c1c0 0x40 //reserved for main elf heap +//space for synopsys compiler at 0x7c200 0x800//heap +_reserved DMb 0x40000 0x3b540 + +_reserved DMb 0x7ca00 0x3600 +_reserved DMb 0x80000 0x80000 // And everything else the core can't see diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable9/scripts/0_0_reloadable9.prx b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable9/scripts/0_0_reloadable9.prx new file mode 100644 index 0000000000000000000000000000000000000000..282bebbdfa8a101cd0cc71c45ebdeb7337df3123 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable9/scripts/0_0_reloadable9.prx @@ -0,0 +1,13 @@ + + + \ No newline at end of file diff --git a/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable9/src/0_0_reloadable9.cc b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable9/src/0_0_reloadable9.cc new file mode 100644 index 0000000000000000000000000000000000000000..c11a8425dc34caadf071f05592b95eb6d0215601 --- /dev/null +++ b/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable9/src/0_0_reloadable9.cc @@ -0,0 +1,111 @@ +// Automatically generated processor driver using AIEngine tool-chain + +#include +#include +#include + + +// Declare Kernel functions and initializers +void superkernel_conv2d_dwc(adf::io_buffer>> &__restrict,adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict); +#include "transpose4d_adf_wrapper.cpp" +#include "buffer_pad_adf_wrapper.cpp" +void superkernel_add1d_attribute_broadcasting(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict); +void superkernel_sigmoid1d(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict); +void superkernel_mul1d(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict,adf::io_buffer, adf::locking::async>> &__restrict); +#include "slice_generic_innermost_adf_wrapper.cpp" + +// Declare Kernel objects and external arrays + + +void _b14160_wrapper(void* args[]) +{ + superkernel_conv2d_dwc( + *reinterpret_cast*>(args[0]), + *reinterpret_cast*>(args[1]), + *reinterpret_cast(args[3]), + *reinterpret_cast*>(args[2])); +} + +void _b7835_wrapper(void* args[]) +{ + mllib_graphs::transpose4d_adf_wrapper>, adf::io_buffer_config>>( + *reinterpret_cast*>(args[0]), + *reinterpret_cast*>(args[1]), + *reinterpret_cast(args[2])); +} + +void _b8148_wrapper(void* args[]) +{ + mllib_graphs::buffer_pad_adf_wrapper>, adf::io_buffer_config>>( + *reinterpret_cast*>(args[0]), + *reinterpret_cast*>(args[1]), + *reinterpret_cast(args[2])); +} + +void _b13739_wrapper(void* args[]) +{ + superkernel_add1d_attribute_broadcasting( + *reinterpret_cast*>(args[0]), + *reinterpret_cast(args[2]), + *reinterpret_cast*>(args[1])); +} + +void _b13744_wrapper(void* args[]) +{ + superkernel_sigmoid1d( + *reinterpret_cast*>(args[0]), + *reinterpret_cast(args[2]), + *reinterpret_cast*>(args[1])); +} + +void _b13749_wrapper(void* args[]) +{ + superkernel_mul1d( + *reinterpret_cast*>(args[0]), + *reinterpret_cast(args[3]), + *reinterpret_cast*>(args[1]), + *reinterpret_cast*>(args[2])); +} + +void _b8170_wrapper(void* args[]) +{ + mllib_graphs::slice_generic_innermost_adf_wrapper>, adf::io_buffer_config>>( + *reinterpret_cast*>(args[0]), + *reinterpret_cast*>(args[1]), + *reinterpret_cast(args[2])); +} + +using UniformKernelFunc = void (*)(void **); + +static UniformKernelFunc g_uniformKernelFuncs[7] = { + _b14160_wrapper, + _b7835_wrapper, + _b8148_wrapper, + _b13739_wrapper, + _b13744_wrapper, + _b13749_wrapper, + _b8170_wrapper +}; + +__attribute__((always_inline)) void kernelWrapper(void* args[], uint32 kernelId, uint32 numSyncIn, uint32 numAsyncIn, uint32 numSyncOut) +{ + uint32 idx = 0; + reinterpret_cast(args[idx])->acquire(numSyncIn > 0); + idx += (numSyncIn > 0) ? 1 : 0; + reinterpret_cast(args[idx])->acquire(numSyncIn > 1); + idx += (numSyncIn > 1) ? 1 : 0; + idx += numAsyncIn; + reinterpret_cast(args[idx])->acquire(numSyncOut > 0); + idx += (numSyncOut > 0) ? 1 : 0; + + (*(g_uniformKernelFuncs[kernelId]))(args); + + idx = 0; + reinterpret_cast(args[idx])->release(numSyncIn > 0); + idx += (numSyncIn > 0) ? 1 : 0; + reinterpret_cast(args[idx])->release(numSyncIn > 1); + idx += (numSyncIn > 1) ? 1 : 0; + idx += numAsyncIn; + reinterpret_cast(args[idx])->release(numSyncOut > 0); + idx += (numSyncOut > 0) ? 1 : 0; +} diff --git a/vaiml_par_0/0/buffer_info.json b/vaiml_par_0/0/buffer_info.json new file mode 100644 index 0000000000000000000000000000000000000000..437a91aeb7f14e910067c73a16053434a7a7c0c5 --- /dev/null +++ b/vaiml_par_0/0/buffer_info.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e57d343a0b5f38117927210a679e27348e1f2f34884d2cf602998d1ee7f3a79f +size 12109144 diff --git a/vaiml_par_0/fused.viz.json b/vaiml_par_0/fused.viz.json new file mode 100644 index 0000000000000000000000000000000000000000..2e12482c064a817522c6005e52df47a91a52c104 --- /dev/null +++ b/vaiml_par_0/fused.viz.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c3d1e094e30f74bee14b9c75823f42cb52a9e0b0c4fef454faace43b801349a +size 12574846